| 
									
										
										
										
											2021-05-20 12:49:33 +02:00
										 |  |  | // Copyright 2009-2021 Intel Corporation
 | 
					
						
							| 
									
										
										
										
											2021-04-20 18:38:09 +02:00
										 |  |  | // SPDX-License-Identifier: Apache-2.0
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "barrier.h"
 | 
					
						
							|  |  |  | #include "condition.h"
 | 
					
						
							|  |  |  | #include "regression.h"
 | 
					
						
							|  |  |  | #include "thread.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if defined (__WIN32__)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define WIN32_LEAN_AND_MEAN
 | 
					
						
							|  |  |  | #include <windows.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace embree | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   struct BarrierSysImplementation | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     __forceinline BarrierSysImplementation (size_t N)  | 
					
						
							|  |  |  |       : i(0), enterCount(0), exitCount(0), barrierSize(0)  | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       events[0] = CreateEvent(nullptr, TRUE, FALSE, nullptr); | 
					
						
							|  |  |  |       events[1] = CreateEvent(nullptr, TRUE, FALSE, nullptr); | 
					
						
							|  |  |  |       init(N); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     __forceinline ~BarrierSysImplementation () | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       CloseHandle(events[0]); | 
					
						
							|  |  |  |       CloseHandle(events[1]); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     __forceinline void init(size_t N)  | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       barrierSize = N; | 
					
						
							|  |  |  |       enterCount.store(N); | 
					
						
							|  |  |  |       exitCount.store(N); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     __forceinline void wait() | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       /* every thread entering the barrier decrements this count */ | 
					
						
							|  |  |  |       size_t i0 = i; | 
					
						
							|  |  |  |       size_t cnt0 = enterCount--; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       /* all threads except the last one are wait in the barrier */ | 
					
						
							|  |  |  |       if (cnt0 > 1)  | 
					
						
							|  |  |  |       { | 
					
						
							|  |  |  |         if (WaitForSingleObject(events[i0], INFINITE) != WAIT_OBJECT_0) | 
					
						
							|  |  |  |           THROW_RUNTIME_ERROR("WaitForSingleObjects failed"); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |       /* the last thread starts all threads waiting at the barrier */ | 
					
						
							|  |  |  |       else  | 
					
						
							|  |  |  |       { | 
					
						
							|  |  |  |         i = 1-i; | 
					
						
							|  |  |  |         enterCount.store(barrierSize); | 
					
						
							|  |  |  |         if (SetEvent(events[i0]) == 0) | 
					
						
							|  |  |  |           THROW_RUNTIME_ERROR("SetEvent failed"); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       /* every thread leaving the barrier decrements this count */ | 
					
						
							|  |  |  |       size_t cnt1 = exitCount--; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       /* the last thread that left the barrier resets the event again */ | 
					
						
							|  |  |  |       if (cnt1 == 1)  | 
					
						
							|  |  |  |       { | 
					
						
							|  |  |  |         exitCount.store(barrierSize); | 
					
						
							|  |  |  |         if (ResetEvent(events[i0]) == 0) | 
					
						
							|  |  |  |           THROW_RUNTIME_ERROR("ResetEvent failed"); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   public: | 
					
						
							|  |  |  |     HANDLE events[2]; | 
					
						
							|  |  |  |     atomic<size_t> i; | 
					
						
							|  |  |  |     atomic<size_t> enterCount; | 
					
						
							|  |  |  |     atomic<size_t> exitCount; | 
					
						
							|  |  |  |     size_t barrierSize; | 
					
						
							|  |  |  |   }; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace embree | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   struct BarrierSysImplementation | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     __forceinline BarrierSysImplementation (size_t N)  | 
					
						
							|  |  |  |       : count(0), barrierSize(0)  | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       init(N); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     __forceinline void init(size_t N)  | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       assert(count == 0); | 
					
						
							|  |  |  |       count = 0; | 
					
						
							|  |  |  |       barrierSize = N; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     __forceinline void wait() | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       mutex.lock(); | 
					
						
							|  |  |  |       count++; | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |       if (count == barrierSize) { | 
					
						
							|  |  |  |         count = 0; | 
					
						
							|  |  |  |         cond.notify_all(); | 
					
						
							|  |  |  |         mutex.unlock(); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |        | 
					
						
							|  |  |  |       cond.wait(mutex); | 
					
						
							|  |  |  |       mutex.unlock(); | 
					
						
							|  |  |  |       return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   public: | 
					
						
							|  |  |  |     MutexSys mutex; | 
					
						
							|  |  |  |     ConditionSys cond; | 
					
						
							|  |  |  |     volatile size_t count; | 
					
						
							|  |  |  |     volatile size_t barrierSize; | 
					
						
							|  |  |  |   }; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace embree | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   BarrierSys::BarrierSys (size_t N) { | 
					
						
							|  |  |  |     opaque = new BarrierSysImplementation(N); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   BarrierSys::~BarrierSys () { | 
					
						
							|  |  |  |     delete (BarrierSysImplementation*) opaque; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   void BarrierSys::init(size_t count) { | 
					
						
							|  |  |  |     ((BarrierSysImplementation*) opaque)->init(count); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   void BarrierSys::wait() { | 
					
						
							|  |  |  |     ((BarrierSysImplementation*) opaque)->wait(); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   LinearBarrierActive::LinearBarrierActive (size_t N)  | 
					
						
							|  |  |  |     : count0(nullptr), count1(nullptr), mode(0), flag0(0), flag1(0), threadCount(0) | 
					
						
							|  |  |  |   {  | 
					
						
							|  |  |  |     if (N == 0) N = getNumberOfLogicalThreads(); | 
					
						
							|  |  |  |     init(N); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   LinearBarrierActive::~LinearBarrierActive()  | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     delete[] count0; | 
					
						
							|  |  |  |     delete[] count1; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   void LinearBarrierActive::init(size_t N)  | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     if (threadCount != N) { | 
					
						
							|  |  |  |       threadCount = N; | 
					
						
							|  |  |  |       if (count0) delete[] count0; count0 = new unsigned char[N]; | 
					
						
							|  |  |  |       if (count1) delete[] count1; count1 = new unsigned char[N]; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     mode      = 0; | 
					
						
							|  |  |  |     flag0     = 0; | 
					
						
							|  |  |  |     flag1     = 0; | 
					
						
							|  |  |  |     for (size_t i=0; i<N; i++) count0[i] = 0; | 
					
						
							|  |  |  |     for (size_t i=0; i<N; i++) count1[i] = 0; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   void LinearBarrierActive::wait (const size_t threadIndex) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     if (mode == 0) | 
					
						
							|  |  |  |     {			 | 
					
						
							|  |  |  |       if (threadIndex == 0) | 
					
						
							|  |  |  |       {	 | 
					
						
							|  |  |  |         for (size_t i=0; i<threadCount; i++) | 
					
						
							|  |  |  |           count1[i] = 0; | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         for (size_t i=1; i<threadCount; i++) | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |           while (likely(count0[i] == 0))  | 
					
						
							|  |  |  |             pause_cpu(); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         mode  = 1; | 
					
						
							|  |  |  |         flag1 = 0; | 
					
						
							|  |  |  |         __memory_barrier(); | 
					
						
							|  |  |  |         flag0 = 1; | 
					
						
							|  |  |  |       }			 | 
					
						
							|  |  |  |       else | 
					
						
							|  |  |  |       {					 | 
					
						
							|  |  |  |         count0[threadIndex] = 1; | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |           while (likely(flag0 == 0)) | 
					
						
							|  |  |  |             pause_cpu(); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |       }		 | 
					
						
							|  |  |  |     }					 | 
					
						
							|  |  |  |     else						 | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       if (threadIndex == 0) | 
					
						
							|  |  |  |       {	 | 
					
						
							|  |  |  |         for (size_t i=0; i<threadCount; i++) | 
					
						
							|  |  |  |           count0[i] = 0; | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         for (size_t i=1; i<threadCount; i++) | 
					
						
							|  |  |  |         {		 | 
					
						
							|  |  |  |           while (likely(count1[i] == 0)) | 
					
						
							|  |  |  |             pause_cpu(); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         mode  = 0; | 
					
						
							|  |  |  |         flag0 = 0; | 
					
						
							|  |  |  |         __memory_barrier(); | 
					
						
							|  |  |  |         flag1 = 1; | 
					
						
							|  |  |  |       }			 | 
					
						
							|  |  |  |       else | 
					
						
							|  |  |  |       {					 | 
					
						
							|  |  |  |         count1[threadIndex] = 1; | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |           while (likely(flag1 == 0)) | 
					
						
							|  |  |  |             pause_cpu(); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |       }		 | 
					
						
							|  |  |  |     }					 | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   struct barrier_sys_regression_test : public RegressionTest | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     BarrierSys barrier; | 
					
						
							|  |  |  |     std::atomic<size_t> threadID; | 
					
						
							|  |  |  |     std::atomic<size_t> numFailed; | 
					
						
							|  |  |  |     std::vector<size_t> threadResults; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     barrier_sys_regression_test()  | 
					
						
							|  |  |  |       : RegressionTest("barrier_sys_regression_test"), threadID(0), numFailed(0) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       registerRegressionTest(this); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     static void thread_alloc(barrier_sys_regression_test* This) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       size_t tid = This->threadID++; | 
					
						
							|  |  |  |       for (size_t j=0; j<1000; j++) | 
					
						
							|  |  |  |       { | 
					
						
							|  |  |  |         This->barrier.wait(); | 
					
						
							|  |  |  |         This->threadResults[tid] = tid; | 
					
						
							|  |  |  |         This->barrier.wait(); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     bool run () | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       threadID.store(0); | 
					
						
							|  |  |  |       numFailed.store(0); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       size_t numThreads = getNumberOfLogicalThreads(); | 
					
						
							|  |  |  |       threadResults.resize(numThreads); | 
					
						
							|  |  |  |       barrier.init(numThreads+1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       /* create threads */ | 
					
						
							|  |  |  |       std::vector<thread_t> threads; | 
					
						
							|  |  |  |       for (size_t i=0; i<numThreads; i++) | 
					
						
							|  |  |  |         threads.push_back(createThread((thread_func)thread_alloc,this)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       /* run test */  | 
					
						
							|  |  |  |       for (size_t i=0; i<1000; i++) | 
					
						
							|  |  |  |       { | 
					
						
							|  |  |  |         for (size_t i=0; i<numThreads; i++) threadResults[i] = 0; | 
					
						
							|  |  |  |         barrier.wait(); | 
					
						
							|  |  |  |         barrier.wait(); | 
					
						
							|  |  |  |         for (size_t i=0; i<numThreads; i++) numFailed += threadResults[i] != i; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       /* destroy threads */ | 
					
						
							|  |  |  |       for (size_t i=0; i<numThreads; i++) | 
					
						
							|  |  |  |         join(threads[i]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       return numFailed == 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   barrier_sys_regression_test barrier_sys_regression_test; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 |