mirror of
				https://github.com/godotengine/godot.git
				synced 2025-10-29 20:51:14 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			485 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			485 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2009-2021 Intel Corporation
 | |
| // SPDX-License-Identifier: Apache-2.0
 | |
| 
 | |
| #include "thread.h"
 | |
| #include "sysinfo.h"
 | |
| #include "string.h"
 | |
| 
 | |
| #include <iostream>
 | |
| #if defined(__ARM_NEON)
 | |
| #include "../simd/arm/emulation.h"
 | |
| #else
 | |
| #include <xmmintrin.h>
 | |
| #if defined(__EMSCRIPTEN__)
 | |
| #include "../simd/wasm/emulation.h"
 | |
| #endif
 | |
| #endif
 | |
| 
 | |
| #if defined(PTHREADS_WIN32)
 | |
| #pragma comment (lib, "pthreadVC.lib")
 | |
| #endif
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| /// Windows Platform
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| #if defined(__WIN32__)
 | |
| 
 | |
| #define WIN32_LEAN_AND_MEAN
 | |
| #include <windows.h>
 | |
| 
 | |
| namespace embree
 | |
| {
 | |
|   /*! set the affinity of a given thread */
 | |
|   void setAffinity(HANDLE thread, ssize_t affinity)
 | |
|   {
 | |
|     typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
 | |
|     typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
 | |
|     typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
 | |
|     typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);
 | |
|     HMODULE hlib = LoadLibrary("Kernel32");
 | |
|     GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
 | |
|     GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
 | |
|     SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
 | |
|     SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
 | |
|     if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx) 
 | |
|     {
 | |
|       int groups = pGetActiveProcessorGroupCount();
 | |
|       int totalProcessors = 0, group = 0, number = 0;
 | |
|       for (int i = 0; i<groups; i++) {
 | |
|         int processors = pGetActiveProcessorCount(i);
 | |
|         if (totalProcessors + processors > affinity) {
 | |
|           group = i;
 | |
|           number = (int)affinity - totalProcessors;
 | |
|           break;
 | |
|         }
 | |
|         totalProcessors += processors;
 | |
|       }
 | |
|   
 | |
|       GROUP_AFFINITY groupAffinity;
 | |
|       groupAffinity.Group = (WORD)group;
 | |
|       groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
 | |
|       groupAffinity.Reserved[0] = 0;
 | |
|       groupAffinity.Reserved[1] = 0;
 | |
|       groupAffinity.Reserved[2] = 0;
 | |
|       if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
 | |
|         WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning
 | |
|   
 | |
|       PROCESSOR_NUMBER processorNumber;
 | |
|       processorNumber.Group = group;
 | |
|       processorNumber.Number = number;
 | |
|       processorNumber.Reserved = 0;
 | |
|       if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
 | |
|         WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning
 | |
|     } 
 | |
|     else 
 | |
|     {
 | |
|       if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
 | |
|         WARNING("SetThreadAffinityMask failed"); // on purpose only a warning
 | |
|       if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
 | |
|         WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning
 | |
|       }
 | |
|   }
 | |
| 
 | |
|   /*! set affinity of the calling thread */
 | |
|   void setAffinity(ssize_t affinity) {
 | |
|     setAffinity(GetCurrentThread(), affinity);
 | |
|   }
 | |
| 
 | |
|   struct ThreadStartupData 
 | |
|   {
 | |
|   public:
 | |
|     ThreadStartupData (thread_func f, void* arg) 
 | |
|       : f(f), arg(arg) {}
 | |
|   public:
 | |
|     thread_func f;
 | |
|     void* arg;
 | |
|   };
 | |
| 
 | |
|   DWORD WINAPI threadStartup(LPVOID ptr)
 | |
|   {
 | |
|     ThreadStartupData* parg = (ThreadStartupData*) ptr;
 | |
|     _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
 | |
|     parg->f(parg->arg);
 | |
|     delete parg;
 | |
|     return 0;
 | |
|   }
 | |
| 
 | |
| #if !defined(PTHREADS_WIN32)
 | |
| 
 | |
|   /*! creates a hardware thread running on specific core */
 | |
|   thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
 | |
|   {
 | |
|     HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr);
 | |
|     if (thread == nullptr) FATAL("CreateThread failed");
 | |
|     if (threadID >= 0) setAffinity(thread, threadID);
 | |
|     return thread_t(thread);
 | |
|   }
 | |
| 
 | |
|   /*! the thread calling this function gets yielded */
 | |
|   void yield() {
 | |
|     SwitchToThread();
 | |
|   }
 | |
| 
 | |
|   /*! waits until the given thread has terminated */
 | |
|   void join(thread_t tid) {
 | |
|     WaitForSingleObject(HANDLE(tid), INFINITE);
 | |
|     CloseHandle(HANDLE(tid));
 | |
|   }
 | |
| 
 | |
|   /*! destroy a hardware thread by its handle */
 | |
|   void destroyThread(thread_t tid) {
 | |
|     TerminateThread(HANDLE(tid),0);
 | |
|     CloseHandle(HANDLE(tid));
 | |
|   }
 | |
| 
 | |
|   /*! creates thread local storage */
 | |
|   tls_t createTls() {
 | |
|     return tls_t(size_t(TlsAlloc()));
 | |
|   }
 | |
| 
 | |
|   /*! set the thread local storage pointer */
 | |
|   void setTls(tls_t tls, void* const ptr) {
 | |
|     TlsSetValue(DWORD(size_t(tls)), ptr);
 | |
|   }
 | |
| 
 | |
|   /*! return the thread local storage pointer */
 | |
|   void* getTls(tls_t tls) {
 | |
|     return TlsGetValue(DWORD(size_t(tls)));
 | |
|   }
 | |
| 
 | |
|   /*! destroys thread local storage identifier */
 | |
|   void destroyTls(tls_t tls) {
 | |
|     TlsFree(DWORD(size_t(tls)));
 | |
|   }
 | |
| #endif
 | |
| }
 | |
| 
 | |
| #endif
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| /// Linux Platform
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| #if defined(__LINUX__) && !defined(__ANDROID__)
 | |
| 
 | |
| #include <fstream>
 | |
| #include <sstream>
 | |
| #include <algorithm>
 | |
| 
 | |
| namespace embree
 | |
| {
 | |
|   static MutexSys mutex;
 | |
|   static std::vector<size_t> threadIDs;
 | |
|   
 | |
|   /* changes thread ID mapping such that we first fill up all thread on one core */
 | |
|   size_t mapThreadID(size_t threadID)
 | |
|   {
 | |
|     Lock<MutexSys> lock(mutex);
 | |
|     
 | |
|     if (threadIDs.size() == 0)
 | |
|     {
 | |
|       /* parse thread/CPU topology */
 | |
|       for (size_t cpuID=0;;cpuID++)
 | |
|       {
 | |
|         std::fstream fs;
 | |
|         std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list");
 | |
|         fs.open (cpu.c_str(), std::fstream::in);
 | |
|         if (fs.fail()) break;
 | |
| 
 | |
|         int i;
 | |
|         while (fs >> i) 
 | |
|         {
 | |
|           if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; }))
 | |
|             threadIDs.push_back(i);
 | |
|           if (fs.peek() == ',') 
 | |
|             fs.ignore();
 | |
|         }
 | |
|         fs.close();
 | |
|       }
 | |
| 
 | |
| #if 0
 | |
|       for (size_t i=0;i<threadIDs.size();i++)
 | |
|         std::cout << i << " -> " << threadIDs[i] << std::endl;
 | |
| #endif
 | |
| 
 | |
|       /* verify the mapping and do not use it if the mapping has errors */
 | |
|       for (size_t i=0;i<threadIDs.size();i++) {
 | |
|         for (size_t j=0;j<threadIDs.size();j++) {
 | |
|           if (i != j && threadIDs[i] == threadIDs[j]) {
 | |
|             threadIDs.clear();
 | |
|           }
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     /* re-map threadIDs if mapping is available */
 | |
|     size_t ID = threadID;
 | |
|     if (threadID < threadIDs.size())
 | |
|       ID = threadIDs[threadID];
 | |
| 
 | |
|     /* find correct thread to affinitize to */
 | |
|     cpu_set_t set;
 | |
|     CPU_ZERO(&set);
 | |
|     
 | |
|     if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
 | |
|     {
 | |
|       for (int i=0, j=0; i<CPU_SETSIZE; i++)
 | |
|       {
 | |
|         if (!CPU_ISSET(i,&set)) continue;
 | |
| 
 | |
|         if (j == ID) {
 | |
|           ID = i;
 | |
|           break;
 | |
|         }
 | |
|         j++;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     return ID;
 | |
|   }
 | |
| 
 | |
/*! Restricts the calling thread to the CPU with the given index.
 *
 *  \param affinity  CPU index, used as is
 *
 *  The index is deliberately not passed through mapThreadID: that is not
 *  working properly in LXC containers when some processors are disabled.
 *  The result of pthread_setaffinity_np is not inspected. */
void setAffinity(ssize_t affinity)
{
  const size_t cpu = affinity;

  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(cpu, &mask);

  pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
}
 | |
| }
 | |
| #endif
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| /// Android Platform
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| #if defined(__ANDROID__)
 | |
| 
 | |
| namespace embree
 | |
| {
 | |
/*! Restricts the calling thread to the CPU with the given index.
 *
 *  \param affinity  CPU index
 *
 *  The result of sched_setaffinity is not inspected. */
void setAffinity(ssize_t affinity)
{
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(affinity, &mask);

  /* pid 0 addresses the caller */
  sched_setaffinity(0, sizeof(mask), &mask);
}
 | |
| }
 | |
| #endif
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| /// FreeBSD Platform
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| #if defined(__FreeBSD__)
 | |
| 
 | |
| #include <pthread_np.h>
 | |
| 
 | |
| namespace embree
 | |
| {
 | |
|   /*! set affinity of the calling thread */
 | |
|   void setAffinity(ssize_t affinity)
 | |
|   {
 | |
|     cpuset_t cset;
 | |
|     CPU_ZERO(&cset);
 | |
|     CPU_SET(affinity, &cset);
 | |
| 
 | |
|     pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
 | |
|   }
 | |
| }
 | |
| #endif
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| /// WebAssembly Platform
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| #if defined(__EMSCRIPTEN__)
 | |
| namespace embree
 | |
| {
 | |
/*! No-op: setting thread affinity is not supported in WASM.
 *
 *  \param affinity  ignored */
void setAffinity(ssize_t affinity)
{
  (void)affinity;
}
 | |
| }
 | |
| #endif
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| /// MacOSX Platform
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| #if defined(__MACOSX__)
 | |
| 
 | |
#include <mach/thread_act.h>
#include <mach/thread_policy.h>
#include <mach/mach_init.h>
#include <mach/mach_port.h>
 | |
| 
 | |
| namespace embree
 | |
| {
 | |
|   /*! set affinity of the calling thread */
 | |
|   void setAffinity(ssize_t affinity)
 | |
|   {
 | |
| #if !defined(__ARM_NEON) // affinity seems not supported on M1 chip
 | |
|     
 | |
|     thread_affinity_policy ap;
 | |
|     ap.affinity_tag = affinity;
 | |
|     if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
 | |
|       WARNING("setting thread affinity failed"); // on purpose only a warning
 | |
|     
 | |
| #endif
 | |
|   }
 | |
| }
 | |
| #endif
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| /// Unix Platform
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| #if defined(__UNIX__) || defined(PTHREADS_WIN32)
 | |
| 
 | |
| #include <pthread.h>
 | |
| #include <sched.h>
 | |
| 
 | |
| #if defined(__USE_NUMA__)
 | |
| #include <numa.h>
 | |
| #endif
 | |
| 
 | |
| namespace embree
 | |
| {
 | |
|   struct ThreadStartupData 
 | |
|   {
 | |
|   public:
 | |
|     ThreadStartupData (thread_func f, void* arg, int affinity) 
 | |
|       : f(f), arg(arg), affinity(affinity) {}
 | |
|   public: 
 | |
|     thread_func f;
 | |
|     void* arg;
 | |
|     ssize_t affinity;
 | |
|   };
 | |
|   
 | |
|   static void* threadStartup(ThreadStartupData* parg)
 | |
|   {
 | |
|     _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
 | |
|     
 | |
|     /*! Mac OS X does not support setting affinity at thread creation time */
 | |
| #if defined(__MACOSX__)
 | |
|     if (parg->affinity >= 0)
 | |
| 	setAffinity(parg->affinity);
 | |
| #endif
 | |
| 
 | |
|     parg->f(parg->arg);
 | |
|     delete parg;
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   /*! creates a hardware thread running on specific core */
 | |
|   thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
 | |
|   {
 | |
|     /* set stack size */
 | |
|     pthread_attr_t attr;
 | |
|     pthread_attr_init(&attr);
 | |
|     if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size);
 | |
| 
 | |
|     /* create thread */
 | |
|     pthread_t* tid = new pthread_t;
 | |
|     if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) {
 | |
|       pthread_attr_destroy(&attr);
 | |
|       delete tid; 
 | |
|       FATAL("pthread_create failed");
 | |
|     }
 | |
|     pthread_attr_destroy(&attr);
 | |
| 
 | |
|     /* set affinity */
 | |
| #if defined(__LINUX__) && !defined(__ANDROID__)
 | |
|     if (threadID >= 0) {
 | |
|       cpu_set_t cset;
 | |
|       CPU_ZERO(&cset);
 | |
|       threadID = mapThreadID(threadID);
 | |
|       CPU_SET(threadID, &cset);
 | |
|       pthread_setaffinity_np(*tid, sizeof(cset), &cset);
 | |
|     }
 | |
| #elif defined(__FreeBSD__)
 | |
|     if (threadID >= 0) {
 | |
|       cpuset_t cset;
 | |
|       CPU_ZERO(&cset);
 | |
|       CPU_SET(threadID, &cset);
 | |
|       pthread_setaffinity_np(*tid, sizeof(cset), &cset);
 | |
|     }
 | |
| #elif defined(__ANDROID__)
 | |
|     if (threadID >= 0) {
 | |
|       cpu_set_t cset;
 | |
|       CPU_ZERO(&cset);
 | |
|       CPU_SET(threadID, &cset);
 | |
|       sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset);
 | |
|     }
 | |
| #endif
 | |
| 
 | |
|     return thread_t(tid);
 | |
|   }
 | |
| 
 | |
/*! Lets the calling thread relinquish the processor; the result of
 *  sched_yield is intentionally not inspected. */
void yield()
{
  (void) sched_yield();
}
 | |
| 
 | |
|   /*! waits until the given thread has terminated */
 | |
|   void join(thread_t tid) {
 | |
|     if (pthread_join(*(pthread_t*)tid, nullptr) != 0)
 | |
|       FATAL("pthread_join failed");
 | |
|     delete (pthread_t*)tid;
 | |
|   }
 | |
| 
 | |
|   /*! destroy a hardware thread by its handle */
 | |
|   void destroyThread(thread_t tid) {
 | |
| #if defined(__ANDROID__)
 | |
|     FATAL("Can't destroy threads on Android."); // pthread_cancel not implemented.
 | |
| #else
 | |
|     pthread_cancel(*(pthread_t*)tid);
 | |
|     delete (pthread_t*)tid;
 | |
| #endif
 | |
|   }
 | |
| 
 | |
|   /*! creates thread local storage */
 | |
|   tls_t createTls() 
 | |
|   {
 | |
|     pthread_key_t* key = new pthread_key_t;
 | |
|     if (pthread_key_create(key,nullptr) != 0) {
 | |
|       delete key;
 | |
|       FATAL("pthread_key_create failed");
 | |
|     }
 | |
| 
 | |
|     return tls_t(key);
 | |
|   }
 | |
| 
 | |
|   /*! return the thread local storage pointer */
 | |
|   void* getTls(tls_t tls) 
 | |
|   {
 | |
|     assert(tls);
 | |
|     return pthread_getspecific(*(pthread_key_t*)tls);
 | |
|   }
 | |
| 
 | |
|   /*! set the thread local storage pointer */
 | |
|   void setTls(tls_t tls, void* const ptr) 
 | |
|   {
 | |
|     assert(tls);
 | |
|     if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0)
 | |
|       FATAL("pthread_setspecific failed");
 | |
|   }
 | |
| 
 | |
|   /*! destroys thread local storage identifier */
 | |
|   void destroyTls(tls_t tls) 
 | |
|   {
 | |
|     assert(tls);
 | |
|     if (pthread_key_delete(*(pthread_key_t*)tls) != 0)
 | |
|       FATAL("pthread_key_delete failed");
 | |
|     delete (pthread_key_t*)tls;
 | |
|   }
 | |
| }
 | |
| 
 | |
| #endif
 | 
