mirror of
https://github.com/python/cpython.git
synced 2026-04-13 23:31:02 +00:00
Remove PyThread_type_lock (now uses PyMutex internally). Add new benchmark options: - work_inside/work_outside: control work inside and outside the critical section to vary contention levels - num_locks: use multiple independent locks with threads assigned round-robin - total_iters: fixed iteration count per thread instead of time-based, useful for measuring fairness - num_acquisitions: lock acquisitions per loop iteration - random_locks: acquire random lock each iteration Also return elapsed time from benchmark_locks() and switch lockbench.py to use argparse.
572 lines
15 KiB
C
572 lines
15 KiB
C
// C Extension module to test pycore_lock.h API
|
|
|
|
#include "parts.h"
|
|
#include "pycore_lock.h"
|
|
#include "pycore_pythread.h" // PyThread_get_thread_ident_ex()
|
|
|
|
#include "clinic/test_lock.c.h"
|
|
|
|
#ifdef MS_WINDOWS
|
|
#define WIN32_LEAN_AND_MEAN
|
|
#include <windows.h>
|
|
#else
|
|
#include <unistd.h> // usleep()
|
|
#endif
|
|
|
|
/*[clinic input]
|
|
module _testinternalcapi
|
|
[clinic start generated code]*/
|
|
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7bb583d8c9eb9a78]*/
|
|
|
|
|
|
static void
|
|
pysleep(int ms)
|
|
{
|
|
#ifdef MS_WINDOWS
|
|
Sleep(ms);
|
|
#else
|
|
usleep(ms * 1000);
|
|
#endif
|
|
}
|
|
|
|
static PyObject *
|
|
test_lock_basic(PyObject *self, PyObject *obj)
|
|
{
|
|
PyMutex m = (PyMutex){0};
|
|
|
|
// uncontended lock and unlock
|
|
PyMutex_Lock(&m);
|
|
assert(m._bits == 1);
|
|
PyMutex_Unlock(&m);
|
|
assert(m._bits == 0);
|
|
|
|
Py_RETURN_NONE;
|
|
}
|
|
|
|
struct test_lock2_data {
|
|
PyMutex m;
|
|
PyEvent done;
|
|
int started;
|
|
};
|
|
|
|
static void
|
|
lock_thread(void *arg)
|
|
{
|
|
struct test_lock2_data *test_data = arg;
|
|
PyMutex *m = &test_data->m;
|
|
_Py_atomic_store_int(&test_data->started, 1);
|
|
|
|
PyMutex_Lock(m);
|
|
// gh-135641: in rare cases the lock may still have `_Py_HAS_PARKED` set
|
|
// (m->_bits == 3) due to bucket collisions in the parking lot hash table
|
|
// between this mutex and the `test_data.done` event.
|
|
assert(m->_bits == 1 || m->_bits == 3);
|
|
|
|
PyMutex_Unlock(m);
|
|
assert(m->_bits == 0);
|
|
|
|
_PyEvent_Notify(&test_data->done);
|
|
}
|
|
|
|
static PyObject *
|
|
test_lock_two_threads(PyObject *self, PyObject *obj)
|
|
{
|
|
// lock attempt by two threads
|
|
struct test_lock2_data test_data;
|
|
memset(&test_data, 0, sizeof(test_data));
|
|
|
|
PyMutex_Lock(&test_data.m);
|
|
assert(test_data.m._bits == 1);
|
|
|
|
PyThread_start_new_thread(lock_thread, &test_data);
|
|
|
|
// wait up to two seconds for the lock_thread to attempt to lock "m"
|
|
int iters = 0;
|
|
uint8_t v;
|
|
do {
|
|
pysleep(10); // allow some time for the other thread to try to lock
|
|
v = _Py_atomic_load_uint8_relaxed(&test_data.m._bits);
|
|
assert(v == 1 || v == 3);
|
|
iters++;
|
|
} while (v != 3 && iters < 200);
|
|
|
|
// both the "locked" and the "has parked" bits should be set
|
|
v = _Py_atomic_load_uint8_relaxed(&test_data.m._bits);
|
|
assert(v == 3);
|
|
|
|
PyMutex_Unlock(&test_data.m);
|
|
PyEvent_Wait(&test_data.done);
|
|
assert(test_data.m._bits == 0);
|
|
|
|
Py_RETURN_NONE;
|
|
}
|
|
|
|
#define COUNTER_THREADS 5
|
|
#define COUNTER_ITERS 10000
|
|
|
|
struct test_data_counter {
|
|
PyMutex m;
|
|
Py_ssize_t counter;
|
|
};
|
|
|
|
struct thread_data_counter {
|
|
struct test_data_counter *test_data;
|
|
PyEvent done_event;
|
|
};
|
|
|
|
static void
|
|
counter_thread(void *arg)
|
|
{
|
|
struct thread_data_counter *thread_data = arg;
|
|
struct test_data_counter *test_data = thread_data->test_data;
|
|
|
|
for (Py_ssize_t i = 0; i < COUNTER_ITERS; i++) {
|
|
PyMutex_Lock(&test_data->m);
|
|
test_data->counter++;
|
|
PyMutex_Unlock(&test_data->m);
|
|
}
|
|
_PyEvent_Notify(&thread_data->done_event);
|
|
}
|
|
|
|
static PyObject *
|
|
test_lock_counter(PyObject *self, PyObject *obj)
|
|
{
|
|
// Test with rapidly locking and unlocking mutex
|
|
struct test_data_counter test_data;
|
|
memset(&test_data, 0, sizeof(test_data));
|
|
|
|
struct thread_data_counter thread_data[COUNTER_THREADS];
|
|
memset(&thread_data, 0, sizeof(thread_data));
|
|
|
|
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
|
|
thread_data[i].test_data = &test_data;
|
|
PyThread_start_new_thread(counter_thread, &thread_data[i]);
|
|
}
|
|
|
|
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
|
|
PyEvent_Wait(&thread_data[i].done_event);
|
|
}
|
|
|
|
assert(test_data.counter == COUNTER_THREADS * COUNTER_ITERS);
|
|
Py_RETURN_NONE;
|
|
}
|
|
|
|
#define SLOW_COUNTER_ITERS 100
|
|
|
|
static void
|
|
slow_counter_thread(void *arg)
|
|
{
|
|
struct thread_data_counter *thread_data = arg;
|
|
struct test_data_counter *test_data = thread_data->test_data;
|
|
|
|
for (Py_ssize_t i = 0; i < SLOW_COUNTER_ITERS; i++) {
|
|
PyMutex_Lock(&test_data->m);
|
|
if (i % 7 == 0) {
|
|
pysleep(2);
|
|
}
|
|
test_data->counter++;
|
|
PyMutex_Unlock(&test_data->m);
|
|
}
|
|
_PyEvent_Notify(&thread_data->done_event);
|
|
}
|
|
|
|
static PyObject *
|
|
test_lock_counter_slow(PyObject *self, PyObject *obj)
|
|
{
|
|
// Test lock/unlock with occasional "long" critical section, which will
|
|
// trigger handoff of the lock.
|
|
struct test_data_counter test_data;
|
|
memset(&test_data, 0, sizeof(test_data));
|
|
|
|
struct thread_data_counter thread_data[COUNTER_THREADS];
|
|
memset(&thread_data, 0, sizeof(thread_data));
|
|
|
|
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
|
|
thread_data[i].test_data = &test_data;
|
|
PyThread_start_new_thread(slow_counter_thread, &thread_data[i]);
|
|
}
|
|
|
|
for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
|
|
PyEvent_Wait(&thread_data[i].done_event);
|
|
}
|
|
|
|
assert(test_data.counter == COUNTER_THREADS * SLOW_COUNTER_ITERS);
|
|
Py_RETURN_NONE;
|
|
}
|
|
|
|
struct bench_lock {
|
|
char padding[200];
|
|
PyMutex m;
|
|
double value;
|
|
};
|
|
|
|
struct bench_config {
|
|
int stop;
|
|
int work_inside;
|
|
int work_outside;
|
|
int num_acquisitions;
|
|
int random_locks;
|
|
Py_ssize_t target_iters;
|
|
Py_ssize_t num_locks;
|
|
struct bench_lock *locks;
|
|
};
|
|
|
|
struct bench_thread_data {
|
|
struct bench_config *config;
|
|
struct bench_lock *lock;
|
|
uint64_t rng_state;
|
|
Py_ssize_t iters;
|
|
PyEvent done;
|
|
};
|
|
|
|
static uint64_t
|
|
splitmix64(uint64_t *state)
|
|
{
|
|
uint64_t z = (*state += 0x9e3779b97f4a7c15);
|
|
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
|
|
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
|
|
return z ^ (z >> 31);
|
|
}
|
|
|
|
static void
|
|
thread_benchmark_locks(void *arg)
|
|
{
|
|
struct bench_thread_data *td = arg;
|
|
struct bench_config *config = td->config;
|
|
int work_inside = config->work_inside;
|
|
int work_outside = config->work_outside;
|
|
int num_acquisitions = config->num_acquisitions;
|
|
Py_ssize_t target_iters = config->target_iters;
|
|
uint64_t rng_state = td->rng_state;
|
|
|
|
double local_value = 0.0;
|
|
double my_value = 1.0;
|
|
Py_ssize_t iters = 0;
|
|
for (;;) {
|
|
if (target_iters > 0) {
|
|
if (iters >= target_iters) {
|
|
break;
|
|
}
|
|
}
|
|
else if (_Py_atomic_load_int_relaxed(&config->stop)) {
|
|
break;
|
|
}
|
|
struct bench_lock *lock = td->lock;
|
|
if (config->random_locks) {
|
|
uint32_t r = (uint32_t)splitmix64(&rng_state);
|
|
// Fast modulo reduction to pick a random lock, adapted from:
|
|
// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
|
|
Py_ssize_t idx = ((uint64_t)r * (uint32_t)config->num_locks) >> 32;
|
|
lock = &config->locks[idx];
|
|
}
|
|
for (int acq = 0; acq < num_acquisitions; acq++) {
|
|
PyMutex_Lock(&lock->m);
|
|
for (int i = 0; i < work_inside; i++) {
|
|
lock->value += my_value;
|
|
my_value = lock->value;
|
|
}
|
|
PyMutex_Unlock(&lock->m);
|
|
}
|
|
for (int i = 0; i < work_outside; i++) {
|
|
local_value += my_value;
|
|
my_value = local_value;
|
|
}
|
|
iters += num_acquisitions;
|
|
}
|
|
|
|
td->iters = iters;
|
|
_PyEvent_Notify(&td->done);
|
|
}
|
|
|
|
/*[clinic input]
|
|
_testinternalcapi.benchmark_locks
|
|
|
|
num_threads: Py_ssize_t
|
|
work_inside: int = 1
|
|
work_outside: int = 0
|
|
time_ms: int = 1000
|
|
num_acquisitions: int = 1
|
|
total_iters: Py_ssize_t = 0
|
|
num_locks: Py_ssize_t = 1
|
|
random_locks: bool = False
|
|
/
|
|
|
|
[clinic start generated code]*/
|
|
|
|
static PyObject *
|
|
_testinternalcapi_benchmark_locks_impl(PyObject *module,
|
|
Py_ssize_t num_threads,
|
|
int work_inside, int work_outside,
|
|
int time_ms, int num_acquisitions,
|
|
Py_ssize_t total_iters,
|
|
Py_ssize_t num_locks,
|
|
int random_locks)
|
|
/*[clinic end generated code: output=6258dc9de8cb9af1 input=d622cf4e1c4d008b]*/
|
|
{
|
|
// Run from Tools/lockbench/lockbench.py
|
|
// Based on the WebKit lock benchmarks:
|
|
// https://github.com/WebKit/WebKit/blob/main/Source/WTF/benchmarks/LockSpeedTest.cpp
|
|
// See also https://webkit.org/blog/6161/locking-in-webkit/
|
|
PyObject *thread_iters = NULL;
|
|
PyObject *res = NULL;
|
|
struct bench_thread_data *thread_data = NULL;
|
|
|
|
struct bench_config config = {
|
|
.work_inside = work_inside,
|
|
.work_outside = work_outside,
|
|
.num_acquisitions = num_acquisitions,
|
|
.target_iters = total_iters,
|
|
.num_locks = num_locks,
|
|
.random_locks = random_locks,
|
|
};
|
|
|
|
config.locks = PyMem_Calloc(num_locks, sizeof(*config.locks));
|
|
if (config.locks == NULL) {
|
|
PyErr_NoMemory();
|
|
goto exit;
|
|
}
|
|
|
|
thread_data = PyMem_Calloc(num_threads, sizeof(*thread_data));
|
|
if (thread_data == NULL) {
|
|
PyErr_NoMemory();
|
|
goto exit;
|
|
}
|
|
thread_iters = PyList_New(num_threads);
|
|
if (thread_iters == NULL) {
|
|
goto exit;
|
|
}
|
|
|
|
PyTime_t start, end;
|
|
if (PyTime_PerfCounter(&start) < 0) {
|
|
goto exit;
|
|
}
|
|
|
|
for (Py_ssize_t i = 0; i < num_threads; i++) {
|
|
thread_data[i].config = &config;
|
|
thread_data[i].lock = &config.locks[i % num_locks];
|
|
thread_data[i].rng_state = (uint64_t)i + 1;
|
|
PyThread_start_new_thread(thread_benchmark_locks, &thread_data[i]);
|
|
}
|
|
|
|
if (total_iters == 0) {
|
|
pysleep(time_ms);
|
|
_Py_atomic_store_int(&config.stop, 1);
|
|
}
|
|
|
|
for (Py_ssize_t i = 0; i < num_threads; i++) {
|
|
PyEvent_Wait(&thread_data[i].done);
|
|
}
|
|
|
|
if (PyTime_PerfCounter(&end) < 0) {
|
|
goto exit;
|
|
}
|
|
|
|
Py_ssize_t sum_iters = 0;
|
|
for (Py_ssize_t i = 0; i < num_threads; i++) {
|
|
PyObject *iter = PyLong_FromSsize_t(thread_data[i].iters);
|
|
if (iter == NULL) {
|
|
goto exit;
|
|
}
|
|
PyList_SET_ITEM(thread_iters, i, iter);
|
|
sum_iters += thread_data[i].iters;
|
|
}
|
|
|
|
assert(end != start);
|
|
PyTime_t elapsed_ns = end - start;
|
|
double rate = sum_iters * 1e9 / elapsed_ns;
|
|
res = Py_BuildValue("(dOL)", rate, thread_iters,
|
|
(long long)elapsed_ns);
|
|
|
|
exit:
|
|
PyMem_Free(config.locks);
|
|
PyMem_Free(thread_data);
|
|
Py_XDECREF(thread_iters);
|
|
return res;
|
|
}
|
|
|
|
static PyObject *
|
|
test_lock_benchmark(PyObject *module, PyObject *obj)
|
|
{
|
|
// Just make sure the benchmark runs without crashing
|
|
PyObject *res = _testinternalcapi_benchmark_locks_impl(
|
|
module, 1, 1, 0, 100, 1, 0, 1, 0);
|
|
if (res == NULL) {
|
|
return NULL;
|
|
}
|
|
Py_DECREF(res);
|
|
Py_RETURN_NONE;
|
|
}
|
|
|
|
static int
|
|
init_maybe_fail(void *arg)
|
|
{
|
|
int *counter = (int *)arg;
|
|
(*counter)++;
|
|
if (*counter < 5) {
|
|
// failure
|
|
return -1;
|
|
}
|
|
assert(*counter == 5);
|
|
return 0;
|
|
}
|
|
|
|
static PyObject *
|
|
test_lock_once(PyObject *self, PyObject *obj)
|
|
{
|
|
_PyOnceFlag once = {0};
|
|
int counter = 0;
|
|
for (int i = 0; i < 10; i++) {
|
|
int res = _PyOnceFlag_CallOnce(&once, init_maybe_fail, &counter);
|
|
if (i < 4) {
|
|
assert(res == -1);
|
|
}
|
|
else {
|
|
assert(res == 0);
|
|
assert(counter == 5);
|
|
}
|
|
}
|
|
Py_RETURN_NONE;
|
|
}
|
|
|
|
struct test_rwlock_data {
|
|
Py_ssize_t nthreads;
|
|
_PyRWMutex rw;
|
|
PyEvent step1;
|
|
PyEvent step2;
|
|
PyEvent step3;
|
|
PyEvent done;
|
|
};
|
|
|
|
static void
|
|
rdlock_thread(void *arg)
|
|
{
|
|
struct test_rwlock_data *test_data = arg;
|
|
|
|
// Acquire the lock in read mode
|
|
_PyRWMutex_RLock(&test_data->rw);
|
|
PyEvent_Wait(&test_data->step1);
|
|
_PyRWMutex_RUnlock(&test_data->rw);
|
|
|
|
_PyRWMutex_RLock(&test_data->rw);
|
|
PyEvent_Wait(&test_data->step3);
|
|
_PyRWMutex_RUnlock(&test_data->rw);
|
|
|
|
if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) {
|
|
_PyEvent_Notify(&test_data->done);
|
|
}
|
|
}
|
|
static void
|
|
wrlock_thread(void *arg)
|
|
{
|
|
struct test_rwlock_data *test_data = arg;
|
|
|
|
// First acquire the lock in write mode
|
|
_PyRWMutex_Lock(&test_data->rw);
|
|
PyEvent_Wait(&test_data->step2);
|
|
_PyRWMutex_Unlock(&test_data->rw);
|
|
|
|
if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) {
|
|
_PyEvent_Notify(&test_data->done);
|
|
}
|
|
}
|
|
|
|
static void
|
|
wait_until(uintptr_t *ptr, uintptr_t value)
|
|
{
|
|
// wait up to two seconds for *ptr == value
|
|
int iters = 0;
|
|
uintptr_t bits;
|
|
do {
|
|
pysleep(10);
|
|
bits = _Py_atomic_load_uintptr(ptr);
|
|
iters++;
|
|
} while (bits != value && iters < 200);
|
|
}
|
|
|
|
static PyObject *
|
|
test_lock_rwlock(PyObject *self, PyObject *obj)
|
|
{
|
|
struct test_rwlock_data test_data = {.nthreads = 3};
|
|
|
|
_PyRWMutex_Lock(&test_data.rw);
|
|
assert(test_data.rw.bits == 1);
|
|
|
|
_PyRWMutex_Unlock(&test_data.rw);
|
|
assert(test_data.rw.bits == 0);
|
|
|
|
// Start two readers
|
|
PyThread_start_new_thread(rdlock_thread, &test_data);
|
|
PyThread_start_new_thread(rdlock_thread, &test_data);
|
|
|
|
// wait up to two seconds for the threads to attempt to read-lock "rw"
|
|
wait_until(&test_data.rw.bits, 8);
|
|
assert(test_data.rw.bits == 8);
|
|
|
|
// start writer (while readers hold lock)
|
|
PyThread_start_new_thread(wrlock_thread, &test_data);
|
|
wait_until(&test_data.rw.bits, 10);
|
|
assert(test_data.rw.bits == 10);
|
|
|
|
// readers release lock, writer should acquire it
|
|
_PyEvent_Notify(&test_data.step1);
|
|
wait_until(&test_data.rw.bits, 3);
|
|
assert(test_data.rw.bits == 3);
|
|
|
|
// writer releases lock, readers acquire it
|
|
_PyEvent_Notify(&test_data.step2);
|
|
wait_until(&test_data.rw.bits, 8);
|
|
assert(test_data.rw.bits == 8);
|
|
|
|
// readers release lock again
|
|
_PyEvent_Notify(&test_data.step3);
|
|
wait_until(&test_data.rw.bits, 0);
|
|
assert(test_data.rw.bits == 0);
|
|
|
|
PyEvent_Wait(&test_data.done);
|
|
Py_RETURN_NONE;
|
|
}
|
|
|
|
static PyObject *
|
|
test_lock_recursive(PyObject *self, PyObject *obj)
|
|
{
|
|
_PyRecursiveMutex m = (_PyRecursiveMutex){0};
|
|
assert(!_PyRecursiveMutex_IsLockedByCurrentThread(&m));
|
|
|
|
_PyRecursiveMutex_Lock(&m);
|
|
assert(m.thread == PyThread_get_thread_ident_ex());
|
|
assert(PyMutex_IsLocked(&m.mutex));
|
|
assert(m.level == 0);
|
|
|
|
_PyRecursiveMutex_Lock(&m);
|
|
assert(m.level == 1);
|
|
_PyRecursiveMutex_Unlock(&m);
|
|
|
|
_PyRecursiveMutex_Unlock(&m);
|
|
assert(m.thread == 0);
|
|
assert(!PyMutex_IsLocked(&m.mutex));
|
|
assert(m.level == 0);
|
|
|
|
Py_RETURN_NONE;
|
|
}
|
|
|
|
static PyMethodDef test_methods[] = {
|
|
{"test_lock_basic", test_lock_basic, METH_NOARGS},
|
|
{"test_lock_two_threads", test_lock_two_threads, METH_NOARGS},
|
|
{"test_lock_counter", test_lock_counter, METH_NOARGS},
|
|
{"test_lock_counter_slow", test_lock_counter_slow, METH_NOARGS},
|
|
_TESTINTERNALCAPI_BENCHMARK_LOCKS_METHODDEF
|
|
{"test_lock_benchmark", test_lock_benchmark, METH_NOARGS},
|
|
{"test_lock_once", test_lock_once, METH_NOARGS},
|
|
{"test_lock_rwlock", test_lock_rwlock, METH_NOARGS},
|
|
{"test_lock_recursive", test_lock_recursive, METH_NOARGS},
|
|
{NULL, NULL} /* sentinel */
|
|
};
|
|
|
|
int
|
|
_PyTestInternalCapi_Init_Lock(PyObject *mod)
|
|
{
|
|
if (PyModule_AddFunctions(mod, test_methods) < 0) {
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|