Refactor CommandQueueMT to use vararg templates

In order to make CommandQueueMT more maintainable, this PR replaces the
previous macro hell with variadic templates. This makes the
class far more explicit and will allow us to more easily change the way
the class functions in the future.

Furthermore this refactoring has allowed for some optimizations. In
particular by using std::forward to delay the decision of decaying the
type to as late as possible we are able to move the data from the
callsite into our Command buffer and later move it to the call.

In practice what this means is that compared to the old version instead
of copying values 3 times, we can now get away with 1 copy, and 1 move
for lvalues, and just 2 moves for rvalues. This saves quite a few
operations in a hot codepath.

We also now test to make sure that the number of copies and moves is
what we expect. This way we can easily spot performance regressions in
this code.

Somewhat unscientifically, here is the average mspf when running TPS-demo
by pressing enter and not touching the controls, repeatable across many runs:

before: 6.467
after : 6.202
This commit is contained in:
HP van Braam 2024-12-23 18:21:39 +01:00
parent a7a2a12bfd
commit cccd2432c3
5 changed files with 346 additions and 322 deletions

View file

@ -201,10 +201,10 @@ public:
command_queue.push_and_sync(this, &SharedThreadState::func2, tr, f);
break;
case TEST_MSGRET_FUNC1_TRANSFORM:
command_queue.push_and_ret(this, &SharedThreadState::func1r, tr, &otr);
command_queue.push_and_ret(this, &SharedThreadState::func1r, &otr, tr);
break;
case TEST_MSGRET_FUNC2_TRANSFORM_FLOAT:
command_queue.push_and_ret(this, &SharedThreadState::func2r, tr, f, &otr);
command_queue.push_and_ret(this, &SharedThreadState::func2r, &otr, tr, f);
break;
default:
break;
@ -244,6 +244,44 @@ public:
}
writer_thread.wait_to_finish();
}
// Instrumented value type used to observe how many times an argument is
// copied or moved on its way through the command queue.
//
// Every copy (construction or assignment) increments `copy_count`, and every
// move (construction or assignment) increments `move_count`. Both counters are
// plain static ints shared by all instances, so they must be reset to zero
// before each measurement and are not safe against unsynchronized concurrent
// updates.
struct CopyMoveTestType {
	inline static int copy_count = 0;
	inline static int move_count = 0;
	// Payload carried along so that a copy/move has something to transfer.
	int value = 0;

	CopyMoveTestType(int p_value = 0) :
			value(p_value) {}

	CopyMoveTestType(const CopyMoveTestType &p_other) :
			value(p_other.value) {
		copy_count++;
	}

	// Moving only transfers an int and bumps a counter, so it cannot throw.
	// Declaring that lets generic code which checks for nothrow moves pick the
	// move path instead of silently falling back to a copy.
	CopyMoveTestType(CopyMoveTestType &&p_other) noexcept :
			value(p_other.value) {
		move_count++;
	}

	CopyMoveTestType &operator=(const CopyMoveTestType &p_other) {
		value = p_other.value;
		copy_count++;
		return *this;
	}

	// See the move constructor for why this is noexcept.
	CopyMoveTestType &operator=(CopyMoveTestType &&p_other) noexcept {
		value = p_other.value;
		move_count++;
		return *this;
	}
};
// Deliberately empty sink that receives its argument by value, so invoking it
// has to materialize a CopyMoveTestType for the parameter.
void copy_move_test_copy(CopyMoveTestType p_test_type) {}
// Deliberately empty sink that receives its argument by const reference, so
// the call itself constructs no new CopyMoveTestType.
void copy_move_test_ref(const CopyMoveTestType &p_test_type) {}
// Deliberately empty sink that receives its argument by rvalue reference; it
// binds to the caller's object and never moves from it.
void copy_move_test_move(CopyMoveTestType &&p_test_type) {}
};
static void test_command_queue_basic(bool p_use_thread_pool_sync) {
@ -446,6 +484,83 @@ TEST_CASE("[Stress][CommandQueue] Stress test command queue") {
ProjectSettings::get_singleton()->set_setting(COMMAND_QUEUE_SETTING,
ProjectSettings::get_singleton()->property_get_revert(COMMAND_QUEUE_SETTING));
}
// Verifies how many copies and moves of an argument happen between
// CommandQueueMT::push() and the invocation of the target method, for every
// combination of lvalue/rvalue argument and by-value / const-ref / rvalue-ref
// parameter exercised below.
TEST_CASE("[CommandQueue] Test Parameter Passing Semantics") {
	SharedThreadState sts;
	sts.init_threads();

	// Kicks the reader thread and blocks until it reports completion, so the
	// counters are only inspected after the pushed command has been handled.
	// NOTE(review): -1 presumably asks the reader to process everything that
	// is queued; confirm against SharedThreadState's reader implementation.
	const auto run_reader = [&sts]() {
		sts.message_count_to_read = -1;
		sts.reader_threadwork.main_start_work();
		sts.reader_threadwork.main_wait_for_done();
	};

	SUBCASE("Testing with lvalue") {
		// Counters are reset before the argument is created; constructing it
		// from an int touches neither counter.
		SharedThreadState::CopyMoveTestType::copy_count = 0;
		SharedThreadState::CopyMoveTestType::move_count = 0;
		SharedThreadState::CopyMoveTestType lvalue(42);

		SUBCASE("Pass by copy") {
			sts.command_queue.push(&sts, &SharedThreadState::copy_move_test_copy, lvalue);
			run_reader();

			CHECK(SharedThreadState::CopyMoveTestType::copy_count == 1);
			CHECK(SharedThreadState::CopyMoveTestType::move_count == 1);
		}

		SUBCASE("Pass by reference") {
			sts.command_queue.push(&sts, &SharedThreadState::copy_move_test_ref, lvalue);
			run_reader();

			CHECK(SharedThreadState::CopyMoveTestType::copy_count == 1);
			CHECK(SharedThreadState::CopyMoveTestType::move_count == 0);
		}
	}

	SUBCASE("Testing with rvalue") {
		SharedThreadState::CopyMoveTestType::copy_count = 0;
		SharedThreadState::CopyMoveTestType::move_count = 0;

		SUBCASE("Pass by copy") {
			sts.command_queue.push(&sts, &SharedThreadState::copy_move_test_copy,
					SharedThreadState::CopyMoveTestType(43));
			run_reader();

			CHECK(SharedThreadState::CopyMoveTestType::copy_count == 0);
			CHECK(SharedThreadState::CopyMoveTestType::move_count == 2);
		}

		SUBCASE("Pass by reference") {
			sts.command_queue.push(&sts, &SharedThreadState::copy_move_test_ref,
					SharedThreadState::CopyMoveTestType(43));
			run_reader();

			CHECK(SharedThreadState::CopyMoveTestType::copy_count == 0);
			CHECK(SharedThreadState::CopyMoveTestType::move_count == 1);
		}

		SUBCASE("Pass by rvalue reference") {
			sts.command_queue.push(&sts, &SharedThreadState::copy_move_test_move,
					SharedThreadState::CopyMoveTestType(43));
			run_reader();

			CHECK(SharedThreadState::CopyMoveTestType::copy_count == 0);
			CHECK(SharedThreadState::CopyMoveTestType::move_count == 1);
		}
	}

	sts.destroy_threads();
}
} // namespace TestCommandQueue
#endif // TEST_COMMAND_QUEUE_H