Merge pull request #174 from IntQuant/faster_world_sync

Made chunk upload in world sync ~3 times faster.
This commit is contained in:
IQuant 2024-09-27 19:47:38 +03:00 committed by GitHub
commit fe4cf3f4bc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 359 additions and 116 deletions

15
.vscode/c_cpp_properties.json vendored Normal file
View file

@ -0,0 +1,15 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**"
],
"defines": [],
"cStandard": "c17",
"cppStandard": "c++17",
"intelliSenseMode": "linux-clang-x64"
}
],
"version": 4
}

View file

@ -9,5 +9,6 @@
"Lua.diagnostics.globals": [
"wait",
"async"
]
],
"C_Cpp.default.compilerPath": "/usr/bin/clang++"
}

View file

@ -26,6 +26,10 @@ build_ext:
cd ewext && cargo build --release --target=i686-pc-windows-gnu
cp ewext/target/i686-pc-windows-gnu/release/ewext.dll quant.ew/ewext.dll
build_ext_debug:
cd ewext && cargo build --target=i686-pc-windows-gnu
cp ewext/target/i686-pc-windows-gnu/debug/ewext.dll quant.ew/ewext.dll
##
run-rel: add_dylib_release

View file

@ -1,58 +1,77 @@
use std::{ffi::c_int, sync::LazyLock};
use std::{
cell::{LazyCell, RefCell},
ffi::{c_int, c_void},
sync::LazyLock,
};
use lua_bindings::{lua_State, Lua51};
use noita::{NoitaPixelRun, ParticleWorldState};
mod lua_bindings;
mod noita;
/// Lua 5.1 API bindings, loaded on first use from `./lua51.dll`.
///
/// Initialization panics if the library cannot be opened or if
/// `Lua51::from_library` fails on it.
static LUA: LazyLock<Lua51> = LazyLock::new(|| unsafe {
let lib = libloading::Library::new("./lua51.dll").expect("library to exist");
Lua51::from_library(lib).expect("library to be lua")
});
// Per-thread extension state. It is created lazily from `ExtState::default()`
// and wrapped in a `RefCell` so the exported Lua callbacks can mutate it.
thread_local! {
static STATE: LazyCell<RefCell<ExtState>> = LazyCell::new(|| ExtState::default().into());
}
/// State kept by the extension between calls from Lua.
#[derive(Default)]
struct ExtState {
/// `None` until `init_particle_world_state` has been called from Lua.
particle_world_state: Option<ParticleWorldState>,
}
// const EWEXT: [(&'static str, Function); 1] = [("testfn", None)];
extern "C" fn test_fn(_lua: *mut lua_State) -> c_int {
println!("\nStarting trace");
backtrace::trace(|frame| {
// let ip = frame.ip();
let symbol_address = frame.symbol_address();
extern "C" fn init_particle_world_state(lua: *mut lua_State) -> c_int {
println!("\nInitializing particle world state");
let world_pointer = unsafe { LUA.lua_tointeger(lua, 1) };
let chunk_map_pointer = unsafe { LUA.lua_tointeger(lua, 2) };
let material_list_pointer = unsafe { LUA.lua_tointeger(lua, 3) };
println!("pws stuff: {world_pointer:?} {chunk_map_pointer:?}");
print!("symbol: {:#08X}", symbol_address as usize);
if let Some(base) = frame.module_base_address() {
print!(" base: {:#08X}", base as usize);
}
// Resolve this instruction pointer to a symbol name
backtrace::resolve_frame(frame, |symbol| {
if let Some(name) = symbol.name() {
print!(" name: {name}");
}
if let Some(filename) = symbol.filename() {
print!(" file: {}", filename.display());
}
STATE.with(|state| {
state.borrow_mut().particle_world_state = Some(ParticleWorldState {
_world_ptr: world_pointer as *mut c_void,
chunk_map_ptr: chunk_map_pointer as *mut c_void,
material_list_ptr: material_list_pointer as _,
runner: Default::default(),
});
println!();
for i in 0..16 {
let b: u8 =
unsafe { std::ptr::read_volatile((symbol_address as *const u8).wrapping_add(i)) };
print!("{:02X} ", b);
}
println!();
true // keep going to the next frame
});
println!("End trace\n");
0
}
/// Lua entry point: run-length encodes a rectangular world area.
///
/// Lua arguments (1-based stack slots): `start_x`, `start_y`, `end_x`, `end_y`
/// and the address of the destination `NoitaPixelRun` buffer passed as an integer.
/// Pushes the number of runs written and returns `1` (one Lua return value).
///
/// Panics if the particle world state has not been initialized yet.
extern "C" fn encode_area(lua: *mut lua_State) -> c_int {
    // Read all arguments up front, in stack order.
    let (x0, y0, x1, y1, out_buf) = unsafe {
        (
            LUA.lua_tointeger(lua, 1) as i32,
            LUA.lua_tointeger(lua, 2) as i32,
            LUA.lua_tointeger(lua, 3) as i32,
            LUA.lua_tointeger(lua, 4) as i32,
            LUA.lua_tointeger(lua, 5) as *mut NoitaPixelRun,
        )
    };

    STATE.with(|cell| {
        let mut ext_state = cell.borrow_mut();
        let world = ext_state.particle_world_state.as_mut().unwrap();
        let run_count = unsafe { world.encode_area(x0, y0, x1, y1, out_buf) };
        unsafe { LUA.lua_pushinteger(lua, run_count as isize) };
    });

    1
}
#[no_mangle]
pub extern "C" fn luaopen_ewext(lua: *mut lua_State) -> c_int {
println!("Initializing ewext");
unsafe {
LUA.lua_pushcclosure(lua, Some(test_fn), 0);
// LUA.lua_setfield(lua, LUA_GLOBALSINDEX, c"ewext".as_ptr())
LUA.lua_createtable(lua, 0, 0);
LUA.lua_pushcclosure(lua, Some(init_particle_world_state), 0);
LUA.lua_setfield(lua, -2, c"init_particle_world_state".as_ptr());
LUA.lua_pushcclosure(lua, Some(encode_area), 0);
LUA.lua_setfield(lua, -2, c"encode_area".as_ptr());
}
// let mut luastate = unsafe { State::from_ptr(luastateptr) };
// luastate.new_lib(&EWEXT);
println!("Initializing ewext - Ok");
1
}

181
ewext/src/noita.rs Normal file
View file

@ -0,0 +1,181 @@
use std::{ffi::c_void, mem};
mod ntypes;
/// One run of identical pixels in the layout used by the encoded-area buffer
/// that Lua hands to `encode_area` as a raw address.
///
/// `repr(C, packed)` pins both the field order and the absence of padding.
/// Plain `repr(packed)` keeps the default Rust representation, whose field order
/// is unspecified, which is not acceptable for a struct written through a raw
/// pointer into memory owned by the other side of an FFI boundary.
#[repr(C, packed)]
pub(crate) struct NoitaPixelRun {
    /// Run length minus one (the encoder stores `length - 1`).
    length: u16,
    /// Material id of every pixel in the run.
    material: u16,
    /// Per-run flags (the encoder stores the liquid `is_static` bit here).
    flags: u8,
}
/// Copied from proxy.
///
/// A single pixel as seen by the run-length encoder: a material id plus flags.
/// Two pixels belong to the same run only when both fields are equal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct RawPixel {
    /// Material id of the pixel.
    pub material: u16,
    /// Flag bits attached to the pixel.
    pub flags: u8,
}
/// Copied from proxy.
/// Stores a run of pixels.
/// Not specific to Noita side - length is an actual length
#[derive(Debug)]
struct PixelRun<Pixel> {
    /// Number of consecutive pixels in the run (always at least 1).
    pub length: u32,
    /// The pixel value shared by the whole run.
    pub data: Pixel,
}

/// Copied from proxy.
/// Converts a normal sequence of pixels to a run-length-encoded one.
///
/// Feed pixels with `put_pixel`, read the result with `build`, then call `clear`
/// before reusing the runner for another sequence.
pub(crate) struct PixelRunner<Pixel> {
    /// Pixel of the run currently being accumulated, if any.
    current_pixel: Option<Pixel>,
    /// Length of the run currently being accumulated (0 when there is none).
    current_run_len: u32,
    /// Completed runs, in input order.
    runs: Vec<PixelRun<Pixel>>,
}

impl<Pixel: Eq + Copy> Default for PixelRunner<Pixel> {
    fn default() -> Self {
        Self::new()
    }
}

impl<Pixel: Eq + Copy> PixelRunner<Pixel> {
    /// Creates an empty runner.
    fn new() -> Self {
        Self {
            current_pixel: None,
            current_run_len: 0,
            runs: Vec::new(),
        }
    }

    /// Appends one pixel, extending the pending run or starting a new one.
    fn put_pixel(&mut self, pixel: Pixel) {
        match self.current_pixel {
            // Same pixel as the pending run: just grow it.
            Some(current) if current == pixel => self.current_run_len += 1,
            // Different pixel: flush the pending run and start a new one.
            Some(current) => {
                self.runs.push(PixelRun {
                    length: self.current_run_len,
                    data: current,
                });
                self.current_pixel = Some(pixel);
                self.current_run_len = 1;
            }
            // First pixel of the sequence.
            None => {
                self.current_pixel = Some(pixel);
                self.current_run_len = 1;
            }
        }
    }

    /// Flushes the pending run (if any) and returns all runs collected so far.
    ///
    /// The pending run is consumed by the flush, so calling `build` again, or
    /// continuing with `put_pixel` afterwards, never emits it a second time.
    fn build(&mut self) -> &[PixelRun<Pixel>] {
        if let Some(data) = self.current_pixel.take() {
            self.runs.push(PixelRun {
                length: self.current_run_len,
                data,
            });
            self.current_run_len = 0;
        }
        &self.runs
    }

    /// Resets the runner to its empty state, keeping the allocation of `runs`.
    fn clear(&mut self) {
        self.current_pixel = None;
        self.current_run_len = 0;
        self.runs.clear();
    }
}
/// Raw handles into the game's particle world plus a reusable run-length encoder.
///
/// The pointers are supplied from Lua as integers (see `init_particle_world_state`)
/// and are dereferenced without validation by the methods of this type.
pub(crate) struct ParticleWorldState {
// Stored but not read by the code in this file (hence the leading underscore).
pub(crate) _world_ptr: *mut c_void,
// Base of the chunk map object; `get_cell_raw` reads the chunk table pointer at byte offset 8.
pub(crate) chunk_map_ptr: *mut c_void,
// Address of material 0; material ids are derived from pointer distance to this address.
pub(crate) material_list_ptr: *const c_void,
// Scratch encoder, cleared at the end of every `encode_area` call.
pub(crate) runner: PixelRunner<RawPixel>,
}
impl ParticleWorldState {
/// Looks up the cell at world pixel coordinates `(x, y)` by walking the game's
/// chunk map in memory.
///
/// Returns `None` when the chunk slot is null or when the cell pointer stored
/// for that pixel is null.
///
/// All reads are unchecked: the result is only meaningful while
/// `chunk_map_ptr` points at a live chunk map with the layout assumed below.
fn get_cell_raw(&self, x: i32, y: i32) -> Option<&ntypes::Cell> {
let x = x as isize;
let y = y as isize;
// Byte offset of the chunk's slot in the chunk table: `>> 9` turns a pixel
// coordinate into a 512-pixel chunk coordinate, `- 256` and `& 511` wrap it into
// a 512x512 table, and `* 4` scales the slot index to bytes.
// NOTE(review): `* 4` presumably is the pointer size of the 32-bit (i686) build target - confirm.
let chunk_index = (((((y) >> 9) - 256) & 511) * 512 + ((((x) >> 9) - 256) & 511)) * 4;
// Deref 1/3
// Read the chunk table pointer stored 8 bytes into the chunk map object.
let chunk_arr = unsafe { self.chunk_map_ptr.offset(8).cast::<*const c_void>().read() };
// Deref 2/3
// Read the chunk pointer from its slot in the table.
let chunk = unsafe { chunk_arr.offset(chunk_index).cast::<*const c_void>().read() };
if chunk.is_null() {
return None;
}
// Deref 3/3
// The first word of a chunk is read as the pointer to its array of cell pointers.
let pixel_array = unsafe { chunk.cast::<*const c_void>().read() };
// Address of the slot for this pixel: row-major within the chunk, 512 pixels per
// row, 4 bytes per slot.
let pixel = unsafe { pixel_array.offset(((y & 511) << 9 | x & 511) * 4) };
if pixel.is_null() {
return None;
}
// The slot holds a `*const Cell`; a null cell pointer becomes `None`.
unsafe { pixel.cast::<*const ntypes::Cell>().read().as_ref() }
}
/// Computes the material id of `cell` as the byte distance between its
/// material pointer and `material_list_ptr`, divided by `ntypes::CELLDATA_SIZE`,
/// truncated to `u16`.
fn get_cell_material_id(&self, cell: &ntypes::Cell) -> u16 {
let mat_ptr = cell.material_ptr();
let offset = unsafe { mat_ptr.cast::<c_void>().offset_from(self.material_list_ptr) };
let mat_id = (offset / ntypes::CELLDATA_SIZE) as u16;
mat_id
}
/// Reads the cell type from the cell's material data.
///
/// Panics if the cell's material pointer is null.
// NOTE(review): the value is read from game memory as a `CellType`; this assumes
// the game only ever stores discriminants that enum declares - confirm.
fn get_cell_type(&self, cell: &ntypes::Cell) -> ntypes::CellType {
unsafe { cell.material_ptr().as_ref().unwrap().cell_type }
}
/// Run-length encodes the area `[start_x, end_x) x [start_y, end_y)` row by row
/// and writes the runs to `pixel_runs`. Returns the number of runs written.
///
/// Missing cells and cells of type `None`, `Solid` or `Invalid` are encoded as
/// material 0 with flags 0. Liquids carry their material id and `is_static` as
/// flags; gas and fire carry only their material id. Each written run stores
/// `length - 1` in its `length` field.
///
/// # Panics
///
/// Panics if `start_x` or `start_y` is not a multiple of 128, if the area is
/// wider or taller than 128, or if `pixel_runs` is null while there is at least
/// one run to write.
///
/// # Safety
///
/// The pointers stored in `self` must reference a live particle world, and
/// `pixel_runs` must point to writable memory with room for every run produced.
/// NOTE(review): nothing in this function bounds the number of runs written, so
/// the caller's buffer must be able to hold one run per pixel in the worst case.
pub(crate) unsafe fn encode_area(
&mut self,
start_x: i32,
start_y: i32,
end_x: i32,
end_y: i32,
pixel_runs: *mut NoitaPixelRun,
) -> usize {
// Allow compiler to generate better code.
assert!(start_x % 128 == 0);
assert!(start_y % 128 == 0);
assert!((end_x - start_x) <= 128);
assert!((end_y - start_y) <= 128);
for y in start_y..end_y {
for x in start_x..end_x {
// Default for anything that is not encoded explicitly below.
let mut raw_pixel = RawPixel {
material: 0,
flags: 0,
};
let cell = self.get_cell_raw(x, y);
if let Some(cell) = cell {
let cell_type = self.get_cell_type(cell);
match cell_type {
ntypes::CellType::None => {}
// Nobody knows how box2d pixels work.
ntypes::CellType::Solid => {}
ntypes::CellType::Liquid => {
raw_pixel.material = self.get_cell_material_id(cell);
// Liquid cells are reinterpreted as `LiquidCell` to reach `is_static`.
let cell: &ntypes::LiquidCell = unsafe { mem::transmute(cell) };
raw_pixel.flags = cell.is_static as u8;
}
ntypes::CellType::Gas | ntypes::CellType::Fire => {
raw_pixel.material = self.get_cell_material_id(cell);
}
// ???
ntypes::CellType::Invalid => {}
}
}
self.runner.put_pixel(raw_pixel);
}
}
// Copy the collected runs into the caller's buffer, one entry after another.
let mut pixel_runs = pixel_runs;
let built_runner = self.runner.build();
let runs = built_runner.len();
for run in built_runner {
let noita_pixel_run = pixel_runs.as_mut().unwrap();
// The destination format stores the run length minus one.
noita_pixel_run.length = (run.length - 1) as u16;
noita_pixel_run.material = run.data.material;
noita_pixel_run.flags = run.data.flags;
pixel_runs = pixel_runs.offset(1);
}
// Leave the runner empty for the next call.
self.runner.clear();
runs
}
}

67
ewext/src/noita/ntypes.rs Normal file
View file

@ -0,0 +1,67 @@
// Type defs borrowed from NoitaPatcher.
use std::ffi::c_char;
/// Divisor used to turn a byte distance between two material pointers into a
/// material id. Presumably the size in bytes of one full `CellData` entry in the
/// game's material list - confirm against NoitaPatcher.
pub(crate) const CELLDATA_SIZE: isize = 0x290;
/// Layout placeholder for the string fields at the start of `CellData`.
/// NOTE(review): presumably mirrors the game's C++ `std::string` on the 32-bit
/// build (pointer + 12 bytes, then size and capacity) - confirm against NoitaPatcher.
/// None of the fields are read by the code visible here; the type only provides
/// the correct size so that later `CellData` fields land at the right offset.
#[repr(C)]
#[derive(Debug)]
pub(crate) struct StdString {
buffer: *const i8,
sso_buffer: [i8; 12],
size: usize,
capacity: usize,
}
/// Kind of a cell, stored as a 32-bit value in the material's `CellData`.
///
/// Values of this type are read from game memory rather than constructed in
/// Rust, which is why unused variants are expected (`#[expect(dead_code)]`).
#[repr(u32)]
#[derive(Debug, PartialEq, Clone, Copy)]
#[expect(dead_code)]
pub(crate) enum CellType {
None = 0,
Liquid = 1,
Gas = 2,
Solid = 3,
Fire = 4,
// Equal to `u32::MAX`.
Invalid = 4294967295,
}
/// Leading part of the game's per-material data record.
///
/// Only `cell_type` is read from Rust; the preceding fields exist so that it is
/// located at the right offset. The declaration is deliberately incomplete, so
/// `size_of::<CellData>()` is NOT the stride of the material list (that is
/// `CELLDATA_SIZE`).
#[repr(C)]
pub(crate) struct CellData {
name: StdString,
ui_name: StdString,
material_type: i32,
id_2: i32,
pub(crate) cell_type: CellType,
// Has a bunch of other fields that aren't that relevant.
}
/// Opaque stand-in for the cell's virtual table; no entries are declared and it
/// is only ever referred to through a pointer.
#[repr(C)]
pub(crate) struct CellVTable {}
/// Common header of a cell object in game memory.
///
/// The `unknown*` byte arrays are padding for fields whose meaning is not known;
/// they keep `material_ptr` at the right offset.
#[repr(C)]
pub(crate) struct Cell {
pub(crate) vtable: *const CellVTable,
hp: i32,
unknown1: [u8; 8],
is_burning: bool,
unknown2: [u8; 3],
// Points at this cell's material record; exposed through `Cell::material_ptr`.
material_ptr: *const CellData,
}
/// A liquid cell: the common `Cell` header followed by liquid-specific fields.
///
/// The encoder reinterprets a `&Cell` as `&LiquidCell` when the cell type is
/// `Liquid`, so `cell` must stay the first field.
#[repr(C)]
pub(crate) struct LiquidCell {
cell: Cell,
x: i32,
y: i32,
unknown1: c_char,
unknown2: c_char,
// Copied into the `flags` byte of the encoded pixel.
pub(crate) is_static: bool,
// Has a bunch of other fields that aren't that relevant.
}
impl Cell {
/// Returns the raw pointer to this cell's material record (may be null).
pub(crate) fn material_ptr(&self) -> *const CellData {
self.material_ptr
}
}

View file

@ -25,6 +25,10 @@ pub(crate) struct RawPixel {
pub flags: u8,
}
struct ByteParser<'a> {
data: &'a [u8],
}
/// Stores a run of pixels.
/// Not specific to Noita side - length is an actual length
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Encode, Decode)]
@ -33,10 +37,6 @@ pub struct PixelRun<Pixel> {
pub data: Pixel,
}
struct ByteParser<'a> {
data: &'a [u8],
}
/// Converts a normal sequence of pixels to a run-length-encoded one.
pub struct PixelRunner<Pixel> {
current_pixel: Option<Pixel>,

View file

@ -0,0 +1,19 @@
local ffi = require("ffi")
local world_ffi = require("noitapatcher.nsew.world_ffi")
local module = {}
-- Hands the addresses of the grid world, its chunk map and the first material
-- over to the native `ewext` extension once the world exists.
function module.on_world_initialized()
    -- Fetch the raw game objects through NoitaPatcher's world FFI.
    local grid_world_obj = world_ffi.get_grid_world()
    local chunk_map_obj = grid_world_obj.vtable.get_chunk_map(grid_world_obj)

    -- Pointers cross into the extension as plain Lua numbers.
    local function as_address(ptr)
        return tonumber(ffi.cast("intptr_t", ptr))
    end

    local grid_world_addr = as_address(grid_world_obj)
    local chunk_map_addr = as_address(chunk_map_obj)
    local material_list_addr = as_address(world_ffi.get_material_ptr(0))

    ewext.init_particle_world_state(grid_world_addr, chunk_map_addr, material_list_addr)
end
-- Intentionally empty: this module does nothing when the local player spawns.
function module.on_local_player_spawn()
end
return module

View file

@ -5,6 +5,8 @@ local world = {}
local ffi = require("ffi")
local world_ffi = require("noitapatcher.nsew.world_ffi")
print("get_cell: " .. tostring(world_ffi.get_cell))
local C = ffi.C
ffi.cdef([[
@ -66,11 +68,11 @@ end
-- @tparam EncodedArea encoded_area memory to use, if nil this function allocates its own memory
-- @return returns an EncodedArea or nil if the area could not be encoded
-- @see decode
function world.encode_area(chunk_map, start_x, start_y, end_x, end_y, encoded_area)
start_x = ffi.cast('int32_t', start_x)
start_y = ffi.cast('int32_t', start_y)
end_x = ffi.cast('int32_t', end_x)
end_y = ffi.cast('int32_t', end_y)
function world.encode_area(chunk_map, start_x_ini, start_y_ini, end_x_ini, end_y_ini, encoded_area)
start_x = ffi.cast('int32_t', start_x_ini)
start_y = ffi.cast('int32_t', start_y_ini)
end_x = ffi.cast('int32_t', end_x_ini)
end_y = ffi.cast('int32_t', end_y_ini)
encoded_area = encoded_area or world.EncodedArea()
@ -82,8 +84,8 @@ function world.encode_area(chunk_map, start_x, start_y, end_x, end_y, encoded_ar
return nil
end
if width > 256 or height > 256 then
print("Invalid world part, dimension greater than 256")
if width > 128 or height > 128 then
print("Invalid world part, dimension greater than 128")
return nil
end
@ -92,74 +94,7 @@ function world.encode_area(chunk_map, start_x, start_y, end_x, end_y, encoded_ar
encoded_area.header.width = width - 1
encoded_area.header.height = height - 1
local run_count = 1
local current_run = encoded_area.pixel_runs[0]
local run_length = 0
local current_material = 0
local current_flags = 0
local y = start_y
while y < end_y do
local x = start_x
while x < end_x do
local material_number = 0
local flags = 0
local ppixel = world_ffi.get_cell(chunk_map, x, y)
local pixel = ppixel[0]
if pixel ~= nil then
local cell_type = pixel.vtable.get_cell_type(pixel)
if cell_type ~= C.CELL_TYPE_SOLID then
local material_ptr = pixel.vtable.get_material(pixel)
material_number = world_ffi.get_material_id(material_ptr)
end
if cell_type == C.CELL_TYPE_LIQUID then
local liquid_cell = ffi.cast(pliquid_cell, pixel)
if liquid_cell.is_static then
flags = bit.bor(flags, C.LIQUID_FLAG_STATIC)
end
end
end
if x == start_x and y == start_y then
-- Initial run
current_material = material_number
current_flags = flags
elseif current_material ~= material_number or current_flags ~= flags then
-- Next run
current_run.length = run_length - 1
current_run.material = current_material
current_run.flags = current_flags
if run_count == C.PIXEL_RUN_MAX then
print("Area too complicated to encode")
return nil
end
current_run = encoded_area.pixel_runs[run_count]
run_count = run_count + 1
run_length = 0
current_material = material_number
current_flags = flags
end
run_length = run_length + 1
x = x + 1
end
y = y + 1
end
current_run.length = run_length - 1
current_run.material = current_material
current_run.flags = current_flags
encoded_area.header.pixel_run_count = run_count
encoded_area.header.pixel_run_count = ewext.encode_area(start_x_ini, start_y_ini, end_x_ini, end_y_ini, tonumber(ffi.cast("intptr_t", encoded_area.pixel_runs)))
return encoded_area
end

View file

@ -51,6 +51,7 @@ function world_sync.on_world_initialized()
c = c - 1
print("Last material id: "..c)
world.last_material_id = c
do_benchmark()
end
local function send_chunks(cx, cy, chunk_map)

View file

@ -5,9 +5,6 @@ package.cpath = package.cpath .. ";./mods/quant.ew/?.dll"
package.path = package.path .. ";./mods/quant.ew/?.lua"
print(package.cpath)
-- ewext = require("ewext")
-- ewext()
dofile_once( "data/scripts/lib/utilities.lua" )
dofile_once("mods/quant.ew/files/system/player/player_cosmetics.lua")
@ -17,6 +14,8 @@ np.EnableGameSimulatePausing(false)
np.InstallDamageDetailsPatch()
np.SilenceLogs("Warning - streaming didn\'t find any chunks it could stream away...\n")
ewext = require("ewext")
-- Make some stuff global, as it's way too annoying to import each time.
ctx = dofile_once("mods/quant.ew/files/core/ctx.lua")
player_fns = dofile_once("mods/quant.ew/files/core/player_fns.lua")
@ -42,6 +41,8 @@ np.CrossCallAdd("ew_per_peer_seed", function()
end)
local function load_modules()
ctx.load_system("ewext_init")
ctx.dofile_and_add_hooks("mods/quant.ew/files/system/item_sync.lua")
ctx.dofile_and_add_hooks("mods/quant.ew/files/system/player_sync.lua")