diff --git a/libswscale/Makefile b/libswscale/Makefile index f33754ce67..fa9231aff1 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -50,3 +50,10 @@ TESTPROGS = colorspace \ pixdesc_query \ swscale \ sws_ops \ + sws_ops_aarch64 \ + +sws_ops_entries_aarch64: TAG = GEN +sws_ops_entries_aarch64: $(SUBDIR)tests/sws_ops_aarch64$(EXESUF) + $(M)$< > $(SRC_PATH)/libswscale/aarch64/ops_entries.c.tmp + $(CP) $(SRC_PATH)/libswscale/aarch64/ops_entries.c.tmp $(SRC_PATH)/libswscale/aarch64/ops_entries.c + $(RM) $(SRC_PATH)/libswscale/aarch64/ops_entries.c.tmp diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c new file mode 100644 index 0000000000..61ff8bf760 --- /dev/null +++ b/libswscale/aarch64/ops_entries.c @@ -0,0 +1,390 @@ +/* + * This file is automatically generated. Do not edit manually. + * To regenerate, run: make sws_ops_entries_aarch64 + */ + +{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_BIT, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_BIT, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_NIBBLE, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_NIBBLE, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, 
.mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ 
.op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_READ_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_BIT, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_BIT, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_NIBBLE, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_NIBBLE, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = 
AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PACKED, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = 
AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_WRITE_PLANAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = 
AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWAP_BYTES, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0001, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0001, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0003, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0123, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0132, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0132, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0132, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0213, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0231, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1011 }, +{ .op 
= AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0312, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x0321, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1000, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1023, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1023, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1203, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1230, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x1320, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 8, 
.type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2013, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2103, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x2130, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3000, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3012, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3021, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = 
AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3102, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3120, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SWIZZLE, .swizzle = 0x3201, .block_size = 32, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0121, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0121, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0233, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0233, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0332, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0332, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0444, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0444, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0555, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 
}, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0555, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0565, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x0565, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x2aaa, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0x2aaa, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0xaaa2, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0100 }, +{ .op = AARCH64_SWS_OP_UNPACK, .pack = 0xaaa2, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0121, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0121, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0233, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0233, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0332, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0332, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0444, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0444, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0555, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0555, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x0565, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 
0x0565, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0x2aaa, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_PACK, .pack = 0xaaa2, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 1, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 1, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 1, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 2, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 2, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 2, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 2, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 2, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 3, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 4, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 5, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op 
= AARCH64_SWS_OP_LSHIFT, .shift = 6, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 6, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 6, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 6, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 6, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 6, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 7, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 8, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LSHIFT, .shift = 8, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 4, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 4, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 6, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 6, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_RSHIFT, .shift = 6, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = 
AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1011 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1100 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1100 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1011 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1101 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 8, .type = 
AARCH64_PIXEL_F32, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0110 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CLEAR, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U8, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, 
.block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, 
.block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_U32, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0100 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1010 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1100 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, 
.block_size = 8, .type = AARCH64_PIXEL_U16, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0100 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_CONVERT, .to_type = AARCH64_PIXEL_F32, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_EXPAND, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_EXPAND, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_EXPAND, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_EXPAND, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_EXPAND, .to_type = AARCH64_PIXEL_U16, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_MIN, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 
}, +{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_MAX, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_U32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_SCALE, .block_size = 16, .type = AARCH64_PIXEL_U16, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000000fULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fcULL, 
.linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x00000000fdULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000000c000ULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000373dcc7ULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000373dcc7ULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x0003f3fccfULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x0003f3fccfULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c00c00cULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10c40dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c10cc0dULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 
0x000c10cc0dULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000c30cc0fULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcfcULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcfcULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcffULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0x000ff3fcffULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc000000000ULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc000000000ULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc00000000fULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc00000000fULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc0000000fcULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc0000000fcULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc003f3fccfULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = 
AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc003f3fccfULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc00c00c00cULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc00c00c00cULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc00ff3fcffULL, .linear.fmla = 0, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_LINEAR, .linear.mask = 0xc00ff3fcffULL, .linear.fmla = 1, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x0325, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x032f, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x2305, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x230f, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x3000, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x302f, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1110 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x3ff0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5023, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5032, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 
0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5230, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5ff0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1001 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0x5fff, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x1000 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf023, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf032, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf203, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf230, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf2f0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0101 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf2ff, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0100 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xf302, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0111 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xff30, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0011 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xff3f, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0010 }, +{ .op = AARCH64_SWS_OP_DITHER, .dither.y_offset = 0xfff0, .dither.size_log2 = 4, .block_size = 8, .type = AARCH64_PIXEL_F32, .mask = 0x0001 }, diff --git a/libswscale/aarch64/ops_impl.c b/libswscale/aarch64/ops_impl.c new file mode 100644 index 
0000000000..1e2f42ef14 --- /dev/null +++ b/libswscale/aarch64/ops_impl.c @@ -0,0 +1,257 @@ +/* + * Copyright (C) 2026 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include + +#include "libavutil/attributes.h" + +#include "libavutil/avassert.h" + +#include "ops_impl.h" + +/*********************************************************************/ +static const char pixel_types[AARCH64_PIXEL_TYPE_NB][32] = { + [AARCH64_PIXEL_U8 ] = "AARCH64_PIXEL_U8", + [AARCH64_PIXEL_U16] = "AARCH64_PIXEL_U16", + [AARCH64_PIXEL_U32] = "AARCH64_PIXEL_U32", + [AARCH64_PIXEL_F32] = "AARCH64_PIXEL_F32", +}; + +static const char *aarch64_pixel_type(SwsAArch64PixelType fmt) +{ + if (fmt >= AARCH64_PIXEL_TYPE_NB) { + av_assert0(!"Invalid pixel type!"); + return NULL; + } + return pixel_types[fmt]; +} + +/*********************************************************************/ +static const char op_types[AARCH64_SWS_OP_TYPE_NB][32] = { + [AARCH64_SWS_OP_NONE ] = "AARCH64_SWS_OP_NONE", + [AARCH64_SWS_OP_PROCESS ] = "AARCH64_SWS_OP_PROCESS", + [AARCH64_SWS_OP_PROCESS_RETURN] = "AARCH64_SWS_OP_PROCESS_RETURN", + [AARCH64_SWS_OP_READ_BIT ] = "AARCH64_SWS_OP_READ_BIT", + [AARCH64_SWS_OP_READ_NIBBLE ] = "AARCH64_SWS_OP_READ_NIBBLE", + 
[AARCH64_SWS_OP_READ_PACKED ] = "AARCH64_SWS_OP_READ_PACKED", + [AARCH64_SWS_OP_READ_PLANAR ] = "AARCH64_SWS_OP_READ_PLANAR", + [AARCH64_SWS_OP_WRITE_BIT ] = "AARCH64_SWS_OP_WRITE_BIT", + [AARCH64_SWS_OP_WRITE_NIBBLE ] = "AARCH64_SWS_OP_WRITE_NIBBLE", + [AARCH64_SWS_OP_WRITE_PACKED ] = "AARCH64_SWS_OP_WRITE_PACKED", + [AARCH64_SWS_OP_WRITE_PLANAR ] = "AARCH64_SWS_OP_WRITE_PLANAR", + [AARCH64_SWS_OP_SWAP_BYTES ] = "AARCH64_SWS_OP_SWAP_BYTES", + [AARCH64_SWS_OP_SWIZZLE ] = "AARCH64_SWS_OP_SWIZZLE", + [AARCH64_SWS_OP_UNPACK ] = "AARCH64_SWS_OP_UNPACK", + [AARCH64_SWS_OP_PACK ] = "AARCH64_SWS_OP_PACK", + [AARCH64_SWS_OP_LSHIFT ] = "AARCH64_SWS_OP_LSHIFT", + [AARCH64_SWS_OP_RSHIFT ] = "AARCH64_SWS_OP_RSHIFT", + [AARCH64_SWS_OP_CLEAR ] = "AARCH64_SWS_OP_CLEAR", + [AARCH64_SWS_OP_CONVERT ] = "AARCH64_SWS_OP_CONVERT", + [AARCH64_SWS_OP_EXPAND ] = "AARCH64_SWS_OP_EXPAND", + [AARCH64_SWS_OP_MIN ] = "AARCH64_SWS_OP_MIN", + [AARCH64_SWS_OP_MAX ] = "AARCH64_SWS_OP_MAX", + [AARCH64_SWS_OP_SCALE ] = "AARCH64_SWS_OP_SCALE", + [AARCH64_SWS_OP_LINEAR ] = "AARCH64_SWS_OP_LINEAR", + [AARCH64_SWS_OP_DITHER ] = "AARCH64_SWS_OP_DITHER", +}; + +static const char *aarch64_op_type(SwsAArch64OpType op) +{ + if (op == AARCH64_SWS_OP_NONE || op >= AARCH64_SWS_OP_TYPE_NB) { + av_assert0(!"Invalid op type!"); + return NULL; + } + return op_types[op]; +} + +/*********************************************************************/ +/* + * Helper string concatenation function that does not depend on the + * FFmpeg libraries, so it may be used standalone. + */ +av_printf_format(3, 4) +static void buf_appendf(char **pbuf, size_t *prem, const char *fmt, ...) 
+{ + char *buf = *pbuf; + size_t rem = *prem; + if (!rem) + return; + + va_list ap; + va_start(ap, fmt); + int n = vsnprintf(buf, rem, fmt, ap); + va_end(ap); + + if (n > 0) { + if (n < rem) { + buf += n; + rem -= n; + } else { + buf += rem - 1; + rem = 0; + } + *pbuf = buf; + *prem = rem; + } +} + +/*********************************************************************/ +/** + * The following structure is used to describe one field from + * SwsAArch64OpImplParams. This will be used to serialize the parameter + * structure, generate function names and lookup strings, and compare + * two sets of parameters. + */ + +typedef struct ParamField { + const char *name; + size_t offset; + size_t size; + void (*print_val)(char **pbuf, size_t *prem, void *p); + int (*cmp_val)(void *pa, void *pb); +} ParamField; + +#define PARAM_FIELD(name) #name, offsetof(SwsAArch64OpImplParams, name), sizeof(((SwsAArch64OpImplParams *) 0)->name) + +static void print_op_val(char **pbuf, size_t *prem, void *p) +{ + SwsAArch64OpType op = *(SwsAArch64OpType *) p; + buf_appendf(pbuf, prem, "%s", aarch64_op_type(op)); +} + +static int cmp_op(void *pa, void *pb) +{ + int64_t ia = (int64_t) *((SwsAArch64OpType *) pa); + int64_t ib = (int64_t) *((SwsAArch64OpType *) pb); + int64_t diff = ia - ib; + if (diff) + return diff < 0 ? -1 : 1; + return 0; +} + +static void print_pixel_val(char **pbuf, size_t *prem, void *p) +{ + SwsAArch64PixelType type = *(SwsAArch64PixelType *) p; + buf_appendf(pbuf, prem, "%s", aarch64_pixel_type(type)); +} + +static int cmp_pixel(void *pa, void *pb) +{ + int64_t ia = (int64_t) *((SwsAArch64PixelType *) pa); + int64_t ib = (int64_t) *((SwsAArch64PixelType *) pb); + int64_t diff = ia - ib; + if (diff) + return diff < 0 ? 
-1 : 1; + return 0; +} + +static void print_u8_val(char **pbuf, size_t *prem, void *p) +{ + uint8_t val = *(uint8_t *) p; + buf_appendf(pbuf, prem, "%u", val); +} + +static int cmp_u8(void *pa, void *pb) +{ + int64_t ia = (int64_t) *((uint8_t *) pa); + int64_t ib = (int64_t) *((uint8_t *) pb); + int64_t diff = ia - ib; + if (diff) + return diff < 0 ? -1 : 1; + return 0; +} + +static void print_u16_val(char **pbuf, size_t *prem, void *p) +{ + uint16_t val = *(uint16_t *) p; + buf_appendf(pbuf, prem, "0x%04x", val); +} + +static int cmp_u16(void *pa, void *pb) +{ + int64_t ia = (int64_t) *((uint16_t *) pa); + int64_t ib = (int64_t) *((uint16_t *) pb); + int64_t diff = ia - ib; + if (diff) + return diff < 0 ? -1 : 1; + return 0; +} + +static void print_u40_val(char **pbuf, size_t *prem, void *p) +{ + uint64_t val = *(uint64_t *) p; + buf_appendf(pbuf, prem, "0x%010" PRIx64 "ULL", val); +} + +static int cmp_u40(void *pa, void *pb) +{ + int64_t ia = (int64_t) *((uint64_t *) pa); + int64_t ib = (int64_t) *((uint64_t *) pb); + int64_t diff = ia - ib; + if (diff) + return diff < 0 ? 
-1 : 1; + return 0; +} + +/*********************************************************************/ +static const ParamField field_op = { PARAM_FIELD(op), print_op_val, cmp_op }; +static const ParamField field_mask = { PARAM_FIELD(mask), print_u16_val, cmp_u16 }; +static const ParamField field_type = { PARAM_FIELD(type), print_pixel_val, cmp_pixel }; +static const ParamField field_block_size = { PARAM_FIELD(block_size), print_u8_val, cmp_u8 }; +static const ParamField field_shift = { PARAM_FIELD(shift), print_u8_val, cmp_u8 }; +static const ParamField field_swizzle = { PARAM_FIELD(swizzle), print_u16_val, cmp_u16 }; +static const ParamField field_pack = { PARAM_FIELD(pack), print_u16_val, cmp_u16 }; +static const ParamField field_to_type = { PARAM_FIELD(to_type), print_pixel_val, cmp_pixel }; +static const ParamField field_linear_mask = { PARAM_FIELD(linear.mask), print_u40_val, cmp_u40 }; +static const ParamField field_linear_fmla = { PARAM_FIELD(linear.fmla), print_u8_val, cmp_u8 }; +static const ParamField field_dither_y_offset = { PARAM_FIELD(dither.y_offset), print_u16_val, cmp_u16 }; +static const ParamField field_dither_size_log2 = { PARAM_FIELD(dither.size_log2), print_u8_val, cmp_u8 }; + +/* Fields needed to uniquely identify each SwsAArch64OpType. 
*/ +#define MAX_LEVELS 8 +static const ParamField *op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS] = { + [AARCH64_SWS_OP_PROCESS ] = { &field_op, &field_mask }, + [AARCH64_SWS_OP_PROCESS_RETURN] = { &field_op, &field_mask }, + [AARCH64_SWS_OP_READ_BIT ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_READ_NIBBLE ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_READ_PACKED ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_READ_PLANAR ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_WRITE_BIT ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_WRITE_NIBBLE ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_WRITE_PACKED ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_WRITE_PLANAR ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_SWAP_BYTES ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_SWIZZLE ] = { &field_op, &field_swizzle, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_UNPACK ] = { &field_op, &field_pack, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_PACK ] = { &field_op, &field_pack, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_LSHIFT ] = { &field_op, &field_shift, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_RSHIFT ] = { &field_op, &field_shift, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_CLEAR ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_CONVERT ] = { &field_op, &field_to_type, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_EXPAND ] = { &field_op, &field_to_type, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_MIN ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_MAX ] = { 
&field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_SCALE ] = { &field_op, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_LINEAR ] = { &field_op, &field_linear_mask, &field_linear_fmla, &field_block_size, &field_type, &field_mask }, + [AARCH64_SWS_OP_DITHER ] = { &field_op, &field_dither_y_offset, &field_dither_size_log2, &field_block_size, &field_type, &field_mask }, +}; diff --git a/libswscale/aarch64/ops_impl.h b/libswscale/aarch64/ops_impl.h new file mode 100644 index 0000000000..7bd23dd8e8 --- /dev/null +++ b/libswscale/aarch64/ops_impl.h @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2026 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
#ifndef SWSCALE_AARCH64_OPS_IMPL_H
#define SWSCALE_AARCH64_OPS_IMPL_H

/* NOTE(review): the three system-header names below were lost in extraction;
 * reconstructed from usage (static_assert, offsetof, uintN_t) — confirm
 * against upstream. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Similar to SwsPixelType */
typedef enum SwsAArch64PixelType {
    AARCH64_PIXEL_U8,
    AARCH64_PIXEL_U16,
    AARCH64_PIXEL_U32,
    AARCH64_PIXEL_F32,
    AARCH64_PIXEL_TYPE_NB,
} SwsAArch64PixelType;

/* Similar to SwsOpType */
typedef enum SwsAArch64OpType {
    AARCH64_SWS_OP_NONE = 0,
    AARCH64_SWS_OP_PROCESS,
    AARCH64_SWS_OP_PROCESS_RETURN,
    AARCH64_SWS_OP_READ_BIT,
    AARCH64_SWS_OP_READ_NIBBLE,
    AARCH64_SWS_OP_READ_PACKED,
    AARCH64_SWS_OP_READ_PLANAR,
    AARCH64_SWS_OP_WRITE_BIT,
    AARCH64_SWS_OP_WRITE_NIBBLE,
    AARCH64_SWS_OP_WRITE_PACKED,
    AARCH64_SWS_OP_WRITE_PLANAR,
    AARCH64_SWS_OP_SWAP_BYTES,
    AARCH64_SWS_OP_SWIZZLE,
    AARCH64_SWS_OP_UNPACK,
    AARCH64_SWS_OP_PACK,
    AARCH64_SWS_OP_LSHIFT,
    AARCH64_SWS_OP_RSHIFT,
    AARCH64_SWS_OP_CLEAR,
    AARCH64_SWS_OP_CONVERT,
    AARCH64_SWS_OP_EXPAND,
    AARCH64_SWS_OP_MIN,
    AARCH64_SWS_OP_MAX,
    AARCH64_SWS_OP_SCALE,
    AARCH64_SWS_OP_LINEAR,
    AARCH64_SWS_OP_DITHER,
    AARCH64_SWS_OP_TYPE_NB,
} SwsAArch64OpType;

/* Each nibble in the mask corresponds to one component. */
typedef uint16_t SwsAArch64OpMask;

/**
 * Affine coefficient mask for linear op. Packs a 4x5 matrix in execution
 * order, where the offset is the first element, with 2 bits per element:
 *   00: m[i][j] == 0
 *   01: m[i][j] == 1
 *   11: m[i][j] is any other coefficient
 */
typedef uint64_t SwsAArch64LinearOpMask;

typedef struct SwsAArch64LinearOp {
    SwsAArch64LinearOpMask mask;
    uint8_t fmla; /* nonzero when fused multiply-add may be used (non-bitexact) */
} SwsAArch64LinearOp;

typedef struct SwsAArch64DitherOp {
    uint16_t y_offset;  /* packed per-component y offsets, one nibble each */
    uint8_t size_log2;  /* log2 of the dither matrix size */
} SwsAArch64DitherOp;

/**
 * SwsAArch64OpImplParams describes the parameters for an SwsAArch64OpType
 * operation. It consists of simplified parameters from the SwsOp structure,
 * with the purpose of being straight-forward to implement and execute.
 */
typedef struct SwsAArch64OpImplParams {
    SwsAArch64OpType op;
    SwsAArch64OpMask mask;
    SwsAArch64PixelType type;
    uint8_t block_size;
    union {
        uint8_t shift;
        SwsAArch64OpMask swizzle;
        SwsAArch64OpMask pack;
        SwsAArch64PixelType to_type;
        SwsAArch64LinearOp linear;
        SwsAArch64DitherOp dither;
    };
} SwsAArch64OpImplParams;

/* SwsAArch64OpMask-related helpers.
 * NOTE: both setters OR into the destination nibble/bit-pair; callers must
 * clear the destination first. */

#define MASK_SET(mask, idx, val) do { (mask) |= (((val) & 0xf) << ((idx) << 2)); } while (0)

#define LINEAR_MASK_SET(mask, idx, jdx, val) do { \
    (mask) |= ((((SwsAArch64LinearOpMask) (val)) & 3) << (2 * ((5 * (idx) + (jdx))))); \
} while (0)
#define LINEAR_MASK_0 0
#define LINEAR_MASK_1 1
#define LINEAR_MASK_X 3

/**
 * These values will be used by ops_asmgen to access fields inside of
 * SwsOpExec and SwsOpImpl. The sizes are checked below when compiling
 * for AArch64 to make sure there is no mismatch.
 */
#define offsetof_exec_in 0
#define offsetof_exec_out 32
#define offsetof_exec_in_bump 128
#define offsetof_exec_out_bump 160
#define offsetof_impl_cont 0
#define offsetof_impl_priv 16
#define sizeof_impl 32

#if ARCH_AARCH64 && HAVE_NEON
static_assert(offsetof_exec_in == offsetof(SwsOpExec, in), "SwsOpExec layout mismatch");
static_assert(offsetof_exec_out == offsetof(SwsOpExec, out), "SwsOpExec layout mismatch");
static_assert(offsetof_exec_in_bump == offsetof(SwsOpExec, in_bump), "SwsOpExec layout mismatch");
static_assert(offsetof_exec_out_bump == offsetof(SwsOpExec, out_bump), "SwsOpExec layout mismatch");
static_assert(offsetof_impl_cont == offsetof(SwsOpImpl, cont), "SwsOpImpl layout mismatch");
static_assert(offsetof_impl_priv == offsetof(SwsOpImpl, priv), "SwsOpImpl layout mismatch");
#endif

#endif /* SWSCALE_AARCH64_OPS_IMPL_H */
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * NOTE: This file is #include'd directly by both the NEON backend and + * the sws_ops_aarch64 tool. + */ + +#include "libavutil/error.h" +#include "libavutil/rational.h" +#include "libswscale/ops.h" + +#include "ops_impl.h" + +static uint8_t sws_pixel_to_aarch64(SwsPixelType type) +{ + switch (type) { + case SWS_PIXEL_U8: return AARCH64_PIXEL_U8; + case SWS_PIXEL_U16: return AARCH64_PIXEL_U16; + case SWS_PIXEL_U32: return AARCH64_PIXEL_U32; + case SWS_PIXEL_F32: return AARCH64_PIXEL_F32; + } + return 0; +} + +/** + * The column index order for SwsLinearOp.mask follows the affine transform + * order, where the offset is the last element. SwsAArch64LinearOpMask, on + * the other hand, follows execution order, where the offset is the first + * element. + */ +static int linear_index_from_sws_op(int idx) +{ + const int reorder_col[5] = { 1, 2, 3, 4, 0 }; + return reorder_col[idx]; +} + +/** + * Convert SwsOp to a SwsAArch64OpImplParams. Read the comments regarding + * SwsAArch64OpImplParams in ops_impl.h for more information. + */ +static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n, + int block_size, SwsAArch64OpImplParams *out) +{ + const SwsOp *op = &ops->ops[n]; + const SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : op; + + out->block_size = block_size; + + /** + * Most SwsOp work on fields described by next->comps.unused. + * The few that don't will override this field later. 
+ */ + out->mask = 0; + for (int i = 0; i < 4; i++) { + if (!next->comps.unused[i]) + MASK_SET(out->mask, i, 1); + } + + out->type = sws_pixel_to_aarch64(op->type); + + /* Map SwsOpType to SwsAArch64OpType */ + switch (op->op) { + case SWS_OP_READ: + if (op->rw.filter) + return AVERROR(ENOTSUP); + /** + * The different types of read operations have been split into + * their own SwsAArch64OpType to simplify the implementation. + */ + if (op->rw.frac == 1) + out->op = AARCH64_SWS_OP_READ_NIBBLE; + else if (op->rw.frac == 3) + out->op = AARCH64_SWS_OP_READ_BIT; + else if (op->rw.packed && op->rw.elems != 1) + out->op = AARCH64_SWS_OP_READ_PACKED; + else + out->op = AARCH64_SWS_OP_READ_PLANAR; + break; + case SWS_OP_WRITE: + if (op->rw.filter) + return AVERROR(ENOTSUP); + /** + * The different types of write operations have been split into + * their own SwsAArch64OpType to simplify the implementation. + */ + if (op->rw.frac == 1) + out->op = AARCH64_SWS_OP_WRITE_NIBBLE; + else if (op->rw.frac == 3) + out->op = AARCH64_SWS_OP_WRITE_BIT; + else if (op->rw.packed && op->rw.elems != 1) + out->op = AARCH64_SWS_OP_WRITE_PACKED; + else + out->op = AARCH64_SWS_OP_WRITE_PLANAR; + break; + case SWS_OP_SWAP_BYTES: out->op = AARCH64_SWS_OP_SWAP_BYTES; break; + case SWS_OP_SWIZZLE: out->op = AARCH64_SWS_OP_SWIZZLE; break; + case SWS_OP_UNPACK: out->op = AARCH64_SWS_OP_UNPACK; break; + case SWS_OP_PACK: out->op = AARCH64_SWS_OP_PACK; break; + case SWS_OP_LSHIFT: out->op = AARCH64_SWS_OP_LSHIFT; break; + case SWS_OP_RSHIFT: out->op = AARCH64_SWS_OP_RSHIFT; break; + case SWS_OP_CLEAR: out->op = AARCH64_SWS_OP_CLEAR; break; + case SWS_OP_CONVERT: + out->op = op->convert.expand ? 
AARCH64_SWS_OP_EXPAND : AARCH64_SWS_OP_CONVERT; + break; + case SWS_OP_MIN: out->op = AARCH64_SWS_OP_MIN; break; + case SWS_OP_MAX: out->op = AARCH64_SWS_OP_MAX; break; + case SWS_OP_SCALE: out->op = AARCH64_SWS_OP_SCALE; break; + case SWS_OP_LINEAR: out->op = AARCH64_SWS_OP_LINEAR; break; + case SWS_OP_DITHER: out->op = AARCH64_SWS_OP_DITHER; break; + } + + switch (out->op) { + case AARCH64_SWS_OP_READ_BIT: + case AARCH64_SWS_OP_READ_NIBBLE: + case AARCH64_SWS_OP_READ_PACKED: + case AARCH64_SWS_OP_READ_PLANAR: + case AARCH64_SWS_OP_WRITE_BIT: + case AARCH64_SWS_OP_WRITE_NIBBLE: + case AARCH64_SWS_OP_WRITE_PACKED: + case AARCH64_SWS_OP_WRITE_PLANAR: + switch (op->rw.elems) { + case 1: out->mask = 0x0001; break; + case 2: out->mask = 0x0011; break; + case 3: out->mask = 0x0111; break; + case 4: out->mask = 0x1111; break; + }; + break; + case AARCH64_SWS_OP_SWAP_BYTES: + /* Only the element size matters, not the type. */ + if (out->type == AARCH64_PIXEL_F32) + out->type = AARCH64_PIXEL_U32; + break; + case AARCH64_SWS_OP_SWIZZLE: + out->mask = 0; + MASK_SET(out->mask, 0, op->swizzle.in[0] != 0); + MASK_SET(out->mask, 1, op->swizzle.in[1] != 1); + MASK_SET(out->mask, 2, op->swizzle.in[2] != 2); + MASK_SET(out->mask, 3, op->swizzle.in[3] != 3); + MASK_SET(out->swizzle, 0, op->swizzle.in[0]); + MASK_SET(out->swizzle, 1, op->swizzle.in[1]); + MASK_SET(out->swizzle, 2, op->swizzle.in[2]); + MASK_SET(out->swizzle, 3, op->swizzle.in[3]); + /* The element size and type don't matter. 
*/ + out->block_size = block_size * ff_sws_pixel_type_size(op->type); + out->type = AARCH64_PIXEL_U8; + break; + case AARCH64_SWS_OP_UNPACK: + MASK_SET(out->pack, 0, op->pack.pattern[0]); + MASK_SET(out->pack, 1, op->pack.pattern[1]); + MASK_SET(out->pack, 2, op->pack.pattern[2]); + MASK_SET(out->pack, 3, op->pack.pattern[3]); + break; + case AARCH64_SWS_OP_PACK: + out->mask = 0; + MASK_SET(out->mask, 0, !op->comps.unused[0]); + MASK_SET(out->mask, 1, !op->comps.unused[1]); + MASK_SET(out->mask, 2, !op->comps.unused[2]); + MASK_SET(out->mask, 3, !op->comps.unused[3]); + MASK_SET(out->pack, 0, op->pack.pattern[0]); + MASK_SET(out->pack, 1, op->pack.pattern[1]); + MASK_SET(out->pack, 2, op->pack.pattern[2]); + MASK_SET(out->pack, 3, op->pack.pattern[3]); + break; + case AARCH64_SWS_OP_LSHIFT: + case AARCH64_SWS_OP_RSHIFT: + out->shift = op->c.u; + break; + case AARCH64_SWS_OP_CLEAR: + out->mask = 0; + MASK_SET(out->mask, 0, !!op->c.q4[0].den); + MASK_SET(out->mask, 1, !!op->c.q4[1].den); + MASK_SET(out->mask, 2, !!op->c.q4[2].den); + MASK_SET(out->mask, 3, !!op->c.q4[3].den); + break; + case AARCH64_SWS_OP_EXPAND: + case AARCH64_SWS_OP_CONVERT: + out->to_type = sws_pixel_to_aarch64(op->convert.to); + break; + case AARCH64_SWS_OP_LINEAR: + /** + * The out->linear.mask field packs the 4x5 matrix from SwsLinearOp as + * 2 bits per element: + * 00: m[i][j] == 0 + * 01: m[i][j] == 1 + * 11: m[i][j] is any other coefficient + */ + out->mask = 0; + for (int i = 0; i < 4; i++) { + /* Skip unused or identity rows */ + if (op->comps.unused[i] || !(op->lin.mask & SWS_MASK_ROW(i))) + continue; + MASK_SET(out->mask, i, 1); + for (int j = 0; j < 5; j++) { + int jj = linear_index_from_sws_op(j); + if (!av_cmp_q(op->lin.m[i][j], av_make_q(1, 1))) + LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_1); + else if (av_cmp_q(op->lin.m[i][j], av_make_q(0, 1))) + LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_X); + } + } + out->linear.fmla = !(ctx->flags & SWS_BITEXACT); + break; 
+ case AARCH64_SWS_OP_DITHER: + out->mask = 0; + MASK_SET(out->mask, 0, op->dither.y_offset[0] >= 0); + MASK_SET(out->mask, 1, op->dither.y_offset[1] >= 0); + MASK_SET(out->mask, 2, op->dither.y_offset[2] >= 0); + MASK_SET(out->mask, 3, op->dither.y_offset[3] >= 0); + MASK_SET(out->dither.y_offset, 0, op->dither.y_offset[0]); + MASK_SET(out->dither.y_offset, 1, op->dither.y_offset[1]); + MASK_SET(out->dither.y_offset, 2, op->dither.y_offset[2]); + MASK_SET(out->dither.y_offset, 3, op->dither.y_offset[3]); + out->dither.size_log2 = op->dither.size_log2; + break; + } + + return 0; +} diff --git a/libswscale/tests/sws_ops_aarch64.c b/libswscale/tests/sws_ops_aarch64.c new file mode 100644 index 0000000000..21948ca71b --- /dev/null +++ b/libswscale/tests/sws_ops_aarch64.c @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2026 Ramiro Polla + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/mem.h" +#include "libavutil/tree.h" +#include "libswscale/ops.h" +#include "libswscale/ops_chain.h" + +#include "libswscale/aarch64/ops_impl.c" +#include "libswscale/aarch64/ops_impl_conv.c" + +#ifdef _WIN32 +#include +#include +#endif + +/*********************************************************************/ +static int aarch64_op_impl_cmp(const void *a, const void *b) +{ + const SwsAArch64OpImplParams *pa = (const SwsAArch64OpImplParams *) a; + const SwsAArch64OpImplParams *pb = (const SwsAArch64OpImplParams *) b; + + const ParamField **fields = op_fields[pa->op]; + for (int i = 0; fields[i]; i++) { + const ParamField *field = fields[i]; + int diff = field->cmp_val((void *) (((uintptr_t) pa) + field->offset), + (void *) (((uintptr_t) pb) + field->offset)); + if (diff) + return diff; + } + return 0; +} + +/*********************************************************************/ +/* Insert the SwsAArch64OpImplParams structure into the AVTreeNode. */ +static int aarch64_collect_op(const SwsAArch64OpImplParams *params, struct AVTreeNode **root) +{ + int ret = 0; + + struct AVTreeNode *node = av_tree_node_alloc(); + SwsAArch64OpImplParams *copy = av_memdup(params, sizeof(*params)); + if (!node || !copy) { + ret = AVERROR(ENOMEM); + goto error; + } + av_tree_insert(root, copy, aarch64_op_impl_cmp, &node); + if (!node) + copy = NULL; + +error: + av_free(node); + av_free(copy); + return ret; +} + +/* Collect the parameters for the process/process_return functions. */ +static int aarch64_collect_process(const SwsOpList *ops, struct AVTreeNode **root) +{ + const SwsOp *read = ff_sws_op_list_input(ops); + const SwsOp *write = ff_sws_op_list_output(ops); + const int read_planes = read ? (read->rw.packed ? 
1 : read->rw.elems) : 0; + const int write_planes = write->rw.packed ? 1 : write->rw.elems; + int ret; + + SwsAArch64OpMask mask = 0; + for (int i = 0; i < FFMAX(read_planes, write_planes); i++) + MASK_SET(mask, i, 1); + SwsAArch64OpImplParams params = { + .op = AARCH64_SWS_OP_PROCESS, + .mask = mask, + }; + + ret = aarch64_collect_op(¶ms, root); + if (ret < 0) + return ret; + + params.op = AARCH64_SWS_OP_PROCESS_RETURN; + ret = aarch64_collect_op(¶ms, root); + if (ret < 0) + return ret; + + return 0; +} + +static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops) +{ + struct AVTreeNode **root = (struct AVTreeNode **) opaque; + int ret; + + /* Make on-stack copy of `ops` to iterate over */ + SwsOpList rest = *ops; + /* Use at most two full vregs during the widest precision section */ + int block_size = (ff_sws_op_list_max_size(ops) == 4) ? 8 : 16; + + ret = aarch64_collect_process(&rest, root); + if (ret < 0) + return ret; + + for (int i = 0; i < rest.num_ops; i++) { + SwsAArch64OpImplParams params = { 0 }; + ret = convert_to_aarch64_impl(ctx, &rest, i, block_size, ¶ms); + if (ret < 0) + goto end; + ret = aarch64_collect_op(¶ms, root); + if (ret < 0) + goto end; + if (params.op == AARCH64_SWS_OP_LINEAR) { + /** + * Generate both sets of linear op functions that do use + * and do not use fmla (selected by SWS_BITEXACT). 
+ */ + params.linear.fmla = !params.linear.fmla; + ret = aarch64_collect_op(¶ms, root); + if (ret < 0) + goto end; + } + } + + ret = 0; + +end: + return ret; +} + +/*********************************************************************/ +static void serialize_op(char *buf, size_t size, const SwsAArch64OpImplParams *params) +{ + buf_appendf(&buf, &size, "{"); + const ParamField **fields = op_fields[params->op]; + for (int i = 0; fields[i]; i++) { + const ParamField *field = fields[i]; + void *p = (void *) (((uintptr_t) params) + field->offset); + if (i) + buf_appendf(&buf, &size, ","); + buf_appendf(&buf, &size, " .%s = ", field->name); + field->print_val(&buf, &size, p); + } + buf_appendf(&buf, &size, " }"); + av_assert0(size && "string buffer exhausted"); +} + +/* Serialize SwsAArch64OpImplParams for one function. */ +static int print_op(void *opaque, void *elem) +{ + SwsAArch64OpImplParams *params = (SwsAArch64OpImplParams *) elem; + FILE *fp = (FILE *) opaque; + + char buf[256]; + serialize_op(buf, sizeof(buf), params); + fprintf(fp, "%s,\n", buf); + + av_free(params); + + return 0; +} + +/*********************************************************************/ +int main(int argc, char *argv[]) +{ + struct AVTreeNode *root = NULL; + int ret = 1; + +#ifdef _WIN32 + _setmode(_fileno(stdout), _O_BINARY); +#endif + + SwsContext *ctx = sws_alloc_context(); + if (!ctx) + goto fail; + + ret = ff_sws_enum_op_lists(ctx, &root, AV_PIX_FMT_NONE, AV_PIX_FMT_NONE, + register_op); + + /** + * Generate a C file with all the unique function parameter entries + * collected by aarch64_enum_ops(). + */ + printf("/*\n"); + printf(" * This file is automatically generated. 
Do not edit manually.\n"); + printf(" * To regenerate, run: make sws_ops_entries_aarch64\n"); + printf(" */\n"); + printf("\n"); + av_tree_enumerate(root, stdout, NULL, print_op); + +fail: + av_tree_destroy(root); + sws_free_context(&ctx); + return ret; +} diff --git a/tests/ref/fate/source b/tests/ref/fate/source index 78d3a2e0fa..cbbd347ef2 100644 --- a/tests/ref/fate/source +++ b/tests/ref/fate/source @@ -16,6 +16,7 @@ libavformat/log2_tab.c libavformat/rangecoder_dec.c libavformat/riscv/cpu_common.c libswresample/log2_tab.c +libswscale/aarch64/ops_entries.c libswscale/log2_tab.c libswscale/riscv/cpu_common.c tools/uncoded_frame.c