Migrate 9pfs to use TextEncoder to fix the 3-byte UTF-8 bug (#1139)

This commit is contained in:
Neal Shah 2024-08-29 17:42:46 -04:00 committed by GitHub
parent 4061c8d762
commit f3339aa78e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 16 additions and 82 deletions

View file

@ -84,7 +84,7 @@ CORE_FILES=const.js config.js io.js main.js lib.js buffer.js ide.js pci.js flopp
state.js ne2k.js sb16.js virtio.js virtio_console.js bus.js log.js \
cpu.js debug.js \
elf.js kernel.js
LIB_FILES=9p.js filesystem.js jor1k.js marshall.js utf8.js
LIB_FILES=9p.js filesystem.js jor1k.js marshall.js
BROWSER_FILES=screen.js keyboard.js mouse.js speaker.js serial.js \
network.js starter.js worker_bus.js dummy_screen.js \
fake_network.js wisp_network.js fetch_network.js print_stats.js filestorage.js

View file

@ -1338,7 +1338,7 @@ FS.prototype.FillDirectory = function(dirid) {
let size = 0;
for(const name of inode.direntries.keys())
{
size += 13 + 8 + 1 + 2 + UTF8.UTF8Length(name);
size += 13 + 8 + 1 + 2 + texten.encode(name).length;
}
const data = this.inodedata[dirid] = new Uint8Array(size);
inode.size = size;
@ -1350,7 +1350,7 @@ FS.prototype.FillDirectory = function(dirid) {
offset += marshall.Marshall(
["Q", "d", "b", "s"],
[child.qid,
offset+13+8+1+2+UTF8.UTF8Length(name),
offset+13+8+1+2+texten.encode(name).length,
child.mode >> 12,
name],
data, offset);

View file

@ -7,6 +7,8 @@
var marshall = {};
const textde = new TextDecoder();
const texten = new TextEncoder();
// Inserts data from an array to a byte aligned struct in memory
marshall.Marshall = function(typelist, input, struct, offset) {
@ -48,14 +50,13 @@ marshall.Marshall = function(typelist, input, struct, offset) {
struct[offset++] = 0; // set the length later
struct[offset++] = 0;
size += 2;
for(var j of item) {
var utf8 = UnicodeToUTF8Stream(j.charCodeAt(0));
utf8.forEach( function(c) {
struct[offset++] = c;
size += 1;
length++;
});
}
var stringBytes = texten.encode(item);
size += stringBytes.byteLength;
length += stringBytes.byteLength;
struct.set(stringBytes, offset);
offset += stringBytes.byteLength;
struct[lengthoffset+0] = length & 0xFF;
struct[lengthoffset+1] = (length >> 8) & 0xFF;
break;
@ -104,14 +105,10 @@ marshall.Unmarshall = function(typelist, struct, state) {
case "s":
var len = struct[offset++];
len += struct[offset++] << 8;
var str = "";
var utf8converter = new UTF8StreamToUnicode();
for(var j=0; j < len; j++) {
var c = utf8converter.Put(struct[offset++]);
if(c === -1) continue;
str += String.fromCharCode(c);
}
output.push(str);
var stringBytes = struct.slice(offset, offset + len);
offset += len;
output.push(textde.decode(stringBytes));
break;
case "Q":
state.offset = offset;

View file

@ -1,63 +0,0 @@
// -------------------------------------------------
// ------------------ UTF8 Helpers -----------------
// -------------------------------------------------
"use strict";
// Namespace object for the UTF-8 helpers; UTF8.UTF8Length is attached to it further down.
var UTF8 = {};
/**
 * Incremental UTF-8 decoder: feed it one byte at a time through Put() and it
 * hands back a Unicode code point whenever a full sequence has been received.
 *
 * Handles 1- to 4-byte sequences. Malformed input (a stray continuation byte,
 * an invalid lead byte, or a sequence cut short by a non-continuation byte)
 * is dropped and the decoder resynchronises on the next lead byte instead of
 * getting stuck. Overlong encodings are not rejected.
 *
 * @constructor
 */
function UTF8StreamToUnicode() {
    // Bytes of the sequence currently being assembled (at most 4 are used).
    this.stream = new Uint8Array(5);
    // Number of bytes of the current sequence collected so far.
    this.ofs = 0;

    /**
     * Feed one byte into the decoder.
     *
     * @param {number} key The next byte of the UTF-8 stream.
     * @return {number} The decoded code point once a sequence is complete,
     *     otherwise -1. Values above 0xFFFF must be turned into a string
     *     with String.fromCodePoint, not String.fromCharCode.
     */
    this.Put = function(key) {
        key &= 0xFF;

        // A pending multi-byte sequence may only be followed by continuation
        // bytes (10xxxxxx). Anything else means the sequence was truncated:
        // discard it and treat this byte as the start of a new sequence.
        if(this.ofs > 0 && (key & 0xC0) !== 0x80) {
            this.ofs = 0;
        }

        this.stream[this.ofs] = key;
        this.ofs++;

        // The lead byte determines the total length of the sequence.
        const lead = this.stream[0];
        let needed;
        if(lead < 0x80) {
            needed = 1;
        }
        else if((lead & 0xE0) === 0xC0) {
            needed = 2;
        }
        else if((lead & 0xF0) === 0xE0) {
            needed = 3;
        }
        else if((lead & 0xF8) === 0xF0) {
            needed = 4;
        }
        else {
            // Stray continuation byte or invalid lead byte: skip it.
            this.ofs = 0;
            return -1;
        }

        if(this.ofs < needed) {
            return -1;
        }

        // Sequence complete: reset for the next one and assemble the payload bits.
        this.ofs = 0;
        switch(needed) {
            case 1:
                return lead;
            case 2:
                return ((lead & 0x1F) << 6) | (this.stream[1] & 0x3F);
            case 3:
                return ((lead & 0x0F) << 12) |
                    ((this.stream[1] & 0x3F) << 6) |
                    (this.stream[2] & 0x3F);
            default:
                return ((lead & 0x07) << 18) |
                    ((this.stream[1] & 0x3F) << 12) |
                    ((this.stream[2] & 0x3F) << 6) |
                    (this.stream[3] & 0x3F);
        }
    };
}
/**
 * Encode a single Unicode value as UTF-8.
 *
 * Values below 0x80 yield one byte, below 0x800 two bytes, below 0x10000
 * three bytes, and anything larger four bytes. A lone UTF-16 surrogate passed
 * in as `key` is encoded as a plain three-byte sequence; callers that want
 * proper output for astral characters should pass the full code point
 * (String.prototype.codePointAt) rather than a UTF-16 code unit.
 *
 * @param {number} key Code point (or UTF-16 code unit) to encode.
 * @return {!Array<number>} The UTF-8 bytes, 1 to 4 entries.
 */
function UnicodeToUTF8Stream(key)
{
    if(key < 0x80) return [key];
    if(key < 0x800) return [0xC0|((key>>6)&0x1F), 0x80|(key&0x3F)];
    if(key < 0x10000) return [0xE0|((key>>12)&0x0F), 0x80|((key>>6)&0x3F), 0x80|(key&0x3F)];
    return [0xF0|((key>>18)&0x07), 0x80|((key>>12)&0x3F), 0x80|((key>>6)&0x3F), 0x80|(key&0x3F)];
}
/**
 * Number of bytes `s` occupies when encoded as UTF-8.
 *
 * Code units below 0x80 count as 1 byte, below 0x800 as 2 bytes, and other
 * BMP code units as 3 bytes. A valid high/low surrogate pair counts as one
 * 4-byte sequence. A lone surrogate counts as 3 bytes, which is the size of
 * the U+FFFD replacement that TextEncoder would emit for it.
 *
 * @param {string} s
 * @return {number} Encoded length in bytes.
 */
UTF8.UTF8Length = function(s)
{
    var length = 0;
    for(var i=0; i<s.length; i++) {
        var c = s.charCodeAt(i);
        if(c < 0x80) {
            length += 1;
        }
        else if(c < 0x800) {
            length += 2;
        }
        else if(c >= 0xD800 && c <= 0xDBFF && i+1 < s.length) {
            var next = s.charCodeAt(i+1);
            if(next >= 0xDC00 && next <= 0xDFFF) {
                // Surrogate pair: one astral code point, 4 bytes; skip the low half.
                length += 4;
                i++;
            }
            else {
                length += 3;
            }
        }
        else {
            length += 3;
        }
    }
    return length;
};