import assert from "assert";

import { toUtf8Bytes, toUtf8CodePoints, toUtf8String, Utf8ErrorFuncs } from "../index.js";

/**
 *  A malformed UTF-8 byte sequence together with the output expected
 *  from each error-handling strategy.
 */
export type TestCaseBadString = {
    /** Human-readable label used in the test title. */
    name: string;

    /** The (invalid) UTF-8 input. */
    bytes: Uint8Array;

    /** Expected string when decoding with Utf8ErrorFuncs.ignore. */
    ignore: string;

    /** Expected string when decoding with Utf8ErrorFuncs.replace. */
    replace: string;

    /** Substring that must appear in the message thrown by the default strategy. */
    error: string;
};

/**
 *  A string together with the code points it is expected to expand to.
 */
export type TestCaseCodePoints = {
    /** Human-readable label used in the test title. */
    name: string;

    /** The input string. */
    text: string;

    /** Expected output of toUtf8CodePoints(text). */
    codepoints: Array<number>;
};

describe("Tests UTF-8 bad strings", function() {

    const tests: Array<TestCaseBadString> = [
        {
            // 0x80 is a continuation byte with no preceding lead byte
            name: "unexpected continue",
            bytes: new Uint8Array([ 0x41, 0x80, 0x42, 0x43 ]),
            ignore: "ABC",
            replace: "A\ufffdBC",
            error: "UNEXPECTED_CONTINUE"
        },
        {
            // 0xf8 is not a valid UTF-8 lead byte
            name: "bad prefix",
            bytes: new Uint8Array([ 0x41, 0xf8, 0x42, 0x43 ]),
            ignore: "ABC",
            replace: "A\ufffdBC",
            error: "BAD_PREFIX"
        },
        {
            // Invalid lead byte followed by continuation bytes; the
            // expected output contains a single replacement character
            name: "bad prefix (multiple)",
            bytes: new Uint8Array([ 0x41, 0xf8, 0x88, 0x88, 0x42, 0x43 ]),
            ignore: "ABC",
            replace: "A\ufffdBC",
            error: "BAD_PREFIX"
        },
        {
            // Euro sign (0xe2 0x82 0xac) with its final byte cut off
            name: "OVERRUN",
            bytes: new Uint8Array([ 0x41, 0x42, 0xe2, 0x82 /* 0xac */ ]),
            ignore: "AB",
            replace: "AB\ufffd",
            error: "OVERRUN"
        },
        {
            // A lone 0xe2 lead byte immediately followed by a complete Euro sign
            name: "missing continue",
            bytes: new Uint8Array([ 0x41, 0x42, 0xe2, 0xe2, 0x82, 0xac, 0x43 ]),
            ignore: "AB\u20acC",
            replace: "AB\ufffd\u20acC",
            error: "MISSING_CONTINUE"
        },
        {
            // 0xf7 0xbf 0xbf 0xbf encodes 0x1fffff, above the U+10FFFF limit
            name: "out-of-range",
            bytes: new Uint8Array([ 0x41, 0x42, 0xf7, 0xbf, 0xbf, 0xbf, 0x43 ]),
            ignore: "ABC",
            replace: "AB\ufffdC",
            error: "OUT_OF_RANGE"
        },
        {
            // 0xed 0xa0 0x80 encodes U+D800 (first code point of the surrogate range)
            name: "UTF-16 surrogate (low)",
            bytes: new Uint8Array([ 0x41, 0x42, 0xed, 0xa0, 0x80, 0x43 ]),
            ignore: "ABC",
            replace: "AB\ufffdC",
            error: "UTF16_SURROGATE"
        },
        {
            // 0xed 0xbf 0xbf encodes U+DFFF (last code point of the surrogate range)
            name: "UTF-16 surrogate (high)",
            bytes: new Uint8Array([ 0x41, 0x42, 0xed, 0xbf, 0xbf, 0x43 ]),
            ignore: "ABC",
            replace: "AB\ufffdC",
            error: "UTF16_SURROGATE"
        },
        {
            // Euro sign (U+20AC) padded out to a 4-byte sequence; the
            // replace strategy is expected to yield the decoded code point
            name: "overlong",
            bytes: new Uint8Array([ 0xf0, 0x82, 0x82, 0xac ]),
            ignore: "",
            replace: "\u20ac",
            error: "OVERLONG"
        }
    ];

    for (const { name, bytes, ignore, replace, error } of tests) {
        it(`correctly handles ${ name }: replace strategy`, function() {
            const result = toUtf8String(bytes, Utf8ErrorFuncs.replace);
            assert.equal(result, replace);
        });

        it(`correctly handles ${ name }: ignore strategy`, function() {
            const result = toUtf8String(bytes, Utf8ErrorFuncs.ignore);
            assert.equal(result, ignore);
        });

        it(`correctly handles ${ name }: error strategy`, function() {
            // No strategy supplied: decoding must throw, and the message
            // must mention the expected reason
            assert.throws(() => {
                toUtf8String(bytes);
            }, (e: unknown) => {
                return (e instanceof Error && e.message.includes(error));
            });
        });
    }

    it("correctly fails to get UTF-8 bytes from incomplete surrogate", function() {
        assert.throws(() => {
            // A single 0xd800 code unit with nothing following it
            const text = String.fromCharCode(0xd800);
            toUtf8Bytes(text);
        }, (error: unknown) => {
            return (error instanceof Error && error.message.startsWith("invalid surrogate pair"));
        });
    });

    it("correctly fails to get UTF-8 bytes from invalid surrogate pair", function() {
        assert.throws(() => {
            // 0xd800 followed by 0xdbff instead of a code unit in 0xdc00-0xdfff
            const text = String.fromCharCode(0xd800, 0xdbff);
            toUtf8Bytes(text);
        }, (error: unknown) => {
            return (error instanceof Error && error.message.startsWith("invalid surrogate pair"));
        });
    });
});

describe("Tests UTF-8 code points", function() {

    const tests: Array<TestCaseCodePoints> = [
        {
            name: "the Euro symbol",
            text: "AB\u20acC",
            codepoints: [ 0x41, 0x42, 0x20ac, 0x43 ]
        },
    ];

    for (const { name, text, codepoints } of tests) {
        it(`expands strings to codepoints: ${ name }`, function() {
            const result = toUtf8CodePoints(text);

            // Compare the length first so a mismatch is reported up front,
            // then each entry so a failure names the offending index
            assert.equal(result.length, codepoints.length, "codepoints.length");
            for (let i = 0; i < result.length; i++) {
                assert.equal(result[i], codepoints[i], `codepoints[${ i }]`);
            }
        });
    }
});