Applied latest changes from ens-normalize (#42, #2376, #2754).

This commit is contained in:
Richard Moore 2022-08-17 23:03:27 -04:00
parent fce9aaa734
commit f274104865
4 changed files with 101 additions and 90 deletions

@ -216,10 +216,10 @@ export function read_zero_terminated_array(next: NextFunc): Array<number> {
return v;
}
function read_transposed(n: number, w: number, next: NextFunc, lookup?: NextFunc): Array<Array<number>> {
function read_transposed(n: number, w: number, next: NextFunc): Array<Array<number>> {
let m = Array(n).fill(undefined).map(() => []);
for (let i = 0; i < w; i++) {
read_deltas(n, next).forEach((x, j) => m[j].push(lookup ? lookup(x) : x));
read_deltas(n, next).forEach((x, j) => m[j].push(x));
}
return m;
}
@ -254,7 +254,7 @@ export type Branch = {
export type Node = {
branches: Array<Branch>;
valid: boolean;
valid: number;
fe0f: boolean;
save: boolean;
check: boolean;
@ -266,18 +266,18 @@ export function read_emoji_trie(next: NextFunc): Node {
function read(): Node {
let branches = [];
while (true) {
let keys = read_member_array(next);
let keys = read_member_array(next, sorted);
if (keys.length == 0) break;
branches.push({set: new Set(keys.map(i => sorted[i])), node: read()});
branches.push({set: new Set(keys), node: read()});
}
branches.sort((a, b) => b.set.size - a.set.size);
let flag = next();
return {
branches,
valid: (flag & 1) != 0,
fe0f: (flag & 2) != 0,
save: (flag & 4) != 0,
check: (flag & 8) != 0,
};
branches.sort((a, b) => b.set.size - a.set.size); // sort by likelihood
let temp = next();
let valid = temp % 3;
temp = (temp / 3)|0;
let fe0f = !!(temp & 1);
temp >>= 1;
let save = temp == 1;
let check = temp == 2;
return {branches, valid, fe0f, save, check};
}
}

File diff suppressed because one or more lines are too long

@ -34,8 +34,6 @@ const r = getData();
import {read_member_array, read_mapped_map, read_emoji_trie} from './decoder.js';
import type { Node } from "./decoder.js";
// @TODO: This should be lazily loaded
const VALID = new Set(read_member_array(r));
@ -44,64 +42,99 @@ const MAPPED = read_mapped_map(r);
const EMOJI_ROOT = read_emoji_trie(r);
//const NFC_CHECK = new Set(read_member_array(r, Array.from(VALID.values()).sort((a, b) => a - b)));
function nfc(s: string): string {
return s.normalize('NFC');
//const STOP = 0x2E;
// Code point of ASCII "-" (hyphen-minus); ens_normalize_post_check tests label positions 2 and 3 against it.
const HYPHEN = 0x2D;
// Code point of ASCII "_" (underscore); ens_normalize_post_check only accepts it as a leading run of a label.
const UNDERSCORE = 0x5F;
/**
 * Splits a string into an array of code points.
 *
 * This is a thin alias over toUtf8CodePoints so the rest of the normalization
 * code can use the upstream helper name.
 *
 * @param name - The string to split.
 * @returns Whatever toUtf8CodePoints produces for `name`.
 */
function explode_cp(name: string): Array<number> {
    const codePoints: Array<number> = toUtf8CodePoints(name);
    return codePoints;
}
/**
 * Returns a copy of `cps` with every U+FE0F code point removed.
 *
 * The input array is left untouched.
 *
 * @param cps - Code points to filter.
 * @returns A new array holding the remaining code points in their original order.
 */
function filter_fe0f(cps: Array<number>): Array<number> {
    const kept: Array<number> = [];
    for (const cp of cps) {
        if (cp !== 0xFE0F) {
            kept.push(cp);
        }
    }
    return kept;
}
export function ens_normalize(name: string, beautify = false): string {
const input = toUtf8CodePoints(name).reverse(); // flip for pop
const output = [];
while (input.length) {
const emoji = consume_emoji_reversed(input, EMOJI_ROOT);
if (emoji) {
output.push(...(beautify ? emoji : filter_fe0f(emoji)));
continue;
}
const cp = input.pop();
if (VALID.has(cp)) {
output.push(cp);
continue;
}
if (IGNORED.has(cp)) {
continue;
}
let cps = MAPPED[cp];
if (cps) {
output.push(...cps);
continue;
}
throw new Error(`Disallowed codepoint: 0x${cp.toString(16).toUpperCase()}`);
}
return nfc(String.fromCodePoint(...output));
/**
 * Validates a name label by label and hands it back unchanged.
 *
 * The name is split on "." and every label has to satisfy two rules:
 *  - underscores may only appear as one unbroken run at the start of the label;
 *  - a label of at least four code points, all below 0x80, must not have a
 *    hyphen at both index 2 and index 3.
 *
 * @param name - The name to validate.
 * @returns The same `name` that was passed in.
 * @throws Error with the message `Invalid label "<label>": <reason>` for the
 *   first label that breaks a rule.
 */
export function ens_normalize_post_check(name: string): string {
    for (const label of name.split('.')) {
        const codePoints = explode_cp(label);
        let reason: string | undefined;

        // Everything in front of the last underscore must itself be an underscore.
        const lastUnderscore = codePoints.lastIndexOf(UNDERSCORE);
        const beforeLast = codePoints.slice(0, Math.max(lastUnderscore, 0));

        if (beforeLast.some(cp => cp !== UNDERSCORE)) {
            reason = `underscore only allowed at start`;
        } else if (
            codePoints.length >= 4 &&
            codePoints[2] === HYPHEN &&
            codePoints[3] === HYPHEN &&
            codePoints.every(cp => cp < 0x80)
        ) {
            // All-ASCII label with "--" at positions 2 and 3.
            reason = `invalid label extension`;
        }

        if (reason !== undefined) {
            throw new Error(`Invalid label "${label}": ${reason}`);
        }
    }
    return name;
}
function consume_emoji_reversed(cps: Array<number>, node: Node, eaten?: Array<number>) {
let emoji;
const stack = [];
let pos = cps.length;
if (eaten) { eaten.length = 0; } // clear input buffer (if needed)
while (pos) {
const cp = cps[--pos];
const branch = node.branches.find(x => x.set.has(cp));
if (branch == null) { break; }
node = branch.node;
if (!node) { break; }
stack.push(cp);
if (node.fe0f) {
stack.push(0xFE0F);
if (pos > 0 && cps[pos - 1] == 0xFE0F) { pos--; }
}
if (node.valid) { // this is a valid emoji (so far)
emoji = stack.slice(); // copy stack
if (eaten) { eaten.push(...cps.slice(pos).reverse()); } // copy input (if needed)
cps.length = pos; // truncate
}
}
return emoji;
/**
 * Normalizes an ENS name.
 *
 * Runs normalize() with filter_fe0f as the emoji filter, so U+FE0F is removed
 * from every matched emoji sequence.
 *
 * normalize() already passes its result through ens_normalize_post_check before
 * returning, so the check is not repeated here: the post-check returns its
 * argument unchanged, and a second pass over the same string could only redo
 * the same work.
 *
 * @param name - The name to normalize.
 * @returns The normalized name.
 * @throws Error if the name contains a disallowed code point or a label fails
 *   the post-check (both raised from within normalize()).
 */
export function ens_normalize(name: string): string {
    return normalize(name, filter_fe0f);
}
/**
 * Core normalization pass: converts `name` into its normalized string form.
 *
 * The code points of `name` are reversed so they can be consumed with pop(),
 * which processes the name front to back. At each step an emoji sequence is
 * tried first (consume_emoji_reversed); otherwise a single code point is taken
 * and is kept if it is in VALID, dropped if it is in IGNORED, replaced by its
 * MAPPED entry if it has one, and rejected otherwise. The collected code points
 * are NFC-composed and then run through ens_normalize_post_check.
 *
 * @param name - The name to normalize.
 * @param emoji_filter - Applied to each matched emoji sequence before it is
 *   appended to the output.
 * @returns The normalized, post-checked name.
 * @throws Error `Disallowed codepoint: 0x…` for a code point that is neither
 *   valid, ignored nor mapped, plus anything ens_normalize_post_check throws.
 */
function normalize(name: string, emoji_filter: (a: Array<number>) => Array<number>): string {
    const pending = explode_cp(name).reverse(); // reversed so pop() yields the next code point
    const collected = [];

    while (pending.length > 0) {
        const emoji = consume_emoji_reversed(pending);
        if (emoji) {
            collected.push(...emoji_filter(emoji));
        } else {
            const cp = pending.pop();
            if (VALID.has(cp)) {
                collected.push(cp);
            } else if (!IGNORED.has(cp)) {
                const replacement = MAPPED[cp];
                if (!replacement) {
                    throw new Error(`Disallowed codepoint: 0x${cp.toString(16).toUpperCase()}`);
                }
                collected.push(...replacement);
            }
        }
    }

    const joined = String.fromCodePoint(...collected);
    return ens_normalize_post_check(nfc(joined));
}
/**
 * Returns `s` in Unicode Normalization Form C (canonical composition).
 *
 * @param s - The string to compose.
 * @returns The NFC form of `s`.
 */
function nfc(s: string): string {
    const composed = s.normalize("NFC");
    return composed;
}
/**
 * Tries to match the longest emoji sequence at the end of `cps` by walking the
 * trie rooted at EMOJI_ROOT.
 *
 * `cps` is read from its last element backwards. normalize() passes a reversed
 * code point array, so the end of `cps` corresponds to the front of the
 * remaining input. Each time a node flagged `valid` is reached, `cps` is
 * truncated in place so the matched code points are removed from it; the walk
 * then continues, and a later valid node replaces the earlier match.
 *
 * @param cps - Code points to consume from; shortened in place when an emoji matches.
 * @param eaten - Optional output buffer. It is cleared on entry and receives the
 *   code points removed from `cps`, re-reversed relative to their order in `cps`.
 * @returns The matched emoji code points (with 0xFE0F inserted after every node
 *   flagged `fe0f`), or undefined when no valid node was reached.
 */
function consume_emoji_reversed(cps: Array<number>, eaten?: Array<number>) {
let node = EMOJI_ROOT;
let emoji; // most recent valid match; stays undefined if no valid node is reached
let saved; // code point recorded at the last node flagged `save`
let stack = []; // code points matched so far, including inserted FE0F
let pos = cps.length; // number of entries of cps not yet looked at
if (eaten) eaten.length = 0; // clear input buffer (if needed)
while (pos) {
let cp = cps[--pos];
// Follow the first branch whose set contains cp; stop when there is none.
node = node.branches.find(x => x.set.has(cp))?.node;
if (!node) break;
if (node.save) { // remember
saved = cp;
} else if (node.check) { // check exclusion
// Abort the walk when this code point repeats the remembered one.
if (cp === saved) break;
}
stack.push(cp);
if (node.fe0f) {
// FE0F is always added to the output, whether or not the input had it.
stack.push(0xFE0F);
if (pos > 0 && cps[pos - 1] == 0xFE0F) pos--; // consume optional FE0F
}
if (node.valid) { // this is a valid emoji (so far)
emoji = stack.slice(); // copy stack
if (node.valid == 2) emoji.splice(1, 1); // delete FE0F at position 1 (RGI ZWJ don't follow spec!)
if (eaten) eaten.push(...cps.slice(pos).reverse()); // copy input (if needed)
cps.length = pos; // truncate
}
}
return emoji;
}

@ -13,28 +13,6 @@ Zeros.fill(0);
/**
 * Validates one ENS name component given as raw bytes.
 *
 * A component is rejected when it is empty, when an underscore (0x5f) follows
 * any non-underscore byte, or when every byte is below 0x80 and bytes 2 and 3
 * are both hyphens (0x2d).
 *
 * @param comp - The component bytes to check.
 * @returns The same `comp` instance when it passes every check.
 * @throws Error describing the first rule that was violated.
 */
function checkComponent(comp: Uint8Array): Uint8Array {
    if (comp.length === 0) {
        throw new Error("invalid ENS name; empty component");
    }

    let prefixEnded = false; // set once a non-underscore byte has been seen
    let sawHighBit = false;  // set once a byte with the top bit set has been seen

    for (const byte of comp) {
        if (byte !== 0x5f) {
            prefixEnded = true;
            if ((byte & 0x80) !== 0) {
                sawHighBit = true;
            }
        } else if (prefixEnded) {
            // Underscores are only allowed as a leading run.
            throw new Error("invalid ENS name; non-prefix underscore");
        }
    }

    // Prevent punycode-looking components
    if (!sawHighBit && comp[2] === 0x2d && comp[3] === 0x2d) {
        throw new Error("invalid ENS name; punycode conflict");
    }

    return comp;
}