parent
fce9aaa734
commit
f274104865
@ -216,10 +216,10 @@ export function read_zero_terminated_array(next: NextFunc): Array<number> {
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
 * Reads `w` groups of values via `read_deltas(n, next)` and returns them
 * transposed: row `j` of the result collects the j-th value of every group,
 * in the order the groups were read.
 *
 * @param n - number of rows in the result; also the count passed to `read_deltas`
 *            (assumes `read_deltas` yields at most `n` values per call)
 * @param w - number of groups to read, i.e. how many values each row receives
 * @param next - value source handed through to `read_deltas`
 * @param lookup - optional; when supplied, each value is stored as `lookup(value)`
 *                 instead of the raw value
 * @returns an array of `n` rows
 */
function read_transposed(n: number, w: number, next: NextFunc, lookup?: NextFunc): Array<Array<number>> {
    // One independent, explicitly typed row per output index.
    const m: Array<Array<number>> = Array.from({ length: n }, () => []);
    for (let i = 0; i < w; i++) {
        read_deltas(n, next).forEach((x, j) => m[j].push(lookup ? lookup(x) : x));
    }
    return m;
}
|
||||
@ -254,7 +254,7 @@ export type Branch = {
|
||||
|
||||
export type Node = {
|
||||
branches: Array<Branch>;
|
||||
valid: boolean;
|
||||
valid: number;
|
||||
fe0f: boolean;
|
||||
save: boolean;
|
||||
check: boolean;
|
||||
@ -266,18 +266,18 @@ export function read_emoji_trie(next: NextFunc): Node {
|
||||
function read(): Node {
|
||||
let branches = [];
|
||||
while (true) {
|
||||
let keys = read_member_array(next);
|
||||
let keys = read_member_array(next, sorted);
|
||||
if (keys.length == 0) break;
|
||||
branches.push({set: new Set(keys.map(i => sorted[i])), node: read()});
|
||||
branches.push({set: new Set(keys), node: read()});
|
||||
}
|
||||
branches.sort((a, b) => b.set.size - a.set.size);
|
||||
let flag = next();
|
||||
return {
|
||||
branches,
|
||||
valid: (flag & 1) != 0,
|
||||
fe0f: (flag & 2) != 0,
|
||||
save: (flag & 4) != 0,
|
||||
check: (flag & 8) != 0,
|
||||
};
|
||||
branches.sort((a, b) => b.set.size - a.set.size); // sort by likelihood
|
||||
let temp = next();
|
||||
let valid = temp % 3;
|
||||
temp = (temp / 3)|0;
|
||||
let fe0f = !!(temp & 1);
|
||||
temp >>= 1;
|
||||
let save = temp == 1;
|
||||
let check = temp == 2;
|
||||
return {branches, valid, fe0f, save, check};
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because one or more lines are too long
@ -34,8 +34,6 @@ const r = getData();
|
||||
|
||||
import {read_member_array, read_mapped_map, read_emoji_trie} from './decoder.js';
|
||||
|
||||
import type { Node } from "./decoder.js";
|
||||
|
||||
// @TODO: This should be lazily loaded
|
||||
|
||||
const VALID = new Set(read_member_array(r));
|
||||
@ -44,64 +42,99 @@ const MAPPED = read_mapped_map(r);
|
||||
const EMOJI_ROOT = read_emoji_trie(r);
|
||||
//const NFC_CHECK = new Set(read_member_array(r, Array.from(VALID.values()).sort((a, b) => a - b)));
|
||||
|
||||
function nfc(s: string): string {
|
||||
return s.normalize('NFC');
|
||||
//const STOP = 0x2E;
// Code points compared against when labels are checked after normalization.
const HYPHEN = 0x2D; // "-"
const UNDERSCORE = 0x5F; // "_"
|
||||
|
||||
/**
 * Converts a string into an array of code points by delegating to
 * `toUtf8CodePoints`.
 *
 * @param name - the string to split
 * @returns whatever `toUtf8CodePoints(name)` returns
 */
function explode_cp(name: string): Array<number> {
    return toUtf8CodePoints(name);
}
|
||||
|
||||
/**
 * Returns a copy of `cps` with every U+FE0F code point removed.
 * The input array is not modified.
 *
 * @param cps - code points to filter
 * @returns a new array without any 0xFE0F entries
 */
function filter_fe0f(cps: Array<number>): Array<number> {
    return cps.filter((cp) => cp !== 0xFE0F);
}
|
||||
|
||||
export function ens_normalize(name: string, beautify = false): string {
|
||||
const input = toUtf8CodePoints(name).reverse(); // flip for pop
|
||||
const output = [];
|
||||
while (input.length) {
|
||||
const emoji = consume_emoji_reversed(input, EMOJI_ROOT);
|
||||
if (emoji) {
|
||||
output.push(...(beautify ? emoji : filter_fe0f(emoji)));
|
||||
continue;
|
||||
}
|
||||
const cp = input.pop();
|
||||
if (VALID.has(cp)) {
|
||||
output.push(cp);
|
||||
continue;
|
||||
}
|
||||
if (IGNORED.has(cp)) {
|
||||
continue;
|
||||
}
|
||||
let cps = MAPPED[cp];
|
||||
if (cps) {
|
||||
output.push(...cps);
|
||||
continue;
|
||||
}
|
||||
throw new Error(`Disallowed codepoint: 0x${cp.toString(16).toUpperCase()}`);
|
||||
}
|
||||
return nfc(String.fromCodePoint(...output));
|
||||
/**
 * Validates every dot-separated label of `name` and returns `name` unchanged
 * when all labels pass.
 *
 * Per label, two rules are enforced:
 *  - underscores may only appear as a leading run (everything before the last
 *    underscore must itself be an underscore);
 *  - a label of four or more code points that is entirely below 0x80 must not
 *    have hyphens at both index 2 and index 3.
 *
 * @param name - the name to check
 * @returns `name`, unmodified
 * @throws Error with message `Invalid label "<label>": <reason>` for the first
 *         label that breaks a rule
 */
export function ens_normalize_post_check(name: string): string {
    for (const label of name.split('.')) {
        const cps = explode_cp(label);
        try {
            // Walk backwards from just before the last underscore; anything
            // that is not an underscore there means one was not at the start.
            for (let i = cps.lastIndexOf(UNDERSCORE) - 1; i >= 0; i--) {
                if (cps[i] !== UNDERSCORE) {
                    throw new Error(`underscore only allowed at start`);
                }
            }
            if (cps.length >= 4 && cps.every(cp => cp < 0x80) && cps[2] === HYPHEN && cps[3] === HYPHEN) {
                throw new Error(`invalid label extension`);
            }
        } catch (err) {
            // The caught value is not guaranteed to be an Error; narrow before
            // reading `.message`.
            const reason = err instanceof Error ? err.message : String(err);
            throw new Error(`Invalid label "${label}": ${reason}`);
        }
    }
    return name;
}
|
||||
|
||||
|
||||
function consume_emoji_reversed(cps: Array<number>, node: Node, eaten?: Array<number>) {
|
||||
let emoji;
|
||||
const stack = [];
|
||||
let pos = cps.length;
|
||||
if (eaten) { eaten.length = 0; } // clear input buffer (if needed)
|
||||
while (pos) {
|
||||
const cp = cps[--pos];
|
||||
const branch = node.branches.find(x => x.set.has(cp));
|
||||
if (branch == null) { break; }
|
||||
node = branch.node;
|
||||
if (!node) { break; }
|
||||
stack.push(cp);
|
||||
if (node.fe0f) {
|
||||
stack.push(0xFE0F);
|
||||
if (pos > 0 && cps[pos - 1] == 0xFE0F) { pos--; }
|
||||
}
|
||||
if (node.valid) { // this is a valid emoji (so far)
|
||||
emoji = stack.slice(); // copy stack
|
||||
if (eaten) { eaten.push(...cps.slice(pos).reverse()); } // copy input (if needed)
|
||||
cps.length = pos; // truncate
|
||||
}
|
||||
}
|
||||
return emoji;
|
||||
/**
 * Normalizes `name`, passing `filter_fe0f` as the emoji filter so that emoji
 * sequences are emitted without U+FE0F, then runs the label post-check on the
 * result.
 *
 * NOTE(review): `normalize` as written in this file already passes its result
 * through `ens_normalize_post_check`, so the outer call repeats that check.
 * It is kept here so this entry point stays validated on its own.
 *
 * @param name - the name to normalize
 * @returns the normalized name
 * @throws Error propagated from `normalize` or `ens_normalize_post_check`
 */
export function ens_normalize(name: string): string {
    return ens_normalize_post_check(normalize(name, filter_fe0f));
}
|
||||
|
||||
/**
 * Normalizes `name` code point by code point.
 *
 * At each position an emoji sequence is tried first via
 * `consume_emoji_reversed`; if one matches, `emoji_filter(emoji)` is appended
 * to the output. Otherwise a single code point is taken and:
 *  - appended as-is when it is in `VALID`;
 *  - dropped when it is in `IGNORED`;
 *  - replaced by `MAPPED[cp]` when such an entry exists;
 *  - rejected with an error in every other case.
 * The collected code points are turned into a string, NFC-normalized and
 * passed through `ens_normalize_post_check`.
 *
 * @param name - the name to normalize
 * @param emoji_filter - applied to each matched emoji sequence before it is
 *                       appended to the output
 * @returns the normalized, post-checked name
 * @throws Error `Disallowed codepoint: 0x…` for a code point that is neither
 *         valid, ignored nor mapped; errors from the post-check propagate
 */
function normalize(name: string, emoji_filter: (a: Array<number>) => Array<number>): string {
    const input = explode_cp(name).reverse(); // flipped so pop() yields code points in reading order
    const output: Array<number> = [];
    while (input.length) {
        const emoji = consume_emoji_reversed(input);
        if (emoji) {
            output.push(...emoji_filter(emoji));
            continue;
        }
        const cp = input.pop();
        if (cp === undefined) break; // not reachable while input.length > 0; narrows the type
        if (VALID.has(cp)) {
            output.push(cp);
            continue;
        }
        if (IGNORED.has(cp)) {
            continue;
        }
        const cps = MAPPED[cp];
        if (cps) {
            output.push(...cps);
            continue;
        }
        throw new Error(`Disallowed codepoint: 0x${cp.toString(16).toUpperCase()}`);
    }
    // Build the string in bounded slices: spreading one very long array into a
    // single call can exceed the engine's argument limit and throw RangeError.
    const CHUNK = 0x2000;
    let text = '';
    for (let i = 0; i < output.length; i += CHUNK) {
        text += String.fromCodePoint(...output.slice(i, i + CHUNK));
    }
    return ens_normalize_post_check(nfc(text));
}
|
||||
|
||||
/**
 * Returns `s` in Unicode Normalization Form C (canonical composition).
 *
 * @param s - the string to normalize
 * @returns the NFC form of `s`
 */
function nfc(s: string): string {
    return s.normalize('NFC');
}
|
||||
|
||||
/**
 * Tries to match the longest emoji sequence at the END of `cps` (the array is
 * expected in reversed order, so its last element is the next code point to
 * read) by walking the trie rooted at `EMOJI_ROOT`.
 *
 * On a match, the consumed code points are removed from `cps` by truncating
 * it, and the matched sequence is returned. A node flagged `fe0f` always
 * contributes 0xFE0F to the returned sequence and swallows a following 0xFE0F
 * from the input when one is present. When nothing matches, `cps` is left
 * untouched and `undefined` is returned.
 *
 * @param cps - reversed code points; truncated in place on a match
 * @param eaten - optional buffer; cleared on entry and, on a match, filled
 *                with the consumed input code points in reading order
 * @returns the matched emoji code points, or `undefined` if none matched
 */
function consume_emoji_reversed(cps: Array<number>, eaten?: Array<number>): Array<number> | undefined {
    let node: Node | undefined = EMOJI_ROOT;
    let emoji: Array<number> | undefined;
    let saved: number | undefined;
    const stack: Array<number> = [];
    let pos = cps.length;
    if (eaten) eaten.length = 0; // clear input buffer (if needed)
    while (pos) {
        const cp = cps[--pos];
        node = node.branches.find(x => x.set.has(cp))?.node;
        if (!node) break;
        if (node.save) { // remember
            saved = cp;
        } else if (node.check) { // check exclusion
            if (cp === saved) break;
        }
        stack.push(cp);
        if (node.fe0f) {
            stack.push(0xFE0F);
            if (pos > 0 && cps[pos - 1] == 0xFE0F) pos--; // consume optional FE0F
        }
        if (node.valid) { // this is a valid emoji (so far)
            emoji = stack.slice(); // copy stack
            if (node.valid == 2) emoji.splice(1, 1); // delete FE0F at position 1 (RGI ZWJ don't follow spec!)
            if (eaten) eaten.push(...cps.slice(pos).reverse()); // copy input (if needed)
            cps.length = pos; // truncate; later pushes into `eaten` only add newly consumed input
        }
    }
    return emoji;
}
|
||||
|
@ -13,28 +13,6 @@ Zeros.fill(0);
|
||||
|
||||
/**
 * Validates one component of an ENS name given as bytes and returns it
 * unchanged when it passes.
 *
 * Rules enforced:
 *  - the component must not be empty;
 *  - underscores (0x5f) may only appear before the first non-underscore byte;
 *  - a component made only of bytes below 0x80 must not have hyphens (0x2d)
 *    at both index 2 and index 3.
 *
 * @param comp - the component bytes
 * @returns `comp`, unmodified
 * @throws Error when any of the rules above is violated
 */
function checkComponent(comp: Uint8Array): Uint8Array {
    if (comp.length === 0) { throw new Error("invalid ENS name; empty component"); }

    let nonUnder = false;
    let allAscii = true;
    for (let i = 0; i < comp.length; i++) {
        const c = comp[i];

        // An underscore (i.e. "_"); only allowed at the beginning
        if (c === 0x5f) {
            if (nonUnder) { throw new Error("invalid ENS name; non-prefix underscore"); }
        } else {
            // Non-ASCII byte
            if (c & 0x80) { allAscii = false; }

            // Non-underscore found
            nonUnder = true;
        }
    }

    // Prevent punycode-looking components; indices past the end read as
    // undefined, so short components never match.
    if (allAscii && comp[2] === 0x2d && comp[3] === 0x2d) {
        throw new Error("invalid ENS name; punycode conflict");
    }

    return comp;
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user