213 lines
11 KiB
TypeScript
213 lines
11 KiB
TypeScript
"use strict";
|
|
|
|
import { toUtf8CodePoints, _toUtf8String, UnicodeNormalizationForm } from "./utf8";
|
|
|
|
/**
 *  One run of code points inside a compact range table, as scanned by
 *  matchMap.
 */
type Ranged = {
    /**
     *  Start of the run, stored as an offset: matchMap adds it to a running
     *  total built from the entries before it to get the absolute start.
     */
    l: number;

    /** Inclusive span; the run covers start through start + h. */
    h: number;

    /**
     *  Stride; only offsets from the start that are multiples of d belong
     *  to the run (matchMap treats a missing or zero d as 1).
     */
    d?: number;

    /**
     *  Amount _nameprepTableB2 adds to a code point matched by this run.
     *  Membership-only tables leave it unset.
     */
    s?: number;

    /** Offsets from the start that are excluded from the run. */
    e?: Array<number>;
};
|
|
|
|
// Sparse lookup keyed by code point; each value is an array of numbers
// whose meaning depends on the table it was built for.
type Table = Record<number, Array<number>>;
|
|
|
|
/**
 *  Decodes a string of back-to-back 4-digit hexadecimal groups into the
 *  numbers they encode, e.g. "00730073" becomes [ 0x73, 0x73 ].
 *
 *  @param data - Hex digits only; the length must be a multiple of 4.
 *  @returns One number per 4-digit group, in order (empty for "").
 *  @throws Error("bad data") if the length is not a multiple of 4 or if
 *          any character is not a hexadecimal digit.
 */
function bytes2(data: string): Array<number> {
    // parseInt on its own quietly accepts prefixes, signs and partially
    // valid groups (e.g. "0x12" or "00zz"), so reject anything non-hex
    if ((data.length % 4) !== 0 || !/^[0-9a-f]*$/i.test(data)) { throw new Error("bad data"); }

    const result: Array<number> = [ ];
    for (let i = 0; i < data.length; i += 4) {
        result.push(parseInt(data.substring(i, i + 4), 16));
    }
    return result;
}
|
|
|
|
/**
 *  Builds a lookup table from a compact "keyDelta:value" list.
 *
 *  Entries are separated by commas. Each keyDelta is hexadecimal and is
 *  added to the previous key (starting from 0) to get the entry's key.
 *
 *  @param data - The comma-separated "keyDelta:value" entries.
 *  @param func - Converts the text after the colon into the stored array;
 *                when omitted the text is parsed as one hexadecimal number.
 *  @returns The table, keyed by the accumulated keys.
 */
function createTable(data: string, func?: (value: string) => Array<number>): Table {
    const parseValue = func || ((value: string): Array<number> => [ parseInt(value, 16) ]);

    const table: Table = { };
    let key = 0;
    for (const entry of data.split(",")) {
        const parts = entry.split(":");
        key += parseInt(parts[0], 16);
        table[key] = parseValue(parts[1]);
    }
    return table;
}
|
|
|
|
/**
 *  Expands a compact comma-separated list of hexadecimal "start[-span]"
 *  entries into range records.
 *
 *  For each entry, l is the entry's start plus the span of the entry
 *  before it, and h is the entry's own span. An entry without a dash has
 *  span 0; a dash with nothing after it means span 1.
 *
 *  @param data - The encoded entries.
 *  @returns One { l, h } record per entry, in order.
 */
function createRangeTable(data: string): Array<Ranged> {
    const ranges: Array<Ranged> = [ ];
    let previousSpan = 0;

    for (const entry of data.split(",")) {
        const parts = entry.split("-");

        let spanHex = parts[1];
        if (parts.length === 1) {
            spanHex = "0";
        } else if (spanHex === "") {
            spanHex = "1";
        }

        const start = previousSpan + parseInt(parts[0], 16);
        previousSpan = parseInt(spanHex, 16);
        ranges.push({ l: start, h: previousSpan });
    }

    return ranges;
}
|
|
|
|
/**
 *  Finds the first range that contains a value.
 *
 *  Each range's l is added to a running total to get that range's
 *  absolute start. A value matches when its offset from the start lies
 *  within 0..h, is a multiple of the stride d (a missing or zero d counts
 *  as 1) and is not listed in the exceptions e.
 *
 *  @param value - The number (code point) to look up.
 *  @param ranges - The range records, in table order.
 *  @returns The matching range, or null when no range contains the value.
 */
function matchMap(value: number, ranges: Array<Ranged>): Ranged | null {
    let lo = 0;
    for (const range of ranges) {
        lo += range.l;

        const offset = value - lo;
        const inSpan = (offset >= 0 && offset <= range.h);
        const onStride = ((offset % (range.d || 1)) === 0);
        if (!inSpan || !onStride) { continue; }

        // An excepted offset only rules out this range; keep scanning
        if (range.e && range.e.indexOf(offset) !== -1) { continue; }

        return range;
    }
    return null;
}
|
|
|
|
// Ranges consulted by _nameprepTableA1 (nameprep labels this check
// "Unassigned Code Points (Table A.1)"). Each comma-separated entry is a
// hexadecimal "start[-span]" decoded by createRangeTable: start is relative
// to the end of the previous range, a missing span means 0 and a dash with
// nothing after it means 1.
const Table_A_1_ranges = createRangeTable("221,13-1b,5f-,40-10,51-f,11-3,3-3,2-2,2-4,8,2,15,2d,28-8,88,48,27-,3-5,11-20,27-,8,28,3-5,12,18,b-a,1c-4,6-16,2-d,2-2,2,1b-4,17-9,8f-,10,f,1f-2,1c-34,33-14e,4,36-,13-,6-2,1a-f,4,9-,3-,17,8,2-2,5-,2,8-,3-,4-8,2-3,3,6-,16-6,2-,7-3,3-,17,8,3,3,3-,2,6-3,3-,4-a,5,2-6,10-b,4,8,2,4,17,8,3,6-,b,4,4-,2-e,2-4,b-10,4,9-,3-,17,8,3-,5-,9-2,3-,4-7,3-3,3,4-3,c-10,3,7-2,4,5-2,3,2,3-2,3-2,4-2,9,4-3,6-2,4,5-8,2-e,d-d,4,9,4,18,b,6-3,8,4,5-6,3-8,3-3,b-11,3,9,4,18,b,6-3,8,4,5-6,3-6,2,3-3,b-11,3,9,4,18,11-3,7-,4,5-8,2-7,3-3,b-11,3,13-2,19,a,2-,8-2,2-3,7,2,9-11,4-b,3b-3,1e-24,3,2-,3,2-,2-5,5,8,4,2,2-,3,e,4-,6,2,7-,b-,3-21,49,23-5,1c-3,9,25,10-,2-2f,23,6,3,8-2,5-5,1b-45,27-9,2a-,2-3,5b-4,45-4,53-5,8,40,2,5-,8,2,5-,28,2,5-,20,2,5-,8,2,5-,8,8,18,20,2,5-,8,28,14-5,1d-22,56-b,277-8,1e-2,52-e,e,8-a,18-8,15-b,e,4,3-b,5e-2,b-15,10,b-5,59-7,2b-555,9d-3,5b-5,17-,7-,27-,7-,9,2,2,2,20-,36,10,f-,7,14-,4,a,54-3,2-6,6-5,9-,1c-10,13-1d,1c-14,3c-,10-6,32-b,240-30,28-18,c-14,a0,115-,3,66-,b-76,5,5-,1d,24,2,5-2,2,8-,35-2,19,f-10,1d-3,311-37f,1b,5a-b,d7-19,d-3,41,57-,68-4,29-3,5f,29-37,2e-2,25-c,2c-2,4e-3,30,78-3,64-,20,19b7-49,51a7-59,48e-2,38-738,2ba5-5b,222f-,3c-94,8-b,6-4,1b,6,2,3,3,6d-20,16e-f,41-,37-7,2e-2,11-f,5-b,18-,b,14,5-3,6,88-,2,bf-2,7-,7-,7-,4-2,8,8-9,8-2ff,20,5-b,1c-b4,27-,27-cbb1,f7-9,28-2,b5-221,56,48,3-,2-,3-,5,d,2,5,3,42,5-,9,8,1d,5,6,2-2,8,153-3,123-3,33-27fd,a6da-5128,21f-5df,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3,2-1d,61-ff7d");
|
|
|
|
// @TODO: Make this relative...
|
|
// Absolute code points that nameprep removes outright (its "Table B.1
// (Maps to Nothing)" step); the hexadecimal list is parsed into numbers.
const Table_B_1_flags = "ad,34f,1806,180b,180c,180d,200b,200c,200d,2060,feff".split(",").map((v) => parseInt(v, 16));
|
|
|
|
// Regular runs used by _nameprepTableB2: a code point matched by a run is
// replaced by itself plus the run's s. Within each entry, l is the offset
// that matchMap adds to its running start, h is the inclusive span, d the
// stride and e the offsets (from the run's start) that are skipped.
const Table_B_2_ranges: Array<Ranged> = [
    { l: 65, h: 25, s: 32 },
    { l: 127, h: 30, s: 32, e: [ 23 ] },
    { l: 64, h: 54, s: 1, d: 2, e: [ 48 ] },
    { l: 57, h: 14, s: 1, d: 2 },
    { l: 17, h: 44, s: 1, d: 2 },
    { l: 61, h: 10, s: 1, d: 2, e: [ 2, 6, 8 ] },
    { l: 68, h: 16, s: 1, d: 2 },
    { l: 19, h: 84, s: 1, d: 2, e: [ 18, 24, 66 ] },
    { l: 435, h: 26, s: 32, e: [ 17 ] },
    { l: 71, h: 22, s: 1, d: 2 },
    { l: 40, h: 15, s: 80 },
    { l: 16, h: 31, s: 32 },
    { l: 80, h: 32, s: 1, d: 2 },
    { l: 42, h: 52, s: 1, d: 2 },
    { l: 55, h: 12, s: 1, d: 2 },
    { l: 15, h: 40, s: 1, d: 2, e: [ 38 ] },
    { l: 48, h: 14, s: 1, d: 2 },
    { l: 49, h: 37, s: 48 },
    { l: 6351, h: 148, s: 1, d: 2 },
    { l: 160, h: 88, s: 1, d: 2 },
    { l: 704, h: 15, s: 16 },
    { l: 854, h: 25, s: 26 },
    { l: 55915, h: 25, s: 32 },
    { l: 1247, h: 37, s: 40 },
    { l: 53248, h: 25, s: -119711 },
    { l: 52, h: 25, s: -119763 },
    { l: 52, h: 25, s: -119815 },
    { l: 52, h: 25, s: -119867, e: [ 1, 4, 5, 7, 8, 11, 12, 17 ] },
    { l: 52, h: 25, s: -119919 },
    { l: 52, h: 24, s: -119971, e: [ 2, 7, 8, 17 ] },
    { l: 52, h: 24, s: -120023, e: [ 2, 7, 13, 15, 16, 17 ] },
    { l: 52, h: 25, s: -120075 },
    { l: 52, h: 25, s: -120127 },
    { l: 52, h: 25, s: -120179 },
    { l: 52, h: 25, s: -120231 },
    { l: 52, h: 25, s: -120283 },
    { l: 52, h: 25, s: -120335 },
    { l: 56, h: 24, s: -119543, e: [ 17 ] },
    { l: 58, h: 24, s: -119601, e: [ 17 ] },
    { l: 58, h: 24, s: -119659, e: [ 17 ] },
    { l: 58, h: 24, s: -119717, e: [ 17 ] },
    { l: 58, h: 24, s: -119775, e: [ 17 ] }
];
|
|
// Table B.2 lookups whose replacement is a single absolute code point.
// Entries are hexadecimal "keyDelta:value"; createTable adds each keyDelta
// to the previous key, and _nameprepTableB2 returns the value as stored.
const Table_B_2_lut_abs = createTable("b5:3bc,c3:ff,7:73,2:253,5:254,3:256,1:257,5:259,1:25b,3:260,1:263,2:269,1:268,5:26f,1:272,2:275,7:280,3:283,5:288,3:28a,1:28b,5:292,3f:195,1:1bf,29:19e,125:3b9,8b:3b2,1:3b8,1:3c5,3:3c6,1:3c0,1a:3ba,1:3c1,1:3c3,2:3b8,1:3b5,1bc9:3b9,1c:1f76,1:1f77,f:1f7a,1:1f7b,d:1f78,1:1f79,1:1f7c,1:1f7d,107:63,5:25b,4:68,1:68,1:68,3:69,1:69,1:6c,3:6e,4:70,1:71,1:72,1:72,1:72,7:7a,2:3c9,2:7a,2:6b,1:e5,1:62,1:63,3:65,1:66,2:6d,b:3b3,1:3c0,6:64,1b574:3b8,1a:3c3,20:3b8,1a:3c3,20:3b8,1a:3c3,20:3b8,1a:3c3,20:3b8,1a:3c3");
|
|
// Table B.2 lookups stored as a signed hexadecimal offset; _nameprepTableB2
// adds the offset to the code point being looked up. Entries are
// "keyDelta:offset", with each keyDelta added to the previous key by
// createTable.
const Table_B_2_lut_rel = createTable("179:1,2:1,2:1,5:1,2:1,a:4f,a:1,8:1,2:1,2:1,3:1,5:1,3:1,4:1,2:1,3:1,4:1,8:2,1:1,2:2,1:1,2:2,27:2,195:26,2:25,1:25,1:25,2:40,2:3f,1:3f,33:1,11:-6,1:-9,1ac7:-3a,6d:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,b:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,c:-8,2:-8,2:-8,2:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,49:-8,1:-8,1:-4a,1:-4a,d:-56,1:-56,1:-56,1:-56,d:-8,1:-8,f:-8,1:-8,3:-7");
|
|
// Table B.2 lookups that expand to a sequence of code points. Keys are
// delta-encoded hexadecimal (each added to the previous key by createTable);
// each value is a run of 4-hex-digit groups that bytes2 splits into numbers.
const Table_B_2_complex = createTable("df:00730073,51:00690307,19:02BC006E,a7:006A030C,18a:002003B9,16:03B903080301,20:03C503080301,1d7:05650582,190f:00680331,1:00740308,1:0077030A,1:0079030A,1:006102BE,b6:03C50313,2:03C503130300,2:03C503130301,2:03C503130342,2a:1F0003B9,1:1F0103B9,1:1F0203B9,1:1F0303B9,1:1F0403B9,1:1F0503B9,1:1F0603B9,1:1F0703B9,1:1F0003B9,1:1F0103B9,1:1F0203B9,1:1F0303B9,1:1F0403B9,1:1F0503B9,1:1F0603B9,1:1F0703B9,1:1F2003B9,1:1F2103B9,1:1F2203B9,1:1F2303B9,1:1F2403B9,1:1F2503B9,1:1F2603B9,1:1F2703B9,1:1F2003B9,1:1F2103B9,1:1F2203B9,1:1F2303B9,1:1F2403B9,1:1F2503B9,1:1F2603B9,1:1F2703B9,1:1F6003B9,1:1F6103B9,1:1F6203B9,1:1F6303B9,1:1F6403B9,1:1F6503B9,1:1F6603B9,1:1F6703B9,1:1F6003B9,1:1F6103B9,1:1F6203B9,1:1F6303B9,1:1F6403B9,1:1F6503B9,1:1F6603B9,1:1F6703B9,3:1F7003B9,1:03B103B9,1:03AC03B9,2:03B10342,1:03B1034203B9,5:03B103B9,6:1F7403B9,1:03B703B9,1:03AE03B9,2:03B70342,1:03B7034203B9,5:03B703B9,6:03B903080300,1:03B903080301,3:03B90342,1:03B903080342,b:03C503080300,1:03C503080301,1:03C10313,2:03C50342,1:03C503080342,b:1F7C03B9,1:03C903B9,1:03CE03B9,2:03C90342,1:03C9034203B9,5:03C903B9,ac:00720073,5b:00B00063,6:00B00066,d:006E006F,a:0073006D,1:00740065006C,1:0074006D,124f:006800700061,2:00610075,2:006F0076,b:00700061,1:006E0061,1:03BC0061,1:006D0061,1:006B0061,1:006B0062,1:006D0062,1:00670062,3:00700066,1:006E0066,1:03BC0066,4:0068007A,1:006B0068007A,1:006D0068007A,1:00670068007A,1:00740068007A,15:00700061,1:006B00700061,1:006D00700061,1:006700700061,8:00700076,1:006E0076,1:03BC0076,1:006D0076,1:006B0076,1:006D0076,1:00700077,1:006E0077,1:03BC0077,1:006D0077,1:006B0077,1:006D0077,1:006B03C9,1:006D03C9,2:00620071,3:00632215006B0067,1:0063006F002E,1:00640062,1:00670079,2:00680070,2:006B006B,1:006B006D,9:00700068,2:00700070006D,1:00700072,2:00730076,1:00770062,c723:00660066,1:00660069,1:0066006C,1:006600660069,1:00660066006C,1:00730074,1:00730074,d:05740576,1:05740565,1:0574056B,1:057E0576,1:0574056D", bytes2);
|
|
|
|
// Ranges consulted by _nameprepTableC (nameprep's "Prohibit Tables C.1.2,
// C.2.2, C.3, C.4, C.5, C.6, C.7, C.8, C.9" step), in the comma-separated
// hexadecimal "start[-span]" encoding that createRangeTable decodes.
const Table_C_ranges = createRangeTable("80-20,2a0-,39c,32,f71,18e,7f2-f,19-7,30-4,7-5,f81-b,5,a800-20ff,4d1-1f,110,fa-6,d174-7,2e84-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,2,1f-5f,ff7f-20001");
|
|
|
|
|
|
/**
 *  Concatenates an array of number arrays into one new array, keeping the
 *  original order.
 *
 *  @param values - The arrays to join.
 *  @returns A fresh array holding every element of every inner array.
 */
function flatten(values: Array<Array<number>>): Array<number> {
    const flat: Array<number> = [ ];
    values.forEach((group) => {
        group.forEach((item) => { flat.push(item); });
    });
    return flat;
}
|
|
|
|
/**
 *  Reports whether a code point falls inside any Table_A_1_ranges entry.
 *
 *  @param codepoint - The code point to test.
 *  @returns true when matchMap finds a containing range, false otherwise.
 */
export function _nameprepTableA1(codepoint: number): boolean {
    const match = matchMap(codepoint, Table_A_1_ranges);
    return (match ? true : false);
}
|
|
|
|
/**
 *  Looks up the Table B.2 replacement for a code point.
 *
 *  The sources are tried in a fixed order and the first hit wins: the
 *  regular runs, then the absolute lookups, then the relative lookups,
 *  then the multi-code-point lookups.
 *
 *  @param codepoint - The code point to map.
 *  @returns The replacement code points, or null when none of the sources
 *           has an entry for the code point.
 */
export function _nameprepTableB2(codepoint: number): Array<number> {
    // Regular runs: the replacement is the code point plus the run's shift
    const run = matchMap(codepoint, Table_B_2_ranges);
    if (run) { return [ codepoint + run.s ]; }

    // Absolute lookups: the stored array is the replacement
    const absolute = Table_B_2_lut_abs[codepoint];
    if (absolute) { return absolute; }

    // Relative lookups: the stored number is an offset from the code point
    const relative = Table_B_2_lut_rel[codepoint];
    if (relative) { return [ codepoint + relative[0] ]; }

    // Multi-code-point lookups, or null when nothing matched
    return Table_B_2_complex[codepoint] || null;
}
|
|
|
|
/**
 *  Reports whether a code point falls inside any Table_C_ranges entry.
 *
 *  @param codepoint - The code point to test.
 *  @returns true when matchMap finds a containing range, false otherwise.
 */
export function _nameprepTableC(codepoint: number): boolean {
    const match = matchMap(codepoint, Table_C_ranges);
    return (match ? true : false);
}
|
|
|
|
/**
 *  Prepares a name: maps its code points (Tables B.1 and B.2), normalizes
 *  with form KC, rejects prohibited (Table C) and unassigned (Table A.1)
 *  code points, then applies the IDNA hyphen and length checks.
 *
 *  Names of at most 59 characters made only of ASCII letters, digits and
 *  hyphens are simply lower-cased and returned without the later checks.
 *
 *  @param value - The name to prepare.
 *  @returns The prepared name.
 *  @throws Error("STRINGPREP_CONTAINS_PROHIBITED") for a Table C code point.
 *  @throws Error("STRINGPREP_CONTAINS_UNASSIGNED") for a Table A.1 code point.
 *  @throws Error("invalid hyphen") for a leading or trailing "-" or "--" in
 *          the third and fourth positions.
 *  @throws Error("too long") when the result is longer than 63.
 */
export function nameprep(value: string): string {

    // Fast path: very basic names are handled correctly by the built-in
    // toLowerCase, which lets platforms with an incomplete normalize
    // implementation avoid calling it at all
    if (value.match(/^[a-z0-9-]*$/i) && value.length <= 59) { return value.toLowerCase(); }

    // Map every code point (the input's current normalization is kept)
    const mapped = toUtf8CodePoints(value).map((code) => {
        // Table B.1 (Maps to Nothing), plus the range 0xfe00..0xfe0f
        const dropped = (Table_B_1_flags.indexOf(code) >= 0) || (code >= 0xfe00 && code <= 0xfe0f);
        if (dropped) { return [ ]; }

        // Table B.2 (Case Folding); without an entry the code point stays
        return _nameprepTableB2(code) || [ code ];
    });
    let codes = flatten(mapped);

    // Normalize using form KC
    codes = toUtf8CodePoints(_toUtf8String(codes), UnicodeNormalizationForm.NFKC);

    // Prohibited: Tables C.1.2, C.2.2, C.3, C.4, C.5, C.6, C.7, C.8, C.9
    // (checked across the whole name before the unassigned check runs)
    if (codes.some((code) => _nameprepTableC(code))) {
        throw new Error("STRINGPREP_CONTAINS_PROHIBITED");
    }

    // Unassigned code points (Table A.1)
    if (codes.some((code) => _nameprepTableA1(code))) {
        throw new Error("STRINGPREP_CONTAINS_UNASSIGNED");
    }

    // IDNA extras
    const name = _toUtf8String(codes);

    // IDNA: 4.2.3.1
    const leadingHyphen = (name.charAt(0) === "-");
    const doubleHyphen = (name.substring(2, 4) === "--");
    const trailingHyphen = (name.charAt(name.length - 1) === "-");
    if (leadingHyphen || doubleHyphen || trailingHyphen) {
        throw new Error("invalid hyphen");
    }

    // IDNA: 4.2.4
    if (name.length > 63) { throw new Error("too long"); }

    return name;
}
|
|
|