From 0af95f4a655106e67c2ba8f445af88c9e9e24339 Mon Sep 17 00:00:00 2001 From: Richard Moore Date: Fri, 2 Aug 2019 01:58:42 -0400 Subject: [PATCH] Full case-folding for IDNA in namehash. --- packages/strings/package.json | 2 +- packages/strings/src.ts/idna.ts | 165 ++++++++++-------- packages/strings/src.ts/utf8.ts | 16 +- .../input/nameprep-josefsson-idn.json | 1 + .../src.ts/generation-scripts/nameprep.ts | 11 ++ packages/testcases/testcases/nameprep.json.gz | Bin 0 -> 1529 bytes packages/tests/src.ts/test-utils.ts | 29 +++ 7 files changed, 150 insertions(+), 74 deletions(-) create mode 100644 packages/testcases/input/nameprep-josefsson-idn.json create mode 100644 packages/testcases/src.ts/generation-scripts/nameprep.ts create mode 100644 packages/testcases/testcases/nameprep.json.gz diff --git a/packages/strings/package.json b/packages/strings/package.json index eaa40b6ac..f0f8d91d3 100644 --- a/packages/strings/package.json +++ b/packages/strings/package.json @@ -9,7 +9,7 @@ "dependencies": { "@ethersproject/bytes": ">5.0.0-beta.0", "@ethersproject/constants": ">5.0.0-beta.0", - "@ethersproject/errors": ">5.0.0-beta.0" + "@ethersproject/logger": ">5.0.0-beta.0" }, "keywords": [ "Ethereum", diff --git a/packages/strings/src.ts/idna.ts b/packages/strings/src.ts/idna.ts index 7848d8b8e..c93ca1064 100644 --- a/packages/strings/src.ts/idna.ts +++ b/packages/strings/src.ts/idna.ts @@ -1,6 +1,8 @@ "use strict"; -import { toUtf8CodePoints, UnicodeNormalizationForm } from "./utf8"; +import { toUtf8CodePoints, _toUtf8String, UnicodeNormalizationForm } from "./utf8"; + +let _tmp = 0; type Ranged = { l: number, @@ -26,77 +28,89 @@ function createTable(data: string, func?: (value: string) => Array): Tab func = function(value: string) { return [ parseInt(value, 16) ]; } } + let lo = 0; + let result: Table = { }; data.split(",").forEach((pair) => { let comps = pair.split(":"); - result[parseInt(comps[0], 16)] = func(comps[1]); + lo += parseInt(comps[0], 16); + result[lo] = func(comps[1]); }); return result; } +// @TODO: Make this relative... const Table_B_1_flags = "ad,34f,1806,180b,180c,180d,200b,200c,200d,2060,feff".split(",").map((v) => parseInt(v, 16)); const Table_B_2_ranges: Array = [ - { h: 25, s: -120335, l: 120432 }, - { h: 25, s: -120283, l: 120380 }, - { h: 25, s: -120231, l: 120328 }, - { h: 25, s: -120179, l: 120276 }, - { h: 25, s: -120127, l: 120224 }, - { h: 25, s: -120075, l: 120172 }, - { h: 24, s: -120023, e: [ 2, 7, 13, 15, 16, 17 ], l: 120120 }, - { h: 24, s: -119971, e: [ 2, 7, 8, 17 ], l: 120068 }, - { h: 25, s: -119919, l: 120016 }, - { h: 25, s: -119867, e: [ 1, 4, 5, 7, 8, 11, 12, 17 ], l: 119964 }, - { h: 25, s: -119815, l: 119912 }, - { h: 24, s: -119775, e: [ 17 ], l: 120720 }, - { h: 25, s: -119763, l: 119860 }, - { h: 24, s: -119717, e: [ 17 ], l: 120662 }, - { h: 25, s: -119711, l: 119808 }, - { h: 24, s: -119659, e: [ 17 ], l: 120604 }, - { h: 24, s: -119601, e: [ 17 ], l: 120546 }, - { h: 24, s: -119543, e: [ 17 ], l: 120488 }, - { h: 54, s: 1, e: [ 48 ], l: 256, d: 2 }, - { h: 14, s: 1, l: 313, d: 2 }, - { h: 44, s: 1, l: 330, d: 2 }, - { h: 10, s: 1, e: [ 2, 6, 8 ], l: 391, d: 2 }, - { h: 16, s: 1, l: 459, d: 2 }, - { h: 84, s: 1, e: [ 18, 24, 66 ], l: 478, d: 2 }, - { h: 22, s: 1, l: 984, d: 2 }, - { h: 32, s: 1, l: 1120, d: 2 }, - { h: 52, s: 1, l: 1162, d: 2 }, - { h: 12, s: 1, l: 1217, d: 2 }, - { h: 40, s: 1, e: [ 38 ], l: 1232, d: 2 }, - { h: 14, s: 1, l: 1280, d: 2 }, - { h: 148, s: 1, l: 7680, d: 2 }, - { h: 88, s: 1, l: 7840, d: 2 }, - { h: 15, s: 16, l: 8544 }, - { h: 25, s: 26, l: 9398 }, { h: 25, s: 32, l: 65 }, - { h: 30, s: 32, e: [ 23 ], l: 192 }, - { h: 26, s: 32, e: [ 17 ], l: 913 }, - { h: 31, s: 32, l: 1040 }, - { h: 25, s: 32, l: 65313 }, - { h: 37, s: 40, l: 66560 }, - { h: 37, s: 48, l: 1329 }, - { h: 15, s: 80, l: 1024 } + { h: 30, s: 32, e: [ 23 ], l: 127 }, + { h: 54, s: 1, e: [ 48 ], l: 64, d: 2 }, + { h: 14, s: 1, l: 57, d: 2 }, + { h: 44, s: 1, l: 17, d: 2 }, + { h: 10, s: 1, e: [ 2, 6, 8 ], l: 61, d: 2 }, + { h: 16, s: 1, l: 68, d: 2 }, + { h: 84, s: 1, e: [ 18, 24, 66 ], l: 19, d: 2 }, + { h: 26, s: 32, e: [ 17 ], l: 435 }, + { h: 22, s: 1, l: 71, d: 2 }, + { h: 15, s: 80, l: 40 }, + { h: 31, s: 32, l: 16 }, + { h: 32, s: 1, l: 80, d: 2 }, + { h: 52, s: 1, l: 42, d: 2 }, + { h: 12, s: 1, l: 55, d: 2 }, + { h: 40, s: 1, e: [ 38 ], l: 15, d: 2 }, + { h: 14, s: 1, l: 48, d: 2 }, + { h: 37, s: 48, l: 49 }, + { h: 148, s: 1, l: 6351, d: 2 }, + { h: 88, s: 1, l: 160, d: 2 }, + { h: 15, s: 16, l: 704 }, + { h: 25, s: 26, l: 854 }, + { h: 25, s: 32, l: 55915 }, + { h: 37, s: 40, l: 1247 }, + { h: 25, s: -119711, l: 53248 }, + { h: 25, s: -119763, l: 52 }, + { h: 25, s: -119815, l: 52 }, + { h: 25, s: -119867, e: [ 1, 4, 5, 7, 8, 11, 12, 17 ], l: 52 }, + { h: 25, s: -119919, l: 52 }, + { h: 24, s: -119971, e: [ 2, 7, 8, 17 ], l: 52 }, + { h: 24, s: -120023, e: [ 2, 7, 13, 15, 16, 17 ], l: 52 }, + { h: 25, s: -120075, l: 52 }, + { h: 25, s: -120127, l: 52 }, + { h: 25, s: -120179, l: 52 }, + { h: 25, s: -120231, l: 52 }, + { h: 25, s: -120283, l: 52 }, + { h: 25, s: -120335, l: 52 }, + { h: 24, s: -119543, e: [ 17 ], l: 56 }, + { h: 24, s: -119601, e: [ 17 ], l: 58 }, + { h: 24, s: -119659, e: [ 17 ], l: 58 }, + { h: 24, s: -119717, e: [ 17 ], l: 58 }, + { h: 24, s: -119775, e: [ 17 ], l: 58 } ]; -const Table_B_2_lut_abs = createTable("b5:956,178:255,17f:115,181:595,186:596,189:598,18a:599,18f:601,190:603,193:608,194:611,196:617,197:616,19c:623,19d:626,19f:629,1a6:640,1a9:643,1ae:648,1b1:650,1b2:651,1b7:658,1f6:405,1f7:447,220:414,345:953,3d0:946,3d1:952,3d2:965,3d5:966,3d6:960,3f0:954,3f1:961,3f2:963,3f4:952,3f5:949,1fbe:953,1fda:8054,1fdb:8055,1fea:8058,1feb:8059,1ff8:8056,1ff9:8057,1ffa:8060,1ffb:8061,2102:99,2107:603,210b:104,210c:104,210d:104,2110:105,2111:105,2112:108,2115:110,2119:112,211a:113,211b:114,211c:114,211d:114,2124:122,2126:969,2128:122,212a:107,212b:229,212c:98,212d:99,2130:101,2131:102,2133:109,213e:947,213f:960,2145:100,1d6b9:952,1d6d3:963,1d6f3:952,1d70d:963,1d72d:952,1d747:963,1d767:952,1d781:963,1d7a1:952,1d7bb:963"); -const Table_B_2_lut_rel = createTable("18e:79,1c4:2,1c7:2,1ca:2,1f1:2,2f2:1,2f4:1,2f6:1,304:1,306:1,330:1,340:1,342:1,344:1,34e:1,358:1,35e:1,360:1,362:1,364:1,370:1,372:1,374:1,386:38,388:37,389:37,38a:37,38c:64,38e:63,38f:63,390:1,3d3:-6,3d4:-9,784:1,1e9b:-58,1fba:-74,1fbb:-74,1fc8:-86,1fc9:-86,1fca:-86,1fcb:-86,1fec:-7,3e10:-8,3e11:-8,3e12:-8,3e13:-8,3e14:-8,3e15:-8,3e16:-8,3e17:-8,3e30:-8,3e31:-8,3e32:-8,3e33:-8,3e34:-8,3e35:-8,3e50:-8,3e51:-8,3e52:-8,3e53:-8,3e54:-8,3e55:-8,3e56:-8,3e57:-8,3e70:-8,3e71:-8,3e72:-8,3e73:-8,3e74:-8,3e75:-8,3e76:-8,3e77:-8,3e90:-8,3e91:-8,3e92:-8,3e93:-8,3e94:-8,3e95:-8,3eb2:-8,3eb3:-8,3eb4:-8,3eb5:-8,3eb6:-8,3eb7:-8,3eb8:-8,3ed0:-8,3ed1:-8,3ed2:-8,3ed3:-8,3ed4:-8,3ed5:-8,3ed6:-8,3ed7:-8,3f70:-8,3f71:-8,3fb0:-8,3fb1:-8,3fd0:-8,3fd1:-8"); -const Table_B_2_complex = createTable("df:00730073,130:00690307,149:02BC006E,1f0:006A030C,37a:002003B9,390:03B903080301,3b0:03C503080301,587:05650582,1e96:00680331,1e97:00740308,1e98:0077030A,1e99:0079030A,1e9a:006102BE,1f50:03C50313,1f52:03C503130300,1f54:03C503130301,1f56:03C503130342,1f80:1F0003B9,1f81:1F0103B9,1f82:1F0203B9,1f83:1F0303B9,1f84:1F0403B9,1f85:1F0503B9,1f86:1F0603B9,1f87:1F0703B9,1f88:1F0003B9,1f89:1F0103B9,1f8a:1F0203B9,1f8b:1F0303B9,1f8c:1F0403B9,1f8d:1F0503B9,1f8e:1F0603B9,1f8f:1F0703B9,1f90:1F2003B9,1f91:1F2103B9,1f92:1F2203B9,1f93:1F2303B9,1f94:1F2403B9,1f95:1F2503B9,1f96:1F2603B9,1f97:1F2703B9,1f98:1F2003B9,1f99:1F2103B9,1f9a:1F2203B9,1f9b:1F2303B9,1f9c:1F2403B9,1f9d:1F2503B9,1f9e:1F2603B9,1f9f:1F2703B9,1fa0:1F6003B9,1fa1:1F6103B9,1fa2:1F6203B9,1fa3:1F6303B9,1fa4:1F6403B9,1fa5:1F6503B9,1fa6:1F6603B9,1fa7:1F6703B9,1fa8:1F6003B9,1fa9:1F6103B9,1faa:1F6203B9,1fab:1F6303B9,1fac:1F6403B9,1fad:1F6503B9,1fae:1F6603B9,1faf:1F6703B9,1fb2:1F7003B9,1fb3:03B103B9,1fb4:03AC03B9,1fb6:03B10342,1fb7:03B1034203B9,1fbc:03B103B9,1fc2:1F7403B9,1fc3:03B703B9,1fc4:03AE03B9,1fc6:03B70342,1fc7:03B7034203B9,1fcc:03B703B9,1fd2:03B903080300,1fd3:03B903080301,1fd6:03B90342,1fd7:03B903080342,1fe2:03C503080300,1fe3:03C503080301,1fe4:03C10313,1fe6:03C50342,1fe7:03C503080342,1ff2:1F7C03B9,1ff3:03C903B9,1ff4:03CE03B9,1ff6:03C90342,1ff7:03C9034203B9,1ffc:03C903B9,20a8:00720073,2103:00B00063,2109:00B00066,2116:006E006F,2120:0073006D,2121:00740065006C,2122:0074006D,3371:006800700061,3373:00610075,3375:006F0076,3380:00700061,3381:006E0061,3382:03BC0061,3383:006D0061,3384:006B0061,3385:006B0062,3386:006D0062,3387:00670062,338a:00700066,338b:006E0066,338c:03BC0066,3390:0068007A,3391:006B0068007A,3392:006D0068007A,3393:00670068007A,3394:00740068007A,33a9:00700061,33aa:006B00700061,33ab:006D00700061,33ac:006700700061,33b4:00700076,33b5:006E0076,33b6:03BC0076,33b7:006D0076,33b8:006B0076,33b9:006D0076,33ba:00700077,33bb:006E0077,33bc:03BC0077,33bd:006D0077,33be:006B0077,33bf:006D0077,33c0:006B03C9,33c1:006D03C9,33c3:00620071,33c6:00632215006B0067,33c7:0063006F002E,33c8:00640062,33c9:00670079,33cb:00680070,33cd:006B006B,33ce:006B006D,33d7:00700068,33d9:00700070006D,33da:00700072,33dc:00730076,33dd:00770062,fb00:00660066,fb01:00660069,fb02:0066006C,fb03:006600660069,fb04:00660066006C,fb05:00730074,fb06:00730074,fb13:05740576,fb14:05740565,fb15:0574056B,fb16:057E0576,fb17:0574056D", bytes2); +const Table_B_2_lut_abs = createTable("b5:3bc,c3:ff,7:73,2:253,5:254,3:256,1:257,5:259,1:25b,3:260,1:263,2:269,1:268,5:26f,1:272,2:275,7:280,3:283,5:288,3:28a,1:28b,5:292,3f:195,1:1bf,29:19e,125:3b9,8b:3b2,1:3b8,1:3c5,3:3c6,1:3c0,1a:3ba,1:3c1,1:3c3,2:3b8,1:3b5,1bc9:3b9,1c:1f76,1:1f77,f:1f7a,1:1f7b,d:1f78,1:1f79,1:1f7c,1:1f7d,107:63,5:25b,4:68,1:68,1:68,3:69,1:69,1:6c,3:6e,4:70,1:71,1:72,1:72,1:72,7:7a,2:3c9,2:7a,2:6b,1:e5,1:62,1:63,3:65,1:66,2:6d,b:3b3,1:3c0,6:64,1b574:3b8,1a:3c3,20:3b8,1a:3c3,20:3b8,1a:3c3,20:3b8,1a:3c3,20:3b8,1a:3c3"); +const Table_B_2_lut_rel = createTable("179:1,2:1,2:1,5:1,2:1,a:4f,a:1,8:1,2:1,2:1,3:1,5:1,3:1,4:1,2:1,3:1,4:1,8:2,1:1,2:2,1:1,2:2,27:2,195:26,2:25,1:25,1:25,2:40,2:3f,1:3f,33:1,11:-6,1:-9,1ac7:-3a,6d:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,b:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,c:-8,2:-8,2:-8,2:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,49:-8,1:-8,1:-4a,1:-4a,d:-56,1:-56,1:-56,1:-56,d:-8,1:-8,f:-8,1:-8,3:-7"); +const Table_B_2_complex = createTable("df:00730073,51:00690307,19:02BC006E,a7:006A030C,18a:002003B9,16:03B903080301,20:03C503080301,1d7:05650582,190f:00680331,1:00740308,1:0077030A,1:0079030A,1:006102BE,b6:03C50313,2:03C503130300,2:03C503130301,2:03C503130342,2a:1F0003B9,1:1F0103B9,1:1F0203B9,1:1F0303B9,1:1F0403B9,1:1F0503B9,1:1F0603B9,1:1F0703B9,1:1F0003B9,1:1F0103B9,1:1F0203B9,1:1F0303B9,1:1F0403B9,1:1F0503B9,1:1F0603B9,1:1F0703B9,1:1F2003B9,1:1F2103B9,1:1F2203B9,1:1F2303B9,1:1F2403B9,1:1F2503B9,1:1F2603B9,1:1F2703B9,1:1F2003B9,1:1F2103B9,1:1F2203B9,1:1F2303B9,1:1F2403B9,1:1F2503B9,1:1F2603B9,1:1F2703B9,1:1F6003B9,1:1F6103B9,1:1F6203B9,1:1F6303B9,1:1F6403B9,1:1F6503B9,1:1F6603B9,1:1F6703B9,1:1F6003B9,1:1F6103B9,1:1F6203B9,1:1F6303B9,1:1F6403B9,1:1F6503B9,1:1F6603B9,1:1F6703B9,3:1F7003B9,1:03B103B9,1:03AC03B9,2:03B10342,1:03B1034203B9,5:03B103B9,6:1F7403B9,1:03B703B9,1:03AE03B9,2:03B70342,1:03B7034203B9,5:03B703B9,6:03B903080300,1:03B903080301,3:03B90342,1:03B903080342,b:03C503080300,1:03C503080301,1:03C10313,2:03C50342,1:03C503080342,b:1F7C03B9,1:03C903B9,1:03CE03B9,2:03C90342,1:03C9034203B9,5:03C903B9,ac:00720073,5b:00B00063,6:00B00066,d:006E006F,a:0073006D,1:00740065006C,1:0074006D,124f:006800700061,2:00610075,2:006F0076,b:00700061,1:006E0061,1:03BC0061,1:006D0061,1:006B0061,1:006B0062,1:006D0062,1:00670062,3:00700066,1:006E0066,1:03BC0066,4:0068007A,1:006B0068007A,1:006D0068007A,1:00670068007A,1:00740068007A,15:00700061,1:006B00700061,1:006D00700061,1:006700700061,8:00700076,1:006E0076,1:03BC0076,1:006D0076,1:006B0076,1:006D0076,1:00700077,1:006E0077,1:03BC0077,1:006D0077,1:006B0077,1:006D0077,1:006B03C9,1:006D03C9,2:00620071,3:00632215006B0067,1:0063006F002E,1:00640062,1:00670079,2:00680070,2:006B006B,1:006B006D,9:00700068,2:00700070006D,1:00700072,2:00730076,1:00770062,c723:00660066,1:00660069,1:0066006C,1:006600660069,1:00660066006C,1:00730074,1:00730074,d:05740576,1:05740565,1:0574056B,1:057E0576,1:0574056D", bytes2); -const Table_C_flags = "80,70f,1680,180e,2000,d800,fff9,e0020".split(",").map((v) => parseInt(v, 16)); -const Table_C_ranges = "80-20,340,2000-f,2028-7,205f-4,206a-5,2ff0-b,d800-20ff,fdd0-1f,fff9-6,1d173-7,1fffe,2fffe,3fffe,4fffe,5fffe,6fffe,7fffe,8fffe,9fffe,afffe,bfffe,cfffe,dfffe,e0020-5f".split(",").map((v) => { +_tmp = 0; +const Table_C_flags = "70f,f71,18e".split(",").map((v) => { + _tmp += parseInt(v, 16); + return _tmp; +}); +_tmp = 0; +const Table_C_ranges = "80-20,2c0,1cc0-f,28-7,37-4,b-5,f86-b,a810-20ff,25d0-1f,229-6,d17a-7,2e8b,10000,10000,10000,10000,10000,10000,10000,10000,10000,10000,10000,10000,22-5f".split(",").map((v) => { let comps = v.split("-"); if (comps.length === 1) { comps[1] = "1"; } - return { l: parseInt(comps[0], 16), h: parseInt(comps[1], 16) } + _tmp += parseInt(comps[0], 16); + return { l: _tmp, h: parseInt(comps[1], 16) } }); function matchMap(value: number, ranges: Array): Ranged { + let lo = 0; for (let i = 0; i < ranges.length; i++) { let range = ranges[i]; - if (value >= range.l && value <= range.l + range.h) { - if (range.e && range.e.indexOf(value - range.l) == -1) { continue; } + lo += range.l; + if (value >= lo && value <= lo + range.h && ((value - lo) % (range.d || 1)) === 0) { + if (range.e && range.e.indexOf(value - lo) !== -1) { continue; } return range; } } @@ -110,6 +124,22 @@ function flatten(values: Array>): Array { }, [ ]); } +export function _nameprepTableB2(codepoint: number): Array { + let match = matchMap(codepoint, Table_B_2_ranges); + if (match) { return [ codepoint + match.s ]; } + + let codes = Table_B_2_lut_abs[codepoint]; + if (codes) { return codes; } + + let shift = Table_B_2_lut_rel[codepoint]; + if (shift) { return [ codepoint + shift[0] ]; } + + let complex = Table_B_2_complex[codepoint]; + if (complex) { return complex; } + + return null; +} + export function nameprep(value: string): string { // This allows platforms with incomplete normalize to bypass @@ -121,43 +151,42 @@ export function nameprep(value: string): string { let codes = toUtf8CodePoints(value); codes = flatten(codes.map((code) => { - - // Substitute Table B.1 (Maps to Nothin) + // Substitute Table B.1 (Maps to Nothing) if (Table_B_1_flags.indexOf(code) >= 0) { return [ ]; } if (code >= 0xfe00 && code <= 0xfe0f) { return [ ]; } // Substitute Table B.2 (Case Folding) - let match = matchMap(code, Table_B_2_ranges); - if (match) { return [ code + match.s ]; } - - let codes = Table_B_2_lut_abs[code]; - if (codes) { return codes; } - - let shift = Table_B_2_lut_rel[code]; - if (shift) { return [ code + shift[0] ]; } - - let complex = Table_B_2_complex[code]; - if (complex) { return complex; } + let codesTableB2 = _nameprepTableB2(code); + if (codesTableB2) { return codesTableB2; } // No Substitution return [ code ]; })); // Normalize using fomr KC - codes = toUtf8CodePoints(String.fromCharCode(...codes), UnicodeNormalizationForm.NFKC); + codes = toUtf8CodePoints(_toUtf8String(codes), UnicodeNormalizationForm.NFKC); // Prohibit C.1.2, C.2.2, C.3, C.4, C.5, C.6, C.7, C.8, C.9 codes.forEach((code) => { if (Table_C_flags.indexOf(code) >= 0) { throw new Error("invalid character code"); } Table_C_ranges.forEach((range) => { if (code >= range.l && code <= range.l + range.h) { - throw new Error("invalid character code"); + throw new Error("STRINGPREP_CONTAINS_PROHIBITED"); } }); }); - // Prohibit IDNA (@TODO: add this list) + // IDNA extras + let name = _toUtf8String(codes); - return String.fromCharCode(...codes); + // IDNA: 4.2.3.1 + if (name.substring(0, 1) === "-" || name.substring(2, 4) === "--" || name.substring(name.length - 1) === "-") { + throw new Error("invalid hyphen"); + } + + // IDNA: 4.2.4 + if (name.length > 63) { throw new Error("too long"); } + + return name; } diff --git a/packages/strings/src.ts/utf8.ts b/packages/strings/src.ts/utf8.ts index 2f1df5d6e..b1121f7e3 100644 --- a/packages/strings/src.ts/utf8.ts +++ b/packages/strings/src.ts/utf8.ts @@ -1,9 +1,11 @@ "use strict"; -import { checkNormalize } from "@ethersproject/errors"; - import { arrayify, BytesLike } from "@ethersproject/bytes"; +import { Logger } from "@ethersproject/logger"; +import { version } from "./_version"; +const logger = new Logger(version); + /////////////////////////////// export enum UnicodeNormalizationForm { @@ -121,7 +123,7 @@ function getUtf8CodePoints(bytes: BytesLike, ignoreErrors?: boolean): Array { +export function _toUtf8String(codePoints: Array): string { + return codePoints.map((codePoint) => { if (codePoint <= 0xffff) { return String.fromCharCode(codePoint); } @@ -205,6 +207,10 @@ export function toUtf8String(bytes: BytesLike, ignoreErrors?: boolean): string { }).join(""); } +export function toUtf8String(bytes: BytesLike, ignoreErrors?: boolean): string { + return _toUtf8String(getUtf8CodePoints(bytes, ignoreErrors)); +} + export function toUtf8CodePoints(str: string, form: UnicodeNormalizationForm = UnicodeNormalizationForm.current): Array { return getUtf8CodePoints(toUtf8Bytes(str, form)); } diff --git a/packages/testcases/input/nameprep-josefsson-idn.json b/packages/testcases/input/nameprep-josefsson-idn.json new file mode 100644 index 000000000..923ff2d38 --- /dev/null +++ b/packages/testcases/input/nameprep-josefsson-idn.json @@ -0,0 +1 @@ +[{"comment": "Map to nothing", "output": [102, 111, 111, 98, 97, 114, 98, 97, 122], "input": [102, 111, 111, 194, 173, 205, 143, 225, 160, 134, 225, 160, 139, 98, 97, 114, 226, 128, 139, 226, 129, 160, 98, 97, 122, 239, 184, 128, 239, 184, 136, 239, 184, 143, 239, 187, 191]}, {"comment": "Case folding ASCII U+0043 U+0041 U+0046 U+0045", "output": [99, 97, 102, 101], "input": [67, 65, 70, 69]}, {"comment": "Case folding 8bit U+00DF (german sharp s)", "output": [115, 115], "input": [195, 159]}, {"comment": "Case folding U+0130 (turkish capital I with dot)", "output": [105, 204, 135], "input": [196, 176]}, {"comment": "Case folding multibyte U+0143 U+037A", "output": [197, 132, 32, 206, 185], "input": [197, 131, 205, 186]}, {"comment": "Case folding U+2121 U+33C6 U+1D7BB", "output": [116, 101, 108, 99, 226, 136, 149, 107, 103, 207, 131], "input": [226, 132, 161, 227, 143, 134, 240, 157, 158, 187]}, {"comment": "Normalization of U+006a U+030c U+00A0 U+00AA", "output": [199, 176, 32, 97], "input": [106, 204, 140, 194, 160, 194, 170]}, {"comment": "Case folding U+1FB7 and normalization", "output": [225, 190, 182, 206, 185], "input": [225, 190, 183]}, {"comment": "Self-reverting case folding U+01F0 and normalization", "output": [199, 176], "input": [199, 176]}, {"comment": "Self-reverting case folding U+0390 and normalization", "output": [206, 144], "input": [206, 144]}, {"comment": "Self-reverting case folding U+03B0 and normalization", "output": [206, 176], "input": [206, 176]}, {"comment": "Self-reverting case folding U+1E96 and normalization", "output": [225, 186, 150], "input": [225, 186, 150]}, {"comment": "Self-reverting case folding U+1F56 and normalization", "output": [225, 189, 150], "input": [225, 189, 150]}, {"comment": "ASCII space character U+0020", "output": [32], "input": [32]}, {"comment": "Non-ASCII 8bit space character U+00A0", "output": [32], "input": [194, 160]}, {"comment": "Non-ASCII multibyte space character U+1680", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [225, 154, 128], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Non-ASCII multibyte space character U+2000", "output": [32], "input": [226, 128, 128]}, {"comment": "Zero Width Space U+200b", "output": [], "input": [226, 128, 139]}, {"comment": "Non-ASCII multibyte space character U+3000", "output": [32], "input": [227, 128, 128]}, {"comment": "ASCII control characters U+0010 U+007F", "output": [16, 127], "input": [16, 127]}, {"comment": "Non-ASCII 8bit control character U+0085", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [194, 133], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Non-ASCII multibyte control character U+180E", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [225, 160, 142], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Zero Width No-Break Space U+FEFF", "output": [], "input": [239, 187, 191]}, {"comment": "Non-ASCII control character U+1D175", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [240, 157, 133, 181], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Plane 0 private use character U+F123", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [239, 132, 163], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Plane 15 private use character U+F1234", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [243, 177, 136, 180], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Plane 16 private use character U+10F234", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [244, 143, 136, 180], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Non-character code point U+8FFFE", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [242, 143, 191, 190], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Non-character code point U+10FFFF", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [244, 143, 191, 191], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Surrogate code U+DF42", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [237, 189, 130], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Non-plain text character U+FFFD", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [239, 191, 189], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Ideographic description character U+2FF5", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [226, 191, 181], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Display property character U+0341", "output": [204, 129], "input": [205, 129]}, {"comment": "Left-to-right mark U+200E", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [226, 128, 142], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": [204, 129]}, {"comment": "Deprecated U+202A", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [226, 128, 170], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": [204, 129]}, {"comment": "Language tagging character U+E0001", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [243, 160, 128, 129], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": [204, 129]}, {"comment": "Language tagging character U+E0042", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [243, 160, 129, 130], "flags": "0", "rc": "STRINGPREP_CONTAINS_PROHIBITED", "output": null}, {"comment": "Bidi: RandALCat character U+05BE and LCat characters", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [102, 111, 111, 214, 190, 98, 97, 114], "flags": "0", "rc": "STRINGPREP_BIDI_BOTH_L_AND_RAL", "output": null}, {"comment": "Bidi: RandALCat character U+FD50 and LCat characters", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [102, 111, 111, 239, 181, 144, 98, 97, 114], "flags": "0", "rc": "STRINGPREP_BIDI_BOTH_L_AND_RAL", "output": null}, {"comment": "Bidi: RandALCat character U+FB38 and LCat characters", "output": [102, 111, 111, 32, 217, 142, 98, 97, 114], "input": [102, 111, 111, 239, 185, 182, 98, 97, 114]}, {"comment": "Bidi: RandALCat without trailing RandALCat U+0627 U+0031", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [216, 167, 49], "flags": "0", "rc": "STRINGPREP_BIDI_LEADTRAIL_NOT_RAL", "output": null}, {"comment": "Bidi: RandALCat character U+0627 U+0031 U+0628", "output": [216, 167, 49, 216, 168], "input": [216, 167, 49, 216, 168]}, {"comment": "Unassigned code point U+E0002", "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [243, 160, 128, 130], "flags": "STRINGPREP_NO_UNASSIGNED", "rc": "STRINGPREP_CONTAINS_UNASSIGNED", "output": null}, {"comment": "Larger test (shrinking)", "output": [120, 115, 115, 105, 204, 135, 116, 101, 108, 199, 176, 32, 97, 206, 176, 32], "profile": [78, 97, 109, 101, 112, 114, 101, 112], "input": [88, 194, 173, 195, 159, 196, 176, 226, 132, 161, 106, 204, 140, 194, 160, 194, 170, 206, 176, 226, 128, 128]}, {"comment": "Larger test (expanding)", "output": [120, 115, 115, 227, 130, 173, 227, 131, 173, 227, 131, 161, 227, 131, 188, 227, 131, 136, 227, 131, 171, 105, 204, 135, 116, 101, 108, 40, 100, 41, 227, 130, 162, 227, 131, 145, 227, 131, 188, 227, 131, 136], "input": [88, 195, 159, 227, 140, 150, 196, 176, 226, 132, 161, 226, 146, 159, 227, 140, 128]}] diff --git a/packages/testcases/src.ts/generation-scripts/nameprep.ts b/packages/testcases/src.ts/generation-scripts/nameprep.ts new file mode 100644 index 000000000..d740698bf --- /dev/null +++ b/packages/testcases/src.ts/generation-scripts/nameprep.ts @@ -0,0 +1,11 @@ +'use strict'; + +import fs from "fs"; +import { resolve } from "path"; + +import { saveTests, TestCase } from ".."; + +const testcases: Array = JSON.parse(fs.readFileSync(resolve(__dirname, "../input/nameprep-josefsson-idn.json")).toString()); + +saveTests("nameprep", testcases); + diff --git a/packages/testcases/testcases/nameprep.json.gz b/packages/testcases/testcases/nameprep.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..100c30e264e8ad0ae5caebdb95e901fc3a3f5c2c GIT binary patch literal 1529 zcmV)1ke~n zG2I5y=JL9D9q(weE8c#|p8$gRx-1bAO2@7Wi`D9~T8#7JYuRfM_2?8iRX51e$`V7P zde+%9X3r+9N=@&mcaDI>*ktf*q4mDr5qfJGRkdbcAzK%EYt?I}yq?=s*R%c|RFHN7 z{<&`Z>y~6bSjXWi_ZAo|ZYLA)_yIy)s|r*VW>pw6e>#p}40i-Xa!;96JG0`~st7Ti zo(S!40WGnNTyVAV(`OQaY)jGvWWPzphL|6b(IFg%H8A=@$VW#S1XpyIJ_Xqpcq9oZ z3Bd&X70@kM#Z-=jDGx*L#ZE*_G?v*D(eo~(!M7Lcm)I)TY1TlBg1bqp+{6kD*_d`G zDE=K1=aJnP8gq)x0;DswLoPr*(`&?XllC1k z#hT1CYzfn;4hcF71qOR>*nFCGQ!tBjiH5;XLW4L0@w(KmNlFhvuY4N7s_5{S!^Lpu zFURT%gQh4lj+uu+v+-=f-tI&0&;le{<+1(1CK>2Dcsgt}yQj+9A8Ot37Jj(CN&WAB zN(+EqbB?;OAKC+oyR4~Q1NY^_!5*xe@&G1|_UEDMpyao*#JLZ|Xve}>p zJfS18;j7TO#@XnczUZ9O=&F>QCB*ZAmnS;nQ9msM28J>eTE_yZs_zO?(M>f@nOLnmpx4nm)gVTo%f$g==n|MB&p-mE|26r)~z)eYAn*<=M| zh9vb$-8{@EvrqT)@%{4lZuT&k%ofZ0`Q7KqaPlx7HRy_VVfbHYz!0A0f!2z&GDi;K zPd|;pAHgbLy)4R+%kjQ7$LlkUc4QUR&rOO7MAJCj51W-5M^z%WTxpiz zhIVCM;eD@p)`C$EZyPG)Ue8cXdq0`W2tYO*E1mCrMWkajL>x@ES$s20eezV#I(O{0 z#3#dk&IWH!yp-+{vXls}wR+ar>e@zXc)t%xHz3E=O@<9>@QE|#*yRNGy z80%Oh!QrQO?@vN51(!!<#ZstQP!u8;yEKhAq|D5#A|H)h9jl3ntRuc7(iHkV2}uwE z>i?i(G<4mO(tNBy9S-o5)c5k3vns8}gLkpsdeeB*-})(uHaoKM0VSJFdHQr9zj3}3MoQOXd@odQ zrQb9Zy&WxzvQmo{#o1{Xtb&hVPNLOddP~Ig1dZXiJeL&TWJ(SXIxbr=@4%5>b0WPz zZNtfEvK-z$d|pnMgV|^~A57nbHg{yeUyinVJJ0HM>DBiyLElibyXot9&(E(M5x-`` zC#*e_M1Ht_q}zUm7uUJDNr0v#2#YARf0fTa6I8riTL-swZ0-^IPDr#cI2+(X#L4s2?MeWx*!O*A>Ecu1Koyls)(nxg>iW zcx!gIe4Gsyi^-=MPZixsrP;EFJEtVwGFzX%=t5yGBDd|m$yyyEU%YX4bRH-b0)A4xt-&-G?~ zfuccyhkZRdUf=w0g4Z*CTlM_+rB?r($Me+1X@*s)H_@=yCYFz?H(5A|{)C!D_3Zy9 f;pg961^rZ2*X!o-eruIne7*P=NN)x9m?r=L2cZA1 literal 0 HcmV?d00001 diff --git a/packages/tests/src.ts/test-utils.ts b/packages/tests/src.ts/test-utils.ts index e68c907f4..21522a898 100644 --- a/packages/tests/src.ts/test-utils.ts +++ b/packages/tests/src.ts/test-utils.ts @@ -380,3 +380,32 @@ describe('Test BigNumber', function() { ].forEach(testAbs); }); }); + +function getHex(value: string): string { + return "0x" + Buffer.from(value).toString("hex"); +} + +describe("Test nameprep", function() { + const Tests: Array = loadTests("nameprep"); + Tests.forEach((test) => { + it(test.comment, function() { + let input = ethers.utils.toUtf8String(test.input); + if (test.output) { + let expected = ethers.utils.toUtf8String(test.output) + let actual = ethers.utils.nameprep(input); + assert.equal(actual, expected, `actual("${ getHex(actual) }") !== expected("${ getHex(expected) }")`); + } else { + let ok = true; + let reason = ""; + try { + let actual = ethers.utils.nameprep(input); + console.log(actual); + reason = `should has thrown ${ test.rc } - actual("${ getHex(actual) }")`; + ok = false; + } catch (error) { + } + assert.ok(ok, reason); + } + }); + }); +});