Fixed some case-folding and added Table A.1 for IDNA (#42).

2019-08-03 00:43:22 -04:00 · 2019-08-03 00:43:22 -04:00 · f955dca417
commit f955dca417
parent c09de16347
10 changed files with 10456 additions and 87 deletions
--- a/packages/strings/src.ts/idna.ts
+++ b/packages/strings/src.ts/idna.ts
@ -2,14 +2,12 @@

 import { toUtf8CodePoints, _toUtf8String, UnicodeNormalizationForm } from "./utf8";

-let _tmp = 0;
-
 type Ranged = {
-    l: number,
-    h: number,
-    d?: number,
-    s?: number,
-    e?: Array<number>
+    l: number,          // Lo value
+    h: number,          // High value (less the lo)
+    d?: number,         // Delta/stride (default: 1)
+    s?: number,         // Shift (default: 1)
+    e?: Array<number>   // Exceptions to skip
 };

 type Table = { [ src: number ]: Array<number> };
@ -40,6 +38,37 @@ function createTable(data: string, func?: (value: string) => Array<number>): Tab
    return result;
 }

+function createRangeTable(data: string): Array<Ranged> {  // Decode a comma-separated list of hex "offset[-span]" entries into Ranged records
+    let hi = 0;  // Span (h) of the previously decoded entry; 0 before the first entry
+    return data.split(",").map((v) => {
+        let comps = v.split("-");
+        if (comps.length === 1) {
+            comps[1] = "0";  // No dash: span 0, i.e. a single value
+        } else if (comps[1] === "") {
+            comps[1] = "1";  // Trailing dash with nothing after it: span 1
+        }
+
+        let lo = hi + parseInt(comps[0], 16);  // Previous span + this offset, so matchMap can sum the l values into absolute starts
+        hi = parseInt(comps[1], 16);
+        return { l: lo, h: hi };
+    });
+}
+
+function matchMap(value: number, ranges: Array<Ranged>): Ranged {  // Return the first range that contains value, or null when none does
+    let lo = 0;  // Absolute start of the range being examined
+    for (let i = 0; i < ranges.length; i++) {
+        let range = ranges[i];
+        lo += range.l;  // Each l is relative to the previous range's start
+        if (value >= lo && value <= lo + range.h && ((value - lo) % (range.d || 1)) === 0) {  // Within [lo, lo + h] and on the stride d (1 when d is unset)
+            if (range.e && range.e.indexOf(value - lo) !== -1) { continue; }  // Offsets listed in e are exceptions; keep scanning later ranges
+            return range;
+        }
+    }
+    return null;
+}
+
+const Table_A_1_ranges = createRangeTable("221,13-1b,5f-,40-10,51-f,11-3,3-3,2-2,2-4,8,2,15,2d,28-8,88,48,27-,3-5,11-20,27-,8,28,3-5,12,18,b-a,1c-4,6-16,2-d,2-2,2,1b-4,17-9,8f-,10,f,1f-2,1c-34,33-14e,4,36-,13-,6-2,1a-f,4,9-,3-,17,8,2-2,5-,2,8-,3-,4-8,2-3,3,6-,16-6,2-,7-3,3-,17,8,3,3,3-,2,6-3,3-,4-a,5,2-6,10-b,4,8,2,4,17,8,3,6-,b,4,4-,2-e,2-4,b-10,4,9-,3-,17,8,3-,5-,9-2,3-,4-7,3-3,3,4-3,c-10,3,7-2,4,5-2,3,2,3-2,3-2,4-2,9,4-3,6-2,4,5-8,2-e,d-d,4,9,4,18,b,6-3,8,4,5-6,3-8,3-3,b-11,3,9,4,18,b,6-3,8,4,5-6,3-6,2,3-3,b-11,3,9,4,18,11-3,7-,4,5-8,2-7,3-3,b-11,3,13-2,19,a,2-,8-2,2-3,7,2,9-11,4-b,3b-3,1e-24,3,2-,3,2-,2-5,5,8,4,2,2-,3,e,4-,6,2,7-,b-,3-21,49,23-5,1c-3,9,25,10-,2-2f,23,6,3,8-2,5-5,1b-45,27-9,2a-,2-3,5b-4,45-4,53-5,8,40,2,5-,8,2,5-,28,2,5-,20,2,5-,8,2,5-,8,8,18,20,2,5-,8,28,14-5,1d-22,56-b,277-8,1e-2,52-e,e,8-a,18-8,15-b,e,4,3-b,5e-2,b-15,10,b-5,59-7,2b-555,9d-3,5b-5,17-,7-,27-,7-,9,2,2,2,20-,36,10,f-,7,14-,4,a,54-3,2-6,6-5,9-,1c-10,13-1d,1c-14,3c-,10-6,32-b,240-30,28-18,c-14,a0,115-,3,66-,b-76,5,5-,1d,24,2,5-2,2,8-,35-2,19,f-10,1d-3,311-37f,1b,5a-b,d7-19,d-3,41,57-,68-4,29-3,5f,29-37,2e-2,25-c,2c-2,4e-3,30,78-3,64-,20,19b7-49,51a7-59,48e-2,38-738,2ba5-5b,222f-,3c-94,8-b,6-4,1b,6,2,3,3,6d-20,16e-f,41-,37-7,2e-2,11-f,5-b,18-,b,14,5-3,6,88-,2,bf-2,7-,7-,7-,4-2,8,8-9,8-2ff,20,5-b,1c-b4,27-,27-cbb1,f7-9,28-2,b5-221,56,48,3-,2-,3-,5,d,2,5,3,42,5-,9,8,1d,5,6,2-2,8,153-3,123-3,33-27fd,a6da-5128,21f-5df,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3,2-1d,61-ff7d");
+
 // @TODO: Make this relative...
 const Table_B_1_flags = "ad,34f,1806,180b,180c,180d,200b,200c,200d,2060,feff".split(",").map((v) => parseInt(v, 16));

@ -91,31 +120,8 @@ const Table_B_2_lut_abs = createTable("b5:3bc,c3:ff,7:73,2:253,5:254,3:256,1:257
 const Table_B_2_lut_rel = createTable("179:1,2:1,2:1,5:1,2:1,a:4f,a:1,8:1,2:1,2:1,3:1,5:1,3:1,4:1,2:1,3:1,4:1,8:2,1:1,2:2,1:1,2:2,27:2,195:26,2:25,1:25,1:25,2:40,2:3f,1:3f,33:1,11:-6,1:-9,1ac7:-3a,6d:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,b:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,c:-8,2:-8,2:-8,2:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,49:-8,1:-8,1:-4a,1:-4a,d:-56,1:-56,1:-56,1:-56,d:-8,1:-8,f:-8,1:-8,3:-7");
 const Table_B_2_complex = createTable("df:00730073,51:00690307,19:02BC006E,a7:006A030C,18a:002003B9,16:03B903080301,20:03C503080301,1d7:05650582,190f:00680331,1:00740308,1:0077030A,1:0079030A,1:006102BE,b6:03C50313,2:03C503130300,2:03C503130301,2:03C503130342,2a:1F0003B9,1:1F0103B9,1:1F0203B9,1:1F0303B9,1:1F0403B9,1:1F0503B9,1:1F0603B9,1:1F0703B9,1:1F0003B9,1:1F0103B9,1:1F0203B9,1:1F0303B9,1:1F0403B9,1:1F0503B9,1:1F0603B9,1:1F0703B9,1:1F2003B9,1:1F2103B9,1:1F2203B9,1:1F2303B9,1:1F2403B9,1:1F2503B9,1:1F2603B9,1:1F2703B9,1:1F2003B9,1:1F2103B9,1:1F2203B9,1:1F2303B9,1:1F2403B9,1:1F2503B9,1:1F2603B9,1:1F2703B9,1:1F6003B9,1:1F6103B9,1:1F6203B9,1:1F6303B9,1:1F6403B9,1:1F6503B9,1:1F6603B9,1:1F6703B9,1:1F6003B9,1:1F6103B9,1:1F6203B9,1:1F6303B9,1:1F6403B9,1:1F6503B9,1:1F6603B9,1:1F6703B9,3:1F7003B9,1:03B103B9,1:03AC03B9,2:03B10342,1:03B1034203B9,5:03B103B9,6:1F7403B9,1:03B703B9,1:03AE03B9,2:03B70342,1:03B7034203B9,5:03B703B9,6:03B903080300,1:03B903080301,3:03B90342,1:03B903080342,b:03C503080300,1:03C503080301,1:03C10313,2:03C50342,1:03C503080342,b:1F7C03B9,1:03C903B9,1:03CE03B9,2:03C90342,1:03C9034203B9,5:03C903B9,ac:00720073,5b:00B00063,6:00B00066,d:006E006F,a:0073006D,1:00740065006C,1:0074006D,124f:006800700061,2:00610075,2:006F0076,b:00700061,1:006E0061,1:03BC0061,1:006D0061,1:006B0061,1:006B0062,1:006D0062,1:00670062,3:00700066,1:006E0066,1:03BC0066,4:0068007A,1:006B0068007A,1:006D0068007A,1:00670068007A,1:00740068007A,15:00700061,1:006B00700061,1:006D00700061,1:006700700061,8:00700076,1:006E0076,1:03BC0076,1:006D0076,1:006B0076,1:006D0076,1:00700077,1:006E0077,1:03BC0077,1:006D0077,1:006B0077,1:006D0077,1:006B03C9,1:006D03C9,2:00620071,3:00632215006B0067,1:0063006F002E,1:00640062,1:00670079,2:00680070,2:006B006B,1:006B006D,9:00700068,2:00700070006D,1:00700072,2:00730076,1:00770062,c723:00660066,1:00660069,1:0066006C,1:006600660069,1:00660066006C,1:00730074,1:00730074,d:05740576,1:05740565,1:0574056B,1:057E0576,1:0574056D", bytes2);

-_tmp = 0;
-const Table_C_flags = "70f,f71,18e".split(",").map((v) => {
-    _tmp += parseInt(v, 16);
-    return _tmp;
-});
-_tmp = 0;
-const Table_C_ranges = "80-20,2c0,1cc0-f,28-7,37-4,b-5,f86-b,a810-20ff,25d0-1f,229-6,d17a-7,2e8b,10000,10000,10000,10000,10000,10000,10000,10000,10000,10000,10000,10000,22-5f".split(",").map((v) => {
-    let comps = v.split("-");
-    if (comps.length === 1) { comps[1] = "1"; }
-    _tmp += parseInt(comps[0], 16);
-    return { l: _tmp, h: parseInt(comps[1], 16) }
-});
+const Table_C_ranges = createRangeTable("80-20,2a0-,39c,32,f71,18e,7f2-f,19-7,30-4,7-5,f81-b,5,a800-20ff,4d1-1f,110,fa-6,d174-7,2e84-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,2,1f-5f,ff7f-20001");

-function matchMap(value: number, ranges: Array<Ranged>): Ranged {
-    let lo = 0;
-    for (let i = 0; i < ranges.length; i++) {
-        let range = ranges[i];
-        lo += range.l;
-        if (value >= lo && value <= lo + range.h && ((value - lo) % (range.d || 1)) === 0) {
-            if (range.e && range.e.indexOf(value - lo) !== -1) { continue; }
-            return range;
-        }
-    }
-    return null;
-}

 function flatten(values: Array<Array<number>>): Array<number> {
    return values.reduce((accum, value) => {
@ -124,20 +130,28 @@ function flatten(values: Array<Array<number>>): Array<number> {
    }, [ ]);
 }

+export function _nameprepTableA1(codepoint: number): boolean {  // True when codepoint matches an entry of Table_A_1_ranges (Table A.1)
+    return !!matchMap(codepoint, Table_A_1_ranges);  // matchMap yields a range or null; coerce to boolean
+}
+
 export function _nameprepTableB2(codepoint: number): Array<number> {
-        let match = matchMap(codepoint, Table_B_2_ranges);
-        if (match) { return [ codepoint + match.s ]; }
+    let range = matchMap(codepoint, Table_B_2_ranges);  // The lookups below are tried in order; the first hit is returned
+    if (range) { return [ codepoint + range.s ]; }  // Range hit: shift the code point by that range's s

-        let codes = Table_B_2_lut_abs[codepoint];
-        if (codes) { return codes; }
+    let codes = Table_B_2_lut_abs[codepoint];
+    if (codes) { return codes; }  // Hit in the abs table: its entry is returned unchanged

-        let shift = Table_B_2_lut_rel[codepoint];
-        if (shift) { return [ codepoint + shift[0] ]; }
+    let shift = Table_B_2_lut_rel[codepoint];
+    if (shift) { return [ codepoint + shift[0] ]; }  // Hit in the rel table: its first element is added to the code point

-        let complex = Table_B_2_complex[codepoint];
-        if (complex) { return complex; }
+    let complex = Table_B_2_complex[codepoint];
+    if (complex) { return complex; }  // Hit in the complex table: its entry is returned unchanged

-        return null;
+    return null;  // No table has a mapping for this code point
+}
+
+export function _nameprepTableC(codepoint: number): boolean {  // True when codepoint matches an entry of Table_C_ranges (prohibited output)
+    return !!matchMap(codepoint, Table_C_ranges);  // matchMap yields a range or null; coerce to boolean
 }

 export function nameprep(value: string): string {
@ -145,7 +159,7 @@ export function nameprep(value: string): string {
    // This allows platforms with incomplete normalize to bypass
    // it for very basic names which the built-in toLowerCase
    // will certainly handle correctly
-    if (value.match(/^[a-z0-9-]*$/i)) { return value.toLowerCase(); }
+    if (value.match(/^[a-z0-9-]*$/i) && value.length <= 59) { return value.toLowerCase(); }

    // Get the code points (keeping the current normalization)
    let codes = toUtf8CodePoints(value);
@ -163,17 +177,21 @@ export function nameprep(value: string): string {
        return [ code ];
    }));

-    // Normalize using fomr KC
+    // Normalize using form KC
    codes = toUtf8CodePoints(_toUtf8String(codes), UnicodeNormalizationForm.NFKC);

-    // Prohibit C.1.2, C.2.2, C.3, C.4, C.5, C.6, C.7, C.8, C.9
+    // Prohibit Tables C.1.2, C.2.2, C.3, C.4, C.5, C.6, C.7, C.8, C.9
    codes.forEach((code) => {
-        if (Table_C_flags.indexOf(code) >= 0) { throw new Error("invalid character code"); }
-        Table_C_ranges.forEach((range) => {
-            if (code >= range.l && code <= range.l + range.h) {
-                throw new Error("STRINGPREP_CONTAINS_PROHIBITED");
-            }
-        });
+        if (_nameprepTableC(code)) {
+            throw new Error("STRINGPREP_CONTAINS_PROHIBITED");
+        }
+    });
+
+    // Prohibit Unassigned Code Points (Table A.1)
+    codes.forEach((code) => {
+        if (_nameprepTableA1(code)) {
+            throw new Error("STRINGPREP_CONTAINS_UNASSIGNED");
+        }
    });

    // IDNA extras
@ -187,6 +205,8 @@ export function nameprep(value: string): string {
    // IDNA: 4.2.4
    if (name.length > 63) { throw new Error("too long"); }

+
+
    return name;
 }

--- a/packages/testcases/input/nameprep/extract-tests.py
+++ b/packages/testcases/input/nameprep/extract-tests.py
@ -0,0 +1,46 @@
+import json
+import re
+
+output = ""  # Accumulates every kept line of the input file, joined with no separator
+for line in file("test-vectors-00.txt"):
+    line = line.strip()
+    if line == "" or line[0:1] == "#":  # Skip blank lines and lines beginning with a hash
+        continue
+    if line.startswith("Josefsson") or line.startswith("Internet-Draft"):  # Presumably the draft's page header/footer lines - TODO confirm
+        continue
+    output += line.replace("\n", "")  # strip() above has already removed the trailing newline
+
+Tests = [ ]
+
+def get_byte(v):  # Convert one token from get_string's regex (a single character or a \xHH escape) to an integer
+    if len(v) == 1:
+        return ord(v)  # Single character: its ordinal value
+    return int(v[2:4], 16)  # \xHH escape: parse the two hex digits after the backslash and x
+
+def get_string(value):  # Parse a double-quoted literal into byte values; "null" (any case) gives None; anything else raises
+    value = value.strip()
+    if value[0] == '"' and value[-1] == '"':  # Quoted literal: decode the text between the outer quotes
+        return map(get_byte,  re.findall("(\\\\x[0-9a-fA-F]{2}|.)", value[1:-1].replace('""', '')))  # Dropping '""' presumably joins adjacent literals merged when lines were concatenated - TODO confirm
+    if value.lower() == "null":
+        return None
+    raise Exception("unhandled")
+
+Tests = [ ]  # One dict per test vector found in the concatenated input
+
+matches = re.findall("({(?:.|\n)*?})", output)  # Non-greedy: each match is the shortest brace-delimited span
+for m in matches:
+    comps = m[1:-1].split(",")  # Drop the braces and split fields on commas; NOTE(review): a comma inside a quoted field would also split - confirm inputs have none
+    test = dict(
+        comment = comps[0].strip()[1:-1],  # First field with its first and last characters removed
+        input = get_string(comps[1]),
+        output = get_string(comps[2])
+    )
+    if len(comps) >= 4:  # Fields four to six are optional and only recorded when present
+        test["profile"] = get_string(comps[3])
+    if len(comps) >= 5:
+        test["flags"] = comps[4].strip()
+    if len(comps) >= 6:
+        test["rc"] = comps[5].strip()
+    Tests.append(test)
+
+print json.dumps(Tests)  # Emit all collected tests as JSON on stdout
--- a/packages/testcases/input/nameprep/generate-b2.py
+++ b/packages/testcases/input/nameprep/generate-b2.py
@ -149,17 +149,28 @@ for delta in deltas:
        mappings.append(data)
        debug[data["l"]] = "MAP:" + str(data)

+# Create complex table (things that map to more than one code point)
 complex = { }
+complex_output = [ ];
 for (src, dst, reason) in weird:
    for word in dst.split(" "):
+        complex_output.append(int(word, 16))
        if len(word) != 4: raise Exception("hmmm")
    complex[int(src, 16)] = dst.replace(" ", "")
+
+# Experimenting: We can easily create a LUT for the individual
+# components, as there is substantial overlap.
+#complex_output = dict((x, True) for x in complex_output).keys()
+#complex_output.sort()
+#print "COM", complex_output, len(complex_output)
+
+# Sort mappings by lo
 mappings.sort(lambda a, b: cmp(a["l"], b["l"]))

-debug_keys = debug.keys()
-debug_keys.sort()
-for d in debug_keys:
-   print d, debug[d]
+#debug_keys = debug.keys()
+#debug_keys.sort()
+#for d in debug_keys:
+#   print d, debug[d]

 #print mappings

--- a/packages/testcases/input/nameprep/generate-c.py
+++ b/packages/testcases/input/nameprep/generate-c.py
@ -1,3 +1,6 @@
+def hexify(v):  # hex(v) with its first two characters dropped (the "0x" prefix for non-negative v)
+    return hex(v)[2:]
+
 prohibit = [ ]

 table = None
@ -28,42 +31,27 @@ print prohibit
 prohibit = list(dict([(p, True) for p in prohibit]).keys())
 prohibit.sort()

-prohibit_single = [ ]
-prohibit_range = [ ]
+output = [ dict(lo = prohibit[0], hi = prohibit[0]) ]

-last_range_start = None
-last = 0
-for p in prohibit:
-    if p - 1 == last:
-        if last_range_start is None:
-            last_range_start = last
-            if len(prohibit_single) > 0 and prohibit_single[-1] == last:
-                prohibit_single.pop()
+for p in prohibit[1:]:
+    if p - 1 == output[-1]["hi"]:
+         output[-1]["hi"] = p
    else:
-        if last_range_start is not None:
-            print "Range", last_range_start, last - last_range_start, hex(last_range_start)
-            length = last - last_range_start
-            if length == 1:
-                length = ""
-            else:
-                length = "-" + hex(length)[2:]
-            prohibit_range.append([ last_range_start, length ])
-            last_range_start = None
-        else:
-            print "Single", p, hex(p)
-            prohibit_single.append(p)
-    last = p
+         output.append(dict(lo = p, hi = p))
+
+print output

 last = 0
-for i in xrange(0, len(prohibit_single)):
-    v = prohibit_single[i]
-    prohibit_single[i] -= last
-    last = v
-print 'const Table_C_lut = "' + ",".join(hex(x)[2:] for x in prohibit_single) + '";'
+for r in output:  # Encode each merged range as hex "l[-h]"
+    r["h"] = r["hi"] - r["lo"]  # h: span of the range (0 for a single value)
+    r["l"] = r["lo"] - last  # l: gap from the previous range's hi (from 0 for the first range)
+    last = r["hi"]
+
+    r["range"] = hexify(r["l"])
+    if r["h"] > 1:  # Span above 1 is written out explicitly after a dash
+        r["range"] += "-" + hexify(r["h"])
+    elif r["h"] > 0:  # Span of exactly 1 is a bare trailing dash; span 0 gets no suffix
+        r["range"] += "-"
+
+print 'const Table_C_ranges = "' + ",".join(x["range"] for x in output) + '";'  # Emit the comma-joined entries as a const string declaration

-last = 0
-for item in prohibit_range:
-    v = item[0]
-    item[0] -= last
-    last = v
-print 'const Table_C_ranges = "' + ",".join(("%s%s" % (hex(p[0])[2:], p[1])) for p in prohibit_range) + '";';
--- a/packages/testcases/input/nameprep/rfc/draft-josefsson-idn-test-vectors-00.txt
+++ b/packages/testcases/input/nameprep/rfc/draft-josefsson-idn-test-vectors-00.txt
--- a/packages/testcases/input/nameprep/rfc/rfc3454.txt
+++ b/packages/testcases/input/nameprep/rfc/rfc3454.txt
--- a/packages/testcases/input/nameprep/rfc/rfc3491.txt
+++ b/packages/testcases/input/nameprep/rfc/rfc3491.txt
@ -0,0 +1,395 @@
+
+
+
+
+
+
+Network Working Group                                         P. Hoffman
+Request for Comments: 3491                                    IMC & VPNC
+Category: Standards Track                                    M. Blanchet
+                                                                Viagenie
+                                                              March 2003
+
+
+                   Nameprep: A Stringprep Profile for
+                  Internationalized Domain Names (IDN)
+
+Status of this Memo
+
+   This document specifies an Internet standards track protocol for the
+   Internet community, and requests discussion and suggestions for
+   improvements.  Please refer to the current edition of the "Internet
+   Official Protocol Standards" (STD 1) for the standardization state
+   and status of this protocol.  Distribution of this memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2003).  All Rights Reserved.
+
+Abstract
+
+   This document describes how to prepare internationalized domain name
+   (IDN) labels in order to increase the likelihood that name input and
+   name comparison work in ways that make sense for typical users
+   throughout the world.  This profile of the stringprep protocol is
+   used as part of a suite of on-the-wire protocols for
+   internationalizing the Domain Name System (DNS).
+
+1. Introduction
+
+   This document specifies processing rules that will allow users to
+   enter internationalized domain names (IDNs) into applications and
+   have the highest chance of getting the content of the strings
+   correct.  It is a profile of stringprep [STRINGPREP].  These
+   processing rules are only intended for internationalized domain
+   names, not for arbitrary text.
+
+   This profile defines the following, as required by [STRINGPREP].
+
+   -  The intended applicability of the profile: internationalized
+      domain names processed by IDNA.
+
+   -  The character repertoire that is the input and output to
+      stringprep:  Unicode 3.2, specified in section 2.
+
+
+
+
+Hoffman & Blanchet          Standards Track                     [Page 1]
+
+RFC 3491                      IDN Nameprep                    March 2003
+
+
+   -  The mappings used: specified in section 3.
+
+   -  The Unicode normalization used: specified in section 4.
+
+   -  The characters that are prohibited as output: specified in section
+      5.
+
+   -  Bidirectional character handling: specified in section 6.
+
+1.1 Interaction of protocol parts
+
+   Nameprep is used by the IDNA [IDNA] protocol for preparing domain
+   names; it is not designed for any other purpose.  It is explicitly
+   not designed for processing arbitrary free text and SHOULD NOT be
+   used for that purpose.  Nameprep is a profile of Stringprep
+   [STRINGPREP].  Implementations of Nameprep MUST fully implement
+   Stringprep.
+
+   Nameprep is used to process domain name labels, not domain names.
+   IDNA calls nameprep for each label in a domain name, not for the
+   whole domain name.
+
+1.2 Terminology
+
+   The key words "MUST", "MUST NOT", "SHOULD", "SHOULD NOT", and "MAY"
+   in this document are to be interpreted as described in BCP 14, RFC
+   2119 [RFC2119].
+
+2. Character Repertoire
+
+   This profile uses Unicode 3.2, as defined in [STRINGPREP] Appendix A.
+
+3. Mapping
+
+   This profile specifies mapping using the following tables from
+   [STRINGPREP]:
+
+   Table B.1
+   Table B.2
+
+4. Normalization
+
+   This profile specifies using Unicode normalization form KC, as
+   described in [STRINGPREP].
+
+
+
+
+
+
+
+Hoffman & Blanchet          Standards Track                     [Page 2]
+
+RFC 3491                      IDN Nameprep                    March 2003
+
+
+5. Prohibited Output
+
+   This profile specifies prohibiting using the following tables from
+   [STRINGPREP]:
+
+   Table C.1.2
+   Table C.2.2
+   Table C.3
+   Table C.4
+   Table C.5
+   Table C.6
+   Table C.7
+   Table C.8
+   Table C.9
+
+   IMPORTANT NOTE: This profile MUST be used with the IDNA protocol.
+   The IDNA protocol has additional prohibitions that are checked
+   outside of this profile.
+
+6. Bidirectional characters
+
+   This profile specifies checking bidirectional strings as described in
+   [STRINGPREP] section 6.
+
+7. Unassigned Code Points in Internationalized Domain Names
+
+   If the processing in [IDNA] specifies that a list of unassigned code
+   points be used, the system uses table A.1 from [STRINGPREP] as its
+   list of unassigned code points.
+
+8. References
+
+8.1 Normative References
+
+   [RFC2119]    Bradner, S., "Key words for use in RFCs to Indicate
+                Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+   [STRINGPREP] Hoffman, P. and M. Blanchet, "Preparation of
+                Internationalized Strings ("stringprep")", RFC 3454,
+                December 2002.
+
+   [IDNA]       Faltstrom, P., Hoffman, P. and A. Costello,
+                "Internationalizing Domain Names in Applications
+                (IDNA)", RFC 3490, March 2003.
+
+
+
+
+
+
+
+Hoffman & Blanchet          Standards Track                     [Page 3]
+
+RFC 3491                      IDN Nameprep                    March 2003
+
+
+8.2 Informative references
+
+   [STD13]      Mockapetris, P., "Domain names - concepts and
+                facilities", STD 13, RFC 1034, and "Domain names -
+                implementation and specification", STD 13, RFC 1035,
+                November 1987.
+
+9. Security Considerations
+
+   The Unicode and ISO/IEC 10646 repertoires have many characters that
+   look similar.  In many cases, users of security protocols might do
+   visual matching, such as when comparing the names of trusted third
+   parties.  Because it is impossible to map similar-looking characters
+   without a great deal of context such as knowing the fonts used,
+   stringprep does nothing to map similar-looking characters together
+   nor to prohibit some characters because they look like others.
+
+   Security on the Internet partly relies on the DNS.  Thus, any change
+   to the characteristics of the DNS can change the security of much of
+   the Internet.
+
+   Domain names are used by users to connect to Internet servers.  The
+   security of the Internet would be compromised if a user entering a
+   single internationalized name could be connected to different servers
+   based on different interpretations of the internationalized domain
+   name.
+
+   Current applications might assume that the characters allowed in
+   domain names will always be the same as they are in [STD13].  This
+   document vastly increases the number of characters available in
+   domain names.  Every program that uses "special" characters in
+   conjunction with domain names may be vulnerable to attack based on
+   the new characters allowed by this specification.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hoffman & Blanchet          Standards Track                     [Page 4]
+
+RFC 3491                      IDN Nameprep                    March 2003
+
+
+10. IANA Considerations
+
+   This is a profile of stringprep.  It has been registered by the IANA
+   in the stringprep profile registry
+   (www.iana.org/assignments/stringprep-profiles).
+
+      Name of this profile:
+         Nameprep
+
+      RFC in which the profile is defined:
+         This document.
+
+      Indicator whether or not this is the newest version of the
+      profile:
+         This is the first version of Nameprep.
+
+11. Acknowledgements
+
+   Many people from the IETF IDN Working Group and the Unicode Technical
+   Committee contributed ideas that went into this document.
+
+   The IDN Nameprep design team made many useful changes to the
+   document.  That team and its advisors include:
+
+      Asmus Freytag
+      Cathy Wissink
+      Francois Yergeau
+      James Seng
+      Marc Blanchet
+      Mark Davis
+      Martin Duerst
+      Patrik Faltstrom
+      Paul Hoffman
+
+   Additional significant improvements were proposed by:
+
+      Jonathan Rosenne
+      Kent Karlsson
+      Scott Hollenbeck
+      Dave Crocker
+      Erik Nordmark
+      Matitiahu Allouche
+
+
+
+
+
+
+
+
+
+Hoffman & Blanchet          Standards Track                     [Page 5]
+
+RFC 3491                      IDN Nameprep                    March 2003
+
+
+12. Authors' Addresses
+
+   Paul Hoffman
+   Internet Mail Consortium and VPN Consortium
+   127 Segre Place
+   Santa Cruz, CA  95060 USA
+
+   EMail: paul.hoffman@imc.org and paul.hoffman@vpnc.org
+
+
+   Marc Blanchet
+   Viagenie inc.
+   2875 boul. Laurier, bur. 300
+   Ste-Foy, Quebec, Canada, G1V 2M2
+
+   EMail: Marc.Blanchet@viagenie.qc.ca
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hoffman & Blanchet          Standards Track                     [Page 6]
+
+RFC 3491                      IDN Nameprep                    March 2003
+
+
+13.  Full Copyright Statement
+
+   Copyright (C) The Internet Society (2003).  All Rights Reserved.
+
+   This document and translations of it may be copied and furnished to
+   others, and derivative works that comment on or otherwise explain it
+   or assist in its implementation may be prepared, copied, published
+   and distributed, in whole or in part, without restriction of any
+   kind, provided that the above copyright notice and this paragraph are
+   included on all such copies and derivative works.  However, this
+   document itself may not be modified in any way, such as by removing
+   the copyright notice or references to the Internet Society or other
+   Internet organizations, except as needed for the purpose of
+   developing Internet standards in which case the procedures for
+   copyrights defined in the Internet Standards process must be
+   followed, or as required to translate it into languages other than
+   English.
+
+   The limited permissions granted above are perpetual and will not be
+   revoked by the Internet Society or its successors or assigns.
+
+   This document and the information contained herein is provided on an
+   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is currently provided by the
+   Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hoffman & Blanchet          Standards Track                     [Page 7]
+
--- a/packages/testcases/input/nameprep/rfc/rfc5890.txt
+++ b/packages/testcases/input/nameprep/rfc/rfc5890.txt
--- a/packages/testcases/input/nameprep/rfc/rfc5891.txt
+++ b/packages/testcases/input/nameprep/rfc/rfc5891.txt
@ -0,0 +1,955 @@
+
+
+
+
+
+
+Internet Engineering Task Force (IETF)                        J. Klensin
+Request for Comments: 5891                                   August 2010
+Obsoletes: 3490, 3491
+Updates: 3492
+Category: Standards Track
+ISSN: 2070-1721
+
+
+    Internationalized Domain Names in Applications (IDNA): Protocol
+
+Abstract
+
+   This document is the revised protocol definition for
+   Internationalized Domain Names (IDNs).  The rationale for changes,
+   the relationship to the older specification, and important
+   terminology are provided in other documents.  This document specifies
+   the protocol mechanism, called Internationalized Domain Names in
+   Applications (IDNA), for registering and looking up IDNs in a way
+   that does not require changes to the DNS itself.  IDNA is only meant
+   for processing domain names, not free text.
+
+Status of This Memo
+
+   This is an Internet Standards Track document.
+
+   This document is a product of the Internet Engineering Task Force
+   (IETF).  It represents the consensus of the IETF community.  It has
+   received public review and has been approved for publication by the
+   Internet Engineering Steering Group (IESG).  Further information on
+   Internet Standards is available in Section 2 of RFC 5741.
+
+   Information about the current status of this document, any errata,
+   and how to provide feedback on it may be obtained at
+   http://www.rfc-editor.org/info/rfc5891.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Klensin                      Standards Track                    [Page 1]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+Copyright Notice
+
+   Copyright (c) 2010 IETF Trust and the persons identified as the
+   document authors.  All rights reserved.
+
+   This document is subject to BCP 78 and the IETF Trust's Legal
+   Provisions Relating to IETF Documents
+   (http://trustee.ietf.org/license-info) in effect on the date of
+   publication of this document.  Please review these documents
+   carefully, as they describe your rights and restrictions with respect
+   to this document.  Code Components extracted from this document must
+   include Simplified BSD License text as described in Section 4.e of
+   the Trust Legal Provisions and are provided without warranty as
+   described in the Simplified BSD License.
+
+   This document may contain material from IETF Documents or IETF
+   Contributions published or made publicly available before November
+   10, 2008.  The person(s) controlling the copyright in some of this
+   material may not have granted the IETF Trust the right to allow
+   modifications of such material outside the IETF Standards Process.
+   Without obtaining an adequate license from the person(s) controlling
+   the copyright in such materials, this document may not be modified
+   outside the IETF Standards Process, and derivative works of it may
+   not be created outside the IETF Standards Process, except to format
+   it for publication as an RFC or to translate it into languages other
+   than English.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Klensin                      Standards Track                    [Page 2]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+Table of Contents
+
+   1.  Introduction . . . . . . . . . . . . . . . . . . . . . . . . .  4
+   2.  Terminology  . . . . . . . . . . . . . . . . . . . . . . . . .  4
+   3.  Requirements and Applicability . . . . . . . . . . . . . . . .  5
+     3.1.  Requirements . . . . . . . . . . . . . . . . . . . . . . .  5
+     3.2.  Applicability  . . . . . . . . . . . . . . . . . . . . . .  5
+       3.2.1.  DNS Resource Records . . . . . . . . . . . . . . . . .  6
+       3.2.2.  Non-Domain-Name Data Types Stored in the DNS . . . . .  6
+   4.  Registration Protocol  . . . . . . . . . . . . . . . . . . . .  6
+     4.1.  Input to IDNA Registration . . . . . . . . . . . . . . . .  7
+     4.2.  Permitted Character and Label Validation . . . . . . . . .  7
+       4.2.1.  Input Format . . . . . . . . . . . . . . . . . . . . .  7
+       4.2.2.  Rejection of Characters That Are Not Permitted . . . .  8
+       4.2.3.  Label Validation . . . . . . . . . . . . . . . . . . .  8
+       4.2.4.  Registration Validation Requirements . . . . . . . . .  9
+     4.3.  Registry Restrictions  . . . . . . . . . . . . . . . . . .  9
+     4.4.  Punycode Conversion  . . . . . . . . . . . . . . . . . . .  9
+     4.5.  Insertion in the Zone  . . . . . . . . . . . . . . . . . . 10
+   5.  Domain Name Lookup Protocol  . . . . . . . . . . . . . . . . . 10
+     5.1.  Label String Input . . . . . . . . . . . . . . . . . . . . 10
+     5.2.  Conversion to Unicode  . . . . . . . . . . . . . . . . . . 10
+     5.3.  A-label Input  . . . . . . . . . . . . . . . . . . . . . . 10
+     5.4.  Validation and Character List Testing  . . . . . . . . . . 11
+     5.5.  Punycode Conversion  . . . . . . . . . . . . . . . . . . . 13
+     5.6.  DNS Name Resolution  . . . . . . . . . . . . . . . . . . . 13
+   6.  Security Considerations  . . . . . . . . . . . . . . . . . . . 13
+   7.  IANA Considerations  . . . . . . . . . . . . . . . . . . . . . 13
+   8.  Contributors . . . . . . . . . . . . . . . . . . . . . . . . . 13
+   9.  Acknowledgments  . . . . . . . . . . . . . . . . . . . . . . . 14
+   10. References . . . . . . . . . . . . . . . . . . . . . . . . . . 14
+     10.1. Normative References . . . . . . . . . . . . . . . . . . . 14
+     10.2. Informative References . . . . . . . . . . . . . . . . . . 15
+   Appendix A.  Summary of Major Changes from IDNA2003  . . . . . . . 17
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Klensin                      Standards Track                    [Page 3]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+1.  Introduction
+
+   This document supplies the protocol definition for Internationalized
+   Domain Names in Applications (IDNA), with the version specified here
+   known as IDNA2008.  Essential definitions and terminology for
+   understanding this document and a road map of the collection of
+   documents that make up IDNA2008 appear in a separate Definitions
+   document [RFC5890].  Appendix A discusses the relationship between
+   this specification and the earlier version of IDNA (referred to here
+   as "IDNA2003").  The rationale for these changes, along with
+   considerable explanatory material and advice to zone administrators
+   who support IDNs, is provided in another document, known informally
+   in this series as the "Rationale document" [RFC5894].
+
+   IDNA works by allowing applications to use certain ASCII [ASCII]
+   string labels (beginning with a special prefix) to represent
+   non-ASCII name labels.  Lower-layer protocols need not be aware of
+   this; therefore, IDNA does not change any infrastructure.  In
+   particular, IDNA does not depend on any changes to DNS servers,
+   resolvers, or DNS protocol elements, because the ASCII name service
+   provided by the existing DNS can be used for IDNA.
+
+   IDNA applies only to a specific subset of DNS labels.  The base DNS
+   standards [RFC1034] [RFC1035] and their various updates specify how
+   to combine labels into fully-qualified domain names and parse labels
+   out of those names.
+
+   This document describes two separate protocols, one for IDN
+   registration (Section 4) and one for IDN lookup (Section 5).  These
+   two protocols share some terminology, reference data, and operations.
+
+2.  Terminology
+
+   As mentioned above, terminology used as part of the definition of
+   IDNA appears in the Definitions document [RFC5890].  It is worth
+   noting that some of this terminology overlaps with, and is consistent
+   with, that used in Unicode or other character set standards and the
+   DNS.  Readers of this document are assumed to be familiar with the
+   associated Definitions document and with the DNS-specific terminology
+   in RFC 1034 [RFC1034].
+
+   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+   document are to be interpreted as described in BCP 14, RFC 2119
+   [RFC2119].
+
+
+
+
+
+
+Klensin                      Standards Track                    [Page 4]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+3.  Requirements and Applicability
+
+3.1.  Requirements
+
+   IDNA makes the following requirements:
+
+   1.  Whenever a domain name is put into a domain name slot that is not
+       IDNA-aware (see Section 2.3.2.6 of the Definitions document
+       [RFC5890]), it MUST contain only ASCII characters (i.e., its
+       labels must be either A-labels or NR-LDH labels), unless the DNS
+       application is not subject to historical recommendations for
+       "hostname"-style names (see RFC 1034 [RFC1034] and
+       Section 3.2.1).
+
+   2.  Labels MUST be compared using equivalent forms: either both
+       A-label forms or both U-label forms.  Because A-labels and
+       U-labels can be transformed into each other without loss of
+       information, these comparisons are equivalent (however, in
+       practice, comparison of U-labels requires first verifying that
+       they actually are U-labels and not just Unicode strings).  A pair
+       of A-labels MUST be compared as case-insensitive ASCII (as with
+       all comparisons of ASCII DNS labels).  U-labels MUST be compared
+       as-is, without case folding or other intermediate steps.  While
+       it is not necessary to validate labels in order to compare them,
+       successful comparison does not imply validity.  In many cases,
+       not limited to comparison, validation may be important for other
+       reasons and SHOULD be performed.
+
+   3.  Labels being registered MUST conform to the requirements of
+       Section 4.  Labels being looked up and the lookup process MUST
+       conform to the requirements of Section 5.
+
+3.2.  Applicability
+
+   IDNA applies to all domain names in all domain name slots in
+   protocols except where it is explicitly excluded.  It does not apply
+   to domain name slots that do not use the LDH syntax rules as
+   described in the Definitions document [RFC5890].
+
+   Because it uses the DNS, IDNA applies to many protocols that were
+   specified before it was designed.  IDNs occupying domain name slots
+   in those older protocols MUST be in A-label form until and unless
+   those protocols and their implementations are explicitly upgraded to
+   be aware of IDNs and to accept the U-label form.  IDNs actually
+   appearing in DNS queries or responses MUST be A-labels.
+
+
+
+
+
+
+Klensin                      Standards Track                    [Page 5]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+   IDNA-aware protocols and implementations MAY accept U-labels,
+   A-labels, or both as those particular protocols specify.  IDNA is not
+   defined for extended label types (see RFC 2671 [RFC2671], Section 3).
+
+3.2.1.  DNS Resource Records
+
+   IDNA applies only to domain names in the NAME and RDATA fields of DNS
+   resource records whose CLASS is IN.  See the DNS specification
+   [RFC1035] for precise definitions of these terms.
+
+   The application of IDNA to DNS resource records depends entirely on
+   the CLASS of the record, and not on the TYPE except as noted below.
+   This will remain true, even as new TYPEs are defined, unless a new
+   TYPE defines TYPE-specific rules.  Special naming conventions for SRV
+   records (and "underscore labels" more generally) are incompatible
+   with IDNA coding as discussed in the Definitions document [RFC5890],
+   especially Section 2.3.2.3.  Of course, underscore labels may be part
+   of a domain that uses IDN labels at higher levels in the tree.
+
+3.2.2.  Non-Domain-Name Data Types Stored in the DNS
+
+   Although IDNA enables the representation of non-ASCII characters in
+   domain names, that does not imply that IDNA enables the
+   representation of non-ASCII characters in other data types that are
+   stored in domain names, specifically in the RDATA field for types
+   that have structured RDATA format.  For example, an email address
+   local part is stored in a domain name in the RNAME field as part of
+   the RDATA of an SOA record (e.g., hostmaster@example.com would be
+   represented as hostmaster.example.com).  IDNA does not update the
+   existing email standards, which allow only ASCII characters in local
+   parts.  Even though work is in progress to define
+   internationalization for email addresses [RFC4952], changes to the
+   email address part of the SOA RDATA would require action in, or
+   updates to, other standards, specifically those that specify the
+   format of the SOA RR.
+
+4.  Registration Protocol
+
+   This section defines the model for registering an IDN.  The model is
+   implementation independent; any sequence of steps that produces
+   exactly the same result for all labels is considered a valid
+   implementation.
+
+   Note that, while the registration (this section) and lookup protocols
+   (Section 5) are very similar in most respects, they are not
+   identical, and implementers should carefully follow the steps
+   described in this specification.
+
+
+
+
+Klensin                      Standards Track                    [Page 6]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+4.1.  Input to IDNA Registration
+
+   Registration processes, especially processing by entities (often
+   called "registrars") who deal with registrants before the request
+   actually reaches the zone manager ("registry"), are outside the scope
+   of this definition and may differ significantly depending on local
+   needs.  By the time a string enters the IDNA registration process as
+   described in this specification, it MUST be in Unicode and in
+   Normalization Form C (NFC [Unicode-UAX15]).  Entities responsible for
+   zone files ("registries") MUST accept only the exact string for which
+   registration is requested, free of any mappings or local adjustments.
+   They MAY accept that input in any of three forms:
+
+   1.  As a pair of A-label and U-label.
+
+   2.  As an A-label only.
+
+   3.  As a U-label only.
+
+   The first two of these forms are RECOMMENDED because the use of
+   A-labels avoids any possibility of ambiguity.  The first is normally
+   preferred over the second because it permits further verification of
+   user intent (see Section 4.2.1).
+
+4.2.  Permitted Character and Label Validation
+
+4.2.1.  Input Format
+
+   If both the U-label and A-label forms are available, the registry
+   MUST ensure that the A-label form is in lowercase, perform a
+   conversion to a U-label, perform the steps and tests described below
+   on that U-label, and then verify that the A-label produced by the
+   step in Section 4.4 matches the one provided as input.  In addition,
+   the U-label that was provided as input and the one obtained by
+   conversion of the A-label MUST match exactly.  If, for some reason,
+   these tests fail, the registration MUST be rejected.
+
+   If only an A-label was provided and the conversion to a U-label is
+   not performed, the registry MUST still verify that the A-label is
+   superficially valid, i.e., that it does not violate any of the rules
+   of Punycode encoding [RFC3492] such as the prohibition on trailing
+   hyphen-minus, the requirement that all characters be ASCII, and so
+   on.  Strings that appear to be A-labels (e.g., they start with
+   "xn--") and strings that are supplied to the registry in a context
+   reserved for A-labels (such as a field in a form to be filled out),
+   but that are not valid A-labels as described in this paragraph, MUST
+   NOT be placed in DNS zones that support IDNA.
+
+
+
+
+Klensin                      Standards Track                    [Page 7]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+   If only an A-label is provided and the conversion to a U-label is
+   not performed, but the superficial tests described in the previous
+   paragraph are performed, registration procedures MAY, and usually
+   will, bypass the tests and actions in the balance of Section 4.2 and
+   in Sections 4.3 and 4.4.
+
+4.2.2.  Rejection of Characters That Are Not Permitted
+
+   The candidate Unicode string MUST NOT contain characters that appear
+   in the "DISALLOWED" and "UNASSIGNED" lists specified in the Tables
+   document [RFC5892].
+
+4.2.3.  Label Validation
+
+   The proposed label (in the form of a Unicode string, i.e., a string
+   that at least superficially appears to be a U-label) is then examined
+   using tests that require examination of more than one character.
+   Character order is considered to be the on-the-wire order.  That
+   order may not be the same as the display order.
+
+4.2.3.1.  Hyphen Restrictions
+
+   The Unicode string MUST NOT contain "--" (two consecutive hyphens) in
+   the third and fourth character positions and MUST NOT start or end
+   with a "-" (hyphen).
+
+4.2.3.2.  Leading Combining Marks
+
+   The Unicode string MUST NOT begin with a combining mark or combining
+   character (see The Unicode Standard, Section 2.11 [Unicode] for an
+   exact definition).
+
+4.2.3.3.  Contextual Rules
+
+   The Unicode string MUST NOT contain any characters whose validity is
+   context-dependent, unless the validity is positively confirmed by a
+   contextual rule.  To check this, each code point identified as
+   CONTEXTJ or CONTEXTO in the Tables document [RFC5892] MUST have a
+   non-null rule.  If such a code point is missing a rule, the label is
+   invalid.  If the rule exists but the result of applying the rule is
+   negative or inconclusive, the proposed label is invalid.
+
+4.2.3.4.  Labels Containing Characters Written Right to Left
+
+   If the proposed label contains any characters from scripts that are
+   written from right to left, it MUST meet the Bidi criteria [RFC5893].
+
+
+
+
+
+Klensin                      Standards Track                    [Page 8]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+4.2.4.  Registration Validation Requirements
+
+   Strings that contain at least one non-ASCII character, have been
+   produced by the steps above, pass all of the tests in Section 4.2.3,
+   and are 63 or fewer characters long in ASCII-compatible encoding
+   (ACE) form (see Section 4.4), are U-labels.
+
+   To summarize, tests are made in Section 4.2 for invalid characters,
+   invalid combinations of characters, for labels that are invalid even
+   if the characters they contain are valid individually, and for labels
+   that do not conform to the restrictions for strings containing
+   right-to-left characters.
+
+4.3.  Registry Restrictions
+
+   In addition to the rules and tests above, there are many reasons why
+   a registry could reject a label.  Registries at all levels of the
+   DNS, not just the top level, are expected to establish policies about
+   label registrations.  Policies are likely to be informed by the local
+   languages and the scripts that are used to write them and may depend
+   on many factors including what characters are in the label (for
+   example, a label may be rejected based on other labels already
+   registered).  See the Rationale document [RFC5894], Section 3.2, for
+   further discussion and recommendations about registry policies.
+
+   The string produced by the steps in Section 4.2 is checked and
+   processed as appropriate to local registry restrictions.  Application
+   of those registry restrictions may result in the rejection of some
+   labels or the application of special restrictions to others.
+
+4.4.  Punycode Conversion
+
+   The resulting U-label is converted to an A-label (defined in Section
+   2.3.2.1 of the Definitions document [RFC5890]).  The A-label is the
+   encoding of the U-label according to the Punycode algorithm [RFC3492]
+   with the ACE prefix "xn--" added at the beginning of the string.  The
+   resulting string must, of course, conform to the length limits
+   imposed by the DNS.  This document does not update or alter the
+   Punycode algorithm specified in RFC 3492 in any way.  RFC 3492 does
+   make a non-normative reference to the information about the value and
+   construction of the ACE prefix that appears in RFC 3490 or Nameprep
+   [RFC3491].  For consistency and reader convenience, IDNA2008
+   effectively updates that reference to point to this document.  That
+   change does not alter the prefix itself.  The prefix, "xn--", is the
+   same in both sets of documents.
+
+
+
+
+
+
+Klensin                      Standards Track                    [Page 9]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+   With the exception of the maximum string length test on Punycode
+   output, the failure conditions identified in the Punycode encoding
+   procedure cannot occur if the input is a U-label as determined by the
+   steps in Sections 4.1 through 4.3 above.
+
+4.5.  Insertion in the Zone
+
+   The label is registered in the DNS by inserting the A-label into a
+   zone.
+
+5.  Domain Name Lookup Protocol
+
+   Lookup is different from registration and different tests are applied
+   on the client.  Although some validity checks are necessary to avoid
+   serious problems with the protocol, the lookup-side tests are more
+   permissive and rely on the assumption that names that are present in
+   the DNS are valid.  That assumption is, however, a weak one because
+   the presence of wildcards in the DNS might cause a string that is not
+   actually registered in the DNS to be successfully looked up.
+
+5.1.  Label String Input
+
+   The user supplies a string in the local character set, for example,
+   by typing it, clicking on it, or copying and pasting it from a
+   resource identifier, e.g., a Uniform Resource Identifier (URI)
+   [RFC3986] or an Internationalized Resource Identifier (IRI)
+   [RFC3987], from which the domain name is extracted.  Alternately,
+   some process not directly involving the user may read the string from
+   a file or obtain it in some other way.  Processing in this step and
+   the one specified in Section 5.2 are local matters, to be
+   accomplished prior to actual invocation of IDNA.
+
+5.2.  Conversion to Unicode
+
+   The string is converted from the local character set into Unicode, if
+   it is not already in Unicode.  Depending on local needs, this
+   conversion may involve mapping some characters into other characters
+   as well as coding conversions.  Those issues are discussed in the
+   mapping-related sections (Sections 4.2, 4.4, 6, and 7.3) of the
+   Rationale document [RFC5894] and in the separate Mapping document
+   [IDNA2008-Mapping].  The result MUST be a Unicode string in NFC form.
+
+5.3.  A-label Input
+
+   If the input to this procedure appears to be an A-label (i.e., it
+   starts with "xn--", interpreted case-insensitively), the lookup
+   application MAY attempt to convert it to a U-label, first ensuring
+   that the A-label is entirely in lowercase (converting it to lowercase
+
+
+
+Klensin                      Standards Track                   [Page 10]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+   if necessary), and apply the tests of Section 5.4 and the conversion
+   of Section 5.5 to that form.  If the label is converted to Unicode
+   (i.e., to U-label form) using the Punycode decoding algorithm, then
+   the processing specified in those two sections MUST be performed, and
+   the label MUST be rejected if the resulting label is not identical to
+   the original.  See Section 8.1 of the Rationale document [RFC5894]
+   for additional discussion on this topic.
+
+   Conversion from the A-label and testing that the result is a U-label
+   SHOULD be performed if the domain name will later be presented to the
+   user in native character form (this requires that the lookup
+   application be IDNA-aware).  If those steps are not performed, the
+   lookup process SHOULD at least test to determine that the string is
+   actually an A-label, examining it for the invalid formats specified
+   in the Punycode decoding specification.  Applications that are not
+   IDNA-aware will obviously omit that testing; others MAY treat the
+   string as opaque to avoid the additional processing at the expense of
+   providing less protection and information to users.
+
+5.4.  Validation and Character List Testing
+
+   As with the registration procedure described in Section 4, the
+   Unicode string is checked to verify that all characters that appear
+   in it are valid as input to IDNA lookup processing.  As discussed
+   above and in the Rationale document [RFC5894], the lookup check is
+   more liberal than the registration one.  Labels that have not been
+   fully evaluated for conformance to the applicable rules are referred
+   to as "putative" labels as discussed in Section 2.3.2.1 of the
+   Definitions document [RFC5890].  Putative U-labels with any of the
+   following characteristics MUST be rejected prior to DNS lookup:
+
+   o  Labels that are not in NFC [Unicode-UAX15].
+
+   o  Labels containing "--" (two consecutive hyphens) in the third and
+      fourth character positions.
+
+   o  Labels whose first character is a combining mark (see The Unicode
+      Standard, Section 2.11 [Unicode]).
+
+   o  Labels containing prohibited code points, i.e., those that are
+      assigned to the "DISALLOWED" category of the Tables document
+      [RFC5892].
+
+   o  Labels containing code points that are identified in the Tables
+      document as "CONTEXTJ", i.e., requiring exceptional contextual
+      rule processing on lookup, but that do not conform to those rules.
+      Note that this implies that a rule must be defined, not null: a
+
+
+
+
+Klensin                      Standards Track                   [Page 11]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+      character that requires a contextual rule but for which the rule
+      is null is treated in this step as having failed to conform to the
+      rule.
+
+   o  Labels containing code points that are identified in the Tables
+      document as "CONTEXTO", but for which no such rule appears in the
+      table of rules.  Applications resolving DNS names or carrying out
+      equivalent operations are not required to test contextual rules
+      for "CONTEXTO" characters, only to verify that a rule is defined
+      (although they MAY make such tests to provide better protection or
+      give better information to the user).
+
+   o  Labels containing code points that are unassigned in the version
+      of Unicode being used by the application, i.e., in the UNASSIGNED
+      category of the Tables document.
+
+      This requirement means that the application must use a list of
+      unassigned characters that is matched to the version of Unicode
+      that is being used for the other requirements in this section.  It
+      is not required that the application know which version of Unicode
+      is being used; that information might be part of the operating
+      environment in which the application is running.
+
+   In addition, the application SHOULD apply the following test.
+
+   o  Verification that the string is compliant with the requirements
+      for right-to-left characters specified in the Bidi document
+      [RFC5893].
+
+   This test may be omitted in special circumstances, such as when the
+   lookup application knows that the conditions are enforced elsewhere,
+   because an attempt to look up and resolve such strings will almost
+   certainly lead to a DNS lookup failure except when wildcards are
+   present in the zone.  However, applying the test is likely to give
+   much better information about the reason for a lookup failure --
+   information that may be usefully passed to the user when that is
+   feasible -- than DNS resolution failure information alone.
+
+   For all other strings, the lookup application MUST rely on the
+   presence or absence of labels in the DNS to determine the validity of
+   those labels and the validity of the characters they contain.  If
+   they are registered, they are presumed to be valid; if they are not,
+   their possible validity is not relevant.  While a lookup application
+   may reasonably issue warnings about strings it believes may be
+   problematic, applications that decline to process a string that
+   conforms to the rules above (i.e., does not look it up in the DNS)
+   are not in conformance with this protocol.
+
+
+
+
+Klensin                      Standards Track                   [Page 12]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+5.5.  Punycode Conversion
+
+   The string that has now been validated for lookup is converted to ACE
+   form by applying the Punycode algorithm to the string and then adding
+   the ACE prefix ("xn--").
+
+5.6.  DNS Name Resolution
+
+   The A-label resulting from the conversion in Section 5.5 or supplied
+   directly (see Section 5.3) is combined with other labels as needed to
+   form a fully-qualified domain name that is then looked up in the DNS,
+   using normal DNS resolver procedures.  The lookup can obviously
+   either succeed (returning information) or fail.
+
+6.  Security Considerations
+
+   Security Considerations for this version of IDNA are described in the
+   Definitions document [RFC5890], except for the special issues
+   associated with right-to-left scripts and characters.  The latter are
+   discussed in the Bidi document [RFC5893].
+
+   In order to avoid intentional or accidental attacks from labels that
+   might be confused with others, special problems in rendering, and so
+   on, the IDNA model requires that registries exercise care and
+   thoughtfulness about what labels they choose to permit.  That issue
+   is discussed in Section 4.3 of this document which, in turn, points
+   to a somewhat more extensive discussion in the Rationale document
+   [RFC5894].
+
+7.  IANA Considerations
+
+   IANA actions for this version of IDNA are specified in the Tables
+   document [RFC5892] and discussed informally in the Rationale document
+   [RFC5894].  The components of IDNA described in this document do not
+   require any IANA actions.
+
+8.  Contributors
+
+   While the listed editor held the pen, the original versions of this
+   document represent the joint work and conclusions of an ad hoc design
+   team consisting of the editor and, in alphabetic order, Harald
+   Alvestrand, Tina Dam, Patrik Faltstrom, and Cary Karp.  This document
+   draws significantly on the original version of IDNA [RFC3490] both
+   conceptually and for specific text.  This second-generation version
+   would not have been possible without the work that went into that
+   first version and especially the contributions of its authors Patrik
+   Faltstrom, Paul Hoffman, and Adam Costello.  While Faltstrom was
+
+
+
+
+Klensin                      Standards Track                   [Page 13]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+   actively involved in the creation of this version, Hoffman and
+   Costello were not and should not be held responsible for any errors
+   or omissions.
+
+9.  Acknowledgments
+
+   This revision to IDNA would have been impossible without the
+   accumulated experience since RFC 3490 was published and resulting
+   comments and complaints of many people in the IETF, ICANN, and other
+   communities (too many people to list here).  Nor would it have been
+   possible without RFC 3490 itself and the efforts of the Working Group
+   that defined it.  Those people whose contributions are acknowledged
+   in RFC 3490, RFC 4690 [RFC4690], and the Rationale document [RFC5894]
+   were particularly important.
+
+   Specific textual changes were incorporated into this document after
+   suggestions from the other contributors, Stephane Bortzmeyer, Vint
+   Cerf, Lisa Dusseault, Paul Hoffman, Kent Karlsson, James Mitchell,
+   Erik van der Poel, Marcos Sanz, Andrew Sullivan, Wil Tan, Ken
+   Whistler, Chris Wright, and other WG participants and reviewers
+   including Martin Duerst, James Mitchell, Subramanian Moonesamy, Peter
+   Saint-Andre, Margaret Wasserman, and Dan Winship who caught specific
+   errors and recommended corrections.  Special thanks are due to Paul
+   Hoffman for permission to extract material to form the basis for
+   Appendix A from a draft document that he prepared.
+
+10.  References
+
+10.1.  Normative References
+
+   [RFC1034]    Mockapetris, P., "Domain names - concepts and
+                facilities", STD 13, RFC 1034, November 1987.
+
+   [RFC1035]    Mockapetris, P., "Domain names - implementation and
+                specification", STD 13, RFC 1035, November 1987.
+
+   [RFC2119]    Bradner, S., "Key words for use in RFCs to Indicate
+                Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+   [RFC3492]    Costello, A., "Punycode: A Bootstring encoding of
+                Unicode for Internationalized Domain Names in
+                Applications (IDNA)", RFC 3492, March 2003.
+
+   [RFC5890]    Klensin, J., "Internationalized Domain Names for
+                Applications (IDNA): Definitions and Document
+                Framework", RFC 5890, August 2010.
+
+
+
+
+
+Klensin                      Standards Track                   [Page 14]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+   [RFC5892]    Faltstrom, P., Ed., "The Unicode Code Points and
+                Internationalized Domain Names for Applications (IDNA)",
+                RFC 5892, August 2010.
+
+   [RFC5893]    Alvestrand, H., Ed. and C. Karp, "Right-to-Left Scripts
+                for Internationalized Domain Names for Applications
+                (IDNA)", RFC 5893, August 2010.
+
+   [Unicode-UAX15]
+                The Unicode Consortium, "Unicode Standard Annex #15:
+                Unicode Normalization Forms", September 2009,
+                <http://www.unicode.org/reports/tr15/>.
+
+10.2.  Informative References
+
+   [ASCII]      American National Standards Institute (formerly United
+                States of America Standards Institute), "USA Code for
+                Information Interchange", ANSI X3.4-1968, 1968.  ANSI
+                X3.4-1968 has been replaced by newer versions with
+                slight modifications, but the 1968 version remains
+                definitive for the Internet.
+
+   [IDNA2008-Mapping]
+                Resnick, P. and P. Hoffman, "Mapping Characters in
+                Internationalized Domain Names for Applications (IDNA)",
+                Work in Progress, April 2010.
+
+   [RFC2671]    Vixie, P., "Extension Mechanisms for DNS (EDNS0)",
+                RFC 2671, August 1999.
+
+   [RFC3490]    Faltstrom, P., Hoffman, P., and A. Costello,
+                "Internationalizing Domain Names in Applications
+                (IDNA)", RFC 3490, March 2003.
+
+   [RFC3491]    Hoffman, P. and M. Blanchet, "Nameprep: A Stringprep
+                Profile for Internationalized Domain Names (IDN)",
+                RFC 3491, March 2003.
+
+   [RFC3986]    Berners-Lee, T., Fielding, R., and L. Masinter, "Uniform
+                Resource Identifier (URI): Generic Syntax", STD 66,
+                RFC 3986, January 2005.
+
+   [RFC3987]    Duerst, M. and M. Suignard, "Internationalized Resource
+                Identifiers (IRIs)", RFC 3987, January 2005.
+
+   [RFC4690]    Klensin, J., Faltstrom, P., Karp, C., and IAB, "Review
+                and Recommendations for Internationalized Domain Names
+                (IDNs)", RFC 4690, September 2006.
+
+
+
+Klensin                      Standards Track                   [Page 15]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+   [RFC4952]    Klensin, J. and Y. Ko, "Overview and Framework for
+                Internationalized Email", RFC 4952, July 2007.
+
+   [RFC5894]    Klensin, J., "Internationalized Domain Names for
+                Applications (IDNA): Background, Explanation, and
+                Rationale", RFC 5894, August 2010.
+
+   [Unicode]    The Unicode Consortium, "The Unicode Standard, Version
+                5.0", 2007.  Boston, MA, USA: Addison-Wesley.  ISBN
+                0-321-48091-0.  This printed reference has now been
+                updated online to reflect additional code points.  For
+                code points, the reference at the time this document was
+                published is to Unicode 5.2.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Klensin                      Standards Track                   [Page 16]
+
+RFC 5891                    IDNA2008 Protocol                August 2010
+
+
+Appendix A.  Summary of Major Changes from IDNA2003
+
+   1.   Update base character set from Unicode 3.2 to Unicode version
+        agnostic.
+
+   2.   Separate the definitions for the "registration" and "lookup"
+        activities.
+
+   3.   Disallow symbol and punctuation characters except where special
+        exceptions are necessary.
+
+   4.   Remove the mapping and normalization steps from the protocol and
+        have them, instead, done by the applications themselves,
+        possibly in a local fashion, before invoking the protocol.
+
+   5.   Change the way that the protocol specifies which characters are
+        allowed in labels from "humans decide what the table of code
+        points contains" to "decisions about code points are based on
+        Unicode properties plus a small exclusion list created by
+        humans".
+
+   6.   Introduce the new concept of characters that can be used only in
+        specific contexts.
+
+   7.   Allow typical words and names in languages such as Dhivehi and
+        Yiddish to be expressed.
+
+   8.   Make bidirectional domain names (delimited strings of labels,
+        not just labels standing on their own) display in a less
+        surprising fashion, whether they appear in obvious domain name
+        contexts or as part of running text in paragraphs.
+
+   9.   Remove the dot separator from the mandatory part of the
+        protocol.
+
+   10.  Make some currently valid labels that are not actually IDNA
+        labels invalid.
+
+Author's Address
+
+   John C Klensin
+   1770 Massachusetts Ave, Ste 322
+   Cambridge, MA  02140
+   USA
+
+   Phone: +1 617 245 1457
+   EMail: john+ietf@jck.com
+
+
+
+
+Klensin                      Standards Track                   [Page 17]
+
--- a/packages/testcases/input/nameprep/test-vectors-00.txt
+++ b/packages/testcases/input/nameprep/test-vectors-00.txt
@ -0,0 +1,269 @@
+
+#   struct stringprep
+#   {
+#     char *comment;
+#     char *in;
+#     char *out;
+#     char *profile;
+#     int flags;
+#     int rc;
+#   }
+     {
+       "Map to nothing",
+       "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B"
+       "bar""\xE2\x80\x8B\xE2\x81\xA0""baz\xEF\xB8\x80\xEF\xB8\x88"
+       "\xEF\xB8\x8F\xEF\xBB\xBF", "foobarbaz"
+     },
+     {
+       "Case folding ASCII U+0043 U+0041 U+0046 U+0045",
+       "CAFE", "cafe"
+
+
+
+Josefsson                Expires August 2, 2003                [Page 31]
+
+Internet-Draft       Nameprep and IDNA Test Vectors        February 2003
+
+
+     },
+     {
+       "Case folding 8bit U+00DF (german sharp s)",
+# There is a bug here (\xDF is not a valid UTF-8 continuation byte; U+00DF encodes as \xC3\x9F): RicMoo
+#              VV
+#       "\xC3\xDF", "ss"
+       "\xC3\x9f", "ss"
+     },
+     {
+       "Case folding U+0130 (turkish capital I with dot)",
+       "\xC4\xB0", "i\xcc\x87"
+     },
+     {
+       "Case folding multibyte U+0143 U+037A",
+       "\xC5\x83\xCD\xBA", "\xC5\x84 \xCE\xB9"
+     },
+     {
+       "Case folding U+2121 U+33C6 U+1D7BB",
+       "\xE2\x84\xA1\xE3\x8F\x86\xF0\x9D\x9E\xBB",
+       "telc\xE2\x88\x95""kg\xCF\x83"
+     },
+     {
+       "Normalization of U+006a U+030c U+00A0 U+00AA",
+       "\x6A\xCC\x8C\xC2\xA0\xC2\xAA", "\xC7\xB0 a"
+     },
+     {
+       "Case folding U+1FB7 and normalization",
+       "\xE1\xBE\xB7", "\xE1\xBE\xB6\xCE\xB9"
+     },
+     {
+       "Self-reverting case folding U+01F0 and normalization",
+# There is a bug here (\xF0 is not a valid UTF-8 continuation byte; U+01F0 encodes as \xC7\xB0): RicMoo
+#              VV
+#       "\xC7\xF0", "\xC7\xB0"
+       "\xC7\xb0", "\xC7\xB0"
+     },
+     {
+       "Self-reverting case folding U+0390 and normalization",
+       "\xCE\x90", "\xCE\x90"
+     },
+     {
+       "Self-reverting case folding U+03B0 and normalization",
+       "\xCE\xB0", "\xCE\xB0"
+     },
+     {
+       "Self-reverting case folding U+1E96 and normalization",
+       "\xE1\xBA\x96", "\xE1\xBA\x96"
+     },
+     {
+       "Self-reverting case folding U+1F56 and normalization",
+       "\xE1\xBD\x96", "\xE1\xBD\x96"
+     },
+     {
+       "ASCII space character U+0020",
+
+
+
+Josefsson                Expires August 2, 2003                [Page 32]
+
+Internet-Draft       Nameprep and IDNA Test Vectors        February 2003
+
+
+       "\x20", "\x20"
+     },
+     {
+       "Non-ASCII 8bit space character U+00A0",
+       "\xC2\xA0", "\x20"
+     },
+     {
+       "Non-ASCII multibyte space character U+1680",
+       "\xE1\x9A\x80", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Non-ASCII multibyte space character U+2000",
+       "\xE2\x80\x80", "\x20"
+     },
+     {
+       "Zero Width Space U+200b",
+       "\xE2\x80\x8b", ""
+     },
+     {
+       "Non-ASCII multibyte space character U+3000",
+       "\xE3\x80\x80", "\x20"
+     },
+     {
+       "ASCII control characters U+0010 U+007F",
+       "\x10\x7F", "\x10\x7F"
+     },
+     {
+       "Non-ASCII 8bit control character U+0085",
+       "\xC2\x85", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Non-ASCII multibyte control character U+180E",
+       "\xE1\xA0\x8E", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Zero Width No-Break Space U+FEFF",
+       "\xEF\xBB\xBF", ""
+     },
+     {
+       "Non-ASCII control character U+1D175",
+       "\xF0\x9D\x85\xB5", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Plane 0 private use character U+F123",
+
+
+
+Josefsson                Expires August 2, 2003                [Page 33]
+
+Internet-Draft       Nameprep and IDNA Test Vectors        February 2003
+
+
+       "\xEF\x84\xA3", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Plane 15 private use character U+F1234",
+       "\xF3\xB1\x88\xB4", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Plane 16 private use character U+10F234",
+       "\xF4\x8F\x88\xB4", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Non-character code point U+8FFFE",
+       "\xF2\x8F\xBF\xBE", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Non-character code point U+10FFFF",
+       "\xF4\x8F\xBF\xBF", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Surrogate code U+DF42",
+       "\xED\xBD\x82", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Non-plain text character U+FFFD",
+       "\xEF\xBF\xBD", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Ideographic description character U+2FF5",
+       "\xE2\xBF\xB5", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Display property character U+0341",
+       "\xCD\x81", "\xCC\x81"
+     },
+     {
+       "Left-to-right mark U+200E",
+       "\xE2\x80\x8E", "\xCC\x81", "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+
+
+
+Josefsson                Expires August 2, 2003                [Page 34]
+
+Internet-Draft       Nameprep and IDNA Test Vectors        February 2003
+
+
+       "Deprecated U+202A",
+       "\xE2\x80\xAA", "\xCC\x81", "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Language tagging character U+E0001",
+       "\xF3\xA0\x80\x81", "\xCC\x81", "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Language tagging character U+E0042",
+       "\xF3\xA0\x81\x82", NULL, "Nameprep", 0,
+       STRINGPREP_CONTAINS_PROHIBITED
+     },
+     {
+       "Bidi: RandALCat character U+05BE and LCat characters",
+       "foo\xD6\xBE""bar", NULL, "Nameprep", 0,
+       STRINGPREP_BIDI_BOTH_L_AND_RAL
+     },
+     {
+       "Bidi: RandALCat character U+FD50 and LCat characters",
+       "foo\xEF\xB5\x90""bar", NULL, "Nameprep", 0,
+       STRINGPREP_BIDI_BOTH_L_AND_RAL
+     },
+     {
+       "Bidi: RandALCat character U+FE76 and LCat characters",
+       "foo\xEF\xB9\xB6""bar", "foo \xd9\x8e""bar"
+     },
+     { "Bidi: RandALCat without trailing RandALCat U+0627 U+0031",
+       "\xD8\xA7\x31", NULL, "Nameprep", 0,
+       STRINGPREP_BIDI_LEADTRAIL_NOT_RAL}
+     ,
+     {
+       "Bidi: RandALCat character U+0627 U+0031 U+0628",
+       "\xD8\xA7\x31\xD8\xA8", "\xD8\xA7\x31\xD8\xA8"
+     },
+     {
+       "Unassigned code point U+E0002",
+       "\xF3\xA0\x80\x82", NULL, "Nameprep", STRINGPREP_NO_UNASSIGNED,
+       STRINGPREP_CONTAINS_UNASSIGNED
+     },
+     {
+       "Larger test (shrinking)",
+# There is a bug here (\xDF is not a valid UTF-8 continuation byte; U+00DF encodes as \xC3\x9F): RicMoo
+#                       VV
+#       "X\xC2\xAD\xC3\xDF\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2"
+       "X\xC2\xAD\xC3\x9F\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2"
+       "\xaa\xce\xb0\xe2\x80\x80", "xssi\xcc\x87""tel\xc7\xb0 a\xce\xb0 ",
+       "Nameprep"
+     },
+     {
+
+
+
+Josefsson                Expires August 2, 2003                [Page 35]
+
+Internet-Draft       Nameprep and IDNA Test Vectors        February 2003
+
+
+       "Larger test (expanding)",
+# There is a bug here (\xDF is not a valid UTF-8 continuation byte; U+00DF encodes as \xC3\x9F): RicMoo
+#               VV
+#       "X\xC3\xDF\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80",
+       "X\xc3\x9F\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80",
+       "xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88"
+       "\xe3\x83\xab""i\xcc\x87""tel\x28""d\x29\xe3\x82\xa2\xe3\x83\x91"
+       "\xe3\x83\xbc\xe3\x83\x88"
+     },