xref: /Lucene/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt (revision 0eeba8d37cbc851d017c07a529279ac7e7e4236a)
1# Copyright 2001-2010 Unicode, Inc.
2#
3# Disclaimer
4#
5# This source code is provided as is by Unicode, Inc. No claims are
6# made as to fitness for any particular purpose. No warranties of any
7# kind are expressed or implied. The recipient agrees to determine
8# applicability of information provided. If this file has been
9# purchased on magnetic or optical media from Unicode, Inc., the
10# sole remedy for any claim will be exchange of defective media
11# within 90 days of receipt.
12#
13# Limitations on Rights to Redistribute This Code
14#
15# Unicode, Inc. hereby grants the right to freely use the information
16# supplied in this file in the creation of products supporting the
17# Unicode Standard, and to make copies of this file in any form
18# for internal or external distribution as long as this notice
19# remains attached.
20
21### Custom Normalization mappings for UTR#30
22### (http://www.unicode.org/reports/tr30/tr30-4.html)
23
24#### WARNING ####
25#### "# Rule:" lines direct content generation.
26#### All non-comments will be REMOVED when this file's contents
27#### are generated by 'ant gen-utr30-data-files'.
28#### Use "# Rule: verbatim" to keep non-comments up until
29#### the next "# Rule:" line.
30#### WARNING ####
31
32## Accent removal
33# See DiacriticFolding.txt
34
35## Case Folding (done by cf)
36
37## Canonical Duplicates Folding (done by cd)
38
39## Dashes folding
# Every code point selected by the Rule expression below is mapped to 002D.
# The Rule subtracts 2053, FE31 and FE32 from the set, and 002D itself
# (2053 is mapped to 007E under Punctuation Folding later in this file).
# This section is not marked "# Rule: verbatim", so per the WARNING at the
# top of this file the mapping lines are removed and regenerated by
# 'ant gen-utr30-data-files'. Change the Rule rather than the list.
40# Rule: [[[[:Dash:][:Pd:]]-[\u2053\uFE31\uFE32]] - [\u002D]] > 002D
41058A>002D
4205BE>002D
431400>002D
441806>002D
452010..2015>002D
46207B>002D
47208B>002D
482212>002D
492E17>002D
502E1A>002D
512E3A..2E3B>002D
522E40>002D
532E5D>002D
54301C>002D
553030>002D
5630A0>002D
57FE58>002D
58FE63>002D
59FF0D>002D
6010EAD>002D
61
62## Greek letterforms folding (done by kd)
63
64## Hebrew alternates folding (done by kd)
65
66## Jamo folding (done by kd)
67
68## Math symbol folding (done by kd)
69
70## Native digit folding
71# See NativeDigitFolding.txt
72
73## Nobreak folding (done by kd)
74
75## Overline Folding (done by kd)
76
77## Positional forms folding (done by kd)
78
79## Small forms folding (done by kd)
80
81## Space Folding
# Every code point selected by the Rule expression below is mapped to 0020.
# The Rule subtracts from [:Zs:] the code points with
# Changes_When_NFKC_Casefolded=Yes, and 0020 itself.
# This section is not marked "# Rule: verbatim", so per the WARNING at the
# top of this file the mapping lines are removed and regenerated by
# 'ant gen-utr30-data-files'. Change the Rule rather than the list.
82# Rule: [[:Zs:] - [:Changes_When_NFKC_Casefolded=Yes:] - [\u0020]] > 0020
831680>0020
84
85## Spacing Accents folding (done by kd)
86
87## Subscript folding (done by kd)
88
89## Symbol folding (done by kd)
90
91## Underline Folding
# Folds the low-line (underline) variants 2017 (DOUBLE LOW LINE) and
# FE4D..FE4F (DASHED / CENTRELINE / WAVY LOW LINE) to 005F (LOW LINE, "_").
# The target must be 005F, not 005E: 005E is CIRCUMFLEX ACCENT, which this
# file uses only as the target for the caret 2038 under Punctuation Folding.
# Hand-maintained: "# Rule: verbatim" keeps these lines as written when the
# file is regenerated by 'ant gen-utr30-data-files'.
92# Rule: verbatim
932017>005F
94FE4D..FE4F>005F
95
96## Diacritic Folding
97# See DiacriticFolding.txt
98
99## Vertical forms folding (done by kd)
100
101## Han Radical Folding
102# See HanRadicalFolding.txt
103
104## Letter Form Folding (done by kd)
105## Superscript folding
106# Additions to kd:
# Each line below maps the code point left of '>' to the code point on the
# right. Hand-maintained: "# Rule: verbatim" keeps these lines as written
# when the file is regenerated by 'ant gen-utr30-data-files'.
107# Rule: verbatim
10802C0>0294
10902C1>0295
11006E5>0648
11106E6>064A
112## Suzhou Numeral Folding
113# Additions to kd:
# Maps the nine consecutive code points 3021..3029 one-to-one to the
# code points listed on the right of '>'.
# Hand-maintained: "# Rule: verbatim" keeps these lines as written when the
# file is regenerated by 'ant gen-utr30-data-files'.
114# Rule: verbatim
1153021>4E00
1163022>4E8C
1173023>4E09
1183024>56DB
1193025>4E94
1203026>516D
1213027>4E03
1223028>516B
1233029>4E5D
124## Width Folding (done by kd)
125## Punctuation Folding
# Folds assorted punctuation to targets in the 0022..007E range.
# Most lines map to a single code point; 2E28 and 2E29 each map to a
# two-code-point sequence (0028 0028 and 0029 0029).
# 2053 is handled here (> 007E) because the Dashes folding Rule earlier in
# this file explicitly excludes it.
# Hand-maintained: "# Rule: verbatim" keeps these lines as written when the
# file is regenerated by 'ant gen-utr30-data-files'.
126# Rule: verbatim
12700AB>0022
12800BB>0022
129201C..201E>0022
1302018..201B>0027
1312032>0027
1322035>0027
1332039..203A>0027
1342045>005B
1352046>005D
1362E28>0028 0028
1372E29>0029 0029
1382052>0025
139204E>002A
1402044>002F
141204F>003B
1422038>005E
1432053>007E
1432053>007E
144