xref: /Lucene/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt (revision 0eeba8d37cbc851d017c07a529279ac7e7e4236a)
1# Copyright 2001-2010 Unicode, Inc.
2#
3# Disclaimer
4#
5# This source code is provided as is by Unicode, Inc. No claims are
6# made as to fitness for any particular purpose. No warranties of any
7# kind are expressed or implied. The recipient agrees to determine
8# applicability of information provided. If this file has been
9# purchased on magnetic or optical media from Unicode, Inc., the
10# sole remedy for any claim will be exchange of defective media
11# within 90 days of receipt.
12#
13# Limitations on Rights to Redistribute This Code
14#
15# Unicode, Inc. hereby grants the right to freely use the information
16# supplied in this file in the creation of products supporting the
17# Unicode Standard, and to make copies of this file in any form
18# for internal or external distribution as long as this notice
19# remains attached.
20
21### Custom Normalization mappings for UTR#30
22### (http://www.unicode.org/reports/tr30/tr30-4.html)
23###
24### Created from Unicode 5.2 UCD
25###
26
27#### WARNING ####
28#### Lines of the form "# Rule: ..." direct content generation.
29#### All non-comments will be REMOVED when this file's contents
30#### are generated by 'ant gen-utr30-data-files'.
31#### Use "# Rule: verbatim" to keep non-comments up until
32#### the next "# Rule:" line.
33#### WARNING ####
34
35## Remove diacritics
36# Rule: [:Diacritic:] >
37005E>
380060>
3900A8>
4000AF>
4100B4>
4200B7..00B8>
4302B0..034E>
440350..0357>
45035D..0362>
460374..0375>
47037A>
480384..0385>
490483..0487>
500559>
510591..05A1>
5205A3..05BD>
5305BF>
5405C1..05C2>
5505C4>
56064B..0652>
570657..0658>
5806DF..06E0>
5906E5..06E6>
6006EA..06EC>
610730..074A>
6207A6..07B0>
6307EB..07F5>
640818..0819>
650898..089F>
6608C9..08D2>
6708E3..08FE>
68093C>
69094D>
700951..0954>
710971>
7209BC>
7309CD>
740A3C>
750A4D>
760ABC>
770ACD>
780AFD..0AFF>
790B3C>
800B4D>
810B55>
820BCD>
830C3C>
840C4D>
850CBC>
860CCD>
870D3B..0D3C>
880D4D>
890DCA>
900E47..0E4C>
910E4E>
920EBA>
930EC8..0ECC>
940F18..0F19>
950F35>
960F37>
970F39>
980F3E..0F3F>
990F82..0F84>
1000F86..0F87>
1010FC6>
1021037>
1031039..103A>
1041063..1064>
1051069..106D>
1061087..108D>
107108F>
108109A..109B>
109135D..135F>
1101714..1715>
11117C9..17D3>
11217DD>
1131939..193B>
1141A75..1A7C>
1151A7F>
1161AB0..1ABE>
1171AC1..1ACB>
1181B34>
1191B44>
1201B6B..1B73>
1211BAA..1BAB>
1221C36..1C37>
1231C78..1C7D>
1241CD0..1CE8>
1251CED>
1261CF4>
1271CF7..1CF9>
1281D2C..1D6A>
1291DC4..1DCF>
1301DF5..1DFF>
1311FBD>
1321FBF..1FC1>
1331FCD..1FCF>
1341FDD..1FDF>
1351FED..1FEF>
1361FFD..1FFE>
1372CEF..2CF1>
1382E2F>
139302A..302F>
1403099..309C>
14130FC>
142A66F>
143A67C..A67D>
144A67F>
145A69C..A69D>
146A6F0..A6F1>
147A700..A721>
148A788..A78A>
149A7F8..A7F9>
150A8C4>
151A8E0..A8F1>
152A92B..A92E>
153A953>
154A9B3>
155A9C0>
156A9E5>
157AA7B..AA7D>
158AABF..AAC2>
159AAF6>
160AB5B..AB5F>
161AB69..AB6B>
162ABEC..ABED>
163FB1E>
164FE20..FE2F>
165FF3E>
166FF40>
167FF70>
168FF9E..FF9F>
169FFE3>
170102E0>
17110780..10785>
17210787..107B0>
173107B2..107BA>
17410AE5..10AE6>
17510D22..10D27>
17610F46..10F50>
17710F82..10F85>
17811046>
17911070>
180110B9..110BA>
18111133..11134>
18211173>
183111C0>
184111CA..111CC>
18511235..11236>
186112E9..112EA>
1871133C>
1881134D>
18911366..1136C>
19011370..11374>
19111442>
19211446>
193114C2..114C3>
194115BF..115C0>
1951163F>
196116B6..116B7>
1971172B>
19811839..1183A>
1991193D..1193E>
20011943>
201119E0>
20211A34>
20311A47>
20411A99>
20511C3F>
20611D42>
20711D44..11D45>
20811D97>
20916AF0..16AF4>
21016B30..16B36>
21116F8F..16F9F>
21216FF0..16FF1>
2131AFF0..1AFF3>
2141AFF5..1AFFB>
2151AFFD..1AFFE>
2161CF00..1CF2D>
2171CF30..1CF46>
2181D167..1D169>
2191D16D..1D172>
2201D17B..1D182>
2211D185..1D18B>
2221D1AA..1D1AD>
2231E130..1E136>
2241E2AE>
2251E2EC..1E2EF>
2261E8D0..1E8D6>
2271E944..1E946>
2281E948..1E94A>
229
230# Latin script "composed" characters that do not decompose any further, so decompose them here
231# These are from AsciiFoldingFilter
232# Rule: verbatim
23300E6>0061 0065
23400F0>0064
23500F8>006F
23600FE>0074 0068
2370111>0064
2380127>0068
2390131>0069
2400138>0071
2410142>006C
242014B>006E
2430153>006F 0065
2440167>0074
2450180>0062
2460183>0062
2470185>0062
2480188>0063
249018C>0064
250018D>0064
2510192>0066
2520195>0068 0076
2530199>006B
254019A>006C
255#019B>
256019E>006E
257#01A3>
25801A5>0070
259#01A8>
260#01AA>
26101AB>0074
26201AD>0074
26301B4>0079
26401B6>007A
265#01B9>
266#01BA>
26701BB>0032
26801BD>0035
269#01BE>
27001BF>0077
27101C0>007C
27201C1>007C 007C
273#01C2>
27401C3>0021
27501DD>0065
27601E5>0047
277021D>007A
2780221>0064
2790223>006F 0075
2800225>007A
2810234>006C
2820235>006E
2830236>0074
2840237>006A
2850238>0064 0062
2860239>0071 0070
287023C>0063
288023F>0073
2890240>007A
290#0242>
2910247>0065
2920249>006A
293024B>0071
294024D>0072
295024F>0079
2960250>0061
2970251>0061
2980252>0061
2990253>0062
3000254>006F
3010255>0063
3020256>0064
3030257>0064
3040258>0065
3050259>0061
306025A>0061
307025B>0065
308025C>0065
309025D>0065
310025E>0065
311025F>006A
3120260>0067
3130261>0067
3140262>0047
315#0263>
316#0264>
3170265>0068
3180266>0068
319#0267>
3200268>0069
3210269>0069
322026A>0049
323026B>006C
324026C>006C
325026D>006C
326#026E>
327026F>006D
3280270>006D
3290271>006D
3300272>006E
3310273>006E
3320274>004E
3330275>006F
3340276>004F 0045
335#0277>
336#0278>
337#0279>
338#027A>
339#027B>
340027C>0072
341027D>0072
342027E>0072
343027F>0072
3440280>0052
3450281>0052
3460282>0073
347#0283>
3480284>006A
349#0285>
350#0286>
3510287>0074
3520288>0074
3530289>0075
354#028A>
355028B>0076
356028C>0076
357028D>0077
358028E>0079
359028F>0059
3600290>007A
3610291>007A
362#0292>
363#0293>
364#0294>
365#0295>
366#0296>
3670297>0043
3680298>006F
3690299>0042
370029A>0065
371029B>0047
372029C>0048
373029D>006A
374029E>006B
375029F>004C
37602A0>0071
377#02A1>
378#02A2>
37902A3>0064 007A
380#02A4>
38102A5>0064 007A
38202A6>0074 0073
383#02A7>
38402A8>0074 0063
38502A9>0066 006E
38602AA>006C 0073
38702AB>006C 007A
38802AC>0077 0077
389#02AD>
39002AE>0068
39102AF>0068
3921D00>0041
3931D01>0041 0045
3941D02>0061 0065
3951D03>0042
3961D04>0043
3971D05>0044
3981D06>0044
3991D07>0045
4001D08>0065
4011D09>0069
4021D0A>004A
4031D0B>004B
4041D0C>004C
4051D0D>004D
4061D0E>004E
4071D0F>004F
4081D10>004F
4091D11>006F
410#1D12>
4111D13>006F
4121D14>006F 0065
4131D15>004F 0055
4141D16>006F
4151D17>006F
4161D18>0050
4171D19>0052
4181D1A>0052
4191D1B>0054
4201D1C>0055
4211D1D>0075
4221D1E>0075
4231D1F>006D
4241D20>0056
4251D21>0057
4261D22>005A
427#1D23>
428#1D24>
429#1D25>
4301D6B>0075 0065
4311D6C>0062
4321D6D>0064
4331D6E>0066
4341D6F>006D
4351D70>006E
4361D71>0070
4371D72>0072
4381D73>0072
4391D74>0073
4401D75>0074
4411D76>007A
4421D77>0067
4431D79>0067
4441D7A>0074 0068
4451D7B>0049
4461D7C>0069
4471D7D>0070
4481D7E>0055
449#1D7F>
4501D80>0062
4511D81>0064
4521D82>0066
4531D83>0067
4541D84>006B
4551D85>006C
4561D86>006D
4571D87>006E
4581D88>0070
4591D89>0072
4601D8A>0073
461#1D8B>
4621D8C>0076
4631D8D>0078
4641D8E>007A
4651D8F>0061
4661D90>0061
4671D91>0064
4681D92>0065
4691D93>0065
4701D94>0065
4711D95>0061
4721D96>0069
4731D97>006F
474#1D98>
4751D99>0075
476#1D9A>
4771E9C>0073
4781E9D>0073
4791E9F>0064
4801EFB>006C 006C
4811EFD>0076
4821EFF>0079
483214E>0066
484#2180>
485#2181>
486#2182>
4872184>0063
488#2185>
489#2186>
490#2187>
491#2188>
4922C61>006C
4932C65>0061
4942C66>0074
4952C68>0068
4962C6A>006B
4972C6C>007A
4982C71>0076
4992C73>0077
5002C74>0076
5012C76>0068
502#2C77>
5032C78>0065
504#2C79>
5052C7A>006F
5062C7B>0045
507#A723>
508#A725>
509#A727>
510A729>0074 007A
511#A72B>
512#A72D>
513#A72F>
514A730>0046
515A731>0053
516A733>0061 0061
517A735>0061 006F
518A737>0061 0075
519A739>0061 0076
520A73B>0061 0076
521A73D>0061 0079
522A73F>0063
523A741>006B
524A743>006B
525A745>006B
526A747>006C
527A749>006C
528A74B>006F
529A74D>006F
530A74F>006F 006F
531A751>0070
532A753>0070
533A755>0070
534A757>0071
535A759>0071
536A75B>0072
537#A75D>
538A75F>0076
539A761>0076 0079
540A763>007A
541A765>0074 0068
542A767>0074 0068
543A769>0076
544#A76B>
545#A76D>
546#A76F>
547#A771>
548#A772>
549#A773>
550#A774>
551#A775>
552#A776>
553#A777>
554#A778>
555A77A>0064
556A77C>0066
557A77F>0067
558A781>006C
559A783>0072
560A785>0053
561A787>0074
562A78C>0027
563A7FB>0046
564A7FC>0070
565A7FD>004D
566A7FE>0049
567A7FF>004D
568
569# Cyrillic script "composed" characters that do not decompose any further, so decompose them here
570# These are from UTR#30 DiacriticFolding.txt
571# Rule: verbatim
572
573047D>0461
574048B>0439
575048F>0440
5760491>0433
5770493>0433
5780495>0433
5790497>0436
5800499>0437
581049B>043A
582049D>043A
583049F>043A
58404A3>043D
58504A7>043F
58604AB>0441
58704AD>0442
58804B1>04AF
58904B3>0425
59004B7>04BC
59104B9>0447
59204BF>04BC
59304C4>043A
59404C6>043B
59504C8>043D
59604CA>043D
59704CC>04BC
59804CE>043C
599
600# Additional signs and diacritics, from examination of [:Mark:]&[:Lm:]
601# Rule: verbatim
6020358..035C>
60305A2>
60405C5>
60505C7>
6060610..061A>
6070640>
60806D6..06DE>
60906E1..06E4>
61006E7..06E9>
61106ED>
6120653..0656>
6130659..065F>
6140670>
6150711>
61607FA>
6170816..0817>
618081B..0823>
6190825..0827>
6200829>
621082A..082D>
6220900>0901
6231734>
6241DC0..1DC3>
6251DD0..1DE6>
62620D0..20F0>
6272DE0..2DFF>
628A670..A672>
629A802>
63010A3F>
6311D165..1D166>
6321D242..1D244>
633
634# Additional Arabic/Hebrew decompositions
635# Rule: verbatim
63605F3>0027
63705F4>0022
6380629>0647
6390649>064A
64006A9>0643
64106CC>064A
642