xref: /Lucene/gradle/generation/jflex/htmlentity.py (revision 8eb4eb26119d602e79080852a601e26c26f62900)
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15*8eb4eb26SDawid Weiss
16*8eb4eb26SDawid Weissimport re
17*8eb4eb26SDawid Weissimport sys
18*8eb4eb26SDawid Weiss
# A simple Python script to generate an HTML entity map and a regex alternation
# for inclusion in HTMLStripCharFilter.jflex.
21*8eb4eb26SDawid Weiss
def _wrap_entries(entries, first_prefix, continuation_prefix, width=80):
  """Pack pre-formatted entries into lines shorter than ``width`` characters.

  Entries are appended to the current line in order.  When appending the next
  entry would make the line ``width`` characters or longer, the current line
  is closed and a new one is started with ``continuation_prefix``.

  Returns the list of lines.  The last element is the still-open final line,
  so the caller can append a terminator or strip a trailing separator.
  """
  lines = []
  current = first_prefix
  for entry in entries:
    if len(current) + len(entry) >= width:
      lines.append(current)
      current = continuation_prefix
    current += entry
  lines.append(current)
  return lines

def main():
  """Write the generated JFlex fragment to the file named by ``sys.argv[1]``.

  The output consists of, in order:
    * the text returned by ``get_apache_license()``;
    * a ``CharacterEntities = ( "a" | "b" | ... )`` alternation of every
      entity name parsed from ``get_entity_text()``, sorted, wrapped below
      80 columns;
    * a ``%{ ... %}`` block of Java source that builds the
      ``upperCaseVariantsAccepted`` and ``entityValues`` maps.

  Output is written directly to the opened file; ``sys.stdout`` is left
  untouched, so nothing is left pointing at a closed file afterwards.

  Raises IndexError if no output path was given on the command line.
  """
  # Entities whose all-upper-case spelling is accepted as well.  This single
  # tuple drives both the alternation and the generated Java ``put`` calls.
  upper_case_variants = ('quot', 'copy', 'gt', 'lt', 'reg', 'amp')

  # Matches `<!ENTITY name "&#NNN;"` and the double-escaped form
  # `<!ENTITY name "&#38;#NNN;"` that the entity text uses for lt and amp.
  entity_pattern = re.compile(r'\s*<!ENTITY\s+(\S+)\s+"&(?:#38;)?#(\d+);"')

  with open(sys.argv[1], 'w') as f:
    def emit(text):
      print(text, file=f)

    emit(get_apache_license())

    # Map each entity name to the text placed inside a Java string literal.
    codes = {}
    for line in get_entity_text().split('\n'):
      match = entity_pattern.match(line)
      if not match:
        continue
      key = match.group(1)
      if key == 'quot':
        # Emitted as an escaped quote rather than \u0022 -- presumably because
        # Java expands unicode escapes before lexing, which would end the
        # string literal early.
        codes[key] = r'\"'
      elif key == 'nbsp':
        # nbsp is emitted as a plain ASCII space instead of U+00A0.
        codes[key] = ' '
      else:
        codes[key] = r'\u%04X' % int(match.group(2))

    keys = sorted(codes)

    # Regex alternation: every entity name, each immediately followed by its
    # upper-case spelling where one is accepted.
    alternatives = []
    for key in keys:
      alternatives.append('"%s"' % key)
      if key in upper_case_variants:
        alternatives.append('"%s"' % key.upper())
    # Every alternative except the very first is preceded by ' | '.
    entries = [('"%s"' if index == 0 else ' | %s') % (alt.strip('"') if index == 0 else alt)
               for index, alt in enumerate(alternatives)]
    lines = _wrap_entries(entries, 'CharacterEntities = ( ', ' ' * 19)
    for line in lines[:-1]:
      emit(line)
    emit(lines[-1] + ' )')

    emit('%{')
    emit('  private static final Map<String,String> upperCaseVariantsAccepted')
    emit('      = new HashMap<>();')
    emit('  static {')
    for key in upper_case_variants:
      emit('    upperCaseVariantsAccepted.put("%s", "%s");' % (key, key.upper()))
    emit('  }')
    emit('  private static final CharArrayMap<Character> entityValues')
    emit('      = new CharArrayMap<>(%i, false);' % len(keys))
    emit('  static {')
    emit('    String[] entities = {')
    # Flat name/value array; each pair carries a trailing comma.
    pairs = [' "%s", "%s",' % (key, codes[key]) for key in keys]
    lines = _wrap_entries(pairs, ' ' * 5, ' ' * 5)
    for line in lines[:-1]:
      emit(line)
    # Drop the final character of the last line (the trailing comma of the
    # last pair) before the array is closed.
    emit(lines[-1][:-1])
    emit('    };')
    emit('    for (int i = 0 ; i < entities.length ; i += 2) {')
    emit('      Character value = entities[i + 1].charAt(0);')
    emit('      entityValues.put(entities[i], value);')
    emit('      String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);')
    emit('      if (upperCaseVariant != null) {')
    emit('        entityValues.put(upperCaseVariant, value);')
    emit('      }')
    emit('    }')
    emit("  }")
    emit("%}")
90*8eb4eb26SDawid Weiss
91*8eb4eb26SDawid Weissdef get_entity_text():
92*8eb4eb26SDawid Weiss# The text below is taken verbatim from
93*8eb4eb26SDawid Weiss# <http://www.w3.org/TR/REC-html40/sgml/entities.html>:
94*8eb4eb26SDawid Weiss  text = r"""
95*8eb4eb26SDawid WeissF.1. XHTML Character Entities
96*8eb4eb26SDawid Weiss
97*8eb4eb26SDawid WeissXHTML DTDs make available a standard collection of named character entities. Those entities are defined in this section.
98*8eb4eb26SDawid WeissF.1.1. XHTML Latin 1 Character Entities
99*8eb4eb26SDawid Weiss
100*8eb4eb26SDawid WeissYou can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-lat1.ent. The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-lat1.ent.
101*8eb4eb26SDawid Weiss
102*8eb4eb26SDawid Weiss<!-- ...................................................................... -->
103*8eb4eb26SDawid Weiss<!-- XML-compatible ISO Latin 1 Character Entity Set for XHTML ............ -->
104*8eb4eb26SDawid Weiss<!-- file: xhtml-lat1.ent
105*8eb4eb26SDawid Weiss
106*8eb4eb26SDawid Weiss     Typical invocation:
107*8eb4eb26SDawid Weiss
108*8eb4eb26SDawid Weiss       <!ENTITY % xhtml-lat1
109*8eb4eb26SDawid Weiss           PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
110*8eb4eb26SDawid Weiss                  "xhtml-lat1.ent" >
111*8eb4eb26SDawid Weiss       %xhtml-lat1;
112*8eb4eb26SDawid Weiss
113*8eb4eb26SDawid Weiss     This DTD module is identified by the PUBLIC and SYSTEM identifiers:
114*8eb4eb26SDawid Weiss
115*8eb4eb26SDawid Weiss       PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
116*8eb4eb26SDawid Weiss       SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-lat1.ent"
117*8eb4eb26SDawid Weiss
118*8eb4eb26SDawid Weiss     Revision:  Id: xhtml-lat1.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI
119*8eb4eb26SDawid Weiss
120*8eb4eb26SDawid Weiss     Portions (C) International Organization for Standardization 1986:
121*8eb4eb26SDawid Weiss     Permission to copy in any form is granted for use with conforming
122*8eb4eb26SDawid Weiss     SGML systems and applications as defined in ISO 8879, provided
123*8eb4eb26SDawid Weiss     this notice is included in all copies.
124*8eb4eb26SDawid Weiss-->
125*8eb4eb26SDawid Weiss
126*8eb4eb26SDawid Weiss<!ENTITY nbsp   "&#160;" ><!-- no-break space = non-breaking space, U+00A0 ISOnum -->
127*8eb4eb26SDawid Weiss<!ENTITY iexcl  "&#161;" ><!-- inverted exclamation mark, U+00A1 ISOnum -->
128*8eb4eb26SDawid Weiss<!ENTITY cent   "&#162;" ><!-- cent sign, U+00A2 ISOnum -->
129*8eb4eb26SDawid Weiss<!ENTITY pound  "&#163;" ><!-- pound sign, U+00A3 ISOnum -->
130*8eb4eb26SDawid Weiss<!ENTITY curren "&#164;" ><!-- currency sign, U+00A4 ISOnum -->
131*8eb4eb26SDawid Weiss<!ENTITY yen    "&#165;" ><!-- yen sign = yuan sign, U+00A5 ISOnum -->
132*8eb4eb26SDawid Weiss<!ENTITY brvbar "&#166;" ><!-- broken bar = broken vertical bar, U+00A6 ISOnum -->
133*8eb4eb26SDawid Weiss<!ENTITY sect   "&#167;" ><!-- section sign, U+00A7 ISOnum -->
134*8eb4eb26SDawid Weiss<!ENTITY uml    "&#168;" ><!-- diaeresis = spacing diaeresis, U+00A8 ISOdia -->
135*8eb4eb26SDawid Weiss<!ENTITY copy   "&#169;" ><!-- copyright sign, U+00A9 ISOnum -->
136*8eb4eb26SDawid Weiss<!ENTITY ordf   "&#170;" ><!-- feminine ordinal indicator, U+00AA ISOnum -->
137*8eb4eb26SDawid Weiss<!ENTITY laquo  "&#171;" ><!-- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum -->
138*8eb4eb26SDawid Weiss<!ENTITY not    "&#172;" ><!-- not sign, U+00AC ISOnum -->
139*8eb4eb26SDawid Weiss<!ENTITY shy    "&#173;" ><!-- soft hyphen = discretionary hyphen, U+00AD ISOnum -->
140*8eb4eb26SDawid Weiss<!ENTITY reg    "&#174;" ><!-- registered sign = registered trade mark sign, U+00AE ISOnum -->
141*8eb4eb26SDawid Weiss<!ENTITY macr   "&#175;" ><!-- macron = spacing macron = overline = APL overbar, U+00AF ISOdia -->
142*8eb4eb26SDawid Weiss<!ENTITY deg    "&#176;" ><!-- degree sign, U+00B0 ISOnum -->
143*8eb4eb26SDawid Weiss<!ENTITY plusmn "&#177;" ><!-- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum -->
144*8eb4eb26SDawid Weiss<!ENTITY sup2   "&#178;" ><!-- superscript two = superscript digit two = squared, U+00B2 ISOnum -->
145*8eb4eb26SDawid Weiss<!ENTITY sup3   "&#179;" ><!-- superscript three = superscript digit three = cubed, U+00B3 ISOnum -->
146*8eb4eb26SDawid Weiss<!ENTITY acute  "&#180;" ><!-- acute accent = spacing acute, U+00B4 ISOdia -->
147*8eb4eb26SDawid Weiss<!ENTITY micro  "&#181;" ><!-- micro sign, U+00B5 ISOnum -->
148*8eb4eb26SDawid Weiss<!ENTITY para   "&#182;" ><!-- pilcrow sign = paragraph sign, U+00B6 ISOnum -->
149*8eb4eb26SDawid Weiss<!ENTITY middot "&#183;" ><!-- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum -->
150*8eb4eb26SDawid Weiss<!ENTITY cedil  "&#184;" ><!-- cedilla = spacing cedilla, U+00B8 ISOdia -->
151*8eb4eb26SDawid Weiss<!ENTITY sup1   "&#185;" ><!-- superscript one = superscript digit one, U+00B9 ISOnum -->
152*8eb4eb26SDawid Weiss<!ENTITY ordm   "&#186;" ><!-- masculine ordinal indicator, U+00BA ISOnum -->
153*8eb4eb26SDawid Weiss<!ENTITY raquo  "&#187;" ><!-- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum -->
154*8eb4eb26SDawid Weiss<!ENTITY frac14 "&#188;" ><!-- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum -->
155*8eb4eb26SDawid Weiss<!ENTITY frac12 "&#189;" ><!-- vulgar fraction one half = fraction one half, U+00BD ISOnum -->
156*8eb4eb26SDawid Weiss<!ENTITY frac34 "&#190;" ><!-- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum -->
157*8eb4eb26SDawid Weiss<!ENTITY iquest "&#191;" ><!-- inverted question mark = turned question mark, U+00BF ISOnum -->
158*8eb4eb26SDawid Weiss<!ENTITY Agrave "&#192;" ><!-- latin capital A with grave = latin capital A grave, U+00C0 ISOlat1 -->
159*8eb4eb26SDawid Weiss<!ENTITY Aacute "&#193;" ><!-- latin capital A with acute, U+00C1 ISOlat1 -->
160*8eb4eb26SDawid Weiss<!ENTITY Acirc  "&#194;" ><!-- latin capital A with circumflex, U+00C2 ISOlat1 -->
161*8eb4eb26SDawid Weiss<!ENTITY Atilde "&#195;" ><!-- latin capital A with tilde, U+00C3 ISOlat1 -->
162*8eb4eb26SDawid Weiss<!ENTITY Auml   "&#196;" ><!-- latin capital A with diaeresis, U+00C4 ISOlat1 -->
163*8eb4eb26SDawid Weiss<!ENTITY Aring  "&#197;" ><!-- latin capital A with ring above = latin capital A ring, U+00C5 ISOlat1 -->
164*8eb4eb26SDawid Weiss<!ENTITY AElig  "&#198;" ><!-- latin capital AE = latin capital ligature AE, U+00C6 ISOlat1 -->
165*8eb4eb26SDawid Weiss<!ENTITY Ccedil "&#199;" ><!-- latin capital C with cedilla, U+00C7 ISOlat1 -->
166*8eb4eb26SDawid Weiss<!ENTITY Egrave "&#200;" ><!-- latin capital E with grave, U+00C8 ISOlat1 -->
167*8eb4eb26SDawid Weiss<!ENTITY Eacute "&#201;" ><!-- latin capital E with acute, U+00C9 ISOlat1 -->
168*8eb4eb26SDawid Weiss<!ENTITY Ecirc  "&#202;" ><!-- latin capital E with circumflex, U+00CA ISOlat1 -->
169*8eb4eb26SDawid Weiss<!ENTITY Euml   "&#203;" ><!-- latin capital E with diaeresis, U+00CB ISOlat1 -->
170*8eb4eb26SDawid Weiss<!ENTITY Igrave "&#204;" ><!-- latin capital I with grave, U+00CC ISOlat1 -->
171*8eb4eb26SDawid Weiss<!ENTITY Iacute "&#205;" ><!-- latin capital I with acute, U+00CD ISOlat1 -->
172*8eb4eb26SDawid Weiss<!ENTITY Icirc  "&#206;" ><!-- latin capital I with circumflex, U+00CE ISOlat1 -->
173*8eb4eb26SDawid Weiss<!ENTITY Iuml   "&#207;" ><!-- latin capital I with diaeresis, U+00CF ISOlat1 -->
174*8eb4eb26SDawid Weiss<!ENTITY ETH    "&#208;" ><!-- latin capital ETH, U+00D0 ISOlat1 -->
175*8eb4eb26SDawid Weiss<!ENTITY Ntilde "&#209;" ><!-- latin capital N with tilde, U+00D1 ISOlat1 -->
176*8eb4eb26SDawid Weiss<!ENTITY Ograve "&#210;" ><!-- latin capital O with grave, U+00D2 ISOlat1 -->
177*8eb4eb26SDawid Weiss<!ENTITY Oacute "&#211;" ><!-- latin capital O with acute, U+00D3 ISOlat1 -->
178*8eb4eb26SDawid Weiss<!ENTITY Ocirc  "&#212;" ><!-- latin capital O with circumflex, U+00D4 ISOlat1 -->
179*8eb4eb26SDawid Weiss<!ENTITY Otilde "&#213;" ><!-- latin capital O with tilde, U+00D5 ISOlat1 -->
180*8eb4eb26SDawid Weiss<!ENTITY Ouml   "&#214;" ><!-- latin capital O with diaeresis, U+00D6 ISOlat1 -->
181*8eb4eb26SDawid Weiss<!ENTITY times  "&#215;" ><!-- multiplication sign, U+00D7 ISOnum -->
182*8eb4eb26SDawid Weiss<!ENTITY Oslash "&#216;" ><!-- latin capital O with stroke = latin capital O slash, U+00D8 ISOlat1 -->
183*8eb4eb26SDawid Weiss<!ENTITY Ugrave "&#217;" ><!-- latin capital U with grave, U+00D9 ISOlat1 -->
184*8eb4eb26SDawid Weiss<!ENTITY Uacute "&#218;" ><!-- latin capital U with acute, U+00DA ISOlat1 -->
185*8eb4eb26SDawid Weiss<!ENTITY Ucirc  "&#219;" ><!-- latin capital U with circumflex, U+00DB ISOlat1 -->
186*8eb4eb26SDawid Weiss<!ENTITY Uuml   "&#220;" ><!-- latin capital U with diaeresis, U+00DC ISOlat1 -->
187*8eb4eb26SDawid Weiss<!ENTITY Yacute "&#221;" ><!-- latin capital Y with acute, U+00DD ISOlat1 -->
188*8eb4eb26SDawid Weiss<!ENTITY THORN  "&#222;" ><!-- latin capital THORN, U+00DE ISOlat1 -->
189*8eb4eb26SDawid Weiss<!ENTITY szlig  "&#223;" ><!-- latin small sharp s = ess-zed, U+00DF ISOlat1 -->
190*8eb4eb26SDawid Weiss<!ENTITY agrave "&#224;" ><!-- latin small a with grave = latin small a grave, U+00E0 ISOlat1 -->
191*8eb4eb26SDawid Weiss<!ENTITY aacute "&#225;" ><!-- latin small a with acute, U+00E1 ISOlat1 -->
192*8eb4eb26SDawid Weiss<!ENTITY acirc  "&#226;" ><!-- latin small a with circumflex, U+00E2 ISOlat1 -->
193*8eb4eb26SDawid Weiss<!ENTITY atilde "&#227;" ><!-- latin small a with tilde, U+00E3 ISOlat1 -->
194*8eb4eb26SDawid Weiss<!ENTITY auml   "&#228;" ><!-- latin small a with diaeresis, U+00E4 ISOlat1 -->
195*8eb4eb26SDawid Weiss<!ENTITY aring  "&#229;" ><!-- latin small a with ring above = latin small a ring, U+00E5 ISOlat1 -->
196*8eb4eb26SDawid Weiss<!ENTITY aelig  "&#230;" ><!-- latin small ae = latin small ligature ae, U+00E6 ISOlat1 -->
197*8eb4eb26SDawid Weiss<!ENTITY ccedil "&#231;" ><!-- latin small c with cedilla, U+00E7 ISOlat1 -->
198*8eb4eb26SDawid Weiss<!ENTITY egrave "&#232;" ><!-- latin small e with grave, U+00E8 ISOlat1 -->
199*8eb4eb26SDawid Weiss<!ENTITY eacute "&#233;" ><!-- latin small e with acute, U+00E9 ISOlat1 -->
200*8eb4eb26SDawid Weiss<!ENTITY ecirc  "&#234;" ><!-- latin small e with circumflex, U+00EA ISOlat1 -->
201*8eb4eb26SDawid Weiss<!ENTITY euml   "&#235;" ><!-- latin small e with diaeresis, U+00EB ISOlat1 -->
202*8eb4eb26SDawid Weiss<!ENTITY igrave "&#236;" ><!-- latin small i with grave, U+00EC ISOlat1 -->
203*8eb4eb26SDawid Weiss<!ENTITY iacute "&#237;" ><!-- latin small i with acute, U+00ED ISOlat1 -->
204*8eb4eb26SDawid Weiss<!ENTITY icirc  "&#238;" ><!-- latin small i with circumflex, U+00EE ISOlat1 -->
205*8eb4eb26SDawid Weiss<!ENTITY iuml   "&#239;" ><!-- latin small i with diaeresis, U+00EF ISOlat1 -->
206*8eb4eb26SDawid Weiss<!ENTITY eth    "&#240;" ><!-- latin small eth, U+00F0 ISOlat1 -->
207*8eb4eb26SDawid Weiss<!ENTITY ntilde "&#241;" ><!-- latin small n with tilde, U+00F1 ISOlat1 -->
208*8eb4eb26SDawid Weiss<!ENTITY ograve "&#242;" ><!-- latin small o with grave, U+00F2 ISOlat1 -->
209*8eb4eb26SDawid Weiss<!ENTITY oacute "&#243;" ><!-- latin small o with acute, U+00F3 ISOlat1 -->
210*8eb4eb26SDawid Weiss<!ENTITY ocirc  "&#244;" ><!-- latin small o with circumflex, U+00F4 ISOlat1 -->
211*8eb4eb26SDawid Weiss<!ENTITY otilde "&#245;" ><!-- latin small o with tilde, U+00F5 ISOlat1 -->
212*8eb4eb26SDawid Weiss<!ENTITY ouml   "&#246;" ><!-- latin small o with diaeresis, U+00F6 ISOlat1 -->
213*8eb4eb26SDawid Weiss<!ENTITY divide "&#247;" ><!-- division sign, U+00F7 ISOnum -->
214*8eb4eb26SDawid Weiss<!ENTITY oslash "&#248;" ><!-- latin small o with stroke, = latin small o slash, U+00F8 ISOlat1 -->
215*8eb4eb26SDawid Weiss<!ENTITY ugrave "&#249;" ><!-- latin small u with grave, U+00F9 ISOlat1 -->
216*8eb4eb26SDawid Weiss<!ENTITY uacute "&#250;" ><!-- latin small u with acute, U+00FA ISOlat1 -->
217*8eb4eb26SDawid Weiss<!ENTITY ucirc  "&#251;" ><!-- latin small u with circumflex, U+00FB ISOlat1 -->
218*8eb4eb26SDawid Weiss<!ENTITY uuml   "&#252;" ><!-- latin small u with diaeresis, U+00FC ISOlat1 -->
219*8eb4eb26SDawid Weiss<!ENTITY yacute "&#253;" ><!-- latin small y with acute, U+00FD ISOlat1 -->
220*8eb4eb26SDawid Weiss<!ENTITY thorn  "&#254;" ><!-- latin small thorn with, U+00FE ISOlat1 -->
221*8eb4eb26SDawid Weiss<!ENTITY yuml   "&#255;" ><!-- latin small y with diaeresis, U+00FF ISOlat1 -->
222*8eb4eb26SDawid Weiss<!-- end of xhtml-lat1.ent -->
223*8eb4eb26SDawid Weiss
224*8eb4eb26SDawid WeissF.1.2. XHTML Special Characters
225*8eb4eb26SDawid Weiss
226*8eb4eb26SDawid WeissYou can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-special.ent. The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-special.ent.
227*8eb4eb26SDawid Weiss
228*8eb4eb26SDawid Weiss<!-- ...................................................................... -->
229*8eb4eb26SDawid Weiss<!-- XML-compatible ISO Special Character Entity Set for XHTML ............ -->
230*8eb4eb26SDawid Weiss<!-- file: xhtml-special.ent
231*8eb4eb26SDawid Weiss
232*8eb4eb26SDawid Weiss     Typical invocation:
233*8eb4eb26SDawid Weiss
234*8eb4eb26SDawid Weiss       <!ENTITY % xhtml-special
235*8eb4eb26SDawid Weiss           PUBLIC "-//W3C//ENTITIES Special for XHTML//EN"
236*8eb4eb26SDawid Weiss                  "xhtml-special.ent" >
237*8eb4eb26SDawid Weiss       %xhtml-special;
238*8eb4eb26SDawid Weiss
239*8eb4eb26SDawid Weiss     This DTD module is identified by the PUBLIC and SYSTEM identifiers:
240*8eb4eb26SDawid Weiss
241*8eb4eb26SDawid Weiss       PUBLIC "-//W3C//ENTITIES Special for XHTML//EN"
242*8eb4eb26SDawid Weiss       SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-special.ent"
243*8eb4eb26SDawid Weiss
244*8eb4eb26SDawid Weiss     Revision:  Id: xhtml-special.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI
245*8eb4eb26SDawid Weiss
246*8eb4eb26SDawid Weiss     Portions (C) International Organization for Standardization 1986:
247*8eb4eb26SDawid Weiss     Permission to copy in any form is granted for use with conforming
248*8eb4eb26SDawid Weiss     SGML systems and applications as defined in ISO 8879, provided
249*8eb4eb26SDawid Weiss     this notice is included in all copies.
250*8eb4eb26SDawid Weiss
251*8eb4eb26SDawid Weiss     Revisions:
252*8eb4eb26SDawid Weiss2000-10-28: added &apos; and altered XML Predefined Entities for compatibility
253*8eb4eb26SDawid Weiss-->
254*8eb4eb26SDawid Weiss
255*8eb4eb26SDawid Weiss<!-- Relevant ISO entity set is given unless names are newly introduced.
256*8eb4eb26SDawid Weiss     New names (i.e., not in ISO 8879 [SGML] list) do not clash with
257*8eb4eb26SDawid Weiss     any existing ISO 8879 entity names. ISO 10646 [ISO10646] character
258*8eb4eb26SDawid Weiss     numbers are given for each character, in hex. Entity values are
259*8eb4eb26SDawid Weiss     decimal conversions of the ISO 10646 values and refer to the
260*8eb4eb26SDawid Weiss     document character set. Names are Unicode [UNICODE] names.
261*8eb4eb26SDawid Weiss-->
262*8eb4eb26SDawid Weiss
263*8eb4eb26SDawid Weiss<!-- C0 Controls and Basic Latin -->
264*8eb4eb26SDawid Weiss<!ENTITY lt      "&#38;#60;" ><!-- less-than sign, U+003C ISOnum -->
265*8eb4eb26SDawid Weiss<!ENTITY gt      "&#62;" ><!-- greater-than sign, U+003E ISOnum -->
266*8eb4eb26SDawid Weiss<!ENTITY amp     "&#38;#38;" ><!-- ampersand, U+0026 ISOnum -->
267*8eb4eb26SDawid Weiss<!ENTITY apos    "&#39;" ><!-- The Apostrophe (Apostrophe Quote, APL Quote), U+0027 ISOnum -->
268*8eb4eb26SDawid Weiss<!ENTITY quot    "&#34;" ><!-- quotation mark (Quote Double), U+0022 ISOnum -->
269*8eb4eb26SDawid Weiss
270*8eb4eb26SDawid Weiss<!-- Latin Extended-A -->
271*8eb4eb26SDawid Weiss<!ENTITY OElig   "&#338;" ><!-- latin capital ligature OE, U+0152 ISOlat2 -->
272*8eb4eb26SDawid Weiss<!ENTITY oelig   "&#339;" ><!-- latin small ligature oe, U+0153 ISOlat2 -->
273*8eb4eb26SDawid Weiss
274*8eb4eb26SDawid Weiss<!-- ligature is a misnomer, this is a separate character in some languages -->
275*8eb4eb26SDawid Weiss<!ENTITY Scaron  "&#352;" ><!-- latin capital letter S with caron, U+0160 ISOlat2 -->
276*8eb4eb26SDawid Weiss<!ENTITY scaron  "&#353;" ><!-- latin small letter s with caron, U+0161 ISOlat2 -->
277*8eb4eb26SDawid Weiss<!ENTITY Yuml    "&#376;" ><!-- latin capital letter Y with diaeresis, U+0178 ISOlat2 -->
278*8eb4eb26SDawid Weiss
279*8eb4eb26SDawid Weiss<!-- Spacing Modifier Letters -->
280*8eb4eb26SDawid Weiss<!ENTITY circ    "&#710;" ><!-- modifier letter circumflex accent, U+02C6 ISOpub -->
281*8eb4eb26SDawid Weiss<!ENTITY tilde   "&#732;" ><!-- small tilde, U+02DC ISOdia -->
282*8eb4eb26SDawid Weiss
283*8eb4eb26SDawid Weiss<!-- General Punctuation -->
284*8eb4eb26SDawid Weiss<!ENTITY ensp    "&#8194;" ><!-- en space, U+2002 ISOpub -->
285*8eb4eb26SDawid Weiss<!ENTITY emsp    "&#8195;" ><!-- em space, U+2003 ISOpub -->
286*8eb4eb26SDawid Weiss<!ENTITY thinsp  "&#8201;" ><!-- thin space, U+2009 ISOpub -->
287*8eb4eb26SDawid Weiss<!ENTITY zwnj    "&#8204;" ><!-- zero width non-joiner, U+200C NEW RFC 2070 -->
288*8eb4eb26SDawid Weiss<!ENTITY zwj     "&#8205;" ><!-- zero width joiner, U+200D NEW RFC 2070 -->
289*8eb4eb26SDawid Weiss<!ENTITY lrm     "&#8206;" ><!-- left-to-right mark, U+200E NEW RFC 2070 -->
290*8eb4eb26SDawid Weiss<!ENTITY rlm     "&#8207;" ><!-- right-to-left mark, U+200F NEW RFC 2070 -->
291*8eb4eb26SDawid Weiss<!ENTITY ndash   "&#8211;" ><!-- en dash, U+2013 ISOpub -->
292*8eb4eb26SDawid Weiss<!ENTITY mdash   "&#8212;" ><!-- em dash, U+2014 ISOpub -->
293*8eb4eb26SDawid Weiss<!ENTITY lsquo   "&#8216;" ><!-- left single quotation mark, U+2018 ISOnum -->
294*8eb4eb26SDawid Weiss<!ENTITY rsquo   "&#8217;" ><!-- right single quotation mark, U+2019 ISOnum -->
295*8eb4eb26SDawid Weiss<!ENTITY sbquo   "&#8218;" ><!-- single low-9 quotation mark, U+201A NEW -->
296*8eb4eb26SDawid Weiss<!ENTITY ldquo   "&#8220;" ><!-- left double quotation mark, U+201C ISOnum -->
297*8eb4eb26SDawid Weiss<!ENTITY rdquo   "&#8221;" ><!-- right double quotation mark, U+201D ISOnum -->
298*8eb4eb26SDawid Weiss<!ENTITY bdquo   "&#8222;" ><!-- double low-9 quotation mark, U+201E NEW -->
299*8eb4eb26SDawid Weiss<!ENTITY dagger  "&#8224;" ><!-- dagger, U+2020 ISOpub -->
300*8eb4eb26SDawid Weiss<!ENTITY Dagger  "&#8225;" ><!-- double dagger, U+2021 ISOpub -->
301*8eb4eb26SDawid Weiss<!ENTITY permil  "&#8240;" ><!-- per mille sign, U+2030 ISOtech -->
302*8eb4eb26SDawid Weiss
303*8eb4eb26SDawid Weiss<!-- lsaquo is proposed but not yet ISO standardized -->
304*8eb4eb26SDawid Weiss<!ENTITY lsaquo  "&#8249;" ><!-- single left-pointing angle quotation mark, U+2039 ISO proposed -->
305*8eb4eb26SDawid Weiss<!-- rsaquo is proposed but not yet ISO standardized -->
306*8eb4eb26SDawid Weiss<!ENTITY rsaquo  "&#8250;" ><!-- single right-pointing angle quotation mark, U+203A ISO proposed -->
307*8eb4eb26SDawid Weiss<!ENTITY euro    "&#8364;" ><!-- euro sign, U+20AC NEW -->
308*8eb4eb26SDawid Weiss
309*8eb4eb26SDawid Weiss<!-- end of xhtml-special.ent -->
310*8eb4eb26SDawid Weiss
311*8eb4eb26SDawid WeissF.1.3. XHTML Mathematical, Greek, and Symbolic Characters
312*8eb4eb26SDawid Weiss
313*8eb4eb26SDawid WeissYou can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-symbol.ent. The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-symbol.ent.
314*8eb4eb26SDawid Weiss
315*8eb4eb26SDawid Weiss<!-- ...................................................................... -->
316*8eb4eb26SDawid Weiss<!-- ISO Math, Greek and Symbolic Character Entity Set for XHTML .......... -->
317*8eb4eb26SDawid Weiss<!-- file: xhtml-symbol.ent
318*8eb4eb26SDawid Weiss
319*8eb4eb26SDawid Weiss     Typical invocation:
320*8eb4eb26SDawid Weiss
321*8eb4eb26SDawid Weiss       <!ENTITY % xhtml-symbol
322*8eb4eb26SDawid Weiss           PUBLIC "-//W3C//ENTITIES Symbols for XHTML//EN"
323*8eb4eb26SDawid Weiss                  "xhtml-symbol.ent" >
324*8eb4eb26SDawid Weiss       %xhtml-symbol;
325*8eb4eb26SDawid Weiss
326*8eb4eb26SDawid Weiss     This DTD module is identified by the PUBLIC and SYSTEM identifiers:
327*8eb4eb26SDawid Weiss
328*8eb4eb26SDawid Weiss       PUBLIC "-//W3C//ENTITIES Symbols for XHTML//EN"
329*8eb4eb26SDawid Weiss       SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-symbol.ent"
330*8eb4eb26SDawid Weiss
331*8eb4eb26SDawid Weiss     Revision:  Id: xhtml-symbol.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI
332*8eb4eb26SDawid Weiss
333*8eb4eb26SDawid Weiss     Portions (C) International Organization for Standardization 1986:
334*8eb4eb26SDawid Weiss     Permission to copy in any form is granted for use with conforming
335*8eb4eb26SDawid Weiss     SGML systems and applications as defined in ISO 8879, provided
336*8eb4eb26SDawid Weiss     this notice is included in all copies.
337*8eb4eb26SDawid Weiss-->
338*8eb4eb26SDawid Weiss
339*8eb4eb26SDawid Weiss<!-- Relevant ISO entity set is given unless names are newly introduced.
340*8eb4eb26SDawid Weiss     New names (i.e., not in ISO 8879 [SGML] list) do not clash with
341*8eb4eb26SDawid Weiss     any existing ISO 8879 entity names. ISO 10646 [ISO10646] character
342*8eb4eb26SDawid Weiss     numbers are given for each character, in hex. Entity values are
343*8eb4eb26SDawid Weiss     decimal conversions of the ISO 10646 values and refer to the
344*8eb4eb26SDawid Weiss     document character set. Names are Unicode [UNICODE] names.
345*8eb4eb26SDawid Weiss-->
346*8eb4eb26SDawid Weiss
347*8eb4eb26SDawid Weiss<!-- Latin Extended-B -->
348*8eb4eb26SDawid Weiss<!ENTITY fnof     "&#402;" ><!-- latin small f with hook = function
349*8eb4eb26SDawid Weiss                              = florin, U+0192 ISOtech -->
350*8eb4eb26SDawid Weiss
351*8eb4eb26SDawid Weiss<!-- Greek -->
352*8eb4eb26SDawid Weiss<!ENTITY Alpha    "&#913;" ><!-- greek capital letter alpha, U+0391 -->
353*8eb4eb26SDawid Weiss<!ENTITY Beta     "&#914;" ><!-- greek capital letter beta, U+0392 -->
354*8eb4eb26SDawid Weiss<!ENTITY Gamma    "&#915;" ><!-- greek capital letter gamma, U+0393 ISOgrk3 -->
355*8eb4eb26SDawid Weiss<!ENTITY Delta    "&#916;" ><!-- greek capital letter delta, U+0394 ISOgrk3 -->
356*8eb4eb26SDawid Weiss<!ENTITY Epsilon  "&#917;" ><!-- greek capital letter epsilon, U+0395 -->
357*8eb4eb26SDawid Weiss<!ENTITY Zeta     "&#918;" ><!-- greek capital letter zeta, U+0396 -->
358*8eb4eb26SDawid Weiss<!ENTITY Eta      "&#919;" ><!-- greek capital letter eta, U+0397 -->
359*8eb4eb26SDawid Weiss<!ENTITY Theta    "&#920;" ><!-- greek capital letter theta, U+0398 ISOgrk3 -->
360*8eb4eb26SDawid Weiss<!ENTITY Iota     "&#921;" ><!-- greek capital letter iota, U+0399 -->
361*8eb4eb26SDawid Weiss<!ENTITY Kappa    "&#922;" ><!-- greek capital letter kappa, U+039A -->
362*8eb4eb26SDawid Weiss<!ENTITY Lambda   "&#923;" ><!-- greek capital letter lambda, U+039B ISOgrk3 -->
363*8eb4eb26SDawid Weiss<!ENTITY Mu       "&#924;" ><!-- greek capital letter mu, U+039C -->
364*8eb4eb26SDawid Weiss<!ENTITY Nu       "&#925;" ><!-- greek capital letter nu, U+039D -->
365*8eb4eb26SDawid Weiss<!ENTITY Xi       "&#926;" ><!-- greek capital letter xi, U+039E ISOgrk3 -->
366*8eb4eb26SDawid Weiss<!ENTITY Omicron  "&#927;" ><!-- greek capital letter omicron, U+039F -->
367*8eb4eb26SDawid Weiss<!ENTITY Pi       "&#928;" ><!-- greek capital letter pi, U+03A0 ISOgrk3 -->
368*8eb4eb26SDawid Weiss<!ENTITY Rho      "&#929;" ><!-- greek capital letter rho, U+03A1 -->
369*8eb4eb26SDawid Weiss<!-- there is no Sigmaf, and no U+03A2 character either -->
370*8eb4eb26SDawid Weiss<!ENTITY Sigma    "&#931;" ><!-- greek capital letter sigma, U+03A3 ISOgrk3 -->
371*8eb4eb26SDawid Weiss<!ENTITY Tau      "&#932;" ><!-- greek capital letter tau, U+03A4 -->
372*8eb4eb26SDawid Weiss<!ENTITY Upsilon  "&#933;" ><!-- greek capital letter upsilon,
373*8eb4eb26SDawid Weiss                              U+03A5 ISOgrk3 -->
374*8eb4eb26SDawid Weiss<!ENTITY Phi      "&#934;" ><!-- greek capital letter phi, U+03A6 ISOgrk3 -->
375*8eb4eb26SDawid Weiss<!ENTITY Chi      "&#935;" ><!-- greek capital letter chi, U+03A7 -->
376*8eb4eb26SDawid Weiss<!ENTITY Psi      "&#936;" ><!-- greek capital letter psi, U+03A8 ISOgrk3 -->
377*8eb4eb26SDawid Weiss<!ENTITY Omega    "&#937;" ><!-- greek capital letter omega, U+03A9 ISOgrk3 -->
378*8eb4eb26SDawid Weiss<!ENTITY alpha    "&#945;" ><!-- greek small letter alpha, U+03B1 ISOgrk3 -->
379*8eb4eb26SDawid Weiss<!ENTITY beta     "&#946;" ><!-- greek small letter beta, U+03B2 ISOgrk3 -->
380*8eb4eb26SDawid Weiss<!ENTITY gamma    "&#947;" ><!-- greek small letter gamma, U+03B3 ISOgrk3 -->
381*8eb4eb26SDawid Weiss<!ENTITY delta    "&#948;" ><!-- greek small letter delta, U+03B4 ISOgrk3 -->
382*8eb4eb26SDawid Weiss<!ENTITY epsilon  "&#949;" ><!-- greek small letter epsilon, U+03B5 ISOgrk3 -->
383*8eb4eb26SDawid Weiss<!ENTITY zeta     "&#950;" ><!-- greek small letter zeta, U+03B6 ISOgrk3 -->
384*8eb4eb26SDawid Weiss<!ENTITY eta      "&#951;" ><!-- greek small letter eta, U+03B7 ISOgrk3 -->
385*8eb4eb26SDawid Weiss<!ENTITY theta    "&#952;" ><!-- greek small letter theta, U+03B8 ISOgrk3 -->
386*8eb4eb26SDawid Weiss<!ENTITY iota     "&#953;" ><!-- greek small letter iota, U+03B9 ISOgrk3 -->
387*8eb4eb26SDawid Weiss<!ENTITY kappa    "&#954;" ><!-- greek small letter kappa, U+03BA ISOgrk3 -->
388*8eb4eb26SDawid Weiss<!ENTITY lambda   "&#955;" ><!-- greek small letter lambda, U+03BB ISOgrk3 -->
389*8eb4eb26SDawid Weiss<!ENTITY mu       "&#956;" ><!-- greek small letter mu, U+03BC ISOgrk3 -->
390*8eb4eb26SDawid Weiss<!ENTITY nu       "&#957;" ><!-- greek small letter nu, U+03BD ISOgrk3 -->
391*8eb4eb26SDawid Weiss<!ENTITY xi       "&#958;" ><!-- greek small letter xi, U+03BE ISOgrk3 -->
392*8eb4eb26SDawid Weiss<!ENTITY omicron  "&#959;" ><!-- greek small letter omicron, U+03BF NEW -->
393*8eb4eb26SDawid Weiss<!ENTITY pi       "&#960;" ><!-- greek small letter pi, U+03C0 ISOgrk3 -->
394*8eb4eb26SDawid Weiss<!ENTITY rho      "&#961;" ><!-- greek small letter rho, U+03C1 ISOgrk3 -->
395*8eb4eb26SDawid Weiss<!ENTITY sigmaf   "&#962;" ><!-- greek small letter final sigma, U+03C2 ISOgrk3 -->
396*8eb4eb26SDawid Weiss<!ENTITY sigma    "&#963;" ><!-- greek small letter sigma, U+03C3 ISOgrk3 -->
397*8eb4eb26SDawid Weiss<!ENTITY tau      "&#964;" ><!-- greek small letter tau, U+03C4 ISOgrk3 -->
398*8eb4eb26SDawid Weiss<!ENTITY upsilon  "&#965;" ><!-- greek small letter upsilon, U+03C5 ISOgrk3 -->
399*8eb4eb26SDawid Weiss<!ENTITY phi      "&#966;" ><!-- greek small letter phi, U+03C6 ISOgrk3 -->
400*8eb4eb26SDawid Weiss<!ENTITY chi      "&#967;" ><!-- greek small letter chi, U+03C7 ISOgrk3 -->
401*8eb4eb26SDawid Weiss<!ENTITY psi      "&#968;" ><!-- greek small letter psi, U+03C8 ISOgrk3 -->
402*8eb4eb26SDawid Weiss<!ENTITY omega    "&#969;" ><!-- greek small letter omega, U+03C9 ISOgrk3 -->
403*8eb4eb26SDawid Weiss<!ENTITY thetasym "&#977;" ><!-- greek small letter theta symbol, U+03D1 NEW -->
404*8eb4eb26SDawid Weiss<!ENTITY upsih    "&#978;" ><!-- greek upsilon with hook symbol, U+03D2 NEW -->
405*8eb4eb26SDawid Weiss<!ENTITY piv      "&#982;" ><!-- greek pi symbol, U+03D6 ISOgrk3 -->
406*8eb4eb26SDawid Weiss
407*8eb4eb26SDawid Weiss<!-- General Punctuation -->
408*8eb4eb26SDawid Weiss<!ENTITY bull     "&#8226;" ><!-- bullet = black small circle, U+2022 ISOpub  -->
409*8eb4eb26SDawid Weiss<!-- bullet is NOT the same as bullet operator, U+2219 -->
410*8eb4eb26SDawid Weiss<!ENTITY hellip   "&#8230;" ><!-- horizontal ellipsis = three dot leader, U+2026 ISOpub  -->
411*8eb4eb26SDawid Weiss<!ENTITY prime    "&#8242;" ><!-- prime = minutes = feet, U+2032 ISOtech -->
412*8eb4eb26SDawid Weiss<!ENTITY Prime    "&#8243;" ><!-- double prime = seconds = inches, U+2033 ISOtech -->
413*8eb4eb26SDawid Weiss<!ENTITY oline    "&#8254;" ><!-- overline = spacing overscore, U+203E NEW -->
414*8eb4eb26SDawid Weiss<!ENTITY frasl    "&#8260;" ><!-- fraction slash, U+2044 NEW -->
415*8eb4eb26SDawid Weiss
416*8eb4eb26SDawid Weiss<!-- Letterlike Symbols -->
417*8eb4eb26SDawid Weiss<!ENTITY weierp   "&#8472;" ><!-- script capital P = power set = Weierstrass p, U+2118 ISOamso -->
418*8eb4eb26SDawid Weiss<!ENTITY image    "&#8465;" ><!-- blackletter capital I = imaginary part, U+2111 ISOamso -->
419*8eb4eb26SDawid Weiss<!ENTITY real     "&#8476;" ><!-- blackletter capital R = real part symbol, U+211C ISOamso -->
420*8eb4eb26SDawid Weiss<!ENTITY trade    "&#8482;" ><!-- trade mark sign, U+2122 ISOnum -->
421*8eb4eb26SDawid Weiss<!ENTITY alefsym  "&#8501;" ><!-- alef symbol = first transfinite cardinal, U+2135 NEW -->
422*8eb4eb26SDawid Weiss<!-- alef symbol is NOT the same as hebrew letter alef, U+05D0 although
423*8eb4eb26SDawid Weiss     the same glyph could be used to depict both characters -->
424*8eb4eb26SDawid Weiss
425*8eb4eb26SDawid Weiss<!-- Arrows -->
426*8eb4eb26SDawid Weiss<!ENTITY larr     "&#8592;" ><!-- leftwards arrow, U+2190 ISOnum -->
427*8eb4eb26SDawid Weiss<!ENTITY uarr     "&#8593;" ><!-- upwards arrow, U+2191 ISOnum-->
428*8eb4eb26SDawid Weiss<!ENTITY rarr     "&#8594;" ><!-- rightwards arrow, U+2192 ISOnum -->
429*8eb4eb26SDawid Weiss<!ENTITY darr     "&#8595;" ><!-- downwards arrow, U+2193 ISOnum -->
430*8eb4eb26SDawid Weiss<!ENTITY harr     "&#8596;" ><!-- left right arrow, U+2194 ISOamsa -->
431*8eb4eb26SDawid Weiss<!ENTITY crarr    "&#8629;" ><!-- downwards arrow with corner leftwards
432*8eb4eb26SDawid Weiss                               = carriage return, U+21B5 NEW -->
433*8eb4eb26SDawid Weiss<!ENTITY lArr     "&#8656;" ><!-- leftwards double arrow, U+21D0 ISOtech -->
434*8eb4eb26SDawid Weiss<!-- Unicode does not say that lArr is the same as the 'is implied by' arrow
435*8eb4eb26SDawid Weiss    but also does not have any other character for that function. So ? lArr can
436*8eb4eb26SDawid Weiss    be used for 'is implied by' as ISOtech suggests -->
437*8eb4eb26SDawid Weiss<!ENTITY uArr     "&#8657;" ><!-- upwards double arrow, U+21D1 ISOamsa -->
438*8eb4eb26SDawid Weiss<!ENTITY rArr     "&#8658;" ><!-- rightwards double arrow, U+21D2 ISOtech -->
439*8eb4eb26SDawid Weiss<!-- Unicode does not say this is the 'implies' character but does not have
440*8eb4eb26SDawid Weiss     another character with this function so ?
441*8eb4eb26SDawid Weiss     rArr can be used for 'implies' as ISOtech suggests -->
442*8eb4eb26SDawid Weiss<!ENTITY dArr     "&#8659;" ><!-- downwards double arrow, U+21D3 ISOamsa -->
443*8eb4eb26SDawid Weiss<!ENTITY hArr     "&#8660;" ><!-- left right double arrow, U+21D4 ISOamsa -->
444*8eb4eb26SDawid Weiss
445*8eb4eb26SDawid Weiss<!-- Mathematical Operators -->
446*8eb4eb26SDawid Weiss<!ENTITY forall   "&#8704;" ><!-- for all, U+2200 ISOtech -->
447*8eb4eb26SDawid Weiss<!ENTITY part     "&#8706;" ><!-- partial differential, U+2202 ISOtech  -->
448*8eb4eb26SDawid Weiss<!ENTITY exist    "&#8707;" ><!-- there exists, U+2203 ISOtech -->
449*8eb4eb26SDawid Weiss<!ENTITY empty    "&#8709;" ><!-- empty set = null set, U+2205 ISOamso -->
450*8eb4eb26SDawid Weiss<!ENTITY nabla    "&#8711;" ><!-- nabla = backward difference, U+2207 ISOtech -->
451*8eb4eb26SDawid Weiss<!ENTITY isin     "&#8712;" ><!-- element of, U+2208 ISOtech -->
452*8eb4eb26SDawid Weiss<!ENTITY notin    "&#8713;" ><!-- not an element of, U+2209 ISOtech -->
453*8eb4eb26SDawid Weiss<!ENTITY ni       "&#8715;" ><!-- contains as member, U+220B ISOtech -->
454*8eb4eb26SDawid Weiss<!-- should there be a more memorable name than 'ni'? -->
455*8eb4eb26SDawid Weiss<!ENTITY prod     "&#8719;" ><!-- n-ary product = product sign, U+220F ISOamsb -->
456*8eb4eb26SDawid Weiss<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
457*8eb4eb26SDawid Weiss     the same glyph might be used for both -->
458*8eb4eb26SDawid Weiss<!ENTITY sum      "&#8721;" ><!-- n-ary sumation, U+2211 ISOamsb -->
459*8eb4eb26SDawid Weiss<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
460*8eb4eb26SDawid Weiss     though the same glyph might be used for both -->
461*8eb4eb26SDawid Weiss<!ENTITY minus    "&#8722;" ><!-- minus sign, U+2212 ISOtech -->
462*8eb4eb26SDawid Weiss<!ENTITY lowast   "&#8727;" ><!-- asterisk operator, U+2217 ISOtech -->
463*8eb4eb26SDawid Weiss<!ENTITY radic    "&#8730;" ><!-- square root = radical sign, U+221A ISOtech -->
464*8eb4eb26SDawid Weiss<!ENTITY prop     "&#8733;" ><!-- proportional to, U+221D ISOtech -->
465*8eb4eb26SDawid Weiss<!ENTITY infin    "&#8734;" ><!-- infinity, U+221E ISOtech -->
466*8eb4eb26SDawid Weiss<!ENTITY ang      "&#8736;" ><!-- angle, U+2220 ISOamso -->
467*8eb4eb26SDawid Weiss<!ENTITY and      "&#8743;" ><!-- logical and = wedge, U+2227 ISOtech -->
468*8eb4eb26SDawid Weiss<!ENTITY or       "&#8744;" ><!-- logical or = vee, U+2228 ISOtech -->
469*8eb4eb26SDawid Weiss<!ENTITY cap      "&#8745;" ><!-- intersection = cap, U+2229 ISOtech -->
470*8eb4eb26SDawid Weiss<!ENTITY cup      "&#8746;" ><!-- union = cup, U+222A ISOtech -->
471*8eb4eb26SDawid Weiss<!ENTITY int      "&#8747;" ><!-- integral, U+222B ISOtech -->
472*8eb4eb26SDawid Weiss<!ENTITY there4   "&#8756;" ><!-- therefore, U+2234 ISOtech -->
473*8eb4eb26SDawid Weiss<!ENTITY sim      "&#8764;" ><!-- tilde operator = varies with = similar to, U+223C ISOtech -->
474*8eb4eb26SDawid Weiss<!-- tilde operator is NOT the same character as the tilde, U+007E,
475*8eb4eb26SDawid Weiss     although the same glyph might be used to represent both  -->
476*8eb4eb26SDawid Weiss<!ENTITY cong     "&#8773;" ><!-- approximately equal to, U+2245 ISOtech -->
477*8eb4eb26SDawid Weiss<!ENTITY asymp    "&#8776;" ><!-- almost equal to = asymptotic to, U+2248 ISOamsr -->
478*8eb4eb26SDawid Weiss<!ENTITY ne       "&#8800;" ><!-- not equal to, U+2260 ISOtech -->
479*8eb4eb26SDawid Weiss<!ENTITY equiv    "&#8801;" ><!-- identical to, U+2261 ISOtech -->
480*8eb4eb26SDawid Weiss<!ENTITY le       "&#8804;" ><!-- less-than or equal to, U+2264 ISOtech -->
481*8eb4eb26SDawid Weiss<!ENTITY ge       "&#8805;" ><!-- greater-than or equal to, U+2265 ISOtech -->
482*8eb4eb26SDawid Weiss<!ENTITY sub      "&#8834;" ><!-- subset of, U+2282 ISOtech -->
483*8eb4eb26SDawid Weiss<!ENTITY sup      "&#8835;" ><!-- superset of, U+2283 ISOtech -->
484*8eb4eb26SDawid Weiss<!-- note that nsup, 'not a superset of, U+2283' is not covered by the Symbol
485*8eb4eb26SDawid Weiss     font encoding and is not included. Should it be, for symmetry?
486*8eb4eb26SDawid Weiss     It is in ISOamsn  -->
487*8eb4eb26SDawid Weiss<!ENTITY nsub     "&#8836;" ><!-- not a subset of, U+2284 ISOamsn -->
488*8eb4eb26SDawid Weiss<!ENTITY sube     "&#8838;" ><!-- subset of or equal to, U+2286 ISOtech -->
489*8eb4eb26SDawid Weiss<!ENTITY supe     "&#8839;" ><!-- superset of or equal to, U+2287 ISOtech -->
490*8eb4eb26SDawid Weiss<!ENTITY oplus    "&#8853;" ><!-- circled plus = direct sum, U+2295 ISOamsb -->
491*8eb4eb26SDawid Weiss<!ENTITY otimes   "&#8855;" ><!-- circled times = vector product, U+2297 ISOamsb -->
492*8eb4eb26SDawid Weiss<!ENTITY perp     "&#8869;" ><!-- up tack = orthogonal to = perpendicular, U+22A5 ISOtech -->
493*8eb4eb26SDawid Weiss<!ENTITY sdot     "&#8901;" ><!-- dot operator, U+22C5 ISOamsb -->
494*8eb4eb26SDawid Weiss<!-- dot operator is NOT the same character as U+00B7 middle dot -->
495*8eb4eb26SDawid Weiss
496*8eb4eb26SDawid Weiss<!-- Miscellaneous Technical -->
497*8eb4eb26SDawid Weiss<!ENTITY lceil    "&#8968;" ><!-- left ceiling = apl upstile, U+2308 ISOamsc  -->
498*8eb4eb26SDawid Weiss<!ENTITY rceil    "&#8969;" ><!-- right ceiling, U+2309 ISOamsc  -->
499*8eb4eb26SDawid Weiss<!ENTITY lfloor   "&#8970;" ><!-- left floor = apl downstile, U+230A ISOamsc  -->
500*8eb4eb26SDawid Weiss<!ENTITY rfloor   "&#8971;" ><!-- right floor, U+230B ISOamsc  -->
501*8eb4eb26SDawid Weiss<!ENTITY lang     "&#9001;" ><!-- left-pointing angle bracket = bra, U+2329 ISOtech -->
502*8eb4eb26SDawid Weiss<!-- lang is NOT the same character as U+003C 'less than'
503*8eb4eb26SDawid Weiss     or U+2039 'single left-pointing angle quotation mark' -->
504*8eb4eb26SDawid Weiss<!ENTITY rang     "&#9002;" ><!-- right-pointing angle bracket = ket, U+232A ISOtech -->
505*8eb4eb26SDawid Weiss<!-- rang is NOT the same character as U+003E 'greater than'
506*8eb4eb26SDawid Weiss     or U+203A 'single right-pointing angle quotation mark' -->
507*8eb4eb26SDawid Weiss
508*8eb4eb26SDawid Weiss<!-- Geometric Shapes -->
509*8eb4eb26SDawid Weiss<!ENTITY loz      "&#9674;" ><!-- lozenge, U+25CA ISOpub -->
510*8eb4eb26SDawid Weiss
511*8eb4eb26SDawid Weiss<!-- Miscellaneous Symbols -->
512*8eb4eb26SDawid Weiss<!ENTITY spades   "&#9824;" ><!-- black spade suit, U+2660 ISOpub -->
513*8eb4eb26SDawid Weiss<!-- black here seems to mean filled as opposed to hollow -->
514*8eb4eb26SDawid Weiss<!ENTITY clubs    "&#9827;" ><!-- black club suit = shamrock, U+2663 ISOpub -->
515*8eb4eb26SDawid Weiss<!ENTITY hearts   "&#9829;" ><!-- black heart suit = valentine, U+2665 ISOpub -->
516*8eb4eb26SDawid Weiss<!ENTITY diams    "&#9830;" ><!-- black diamond suit, U+2666 ISOpub -->
517*8eb4eb26SDawid Weiss
518*8eb4eb26SDawid Weiss<!-- end of xhtml-symbol.ent -->
519*8eb4eb26SDawid Weiss"""
520*8eb4eb26SDawid Weiss  return text
521*8eb4eb26SDawid Weiss
def get_apache_license():
  """Return the ASF license header as a C-style block comment.

  The text is printed verbatim at the top of the generated output, so it
  ends with an empty line after the closing ``*/``.
  """
  header_lines = (
    '/*',
    ' * Licensed to the Apache Software Foundation (ASF) under one or more',
    ' * contributor license agreements.  See the NOTICE file distributed with',
    ' * this work for additional information regarding copyright ownership.',
    ' * The ASF licenses this file to You under the Apache License, Version 2.0',
    ' * (the "License"); you may not use this file except in compliance with',
    ' * the License.  You may obtain a copy of the License at',
    ' *',
    ' *     http://www.apache.org/licenses/LICENSE-2.0',
    ' *',
    ' * Unless required by applicable law or agreed to in writing, software',
    ' * distributed under the License is distributed on an "AS IS" BASIS,',
    ' * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.',
    ' * See the License for the specific language governing permissions and',
    ' * limitations under the License.',
    ' */',
    # Two trailing empty items yield the final "\n\n" of the original text.
    '',
    '',
  )
  return '\n'.join(header_lines)
542*8eb4eb26SDawid Weiss
# Script entry point: run the generator unconditionally when this file is
# executed. main() writes its output to the file named by sys.argv[1].
main()
544