1*8eb4eb26SDawid Weiss# Licensed to the Apache Software Foundation (ASF) under one or more 2*8eb4eb26SDawid Weiss# contributor license agreements. See the NOTICE file distributed with 3*8eb4eb26SDawid Weiss# this work for additional information regarding copyright ownership. 4*8eb4eb26SDawid Weiss# The ASF licenses this file to You under the Apache License, Version 2.0 5*8eb4eb26SDawid Weiss# (the "License"); you may not use this file except in compliance with 6*8eb4eb26SDawid Weiss# the License. You may obtain a copy of the License at 7*8eb4eb26SDawid Weiss# 8*8eb4eb26SDawid Weiss# http://www.apache.org/licenses/LICENSE-2.0 9*8eb4eb26SDawid Weiss# 10*8eb4eb26SDawid Weiss# Unless required by applicable law or agreed to in writing, software 11*8eb4eb26SDawid Weiss# distributed under the License is distributed on an "AS IS" BASIS, 12*8eb4eb26SDawid Weiss# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13*8eb4eb26SDawid Weiss# See the License for the specific language governing permissions and 14*8eb4eb26SDawid Weiss# limitations under the License. 15*8eb4eb26SDawid Weiss 16*8eb4eb26SDawid Weissimport re 17*8eb4eb26SDawid Weissimport sys 18*8eb4eb26SDawid Weiss 19*8eb4eb26SDawid Weiss# A simple python script to generate an HTML entity map and a regex alternation 20*8eb4eb26SDawid Weiss# for inclusion in HTMLStripCharFilter.jflex. 

def main():
    """Generate the HTML character-entity snippet for the JFlex grammar.

    Writes, to the file named by ``sys.argv[1]``:

    * the text returned by ``get_apache_license()``;
    * a ``CharacterEntities`` macro: an alternation of every entity name
      parsed from ``get_entity_text()``, wrapped before column 80;
    * a ``%{ ... %}`` Java block declaring the ``upperCaseVariantsAccepted``
      and ``entityValues`` maps.

    ``sys.stdout`` is left untouched; all output goes to the opened file.

    Raises:
        IndexError: if no output path was given on the command line.
        OSError: if the output file cannot be opened for writing.
    """
    # Entities that are additionally accepted in all-upper-case form. This
    # single tuple drives both the regex alternation and the generated
    # put() statements (emitted in this order), so the two cannot drift.
    upper_case_variants = ('quot', 'copy', 'gt', 'lt', 'reg', 'amp')

    # Group 1 is the entity name, group 2 its decimal code point. The
    # optional "#38;" accepts the doubly escaped form, e.g. "&#38;#60;".
    entity_decl = re.compile(r'\s*<!ENTITY\s+(\S+)\s+"&(?:#38;)?#(\d+);"')

    # NOTE(review): whitespace inside the string literals below is reproduced
    # exactly as it appears in the reviewed copy -- confirm against the
    # checked-in file before applying.
    alternation_indent = ' '
    array_indent = ' '

    with open(sys.argv[1], 'w') as out:

        def emit(*args):
            # Print to the output file explicitly instead of rebinding
            # sys.stdout, which would leave the process-wide stdout pointing
            # at a closed file once the ``with`` block exits.
            print(*args, file=out)

        def append_wrapped(line, entry, continuation_indent):
            # Flush ``line`` and start a continuation line when appending
            # ``entry`` would reach column 80; return the extended line.
            if len(line) + len(entry) >= 80:
                emit(line)
                line = continuation_indent
            return line + entry

        emit(get_apache_license())

        # Entity name -> Java string-literal body for its replacement char.
        codes = {}
        for line in get_entity_text().split('\n'):
            match = entity_decl.match(line)
            if not match:
                continue
            key = match.group(1)
            if key == 'quot':
                # A bare double quote would terminate the Java literal.
                codes[key] = r'\"'
            elif key == 'nbsp':
                # nbsp is special-cased to this literal rather than a \u escape.
                codes[key] = ' '
            else:
                codes[key] = r'\u%04X' % int(match.group(2))

        keys = sorted(codes)

        # JFlex macro: "name1" | "name2" | ...
        output_line = 'CharacterEntities = ( '
        for index, key in enumerate(keys):
            entry = ('"%s"' if index == 0 else ' | "%s"') % key
            output_line = append_wrapped(output_line, entry, alternation_indent)
            if key in upper_case_variants:
                output_line = append_wrapped(
                    output_line, ' | "%s"' % key.upper(), alternation_indent)
        emit(output_line, ')')

        # Java user-code section.
        emit('%{')
        emit(' private static final Map<String,String> upperCaseVariantsAccepted')
        emit(' = new HashMap<>();')
        emit(' static {')
        for name in upper_case_variants:
            emit(' upperCaseVariantsAccepted.put("%s", "%s");'
                 % (name, name.upper()))
        emit(' }')
        emit(' private static final CharArrayMap<Character> entityValues')
        emit(' = new CharArrayMap<>(%i, false);' % len(keys))
        emit(' static {')
        emit(' String[] entities = {')
        output_line = ' '
        for key in keys:
            output_line = append_wrapped(
                output_line, ' "%s", "%s",' % (key, codes[key]), array_indent)
        # Drop the final character (the trailing comma after the last pair).
        emit(output_line[:-1])
        emit(' };')
        emit(' for (int i = 0 ; i < entities.length ; i += 2) {')
        emit(' Character value = entities[i + 1].charAt(0);')
        emit(' entityValues.put(entities[i], value);')
        emit(' String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);')
        emit(' if (upperCaseVariant != null) {')
        emit(' entityValues.put(upperCaseVariant, value);')
        emit(' }')
        emit(' }')
        emit(" }")
        emit("%}")


def get_entity_text():
    """Return the entity declaration text that main() parses line by line."""
# The text below is taken verbatim from
#
<http://www.w3.org/TR/REC-html40/sgml/entities.html>: 94*8eb4eb26SDawid Weiss text = r""" 95*8eb4eb26SDawid WeissF.1. XHTML Character Entities 96*8eb4eb26SDawid Weiss 97*8eb4eb26SDawid WeissXHTML DTDs make available a standard collection of named character entities. Those entities are defined in this section. 98*8eb4eb26SDawid WeissF.1.1. XHTML Latin 1 Character Entities 99*8eb4eb26SDawid Weiss 100*8eb4eb26SDawid WeissYou can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-lat1.ent. The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-lat1.ent. 101*8eb4eb26SDawid Weiss 102*8eb4eb26SDawid Weiss<!-- ...................................................................... --> 103*8eb4eb26SDawid Weiss<!-- XML-compatible ISO Latin 1 Character Entity Set for XHTML ............ --> 104*8eb4eb26SDawid Weiss<!-- file: xhtml-lat1.ent 105*8eb4eb26SDawid Weiss 106*8eb4eb26SDawid Weiss Typical invocation: 107*8eb4eb26SDawid Weiss 108*8eb4eb26SDawid Weiss <!ENTITY % xhtml-lat1 109*8eb4eb26SDawid Weiss PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN" 110*8eb4eb26SDawid Weiss "xhtml-lat1.ent" > 111*8eb4eb26SDawid Weiss %xhtml-lat1; 112*8eb4eb26SDawid Weiss 113*8eb4eb26SDawid Weiss This DTD module is identified by the PUBLIC and SYSTEM identifiers: 114*8eb4eb26SDawid Weiss 115*8eb4eb26SDawid Weiss PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN" 116*8eb4eb26SDawid Weiss SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-lat1.ent" 117*8eb4eb26SDawid Weiss 118*8eb4eb26SDawid Weiss Revision: Id: xhtml-lat1.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI 119*8eb4eb26SDawid Weiss 120*8eb4eb26SDawid Weiss Portions (C) International Organization for Standardization 1986: 121*8eb4eb26SDawid Weiss Permission to copy in any form is granted for use with conforming 122*8eb4eb26SDawid Weiss SGML systems and applications as defined in ISO 8879, provided 123*8eb4eb26SDawid Weiss this notice is included in all copies. 
124*8eb4eb26SDawid Weiss--> 125*8eb4eb26SDawid Weiss 126*8eb4eb26SDawid Weiss<!ENTITY nbsp " " ><!-- no-break space = non-breaking space, U+00A0 ISOnum --> 127*8eb4eb26SDawid Weiss<!ENTITY iexcl "¡" ><!-- inverted exclamation mark, U+00A1 ISOnum --> 128*8eb4eb26SDawid Weiss<!ENTITY cent "¢" ><!-- cent sign, U+00A2 ISOnum --> 129*8eb4eb26SDawid Weiss<!ENTITY pound "£" ><!-- pound sign, U+00A3 ISOnum --> 130*8eb4eb26SDawid Weiss<!ENTITY curren "¤" ><!-- currency sign, U+00A4 ISOnum --> 131*8eb4eb26SDawid Weiss<!ENTITY yen "¥" ><!-- yen sign = yuan sign, U+00A5 ISOnum --> 132*8eb4eb26SDawid Weiss<!ENTITY brvbar "¦" ><!-- broken bar = broken vertical bar, U+00A6 ISOnum --> 133*8eb4eb26SDawid Weiss<!ENTITY sect "§" ><!-- section sign, U+00A7 ISOnum --> 134*8eb4eb26SDawid Weiss<!ENTITY uml "¨" ><!-- diaeresis = spacing diaeresis, U+00A8 ISOdia --> 135*8eb4eb26SDawid Weiss<!ENTITY copy "©" ><!-- copyright sign, U+00A9 ISOnum --> 136*8eb4eb26SDawid Weiss<!ENTITY ordf "ª" ><!-- feminine ordinal indicator, U+00AA ISOnum --> 137*8eb4eb26SDawid Weiss<!ENTITY laquo "«" ><!-- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum --> 138*8eb4eb26SDawid Weiss<!ENTITY not "¬" ><!-- not sign, U+00AC ISOnum --> 139*8eb4eb26SDawid Weiss<!ENTITY shy "­" ><!-- soft hyphen = discretionary hyphen, U+00AD ISOnum --> 140*8eb4eb26SDawid Weiss<!ENTITY reg "®" ><!-- registered sign = registered trade mark sign, U+00AE ISOnum --> 141*8eb4eb26SDawid Weiss<!ENTITY macr "¯" ><!-- macron = spacing macron = overline = APL overbar, U+00AF ISOdia --> 142*8eb4eb26SDawid Weiss<!ENTITY deg "°" ><!-- degree sign, U+00B0 ISOnum --> 143*8eb4eb26SDawid Weiss<!ENTITY plusmn "±" ><!-- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum --> 144*8eb4eb26SDawid Weiss<!ENTITY sup2 "²" ><!-- superscript two = superscript digit two = squared, U+00B2 ISOnum --> 145*8eb4eb26SDawid Weiss<!ENTITY sup3 "³" ><!-- superscript three = superscript digit three = cubed, U+00B3 ISOnum --> 
146*8eb4eb26SDawid Weiss<!ENTITY acute "´" ><!-- acute accent = spacing acute, U+00B4 ISOdia --> 147*8eb4eb26SDawid Weiss<!ENTITY micro "µ" ><!-- micro sign, U+00B5 ISOnum --> 148*8eb4eb26SDawid Weiss<!ENTITY para "¶" ><!-- pilcrow sign = paragraph sign, U+00B6 ISOnum --> 149*8eb4eb26SDawid Weiss<!ENTITY middot "·" ><!-- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum --> 150*8eb4eb26SDawid Weiss<!ENTITY cedil "¸" ><!-- cedilla = spacing cedilla, U+00B8 ISOdia --> 151*8eb4eb26SDawid Weiss<!ENTITY sup1 "¹" ><!-- superscript one = superscript digit one, U+00B9 ISOnum --> 152*8eb4eb26SDawid Weiss<!ENTITY ordm "º" ><!-- masculine ordinal indicator, U+00BA ISOnum --> 153*8eb4eb26SDawid Weiss<!ENTITY raquo "»" ><!-- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum --> 154*8eb4eb26SDawid Weiss<!ENTITY frac14 "¼" ><!-- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum --> 155*8eb4eb26SDawid Weiss<!ENTITY frac12 "½" ><!-- vulgar fraction one half = fraction one half, U+00BD ISOnum --> 156*8eb4eb26SDawid Weiss<!ENTITY frac34 "¾" ><!-- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum --> 157*8eb4eb26SDawid Weiss<!ENTITY iquest "¿" ><!-- inverted question mark = turned question mark, U+00BF ISOnum --> 158*8eb4eb26SDawid Weiss<!ENTITY Agrave "À" ><!-- latin capital A with grave = latin capital A grave, U+00C0 ISOlat1 --> 159*8eb4eb26SDawid Weiss<!ENTITY Aacute "Á" ><!-- latin capital A with acute, U+00C1 ISOlat1 --> 160*8eb4eb26SDawid Weiss<!ENTITY Acirc "Â" ><!-- latin capital A with circumflex, U+00C2 ISOlat1 --> 161*8eb4eb26SDawid Weiss<!ENTITY Atilde "Ã" ><!-- latin capital A with tilde, U+00C3 ISOlat1 --> 162*8eb4eb26SDawid Weiss<!ENTITY Auml "Ä" ><!-- latin capital A with diaeresis, U+00C4 ISOlat1 --> 163*8eb4eb26SDawid Weiss<!ENTITY Aring "Å" ><!-- latin capital A with ring above = latin capital A ring, U+00C5 ISOlat1 --> 164*8eb4eb26SDawid Weiss<!ENTITY AElig "Æ" ><!-- latin capital 
AE = latin capital ligature AE, U+00C6 ISOlat1 --> 165*8eb4eb26SDawid Weiss<!ENTITY Ccedil "Ç" ><!-- latin capital C with cedilla, U+00C7 ISOlat1 --> 166*8eb4eb26SDawid Weiss<!ENTITY Egrave "È" ><!-- latin capital E with grave, U+00C8 ISOlat1 --> 167*8eb4eb26SDawid Weiss<!ENTITY Eacute "É" ><!-- latin capital E with acute, U+00C9 ISOlat1 --> 168*8eb4eb26SDawid Weiss<!ENTITY Ecirc "Ê" ><!-- latin capital E with circumflex, U+00CA ISOlat1 --> 169*8eb4eb26SDawid Weiss<!ENTITY Euml "Ë" ><!-- latin capital E with diaeresis, U+00CB ISOlat1 --> 170*8eb4eb26SDawid Weiss<!ENTITY Igrave "Ì" ><!-- latin capital I with grave, U+00CC ISOlat1 --> 171*8eb4eb26SDawid Weiss<!ENTITY Iacute "Í" ><!-- latin capital I with acute, U+00CD ISOlat1 --> 172*8eb4eb26SDawid Weiss<!ENTITY Icirc "Î" ><!-- latin capital I with circumflex, U+00CE ISOlat1 --> 173*8eb4eb26SDawid Weiss<!ENTITY Iuml "Ï" ><!-- latin capital I with diaeresis, U+00CF ISOlat1 --> 174*8eb4eb26SDawid Weiss<!ENTITY ETH "Ð" ><!-- latin capital ETH, U+00D0 ISOlat1 --> 175*8eb4eb26SDawid Weiss<!ENTITY Ntilde "Ñ" ><!-- latin capital N with tilde, U+00D1 ISOlat1 --> 176*8eb4eb26SDawid Weiss<!ENTITY Ograve "Ò" ><!-- latin capital O with grave, U+00D2 ISOlat1 --> 177*8eb4eb26SDawid Weiss<!ENTITY Oacute "Ó" ><!-- latin capital O with acute, U+00D3 ISOlat1 --> 178*8eb4eb26SDawid Weiss<!ENTITY Ocirc "Ô" ><!-- latin capital O with circumflex, U+00D4 ISOlat1 --> 179*8eb4eb26SDawid Weiss<!ENTITY Otilde "Õ" ><!-- latin capital O with tilde, U+00D5 ISOlat1 --> 180*8eb4eb26SDawid Weiss<!ENTITY Ouml "Ö" ><!-- latin capital O with diaeresis, U+00D6 ISOlat1 --> 181*8eb4eb26SDawid Weiss<!ENTITY times "×" ><!-- multiplication sign, U+00D7 ISOnum --> 182*8eb4eb26SDawid Weiss<!ENTITY Oslash "Ø" ><!-- latin capital O with stroke = latin capital O slash, U+00D8 ISOlat1 --> 183*8eb4eb26SDawid Weiss<!ENTITY Ugrave "Ù" ><!-- latin capital U with grave, U+00D9 ISOlat1 --> 184*8eb4eb26SDawid Weiss<!ENTITY Uacute "Ú" ><!-- latin capital U with acute, 
U+00DA ISOlat1 --> 185*8eb4eb26SDawid Weiss<!ENTITY Ucirc "Û" ><!-- latin capital U with circumflex, U+00DB ISOlat1 --> 186*8eb4eb26SDawid Weiss<!ENTITY Uuml "Ü" ><!-- latin capital U with diaeresis, U+00DC ISOlat1 --> 187*8eb4eb26SDawid Weiss<!ENTITY Yacute "Ý" ><!-- latin capital Y with acute, U+00DD ISOlat1 --> 188*8eb4eb26SDawid Weiss<!ENTITY THORN "Þ" ><!-- latin capital THORN, U+00DE ISOlat1 --> 189*8eb4eb26SDawid Weiss<!ENTITY szlig "ß" ><!-- latin small sharp s = ess-zed, U+00DF ISOlat1 --> 190*8eb4eb26SDawid Weiss<!ENTITY agrave "à" ><!-- latin small a with grave = latin small a grave, U+00E0 ISOlat1 --> 191*8eb4eb26SDawid Weiss<!ENTITY aacute "á" ><!-- latin small a with acute, U+00E1 ISOlat1 --> 192*8eb4eb26SDawid Weiss<!ENTITY acirc "â" ><!-- latin small a with circumflex, U+00E2 ISOlat1 --> 193*8eb4eb26SDawid Weiss<!ENTITY atilde "ã" ><!-- latin small a with tilde, U+00E3 ISOlat1 --> 194*8eb4eb26SDawid Weiss<!ENTITY auml "ä" ><!-- latin small a with diaeresis, U+00E4 ISOlat1 --> 195*8eb4eb26SDawid Weiss<!ENTITY aring "å" ><!-- latin small a with ring above = latin small a ring, U+00E5 ISOlat1 --> 196*8eb4eb26SDawid Weiss<!ENTITY aelig "æ" ><!-- latin small ae = latin small ligature ae, U+00E6 ISOlat1 --> 197*8eb4eb26SDawid Weiss<!ENTITY ccedil "ç" ><!-- latin small c with cedilla, U+00E7 ISOlat1 --> 198*8eb4eb26SDawid Weiss<!ENTITY egrave "è" ><!-- latin small e with grave, U+00E8 ISOlat1 --> 199*8eb4eb26SDawid Weiss<!ENTITY eacute "é" ><!-- latin small e with acute, U+00E9 ISOlat1 --> 200*8eb4eb26SDawid Weiss<!ENTITY ecirc "ê" ><!-- latin small e with circumflex, U+00EA ISOlat1 --> 201*8eb4eb26SDawid Weiss<!ENTITY euml "ë" ><!-- latin small e with diaeresis, U+00EB ISOlat1 --> 202*8eb4eb26SDawid Weiss<!ENTITY igrave "ì" ><!-- latin small i with grave, U+00EC ISOlat1 --> 203*8eb4eb26SDawid Weiss<!ENTITY iacute "í" ><!-- latin small i with acute, U+00ED ISOlat1 --> 204*8eb4eb26SDawid Weiss<!ENTITY icirc "î" ><!-- latin small i with circumflex, U+00EE 
ISOlat1 --> 205*8eb4eb26SDawid Weiss<!ENTITY iuml "ï" ><!-- latin small i with diaeresis, U+00EF ISOlat1 --> 206*8eb4eb26SDawid Weiss<!ENTITY eth "ð" ><!-- latin small eth, U+00F0 ISOlat1 --> 207*8eb4eb26SDawid Weiss<!ENTITY ntilde "ñ" ><!-- latin small n with tilde, U+00F1 ISOlat1 --> 208*8eb4eb26SDawid Weiss<!ENTITY ograve "ò" ><!-- latin small o with grave, U+00F2 ISOlat1 --> 209*8eb4eb26SDawid Weiss<!ENTITY oacute "ó" ><!-- latin small o with acute, U+00F3 ISOlat1 --> 210*8eb4eb26SDawid Weiss<!ENTITY ocirc "ô" ><!-- latin small o with circumflex, U+00F4 ISOlat1 --> 211*8eb4eb26SDawid Weiss<!ENTITY otilde "õ" ><!-- latin small o with tilde, U+00F5 ISOlat1 --> 212*8eb4eb26SDawid Weiss<!ENTITY ouml "ö" ><!-- latin small o with diaeresis, U+00F6 ISOlat1 --> 213*8eb4eb26SDawid Weiss<!ENTITY divide "÷" ><!-- division sign, U+00F7 ISOnum --> 214*8eb4eb26SDawid Weiss<!ENTITY oslash "ø" ><!-- latin small o with stroke, = latin small o slash, U+00F8 ISOlat1 --> 215*8eb4eb26SDawid Weiss<!ENTITY ugrave "ù" ><!-- latin small u with grave, U+00F9 ISOlat1 --> 216*8eb4eb26SDawid Weiss<!ENTITY uacute "ú" ><!-- latin small u with acute, U+00FA ISOlat1 --> 217*8eb4eb26SDawid Weiss<!ENTITY ucirc "û" ><!-- latin small u with circumflex, U+00FB ISOlat1 --> 218*8eb4eb26SDawid Weiss<!ENTITY uuml "ü" ><!-- latin small u with diaeresis, U+00FC ISOlat1 --> 219*8eb4eb26SDawid Weiss<!ENTITY yacute "ý" ><!-- latin small y with acute, U+00FD ISOlat1 --> 220*8eb4eb26SDawid Weiss<!ENTITY thorn "þ" ><!-- latin small thorn with, U+00FE ISOlat1 --> 221*8eb4eb26SDawid Weiss<!ENTITY yuml "ÿ" ><!-- latin small y with diaeresis, U+00FF ISOlat1 --> 222*8eb4eb26SDawid Weiss<!-- end of xhtml-lat1.ent --> 223*8eb4eb26SDawid Weiss 224*8eb4eb26SDawid WeissF.1.2. XHTML Special Characters 225*8eb4eb26SDawid Weiss 226*8eb4eb26SDawid WeissYou can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-special.ent. 
The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-special.ent. 227*8eb4eb26SDawid Weiss 228*8eb4eb26SDawid Weiss<!-- ...................................................................... --> 229*8eb4eb26SDawid Weiss<!-- XML-compatible ISO Special Character Entity Set for XHTML ............ --> 230*8eb4eb26SDawid Weiss<!-- file: xhtml-special.ent 231*8eb4eb26SDawid Weiss 232*8eb4eb26SDawid Weiss Typical invocation: 233*8eb4eb26SDawid Weiss 234*8eb4eb26SDawid Weiss <!ENTITY % xhtml-special 235*8eb4eb26SDawid Weiss PUBLIC "-//W3C//ENTITIES Special for XHTML//EN" 236*8eb4eb26SDawid Weiss "xhtml-special.ent" > 237*8eb4eb26SDawid Weiss %xhtml-special; 238*8eb4eb26SDawid Weiss 239*8eb4eb26SDawid Weiss This DTD module is identified by the PUBLIC and SYSTEM identifiers: 240*8eb4eb26SDawid Weiss 241*8eb4eb26SDawid Weiss PUBLIC "-//W3C//ENTITIES Special for XHTML//EN" 242*8eb4eb26SDawid Weiss SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-special.ent" 243*8eb4eb26SDawid Weiss 244*8eb4eb26SDawid Weiss Revision: Id: xhtml-special.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI 245*8eb4eb26SDawid Weiss 246*8eb4eb26SDawid Weiss Portions (C) International Organization for Standardization 1986: 247*8eb4eb26SDawid Weiss Permission to copy in any form is granted for use with conforming 248*8eb4eb26SDawid Weiss SGML systems and applications as defined in ISO 8879, provided 249*8eb4eb26SDawid Weiss this notice is included in all copies. 250*8eb4eb26SDawid Weiss 251*8eb4eb26SDawid Weiss Revisions: 252*8eb4eb26SDawid Weiss2000-10-28: added ' and altered XML Predefined Entities for compatibility 253*8eb4eb26SDawid Weiss--> 254*8eb4eb26SDawid Weiss 255*8eb4eb26SDawid Weiss<!-- Relevant ISO entity set is given unless names are newly introduced. 256*8eb4eb26SDawid Weiss New names (i.e., not in ISO 8879 [SGML] list) do not clash with 257*8eb4eb26SDawid Weiss any existing ISO 8879 entity names. 
ISO 10646 [ISO10646] character 258*8eb4eb26SDawid Weiss numbers are given for each character, in hex. Entity values are 259*8eb4eb26SDawid Weiss decimal conversions of the ISO 10646 values and refer to the 260*8eb4eb26SDawid Weiss document character set. Names are Unicode [UNICODE] names. 261*8eb4eb26SDawid Weiss--> 262*8eb4eb26SDawid Weiss 263*8eb4eb26SDawid Weiss<!-- C0 Controls and Basic Latin --> 264*8eb4eb26SDawid Weiss<!ENTITY lt "&#60;" ><!-- less-than sign, U+003C ISOnum --> 265*8eb4eb26SDawid Weiss<!ENTITY gt ">" ><!-- greater-than sign, U+003E ISOnum --> 266*8eb4eb26SDawid Weiss<!ENTITY amp "&#38;" ><!-- ampersand, U+0026 ISOnum --> 267*8eb4eb26SDawid Weiss<!ENTITY apos "'" ><!-- The Apostrophe (Apostrophe Quote, APL Quote), U+0027 ISOnum --> 268*8eb4eb26SDawid Weiss<!ENTITY quot """ ><!-- quotation mark (Quote Double), U+0022 ISOnum --> 269*8eb4eb26SDawid Weiss 270*8eb4eb26SDawid Weiss<!-- Latin Extended-A --> 271*8eb4eb26SDawid Weiss<!ENTITY OElig "Œ" ><!-- latin capital ligature OE, U+0152 ISOlat2 --> 272*8eb4eb26SDawid Weiss<!ENTITY oelig "œ" ><!-- latin small ligature oe, U+0153 ISOlat2 --> 273*8eb4eb26SDawid Weiss 274*8eb4eb26SDawid Weiss<!-- ligature is a misnomer, this is a separate character in some languages --> 275*8eb4eb26SDawid Weiss<!ENTITY Scaron "Š" ><!-- latin capital letter S with caron, U+0160 ISOlat2 --> 276*8eb4eb26SDawid Weiss<!ENTITY scaron "š" ><!-- latin small letter s with caron, U+0161 ISOlat2 --> 277*8eb4eb26SDawid Weiss<!ENTITY Yuml "Ÿ" ><!-- latin capital letter Y with diaeresis, U+0178 ISOlat2 --> 278*8eb4eb26SDawid Weiss 279*8eb4eb26SDawid Weiss<!-- Spacing Modifier Letters --> 280*8eb4eb26SDawid Weiss<!ENTITY circ "ˆ" ><!-- modifier letter circumflex accent, U+02C6 ISOpub --> 281*8eb4eb26SDawid Weiss<!ENTITY tilde "˜" ><!-- small tilde, U+02DC ISOdia --> 282*8eb4eb26SDawid Weiss 283*8eb4eb26SDawid Weiss<!-- General Punctuation --> 284*8eb4eb26SDawid Weiss<!ENTITY ensp " " ><!-- en space, U+2002 ISOpub --> 
285*8eb4eb26SDawid Weiss<!ENTITY emsp " " ><!-- em space, U+2003 ISOpub --> 286*8eb4eb26SDawid Weiss<!ENTITY thinsp " " ><!-- thin space, U+2009 ISOpub --> 287*8eb4eb26SDawid Weiss<!ENTITY zwnj "‌" ><!-- zero width non-joiner, U+200C NEW RFC 2070 --> 288*8eb4eb26SDawid Weiss<!ENTITY zwj "‍" ><!-- zero width joiner, U+200D NEW RFC 2070 --> 289*8eb4eb26SDawid Weiss<!ENTITY lrm "‎" ><!-- left-to-right mark, U+200E NEW RFC 2070 --> 290*8eb4eb26SDawid Weiss<!ENTITY rlm "‏" ><!-- right-to-left mark, U+200F NEW RFC 2070 --> 291*8eb4eb26SDawid Weiss<!ENTITY ndash "–" ><!-- en dash, U+2013 ISOpub --> 292*8eb4eb26SDawid Weiss<!ENTITY mdash "—" ><!-- em dash, U+2014 ISOpub --> 293*8eb4eb26SDawid Weiss<!ENTITY lsquo "‘" ><!-- left single quotation mark, U+2018 ISOnum --> 294*8eb4eb26SDawid Weiss<!ENTITY rsquo "’" ><!-- right single quotation mark, U+2019 ISOnum --> 295*8eb4eb26SDawid Weiss<!ENTITY sbquo "‚" ><!-- single low-9 quotation mark, U+201A NEW --> 296*8eb4eb26SDawid Weiss<!ENTITY ldquo "“" ><!-- left double quotation mark, U+201C ISOnum --> 297*8eb4eb26SDawid Weiss<!ENTITY rdquo "”" ><!-- right double quotation mark, U+201D ISOnum --> 298*8eb4eb26SDawid Weiss<!ENTITY bdquo "„" ><!-- double low-9 quotation mark, U+201E NEW --> 299*8eb4eb26SDawid Weiss<!ENTITY dagger "†" ><!-- dagger, U+2020 ISOpub --> 300*8eb4eb26SDawid Weiss<!ENTITY Dagger "‡" ><!-- double dagger, U+2021 ISOpub --> 301*8eb4eb26SDawid Weiss<!ENTITY permil "‰" ><!-- per mille sign, U+2030 ISOtech --> 302*8eb4eb26SDawid Weiss 303*8eb4eb26SDawid Weiss<!-- lsaquo is proposed but not yet ISO standardized --> 304*8eb4eb26SDawid Weiss<!ENTITY lsaquo "‹" ><!-- single left-pointing angle quotation mark, U+2039 ISO proposed --> 305*8eb4eb26SDawid Weiss<!-- rsaquo is proposed but not yet ISO standardized --> 306*8eb4eb26SDawid Weiss<!ENTITY rsaquo "›" ><!-- single right-pointing angle quotation mark, U+203A ISO proposed --> 307*8eb4eb26SDawid Weiss<!ENTITY euro "€" ><!-- euro sign, U+20AC NEW --> 
308*8eb4eb26SDawid Weiss 309*8eb4eb26SDawid Weiss<!-- end of xhtml-special.ent --> 310*8eb4eb26SDawid Weiss 311*8eb4eb26SDawid WeissF.1.3. XHTML Mathematical, Greek, and Symbolic Characters 312*8eb4eb26SDawid Weiss 313*8eb4eb26SDawid WeissYou can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-symbol.ent. The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-symbol.ent. 314*8eb4eb26SDawid Weiss 315*8eb4eb26SDawid Weiss<!-- ...................................................................... --> 316*8eb4eb26SDawid Weiss<!-- ISO Math, Greek and Symbolic Character Entity Set for XHTML .......... --> 317*8eb4eb26SDawid Weiss<!-- file: xhtml-symbol.ent 318*8eb4eb26SDawid Weiss 319*8eb4eb26SDawid Weiss Typical invocation: 320*8eb4eb26SDawid Weiss 321*8eb4eb26SDawid Weiss <!ENTITY % xhtml-symbol 322*8eb4eb26SDawid Weiss PUBLIC "-//W3C//ENTITIES Symbols for XHTML//EN" 323*8eb4eb26SDawid Weiss "xhtml-symbol.ent" > 324*8eb4eb26SDawid Weiss %xhtml-symbol; 325*8eb4eb26SDawid Weiss 326*8eb4eb26SDawid Weiss This DTD module is identified by the PUBLIC and SYSTEM identifiers: 327*8eb4eb26SDawid Weiss 328*8eb4eb26SDawid Weiss PUBLIC "-//W3C//ENTITIES Symbols for XHTML//EN" 329*8eb4eb26SDawid Weiss SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-symbol.ent" 330*8eb4eb26SDawid Weiss 331*8eb4eb26SDawid Weiss Revision: Id: xhtml-symbol.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI 332*8eb4eb26SDawid Weiss 333*8eb4eb26SDawid Weiss Portions (C) International Organization for Standardization 1986: 334*8eb4eb26SDawid Weiss Permission to copy in any form is granted for use with conforming 335*8eb4eb26SDawid Weiss SGML systems and applications as defined in ISO 8879, provided 336*8eb4eb26SDawid Weiss this notice is included in all copies. 337*8eb4eb26SDawid Weiss--> 338*8eb4eb26SDawid Weiss 339*8eb4eb26SDawid Weiss<!-- Relevant ISO entity set is given unless names are newly introduced. 
340*8eb4eb26SDawid Weiss New names (i.e., not in ISO 8879 [SGML] list) do not clash with 341*8eb4eb26SDawid Weiss any existing ISO 8879 entity names. ISO 10646 [ISO10646] character 342*8eb4eb26SDawid Weiss numbers are given for each character, in hex. Entity values are 343*8eb4eb26SDawid Weiss decimal conversions of the ISO 10646 values and refer to the 344*8eb4eb26SDawid Weiss document character set. Names are Unicode [UNICODE] names. 345*8eb4eb26SDawid Weiss--> 346*8eb4eb26SDawid Weiss 347*8eb4eb26SDawid Weiss<!-- Latin Extended-B --> 348*8eb4eb26SDawid Weiss<!ENTITY fnof "ƒ" ><!-- latin small f with hook = function 349*8eb4eb26SDawid Weiss = florin, U+0192 ISOtech --> 350*8eb4eb26SDawid Weiss 351*8eb4eb26SDawid Weiss<!-- Greek --> 352*8eb4eb26SDawid Weiss<!ENTITY Alpha "Α" ><!-- greek capital letter alpha, U+0391 --> 353*8eb4eb26SDawid Weiss<!ENTITY Beta "Β" ><!-- greek capital letter beta, U+0392 --> 354*8eb4eb26SDawid Weiss<!ENTITY Gamma "Γ" ><!-- greek capital letter gamma, U+0393 ISOgrk3 --> 355*8eb4eb26SDawid Weiss<!ENTITY Delta "Δ" ><!-- greek capital letter delta, U+0394 ISOgrk3 --> 356*8eb4eb26SDawid Weiss<!ENTITY Epsilon "Ε" ><!-- greek capital letter epsilon, U+0395 --> 357*8eb4eb26SDawid Weiss<!ENTITY Zeta "Ζ" ><!-- greek capital letter zeta, U+0396 --> 358*8eb4eb26SDawid Weiss<!ENTITY Eta "Η" ><!-- greek capital letter eta, U+0397 --> 359*8eb4eb26SDawid Weiss<!ENTITY Theta "Θ" ><!-- greek capital letter theta, U+0398 ISOgrk3 --> 360*8eb4eb26SDawid Weiss<!ENTITY Iota "Ι" ><!-- greek capital letter iota, U+0399 --> 361*8eb4eb26SDawid Weiss<!ENTITY Kappa "Κ" ><!-- greek capital letter kappa, U+039A --> 362*8eb4eb26SDawid Weiss<!ENTITY Lambda "Λ" ><!-- greek capital letter lambda, U+039B ISOgrk3 --> 363*8eb4eb26SDawid Weiss<!ENTITY Mu "Μ" ><!-- greek capital letter mu, U+039C --> 364*8eb4eb26SDawid Weiss<!ENTITY Nu "Ν" ><!-- greek capital letter nu, U+039D --> 365*8eb4eb26SDawid Weiss<!ENTITY Xi "Ξ" ><!-- greek capital letter xi, U+039E ISOgrk3 --> 
366*8eb4eb26SDawid Weiss<!ENTITY Omicron "Ο" ><!-- greek capital letter omicron, U+039F --> 367*8eb4eb26SDawid Weiss<!ENTITY Pi "Π" ><!-- greek capital letter pi, U+03A0 ISOgrk3 --> 368*8eb4eb26SDawid Weiss<!ENTITY Rho "Ρ" ><!-- greek capital letter rho, U+03A1 --> 369*8eb4eb26SDawid Weiss<!-- there is no Sigmaf, and no U+03A2 character either --> 370*8eb4eb26SDawid Weiss<!ENTITY Sigma "Σ" ><!-- greek capital letter sigma, U+03A3 ISOgrk3 --> 371*8eb4eb26SDawid Weiss<!ENTITY Tau "Τ" ><!-- greek capital letter tau, U+03A4 --> 372*8eb4eb26SDawid Weiss<!ENTITY Upsilon "Υ" ><!-- greek capital letter upsilon, 373*8eb4eb26SDawid Weiss U+03A5 ISOgrk3 --> 374*8eb4eb26SDawid Weiss<!ENTITY Phi "Φ" ><!-- greek capital letter phi, U+03A6 ISOgrk3 --> 375*8eb4eb26SDawid Weiss<!ENTITY Chi "Χ" ><!-- greek capital letter chi, U+03A7 --> 376*8eb4eb26SDawid Weiss<!ENTITY Psi "Ψ" ><!-- greek capital letter psi, U+03A8 ISOgrk3 --> 377*8eb4eb26SDawid Weiss<!ENTITY Omega "Ω" ><!-- greek capital letter omega, U+03A9 ISOgrk3 --> 378*8eb4eb26SDawid Weiss<!ENTITY alpha "α" ><!-- greek small letter alpha, U+03B1 ISOgrk3 --> 379*8eb4eb26SDawid Weiss<!ENTITY beta "β" ><!-- greek small letter beta, U+03B2 ISOgrk3 --> 380*8eb4eb26SDawid Weiss<!ENTITY gamma "γ" ><!-- greek small letter gamma, U+03B3 ISOgrk3 --> 381*8eb4eb26SDawid Weiss<!ENTITY delta "δ" ><!-- greek small letter delta, U+03B4 ISOgrk3 --> 382*8eb4eb26SDawid Weiss<!ENTITY epsilon "ε" ><!-- greek small letter epsilon, U+03B5 ISOgrk3 --> 383*8eb4eb26SDawid Weiss<!ENTITY zeta "ζ" ><!-- greek small letter zeta, U+03B6 ISOgrk3 --> 384*8eb4eb26SDawid Weiss<!ENTITY eta "η" ><!-- greek small letter eta, U+03B7 ISOgrk3 --> 385*8eb4eb26SDawid Weiss<!ENTITY theta "θ" ><!-- greek small letter theta, U+03B8 ISOgrk3 --> 386*8eb4eb26SDawid Weiss<!ENTITY iota "ι" ><!-- greek small letter iota, U+03B9 ISOgrk3 --> 387*8eb4eb26SDawid Weiss<!ENTITY kappa "κ" ><!-- greek small letter kappa, U+03BA ISOgrk3 --> 388*8eb4eb26SDawid Weiss<!ENTITY lambda "λ" 
><!-- greek small letter lambda, U+03BB ISOgrk3 --> 389*8eb4eb26SDawid Weiss<!ENTITY mu "μ" ><!-- greek small letter mu, U+03BC ISOgrk3 --> 390*8eb4eb26SDawid Weiss<!ENTITY nu "ν" ><!-- greek small letter nu, U+03BD ISOgrk3 --> 391*8eb4eb26SDawid Weiss<!ENTITY xi "ξ" ><!-- greek small letter xi, U+03BE ISOgrk3 --> 392*8eb4eb26SDawid Weiss<!ENTITY omicron "ο" ><!-- greek small letter omicron, U+03BF NEW --> 393*8eb4eb26SDawid Weiss<!ENTITY pi "π" ><!-- greek small letter pi, U+03C0 ISOgrk3 --> 394*8eb4eb26SDawid Weiss<!ENTITY rho "ρ" ><!-- greek small letter rho, U+03C1 ISOgrk3 --> 395*8eb4eb26SDawid Weiss<!ENTITY sigmaf "ς" ><!-- greek small letter final sigma, U+03C2 ISOgrk3 --> 396*8eb4eb26SDawid Weiss<!ENTITY sigma "σ" ><!-- greek small letter sigma, U+03C3 ISOgrk3 --> 397*8eb4eb26SDawid Weiss<!ENTITY tau "τ" ><!-- greek small letter tau, U+03C4 ISOgrk3 --> 398*8eb4eb26SDawid Weiss<!ENTITY upsilon "υ" ><!-- greek small letter upsilon, U+03C5 ISOgrk3 --> 399*8eb4eb26SDawid Weiss<!ENTITY phi "φ" ><!-- greek small letter phi, U+03C6 ISOgrk3 --> 400*8eb4eb26SDawid Weiss<!ENTITY chi "χ" ><!-- greek small letter chi, U+03C7 ISOgrk3 --> 401*8eb4eb26SDawid Weiss<!ENTITY psi "ψ" ><!-- greek small letter psi, U+03C8 ISOgrk3 --> 402*8eb4eb26SDawid Weiss<!ENTITY omega "ω" ><!-- greek small letter omega, U+03C9 ISOgrk3 --> 403*8eb4eb26SDawid Weiss<!ENTITY thetasym "ϑ" ><!-- greek small letter theta symbol, U+03D1 NEW --> 404*8eb4eb26SDawid Weiss<!ENTITY upsih "ϒ" ><!-- greek upsilon with hook symbol, U+03D2 NEW --> 405*8eb4eb26SDawid Weiss<!ENTITY piv "ϖ" ><!-- greek pi symbol, U+03D6 ISOgrk3 --> 406*8eb4eb26SDawid Weiss 407*8eb4eb26SDawid Weiss<!-- General Punctuation --> 408*8eb4eb26SDawid Weiss<!ENTITY bull "•" ><!-- bullet = black small circle, U+2022 ISOpub --> 409*8eb4eb26SDawid Weiss<!-- bullet is NOT the same as bullet operator, U+2219 --> 410*8eb4eb26SDawid Weiss<!ENTITY hellip "…" ><!-- horizontal ellipsis = three dot leader, U+2026 ISOpub --> 411*8eb4eb26SDawid 
Weiss<!ENTITY prime "′" ><!-- prime = minutes = feet, U+2032 ISOtech --> 412*8eb4eb26SDawid Weiss<!ENTITY Prime "″" ><!-- double prime = seconds = inches, U+2033 ISOtech --> 413*8eb4eb26SDawid Weiss<!ENTITY oline "‾" ><!-- overline = spacing overscore, U+203E NEW --> 414*8eb4eb26SDawid Weiss<!ENTITY frasl "⁄" ><!-- fraction slash, U+2044 NEW --> 415*8eb4eb26SDawid Weiss 416*8eb4eb26SDawid Weiss<!-- Letterlike Symbols --> 417*8eb4eb26SDawid Weiss<!ENTITY weierp "℘" ><!-- script capital P = power set = Weierstrass p, U+2118 ISOamso --> 418*8eb4eb26SDawid Weiss<!ENTITY image "ℑ" ><!-- blackletter capital I = imaginary part, U+2111 ISOamso --> 419*8eb4eb26SDawid Weiss<!ENTITY real "ℜ" ><!-- blackletter capital R = real part symbol, U+211C ISOamso --> 420*8eb4eb26SDawid Weiss<!ENTITY trade "™" ><!-- trade mark sign, U+2122 ISOnum --> 421*8eb4eb26SDawid Weiss<!ENTITY alefsym "ℵ" ><!-- alef symbol = first transfinite cardinal, U+2135 NEW --> 422*8eb4eb26SDawid Weiss<!-- alef symbol is NOT the same as hebrew letter alef, U+05D0 although 423*8eb4eb26SDawid Weiss the same glyph could be used to depict both characters --> 424*8eb4eb26SDawid Weiss 425*8eb4eb26SDawid Weiss<!-- Arrows --> 426*8eb4eb26SDawid Weiss<!ENTITY larr "←" ><!-- leftwards arrow, U+2190 ISOnum --> 427*8eb4eb26SDawid Weiss<!ENTITY uarr "↑" ><!-- upwards arrow, U+2191 ISOnum--> 428*8eb4eb26SDawid Weiss<!ENTITY rarr "→" ><!-- rightwards arrow, U+2192 ISOnum --> 429*8eb4eb26SDawid Weiss<!ENTITY darr "↓" ><!-- downwards arrow, U+2193 ISOnum --> 430*8eb4eb26SDawid Weiss<!ENTITY harr "↔" ><!-- left right arrow, U+2194 ISOamsa --> 431*8eb4eb26SDawid Weiss<!ENTITY crarr "↵" ><!-- downwards arrow with corner leftwards 432*8eb4eb26SDawid Weiss = carriage return, U+21B5 NEW --> 433*8eb4eb26SDawid Weiss<!ENTITY lArr "⇐" ><!-- leftwards double arrow, U+21D0 ISOtech --> 434*8eb4eb26SDawid Weiss<!-- Unicode does not say that lArr is the same as the 'is implied by' arrow 435*8eb4eb26SDawid Weiss but also does not have any 
other character for that function. So ? lArr can 436*8eb4eb26SDawid Weiss be used for 'is implied by' as ISOtech suggests --> 437*8eb4eb26SDawid Weiss<!ENTITY uArr "⇑" ><!-- upwards double arrow, U+21D1 ISOamsa --> 438*8eb4eb26SDawid Weiss<!ENTITY rArr "⇒" ><!-- rightwards double arrow, U+21D2 ISOtech --> 439*8eb4eb26SDawid Weiss<!-- Unicode does not say this is the 'implies' character but does not have 440*8eb4eb26SDawid Weiss another character with this function so ? 441*8eb4eb26SDawid Weiss rArr can be used for 'implies' as ISOtech suggests --> 442*8eb4eb26SDawid Weiss<!ENTITY dArr "⇓" ><!-- downwards double arrow, U+21D3 ISOamsa --> 443*8eb4eb26SDawid Weiss<!ENTITY hArr "⇔" ><!-- left right double arrow, U+21D4 ISOamsa --> 444*8eb4eb26SDawid Weiss 445*8eb4eb26SDawid Weiss<!-- Mathematical Operators --> 446*8eb4eb26SDawid Weiss<!ENTITY forall "∀" ><!-- for all, U+2200 ISOtech --> 447*8eb4eb26SDawid Weiss<!ENTITY part "∂" ><!-- partial differential, U+2202 ISOtech --> 448*8eb4eb26SDawid Weiss<!ENTITY exist "∃" ><!-- there exists, U+2203 ISOtech --> 449*8eb4eb26SDawid Weiss<!ENTITY empty "∅" ><!-- empty set = null set, U+2205 ISOamso --> 450*8eb4eb26SDawid Weiss<!ENTITY nabla "∇" ><!-- nabla = backward difference, U+2207 ISOtech --> 451*8eb4eb26SDawid Weiss<!ENTITY isin "∈" ><!-- element of, U+2208 ISOtech --> 452*8eb4eb26SDawid Weiss<!ENTITY notin "∉" ><!-- not an element of, U+2209 ISOtech --> 453*8eb4eb26SDawid Weiss<!ENTITY ni "∋" ><!-- contains as member, U+220B ISOtech --> 454*8eb4eb26SDawid Weiss<!-- should there be a more memorable name than 'ni'? 
--> 455*8eb4eb26SDawid Weiss<!ENTITY prod "∏" ><!-- n-ary product = product sign, U+220F ISOamsb --> 456*8eb4eb26SDawid Weiss<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though 457*8eb4eb26SDawid Weiss the same glyph might be used for both --> 458*8eb4eb26SDawid Weiss<!ENTITY sum "∑" ><!-- n-ary sumation, U+2211 ISOamsb --> 459*8eb4eb26SDawid Weiss<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma' 460*8eb4eb26SDawid Weiss though the same glyph might be used for both --> 461*8eb4eb26SDawid Weiss<!ENTITY minus "−" ><!-- minus sign, U+2212 ISOtech --> 462*8eb4eb26SDawid Weiss<!ENTITY lowast "∗" ><!-- asterisk operator, U+2217 ISOtech --> 463*8eb4eb26SDawid Weiss<!ENTITY radic "√" ><!-- square root = radical sign, U+221A ISOtech --> 464*8eb4eb26SDawid Weiss<!ENTITY prop "∝" ><!-- proportional to, U+221D ISOtech --> 465*8eb4eb26SDawid Weiss<!ENTITY infin "∞" ><!-- infinity, U+221E ISOtech --> 466*8eb4eb26SDawid Weiss<!ENTITY ang "∠" ><!-- angle, U+2220 ISOamso --> 467*8eb4eb26SDawid Weiss<!ENTITY and "∧" ><!-- logical and = wedge, U+2227 ISOtech --> 468*8eb4eb26SDawid Weiss<!ENTITY or "∨" ><!-- logical or = vee, U+2228 ISOtech --> 469*8eb4eb26SDawid Weiss<!ENTITY cap "∩" ><!-- intersection = cap, U+2229 ISOtech --> 470*8eb4eb26SDawid Weiss<!ENTITY cup "∪" ><!-- union = cup, U+222A ISOtech --> 471*8eb4eb26SDawid Weiss<!ENTITY int "∫" ><!-- integral, U+222B ISOtech --> 472*8eb4eb26SDawid Weiss<!ENTITY there4 "∴" ><!-- therefore, U+2234 ISOtech --> 473*8eb4eb26SDawid Weiss<!ENTITY sim "∼" ><!-- tilde operator = varies with = similar to, U+223C ISOtech --> 474*8eb4eb26SDawid Weiss<!-- tilde operator is NOT the same character as the tilde, U+007E, 475*8eb4eb26SDawid Weiss although the same glyph might be used to represent both --> 476*8eb4eb26SDawid Weiss<!ENTITY cong "≅" ><!-- approximately equal to, U+2245 ISOtech --> 477*8eb4eb26SDawid Weiss<!ENTITY asymp "≈" ><!-- almost equal to = asymptotic to, U+2248 ISOamsr --> 
478*8eb4eb26SDawid Weiss<!ENTITY ne "≠" ><!-- not equal to, U+2260 ISOtech --> 479*8eb4eb26SDawid Weiss<!ENTITY equiv "≡" ><!-- identical to, U+2261 ISOtech --> 480*8eb4eb26SDawid Weiss<!ENTITY le "≤" ><!-- less-than or equal to, U+2264 ISOtech --> 481*8eb4eb26SDawid Weiss<!ENTITY ge "≥" ><!-- greater-than or equal to, U+2265 ISOtech --> 482*8eb4eb26SDawid Weiss<!ENTITY sub "⊂" ><!-- subset of, U+2282 ISOtech --> 483*8eb4eb26SDawid Weiss<!ENTITY sup "⊃" ><!-- superset of, U+2283 ISOtech --> 484*8eb4eb26SDawid Weiss<!-- note that nsup, 'not a superset of, U+2283' is not covered by the Symbol 485*8eb4eb26SDawid Weiss font encoding and is not included. Should it be, for symmetry? 486*8eb4eb26SDawid Weiss It is in ISOamsn --> 487*8eb4eb26SDawid Weiss<!ENTITY nsub "⊄" ><!-- not a subset of, U+2284 ISOamsn --> 488*8eb4eb26SDawid Weiss<!ENTITY sube "⊆" ><!-- subset of or equal to, U+2286 ISOtech --> 489*8eb4eb26SDawid Weiss<!ENTITY supe "⊇" ><!-- superset of or equal to, U+2287 ISOtech --> 490*8eb4eb26SDawid Weiss<!ENTITY oplus "⊕" ><!-- circled plus = direct sum, U+2295 ISOamsb --> 491*8eb4eb26SDawid Weiss<!ENTITY otimes "⊗" ><!-- circled times = vector product, U+2297 ISOamsb --> 492*8eb4eb26SDawid Weiss<!ENTITY perp "⊥" ><!-- up tack = orthogonal to = perpendicular, U+22A5 ISOtech --> 493*8eb4eb26SDawid Weiss<!ENTITY sdot "⋅" ><!-- dot operator, U+22C5 ISOamsb --> 494*8eb4eb26SDawid Weiss<!-- dot operator is NOT the same character as U+00B7 middle dot --> 495*8eb4eb26SDawid Weiss 496*8eb4eb26SDawid Weiss<!-- Miscellaneous Technical --> 497*8eb4eb26SDawid Weiss<!ENTITY lceil "⌈" ><!-- left ceiling = apl upstile, U+2308 ISOamsc --> 498*8eb4eb26SDawid Weiss<!ENTITY rceil "⌉" ><!-- right ceiling, U+2309 ISOamsc --> 499*8eb4eb26SDawid Weiss<!ENTITY lfloor "⌊" ><!-- left floor = apl downstile, U+230A ISOamsc --> 500*8eb4eb26SDawid Weiss<!ENTITY rfloor "⌋" ><!-- right floor, U+230B ISOamsc --> 501*8eb4eb26SDawid Weiss<!ENTITY lang "〈" ><!-- left-pointing angle bracket = bra, 
U+2329 ISOtech --> 502*8eb4eb26SDawid Weiss<!-- lang is NOT the same character as U+003C 'less than' 503*8eb4eb26SDawid Weiss or U+2039 'single left-pointing angle quotation mark' --> 504*8eb4eb26SDawid Weiss<!ENTITY rang "〉" ><!-- right-pointing angle bracket = ket, U+232A ISOtech --> 505*8eb4eb26SDawid Weiss<!-- rang is NOT the same character as U+003E 'greater than' 506*8eb4eb26SDawid Weiss or U+203A 'single right-pointing angle quotation mark' --> 507*8eb4eb26SDawid Weiss 508*8eb4eb26SDawid Weiss<!-- Geometric Shapes --> 509*8eb4eb26SDawid Weiss<!ENTITY loz "◊" ><!-- lozenge, U+25CA ISOpub --> 510*8eb4eb26SDawid Weiss 511*8eb4eb26SDawid Weiss<!-- Miscellaneous Symbols --> 512*8eb4eb26SDawid Weiss<!ENTITY spades "♠" ><!-- black spade suit, U+2660 ISOpub --> 513*8eb4eb26SDawid Weiss<!-- black here seems to mean filled as opposed to hollow --> 514*8eb4eb26SDawid Weiss<!ENTITY clubs "♣" ><!-- black club suit = shamrock, U+2663 ISOpub --> 515*8eb4eb26SDawid Weiss<!ENTITY hearts "♥" ><!-- black heart suit = valentine, U+2665 ISOpub --> 516*8eb4eb26SDawid Weiss<!ENTITY diams "♦" ><!-- black diamond suit, U+2666 ISOpub --> 517*8eb4eb26SDawid Weiss 518*8eb4eb26SDawid Weiss<!-- end of xhtml-symbol.ent --> 519*8eb4eb26SDawid Weiss""" 520*8eb4eb26SDawid Weiss return text 521*8eb4eb26SDawid Weiss 522*8eb4eb26SDawid Weissdef get_apache_license(): 523*8eb4eb26SDawid Weiss license = r"""/* 524*8eb4eb26SDawid Weiss * Licensed to the Apache Software Foundation (ASF) under one or more 525*8eb4eb26SDawid Weiss * contributor license agreements. See the NOTICE file distributed with 526*8eb4eb26SDawid Weiss * this work for additional information regarding copyright ownership. 527*8eb4eb26SDawid Weiss * The ASF licenses this file to You under the Apache License, Version 2.0 528*8eb4eb26SDawid Weiss * (the "License"); you may not use this file except in compliance with 529*8eb4eb26SDawid Weiss * the License. 
You may obtain a copy of the License at 530*8eb4eb26SDawid Weiss * 531*8eb4eb26SDawid Weiss * http://www.apache.org/licenses/LICENSE-2.0 532*8eb4eb26SDawid Weiss * 533*8eb4eb26SDawid Weiss * Unless required by applicable law or agreed to in writing, software 534*8eb4eb26SDawid Weiss * distributed under the License is distributed on an "AS IS" BASIS, 535*8eb4eb26SDawid Weiss * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 536*8eb4eb26SDawid Weiss * See the License for the specific language governing permissions and 537*8eb4eb26SDawid Weiss * limitations under the License. 538*8eb4eb26SDawid Weiss */ 539*8eb4eb26SDawid Weiss 540*8eb4eb26SDawid Weiss""" 541*8eb4eb26SDawid Weiss return license 542*8eb4eb26SDawid Weiss 543*8eb4eb26SDawid Weissmain() 544