#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Parses Myanmar text, with syllable as token.
#
# This is an ICU break-iterator rule file.  A '#' starts a comment that runs
# to the end of the line, so every statement below must stay on its own line.
#

# Myanmar-script characters whose general category is Other_Letter.
$Cons = [[:Other_Letter:]&[:Myanmar:]];
# U+1039 MYANMAR SIGN VIRAMA.
$Virama = [\u1039];
# U+103A MYANMAR SIGN ASAT.
$Asat = [\u103A];

# Characters with Line_Break=Word_Joiner; used below to glue syllables together.
$WordJoin = [:Line_Break=Word_Joiner:];

#
# default numerical definitions
#
$Extend = [\p{Word_Break = Extend}];
$Format = [\p{Word_Break = Format}];
$MidNumLet = [\p{Word_Break = MidNumLet}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];

# "Ex" variants: the base class followed by any number of Extend/Format
# characters, so that trailing marks and format controls stay attached.
$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
$MidNumEx = $MidNum ($Extend | $Format)*;
$NumericEx = $Numeric ($Extend | $Format)*;
$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;

# A consonant plus its trailing Extend/Format characters.
$ConsEx = $Cons ($Extend | $Format)*;
# A consonant immediately followed by asat, optionally followed by
# virama + consonant, then trailing Extend/Format characters.
$AsatEx = $Cons $Asat ($Virama $ConsEx)? ($Extend | $Format)*;
# One syllable: a consonant, an optional virama + consonant, and then zero or
# more asat-marked groups.
$MyanmarSyllableEx = $ConsEx ($Virama $ConsEx)? ($AsatEx)*;
# One or more syllables, with a Word_Joiner character between each adjacent
# pair; the whole run is matched as a single unit.
$MyanmarJoinedSyllableEx = $MyanmarSyllableEx ($WordJoin $MyanmarSyllableEx)*;

!!forward;
# Myanmar syllable runs are reported with rule status 200
# (the start of ICU's "letter" word-status range).
$MyanmarJoinedSyllableEx {200};

# default numeric rules
# Digit runs, optionally joined by a single MidNum/MidNumLet character and
# optionally followed by ExtendNumLet characters, are reported with rule
# status 100 (the start of ICU's "number" word-status range).
$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)* {100};