From 2149de4a2f26df890cb267df67fdaa481b7429c2 Mon Sep 17 00:00:00 2001
From: Neil S Patel <46502494+NeilSureshPatel@users.noreply.github.com>
Date: Tue, 26 Nov 2024 14:15:07 -0500
Subject: [PATCH] CLDR-17885 Add Sunu-Deva and Deva-Sunu transforms

CLDR-17885 Initial commit of Sunuwar to Devanagari and Devanagari to Sunuwar transforms using the InterIndic intermediate step. Transforms are based on the SWS standard.
---
 common/transforms/Devanagari-Sunuwar_SWS.xml | 21 +
 common/transforms/InterIndic-Sunuwar_SWS.xml | 472 +++++++++++++++++++
 common/transforms/Sunuwar-Devanagari_SWS.xml | 21 +
 common/transforms/Sunuwar-InterIndic_SWS.xml | 360 ++++++++++++++
 4 files changed, 874 insertions(+)
 create mode 100644 common/transforms/Devanagari-Sunuwar_SWS.xml
 create mode 100644 common/transforms/InterIndic-Sunuwar_SWS.xml
 create mode 100644 common/transforms/Sunuwar-Devanagari_SWS.xml
 create mode 100644 common/transforms/Sunuwar-InterIndic_SWS.xml

diff --git a/common/transforms/Devanagari-Sunuwar_SWS.xml b/common/transforms/Devanagari-Sunuwar_SWS.xml
new file mode 100644
index 00000000000..0131edef9c2
--- /dev/null
+++ b/common/transforms/Devanagari-Sunuwar_SWS.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/common/transforms/InterIndic-Sunuwar_SWS.xml b/common/transforms/InterIndic-Sunuwar_SWS.xml
new file mode 100644
index 00000000000..85463d57160
--- /dev/null
+++ b/common/transforms/InterIndic-Sunuwar_SWS.xml
@@ -0,0 +1,472 @@
+
+
+
+
+
+
+
+
+########################################################################
+# InterIndic-Sunuwar_SWS: names the InterIndic private-use code points, then maps them to Sunuwar
+#\uE000 reserved
+#signs (chandrabindu, anusvara, visarga); the consonants start at $ka
+$chandrabindu=\uE001;
+$anusvara=\uE002;
+$visarga=\uE003;
+#\uE004 reserved
+# w←vowel→ represents the stand-alone form
+$wa=\uE005;
+$waa=\uE006;
+$wi=\uE007;
+$wii=\uE008;
+$wu=\uE009;
+$wuu=\uE00A;
+$wr=\uE00B;
+$wl=\uE00C;
+$wce=\uE00D; # LETTER CANDRA E
+$wse=\uE00E; # LETTER SHORT E
+$we=\uE00F; # ए LETTER E
+$wai=\uE010;
+$wco=\uE011; # LETTER CANDRA O
+$wso=\uE012; # LETTER SHORT O
+$wo=\uE013; # ओ LETTER O
+$wau=\uE014;
+$ka=\uE015;
+$kha=\uE016;
+$ga=\uE017;
+$gha=\uE018;
+$nga=\uE019;
+$ca=\uE01A;
+$cha=\uE01B;
+$ja=\uE01C;
+$jha=\uE01D;
+$nya=\uE01E;
+$tta=\uE01F;
+$ttha=\uE020;
+$dda=\uE021;
+$ddha=\uE022;
+$nna=\uE023;
+$ta=\uE024;
+$tha=\uE025;
+$da=\uE026;
+$dha=\uE027;
+$na=\uE028;
+$ena=\uE029; #compatibility
+$pa=\uE02A;
+$pha=\uE02B;
+$ba=\uE02C;
+$bha=\uE02D;
+$ma=\uE02E;
+$ya=\uE02F;
+$ra=\uE030;
+$vva=\uE081;
+$rra=\uE031;
+$la=\uE032;
+$lla=\uE033;
+$ela=\uE034; #compatibility
+$va=\uE035;
+$sha=\uE036;
+$ssa=\uE037;
+$sa=\uE038;
+$ha=\uE039;
+#\u093A Reserved
+#\u093B Reserved
+$nukta=\uE03C;
+$avagraha=\uE03D; # SIGN AVAGRAHA
+# ←vowel→ represents the dependent form
+$aa=\uE03E;
+$i=\uE03F;
+$ii=\uE040;
+$u=\uE041;
+$uu=\uE042;
+$rh=\uE043;
+$rrh=\uE044;
+$ce=\uE045; #VOWEL SIGN CANDRA E
+$se=\uE046; #VOWEL SIGN SHORT E
+$e=\uE047;
+$ai=\uE048;
+$co=\uE049; # VOWEL SIGN CANDRA O
+$so=\uE04A; # VOWEL SIGN SHORT O
+$o=\uE04B; # ो
+$au=\uE04C;
+$virama=\uE04D;
+# \u094E Reserved
+# \u094F Reserved
+$om=\uE050; # OM
+\uE051→; # UNMAPPED STRESS SIGN UDATTA
+\uE052→; # UNMAPPED STRESS SIGN ANUDATTA
+\uE053→; # UNMAPPED GRAVE ACCENT
+\uE054→; # UNMAPPED ACUTE ACCENT
+$lm = \uE055;# Telugu Length Mark
+$ailm=\uE056;# AI Length Mark
+$aulm=\uE057;# AU Length Mark
+#urdu compatibility forms
+$uka=\uE058;
+$ukha=\uE059;
+$ugha=\uE05A;
+$ujha=\uE05B;
+$uddha=\uE05C;
+$udha=\uE05D;
+$ufa=\uE05E;
+$uya=\uE05F;
+$wrr=\uE060;
+$wll=\uE061;
+$lh=\uE062;
+$llh=\uE063;
+$danda=\uE064;
+$doubleDanda=\uE065;
+$zero=\uE066; # DIGIT ZERO
+$one=\uE067; # DIGIT ONE
+$two=\uE068; # DIGIT TWO
+$three=\uE069; # DIGIT THREE
+$four=\uE06A; # DIGIT FOUR
+$five=\uE06B; # DIGIT FIVE
+$six=\uE06C; # DIGIT SIX
+$seven=\uE06D; # DIGIT SEVEN
+$eight=\uE06E; # DIGIT EIGHT
+$nine=\uE06F; # DIGIT NINE
+# Glottal stop
+$dgs=\uE082;
+#Khanda-ta
+$kta=\uE083;
+$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
+$depVowelBelow=[\uE041-\uE044];
+# $x was originally called '§'; $z was '%'
+$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
+$z=[bcdfghjklmnpqrstvwxyz];
+$vowels=[aeiour̥̄̆];
+$forceIndependentMatra = [^[[:L:][̀-͌]]];
+######################################################################
+# Sunuwar letters
+######################################################################
+#consonants
+# NOTE: \uXXXX consumes exactly four hex digits (so \u11BC0 would be U+11BC followed by "0"); Sunuwar is supplementary and needs \UXXXXXXXX.
+$devi=\U00011BC0;
+$tasla=\U00011BC1;
+$reu=\U00011BC4;
+$kik=\U00011BC6;
+$mama=\U00011BC7;
+# $pip formerly used a literal character as a workaround for the four-digit escape; it now uses \U like every other letter.
+$pip=\U00011BC9;
+$gil=\U00011BCA;
+$hamso=\U00011BCB;
+$carmi=\U00011BCC;
+$nah=\U00011BCD;
+$bur=\U00011BCE;
+$jyah=\U00011BCF;
+$loacha=\U00011BD0;
+$shyele=\U00011BD2;
+$varca=\U00011BD3;
+$yat=\U00011BD4;
+$ava=\U00011BD5;
+$donga=\U00011BD7;
+$thari=\U00011BD8;
+$phar=\U00011BD9;
+$ngar=\U00011BDA;
+$khaSunu=\U00011BDB;
+$shyer=\U00011BDC;
+$chelap=\U00011BDD;
+$tentu=\U00011BDE;
+$thele=\U00011BDF;
+$kloko=\U00011BE0;
+$pvo=\U00011BE1;
+#vowels
+$eko=\U00011BC2;
+$imar=\U00011BC3;
+$utthi=\U00011BC5;
+$appho=\U00011BC8;
+$otthi=\U00011BD1;
+$aal=\U00011BD6;
+$consonantsdevasunu=[$ka$kha$ga$ca$cha$nga$ja$tta$ttha$dda$ta$tha$da$na$pa$pha$ba$ma$ya$ra$la$va$sa$ha]; # devanagari consonants that map directly to sunuwar consonants
+$vowelsdevasunu=[$aa$i$e$o$u]; #devanagari vowels that map directly to sunuwar vowels
+######################################################################
+# convert from Native letters to Sunuwar letters
+######################################################################
+#transliterations for anusvara
+$anusvara→ \u0303;
+#auspicious symbol
+$om→$pvo;
+# normal consonants: bare letter before a dependent vowel ($x), with a virama, or before a non-$consonantsdevasunu character; otherwise letter + $appho
+$ka}$x→$kik;
+$ka$virama→$kik;
+$ka}[^$consonantsdevasunu]→$kik;
+$ka→$kik$appho;
+#
+$kha}$x→$khaSunu;
+$kha$virama→$khaSunu;
+$kha}[^$consonantsdevasunu]→$khaSunu;
+$kha→$khaSunu$appho;
+#
+$ga}$x→$gil;
+$ga$virama→$gil;
+$ga}[^$consonantsdevasunu]→$gil;
+$ga→$gil$appho;
+# gha is written gil + hamso
+$gha}$x→$gil$hamso;
+$gha$virama→$gil$hamso;
+$gha}[^$consonantsdevasunu]→$gil$hamso;
+$gha→$gil$hamso$appho;
+#
+$nga}$x→$ngar;
+$nga$virama→$ngar;
+$nga}[^$consonantsdevasunu]→$ngar;
+$nga→$ngar$appho;
+#
+$ca}$x→$carmi;
+$ca$virama→$carmi;
+$ca}[^$consonantsdevasunu]→$carmi;
+$ca→$carmi$appho;
+# Every group here has the same shape: bare letter before a dependent vowel ($x), with a virama, or before a non-$consonantsdevasunu character; otherwise letter + $appho.
+$cha}$x→$chelap;
+$cha$virama→$chelap;
+$cha}[^$consonantsdevasunu]→$chelap;
+$cha→$chelap$appho;
+#
+$ja}$x→$jyah;
+$ja$virama→$jyah;
+$ja}[^$consonantsdevasunu]→$jyah;
+$ja→$jyah$appho;
+# jha is written jyah + hamso
+$jha}$x→$jyah$hamso;
+$jha$virama→$jyah$hamso;
+$jha}[^$consonantsdevasunu]→$jyah$hamso;
+$jha→$jyah$hamso$appho;
+# nya is written nah + yat
+$nya}$x→$nah$yat;
+$nya$virama→$nah$yat;
+$nya}[^$consonantsdevasunu]→$nah$yat;
+$nya→$nah$yat$appho;
+#
+$tta}$x→$tentu;
+$tta$virama→$tentu;
+$tta}[^$consonantsdevasunu]→$tentu;
+$tta→$tentu$appho;
+#
+$ttha}$x→$thele;
+$ttha$virama→$thele;
+$ttha}[^$consonantsdevasunu]→$thele;
+$ttha→$thele$appho;
+#
+$dda}$x→$donga;
+$dda$virama→$donga;
+$dda}[^$consonantsdevasunu]→$donga;
+$dda→$donga$appho;
+# ddha is written donga + hamso
+$ddha}$x→$donga$hamso;
+$ddha$virama→$donga$hamso;
+$ddha}[^$consonantsdevasunu]→$donga$hamso;
+$ddha→$donga$hamso$appho;
+# nna maps to $nah, the same letter used for na
+$nna}$x→$nah;
+$nna$virama→$nah;
+$nna}[^$consonantsdevasunu]→$nah;
+$nna→$nah$appho;
+#
+$ta}$x→$tasla;
+$ta$virama→$tasla;
+$ta}[^$consonantsdevasunu]→$tasla;
+$ta→$tasla$appho;
+#
+$tha}$x→$thari;
+$tha$virama→$thari;
+$tha}[^$consonantsdevasunu]→$thari;
+$tha→$thari$appho;
+#
+$da}$x→$devi;
+$da$virama→$devi;
+$da}[^$consonantsdevasunu]→$devi;
+$da→$devi$appho;
+# dha is written devi + hamso
+$dha}$x→$devi$hamso;
+$dha$virama→$devi$hamso;
+$dha}[^$consonantsdevasunu]→$devi$hamso;
+$dha→$devi$hamso$appho;
+#
+$na}$x→$nah;
+$na$virama→$nah;
+$na}[^$consonantsdevasunu]→$nah;
+$na→$nah$appho;
+#
+$pa}$x→$pip;
+$pa$virama→$pip;
+$pa}[^$consonantsdevasunu]→$pip;
+$pa→$pip$appho;
+#
+$pha}$x→$phar;
+$pha$virama→$phar;
+$pha}[^$consonantsdevasunu]→$phar;
+$pha→$phar$appho;
+# ba + virama + va maps to ava; these rules sit before the plain ba rules so the cluster is tried first
+$ba$virama$va}$x → $ava;
+$ba$virama$va$virama → $ava;
+$ba$virama$va}[^$consonantsdevasunu]→$ava;
+$ba$virama$va→$ava$appho;
+#
+$ba}$x→$bur;
+$ba$virama→$bur;
+$ba}[^$consonantsdevasunu]→$bur;
+$ba→$bur$appho;
+# bha is written bur + hamso
+$bha}$x→$bur$hamso;
+$bha$virama→$bur$hamso;
+$bha}[^$consonantsdevasunu]→$bur$hamso;
+$bha→$bur$hamso$appho;
+#
+$ma}$x→$mama;
+$ma$virama→$mama;
+$ma}[^$consonantsdevasunu]→$mama;
+$ma→$mama$appho;
+#
+$ya}$x→$yat;
+$ya$virama→$yat;
+$ya}[^$consonantsdevasunu]→$yat;
+$ya→$yat$appho;
+#
+$ra}$x→$reu;
+$ra$virama→$reu;
+$ra}[^$consonantsdevasunu]→$reu;
+$ra→$reu$appho;
+#
+$la}$x→$loacha;
+$la$virama→$loacha;
+$la}[^$consonantsdevasunu]→$loacha;
+$la→$loacha$appho;
+#
+$va}$x→$varca;
+$va$virama→$varca;
+$va}[^$consonantsdevasunu]→$varca;
+$va→$varca$appho;
+#
+$sa}$x→$shyele;
+$sa$virama→$shyele;
+$sa}[^$consonantsdevasunu]→$shyele;
+$sa→$shyele$appho;
+# sha and ssa (next group) both map to $shyer
+$sha}$x→$shyer;
+$sha$virama→$shyer;
+$sha}[^$consonantsdevasunu]→$shyer;
+$sha→$shyer$appho;
+#
+$ssa}$x→$shyer;
+$ssa$virama→$shyer;
+$ssa}[^$consonantsdevasunu]→$shyer;
+$ssa→$shyer$appho;
+#
+$ha}$x→$hamso;
+$ha$virama→$hamso;
+$ha}[^$consonantsdevasunu]→$hamso;
+$ha→$hamso$appho;
+# independent a followed by virama or nukta maps to kloko
+$wa$virama→$kloko;
+$wa$nukta→$kloko;
+# dependent vowels (should never occur except following consonants); after a $forceIndependentMatra character the output is prefixed with a combining marker
+$forceIndependentMatra{$aa → ̔$aal;
+$forceIndependentMatra{$ai → ̔$appho$imar;
+$forceIndependentMatra{$ii → ̔$imar':';
+$forceIndependentMatra{$i → ̔$imar;
+$forceIndependentMatra{$uu → ̔$utthi':';
+$forceIndependentMatra{$u → ̔$utthi;
+$forceIndependentMatra{$rh → ̔$reu$imar;
+$forceIndependentMatra{$e → ̔$eko;
+$forceIndependentMatra{$o → ̔$otthi;
+$aa → $aal;
+$ai → $appho$imar;
+$ii → $imar':';
+$i → $imar;
+$uu → $utthi':';
+$u → $utthi;
+$rh → $reu$imar;
+$e → $eko;
+$o → $otthi;
+# independent vowels when preceded by vowels (output is prefixed with a literal apostrophe)
+$vowels{$waa → ''$aal;
+$vowels{$wai → ''$appho$imar;
+$vowels{$wii → ''$imar':';
+$vowels{$wi → ''$imar;
+$vowels{$wuu → ''$utthi':';
+$vowels{$wu → ''$utthi;
+$vowels{$wr → ''$reu$imar;
+$vowels{$we → ''$eko;
+$vowels{$wo → ''$otthi;
+$vowels{$wa → ''$appho;
+# independent vowels (otherwise)
+$waa → $aal;
+$wai → $appho$imar;
+$wii → $imar':';
+$wi → $imar;
+$wuu → $utthi':';
+$wu → $utthi;
+$wr → $reu$imar;
+$we → $eko;
+$wo → $otthi;
+$wa → $appho;
+#stress marks
+$avagraha → ̕;
+$chandrabindu$anusvara→\u0303;
+$chandrabindu → \u0303;
+$visarga→':';
+#numbers: Sunuwar digits are supplementary code points, so they need the eight-digit \U escape (\u reads only four hex digits)
+$zero → \U00011BF0;
+$one → \U00011BF1;
+$two → \U00011BF2;
+$three → \U00011BF3;
+$four → \U00011BF4;
+$five → \U00011BF5;
+$six → \U00011BF6;
+$seven → \U00011BF7;
+$eight → \U00011BF8;
+$nine → \U00011BF9;
+$lm →;
+$ailm →;
+$aulm →;
+$dgs→ʔ;
+$kta→ṯ;
+$danda→'.';
+$doubleDanda→'.';
+\uE070→'.'; # ABBREVIATION SIGN
+# LETTER RA WITH MIDDLE DIAGONAL
+\uE071}$x→$reu;
+\uE071$virama→$reu;
+\uE071→$reu;
+# LETTER RA WITH LOWER DIAGONAL
+\uE072}$x→$reu;
+\uE072$virama→$reu;
+\uE072→$reu;
+\uE073→; # RUPEE MARK
+\uE074→; # RUPEE SIGN
+\uE075→; # CURRENCY NUMERATOR ONE
+\uE076→; # CURRENCY NUMERATOR TWO
+\uE077→; # CURRENCY NUMERATOR THREE
+\uE078→; # CURRENCY NUMERATOR FOUR
+\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
+\uE07A→; # CURRENCY DENOMINATOR SIXTEEN
+\uE07B→; # ISSHAR
+\uE07C→; # TIPPI
+\uE07D→; # ADDAK
+\uE07E→; # IRI
+\uE07F→; # URA
+\uE080→; # EK ONKAR
+\uE004→; # DEVANAGARI VOWEL SIGN SHORT A
+$virama→; # remove remaining viramas
+$nukta→\u0323 ; # combining dot below
+ ]]>
+
+
+
\ No newline at end of file
diff --git a/common/transforms/Sunuwar-Devanagari_SWS.xml b/common/transforms/Sunuwar-Devanagari_SWS.xml
new file mode 100644
index 00000000000..ca75e343ba5
--- /dev/null
+++ b/common/transforms/Sunuwar-Devanagari_SWS.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/common/transforms/Sunuwar-InterIndic_SWS.xml b/common/transforms/Sunuwar-InterIndic_SWS.xml
new file mode 100644
index 00000000000..6b48f06a5e6
--- /dev/null
+++ b/common/transforms/Sunuwar-InterIndic_SWS.xml
@@ -0,0 +1,360 @@
+
+
+
+
+
+
+
+
+
+########################################################################
+# Sunuwar-InterIndic_SWS
+#:: NFD;
+#\uE000 reserved
+#signs (chandrabindu, anusvara, visarga); the consonants start at $ka
+$chandrabindu=\uE001;
+$anusvara=\uE002;
+$visarga=\uE003;
+#\uE004 reserved
+# w←vowel→ represents the stand-alone form
+$wa=\uE005;
+$waa=\uE006;
+$wi=\uE007;
+$wii=\uE008;
+$wu=\uE009;
+$wuu=\uE00A;
+$wr=\uE00B;
+$wl=\uE00C;
+$wce=\uE00D; # LETTER CANDRA E
+$wse=\uE00E; # LETTER SHORT E
+$we=\uE00F; # ए LETTER E
+$wai=\uE010;
+$wco=\uE011; # LETTER CANDRA O
+$wso=\uE012; # LETTER SHORT O
+$wo=\uE013; # ओ LETTER O
+$wau=\uE014;
+$ka=\uE015;
+$kha=\uE016;
+$ga=\uE017;
+$gha=\uE018;
+$nga=\uE019;
+$ca=\uE01A;
+$cha=\uE01B;
+$ja=\uE01C;
+$jha=\uE01D;
+$nya=\uE01E;
+$tta=\uE01F;
+$ttha=\uE020;
+$dda=\uE021;
+$ddha=\uE022;
+$nna=\uE023;
+$ta=\uE024;
+$tha=\uE025;
+$da=\uE026;
+$dha=\uE027;
+$na=\uE028;
+$ena=\uE029; #compatibility
+$pa=\uE02A;
+$pha=\uE02B;
+$ba=\uE02C;
+$bha=\uE02D;
+$ma=\uE02E;
+$ya=\uE02F;
+$ra=\uE030;
+$rra=\uE031;
+$la=\uE032;
+$lla=\uE033;
+$ela=\uE034; #compatibility
+$va=\uE035;
+$vva=\uE081;
+$sha=\uE036;
+$ssa=\uE037;
+$sa=\uE038;
+$ha=\uE039;
+#\u093A Reserved
+#\u093B Reserved
+$nukta=\uE03C;
+$avagraha=\uE03D; # SIGN AVAGRAHA
+# ←vowel→ represents the dependent form
+$aa=\uE03E;
+$i=\uE03F;
+$ii=\uE040;
+$u=\uE041;
+$uu=\uE042;
+$rh=\uE043;
+$rrh=\uE044;
+$ce=\uE045; #VOWEL SIGN CANDRA E
+$se=\uE046; #VOWEL SIGN SHORT E
+$e=\uE047;
+$ai=\uE048;
+$co=\uE049; # VOWEL SIGN CANDRA O
+$so=\uE04A; # VOWEL SIGN SHORT O
+$o=\uE04B; # ो
+$au=\uE04C;
+$virama=\uE04D;
+# \u094E Reserved
+# \u094F Reserved
+$om = \uE050; # OM
+# ॑→\uE051; # UNMAPPED STRESS SIGN UDATTA
+# ॒→; # UNMAPPED STRESS SIGN ANUDATTA
+# ॓→; # UNMAPPED GRAVE ACCENT
+# ॔→; # UNMAPPED ACUTE ACCENT
+$lm = \uE055;# Telugu Length Mark
+$ailm=\uE056;# AI Length Mark
+$aulm=\uE057;# AU Length Mark
+#urdu compatibility forms
+$uka=\uE058;
+$ukha=\uE059;
+$ugha=\uE05A;
+$ujha=\uE05B;
+$uddha=\uE05C;
+$udha=\uE05D;
+$ufa=\uE05E;
+$uya=\uE05F;
+$wrr=\uE060;
+$wll=\uE061;
+$lh=\uE062;
+$llh=\uE063;
+$danda=\uE064;
+$doubleDanda=\uE065;
+$zero=\uE066; # DIGIT ZERO
+$one=\uE067; # DIGIT ONE
+$two=\uE068; # DIGIT TWO
+$three=\uE069; # DIGIT THREE
+$four=\uE06A; # DIGIT FOUR
+$five=\uE06B; # DIGIT FIVE
+$six=\uE06C; # DIGIT SIX
+$seven=\uE06D; # DIGIT SEVEN
+$eight=\uE06E; # DIGIT EIGHT
+$nine=\uE06F; # DIGIT NINE
+$dgs=\uE082;
+# For all other scripts
+$ecp0=\uE070;
+$ecp1=\uE071;
+$ecp2=\uE072;
+$ecp3=\uE073;
+$ecp4=\uE074;
+$ecp5=\uE075;
+$ecp6=\uE076;
+$ecp7=\uE077;
+$ecp8=\uE078;
+$ecp9=\uE079;
+$ecpA=\uE07A;
+$ecpB=\uE07B;
+$ecpC=\uE07C;
+$ecpD=\uE07D;
+$ecpE=\uE07E;
+$ecpF=\uE07F;
+# Khanda-ta
+$kta=\uE083;
+# ॰→; # nothing in Sunuwar maps to InterIndic ABBREVIATION SIGN
+$vowelAbove=[\uE03F\uE040\uE045-\uE04C];
+$vowelBelow=[\uE005-\uE007\uE009-\uE00B\uE00F\uE010\uE013\uE03E\uE03F\uE041-\uE044];
+$depVowels=[\uE03E-\uE04C];
+$endThing=[$danda$doubleDanda];
+# $x was originally called '§'; $z was '%'
+$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
+$z=[bcdfghjklmnpqrstvwxyz];
+$consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]];
+######################################################################
+# Sunuwar letters
+######################################################################
+#consonants
+$devi=𑯀;
+$tasla=𑯁;
+$reu=𑯄;
+$kik=𑯆;
+$mama=𑯇;
+$pip=𑯉;
+$gil=𑯊;
+$hamso=𑯋;
+$carmi=𑯌;
+$nah=𑯍;
+$bur=𑯎;
+$jyah=𑯏;
+$loacha=𑯐;
+$shyele=𑯒;
+$varca=𑯓;
+$yat=𑯔;
+$ava=𑯕;
+$donga=𑯗;
+$thari=𑯘;
+$phar=𑯙;
+$ngar=𑯚;
+$khaSunu=𑯛;
+$shyer=𑯜;
+$chelap=𑯝;
+$tentu=𑯞;
+$thele=𑯟;
+$kloko=𑯠;
+$pvo=𑯡;
+#vowels
+$eko=𑯂;
+$imar=𑯃;
+$utthi=𑯅;
+$appho=𑯈;
+$otthi=𑯑;
+$aal=𑯖;
+$consonantsdevasunu=[$ka$kha$ga$ca$cha$nga$ja$tta$ttha$dda$ta$tha$da$na$pa$pha$ba$ma$ya$ra$la$va$sa$ha]; # devanagari consonants that map directly to sunuwar consonants
+$sunuconsonants=[$devi$tasla$reu$kik$mama$pip$gil$hamso$carmi$nah$bur$jyah$loacha$shyele$varca$yat$ava$donga$thari$phar$ngar$khaSunu$shyer$chelap$tentu$thele];
+$vowelsdevasunu=[$aa$i$e$o$u]; #devanagari vowels that map directly to sunuwar vowels
+$nonletter = [:^Letter:] ;
+$vowelthings = [$eko$imar$utthi$appho$otthi$aal':'\u0303\u0300\u0301\u030D]*;
+######################################################################
+̕ → $avagraha;
+#\0303→$chandrabindu;
+$nonletter { $appho } $nonletter→$wa ;
+#convert auspicious symbol
+$pvo→$om;
+#handle shyer mapping. converts to ssa when following another shyer and vowel; the second letter must be emitted as $ssa because '|' rewinds the cursor and a second $shyer would re-trigger the rule
+#$shyer($vowelthings)($sunuconsonants)→|$shyer$1$shyer$virama;
+$shyer$shyer→$ssa$virama$ssa$virama;
+$shyer($vowelthings)$shyer→|$shyer$1$ssa$virama;
+($sunuconsonants)$shyer →|$1$ssa$virama;
+{ $shyer } $sunuconsonants→|$ssa$virama;
+# convert to independent forms at start of word or syllable:
+# dependent forms for roundtrip (NOTE(review): no ̔$imar→$i or $rh counterpart to the forward marker rules — confirm whether intended)
+̔$aal→$aa;
+̔$appho$imar→$ai;
+̔$imar':'→$ii;
+̔$utthi':'→$uu;
+̔$utthi→$u;
+̔$eko→$e;
+̔$otthi→$o;
+̔$appho→;
+# preceded by consonants
+$consonants{ $aal→$aa;
+$consonants{ $appho$imar→$ai;
+$consonants{ $imar':'→$ii;
+$consonants{ $imar→$i;
+$consonants{ $utthi':'→$uu;
+$consonants{ $utthi→$u;
+$consonants{ $eko→$e;
+$consonants{ $otthi→$o;
+# e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai})
+$aal→$waa;
+$appho$imar→$wai;
+$imar':'→$wii;
+$imar→$wi;
+$utthi':'→$wuu;
+$utthi→$wu;
+$eko→$we;
+$otthi→$wo;
+$kloko→$wa$virama;
+$tentu→$tta|$virama;
+$donga$hamso→$ddha|$virama;
+$donga→$dda|$virama;
+$khaSunu→$kha|$virama;
+$kik→$ka|$virama;
+$gil$hamso→$gha|$virama;
+$gil→$ga|$virama;
+$chelap→$cha|$virama;
+$carmi→$ca|$virama;
+$jyah$hamso→$jha|$virama;
+$jyah→$ja|$virama;
+$nah$yat→$nya|$virama;
+$thele→$ttha|$virama;
+$thari→$tha|$virama;
+$tasla→$ta|$virama;
+$devi$hamso→$dha|$virama;
+$devi→$da|$virama;
+#######################################################################
+# Need to address occasional use of nna in Devanagari-based Sunuwar.
+# At this time there are not enough examples to determine if this can be
+# disambiguated from na.
+#$nah→$nna|$virama;
+#######################################################################
+$nah→$na|$virama;
+$phar→$pha|$virama;
+$pip→$pa|$virama;
+$ava→$ba$virama$va|$virama;
+$bur$hamso→$bha|$virama;
+$bur→$ba|$virama;
+$mama→$ma|$virama;
+$yat→$ya|$virama;
+$reu→$ra|$virama;
+$loacha→$la|$virama;
+$varca→$va|$virama;
+$shyer→$sha|$virama;
+$shyele→$sa|$virama;
+$hamso→$ha|$virama;
+'.'→$danda;
+$danda'.'→$doubleDanda;
+$vowelAbove{̃→$anusvara;
+$vowelBelow{̃→$chandrabindu;
+#######################################################################
+# Will need to add provisions for stress marks. Currently there are not
+# enough examples of stress marks in Devanagari-based Sunuwar to determine
+# the rules of usage.
+#'\u030D'→\uE051;
+#######################################################################
+# convert to dependent forms after consonant with no vowel:
+# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}
+$virama $aal→$aa;
+$virama $appho$imar→$ai;
+$virama $imar':'→$ii;
+$virama $imar→$i;
+$virama $utthi':'→$uu;
+$virama $utthi→$u;
+$virama $eko→$e;
+$virama $otthi→$o;
+$virama $appho→;
+# otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai}
+$virama''$aal→$waa;
+$virama''$appho$imar→$wai;
+$virama''$imar':'→$wii;
+$virama''$imar→$wi;
+$virama''$utthi':'→$wuu;
+$virama''$utthi→$wu;
+$virama''$eko→$we;
+$virama''$otthi→$wo;
+$virama''$kloko→$wa$virama;
+# no virama
+''$aal→$waa;
+''$appho$imar→$wai;
+''$imar':'→$wii;
+''$imar→$wi;
+''$utthi':'→$wuu;
+''$utthi→$wu;
+''$eko→$we;
+''$otthi→$wo;
+''$kloko→$wa$virama;
+$virama } [$z] → $virama;
+$virama } ' ' → $virama ;
+$virama}$endThing→;
+ʔ→$dgs; # Glottal Stop
+':'→$visarga;
+''$appho→$wa;
+𑯰→$zero;
+𑯱→$one;
+𑯲→$two;
+𑯳→$three;
+𑯴→$four;
+𑯵→$five;
+𑯶→$six;
+𑯷→$seven;
+𑯸→$eight;
+𑯹→$nine;
+''→;
+#:: NFC (NFD) ;
+ ]]>
+
+
+
\ No newline at end of file