From 2149de4a2f26df890cb267df67fdaa481b7429c2 Mon Sep 17 00:00:00 2001
From: Neil S Patel <46502494+NeilSureshPatel@users.noreply.github.com>
Date: Tue, 26 Nov 2024 14:15:07 -0500
Subject: [PATCH] CLDR-17885 Add Sunu-Deva and Deva-Sunu transforms
CLDR-17885
Initial commit of Sunuwar to Devanagari and Devanagari to Sunuwar transforms using the InterIndic intermediate step. Transforms are based on the SWS standard.
---
common/transforms/Devanagari-Sunuwar_SWS.xml | 21 +
common/transforms/InterIndic-Sunuwar_SWS.xml | 472 +++++++++++++++++++
common/transforms/Sunuwar-Devanagari_SWS.xml | 21 +
common/transforms/Sunuwar-InterIndic_SWS.xml | 360 ++++++++++++++
4 files changed, 874 insertions(+)
create mode 100644 common/transforms/Devanagari-Sunuwar_SWS.xml
create mode 100644 common/transforms/InterIndic-Sunuwar_SWS.xml
create mode 100644 common/transforms/Sunuwar-Devanagari_SWS.xml
create mode 100644 common/transforms/Sunuwar-InterIndic_SWS.xml
diff --git a/common/transforms/Devanagari-Sunuwar_SWS.xml b/common/transforms/Devanagari-Sunuwar_SWS.xml
new file mode 100644
index 00000000000..0131edef9c2
--- /dev/null
+++ b/common/transforms/Devanagari-Sunuwar_SWS.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/common/transforms/InterIndic-Sunuwar_SWS.xml b/common/transforms/InterIndic-Sunuwar_SWS.xml
new file mode 100644
index 00000000000..85463d57160
--- /dev/null
+++ b/common/transforms/InterIndic-Sunuwar_SWS.xml
@@ -0,0 +1,472 @@
+
+
+
+
+
+
+
+
+########################################################################
+# InterIndic-Sunuwar_SWS
+#\uE000 reserved
+#consonants
+$chandrabindu=\uE001;
+$anusvara=\uE002;
+$visarga=\uE003;
+#\uE004 reserved
+# w←vowel→ represents the stand-alone form
+$wa=\uE005;
+$waa=\uE006;
+$wi=\uE007;
+$wii=\uE008;
+$wu=\uE009;
+$wuu=\uE00A;
+$wr=\uE00B;
+$wl=\uE00C;
+$wce=\uE00D; # LETTER CANDRA E
+$wse=\uE00E; # LETTER SHORT E
+$we=\uE00F; # ए LETTER E
+$wai=\uE010;
+$wco=\uE011; # LETTER CANDRA O
+$wso=\uE012; # LETTER SHORT O
+$wo=\uE013; # ओ LETTER O
+$wau=\uE014;
+$ka=\uE015;
+$kha=\uE016;
+$ga=\uE017;
+$gha=\uE018;
+$nga=\uE019;
+$ca=\uE01A;
+$cha=\uE01B;
+$ja=\uE01C;
+$jha=\uE01D;
+$nya=\uE01E;
+$tta=\uE01F;
+$ttha=\uE020;
+$dda=\uE021;
+$ddha=\uE022;
+$nna=\uE023;
+$ta=\uE024;
+$tha=\uE025;
+$da=\uE026;
+$dha=\uE027;
+$na=\uE028;
+$ena=\uE029; #compatibility
+$pa=\uE02A;
+$pha=\uE02B;
+$ba=\uE02C;
+$bha=\uE02D;
+$ma=\uE02E;
+$ya=\uE02F;
+$ra=\uE030;
+$vva=\uE081;
+$rra=\uE031;
+$la=\uE032;
+$lla=\uE033;
+$ela=\uE034; #compatibility
+$va=\uE035;
+$sha=\uE036;
+$ssa=\uE037;
+$sa=\uE038;
+$ha=\uE039;
+#\u093A Reserved
+#\u093B Reserved
+$nukta=\uE03C;
+$avagraha=\uE03D; # SIGN AVAGRAHA
+# ←vowel→ represents the dependent form
+$aa=\uE03E;
+$i=\uE03F;
+$ii=\uE040;
+$u=\uE041;
+$uu=\uE042;
+$rh=\uE043;
+$rrh=\uE044;
+$ce=\uE045; #VOWEL SIGN CANDRA E
+$se=\uE046; #VOWEL SIGN SHORT E
+$e=\uE047;
+$ai=\uE048;
+$co=\uE049; # VOWEL SIGN CANDRA O
+$so=\uE04A; # VOWEL SIGN SHORT O
+$o=\uE04B; # ो
+$au=\uE04C;
+$virama=\uE04D;
+# \u094E Reserved
+# \u094F Reserved
+$om=\uE050; # OM
+\uE051→; # UNMAPPED STRESS SIGN UDATTA
+\uE052→; # UNMAPPED STRESS SIGN ANUDATTA
+\uE053→; # UNMAPPED GRAVE ACCENT
+\uE054→; # UNMAPPED ACUTE ACCENT
+$lm = \uE055;# Telugu Length Mark
+$ailm=\uE056;# AI Length Mark
+$aulm=\uE057;# AU Length Mark
+#urdu compatibility forms
+$uka=\uE058;
+$ukha=\uE059;
+$ugha=\uE05A;
+$ujha=\uE05B;
+$uddha=\uE05C;
+$udha=\uE05D;
+$ufa=\uE05E;
+$uya=\uE05F;
+$wrr=\uE060;
+$wll=\uE061;
+$lh=\uE062;
+$llh=\uE063;
+$danda=\uE064;
+$doubleDanda=\uE065;
+$zero=\uE066; # DIGIT ZERO
+$one=\uE067; # DIGIT ONE
+$two=\uE068; # DIGIT TWO
+$three=\uE069; # DIGIT THREE
+$four=\uE06A; # DIGIT FOUR
+$five=\uE06B; # DIGIT FIVE
+$six=\uE06C; # DIGIT SIX
+$seven=\uE06D; # DIGIT SEVEN
+$eight=\uE06E; # DIGIT EIGHT
+$nine=\uE06F; # DIGIT NINE
+# Glottal stop
+$dgs=\uE082;
+#Khanda-ta
+$kta=\uE083;
+$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
+$depVowelBelow=[\uE041-\uE044];
+# $x was originally called '§'; $z was '%'
+$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
+$z=[bcdfghjklmnpqrstvwxyz];
+$vowels=[aeiour̥̄̆];
+$forceIndependentMatra = [^[[:L:][̀-͌]]];
+######################################################################
+# Sunuwar letters
+######################################################################
+#consonants
+# Sunuwar is outside the BMP: \uXXXX reads exactly four hex digits, so the eight-digit \UXXXXXXXX form is required.
+$devi=\U00011BC0;
+$tasla=\U00011BC1;
+$reu=\U00011BC4;
+$kik=\U00011BC6;
+$mama=\U00011BC7;
+# (a four-digit escape such as \u11BC9 would be read as U+11BC followed by a literal 9)
+$pip=\U00011BC9;
+$gil=\U00011BCA;
+$hamso=\U00011BCB;
+$carmi=\U00011BCC;
+$nah=\U00011BCD;
+$bur=\U00011BCE;
+$jyah=\U00011BCF;
+$loacha=\U00011BD0;
+$shyele=\U00011BD2;
+$varca=\U00011BD3;
+$yat=\U00011BD4;
+$ava=\U00011BD5;
+$donga=\U00011BD7;
+$thari=\U00011BD8;
+$phar=\U00011BD9;
+$ngar=\U00011BDA;
+$khaSunu=\U00011BDB;
+$shyer=\U00011BDC;
+$chelap=\U00011BDD;
+$tentu=\U00011BDE;
+$thele=\U00011BDF;
+$kloko=\U00011BE0;
+$pvo=\U00011BE1; # SUNUWAR SIGN PVO (auspicious symbol)
+#vowels
+$eko=\U00011BC2;
+$imar=\U00011BC3;
+$utthi=\U00011BC5;
+$appho=\U00011BC8;
+$otthi=\U00011BD1;
+$aal=\U00011BD6;
+$consonantsdevasunu=[$ka$kha$ga$ca$cha$nga$ja$tta$ttha$dda$ta$tha$da$na$pa$pha$ba$ma$ya$ra$la$va$sa$ha]; # devanagari consonants that map directly to sunuwar consonants
+$vowelsdevasunu=[$aa$i$e$o$u]; #devanagari vowels that map directly to sunuwar vowels
+######################################################################
+# convert from Native letters to Sunuwar letters
+######################################################################
+#transliterations for anusvara
+$anusvara→ \u0303;
+#auspicious symbol
+$om→$pvo;
+# normal consonants
+$ka}$x→$kik;
+$ka$virama→$kik;
+$ka}[^$consonantsdevasunu]→$kik;
+$ka→$kik$appho;
+#
+$kha}$x→$khaSunu;
+$kha$virama→$khaSunu;
+$kha}[^$consonantsdevasunu]→$khaSunu;
+$kha→$khaSunu$appho;
+#
+$ga}$x→$gil;
+$ga$virama→$gil;
+$ga}[^$consonantsdevasunu]→$gil;
+$ga→$gil$appho;
+#
+$gha}$x→$gil$hamso;
+$gha$virama→$gil$hamso;
+$gha}[^$consonantsdevasunu]→$gil$hamso;
+$gha→$gil$hamso$appho;
+#
+$nga}$x→$ngar;
+$nga$virama→$ngar;
+$nga}[^$consonantsdevasunu]→$ngar;
+$nga→$ngar$appho;
+#
+$ca}$x→$carmi;
+$ca$virama→$carmi;
+$ca}[^$consonantsdevasunu]→$carmi;
+$ca→$carmi$appho;
+#
+$cha}$x→$chelap;
+$cha$virama→$chelap;
+$cha}[^$consonantsdevasunu]→$chelap;
+$cha→$chelap$appho;
+#
+$ja}$x→$jyah;
+$ja$virama→$jyah;
+$ja}[^$consonantsdevasunu]→$jyah;
+$ja→$jyah$appho;
+#
+$jha}$x→$jyah$hamso;
+$jha$virama→$jyah$hamso;
+$jha}[^$consonantsdevasunu]→$jyah$hamso;
+$jha→$jyah$hamso$appho;
+#
+$nya}$x→$nah$yat;
+$nya$virama→$nah$yat;
+$nya}[^$consonantsdevasunu]→$nah$yat;
+$nya→$nah$yat$appho;
+#
+$tta}$x→$tentu;
+$tta$virama→$tentu;
+$tta}[^$consonantsdevasunu]→$tentu;
+$tta→$tentu$appho;
+#
+$ttha}$x→$thele;
+$ttha$virama→$thele;
+$ttha}[^$consonantsdevasunu]→$thele;
+$ttha→$thele$appho;
+#
+$dda}$x→$donga;
+$dda$virama→$donga;
+$dda}[^$consonantsdevasunu]→$donga;
+$dda→$donga$appho;
+#
+$ddha}$x→$donga$hamso;
+$ddha$virama→$donga$hamso;
+$ddha}[^$consonantsdevasunu]→$donga$hamso;
+$ddha→$donga$hamso$appho;
+#
+$nna}$x→$nah;
+$nna$virama→$nah;
+$nna}[^$consonantsdevasunu]→$nah;
+$nna→$nah$appho;
+#
+$ta}$x→$tasla;
+$ta$virama→$tasla;
+$ta}[^$consonantsdevasunu]→$tasla;
+$ta→$tasla$appho;
+#
+$tha}$x→$thari;
+$tha$virama→$thari;
+$tha}[^$consonantsdevasunu]→$thari;
+$tha→$thari$appho;
+#
+$da}$x→$devi;
+$da$virama→$devi;
+$da}[^$consonantsdevasunu]→$devi;
+$da→$devi$appho;
+#
+$dha}$x→$devi$hamso;
+$dha$virama→$devi$hamso;
+$dha}[^$consonantsdevasunu]→$devi$hamso;
+$dha→$devi$hamso$appho;
+#
+$na}$x→$nah;
+$na$virama→$nah;
+$na}[^$consonantsdevasunu]→$nah;
+$na→$nah$appho;
+#
+$pa}$x→$pip;
+$pa$virama→$pip;
+$pa}[^$consonantsdevasunu]→$pip;
+$pa→$pip$appho;
+#
+$pha}$x→$phar;
+$pha$virama→$phar;
+$pha}[^$consonantsdevasunu]→$phar;
+$pha→$phar$appho;
+#
+$ba$virama$va}$x → $ava;
+$ba$virama$va$virama → $ava;
+$ba$virama$va}[^$consonantsdevasunu]→$ava;
+$ba$virama$va→$ava$appho;
+#
+$ba}$x→$bur;
+$ba$virama→$bur;
+$ba}[^$consonantsdevasunu]→$bur;
+$ba→$bur$appho;
+#
+$bha}$x→$bur$hamso;
+$bha$virama→$bur$hamso;
+$bha}[^$consonantsdevasunu]→$bur$hamso;
+$bha→$bur$hamso$appho;
+#
+$ma}$x→$mama;
+$ma$virama→$mama;
+$ma}[^$consonantsdevasunu]→$mama;
+$ma→$mama$appho;
+#
+$ya}$x→$yat;
+$ya$virama→$yat;
+$ya}[^$consonantsdevasunu]→$yat;
+$ya→$yat$appho;
+#
+$ra}$x→$reu;
+$ra$virama→$reu;
+$ra}[^$consonantsdevasunu]→$reu;
+$ra→$reu$appho;
+#
+$la}$x→$loacha;
+$la$virama→$loacha;
+$la}[^$consonantsdevasunu]→$loacha;
+$la→$loacha$appho;
+#
+$va}$x→$varca;
+$va$virama→$varca;
+$va}[^$consonantsdevasunu]→$varca;
+$va→$varca$appho;
+#
+$sa}$x→$shyele;
+$sa$virama→$shyele;
+$sa}[^$consonantsdevasunu]→$shyele;
+$sa→$shyele$appho;
+#
+$sha}$x→$shyer;
+$sha$virama→$shyer;
+$sha}[^$consonantsdevasunu]→$shyer;
+$sha→$shyer$appho;
+#
+$ssa}$x→$shyer;
+$ssa$virama→$shyer;
+$ssa}[^$consonantsdevasunu]→$shyer;
+$ssa→$shyer$appho;
+#
+$ha}$x→$hamso;
+$ha$virama→$hamso;
+$ha}[^$consonantsdevasunu]→$hamso;
+$ha→$hamso$appho;
+#
+$wa$virama→$kloko;
+$wa$nukta→$kloko;
+# dependent vowels (should never occur except following consonants)
+$forceIndependentMatra{$aa → ̔$aal;
+$forceIndependentMatra{$ai → ̔$appho$imar;
+$forceIndependentMatra{$ii → ̔$imar':';
+$forceIndependentMatra{$i → ̔$imar;
+$forceIndependentMatra{$uu → ̔$utthi':';
+$forceIndependentMatra{$u → ̔$utthi;
+$forceIndependentMatra{$rh → ̔$reu$imar;
+$forceIndependentMatra{$e → ̔$eko;
+$forceIndependentMatra{$o → ̔$otthi;
+$aa → $aal;
+$ai → $appho$imar;
+$ii → $imar':';
+$i → $imar;
+$uu → $utthi':';
+$u → $utthi;
+$rh → $reu$imar;
+$e → $eko;
+$o → $otthi;
+# independent vowels when preceded by vowels
+$vowels{$waa → ''$aal;
+$vowels{$wai → ''$appho$imar;
+$vowels{$wii → ''$imar':';
+$vowels{$wi → ''$imar;
+$vowels{$wuu → ''$utthi':';
+$vowels{$wu → ''$utthi;
+$vowels{$wr → ''$reu$imar;
+$vowels{$we → ''$eko;
+$vowels{$wo → ''$otthi;
+$vowels{$wa → ''$appho;
+# independent vowels (otherwise)
+$waa → $aal;
+$wai → $appho$imar;
+$wii → $imar':';
+$wi → $imar;
+$wuu → $utthi':';
+$wu → $utthi;
+$wr → $reu$imar;
+$we → $eko;
+$wo → $otthi;
+$wa → $appho;
+#stress marks
+$avagraha → ̕;
+$chandrabindu$anusvara→\u0303;
+$chandrabindu → \u0303;
+$visarga→':';
+#numbers: Sunuwar digits U+11BF0..U+11BF9 (supplementary plane, so the eight-digit \U escape is required)
+$zero → \U00011BF0;
+$one → \U00011BF1;
+$two → \U00011BF2;
+$three → \U00011BF3;
+$four → \U00011BF4;
+$five → \U00011BF5;
+$six → \U00011BF6;
+$seven → \U00011BF7;
+$eight → \U00011BF8;
+$nine → \U00011BF9;
+$lm →;
+$ailm →;
+$aulm →;
+$dgs→ʔ;
+$kta→ṯ;
+$danda→'.';
+$doubleDanda→'.';
+\uE070→'.'; # ABBREVIATION SIGN
+# LETTER RA WITH MIDDLE DIAGONAL
+\uE071}$x→$reu;
+\uE071$virama→$reu;
+\uE071→$reu;
+# LETTER RA WITH LOWER DIAGONAL
+\uE072}$x→$reu;
+\uE072$virama→$reu;
+\uE072→$reu;
+\uE073→; # RUPEE MARK
+\uE074→; # RUPEE SIGN
+\uE075→; # CURRENCY NUMERATOR ONE
+\uE076→; # CURRENCY NUMERATOR TWO
+\uE077→; # CURRENCY NUMERATOR THREE
+\uE078→; # CURRENCY NUMERATOR FOUR
+\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
+\uE07A→; # CURRENCY DENOMINATOR SIXTEEN
+\uE07B→; # ISSHAR
+\uE07C→; # TIPPI
+\uE07D→; # ADDAK
+\uE07E→; # IRI
+\uE07F→; # URA
+\uE080→; # EK ONKAR
+\uE004→; # DEVANAGARI VOWEL SIGN SHORT A
+$virama→; # remove remaining viramas
+$nukta→\u0323 ; # combining dot below
+ ]]>
+
+
+
\ No newline at end of file
diff --git a/common/transforms/Sunuwar-Devanagari_SWS.xml b/common/transforms/Sunuwar-Devanagari_SWS.xml
new file mode 100644
index 00000000000..ca75e343ba5
--- /dev/null
+++ b/common/transforms/Sunuwar-Devanagari_SWS.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/common/transforms/Sunuwar-InterIndic_SWS.xml b/common/transforms/Sunuwar-InterIndic_SWS.xml
new file mode 100644
index 00000000000..6b48f06a5e6
--- /dev/null
+++ b/common/transforms/Sunuwar-InterIndic_SWS.xml
@@ -0,0 +1,360 @@
+
+
+
+
+
+
+
+
+
+########################################################################
+# Sunuwar-InterIndic_SWS
+#:: NFD;
+#\uE000 reserved
+#consonants
+$chandrabindu=\uE001;
+$anusvara=\uE002;
+$visarga=\uE003;
+#\uE004 reserved
+# w←vowel→ represents the stand-alone form
+$wa=\uE005;
+$waa=\uE006;
+$wi=\uE007;
+$wii=\uE008;
+$wu=\uE009;
+$wuu=\uE00A;
+$wr=\uE00B;
+$wl=\uE00C;
+$wce=\uE00D; # LETTER CANDRA E
+$wse=\uE00E; # LETTER SHORT E
+$we=\uE00F; # ए LETTER E
+$wai=\uE010;
+$wco=\uE011; # LETTER CANDRA O
+$wso=\uE012; # LETTER SHORT O
+$wo=\uE013; # ओ LETTER O
+$wau=\uE014;
+$ka=\uE015;
+$kha=\uE016;
+$ga=\uE017;
+$gha=\uE018;
+$nga=\uE019;
+$ca=\uE01A;
+$cha=\uE01B;
+$ja=\uE01C;
+$jha=\uE01D;
+$nya=\uE01E;
+$tta=\uE01F;
+$ttha=\uE020;
+$dda=\uE021;
+$ddha=\uE022;
+$nna=\uE023;
+$ta=\uE024;
+$tha=\uE025;
+$da=\uE026;
+$dha=\uE027;
+$na=\uE028;
+$ena=\uE029; #compatibility
+$pa=\uE02A;
+$pha=\uE02B;
+$ba=\uE02C;
+$bha=\uE02D;
+$ma=\uE02E;
+$ya=\uE02F;
+$ra=\uE030;
+$rra=\uE031;
+$la=\uE032;
+$lla=\uE033;
+$ela=\uE034; #compatibility
+$va=\uE035;
+$vva=\uE081;
+$sha=\uE036;
+$ssa=\uE037;
+$sa=\uE038;
+$ha=\uE039;
+#\u093A Reserved
+#\u093B Reserved
+$nukta=\uE03C;
+$avagraha=\uE03D; # SIGN AVAGRAHA
+# ←vowel→ represents the dependent form
+$aa=\uE03E;
+$i=\uE03F;
+$ii=\uE040;
+$u=\uE041;
+$uu=\uE042;
+$rh=\uE043;
+$rrh=\uE044;
+$ce=\uE045; #VOWEL SIGN CANDRA E
+$se=\uE046; #VOWEL SIGN SHORT E
+$e=\uE047;
+$ai=\uE048;
+$co=\uE049; # VOWEL SIGN CANDRA O
+$so=\uE04A; # VOWEL SIGN SHORT O
+$o=\uE04B; # ो
+$au=\uE04C;
+$virama=\uE04D;
+# \u094E Reserved
+# \u094F Reserved
+$om = \uE050; # OM
+# ॑→\uE051; # UNMAPPED STRESS SIGN UDATTA
+# ॒→; # UNMAPPED STRESS SIGN ANUDATTA
+# ॓→; # UNMAPPED GRAVE ACCENT
+# ॔→; # UNMAPPED ACUTE ACCENT
+$lm = \uE055;# Telugu Length Mark
+$ailm=\uE056;# AI Length Mark
+$aulm=\uE057;# AU Length Mark
+#urdu compatibility forms
+$uka=\uE058;
+$ukha=\uE059;
+$ugha=\uE05A;
+$ujha=\uE05B;
+$uddha=\uE05C;
+$udha=\uE05D;
+$ufa=\uE05E;
+$uya=\uE05F;
+$wrr=\uE060;
+$wll=\uE061;
+$lh=\uE062;
+$llh=\uE063;
+$danda=\uE064;
+$doubleDanda=\uE065;
+$zero=\uE066; # DIGIT ZERO
+$one=\uE067; # DIGIT ONE
+$two=\uE068; # DIGIT TWO
+$three=\uE069; # DIGIT THREE
+$four=\uE06A; # DIGIT FOUR
+$five=\uE06B; # DIGIT FIVE
+$six=\uE06C; # DIGIT SIX
+$seven=\uE06D; # DIGIT SEVEN
+$eight=\uE06E; # DIGIT EIGHT
+$nine=\uE06F; # DIGIT NINE
+$dgs=\uE082;
+# For all other scripts
+$ecp0=\uE070;
+$ecp1=\uE071;
+$ecp2=\uE072;
+$ecp3=\uE073;
+$ecp4=\uE074;
+$ecp5=\uE075;
+$ecp6=\uE076;
+$ecp7=\uE077;
+$ecp8=\uE078;
+$ecp9=\uE079;
+$ecpA=\uE07A;
+$ecpB=\uE07B;
+$ecpC=\uE07C;
+$ecpD=\uE07D;
+$ecpE=\uE07E;
+$ecpF=\uE07F;
+# Khanda-ta
+$kta=\uE083;
+# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN
+$vowelAbove=[\uE03F\uE040\uE045-\uE04C];
+$vowelBelow=[\uE005-\uE007\uE009-\uE00B\uE00F\uE010\uE013\uE03E\uE03F\uE041-\uE044];
+$depVowels=[\uE03E-\uE04C];
+$endThing=[$danda$doubleDanda];
+# $x was originally called '§'; $z was '%'
+$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
+$z=[bcdfghjklmnpqrstvwxyz];
+$consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]];
+######################################################################
+# Sunuwar letters
+######################################################################
+#consonants
+$devi=;
+$tasla=;
+$reu=;
+$kik=;
+$mama=;
+$pip=;
+$gil=;
+$hamso=;
+$carmi=;
+$nah=;
+$bur=;
+$jyah=;
+$loacha=;
+$shyele=;
+$varca=;
+$yat=;
+$ava=;
+$donga=;
+$thari=;
+$phar=;
+$ngar=;
+$khaSunu=;
+$shyer=;
+$chelap=;
+$tentu=;
+$thele=;
+$kloko=;
+$pvo=;
+#vowels
+$eko=;
+$imar=;
+$utthi=;
+$appho=;
+$otthi=;
+$aal=;
+$consonantsdevasunu=[$ka$kha$ga$ca$cha$nga$ja$tta$ttha$dda$ta$tha$da$na$pa$pha$ba$ma$ya$ra$la$va$sa$ha]; # devanagari consonants that map directly to sunuwar consonants
+$sunuconsonants=[$devi$tasla$reu$kik$mama$pip$gil$hamso$carmi$nah$bur$jyah$loacha$shyele$varca$yat$ava$donga$thari$phar$ngar$khaSunu$shyer$chelap$tentu$thele];
+$vowelsdevasunu=[$aa$i$e$o$u]; #devanagari vowels that map directly to sunuwar vowels
+$nonletter = [:^Letter:] ;
+$vowelthings = [$eko$imar$utthi$appho$otthi$aal':'\u0303\u0300\u0301\u030D]*;
+######################################################################
+̕ → $avagraha;
+#\u0303→$chandrabindu;
+$nonletter { $appho } $nonletter→$wa ;
+#convert auspicious symbol
+$pvo→$om;
+#handle shyer mapping. converts to ssa when following another shyer and vowel
+#$shyer($vowelthings)($sunuconsonants)→|$shyer$1$shyer$virama;
+$shyer$shyer→$ssa$virama$ssa$virama;
+$shyer($vowelthings)$shyer→|$shyer$1$shyer$virama;
+($sunuconsonants)$shyer →|$1$ssa$virama;
+{ $shyer } $sunuconsonants→|$ssa$virama;
+# convert to independent forms at start of word or syllable:
+# dependent forms for roundtrip
+̔$aal→$aa;
+̔$appho$imar→$ai;
+̔$imar':'→$ii;
+̔$utthi':'→$uu;
+̔$utthi→$u;
+̔$eko→$e;
+̔$otthi→$o;
+̔$appho→;
+# preceded by consonants
+$consonants{ $aal→$aa;
+$consonants{ $appho$imar→$ai;
+$consonants{ $imar':'→$ii;
+$consonants{ $imar→$i;
+$consonants{ $utthi':'→$uu;
+$consonants{ $utthi→$u;
+$consonants{ $eko→$e;
+$consonants{ $otthi→$o;
+# e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai})
+$aal→$waa;
+$appho$imar→$wai;
+$imar':'→$wii;
+$imar→$wi;
+$utthi':'→$wuu;
+$utthi→$wu;
+$eko→$we;
+$otthi→$wo;
+$kloko→$wa$virama;
+$tentu→$tta|$virama;
+$donga$hamso→$ddha|$virama;
+$donga→$dda|$virama;
+$khaSunu→$kha|$virama;
+$kik→$ka|$virama;
+$gil$hamso→$gha|$virama;
+$gil→$ga|$virama;
+$chelap→$cha|$virama;
+$carmi→$ca|$virama;
+$jyah$hamso→$jha|$virama;
+$jyah→$ja|$virama;
+$nah$yat→$nya|$virama;
+$thele→$ttha|$virama;
+$thari→$tha|$virama;
+$tasla→$ta|$virama;
+$devi$hamso→$dha|$virama;
+$devi→$da|$virama;
+#######################################################################
+# Need to address occasional use of nna in Devanagari-based Sunuwar.
+# At this time there are not enough examples to determine if this can be
+# disambiguated from na.
+#$nah→$nna|$virama;
+#######################################################################
+$nah→$na|$virama;
+$phar→$pha|$virama;
+$pip→$pa|$virama;
+$ava→$ba$virama$va|$virama;
+$bur$hamso→$bha|$virama;
+$bur→$ba|$virama;
+$mama→$ma|$virama;
+$yat→$ya|$virama;
+$reu→$ra|$virama;
+$loacha→$la|$virama;
+$varca→$va|$virama;
+$shyer→$sha|$virama;
+$shyele→$sa|$virama;
+$hamso→$ha|$virama;
+'.'→$danda;
+$danda'.'→$doubleDanda;
+$vowelAbove{̃→$anusvara;
+$vowelBelow{̃→$chandrabindu;
+#######################################################################
+# Will need to add provisions for stress marks. Currently there are not
+# enough examples of stress marks in Devanagari-based Sunuwar to determine
+# the rules of usage.
+#'\u030D'→\uE051;
+#######################################################################
+# convert to dependent forms after consonant with no vowel:
+# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}
+$virama $aal→$aa;
+$virama $appho$imar→$ai;
+$virama $imar':'→$ii;
+$virama $imar→$i;
+$virama $utthi':'→$uu;
+$virama $utthi→$u;
+$virama $eko→$e;
+$virama $otthi→$o;
+$virama $appho→;
+# otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai}
+$virama''$aal→$waa;
+$virama''$appho$imar→$wai;
+$virama''$imar':'→$wii;
+$virama''$imar→$wi;
+$virama''$utthi':'→$wuu;
+$virama''$utthi→$wu;
+$virama''$eko→$we;
+$virama''$otthi→$wo;
+$virama''$kloko→$wa$virama;
+# no virama
+''$aal→$waa;
+''$appho$imar→$wai;
+''$imar':'→$wii;
+''$imar→$wi;
+''$utthi':'→$wuu;
+''$utthi→$wu;
+''$eko→$we;
+''$otthi→$wo;
+''$kloko→$wa$virama;
+$virama } [$z] → $virama;
+$virama } ' ' → $virama ;
+$virama}$endThing→;
+ʔ→$dgs; # Glottal Stop
+':'→$visarga;
+''$appho→$wa;
+→$zero;
+→$one;
+→$two;
+→$three;
+→$four;
+→$five;
+→$six;
+→$seven;
+→$eight;
+→$nine;
+''→;
+#:: NFC (NFD) ;
+ ]]>
+
+
+
\ No newline at end of file