-- Table returned by this module; keyed by code ("mai", "mai-tr"), each value
-- holding replacement rules.
local data = {}

-- Shorthand: builds a string from Unicode code points.
local U = mw.ustring.char

-- Signs in the U+114BF..U+114C4 range, listed in code-point order.
-- NOTE(review): these look like the Tirhuta-script modifier signs -- confirm
-- against the Unicode chart.
local candrabindu = U(0x114BF)
local anusvAra = U(0x114C0)
local visarga = U(0x114C1)
local virAma = U(0x114C2)
local nuktA = U(0x114C3)
local avagraha = "𑓄"

-- Combining acute accent; stripped from the output by the "mai" rules.
local acute = U(0x301)

-- Every consonant letter, concatenated so it can be dropped into a character class.
local consonants = "𑒏𑒐𑒑𑒒𑒓𑒔𑒕𑒖𑒗𑒘𑒙𑒚𑒛𑒜𑒝𑒞𑒟𑒠𑒡𑒢𑒣𑒤𑒥𑒦𑒧𑒨𑒩𑒪𑒫𑒮𑒬𑒭𑒯"

-- Pattern fragment matching one consonant letter optionally followed by a nukta.
local consonant = string.format("[%s]%s?", consonants, nuktA)
-- Ordered list of {pattern, replacement} rules that turn a romanized string
-- into the script letters used below. Order is significant: later rules rely
-- on earlier ones having already consumed their input (see the notes on each
-- group).
-- NOTE(review): presumably the rules are applied one after another by the
-- module that loads this data, using mw.ustring pattern matching (the
-- character classes contain multi-byte characters) -- confirm against the caller.
data["mai"] = {
	-- Vowels and modifiers. Diphthongs and diaeresis forms come first so that
	-- "ai"/"au" are not consumed as separate "a" + "i"/"u".
	{"ai", "𑒌"},
	{"au", "𑒎"},
	{"ä", "𑒁"},
	{"ï", "𑒃"},
	{"ü", "𑒅"},
	{"a", "𑒁"},
	{"ā", "𑒂"},
	{"i", "𑒃"},
	{"ī", "𑒄"},
	{"u", "𑒅"},
	{"ū", "𑒆"},
	-- Short e and o map directly to code points given numerically.
	{"e", U(0x114BA)},
	{"ē", "𑒋"},
	{"o", U(0x114BD)},
	{"ō", "𑒍"},
	{"ṝ", "𑒈"},
	{"ṛ", "𑒇"},
	-- "r" + combining ring below; must precede the plain "r" rule further down.
	{"r̥", "𑒇"},
	{"ḹ", "𑒊"},
	{"ḷ", "𑒉"},
	-- "a-i", "a-u" (or with "/") yield the two separate vowels 𑒁𑒃, 𑒁𑒅 by
	-- dropping the separator; must follow the rules for "ai", "au".
	{"(𑒁)[%-/]([𑒃𑒅])", "%1%2"},

	-- Two-letter (aspirated) consonants must go before plain "h".
	{"kh", "𑒐"},
	{"gh", "𑒒"},
	{"ch", "𑒕"},
	{"jh", "𑒗"},
	{"ṭh", "𑒚"},
	{"ḍh", "𑒜"},
	{"ɽh", "𑒜𑓃"},
	{"th", "𑒟"},
	{"dh", "𑒡"},
	{"ph", "𑒤"},
	{"bh", "𑒦"},
	{"h", "𑒯"},

	-- Other stops.
	{"k", "𑒏"},
	{"g", "𑒑"},
	{"c", "𑒔"},
	{"j", "𑒖"},
	{"ṭ", "𑒙"},
	{"ḍ", "𑒛"},
	{"ɽ", "𑒛𑓃"},
	{"t", "𑒞"},
	{"d", "𑒠"},
	{"p", "𑒣"},
	{"b", "𑒥"},

	-- Nasals. (A second, identical "n" rule used to follow the first; it
	-- could never match, so it has been dropped.)
	{"ṅ", "𑒓"},
	{"ñ", "𑒘"},
	{"ṇ", "𑒝"},
	{"n", "𑒢"},
	{"m", "𑒧"},

	-- Remaining consonants.
	{"y", "𑒨"},
	{"r", "𑒩"},
	{"l", "𑒪"},
	{"v", "𑒫"},
	{"ś", "𑒬"},
	{"ṣ", "𑒭"},
	{"s", "𑒮"},

	-- Modifier signs.
	{"ṃ", anusvAra},
	{"ḥ", visarga},
	{"'", avagraha},
	{"~", candrabindu},

	-- Insert a virama between adjacent consonants. This rule must be applied
	-- twice because a consonant may only be in one capture per operation, so
	-- "CCC" will only recognize the first two consonants on the first pass.
	-- Must follow all consonant conversions.
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	-- A consonant at the very end of the text also takes a virama.
	{"(" .. consonant .. ")$", "%1" .. virAma},
	-- Drop any combining acute accent.
	{acute, ""},
}
-- Independent vowel letter -> corresponding dependent vowel sign, kept as an
-- ordered list (rather than a key/value map walked with pairs()) so that the
-- rules appended to data["mai"] always come out in the same order.
local vowels = {
	{"𑒃", U(0x114B1)},
	{"𑒅", U(0x114B3)},
	{"𑒇", U(0x114B5)},
	{"𑒉", U(0x114B7)},
	{"𑒋", U(0x114B9)},
	{"𑒍", U(0x114BC)},
	{"𑒂", U(0x114B0)},
	{"𑒄", U(0x114B2)},
	{"𑒆", U(0x114B4)},
	{"𑒈", U(0x114B6)},
	{"𑒊", U(0x114B8)},
	{"𑒌", U(0x114BB)},
	{"𑒎", U(0x114BE)},
}

-- Convert independent vowels to diacritics after consonants. Must go after all
-- consonant conversions, since the pattern matches already-converted consonants.
for _, forms in ipairs(vowels) do
	local independentForm, diacriticalForm = forms[1], forms[2]
	table.insert(data["mai"], {"(" .. consonant .. ")" .. independentForm, "%1" .. diacriticalForm})
end

-- Remove the letter 𑒁 directly after a consonant, leaving the bare consonant.
-- This must go last, after independent vowels are converted to diacritics,
-- or "aï", "aü" won't work.
table.insert(data["mai"], {"(" .. consonant .. ")𑒁", "%1"})
-- ASCII shortcuts ([[w:Harvard-Kyoto]] style) mapped to the diacritic letters
-- of [[w:International Alphabet of Sanskrit Transliteration]].
-- The mappings are split into three numbered stages: keys in a later stage are
-- substrings of keys in an earlier one ("lRR" > "lR"/"RR" > "R";
-- "_rh_"/"_r_" contain "r"), so the longer sequences are listed first.
-- NOTE(review): presumably the consumer applies stage 1, then 2, then 3 --
-- confirm against the module that loads this data.
data["mai-tr"] = {
	-- Stage 1: single capitals, "z", the three-letter "lRR", and "/" for the acute.
	{
		A = "ā",
		I = "ī",
		U = "ū",
		E = "ē",
		O = "ō",
		J = "ñ",
		T = "ṭ",
		D = "ḍ",
		N = "ṇ",
		G = "ṅ",
		z = "ś",
		S = "ṣ",
		M = "ṃ",
		H = "ḥ",
		lRR = "ḹ",
		["/"] = acute,
	},
	-- Stage 2: sequences that contain a stage-3 key.
	{
		_rh_ = "ɽh",
		lR = "ḷ",
		RR = "ṝ",
	},
	-- Stage 3: what is left once the longer sequences are gone.
	{
		_r_ = "ɽ",
		R = "ṛ",
	},
}

return data