-- Table returned by this module; each replacement rule set is stored on it under its own key.
local data = {}
local U = mw.ustring.char
-- Signs referenced by the rules below, built from their code points.
local anusvAra, visarga, virAma, nuktA = U(0x11837), U(0x11838), U(0x11839), U(0x1183A)
-- Character range that is placed inside a pattern set to match one consonant letter.
local consonants = "𑠊-𑠫"
-- Pattern fragment: one consonant letter, optionally followed by the nukta sign.
local consonant = ("[%s]%s?"):format(consonants, nuktA)
local acute = U(0x301) -- combining acute
-- Ordered list of {pattern, replacement} pairs that turn a Latin transliteration
-- into Dogra script. The rules depend on their order: a sequence that contains
-- another rule's input (a digraph, or a letter plus combining marks) must be
-- listed before the shorter rule, otherwise the shorter rule consumes part of it.
data["doi"] = {
-- Diphthongs first, so that "a" + "i" / "a" + "u" are not converted separately.
{"ai", "𑠇"},
{"au", "𑠉"},
-- Explicit hiatus: "a" followed by a separate independent vowel.
{"aï", "𑠀𑠂"},
{"aü", "𑠀𑠄"},
{"aö", "𑠀𑠈"},
{"ṃ", anusvAra},
{"ḥ", visarga},
-- Aspirated digraphs and letters with diacritics, before the plain letters they contain.
{"kh", "𑠋"},
{"gh", "𑠍"},
{"ṅ", "𑠎"},
{"ch", "𑠐"},
{"jh", "𑠒"},
{"ñ", "𑠓"},
{"ṭh", "𑠕"},
{"ḍh", "𑠗"},
{"ṇ", "𑠘"},
{"th", "𑠚"},
{"dh", "𑠜"},
{"n", "𑠝"},
{"ph", "𑠟"},
{"bh", "𑠡"},
{"m", "𑠢"},
{"y", "𑠣"},
-- Vocalic r is written as "r" plus combining marks, so it has to be consumed
-- before the plain "r" rule that follows; the longer sequence goes first.
{"r̥̄", "ॠ"},
{"r̥", "ऋ"},
{"r", "𑠤"},
{"l", "𑠥"},
{"v", "𑠦"},
{"ś", "𑠧"},
{"ṣ", "𑠨"},
{"s", "𑠩"},
-- Independent vowels.
{"a", "𑠀"},
{"ā", "𑠁"},
{"i", "𑠂"},
{"ī", "𑠃"},
{"u", "𑠄"},
{"ū", "𑠅"},
{"e", "𑠆"},
{"o", "𑠈"},
-- Unaspirated consonants, after the digraphs that start with them.
{"k", "𑠊"},
{"g", "𑠌"},
{"c", "𑠏"},
{"j", "𑠑"},
{"ṭ", "𑠔"},
{"ḍ", "𑠖"},
{"t", "𑠙"},
{"d", "𑠛"},
{"p", "𑠞"},
{"b", "𑠠"},
{"h", "𑠪"},
-- Drop any leftover combining diaeresis.
{'̈', ""},
{"ṛ", "𑠫"},
{"(𑠀)[%-/]([𑠂𑠄])", "%1%2"}, -- a-i, a-u for 𑠀𑠂, 𑠀𑠄; must follow rules for "ai", "au"
-- A consonant at the very end of the text gets a virama.
{"(" .. consonant .. ")$", "%1" .. virAma},
{acute, ""},
-- this rule must be applied twice because a consonant may only be in one capture per operation, so "CCC" will only recognize the first two consonants
{"(" .. consonant .. ")" .. "(" .. consonant .. ")", "%1" .. virAma .. "%2"},
{"(" .. consonant .. ")" .. "(" .. consonant .. ")", "%1" .. virAma .. "%2"},
{"i", "𑠂"},
{"u", "𑠄"},
}
-- Maps each independent vowel letter to the vowel sign that replaces it when it
-- directly follows a consonant.
local vowels = {
["𑠂"] = U(0x1182D),
["𑠄"] = U(0x1182F),
["ऋ"] = U(0x11831),
["𑠆"] = U(0x11833),
["𑠈"] = U(0x11835),
["𑠁"] = U(0x1182C),
["𑠃"] = U(0x1182E),
["𑠅"] = U(0x11830),
["ॠ"] = U(0x11832),
["𑠇"] = U(0x11834),
["𑠉"] = U(0x11836),
}
do
	local rules = data["doi"]
	-- Capturing group around one consonant (with optional nukta), shared by every rule appended here.
	local capturedConsonant = "(" .. consonant .. ")"
	-- Append one rule per vowel: consonant + independent vowel -> consonant + vowel sign.
	for letter, sign in pairs(vowels) do
		rules[#rules + 1] = {capturedConsonant .. letter, "%1" .. sign}
	end
	-- This must go last, after independent vowels are converted to diacritics, or "aï", "aü" won't work.
	-- The inherent vowel is not written: drop the independent "a" after a consonant.
	rules[#rules + 1] = {capturedConsonant .. "𑠀", "%1"}
end
-- [[w:Harvard-Kyoto]] to [[w:International Alphabet of Sanskrit Transliteration]]
-- Two substitution tables stored at indices 1 and 2; presumably the consumer
-- applies them in that order (confirm against the calling module).
data["doi-tr"] = {
{
A = "ā",
I = "ī",
U = "ū",
J = "ñ",
T = "ṭ",
D = "ḍ",
N = "ṇ",
G = "ṅ",
z = "ś",
S = "ṣ",
M = "ṃ",
H = "ḥ",
["/"] = acute,
},
{
R = "r̥",
},
}
-- Hand the populated rule sets back to the caller.
return data