Module:zh-translit
- පහත දැක්වෙන උපදෙස්, Module:documentation/functions/translit මගින් ජනනය කොට ඇත. [සංස්කරණය කරන්න]
- ප්රයෝජනවත් සබැඳි: උප පිටු ලැයිස්තුව • සබැඳි • transclusions • testcases • sandbox
This module will transliterate චීන භාෂාව text. It is also used to transliterate Min Dong, Jin, මැන්ඩරීන්, Gan, Xiang, Middle Chinese, Literary Chinese, Northern Min, Old Chinese, Wu, කැන්ටනීස්, Sichuanese, and Taishanese.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:zh-translit/testcases.
Functions සංස්කරණය
tr(text, lang, sc)
- Transliterates a given piece of `text` written in the script specified by the code `sc`, and language specified by the code `lang`.
- When the transliteration fails, returns `nil`.
-- Helpers borrowed from other modules and from the standard/Scribunto libraries.
local findTemplates = require("Module:template parser").findTemplates
local get_section = require("Module:utilities").get_section
local insert = table.insert
local sub = string.sub
local toNFD = mw.ustring.toNFD
local trim = mw.text.trim
-- NOTE(review): `string.ulower` is not part of stock Lua; presumably it is supplied by the wiki environment - confirm.
local ulower = string.ulower
-- Current frame; used below to preprocess template parameter names and values.
local frame = mw.getCurrentFrame()
-- Cache for the `MT` table of "Module:zh/data/cmn-tag"; export.tr loads it on first use.
local tag
-- Maps a language code to the abbreviation that is compared against {{zh-pron}} parameter names.
local lect_code = mw.loadData("Module:zh/data/lect codes").langcode_to_abbr
-- Table of functions exported by this module.
local export = {}
--- Build the return values used to report a failed transliteration.
-- @param lang     language code the transliteration was attempted for
-- @param request  if truthy, also produce a request category for the language
-- @return nil, true, and either nil or a one-element list holding the
--         "Requests for transliteration of ... terms" category name
local function fail(lang, request)
	local cat
	if request then
		-- The language object is only needed for its canonical name, so it is
		-- looked up only when a category was actually asked for.
		local lang_obj = require("Module:languages").getByCode(lang)
		if lang_obj then
			cat = {"Requests for transliteration of " .. lang_obj:getCanonicalName() .. " terms"}
		end
	end
	return nil, true, cat
end
--- Fetch the level-2 "Chinese" section of a page.
-- @param title  page title to load
-- @return whatever `get_section` returns for the page's wikitext, or false
--         when the title is invalid or the page has no content
local function get_content(title)
	local page = mw.title.new(title)
	if not page then
		return false
	end
	-- getContent() yields nil for a page that does not exist; treat that the
	-- same way as an invalid title instead of handing nil to get_section.
	local wikitext = page:getContent()
	if not wikitext then
		return false
	end
	return get_section(wikitext, "Chinese", 2)
end
-- Languages for which a comma (not followed by a space) separates readings.
local comma_separated = {
	["cmn"] = true,
	["lzh"] = true,
	["wuu"] = true,
	["yue"] = true,
	["zh"] = true,
	["zhx-tai"] = true,
}

--- Return the reading that ends just before byte position `i`, if `i` is a
-- boundary between readings.
-- A boundary is: the position one past the end of the string (`i_end`), a
-- "/", or - for the languages listed in `comma_separated` - a "," that is
-- not immediately followed by a space.
-- @param readings  the raw readings string
-- @param lang      language code
-- @param i         byte position being examined
-- @param i_end     #readings + 1
-- @param start     byte position where the current reading began
-- @return the substring start..i-1 at a boundary; nothing otherwise
local function get_reading(readings, lang, i, i_end, start)
	local at_boundary
	if i == i_end then
		at_boundary = true
	else
		local ch = sub(readings, i, i)
		if ch == "/" then
			at_boundary = true
		elseif ch == "," then
			at_boundary = comma_separated[lang] and sub(readings, i + 1, i + 1) ~= " "
		end
	end
	if at_boundary then
		return sub(readings, start, i - 1)
	end
end
--- Reconcile the readings found in one template argument with the
-- transliteration accepted so far.
-- @param readings  raw readings string
-- @param lang      language code
-- @param tr        transliteration accepted so far, if any
-- @return the (possibly updated) transliteration, or false when a reading
--         conflicts with it
local function handle_readings(readings, lang, tr)
	-- For "ltc" and "och" the whole string is taken as-is; it only has to
	-- agree with whatever was accepted earlier.
	if lang == "ltc" or lang == "och" then
		if not tr or readings == tr then
			return readings
		end
		return false
	end

	local start, i_end = 1, #readings + 1
	for i = 1, i_end do
		local reading = get_reading(readings, lang, i, i_end, start)
		-- Segments containing "=" are not compared.
		-- NOTE(review): `start` is not advanced past such a segment, so every
		-- later segment still begins with it and is skipped too - confirm
		-- that this is intended.
		if reading and not reading:match("=") then
			if tr and tr ~= reading and ulower(tr) ~= reading then
				-- A different reading is only tolerated when its lowercase
				-- form equals the accepted one.
				if ulower(reading) ~= tr then
					return false
				end
			else
				tr = reading
			end
			start = i + 1
		end
	end
	return tr
end
--- Scan a section for {{zh-pron}} and {{zh-see}} templates.
-- For each {{zh-pron}}, the first non-empty named argument whose
-- (preprocessed) name equals the lect abbreviation for `lang` is passed to
-- `handle_readings`. For each {{zh-see}}, the first argument is appended to
-- the `see` queue unless it is already recorded in `seen`.
-- @param content  section text to scan
-- @param lang     language code
-- @param see      list of page titles still to be visited (appended to)
-- @param seen     set of titles that must not be queued again (updated)
-- @param tr       transliteration accepted so far, if any
-- @return the resulting transliteration, nil if none was found, or false as
--         soon as `handle_readings` reports a conflict
local function iterate_content(content, lang, see, seen, tr)
	local wanted = lect_code[lang]
	for template, args in findTemplates(content) do
		if template == "zh-pron" then
			for k, v in pairs(args) do
				if #v > 0 and type(k) == "string" and frame:preprocess(k) == wanted then
					tr = handle_readings(frame:preprocess(v), lang, tr)
					break
				end
			end
			if tr == false then
				return tr
			end
		elseif template == "zh-see" then
			local target = args[1]
			-- A {{zh-see}} without a target has nothing to follow.
			if target ~= nil then
				target = trim(frame:preprocess(target))
				if target ~= "" and not seen[target] then
					-- Mark at enqueue time so the same page is never queued
					-- (and later loaded) more than once.
					seen[target] = true
					insert(see, target)
				end
			end
		end
	end
	return tr
end
--- Assemble a transliteration for "ltc" or "och" from per-character data
-- modules ("Module:zh/data/ltc-pron/<char>" or
-- "Module:zh/data/och-pron-ZS/<char>").
-- @param text       the term; one data module is loaded per character
-- @param lang       "ltc" or "och"
-- @param selectors  per-character selectors, separated by "," for "ltc" and
--                   ";" for "och"; each is "y" or an index into the
--                   character's data module
-- @return the transliteration (prefixed with "*" for "och"), or nil when a
--         data module cannot be loaded or a character with more than one
--         entry has no explicit index
local function tr_from_char_data(text, lang, selectors)
	local picks = mw.text.split(selectors, lang == "ltc" and "," or ";")
	local prefix = "Module:zh/data/" .. lang .. "-pron" .. (lang == "och" and "-ZS" or "") .. "/"
	local ltc_data, infer_categories -- loaded on first use, "ltc" only
	-- Results go into their own table: writing them back into `picks` would
	-- let selectors beyond the length of `text` leak into the output.
	local out = {}
	for i = 1, mw.ustring.len(text) do
		local ok, entries = pcall(require, prefix .. mw.ustring.sub(text, i, i))
		local pick = picks[i]
		if not ok or ((not pick or pick == "y") and #entries > 1) then
			return nil
		end
		if pick == "y" then
			pick = 1
		elseif pick then
			pick = tonumber(pick)
		end
		-- Fall back to the first entry when the selector is missing,
		-- non-numeric or out of range.
		local entry = pick and entries[pick] or entries[1]
		if lang == "ltc" then
			ltc_data = ltc_data or mw.loadData("Module:ltc-pron/data")
			infer_categories = infer_categories or require("Module:ltc-pron").infer_categories
			local initial, final, tone = infer_categories(entry)
			if tone ~= "" then
				tone = "<sup>" .. tone .. "</sup>"
			end
			out[i] = ltc_data.initialConv["Zhengzhang"][initial] .. ltc_data.finalConv["Zhengzhang"][final] .. tone
		else
			-- Field 6 of the entry is used as the transliteration.
			out[i] = entry[6]
		end
	end
	local result = table.concat(out, " ")
	if lang == "och" then
		result = "*" .. result
	end
	return result
end

--- Transliterate a term by looking up its reading on the term's own page.
-- The "Chinese" section of the page named `text` is scanned for {{zh-pron}};
-- if it yields no reading, pages referenced through {{zh-see}} are scanned
-- as well. The reading is then formatted according to `lang`.
-- @param text  the term (also used as the page title to look up)
-- @param lang  language code
-- @param sc    script code (unused here)
-- @return the transliteration followed by a single space; `text` itself when
--         it is nil or empty; otherwise the values of `fail(lang)`
function export.tr(text, lang, sc)
	if (not text) or text == "" then
		return text
	end

	if not lect_code[lang] then
		local lang_obj = require("Module:languages").getByCode(lang, nil, true)
		if not lang_obj then
			return fail(lang)
		end
		lang = lang_obj:getFullCode()
	end

	local content = get_content(text)
	if not content then
		return fail(lang)
	end

	local see = {}
	local seen = {
		[text] = true
	}
	local tr = iterate_content(content, lang, see, seen)
	if tr == nil then
		-- `see` may grow while it is being walked, so its length is
		-- re-evaluated on every pass.
		local i = 1
		while i <= #see do
			local title = see[i]
			content = get_content(title)
			if content then
				tr = iterate_content(content, lang, see, seen, tr)
				if tr == false then
					return fail(lang)
				end
				seen[title] = true
			end
			i = i + 1
		end
	end
	if not tr then
		return fail(lang)
	end

	if lang == "cmn" or lang == "lzh" or lang == "zh" then
		tr = tr:gsub("#", "")
		-- Only do the per-character pass when a non-ASCII lead byte is present.
		if tr:match("[\194-\244]") then
			tag = tag or mw.loadData("Module:zh/data/cmn-tag").MT
			-- The pattern matches one UTF-8 character at a time.
			tr = tr:gsub("[%z\1-\127\194-\244][\128-\191]*", function(m)
				if m == "一" then
					return "yī"
				elseif m == "不" then
					return "bù"
				else
					m = tag[m] and tag[m][1]
					if m then
						return toNFD(m):gsub("^[aeiou]", "'%0")
					end
				end
			end)
				:gsub("^'", "") --remove initial apostrophe inserted by previous function
		end
	elseif lang == "hak" then
		-- TODO
	elseif lang == "ltc" or lang == "och" then
		if tr == "n" then
			return fail(lang)
		end
		tr = tr_from_char_data(text, lang, tr)
		if not tr then
			return fail(lang)
		end
	elseif lang == "nan" then
		-- TODO
	elseif lang == "yue" then
		tr = tr:gsub("[%d-]+", "<sup>%0</sup>")
	elseif lang == "zhx-sic" then
		tr = tr
			:gsub("([%d-])(%a)", "%1 %2")
			:gsub("[%d-]+", "<sup>%0</sup>")
	elseif lang == "zhx-tai" then
		tr = tr:gsub("[%d*]+%-?[%d*]*", "<sup>%0</sup>")
	elseif lang == "nan-tws" then
		-- TODO
	elseif lang == "wuu" then
		if tr:match(';') then
			--TODO
			return fail(lang)
		elseif tr:match(':') then
			-- Drops the first three bytes.
			-- NOTE(review): presumably a two-letter prefix plus the colon - confirm.
			tr = tr:sub(4)
		end
		tr = require("Module:wuu-pron").wugniu_format(tr)
	else
		tr = require("Module:" .. lang .. "-pron").rom(tr)
	end

	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return tr .. " "
end
return export