-- Alternative that derives the class from the script data module (disabled):
--local Han_pattern = "[" .. require("Module:scripts").getByCode("Hani"):getCharacters() .. "]"

-- Character class (for mw.ustring patterns) of the code points accepted as Han
-- characters. Every "-" inside the class must sit between two range endpoints;
-- a range endpoint followed by another "-" would make the hyphen itself a
-- member of the class.
-- The upper endpoint of the second supplementary-plane range, U+2EE5F, is
-- written as a UTF-8 byte escape (F0 AE B9 9F) so it cannot be lost when the
-- file is re-encoded or rendered with a font that lacks the glyph.
local Han_pattern = "[一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟"
	.. "𪜀-\240\174\185\159" -- range ending at U+2EE5F
	.. "𰀀-𲎯]"
-- Set of syllable initials accepted by check_jyutping: each listed string is a
-- key mapped to 1.
local initials = {}
for onset in ("b p m f d t n l g k ng h gw kw z c s j w"):gmatch("%S+") do
	initials[onset] = 1
end
-- Set of syllable finals accepted by check_jyutping: each listed string is a
-- key mapped to 1. One row per group of related finals.
local finals = {}
for rime in ([[
	aa aai aau aam aan aang aap aat aak
	a ai au am an ang ap at ak
	e ei eu em eng ep ek
	i iu im in ing ip it ik
	o oi ou on ong ot ok
	u ui un ung ut uk
	eoi eon eot
	oe oeng oet oek
	yu yun yut
]]):gmatch("%S+") do
	finals[rime] = 1
end
--- Report whether `syl` is a well-formed Jyutping syllable.
-- Accepted shape: lowercase letters, one tone digit 1-6, then optionally a
-- "-1" or "-2" suffix. The letters must be a listed final on its own, the
-- syllabic "m" or "ng", or a listed initial followed by a listed final.
-- @param syl string: a single syllable, without surrounding spaces
-- @return boolean
local function check_jyutping(syl)
	local letters, suffix = syl:match("^(%l+)[1-6](%-?[12]?)$")
	if letters == nil then
		return false
	end
	-- A one-character suffix is a bare "-" or a bare digit; neither is allowed.
	if suffix:len() == 1 then
		return false
	end
	-- Syllables without an initial, including the syllabic nasals.
	if letters == "m" or letters == "ng" or finals[letters] then
		return true
	end
	local onset, rime = letters:match("^([bpmfdtnlgknhzcsjw][gw]?)([aeiouy]+[mnptk]?g?)$")
	return onset ~= nil and initials[onset] ~= nil and finals[rime] ~= nil
end
--- Check that `prons` is a plausible list of Jyutping readings for `word`.
-- @param word string: must consist solely of characters matched by Han_pattern
-- @param prons string: comma-separated alternatives; each alternative is a
--   sequence of syllables separated by single spaces
-- @return boolean: true when `word` is all Han characters and every
--   alternative has exactly one valid syllable per character of `word`.
--   An empty `prons` yields no alternatives to reject, so the result is then
--   decided by `word` alone.
return function(word, prons)
	-- gsub's second result is the number of Han characters that were removed.
	local leftover, word_len = mw.ustring.gsub(word, Han_pattern, "")
	if leftover ~= "" then
		return false
	end
	for pron in prons:gmatch("[^,]+") do
		-- Syllables are separated by exactly one space. A leading, trailing or
		-- doubled space is malformed; a single separating space is expected
		-- and must not be rejected.
		if pron:find("^ ") or pron:find(" $") or pron:find("  ", 1, true) then
			return false
		end
		-- check against [[Module:zh/data/Jyutping character]]?
		-- check jyutping validity, counting syllables as we go
		local syl_count = 0
		for syl in pron:gmatch("[^ ]+") do
			if not check_jyutping(syl) then
				return false
			end
			syl_count = syl_count + 1
		end
		-- check length: one syllable per Han character
		if syl_count ~= word_len then
			return false
		end
	end
	return true
end