instead-unittest/morph/lang-ru.lua

434 lines
8.1 KiB
Lua
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- Keep a local reference to the global `type` builtin for the helpers below.
local type = type
-- Latin-key -> Cyrillic-character map (exported as `lang.kbd`).
-- The top level holds the plain keys; `shifted` holds the characters produced
-- when the same key is pressed together with Shift.
local kbdru = {
	-- upper letter row
	q = "й", w = "ц", e = "у", r = "к", t = "е", y = "н",
	u = "г", i = "ш", o = "щ", p = "з", ["["] = "х", ["]"] = "ъ",
	-- home row
	a = "ф", s = "ы", d = "в", f = "а", g = "п", h = "р",
	j = "о", k = "л", l = "д", [";"] = "ж", ["'"] = "э",
	-- lower letter row
	z = "я", x = "ч", c = "с", v = "м", b = "и", n = "т",
	m = "ь", [","] = "б", ["."] = "ю",
	-- remaining keys
	["`"] = "ё",
	["/"] = ".",
	shifted = {
		-- upper letter row
		q = "Й", w = "Ц", e = "У", r = "К", t = "Е", y = "Н",
		u = "Г", i = "Ш", o = "Щ", p = "З", ["["] = "Х", ["]"] = "Ъ",
		-- home row
		a = "Ф", s = "Ы", d = "В", f = "А", g = "П", h = "Р",
		j = "О", k = "Л", l = "Д", [";"] = "Ж", ["'"] = "Э",
		-- lower letter row
		z = "Я", x = "Ч", c = "С", v = "М", b = "И", n = "Т",
		m = "Ь", [","] = "Б", ["."] = "Ю",
		["`"] = "Ё",
		-- digit row and punctuation
		["1"] = "!", ["2"] = "@", ["3"] = "#", ["4"] = ";", ["5"] = "%",
		["6"] = ":", ["7"] = "?", ["8"] = "*", ["9"] = "(", ["0"] = ")",
		["-"] = "_", ["="] = "+",
		["/"] = ",",
	},
}
-- Lower-case -> upper-case map for the 33 Cyrillic letters.
-- The case helpers below walk this table because the byte-oriented
-- string.upper/string.lower calls they make first are only relied on for ASCII.
local toupper = {
	["й"] = "Й", ["ц"] = "Ц", ["у"] = "У", ["к"] = "К", ["е"] = "Е", ["н"] = "Н",
	["г"] = "Г", ["ш"] = "Ш", ["щ"] = "Щ", ["з"] = "З", ["х"] = "Х", ["ъ"] = "Ъ",
	["ф"] = "Ф", ["ы"] = "Ы", ["в"] = "В", ["а"] = "А", ["п"] = "П", ["р"] = "Р",
	["о"] = "О", ["л"] = "Л", ["д"] = "Д", ["ж"] = "Ж", ["э"] = "Э",
	["я"] = "Я", ["ч"] = "Ч", ["с"] = "С", ["м"] = "М", ["и"] = "И", ["т"] = "Т",
	["ь"] = "Ь", ["б"] = "Б", ["ю"] = "Ю",
	["ё"] = "Ё",
}
--- Convert a string to lower case, Latin and Cyrillic alike.
-- @param str value to convert; non-strings are returned untouched
-- @return the lower-cased string (or the original non-string value)
local function lower(str)
	if type(str) == 'string' then
		-- string.lower takes care of the ASCII letters ...
		local res = str:lower()
		-- ... and each Cyrillic capital is swapped for its small counterpart.
		for small, big in pairs(toupper) do
			res = res:gsub(big, small)
		end
		return res
	end
	return str
end
--- Convert a string to upper case, Latin and Cyrillic alike.
-- @param str value to convert; non-strings are returned untouched
-- @return the upper-cased string (or the original non-string value)
local function upper(str)
	if type(str) == 'string' then
		-- string.upper takes care of the ASCII letters ...
		local res = str:upper()
		-- ... and each small Cyrillic letter is swapped for its capital.
		for small, big in pairs(toupper) do
			res = res:gsub(small, big)
		end
		return res
	end
	return str
end
--- Report whether a string begins and/or ends with a capital letter.
-- Both ASCII `A-Z` and the Cyrillic capitals listed in `toupper` count.
-- @param str value to inspect
-- @return for a non-string: the single value `false`;
--         for a string: two values, `true` or `nil` each - the first tells
--         whether the string starts with a capital, the second whether it
--         ends with one
local function is_cap(str)
	if type(str) ~= 'string' then
		return false
	end
	local s, e
	-- The ASCII checks do not depend on the table entry, so do them once
	-- up front instead of on every loop iteration.
	if str:find("^[A-Z]") then
		s = true
	end
	if str:find("[A-Z]$") then
		e = true
	end
	if not (s and e) then
		for _, v in pairs(toupper) do
			if not s and str:find("^"..v) then
				s = true
			end
			if not e and str:find(v.."$") then
				e = true
			end
			-- Nothing left to learn once both ends are known to be capitals.
			if s and e then
				break
			end
		end
	end
	return s, e
end
--- Capitalise the first letter of a string (Latin or Cyrillic).
-- @param str value to capitalise; non-strings are returned untouched
-- @return the string with its first letter upper-cased, or the input
--         unchanged when it does not start with a known lower-case letter
local function cap(str)
	if type(str) ~= 'string' then
		return str
	end
	-- ASCII first letter: a single byte, handled by string.upper.
	if str:find("^[a-z]") then
		return str:sub(1, 1):upper() .. str:sub(2)
	end
	-- Cyrillic first letter: compare the leading bytes with each small
	-- letter and splice in the matching capital.
	for small, big in pairs(toupper) do
		local width = #small
		if str:sub(1, width) == small then
			return big .. str:sub(width + 1)
		end
	end
	return str
end
-- Forward declaration: the module table is filled in at the end of the file,
-- but `norm` needs to read its `yo` flag.
local lang
--- Normalise a string according to the `lang.yo` setting.
-- When `lang.yo` is falsy every "ё"/"Ё" is replaced by "е"/"Е";
-- otherwise the string is returned as is.
-- @param str value to normalise; non-strings are returned untouched
-- @return the normalised string (or the original non-string value)
local function norm(str)
	if type(str) == 'string' and not lang.yo then
		local folded = str:gsub("ё", "е")
		folded = folded:gsub("Ё", "Е")
		return folded
	end
	return str
end
-- Set of lower-case Cyrillic vowels: each listed letter maps to `true`.
local vowels = {}
for _, letter in ipairs({ "у", "е", "ы", "а", "о", "и", "ю", "ё", "э", "я" }) do
	vowels[letter] = true
end
--- Tell whether a single letter is a vowel, ignoring case.
-- @param l the letter to test
-- @return `true` when the lower-cased letter is in `vowels`, otherwise `nil`
local function is_vowel(l)
	return vowels[lower(l)]
end
-- Values of an annotation's `t` field that `flex_filter` lets through even
-- without a nominative-case tag.
-- NOTE(review): the two short-form tags are spelled with the `КР_` prefix
-- (`КР_ПРИЛ`, `КР_ПРИЧАСТИЕ`), matching the tag set the other grammemes in
-- this file come from; confirm against the morphological dictionary in use.
local gram_tt = {
	["ИНФИНИТИВ"] = true,
	["КР_ПРИЛ"] = true,
	["КР_ПРИЧАСТИЕ"] = true,
	["Г"] = true,
}
--- Decide whether a word form passes the filter.
-- @param v table whose `an` field holds the form's annotation (tag flags
--          plus a `t` field)
-- @return `true` for forms tagged "им" or "0", `false` for forms carrying
--         any of the tags "рд", "дт", "тв", "пр", "вн"; otherwise the
--         `gram_tt` entry for `an.t` (`true` or `nil`)
local function flex_filter(v)
	local tags = v.an
	if tags["им"] then
		return true
	end
	-- Any other case tag disqualifies the form.
	local other_case = tags["рд"] or tags["дт"] or tags["тв"]
		or tags["пр"] or tags["вн"]
	if other_case then
		return false
	end
	if tags["0"] then
		return true
	end
	-- No case information: accept only the whitelisted `t` values.
	return gram_tt[tags.t]
end
--- Summarise an annotation's tag flags as gender / number / tense / person.
-- Each of the four result fields falls back to 'any' when the annotation
-- carries none of the corresponding tags.
-- @param a annotation table keyed by grammeme tag
-- @return table with fields
--         `gen`  - 'male' | 'female' | 'neuter' | 'any'
--         `num`  - 'plural' | 'singular' | 'any'
--         `time` - 'future' | 'past' | 'present' | 'any'
--         `face` - 'first' | 'second' | 'third' | 'any'
local function gram_info(a)
	local t = { }
	if a['мр'] then
		t.gen = 'male'
	elseif a['жр'] then
		t.gen = 'female'
	elseif a['ср'] then
		t.gen = 'neuter'
	else
		t.gen = 'any'
	end
	-- 'мн' is the plural tag (see `gram_t.plural`), 'ед' the singular one.
	if a['мн'] then
		t.num = 'plural'
	elseif a['ед'] then
		t.num = 'singular'
	else
		t.num = 'any'
	end
	if a['буд'] then
		t.time = 'future'
	elseif a['прш'] then
		t.time = 'past'
	elseif a['нст'] then
		t.time = 'present'
	else
		t.time = 'any'
	end
	-- Person tags: each branch needs its own key, otherwise 'second' and
	-- 'third' can never be selected.
	if a['1л'] then
		t.face = 'first'
	elseif a['2л'] then
		t.face = 'second'
	elseif a['3л'] then
		t.face = 'third'
	else
		t.face = 'any'
	end
	return t
end
-- True when two category values contradict each other: they differ and
-- neither of them is the wildcard 'any'.
local function grammeme_clash(x, y)
	return x ~= y and x ~= 'any' and y ~= 'any'
end
--- Check two `gram_info` summaries for agreement.
-- @param g1 first summary (fields `gen`, `num`, `time`, `face`)
-- @param g2 second summary
-- @param time when truthy, the grammatical-number comparison is skipped
-- @return `false` if any compared category clashes, `true` otherwise
local function __gram_compat(g1, g2, time)
	if grammeme_clash(g1.gen, g2.gen) then
		return false
	end
	if not time and grammeme_clash(g1.num, g2.num) then
		return false
	end
	if grammeme_clash(g1.time, g2.time) then
		return false
	end
	return not grammeme_clash(g1.face, g2.face)
end
-- `t` values that are compatible only with themselves.
-- NOTE(review): the short-form tags are spelled `КР_ПРИЛ` / `КР_ПРИЧАСТИЕ`;
-- confirm against the morphological dictionary in use.
local gram_exact = {
	['КР_ПРИЛ'] = true, -- deliberately not interchangeable with 'П'
	['КР_ПРИЧАСТИЕ'] = true,
	['ПРИЧАСТИЕ'] = true,
	['Г'] = true,
}
--- Compare two `t` tags (as found in an annotation's `t` field) for
--- compatibility.
-- @param a first tag, may be nil/false
-- @param b second tag, may be nil/false
-- @return `true` when either tag is missing, when the tags are considered
--         interchangeable, or when neither tag is subject to a rule;
--         `false` otherwise
local function gram_eq(a, b)
	if not a or not b then
		return true
	end
	-- 'ИНФИНИТИВ' pairs with itself and with 'Г'.
	if a == 'ИНФИНИТИВ' or b == 'ИНФИНИТИВ' then
		return b == a or b == 'Г' or a == 'Г'
	end
	-- The remaining restricted tags demand an exact match.
	if gram_exact[a] or gram_exact[b] then
		return a == b
	end
	return true
end
--- Check whether two annotations are grammatically compatible.
-- @param base annotation of the base form (its `t` field and tag flags are read)
-- @param aa first annotation
-- @param bb second annotation; when `bb.noun` is truthy the base form must
--           carry the "им" tag and must itself agree with both annotations
--           (grammatical number is ignored for that extra comparison)
-- @return `false` on any mismatch, otherwise the result of comparing the
--         summaries of `aa` and `bb`
local function gram_compat(base, aa, bb)
	if not gram_eq(base.t, aa.t) then
		return false
	end
	local info_a = gram_info(aa)
	local info_b = gram_info(bb)
	if bb.noun then
		if not base['им'] then
			return false
		end
		local info_base = gram_info(base)
		local base_agrees = __gram_compat(info_base, info_a, true)
			and __gram_compat(info_base, info_b, true)
		if not base_agrees then
			return false
		end
	end
	return __gram_compat(info_a, info_b)
end
--- Copy a list of grammeme tags, making sure it names a grammatical person.
-- @param an list (array) of tag strings; nil is treated as an empty list
-- @return a new list with the same tags in the same order, followed by the
--         third-person tag '3л' when none of '1л', '2л', '3л' was present
-- NOTE(review): third person is assumed to be the intended default - confirm
-- against the callers of `gram_score`.
local function gram_norm(an)
	local a = {}
	local g = {}
	for _, v in ipairs(an or {}) do
		a[v] = true
		g[#g + 1] = v
	end
	if not a['1л'] and not a['2л'] and not a['3л'] then
		g[#g + 1] = '3л'
	end
	return g
end
--- Score how well an annotation matches a list of requested tags.
-- @param an annotation table: tag flags plus a `t` field
-- @param g list of requested tags (may be nil); a tag prefixed with '~' is
--          a tag that should NOT be present
-- @return numeric score: +1 for every requested tag flag that is set,
--         +10 when a requested tag equals `an.t`, the same amounts
--         subtracted for '~' tags, and a 0.1 penalty each for the "фам"
--         and "арх" flags
local function gram_score(an, g)
	local score = 0
	-- Substitute the empty list before normalising: gram_norm iterates its
	-- argument, so a nil `g` has to be replaced here, not afterwards.
	g = gram_norm(g or {})
	if an["фам"] then score = score - 0.1 end
	if an["арх"] then score = score - 0.1 end
	for _, vv in ipairs(g) do
		local negated = vv:sub(1, 1) == '~'
		if negated then
			vv = vv:sub(2)
		end
		-- A set flag is worth 1, a match on the `t` field is worth 10.
		local weight = 0
		if an[vv] then
			weight = 1
		elseif an.t == vv then
			weight = 10
		end
		if negated then
			score = score - weight
		else
			score = score + weight
		end
	end
	return score
end
-- The module table: settings, helper functions, the grammeme names used by
-- the rest of the system, and a small built-in dictionary of word forms.
lang = {
	-- When false, `norm` replaces "ё"/"Ё" with "е"/"Е".
	yo = false,
	kbd = kbdru,
	norm = norm,
	upper = upper,
	lower = lower,
	cap = cap,
	is_cap = is_cap,
	is_vowel = is_vowel,
	flex_filter = flex_filter,
	gram_compat = gram_compat,
	gram_score = gram_score,
	-- Symbolic name -> grammeme tag.
	gram_t = {
		noun = 'С',
		nom = 'им',
		live = 'од',
		nonlive = 'но',
		neuter = 'ср',
		male = 'мр',
		female = 'жр',
		plural = 'мн',
		proper = 'имя',
		surname = 'фам',
		-- Person tags; each must be a distinct, non-empty tag.
		first = '1л',
		second = '2л',
		third = '3л',
	},
	-- Built-in dictionary. Keys are "word/tags"; a value is either a list
	-- of "form/tags" entries or a single form.
	dict = {
		["ведро/ср,но,С"] = {
			"ведро/им", "вёдра/им,мн",
			"ведро/вн", "вёдра/вн,мн",
			"ведра/рд", "вёдер/рд,мн",
			"ведру/дт", "вёдрам/дт,мн",
			"ведром/тв", "вёдрами/тв,мн",
			"ведре/пр", "вёдрах/пр,мн",
		},
		["деревья/ср,но,мн,С"] = {
			"деревья/им",
			"деревья/вн",
			"деревьев/рд",
			"деревьями/тв",
			"деревьях/пр",
			"деревьям/дт",
		},
		["дерево/ср,но,С"] = {
			"дерево/им", "деревья/им,мн",
			"дерево/вн", "деревья/вн,мн",
			"дерева/рд", "деревьев/рд,мн",
			"деревом/тв", "деревьями/тв,мн",
			"дереве/пр", "деревьях/пр,мн",
			-- "дереву" is the dative form, paired with the dative plural.
			"дереву/дт", "деревьям/дт,мн",
		},
		["огонь/мр,но,С"] = {
			"огонь/им", "огни/им,мн",
			"огонь/вн", "огни/вн,мн",
			"огня/рд", "огней/рд,мн",
			"огню/дт", "огням/дт,мн",
			"огнём/тв", "огнями/тв,мн",
			"огне/пр", "огнях/пр,мн",
		},
		-- Plural-only entry for "flowers" (as opposed to the plural "цветки").
		["цветы/мн,мр,но,С"] = {
			"цветы/им",
			"цветы/вн",
			"цветов/рд",
			"цветам/дт",
			"цветами/тв",
			"цветах/пр",
		},
		["хлам/пр,2"] = "хламе",
		["клевер/пр"] = "клевере",
		["песок/пр,2"] = "песке",
	},
}
return lang