Module:Lang-hbs
Jump to navigation
Jump to search
This module is rated as alpha. It is ready for third-party input, and may be used on a few pages to see if problems arise, but should be watched. Suggestions for new features or changes in their input and output mechanisms are welcome.
This module produces equivalent Serbo-Croatian Latin text from Cyrillic input, and Cyrillic text from Latin input.
Usage
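This module exports three functions providing different outputs: cyr2lat, lat2cyr and convert. cyr2lat and lat2cyr require the text to be converted as the first unnamed parameter
for proper operation; convert takes named parameters instead.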
cyr2lat and lat2cyr
These functions should be invoked in the following way:
{{#invoke:lang-hbs|cyr2lat|text to be converted}} {{#invoke:lang-hbs|lat2cyr|text to be converted}}
cyr2lat replaces every letter of the Serbo-Croatian Cyrillic alphabet with the corresponding Latin letter; lat2cyr performs the reverse replacement.
Note: per MOS:FOREIGN, non-English words in Latin script should be italicized. The cyr2lat
function does not add italics; that is left to the user.
convert
This function should be invoked in the following way:
{{#invoke:lang-hbs|convert|cyrillic=Cyrillic text|latin=Latin text}}
It returns the Latin text in italics followed by the Cyrillic text, separated by a comma. Whichever of the two is not supplied is produced by transliterating the other, so either of |cyrillic=
and |latin=
is sufficient.
Caveats
- The Serbo-Croatian Latin alphabet includes the letters "lj", "nj" and "dž", which, in addition to upper case ("LJ", "NJ" and "DŽ" respectively), also have a "title case" form ("Lj", "Nj" and "Dž") that is used when only the first letter of a word is capitalized (e.g. the name "Љиљана" is spelled "Ljiljana", not "LJiljana"). For reasons of performance and algorithmic complexity, this module always uses title case instead of upper case.
- Latin to Cyrillic transliteration works only with Unicode digraphs. Use Cyrillic to Latin transliteration if in doubt.
-- Table of functions exported by this module; it is returned at the end of the file.
local p = {}
-- Cyrillic to Latin substitution table.
-- Keys are single Serbo-Croatian Cyrillic letters; values are their Latin
-- spelling. The capital letters Љ, Њ and Џ map to the title-case digraphs
-- "Lj", "Nj" and "Dž" rather than to the all-caps forms.
local c2l = {
	["а"] = "a", ["А"] = "A",
	["б"] = "b", ["Б"] = "B",
	["в"] = "v", ["В"] = "V",
	["г"] = "g", ["Г"] = "G",
	["д"] = "d", ["Д"] = "D",
	["ђ"] = "đ", ["Ђ"] = "Đ",
	-- The capital key is Cyrillic Е (U+0415), spelled as its UTF-8 bytes so
	-- that it cannot be mistaken for (or mistyped as) the look-alike Latin E.
	["е"] = "e", ["\208\149"] = "E",
	["ж"] = "ž", ["Ж"] = "Ž",
	["з"] = "z", ["З"] = "Z",
	["и"] = "i", ["И"] = "I",
	["ј"] = "j", ["Ј"] = "J",
	["к"] = "k", ["К"] = "K",
	["л"] = "l", ["Л"] = "L",
	["љ"] = "lj", ["Љ"] = "Lj",
	["м"] = "m", ["М"] = "M",
	["н"] = "n", ["Н"] = "N",
	["њ"] = "nj", ["Њ"] = "Nj",
	["о"] = "o", ["О"] = "O",
	["п"] = "p", ["П"] = "P",
	["р"] = "r", ["Р"] = "R",
	["с"] = "s", ["С"] = "S",
	["т"] = "t", ["Т"] = "T",
	["ћ"] = "ć", ["Ћ"] = "Ć",
	["у"] = "u", ["У"] = "U",
	["ф"] = "f", ["Ф"] = "F",
	["х"] = "h", ["Х"] = "H",
	["ц"] = "c", ["Ц"] = "C",
	["ч"] = "č", ["Ч"] = "Č",
	["џ"] = "dž", ["Џ"] = "Dž",
	["ш"] = "š", ["Ш"] = "Š"
}
-- Latin to Cyrillic substitution table.
-- Keys are Serbo-Croatian Latin letters; values are the corresponding
-- Cyrillic letters. Each digraph (lj, nj, dž) is listed twice: as its
-- two-letter spelling and as the single-code-point digraph letter from
-- U+01C4..U+01CC. A lookup driven by a one-letter pattern such as "%a" can
-- only ever hit the single-code-point keys; the two-letter keys are reachable
-- only by direct lookup.
local l2c = {
	["a"] = "а", ["A"] = "А",
	["b"] = "б", ["B"] = "Б",
	["v"] = "в", ["V"] = "В",
	["g"] = "г", ["G"] = "Г",
	["d"] = "д", ["D"] = "Д",
	["đ"] = "ђ", ["Đ"] = "Ђ",
	-- The capital value is Cyrillic Е (U+0415), spelled as its UTF-8 bytes so
	-- that it cannot be mistaken for (or mistyped as) the look-alike Latin E.
	["e"] = "е", ["E"] = "\208\149",
	["ž"] = "ж", ["Ž"] = "Ж",
	["z"] = "з", ["Z"] = "З",
	["i"] = "и", ["I"] = "И",
	["j"] = "ј", ["J"] = "Ј",
	["k"] = "к", ["K"] = "К",
	["l"] = "л", ["L"] = "Л",
	["lj"] = "љ", ["Lj"] = "Љ", ["LJ"] = "Љ",
	-- U+01C9 (lj), U+01C8 (Lj), U+01C7 (LJ) as UTF-8 bytes.
	["\199\137"] = "љ", ["\199\136"] = "Љ", ["\199\135"] = "Љ",
	["m"] = "м", ["M"] = "М",
	["n"] = "н", ["N"] = "Н",
	["nj"] = "њ", ["Nj"] = "Њ", ["NJ"] = "Њ",
	-- U+01CC (nj), U+01CB (Nj), U+01CA (NJ) as UTF-8 bytes.
	["\199\140"] = "њ", ["\199\139"] = "Њ", ["\199\138"] = "Њ",
	["o"] = "о", ["O"] = "О",
	["p"] = "п", ["P"] = "П",
	["r"] = "р", ["R"] = "Р",
	["s"] = "с", ["S"] = "С",
	["t"] = "т", ["T"] = "Т",
	["ć"] = "ћ", ["Ć"] = "Ћ",
	["u"] = "у", ["U"] = "У",
	["f"] = "ф", ["F"] = "Ф",
	["h"] = "х", ["H"] = "Х",
	["c"] = "ц", ["C"] = "Ц",
	["č"] = "ч", ["Č"] = "Ч",
	["dž"] = "џ", ["Dž"] = "Џ", ["DŽ"] = "Џ",
	-- U+01C6 (dž), U+01C5 (Dž), U+01C4 (DŽ) as UTF-8 bytes.
	["\199\134"] = "џ", ["\199\133"] = "Џ", ["\199\132"] = "Џ",
	["š"] = "ш", ["Š"] = "Ш"
}
-- Transliterate Cyrillic text to Latin.
-- Every letter matched by the "%a" class is looked up in c2l; a letter with no
-- entry there, and every non-letter character, is left unchanged.
-- Declared local so the helper does not leak into the global environment.
-- @param str  string to transliterate
-- @return     the transliterated string (the substitution count is dropped)
local function _cyr2lat(str)
	-- The extra parentheses truncate gsub's two results to the string alone.
	return (mw.ustring.gsub(str, "%a", c2l))
end
-- Transliterate Latin text to Cyrillic.
-- Every letter matched by the "%a" class is looked up in l2c; a letter with no
-- entry there, and every non-letter character, is left unchanged.
-- Because the pattern matches one letter at a time, a digraph is converted
-- only when it is written as a single digraph character; "l" followed by "j"
-- is converted as two separate letters.
-- Declared local so the helper does not leak into the global environment.
-- @param str  string to transliterate
-- @return     the transliterated string (the substitution count is dropped)
local function _lat2cyr(str)
	-- The extra parentheses truncate gsub's two results to the string alone.
	return (mw.ustring.gsub(str, "%a", l2c))
end
-- Exported entry point: Cyrillic to Latin.
-- The text is taken from the first unnamed parameter; if that is absent, the
-- named parameter |text= is used instead.
-- @param frame  frame object whose args table carries the parameters
-- @return       the transliterated text
-- Raises an error when neither parameter is present.
function p.cyr2lat(frame)
	local text = frame.args[1] or frame.args.text
	if not text then
		-- Fail with a readable message instead of a type error from gsub.
		error("No text to convert is included", 0)
	end
	return _cyr2lat(text)
end
-- Exported entry point: Latin to Cyrillic.
-- The text is taken from the first unnamed parameter; if that is absent, the
-- named parameter |text= is used instead.
-- @param frame  frame object whose args table carries the parameters
-- @return       the transliterated text
-- Raises an error when neither parameter is present.
function p.lat2cyr(frame)
	local text = frame.args[1] or frame.args.text
	if not text then
		-- Fail with a readable message instead of a type error from gsub.
		error("No text to convert is included", 0)
	end
	return _lat2cyr(text)
end
-- Exported entry point: return the Latin text in wikitext italics followed by
-- the Cyrillic text, separated by ", ".
-- Reads |latin= and |cyrillic=; whichever one is missing is produced by
-- transliterating the other.
-- @param frame  frame object whose args table carries the parameters
-- @return       string of the form ''<latin>'', <cyrillic>
-- Raises an error when neither parameter carries any text.
function p.convert(frame)
	local lat = frame.args.latin
	local cyr = frame.args.cyrillic

	-- A named parameter that is present but blank arrives as "", which is
	-- truthy in Lua; treat it the same as an absent parameter so that
	-- |cyrillic=|latin=X still derives the Cyrillic text from X.
	if lat == "" then
		lat = nil
	end
	if cyr == "" then
		cyr = nil
	end

	if not lat and not cyr then
		error("Neither Latin nor Cyrillic text is included", 0)
	end

	-- At least one side is set here, so each fallback has a source to use.
	if not lat then
		lat = _cyr2lat(cyr)
	elseif not cyr then
		cyr = _lat2cyr(lat)
	end

	return mw.ustring.format("''%s'', %s", lat, cyr)
end
-- Return the table of exported functions as the module's value.
return p