-- Módulo:lenguas/idiomas/2
-- La documentación para este módulo puede ser creada en Módulo:lenguas/idiomas/2/doc
local u = mw.ustring.char
local m_langdata = require("Módulo:lenguas/idiomas/puntuacion")
local c = m_langdata.chars
local p = m_langdata.puaChars
local s = m_langdata.shared
-- Ideally, we want to move these into [[Module:languages/data]], but because (a) it's necessary to use require on that module, and (b) they're only used in this data module, it's less memory-efficient to do that at the moment. If it becomes possible to use mw.loadData, then these should be moved there.
-- Shared sort-key rules labelled "no": the listed diacritics are stripped
-- (with "å" exempted), and æ, ø, å are rewritten to "z" followed by
-- p[1], p[2], p[3] respectively.
s["sortkey/no"] = {
	from = {"æ", "ø", "å"},
	to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]},
	remove_exceptions = {"å"},
	remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron .. c.dacute .. c.caron .. c.cedilla,
}
-- Shared standard-character set labelled "no": Latin letters plus the
-- common punctuation string c.punc (stored as positional element 1).
s["standardchars/no"] = {
	Latn = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå",
	c.punc,
}
-- Shared entry-name rules for Russian in Cyrillic: grave, acute and
-- diaeresis are stripped, except on the letters listed as exceptions.
s["nombentrada/ru-Cyrl"] = {
	remove_exceptions = {"Ё", "ё", "Ѣ̈", "ѣ̈", "Я̈", "я̈"},
	remove_diacritics = c.grave .. c.acute .. c.diaer,
}
-- Shared sort-key rules for Russian in Cyrillic. `from` and `to` are
-- parallel lists: the n-th item of `from` is replaced by the n-th item of `to`.
s["sortkey/ru-Cyrl"] = {
	from = {
		-- 2 chars
		"ё", "ѣ̈", "я̈",
		-- 1 char
		"і", "ѣ", "ѳ", "ѵ"
	},
	to = {
		"е" .. p[1], "ь" .. p[2], "я" .. p[1],
		"и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3]
	},
}
local m = {}
-- Afar.
m["aa"] = {
	"afar",
	27811,
	"cus-eas",
	"Latn, Ethi",
	otherNames = {"qafar"},
	-- Latin-script entry names drop the acute accent.
	entry_name = {
		Latn = {remove_diacritics = c.acute},
	},
}
-- Abkhaz.
m["ab"] = {
	"abjasio",
	5111,
	"cau-abz",
	"Cyrl, Geor, Latn",
	otherNames = {"abjaziano?", "abjazo?", "abjasiano?", "abjaso?"},
	translit = {
		Cyrl = "translit/ab",
		Geor = "translit/Geor",
	},
	override_translit = true,
	display_text = {
		Cyrl = s["mostrartexto/cau-Cyrl"],
	},
	entry_name = {
		Cyrl = s["nombentrada/cau-Cyrl"],
		Latn = s["nombentrada/cau-Latn"],
	},
	-- `from` and `to` are parallel lists; longer sequences are listed first.
	sort_key = {
		Cyrl = {
			from = {
				-- 3 chars
				"х'ә",
				-- 2 chars
				"гь", "гә", "ӷь", "ҕь", "ӷә", "ҕә", "дә", "ё", "жь", "жә", "ҙә", "ӡә", "ӡ'", "кь", "кә", "қь", "қә", "ҟь", "ҟә", "ҫә", "тә", "ҭә", "ф'", "хь", "хә", "х'", "ҳә", "ць", "цә", "ц'", "ҵә", "ҵ'", "шь", "шә", "џь",
				-- 1 char
				"ӷ", "ҕ", "ҙ", "ӡ", "қ", "ҟ", "ԥ", "ҧ", "ҫ", "ҭ", "ҳ", "ҵ", "ҷ", "ҽ", "ҿ", "ҩ", "џ", "ә"
			},
			to = {
				"х" .. p[4],
				"г" .. p[1], "г" .. p[2], "г" .. p[5], "г" .. p[6], "г" .. p[7], "г" .. p[8], "д" .. p[1], "е" .. p[1], "ж" .. p[1], "ж" .. p[2], "з" .. p[2], "з" .. p[4], "з" .. p[5], "к" .. p[1], "к" .. p[2], "к" .. p[4], "к" .. p[5], "к" .. p[7], "к" .. p[8], "с" .. p[2], "т" .. p[1], "т" .. p[3], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[3], "х" .. p[6], "ц" .. p[1], "ц" .. p[2], "ц" .. p[3], "ц" .. p[5], "ц" .. p[6], "ш" .. p[1], "ш" .. p[2], "ы" .. p[3],
				"г" .. p[3], "г" .. p[4], "з" .. p[1], "з" .. p[3], "к" .. p[3], "к" .. p[6], "п" .. p[1], "п" .. p[2], "с" .. p[1], "т" .. p[2], "х" .. p[5], "ц" .. p[4], "ч" .. p[1], "ч" .. p[2], "ч" .. p[3], "ы" .. p[1], "ы" .. p[2], "ь" .. p[1]
			},
		},
	},
}
-- Avestan.
m["ae"] = {
	"avéstico",
	29572,
	"ira-cen",
	"Avst, Gujr",
	otherNames = {"zend", "bactriano antiguo"},
	translit = {
		Avst = "translit/Avst",
	},
}
-- Afrikaans.
m["af"] = {
	"afrikáans",
	14196,
	"gmw-frk",
	"Latn, Arab",
	ancestors = "nl",
	sort_key = {
		Latn = {
			-- An apostrophe (either form) followed by "n" gets its own key.
			from = {"['ʼ]n"},
			to = {"n" .. p[1]},
			remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'",
		},
	},
}
-- Akan.
m["ak"] = {
	"akan",
	28026,
	"alv-ctn",
	"Latn",
	otherNames = {
		"akánico", "twi", "fante", "fanti", "asante", "akuapem",
	},
}
-- Amharic.
m["am"] = {
	"amárico",
	28244,
	"sem-eth",
	"Ethi",
	translit = "translit/Ethi",
}
-- Aragonese.
m["an"] = {
	"aragonés",
	8765,
	"roa-ibe",
	"Latn",
	ancestors = "roa-oan",
}
-- Arabic.
m["ar"] = {
	"árabe",
	13955,
	"sem-arb",
	"Arab, Hebr, Syrc, Brai",
	otherNames = {"árabe estándar", "árabe clásico"},
	translit = {
		Arab = "translit/ar",
	},
	entry_name = {
		Arab = "nombentrada/ar",
	},
	sort_key = {
		-- Judeo-Arabic (Arabic in Hebrew script) is filed under the category
		-- header U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after
		-- Arabic-script titles.
		Hebr = {
			from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"},
			to = {u(0xFB21)},
		},
	},
}
-- Assamese.
m["as"] = {
	"asamés",
	29401,
	"inc-eas",
	"as-Beng",
	otherNames = {"asamiya"},
	ancestors = "inc-mas",
	translit = "translit/as",
}
-- Avar.
m["av"] = {
	"avar",
	29561,
	"cau-ava",
	"Cyrl, Latn, Arab",
	otherNames = {"avárico"},
	ancestors = "oav",
	translit = {
		Cyrl = "translit/cau-nec",
		Arab = "translit/ar",
	},
	override_translit = true,
	display_text = {
		Cyrl = s["mostrartexto/cau-Cyrl"],
	},
	entry_name = {
		Cyrl = s["nombentrada/cau-Cyrl"],
		Latn = s["nombentrada/cau-Latn"],
	},
	-- Parallel lists: each digraph/letter in `from` maps to the same-index key in `to`.
	sort_key = {
		Cyrl = {
			from = {"гъ", "гь", "гӏ", "ё", "кк", "къ", "кь", "кӏ", "лъ", "лӏ", "тӏ", "хх", "хъ", "хь", "хӏ", "цӏ", "чӏ"},
			to = {"г" .. p[1], "г" .. p[2], "г" .. p[3], "е" .. p[1], "к" .. p[1], "к" .. p[2], "к" .. p[3], "к" .. p[4], "л" .. p[1], "л" .. p[2], "т" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[3], "х" .. p[4], "ц" .. p[1], "ч" .. p[1]},
		},
	},
}
-- Aymara.
m["ay"] = {
	"aimara",
	4627,
	"sai-aym",
	"Latn",
	otherNames = {
		"aimara meridional",
		"aimara central",
		"aimara septentrional",
	},
}
-- Azerbaijani.
m["az"] = {
	"azerí",
	9292,
	"trk-ogz",
	"Latn, Cyrl, fa-Arab",
	otherNames = {"azarí", "afshar", "afshari", "afchar", "qashqa'i", "qashqai", "sonqor", "azerí turco", "azerí azerbayano", "azerbaiyano"},
	ancestors = "trk-oat",
	dotted_dotless_i = true,
	-- Display text uses the modifier apostrophe; entry names use the plain one.
	display_text = {
		Latn = {
			from = {"'"},
			to = {"ʼ"},
		},
	},
	entry_name = {
		Latn = {
			from = {"ʼ"},
			to = {"'"},
		},
		["fa-Arab"] = "nombentrada/ar",
	},
	sort_key = {
		Latn = {
			from = {
				-- "i" is listed first so that it ends up after "ı".
				"i",
				"ç", "ə", "ğ", "x", "ı", "q", "ö", "ş", "ü", "w"
			},
			to = {
				"i" .. p[1],
				"c" .. p[1], "e" .. p[1], "g" .. p[1], "h" .. p[1], "i", "k" .. p[1], "o" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]
			},
		},
		Cyrl = {
			from = {"ғ", "ә", "ы", "ј", "ҝ", "ө", "ү", "һ", "ҹ"},
			to = {"г" .. p[1], "е" .. p[1], "и" .. p[1], "и" .. p[2], "к" .. p[1], "о" .. p[1], "у" .. p[1], "х" .. p[1], "ч" .. p[1]},
		},
	},
}
-- Bashkir.
m["ba"] = {
	"baskir",
	13389,
	"trk-kbu",
	"Cyrl",
	translit = "translit/ba",
	override_translit = true,
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		from = {"ғ", "ҙ", "ё", "ҡ", "ң", "ө", "ҫ", "ү", "һ", "ә"},
		to = {"г" .. p[1], "д" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1], "о" .. p[1], "с" .. p[1], "у" .. p[1], "х" .. p[1], "э" .. p[1]},
	},
}
-- Belarusian.
m["be"] = {
	"bielorruso",
	9091,
	"zle",
	"Cyrl, Latn",
	otherNames = {"belarús"},
	ancestors = "orv",
	translit = {
		Cyrl = "translit/be",
	},
	-- Grave and acute are stripped, except on the listed Latin letters.
	entry_name = {
		remove_diacritics = c.grave .. c.acute,
		remove_exceptions = {"Ć", "ć", "Ń", "ń", "Ś", "ś", "Ź", "ź"},
	},
	sort_key = {
		Cyrl = {
			from = {"ґ", "ё", "і", "ў"},
			to = {"г" .. p[1], "е" .. p[1], "и" .. p[1], "у" .. p[1]},
		},
		Latn = {
			from = {"ć", "č", "dz", "dź", "dž", "ch", "ł", "ń", "ś", "š", "ŭ", "ź", "ž"},
			to = {"c" .. p[1], "c" .. p[2], "d" .. p[1], "d" .. p[2], "d" .. p[3], "h" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]},
		},
	},
	standardChars = {
		Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя",
		Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž",
		c.punc,
	},
}
-- Bulgarian.
m["bg"] = {
	"búlgaro",
	7918,
	"zls",
	"Cyrl",
	ancestors = "cu",
	translit = "translit/bg",
	entry_name = {
		remove_diacritics = c.grave .. c.acute,
		-- Exception pattern: "ѝ" bounded on both sides by whitespace or a string edge.
		remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
	},
	standardChars = {
		Cyrl = "АаБбВвГгДдЕеЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЬьЮюЯя",
		c.punc,
	},
}
-- Bihari.
m["bh"] = {
	"bihari",
	135305,
	"inc-eas",
	"Deva",
	ancestors = "pra",
}
-- Bislama.
m["bi"] = {
	"bislama",
	35452,
	"crp",
	"Latn",
	ancestors = "en",
}
-- Bambara.
m["bm"] = {
	"bambara",
	33243,
	"dmn-emn",
	"Latn",
	otherNames = {"bamanakano"},
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		from = {"ɛ", "ɲ", "ŋ", "ɔ"},
		to = {"e" .. p[1], "n" .. p[1], "n" .. p[2], "o" .. p[1]},
	},
}
-- Bengali.
m["bn"] = {
	"bengalí",
	9610,
	"inc-eas",
	"Beng, Newa",
	otherNames = {"bangla"},
	ancestors = "inc-mbn",
	translit = {
		Beng = "translit/bn",
	},
}
-- Tibetan.
m["bo"] = {
	"tibetano",
	34271,
	"sit-tib",
	"Tibt",
	ancestors = "xct",
	translit = "translit/Tibt",
	override_translit = true,
	display_text = s["mostrartexto/Tibt"],
	entry_name = s["nombentrada/Tibt"],
	sort_key = "sortkey/Tibt",
	-- "nubri" used to appear twice in this list; it is now listed once.
	otherNames = {"amdo", "lhasa", "dolpo", "gola", "humla", "khamba", "kham", "nubri", "lhomi",
		"limi", "loke", "lowa", "mugom", "panang", "shing saapa", "tichurong", "thudam", "tseku",
		"ü", "dbus", "walungge", "gyalsumdo?", "bajo manang?", "kyirong?"},
}
-- Breton.
m["br"] = {
	"bretón",
	12107,
	"cel-brs",
	"Latn",
	otherNames = {"gwenedeg", "vannetais", "kerneveg", "cornouaillais", "leoneg", "léonard", "tregerieg", "trégorrois"},
	ancestors = "xbm",
	-- "ch" and "c'h" (with any of three apostrophe forms) get distinct keys.
	sort_key = {
		from = {"ch", "c['ʼ’]h"},
		to = {"c" .. p[1], "c" .. p[2]},
	},
}
-- Catalan.
m["ca"] = {
	"catalán",
	7026,
	"roa-ocr",
	"Latn",
	-- Varieties already present in [[Module:etymology languages/data]] must not be listed here.
	otherNames = {"catalán valenciano"},
	ancestors = "roa-oca",
	sort_key = {
		-- The geminate "l·l" is keyed as plain "ll".
		from = {"l·l"},
		to = {"ll"},
		remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla,
	},
	standardChars = {
		Latn = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·",
		c.punc,
	},
}
-- Chechen.
m["ce"] = {
	"checheno",
	33350,
	"cau-vay",
	"Cyrl, Latn, Arab",
	translit = {
		Cyrl = "translit/cau-nec",
		Arab = "translit/ar",
	},
	override_translit = true,
	display_text = {
		Cyrl = s["mostrartexto/cau-Cyrl"],
	},
	entry_name = {
		Cyrl = s["nombentrada/cau-Cyrl"],
		Latn = s["nombentrada/cau-Latn"],
	},
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		Cyrl = {
			from = {"аь", "гӏ", "ё", "кх", "къ", "кӏ", "оь", "пӏ", "тӏ", "уь", "хь", "хӏ", "цӏ", "чӏ", "юь", "яь"},
			to = {"а" .. p[1], "г" .. p[1], "е" .. p[1], "к" .. p[1], "к" .. p[2], "к" .. p[3], "о" .. p[1], "п" .. p[1], "т" .. p[1], "у" .. p[1], "х" .. p[1], "х" .. p[2], "ц" .. p[1], "ч" .. p[1], "ю" .. p[1], "я" .. p[1]},
		},
	},
}
-- Chamorro.
m["ch"] = {
	"chamorro",
	33262,
	"poz-sus",
	"Latn",
	otherNames = {"chamorru"},
	sort_key = {
		from = {"å", "ch", "ñ", "ng"},
		to = {"a" .. p[1], "c" .. p[1], "n" .. p[1], "n" .. p[2]},
		-- The apostrophe is removed for sorting.
		remove_diacritics = "'",
	},
}
-- Corsican.
m["co"] = {
	"corso",
	33111,
	"roa-itd",
	"Latn",
	otherNames = {"corsu"},
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		from = {"chj", "ghj", "sc", "sg"},
		to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]},
	},
	standardChars = {
		Latn = "AaÀàBbCcDdEeÈèFfGgHhIiÌìÏïJjLlMmNnOoÒòPpQqRrSsTtUuÙùÜüVvZz",
		c.punc,
	},
}
-- Cree.
m["cr"] = {
	"cree",
	33390,
	"alg",
	"Cans, Latn",
	translit = {
		Cans = "translit/cr",
	},
}
-- Czech.
m["cs"] = {
	"checo",
	9056,
	"zlw",
	"Latn",
	ancestors = "zlw-ocs",
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		from = {"á", "č", "ď", "é", "ě", "ch", "í", "ň", "ó", "ř", "š", "ť", "ú", "ů", "ý", "ž"},
		to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]},
	},
	standardChars = {
		Latn = "AaÁáBbCcČčDdĎďEeÉéĚěFfGgHhIiÍíJjKkLlMmNnŇňOoÓóPpRrŘřSsŠšTtŤťUuÚúŮůVvYyÝýZzŽž",
		c.punc,
	},
}
-- Old Church Slavonic.
m["cu"] = {
	"eslavo eclesiástico antiguo",
	35499,
	"zls",
	"Cyrs, Glag",
	translit = {
		Cyrs = "translit/Cyrs",
		Glag = "translit/Glag",
	},
	entry_name = {
		Cyrs = s["nombentrada/Cyrs"],
	},
	sort_key = {
		Cyrs = s["sortkey/Cyrs"],
	},
}
-- Chuvash.
m["cv"] = {
	"chuvasio",
	33348,
	"trk-ogr",
	"Cyrl",
	ancestors = "xbo",
	translit = "translit/cv",
	override_translit = true,
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		from = {"ӑ", "ё", "ӗ", "ҫ", "ӳ"},
		to = {"а" .. p[1], "е" .. p[1], "е" .. p[2], "с" .. p[1], "у" .. p[1]},
	},
}
-- Welsh.
m["cy"] = {
	"galés",
	9309,
	"cel-brw",
	"Latn",
	otherNames = {"cofi", "dyfedio", "gwentiano", "gwynediano", "venedotiano", "powysiano", "galés patagónico"},
	ancestors = "wlm",
	sort_key = {
		-- Each digraph gets its own key after the corresponding base letter.
		from = {"ch", "dd", "ff", "ng", "ll", "ph", "rh", "th"},
		to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]},
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. "'",
	},
	standardChars = {
		Latn = "ÂâAaBbCcDdEeÊêFfGgHhIiÎîLlMmNnOoÔôPpRrSsTtUuÛûWwŴŵYyŶŷ",
		c.punc,
	},
}
-- Danish.
m["da"] = {
	"danés",
	9035,
	"gmq-eas",
	"Latn",
	ancestors = "gmq-oda",
	-- The sort-key rules and standard characters are identical to the shared
	-- "no" tables defined near the top of this module, so they are referenced
	-- here instead of being duplicated inline.
	sort_key = s["sortkey/no"],
	standardChars = s["standardchars/no"],
}
-- German.
m["de"] = {
	"alemán",
	188,
	"gmw-hgm",
	"Latn, Latf",
	otherNames = {"alemán de Alemania", "alemán alsatiano", "alemán estadounidense",
		"alemán bávaro", "alemán belga", "alemán africano", "alemán hesiano", "alemán francés",
		"alemán prusiano", "alemán silesiano", "alemán luxemburgués", "alemán suizo", "alemán DDR"},
	ancestors = "gmh",
	sort_key = {
		-- Ligatures and ß are expanded to two plain letters.
		from = {"æ", "œ", "ß"},
		to = {"ae", "oe", "ss"},
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove,
	},
	standardChars = {
		Latn = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz",
		c.punc,
	},
}
-- Dhivehi.
m["dv"] = {
	"dhivehi",
	32656,
	"inc-ins",
	"Thaa, Diak",
	otherNames = {"divehi", "maldivo", "majal", "mahal", "mahl"},
	ancestors = "pra",
	translit = {
		Thaa = "translit/dv",
		Diak = "translit/Diak",
	},
	override_translit = true,
}
-- Dzongkha.
m["dz"] = {
	"dzongkha",
	33081,
	"sit-tib",
	"Tibt",
	ancestors = "xct",
	-- Tibetan-script handling: shared tables for display/entry name,
	-- string references for transliteration and sort key.
	display_text = s["mostrartexto/Tibt"],
	entry_name = s["nombentrada/Tibt"],
	sort_key = "sortkey/Tibt",
	translit = "translit/Tibt",
	override_translit = true,
}
-- Ewe.
m["ee"] = {
	"ewe",
	30005,
	"alv-gbe",
	"Latn",
	sort_key = {
		-- Parallel lists: `from[n]` is replaced by `to[n]`.
		from = {"ɖ", "dz", "ɛ", "ƒ", "gb", "ɣ", "kp", "ny", "ŋ", "ɔ", "ts", "ʋ"},
		to = {"d" .. p[1], "d" .. p[2], "e" .. p[1], "f" .. p[1], "g" .. p[1], "g" .. p[2], "k" .. p[1], "n" .. p[1], "n" .. p[2], "o" .. p[1], "t" .. p[1], "v" .. p[1]},
		remove_diacritics = c.tilde,
	},
}
-- Greek.
m["el"] = {
	"griego",
	9129,
	"grk",
	"Grek, Polyt, Brai",
	otherNames = {"griego moderno", "griego actual", "neohelénico"},
	translit = {
		Grek = "translit/el",
		Polyt = "translit/grc",
	},
	override_translit = true,
	entry_name = {
		Grek = {
			remove_diacritics = c.caron .. c.diaerbelow .. c.brevebelow,
		},
		Polyt = {
			remove_diacritics = c.macron .. c.breve .. c.dbrevebelow,
			-- Any of the three listed apostrophe-like characters becomes a plain apostrophe.
			from = {"[" .. c.RSQuo .. c.psili .. c.coronis .. "]"},
			to = {"'"},
		},
	},
	-- Both Greek script variants use the same shared sort-key table.
	sort_key = {
		Grek = s["sortkey/Grek"],
		Polyt = s["sortkey/Grek"],
	},
	standardChars = {
		Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ",
		Brai = c.braille,
		c.punc,
	},
}
-- English.
m["en"] = {
	"inglés",
	1860,
	"gmw-ang",
	"Latn, Brai, Shaw, Dsrt",
	otherNames = {"inglés británico", "inglés estadounidense", "inglés canadiense", "inglés australiano", "inglés neozelandés", "inglés de la Mancomunidad de Naciones", "yinglish"},
	wikimedia_codes = "en, simple",
	ancestors = "enm",
	sort_key = {
		Latn = {
			-- Besides diacritics, apostrophes, hyphens and whitespace are removed.
			remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron .. c.diaer .. c.ringabove .. c.caron .. c.cedilla .. "'%-%s",
			from = {"æ", "œ"},
			to = {"ae", "oe"},
		},
	},
	standardChars = {
		Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
		Brai = c.braille,
		c.punc,
	},
}
-- Esperanto.
m["eo"] = {
	"esperanto",
	143,
	"art",
	"Latn",
	sort_key = {
		-- Each accented letter gets a key directly after its base letter.
		from = {"ĉ", "ĝ", "ĥ", "ĵ", "ŝ", "ŭ"},
		to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]},
		remove_diacritics = c.grave .. c.acute,
	},
	standardChars = {
		Latn = "AaBbCcĈĉDdEeFfGgĜĝHhĤĥIiJjĴĵKkLlMmNnOoPpRrSsŜŝTtUuŬŭVvZz",
		c.punc,
	},
}
-- Spanish.
m["es"] = {
	"español",
	1321,
	"roa-ibe",
	"Latn, Brai",
	otherNames = {"castellano", "español amazónico", "español de la selva", "español de la jungla", "español andino", "español rioplatense", "español chileno", "español caribeño", "español centroamericano", "español mexicano", "español de España", "español andaluz", "español canario", "español marroquí", "español ecuatoguineano", "español filipino?", "lunfardo", "parlache"},
	ancestors = "osp",
	sort_key = {
		Latn = {
			-- "ñ" is keyed as its own letter after "n".
			from = {"ñ"},
			to = {"n" .. p[1]},
			remove_diacritics = c.acute .. c.diaer .. c.cedilla,
		},
	},
	standardChars = {
		Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz",
		Brai = c.braille,
		c.punc,
	},
}
-- Estonian.
m["et"] = {
	"estonio",
	9072,
	"urj-fin",
	"Latn",
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		from = {
			-- 2 chars
			"š", "ž", "õ", "ä", "ö", "ü",
			-- 1 char
			"z"
		},
		to = {
			"s" .. p[1], "s" .. p[3], "w" .. p[1], "w" .. p[2], "w" .. p[3], "w" .. p[4],
			"s" .. p[2]
		},
	},
	standardChars = {
		Latn = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvÕõÄäÖöÜü",
		c.punc,
	},
}
-- Basque.
m["eu"] = {
	"vasco",
	8752,
	"euq",
	"Latn",
	otherNames = {"euskera"},
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		from = {"ç", "ñ"},
		to = {"c" .. p[1], "n" .. p[1]},
	},
	standardChars = {
		Latn = "AaBbDdEeFfGgHhIiJjKkLlMmNnÑñOoPpRrSsTtUuXxZz",
		c.punc,
	},
}
-- Persian.
m["fa"] = {
	"persa",
	9168,
	"ira-swi",
	"fa-Arab",
	otherNames = {"farsi", "persa moderno", "persa iraní", "dari", "aimaq", "aimak", "eimak", "persa occidental"},
	ancestors = "pal",
	entry_name = {
		remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.kashida .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
		-- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif
		from = {"هٔ", "ٱ"},
		to = {"ه", "ا"},
	},
}
-- Franc-Comtois.
m["fc"] = {
	"franco-comtés",
	510561,
	"roa-oil",
	"Latn",
	otherNames = {"comtés", "jurassien", "ajoulot", "vâdais", "taignon", "bisontin", "bousbot"},
	-- Uses the shared "roa-oil" sort-key table.
	sort_key = s["sortkey/roa-oil"],
}
-- Fula.
m["ff"] = {
	"fula",
	33454,
	"alv-fwo",
	"Latn, Adlm",
	otherNames = {
		"fulani", "fulfulde", "pular", "pulaar",
		"fulfulde de Adamawa", "fulfulde de Bagirmi", "fulfulde de Borgu",
		"fulfulde de Maasina", "fulfulde de Nigeria", "fulfulde del Níger",
	},
}
-- Finnish.
m["fi"] = {
	"finés",
	1412,
	"urj-fin",
	"Latn",
	otherNames = {"suomi"},
	-- Display text uses the typographic apostrophe; entry names use the plain one.
	display_text = {
		from = {"'"},
		to = {"’"},
	},
	entry_name = {
		-- "ˣ" is used to indicate gemination of the next consonant.
		remove_diacritics = "ˣ",
		from = {"’"},
		to = {"'"},
	},
	sort_key = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron .. c.dacute .. c.caron .. c.cedilla .. "':",
		remove_exceptions = {"å"},
		-- The last rule drops a hyphen that follows any character.
		from = {"ø", "æ", "œ", "ß", "å", "(.)%-"},
		to = {"o", "ae", "oe", "ss", "z" .. p[1], "%1"},
	},
	standardChars = {
		Latn = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö",
		c.punc,
	},
}
-- Fijian.
m["fj"] = {
	"fiyiano",
	33295,
	"poz-occ",
	"Latn",
}
-- Faroese.
m["fo"] = {
	"feroés",
	25258,
	"gmq-ins",
	"Latn",
	otherNames = {"faroés", "faeroés"},
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		from = {"á", "ð", "í", "ó", "ú", "ý", "æ", "ø"},
		to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]},
	},
	standardChars = {
		Latn = "AaÁáBbDdÐðEeFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvYyÝýÆæØø",
		c.punc,
	},
}
-- French.
m["fr"] = {
	"francés",
	150,
	"roa-oil",
	"Latn, Brai",
	otherNames = {"francés actual", "francés africano", "francés belga", "francés canadiense", "francés europeo", "francés estadounidense", "cajún"},
	ancestors = "frm",
	-- Display text uses the typographic apostrophe; entry names use the plain one.
	display_text = {
		from = {"'"},
		to = {"’"},
	},
	entry_name = {
		from = {"’"},
		to = {"'"},
	},
	sort_key = {
		Latn = s["sortkey/roa-oil"],
	},
	standardChars = {
		Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz",
		Brai = c.braille,
		c.punc,
	},
}
-- Frisian (West Frisian).
m["fy"] = {
	"frisón",
	27175,
	"gmw-fri",
	"Latn",
	otherNames = {"frisón occidental"},
	ancestors = "ofs",
	sort_key = {
		-- "y" is keyed as "i".
		from = {"y"},
		to = {"i"},
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer,
	},
	standardChars = {
		Latn = "AaâäàÆæBbCcDdEeéêëèFfGgHhIiïìYyỳJjKkLlMmNnOoôöòPpRrSsTtUuúûüùVvWwZz",
		c.punc,
	},
}
-- Irish.
m["ga"] = {
	"irlandés",
	9142,
	"cel-gae",
	"Latn, Latg",
	otherNames = {"irlandés gaélico", "gaélico"},
	ancestors = "mga",
	sort_key = {
		-- Dotted consonants are keyed as consonant + "h".
		from = {"ḃ", "ċ", "ḋ", "ḟ", "ġ", "ṁ", "ṗ", "ṡ", "ṫ"},
		to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"},
		remove_diacritics = c.acute,
	},
	standardChars = {
		Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíLlMmNnOoÓóPpRrSsTtUuÚúVv",
		c.punc,
	},
}
-- Scottish Gaelic.
m["gd"] = {
	"gaélico escocés",
	9314,
	"cel-gae",
	"Latn, Latg",
	otherNames = {"gaélico", "gàidhlig"},
	ancestors = "mga",
	sort_key = {
		remove_diacritics = c.grave .. c.acute,
	},
	standardChars = {
		Latn = "AaÀàBbCcDdEeÈèFfGgHhIiÌìLlMmNnOoÒòPpRrSsTtUuÙù",
		c.punc,
	},
}
-- Galician.
m["gl"] = {
	"gallego",
	9307,
	"roa-ibe",
	"Latn",
	ancestors = "roa-opt",
	sort_key = {
		-- "ñ" is keyed as its own letter after "n".
		from = {"ñ"},
		to = {"n" .. p[1]},
		remove_diacritics = c.acute,
	},
	standardChars = {
		Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíÏïLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxZz",
		c.punc,
	},
}
-- Guaraní.
m["gn"] = {
	"guaraní",
	35876,
	"tup-gua",
	"Latn",
}
-- Gujarati.
m["gu"] = {
	"guyaratí",
	5137,
	"inc-wes",
	"Arab, Gujr",
	ancestors = "inc-mgu",
	translit = {
		Gujr = "translit/gu",
	},
	entry_name = {
		-- The value is a set of characters to strip. c.kasra used to be
		-- concatenated twice; listing it once yields the same set.
		remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. "઼"
	},
}
-- Manx.
m["gv"] = {
	"manés",
	12175,
	"cel-gae",
	"Latn",
	otherNames = {"gaélico manés", "manés septentrional", "manés meridional"},
	ancestors = "mga",
	sort_key = {
		-- The cedilla and the hyphen are removed for sorting.
		remove_diacritics = c.cedilla .. "-",
	},
	standardChars = {
		Latn = "AaBbCcÇçDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYy",
		c.punc,
	},
}
-- Hausa.
m["ha"] = {
	"hausa",
	56475,
	"cdc-wst",
	"Latn, Arab",
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron,
		},
	},
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		Latn = {
			from = {"ɓ", "b'", "ɗ", "d'", "ƙ", "k'", "sh", "ƴ", "'y"},
			to = {"b" .. p[1], "b" .. p[2], "d" .. p[1], "d" .. p[2], "k" .. p[1], "k" .. p[2], "s" .. p[1], "y" .. p[1], "y" .. p[2]},
		},
	},
}
-- Hebrew.
m["he"] = {
	"hebreo",
	9288,
	"sem-can",
	"Hebr, Phnx, Brai",
	otherNames = {"ivrit"},
	ancestors = "hbo",
	entry_name = {
		Hebr = {
			-- Strips U+0591-U+05BD, U+05BF-U+05C5, U+05C7 and c.CGJ.
			remove_diacritics = u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. c.CGJ,
		},
	},
}
-- Hindi.
m["hi"] = {
"hindi",
1568,
"inc-hnd",
"Deva, Kthi, Newa",
ancestors = "inc-ohi",
translit = {Deva = "translit/hi"},
standardChars = {
-- One long literal of Devanagari characters and sequences; edit with care.
Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰",
c.punc
},
otherNames = {"hindavi"},
}
-- Hiri Motu.
m["ho"] = {
	"hiri motu",
	33617,
	"crp",
	"Latn",
	otherNames = {"jirimotu", "pidgin motu", "motu policía"},
	ancestors = "meu",
}
-- Haitian Creole.
m["ht"] = {
	"criollo haitiano",
	33491,
	"crp",
	"Latn",
	ancestors = "fr",
	-- Parallel lists; the 3-character sequence is listed before the 2-character ones.
	sort_key = {
		from = {
			-- 3 chars
			"oun",
			-- 2 chars
			"an", "ch", "è", "en", "ng", "ò", "on", "ou", "ui"
		},
		to = {
			"o" .. p[4],
			"a" .. p[1], "c" .. p[1], "e" .. p[1], "e" .. p[2], "n" .. p[1], "o" .. p[1], "o" .. p[2], "o" .. p[3], "u" .. p[1]
		},
	},
}
-- Hungarian.
m["hu"] = {
	"húngaro",
	9067,
	"urj-ugr",
	"Latn, Hung",
	otherNames = {"magyar"},
	ancestors = "ohu",
	-- Parallel lists; the trigraph "dzs" is listed before the shorter items.
	sort_key = {
		Latn = {
			from = {
				-- 3 chars
				"dzs",
				-- 2 chars
				"á", "cs", "dz", "é", "gy", "í", "ly", "ny", "ó", "ö", "ő", "sz", "ty", "ú", "ü", "ű", "zs",
			},
			to = {
				"d" .. p[2],
				"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "o" .. p[2], "o" .. p[3], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "u" .. p[3], "z" .. p[1],
			},
		},
	},
	standardChars = {
		Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz",
		c.punc,
	},
}
-- Armenian.
m["hy"] = {
	"armenio",
	8785,
	"hyx",
	"Armn, Brai",
	otherNames = {"armenio actual", "armenio oriental", "armenio occidental"},
	ancestors = "axm",
	translit = {
		Armn = "translit/Armn",
	},
	override_translit = true,
	entry_name = {
		Armn = {
			remove_diacritics = "՛՜՞՟",
			-- "եւ" becomes the ligature "և"; superscripted letters lose their <sup> markup.
			from = {"եւ", "<sup>յ</sup>", "<sup>ի</sup>", "<sup>է</sup>"},
			to = {"և", "յ", "ի", "է"},
		},
	},
	sort_key = {
		Armn = {
			from = {
				-- 2 chars
				"ու", "եւ",
				-- 1 char
				"և"
			},
			to = {
				"ւ", "եվ",
				"եվ"
			},
		},
	},
}
-- Herero.
m["hz"] = {
	"herero",
	33315,
	"bnt-swb",
	"Latn",
}
-- Interlingua.
m["ia"] = {
	"interlingua",
	35934,
	"art",
	"Latn",
}
-- Indonesian.
m["id"] = {
	"indonesio",
	9240,
	"poz-mly",
	"Latn",
	ancestors = "ms",
	standardChars = {
		Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
		c.punc,
	},
}
-- Interlingue (Occidental).
m["ie"] = {
	"interlingue (occidental)",
	35850,
	"art",
	"Latn",
	otherNames = {"interlingue", "occidental"},
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ,
	},
}
-- Igbo.
m["ig"] = {
	"igbo",
	33578,
	"alv-igb",
	"Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.macron,
	},
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"},
		to = {"g" .. p[1], "g" .. p[2], "g" .. p[3], "i" .. p[1], "k" .. p[1], "k" .. p[2], "n" .. p[1], "n" .. p[2], "n" .. p[3], "o" .. p[1], "s" .. p[1], "u" .. p[1]},
	},
}
-- Sichuan Yi (Nuosu).
m["ii"] = {
	"lolo de Sichuán",
	34235,
	"tbq-lol",
	"Yiii",
	otherNames = {"nuosu", "nosu", "yi septentrional", "yi de Liangshan", "yi de Sichuán"},
	translit = "translit/ii",
}
-- Iñupiaq.
m["ik"] = {
	"iñupiaq",
	27183,
	"esx-inu",
	"Latn",
	otherNames = {"iñupiak", "inupiatun"},
	-- Parallel lists. Note that "ng" and "ŋ" map to the same key.
	sort_key = {
		from = {
			-- 2 chars
			"ch", "ġ", "dj", "ḷ", "ł̣", "ñ", "ng", "r̂", "sr", "zr",
			-- 1 char
			"ł", "ŋ", "ʼ"
		},
		to = {
			"c" .. p[1], "g" .. p[1], "h" .. p[1], "l" .. p[1], "l" .. p[3], "n" .. p[1], "n" .. p[2], "r" .. p[1], "s" .. p[1], "z" .. p[1],
			"l" .. p[2], "n" .. p[2], "z" .. p[2]
		},
	},
}
-- Ido.
m["io"] = {
	"ido",
	35224,
	"art",
	"Latn",
}
-- Icelandic.
m["is"] = {
	"islandés",
	294,
	"gmq-ins",
	"Latn",
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		from = {"á", "ð", "é", "í", "ó", "ú", "ý", "þ", "æ", "ö"},
		to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]},
	},
	standardChars = {
		Latn = "AaÁáBbDdÐðEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvXxYyÝýÞþÆæÖö",
		c.punc,
	},
}
-- Italian.
m["it"] = {
	"italiano",
	652,
	"roa-itd",
	"Latn",
	ancestors = "roa-oit",
	sort_key = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove,
	},
	standardChars = {
		Latn = "AaÀàBbCcDdEeÈèÉéFfGgHhIiÌìLlMmNnOoÒòPpQqRrSsTtUuÙùVvZz",
		c.punc,
	},
}
-- Inuktitut.
m["iu"] = {
	"inuktitut",
	29921,
	"esx-inu",
	"Cans, Latn",
	otherNames = {"aivilimmiut", "inuinnaq", "inuinnaqtun", "inuvialuktun", "inuvialuk", "kivallirmiut",
		"natsilingmiut", "nunavimmiutit", "nunatsiavummiut", "siglitun", "siglit", "inuktitut del Este/Oeste canadiense"},
	-- Canadian syllabics use the same transliteration module as Cree.
	translit = {
		Cans = "translit/cr",
	},
	override_translit = true,
}
-- Japanese.
m["ja"] = {
	"japonés",
	5287,
	"jpx",
	"Jpan, Latn, Brai",
	otherNames = {"japonés actual", "niponés", "nihongo"},
	ancestors = "ojp",
	-- Transliteration and sort key both come from shared "Jpan" entries.
	translit = s["translit/Jpan"],
	sort_key = s["sortkey/Jpan"],
	link_tr = true,
}
-- Javanese.
m["jv"] = {
	"javanés",
	33549,
	"poz-sus",
	"Latn, Java",
	ancestors = "kaw",
	translit = {
		Java = "translit/jv",
	},
	link_tr = true,
	-- Modern Javanese does not use ê, so the circumflex is stripped.
	entry_name = {
		remove_diacritics = c.circ,
	},
	sort_key = {
		Latn = {
			from = {"å", "dh", "é", "è", "ng", "ny", "th"},
			to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "n" .. p[1], "n" .. p[2], "t" .. p[1]},
		},
	},
}
-- Georgian.
m["ka"] = {
	"georgiano",
	8108,
	"ccs-gzn",
	"Geor, Geok, Hebr",
	otherNames = {"judeogeorgiano", "kivruli", "gruzínico"},
	ancestors = "oge",
	translit = {
		Geor = "translit/Geor",
		Geok = "translit/Geok",
	},
	override_translit = true,
	entry_name = {
		remove_diacritics = c.circ,
	},
}
-- Kongo.
m["kg"] = {
	"kikongo",
	33702,
	"bnt-kng",
	"Latn",
	otherNames = {
		"kongo", "Koongo", "laari", "kongo salvadoreño", "yombe",
	},
}
-- Kikuyu.
m["ki"] = {
	"kikuyu",
	33587,
	"bnt-kka",
	"Latn",
	otherNames = {
		"gikuyu", "gĩkũyũ",
	},
}
-- Kwanyama.
m["kj"] = {
	"kuanyama",
	1405077,
	"bnt-ova",
	"Latn",
	otherNames = {
		"kwanyama", "oshikwanyama", "oshikuanyama",
	},
}
-- Kazakh.
m["kk"] = {
	"kazajo",
	9252,
	"trk-kno",
	"Cyrl, Latn, kk-Arab",
	-- Cyrillic transliteration is given inline as parallel `from`/`to` lists
	-- rather than as a reference to a transliteration module.
	translit = {
		Cyrl = {
			from = {
				-- 2 chars; are "Ӯ" and "ӯ" actually used?
				"Ё", "ё", "Й", "й", "Нг", "нг", "Ӯ", "ӯ",
				-- 1 char
				"А", "а", "Ә", "ә", "Б", "б", "В", "в", "Г", "г", "Ғ", "ғ", "Д", "д", "Е", "е", "Ж", "ж", "З", "з", "И", "и", "К", "к", "Қ", "қ", "Л", "л", "М", "м", "Н", "н", "Ң", "ң", "О", "о", "Ө", "ө", "П", "п", "Р", "р", "С", "с", "Т", "т", "У", "у", "Ұ", "ұ", "Ү", "ү", "Ф", "ф", "Х", "х", "Һ", "һ", "Ц", "ц", "Ч", "ч", "Ш", "ш", "Щ", "щ", "Ъ", "ъ", "Ы", "ы", "І", "і", "Ь", "ь", "Э", "э", "Ю", "ю", "Я", "я",
			},
			to = {
				"E", "e", "İ", "i", "Ñ", "ñ", "U", "u",
				"A", "a", "Ä", "ä", "B", "b", "V", "v", "G", "g", "Ğ", "ğ", "D", "d", "E", "e", "J", "j", "Z", "z", "İ", "i", "K", "k", "Q", "q", "L", "l", "M", "m", "N", "n", "Ñ", "ñ", "O", "o", "Ö", "ö", "P", "p", "R", "r", "S", "s", "T", "t", "U", "u", "Ū", "ū", "Ü", "ü", "F", "f", "X", "x", "H", "h", "S", "s", "Ç", "ç", "Ş", "ş", "Ş", "ş", "", "", "Y", "y", "I", "ı", "", "", "É", "é", "Ü", "ü", "Ä", "ä",
			},
		},
	},
	-- override_translit = true,
	sort_key = {
		Cyrl = {
			from = {"ә", "ғ", "ё", "қ", "ң", "ө", "ұ", "ү", "һ", "і"},
			to = {"а" .. p[1], "г" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1], "у" .. p[2], "х" .. p[1], "ы" .. p[1]},
		},
	},
	standardChars = {
		Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя",
		c.punc,
	},
}
-- Greenlandic.
m["kl"] = {
	"groenlandés",
	25355,
	"esx-inu",
	"Latn",
	otherNames = {"kalaallisut"},
	-- æ, ø, å are keyed as "z" followed by p[1], p[2], p[3].
	sort_key = {
		from = {"æ", "ø", "å"},
		to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]},
	},
}
-- Khmer.
m["km"] = {
	"jemer",
	9205,
	"mkh-kmr",
	"Khmr",
	otherNames = {"camboyano", "jemer central", "jemer moderno"},
	ancestors = "xhm",
	translit = "translit/km",
}
-- Kannada.
m["kn"] = {
	"kannada",
	33673,
	"dra-kan",
	"Knda",
	otherNames = {"canarés"},
	ancestors = "dra-mkn",
	translit = "translit/kn",
}
-- Korean.
m["ko"] = {
	"coreano",
	9176,
	"qfa-kor",
	"Kore, Brai",
	otherNames = {"coreano moderno"},
	ancestors = "ko-ear",
	translit = {
		Kore = "translit/ko",
	},
	entry_name = {
		Kore = s["nombentrada/Kore"],
	},
}
-- Kanuri.
m["kr"] = {
	"kanuri",
	36094,
	"ssa-sah",
	"Latn, Arab",
	otherNames = {"Kanembu", "Bilma Kanuri", "Central Kanuri", "Manga Kanuri", "Tumari Kanuri"},
	-- The sortkey and entry_name are only for standard Kanuri; when dialectal
	-- entries get added, someone will have to work out how the dialects
	-- should be represented orthographically.
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve,
		},
	},
	sort_key = {
		Latn = {
			from = {"ǝ", "ny", "ɍ", "sh"},
			to = {"e" .. p[1], "n" .. p[1], "r" .. p[1], "s" .. p[1]},
		},
	},
}
-- Kashmiri.
m["ks"] = {
	"cachemiro",
	33552,
	"inc-kas",
	"ks-Arab, Deva, Shrd, Latn",
	otherNames = {"kashmiri", "Koshur", "kishtwari", "kashtwari"},
	-- One transliteration reference per script; Latn has none.
	translit = {
		Deva = "translit/ks-Deva",
		Shrd = "translit/Shrd",
		["ks-Arab"] = "translit/ks-Arab",
	},
}
-- Kurdish (macrolanguage).
m["ku"] = {
	"kurdo (macrolengua)",
	36368,
	"kur",
	"Latn, Cyrl, Armn, ku-Arab",
	translit = {
		Cyrl = "translit/kmr",
		Armn = "translit/Armn",
		["ku-Arab"] = "translit/ckb",
	},
	entry_name = {
		-- Both apostrophe forms are removed.
		remove_diacritics = "'’",
		from = {"r̄", "R̄", "ẍ", "Ẍ"},
		to = {"rr", "Rr", "x", "X"},
	},
}
-- Komi.
m["kv"] = {
	"komi",
	36126,
	"urj-prm",
	"Cyrl",
	otherNames = {"komi permio", "komi ziriano", "Komi-Permiyak", "Komi-Zyrian"},
	translit = "translit/kv",
	override_translit = true,
}
-- Cornish.
m["kw"] = {
	"córnico",
	25289,
	"cel-brs",
	"Latn",
	ancestors = "cnx",
	-- The digraph "ch" gets its own key after "c".
	sort_key = {
		from = {"ch"},
		to = {"c" .. p[1]},
	},
}
-- Kyrgyz.
m["ky"] = {
	"kirguís",
	9255,
	"trk-kip",
	"Cyrl, Latn, Arab",
	otherNames = {"kirghiz", "kirgiz"},
	translit = {
		Cyrl = "translit/ky",
	},
	override_translit = true,
	-- Parallel lists: `from[n]` is replaced by `to[n]`.
	sort_key = {
		Cyrl = {
			from = {"ё", "ң", "ө", "ү"},
			to = {"е" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1]},
		},
	},
}
-- Latin.
m["la"] = {
	"latín",
	397,
	"itc",
	"Latn, Ital",
	ancestors = "itc-ola",
	entry_name = {
		Latn = {
			remove_diacritics = c.macron .. c.breve .. c.diaer .. c.dinvbreve,
		},
	},
	sort_key = {
		Latn = {
			-- Ligatures are expanded to two plain letters.
			from = {"æ", "œ"},
			to = {"ae", "oe"},
		},
	},
	standardChars = {
		Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXxZz",
		c.punc,
	},
}
-- Luxembourgish.
m["lb"] = {
	"luxemburgués",
	9051,
	"gmw-hgm",
	"Latn",
	ancestors = "gmw-cfr",
	-- ä, ë, é are keyed as "z" followed by p[1], p[2], p[3].
	sort_key = {
		from = {"ä", "ë", "é"},
		to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]},
	},
}
-- Luganda.
m["lg"] = {
	"luganda",
	33368,
	"bnt-nyg",
	"Latn",
	otherNames = {"ganda", "oluganda"},
	entry_name = {
		remove_diacritics = c.acute .. c.circ,
	},
	-- "ŋ" is keyed as its own letter after "n".
	sort_key = {
		from = {"ŋ"},
		to = {"n" .. p[1]},
	},
}
-- Limburgish.
m["li"] = {
	"limburgués",
	102172,
	"gmw-frk",
	"Latn",
	otherNames = {"limbúrgico"},
	ancestors = "dum",
}
-- Lingala.
m["ln"] = {
	"lingala",
	36217,
	"bnt-bmo",
	"Latn",
	otherNames = {"ngala"},
	sort_key = {
		-- Parallel lists: `from[n]` is replaced by `to[n]`.
		from = {"ɛ", "gb", "mb", "mp", "nd", "ng", "nk", "ns", "nt", "ny", "nz", "ɔ"},
		to = {"e" .. p[1], "g" .. p[1], "m" .. p[1], "m" .. p[2], "n" .. p[1], "n" .. p[2], "n" .. p[3], "n" .. p[4], "n" .. p[5], "n" .. p[6], "n" .. p[7], "o" .. p[1]},
		remove_diacritics = c.acute .. c.circ .. c.caron,
	},
}
-- Lao (lo).
m["lo"] = {
	"lao",
	9211,
	"tai-swe",
	"Laoo",
	translit = "translit/lo",
	sort_key = "sortkey/Laoo",
	standardChars = {
		Laoo = "ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮ",
		c.punc,
	},
	otherNames = {
		"laosiano",
	},
}
-- Lithuanian (lt).
m["lt"] = {
	"lituano",
	9083,
	"bat-eas",
	"Latn",
	ancestors = "olt",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.tilde,
	},
	-- Sort key: each special letter (and "y") becomes a base letter plus a PUA character from `p`.
	sort_key = {
		from = {"ą", "č", "ę", "ė", "į", "y", "š", "ų", "ū", "ž"},
		to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "e" .. p[2], "i" .. p[1], "i" .. p[2], "s" .. p[1], "u" .. p[1], "u" .. p[2], "z" .. p[1]},
	},
	standardChars = {
		Latn = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž",
		c.punc,
	},
}
-- Luba-Katanga (lu).
m["lu"] = {
	"tshiluba katanga",
	36157,
	"bnt-lub",
	"Latn",
}
-- Latvian (lv).
m["lv"] = {
"letón",
9078,
"bat-eas",
"Latn",
entry_name = {
-- This attempts to convert vowels with tone marks to vowels either with or without macrons.
-- Specifically, there should be no macrons if the vowel is part of a diphthong (including
-- resonant diphthongs such as pìrksts -> pirksts, not #pīrksts). What we do is first convert
-- the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in
-- diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then
-- delete any other tilde. We leave already-macroned vowels alone: both e.g. ar and ār occur
-- before consonants. FIXME: This still might not be sufficient.
-- NOTE(review): `from` has seven patterns and `to` has six replacements; the final bare-tilde
-- pattern has no counterpart, presumably treated as deletion by the consumer — confirm.
from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde},
to = {"%1", c.tilde, "%1%2%3", "%1%2", "%1%2", "%1" .. c.macron}
},
-- Sort key: each letter with a diacritic becomes its base letter plus a PUA character from `p`.
sort_key = {
from = {"ā", "č", "ē", "ģ", "ī", "ķ", "ļ", "ņ", "š", "ū", "ž"},
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]}
},
standardChars = {
Latn = "AaĀāBbCcČčDdEeĒēFfGgĢģHhIiĪīJjKkĶķLlĻļMmNnŅņOoPpRrSsŠšTtUuŪūVvZzŽž",
c.punc,
},
otherNames = {"letonés"},
}
-- Malagasy (mg).
m["mg"] = {
	"malgache",
	7930,
	"poz-bre",
	"Latn",
	otherNames = {
		"antankarana",
		"bara",
		"bushi",
		"betsimisaraka septentrional",
		"betsimisaraka meridional",
		"masikoro",
		"malgache de la meseta",
		"sakalava",
		"tandroy-mahafaly",
		"tesaka",
		"antaisaka",
		"tanosy",
		"antanosy",
		"tsimihety",
	},
}
-- Marshallese (mh).
m["mh"] = {
	"marshalés",
	36280,
	"poz-mic",
	"Latn",
	-- Sort key: each letter with a diacritic becomes its base letter plus a PUA character from `p`.
	sort_key = {
		from = {"ā", "ļ", "m̧", "ņ", "n̄", "o̧", "ō", "ū"},
		to = {"a" .. p[1], "l" .. p[1], "m" .. p[1], "n" .. p[1], "n" .. p[2], "o" .. p[1], "o" .. p[2], "u" .. p[1]},
	},
}
-- Maori (mi).
m["mi"] = {
	"maorí",
	36451,
	"poz-pep",
	"Latn",
	-- Sort key: macrons are listed for removal; the digraphs become "z" plus a PUA character from `p`.
	sort_key = {
		remove_diacritics = c.macron,
		from = {"ng", "wh"},
		to = {"z" .. p[1], "z" .. p[2]},
	},
	otherNames = {
		"māori",
	},
}
-- Macedonian (mk).
m["mk"] = {
	"macedonio",
	9296,
	"zls",
	"Cyrl",
	ancestors = "cu",
	translit = "translit/mk",
	-- Entry names: the acute is listed for removal, with four letters named as exceptions.
	entry_name = {
		remove_diacritics = c.acute,
		remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"},
	},
	sort_key = {
		remove_diacritics = c.grave,
		from = {"ѓ", "ѕ", "ј", "љ", "њ", "ќ", "џ"},
		to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]},
	},
	standardChars = {
		Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш",
		c.punc,
	},
}
-- Malayalam (ml).
m["ml"] = {
	"malayalam",
	36236,
	"dra-mal",
	"Mlym",
	translit = "translit/ml",
	override_translit = true,
}
-- Mongolian (mn). Display text and entry names for the Mong script reuse shared tables from `s`.
m["mn"] = {
	"mongol",
	9246,
	"xgn-cen",
	"Cyrl, Mong, Latn, Brai",
	ancestors = "cmg",
	translit = {
		Cyrl = "translit/mn",
		Mong = "translit/Mong",
	},
	override_translit = true,
	display_text = {
		Mong = s["mostrartexto/Mong"],
	},
	entry_name = {
		Cyrl = {
			remove_diacritics = c.grave .. c.acute,
		},
		Mong = s["nombentrada/Mong"],
	},
	sort_key = {
		Cyrl = {
			remove_diacritics = c.grave,
			from = {"ё", "ө", "ү"},
			to = {"е" .. p[1], "о" .. p[1], "у" .. p[1]},
		},
	},
	standardChars = {
		Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—",
		Brai = c.braille,
		c.punc,
	},
	otherNames = {
		"khalkha",
	},
}
-- Marathi (mr).
m["mr"] = {
	"maratí",
	1571,
	"inc-sou",
	"Deva, Modi",
	ancestors = "omr",
	translit = {
		Deva = "translit/mr",
		Modi = "translit/mr-Modi",
	},
	entry_name = {
		-- Three Devanagari letter spellings are replaced by the forms listed in `to`.
		Deva = {
			from = {"च़", "ज़", "झ़"},
			to = {"च", "ज", "झ"},
		},
	},
}
-- Malay (ms).
m["ms"] = {
	"malayo",
	9237,
	"poz-mly",
	"Latn, ms-Arab",
	ancestors = "omy",
	standardChars = {
		Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
		c.punc,
	},
	otherNames = {
		"malayo estándar",
	},
}
-- Maltese (mt).
m["mt"] = {
"maltés",
9166,
"sem-arb",
"Latn",
-- Display text swaps the straight apostrophe for the curly one; entry names do the reverse.
display_text = {
from = {"'"},
to = {"’"}
},
entry_name = {
from = {"’"},
to = {"'"},
},
ancestors = "sqr",
-- Sort key: the `from`/`to` lists are positional and the steps build on one another
-- (see the inline notes), so their order must be kept.
sort_key = {
from = {
"ċ", "ġ", "ż", -- Convert into PUA so that decomposed form does not get caught by the next step.
"([cgz])", -- Ensure "c" comes after "ċ", "g" comes after "ġ" and "z" comes after "ż".
"g" .. p[1] .. "ħ", -- "għ" after initial conversion of "g".
p[3], p[4], "ħ", "ie", p[5] -- Convert "ċ", "ġ", "ħ", "ie", "ż" into final output.
},
to = {
p[3], p[4], p[5],
"%1" .. p[1],
"g" .. p[2],
"c", "g", "h" .. p[1], "i" .. p[1], "z"
}
},
}
-- Burmese (my).
m["my"] = {
"birmano",
9228,
"tbq-brm",
"Mymr",
ancestors = "obr",
translit = "translit/my",
override_translit = true,
-- Sort key: five characters are each replaced by the multi-character sequence at the same
-- position in `to`.
sort_key = {
from = {"ျ", "ြ", "ွ", "ှ", "ဿ"},
to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "သ္သ"}
},
otherNames = {"mianmarés", "mandalay", "myeik", "palaw", "rangún", "yaw"},
}
-- Nauruan (na).
m["na"] = {
	"nauruano",
	13307,
	"poz-mic",
	"Latn",
	otherNames = {
		"nauruense",
	},
}
-- Norwegian, code nb. Sort key and standard characters come from the shared "no" tables in `s`.
m["nb"] = {
	"noruego",
	25167,
	"gmq",
	"Latn",
	wikimedia_codes = "no",
	ancestors = "gmq-mno, da",
	sort_key = s["sortkey/no"],
	standardChars = s["standardchars/no"],
	otherNames = {
		"bokmål",
	},
}
-- Northern Ndebele (nd).
m["nd"] = {
	"sindebele",
	35613,
	"bnt-ngu",
	"Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
	otherNames = {
		"ndebele septentrional",
	},
}
-- Nepali (ne).
m["ne"] = {
	"nepalí",
	33823,
	"inc-pah",
	"Deva, Newa",
	translit = {
		Deva = "translit/ne",
	},
	otherNames = {
		"nepalés",
		"*palpa", -- 3832956, former "plp", retired by ISO as spurious
	},
}
-- Ndonga (ng).
m["ng"] = {
	"ndonga",
	33900,
	"bnt-ova",
	"Latn",
}
-- Dutch (nl).
m["nl"] = {
	"neerlandés",
	7411,
	"gmw-frk",
	"Latn, Brai",
	ancestors = "dum",
	sort_key = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'",
	},
	standardChars = {
		Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
		Brai = c.braille,
		c.punc,
	},
	otherNames = {
		"holandés",
		"flamenco",
	}, -- FIXME, check this
}
-- Norwegian Nynorsk (nn). Sort key and standard characters come from the shared "no" tables in `s`.
m["nn"] = {
	"noruego nynorsk",
	25164,
	"gmq-wes",
	"Latn",
	ancestors = "gmq-mno",
	entry_name = {
		remove_diacritics = c.grave .. c.acute,
	},
	sort_key = s["sortkey/no"],
	standardChars = s["standardchars/no"],
	otherNames = {
		"nynorsk",
		"neonoruego",
	},
}
-- Norwegian Bokmål, code no. Sort key and standard characters come from the shared "no" tables in `s`.
m["no"] = {
	"noruego bokmål",
	9043,
	"gmq-wes",
	"Latn",
	ancestors = "gmq-mno",
	sort_key = s["sortkey/no"],
	standardChars = s["standardchars/no"],
}
-- Southern Ndebele (nr).
m["nr"] = {
	"ndebele meridional",
	36785,
	"bnt-ngu",
	"Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
	otherNames = {
		"ndebele transvaal",
	},
}
-- Navajo (nv).
m["nv"] = {
	"navajo",
	13310,
	"apa",
	"Latn",
	-- Sort key: longer letter sequences are listed before shorter ones; each becomes a base
	-- letter plus a PUA character from `p`.
	sort_key = {
		remove_diacritics = c.acute .. c.ogonek,
		from = {
			-- 3 chars
			"chʼ", "tłʼ", "tsʼ",
			-- 2 chars
			"ch", "dl", "dz", "gh", "hw", "kʼ", "kw", "sh", "tł", "ts", "zh",
			-- 1 char
			"ł", "ʼ",
		},
		to = {
			"c" .. p[2], "t" .. p[2], "t" .. p[4],
			"c" .. p[1], "d" .. p[1], "d" .. p[2], "g" .. p[1], "h" .. p[1], "k" .. p[1], "k" .. p[2], "s" .. p[1], "t" .. p[1], "t" .. p[3], "z" .. p[1],
			"l" .. p[1], "z" .. p[2],
		},
	},
	otherNames = {
		"diné bizaad",
	},
}
-- Chichewa (ny).
m["ny"] = {
	"chewa",
	33273,
	"bnt-nys",
	"Latn",
	entry_name = {
		remove_diacritics = c.acute .. c.circ,
	},
	-- Sort key: "ng'" is rewritten as plain "ng".
	sort_key = {
		from = {"ng'"},
		to = {"ng"},
	},
	otherNames = {
		"chicheŵa",
		"chiñanya",
		"ñanya",
		"chiñanja",
		"ñanja",
		"cicewa",
		"cewa",
		"cinyanja",
	},
}
-- Occitan (oc).
m["oc"] = {
	"occitano",
	14185,
	"roa-ocr",
	"Latn, Hebr",
	ancestors = "pro",
	sort_key = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla,
			-- Drops the middle dot between l/n/s and a following h.
			from = {"([lns])·h"},
			to = {"%1h"},
		},
	},
	-- don't list varieties here that are in [[Module:etymology languages/data]]
}
-- Ojibwe (oj).
m["oj"] = {
	"ojibua",
	33875,
	"alg",
	"Cans, Latn",
	sort_key = {
		-- Long vowels, the apostrophe letter and two digraphs become a base letter plus a
		-- PUA character from `p`.
		Latn = {
			from = {"aa", "ʼ", "ii", "oo", "sh", "zh"},
			to = {"a" .. p[1], "h" .. p[1], "i" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1]},
		},
	},
	otherNames = {
		"ojibway",
		"ojibwa",
		"ojibwe",
		"ojibue",
		"chipewa",
		"ojibwemowin",
	},
}
-- Oromo (om).
m["om"] = {
	"oromo",
	33864,
	"cus-eas",
	"Latn, Ethi",
	otherNames = {
		"orma",
		"oromo borana-arsi-guji",
		"oromo occidental",
	},
}
-- Odia (or).
m["or"] = {
	"odia",
	33810,
	"inc-eas",
	"Orya",
	ancestors = "inc-mor",
	translit = "translit/or",
	otherNames = {
		"oriya",
		"oorya",
	},
}
-- Ossetian (os).
m["os"] = {
	"oseta",
	33968,
	"xsc",
	"Cyrl, Geor, Latn",
	ancestors = "oos",
	translit = {
		Cyrl = "translit/os",
		Geor = "translit/Geor",
	},
	override_translit = true,
	-- Display text and entry names swap the ae-ligature character per script: the Cyrl
	-- tables map in one direction and the Latn tables in the other.
	display_text = {
		Cyrl = {
			from = {"æ"},
			to = {"ӕ"},
		},
		Latn = {
			from = {"ӕ"},
			to = {"æ"},
		},
	},
	entry_name = {
		Cyrl = {
			remove_diacritics = c.grave .. c.acute,
			from = {"æ"},
			to = {"ӕ"},
		},
		Latn = {
			from = {"ӕ"},
			to = {"æ"},
		},
	},
	sort_key = {
		Cyrl = {
			from = {"ӕ", "гъ", "дж", "дз", "ё", "къ", "пъ", "тъ", "хъ", "цъ", "чъ"},
			to = {"а" .. p[1], "г" .. p[1], "д" .. p[1], "д" .. p[2], "е" .. p[1], "к" .. p[1], "п" .. p[1], "т" .. p[1], "х" .. p[1], "ц" .. p[1], "ч" .. p[1]},
		},
	},
	otherNames = {
		"osete",
		"osético",
		"digor",
		"iron",
	},
}
-- Punjabi (pa).
m["pa"] = {
	"panyabí",
	58635,
	"inc-pan",
	"Guru, pa-Arab",
	ancestors = "inc-opa",
	translit = {
		Guru = "translit/Guru",
		["pa-Arab"] = "translit/pa-Arab",
	},
	entry_name = {
		-- Arabic-script entry names: vowel marks are listed for removal and two letters
		-- are replaced by the forms in `to`.
		["pa-Arab"] = {
			remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna,
			from = {"ݨ", "ࣇ"},
			to = {"ن", "ل"},
		},
	},
	otherNames = {
		"punyabí",
		"punjabí",
		"panjabí",
	},
}
-- Pali (pi). Eleven scripts are listed; ten of them name a transliteration module.
m["pi"] = {
"pali",
36727,
"inc-mid",
"Latn, Brah, Deva, Beng, Sinh, Mymr, Thai, Lana, Laoo, Khmr, Cakm",
ancestors = "sa",
translit = {
Brah = "translit/Brah",
Deva = "translit/sa",
Beng = "translit/pi",
Sinh = "translit/si",
Mymr = "translit/pi",
Thai = "translit/pi",
Lana = "translit/pi",
Laoo = "translit/pi",
Khmr = "translit/pi",
Cakm = "translit/Cakm",
},
-- Entry names: a Thai-specific substitution table sits alongside a `remove_diacritics`
-- setting (c.VS01) at the same level.
entry_name = {
Thai = {
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
to = {"ิํ", "ฐ", "ญ"}
},
remove_diacritics = c.VS01
},
-- NOTE(review): `from` ends with u(0xFE00) and u(0x200D), which have no counterpart in `to`;
-- presumably treated as deletion by the consumer — confirm.
sort_key = { -- FIXME: This needs to be converted into the current standardized format.
from = {"ā", "ī", "ū", "ḍ", "ḷ", "m[" .. c.dotabove .. c.dotbelow .. "]", "ṅ", "ñ", "ṇ", "ṭ", "([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
to = {"a~", "i~", "u~", "d~", "l~", "m~", "n~", "n~~", "n~~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}
},
}
-- Polish (pl).
m["pl"] = {
	"polaco",
	809,
	"zlw-lch",
	"Latn",
	ancestors = "zlw-opl",
	-- Sort key: each letter with a diacritic becomes its base letter plus a PUA character from `p`.
	sort_key = {
		from = {"ą", "ć", "ę", "ł", "ń", "ó", "ś", "ź", "ż"},
		to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]},
	},
	standardChars = {
		Latn = "AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpRrSsŚśTtUuWwYyZzŹźŻż",
		c.punc,
	},
}
-- Pashto (ps).
m["ps"] = {
	"pastún",
	58680,
	"ira-pat",
	"ps-Arab",
	entry_name = {
		remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
	},
	otherNames = {
		"pashtún",
		"pushtó",
		"pashtú",
		"afghani",
		"pukhto",
		"pakhto",
		"pakkhto",
		"pashtún meridional/central/septentrional",
	},
}
-- Portuguese (pt).
m["pt"] = {
	"portugués",
	5146,
	"roa-ibe",
	"Latn, Brai",
	ancestors = "roa-opt",
	sort_key = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.cedilla,
		},
	},
	standardChars = {
		Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz",
		Brai = c.braille,
		c.punc,
	},
	otherNames = {
		"portugués actual",
		"portugués de Portugal",
		"portugués brasilero",
	},
}
-- Quechua (qu).
m["qu"] = {
	"quechua",
	5218,
	"qwe",
	"Latn",
}
-- Romansh (rm).
m["rm"] = {
	"romanche",
	13199,
	"roa-rhe",
	"Latn",
}
-- Romanian (ro), with separate sort-key and standard-character data for Latn and Cyrl.
m["ro"] = {
	"rumano",
	7913,
	"roa-eas",
	"Latn, Cyrl",
	sort_key = {
		Latn = {
			from = {"ă", "â", "î", "ș", "ț"},
			to = {"a" .. p[1], "a" .. p[2], "i" .. p[1], "s" .. p[1], "t" .. p[1]},
		},
		Cyrl = {
			from = {"ӂ"},
			to = {"ж" .. p[1]},
		},
	},
	standardChars = {
		Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz",
		Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя",
		c.punc,
	},
}
-- Russian (ru). Cyrl and Cyrs share the same translit module and the same shared
-- entry-name and sort-key tables from `s`.
m["ru"] = {
	"ruso",
	7737,
	"zle",
	"Cyrl, Cyrs, Brai",
	ancestors = "orv",
	translit = {
		Cyrl = "translit/ru",
		Cyrs = "translit/ru",
	},
	entry_name = {
		Cyrl = s["nombentrada/ru-Cyrl"],
		Cyrs = s["nombentrada/ru-Cyrl"],
	},
	sort_key = {
		Cyrl = s["sortkey/ru-Cyrl"],
		Cyrs = s["sortkey/ru-Cyrl"],
	},
	standardChars = {
		Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—",
		Brai = c.braille,
		c.punc,
	},
}
-- Kinyarwanda (rw).
m["rw"] = {
	"ruandés",
	3217514,
	"bnt-glb",
	"Latn",
	entry_name = {
		remove_diacritics = c.acute .. c.circ .. c.macron .. c.caron,
	},
	-- don't list varieties here that are in [[Module:etymology languages/data]]
	otherNames = {
		"ha",
		"giha",
		"hangaza",
		"vinza",
		"shubi",
	}, -- Deleted "Subi", which normally refers to a different language
}
-- Sanskrit (sa). Many scripts are listed; `translit` names a module for a subset of them.
m["sa"] = {
"sánscrito",
11059,
"inc-old",
"Deva, Bali, as-Beng, Beng, Bhks, Brah, Gran, Gujr, Guru, Hani, Java, Kawi, Khar, Khmr, Knda, Lana, Laoo, Marc, Mlym, Modi, Mong, mnc-Mong, xwo-Mong, Mymr, Nand, Newa, Orya, Phag, Ranj, Saur, Shrd, Sidd, Sinh, Taml, Tang, Telu, Thai, Tibt, Tirh",
translit = {
Deva = "translit/sa",
["as-Beng"] = "translit/sa-Beng",
Beng = "translit/sa-Beng",
Brah = "translit/Brah",
Gujr = "translit/sa-Gujr",
Java = "translit/sa-Java",
Khmr = "translit/pi",
Knda = "translit/sa-Knda",
Lana = "translit/pi",
Laoo = "translit/pi",
Mlym = "translit/sa-Mlym",
Modi = "translit/sa-Modi",
Mong = "translit/Mong",
["mnc-Mong"] = "translit/mnc",
["xwo-Mong"] = "translit/xal",
Mymr = "translit/pi",
Orya = "translit/sa-Orya",
Sinh = "translit/si",
Thai = "translit/pi",
Tibt = "translit/Tibt",
},
-- Mong and Tibt reuse shared display-text and entry-name tables from `s`.
display_text = {
Mong = s["mostrartexto/Mong"],
Tibt = s["mostrartexto/Tibt"],
},
entry_name = {
Mong = s["nombentrada/Mong"],
Tibt = s["nombentrada/Tibt"],
Thai = {
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
to = {"ิํ", "ฐ", "ญ"}
},
remove_diacritics = c.VS01
},
-- Sort key: a named Tibt entry plus one unnamed (positional) substitution table.
-- NOTE(review): the positional table's `from` ends with u(0xFE00) and u(0x200D), which have
-- no counterpart in `to`; presumably treated as deletion by the consumer — confirm.
sort_key = {
Tibt = "sortkey/Tibt",
{ -- FIXME: This needs to be converted into the current standardized format.
from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "m[" .. c.dotabove .. c.dotbelow .. "]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"},
},
},
}
-- Sardinian (sc).
m["sc"] = {
	"sardo",
	33976,
	"roa",
	"Latn",
	-- don't list varieties here that are in [[Module:etymology languages/data]]
}
-- Sindhi (sd).
m["sd"] = {
	"sindhi",
	33997,
	"inc-snd",
	"sd-Arab, Deva, Sind, Khoj",
	translit = {
		Sind = "translit/Sind",
	},
	entry_name = {
		-- Arabic-script entry names: kashida and vowel marks are listed for removal and
		-- one alif variant is replaced by the form in `to`.
		["sd-Arab"] = {
			remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
			from = {"ٱ"},
			to = {"ا"},
		},
	},
	ancestors = "inc-vra",
}
-- Northern Sami (se).
m["se"] = {
	"sami septentrional",
	33947,
	"smi",
	"Latn",
	-- Display text replaces the straight apostrophe with the vertical-line modifier letter.
	display_text = {
		from = {"'"},
		to = {"ˈ"},
	},
	entry_name = {
		remove_diacritics = c.macron .. c.dotbelow .. "'ˈ",
	},
	sort_key = {
		from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"},
		to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1], "s" .. p[1], "t" .. p[1], "z" .. p[1]},
	},
	standardChars = {
		Latn = "AaÁáBbCcČčDdĐđEeFfGgHhIiJjKkLlMmNnŊŋOoPpRrSsŠšTtŦŧUuVvZzŽž",
		c.punc,
	},
}
-- Sango (sg).
m["sg"] = {
	"sango",
	33954,
	"crp",
	"Latn",
	ancestors = "ngb",
}
-- Serbo-Croatian (sh), with per-script entry-name, sort-key and standard-character data.
m["sh"] = {
	"serbocroata",
	9301,
	"zls",
	"Latn, Cyrl, Glag",
	wikimedia_codes = "sh, bs, hr, sr",
	-- Entry names: both scripts list the same accent marks for removal, each with its own
	-- set of exception letters.
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve,
			remove_exceptions = {"Ć", "ć", "Ś", "ś", "Ź", "ź"},
		},
		Cyrl = {
			remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve,
			remove_exceptions = {"З́", "з́", "С́", "с́"},
		},
	},
	sort_key = {
		Latn = {
			from = {"č", "ć", "dž", "đ", "lj", "nj", "š", "ś", "ž", "ź"},
			to = {"c" .. p[1], "c" .. p[2], "d" .. p[1], "d" .. p[2], "l" .. p[1], "n" .. p[1], "s" .. p[1], "s" .. p[2], "z" .. p[1], "z" .. p[2]},
		},
		Cyrl = {
			from = {"ђ", "з́", "ј", "љ", "њ", "с́", "ћ", "џ"},
			to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "с" .. p[1], "т" .. p[1], "ч" .. p[1]},
		},
	},
	standardChars = {
		Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž",
		Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш",
		c.punc,
	},
	otherNames = {
		"BCS",
		"bosnio",
		"croata",
		"montenegrino",
		"serbio",
		"caicavo",
		"chacavo",
	},
	-- don't list varieties here that are in [[Module:etymology languages/data]]
}
-- Sinhalese (si).
m["si"] = {
	"cingalés",
	13267,
	"inc-ins",
	"Sinh",
	ancestors = "pra",
	translit = "translit/si",
	override_translit = true,
	otherNames = {
		"*singalés",
	},
}
-- Slovak (sk).
m["sk"] = {
	"eslovaco",
	9058,
	"zlw",
	"Latn",
	ancestors = "zlw-osk",
	sort_key = {
		remove_diacritics = c.acute .. c.circ .. c.diaer,
	},
	standardChars = {
		-- Fix: the string previously held the CJK character "弾" between "Ĺ" and "Mm".
		-- Every other letter here is an upper/lower pair, and the Slovak alphabet has
		-- Ĺ and Ľ after L, so the garbled span is restored to "ĹĺĽľ".
		Latn = "AaÁáÄäBbCcČčDdĎďEeFfGgHhIiÍíJjKkLlĹĺĽľMmNnŇňOoÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž",
		c.punc,
	},
}
-- Slovene (sl).
m["sl"] = {
	"esloveno",
	9063,
	"zls",
	"Latn",
	-- Entry names: accent marks are listed for removal and four special letters are
	-- replaced by plain E/e/L/l.
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow,
		from = {"Ə", "ə", "Ł", "ł"},
		to = {"E", "e", "L", "l"},
	},
	sort_key = {
		remove_diacritics = c.tilde .. c.dotabove .. c.diaer .. c.ringabove .. c.ringbelow .. c.ogonek,
		from = {"č", "š", "ž"},
		to = {"c" .. p[1], "s" .. p[1], "z" .. p[1]},
	},
	standardChars = {
		Latn = "AaBbCcČčDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž",
		c.punc,
	},
	otherNames = {
		"*eslovenio",
	},
}
-- Samoan (sm).
m["sm"] = {
	"samoano",
	34011,
	"poz-pnp",
	"Latn",
}
-- Shona (sn).
m["sn"] = {
	"shona",
	34004,
	"bnt-sho",
	"Latn",
	entry_name = {
		remove_diacritics = c.acute,
	},
}
-- Somali (so).
m["so"] = {
	"somalí",
	13275,
	"cus-som",
	"Latn, Arab, Osma",
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.circ,
		},
	},
}
-- Albanian (sq).
m["sq"] = {
	"albanés",
	8748,
	"sqj",
	"Latn, Grek, ota-Arab, Elba, Vith",
	-- Entry names: the acute is listed for removal, and a leading "i " or "të " followed
	-- by a word character is dropped (only the captured character is kept).
	entry_name = {
		remove_diacritics = c.acute,
		from = {'^i (%w)', '^të (%w)'},
		to = {'%1', '%1'},
	},
	sort_key = {
		remove_diacritics = c.circ .. c.tilde,
		from = {'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'},
		to = {'c' .. p[1], 'd' .. p[1], 'e' .. p[1], 'g' .. p[1], 'l' .. p[1], 'n' .. p[1], 'r' .. p[1], 's' .. p[1], 't' .. p[1], 'x' .. p[1], 'z' .. p[1]},
	},
	standardChars = {
		Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz",
		c.punc,
	},
	otherNames = {
		"albanés arbëreshë",
		"albanés arvanitika",
		"albanés gheg",
		"albanés tosco",
	},
}
-- Swazi (ss).
m["ss"] = {
	"swazi",
	34014,
	"bnt-ngu",
	"Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
	otherNames = {
		"swati",
	},
}
-- Sotho (st).
m["st"] = {
	"sesoto",
	34340,
	"bnt-sts",
	"Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
	otherNames = {
		"sesoto meridional",
		"sesotho",
		"sotho",
	},
}
-- Sundanese (su).
m["su"] = {
	"sundanés",
	34002,
	"poz-msa",
	"Latn, Sund",
	ancestors = "osn",
	translit = {
		Sund = "translit/su",
	},
}
-- Swedish (sv).
m["sv"] = {
	"sueco",
	9027,
	"gmq-eas",
	"Latn",
	ancestors = "gmq-osw",
	-- Sort key: accent marks, the apostrophe and the colon are listed for removal (with "å"
	-- as an exception); foreign letters are spelled out and "å" becomes "z" plus a PUA
	-- character from `p`.
	sort_key = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron .. c.dacute .. c.caron .. c.cedilla .. "':",
		remove_exceptions = {"å"},
		from = {"ø", "æ", "œ", "ß", "å"},
		to = {"o", "ae", "oe", "ss", "z" .. p[1]},
	},
	standardChars = {
		Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvXxYyÅåÄäÖö",
		c.punc,
	},
}
-- Swahili (sw).
m["sw"] = {
	"suajili",
	7838,
	"bnt-swh",
	"Latn, Arab",
	sort_key = {
		-- "ng'" is rewritten as "ng" plus a PUA character from `p`.
		Latn = {
			from = {"ng'"},
			to = {"ng" .. p[1]},
		},
	},
	otherNames = {
		"suajilí",
		"swahili",
		"suahelí",
		"kiswahili",
		"kisetla",
		"setla",
		"kihindi",
		"kishamba",
		"kibabu",
		"kimanga",
		"kitvita",
	},
}
-- Tamil (ta).
m["ta"] = {
	"tamil",
	5885,
	"dra-tam",
	"Taml",
	ancestors = "oty",
	translit = "translit/ta",
	override_translit = true,
}
-- Telugu (te).
m["te"] = {
	"telugú",
	8097,
	"dra-tel",
	"Telu",
	ancestors = "dra-ote",
	translit = "translit/te",
	override_translit = true,
}
-- Tajik (tg).
m["tg"] = {
	"tayiko",
	9260,
	"ira-swi",
	"Cyrl, fa-Arab, Latn",
	ancestors = "peo",
	translit = {
		Cyrl = "translit/tg",
	},
	override_translit = true,
	entry_name = {
		remove_diacritics = c.grave .. c.acute,
	},
	sort_key = {
		-- Each extra Cyrillic letter becomes a base letter plus a PUA character from `p`.
		Cyrl = {
			from = {"ғ", "ё", "ӣ", "қ", "ӯ", "ҳ", "ҷ"},
			to = {"г" .. p[1], "е" .. p[1], "и" .. p[1], "к" .. p[1], "у" .. p[1], "х" .. p[1], "ч" .. p[1]},
		},
	},
	otherNames = {
		"persa oriental",
	},
}
-- Thai (th).
m["th"] = {
	"tailandés",
	9217,
	"tai-swe",
	"Thai, Brai",
	translit = {
		Thai = "translit/th",
	},
	sort_key = {
		Thai = "sortkey/Thai",
	},
	otherNames = {
		"siamés",
	},
}
-- Tigrinya (ti).
m["ti"] = {
	"tigriña",
	34124,
	"sem-eth",
	"Ethi",
	translit = "translit/Ethi",
}
-- Turkmen (tk), with separate sort-key data for Latn and Cyrl.
m["tk"] = {
	"turcomano",
	9267,
	"trk-ogz",
	"Latn, Cyrl, Arab",
	entry_name = {
		remove_diacritics = c.macron,
	},
	sort_key = {
		Latn = {
			from = {"ç", "ä", "ž", "ň", "ö", "ş", "ü", "ý"},
			to = {"c" .. p[1], "e" .. p[1], "j" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "u" .. p[1], "y" .. p[1]},
		},
		Cyrl = {
			from = {"ё", "җ", "ң", "ө", "ү", "ә"},
			to = {"е" .. p[1], "ж" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1], "э" .. p[1]},
		},
	},
}
-- Tagalog (tl).
m["tl"] = {
	"tagalo",
	34057,
	"phi",
	"Latn, Tglg",
	translit = {
		Tglg = "translit/tl",
	},
	override_translit = true,
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.circ,
		},
	},
	standardChars = {
		Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy",
		c.punc,
	},
	sort_key = {
		Latn = "sortkey/tl",
	},
}
-- Tswana (tn).
m["tn"] = {
	"setsuana",
	34137,
	"bnt-sts",
	"Latn",
	otherNames = {
		"setswana",
	},
}
-- Tongan (to).
m["to"] = {
	"tongano",
	34094,
	"poz-pol",
	"Latn",
	entry_name = {
		remove_diacritics = c.acute,
	},
	sort_key = {
		remove_diacritics = c.macron,
	},
}
-- Turkish (tr).
m["tr"] = {
	"turco",
	256,
	"trk-ogz",
	"Latn",
	ancestors = "ota",
	dotted_dotless_i = true,
	-- Sort key: `from` and `to` are positional; the order of the steps is kept as-is.
	sort_key = {
		from = {
			-- Ignore circumflex, but account for capital Î wrongly becoming ı + circ due to dotted dotless I logic.
			"ı" .. c.circ,
			c.circ,
			-- Ensure "i" comes after "ı".
			"i",
			"ç", "ğ", "ı", "ö", "ş", "ü",
		},
		to = {
			"i",
			"",
			"i" .. p[1],
			"c" .. p[1], "g" .. p[1], "i", "o" .. p[1], "s" .. p[1], "u" .. p[1],
		},
	},
	standardChars = {
		Latn = "AaÂâBbCcÇçDdEeFfGgĞğHhIıİiÎîJjKkLlMmNnOoÖöPpRrSsŞşTtUuÛûÜüVvYyZz",
		c.punc,
	},
}
-- Tsonga (ts).
m["ts"] = {
	"xitsonga",
	34327,
	"bnt-tsr",
	"Latn",
	-- Fix: the alias list previously repeated the canonical name "xitsonga", which adds
	-- nothing; list the alternative name instead.
	otherNames = {
		"tsonga",
	},
}
-- Tatar (tt), with separate sort-key data for Cyrl and Latn.
m["tt"] = {
	"tártaro",
	25285,
	"trk-kbu",
	"Cyrl, Latn, tt-Arab",
	translit = {
		Cyrl = "translit/tt",
	},
	override_translit = true,
	dotted_dotless_i = true,
	sort_key = {
		Cyrl = {
			from = {"ә", "ў", "ғ", "ё", "җ", "қ", "ң", "ө", "ү", "һ"},
			to = {"а" .. p[1], "в" .. p[1], "г" .. p[1], "е" .. p[1], "ж" .. p[1], "к" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1], "х" .. p[1]},
		},
		Latn = {
			from = {
				-- Ensure "i" comes after "ı".
				"i",
				"ä", "ə", "ç", "ğ", "ı", "ñ", "ŋ", "ö", "ɵ", "ş", "ü",
			},
			to = {
				"i" .. p[1],
				"a" .. p[1], "a" .. p[2], "c" .. p[1], "g" .. p[1], "i", "n" .. p[1], "n" .. p[2], "o" .. p[1], "o" .. p[2], "s" .. p[1], "u" .. p[1],
			},
		},
	},
}
-- Tahitian (ty).
m["ty"] = {
	"tahitiano",
	34128,
	"poz-pep",
	"Latn",
}
-- Uyghur (ug).
m["ug"] = {
	"uigur",
	13263,
	"trk-kar",
	"ug-Arab, Latn, Cyrl",
	ancestors = "chg",
	translit = {
		["ug-Arab"] = "translit/ug",
		Cyrl = "translit/ug",
	},
	override_translit = true,
	otherNames = {
		"uighur",
		"uygur",
	},
}
-- Ukrainian (uk).
m["uk"] = {
	"ucraniano",
	8798,
	"zle",
	"Cyrl",
	ancestors = "orv",
	translit = "translit/uk",
	entry_name = {
		remove_diacritics = c.grave .. c.acute,
	},
	-- Sort key: each extra letter becomes a base letter plus a PUA character from `p`.
	sort_key = {
		from = {
			-- 2 chars
			"ї",
			-- 1 char
			"ґ", "є", "і",
		},
		to = {
			"и" .. p[2],
			"г" .. p[1], "е" .. p[1], "и" .. p[1],
		},
	},
	standardChars = {
		Cyrl = "АаБбВвГгДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя",
		c.punc,
	},
}
-- Urdu (ur).
m["ur"] = {
"urdu",
1617,
"inc-hnd",
"ur-Arab, Hebr",
ancestors = "inc-ohi",
translit = {["ur-Arab"] = "translit/ur"},
entry_name = {
-- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
-- NOTE(review): the comment above names "ه" as the target, while `to` below holds "ہ" for
-- the first two entries — confirm which letter is intended.
from = {"هٔ", "ۂ", "ٱ"},
to = {"ہ", "ہ", "ا"},
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef
},
-- put Judeo-Urdu (Hebrew-script Urdu) under the category header
-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles
-- The pattern is anchored at the start and uses a frontier set spanning u(0x5D0)..u(0x5EA),
-- so u(0xFB21) is inserted before a title that begins with a character in that range.
sort_key = {
from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"},
to = {u(0xFB21)},
},
}
-- Uzbek (uz), with separate sort-key data for Latn and Cyrl.
m["uz"] = {
	"uzbeco",
	9264,
	"trk-kar",
	"Latn, Cyrl, fa-Arab",
	ancestors = "chg",
	translit = {
		Cyrl = "translit/uz",
	},
	sort_key = {
		-- Latin: the special letters and digraphs become "z" plus a PUA character from `p`.
		Latn = {
			from = {"oʻ", "gʻ", "sh", "ch", "ng"},
			to = {"z" .. p[1], "z" .. p[2], "z" .. p[3], "z" .. p[4], "z" .. p[5]},
		},
		Cyrl = {
			from = {"ё", "ў", "қ", "ғ", "ҳ"},
			to = {"е" .. p[1], "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4]},
		},
	},
	otherNames = {
		"uzbeco septentrional/meridional",
	},
}
-- Venda (ve).
m["ve"] = {
	"luvenda",
	32704,
	"bnt-bso",
	"Latn",
}
-- Vietnamese (vi).
m["vi"] = {
	"vietnamita",
	9199,
	"mkh-vie",
	"Latn, Hani",
	ancestors = "mkh-mvi",
	sort_key = {
		Latn = "sortkey/vi",
		Hani = "sortkey/Hani",
	},
	otherNames = {
		"anamés",
		"anamita",
	},
}
-- Volapük (vo).
m["vo"] = {
	"volapuk",
	36986,
	"art",
	"Latn",
}
-- Walloon (wa). The sort key is looked up in the shared table `s`.
m["wa"] = {
	"valón",
	34219,
	"roa-oil",
	"Latn",
	ancestors = "fro",
	sort_key = s["sortkey/roa-oil"],
	otherNames = {
		"liégeois",
		"namurois",
		"wallo-picard",
		"wallo-lorrain",
	},
}
-- Wolof (wo).
m["wo"] = {
	"wolof",
	34257,
	"alv-fwo",
	"Latn, Arab",
	otherNames = {
		"wolof gambiano",
	}, -- the subsumed dialect 'wof'
}
-- Xhosa (xh).
m["xh"] = {
	"xhosa",
	13218,
	"bnt-ngu",
	"Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
}
-- Yiddish (yi).
m["yi"] = {
"ídish",
8641,
"gmw-hgm",
"Hebr",
ancestors = "gmh",
translit = "translit/yi",
-- Sort key: letters carrying points are replaced by the unpointed letters at the same
-- position in `to`; two of the `from` entries are patterns with a bracketed set of marks.
sort_key = {
from = {"א[ַָ]", "בּ", "ו[ֹּ]", "יִ", "ײַ", "פֿ"},
to = {"א", "ב", "ו", "י", "יי", "פ"}
},
otherNames = {"yídish", "yiddish", "ídish estadounidense", "ídish escocés", "litvish", "poylish", "udmurtish",
"ídish europeo", "galitzish"},
}
-- Yoruba (yo).
m["yo"] = {
	"yoruba",
	34311,
	"alv-yor",
	"Latn, Arab",
	entry_name = {
		Latn = {
			remove_diacritics = c.grave .. c.acute .. c.macron,
		},
	},
	sort_key = {
		-- Several pairs of spellings share one sort value (e.g. both e-variants map to
		-- "e" plus the same PUA character from `p`).
		Latn = {
			from = {"ẹ", "ɛ", "gb", "ị", "kp", "ọ", "ɔ", "ṣ", "sh", "ụ"},
			to = {"e" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "o" .. p[1], "o" .. p[1], "s" .. p[1], "s" .. p[1], "u" .. p[1]},
		},
	},
}
-- Zhuang (za).
m["za"] = {
	"chuan",
	13216,
	"tai",
	"Latn, Hani",
	sort_key = {
		Latn = "sortkey/za",
		Hani = "sortkey/Hani",
	},
	-- FIXME, are all of the following distinct?
	otherNames = {
		"zhuang",
		"chuan Chongzuo",
		"chuan Guibei",
		"chuan Hongshuihe",
		"chuan Min",
		"chuan Nong",
		"chuan Qiubei",
		"chuan Liuqian",
		"chuan Shangsi",
		"chuan Dai",
		"wenma",
		"chuan Yang",
		"chuan Yongbei",
		"chuan Wuming",
		"chuan Yongnan",
		"chuan Youjiang",
		"chuan Zuojiang",
		"chuan Guibian",
	},
}
-- Chinese (zh).
m["zh"] = {
	"chino",
	7850,
	"zhx",
	"Hani, Hant, Hans, Latn, Bopo, Nshu, Brai",
	ancestors = "ltc",
	generate_forms = "generarformas/zh",
	translit = {
		Hani = "translit/zh",
		Bopo = "translit/zh",
	},
	sort_key = {
		Hani = "sortkey/Hani",
	},
}
-- Zulu (zu).
m["zu"] = {
	"zulú",
	10179,
	"bnt-ngu",
	"Latn",
	entry_name = {
		remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron,
	},
	otherNames = {
		"isizulú",
	},
}
-- Export the table of language entries assembled above.
return m