-- Módulo:generar-pron/pl
-- Documentation for this module can be created at Módulo:generar-pron/pl/doc
local export = {}
local unpack = unpack or table.unpack
local insert = table.insert
local concat = table.concat
local m_str = require("Módulo:String")
local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strsubb = m_str.gsubb
local strmatchit = m_str.gmatch
local strsubrep = m_str.gsub_rep
local strsplit = m_str.split
local strstrip = m_str.strip
local strupper = m_str.upper
local strlower = m_str.lower
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local substr = m_str.sub
local strlen = m_str.len
local strexplode = m_str.explode_utf8
local strhtml = m_str.encode_html
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´]"
local letters2phones = {
["a"] = {
["u"] = { "a", "w" },
[false] = "a",
},
["ą"] = {
["l"] = { "ɔ", "l" },
["ł"] = { "ɔ", "w" },
[false] = "ɔ̃",
},
["b"] = {
["i"] = {
["ą"] = {
["l"] = { "bʲ", "j", "ɔ", "l" },
["ł"] = { "bʲ", "j", "ɔ", "w" },
[false] = { "bʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "bʲ", "j", "ɛ", "l" },
["ł"] = { "bʲ", "j", "ɛ", "w" },
[false] = { "bʲ", "j", "ɛ̃" }
},
["a"] = { "bʲ", "j", "a" },
["e"] = { "bʲ", "j", "ɛ" },
["i"] = { "bʲ", "i" },
["o"] = { "bʲ", "j", "ɔ" },
["ó"] = { "bʲ", "j", "u" },
["u"] = { "bʲ", "j", "u" },
[false] = { "bʲ", "i" }
},
[false] = "b"
},
["c"] = {
["i"] = {
["ą"] = {
["l"] = { "t͡ɕ", "ɔ", "l" },
["ł"] = { "t͡ɕ", "ɔ", "w" },
[false] = { "t͡ɕ", "ɔ̃" }
},
["ę"] = {
["l"] = { "t͡ɕ", "ɛ", "l" },
["ł"] = { "t͡ɕ", "ɛ", "w" },
[false] = { "t͡ɕ", "ɛ̃" }
},
["a"] = { "t͡ɕ", "a" },
["e"] = { "t͡ɕ", "ɛ" },
["o"] = { "t͡ɕ", "ɔ" },
["ó"] = { "t͡ɕ", "u" },
["u"] = { "t͡ɕ", "u" },
["y"] = { "t͡ɕ", "ɨ" },
[false] = { "t͡ɕ", "i" }
},
["h"] = {
["i"] = {
["ą"] = {
["l"] = { "xʲ", "j", "ɔ", "l" },
["ł"] = { "xʲ", "j", "ɔ", "w" },
[false] = { "xʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "xʲ", "j", "ɛ", "l" },
["ł"] = { "xʲ", "j", "ɛ", "w" },
[false] = { "xʲ", "j", "ɛ̃" }
},
["a"] = { "xʲ", "j", "a" },
["e"] = { "xʲ", "j", "ɛ" },
["i"] = { "xʲ", "j", "i" },
["o"] = { "xʲ", "j", "ɔ" },
["ó"] = { "xʲ", "j", "u" },
["u"] = { "xʲ", "j", "u" },
[false] = { "xʲ", "i" }
},
[false] = "x"
},
["z"] = "t͡ʂ",
[false] = "t͡s"
},
["ć"] = "t͡ɕ",
["d"] = {
["i"] = {
["ą"] = {
["l"] = { "dʲ", "j", "ɔ", "l" },
["ł"] = { "dʲ", "j", "ɔ", "w" },
[false] = { "dʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "dʲ", "j", "ɛ", "l" },
["ł"] = { "dʲ", "j", "ɛ", "w" },
[false] = { "dʲ", "j", "ɛ̃" }
},
["a"] = { "dʲ", "j", "a" },
["e"] = { "dʲ", "j", "ɛ" },
["i"] = { "dʲ", "i" },
["o"] = { "dʲ", "j", "ɔ" },
["ó"] = { "dʲ", "j", "u" },
["u"] = { "dʲ", "j", "u" },
[false] = { "dʲ", "i" }
},
["z"] = {
["i"] = {
["ą"] = {
["l"] = { "d͡ʑ", "ɔ", "l" },
["ł"] = { "d͡ʑ", "ɔ", "w" },
[false] = {"d͡ʑ", "ɔ̃" }
},
["ę"] = {
["l"] = { "d͡ʑ", "ɛ", "l" },
["ł"] = { "d͡ʑ", "ɛ", "w" },
[false] = { "d͡ʑ", "ɛ̃" }
},
["a"] = { "d͡ʑ", "a" },
["e"] = { "d͡ʑ", "ɛ" },
["o"] = { "d͡ʑ", "ɔ" },
["ó"] = { "d͡ʑ", "u" },
["u"] = { "d͡ʑ", "u" },
["y"] = { "d͡ʑ", "ɨ" },
[false] = { "d͡ʑ", "i" }
},
[false] = "d͡z"
},
["ż"] = "d͡ʐ",
["ź"] = "d͡ʑ",
[false] = "d"
},
["e"] = {
["u"] = { "ɛ", "w" },
[false] = "ɛ",
},
["ę"] = {
["l"] = { "ɛ", "l" },
["ł"] = { "ɛ", "w" },
[false] = "ɛ̃",
},
["f"] = {
["i"] = {
["ą"] = {
["l"] = { "fʲ", "j", "ɔ", "l" },
["ł"] = { "fʲ", "j", "ɔ", "w" },
[false] = { "fʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "fʲ", "j", "ɛ", "l" },
["ł"] = { "fʲ", "j", "ɛ", "w" },
[false] = { "fʲ", "j", "ɛ̃" }
},
["a"] = { "fʲ", "j", "a" },
["e"] = { "fʲ", "j", "ɛ" },
["i"] = { "fʲ", "j", "i" },
["o"] = { "fʲ", "j", "ɔ" },
["ó"] = { "fʲ", "j", "u" },
["u"] = { "fʲ", "j", "u" },
[false] = { "fʲ", "i" }
},
[false] = "f"
},
["g"] = {
["i"] = {
["ą"] = {
["l"] = { "ɡʲ", "j", "ɔ", "l" },
["ł"] = { "ɡʲ", "j", "ɔ", "w" },
[false] = { "ɡʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "ɡʲ", "j", "ɛ", "l" },
["ł"] = { "ɡʲ", "j", "ɛ", "w" },
[false] = { "ɡʲ", "j", "ɛ̃" }
},
["a"] = { "ɡʲ", "j", "a" },
["e"] = { "ɡʲ", "j", "ɛ" },
["i"] = { "ɡʲ", "j", "i" },
["o"] = { "ɡʲ", "j", "ɔ" },
["ó"] = { "ɡʲ", "j", "u" },
["u"] = { "ɡʲ", "j", "u" },
[false] = { "ɡʲ", "i" }
},
[false] = "ɡ"
},
["h"] = {
["i"] = {
["ą"] = {
["l"] = { "xʲ", "j", "ɔ", "l" },
["ł"] = { "xʲ", "j", "ɔ", "w" },
[false] = { "xʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "xʲ", "j", "ɛ", "l" },
["ł"] = { "xʲ", "j", "ɛ", "w" },
[false] = { "xʲ", "j", "ɛ̃" }
},
["a"] = { "xʲ", "j", "a" },
["e"] = { "xʲ", "j", "ɛ" },
["i"] = { "xʲ", "j", "i" },
["o"] = { "xʲ", "j", "ɔ" },
["ó"] = { "xʲ", "j", "u" },
["u"] = { "xʲ", "j", "u" },
[false] = { "xʲ", "i" }
},
[false] = "x"
},
["i"] = "i",
["j"] = "j",
["k"] = {
["i"] = {
["ą"] = {
["l"] = { "kʲ", "j", "ɔ", "l" },
["ł"] = { "kʲ", "j", "ɔ", "w" },
[false] = { "kʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "kʲ", "j", "ɛ", "l" },
["ł"] = { "kʲ", "j", "ɛ", "w" },
[false] = { "kʲ", "j", "ɛ̃" }
},
["a"] = { "kʲ", "j", "a" },
["e"] = { "kʲ", "j", "ɛ" },
["i"] = { "kʲ", "j", "i" },
["o"] = { "kʲ", "j", "ɔ" },
["ó"] = { "kʲ", "j", "u" },
["u"] = { "kʲ", "j", "u" },
[false] = { "kʲ", "i" }
},
[false] = "k"
},
["l"] = {
["i"] = {
["ą"] = {
["l"] = { "lʲ", "j", "ɔ", "l" },
["ł"] = { "lʲ", "j", "ɔ", "w" },
[false] = { "lʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "lʲ", "j", "ɛ", "l" },
["ł"] = { "lʲ", "j", "ɛ", "w" },
[false] = { "lʲ", "j", "ɛ̃" }
},
["a"] = { "lʲ", "j", "a" },
["e"] = { "lʲ", "j", "ɛ" },
["i"] = { "lʲ", "j", "i" },
["o"] = { "lʲ", "j", "ɔ" },
["ó"] = { "lʲ", "j", "u" },
["u"] = { "lʲ", "j", "u" },
[false] = { "lʲ", "i" }
},
[false] = "l"
},
["ł"] = "w",
["m"] = {
["i"] = {
["ą"] = {
["l"] = { "mʲ", "j", "ɔ", "l" },
["ł"] = { "mʲ", "j", "ɔ", "w" },
[false] = { "mʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "mʲ", "j", "ɛ", "l" },
["ł"] = { "mʲ", "j", "ɛ", "w" },
[false] = { "mʲ", "j", "ɛ̃" }
},
["a"] = { "mʲ", "j", "a" },
["e"] = { "mʲ", "j", "ɛ" },
["i"] = { "mʲ", "j", "i" },
["o"] = { "mʲ", "j", "ɔ" },
["ó"] = { "mʲ", "j", "u" },
["u"] = { "mʲ", "j", "u" },
[false] = { "mʲ", "i" }
},
[false] = "m"
},
["n"] = {
["k"] = {
["i"] = {
["ą"] = {
["l"] = { "ŋ", "kʲ", "j", "ɔ", "l" },
["ł"] = { "ŋ", "kʲ", "j", "ɔ", "w" },
[false] = { "ŋ", "kʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "ŋ", "kʲ", "j", "ɛ", "l" },
["ł"] = { "ŋ", "kʲ", "j", "ɛ", "w" },
[false] = { "ŋ", "kʲ", "j", "ɛ̃" }
},
["a"] = { "ŋ", "kʲ", "j", "a" },
["e"] = { "ŋ", "kʲ", "j", "ɛ" },
["i"] = { "ŋ", "kʲ", "j", "i" },
["o"] = { "ŋ", "kʲ", "j", "ɔ" },
["ó"] = { "ŋ", "kʲ", "j", "u" },
["u"] = { "ŋ", "kʲ", "j", "u" },
[false] = { "ŋ", "kʲ", "i" }
},
[false] = { "ŋ", "k" }
},
["g"] = {
["i"] = {
-- "ngią" + l/ł: the nasal vowel is denasalized to plain "ɔ" before l/ł, exactly
-- as in the parallel "nkią" and "gią" entries (the "ɔ" was previously missing).
["ą"] = {
["l"] = { "ŋ", "ɡʲ", "j", "ɔ", "l" },
["ł"] = { "ŋ", "ɡʲ", "j", "ɔ", "w" },
[false] = { "ŋ", "ɡʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "ŋ", "ɡʲ", "j", "ɛ", "l" },
["ł"] = { "ŋ", "ɡʲ", "j", "ɛ", "w" },
[false] = { "ŋ", "ɡʲ", "j", "ɛ̃" }
},
["a"] = { "ŋ", "ɡʲ", "j", "a" },
["e"] = { "ŋ", "ɡʲ", "j", "ɛ" },
["i"] = { "ŋ", "ɡʲ", "j", "i" },
["o"] = { "ŋ", "ɡʲ", "j", "ɔ" },
["ó"] = { "ŋ", "ɡʲ", "j", "u" },
["u"] = { "ŋ", "ɡʲ", "j", "u" },
[false] = { "ŋ", "ɡʲ", "i" }
},
[false] = { "ŋ", "ɡ" }
},
["i"] = {
["ą"] = {
["l"] = { "ɲ", "ɔ", "l" },
["ł"] = { "ɲ", "ɔ", "w" },
[false] = { "ɲ", "ɔ̃" }
},
["ę"] = {
["l"] = { "ɲ", "ɛ", "l" },
["ł"] = { "ɲ", "ɛ", "w" },
[false] = { "ɲ", "ɛ̃" }
},
["a"] = { "ɲ", "a" },
["e"] = { "ɲ", "ɛ" },
["i"] = { "ɲ", "j", "i" },
["o"] = { "ɲ", "ɔ" },
["ó"] = { "ɲ", "u" },
["u"] = { "ɲ", "u" },
[false] = { "ɲ", "i" }
},
[false] = "n"
},
["ń"] = "ɲ",
["o"] = {
[false] = "ɔ",
},
["ó"] = "u",
["p"] = {
["i"] = {
-- piątek, piasek, etc.
["ą"] = {
["l"] = { "pʲ", "j", "ɔ", "l" },
["ł"] = { "pʲ", "j", "ɔ", "w" },
[false] = { "pʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "pʲ", "j", "ɛ", "l" },
["ł"] = { "pʲ", "j", "ɛ", "w" },
[false] = { "pʲ", "j", "ɛ̃" }
},
["a"] = { "pʲ", "j", "a" },
["e"] = { "pʲ", "j", "ɛ" },
["i"] = { "pʲ", "j", "i" },
["o"] = { "pʲ", "j", "ɔ" },
["ó"] = { "pʲ", "j", "u" },
["u"] = { "pʲ", "j", "u" },
[false] = { "pʲ", "i" }
},
[false] = "p"
},
["r"] = {
["i"] = {
["ą"] = {
["l"] = { "rʲ", "j", "ɔ", "l" },
["ł"] = { "rʲ", "j", "ɔ", "w" },
[false] = { "rʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "rʲ", "j", "ɛ", "l" },
["ł"] = { "rʲ", "j", "ɛ", "w" },
[false] = { "rʲ", "j", "ɛ̃" }
},
["a"] = { "rʲ", "j", "a" },
["e"] = { "rʲ", "j", "ɛ" },
["i"] = { "rʲ", "j", "i" },
["o"] = { "rʲ", "j", "ɔ" },
["ó"] = { "rʲ", "j", "u" },
["u"] = { "rʲ", "j", "u" },
[false] = { "rʲ", "i" }
},
["z"] = "ʐ",
[false] = "r"
},
["q"] = {
["u"] = { "k", "v" },
[false] = false
},
["s"] = {
["i"] = {
["ą"] = {
["l"] = { "ɕ", "ɔ", "l" },
["ł"] = { "ɕ", "ɔ", "w" },
[false] = { "ɕ", "ɔ̃" }
},
["ę"] = {
["l"] = { "ɕ", "ɛ", "l" },
["ł"] = { "ɕ", "ɛ", "w" },
[false] = { "ɕ", "ɛ̃" }
},
["a"] = { "ɕ", "a" },
["e"] = { "ɕ", "ɛ" },
["o"] = { "ɕ", "ɔ" },
["ó"] = { "ɕ", "u" },
["u"] = { "ɕ", "u" },
["y"] = { "ɕ", "ɨ" },
[false] = { "ɕ", "i" }
},
["z"] = "ʂ",
[false] = "s",
},
["ś"] = "ɕ",
["t"] = {
["i"] = {
["ą"] = {
["l"] = { "tʲ", "j", "ɔ", "l" },
["ł"] = { "tʲ", "j", "ɔ", "w" },
[false] = { "tʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "tʲ", "j", "ɛ", "l" },
["ł"] = { "tʲ", "j", "ɛ", "w" },
[false] = { "tʲ", "j", "ɛ̃" }
},
["a"] = { "tʲ", "j", "a" },
["e"] = { "tʲ", "j", "ɛ" },
["i"] = { "tʲ", "i" },
["o"] = { "tʲ", "j", "ɔ" },
["ó"] = { "tʲ", "j", "u" },
["u"] = { "tʲ", "j", "u" },
[false] = { "tʲ", "i" }
},
[false] = "t"
},
["u"] = "u",
["v"] = {
["i"] = {
["ą"] = {
["l"] = { "vʲ", "j", "ɔ", "l" },
["ł"] = { "vʲ", "j", "ɔ", "w" },
[false] = { "vʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "vʲ", "j", "ɛ", "l" },
["ł"] = { "vʲ", "j", "ɛ", "w" },
[false] = { "vʲ", "j", "ɛ̃" }
},
["a"] = { "vʲ", "j", "a" },
["e"] = { "vʲ", "j", "ɛ" },
["i"] = { "vʲ", "j", "i" },
["o"] = { "vʲ", "j", "ɔ" },
["ó"] = { "vʲ", "j", "u" },
["u"] = { "vʲ", "j", "u" },
[false] = { "vʲ", "i" }
},
[false] = "v"
},
["w"] = {
["i"] = {
["ą"] = {
["l"] = { "vʲ", "j", "ɔ", "l" },
["ł"] = { "vʲ", "j", "ɔ", "w" },
[false] = { "vʲ", "j", "ɔ̃" }
},
["ę"] = {
["l"] = { "vʲ", "j", "ɛ", "l" },
["ł"] = { "vʲ", "j", "ɛ", "w" },
[false] = { "vʲ", "j", "ɛ̃" }
},
["a"] = { "vʲ", "j", "a" },
["e"] = { "vʲ", "j", "ɛ" },
["i"] = { "vʲ", "j", "i" },
["o"] = { "vʲ", "j", "ɔ" },
["ó"] = { "vʲ", "j", "u" },
["u"] = { "vʲ", "j", "u" },
[false] = { "vʲ", "i" }
},
[false] = "v"
},
["x"] = { "k", "s" },
["y"] = "ɨ",
["z"] = {
["i"] = {
["ą"] = {
["l"] = { "ʑ", "ɔ", "l" },
["ł"] = { "ʑ", "ɔ", "w" },
[false] = { "ʑ", "ɔ̃" }
},
["ę"] = {
["l"] = { "ʑ", "ɛ", "l" },
["ł"] = { "ʑ", "ɛ", "w" },
[false] = { "ʑ", "ɛ̃" }
},
["a"] = { "ʑ", "a" },
["e"] = { "ʑ", "ɛ" },
["o"] = { "ʑ", "ɔ" },
["ó"] = { "ʑ", "u" },
["u"] = { "ʑ", "u" },
[false] = { "ʑ", "i" }
},
[false] = "z"
},
["ź"] = "ʑ",
["ż"] = "ʐ",
["-"] = {},
}
-- Set of phones the later stages treat as real sounds; anything not listed here
-- (spaces, syllable marks, unknown characters) is handled as a separator.
local valid_phone = {
["a"] = true, ["b"] = true, ["bʲ"] = true, ["d"] = true, ["dʲ"] = true, ["d͡z"] = true, ["d͡ʑ"] = true,
["d͡ʐ"] = true, ["ɛ"] = true, ["ɛ̃"] = true, ["f"] = true, ["fʲ"] = true, ["ɡ"] = true,
["ɡʲ"] = true, ["i"] = true, ["ɨ"] = true, ["j"] = true, ["k"] = true, ["kʲ"] = true,
["l"] = true, ["lʲ"] = true, ["m"] = true, ["mʲ"] = true, ["n"] = true, ["ŋ"] = true,
["ɲ"] = true, ["ɔ"] = true, ["ɔ̃"] = true, ["p"] = true, ["pʲ"] = true, ["r"] = true, ["rʲ"] = true,
["s"] = true, ["ɕ"] = true, ["ʂ"] = true, ["t"] = true, ["tʲ"] = true, ["t͡s"] = true, ["t͡ɕ"] = true, ["t͡ʂ"] = true,
["u"] = true, ["v"] = true, ["vʲ"] = true, ["w"] = true, ["w̃"] = true, ["x"] = true, ["xʲ"] = true, ["z"] = true,
["ʑ"] = true, ["ʐ"] = true, ["ɣ"] = true
}
-- Input syllable/stress marks mapped to the IPA mark that is emitted for them.
local sylmarks = {
["."] = ".", ["'"] = "ˈ", ["ˈ"] = "ˈ", [","] = "ˌ"
}
-- Set of vowel phones. NOTE: a second local named `vowel` (a pattern string) is
-- declared further down the file; functions defined before that point keep
-- referring to this table.
local vowel = {
["a"] = true, ["ɛ"] = true, ["ɛ̃"] = true,
["i"] = true, ["ɨ"] = true, ["ɔ"] = true,
["ɔ̃"] = true, ["u"] = true
}
-- Voiced phone -> voiceless counterpart. Phones that map to themselves here and
-- in `voice` are treated as neutral by is_neutral().
local devoice = {
["b"] = "p", ["d"] = "t", ["d͡z"] = "t͡s", ["d͡ʑ"] = "t͡ɕ",
["d͡ʐ"] = "t͡ʂ", ["ɡ"] = "k", ["v"] = "f", ["vʲ"] = "fʲ",
["z"] = "s", ["ʑ"] = "ɕ", ["ʐ"] = "ʂ",
-- non-devoicable
["bʲ"] = "bʲ", ["dʲ"] = "dʲ", ["ɡʲ"] = "ɡʲ", ["m"] = "m", ["mʲ"] = "mʲ",
["n"] = "n", ["ɲ"] = "ɲ", ["ŋ"] = "ŋ", ["w"] = "w", ["w̃"] = "w̃",
["l"] = "l", ["lʲ"] = "lʲ", ["j"] = "j", ["r"] = "r", ["rʲ"] = "rʲ", ["tʲ"] = "tʲ",
}
-- Voiceless phone -> voiced counterpart; the identity entries mirror those in `devoice`.
local voice = {
["p"] = "b", ["t"] = "d", ["t͡s"] = "d͡z", ["t͡ɕ"] = "d͡ʑ",
["t͡ʂ"] = "d͡ʐ", ["k"] = "ɡ", ["f"] = "v", ["fʲ"] = "vʲ",
["s"] = "z", ["ɕ"] = "ʑ", ["ʂ"] = "ʐ", ["x"] = "ɣ",
-- non-voicable
["bʲ"] = "bʲ", ["dʲ"] = "dʲ", ["ɡʲ"] = "ɡʲ", ["m"] = "m", ["mʲ"] = "mʲ",
["n"] = "n", ["ɲ"] = "ɲ", ["ŋ"] = "ŋ", ["w"] = "w", ["w̃"] = "w̃",
["l"] = "l", ["lʲ"] = "lʲ", ["j"] = "j", ["r"] = "r", ["rʲ"] = "rʲ", ["tʲ"] = "tʲ",
}
-- Phones that never decide the voicing of a cluster (see is_forward_assimilant);
-- phones flagged IS_RZ are treated the same way.
local forward_assimilants = {
["v"] = true, ["vʲ"] = true
}
-- Nasal vowel -> plain oral vowel, used by simplify_nasals.
local denasalized = {
["ɛ̃"] = "ɛ",
["ɔ̃"] = "ɔ",
}
-- Keyed by the phone that follows a nasal vowel: the nasal consonant that
-- simplify_nasals inserts after the denasalized vowel. Phones absent from this
-- map leave the nasal vowel unchanged. The trailing comments are Polish example words.
local nasal_map = {
["p"] = "m", ["pʲ"] = "m", ["b"] = "m", ["bʲ"] = "m", -- zębu, klępa
["k"] = "ŋ", ["kʲ"] = "ŋ", ["ɡ"] = "ŋ", ["ɡʲ"] = "ŋ", -- pąk, łęgowy
["t"] = "n", ["d"] = "n", -- wątek, piątek, mądrość
["t͡ɕ"] = "ɲ", ["d͡ʑ"] = "ɲ", ["ɕ"] = "ɲ", ["ʑ"] = "ɲ", -- pięć, pędziwiatr, łabędź
-- gęsi, więzi
["t͡ʂ"] = "n", ["d͡ʐ"] = "n", -- pączek, ?
-- węszyć, mężny
["t͡s"] = "n", ["d͡z"] = "n", -- wiedząc, pieniędzy
}
-- Names of the per-phone flags recorded by convert_to_phones.
local SPECIAL_FLAGS = {
IS_RZ = "IS_RZ",
}
-- Word endings for which get_stressed_syllable returns 2
-- (stress two syllables before the last one).
local third_last_syllable_stress = {
"łbym", "łabym", "łbyś", "łabyś", "łby", "łaby", "łoby", "liby", "łyby",
}
-- Word endings for which get_stressed_syllable returns 3
-- (stress three syllables before the last one).
local fourth_last_syllable_stress = {
"libyśmy", "łybyśmy", "libyście", "łybyście",
}
-- Single letter (upper or lower case) -> list of spelled-out names for that letter.
-- export.procesar_pron_args looks the first "ayuda" value up here and, on a hit,
-- uses the list both as the respellings to transcribe and as args["tl"].
local pron_abc = {
["A"] = {"a"},
["a"] = {"a"},
["Ą"] = {"ą", "a z ogonkiem"},
["ą"] = {"ą", "a z ogonkiem"},
["B"] = {"be"},
["b"] = {"be"},
["C"] = {"ce"},
["c"] = {"ce"},
["Ć"] = {"cie", "ce z kreską"},
["ć"] = {"cie", "ce z kreską"},
["D"] = {"de"},
["d"] = {"de"},
["E"] = {"e"},
["e"] = {"e"},
["Ę"] = {"ę", "e z ogonkiem"},
["ę"] = {"ę", "e z ogonkiem"},
["F"] = {"ef"},
["f"] = {"ef"},
["G"] = {"gie"},
["g"] = {"gie"},
["H"] = {"ha"},
["h"] = {"ha"},
["I"] = {"i"},
["i"] = {"i"},
["J"] = {"jot"},
["j"] = {"jot"},
["K"] = {"ka"},
["k"] = {"ka"},
["L"] = {"el"},
["l"] = {"el"},
["Ł"] = {"eł", "el z kreską"},
["ł"] = {"eł", "el z kreską"},
["M"] = {"em"},
["m"] = {"em"},
["N"] = {"en"},
["n"] = {"en"},
["Ń"] = {"eń", "en z kreską"},
["ń"] = {"eń", "en z kreską"},
["O"] = {"o"},
["o"] = {"o"},
["Ó"] = {"o z kreską", "o kreskowane", "u zamknięte"},
["ó"] = {"o z kreską", "o kreskowane", "u zamknięte"},
["P"] = {"pe"},
["p"] = {"pe"},
["Q"] = {"ku"},
["q"] = {"ku"},
["R"] = {"er"},
["r"] = {"er"},
["S"] = {"es"},
["s"] = {"es"},
["Ś"] = {"eś", "es z kreską"},
["ś"] = {"eś", "es z kreską"},
["T"] = {"te"},
["t"] = {"te"},
["U"] = {"u", "u otwarte"},
["u"] = {"u", "u otwarte"},
["V"] = {"fał", "we"},
["v"] = {"fał", "we"},
["W"] = {"wu"},
["w"] = {"wu"},
["X"] = {"iks"},
["x"] = {"iks"},
["Y"] = {"y", "igrek"},
["y"] = {"y", "igrek"},
["Z"] = {"zet"},
["z"] = {"zet"},
["Ź"] = {"ziet", "zet z kreską"},
["ź"] = {"ziet", "zet z kreską"},
["Ż"] = {"żet", "zet z kropką"},
["ż"] = {"żet", "zet z kropką"},
}
---
-- Tell whether a phone is unaffected by voicing assimilation: it must be listed
-- in both `devoice` and `voice` and map to the same phone in each.
---@param phone string
---@return boolean|nil # nil when the phone is missing from either table
local function is_neutral(phone)
    local devoiced = devoice[phone]
    if not devoiced then
        return devoiced
    end
    local voiced = voice[phone]
    if not voiced then
        return voiced
    end
    return voiced == devoiced
end
---
-- Tell whether a phone is a separator rather than a sound: either a space
-- (word boundary) or one of the keys of `sylmarks` (syllable/stress mark).
---@param phone string
---@return boolean
local function is_special(phone)
    if phone == " " then
        return true
    end
    local mark = sylmarks[phone]
    return mark ~= nil and mark ~= false
end
---
-- Tell whether a phone is voiced, i.e. `devoice` maps it to a different phone.
---@param phone string
---@return boolean|nil # nil when the phone has no entry in `devoice`
local function is_voiced(phone)
    local devoiced = devoice[phone]
    if not devoiced then
        return devoiced
    end
    return devoiced ~= phone
end
---
-- Tell whether a phone takes its voicing from the preceding consonant instead of
-- imposing its own: either it is listed in `forward_assimilants` or it carries
-- the IS_RZ flag.
---@param phone string
---@param flags table|nil Special flags recorded for this phone, if any
---@return boolean|nil
local function is_forward_assimilant(phone, flags)
    if forward_assimilants[phone] then
        return forward_assimilants[phone]
    end
    if not flags then
        return flags
    end
    return flags[SPECIAL_FLAGS.IS_RZ]
end
---
-- Tell whether the given phone string contains a palatalized consonant followed
-- by "j" and a vowel (e.g. "kʲja").
-- NOTE(review): this uses the byte-oriented `string.find`, so the multi-byte
-- characters inside the bracket classes are matched byte by byte rather than as
-- whole characters.
---@param cluster string Concatenated phones
---@return boolean
local function is_palatalized_cluster(cluster)
    local first = string.find(cluster, "[ɡxkfbmprvdtl]ʲj[aɔ̃ɛɛ̃iɔu]")
    return first ~= nil
end
---
-- Build the flag table to attach to the phone produced from `grapheme`.
-- Only the digraph "rz" currently yields flags (IS_RZ).
---@param grapheme string Letters consumed to produce the phone
---@return table | nil # nothing is returned for any grapheme other than "rz"
local function process_special_flags(grapheme)
    if grapheme ~= "rz" then
        return
    end
    local grapheme_flags = {}
    grapheme_flags[SPECIAL_FLAGS.IS_RZ] = true
    return grapheme_flags
end
---
-- Convert letters and graphemes to phones by walking the `letters2phones` trie.
-- Each nested table is keyed by the next letter; its `[false]` entry is the value
-- emitted when no longer grapheme matches. A `[false]` value of `false` marks a
-- letter sequence with no pronunciation (e.g. "q" not followed by "u").
---@param word string
---@return table<number, string>|nil, table<number, table<string, boolean>>|nil
--- phones and their flags (indexed like phones), or nil when the word contains an
--- unsupported letter sequence, whether in the middle or at the end of the word
local function convert_to_phones(word)
    local phones = {}
    local flags = {}
    -- Letters consumed since the last emitted phone; lets append_phone recognise
    -- multi-letter graphemes such as "rz".
    local chbuf = ""

    local function append_phone(phone)
        insert(phones, phone)
        -- mark rz for assimilation later
        local grapheme_flags = process_special_flags(chbuf)
        if grapheme_flags then
            flags[#phones] = grapheme_flags
        end
        chbuf = ""
    end

    -- Emit a trie value: either a single phone or a list of phones.
    local function append_value(value)
        if type(value) == "table" then
            for _, phone in ipairs(value) do
                append_phone(phone)
            end
            -- The list may be empty (the "-" entry): forget the consumed letters
            -- anyway so they cannot leak into the next grapheme's flag lookup.
            chbuf = ""
        else
            append_phone(value)
        end
    end

    local l2ptab = letters2phones
    for ch in strmatchit(strlower(word), ".") do
        local value = l2ptab[ch]
        -- No longer grapheme continues with this letter: flush the default of the
        -- pending grapheme and restart from the trie root. At the root itself there
        -- is nothing pending, so nothing is flushed.
        if value == nil and l2ptab ~= letters2phones then
            local fallback = l2ptab[false]
            if fallback == false then
                return nil
            end
            append_value(fallback)
            l2ptab = letters2phones
            value = l2ptab[ch]
        end
        chbuf = chbuf .. ch
        if type(value) == "table" then
            if value[false] == nil then
                -- Leaf: a plain list of phones
                append_value(value)
                l2ptab = letters2phones
            else
                -- Inner node: wait for the next letter
                l2ptab = value
            end
        elseif type(value) == "string" then
            append_phone(value)
            l2ptab = letters2phones
        else
            -- Unmapped character (space, syllable mark, ...): pass it through
            append_phone(ch)
        end
    end
    -- Flush a grapheme still pending at the end of the word
    if l2ptab ~= letters2phones then
        local fallback = l2ptab[false]
        if fallback == false then
            return nil
        end
        append_value(fallback)
    end
    return phones, flags
end
---
-- Simplify nasal vowels according to what follows them:
--  * "ɛ̃" with no following valid phone is replaced by its oral vowel;
--  * before a phone listed in `nasal_map` the vowel becomes oral and the mapped
--    nasal consonant is inserted after it;
--  * otherwise the nasal vowel is kept.
-- A syllable mark directly after the vowel is skipped when looking ahead.
---@param phones table<number, string>
---@param flags table<number, table<string, boolean>> Flags indexed like `phones`
---@return table<number, string>, table<number, table<string, boolean>>
local function simplify_nasals(phones, flags)
    local new_phones, new_flags = {}, {}
    for i, phone in ipairs(phones) do
        local emitted = phone
        local inserted_nasal = nil
        local oral = denasalized[phone]
        if oral then
            local pnext = phones[i + 1]
            if sylmarks[pnext] then
                pnext = phones[i + 2]
            end
            if phone == "ɛ̃" and (not pnext or not valid_phone[pnext]) then
                -- denasalize word-final ę
                emitted = oral
            elseif nasal_map[pnext] then
                emitted = oral
                inserted_nasal = nasal_map[pnext]
            end
        end
        insert(new_phones, emitted)
        if inserted_nasal then
            insert(new_phones, inserted_nasal)
        end
        -- The flags of the source phone go to the last phone just written
        new_flags[#new_phones] = flags[i]
    end
    return new_phones, new_flags
end
---
-- Devoice the very last phone of the list in place when it is a voiced consonant.
-- Only the final element is examined.
---@param phones table<number, string> Target phone table to mutate
local function terminal_devoice(phones)
    local last = #phones
    local final_phone = phones[last]
    if not is_voiced(final_phone) then
        return
    end
    phones[last] = devoice[final_phone]
end
---
-- Apply voicing assimilation to one consonant cluster and append the result to
-- `new_phones`. The last phone that is neither a forward assimilant, neutral nor
-- a separator decides the voicing of the whole cluster.
---@param cluster table<number, string> Consonant cluster
---@param flags table<number, table<string, boolean>> Flags relative to the cluster
---@param new_phones table<number, string> Target phone table to mutate
local function process_consonant_cluster(cluster, flags, new_phones)
    -- Scan from the end for the consonant that determines the voicing
    local determining_index = 0
    for idx = #cluster, 1, -1 do
        local candidate = cluster[idx]
        local skippable = is_forward_assimilant(candidate, flags[idx])
            or is_neutral(candidate)
            or is_special(candidate)
        if not skippable then
            determining_index = idx
            break
        end
    end

    -- No determining consonant: the cluster is copied unchanged
    local target_map = nil
    if determining_index ~= 0 then
        if is_voiced(cluster[determining_index]) then
            target_map = voice
        else
            target_map = devoice
        end
    end

    for _, consonant in ipairs(cluster) do
        local transformed = consonant
        if target_map and target_map[consonant] then
            transformed = target_map[consonant]
        end
        insert(new_phones, transformed)
    end
end
---
-- Walk the whole phone list, find every run of consonants (separators inside a
-- run are kept as part of it) and apply voicing assimilation to runs longer than
-- one element. Vowels and non-phone symbols outside a run are copied unchanged.
---@param phones table<number, string>
---@param flags table<number, table<string, boolean>> Flags indexed like `phones`
---@return table<number, string>
local function process_consonant_clusters(phones, flags)
    local new_phones = {}
    local total = #phones
    local i = 1
    while i <= total do
        local pcurr = phones[i]
        if valid_phone[pcurr] and not vowel[pcurr] then
            -- A consonant starts a cluster: collect it together with everything
            -- up to the next vowel or unknown symbol.
            local cluster = {}
            -- Phone flags indexed relative to the cluster
            local cluster_flags = {}
            local j = i
            while j <= total do
                local candidate = phones[j]
                local belongs = not vowel[candidate]
                    and (valid_phone[candidate] or is_special(candidate))
                if not belongs then
                    break
                end
                insert(cluster, candidate)
                cluster_flags[#cluster] = flags[j]
                j = j + 1
            end
            if #cluster > 1 then
                process_consonant_cluster(cluster, cluster_flags, new_phones)
                -- Resume right after the cluster so nothing is emitted twice
                i = j
            else
                if #cluster == 1 then
                    -- A lone consonant needs no assimilation
                    insert(new_phones, cluster[1])
                end
                i = i + 1
            end
        else
            -- Vowel or other symbol: copy as-is
            insert(new_phones, pcurr)
            i = i + 1
        end
    end
    return new_phones
end
---
-- Join several phones together into one string.
-- Table arguments are concatenated element by element; nil/false arguments are
-- skipped wherever they occur (the argument count is taken with select("#", ...)
-- so a nil in the middle does not cut off the arguments after it).
---@vararg string | table | nil Phones to join together
---@return string
local function join_phones(...)
    local pieces = {}
    for idx = 1, select("#", ...) do
        local piece = select(idx, ...)
        if type(piece) == "table" then
            pieces[#pieces + 1] = concat(piece, "")
        elseif piece then
            pieces[#pieces + 1] = piece
        end
    end
    return concat(pieces)
end
---
-- Group phones into words and syllables.
-- The result is a list whose entries are, in input order:
--   * a table of syllable strings for each run of valid phones (a "word");
--   * a single already-joined string for a word that contained explicit
--     syllable/stress marks;
--   * any other symbol (e.g. a space) copied through as a string.
---@param phones table<number, string>
---@return table<number, table|string>
local function collect_syllables(phones)
-- curword: syllable list of the word being built (nil between words)
-- sylmarked: the current word contained an explicit mark from `sylmarks`
-- sylbuf: phones collected for the current syllable (nil between words)
-- had_vowl: the current syllable buffer already contains a vowel
local words, curword, sylmarked, sylbuf, had_vowl = {}, nil, false, nil, nil
for i, pcurr in ipairs(phones) do
local pprev, pnext, pnnext = phones[i - 1], phones[i + 1], phones[i + 2]
if valid_phone[pcurr] then
-- First phone of a new word: start a fresh syllable list
if not curword then
curword, sylbuf, had_vowl, sylmarked = {}, '', false, false
insert(words, curword)
end
-- Decide whether pcurr stays in the current syllable or opens a new one
local same_syl = true
if vowel[pcurr] then
-- A second vowel always opens a new syllable
if had_vowl then
same_syl = false
end
had_vowl = true
elseif had_vowl then
-- Consonant after the nucleus: the first matching rule below wins
if vowel[pnext] then
same_syl = false
elseif not vowel[pprev] and not vowel[pnext] then
same_syl = false
elseif vowel[pprev] and is_palatalized_cluster(join_phones(pcurr, pnext, pnnext)) then
same_syl = false
elseif ((pcurr == "s") and ((pnext == "t") or (pnext == "p") or (pnext == "k")))
or (pnext == "r") or (pnext == "f") or (pnext == "w")
or ((pcurr == "ɡ") and (pnext == "ʐ"))
or ((pcurr == "d") and ((pnext == "l") or (pnext == "w") or (pnext == "ɲ")))
or is_palatalized_cluster(join_phones(pprev, pcurr, pnext))
then
-- these should belong to a common syllable
same_syl = true
end
end
if same_syl then
sylbuf = sylbuf .. pcurr
else
-- Close the current syllable and start the next one with pcurr
insert(curword, sylbuf)
sylbuf, had_vowl = pcurr, vowel[pcurr]
end
elseif (curword or valid_phone[pnext]) and sylmarks[pcurr] then
-- Explicit syllable/stress mark inside a word, or directly before its first phone
if not curword then
curword, sylbuf, had_vowl = {}, '', false
insert(words, curword)
end
sylmarked = true
if sylbuf then
insert(curword, sylbuf)
sylbuf = ''
end
-- Store the IPA form of the mark as its own element
insert(curword, sylmarks[pcurr])
else
-- Any other symbol ends the current word (if there is one)
if sylbuf then
assert(type(curword) == "table") -- to keep the interpreter quiet
-- A trailing buffer without a vowel is merged into the previous syllable
if #curword > 0 and not had_vowl then
curword[#curword] = curword[#curword] .. sylbuf
else
insert(curword, sylbuf)
end
-- Words with explicit marks are collapsed into one finished string
if sylmarked then
words[#words] = concat(curword)
end
end
curword, sylbuf = nil, nil
insert(words, pcurr)
end
end
-- Flush the last word, using the same rules as for a word-ending symbol
if sylbuf then
assert(type(curword) == "table") -- to keep the interpreter quiet
if #curword > 0 and not had_vowl then
curword[#curword] = curword[#curword] .. sylbuf
else
insert(curword, sylbuf)
end
if sylmarked then
words[#words] = concat(curword)
end
end
return words
end
---
-- Determine how many syllables before the last one the stress falls, based on
-- the ending of the orthographic text: 1 by default, 2 for the endings in
-- `third_last_syllable_stress`, 3 for those in `fourth_last_syllable_stress`.
---@param word string
---@return number
local function get_stressed_syllable(word)
    local function ends_with_any(suffixes)
        local found = false
        for _, suffix in ipairs(suffixes) do
            if word:sub(-string.len(suffix)) == suffix then
                found = true
            end
        end
        return found
    end

    local stressed_syllable = 1
    if ends_with_any(third_last_syllable_stress) then
        stressed_syllable = 2
    end
    -- Checked last so that it takes precedence over the shorter endings
    if ends_with_any(fourth_last_syllable_stress) then
        stressed_syllable = 3
    end
    return stressed_syllable
end
---
-- Tell whether the text contains at least one space character.
---@param word string
---@return boolean
local function is_more_than_one_word(word)
    return string.find(word, " ") ~= nil
end
---
-- Normalize raw text before transcription: lower-case it, turn delimiting
-- punctuation into " | " separators, drop the remaining punctuation, turn
-- hyphens into spaces and tidy up repeated separators and whitespace.
---@param texto string
---@return string
local function normalizar(texto)
    -- Applied in order; each entry is { pattern, replacement }
    local rules = {
        { PUNTUACION, " | " },          -- fragment delimiters become IPA foot boundaries "|"
        { PUNTUACION_EXTRA, "" },       -- remove whatever punctuation is left
        { "[%-‐]", " " },               -- hyphens become spaces
        { "%s*|%s*|%s*", " | " },       -- collapse doubled bars and surrounding spaces
        { "%s+", " " },                 -- collapse runs of whitespace
    }
    local result = strlower(texto)
    for _, rule in ipairs(rules) do
        result = strsubrep(result, rule[1], rule[2])
    end
    -- Strip leading/trailing whitespace and bars
    local stripped = strstrip(result, "[%s|]+")
    return stripped
end
---
-- Convert one normalized fragment of text into an IPA string.
-- Pipeline: letters -> phones, nasal vowel simplification, final devoicing,
-- cluster voicing assimilation, syllabification, then stress and syllable marks
-- and a final textual clean-up of the palatalization and nasal vowel symbols.
-- Raises an error when the fragment contains a letter sequence that
-- convert_to_phones cannot transcribe.
-- NOTE(review): the stress offset is derived from the ending of the whole
-- fragment and applied to every word in it.
---@param w string Fragment as produced by normalizar; may contain several words
---@return string
local function procesar_fragmento(w)
    -- How many syllables before the last one the stress falls (1 = penultimate)
    local stressed_syllable = get_stressed_syllable(w)
    local phones, flags = convert_to_phones(w)
    if not phones then
        -- Fail with a clear message instead of an opaque error further down
        error("generar-pron/pl: unsupported letter sequence in '" .. tostring(w) .. "'")
    end
    phones, flags = simplify_nasals(phones, flags)
    terminal_devoice(phones)
    phones = process_consonant_clusters(phones, flags)
    local words = collect_syllables(phones)
    -- mark syllable breaks and stress
    for i, word in ipairs(words) do
        if type(word) == "table" then
            -- unless already marked
            if not ((word[2] == ".") or (word[2] == "ˈ") or (word[2] == "ˌ")) then
                local count = #word
                -- Clamp the offset for this word only, so that a word with too few
                -- syllables is stressed on its first syllable and the clamped value
                -- never leaks into the following words.
                local offset = stressed_syllable
                if count < offset + 1 then
                    offset = count - 1
                end
                -- Monosyllables get no marks at all
                if count > 1 then
                    for j, syl in ipairs(word) do
                        if j == (count - offset) then
                            word[j] = "ˈ" .. syl
                        elseif j ~= 1 then
                            word[j] = "." .. syl
                        end
                    end
                end
            end
            words[i] = concat(word)
        end
    end
    for i, _ in ipairs(words) do
        -- get rid of /ʲ/
        words[i] = strsubn(words[i], "ʲ([ij])", "%1")
        words[i] = strsubn(words[i], "ʲ", "j")
        -- replace /ɔ̃/ and /ɛ̃/ with /ɔw̃/ and /ɛw̃/
        words[i] = strsubn(words[i], "ɛ̃", "ɛw̃")
        words[i] = strsubn(words[i], "ɔ̃", "ɔw̃")
        ---- replace /n/ with /w̃/ before /s, z, ʂ, ʐ, ɕ, ʑ/ (currently turned off)
        -- words[i] = strsubn(words[i], "n([szʂʐɕʑ])", "w̃%1")
        -- words[i] = strsubn(words[i], "n([ˈˌ.])([szʂʐɕʑ])", "w̃%1%2")
    end
    return concat(words)
end
---
-- Produce the transcription of a text: normalize it, split it at the "|"
-- separators, transcribe each fragment and join the results with " | ".
---@param t string
---@return table # a list holding one list with the HTML-encoded transcription
local function generar_pron(t)
    local fragmentos = strsplit(normalizar(t), "%s*|%s*")
    local convertido = {}
    for idx, fragmento in ipairs(fragmentos) do
        convertido[idx] = procesar_fragmento(fragmento)
    end
    local unido = concat(convertido, " | ")
    return { { strhtml(unido) } }
end
-- Orthographic letter classes used by `partition` (hyphenation), as opposed to
-- the phone tables used for transcription.
local vowels = "aeiouyąęó"
-- NOTE: this re-declares `vowel` as a pattern string. Functions defined above
-- this line keep referring to the earlier `vowel` phone table; code from here on
-- sees this string.
local vowel = "[" .. vowels .. "]"
-- NOTE(review): "u" and "y" are also listed in `vowels`; `partition` tests the
-- vowel pattern first, so they are classified as vowels there.
local consonants = "bcćdfghjklłmnńpqrsśtuvwxyzźż"
local consonant = "[" .. consonants .. "]"
-- vowel digraphs, not necessarily actual phonetic diphthongs
local diphthong_i_v2 = "[aąoeęuói]"
-- key = first vowel letter, value = pattern for the possible second letter
local diphthongs = {
["a"] = "u",
["e"] = "u",
["i"] = diphthong_i_v2
}
-- consonant digraphs (key = first letter, value = possible second letters)
local digraphs = {
["c"] = "[hz]",
["d"] = "[zźż]",
["q"] = "u",
["r"] = "z",
["s"] = "z",
}
-- Endings for which get_word_suffix returns 1
local past_tense_suffixes = {
"liśmy", "liście", "łyśmy", "łyście",
}
-- Endings for which get_word_suffix returns 2 (some entries are listed twice)
local latin_borrowing_suffixes = {
"ika", "yka",
"iki", "yki",
"ika", "yka",
"ice", "yce",
"ikom", "ykom",
"ikę", "ykę",
"iką", "yką",
"ice", "yce",
"ikach", "ykach",
"iko", "yko",
}
-- if this is changed, the next two functions also need to be
---
-- Tell whether a respelling differs from the word only by "'" / "." marks and
-- by writing "j" instead of "i" before one of the letters in `diphthong_i_v2`.
---@param respelling string
---@param word string
---@return boolean
local function is_respelling_close_enough(respelling, word)
    local j_before_vowel = "j(" .. diphthong_i_v2 .. ")"
    -- Rewrite "j" + vowel letter as "i" + vowel letter so both spellings compare equal
    local function fold(text)
        local folded = strsubn(text, j_before_vowel, "i%1")
        return folded
    end
    local unmarked = strsubn(respelling, "['.]", "")
    return fold(word) == fold(unmarked)
end
---
-- Split a respelling into a list of parts: { "v", letters } for a vowel group,
-- { "c", letters } for a consonant group and { "b" } for a syllable break.
-- The letters are taken from `oword`, not from the respelling.
---@param word string Respelling; "'" and "-" are treated like "."
---@param oword string Text from which the emitted letters are read
---@return table|nil # nil for multi-word input or when an unknown symbol is found
local function partition(word, oword)
    local parts = {}
    local lenword = strlen(word)
    word = strsubn(word, "['-]", ".")
    local lowered = strlower(word)
    local pos = 1
    -- Number of break marks seen so far that have no counterpart in oword
    local offset = 0

    -- Append one letter group of the given kind starting at pos, widening it to
    -- two letters when `seconds` lists a matching second letter, and advance pos.
    local function take(kind, seconds)
        local initial = substr(lowered, pos, pos)
        local seq = 1
        if seconds[initial] and strfind(lowered, "^" .. initial .. seconds[initial], pos) then
            seq = 2
        end
        local from = pos - offset
        insert(parts, { kind, substr(oword, from, from + seq - 1) })
        pos = pos + seq
    end

    while pos <= lenword do
        if strfind(lowered, "^" .. vowel, pos) then
            take("v", diphthongs)
        elseif strfind(lowered, "^" .. consonant, pos) then
            take("c", digraphs)
        elseif strfind(word, "^% ", pos) then
            -- multiword, do not hyphenate
            return nil
        elseif strfind(word, "^%.", pos) then
            -- syllable break; it shifts the oword index unless oword has "'" or "-" here
            if not strfind(oword, "^['-]", pos - offset) then
                offset = offset + 1
            end
            insert(parts, { "b", nil })
            pos = pos + 1
        else
            -- unrecognized symbol
            return nil
        end
    end
    return parts
end
---
-- Classify a word by its ending after removing stress/syllable marks:
-- 0 = no listed ending, 1 = ends in one of `past_tense_suffixes`,
-- 2 = ends in one of `latin_borrowing_suffixes` (this check wins over 1).
-- NOTE(review): the mark removal uses the byte-oriented string.gsub with
-- multi-byte characters inside a bracket class, so it removes matching bytes
-- rather than whole characters.
---@param word string
---@return number
local function get_word_suffix(word)
    local stripped = string.gsub(word, "([ˈ'.,ˌ])", "")

    local function ends_with_any(suffixes)
        local found = false
        for _, suffix in ipairs(suffixes) do
            if stripped:sub(-string.len(suffix)) == suffix then
                found = true
            end
        end
        return found
    end

    local word_suffix = 0
    if ends_with_any(past_tense_suffixes) then
        word_suffix = 1
    end
    if ends_with_any(latin_borrowing_suffixes) then
        word_suffix = 2
    end
    return word_suffix
end
---
-- Split a word into orthographic syllables using the parts returned by `partition`.
-- A consonant group that follows a vowel is first attached to that vowel's
-- syllable; if another vowel comes next, the group is moved over to become the
-- onset of the new syllable.
---@param word string Respelling (the "ayuda" value)
---@param otitle string Original title, from which the letters are read
---@return table<number, string>|nil # list of syllables, or nil when `partition` fails
local function separar_en_silabas(word, otitle) -- word = respelling (ayuda), otitle = original title
local syllables = {}
-- cursyl: letters of the syllable being built
local cursyl = ""
-- nucleus: cursyl already contains a vowel group
local nucleus = false
-- coda: length of the consonant group that closed the last stored syllable
-- (nil when there is nothing that could still be moved to the next syllable)
local coda = nil
local parts = partition(word, otitle)
if not parts then return nil end
for pos, p in ipairs(parts) do
local kind, part = unpack(p)
if kind == "v" then
if coda then
-- Move the closing consonant group of the previous syllable to this one
cursyl = cursyl .. substr(syllables[#syllables], -coda)
syllables[#syllables] = substr(syllables[#syllables], 1, -coda - 1)
coda = nil
end
if nucleus then
-- Two vowel groups in a row: the first one is a syllable of its own
insert(syllables, cursyl)
cursyl = ""
end
nucleus = true
coda = nil
cursyl = cursyl .. part
elseif kind == "c" then
cursyl = cursyl .. part
if nucleus then
-- Consonant group after a vowel: close the syllable for now and remember
-- how many characters may still have to move to the next syllable
insert(syllables, cursyl)
cursyl = ""
nucleus = false
coda = strlen(part)
else
coda = nil
end
elseif kind == "b" then
-- implicit syllable break
if #cursyl > 0 then
-- Pending letters without a vowel are appended to the previous syllable
if nucleus or #syllables < 1 then
insert(syllables, cursyl)
else
syllables[#syllables] = syllables[#syllables] .. cursyl
end
end
cursyl = ""
nucleus = false
coda = nil
else
-- unrecognized kind
return nil
end
end
-- Flush whatever is left, with the same rule as for a break
if #cursyl > 0 then
if nucleus or #syllables < 1 then
insert(syllables, cursyl)
else
syllables[#syllables] = syllables[#syllables] .. cursyl
end
end
return syllables
end
-- Pattern matching one of the base IPA vowel letters used in the transcriptions
local ipavowel = "[aɛiɨɔu]"
---
-- Extract the rhyme from an IPA transcription: the text from the second-to-last
-- vowel (or from the only vowel) to the end, with stress and syllable marks removed.
---@param ipa string
---@return string|nil # nil when there is no vowel or the extracted part contains a space
local function generar_rima(ipa)
    -- Collect the position of every vowel
    local vowels_at = {}
    local found = strfind(ipa, ipavowel, 1)
    while found do
        vowels_at[#vowels_at + 1] = found
        found = strfind(ipa, ipavowel, found + 1)
    end

    local count = #vowels_at
    if count < 1 then
        return nil
    end
    local vend
    if count == 1 then
        vend = vowels_at[1]
    else
        vend = vowels_at[count - 1]
    end

    local snippet = strsubn(substr(ipa, vend), "[ˈˌ.]", "")
    if strfind(snippet, " ") then
        return nil -- copout, something must be wrong
    end
    return snippet
end
---
-- External entry point: receives the page title and the template arguments and
-- fills in the missing ones in place.
--  * "ayuda" defaults to the title; a single letter found in `pron_abc` is
--    replaced by its spelled-out names (also stored in "tl").
--  * When neither "fone" nor "fono" is given, up to 9 transcriptions are
--    generated from "ayuda" and appended to "fono".
--  * "rima" is derived from the first "fono" entry when absent.
--  * For single-word, non-letter titles: "ls" (syllable count), "nl" (title
--    length) and "d"[1] (hyphenation) are filled when absent.
---@param titulo string Page title
---@param args table Template arguments; mutated and returned
---@return table
function export.procesar_pron_args(titulo, args)
    local vino_ayuda = false
    local letter_names

    if #args.ayuda < 1 then
        args.ayuda[1] = titulo
    else
        vino_ayuda = true
    end

    if #args.fone < 1 and #args.fono < 1 then
        letter_names = pron_abc[args.ayuda[1]]
        if letter_names then
            args.ayuda = letter_names
            args.tl = letter_names
        end
        -- Number of transcriptions inserted so far (at most 9)
        local inserted = 0
        for j = 1, #args.ayuda do
            if inserted >= 9 then
                break
            end
            for _, pron in ipairs(generar_pron(args.ayuda[j])) do
                insert(args.fono, pron)
                inserted = inserted + 1
                if inserted >= 9 then
                    break
                end
            end
        end
    end

    if not args.rima then
        local primera = args.fono[1]
        if primera and primera[1] then
            args.rima = generar_rima(primera[1])
        end
    end

    -- No hyphenation data for multi-word titles or for single letters
    if strfind(titulo, " ") or letter_names then
        return args
    end

    local ayuda = args.ayuda[1]
    local silabas
    if not vino_ayuda then
        silabas = separar_en_silabas(ayuda, ayuda)
    elseif is_respelling_close_enough(ayuda, titulo) then
        -- A user-supplied respelling is only trusted when it matches the title closely
        silabas = separar_en_silabas(ayuda, titulo)
    end
    if not silabas then
        return args
    end

    if not args.ls then
        args.ls = #silabas
    end
    if not args.nl then
        args.nl = strlen(titulo)
    end
    if not args.d[1] then
        args.d[1] = concat(silabas, "-")
    end
    return args
end
return export