Módulo:generar-pron/grc
La documentación para este módulo puede ser creada en Módulo:generar-pron/grc/doc
--Things we still need:
--Voicing of sigma around (after?) voiced stops.
--Proper alerts for editors, especially on ambiguous vowels.
local export = {}
local unpack = unpack or table.unpack
local m_str = require("Módulo:String")
local strsplit = m_str.split
local strfind = m_str.find
local substr = m_str.sub
local strmatch = m_str.match
local strsubn = m_str.gsub
local strlen = m_str.len
local strlower = m_str.lower
local strnfd = m_str.toNFD -- strnfd
local strnfc = m_str.toNFC
local U = m_str.char
-- Substitution helper: performs the replacement through strsubn and keeps
-- only the resulting string, discarding the substitution count.
local function strsub(text, pattern, repl, n)
	return (strsubn(text, pattern, repl, n))
end
local m_table = require("Módulo:tabla")
local copy = m_table.shallowcopy
local sparseConcat = m_table.sparseConcat
local m_data = mw.loadData("Módulo:generar-pron/grc/datos")
local diacritics = m_data.diacritics
local diacritic = m_data.diacritic
local conversions = m_data.conversions
local groups = m_data.groups
local diacritic_groups = m_data.diacritic_groups
local periods = {'cla', 'koi1', 'koi2', 'byz1', 'byz2'}
-- ################### TOOLS (grc-utilities) ##########################
-- Maps a single character (letter or combining diacritic) to a table of
-- boolean properties describing it. Filled in through add_info below.
local info = {}
-- The tables are shared among different characters so that they can be checked
-- for equality if needed, and to use less space.
local vowel_t = { vowel = true }
-- iota_t and upsilon_t carry the same fields but are separate tables on
-- purpose: make_tokens compares character info tables by identity
-- (for example against upsilon_t).
local iota_t = { vowel = true, offglide = true }
local upsilon_t = { vowel = true, offglide = true }
-- These don't need any contents.
local rho_t = {}
-- local consonant_t = {}
local diacritic_t = { diacritic = true }
-- Needed for equality comparisons.
local breathing_t = { diacritic = true }
-- Lua pattern for one UTF-8 encoded character: a lead byte followed by any
-- number of continuation bytes.
local UTF8_char = "[\1-\127\194-\244][\128-\191]*"
-- Two-byte sequences whose lead byte is 0xCE or 0xCF.
local basic_Greek = "[\206-\207][\128-\191]" -- excluding first line of Greek and Coptic block: ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ
-- Registers the property table `t` in `info` for every character given.
-- `characters` is either a string (split into UTF-8 characters) or an array
-- of characters.
local function add_info(characters, t)
	if type(characters) ~= "string" then
		for _, character in ipairs(characters) do
			info[character] = t
		end
		return
	end
	for character in string.gmatch(characters, UTF8_char) do
		info[character] = t
	end
end
-- Register the combining diacritics (other than breathings) as diacritics.
add_info({ diacritics.macron, diacritics.breve,
diacritics.diaeresis,
diacritics.acute, diacritics.grave, diacritics.circum,
diacritics.subscript,
}, diacritic_t)
-- Breathings use their own table so that they can be told apart by identity.
add_info({diacritics.rough, diacritics.smooth}, breathing_t)
-- Plain vowels, then the two letters that can also act as offglides.
add_info("ΑΕΗΟΩαεηοω", vowel_t)
add_info("Ιι", iota_t)
add_info("Υυ", upsilon_t)
-- add_info("ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨϜϘϺϷͶϠβγδζθκλμνξπρσςτφχψϝϙϻϸͷϡ", consonant_t)
add_info("Ρρ", rho_t)
-- Every character that was not registered above resolves to this shared,
-- empty table, so lookups such as info[c].vowel yield nil instead of failing.
local not_recognized = {}
setmetatable(info, { __index =
function(t, key)
return not_recognized
end
})
-- The alphabet: maps each Greek letter (upper and lower case, plus the
-- variant forms ϖ and ς) to a spelled-out letter name.
-- NOTE(review): not referenced in the visible part of the file; presumably
-- used further down to pronounce single letters — confirm.
local pron_abc = {
["Α"] = "άλφα",
["α"] = "άλφα",
["Β"] = "βήτα",
["β"] = "βήτα",
["Γ"] = "γάμμα",
["γ"] = "γάμμα",
["Δ"] = "δέλτα",
["δ"] = "δέλτα",
["Ε"] = "έψιλον",
["ε"] = "έψιλον",
["Ζ"] = "ζήτα",
["ζ"] = "ζήτα",
["Η"] = "ήτα",
["η"] = "ήτα",
["Θ"] = "θήτα",
["θ"] = "θήτα",
["Ι"] = "ιώτα",
["ι"] = "ιώτα",
["Κ"] = "κάππα",
["κ"] = "κάππα",
["Λ"] = "λάμβδα",
["λ"] = "λάμβδα",
["Μ"] = "μυ",
["μ"] = "μυ",
["Ν"] = "νυ",
["ν"] = "νυ",
["Ξ"] = "ξι",
["ξ"] = "ξι",
["Ο"] = "όμικρον",
["ο"] = "όμικρον",
["Π"] = "πι",
["π"] = "πι",
["ϖ"] = "πι",
["Ρ"] = "ρο",
["ρ"] = "ρο",
["Σ"] = "σίγμα",
["σ"] = "σίγμα",
["ς"] = "σίγμα",
["Τ"] = "ταυ",
["τ"] = "ταυ",
["Υ"] = "ύψιλον",
["υ"] = "ύψιλον",
["Φ"] = "φι",
["φ"] = "φι",
["Χ"] = "χι",
["χ"] = "χι",
["Ψ"] = "ψι",
["ψ"] = "ψι",
["Ω"] = "ωμέγα",
["ω"] = "ωμέγα",
}
--[=[
local checkType = require "libraryUtil".checkType
local function _check(funcName)
return function(argIndex, arg, expectType, nilOk)
return checkType(funcName, argIndex, arg, expectType, nilOk)
end
end
]=]--
-- Calls `func` once for every UTF-8 character of `str`, in order.
local function forEach(str, func)
	local next_char = string.gmatch(str, UTF8_char)
	local char = next_char()
	while char do
		func(char)
		char = next_char()
	end
end
-- Appends `chars` to list[index] (creating the entry when it is missing) and
-- returns `text` with its first #chars bytes removed.
-- Raises an error when `chars` is nil or false.
local function add(list, index, chars, text)
	if not chars then
		error("The function add cannot act on a nil character.")
	end
	local existing = list[index]
	list[index] = existing and (existing .. chars) or chars
	-- Byte-based slicing is sufficient: exactly the bytes of `chars` are dropped.
	local remainder = text:sub(#chars + 1)
	return remainder
end
-- Decomposes the text (NFD) and replaces every character that has an entry in
-- the `conversions` table with its mapped value; other characters are kept.
local function standardDiacritics(text)
	local converted = strnfd(text):gsub(UTF8_char, conversions)
	return converted
end
--[=[ Arranges a run of combining diacritics in the following order:
1. macron or breve
2. breathings or diaeresis
3. acute, circumflex, or grave
4. iota subscript
The slot of each diacritic is read from m_data.diacritic_order.
When two diacritics map to the same slot no error is raised (the error call
below is commented out): the extra diacritic is inserted into the output and
the tracking module is called with "grcdiacriticos".
Used by [[Module:typing-aids]].
]=]
local function reorderDiacriticSequence(diacritics_)
-- Sparse array: index = slot number, value = diacritic placed in that slot.
local output = {}
forEach(diacritics_,
function (diacritic_)
local index = m_data.diacritic_order[diacritic_]
if not output[index] then
output[index] = diacritic_
else
-- Place breve after macron.
if diacritic_ == diacritics.breve then
index = index + 1
end
-- The following might have odd results when there
-- are three or more diacritics.
table.insert(output, index, diacritic_)
-- [[Special:WhatLinksHere/Template:tracking/grc-utils/too many diacritics]]
-- require("Module:debug").track("grc-utils/too many diacritics")
require("Módulo:traza")("grcdiacriticos")
--[[
local m_templates = require("Module:grc-utilities/templates")
error("There are two diacritics, " ..
m_templates.addDottedCircle(output[index]) .. " and " ..
m_templates.addDottedCircle(diacritic) ..
" that belong in the same position. There should be only one."
)
--]]
end
end)
-- sparseConcat joins the entries of the (possibly sparse) array.
return sparseConcat(output)
end
-- Decomposes the text and passes every run of two or more combining
-- diacritics through reorderDiacriticSequence.
local function reorderDiacritics(text)
	local decomposed = strnfd(text)
	local diacritic_run = m_data.combining_diacritic .. m_data.combining_diacritic .. "+"
	local reordered = strsub(decomposed, diacritic_run, reorderDiacriticSequence)
	return reordered
end
--[=[
	Breaks a word into meaningful "tokens": individual letters or diphthongs
	together with their diacritics.
	The text is decomposed (NFD) and read one UTF-8 character at a time.
	Returns an array of token strings.
	Raises an error when rho carries a diacritic that is not a breathing.
	Logs a message (without failing) when a diacritic follows a character that
	cannot carry one, or when the text begins with a diacritic.
	Used by [[Module:grc-accent]] and [[Module:grc-pronunciation]].
--]=]
local function make_tokens(text)
	local tokens, prev_info = {}, {}
	-- vowel_count tracks how many vowel letters belong to the current run, so
	-- that a second vowel can be recognized as the offglide of a diphthong.
	local token_i, vowel_count = 1, 0
	local prev
	for character in string.gmatch(strnfd(text), UTF8_char) do
		local curr_info = info[character]
		-- Split vowels between tokens if not a diphthong.
		if curr_info.vowel then
			vowel_count = vowel_count + 1
			if prev and (not (vowel_count == 2 and curr_info.offglide and prev_info.vowel)
					-- υυ → υ, υ
					-- ιυ → ι, υ
					or prev_info.offglide and curr_info == upsilon_t or curr_info == prev_info) then
				token_i = token_i + 1
				if prev_info.vowel then
					vowel_count = 1
				end
			elseif vowel_count == 2 then
				vowel_count = 0
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		elseif curr_info.diacritic then
			vowel_count = 0
			tokens[token_i] = (tokens[token_i] or "") .. character
			if prev_info.diacritic or prev_info.vowel then
				if character == diacritics.diaeresis then
					-- Split the diphthong in the current token if a diaeresis was found:
					-- the first letter, then the second letter plus any diacritics.
					local previous_vowel, vowel_with_diaeresis =
						string.match(tokens[token_i],
							"^(" .. basic_Greek .. ")(" .. basic_Greek .. ".+)")
					if previous_vowel then
						tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis
						token_i = token_i + 1
					end
					-- Otherwise the vowel preceding the vowel with the diaeresis is
					-- already in the previous token because it has a diacritic:
					-- Περικλῆῐ̈ → Π ε ρ ι κ λ ῆ ῐ̈
				end
			elseif prev_info == rho_t then
				if curr_info ~= breathing_t then
					error(string.format("The character %s in %s should not have the accent %s on it.", prev, text, character))
				end
			elseif prev then
				mw.log("The character " .. prev .. " cannot have a diacritic on it.")
			else
				-- The text starts with a diacritic: there is no previous character
				-- to name, and concatenating nil would abort the whole tokenization.
				mw.log("The diacritic at the start of " .. tostring(text) .. " has no character to attach to.")
			end
		else
			vowel_count = 0
			if prev then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		end
		prev = character
		prev_info = curr_info
	end
	return tokens
end
-- Token arrays already computed, keyed by the decomposed (NFD) input text.
local cache = {}
-- Memoized wrapper around make_tokens. The returned table is the cached
-- instance itself, so callers that want to modify it must copy it first.
local function tokenize(text)
	local key = strnfd(text)
	local cached = cache[key]
	if not cached then
		cached = make_tokens(text)
		cache[key] = cached
	end
	return cached
end
--[=[ Places diacritics in the following order:
	1. breathings or diaeresis
	2. acute, circumflex, or grave
	3. macron or breve
	4. iota subscript
The reordering (and the conversion of combining macron/breve to their spacing
forms) only happens when the text matches groups[1]. The result is recomposed
with strnfc.
Used by [[Module:grc-pronunciation]]. ]=]
local function pronunciationOrder(text)
	text = standardDiacritics(text)
	if strfind(text, groups[1]) then
		-- Rebuild one run of diacritics: groups[2], groups[3], groups[1],
		-- groups[4]; a group that is absent contributes nothing.
		local function rearrange(sequence)
			local second = strmatch(sequence, groups[2]) or ""
			local third = strmatch(sequence, groups[3]) or ""
			local first = strmatch(sequence, groups[1]) or ""
			local fourth = strmatch(sequence, groups[4]) or ""
			return second .. third .. first .. fourth
		end
		text = strsub(text, diacritic .. diacritic .. "+", rearrange)
		-- combining to spacing macron
		text = strsub(text, diacritics.macron, diacritics.spacing_macron)
		-- combining to spacing breve
		text = strsub(text, diacritics.breve, diacritics.spacing_breve)
	end
	return strnfc(text)
end
-- Finds the ambiguous vowels of a text: tokens that consist only of α, ι or υ
-- plus diacritics, none of which marks length (macron, breve, circumflex,
-- iota subscript).
-- Parameters:
--   text   string to inspect; anything else raises an error.
--   noTag  currently unused (the language-tagging code is disabled); kept so
--          that existing callers keep working.
-- Returns:
--   1. array with the ambiguous vowel letters, in order of appearance
--   2. set (letter -> true) of the lowercased ambiguous vowel letters, for
--      categorization purposes
local function findAmbig(text, noTag)
	if type(text) ~= "string" then
		error("The input to function findAmbig is nonexistent or not a string")
	end
	-- The circumflex lives under the key "circum" in the diacritics table (as
	-- in the rest of this module); reading "circumflex" would concatenate nil.
	local lengthDiacritic = "[" .. diacritics.macron .. diacritics.breve .. diacritics.circum .. diacritics.subscript .. "]"
	local aiu_diacritic = "^([" .. "αιυ" .. "])(" .. diacritic .. "*)$"
	local output, vowels = {}, {}
	-- Go through the word unit by unit.
	for _, token in ipairs(tokenize(text)) do
		if not strfind(token, m_data.consonant) then
			local vowel, diacritics_ = strmatch(token, aiu_diacritic)
			if vowel and (diacritics_ == "" or not strfind(diacritics_, lengthDiacritic)) then
				table.insert(output, vowel)
				vowels[strlower(vowel)] = true
			end
		end
	end
	return output, vowels
end
-- ################### End of tools ###################
-- ################### DIACRITICS ###################
-- Character class of the vowel letters whose length is not shown by the letter itself.
local either_vowel = "[ΑαΙιΥυ]"
-- Pattern taken from the third diacritic group of the data module; used
-- throughout this section to detect and strip accent marks.
local tonal_diacritic = diacritic_groups[3]
-- Diacritics that are treated as marking a long vowel: macron, iota
-- subscript and circumflex (meant to be embedded in a character class).
local long_diacritics = diacritics.macron .. diacritics.subscript .. diacritics.circum
-- Returns nil for the empty string and the argument itself for anything else.
local function if_not_empty(var)
	if var ~= "" then
		return var
	end
	return nil
end
-- Returns the result of searching the token for a Greek vowel letter
-- (nil when there is none).
local function contains_vowel(token)
	local vowel_letter = '[ΑΕΗΙΟΥΩαεηιουω]'
	return strfind(token, vowel_letter)
end
-- True when the token contains a vowel letter immediately followed by
-- iota or upsilon; false otherwise.
local function is_diphthong(token)
	local vowel_plus_offglide = "[ΑαΕεΗηΙιΟοΥυΩω][ΙιΥυ]"
	return strfind(token, vowel_plus_offglide) and true or false
end
--[=[
local libraryUtil = require('libraryUtil')
local checkType = libraryUtil.checkType
local checkTypeMulti = libraryUtil.checkTypeMulti
local function _check(funcName, expectType)
if type(expectType) == "string" then
return function(argIndex, arg, nilOk)
checkType(funcName, argIndex, arg, expectType, nilOk)
end
else
return function(argIndex, arg, expectType, nilOk)
if type(expectType) == "table" then
checkTypeMulti(funcName, argIndex, arg, expectType, nilOk)
else
checkType(funcName, argIndex, arg, expectType, nilOk)
end
end
end
end
--]=]
--[[
	A token is short when it carries a breve, or when it contains epsilon or
	omicron and no lowercase iota or upsilon. Everything else counts as not
	short, including unmarked alphas, iotas, and upsilons.
]]
local function is_short(token)
	if strfind(token, diacritics.breve) then
		return true
	end
	if strfind(token, '[ΕΟεο]') and not strfind(token, '[ιυ]') then
		return true
	end
	return false
end
-- Like strsubn, but returns the new string only when at least one
-- substitution was made; returns nil otherwise.
local function conditional_gsub(...)
	local result, replacements = strsubn(...)
	if not replacements or replacements <= 0 then
		return nil
	end
	return result
end
-- Dispatch table: maps a diacritic to a function that adds that diacritic to
-- a vowel token. Each function takes the token and returns the new token.
-- The patterns work on individual combining characters.
-- NOTE(review): assumes the tokens are in decomposed (NFD) form — confirm at the callers.
local accent_adding_functions = {
-- Circumflex: replaces an optional macron and inserts the circumflex after an
-- optional diacritic_groups[2] mark and before an optional iota subscript.
-- This will not throw an error if η or ω has a macron on it.
[diacritics.circum] = function(vowel)
return (strsub(
vowel,
"([ΑαΗηΙιΥυΩω])" .. diacritics.macron .. "?(" .. diacritic_groups[2] .. "?)(" .. diacritics.subscript .. "?)$",
"%1%2" .. diacritics.circum .. "%3"
))
end,
-- Acute: in a token containing εω the acute goes on the epsilon; otherwise
-- it is placed after the optional group 1 and group 2 diacritics of the
-- vowel at the end of the token, before an optional iota subscript.
[diacritics.acute] = function(vowel)
return (
conditional_gsub(vowel,
"([Εε])([Ωω])",
"%1" .. diacritics.acute .. "%2") or
strsub(vowel,
"([ΑαΕεΗηΙιΟοΥυΩω]" .. diacritic_groups[1] .. "?" .. diacritic_groups[2] .. "?)(" .. diacritics.subscript .. "?)$",
"%1" .. diacritics.acute .. "%2"))
end,
-- Macron: tokens that already carry a long diacritic, and diphthongs, are
-- returned unchanged; ε and ο raise an error; otherwise the macron is added
-- directly after each α, ι or υ letter.
[diacritics.macron] = function(vowel)
if strfind(vowel, "[" .. long_diacritics .. "]") or is_diphthong(vowel) then
return vowel
elseif strfind(vowel, "[ΕΟεο]") then
error("The vowel " .. vowel ..
" is short, so a macron cannot be added to it.")
else
return strsub(vowel, "(" .. either_vowel .. ")", "%1" .. diacritics.macron)
end
end,
-- Breve: raises an error for tokens with a long diacritic and for
-- diphthongs; otherwise the breve is added directly after each α, ι or υ.
[diacritics.breve] = function(vowel)
if strfind(vowel, "[" .. long_diacritics .. "]") then
error("The vowel " .. vowel ..
" has a iota subscript, a macron, or a circumflex, so a breve cannot be added to it.")
elseif is_diphthong(vowel) then
error("The vowel " .. vowel ..
" is a diphthong, so a breve cannot be added to it.")
else
return strsub(vowel, "(" .. either_vowel .. ")", "%1" .. diacritics.breve)
end
end,
-- This will insert a diaeresis on a single iota or upsilon, or on a
-- iota or upsilon that is the second element of a diphthong.
-- It does nothing if the vowel has a breathing on it.
[diacritics.diaeresis] = function(vowel)
return (strsub(
vowel,
"([ΙιΥυ]" .. diacritic_groups[1] .. "?)(" .. tonal_diacritic .. "?)$",
"%1" .. diacritics.diaeresis .. "%2"
))
end
}
-- Adds the diacritic `accent` to the vowel token `vowel` by dispatching to
-- accent_adding_functions. Assumes decomposed (NFD) vowels.
-- Returns the modified token.
-- Raises an error when no adding function exists for `accent`.
local function add(vowel, accent)
	local adder = accent_adding_functions[accent]
	if type(adder) == "function" then
		return adder(vowel)
	end
	local name = m_table.keyFor(diacritics, accent)
	if name == "circum" then
		name = "circumflex"
	end
	-- keyFor may find no name for an unknown accent; fall back to the raw
	-- value so that the intended message is raised instead of a
	-- "concatenate nil" failure.
	error("No function for adding a " .. tostring(name or accent) .. ".")
end
-- Decomposes the word and removes everything matched by m_data.all.
-- Only the resulting string is returned.
local function strip_accent(word)
	local stripped = strsub(strnfd(word), m_data.all, '')
	return stripped
end
-- Removes the tonal diacritics from a word. Before that, every token made of
-- α, ι or υ (with optional breathing or diaeresis) plus a circumflex gets a
-- macron in place of the circumflex, so the length information is kept.
local function strip_tone(word)
	local decomposed = strnfd(word)
	if strfind(decomposed, diacritics.circum) then
		local tokens = copy(tokenize(decomposed))
		local circumflexed_vowel = '^([αΑιΙυΥ])([' .. diacritics.smooth .. diacritics.rough .. diacritics.diaeresis .. ']*)' .. diacritics.circum .. '$'
		local with_macron = '%1' .. diacritics.macron .. '%2'
		for i = 1, #tokens do
			tokens[i] = strsub(tokens[i], circumflexed_vowel, with_macron)
		end
		decomposed = table.concat(tokens)
	end
	return (strsub(decomposed, tonal_diacritic, ''))
end
-- Puts an acute on the last vowel token of the word. A word that already has
-- a tonal diacritic is returned (decomposed) unchanged.
local function ult(word)
	local decomposed = strnfd(word)
	if strfind(decomposed, tonal_diacritic) then
		return decomposed
	end
	local tokens = copy(tokenize(decomposed))
	for position, token in m_table.reverseIpairs(tokens) do
		if contains_vowel(token) then
			-- Fortunately accents go last in combining order.
			tokens[position] = add(token, diacritics.acute)
			break
		end
	end
	return table.concat(tokens, '')
end
--[[ Accents the last vowel token: an acute when the token is short, a
circumflex otherwise. A word that already has a tonal diacritic is returned
(decomposed) unchanged.
WARNING: given an unmarked α ι υ, this function will use a circumflex.
That said, if you ran into this situation in the first place, you probably
are doing something wrong. ]]
local function circ(word)
	local decomposed = strnfd(word)
	if strfind(decomposed, tonal_diacritic) then
		return decomposed
	end
	local tokens = copy(tokenize(decomposed))
	for position, token in m_table.reverseIpairs(tokens) do
		if contains_vowel(token) then
			local mark = is_short(token) and diacritics.acute or diacritics.circum
			tokens[position] = add(token, mark)
			break
		end
	end
	return table.concat(tokens, '')
end
-- Puts an acute on the second vowel token from the end.
-- Returns the decomposed word unchanged when it already has a tonal
-- diacritic, and the original argument when a "-" token is met while
-- scanning from the end. Falls back to circ(orig) when fewer than two vowel
-- tokens are found.
local function penult(orig)
	local decomposed = strnfd(orig)
	if strfind(decomposed, tonal_diacritic) then
		return decomposed
	end
	local tokens = copy(tokenize(decomposed))
	local vowels_seen = 0
	for position, token in m_table.reverseIpairs(tokens) do
		if token == '-' then
			return orig
		end
		if contains_vowel(token) then
			vowels_seen = vowels_seen + 1
			if vowels_seen == 2 then
				tokens[position] = add(token, diacritics.acute)
				return table.concat(tokens, '')
			end
		end
	end
	return circ(orig)
end
-- Accents the second vowel token from the end: an acute when that token is
-- short or the last vowel token counts as long, a circumflex otherwise.
-- The last vowel token counts as long when it is not short, unless the very
-- last token of the word is exactly αι or οι.
-- Returns the decomposed word unchanged when it already has a tonal
-- diacritic, and the original argument when a "-" token is met. Falls back
-- to circ(orig) when fewer than two vowel tokens are found.
local function pencirc(orig)
	local decomposed = strnfd(orig)
	if strfind(decomposed, tonal_diacritic) then
		return decomposed
	end
	local tokens = copy(tokenize(decomposed))
	local vowels_seen = 0
	local long_ult = false
	for position, token in m_table.reverseIpairs(tokens) do
		if token == '-' then
			return orig
		end
		if contains_vowel(token) then
			vowels_seen = vowels_seen + 1
			if vowels_seen == 1 then
				if not is_short(token) then
					local final_token = tokens[#tokens]
					long_ult = not (final_token == 'αι' or final_token == 'οι')
				end
			elseif vowels_seen == 2 then
				local mark = diacritics.circum
				if is_short(token) or long_ult then
					mark = diacritics.acute
				end
				tokens[position] = add(token, mark)
				return table.concat(tokens, '')
			end
		end
	end
	return circ(orig)
end
-- Puts an acute on the third vowel token from the end, or on the second one
-- when the last vowel token counts as long (not short, and the very last
-- token of the word is not exactly αι or οι).
-- Returns the decomposed word unchanged when it already has a tonal
-- diacritic, and the original argument when a "-" token is met. Falls back
-- to pencirc(orig) when no suitable vowel token is found.
local function antepenult(orig)
	local decomposed = strnfd(orig)
	if strfind(decomposed, tonal_diacritic) then
		return decomposed
	end
	local tokens = copy(tokenize(decomposed))
	local vowels_seen = 0
	local long_ult = false
	for position, token in m_table.reverseIpairs(tokens) do
		if token == '-' then
			return orig
		end
		if contains_vowel(token) then
			vowels_seen = vowels_seen + 1
			if vowels_seen == 1 then
				if not is_short(token) then
					local final_token = tokens[#tokens]
					long_ult = not (final_token == 'αι' or final_token == 'οι')
				end
			elseif (vowels_seen == 2 and long_ult) or vowels_seen == 3 then
				tokens[position] = add(token, diacritics.acute)
				return table.concat(tokens, '')
			end
		end
	end
	return pencirc(orig)
end
--[[
	Scans the word from its beginning or from its end and reports the first
	accent found.
	Arguments:
	- word: string (Ancient Greek word)
	- from_end: boolean or nil (whether to count from the end of the word)
	Returns the position and the name of the accent ("acute", "grave" or
	"circumflex"), or nil when no accent is found. Position is the number of
	vowel tokens (syllables) seen so far, not the number of characters.
	Positive results are cached per direction, keyed by the decomposed word.
]]
local accent_cache = { [true] = {}, [false] = {} }
local function detect_accent(word, from_end)
	assert(type(word) == "string")
	assert(type(from_end) == "boolean" or type(from_end) == "nil")
	local direction_cache = accent_cache[from_end == true]
	local key = strnfd(word)
	local hit = direction_cache[key]
	if hit then
		return unpack(hit)
	end
	local names = {
		[diacritics.acute] = "acute",
		[diacritics.grave] = "grave",
		[diacritics.circum] = "circumflex",
	}
	local iterate = from_end and m_table.reverseIpairs or ipairs
	local position = 0
	for _, token in iterate(tokenize(word)) do
		if contains_vowel(token) then
			position = position + 1
			local accent_name = names[strmatch(token, tonal_diacritic)]
			if accent_name then
				direction_cache[key] = { position, accent_name }
				return position, accent_name
			end
		end
	end
	return nil
end
--[[
	Returns the classification of a word based on the first accent found when
	traveling back from the end of the word.
	Returns the term (e.g. "oxytone", "properispomenon") on success.
	On failure returns nil plus a message: either that no accent was found, or
	that no term exists for that accent on that syllable.
]]
local function get_accent_term(word)
	local syllable, accent_name = detect_accent(word, true)
	if not (syllable and accent_name) then
		return nil, 'No accent found.'
	end
	local terms = {
		["grave"] = { "barytone" },
		["acute"] = { "oxytone", "paroxytone", "proparoxytone" },
		["circumflex"] = { "perispomenon", "properispomenon" },
	}
	local term = terms[accent_name][syllable]
	if term then
		return term
	end
	local ordinals = { "first", "second", "third", "fourth", "fifth", }
	-- An accent further back than the fifth syllable has no spelled-out
	-- ordinal; fall back to a numeric one instead of concatenating nil.
	local ordinal = ordinals[syllable] or (syllable .. "th")
	return nil,
		'There is no term for a word with a ' .. accent_name ..
		' accent on the ' .. ordinal ..
		' syllable from the end of the word.'
end
-- Classifies the vowel length of a token as "long", "short" or "either".
-- Returns nil when the token contains no vowel letter.
-- short_diphthong is a boolean or nil; when it is true, a token starting
-- with αι or οι is treated as short.
local function get_length(token, short_diphthong)
	token = strlower(token)
	local length
	if not contains_vowel(token) then
		length = nil
	elseif strfind(token, "[ηω" .. long_diacritics .. "]") then
		-- η, ω; ᾳ, ῃ, ῳ; ᾱ, ῑ, ῡ
		length = "long"
	elseif short_diphthong and strfind(token, "^[αο]ι") then
		length = "short"
	elseif is_diphthong(token) then
		length = "long"
	elseif strfind(token, "[εο" .. diacritics.breve .. "]") then
		-- ε, ο; ᾰ, ῐ, ῠ
		length = "short"
	else
		length = "either"
	end
	return length
end
-- Takes an array of tokens and returns an array describing each vowel token,
-- ordered from the end of the word (entry 1 is the last vowel). Each entry
-- has the fields:
--   index   position of the token in `tokens`
--   length  result of get_length for the token
--   accent  the acute, grave or circumflex found in the token, or nil
-- short_diphthong is only passed on to get_length for the last vowel token,
-- and only when the last token of the word does not end in a consonant.
-- Raises an error when `tokens` is not a table. An empty token list yields an
-- empty result.
local function get_vowel_info(tokens, short_diphthong)
	if type(tokens) ~= "table" then
		error("The argument to get_vowel_info must be a table.")
	end
	local vowels = {}
	local vowel_i = 1
	-- Guard against an empty token list, where tokens[#tokens] is nil.
	local last_token = tokens[#tokens]
	if last_token and strfind(last_token, m_data.consonant .. "$") then
		short_diphthong = false
	end
	local accent_class = "[" .. diacritics.acute .. diacritics.grave .. diacritics.circum .. "]"
	for i, token in m_table.reverseIpairs(tokens) do
		if contains_vowel(token) then
			if vowel_i ~= 1 then
				short_diphthong = false
			end
			vowels[vowel_i] = {
				index = i,
				length = get_length(token, short_diphthong),
				accent = if_not_empty(strmatch(token, accent_class)),
			}
			vowel_i = vowel_i + 1
		end
	end
	return vowels
end
-- Adds macrons or breves to vowels of ambiguous length when the position and
-- type of the accent imply that length.
-- Parameters:
--   word             the word to mark
--   return_tokens    when truthy, the array of tokens is returned instead of
--                    the concatenated string
--   short_diphthong  passed on to get_vowel_info
-- When the word has no α, ι or υ letter and return_tokens is set, the table
-- returned is the one from tokenize itself, not a copy.
local function mark_implied_length(word, return_tokens, short_diphthong)
word = strnfd(word)
-- Do nothing if there are no vowel letters that could be ambiguous.
if not strfind(word, either_vowel) then
if return_tokens then
return tokenize(word)
else
return word
end
end
local tokens = copy(tokenize(word))
-- vowels[1] is the last vowel of the word, vowels[2] the one before it, etc.
local vowels = get_vowel_info(tokens, short_diphthong)
if #vowels >= 2 then
local ultima = vowels[1]
local ultima_i = ultima.index
local penult_ = vowels[2]
local penult_i = penult_.index
-- Ambiguous penult before a short ultima: a circumflex marks the penult
-- with a macron, an acute marks it with a breve.
if penult_.length == "either" and ultima.length == "short" then
if penult_.accent == diacritics.circum then
tokens[penult_i] = add(tokens[penult_i], diacritics.macron)
elseif penult_.accent == diacritics.acute then
tokens[penult_i] = add(tokens[penult_i], diacritics.breve)
end
-- Long penult before an ambiguous ultima: a circumflex marks the ultima
-- with a breve, an acute marks it with a macron.
elseif penult_.length == "long" and ultima.length == "either" then
if penult_.accent == diacritics.circum then
tokens[ultima_i] = add(tokens[ultima_i], diacritics.breve)
elseif penult_.accent == diacritics.acute then
tokens[ultima_i] = add(tokens[ultima_i], diacritics.macron)
end
end
-- An accent on the third vowel from the end marks an ambiguous ultima with a breve.
-- NOTE(review): ultima.length is the value computed before any mark was
-- added above, so a word accented on both penult and antepenult could reach
-- add a second time for the same token — confirm whether that can occur.
local antepenult_ = vowels[3]
if antepenult_ and antepenult_.accent and ultima.length == "either" then
tokens[ultima_i] = add(tokens[ultima_i], diacritics.breve)
end
end
if return_tokens then
return tokens
else
return table.concat(tokens)
end
end
-- Returns the length ("long", "short" or "either") of the syllable given by
-- its position counted from the end of the word.
-- Parameters:
--   word      string
--   syllable  number, 1 being the last syllable
-- Raises an error when an argument has the wrong type, or when the word has
-- fewer syllables than requested.
local function length_at(word, syllable)
	-- Validate before tokenizing, so that a wrong argument produces the
	-- intended message instead of failing inside tokenize.
	if type(word) ~= "string" then
		error("First argument of length_at should be a string.")
	end
	if type(syllable) ~= "number" then
		error("Second argument of length_at should be a number.")
	end
	local tokens = tokenize(word)
	local syllable_count = 0
	for _, token in m_table.reverseIpairs(tokens) do
		-- get_length returns nil for tokens without a vowel.
		local length = get_length(token)
		if length then
			syllable_count = syllable_count + 1
			if syllable_count == syllable then
				return length
			end
		end
	end
	if syllable_count < syllable then
		error("Length for syllable " .. syllable .. " from the end of the word was not found.")
	end
end
-- Returns the first rough or smooth breathing found in the token, or nil.
local function find_breathing(token)
	local breathing_class = "([" .. diacritics.rough .. diacritics.smooth .. "])"
	return strmatch(token, breathing_class)
end
-- True when both tokens carry the same breathing, or when neither has one.
local function has_same_breathing_as(token1, token2)
	local first, second = find_breathing(token1), find_breathing(token2)
	return first == second
end
-- Gives the token the length named by `length`: "long" adds a macron and
-- "short" adds a breve. Any other value returns the token untouched.
local function change_length(length, token)
	local marks = {
		long = diacritics.macron,
		short = diacritics.breve,
	}
	local mark = marks[length]
	if not mark then
		return token
	end
	return add(token, mark)
end
--[[
	Takes two words, marks implied length on each, then harmonizes macrons and
	breves: walking both words token by token from the start, a token whose
	length is "either" takes the length of its counterpart in the other word.
	The walk stops at the first position where the words (with diacritics
	stripped) differ, where the breathings differ, or where the second word
	has run out of tokens.
	Returns both words in decomposed (NFD) form.
]]
local function harmonize_length(word1, word2)
	-- Decompose both words: the vowel patterns below only match bare letters,
	-- so a precomposed second word would never be recognized as ambiguous.
	word1, word2 = strnfd(word1), strnfd(word2)
	-- Do nothing if there are no vowel letters that could be ambiguous.
	if not (strfind(word1, either_vowel) or strfind(word2, either_vowel)) then
		return word1, word2
	end
	local tokens1, tokens2 = mark_implied_length(word1, true), mark_implied_length(word2, true)
	local strip1, strip2 = copy(tokenize(strip_accent(word1))), copy(tokenize(strip_accent(word2)))
	-- ipairs: the comparison has to go in order, since it stops at the first
	-- mismatch; pairs gives no ordering guarantee.
	for i, token1 in ipairs(tokens1) do
		local token2 = tokens2[i]
		if not token2
			or strip1[i] ~= strip2[i]
			or not has_same_breathing_as(token1, token2) then
			break
		end
		local length1, length2 = get_length(token1), get_length(token2)
		if length1 and length2 and length1 ~= length2 then
			if length1 == "either" then
				tokens1[i] = change_length(length2, token1)
			elseif length2 == "either" then
				tokens2[i] = change_length(length1, token2)
			end
		end
	end
	return table.concat(tokens1), table.concat(tokens2)
end
--[[
	Gets the weight of the nth syllable from the end of the word. Position
	defaults to 1, the last syllable.
	Returns "heavy" when the vowel is long or is followed by more than one
	non-vowel token, "light" when the vowel is short, and nil otherwise (empty
	or missing word, no vowel at that position, or a vowel of ambiguous length
	followed by at most one non-vowel token).
]]
local function get_weight(word, position)
	if not if_not_empty(word) then
		return nil
	end
	local tokens = tokenize(word)
	position = position or 1
	-- Find the nth vowel from the end of the word.
	local found_index, found_length
	local vowels_seen = 0
	for i, token in m_table.reverseIpairs(tokens) do
		local length = get_length(token)
		if length then
			vowels_seen = vowels_seen + 1
			if vowels_seen == position then
				found_index, found_length = i, length
				break
			end
		end
	end
	if not found_index then
		return nil
	end
	if found_length == "long" then
		return "heavy"
	end
	-- Count the non-vowel tokens that directly follow the vowel.
	local following = 0
	for i = found_index + 1, #tokens do
		if contains_vowel(tokens[i]) then
			break
		end
		following = following + 1
	end
	if following > 1 then
		return "heavy"
	end
	if found_length == "short" then
		return "light"
	end
	return nil
end
--[[
Add accent mark at position. A positive position refers to the nth vowel
from the beginning of the word; a negative position counts from the end of
the word. Respects the rules of accent. A word that already carries a tonal
diacritic is returned (decomposed) unchanged.
Examples:
- δημος, 1 => δῆμος
- προτερᾱ, 1 => προτέρᾱ (position changed to 2 because ultima is long)
- μοιρα, 1, true => μοῖρα (circumflex can be added because ultima is
ambiguous)
- χωρᾱ, 1, true => χώρᾱ (circumflex can't be added because ultima
is long)
- τοιουτος, 2 => τοιοῦτος (circumflex because ultima is short)
Arguments:
- word: string (hopefully an Ancient Greek word or stem)
- syllable_position: number (less than the number of monophthongs or diphthongs
in the word)
- options: table or nil
- circumflex boolean (add a circumflex if allowed)
- synaeresis boolean (accent can fall before εω in penult
and ultima: πόλεως)
- short_diphthong boolean (word-final οι, αι count as short)
- force_antepenult boolean (keep the accent on the antepenult even
when the ultima is long)
Raises an error for position 0, for a position beyond the number of vowels,
or when no vowel exists at the computed position.
]]
local function add_accent(word, syllable_position, options)
-- local check = _check("add_accent")
-- check(1, word, "string")
-- check(2, syllable_position, "number")
-- check(3, options, "table", true)
assert(type(word) == "string")
assert(type(syllable_position) == "number")
assert(type(options) == "table" or type(options) == "nil")
word = strnfd(word)
if strfind(word, tonal_diacritic) then
return word
end
options = options or {}
local tokens = copy(tokenize(word))
-- vowels[1] is the last vowel of the word (the ultima), vowels[2] the penult, etc.
local vowels = get_vowel_info(tokens, options.short_diphthong)
local vowel_count = #vowels
-- Convert positions in relation to the beginning of the word
-- to positions in relation to the end of the word.
-- The farthest back that an accent can be placed is 3 (the antepenult),
-- so that is the greatest allowed position.
if syllable_position > 0 then
syllable_position = math.min(3, vowel_count - syllable_position + 1)
-- If the position is in relation to the end of the word and it is greater
-- than the length of the word, then reduce it to the length of the word.
-- This is for practical reasons. Positions in relation to the beginning of
-- the word do not need leeway.
elseif syllable_position < 0 then
syllable_position = math.min(-syllable_position, vowel_count)
end
-- From here on syllable_position counts from the end of the word.
if syllable_position == 0 then
error("Invalid position value " .. syllable_position .. ".")
elseif syllable_position > vowel_count then
error("The position " .. syllable_position .. " is invalid, because the word has only " .. vowel_count .. " vowels.")
end
-- Apply accent rules to change the accent's position or type.
local accent_mark = options.circumflex and diacritics.circum or diacritics.acute
local ultima = vowels[1]
-- If synaeresis is selected, a final vowel sequence εω (optionally
-- separated by an undertie) counts as one syllable.
if syllable_position == 3 then
local penult_ = vowels[2]
-- A long ultima moves the accent to the penult, unless force_antepenult
-- is set or the synaeresis exception applies. The plain-text find checks
-- that the token occurs inside the two-letter literal, i.e. that it is a
-- bare omega (ultima) or a bare epsilon (penult).
if not options.force_antepenult and (ultima.length == "long"
and not (options.synaeresis
and ("Ωω"):find(tokens[ultima.index], 1, true)
and ("Εε"):find(tokens[penult_.index], 1, true)
and (ultima.index == penult_.index + 1
or ultima.index == penult_.index + 2
and tokens[penult_.index + 1] == mw.ustring.char(0x035C)))) then
syllable_position = 2
else
-- The antepenult only ever takes an acute.
accent_mark = diacritics.acute
end
end
if syllable_position == 2 then
-- Long penult before a short ultima takes a circumflex; before a long
-- ultima the penult takes an acute.
if ultima.length == "short" and vowels[2].length == "long" then
accent_mark = diacritics.circum
elseif ultima.length == "long" then
accent_mark = diacritics.acute
end
end
local vowel = vowels[syllable_position]
if not vowel then
error('No vowel at position ' .. syllable_position ..
' from the end of the word ' .. word .. '.')
end
-- A short vowel always takes an acute.
if vowel.length == "short" then
accent_mark = diacritics.acute
end
local i = vowel.index
tokens[i] = add(tokens[i], accent_mark)
return table.concat(tokens)
end
-- Runs a named syllable test on a word.
-- Parameters:
--   word    string
--   func    string naming the test; only "eq" exists: true when the word
--           has exactly `number` vowel tokens
--   number  number handed to the test (may be nil)
-- Raises an error when `func` is missing or names no known test.
local function syllables(word, func, number)
	--local check = _check('syllables')
	--check(1, word, 'string')
	--check(2, func, 'string', true)
	--check(3, number, 'number', true)
	assert(type(word) == "string")
	assert(type(func) == "string" or type(func) == "nil")
	assert(type(number) == "number" or type(number) == "nil")
	if not func then
		error('No function specified')
	end
	local functions = {
		eq = function (word_, number_)
			local vowels = 0
			for _, token in ipairs(tokenize(word_)) do
				if contains_vowel(token) then
					vowels = vowels + 1
					-- Stop as soon as the count exceeds the target.
					if vowels > number_ then
						return false
					end
				end
			end
			return vowels == number_
		end
	}
	-- Keep the requested name in `func`, so that the error message can show it
	-- when the lookup fails.
	local handler = functions[func]
	if not handler then
		error('No function ' .. func)
	end
	return handler(word, number)
end
-- ################### FIN DIACRITICOS ###################
--- Returns the `i`-th UTF-8 character of `s`.
-- Single characters are fetched very often, so an out-of-bounds position
-- (including 0) deliberately yields the empty string instead of an error.
-- @param s string to read from
-- @param i 1-based character position, as a number or a numeric string
-- @return the character at that position, or '' when there is none
local function fetch(s, i)
	local position = tonumber(i)
	if type(position) ~= "number" then
		error("fetch requires a number or a string equivalent to a number as its second argument.")
	end

	if position ~= 0 then
		local seen = 0
		-- Walk the string one UTF-8 sequence at a time until the wanted
		-- position is reached.
		for character in string.gmatch(s, "[\1-\127\194-\244][\128-\191]*") do
			seen = seen + 1
			if seen == position then
				return character
			end
		end
	end

	return ""
end
--Combining diacritics are tricky.
-- Marks used in the generated IPA. They are kept in named locals so the code
-- below can compare fetched characters against them (for example `tie` and
-- `nonsyllabic` during syllabification, `aspirated` and `voiceless` when
-- locating syllable breaks, `macron` and `breve` in `hasMacronBreve`).
local tie = U(0x35C) -- tie bar
local nonsyllabic = U(0x32F) -- combining inverted breve below
-- Pitch/tone marks. NOTE(review): not referenced in the visible part of this
-- file; presumably kept for the pitch-accent notation -- confirm before removing.
local high = U(0x341) -- combining acute tone mark
local low = U(0x340) -- combining grave tone mark
local rising = U(0x30C) -- combining caron
local falling = diacritics.Latin_circum -- combining circumflex
local midHigh = U(0x1DC4) -- mid–high pitch
local midLow = U(0x1DC6) -- mid–low pitch
local highMid = U(0x1DC7) -- high–mid pitch
local voiceless = U(0x325) -- combining ring below
-- The remaining three are written as literal characters rather than built
-- with U().
local aspirated = 'ʰ'
local macron = '¯'
local breve = '˘'
--- Tests whether `text` belongs to the character class named `X`.
-- Class definitions come from `m_data.chars`. Most of them are the contents
-- of a bracket set, so they are wrapped in "[...]"; "frontDiphth" and
-- "Greekdiacritic" are used as complete patterns and are only anchored.
-- @param text string to test; nil/false gives false
-- @param X    class name; nil/false gives false
-- @return false for missing arguments, otherwise whatever `strfind` returns
-- Raises an error (reported at the caller) when the class has no data.
local function is(text, X)
	if not (text and X) then
		return false
	end

	local class = m_data.chars[X]
	if not class then
		error('No data for "' .. X .. '".', 2)
	end

	local anchored
	if X ~= "frontDiphth" and X ~= "Greekdiacritic" then
		anchored = "^[" .. class .. "]$"
	else
		anchored = "^" .. class .. "$"
	end

	return strfind(text, anchored)
end
-- Environment predicates that conditions can call with the "~" operator
-- (see `decode`). Each receives the whole term and a character position and
-- inspects the character(s) that follow that position.
local env_functions = {
	-- Is the position followed by a front vowel, or by a front diphthong
	-- whose second letter is not a iota with diaeresis?
	preFront = function(term, index)
		local first = fetch(term, index + 1)
		local second = fetch(term, index + 2)
		local front_vowel = is(strip_accent(first), "frontVowel")
		if front_vowel then
			return front_vowel
		end
		local front_diphthong = is(strip_accent(first .. second), "frontDiphth")
		return front_diphthong and not is(second, "iDiaer")
	end,

	-- Is the next character an iota (ignoring accents) without a diaeresis?
	isIDiphth = function(term, index)
		local following = fetch(term, index + 1)
		if strip_accent(following) ~= 'ι' then
			return false
		end
		return not m_data[following].diaer
	end,

	-- Is the next character an upsilon (ignoring accents) without a diaeresis?
	isUDiphth = function(term, index)
		local following = fetch(term, index + 1)
		if strip_accent(following) ~= 'υ' then
			return false
		end
		return not m_data[following].diaer
	end,

	-- Is the next character a spacing macron or breve?
	hasMacronBreve = function(term, index)
		local following = fetch(term, index + 1)
		return following == macron or following == breve
	end,
}
local function decode(condition, x, term)
	--[==[
		Evaluates a condition string from the data module against the
		character at position x of term.

		Compound conditions:
		*	"/" means "or" and "+" means "and".
		*	The condition is split at its FIRST operator. The left part is a
			simple condition; the right part is decoded recursively, so
			"a+b/c" is read as a and (b or c). Evaluation short-circuits.

		Simple conditions have the form <offset><operator><argument>:
		*	The offset is relative to x:
			-1 is the previous character, 0 is the current, and 1 is the next.
		*	Equals sign (=) checks whether the character at that position
			is equal to the argument.
		*	Period (.) looks the argument up as a field in the data table
			of the character at that position.
		*	Tilde (~) calls the function of that name in env_functions
			on that position, if the function exists.

		A condition matching none of these forms returns nothing (falsy).
	]==]
	if strfind(condition, '[+/]') then
		local left, operator, right = strmatch(condition, "^([^/+]-)([/+])(.*)$")
		if not (left or right) then
			error('Condition "' .. tostring(condition) .. '" is improperly formed')
		end
		if operator == '/' then -- logical operator: or
			return decode(left, x, term) or decode(right, x, term)
		end
		if operator == '+' then -- logical operator: and
			return decode(left, x, term) and decode(right, x, term)
		end
		return
	end

	if strfind(condition, '=') then -- check character identity
		local offset, expected = unpack(strsplit(condition, "="))
		-- An out-of-bounds fetch gives '', which simply fails to match.
		return expected == fetch(term, x + offset)
	end

	if strfind(condition, '%.') then -- check character quality
		local offset, quality = unpack(strsplit(condition, "%."))
		local entry = m_data[fetch(term, x + offset)]
		return entry and entry[quality] or false
	end

	if strfind(condition, '~') then -- check character(s) using function
		local offset, name = unpack(strsplit(condition, "~"))
		local predicate = env_functions[name]
		return predicate and predicate(term, x + offset) or false
	end
end
--- Resolves a data entry to a concrete value for position `x` of `term`.
-- A string or number is returned as is. A table is read as an ordered list
-- of alternatives: a plain string/number is an unconditional result, and a
-- {condition, result} pair applies when `decode` accepts the condition; the
-- first applicable alternative wins.
-- @param p    string, number, or table of alternatives
-- @param x    character position the conditions are evaluated at
-- @param term string being converted
-- @return the selected value, or nothing when no alternative applies
-- Raises an error when `p` is of any other type.
local function check(p, x, term)
	local kind = type(p)
	if kind == 'string' or kind == 'number' then
		return p
	end
	if kind ~= 'table' then
		error('"p" is of unrecongized type ' .. type(p))
	end

	for _, candidate in ipairs(p) do
		local candidate_kind = type(candidate)
		if candidate_kind == 'string' or candidate_kind == 'number' then
			return candidate
		end
		if candidate_kind == 'table' and decode(candidate[1], x, term) then
			local result = candidate[2]
			if type(result) == 'string' then
				return result
			end
			-- Parenthesised so that exactly one value is returned even when
			-- the nested call yields none.
			return (check(result, x, term))
		end
	end
end
--- Finds the position of the last character of the current syllable.
-- The break is placed before the consonant (or consonant cluster) that
-- immediately precedes the next vowel.
-- @param word    the not yet syllabified remainder of the word
-- @param nVowel  position of the next vowel (ignored when `wordEnd` is set)
-- @param wordEnd truthy when no further vowel follows
-- @return position of the last character belonging to the current syllable
local function find_syllable_break(word, nVowel, wordEnd)
	if not word then error('The variable "word" in the function "find_syllable_break" is nil.') end

	-- No further vowel: the rest of the word belongs to this syllable.
	if wordEnd then
		return strlen(word)
	end

	local before1 = fetch(word, nVowel - 1)
	local before2 = fetch(word, nVowel - 2)

	if is(before1, "liquid") then
		-- Obstruent + liquid (optionally with aspiration between them)
		-- stays together in the onset of the next syllable.
		if is(before2, "obst") then
			return nVowel - 3
		end
		if before2 == aspirated and is(fetch(word, nVowel - 3), "obst") then
			return nVowel - 4
		end
		return nVowel - 2
	end

	if is(before1, "cons") then
		return nVowel - 2
	end
	-- Aspirated obstruent: keep the aspiration mark with its consonant.
	if before1 == aspirated and is(before2, "obst") then
		return nVowel - 3
	end
	-- 'r' followed by the voiceless ring: keep the pair together.
	if before1 == voiceless and before2 == 'r' then
		return nVowel - 3
	end

	return nVowel - 1
end
--- Splits one IPA word into syllables separated by '.'.
-- A stress mark 'ˈ' found in a syllable is moved to the start of that
-- syllable (where it replaces the '.' separator); in a single-syllable word
-- it is removed instead.
-- @param word IPA transcription of a single word
-- @return the syllabified word, or nil when `word` is empty
local function syllabify_word(word)
	local chunks = {}
	--[[	cVowel means "current vowel", nVowel "next vowel",
			sBreak "syllable break".	]]--
	local cVowel, nVowel, sBreak, stress, wordEnd, searching
	while word ~= '' do
		cVowel, nVowel, sBreak, stress, wordEnd = false, false, false, false, false

		--First thing is to find the first vowel.
		searching = 1
		local cVowelFound = false
		while not cVowel do
			local letter = fetch(word, searching)
			if cVowelFound then
				local nextLetter = fetch(word, searching + 1)
				if (is(letter, "vowel") and nextLetter ~= nonsyllabic) or is(letter, "cons") or letter == '' or letter == 'ˈ' then
					cVowel = searching - 1
				elseif is(letter, "diacritic") then
					searching = searching + 1
				elseif letter == tie then
					-- A tie joins the vowel to whatever follows, so keep
					-- looking for the end of the nucleus.
					cVowelFound = false
					searching = searching + 1
				else
					searching = searching + 1
				end
			elseif letter == '' then
				-- Ran off the end without finding a (further) vowel. fetch
				-- returns '' for every position past the end, so without
				-- this branch the search would never terminate. Take the
				-- rest of the word as the final chunk.
				cVowel = searching - 1
				wordEnd = true
			else
				if is(letter, "vowel") then
					cVowelFound = true
				elseif letter == 'ˈ' then
					stress = true
				end
				searching = searching + 1
			end
		end

		--Next we try and find the next vowel or the end.
		searching = cVowel + 1
		while (not nVowel) and (not wordEnd) do
			local letter = fetch(word, searching)
			if is(letter, "vowel") or letter == 'ˈ' then
				nVowel = searching
			elseif letter == '' then
				wordEnd = true
			else
				searching = searching + 1
			end
		end

		--Finally we find the syllable break point.
		sBreak = find_syllable_break(word, nVowel, wordEnd)

		--Pull everything up to and including the syllable Break.
		local syllable = substr(word, 1, sBreak)

		--If there is a stress accent, then we need to move it to the
		--beginning of the syllable, unless it is a monosyllabic word,
		--in which case we remove it altogether.
		if stress then
			if next(chunks) or syllable ~= word then
				syllable = 'ˈ' .. strsubn(syllable, 'ˈ', '')
			else
				syllable = strsubn(syllable, 'ˈ', '')
			end
			stress = false
		end

		table.insert(chunks, syllable)
		word = substr(word, sBreak + 1)
	end

	local out = nil
	if #chunks > 0 then
		out = table.concat(chunks, '.')
		-- The stress mark already marks a syllable boundary.
		out = strsubn(out, '%.ˈ', 'ˈ')
	end
	return out
end
--- Syllabifies every space-separated word of an IPA string.
-- Words for which `syllabify_word` returns nothing are dropped.
-- @param s IPA transcription, words separated by single spaces
-- @return the syllabified words joined with single spaces
local function syllabify(s)
	local pieces = {}
	for _, word in ipairs(strsplit(s, ' ')) do
		local syllabified = syllabify_word(word)
		if syllabified then
			pieces[#pieces + 1] = syllabified
		end
	end
	return table.concat(pieces, ' ')
end
-- Appends the transcription(s) of a two-stage period to the parallel lists
-- `pron` (labels) and `fone` (transcriptions): a single entry labelled
-- `label` when both stages sound the same, otherwise one entry per stage
-- labelled "<label> inicial" and "<label> final".
local function add_period_stages(pron, fone, early, late, label)
	if early == late then
		table.insert(pron, {label})
		table.insert(fone, {early})
	else
		table.insert(pron, {label .. " inicial"})
		table.insert(fone, {early})
		table.insert(pron, {label .. " final"})
		table.insert(fone, {late})
	end
end

--- Generates the syllabified transcription of `term` for every period.
-- @param term prepared Greek text (see export.procesar_pron_args)
-- @return pron list of one-element tables holding the period labels
-- @return fone list of one-element tables holding the matching
--         transcriptions, in the same order as `pron`
local function generar_pron(term)
	if not term then
		error('The variable "term" in the function "generar_pron" is nil.')
	end

	-- Every period is always generated: the code below needs all five.
	local IPAs = {}
	for _, period in ipairs(periods) do
		IPAs[period] = {}
	end

	local length = strlen(term)
	local x = 1
	while x <= length do
		local data = m_data[fetch(term, x)]
		-- Characters without data are skipped.
		if data then
			-- check to see if a multicharacter search is warranted
			local advance = data.pre and check(data.pre, x, term) or 0
			local p = (advance ~= 0) and m_data[substr(term, x, x + advance)].p or data.p
			for _, period in ipairs(periods) do
				-- check may return no value at all when none of its
				-- alternatives applies; passing that straight to
				-- table.insert would call it with a single argument, which
				-- is an error in Lua 5.1.
				local sound = check(p[period], x, term)
				if sound ~= nil then
					table.insert(IPAs[period], sound)
				end
			end
			x = x + advance
		end
		x = x + 1
	end

	local pron, fone = {}, {}

	table.insert(pron, {"clásico"})
	table.insert(fone, {syllabify(table.concat(IPAs["cla"], ''))})

	add_period_stages(pron, fone,
		syllabify(table.concat(IPAs["koi1"], '')),
		syllabify(table.concat(IPAs["koi2"], '')),
		"koiné")

	add_period_stages(pron, fone,
		syllabify(table.concat(IPAs["byz1"], '')),
		syllabify(table.concat(IPAs["byz2"], '')),
		"bizantino")

	return pron, fone
end
--- Fills `args.pron` and `args.fone` with the generated pronunciations.
-- The text to transcribe is, in order of preference: the entry for `titulo`
-- in `pron_abc`, the first value of `args.ayuda`, or `titulo` itself.
-- @param titulo page title
-- @param args   argument table; modified in place
-- @return the same `args` table
-- Raises an error when a macron or breve follows ε, ο, η or ω.
function export.procesar_pron_args(titulo, args)
	-- `ayuda` may be absent; fall through to the title instead of failing
	-- on an attempt to index nil.
	local ayuda = args["ayuda"]
	local term = pron_abc[titulo] or (ayuda and ayuda[1]) or titulo

	term = strlower(term)
	term = standardDiacritics(term)
	term = mark_implied_length(term)

	-- Length marks are checked on the decomposed form of the term.
	local decomposed = strnfd(term)
	if strfind(decomposed, "[εοηω][" .. diacritics.all .. "]*[" .. diacritics.spacing_macron .. diacritics.spacing_breve .. diacritics.breve .. diacritics.macron .. "]") then
		error("Macrons and breves cannot be placed after the letters ε, ο, η, or ω.")
	end

	-- local ambig, ambig_letter_list --> TODO: this still needs polishing
	-- if args.period == "cla" then
	-- 	ambig, ambig_letter_list = findAmbig(term)
	-- end

	-- Normalise letter variants before conversion.
	term = strsubn(term, 'ς', 'σ')
	term = strsubn(term, 'ῤ', 'ρ')
	term = pronunciationOrder(term)

	args["pron"], args["fone"] = generar_pron(term)
	return args
end
return export