Módulo:generar-pron/grc

La documentación para este módulo puede ser creada en Módulo:generar-pron/grc/doc

--Things we still need:
--Voicing of sigma around (after?) voiced stops. 
--Proper alerts for editors, especially on ambiguous vowels.

local export = {}

local unpack = unpack or table.unpack

local m_str = require("Módulo:String")

local strsplit = m_str.split
local strfind = m_str.find
local substr = m_str.sub
local strmatch = m_str.match
local strsubn = m_str.gsub
local strlen = m_str.len
local strlower = m_str.lower
local strnfd = m_str.toNFD -- strnfd
local strnfc = m_str.toNFC
local U = m_str.char

-- Substitution helper: performs the same replacement as strsubn but keeps only
-- the resulting string, discarding the substitution count.
local function strsub(text, pattern, repl, n)
    -- The outer parentheses truncate the call to its first return value.
    return (strsubn(text, pattern, repl, n))
end
 

local m_table = require("Módulo:tabla")
local copy = m_table.shallowcopy
local sparseConcat = m_table.sparseConcat

local m_data = mw.loadData("Módulo:generar-pron/grc/datos")
local diacritics = m_data.diacritics
local diacritic = m_data.diacritic
local conversions = m_data.conversions
local groups = m_data.groups
local diacritic_groups = m_data.diacritic_groups

-- Identifiers of the pronunciation periods handled by this module
-- (presumably Classical, two Koine stages and two Byzantine stages — confirm
-- against the data module).
local periods = {'cla', 'koi1', 'koi2', 'byz1', 'byz2'}

-- ################### TOOLS (grc-utilities) ##########################
-- Maps a single character (letter or combining diacritic) to a table of flags
-- describing it; filled in by add_info below.
local info = {}
-- The tables are shared among different characters so that they can be checked
-- for equality if needed, and to use less space.
local vowel_t = { vowel = true }
-- iota_t and upsilon_t hold the same flags but are distinct tables, so the two
-- letters can still be told apart by comparing table identity.
local iota_t = { vowel = true, offglide = true }
local upsilon_t = { vowel = true, offglide = true }
-- These don't need any contents.
local rho_t = {}
-- local consonant_t = {}
local diacritic_t = { diacritic = true }
-- Needed for equality comparisons.
local breathing_t = { diacritic = true }

-- Byte pattern matching one UTF-8 encoded character: a lead byte followed by
-- any number of continuation bytes.
local UTF8_char = "[\1-\127\194-\244][\128-\191]*"
local basic_Greek = "[\206-\207][\128-\191]" -- excluding first line of Greek and Coptic block: ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ

-- Registers the flag table `t` in `info` for every given character.
-- `characters` is either a string, which is split into UTF-8 characters,
-- or a sequence whose elements are used as keys directly.
local function add_info(characters, t)
	if type(characters) ~= "string" then
		for _, entry in ipairs(characters) do
			info[entry] = t
		end
		return
	end

	for entry in string.gmatch(characters, UTF8_char) do
		info[entry] = t
	end
end

-- Length marks, diaeresis, accents and iota subscript share the generic
-- diacritic flag table.
add_info({ diacritics.macron, diacritics.breve,
        diacritics.diaeresis,
		diacritics.acute, diacritics.grave, diacritics.circum,
		diacritics.subscript,
	}, diacritic_t)

-- Breathings get their own table so they can be recognised by identity.
add_info({diacritics.rough, diacritics.smooth}, breathing_t)
add_info("ΑΕΗΟΩαεηοω", vowel_t)
add_info("Ιι", iota_t)
add_info("Υυ", upsilon_t)
-- add_info("ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨϜϘϺϷͶϠβγδζθκλμνξπρσςτφχψϝϙϻϸͷϡ", consonant_t)
add_info("Ρρ", rho_t)

-- Any character without an entry yields the shared empty table
-- `not_recognized`, so callers can read info[char].vowel etc. without
-- checking for nil first.
local not_recognized = {}
setmetatable(info, { __index =
	function(t, key)
		return not_recognized
	end
})

-- The alphabet: maps each Greek letter (upper and lower case, plus the
-- variant forms ϖ and ς) to a Greek spelling of the letter's name.
-- NOTE(review): not referenced in the part of the file visible here;
-- presumably used further down to pronounce single letters — confirm.
local pron_abc = {
	["Α"] = "άλφα",
	["α"] = "άλφα",
	["Β"] = "βήτα",
	["β"] = "βήτα",
	["Γ"] = "γάμμα",
	["γ"] = "γάμμα",
	["Δ"] = "δέλτα",
	["δ"] = "δέλτα",
	["Ε"] = "έψιλον",
	["ε"] = "έψιλον",
	["Ζ"] = "ζήτα",
	["ζ"] = "ζήτα",
	["Η"] = "ήτα",
	["η"] = "ήτα",
	["Θ"] = "θήτα",
	["θ"] = "θήτα",
	["Ι"] = "ιώτα",
	["ι"] = "ιώτα",
	["Κ"] = "κάππα",
	["κ"] = "κάππα",
	["Λ"] = "λάμβδα",
	["λ"] = "λάμβδα",
	["Μ"] = "μυ",
	["μ"] = "μυ",
	["Ν"] = "νυ",
	["ν"] = "νυ",
	["Ξ"] = "ξι",
	["ξ"] = "ξι",
	["Ο"] = "όμικρον",
	["ο"] = "όμικρον",
	["Π"] = "πι",
	["π"] = "πι",
	["ϖ"] = "πι",
	["Ρ"] = "ρο",
	["ρ"] = "ρο",
	["Σ"] = "σίγμα",
	["σ"] = "σίγμα",
	["ς"] = "σίγμα",
	["Τ"] = "ταυ",
	["τ"] = "ταυ",
	["Υ"] = "ύψιλον",
	["υ"] = "ύψιλον",
	["Φ"] = "φι",
	["φ"] = "φι",
	["Χ"] = "χι",
	["χ"] = "χι",
	["Ψ"] = "ψι",
	["ψ"] = "ψι",
	["Ω"] = "ωμέγα",
	["ω"] = "ωμέγα",
}

--[=[
local checkType = require "libraryUtil".checkType

local function _check(funcName)
	return function(argIndex, arg, expectType, nilOk)
		return checkType(funcName, argIndex, arg, expectType, nilOk)
	end
end
]=]--

-- Calls `func` once for every UTF-8 character of `str`, in order.
local function forEach(str, func)
	local next_char = string.gmatch(str, UTF8_char)
	local char = next_char()
	while char do
		func(char)
		char = next_char()
	end
end

-- Appends `chars` to list[index] (or stores it there if the slot is empty)
-- and returns `text` with its first #chars bytes removed.
-- Raises an error when `chars` is nil.
-- NOTE: a later file-level `local function add(vowel, accent)` reuses this
-- name, so this version is only reachable from code placed before it.
local function add(list, index, chars, text)
	if not chars then
		error("The function add cannot act on a nil character.")
	end

	local existing = list[index]
	list[index] = existing and (existing .. chars) or chars

	-- Byte-based slicing is sufficient: exactly the bytes of `chars` are dropped.
	local rest = text:sub(#chars + 1)
	return rest
end

-- Decomposes `text` and then replaces each character that has an entry in
-- `conversions` (spacing diacritics -> combining ones, nonstandard ->
-- standard polytonic Greek); characters without an entry are kept as they are.
local function standardDiacritics(text)
	local decomposed = strnfd(text)
	-- Assigning to a single local drops gsub's second result (the count).
	local converted = decomposed:gsub(UTF8_char, conversions)
	return converted
end

--[=[	This function arranges diacritics in the following order:
			1. macron or breve
			2. breathings or diaeresis
			3. acute, circumflex, or grave
			4. iota subscript
		Used by [[Module:typing-aids]].
		
		Each diacritic is placed at the slot given by m_data.diacritic_order.
		When a slot is already occupied, the newcomer is inserted next to the
		existing one and the tracking module is invoked with the key
		"grcdiacriticos"; no error is raised (the error call is commented out).
		
		Parameter: diacritics_ – a string consisting of combining diacritics.
		Returns: the result of sparseConcat on the slot table (presumably the
		slots joined in index order, skipping holes — confirm in Módulo:tabla).
]=]
local function reorderDiacriticSequence(diacritics_)
	-- Sparse table: index = category slot, value = diacritic.
	local output = {}
	forEach(diacritics_,
		function (diacritic_)
			-- NOTE(review): a diacritic missing from diacritic_order gives a nil
			-- index here, which would make the assignment below fail.
			local index = m_data.diacritic_order[diacritic_]
			if not output[index] then
				output[index] = diacritic_
			else
				-- Place breve after macron.
				if diacritic_ == diacritics.breve then
					index = index + 1
				end
				-- The following might have odd results when there
				-- are three or more diacritics.
				table.insert(output, index, diacritic_)
				-- [[Special:WhatLinksHere/Template:tracking/grc-utils/too many diacritics]]
				-- require("Module:debug").track("grc-utils/too many diacritics")
				-- Record that two diacritics competed for the same slot.
				require("Módulo:traza")("grcdiacriticos")
				--[[
				local m_templates = require("Module:grc-utilities/templates")
				error("There are two diacritics, " ..
						m_templates.addDottedCircle(output[index]) .. " and " ..
						m_templates.addDottedCircle(diacritic) ..
						" that belong in the same position. There should be only one."
				)
				--]]
			end
		end)
	return sparseConcat(output)
end

-- Decomposes `text` and passes every run of two or more combining diacritics
-- through reorderDiacriticSequence. Returns only the resulting string.
local function reorderDiacritics(text)
	local decomposed = strnfd(text)
	-- One combining diacritic followed by at least one more.
	local run_pattern = m_data.combining_diacritic .. m_data.combining_diacritic .. "+"
	return (strsub(decomposed, run_pattern, reorderDiacriticSequence))
end

--[=[
		This breaks a word into meaningful "tokens", which are
		individual letters or diphthongs with their diacritics.
		Used by [[Module:grc-accent]] and [[Module:grc-pronunciation]].

		Parameter: text – string; it is decomposed (NFD) before scanning.
		Returns: a sequence of token strings in decomposed form.
		Raises: an error when rho carries a diacritic other than a breathing.
		A diacritic after any other non-vowel is only reported through mw.log,
		including the case where the diacritic is the very first character.
--]=]
local function make_tokens(text)
	local tokens, prev_info = {}, {}
	-- vowel_count counts the vowel letters in the current uninterrupted run; it
	-- is what lets the second letter of a diphthong join the first one's token.
	local token_i, vowel_count = 1, 0
	local prev
	for character in string.gmatch(strnfd(text), UTF8_char) do
		local curr_info = info[character]
		-- Split vowels between tokens if not a diphthong.
		if curr_info.vowel then
			vowel_count = vowel_count + 1
			if prev and (not (vowel_count == 2 and curr_info.offglide and prev_info.vowel)
					-- υυ → υ, υ
					-- ιυ → ι, υ
					or prev_info.offglide and curr_info == upsilon_t or curr_info == prev_info) then
				token_i = token_i + 1
				if prev_info.vowel then
					vowel_count = 1
				end
			elseif vowel_count == 2 then
				vowel_count = 0
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		elseif curr_info.diacritic then
			vowel_count = 0
			tokens[token_i] = (tokens[token_i] or "") .. character
			if prev_info.diacritic or prev_info.vowel then
				if character == diacritics.diaeresis then
					-- Split the diphthong in the current token if a diaeresis was found:
					-- the first letter, then the second letter plus any diacritics.
					local previous_vowel, vowel_with_diaeresis =
						string.match(tokens[token_i],
							"^(" .. basic_Greek .. ")(" .. basic_Greek .. ".+)")
					if previous_vowel then
						tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis
						token_i = token_i + 1
					end
					-- Otherwise nothing needs splitting: the vowel preceding the
					-- vowel with the diaeresis is already in the previous token
					-- if it has a diacritic:
					-- Περικλῆῐ̈ → Π ε ρ ι κ λ ῆ ῐ̈
				end
			elseif prev_info == rho_t then
				if curr_info ~= breathing_t then
					error(string.format("The character %s in %s should not have the accent %s on it.", prev, text, character))
				end
			elseif prev then
				mw.log("The character " .. prev .. " cannot have a diacritic on it.")
			else
				-- The text starts with a diacritic, so there is no previous
				-- character to name; concatenating `prev` here would raise
				-- "attempt to concatenate a nil value".
				mw.log("The diacritic at the start of " .. text .. " has no preceding character to attach to.")
			end
		else
			vowel_count = 0
			if prev then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		end
		prev = character
		prev_info = curr_info
	end
	return tokens
end

-- Memoised front end for make_tokens. Results are keyed by the decomposed
-- form of the input. The returned table is the cached one, so callers that
-- want to modify it must copy it first.
local cache = {}
local function tokenize(text)
	local key = strnfd(text)
	local tokens = cache[key]
	if not tokens then
		tokens = make_tokens(text)
		cache[key] = tokens
	end
	return tokens
end

--[=[	Puts each run of diacritics into the order
			1. breathings or diaeresis
			2. acute, circumflex, or grave
			3. macron or breve
			4. iota subscript
		and turns combining macrons/breves into their spacing counterparts.
		The reordering only happens when the text matches groups[1]
		(presumably the macron/breve group — confirm in the data module).
		The result is returned in composed (NFC) form.
		Used by [[Module:grc-pronunciation]].		]=]
local function pronunciationOrder(text)
	local result = standardDiacritics(text)

	-- Nothing to rearrange without a groups[1] match.
	if not strfind(result, groups[1]) then
		return strnfc(result)
	end

	-- Rebuilds one diacritic run from at most one match of each group.
	local function rearrange(sequence)
		local first = strmatch(sequence, groups[2]) or ""
		local second = strmatch(sequence, groups[3]) or ""
		local third = strmatch(sequence, groups[1]) or ""
		local fourth = strmatch(sequence, groups[4]) or ""
		return first .. second .. third .. fourth
	end

	result = strsub(result, diacritic .. diacritic .. "+", rearrange)
	-- combining to spacing macron
	result = strsub(result, diacritics.macron, diacritics.spacing_macron)
	-- combining to spacing breve
	result = strsub(result, diacritics.breve, diacritics.spacing_breve)

	return strnfc(result)
end


-- Finds vowels whose length is ambiguous: tokens without a consonant that
-- consist of a lowercase α, ι or υ plus diacritics, none of which is a
-- macron, breve, circumflex or iota subscript.
--
-- Parameters:
--   text   string  the word(s) to examine; anything else raises an error
--   noTag  unused; language tagging is disabled in this port, so the bare
--          vowel letters are always returned
-- Returns:
--   output  sequence with one entry (the vowel letter) per ambiguous vowel
--   vowels  set-like table { [lowercase vowel] = true }, for categorization
local function findAmbig(text, noTag)
	if (not text) or type(text) ~= "string" then
		error("The input to function findAmbig is nonexistent or not a string")
	end
	
	-- Everywhere else in this file the circumflex is read from
	-- `diacritics.circum`. Fall back to that key so that a data table without
	-- a `circumflex` entry does not make the concatenation below fail on nil.
	local circumflex = diacritics.circumflex or diacritics.circum
	local lengthDiacritic = "[" .. diacritics.macron .. diacritics.breve .. circumflex .. diacritics.subscript .. "]"
	local aiu_diacritic = "^([" .. "αιυ" .. "])(" .. diacritic .. "*)$"
	
	-- breaks the word into units
	local output, vowels = {}, {}
	for _, token in ipairs(tokenize(text)) do
		if not strfind(token, m_data.consonant) then
			local vowel, diacritics_ = strmatch(token, aiu_diacritic)
			
			if vowel and (diacritics_ == "" or not strfind(diacritics_, lengthDiacritic)) then
				table.insert(output, vowel)
				
				-- Lists the vowel letters that are ambiguous, for categorization purposes.
				vowels[strlower(vowel)] = true
			end
		end
	end
		
	return output, vowels
end
-- ################### End of tools ###################

-- ################### DIACRITICS ###################
-- Character class of the vowel letters whose length is not shown by the letter itself.
local either_vowel = "[ΑαΙιΥυ]"
-- Pattern for the tonal accents (presumably acute, grave and circumflex —
-- defined in the data module).
local tonal_diacritic = diacritic_groups[3]
-- Diacritics that mark a vowel as long; concatenated without brackets so that
-- it can be embedded in a larger character class.
local long_diacritics = diacritics.macron .. diacritics.subscript .. diacritics.circum

-- Normalises the empty string to nil; every other value is passed through.
local function if_not_empty(var)
	if var ~= "" then
		return var
	end
	return nil
end

-- Looks for a Greek vowel letter in `token`; returns whatever strfind returns
-- (a truthy position on success, nil otherwise).
local function contains_vowel(token)
	local vowel_letter = '[ΑΕΗΙΟΥΩαεηιουω]'
	return strfind(token, vowel_letter)
end

-- True when `token` contains a vowel letter immediately followed by ι or υ
-- (any case); always returns a real boolean.
local function is_diphthong(token)
	return not not strfind(token, "[ΑαΕεΗηΙιΟοΥυΩω][ΙιΥυ]")
end

--[=[
local libraryUtil = require('libraryUtil')
local checkType = libraryUtil.checkType
local checkTypeMulti = libraryUtil.checkTypeMulti

local function _check(funcName, expectType)
	if type(expectType) == "string" then
		return function(argIndex, arg, nilOk)
			checkType(funcName, argIndex, arg, expectType, nilOk)
		end
	else
		return function(argIndex, arg, expectType, nilOk)
			if type(expectType) == "table" then
				checkTypeMulti(funcName, argIndex, arg, expectType, nilOk)
			else
				checkType(funcName, argIndex, arg, expectType, nilOk)
			end
		end
	end
end
--]=]

--[[
	Decides whether a vowel token counts as short. It does when it carries a
	breve, or when it contains ε/ο (either case) and no lowercase ι or υ.
	Everything else is treated as long, including unmarked α, ι and υ.
]]
local function is_short(token)
	if strfind(token, diacritics.breve) then
		return true
	end
	if strfind(token, '[ΕΟεο]') and not strfind(token, '[ιυ]') then
		return true
	end
	return false
end

-- Same arguments as strsubn. Returns the new string only if at least one
-- substitution was made, and nil otherwise, so calls can be chained with `or`.
local function conditional_gsub(...)
	local result, replacements = strsubn(...)
	if not replacements or replacements <= 0 then
		return nil
	end
	return result
end

-- Dispatch table: combining diacritic -> function(vowel) returning the vowel
-- token with that diacritic added. Tokens are expected in decomposed form.
-- diacritic_groups[1] and [2] are patterns from the data module (presumably
-- macron/breve and breathing/diaeresis respectively — confirm there).
local accent_adding_functions = {
	-- This will not throw an error if η or ω has a macron on it.
	-- Any macron on the vowel is dropped; the circumflex is placed after an
	-- optional diacritic_groups[2] mark and before an optional iota subscript.
	-- ε and ο are not in the letter class, so they are returned unchanged.
	[diacritics.circum] = function(vowel)
		return (strsub(
			vowel,
			"([ΑαΗηΙιΥυΩω])" .. diacritics.macron .. "?(" .. diacritic_groups[2] .. "?)(" .. diacritics.subscript .. "?)$",
			"%1%2" .. diacritics.circum .. "%3"
		))
	end,
	-- In an εω sequence the acute goes on the ε; otherwise it is put after the
	-- vowel's optional group-1/group-2 diacritics and before a iota subscript.
	[diacritics.acute] = function(vowel)
		return (
			conditional_gsub(vowel,
				"([Εε])([Ωω])",
				"%1" .. diacritics.acute .. "%2") or
			strsub(vowel,
				"([ΑαΕεΗηΙιΟοΥυΩω]" .. diacritic_groups[1] .. "?" .. diacritic_groups[2] .. "?)(" .. diacritics.subscript .. "?)$",
				"%1" .. diacritics.acute .. "%2"))
	end,
	-- Leaves vowels alone that are already marked long or are diphthongs,
	-- raises an error for ε/ο, and otherwise adds a macron after α, ι or υ.
	[diacritics.macron] = function(vowel)
		if strfind(vowel, "[" .. long_diacritics .. "]") or is_diphthong(vowel) then
			return vowel
		elseif strfind(vowel, "[ΕΟεο]") then
			error("The vowel " .. vowel ..
					" is short, so a macron cannot be added to it.")
		else
			return strsub(vowel, "(" .. either_vowel .. ")", "%1" .. diacritics.macron)
		end
	end,
	-- Raises an error for vowels marked long and for diphthongs; otherwise
	-- adds a breve after α, ι or υ.
	[diacritics.breve] = function(vowel)
		if strfind(vowel, "[" .. long_diacritics .. "]") then
			error("The vowel " .. vowel ..
					" has a iota subscript, a macron, or a circumflex, so a breve cannot be added to it.")
		elseif is_diphthong(vowel) then
			error("The vowel " .. vowel ..
					" is a diphthong, so a breve cannot be added to it.")
		else
			return strsub(vowel, "(" .. either_vowel .. ")", "%1" .. diacritics.breve)
		end
	end,
	-- This will insert a diaeresis on a single iota or upsilon, or on a
	-- iota or upsilon that is the second element of a diphthong.
	-- It does nothing if the vowel has a breathing on it.
	[diacritics.diaeresis] = function(vowel)
		return (strsub(
			vowel,
			"([ΙιΥυ]" .. diacritic_groups[1] .. "?)(" .. tonal_diacritic .. "?)$",
			"%1" .. diacritics.diaeresis .. "%2"
		))
	end
}

-- Adds the combining diacritic `accent` to the vowel token `vowel` by
-- dispatching to accent_adding_functions. Assumes decomposed (NFD) vowels.
-- Raises an error when no adding function exists for `accent`.
local function add(vowel, accent)
	local adder = accent_adding_functions[accent]
	if type(adder) == "function" then
		return adder(vowel)
	end

	local name = m_table.keyFor(diacritics, accent)
	if name == "circum" then
		name = "circumflex"
	end
	-- `name` is nil when `accent` is not one of the named diacritics; fall back
	-- to the raw value so building the message cannot itself fail.
	error("No function for adding a " .. tostring(name or accent) .. ".")
end

-- Returns `word` in decomposed form with every match of m_data.all removed
-- (presumably all diacritics — see the data module).
local function strip_accent(word)
	local decomposed = strnfd(word)
	-- Assigning to a single local drops the substitution count.
	local stripped = strsub(decomposed, m_data.all, '')
	return stripped
end

-- Removes the tonal accents from `word` (returned decomposed). A circumflex on
-- a token that is just α, ι or υ with optional breathing/diaeresis is replaced
-- by a macron first, so the length information it carried is kept.
local function strip_tone(word)
	local text = strnfd(word)

	if strfind(text, diacritics.circum) then
		local optional_marks = '[' .. diacritics.smooth .. diacritics.rough .. diacritics.diaeresis .. ']*'
		local circumflexed = '^([αΑιΙυΥ])(' .. optional_marks .. ')' .. diacritics.circum .. '$'
		local with_macron = '%1' .. diacritics.macron .. '%2'

		local tokens = copy(tokenize(text))
		for position, token in ipairs(tokens) do
			-- Add a macron to every vowel with a circumflex and remove the circumflex.
			tokens[position] = strsub(token, circumflexed, with_macron)
		end
		text = table.concat(tokens)
	end

	return (strsub(text, tonal_diacritic, ''))
end

-- Puts an acute on the last vowel token of `word`. A word that already has a
-- tonal accent is returned unchanged (in decomposed form).
local function ult(word)
	local decomposed = strnfd(word)
	if strfind(decomposed, tonal_diacritic) then
		return decomposed
	end

	local tokens = copy(tokenize(decomposed))
	for position, token in m_table.reverseIpairs(tokens) do
		if contains_vowel(token) then
			-- fortunately accents go last in combining order
			tokens[position] = add(token, diacritics.acute)
			break
		end
	end

	return table.concat(tokens, '')
end

--[[ Accents the last vowel token of `word`: acute if is_short says it is
short, circumflex otherwise. A word that already has a tonal accent is
returned unchanged (in decomposed form).
WARNING: Given an unmarked α ι υ, this function will return a circumflex.
That said, if you ran into this situation in the first place, you probably
are doing something wrong. ]] --
local function circ(word)
	local decomposed = strnfd(word)
	if strfind(decomposed, tonal_diacritic) then
		return decomposed
	end

	local tokens = copy(tokenize(decomposed))
	for position, token in m_table.reverseIpairs(tokens) do
		if contains_vowel(token) then
			local mark = is_short(token) and diacritics.acute or diacritics.circum
			tokens[position] = add(token, mark)
			break
		end
	end

	return table.concat(tokens, '')
end

-- Puts an acute on the second vowel token from the end. If a "-" token is met
-- first, `orig` is returned untouched; with fewer than two vowel tokens the
-- work is handed to circ. Already accented words come back decomposed and
-- otherwise unchanged.
local function penult(orig)
	local decomposed = strnfd(orig)
	if strfind(decomposed, tonal_diacritic) then
		return decomposed
	end

	local tokens = copy(tokenize(decomposed))
	local vowels_seen = 0
	for position, token in m_table.reverseIpairs(tokens) do
		if token == '-' then
			return orig
		end
		if contains_vowel(token) then
			vowels_seen = vowels_seen + 1
			if vowels_seen == 2 then
				tokens[position] = add(token, diacritics.acute)
				return table.concat(tokens, '')
			end
		end
	end

	return circ(orig)
end

-- Accents the second vowel token from the end: acute when that vowel is short
-- or the last vowel counts as long, circumflex otherwise. The last vowel
-- counts as long when is_short rejects it, unless the final token of the word
-- is exactly αι or οι. A "-" token returns `orig` untouched; with fewer than
-- two vowel tokens the work is handed to circ.
local function pencirc(orig)
	local decomposed = strnfd(orig)
	if strfind(decomposed, tonal_diacritic) then
		return decomposed
	end

	local tokens = copy(tokenize(decomposed))
	local vowels_seen = 0
	local ultima_is_long = false
	for position, token in m_table.reverseIpairs(tokens) do
		if token == '-' then
			return orig
		end
		if contains_vowel(token) then
			vowels_seen = vowels_seen + 1
			if vowels_seen == 1 then
				if not is_short(token) then
					local final = tokens[#tokens]
					ultima_is_long = not (final == 'αι' or final == 'οι')
				end
			elseif vowels_seen == 2 then
				local mark = diacritics.circum
				if is_short(token) or ultima_is_long then
					mark = diacritics.acute
				end
				tokens[position] = add(token, mark)
				return table.concat(tokens, '')
			end
		end
	end

	return circ(orig)
end

-- Puts an acute as far back as allowed: on the third vowel token from the
-- end, or on the second when the last vowel counts as long (same test as in
-- pencirc, with word-final αι/οι not counting as long). A "-" token returns
-- `orig` untouched; when no position is reached the work is handed to pencirc.
local function antepenult(orig)
	local decomposed = strnfd(orig)
	if strfind(decomposed, tonal_diacritic) then
		return decomposed
	end

	local tokens = copy(tokenize(decomposed))
	local vowels_seen = 0
	local ultima_is_long = false
	for position, token in m_table.reverseIpairs(tokens) do
		if token == '-' then
			return orig
		end
		if contains_vowel(token) then
			vowels_seen = vowels_seen + 1
			if vowels_seen == 1 then
				if not is_short(token) then
					local final = tokens[#tokens]
					ultima_is_long = not (final == 'αι' or final == 'οι')
				end
			elseif (vowels_seen == 2 and ultima_is_long) or vowels_seen == 3 then
				tokens[position] = add(token, diacritics.acute)
				return table.concat(tokens, '')
			end
		end
	end

	return pencirc(orig)
end

--[[
	Scans the vowel tokens of a word from the beginning or from the end and
	reports the first tonal accent met.

	Arguments:
	- word:			string	(Ancient Greek word)
	- from_end:		boolean or nil	(true = count from the end of the word)

	Returns the position (number of vowel tokens seen, i.e. syllables, not
	characters) and the accent name ("acute", "grave" or "circumflex"), or nil
	when no accent is found. Successful results are cached per direction,
	keyed by the decomposed word.
]]
local accent_cache = { [true] = {}, [false] = {} }

local function detect_accent(word, from_end)
	assert(type(word) == "string")
	assert(type(from_end) == "boolean" or type(from_end) == "nil")

	local key = strnfd(word)
	local direction_cache = accent_cache[from_end == true]
	local cached = direction_cache[key]
	if cached then
		return unpack(cached)
	end

	local names = {
		[diacritics.acute] = "acute",
		[diacritics.grave] = "grave",
		[diacritics.circum] = "circumflex",
	}

	local iterate = from_end and m_table.reverseIpairs or ipairs
	local syllable = 0
	for _, token in iterate(tokenize(word)) do
		if contains_vowel(token) then
			syllable = syllable + 1

			local accent_name = names[strmatch(token, tonal_diacritic)]
			if accent_name then
				direction_cache[key] = { syllable, accent_name }
				return syllable, accent_name
			end
		end
	end

	return nil
end

--[[
	Returns the traditional name of the word's accentuation, based on the
	first accent found when traveling back from the end of the word
	(e.g. "oxytone", "properispomenon", "barytone").

	On failure returns nil plus an explanatory message: either that no accent
	was found, or that no term exists for that accent on that syllable.
]]
local function get_accent_term(word)
	local syllable, accent_name = detect_accent(word, true)
	
	if not (syllable and accent_name) then
		return nil, 'No accent found.'
	end
	
	local terms = {
		["grave"]		= { "barytone" },
		["acute"] 		= { "oxytone", "paroxytone", "proparoxytone" },
		["circumflex"]	= { "perispomenon", "properispomenon" },
	}
	
	local term = terms[accent_name] and terms[accent_name][syllable]
	if term then
		return term
	end
	
	local ordinals = { "first", "second", "third", "fourth", "fifth", }
	-- Only five ordinals are spelled out. Without the fallback an accent
	-- further back made the concatenation below fail on a nil value.
	local ordinal = ordinals[syllable] or (syllable .. "th")
	
	return nil,
		'There is no term for a word with a ' .. accent_name ..
			' accent on the ' .. ordinal ..
			' syllable from the end of the word.'
end

-- Classifies the vowel length of a token as "long", "short" or "either";
-- returns nil for tokens without a vowel letter.
-- short_diphthong is a boolean or nil; if it is true, a token starting with
-- αι or οι is treated as short.
local function get_length(token, short_diphthong)
	local lowered = strlower(token)
	-- decomposition is not needed at the moment

	if not contains_vowel(lowered) then
		return nil
	end

	local length
	if strfind(lowered, "[ηω" .. long_diacritics .. "]") then
		-- η, ω; ᾳ, ῃ, ῳ; ᾱ, ῑ, ῡ; circumflexed vowels
		length = "long"
	elseif short_diphthong and strfind(lowered, "^[αο]ι") then
		length = "short"
	elseif is_diphthong(lowered) then
		length = "long"
	elseif strfind(lowered, "[εο" .. diacritics.breve .. "]") then
		-- ε, ο; ᾰ, ῐ, ῠ
		length = "short"
	else
		-- unmarked α, ι, υ
		length = "either"
	end
	return length
end

-- Takes a table of tokens and returns a sequence describing each vowel token,
-- ordered from the END of the word (entry 1 is the last vowel). Each entry is
--   { index = position in `tokens`, length = result of get_length,
--     accent = the acute/grave/circumflex found on the token, or nil }.
-- short_diphthong is only applied to the last vowel, and only when the last
-- token does not end in a consonant.
-- Raises an error if `tokens` is not a table; an empty table yields {}.
local function get_vowel_info(tokens, short_diphthong)
	if type(tokens) ~= "table" then
		error("The argument to get_vowel_info must be a table.")
	end
	
	local accent_pattern =
		"[" .. diacritics.acute .. diacritics.grave .. diacritics.circum .. "]"
	
	local vowels = {}
	local vowel_i = 1
	-- Guard against an empty token list, where there is no last token to test.
	local last_token = tokens[#tokens]
	if last_token and strfind(last_token, m_data.consonant .. "$") then
		short_diphthong = false
	end
	
	for i, token in m_table.reverseIpairs(tokens) do
		if contains_vowel(token) then
			-- Only the final vowel can be a "short diphthong".
			if vowel_i ~= 1 then
				short_diphthong = false
			end
			vowels[vowel_i] = {
					index = i,
					length = get_length(token, short_diphthong),
					accent = if_not_empty(strmatch(token, accent_pattern)),
			}
			vowel_i = vowel_i + 1
		end
	end
	
	return vowels
end

-- Adds the macrons and breves that the accentuation of `word` implies for its
-- last vowel and for the vowel before it.
--
-- Parameters:
--   word             string; decomposed before processing
--   return_tokens    if truthy, return the token table instead of a string
--   short_diphthong  passed on to get_vowel_info (word-final αι/οι short)
-- Returns the marked word as a string, or as a fresh token table that the
-- caller may modify freely.
local function mark_implied_length(word, return_tokens, short_diphthong)
	word = strnfd(word)
	-- Do nothing if there are no vowel letters that could be ambiguous.
	if not strfind(word, either_vowel) then
		if return_tokens then
			-- Copy: tokenize returns its cached table, and handing that out
			-- would let callers corrupt the cache for later lookups.
			return copy(tokenize(word))
		else
			return word
		end
	end
	
	local tokens = copy(tokenize(word))
	local vowels = get_vowel_info(tokens, short_diphthong)
	
	if #vowels >= 2 then
		local ultima = vowels[1]
		local ultima_i = ultima.index
		
		local penult_ = vowels[2]
		local penult_i = penult_.index
		
		if penult_.length == "either" and ultima.length == "short" then
			-- Before a short ultima, a circumflex implies a long penult and an
			-- acute implies a short one.
			if penult_.accent == diacritics.circum then
				tokens[penult_i] = add(tokens[penult_i], diacritics.macron)
			elseif penult_.accent == diacritics.acute then
				tokens[penult_i] = add(tokens[penult_i], diacritics.breve)
			end
		elseif penult_.length == "long" and ultima.length == "either" then
			-- On a long penult, a circumflex implies a short ultima and an
			-- acute implies a long one.
			if penult_.accent == diacritics.circum then
				tokens[ultima_i] = add(tokens[ultima_i], diacritics.breve)
			elseif penult_.accent == diacritics.acute then
				tokens[ultima_i] = add(tokens[ultima_i], diacritics.macron)
			end
		end
		
		-- An accent on the antepenult implies a short ultima.
		local antepenult_ = vowels[3]
		if antepenult_ and antepenult_.accent and ultima.length == "either" then
			tokens[ultima_i] = add(tokens[ultima_i], diacritics.breve)
		end
	end
	
	if return_tokens then
		return tokens
	else
		return table.concat(tokens)
	end
end

-- Returns the length ("long", "short" or "either") of a syllable specified by
-- its position from the end of the word (1 = last syllable).
-- Raises an error if `word` is not a string, if `syllable` is not a number,
-- or if the word has fewer syllables than requested.
local function length_at(word, syllable)
	-- Validate before tokenizing, so that a bad argument produces these
	-- messages rather than a failure inside the string functions.
	if type(word) ~= "string" then
		error("First argument of length_at should be a string.")
	end
	
	if type(syllable) ~= "number" then
		error("Second argument of length_at should be a number.")
	end
	
	local tokens = tokenize(word)
	
	local syllable_count = 0
	for _, token in m_table.reverseIpairs(tokens) do
		local length = get_length(token)
		if length then
			syllable_count = syllable_count + 1
			if syllable_count == syllable then
				return length
			end
		end
	end
	
	if syllable_count < syllable then
		error("Length for syllable " .. syllable .. " from the end of the word was not found.")
	end
end

-- Returns the rough or smooth breathing found in `token`, or nil if there is none.
local function find_breathing(token)
	local breathing_class = "([" .. diacritics.rough .. diacritics.smooth .. "])"
	return strmatch(token, breathing_class)
end

-- True when both tokens carry the same breathing, or when neither has one.
local function has_same_breathing_as(token1, token2)
	local breathing1 = find_breathing(token1)
	local breathing2 = find_breathing(token2)
	return breathing1 == breathing2
end

-- Gives `token` the length named by the string `length`: a macron for "long",
-- a breve for "short". Any other value returns the token unchanged.
local function change_length(length, token)
	local mark_for = {
		long = diacritics.macron,
		short = diacritics.breve,
	}

	local mark = mark_for[length]
	if not mark then
		return token
	end
	return add(token, mark)
end

--[[
	Take two words, mark implied length on each, then harmonize any macrons and
	breves that disagree: walking both words token by token from the start,
	a vowel of undetermined length in one word receives the length of the
	matching vowel in the other. The walk stops at the first position where
	the words differ (ignoring diacritics) or have different breathings.

	Returns both words as decomposed (NFD) strings.
]]
local function harmonize_length(word1, word2)
	-- Decompose both words, so that the early return below hands back the
	-- same normalisation form as the normal path and so that either_vowel can
	-- match letters hidden inside precomposed characters of word2.
	word1, word2 = strnfd(word1), strnfd(word2)
	-- Do nothing if there are no vowel letters that could be ambiguous.
	if not (strfind(word1, either_vowel) or strfind(word2, either_vowel)) then
		return word1, word2
	end
	
	-- Work on private copies, so that a shared token table is never modified.
	local tokens1 = copy(mark_implied_length(word1, true))
	local tokens2 = copy(mark_implied_length(word2, true))
	-- Only read, never written, so the cached tables can be used directly.
	local strip1, strip2 = tokenize(strip_accent(word1)), tokenize(strip_accent(word2))
	
	-- ipairs, not pairs: the comparison must run in order from the first
	-- token, because it stops at the first mismatch.
	for i, token1 in ipairs(tokens1) do
		local token2 = tokens2[i]
		
		if not token2
				or strip1[i] ~= strip2[i]
				or not has_same_breathing_as(token1, token2) then
			break
		end
		
		local length1, length2 = get_length(token1), get_length(token2)
		if length1 and length2 and length1 ~= length2 then
			if length1 == "either" then
				tokens1[i] = change_length(length2, token1)
			elseif length2 == "either" then
				tokens2[i] = change_length(length1, token2)
			end
		end
	end
	
	return table.concat(tokens1), table.concat(tokens2)
end

--[[
	Weight of the nth syllable from the end of the word; `position` defaults
	to 1, the last syllable.
	Returns "heavy" for a long vowel or for a vowel followed by more than one
	non-vowel token, "light" for a short vowel that is not, and nil when the
	vowel's length is undetermined, when the syllable does not exist, or when
	`word` is empty or missing.
]]
local function get_weight(word, position)
	if not if_not_empty(word) then
		return nil
	end
	local tokens = tokenize(word)
	position = position or 1

	-- Find nth vowel from end of word.
	local vowel_at, vowel_length
	local vowels_seen = 0
	for i, token in m_table.reverseIpairs(tokens) do
		local length = get_length(token)
		if length then
			vowels_seen = vowels_seen + 1
			if vowels_seen == position then
				vowel_at, vowel_length = i, length
				break
			end
		end
	end

	if not vowel_at then
		return nil
	end
	if vowel_length == "long" then
		return "heavy"
	end

	-- Count consonants after the vowel, up to the next vowel token.
	local following = 0
	local total = #tokens
	local i = vowel_at + 1
	while i <= total and not contains_vowel(tokens[i]) do
		following = following + 1
		i = i + 1
	end

	if following > 1 then
		return "heavy"
	end
	if vowel_length == "short" then
		return "light"
	end
	return nil
end

--[[
	Add accent mark at position. A positive position refers to the nth vowel
	from the beginning of the word, a negative one to the nth vowel from the
	end. Respects the rules of accent, which may move the accent or change
	its type. A word that already has a tonal accent is returned unchanged
	(in decomposed form).
	Examples:
	- δημος,	1		=> δῆμος
	- προτερᾱ,	1		=> προτέρᾱ	(position changed to 2 because ultima is long)
	- μοιρα,	1, true	=> μοῖρα	(circumflex can be added because ultima is
										ambiguous)
	- χωρᾱ,		1, true	=> χώρᾱ		(circumflex can't be added because ultima
										is long)
	- τοιουτος,	2		=> τοιοῦτος	(circumflex because ultima is short)
	
	Arguments:
	- word:					string	(hopefully an Ancient Greek word or stem)
	- syllable_position:	number	(non-zero; a positive value must not exceed
										the number of monophthongs or diphthongs
										in the word)
	- options:				table or nil
		- circumflex		boolean		(add a circumflex if allowed)
		- synaeresis		boolean		(accent can fall before εω in penult
											and ultima: πόλεως)
		- short_diphthong	boolean		(word-final οι, αι count as short)
		- force_antepenult	boolean		(keep the accent on the antepenult even
											when the ultima is long)
	
	Returns the accented word as a decomposed string.
	Raises an error for an invalid position or when no vowel exists at the
	resulting position.
]]
local function add_accent(word, syllable_position, options)
	-- local check = _check("add_accent")
	-- check(1, word, "string")
	-- check(2, syllable_position, "number")
	-- check(3, options, "table", true)

	assert(type(word) == "string")
	assert(type(syllable_position) == "number")
	assert(type(options) == "table" or type(options) == "nil")
	
	word = strnfd(word)
	if strfind(word, tonal_diacritic) then
		return word
	end
	
	options = options or {}
	
	-- Copy, because the token table is modified below.
	local tokens = copy(tokenize(word))
	-- vowels[1] is the last vowel of the word, vowels[2] the one before it, etc.
	local vowels = get_vowel_info(tokens, options.short_diphthong)
	local vowel_count = #vowels
	
	-- Convert positions in relation to the beginning of the word
	-- to positions in relation to the end of the word.
	-- The farthest back that an accent can be placed is 3 (the antepenult),
	-- so that is the greatest allowed position.
	if syllable_position > 0 then
		syllable_position = math.min(3, vowel_count - syllable_position + 1)
	-- If the position is in relation to the end of the word and it is greater
	-- than the length of the word, then reduce it to the length of the word.
	-- This is for practical reasons. Positions in relation to the beginning of
	-- the word do not need leeway.
	elseif syllable_position < 0 then
		syllable_position = math.min(-syllable_position, vowel_count)
	end
	
	-- From here on syllable_position counts from the end of the word.
	if syllable_position == 0 then
		error("Invalid position value " .. syllable_position .. ".")
	elseif syllable_position > vowel_count then
		error("The position " .. syllable_position .. " is invalid, because the word has only " .. vowel_count .. " vowels.")
	end
	
	-- Apply accent rules to change the accent's position or type.
	local accent_mark = options.circumflex and diacritics.circum or diacritics.acute
	local ultima = vowels[1]
	
	-- If synaeresis is selected, a final vowel sequence εω (optionally
	-- separated by an undertie) counts as one syllable.
	if syllable_position == 3 then
		local penult_ = vowels[2]
		-- A long ultima pushes the accent forward to the penult, unless
		-- force_antepenult is set or the synaeresis exception applies.
		-- The plain find calls test whether the token is exactly Ω/ω or Ε/ε.
		if not options.force_antepenult and (ultima.length == "long"
				and not (options.synaeresis
				and ("Ωω"):find(tokens[ultima.index], 1, true)
				and ("Εε"):find(tokens[penult_.index], 1, true)
				and (ultima.index == penult_.index + 1
				or ultima.index == penult_.index + 2
				and tokens[penult_.index + 1] == mw.ustring.char(0x035C)))) then
			syllable_position = 2
		else
			-- The antepenult can only take an acute.
			accent_mark = diacritics.acute
		end
	end
	
	if syllable_position == 2 then
		-- Long penult before short ultima takes a circumflex; before a long
		-- ultima only an acute is possible. Otherwise the requested mark stays.
		if ultima.length == "short" and vowels[2].length == "long"  then
			accent_mark = diacritics.circum
		elseif ultima.length == "long" then
			accent_mark = diacritics.acute
		end
	end
	
	local vowel = vowels[syllable_position]
	if not vowel then
		error('No vowel at position ' .. syllable_position ..
			' from the end of the word ' .. word .. '.')
	end
	-- A short vowel cannot carry a circumflex.
	if vowel.length == "short" then
		accent_mark = diacritics.acute
	end
	
	local i = vowel.index
	tokens[i] = add(tokens[i], accent_mark)
	
	return table.concat(tokens)
end

-- Runs a named syllable test on `word`.
--
-- Parameters:
--   word    string
--   func    string, name of the test; currently only "eq" exists
--   number  number, operand of the test
-- "eq" returns true when the word has exactly `number` vowel tokens
-- (syllables) and false otherwise.
-- Raises an error when `func` is missing or names no known test.
local function syllables(word, func, number)
	assert(type(word) == "string")
	assert(type(func) == "string" or type(func) == "nil")
	assert(type(number) == "number" or type(number) == "nil")
	
	if not func then
		error('No function specified')
	end
	
	local functions = {
		eq = function (word_, number_)
			local vowels = 0
			for _, token in ipairs(tokenize(word_)) do
				if contains_vowel(token) then
					vowels = vowels + 1
					-- Stop early once the count can no longer match.
					if vowels > number_ then
						return false
					end
				end
			end
			return vowels == number_
		end
	}
	
	-- Keep the requested name in `func`: overwriting it with the lookup result
	-- left nil to concatenate into the error message for unknown names.
	local handler = functions[func]
	if not handler then
		error('No function ' .. func)
	end
	return handler(word, number)
end
-- ################### FIN DIACRITICOS ###################



-- Returns the i-th UTF-8 character of `s` (1-based). `i` may be a number or a
-- string that converts to one; anything else raises an error.
-- Single characters are fetched very often, so an out-of-bounds index
-- (zero, negative, or past the end) simply gives ''.
local function fetch(s, i)
	local position = tonumber(i)
	if type(position) ~= "number" then
		error("fetch requires a number or a string equivalent to a number as its second argument.")
	end

	if position == 0 then
		return ""
	end

	-- Count down to the wanted character.
	local remaining = position
	for character in string.gmatch(s, "[\1-\127\194-\244][\128-\191]*") do
		remaining = remaining - 1
		if remaining == 0 then
			return character
		end
	end

	return ""
end

-- Characters used when building and inspecting the IPA output.
-- Combining diacritics are tricky, so the combining ones are built from
-- their code points with U() rather than typed literally.
local tie = U(0x35C)				-- tie bar
local nonsyllabic = U(0x32F)		-- combining inverted breve below
local high = U(0x341)				-- combining acute tone mark
local low = U(0x340)				-- combining grave tone mark
local rising = U(0x30C)				-- combining caron
local falling = diacritics.Latin_circum	-- combining circumflex
local midHigh = U(0x1DC4)			-- mid–high pitch
local midLow = U(0x1DC6)			-- mid–low pitch
local highMid = U(0x1DC7)			-- high–mid pitch
local voiceless = U(0x325)			-- combining ring below
local aspirated = 'ʰ'	-- superscript h, written literally
local macron = '¯'	-- literal macron character (not built with U())
local breve = '˘'	-- literal breve character (not built with U())

--- Test whether `text` as a whole belongs to the character class named `X`.
-- Class definitions come from m_data.chars. Most of them are character-set
-- contents and get wrapped in [...]; "frontDiphth" and "Greekdiacritic" are
-- already complete patterns and are only anchored.
-- @param text string to test (a missing value yields false).
-- @param X    name of the class in m_data.chars (a missing value yields false).
-- @return false when an argument is missing, otherwise the result of strfind.
-- Raises an error, attributed to the caller, when the class is unknown.
local function is(text, X)
	if not (text and X) then
		return false
	end
	
	local class = m_data.chars[X]
	if not class then
		error('No data for "' .. X .. '".', 2)
	end
	
	local anchored
	if X ~= "frontDiphth" and X ~= "Greekdiacritic" then
		anchored = "^[" .. class .. "]$"
	else
		anchored = "^" .. class .. "$"
	end
	return strfind(text, anchored)
end

-- Environment predicates that conditions can invoke with the "~" syntax
-- (see decode). Each one receives the term and a character position and
-- inspects the character(s) that FOLLOW that position.
local env_functions = {
	-- True when the following letter is a front vowel, or the following two
	-- letters form a front diphthong whose second letter carries no diaeresis.
	preFront = function(term, index)
		local first = fetch(term, index + 1)
		local second = fetch(term, index + 2)
		local front_vowel = is(strip_accent(first), "frontVowel")
		if front_vowel then
			return front_vowel
		end
		return is(strip_accent(first .. second), "frontDiphth") and not is(second, "iDiaer")
	end,
	-- True when the following letter is an iota without diaeresis.
	isIDiphth = function(term, index)
		local following = fetch(term, index + 1)
		if strip_accent(following) ~= 'ι' then
			return false
		end
		return not m_data[following].diaer
	end,
	-- True when the following letter is an upsilon without diaeresis.
	isUDiphth = function(term, index)
		local following = fetch(term, index + 1)
		if strip_accent(following) ~= 'υ' then
			return false
		end
		return not m_data[following].diaer
	end,
	-- True when the following character is a macron or a breve.
	hasMacronBreve = function(term, index)
		local following = fetch(term, index + 1)
		return following == macron or following == breve
	end,
}

--- Evaluate a condition string against position `x` of `term`.
--
-- Compound conditions are joined with "+" (and) or "/" (or). The condition
-- is split at its FIRST operator: the part before it is a simple test, the
-- part after it is decoded recursively, so "a+b/c" reads as a and (b or c).
--
-- Simple tests start with an offset relative to the current character
-- (-1 is the previous character, 0 the current one, 1 the next one):
--   * offset=char   the character at that offset equals `char`;
--   * offset.key    looks up `key` in that character's data table;
--   * offset~name   calls env_functions[name], if it exists, at that offset.
--
-- @param condition string describing the test.
-- @param x         position of the character under consideration.
-- @param term      the term being converted.
-- @return a truthy or falsy value; nothing when no syntax matches.
local function decode(condition, x, term)
	if strfind(condition, '[+/]') then
		-- Everything up to the first operator, the operator itself, and the
		-- (possibly still compound) remainder.
		local left, operator, right = strmatch(condition, "^([^/+]-)([/+])(.*)$")
		if not (left or right) then
			error('Condition "' .. tostring(condition) .. '" is improperly formed')
		end
		
		if operator == '/' then		-- logical operator: or
			return decode(left, x, term) or decode(right, x, term)
		elseif operator == '+' then	-- logical operator: and
			return decode(left, x, term) and decode(right, x, term)
		end
		return
	end
	
	if strfind(condition, '=') then					-- character identity
		local offset, char = unpack(strsplit(condition, "="))
		return char == fetch(term, x + offset) -- out of bounds fetch gives ''
	end
	
	if strfind(condition, '%.') then				-- character quality
		local offset, quality = unpack(strsplit(condition, "%."))
		local entry = m_data[fetch(term, x + offset)]
		return entry and entry[quality] or false
	end
	
	if strfind(condition, '~') then					-- environment function
		local offset, func = unpack(strsplit(condition, "~"))
		local handler = env_functions[func]
		return handler and handler(term, x + offset) or false
	end
end

--- Resolve a data entry to a concrete value for position `x` of `term`.
-- @param p    a string or number (returned unchanged), or a sequential
--             table of alternatives. Each alternative is either a plain
--             string/number (taken unconditionally) or a pair
--             {condition, result}, where the result is used only if the
--             condition holds and may itself be another such entry.
-- @param x    position of the character under consideration.
-- @param term the term being converted.
-- @return the first applicable value; nothing when no alternative applies.
-- Raises an error when `p` is not a string, number or table.
local function check(p, x, term)
	local kind = type(p)
	if kind == 'string' or kind == 'number' then
		return p
	elseif kind ~= 'table' then
		error('"p" is of unrecognized type ' .. kind)
	end
	
	for _, candidate in ipairs(p) do
		local candidate_kind = type(candidate)
		if candidate_kind == 'string' or candidate_kind == 'number' then
			return candidate
		elseif candidate_kind == 'table' then
			local condition, result = candidate[1], candidate[2]
			if decode(condition, x, term) then
				if type(result) == 'string' then
					return result
				end
				-- Nested entry: resolve it with the same position and term.
				return check(result, x, term)
			end
		end
	end
end

--- Find the position of the last character of the current syllable.
-- The characters just before the next vowel decide how many of them are
-- left for the following syllable.
-- @param word    the remaining, not yet syllabified text.
-- @param nVowel  position of the next vowel (or stress mark) in `word`.
-- @param wordEnd truthy when there is no further vowel; the syllable then
--                runs to the end of `word`.
-- @return position in `word` at which the current syllable ends.
local function find_syllable_break(word, nVowel, wordEnd)
	if not word then error('The variable "word" in the function "find_syllable_break" is nil.') end
	
	if wordEnd then
		return strlen(word)
	end
	
	local before = fetch(word, nVowel - 1)
	
	if is(before, "liquid") then
		local second_before = fetch(word, nVowel - 2)
		if is(second_before, "obst") then
			-- obstruent + liquid
			return nVowel - 3
		end
		if second_before == aspirated and is(fetch(word, nVowel - 3), "obst") then
			-- obstruent + aspiration mark + liquid
			return nVowel - 4
		end
		return nVowel - 2
	end
	
	if is(before, "cons") then
		return nVowel - 2
	end
	
	local second_before = fetch(word, nVowel - 2)
	if before == aspirated and is(second_before, "obst") then
		-- obstruent + aspiration mark
		return nVowel - 3
	end
	if before == voiceless and second_before == 'r' then
		-- r + voiceless mark
		return nVowel - 3
	end
	return nVowel - 1
end

--- Split one IPA word into syllables joined by ".".
-- A stress mark 'ˈ' is moved to the start of its syllable and replaces the
-- "." before it; in a monosyllabic word it is dropped altogether.
-- A word in which no vowel can be found is kept whole as a single syllable.
-- @param word the IPA string of a single word.
-- @return the syllabified string, or nil when `word` is empty.
local function syllabify_word(word)
	local syllables = {}
	--[[	cVowel means "current vowel", nVowel "next vowel",
			sBreak "syllable break".							]]--
	local cVowel, nVowel, sBreak, stress, wordEnd, searching
	while word ~= '' do
		cVowel, nVowel, sBreak, stress = false, false, false, false
		
		-- First find where the current vowel (with anything attached to it) ends.
		searching = 1
		local cVowelFound = false
		while not cVowel do
			local letter = fetch(word, searching)
			local nextLetter = fetch(word, searching + 1)
			if cVowelFound then
				if (is(letter, "vowel") and nextLetter ~= nonsyllabic) or is(letter, "cons") or letter == '' or letter == 'ˈ' then
					cVowel = searching - 1
				elseif is(letter, "diacritic") then
					searching = searching + 1
				elseif letter == tie then
					-- A tie bar joins the vowel to the following one, so
					-- keep looking for that second vowel.
					cVowelFound = false
					searching = searching + 1
				else
					searching = searching + 1
				end
			elseif letter == '' then
				-- Ran off the end without finding a vowel (vowel-less word,
				-- or a dangling tie bar). fetch() returns '' forever past
				-- the end, so without this branch the search never stops.
				-- Treat the end of the word as the end of the nucleus; the
				-- next step then flags wordEnd and the rest is taken whole.
				cVowel = searching - 1
			else
				if is(letter, "vowel") then
					cVowelFound = true
				elseif letter == 'ˈ' then
					stress = true
				end
				searching = searching + 1
			end
		end
	
		-- Next find the following vowel (or stress mark), or the end.
		searching = cVowel + 1
		while (not nVowel) and (not wordEnd) do
			local letter = fetch(word, searching)
			if is(letter, "vowel") or letter == 'ˈ' then
				nVowel = searching
			elseif letter == '' then
				wordEnd = true
			else
				searching = searching + 1
			end
		end
		
		-- Finally find the syllable break point.
		sBreak = find_syllable_break(word, nVowel, wordEnd)
		
		-- Pull everything up to and including the syllable break.
		local syllable = substr(word, 1, sBreak)
		
		-- If there is a stress accent, move it to the beginning of the
		-- syllable, unless the word is monosyllabic, in which case it is
		-- removed altogether.
		if stress then
			if next(syllables) or syllable ~= word then
				syllable = 'ˈ' .. strsubn(syllable, 'ˈ', '')
			else 
				syllable = strsubn(syllable, 'ˈ', '')
			end
			stress = false
		end
		table.insert(syllables, syllable)
		word = substr(word, sBreak + 1)
	end
	
	local out = nil
	
	if #syllables > 0 then
		out = table.concat(syllables, '.')
		-- The stress mark already marks a boundary; drop the dot before it.
		out = strsubn(out, '%.ˈ', 'ˈ')
	end
	return out
end

--- Syllabify every space-separated word of `s`.
-- Words for which syllabify_word returns nothing (empty pieces) are left out.
-- @param s IPA string, words separated by single spaces.
-- @return the syllabified words joined with single spaces.
local function syllabify(s)
	local words = {}
	for _, chunk in ipairs(strsplit(s, ' ')) do
		local ipa = syllabify_word(chunk)
		if ipa then
			words[#words + 1] = ipa
		end
	end
	return table.concat(words, ' ')
end

--- Build the pronunciations of `term` for every period in `periods`.
-- @param term the prepared (lower-cased, normalised, reordered) Greek term.
-- @return pron, fone: two parallel lists of one-element tables. pron[i][1]
--         is the period label and fone[i][1] the syllabified transcription.
--         The two koiné stages (and likewise the two Byzantine stages) are
--         merged into one entry when their transcriptions are identical.
-- Raises an error when `term` is nil, or when a multi-character sequence
-- requested by the data has no entry of its own.
local function generar_pron(term)
	if not term then error('The variable "term" in the function "generar_pron" is nil.') end
	
	-- Conversion always starts at the first period, so every period gets
	-- an output list.
	local IPAs = {}
	for _, period in ipairs(periods) do
		IPAs[period] = {}
	end
	
	local length, x = strlen(term), 1
	while x <= length do
		local data = m_data[fetch(term, x)]
		-- Characters without data are skipped.
		if data then
			-- Check whether a multicharacter lookup is warranted; `advance`
			-- is the number of extra characters it covers.
			local advance = data.pre and check(data.pre, x, term) or 0
			local p = data.p
			if advance ~= 0 then
				local cluster = substr(term, x, x + advance)
				local cluster_data = m_data[cluster]
				if not cluster_data then
					error('No pronunciation data for the sequence "' .. tostring(cluster) .. '".')
				end
				-- Fall back to the single character's table when the
				-- sequence has none of its own.
				p = cluster_data.p or data.p
			end
			for _, period in ipairs(periods) do
				local sounds = IPAs[period]
				-- Indexed assignment tolerates check() yielding no value;
				-- table.insert would raise on a missing argument.
				local sound = check(p[period], x, term)
				sounds[#sounds + 1] = sound
			end
			x = x + advance
		end
		x = x + 1
	end
	
	-- Syllabified transcription of one period.
	local function transcription(period)
		return syllabify(table.concat(IPAs[period], ''))
	end
	
	local pron, fone = {}, {}
	
	-- Append one label/transcription pair to the output lists.
	local function add_entry(label, ipa)
		table.insert(pron, {label})
		table.insert(fone, {ipa})
	end
	
	-- Append an era that has an early and a late stage, collapsing the two
	-- when they sound the same.
	local function add_era(label, early, late)
		if early == late then
			add_entry(label, early)
		else
			add_entry(label .. " inicial", early)
			add_entry(label .. " final", late)
		end
	end
	
	add_entry("clásico", transcription("cla"))
	add_era("koiné", transcription("koi1"), transcription("koi2"))
	add_era("bizantino", transcription("byz1"), transcription("byz2"))
	
	return pron, fone
end

--- Fill args["pron"] and args["fone"] with the generated pronunciations.
-- The term is taken from pron_abc[titulo], else from args["ayuda"][1],
-- else from the title itself.
-- @param titulo page title.
-- @param args   argument table; it is modified in place and returned.
-- @return args
-- Raises an error when a macron or breve follows ε, ο, η or ω.
function export.procesar_pron_args(titulo, args)
	local term = pron_abc[titulo] or args["ayuda"][1] or titulo
	
	-- Normalise the spelling before any analysis.
	term = strlower(term)
	term = standardDiacritics(term)
	term = mark_implied_length(term)
	
	-- Reject length marks on the four vowels named in the error below.
	-- The check runs on the decomposed form, allowing other diacritics
	-- between the letter and the length mark.
	local decomposed = strnfd(term)
	local length_marks = diacritics.spacing_macron .. diacritics.spacing_breve .. diacritics.breve .. diacritics.macron
	local forbidden = "[εοηω][" .. diacritics.all .. "]*[" .. length_marks .. "]"
	if strfind(decomposed, forbidden) then
		error("Macrons and breves cannot be placed after the letters ε, ο, η, or ω.")
	end
	
	-- TODO: ambiguous-vowel detection still needs polishing.
	-- local ambig, ambig_letter_list
	-- if args.period == "cla" then
		-- ambig, ambig_letter_list = findAmbig(term)
	-- end
	
	-- Replace letter variants, then put the characters in pronunciation order.
	term = strsubn(term, 'ς', 'σ')
	term = strsubn(term, 'ῤ', 'ρ')
	term = pronunciationOrder(term)
	
	local pron, fone = generar_pron(term)
	args["pron"], args["fone"] = pron, fone
	
	return args
end

return export