Módulo:generar-pron/ast

La documentación para este módulo puede ser creada en Módulo:generar-pron/ast/doc
--Autor: Tmagc
--Adaptado de Módulo:generar-pron/es y revisado por Limotecariu

local export = {}

local insert = table.insert
local concat = table.concat

local m_table = require("Módulo:tabla")

local m_str = require("Módulo:String")

local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strsubb = m_str.gsubb
local strmatchit = m_str.gmatch
local strsubrep = m_str.gsub_rep
local strsplit = m_str.split
local strupper = m_str.upper
local strlower = m_str.lower
local strucfirst = m_str.ucfirst
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strstrip = m_str.strip
local substr = m_str.sub
local strlen = m_str.len
local strexplode = m_str.explode_utf8

local function strsub(s, a, b)
	local c, _ = strsubn(s, a, b)
	return c
end

--CONVENCION: mayúscula para patrones encerrados entre corchetes, minúscula para todo lo demás
local ag = u(0x0301) -- acute =  ́
local gr = u(0x0300) -- grave =  ̀
local circunflejo = u(0x0302) -- circumflex =  ̂
local virgulilla = u(0x0303) -- tilde =  ̃
local dieresis = u(0x0308) -- diaeresis =  ̈
local punto_subscrito = u(0x323) -- cambia la pronunciación de la "h" y la "ll"
local ac_primario = u(0x02C8)
local ac_secundario = u(0x02CC)

local diacritico = ag .. gr .. circunflejo
local DIACRITICO = "[" .. diacritico .. "]"
local tilde = ag .. gr
local TILDE = "[" .. tilde .. "]"
local acentos_ipa = ac_primario..ac_secundario
local ACENTOS_IPA = "[" .. acentos_ipa .. "]"

local divsil = u(0xFFF0)
local divsil_fijo = u(0xFFF1)
local sepsil = "%-." .. divsil .. divsil_fijo
local SEPARADORES_SILABICOS = "[" .. sepsil .. "]"
local SALVO_SEPARADORES_SILABICOS = "[^" .. sepsil .. "]"
local seppal = "# "
local separador_excepto_palabras = diacritico .. acentos_ipa .. sepsil
local separador = separador_excepto_palabras .. seppal
local SEPARADOR = "[" .. separador .. "]"

local vocales = "aeiouüAEIOUÜ"
local VOCAL = "[" .. vocales .. "]"
local vocales_tildadas = "áéíóúàèìòùÁÉÍÓÚÀÈÌÒÙ"
local VOCAL_TILDADA = "[" .. vocales_tildadas .. "]"
local CONS = "[^" .. vocales .. vocales_tildadas .. separador .. "]"
local CONS_SALVO_H = "[^" .. vocales .. vocales_tildadas .. separador .. "h]"
local CONS_O_SEP_PALABRA = "[^" .. vocales .. vocales_tildadas .. separador_excepto_palabras .. "]"
--local T = "[^" .. vocales .. "lrɾjw" .. separador .. "]" -- obstruent or nasal

-- Para las notas al pie
local VOCAL_GENERAL = "[" .. vocales .. vocales_tildadas .. "]"

local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´]"

local TEMP_I = u(0xFFF1)
local TEMP_J = u(0xFFF2)
local TEMP_U = u(0xFFF3)
local TEMP_Y_CONS = u(0xFFF4)
local TEMP_QU = u(0xFFF5)
local TEMP_QU_CAPS = u(0xFFF6)
local TEMP_GU = u(0xFFF7)
local TEMP_GU_CAPS = u(0xFFF8)
local TEMP_H = u(0xFFF9)
local TEMP_Y = u(0xFFFA)
local TEMP_W = u(0xFFFB)
local TEMP_TCHR = u(0xFFFC)
local foot_boundari = "|"
local terminador = "#"
local comodin_yeista = "ɟ"
local comodin_chancho = "ĉ"
local comodin_jota = "ħ"
local comodin_deshielo = "ĥ"

local COMODINES = "[" .. TEMP_I .. TEMP_J .. TEMP_U .. TEMP_Y_CONS .. TEMP_QU .. TEMP_QU_CAPS .. TEMP_GU .. TEMP_GU_CAPS .. TEMP_H .. TEMP_Y .. TEMP_W .. TEMP_TCHR ..
comodin_yeista .. comodin_chancho .. comodin_jota .. comodin_deshielo .. "g" .. "%" .. foot_boundari .. terminador .. "]"

local recuperar_comodin = {
    [TEMP_I] = "i",
	[TEMP_J] = "i", -- i como semivocal
    [TEMP_U] = "u",
    [TEMP_Y_CONS] = "y",
    [TEMP_QU] = "qu",
    [TEMP_QU_CAPS] = "Qu",
    [TEMP_GU] = "gu",
    [TEMP_GU_CAPS] = "Gu",
    [TEMP_H] = "h", -- h que no se aspirada
    [TEMP_Y] = "i", --sufijos -ay/-ey/-oy/-uy
	[TEMP_W] = "w̝", -- hueso, huevo, etc. (la otra w ya la procesamos antes)
	[TEMP_TCHR] = "tchr",
	[comodin_jota] = "h",  -- fake aspirated "h" to real "h"
	[comodin_chancho] = "t͡ʃ", -- fake "ch" to real "ch"
	[comodin_yeista] = "ʝ", --phonetic and "ɟ͡ʝ" or "ʝ", -- fake "y" to real "y
	[comodin_deshielo] = "desh",
	["g"] = "ɡ", -- U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
	[foot_boundari] = "&#124;",
	[terminador] = "",
}

local recuperar_comodin_silabeo = {
    [TEMP_I] = "i",
	[TEMP_J] = "i", -- i como semivocal
    [TEMP_U] = "u",
    [TEMP_Y_CONS] = "y",
    [TEMP_QU] = "qu",
    [TEMP_QU_CAPS] = "Qu",
    [TEMP_GU] = "gu",
    [TEMP_GU_CAPS] = "Gu",
    [TEMP_H] = "h", -- h que no se aspirada
    [TEMP_Y] = "i", --sufijos -ay/-ey/-oy/-uy
	[TEMP_W] = "u", -- hueso, huevo, etc. (la otra w ya la procesamos antes)
	[TEMP_TCHR] = "tchr",
	[comodin_jota] = "h",  -- fake aspirated "h" to real "h"
	[comodin_chancho] = "ch", -- fake "ch" to real "ch"
	[comodin_yeista] = "y", --phonetic and "ɟ͡ʝ" or "ʝ", -- fake "y" to real "y
	[comodin_deshielo] = "desh",
	["ɡ"] = "g", -- AL REVÉS QUE EN LA OTRA TABLA
	[foot_boundari] = "&#124;",
	[terminador] = "",
}


local no_acentuado = m_table.listToSet({
	"el", "la", "los", "las", -- artículos
	"me", "te", "se", "nos", "vos", "lu", "la", "lo", "los", "les", -- pron. objeto
	"que", "quien", "como", "cuando", "onde", "u", "au", "cuantu", "cuanta", "cuanto", "cuantos", "cuantes", -- pron. relativos
	"y", "ya", "o", "u", "nin", -- conjunciones
	"de", "del", "a", "al", -- preposiciones y articulos
	"por", "en", "con", "sin", "tras", -- más preposiciones
	--"mas", --pero REVISAR, hay otro “mas”
	"so", --de so pretexto
	"si", -- ??
--	"sol", "col", "cola", "colo", "coles", "polos", "pela", "pelo", "pelos", "peles" REVISAR, puede haber variantes tónicas
})

-- aislar_diacriticos salvo ñ, ü y los puntos subscritos
local function aislar_diacriticos(text)
	text = strnfd(text)
	text = strsubn(text, ".[" .. virgulilla .. dieresis .. punto_subscrito .. "]", {
		["n" .. virgulilla] = "ñ",
		["N" .. virgulilla] = "Ñ",
		["u" .. dieresis] = "ü",
		["U" .. dieresis] = "Ü",
		["h" .. punto_subscrito] = "ḥ",
		["H" .. punto_subscrito] = "Ḥ",
		["l" .. punto_subscrito] = "ḷ",
		["L" .. punto_subscrito] = "Ḷ",
	})
	return text
end

local function quitar_diacriticos(text) -- salvo virgulilla y diéresis
	text = strnfd(text)
	text = strsubn(text, "[" .. ag .. gr .. circunflejo .. ac_primario .. ac_secundario .. "]", "")
	return strnfc(text)
end
-- convert i/u between vowels to glide
local vowel_to_glide = { ["i"] = "j", ["u"] = "w" }
local vowel_to_glide_silabeo = { ["i"] = TEMP_J, ["u"] = TEMP_W }
	
local tildar = {
	["a"] = "á",
	["e"] = "é",
	["i"] = "í",
	["o"] = "ó",
	["u"] = "ú",
}

local quitar_tilde = {
	["á"] = "a",
	["é"] = "e",
	["í"] = "i",
	["ó"] = "o",
	["ú"] = "u",
}

local diacritico_a_IPA = { [ag] = ac_primario, [gr] = ac_secundario, [circunflejo] = "" }
local pron_abc = {{"a"},{"be"},{"ce"},{"de"},{"e"},{"efe"},{"gue"},{"hache"},{"i"},
	{"jota","yota"},{"ka"},{"ele"},{"eme"},{"ene"},{"o"},{"pe"},{"cu"},{"erre"},{"ese"},{"te"},{"u"},
	{"uve"},{"uve doble"},{"xe","equis"},{"ye","y griega"},{"zeta","zeda"},{"eñe"}}

local function normalizar(texto)
	texto = strlower(texto)
	texto = aislar_diacriticos(texto)
	texto = strsubrep(texto, PUNTUACION, " | ") -- convierto lo que delimite fragmentos a los IPA foot boundaries |
	texto = strsubrep(texto, PUNTUACION_EXTRA, "") -- elimino la puntuación restante que haya quedado
	texto = strsubrep(texto, "[%-‐](yo?s?)", "%1") -- sustituyo pronombres pospuestos con guion (-y/-yos/-ys)
	texto = strsubrep(texto, "[%-‐]", " ") --los guiones pasan a ser espacios (austro-húngaro, franco-italiano)

    texto = strsubrep(texto, "%s*|%s*|%s*", " | ") --finalmente, elimino las barras y espacios de más
    texto = strsubrep(texto, "%s+", " ")
	texto = strstrip(texto, "[%s|]+")
	
	return texto
end

local function determinar_acentuacion(acento_id)
	if acento_id then
		if acento_id == -1 then
			return "monosílaba"
		elseif acento_id == 0 then
			return "doble acentuada"
		elseif acento_id == 1 then
			return "aguda"
		elseif acento_id == 2 then
			return "llana"
		elseif acento_id == 3 then
			return "esdrújula"
		elseif acento_id >= 4 then
			return "sobreesdrújula"
		else
			return nil
		end
	else
		return nil
	end
end

--Convierte los diacríticos a notación IPA
local function reemplazar_tildes(w)
	local silabas = strsplit(w, SEPARADORES_SILABICOS)
	local L = #silabas
	local sufijo = nil
	
	if L >= 4 and silabas[L-1] == "men" and silabas[L] == "te" then
		sufijo = {ac_primario.."men", divsil.."te"}
		silabas[L-1], silabas[L] = nil, nil
		L = L - 2
	end
	
	local sustituido = false
	local sust = false
	
	for i,silaba in ipairs(silabas) do
		silabas[i], sust = strsubb(silaba, "^(.*)(" .. DIACRITICO .. ")(.*)$",	function(pre, diacr, post) return diacritico_a_IPA[diacr] .. pre .. post end)
		sustituido = sustituido or sust
	end
	
	if not sustituido then
		if L > 1 then
			if strfind(w, "[^" .. vocales .. "ns]$") or strfind(w, CONS..CONS.."$") then -- las palabras que terminan en doble consonante son agudas a menos que lleven tilde https://www.rae.es/espanol-al-dia/por-que-biceps-se-escribe-con-tilde-si-es-una-palabra-llana-terminada-en-s
				silabas[L] = ac_primario .. silabas[L] --aguda
			else
				silabas[L - 1] = ac_primario .. silabas[L - 1] --grave
			end
		else --Si L==1, entonces el monosílabo es tónico ya que no tiene circunflejos
			silabas[1] = ac_primario .. silabas[1]
		end 
	end
	
	local p = silabas[1]
	
	for i,s in ipairs(silabas) do
		if i > 1 then
			if strfind(s, ACENTOS_IPA) then
				p = p .. s
			else
				p = p .. divsil .. s
			end
		end
	end
	
	if sufijo then
		return p .. sufijo[1] .. sufijo[2]
	else
		return p
	end
end

--Se obtiene la longitud silábica, acentuación (hacerlo con la ayuda, no con el original)
local function obtener_informacion(w)
	if type(w) ~= "string" then
		return nil	
	end
	local silabas = strsplit(w, SEPARADORES_SILABICOS)
	
	local L = #silabas
	local sufijo = nil
	if L >= 4 and silabas[L-1] == "men" and silabas[L] == "te" then
		sufijo = "-men-te"
		silabas[L-1], silabas[L] = nil, nil
		L = L - 2
	end
	
	local acento_id = nil
	local acento_idx = nil
	local lleva_tilde = nil
	
	for i,silaba in ipairs(silabas) do
		lleva_tilde = strfind(silaba, "^(.*)(" .. VOCAL_TILDADA .. ")(.*)$")
		if lleva_tilde then
			acento_id = L == 1 and -1 or L - i + 1
			acento_idx = i
			break
		end
	end
	
	if not lleva_tilde then
		if L == 1 then
			acento_id = -1 --monosílabo
			acento_idx = 1
		else
			if strfind(w, "[^" .. vocales .. "ns]$") or strfind(w, CONS..CONS.."$") then -- las palabras que terminan en doble consonante son agudas a menos que lleven tilde https://www.rae.es/espanol-al-dia/por-que-biceps-se-escribe-con-tilde-si-es-una-palabra-llana-terminada-en-s
				acento_id = 1 -- aguda
				acento_idx = L
			else
				acento_id = 2 -- grave
				acento_idx = L - 1
			end
		end
	end
	
	if sufijo then --por ahora el único caso contemplado son los adverbios que terminan en -mente
		return L + 2, determinar_acentuacion(0), "ente" --0 para indicar doble acentuación
	else
		local x = nil
		if lleva_tilde then
			x = strfind(silabas[acento_idx], VOCAL_TILDADA)
			if not x then
				return nil,nil
			end
		else
			x = strfind(silabas[acento_idx], VOCAL)
			if not x then
				return nil,nil
			end
			local q = substr(silabas[acento_idx], x-1, x+1)
			if q == "que" or q == "qui" or q == "gue" or q == "gui" then
				x = x+1
			end
		end
		return L, determinar_acentuacion(acento_id)
	end
end

--Pasos finales al dividir en sílabas (2 puntos de entrada: la función de generar
--pronunciación y la que separa en sílabas propiamente dicha)
local function separar_en_silabas_final(word)
	
	--PASO 1: Divida tras cada grupo de vocales + última consonante que no sea h, o bien entre consonantes que estén
	--rodeades por vocales, pero sin contar la 'h' (prohibir --> prohi.bir)
	
	word = strsubrep(word, "(" .. VOCAL .. DIACRITICO .. "*)(" .. CONS_SALVO_H .. VOCAL .. ")", "%1"..divsil.."%2")
	word = strsubrep(word, "(" .. VOCAL .. DIACRITICO .. "*" .. CONS .. "+)(" .. CONS .. VOCAL .. ")", "%1"..divsil.."%2")

	--PASO 2: Vuelva a juntar algunos grupos de consonantes
	
	-- Juntar consonantes fricativas y oclusivas con l y con r. A ecepción de dl. --> En asturiano también existe "zr", existe dl??
	word = strsubn(word, "([zpbfvkctg])"..divsil.."([lrɾ])", divsil.."%1%2")
	word = strsubn(word, "d%"..divsil.."([rɾ])", divsil.."d%1")
	
	 -- swing, switch, etc.
	word = strsubn(word, "s"..divsil.."w", divsil.."sw")
	-- Juntar ch, sh, zh, ph, th, dh, fh, kh or gh. NO Juntar bh (subhumano, subhúmedo)
	word = strsubn(word, "([cszptdfkg])"..divsil.."h", divsil.."%1h")
	-- Juntar las ll y rr
	word = strsubn(word, "l"..divsil.."l", divsil.."ll")
	word = strsubn(word, "r"..divsil.."r", divsil.."rr")
	
	-- Juntar -uyir
	word = strsubn(word, "u"..divsil..TEMP_Y_CONS.."i([ɾr])$", "u"..TEMP_Y_CONS.."i%1")
	word = strsubn(word, "u"..divsil..comodin_yeista.."i([ɾr])$", "u"..comodin_yeista.."i%1")
	
	-- Juntar tz (([[Ertzaintza]], [[quetzal]], [[hertziano]], palabras Vascas, Nahualt o Alemanas) -- dudo de que sea lo correcto
	word = strsubn(word, "t"..divsil.."z", divsil.."tz")

	-- PASO 3: separar los hiatos 
	-- vocal abierta + vocal abierta (opcional h)
	word = strsubrep(word, "([aeoAEO]" .. DIACRITICO .. "*)(h?[aeo])", "%1"..divsil.."%2")
	-- vocal + vocal tildada
	word = strsubrep(word, "("..VOCAL .. DIACRITICO .. "*)(h?" .. VOCAL .. TILDE .. ")", "%1"..divsil.."%2")
	-- vocal tildada + vocal
	word = strsubn(word, "(" .. VOCAL .. TILDE .. ")(h?"..VOCAL..")", "%1"..divsil.."%2")
	
	-- verbos en -uir
	word = strsubn(word, "ui([ɾr])$", "u"..divsil.."i%1")

	-- Finalmente, vuelvo a juntar vocales repetidas (en asturiano, en general las vocales dobles se simplifican)
	word = strsubn(word, "("..VOCAL..")("..TILDE.."?)"..divsil.."(h?%1)", "%1%2%3")

	word = strsub(word, SEPARADORES_SILABICOS.."+", divsil)
	word = strstrip(word, SEPARADORES_SILABICOS.."+")

	return word
end

-- FUNCION QUE DIVIDE EN SILABAS (SÓLO PARA UNA PALBRA; SI HAY VARIAS PALABRAS HACERLO DE A UNA POR VEZ POR FAVOR)
local function separar_en_silabas(w)
	if type(w) ~= "string" or strfind(w, "%s") then
		return nil
	end
	
	local p = normalizar(w)
	
	p = strsubn(p, "|", "")
	p = strsubrep(p, "%s+", divsil_fijo) --cambio espacio normalizado por divisor de sílaba fijo
	
	p = strsubn(p, "y(" .. VOCAL .. ")", TEMP_Y_CONS .. "%1")
	
	--Cambio tchr por el comodin
	p = strsubn(p, "tchr", TEMP_TCHR)

	-- Mantenemos junta la 'sh' cuando se trate de desh- (deshuesar, deshonra, deshecho), en los demás casos la separamos.
	-- Para hacerlo, cambiamos la h en los segundos casos
	p = strsubn(p, "^([Dd]es)h", "%1" .. TEMP_H)
	p = strsubn(p, "([ %-][Dd]es)h", "%1" .. TEMP_H)

	-- Cambiamos 'qu' y 'gu'
	-- Dicen que 'qu' va bien reemplazarlo, pero hace agua con 'quietud'
	p = strsubn(p, "qu(" .. VOCAL .. ")", TEMP_QU .. "%1")
	p = strsubn(p, "Qu(" .. VOCAL .. ")", TEMP_QU_CAPS .. "%1")
	p = strsubn(p, "gu(" .. VOCAL .. ")", TEMP_GU .. "%1")
	p = strsubn(p, "Gu(" .. VOCAL .. ")", TEMP_GU_CAPS .. "%1")

	--Agregamos glides (paranoia, baiano, abreuense, alauita, Malaui, marihuana, parihuela, antihielo, pelluhuano, náhuatl)
	--NOTA IMPORTANTE: conviene hacerlo de atrás hacia adelante para que no se generen cosas raras como 'an.tih.ie.lo'

	p = strsubrep(p, "(.*" .. VOCAL .. DIACRITICO .. "*)(h?)([iu])(" .. VOCAL .. ")",function (v1, h, iu, v2) return v1 .. divsil .. h .. vowel_to_glide_silabeo[iu] .. v2 end)

	-- Entro a la función que separa ya pre-procesado a formato IPA
	p = separar_en_silabas_final(p)
	
	p = strsubn(p, COMODINES, recuperar_comodin_silabeo)
	p = strsubn(p, divsil, "-")
	return strnfc(p)
end

-- EXPORTAR A IPA. Es decir, ya teniendo el texto re-escrito para facilitar la pronunciación.
local function generar_pron(text, phonetic)
	if type(text) ~= "string" then
		return {},{}
	end
	
	text = normalizar(text)
	
	--Comienzo a sustituir
	--Observación general: el orden en el que sustituimos importa
	
	-- Make prefixes unstressed unless they have an explicit stress marker; also make certain
	-- monosyllabic words (e.g. [[el]], [[la]], [[de]], [[en]], etc.) without stress marks be
	-- unstressed.
	local words = strsplit(text, " ")
	for i, word in ipairs(words) do
		if no_acentuado[word] then
			words[i] = strsubn(word, "^(.*" .. VOCAL .. ")", "%1" .. circunflejo) --pongo el circunflejo para indicar que no debe haber acentuación
		end
	end
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = concat(words, " ")
	text = strsubn(text, " | ", "# | #")
	text = "##" .. strsubn(text, " ", "# #") .. "##"

	--determinar el sonido apropiado para la y
	-- Nota: sufijos -ay/-ey/-oy/-uy se acentúan, pero no -ai/-ei/-oi/-ui
	-- Nota: -uy mapea a /uj/ mientras que -ui mapea a /wi/
	
	text = strsubb(text, "y(" .. VOCAL .. ")", comodin_yeista.."%1") -- ɟ -> comodín del yeísmo
	
	--Esto no es conveniente hacerlo
	--text = strsubn(text, "([aeou])y#", "%1" .. TEMP_Y .. "#") -- pongo marca privada por el momento
	--text = strsubn(text, "y", "i")

	--Necesitamos procesar sh/ch justo acá (para no hacer lío con la x exhausto (??))
	text = strsubn(text, "ch", comodin_chancho) -- otro comodín más
	text = strsubn(text, "#desh", comodin_deshielo) --comodín para preservar 'desh' (deshuesar, etc.)
	text = strsubn(text, "sh", "ʃ") --reemplazo sh
	text = strsubn(text, "zh", "ʒ") --reemplazo zh
	text = strsubn(text, comodin_deshielo, "#desh") --restauro desh
	text = strsubn(text, "#p([st])", "#%1") -- Saco la p inicial de psicología o pterodáctilo
	text = strsubn(text, "#[mg]n", "#n") -- Saco la g inicial de gnoseología y la m de mnemónico
	
	--tl al final de la palabra
	text = strsubn(text, "tl#", "t#")

	--x
	-- text = strsubn(text, "#x", "#s") -- reemplazo la x inicial: xenofobia, xilófono, etc., QUE PASA CON ESTO ??
	text = strsubn(text, "x", "ʃ") --> DIFERENCIA FUNDAMENTAL: cómo mapear la x

	--c, g, q
	text = strsubb(text, "c([ie])",  "θ"  .. "%1") -- Busco si hay ceceos y los sustituyo. ¿Por qué habían puesto la z?
	text = strsubn(text, "g([iey])", "x%1") -- must happen after handling of x above
	text = strsubn(text, "gu([ie])", "g%1")
	text = strsubn(text, "gü([ie])", "gu%1")
	text = strsubn(text, "ng([^aeiouüwhlr#])", "n%1") -- [[Bangkok]], [[ángstrom]], [[electroswing]], no aplica para [[branding]] (hay que acentuarla manualmente)
	text = strsubn(text, "qu([ie])", "k%1")
	text = strsubn(text, "ü", "u") -- [[Düsseldorf]], [[hübnerita]], obsolete [[freqüentemente]], etc.
	text = strsubn(text, "q", "k") -- [[quark]], [[Qatar]], [[burqa]], [[Iraq]], etc.

	-- map various consonants to their phoneme equivalent
	text = strsubn(text, "[cjñrv]", {["c"]="k", ["j"]="x", ["ñ"]="ɲ", ["r"]="ɾ", ["v"]="b" })

	-- ([[hielo]], [[enhiesto]], [[deshielo]], ...)
	local word_initial_hi, syl_initial_hi, initial_hi
	text, word_initial_hi = strsubb(text, "#h?[iy](" .. VOCAL .. ")", "#ʝ%1") -- [[hiena]], [[hieráticu]], diferencia con el español ?
	text, syl_initial_hi = strsubb(text, "(" .. CONS .. SEPARADORES_SILABICOS .. "*)h[iy](" .. VOCAL .. ")", "%1j%2")
	initial_hi = word_initial_hi or syl_initial_hi
	--  ([[huevo]], [[deshuesar]])
	text = strsubb(text, "(" .. CONS_O_SEP_PALABRA .. SEPARADORES_SILABICOS .. "*)hu(" .. VOCAL .. ")", "%1" .. TEMP_W .. "%2")

	
	-- ll
	text = strsubn(text, "ll#", "l#") -- [[krill]]
	text = strsubn(text, "ll", "ʎ")

	--Sustituyo rr y lr por r ¿no falta buscar el separador silábico en lr y sr?
	text = strsubn(text, "ɾɾ", "r")
	text = strsubn(text, "([#lnsθ])ɾ", "%1r") --([[alrededor]], [[malrotar]]), nr ([[enriquecer]], [[sonrisa]], etc.), sr ([[Israel]], [[desregular]], etc.), zr ([[Azrael]], [[cruzrojista]]), rr
	text = strsubn(text, "nn", "N") --doble n (ennoblecer) ¿por qué una N grande en lugar de meter un separador silábico?
	text = strsubn(text, "bb", "B") --doble b (subbase) ¿por qué una B grande en lugar de meter un separador silábico?

	text = strsubn(text, "(" .. CONS .. ")%1", "%1") -- elimino consonantes dobles [[Addis Abeba]], [[cappa]], [[descender]], [[crackear]]
	text = strsubn(text, "sθ", "θ") -- elimino sz como en [[fascinante]]

	-- restablezco las consonantes dobles, MUY OSCURO ESTO
	text = strsubn(text, "N", "nn")
	text = strsubn(text, "B", "bb")

	--sustitución de oclusivas (T) cuando se juntan con ptk --> esta parte no tiene sentido
	--local voice_stop = { ["p"] = "b", ["t"] = "d", ["k"] = "g" }
	--text = strsubn(text, "t(" .. SEPARADOR .. "*[sθ])", "!%1") -- eximir -ts-, -tz-
	--text = strsubn(text, "([ptk])(" .. SEPARADOR .. "*" .. T .. ")", function(stop, after) return voice_stop[stop] .. after end)
	--text = strsubn(text, "!", "t") -- recuperar -ts-, -tz-

	text = strsubn(text, "n([# .]*[bpm])", "m%1") --nb, bp, nm por mb, mp, mm (ejemplo: enviar, inmoral, etc.)

	text = strsubn(text, "h", "") --sacar la h muda

	-- i and u between vowels -> consonant-like substitutions: [[paranoia]], [[baiano]], [[abreuense]], [[alauita]],
	-- [[Malaui]], etc.; also with h, as in [[marihuana]], [[parihuela]], [[antihielo]], [[pelluhuano]], [[náhuatl]],
	-- etc. Add .* at the beginning so we go right-to-left, in the case of [[hawaiiano]] -> ha.wai.iano.
	text = strsubrep(text, "(.*" .. VOCAL .. DIACRITICO .. "*h?)([iu])(" .. VOCAL .. ")",
		function (v1, iu, v2) return v1 .. vowel_to_glide[iu] .. v2 end
	)
	
	-- switch -> swich
	text = strsubn(text, "t".."("..SEPARADORES_SILABICOS.."?"..comodin_chancho..")", "%1")
	
	words = strsplit(text, "[#(%s)]+")
	local ac_words = {}
	for _,word in ipairs(words) do
		if #word > 0 then
			if strfind(word, "|") then
				insert(ac_words, word)
			else
				insert(ac_words, reemplazar_tildes(separar_en_silabas_final(word)))
			end
		end
	end
	
	text = concat(ac_words, " ")
	text = strsubn(text, " | ", "# | #")
	text = "##" .. strsubn(text, " ", "# #") .. "##"

	--diphthongs; do not include TEMP_Y here
	text = strsubn(text, "u"..comodin_yeista.."iɾ", "wir") -- el -uyir antihiático
	text = strsubn(text, "i([aeou])", "j%1")
	text = strsubn(text, "u([aeo])", "w%1")
	text = strsubn(text, "ui", "uj") -- caso aparte que no se da en el español
	text = strsubn(text, "([aeiou])y", "%1j")
	--text = strsubn(text, TEMP_Y, "i") -- -ay/-ey/-oy/-uy
	text = strsubn(text, "([aeiou])i#", "%1j#") -- -ai/-ei/-oi/-ui
	text = strsubn(text, "i i#", "ij#")
	
	-- simplifico vocales dobles (propio del asturiano)
	text = strsubn(text, "("..VOCAL..")%1", "%1")
	
	-- z
	text = strsubb(text, "z", "θ")

	-- suppress syllable mark before IPA stress indicator
	text = strsubn(text, "%.(" .. ACENTOS_IPA .. ")", "%1")
	--make all primary stresses but the last one be secondary
	text = strsubrep(text, ac_primario.."(.+)"..ac_primario, ac_secundario.."%1"..ac_primario)
	
	-- g
	text = strsubn(text, "g", "ɣ")
	
	-- grafías dialectales
	text = strsubn(text, "ḷḷ", "ʈ͡ʂ")
	text = strsubn(text, "ḥ", "h")

	--phonetic transcription --> más fino, lo que se escribe entre [] corchetes
	if phonetic then
		local voiced = "mnɲbd"..comodin_yeista.."gʎ" .. TEMP_W
		local r_cluster = "ɾr"
		local tovoiced = {
			["θ"] = "θ̬",
			--["s"] = "z",
			--["f"] = "v",
		}
		local function voice(sound, following)
			return tovoiced[sound] .. following
		end

		-- aproximantes: convierto 
		local stop_to_fricative = {["b"] = "β", ["d"] = "ð"}
		local fricative_to_stop = {["β"] = "b", ["ð"] = "d"}
		text = strsubn(text, "[bd]", stop_to_fricative) --convierto todo a fricativa
		
		-- Excepciones a fricativas: consonante inicial, luego de m, n y luego de l-m-n(g) para la d
		text = strsubn(text, "([mnɲ]" .. SEPARADOR .. "*)([β])", --salvo las precedidas por m,n,ng (convierto de nuevo)
			function(nasal, fricative) return nasal .. fricative_to_stop[fricative] end
		)
		text = strsubn(text, "([lʎmnɲ]" .. SEPARADOR .. "*)([ð])",  --salvo las precedidas por lm,ln,lng, (convierto de nuevo)
			function(nasal_l, fricative) return nasal_l .. fricative_to_stop[fricative] end
		)
		text = strsubn(text, "(##" .. ACENTOS_IPA .. "*)([βð])", --salvo las sílabas iniciales (convierto de nuevo)
			function(stress, fricative) return stress .. fricative_to_stop[fricative] end
		)
		
		text = strsubn(text, "([ae])k("..SEPARADOR..")θ", "%1i%2θ") -- [[redacción]] ~ «redaición» (acc/ecc -> aic/eic)
		text = strsubn(text, "([ae])k("..SEPARADOR..")t", "%1u%2t") -- [[redactor]] ~ «redautor» (act/ect -> aut/eut)
		text = strsubn(text, "[βðkɣp]("..SEPARADOR..")([^"..vocales.."βðkɣp".."])", "%2%1%2") -- geminación para consonantes al final de silaba: absolutu -> assolutu
		text = strsubn(text, "n#", "ŋ#") -- velarización de la n final
		
		text = strsubn(text, "[td]", {["t"] = "t̪", ["d"] = "d̪"}) --dentalización

		text = strsubn(text, "([θ])(" .. SEPARADOR .. "*[" .. voiced .. r_cluster .. "])", voice)

		-- nasal assimilation before consonants
		local labiodental, dentialveolar, dental, alveolopalatal, palatal, velar =	"ɱ", "n̪", "n̟", "nʲ", "ɲ", "ŋ"
		local nasal_assimilation = {
			["f"] = labiodental,
			["t"] = dentialveolar, ["d"] = dentialveolar,
			["θ"] = dental,
			[comodin_chancho] = alveolopalatal,	["ʃ"] = alveolopalatal,	["ʒ"] = alveolopalatal,
			[comodin_yeista] = palatal, ["ʎ"] = palatal,
			["k"] = velar, ["x"] = velar, ["g"] = velar,
		}
		text = strsubn(text, "n(" .. SEPARADOR .. "*)(.)", function(stress, following) return (nasal_assimilation[following] or "n") .. stress .. following end)

		-- lateral assimilation before consonants
		text = strsubn(text, "l(" .. SEPARADOR .. "*)(.)",
			function(stress, following)
				local l = "l"
				if following == "t" or following == "d" then -- dentialveolar
					l = "l̪"
				elseif following == "θ" then -- dental
					l = "l̟"
				elseif following == comodin_chancho or following == "ʃ" then -- alveolopalatal
					l = "lʲ"
				end
				return l .. stress .. following
			end)

		--semivowels
		text = strsubn(text, "([aeouãẽõũ][iĩ])", "%1̯")
		text = strsubn(text, "([aeioãẽĩõ][uũ])", "%1̯")

		-- voiced fricatives are actually approximants
		text = strsubn(text, "([βðɣ])", "%1̞")
	end

	-- final conversions
	text = strsubn(text, COMODINES, recuperar_comodin)
	text = strsubn(text, divsil, ".")
	text = strnfc(text)
	
	return {{"pronunciación"}}, {{text}}
end

-- Punto de entrada externo, recibe el título de página y los argumentos de plantilla
function export.procesar_pron_args(titulo, args)
	local tit = titulo
	local vino_ayuda, ss_
	
	if #args["ayuda"] < 1 then
		args["ayuda"][1] = tit
	else
		vino_ayuda = true
	end

	if #args["fone"] < 1 and #args["fono"] < 1 then
		if #titulo == 1 then
			if titulo >= "a" and titulo <= "z" then
				args["ayuda"] = pron_abc[string.byte(titulo) - 96]
				args["tl"] = args["ayuda"]
			elseif titulo >= "A" and titulo <= "Z" then
				args["ayuda"] = pron_abc[string.byte(titulo) - 64]
				args["tl"] = args["ayuda"]
			end
		elseif titulo == "ñ" or titulo == "Ñ" then
			args["ayuda"] = pron_abc[27]
			args["tl"] = args["ayuda"]
		end
		local A = #args["ayuda"]
		local j = 1 -- indice de la ayuda
		local k = 1 -- cantidad de pronunciaciones insertadas (máximo 9)
		while k <= 9 and j <= A do
			local pron, fone = generar_pron(args["ayuda"][j], true)
			for i,_ in ipairs(pron) do
				insert(args["pron"], pron[i])
				insert(args["fone"], fone[i])
				k = k + 1
				if k > 9 then
					break
				end
			end
			j = j + 1
		end
	end
	
	if not strfind(args["ayuda"][1], VOCAL_GENERAL) then
		return args	
	end
	
	if not args["rima"][1] then
		local palabras = strsplit(args["ayuda"][1], "%s+")
		local rims = {}
		local _, fono = generar_pron(palabras[#palabras], false)
		for _,f in ipairs(fono) do
			local rim = f[1]
			rim = strsub(rim, "^.*"..ac_primario.."(.-)$", "%1")
			rim = strsub(rim, ".-".."("..VOCAL_GENERAL..".*"..")".."$", "%1")
			if rim and rim ~= "" then
				rims[rim] = true
			end
		end
		for rim,v in pairs(rims) do
			insert(args["rima"], rim)
		end
	end
	
	if strfind(args["ayuda"][1], "%s") then
		return args
	end
	
	if #args["d"] < 1 and #titulo > 1 and titulo ~= "ñ" and titulo ~= "Ñ" then
		if not args["ls"][1] and not args["ac"][1] and #titulo > 1 then
			local ls = {}
			local ac = {}
			for _,a in ipairs(args["ayuda"]) do
				ss_ = separar_en_silabas(a)
				local lon, ace = obtener_informacion(ss_)
				ls[lon] = true
				ac[ace] = true
			end
			for lon,_ in pairs(ls) do
				insert(args["ls"], lon)
			end
			for ace,_ in pairs(ac) do
				insert(args["ac"], ace)
			end
		end
		
		-- obtenida la “información” de la palabra en base a la “ayuda”, determino cómo mostrar la grafía original
		if not vino_ayuda then -- situación normal
			args["d"][1] = separar_en_silabas(args["ayuda"][1]) -- división en sílabas
		else -- palabra extranjera, vino con ayuda --> veo cómo quedaría mejor
			local tit_sombra = quitar_diacriticos(tit)
			local ss_ayuda = quitar_diacriticos(ss_)
			local ss = tit
			local j0 = 0 -- desde dónde debería buscar la próxima coincidencia de letras
			
			while true do
				local i = strfind(ss_ayuda, "[a-zA-ZÀ-ž]%-[a-zA-ZÀ-ž]")
				if not i or i < 1 then
					break
				end
				local a, b = substr(ss_ayuda, i, i), substr(ss_ayuda, i+2, i+2)
				local ab = a..b 
				local j = strfind(tit_sombra, ab, j0) -- veo si encuentro la separación
				if j then -- Si aparece en el título, entonces inserto separador ahí miemo
					tit_sombra = substr(tit_sombra, 1, j).."-"..substr(tit_sombra, j+1)
					ss = substr(ss, 1, j).."-"..substr(ss, j+1)
					j0 = j+2 -- incremento dos: por la primera letra y por el guion
				else --Sino, veo si al menos coincide con una de las dos letras
					local k = strfind(tit_sombra, a, j0) or math.huge
					local l = strfind(tit_sombra, b, j0) or math.huge
					if k < l and k >= 1 then
						tit_sombra = substr(tit_sombra, 1, k).."-"..substr(tit_sombra, k+1)
						ss = substr(ss, 1, k).."-"..substr(ss, k+1)
						j0 = k+2
					elseif l < k and l >= 2 then
						tit_sombra = substr(tit_sombra, 1, l-1).."-"..substr(tit_sombra, l)
						ss = substr(ss, 1, l-1).."-"..substr(ss, l)
						j0 = l+1 -- en este caso se agrega sólo el guion porque la letra considerada es la segunda
					end
				end
				ss_ayuda = substr(ss_ayuda, i+2) -- trunco la ayuda para obtener la siguiente separación
			end
			
			--junto sílabas que sólo tengan consonantes
			ss = "-"..ss.."-"
			ss = strsubrep(ss, "("..SEPARADORES_SILABICOS..CONS.."+)"..SEPARADORES_SILABICOS.."("..SALVO_SEPARADORES_SILABICOS.."-"..VOCAL_GENERAL..")", "%1%2")
			ss = strsubrep(ss, "("..SEPARADORES_SILABICOS..SALVO_SEPARADORES_SILABICOS.."-)"..SEPARADORES_SILABICOS.."("..CONS.."+"..SEPARADORES_SILABICOS..")", "%1%2")
			
			-- limpio los separadores de más
			ss = strsubn(ss, SEPARADORES_SILABICOS .. "+", "-")
			ss = strstrip(ss, SEPARADOR)
			
			args["d"][1] = ss
		end
	end
	
	return args
end

return export