-- Módulo:String/sustituir
-- The documentation for this module can be created at Módulo:String/sustituir/doc
local insert = table.insert
-- Patterns for the wiki formatting that doTempSubstitutions hides behind placeholder characters.
-- Conventions used by every entry:
--   * "\1" and "\2" stand for "[[" and "]]"; doTempSubstitutions swaps those in before matching.
--   * The first capture is the whole match; every later capture is a piece of it that gets
--     replaced by a placeholder and stored in subbedChars.
--   * A trailing "\0" is a flag, not part of the pattern: doTempSubstitutions strips it before
--     matching and uses it to mark the pattern as a term divider, which selects a different
--     second byte for the placeholder.
local patterns_ = {
"((</?link>))\0", -- Special link formatting added by [[Module:links]]
"((<[^<>\1\2]+>))", -- HTML tag
"((\1[Ff][Ii][Ll][Ee]:[^\1\2]+\2))\0", -- File
"((\1[Ii][Mm][Aa][Gg][Ee]:[^\1\2]+\2))\0", -- Image
"((\1[Cc][Aa][Tt][Ee][Gg][Oo][Rr][Yy]:[^\1\2]+\2))\0", -- Category
"((\1[Cc][Aa][Tt]:[^\1\2]+\2))\0", -- Category
"((\1)[^\1\2|]+(\2))\0", -- Bare internal link
"((\1)[^\1\2|]-(|)[^\1\2]-(\2))\0", -- Piped internal link
"((%[https?://[^[%] ]+)[^[%]]*(%]))\0", -- External link
"((\127'\"`UNIQ%-%-%l+%-%x+%-+QINU`\"'\127))", -- Strip marker
"('*(''').-'*('''))", -- Bold
"('*('').-'*(''))" -- Italics
}
-- Shallow-copy the array part of `t` (the entries ipairs visits) into a new table.
-- Non-integer keys and anything after the first nil are not copied.
local function table_icopy(t)
	local copy = {}
	for _, value in ipairs(t) do
		-- ipairs yields 1, 2, 3, ... in order, so appending keeps the same indices.
		copy[#copy + 1] = value
	end
	return copy
end
local escapar = require("Módulo:String/escapar")
-- Temporarily replace formatting (links, HTML tags, strip markers, bold/italics, ...) with
-- 4-byte placeholder sequences so the substitution process cannot disturb it.
--   text        string to protect.
--   subbedChars array that receives the replaced pieces; new pieces are appended after the
--               existing entries, and the index of each piece is encoded in its placeholder.
--   keepCarets  when truthy, carets (optionally preceded by backslashes) are protected too.
--   noTrim      when falsy, leading and trailing whitespace is protected as well.
-- Returns the protected text and the same subbedChars table.
local function doTempSubstitutions(text, subbedChars, keepCarets, noTrim)
	local char, floor = string.char, math.floor
	local active = table_icopy(patterns_)
	if keepCarets then
		insert(active, "((\\+)%^)")
		insert(active, "((%^))")
	end
	-- Protect whitespace at the very beginning and end so that it is not trimmed by accident.
	-- Only spaces added later by the substitution process should be trimmed, so callers skip
	-- this (noTrim) on later rounds.
	if not noTrim then
		insert(active, "^([\128-\191\244]*(%s+))")
		insert(active, "((%s+)[\128-\191\244]*)$")
	end
	-- Collapse "[[" and "]]" to single control bytes, which makes the patterns more accurate.
	text = text:gsub("%f[%[]%[%[", "\1")
	text = text:gsub("%f[%]]%]%]", "\2")
	local used = #subbedChars
	for _, raw in ipairs(active) do
		-- A trailing "\0" flags patterns for things like "[[" or "]]": their placeholders use
		-- a different second byte, so modules that scrape pages can treat them as breaks
		-- between terms. Strip the flag and remember whether it was present.
		local pattern, stripped = raw:gsub("%z$", "")
		local lead_base = stripped > 0 and 128 or 136
		text = text:gsub(pattern, function(whole, ...)
			-- `whole` is the full match; the remaining captures are the pieces to hide.
			local pieces = {...}
			for offset = 1, #pieces do
				local piece = pieces[offset]
				local slot = used + offset
				subbedChars[slot] = piece
				-- Encode the slot number in the last three bytes of the placeholder.
				local marker = "\244" .. char(
					floor(slot / 4096) % 64 + lead_base,
					floor(slot / 64) % 64 + 128,
					slot % 64 + 128
				)
				whole = whole:gsub(escapar(piece), marker, 1)
			end
			used = used + #pieces
			return whole
		end)
	end
	-- Restore any "[[" / "]]" that were not consumed by a pattern.
	text = text:gsub("\1", "%[%[")
	text = text:gsub("\2", "%]%]")
	return text, subbedChars
end
-- Put back the formatting that was hidden behind placeholders.
--   text        string containing placeholders.
--   subbedChars array of the original pieces; entry n belongs to the placeholder that encodes n.
-- Returns the text with every placeholder replaced by its original piece.
local function undoTempSubstitutions(text, subbedChars)
	local char, floor = string.char, math.floor
	for slot = 1, #subbedChars do
		local lead = floor(slot / 4096) % 64 + 128
		-- The second byte is either `lead` or `lead + 8`, so accept both forms.
		local marker = "\244[" .. char(lead, lead + 8) .. "]"
			.. char(floor(slot / 64) % 64 + 128, slot % 64 + 128)
		text = text:gsub(marker, escapar(subbedChars[slot]))
	end
	-- Expand any remaining control bytes back into "[[" and "]]".
	text = text:gsub("\1", "%[%[")
	text = text:gsub("\2", "%]%]")
	return text
end
-- Split the text into sections at the temporarily substituted formatting characters, then apply
-- the substitutions to each section in turn. This avoids putting PUA characters through
-- language-specific modules, which may be unequipped for them.
--   text        string to process (may already contain "\244..." placeholders).
--   subbedChars array of pieces already hidden behind placeholders, or nil; when nil, the second
--               round of temporary substitutions is skipped.
--   keepCarets  forwarded to doTempSubstitutions.
--   cod         code looked up in contiguous_substitution; when that entry is truthy the text is
--               processed as a single section.
--   cod, idioma, sc, substitution_data, function_name are forwarded unchanged to
--               Módulo:String/sustituir_rec.
-- Returns: text, fail, cats, subbedChars. If a section yields no result or reports a failure,
-- processing stops and text/fail/cats are taken from that section's results.
return function (text, subbedChars, keepCarets, cod, idioma, sc, substitution_data, function_name)
	local fail, cats, sections = nil, {}, nil
	-- See [[Module:languages/data]].
	if not text:match("\244") or require("Módulo:lenguas/idiomas/puntuacion").contiguous_substitution[cod] then
		sections = {text}
	else
		-- Split on the placeholder characters, i.e. the range U+100000-U+10FFFD written as UTF-8
		-- byte escapes. The class previously read "[-]", which split on a literal hyphen instead.
		sections = mw.text.split(text, "[\244\128\128\128-\244\143\191\189]")
	end
	for _, section in ipairs(sections) do
		-- Don't bother processing empty strings or whitespace (which may also not be handled well by dedicated modules).
		if section:gsub("%s", "") ~= "" then
			local sub, sub_fail, sub_cats = require("Módulo:String/sustituir_rec")(section, cod, idioma, sc, substitution_data, function_name)
			-- Second round of temporary substitutions, in case any formatting was added by the main substitution process. However, don't do this if the section contains formatting already (as it would have had to have been escaped to reach this stage, and therefore should be given as raw text).
			if sub and subbedChars then
				local noSub = false
				for _, pattern in ipairs(patterns_) do
					if section:match(pattern .. "%z?") then
						noSub = true
						-- One hit is enough; the remaining patterns cannot change the outcome.
						break
					end
				end
				if not noSub then
					sub, subbedChars = doTempSubstitutions(sub, subbedChars, keepCarets, true)
				end
			end
			if (not sub) or sub_fail then
				text = sub
				fail = sub_fail
				cats = sub_cats or {}
				break
			end
			-- Swap the first occurrence of the section for its substituted form.
			text = sub and text:gsub(escapar(section), escapar(sub), 1) or text
			if type(sub_cats) == "table" then
				for _, cat in ipairs(sub_cats) do
					insert(cats, cat)
				end
			end
		end
	end
	-- Trim, unless there are only spacing characters, while ignoring any final formatting characters.
	text = text and text
		:gsub("^([\128-\191\244]*)%s+(%S)", "%1%2")
		:gsub("(%S)%s+([\128-\191\244]*)$", "%1%2")
	-- Remove duplicate categories.
	if #cats > 1 then
		cats = require("Módulo:tabla").removeDuplicates(cats)
	end
	return text, fail, cats, subbedChars
end