Usuario:Alhen/MediaWiki:ExtractFirst.xsl
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <xsl:output method='html'/> <xsl:variable name="dir">ltr</xsl:variable> <xsl:variable name="more">» Más</xsl:variable> <xsl:variable name="error">Error: </xsl:variable> <xsl:variable name="copyright"> © <a href="http://es.wiktionary.org/wiki/">Wikcionario</a>. Released under <a href="http://creativecommons.org/licenses/by-sa/3.0/deed.es" rel="license copyright">CC-BY-SA 3.0</a></xsl:variable> <xsl:variable name="contentLang" select="'es'"/>
<xsl:variable name="pageName" select="concat('http://', $contentLang, '.wiktionary.org')"/> <xsl:template match="/">
<html dir="{$dir}" lang="{$contentLang}" xml:lang="{$contentLang}"> <head> <meta name="generator" content="Wiktionary Extract XSLT 1.08-EN"/> <base target='_blank' href="{$pageName}" />
<title> Wiktionary extract</title>
<style> #wordThisIsFor { font-weight:bold;} a.wtif1 { color: black; text-decoration: none;} a.wtif1:hover {text-decoration: underline;} .disambig-see-also, .disambig-see-also-2 {display:inline;} #container {background-color:white; padding: 0.5em; border: solid black thin;} a.new {color: red;} #error {color: red;font-size:larger;} </style> <script type='text/javascript'> /*<![CDATA[*/ function setup () { //Stuff to translate: var preferLang = {'es': 'Español', 'en': 'Inglés', 'fr': 'Francés', 'de': 'Alemán', 'pt': 'Portugués', 'qqqAny': null}; //for now.
var extractSeeAlso = /<div\sclass="disambig-see-also"\s?>\s?
\s?<a [^>]*>\s*<img [^>]*>\s?<\/a>\s?<\/div>\s*<\/div>[\s\S]*?
\s([\s\S]*?)<\/div>/;\\s*\\s*(?:<a [^>]*>)?\s?' + preferLang[preferLangCode] + '(?:<\\/a>)?\s?<\\/b>[\\s\\S]*$');
var extractCurLangName = /
\s([\s\S]*?)<\/div>/;
var see_also_process = function (sa) { return sa[1];} var createLink = '» Crear'; // text only. var not_found = "No hay resultados por $1.";
//END stuff to translate (there is one more translation block below)
//Stuff not to translate in general (setup).
// ---- Read the request parameters from the query string (do not translate). ----
// Every statement sits on its own line: when they share one line, the first
// "//" comment swallows all of the statements that follow it.

// Redirect depth. "&rd=N" is appended by the fallback redirect in the catch
// block further down; unary + converts the captured text to a number.
var rd = location.search.match(/\&rd\=([^&]*)/);
rd = rd ? (+rd[1] + 1) : 1; //redirection level.

// How to show the looked-up word: 0 = none (default), 1 = bold, 2 = bold link.
var showWord = 0;
var showWordRaw = location.search.match(/\&showWord\=([^&]*)/);
showWordRaw = showWordRaw ? showWordRaw[1] : 'none';
if (showWordRaw !== "none") { showWord++; }
if (showWordRaw === "link") { showWord++; }

// Number of definitions to extract ("&count=N"), defaulting to 1.
// An explicit radix keeps parseInt from guessing the base, and a value that
// is not a number falls back to the default instead of becoming NaN.
var numbDfn = location.search.match(/\&count\=([^&]*)/);
numbDfn = numbDfn ? parseInt(numbDfn[1], 10) : 1;
if (isNaN(numbDfn)) { numbDfn = 1; }

// The requested page title, still URL-encoded. A missing "&page=" parameter
// throws here, exactly as before.
var loc = location.search.match(/\&page\=([^&]*)/)[1];
var pageURL = '/w/index.php?title=' + loc;

var src = document.getElementById('src');
var display = document.getElementById('word-list');

// HTML-escape the decoded title, because it is concatenated into innerHTML
// together with the intro text. "&" must be escaped first so the entities
// produced for "<" and ">" are not escaped a second time.
//note: escWord does not escape quotes. DO NOT PUT AS ATTRIBUTE VALUE
var escWord = decodeURIComponent(loc)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');

// Preferred language code ("&lang=xx"); 'qqqAny' means no preference.
var preferLangCode = location.search.match(/\&lang\=([^&]*)/);
if (preferLangCode) {
    preferLangCode = preferLangCode[1];
} else {
    preferLangCode = 'qqqAny';
}

// The parsed page HTML is read as the text content of #src. normalize()
// merges adjacent text nodes so firstChild holds the whole text. When #src
// is empty (no parse text was returned) use an empty string, so the failure
// is handled by the try/catch below instead of throwing here.
src.normalize();
var html = src.firstChild ? src.firstChild.data : '';
var def = html; //may be redefined later.
//stuff you might need to translate, but hopefully won'tvar subSectRegex = new RegExp('
\\s*\\s*(?:<a [^>]*>)?\s?' + preferLang[preferLangCode] + '(?:<\\/a>)?\s?<\\/b>[\\s\\S]*$');
var extractCurLangName = /\s*\s*(?:<a [^>]*>)?(\S*?)(?:<\/a>)?\s?<\/b>/; //first subexpression
//End stuff you hopefully won't need to translate.
try {
//this assumes attribute order doesn't change!!!
html = html.replace(/[\s\S]*?<\/div>/, );
if (preferLangCode && preferLang[preferLangCode]) {
try {
//strip off all definitions before target lang.
var subSect = html.match(subSectRegex)[0];
if (subSect.match(/\s*- \d\d?<\/dt>\s*
- (?:[\s\S]*?)\s(?:<\/dd>|
|
)/)) {
//if it has content
def = subSect;
}
} catch (e) { /*alert(e)*/}
}
var lang = def.match(extractCurLangName)[1];
var intro = "(" + lang + ") ";
if (showWord) intro = '<a href="' + pageURL + '" id="wordThisIsFor" class="wtif' + showWord + '" >' + escWord + "</a> " + intro ;
var definitions_matched;
//FIXME: in both cases the extraction method does not properly strip nested divs. This results in image thumbnails being left behind
if (numbDfn === 1) {
definitions_matched = def.match(/\s*- \d\d?<\/dt>\s*
- ([\s\S]*?)\s?(?:<\/dd>|
|
)/)[1].replace(/[\s\S]*?<\/dl>/g, ).replace(/<div[^>]*>[\s\S]*?<\/div>/g, ).replace(/<\/div>/g, ).replace(/[\s\S]*?<\/ul>/g, ).replace(/
[\s\S]*?<\/ol>/g, ).replace(/<a href="(#[^"]*)">/g, '<a href="' + pageURL + '$1">');
display.innerHTML = intro + definitions_matched;
} else {
//this use not well supported...
var tmp = intro + '
';
for (var i = 0; i < numbDfn; i++) {
try {
definitions_matched = def.match(/\s*- \d\d?<\/dt>\s*
- ([\s\S]*?)\s?(?:<\/dd>|
|
)/)[1].replace(/[\s\S]*?<\/dl>/g, ).replace(/<div[^>]*>[\s\S]*?<\/div>/g, ).replace(/<\/div>/g, ).replace(/[\s\S]*?<\/ul>/g, ).replace(/
[\s\S]*?<\/ol>/g, ).replace(/<a href="(#[^"]*)">/g, '<a href="' + pageURL + '$1">');
} catch (e) {
break; //this is ugly
}
def = def.replace(/\s*- \d\d?<\/dt>\s*
- ([\s\S]*?)\s(?:<\/dd>|
|
)[\s\S]*?<\/dl>/, ); //to move to next one.
tmp += "- " + definitions_matched + '
';
}
display.innerHTML = tmp + '
';
}
}
catch (e) {
//alert(e)
//page does not exist, not well formed, these regexs suck, etc
display.appendChild(document.createTextNode(not_found.replace("$1", decodeURIComponent(loc))));
document.getElementById('more-link').firstChild.data = createLink;
// Fallback redirect: the lookup failed, so retry with a variant of the title.
// Variants are tried in this order, stopping at the first that differs from
// the current title:
//   1. first letter lower-cased,
//   2. whole title lower-cased,
//   3. the alternative title passed in "&alt=".
// rd counts the redirects so far; the limit of 9 is arbitrary and only
// exists to prevent infinite loops.
if (rd < 9) {
var newLoc; //this should not be urlEncoded.
var remAlt = false; // true once the "&alt=" title has been consumed
var dLoc = decodeURIComponent(loc);
// Take the rest of the *decoded* title; no end index is needed.
newLoc = dLoc.charAt(0).toLowerCase() + dLoc.substring(1);
//try some other redirections.
if (newLoc === dLoc) newLoc = dLoc.toLowerCase();
var altMatch = location.search.match(/\&alt\=([^&]*)/);
if (newLoc === dLoc && altMatch) {
newLoc = decodeURIComponent(altMatch[1]);
remAlt = true;
}
if (newLoc !== dLoc) { //redir
// Swap the new title into the "&page=" parameter of the current URL.
var newURL = location.href.replace(/(^[\s\S]*?\&page\=)[^&]*([\s\S]*$)/, '$1'+ encodeURIComponent(newLoc) + '$2');
newURL = newURL.replace(/&rd\=[^&]*/, ''); //strip old redirect header.
if (remAlt) {
// Drop the consumed "&alt=" parameter from the URL being built. The result
// must be assigned back: replace() returns a new string and changes nothing
// in place.
newURL = newURL.replace(/&alt\=[^&]*/, '');
}
location = newURL + '&rd=' + rd;
}
}
}
var sa = html.match(extractSeeAlso);
if(sa) {
document.getElementById('see-also').innerHTML = ' (' + see_also_process(sa) + ')' ;
}
document.getElementById('more-link').href= pageURL;
}
/*]]>*/
</script>
</head>
<body onload='setup()'>
<!--
  Page skeleton required by setup():
    #container : visible box styled by the stylesheet's #container rule
    #word-list : receives the extracted definition(s)
    #see-also  : receives the optional "see also" text
    #more-link : link to the full entry; setup() sets its href and may replace its text
    #src       : hidden holder for the parsed page HTML. xsl:value-of writes it as
                 escaped text, so setup() can read it back through src.firstChild.data.
-->
<div id="container">
<xsl:apply-templates select='api/error'/>
<span id="word-list"/>
<span id="see-also"/>
<xsl:text> </xsl:text>
<a id='more-link'><xsl:value-of select="$more"/></a> <xsl:copy-of select="$copyright"/>
</div>
<div id="src" style="display:none"><xsl:value-of select='api/parse/text'/></div>
</body>
</html>
</xsl:template>
<!--
  Renders an API error as the localized $error prefix followed by the error's
  info attribute. The wrapper carries id="error" so the stylesheet's #error rule
  (red, larger text) applies to the message.
-->
<xsl:template match='api/error'>
<div id="error"><xsl:value-of select="$error"/> <xsl:value-of select='@info'/></div>
</xsl:template>
</xsl:stylesheet>
[\s\S]*?<\/div>/, );
if (preferLangCode && preferLang[preferLangCode]) { try { //strip off all definitions before tagret lang. var subSect = html.match(subSectRegex)[0];if (subSect.match(/
- \s*
- \d\d?<\/dt>\s*
- (?:[\s\S]*?)\s(?:<\/dd>|
- |
- )/)) {
//if it has content
def = subSect;
}
} catch (e) { /*alert(e)*/}
}
var lang = def.match(extractCurLangName)[1];
var intro = "(" + lang + ") ";
if (showWord) intro = '<a href="' + pageURL + '" id="wordThisIsFor" class="wtif' + showWord + '" >' + escWord + "</a> " + intro ;
var definitions_matched;
//FIXME: in both cases the extraction method does not properly strip nested divs. This results in image thumbnails being left behind
if (numbDfn === 1) {
definitions_matched = def.match(/
- \d\d?<\/dt>\s*
- ([\s\S]*?)\s?(?:<\/dd>|
- |
- )/)[1].replace(/
- \d\d?<\/dt>\s*
- ([\s\S]*?)\s?(?:<\/dd>|
- |
- )/)[1].replace(/
- \d\d?<\/dt>\s*
- ([\s\S]*?)\s(?:<\/dd>|
- |
- )[\s\S]*?<\/dl>/, ); //to move to next one.
tmp += "
- " + definitions_matched + ' '; } display.innerHTML = tmp + '
- [\s\S]*?<\/dl>/g, ).replace(/<div[^>]*>[\s\S]*?<\/div>/g, ).replace(/<\/div>/g, ).replace(/
- [\s\S]*?<\/ul>/g, ).replace(/
- [\s\S]*?<\/ol>/g, ).replace(/<a href="(#[^"]*)">/g, '<a href="' + pageURL + '$1">');
} catch (e) {
break; //this is ugly
}
def = def.replace(/
- \s*
- [\s\S]*?<\/dl>/g, ).replace(/<div[^>]*>[\s\S]*?<\/div>/g, ).replace(/<\/div>/g, ).replace(/
- [\s\S]*?<\/ul>/g, ).replace(/
- [\s\S]*?<\/ol>/g, ).replace(/<a href="(#[^"]*)">/g, '<a href="' + pageURL + '$1">');
display.innerHTML = intro + definitions_matched;
} else {
//this use not well supported...
var tmp = intro + '
- ';
for (var i = 0; i < numbDfn; i++) {
try {
definitions_matched = def.match(/
- \s*
} } catch (e) { //alert(e) //page does not exist, not well formed, these regexs suck, etc
display.appendChild(document.createTextNode(not_found.replace("$1", decodeURIComponent(loc)))); document.getElementById('more-link').firstChild.data = createLink; if (rd < 9) { //arbitrary to prevent infinite loops //make sure don't have loops. var newLoc; //this should not be urlEncoded. var remAlt = false; var dLoc = decodeURIComponent(loc);
newLoc = dLoc.charAt(0).toLowerCase() + dLoc.substring(1, loc.length); //try some other redirections.
if (newLoc === dLoc) newLoc = dLoc.toLowerCase(); if (newLoc === dLoc && location.search.match(/\&alt\=([^&]*)/)) { newLoc = decodeURIComponent(location.search.match(/\&alt\=([^&]*)/)[1]); remAlt = true; } if (newLoc !== dLoc) { //redir var newURL = location.href.replace(/(^[\s\S]*?\&page\=)[^&]*([\s\S]*$)/, '$1'+ encodeURIComponent(newLoc) + '$2'); newURL = newURL.replace(/&rd\=[^&]*/, ); //strip old redirect header. if (remAlt) { location.href.replace(/&alt\=[^&]*/, ); } location = newURL + '&rd=' + rd; } } } var sa = html.match(extractSeeAlso); if(sa) { document.getElementById('see-also').innerHTML = ' (' + see_also_process(sa) + ')' ; } document.getElementById('more-link').href= pageURL;
}
/*]]>*/ </script>
</head>
<body onload='setup()'>
<xsl:apply-templates select='api/error'/><a id='more-link'><xsl:value-of select="$more"/></a> <xsl:copy-of select="$copyright"/><xsl:value-of select='api/parse/text'/>
</body> </html>
</xsl:template> <xsl:template match='api/error'> <xsl:value-of select="$error"/> <xsl:value-of select='@info'/> </xsl:template>
</xsl:stylesheet>
- \s*