انتقل إلى المحتوى

مستخدم:ياسين/AutoEd/unicodify.js

من ويكيبيديا، الموسوعة الحرة

ملاحظة: بعد الحفظ، قد يلزمك إفراغ الكاش لرؤية التغييرات (إفراغ الكاش الآن).

//<source lang=javascript>
// Replaces HTML character references in a text with the literal Unicode
// characters they stand for, in three passes:
//   Task 1: named entities (&mdash;, &alpha;, ...) plus ASCII arrows (==>, <==, <--)
//   Task 2: numeric entities (&#8212; / &#x2014;), except protected symbols
//   Task 3: C1 control characters (U+0080..U+009F) mapped as Windows-1252 bytes
//
// Parameter: str - the text to clean up (string).
// Returns:   the converted text (string); the argument itself is not modified.
//
// &lt;, &gt; and &amp; are deliberately left alone (see "False positives" below).
function autoEdUnicodify(str) { //MAIN FUNCTION describes list of fixes

 // Task 1: Replace named html entities with unicode

 // Most common replacements
 str = str.replace(/&mdash;/gi, '—');
 str = str.replace(/&ndash;/gi, '–');

 // Case insensitive symbols
 // The search is only a cheap guard: skip the whole list when the text holds
 // nothing that looks like a named entity.
 if(str.search(/&[a-z][a-z]+[0-9]*;/i) >= 0) {
  //XML and HTML Symbols
  str = str.replace(/&hellip;/gi, '...');
  str = str.replace(/&plus;/gi, '+');
  str = str.replace(/&plusmn;/gi, '±');
  str = str.replace(/&minus;/gi, '−');
  str = str.replace(/&times;/gi, '×');
  str = str.replace(/&divide;/gi, '÷');
  str = str.replace(/&ne;/gi, '≠');
  str = str.replace(/&asymp;/gi, '≈');
  str = str.replace(/&le;/gi, '≤');
  str = str.replace(/&ge;/gi, '≥');
  str = str.replace(/&quot;/gi, '"'); // "
  str = str.replace(/&apos;/gi, "'"); // '
  str = str.replace(/&iexcl;/gi, '¡');
  str = str.replace(/&cent;/gi, '¢');
  str = str.replace(/&pound;/gi, '£');
  str = str.replace(/&curren;/gi, '¤');
  str = str.replace(/&yen;/gi, '¥');
  str = str.replace(/&brvbar;/gi, '¦');
  str = str.replace(/&sect;/gi, '§');
  str = str.replace(/&uml;/gi, '¨');
  str = str.replace(/&copy;/gi, '©');
  str = str.replace(/&ordf;/gi, 'ª');
  str = str.replace(/&laquo;/gi, '«');
  str = str.replace(/&not;/gi, '¬');
  str = str.replace(/&reg;/gi, '®');
  str = str.replace(/&macr;/gi, '¯');
  str = str.replace(/&deg;/gi, '°');
  str = str.replace(/&sup2;/gi, '²');
  str = str.replace(/&sup3;/gi, '³');
  str = str.replace(/&acute;/gi, '´');
  str = str.replace(/&micro;/gi, 'µ');
  str = str.replace(/&para;/gi, '¶');
  str = str.replace(/&middot;/gi, '·');
  str = str.replace(/&cedil;/gi, '¸');
  str = str.replace(/&sup1;/gi, '¹');
  str = str.replace(/&ordm;/gi, 'º');
  str = str.replace(/&raquo;/gi, '»');
  str = str.replace(/&frac14;/gi, '¼');
  str = str.replace(/&frac12;/gi, '½');
  str = str.replace(/&frac34;/gi, '¾');
  str = str.replace(/&iquest;/gi, '¿');
  str = str.replace(/&circ;/gi, 'ˆ');
  str = str.replace(/&tilde;/gi, '˜');
  str = str.replace(/&lsquo;/gi, '‘');
  str = str.replace(/&rsquo;/gi, '’');
  str = str.replace(/&sbquo;/gi, '‚');
  str = str.replace(/&ldquo;/gi, '“');
  str = str.replace(/&rdquo;/gi, '”');
  str = str.replace(/&bdquo;/gi, '„');
  str = str.replace(/&bull;/gi, '•');
  str = str.replace(/&permil;/gi, '‰');
  str = str.replace(/&lsaquo;/gi, '‹');
  str = str.replace(/&rsaquo;/gi, '›');
  str = str.replace(/&oline;/gi, '‾');
  str = str.replace(/&frasl;/gi, '⁄');
  str = str.replace(/&euro;/gi, '€');
  str = str.replace(/&image;/gi, 'ℑ');
  str = str.replace(/&weierp;/gi, '℘');
  str = str.replace(/&real;/gi, 'ℜ');
  str = str.replace(/&trade;/gi, '™');
  str = str.replace(/&alefsym;/gi, 'ℵ');
  str = str.replace(/&crarr;/gi, '↵');
  str = str.replace(/&forall;/gi, '∀');
  str = str.replace(/&part;/gi, '∂');
  str = str.replace(/&exist;/gi, '∃');
  str = str.replace(/&empty;/gi, '∅');
  str = str.replace(/&nabla;/gi, '∇');
  str = str.replace(/&isin;/gi, '∈');
  str = str.replace(/&notin;/gi, '∉');
  str = str.replace(/&ni;/gi, '∋');
  str = str.replace(/&prod;/gi, '∏');
  str = str.replace(/&sum;/gi, '∑');
  str = str.replace(/&lowast;/gi, '∗');
  str = str.replace(/&radic;/gi, '√');
  str = str.replace(/&prop;/gi, '∝');
  str = str.replace(/&infin;/gi, '∞');
  str = str.replace(/&ang;/gi, '∠');
  str = str.replace(/&and;/gi, '∧');
  str = str.replace(/&or;/gi, '∨');
  str = str.replace(/&cap;/gi, '∩');
  str = str.replace(/&cup;/gi, '∪');
  str = str.replace(/&int;/gi, '∫');
  str = str.replace(/&there4;/gi, '∴');
  str = str.replace(/&sim;/gi, '∼');
  str = str.replace(/&cong;/gi, '≅');
  str = str.replace(/&sub;/gi, '⊂');
  str = str.replace(/&sup;/gi, '⊃');
  str = str.replace(/&nsub;/gi, '⊄');
  str = str.replace(/&sube;/gi, '⊆');
  str = str.replace(/&supe;/gi, '⊇');
  str = str.replace(/&oplus;/gi, '⊕');
  str = str.replace(/&otimes;/gi, '⊗');
  str = str.replace(/&perp;/gi, '⊥');
  str = str.replace(/&sdot;/gi, '⋅');
  str = str.replace(/&lceil;/gi, '⌈');
  str = str.replace(/&rceil;/gi, '⌉');
  str = str.replace(/&lfloor;/gi, '⌊');
  str = str.replace(/&rfloor;/gi, '⌋');
  str = str.replace(/&lang;/gi, '〈');
  str = str.replace(/&rang;/gi, '〉');
  str = str.replace(/&loz;/gi, '◊');
  str = str.replace(/&spades;/gi, '♠');
  str = str.replace(/&clubs;/gi, '♣');
  str = str.replace(/&hearts;/gi, '♥');
  str = str.replace(/&diams;/gi, '♦');
 }

 // Uppercase symbols
 // These are case sensitive because the lowercase spelling names a different
 // character (&Alpha; vs &alpha;). The guard accepts mixed-case names so that
 // &AElig; (second letter uppercase) is enough to enter the block.
 if(str.search(/&[A-Z][A-Za-z]+;/) >= 0) {
  //Greek symbols
  str = str.replace(/&Alpha;/g, 'Α');
  str = str.replace(/&Beta;/g, 'Β');
  str = str.replace(/&Gamma;/g, 'Γ');
  str = str.replace(/&Delta;/g, 'Δ');
  str = str.replace(/&Epsilon;/g, 'Ε');
  str = str.replace(/&Zeta;/g, 'Ζ');
  str = str.replace(/&Eta;/g, 'Η');
  str = str.replace(/&Theta;/g, 'Θ');
  str = str.replace(/&Iota;/g, 'Ι');
  str = str.replace(/&Kappa;/g, 'Κ');
  str = str.replace(/&Lambda;/g, 'Λ');
  str = str.replace(/&Mu;/g, 'Μ');
  str = str.replace(/&Nu;/g, 'Ν');
  str = str.replace(/&Xi;/g, 'Ξ');
  str = str.replace(/&Omicron;/g, 'Ο');
  str = str.replace(/&Pi;/g, 'Π');
  str = str.replace(/&Rho;/g, 'Ρ');
  str = str.replace(/&Sigma;/g, 'Σ');
  str = str.replace(/&Tau;/g, 'Τ');
  str = str.replace(/&Upsilon;/g, 'Υ');
  str = str.replace(/&Phi;/g, 'Φ');
  str = str.replace(/&Chi;/g, 'Χ');
  str = str.replace(/&Psi;/g, 'Ψ');
  str = str.replace(/&Omega;/g, 'Ω');
  //Latin symbols
  str = str.replace(/&Agrave;/g, 'À');
  str = str.replace(/&Aacute;/g, 'Á');
  str = str.replace(/&Acirc;/g, 'Â');
  str = str.replace(/&Atilde;/g, 'Ã');
  str = str.replace(/&Auml;/g, 'Ä');
  str = str.replace(/&Aring;/g, 'Å');
  str = str.replace(/&AElig;/g, 'Æ');
  str = str.replace(/&Ccedil;/g, 'Ç');
  str = str.replace(/&Egrave;/g, 'È');
  str = str.replace(/&Eacute;/g, 'É');
  str = str.replace(/&Ecirc;/g, 'Ê');
  str = str.replace(/&Euml;/g, 'Ë');
  str = str.replace(/&Igrave;/g, 'Ì');
  str = str.replace(/&Iacute;/g, 'Í');
  str = str.replace(/&Icirc;/g, 'Î');
  str = str.replace(/&Iuml;/g, 'Ï');
  str = str.replace(/&Ntilde;/g, 'Ñ');
  str = str.replace(/&Ograve;/g, 'Ò');
  str = str.replace(/&Oacute;/g, 'Ó');
  str = str.replace(/&Ocirc;/g, 'Ô');
  str = str.replace(/&Otilde;/g, 'Õ');
  str = str.replace(/&Ouml;/g, 'Ö');
  str = str.replace(/&Oslash;/g, 'Ø');
  str = str.replace(/&Ugrave;/g, 'Ù');
  str = str.replace(/&Uacute;/g, 'Ú');
  str = str.replace(/&Ucirc;/g, 'Û');
  str = str.replace(/&Uuml;/g, 'Ü');
  str = str.replace(/&Yacute;/g, 'Ý');
  str = str.replace(/&Scaron;/g, 'Š');
  str = str.replace(/&Yuml;/g, 'Ÿ');
  //XML and HTML Symbols
  str = str.replace(/&Dagger;/g, '‡');
  str = str.replace(/&Prime;/g, '″');
 }

 // lowercase symbols
 if(str.search(/&[a-z][a-z]+;/) >= 0) {
  //Greek symbols
  str = str.replace(/&alpha;/g, 'α');
  str = str.replace(/&beta;/g, 'β');
  str = str.replace(/&gamma;/g, 'γ');
  str = str.replace(/&delta;/g, 'δ');
  str = str.replace(/&epsilon;/g, 'ε');
  str = str.replace(/&zeta;/g, 'ζ');
  str = str.replace(/&eta;/g, 'η');
  str = str.replace(/&theta;/g, 'θ');
  str = str.replace(/&iota;/g, 'ι');
  str = str.replace(/&kappa;/g, 'κ');
  str = str.replace(/&lambda;/g, 'λ');
  str = str.replace(/&mu;/g, 'μ');
  str = str.replace(/&nu;/g, 'ν');
  str = str.replace(/&xi;/g, 'ξ');
  str = str.replace(/&omicron;/g, 'ο');
  str = str.replace(/&pi;/g, 'π');
  str = str.replace(/&rho;/g, 'ρ');
  str = str.replace(/&sigmaf;/g, 'ς');
  str = str.replace(/&sigma;/g, 'σ');
  str = str.replace(/&tau;/g, 'τ');
  str = str.replace(/&upsilon;/g, 'υ');
  str = str.replace(/&phi;/g, 'φ');
  str = str.replace(/&chi;/g, 'χ');
  str = str.replace(/&psi;/g, 'ψ');
  str = str.replace(/&omega;/g, 'ω');
  str = str.replace(/&thetasym;/g, 'ϑ');
  str = str.replace(/&upsih;/g, 'ϒ');
  str = str.replace(/&piv;/g, 'ϖ');
  //Latin symbols
  str = str.replace(/&szlig;/g, 'ß');
  str = str.replace(/&agrave;/g, 'à');
  str = str.replace(/&aacute;/g, 'á');
  str = str.replace(/&acirc;/g, 'â');
  str = str.replace(/&atilde;/g, 'ã');
  str = str.replace(/&auml;/g, 'ä');
  str = str.replace(/&aring;/g, 'å');
  str = str.replace(/&aelig;/g, 'æ');
  str = str.replace(/&ccedil;/g, 'ç');
  str = str.replace(/&egrave;/g, 'è');
  str = str.replace(/&eacute;/g, 'é');
  str = str.replace(/&ecirc;/g, 'ê');
  str = str.replace(/&euml;/g, 'ë');
  str = str.replace(/&igrave;/g, 'ì');
  str = str.replace(/&iacute;/g, 'í');
  str = str.replace(/&icirc;/g, 'î');
  str = str.replace(/&iuml;/g, 'ï');
  str = str.replace(/&eth;/g, 'ð');
  str = str.replace(/&ntilde;/g, 'ñ');
  str = str.replace(/&ograve;/g, 'ò');
  str = str.replace(/&oacute;/g, 'ó');
  str = str.replace(/&ocirc;/g, 'ô');
  str = str.replace(/&otilde;/g, 'õ');
  str = str.replace(/&ouml;/g, 'ö');
  str = str.replace(/&oslash;/g, 'ø');
  str = str.replace(/&ugrave;/g, 'ù');
  str = str.replace(/&uacute;/g, 'ú');
  str = str.replace(/&ucirc;/g, 'û');
  str = str.replace(/&uuml;/g, 'ü');
  str = str.replace(/&yacute;/g, 'ý');
  str = str.replace(/&thorn;/g, 'þ');
  str = str.replace(/&yuml;/g, 'ÿ');
  str = str.replace(/&oelig;/g, 'œ');
  str = str.replace(/&scaron;/g, 'š');
  str = str.replace(/&fnof;/g, 'ƒ');
  //XML and HTML Symbols
  str = str.replace(/&dagger;/g, '†');
  str = str.replace(/&prime;/g, '′');
 }

 // False positives
 // Breaks large amounts of code which discuss programming/scripting.
 // str = str.replace(/&lt;/gi, '<');
 // str = str.replace(/&gt;/gi, '>');
 // Breaks large number of URLs and discussion of programming/scripting.
 // str = str.replace(/&amp;/gi, '&');

 // Arrows
 // Case sensitive: &larr; (single arrow) and &lArr; (double arrow) differ only in case.
 str = str.replace(/&larr;/g, '←');
 str = str.replace(/&rarr;/g, '→');
 str = str.replace(/&uarr;/g, '↑');
 str = str.replace(/&darr;/g, '↓');
 str = str.replace(/&lArr;/g, '⇐');
 str = str.replace(/&rArr;/g, '⇒');
 str = str.replace(/&uArr;/g, '⇑');
 str = str.replace(/&dArr;/g, '⇓');
 str = str.replace(/&harr;/g, '↔');
 str = str.replace(/&hArr;/g, '⇔');
 // ASCII-art arrows
 str = str.replace(/<==|<--/gi, '←');
 str = str.replace(/==>/gi, '→');

 // Specific case
 // Entity names with more than one uppercase letter.
 str = str.replace(/&ETH;/g, 'Ð');
 str = str.replace(/&THORN;/g, 'Þ');
 str = str.replace(/&OElig;/g, 'Œ');


 // Task 2: Replace numeric html entities with unicode ( User:CharlotteWebb )

 // Symbols for which there may be a good reason to obfuscate/escape.
 // '&' is protected for the same reason &amp; is left alone above: a bare
 // ampersand can start a new entity (e.g. "&#38;#91;" would become "&#91;").
 var dont_replace = "|!{}[]=<>&";

 // Decodes one numeric character reference.
 //   ent    - the full matched reference, e.g. "&#8212;" or "&#x2014;"
 //   digits - its digit part, without "&#", "x"/"X" and ";"
 //   base   - 10 or 16
 // Returns the decoded character, or ent unchanged when the code point is not
 // a usable character (0, a lone surrogate, beyond U+10FFFF) or is protected.
 function decodeNumericEntity(ent, digits, base) {
  var num = parseInt(digits, base);
  if (num === 0 || num > 0x10FFFF || (num >= 0xD800 && num <= 0xDFFF)) {
   return ent;
  }
  var chr;
  // see [[UTF-16]] for chars outside the BMP
  if (num > 0xFFFF) {
   num -= 0x10000;
   chr = String.fromCharCode(0xD800 + (num >> 10), 0xDC00 + (num & 0x3FF));
  } else {
   chr = String.fromCharCode(num);
  }
  return dont_replace.indexOf(chr) === -1 ? chr : ent;
 }

 // perform replacement
 // A replacer callback decodes every occurrence in place, so nothing depends on
 // loop variables or on the non-standard third "flags" argument of replace().
 str = str.replace(/&#(\d+);/g, function (ent, digits) {
  return decodeNumericEntity(ent, digits, 10);
 });
 // The "i" flag also accepts the "&#X41;" spelling and uppercase hex digits.
 str = str.replace(/&#x([\da-f]+);/gi, function (ent, digits) {
  return decodeNumericEntity(ent, digits, 16);
 });

 // Task 3: Unprintable control characters [[Windows-1252]] from User:CharlotteWebb
 // Each U+0080..U+009F character is replaced by the character Windows-1252
 // assigns to that byte; the five bytes with no assignment become a comment.
 var failstr = "<!-- AutoEd: rm unicode ctrl char w/no win-1252 mapping, intent unknown -->";
 str = str.replace(/\u0080/g, '€');
 str = str.replace(/\u0081/g, failstr);
 str = str.replace(/\u0082/g, '‚');
 str = str.replace(/\u0083/g, 'ƒ');
 str = str.replace(/\u0084/g, '„');
 str = str.replace(/\u0085/g, '…');
 str = str.replace(/\u0086/g, '†');
 str = str.replace(/\u0087/g, '‡');
 str = str.replace(/\u0088/g, 'ˆ');
 str = str.replace(/\u0089/g, '‰');
 str = str.replace(/\u008a/g, 'Š');
 str = str.replace(/\u008b/g, '‹');
 str = str.replace(/\u008c/g, 'Œ');
 str = str.replace(/\u008d/g, failstr);
 str = str.replace(/\u008e/g, 'Ž');
 str = str.replace(/\u008f/g, failstr);
 str = str.replace(/\u0090/g, failstr);
 str = str.replace(/\u0091/g, '‘');
 str = str.replace(/\u0092/g, '’');
 str = str.replace(/\u0093/g, '“');
 str = str.replace(/\u0094/g, '”');
 str = str.replace(/\u0095/g, '•');
 str = str.replace(/\u0096/g, '–');
 str = str.replace(/\u0097/g, '—');
 str = str.replace(/\u0098/g, '˜');
 str = str.replace(/\u0099/g, '™');
 str = str.replace(/\u009a/g, 'š');
 str = str.replace(/\u009b/g, '›');
 str = str.replace(/\u009c/g, 'œ');
 str = str.replace(/\u009d/g, failstr);
 str = str.replace(/\u009e/g, 'ž');
 str = str.replace(/\u009f/g, 'Ÿ');

 return str;
}
//</source>