// This script does cosmetic work:
// repairs / removes commented parameters
// deletes empty parameters
// replaces un-hyphenated parameters with their hyphenated forms
// deletes |ref=harv
// replaces language names in |language= with their MediaWiki ISO 639-like codes
//
// wikitext searches:
// html comments:
// hastemplate:"Module:Citation/CS1" insource:/\{ *[Cc]it[ae][^\}]*\| *\<!\-\-+ *[^\>]+\-\-+\>/
// mode:
// hastemplate:Citation insource:/\{ *[Cc]itation[^\}]*\| *mode *= *cs2/
// hastemplate:"Module:Citation/CS1" insource:/\{ *[Cc]ite[^\}]*\| *mode *= *cs1/
// deadurl:
// hastemplate:"Module:Citation/CS1" insource:"deadurl"
// postscript:
// hastemplate:Citation insource:/\{ *[Cc]itation[^\}]*\| *postscript *= *none/
// hastemplate:"Module:Citation/CS1" insource:/\{ *[Cc]ite[^\}]*\| *postscript *= *\./
// hastemplate:"Module:Citation/CS1" insource:/\{ *[Cc]it[ae][^\}]*\| *postscript *= *\<!\-\- *[Nn][Oo][Nn][Ee]/
//
// category:
// CS1 errors: empty unknown parameters
//
// Entry point for one article: runs every cosmetic cs1|2 cleanup over <ArticleText>, builds the edit summary,
// and appends one row to the daily csv log.
//
// ArticleText   - wikitext of the article; the (possibly modified) wikitext is returned
// ArticleTitle  - page title; used for the user-space exemption, the skip list, and the log row
// wikiNamespace - namespace number; anything other than 0 (mainspace) is skipped unless the title contains
//                 "User:Trappist the monk"
// Summary (out) - edit summary, or the reason the page was skipped
// Skip (out)    - true when the page is outside mainspace, is in <skip_map>, or holds no cs1|2 template that
//                 could be evaluated
//
// Early skips (namespace, skip list) return before anything is logged.
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    // pages outside mainspace are left alone, except pages whose title contains my own user-space prefix
    if (0 != wikiNamespace && -1 == ArticleTitle.IndexOf ("User:Trappist the monk"))
    {
        Skip = true;
        Summary = @"article not in mainspace: " + wikiNamespace;
        return ArticleText; // abandon this edit
    }
    if (skip_map.ContainsKey (ArticleTitle)) // if <ArticleTitle> is in the skip list
    {
        Skip = true;
        Summary = @"article in skip list";
        return ArticleText; // abandon this edit
    }

    Skip = false; // assume that something will be changed
    // Summary = "[[User:Monkbot/task 18|Task 18 (cosmetic)]] (dev test):"; // uses redirect to User:Monkbot/task 18: cosmetic cs1 template cleanup
    // Summary = "[[User:Monkbot/task 18|Task 18 (cosmetic)]] (BRFA trial):";
    Summary = "[[User:Monkbot/task 18|Task 18 (cosmetic)]]:";

    // tallies; these locals are captured by the match evaluator below and passed by ref to the helpers
    int total_count = 0; // total number of cs1|2 templates
    int skip_count = 0; // skip count incremented when cite template holds wikitable markup
    int comment_deleted = 0;
    int comment_repaired = 0;
    int empty_positional_deleted = 0;
    int params_removed_count = 0;
    int params_hyphenated_count = 0;
    int ref_harv_count = 0;
    int url_status_count = 0;
    int deadurl_count = 0;
    int nopp_count = 0;
    int orig_count = 0;
    int mode_count = 0;
    int ps_count = 0;
    int nls_count = 0;
    int mask_count = 0;
    int link_count = 0;
    int language_count = 0;

    //---------------------------< B E G I N >--------------------------------------------------------------------
    ArticleText = hide (ArticleText, IS_CS1); // hide all templates except cs1|2 and hide wikilinks

    //---------------------------< M A I N >----------------------------------------------------------------------
    //
    // every cs1|2 template is handed to the evaluator; Regex.Replace returns its input unchanged when there is
    // no match so no separate 'is there a match' test is needed
    //
    ArticleText = Regex.Replace (ArticleText, template_pattern,
        delegate(Match match)
        {
            string template = match.Groups[0].Value; // this will be returned if no changes
            if ((-1 != template.IndexOf("__0T4BL3__")) || (-1 != template.IndexOf("__CT4BL3__"))) // if table markup found in the cite template abandon
            {
                skip_count++;
                return template;
            }
            total_count++; // bump total number of cs1|2 templates tally

            template = counted_replace (template, comment_params_1_pattern, "$2$1$3$5$4", ref comment_repaired); // |<!-- param=value |--> to <!-- |param=value -->|
            template = counted_replace (template, comment_params_2_pattern, "$2$1$3", ref comment_repaired); // |<!-- param=value --> to <!-- |param=value -->
            // deleting a commented-out empty parameter is tallied as a deleted comment so that the
            // 'del cmtd params' summary entry and the 'deleted comment' log column are actually populated
            template = counted_replace (template, comment_params_3_pattern, "", ref comment_deleted); // |<!-- param= --> to (delete)
            template = counted_replace (template, comment_params_4_pattern, "$1", ref params_removed_count); // <!-- |param=value |param= --> to <!-- |param=value -->
            template = counted_replace (template, comment_params_5_pattern, "$2$1$3", ref comment_repaired); // |<!-- plain text without = or | or } --> to <!-- plain text -->
            template = counted_replace (template, empty_positional_parameter_nl, "$1", ref empty_positional_deleted); // || or |} to | or } (newline variant)
            template = counted_replace (template, empty_positional_parameter, "$1", ref empty_positional_deleted); // || or |} to | or }
            template = counted_replace (template, ref_harv_pattern, "", ref ref_harv_count); // |ref=harv to (delete)
            template = mode_remove (template, ref mode_count);
            template = postscript_remove (template, ref ps_count);

            template = empty_param_remove (template, ref params_removed_count); // this is a prerequisite function for these:
            template = url_status_remove (template, ref url_status_count, ref deadurl_count);

            template = hyphenate (hyphenate_map, template, ref params_hyphenated_count); // this is a prerequisite function for these:
            template = nopp_remove (template, ref nopp_count);
            template = orig_year_remove (template, ref orig_count);
            template = name_list_style_remove (template, ref nls_count);
            template = name_link_mask_remove (template, ref mask_count, @"mask");
            template = name_link_mask_remove (template, ref link_count, @"link");
            template = language (language_map, template, ref language_count);

            while (Regex.Match (template, empty_comment_pattern).Success) // cleanup; delete <!-- --> (empty or only whitespace)
                template = Regex.Replace (template, empty_comment_pattern, "");
            return template;
        });

    //---------------------------< F I N I S H >------------------------------------------------------------------
    ArticleText = unhide (ArticleText); // unhide all that is hidden

    if (0 != total_count) // build our edit summary
    {
        Summary = summary_concat (Summary, " eval " + total_count + " template" + (1 == total_count ? ":" : "s:"));
        if (0 != comment_deleted)
            Summary = summary_concat (Summary, " del cmtd params (" + comment_deleted + "×);");
        if (0 != comment_repaired)
            Summary = summary_concat (Summary, " rep cmtd params (" + comment_repaired + "×);");
        if (0 != params_removed_count)
            Summary = summary_concat (Summary, " del empty params (" + params_removed_count + "×);");
        if (0 != params_hyphenated_count)
            Summary = summary_concat (Summary, " hyphenate params (" + params_hyphenated_count + "×);");
        if (0 != empty_positional_deleted)
            Summary = summary_concat (Summary, " del pos params (" + empty_positional_deleted + "×);");
        if (0 != ref_harv_count)
            Summary = summary_concat (Summary, " del |ref=harv (" + ref_harv_count + "×);");
        if (0 != mode_count)
            Summary = summary_concat (Summary, " del |mode= (" + mode_count + "×);");
        if (0 != ps_count)
            Summary = summary_concat (Summary, " del |postscript= (" + ps_count + "×);");
        if (0 != url_status_count)
            Summary = summary_concat (Summary, " del |url-status= (" + url_status_count + "×);");
        if (0 != deadurl_count)
            Summary = summary_concat (Summary, " del |deadurl= (" + deadurl_count + "×);");
        if (0 != nopp_count)
            Summary = summary_concat (Summary, " del |no-pp= (" + nopp_count + "×);");
        if (0 != orig_count)
            Summary = summary_concat (Summary, " del |orig-year= (" + orig_count + "×);");
        if (0 != nls_count)
            Summary = summary_concat (Summary, " del |name-list-style= (" + nls_count + "×);");
        if (0 != mask_count)
            Summary = summary_concat (Summary, " del |<name-list>-mask= (" + mask_count + "×);");
        if (0 != link_count)
            Summary = summary_concat (Summary, " del |<name-list>-link= (" + link_count + "×);");
        if (0 != language_count)
            Summary = summary_concat (Summary, " cvt lang vals (" + language_count + "×);");
        if (0 != skip_count)
            Summary = summary_concat (Summary, " skip (" + skip_count + "×);");
    }
    else
    {
        Skip = true;
        Summary = Summary + " no templates to evaluate;";
    }

    // one csv log file per day; the file name carries the first ten characters (the date part) of the "u" format
    string log_file = @"Z:\Wikipedia\AWB\Monkbot_tasks\Task18_" + DateTimeOffset.Now.ToString("u").Substring (0, 10) + @".csv";
    bool new_log_file = !System.IO.File.Exists (log_file); // must be tested before AppendText creates the file

    // a single writer, disposed by 'using' even when a write throws
    using (System.IO.StreamWriter sw = System.IO.File.AppendText (log_file))
    {
        if (new_log_file)
        { // for csv import, pipe is string delimiter here because not legal wp title char
            sw.WriteLine ("|Article|, |evaluated|, |deleted comment|, |repaired comment|, |empty params|, |hyphenated|, |empty pos params|, |ref=harv|, |mode|, |postscript|, "
                + "|url-status|, |deadurl|, |no-pp|, |orig-year|, |name-list-style|, |<name-list>-mask|, |<name-list>-link|, |language|, |skipped|");
        }

        if (Skip) // nothing was evaluated so only the title and the (zero) template count are logged
        {
            sw.WriteLine ("|" + ArticleTitle + "|," + total_count);
        }
        else // same column order as the header line
        {
            sw.WriteLine ("|" + ArticleTitle + "|," + total_count + "," + comment_deleted + "," + comment_repaired + "," + params_removed_count + "," + params_hyphenated_count
                + "," + empty_positional_deleted + "," + ref_harv_count + "," + mode_count + "," + ps_count + "," + url_status_count + "," + deadurl_count
                + "," + nopp_count + "," + orig_count + "," + nls_count + "," + mask_count + "," + link_count + "," + language_count + "," + skip_count);
        }
    }
    return ArticleText;
}
//===========================<< S U P P O R T >>==============================================================
//---------------------------< H I D E >----------------------------------------------------------------------
//
// Masks wiki markup that would otherwise confuse the cs1|2 regexes used elsewhere in this script. In these
// comments (( and )) stand for doubled opening and closing curly braces.
//
// <dont_hide> is a regex fragment for the template names that must stay visible. Masking is done in this order:
//   1. templates whose name does not match <dont_hide> and that hold no nested braces:
//      (( to __0P3N__, )) to __CL0S3__, | to __P1P3__
//   2. <dont_hide> templates that sit inside an html comment: their (( and )) to __0P3N__ and __CL0S3__
//   3. table markup: open to __0T4BL3__, close to __CT4BL3__
//   4. single curly braces (in urls and other parameter values): { to __0CU!21Y__, } to __CCU!21Y__
//   5. wikilinks: [[ to __WL1NK_O__, ]] to __WL1NK_C__, and the pipe of a piped link to __P1P3__
//
// Returns the masked text; unhide() restores the markup.
//
private string hide (string ArticleText, string dont_hide)
{
    // step 1 needs a match evaluator because three different substitutions are made inside each match
    string other_templates = @"\{\{(?!\s*" + dont_hide + @")[^\{\}]*\}\}";
    ArticleText = Regex.Replace (ArticleText, other_templates, m =>
    {
        string masked = Regex.Replace (m.Groups[0].Value, @"\{\{", "__0P3N__"); // hide the opening ((
        masked = Regex.Replace (masked, @"\}\}", "__CL0S3__"); // hide the closing ))
        return Regex.Replace (masked, @"\|", "__P1P3__"); // and hide the pipes
    });

    // steps 2 - 5 are plain pattern / replacement pairs applied in order
    string[,] passes =
    {
        { @"(\<!\-{2,}\s*[^\>\|\}]*)\{\{(\s*" + dont_hide + @"[^\}]*)\}\}([^\>]*\-{2,}\>)", "$1__0P3N__$2__CL0S3__$3" }, // <!-- ((citx...)) -->
        { @"\{\|", "__0T4BL3__" }, // open table markup
        { @"\|\}(?!\})", "__CT4BL3__" }, // close table markup (but not a pipe that precedes a template's closing braces)
        { @"([^\{])\{([^\{])", "$1__0CU!21Y__$2" }, // single opening curly brace
        { @"([^\}])\}([^\}])", "$1__CCU!21Y__$2" }, // single closing curly brace
        { @"\[\[(?![Ff]ile|[Ii]mage)([^\|\]]+)\|([^\]]+)\]\]", "__WL1NK_O__$1__P1P3__$2__WL1NK_C__" }, // complex wikilinks: [[article title|label]]; [[File: with wikilinks inside can be confusing
        { @"\[\[([^\]]+)\]\]", "__WL1NK_O__$1__WL1NK_C__" }, // simple wikilinks: [[article title]]
    };
    for (int step = 0; step < passes.GetLength (0); step++)
        ArticleText = Regex.Replace (ArticleText, passes[step, 0], passes[step, 1]);

    return ArticleText;
}
//---------------------------< U N H I D E >------------------------------------------------------------------
//
// Reverses hide(): every 'hide' keyword in <ArticleText> is replaced with the wiki markup that it stands for
// and the restored text is returned.
//
// __0P3N__ and __CL0S3__ are restored to doubled curly braces, which is what hide() replaced them from. They
// were previously restored as doubled parentheses, which corrupted every template that hide() had masked.
//
// The keywords are fixed strings so plain ordinal string replacement is used rather than regex.
//
private string unhide (string ArticleText)
{
    // the doubled braces are assembled from single-brace literals so that no doubled brace appears in this
    // source, matching the rest of the file (NOTE(review): presumably so that the source is safe to host on
    // a wiki page - confirm)
    string template_open = "{" + "{";
    string template_close = "}" + "}";

    string[,] keywords =
    {
        { "__WL1NK_O__", "[[" }, // wikilink open
        { "__WL1NK_C__", "]]" }, // wikilink close
        { "__P1P3__", "|" }, // pipes inside hidden templates and piped wikilinks
        { "__0T4BL3__", "{|" }, // table open
        { "__CT4BL3__", "|}" }, // table close
        { "__0CU!21Y__", "{" }, // single opening curly brace
        { "__CCU!21Y__", "}" }, // single closing curly brace
        { "__0P3N__", template_open }, // template open
        { "__CL0S3__", template_close }, // template close
    };
    for (int k = 0; k < keywords.GetLength (0); k++)
        ArticleText = ArticleText.Replace (keywords[k, 0], keywords[k, 1]);

    return ArticleText;
}
//---------------------------< S U M M A R Y _ C O N C A T >--------------------------------------------------
//
// Appends <text> to the edit summary <summary> while there is room for it.
//
// <text> is appended only when <summary> + <text> + a three-character reserve (room for an ellipsis) is no
// longer than 247 characters; otherwise an ellipsis is appended in place of <text>. A <summary> that already
// holds an ellipsis is returned untouched, so nothing more can be added after the first truncation.
//
private string summary_concat (string summary, string text)
{
    bool already_truncated = summary.IndexOf ("...") >= 0;
    if (already_truncated)
        return summary;

    int length_with_reserve = summary.Length + text.Length + 3; // + 3 keeps room for an ellipsis
    return (length_with_reserve <= 247)
        ? summary + text // <text> fits
        : summary + "..."; // <text> does not fit; mark the summary as truncated
}
//---------------------------< C O U N T E D _ R E P L A C E >------------------------------------------------
//
// Replaces matches of <pattern> in <template> with <replace>, one match at a time, until <pattern> no longer
// matches; <count> is bumped once for every replacement made. Returns the modified <template>.
//
// Because the text is re-tested after every single replacement, a replacement that creates a new match is
// replaced (and counted) too.
//
private string counted_replace (string template, string pattern, string replace, ref int count)
{
    Regex matcher = new Regex (pattern); // one regex object serves both the test and the replacement
    while (matcher.IsMatch (template))
    {
        template = matcher.Replace (template, replace, 1); // first match only
        count++;
    }
    return template;
}
//---------------------------< E M P T Y _ P A R A M _ R E M O V E >------------------------------------------
//
// Removes all empty named parameters from <template>, attempting to leave what remains in the same form, and
// returns the result. <params_removed_count> is bumped once for every empty parameter removed.
//
// This is a multi-step process that attempts to handle most of the vagaries of how templates are written in
// wikitext. In general there are three basic 'styles': horizontal – all parameters written on a single
// line of text, vertical – all parameters written singly one-to-a-line, and a mix of the two – multiple lines
// where each has one or more parameters.
//
// 1. where the parameter name & '=' are on one line and the value on a following line, put the value on the
//    same line as the '='. Nothing is removed here, so this step is not tallied; it used to bump
//    <params_removed_count>, which overstated the number of empty parameters deleted
// 2. for mixed, when empties are followed by new line; remove the empty but leave the newline
// 3. for any, empties are followed by pipe or closing }; remove the empty but leave the | or }
// 4. the preceding steps can leave blank lines; remove the blank lines
//
private string empty_param_remove (string template, ref int params_removed_count)
{
    string pattern;

    // step 1: a repair, not a removal; without it step 2 would delete the name and orphan the value
    int values_joined = 0; // required by counted_replace; deliberately not reported
    pattern = @"(\|[^=]+=[ \t]*)[\r\n]+(?!\s*[\|\}])"; // parameter name & '=' on one line, value on a following line
    template = counted_replace (template, pattern, "$1", ref values_joined);

    // step 2
    pattern = @"\|[^=]+=[ \t]*([\r\n]+)"; // empty followed by new line
    template = counted_replace (template, pattern, "$1", ref params_removed_count);

    // step 3
    pattern = @"\|[^=]+=\s*([\|\}])"; // empty followed by pipe or at end of template
    template = counted_replace (template, pattern, "$1", ref params_removed_count);

    // step 4
    pattern = @"([\r\n]+)[ \t]*[\r\n]+"; // close up multiple new lines
    while (Regex.Match(template, pattern).Success)
        template = Regex.Replace(template, pattern, "$1");

    return template;
}
//---------------------------< H Y P H E N A T E >------------------------------------------------------------
//
// Rewrites un-hyphenated parameter names in <template> to their hyphenated forms and returns the result.
//
// Each key of <hyphenate_map> is an un-hyphenated name (or a regex for an enumerated name) and each value is
// its hyphenated counterpart. The key is searched for directly after a pipe and optional spaces; the pipe and
// spaces are capture group 1 and are written back in front of the hyphenated name. <params_hyphenated_count>
// is bumped for every name rewritten.
//
private string hyphenate (Dictionary<string, string> hyphenate_map, string template, ref int params_hyphenated_count)
{
    foreach (string unhyphenated in hyphenate_map.Keys)
    {
        string find = @"(\| *)" + unhyphenated; // the name directly after its introducing pipe
        string replace_with = "$1" + hyphenate_map[unhyphenated]; // keep the pipe and spaces; swap the name
        template = counted_replace (template, find, replace_with, ref params_hyphenated_count);
    }
    return template;
}
//---------------------------< L A N G U A G E >--------------------------------------------------------------
//
// Replaces language names in |language= (or |lang=) with their codes from <language_map> and returns the
// modified <template>. <count> is bumped once for every name converted.
//
// <language_map> is indexed with the trimmed, lowercased language name; the value is the code to write.
//
// The parameter value is split at commas and each list item is converted on its own; white space around an
// item is kept. Items that are not in <language_map> are left as they are.
//
// Converting item-by-item (instead of regex-replacing each name across the whole parameter) matters:
//   - a name that is also part of a later item is no longer clobbered: 'English, Old English' used to become
//     'en, Old en'
//   - a code that contains its own name can no longer cause an endless replace loop
//   - language names are never interpreted as regex, and the new value is never interpreted as a replacement
//     pattern
//   - when a template holds more than one language parameter each is converted from its own value rather
//     than all being overwritten with the first
//
private string language (Dictionary<string, string> language_map, string template, ref int count)
{
    const string raw_pattern = @"\|\s*(?:language|lang)\s*=\s*([^\|\}]+)";
    int converted = 0; // a ref parameter cannot be captured by the anonymous method so tally here

    template = Regex.Replace (template, raw_pattern,
        delegate (Match match)
        {
            Group value = match.Groups[1]; // the assigned value; runs to the end of the match
            string[] items = value.Value.Split (','); // split the parameter value at the comma
            bool changed = false;

            for (int i = 0; i < items.Length; i++)
            {
                string name = items[i].Trim();
                string code;
                if (0 == name.Length || !language_map.TryGetValue (name.ToLowerInvariant(), out code))
                    continue; // empty item or not a known language name
                if (code == name)
                    continue; // already the code; nothing to convert

                int at = items[i].IndexOf (name, StringComparison.Ordinal); // where the name sits inside its white space
                items[i] = items[i].Substring (0, at) + code + items[i].Substring (at + name.Length);
                converted++;
                changed = true;
            }

            if (!changed)
                return match.Value;

            string param_and_equals = match.Value.Substring (0, value.Index - match.Index); // everything before the value
            return param_and_equals + string.Join (",", items);
        });

    count += converted;
    return template;
}
//---------------------------< U R L _ S T A T U S _ R E M O V E >--------------------------------------------
//
// Two independent cleanups; the modified <template> is returned:
//   - when <template> has no parameter whose name begins with 'archive', every |url-status=<value> is removed
//     (tallied in <url_status_count>)
//   - |dead-url= / |deadurl= with a value of yes, true, or y is always removed (tallied in <deadurl_count>)
//
// this function to be called after empty parameters have been removed
//
private string url_status_remove (string template, ref int url_status_count, ref int deadurl_count)
{
    bool has_archive_param = Regex.IsMatch (template, @"\|\s*archive");
    if (!has_archive_param)
    {
        // |url-status= with any assigned value; the terminating | or } is kept
        template = counted_replace (template, @"\|\s*url-status\s*=\s*\b[^\|\}]*([\|\}])", "$1", ref url_status_count);
    }

    // reFill-added |deadurl=y; the terminating white space, | or } is kept
    return counted_replace (template, @"\|\s*dead\-?url\s*=\s*(?:yes|true|y)\b([\s\|\}])", "$1", ref deadurl_count);
}
//---------------------------< M O D E _ R E M O V E >--------------------------------------------------------
//
// Removes a |mode= that only restates the template's own style: |mode=cs2 when the template name is
// 'citation' or 'cite', |mode=cs1 for every other template name. White space that follows the value is
// removed with it. <count> is bumped for every parameter removed; the modified <template> is returned.
//
private string mode_remove (string template, ref int count)
{
    // template name: everything from 'cite' / 'cita' up to the first pipe, trimmed and lowercased
    Match name_match = Regex.Match (template, @"\{\{\s*([Cc]it[ae][^\|]*)");
    string template_name = name_match.Groups[1].Value.Trim().ToLower();

    bool is_cs2_template = ("citation" == template_name) || ("cite" == template_name);
    string redundant_mode = is_cs2_template
        ? @"\|\s*mode\s*=\s*cs2\s*" // |mode=cs2 in a citation template
        : @"\|\s*mode\s*=\s*cs1\s*"; // |mode=cs1 in a cite xxx template

    return counted_replace (template, redundant_mode, "", ref count);
}
//---------------------------< P O S T S C R I P T _ R E M O V E >--------------------------------------------
//
// Removes |postscript= parameters; <ps_count> is bumped for each one removed and the modified <template> is
// returned:
//   - |postscript=<!--none--> (any letter case) is removed from every template
//   - |postscript=none is removed when the template name is 'citation' or 'cite'
//   - |postscript=. (one or more dots) is removed from every other template
//
private string postscript_remove (string template, ref int ps_count)
{
    // various flavors of <!--none-->
    template = counted_replace (template, @"\|\s*postscript\s*=\s*\<!\-\-\s*[Nn][Oo][Nn][Ee]\s*\-\-\>\s*", "", ref ps_count);

    // template name: everything from 'cite' / 'cita' up to the first pipe, trimmed and lowercased
    Match name_match = Regex.Match (template, @"\{\{\s*([Cc]it[ae][^\|]*)");
    string template_name = name_match.Groups[1].Value.Trim().ToLower();

    if ("citation" == template_name || "cite" == template_name) // ((citation)) and its primary redirect ((cite))
        return counted_replace (template, @"\|\s*postscript\s*=\s*[Nn][Oo][Nn][Ee]\s*", "", ref ps_count);

    // cs1 template names; the terminating | or } is kept
    return counted_replace (template, @"\|\s*postscript\s*=\s*\.+\s*([\|\}])", "$1", ref ps_count);
}
//---------------------------< N O P P _ R E M O V E >--------------------------------------------------------
//
// Removes |no-pp=<anything> (also the un-hyphenated |nopp=) when it cannot have any effect; <nopp_count> is
// bumped for each one removed and the modified <template> is returned. The parameter is removed when any of
// these is true:
//   - <template> has none of |page=, |pages=, |p=, |pp=
//   - <template> is ((cite journal))
//   - <template> is ((citation)) or ((cite)) with |journal=<anything>
// (journal cites don't use p. and pp. prefixes)
//
// this function to be called after empty parameters have been removed
//
private string nopp_remove (string template, ref int nopp_count)
{
    bool has_page_param = Regex.IsMatch (template, @"\|\s*(?:pages?|pp|p)\b");
    bool is_cite_journal = Regex.IsMatch (template, @"\{\{\s*[Cc]ite\s*[Jj]ournal");
    bool is_cs2_with_journal = Regex.IsMatch (template, @"\{\{\s*(?:[Cc]itation|[Cc]ite)(?=\s*\|)[^\}]*\|\s*journal\s*=\s*[^\|\}]+");

    if (has_page_param && !is_cite_journal && !is_cs2_with_journal)
        return template; // |no-pp= may be meaningful here so leave it alone

    // |no-pp= with any assigned value; the terminating | or } is kept
    return counted_replace (template, @"\|\s*no\-?pp\s*=\s*\b[^\|\}]*([\|\}])", "$1", ref nopp_count);
}
//---------------------------< O R I G _Y E A R _ R E M O V E >-----------------------------------------------
//
// Removes |orig-year=<anything> when <template> has no parameter whose name begins with year, date, air-date,
// or publication-date (they are missing or have been deleted because empty). <orig_count> is bumped for each
// one removed and the modified <template> is returned.
//
// this function to be called after empty parameters have been removed and after hyphenation applied
//
private string orig_year_remove (string template, ref int orig_count)
{
    if (Regex.IsMatch (template, @"\|\s*(?:year|date|air-date)\b"))
        return template; // a date parameter is present so |orig-year= stays

    if (Regex.IsMatch (template, @"\|\s*publication\-date\b"))
        return template; // |publication-date= promotes to |date= when |date= and |year= not present

    // |orig-year= with any assigned value; the terminating | or } is kept
    return counted_replace (template, @"\|\s*orig-year\s*=\s*\b[^\|\}]*([\|\}])", "$1", ref orig_count);
}
//---------------------------< N A M E _ L I S T _ S T Y L E _ R E M O V E >----------------------------------
//
// Removes |name-list-style=<value> when <template> holds nothing for <value> to act on; <nls_count> is bumped
// for each one removed and the modified <template> is returned:
//   <value> == 'vanc': removed when <template> holds none of the first / given name aliases (with or without
//      an enumerator)
//   <value> == 'amp' | 'ampersand' | 'and' | '&' | 'serial': removed when <template> holds none of the
//      second-name aliases (last2, author2, editor2, ...)
// Any other <value> is left alone.
//
// The alias lists are searched for anywhere in <template>, not only in parameter-name position, so text in a
// parameter value that happens to contain an alias keeps |name-list-style= in place.
//
// this function to be called after empty parameters have been removed
//
private string name_list_style_remove (string template, ref int nls_count)
{
    Match nls_match = Regex.Match (template, @"\|\s*name\-list\-style\s*=(\s*(?:ampersand|amp|and|&|serial|vanc))\s*[\|\}]");
    if (!nls_match.Success) // no |name-list-style=<value> with a value handled here
        return template;

    // |name-list-style= requires one of these with value for amp, ampersand, and, &, serial
    // ('editor2\-last' was mistyped as 'editor#\-last'; it was masked only because the 'editor2' alternative
    // matches the same text)
    string nls_names_pattern = @"last2|author\-last2|author2\-last|author2|surname2|subject2|host2|contributor\-last2|contributor2\-last|contributor2|contributor\-surname2|contributor2\-surname|editor\-last2|editor2\-last|editor2|editor\-surname2|editor2\-surname|interviewer\-last2|interviewer2\-last|interviewer2|translator\-last2|translator2\-last|translator2|translator\-surname2|translator2\-surname";
    // |name-list-style= requires one of these with value for vanc
    string nls_vanc_pattern = @"first\d*|author\-first\d*|author\d*\-first|given\d*|author\-given\d*|author\d*\-given|contributor\-first\d*|contributor\d*\-first|contributor\-given\d*|contributor\d*\-given|editor\-first\d*|editor\d*\-first|editor\-given\d*|editor\d*\-given|interviewer\-first\d*|interviewer\d*\-first|interviewer\-given\d*|interviewer\d*\-given|translator\-first\d*|translator\d*\-first|translator\-given\d*|translator\d*\-given";

    bool is_vanc = nls_match.Groups[1].Value.Trim().Equals ("vanc"); // get and trim <value>; see if it is 'vanc'
    string required_pattern = is_vanc ? nls_vanc_pattern : nls_names_pattern;
    if (Regex.Match (template, required_pattern).Success)
        return template; // something for |name-list-style= to act on so keep it

    // none of the required parameters present so delete; the terminating | or } is kept
    return counted_replace (template, @"\|\s*name\-list\-style\s*=[^\|\}]*([\|\}])", "$1", ref nls_count);
}
//---------------------------< N A M E _ L I N K _ M A S K _ R E M O V E >----------------------------------------------
//
// Removes |<name>-<link_mask>=<value>, |<name>n-<link_mask>=<value>, and |<name>-<link_mask>n=<value> when the
// name that they apply to is missing from <template>. <link_mask> is 'mask' or 'link'; <name> is one of author,
// subject, contributor, editor, interviewer, translator. <count> is bumped for each parameter removed and the
// modified <template> is returned.
//
// For each link / mask parameter found:
//   1. no enumerator is the same as enumerator 1
//   2. for author parameters when |vauthors= is present, and for editor parameters when |veditors= is present:
//      the parameter is removed when its enumerator is greater than the number of comma-separated names in
//      that list
//   3. otherwise the parameter is removed when none of the matching name parameters (|last<n>=, |author<n>=,
//      ... ) is present
//
// Corrections made here:
//   - the author / subject name pattern lacked the pipe between its host and subject alternatives so neither
//     |host<n>= nor |subject<n>= was ever recognized and their link / mask parameters were wrongly removed
//   - step 2 used whichever of |vauthors= / |veditors= came first in <template>, so an editor parameter could
//     be removed because of the length of the author list (and the other way around)
//   - the parameter name taken from <template> is regex-escaped before it is used in the delete pattern
//   - an enumerator that cannot be parsed as an int skips step 2 instead of throwing
//
// this function to be called after empty parameters have been removed
//
private string name_link_mask_remove (string template, ref int count, string link_mask)
{
    const string roots = @"((?:author|subject|contributor|editor|interviewer|translator))";
    string lm_pattern1 = roots + @"\-" + link_mask + @"(\d*)"; // trailing enumerator: author-link2
    string lm_pattern2 = roots + @"(\d*)\-" + link_mask; // embedded enumerator: author2-link

    // array of parameters in <template>; [0] is '((<template name' so ignore that
    string[] param_array = template.Split (new string[] {"|"}, StringSplitOptions.None); // split the template at the pipe

    for (int i = 1; i < param_array.Length; i++) // begin at [1] because param_array[0] has template name and opening braces
    {
        // strip leading white space, assignment operator, and rvalue; keep only the parameter name
        param_array[i] = Regex.Replace (param_array[i], @"\s*([^=]+)\s*=.*", "$1");

        Match match = Regex.Match (param_array[i], lm_pattern1); // look for params with trailing enumerators
        if (!match.Success)
            match = Regex.Match (param_array[i], lm_pattern2); // look for link|mask params with embedded enumerators
        if (!match.Success)
            continue; // not a mask or link parameter so do next parameter in param_array

        string lm_param_name = match.Groups[1].Value.Trim(); // root name (without enumerator and without '-mask' or '-link')
        string lm_param_enum = match.Groups[2].Value; // the enumerator or empty string
        if ("" == lm_param_enum || "1" == lm_param_enum) // when no enumerator, same as enumerator of 1
            lm_param_enum = "1?"; // rewrite enumerator to accept zero or one "1" as enumerator

        // regex for the parameter to be deleted; stops at end of line so that a following newline is kept
        string delete_pattern = @"\|\s*" + Regex.Escape (param_array[i]) + @"\s*=[^\r\n\|\}]*";

        if ("author" == lm_param_name || "editor" == lm_param_name)
        {
            string v_list_name = ("author" == lm_param_name) ? "vauthors" : "veditors"; // the list that belongs to this root name
            Match v_match = Regex.Match (template, @"\|\s*" + v_list_name + @"\s*=([^\|\}]*)");
            int i_lm_param_enum = 1;
            bool have_enum = ("1?" == lm_param_enum) || int.TryParse (lm_param_enum, out i_lm_param_enum);
            if (v_match.Success && have_enum)
            {
                int name_count = v_match.Groups[1].Value.Split (',').Length; // number of names (number of commas + 1)
                if (i_lm_param_enum > name_count) // enumerator points past the end of the name list
                {
                    template = counted_replace (template, delete_pattern, "", ref count); // so delete the parameter
                    continue;
                }
            }
        }

        string names; // alternation of the name parameters that this link|mask parameter can apply to
        switch (lm_param_name)
        {
            case "subject":
            case "author":
                names = @"(?:last" + lm_param_enum + @"|author\-last" + lm_param_enum + @"|author" + lm_param_enum + @"\-last|author" + lm_param_enum + @"|surname" + lm_param_enum + @"|host" + lm_param_enum + @"|subject" + lm_param_enum + @"|vauthors)";
                break;
            case "contributor":
                names = @"(?:contributor\-last" + lm_param_enum + @"|contributor" + lm_param_enum + @"\-last|contributor" + lm_param_enum + @"|contributor\-surname" + lm_param_enum + @"|contributor" + lm_param_enum + @"\-surname)";
                break;
            case "editor":
                names = @"(?:editor\-last" + lm_param_enum + @"|editor" + lm_param_enum + @"\-last|editor" + lm_param_enum + @"|editor\-surname" + lm_param_enum + @"|editor" + lm_param_enum + @"\-surname|veditors)";
                break;
            case "interviewer":
                names = @"(?:interviewer\-last" + lm_param_enum + @"|interviewer" + lm_param_enum + @"\-last|interviewer" + lm_param_enum + @")";
                break;
            case "translator":
                names = @"(?:translator\-last" + lm_param_enum + @"|translator" + lm_param_enum + @"\-last|translator" + lm_param_enum + @"|translator\-surname" + lm_param_enum + @"|translator" + lm_param_enum + @"\-surname)";
                break;
            default:
                continue; // cannot happen: lm_pattern1 and lm_pattern2 only capture the root names above
        }

        // add introductory pipe, assignment operator, and rvalue regex; try to match the enumerated <name> param
        bool name_present = Regex.Match (template, @"\|\s*" + names + @"\s*=[^\|\}]*").Success;
        if (!name_present)
            template = counted_replace (template, delete_pattern, "", ref count); // nothing to link or mask so delete
    }
    return template;
}
//===========================<< S T A T I C D A T A >>======================================================
// IS_CS1: regex fragment for the cs1|2 template names that this script works on: 'cite <something>' for the
// listed <something>s, 'citation' and 'cite' when directly followed by optional white space and a pipe, and a
// handful of other spellings (cit news, cita web, web cite, ...). Passed to hide() as the names that must
// not be hidden and used to build <template_pattern>.
static string IS_CS1 = @"(?:[Cc]ite[_\-\s]*(?=album\-notes|[Aa][Vv] media|[Aa][Vv] media notes|article|ar[Xx]iv|audio|bio[Rr]xiv|blog|book|chapter|conference|contribution|dictionary|dissertation|document|DVD|dvd|encyclopa?edia|episode|image|interview|[Jj]ournal|letter|liner notes|[Mm]agazine|mailing ?list|manual|map|media release|media|newsgroup|newspaper|(?:[Nn]ews(?!group|paper))|[Nn]ew|paper|plaque|podcast|press release|press|publication|pr|radio|report|serial|sign|speech|techreport|thesis|video|[Ww]eb|periodical)|[Cc]itation(?=\s*\|)|[Cc]ite(?=\s*\|)|[Cc]it news|[Cc]it web|[Cc]ita web|[Cc]itar notícia|[Cc]itat web|[Cc]ite we|[Ww]eb cite)";
// a whole cs1|2 template: opening braces, a name from IS_CS1, then everything up to the first closing brace
static string template_pattern = @"\{\{\s*" + IS_CS1 + @"[^\}]+\}\}"; // basic cs1|2 template pattern
// the comment_params patterns are applied by ProcessArticle in numeric order, 1 through 5
static string comment_params_1_pattern = @"(\|\s*)(\<!\-{2,}\s*)([^=\>\}]*=\s*[^\>]+)(\|\s*)(\-{2,}\>)"; // used with $2$1$3$5$4: |<!-- param=value |--> to <!-- |param=value -->|
static string comment_params_2_pattern = @"(\|\s*)(\<!\-{2,}\s*)([^=\>\}]*=\s*\b[^\>]+\-{2,}\>)"; // used with $2$1$3: move pipe inside comment markup
static string comment_params_3_pattern = @"\|\s*\<!\-{2,}\s*[^=\>\}]*=\s*\-{2,}\>\s*"; // commented-out parameter without a value; delete when match
static string comment_params_4_pattern = @"\|\s*[^=\>\}]*=\s*(\-{2,}\>)"; // used with $1: remove empty parameter that precedes a comment close; leave the comment close
static string comment_params_5_pattern = @"(\|\s*)(\<!\-{2,}\s*)([^=\>\|\}]*\-{2,}\>\s*[\|\}])"; // used with $2$1$3: |<!--Added by DASHBot--> | or } to <!--|Added by DASHBot--> | or }
static string empty_comment_pattern = @"\<!\-{2,}\s*\-{2,}\>"; // html comments that are empty or hold only white space
static string empty_positional_parameter_nl = @"\|[ \t]*([\r\n]+[ \t]*[\|\}])"; // empty positional parameter at end of line – newline variant to keep next line on the next line
static string empty_positional_parameter = @"\|\s*([\|\}])"; // empty positional parameter: a pipe followed only by white space and then | or }
static string ref_harv_pattern = @"\|\s*ref\s*=\s*harv[\t ]*"; // |ref=harv and any spaces or tabs that follow it
//---------------------------< H Y P H E N A T I O N D A T A >----------------------------------------------
//
// Maps unhyphenated parameter names (the key of each dictionary k/v pair) to their hyphenated replacements
// (the value of each k/v pair).
//
// The entries at the end of the table are for enumerated parameter names: those keys are regex patterns in
// which (\d+) captures the enumerator, and those values are regex replacement strings.
//
// NOTE(review): the enumerated keys contain a single capture group yet their values refer to $2; presumably
// the code that uses this table wraps each key in a leading capture group so that the enumerator becomes
// group 2 -- confirm against that code before editing these entries.
//
// Entries in a collection initializer are added with Dictionary.Add, so a duplicated key throws when the
// table is initialized.
//
static Dictionary<string, string> hyphenate_map = new Dictionary<string, string>()
{
// plain (non-enumerated) parameter names
{"accessdate", "access-date"},
{"archivedate", "archive-date"},
{"archiveurl", "archive-url"},
{"authorlink", "author-link"},
{"authormask", "author-mask"},
{"booktitle", "book-title"},
{"chapterurl", "chapter-url"},
{"conferenceurl", "conference-url"},
{"contributionurl", "contribution-url"},
{"displayauthors", "display-authors"},
{"editorlink", "editor-link"},
{"episodelink", "episode-link"},
{"laydate", "lay-date"},
{"laysource", "lay-source"},
{"layurl", "lay-url"},
{"mailinglist", "mailing-list"},
{"mapurl", "map-url"},
{"nopp", "no-pp"},
{"notracking", "no-tracking"},
{"origyear", "orig-year"},
{"publicationdate", "publication-date"},
{"publicationplace", "publication-place"},
{"sectionurl", "section-url"},
{"serieslink", "series-link"},
{"seriesno", "series-no"},
{"subjectlink", "subject-link"},
{"timecaption", "time-caption"},
{"titlelink", "title-link"},
{"transcripturl", "transcript-url"},
// enumerated parameter names (regex keys); both enumerator positions are covered: <name><n><suffix> and <name><suffix><n>
{"author(\\d+)link", "author$2-link"},
{"authorlink(\\d+)", "author-link$2"},
{"author(\\d+)mask", "author$2-mask"},
{"authormask(\\d+)", "author-mask$2"},
{"editor(\\d+)link", "editor$2-link"},
{"editorlink(\\d+)", "editor-link$2"},
{"editor(\\d+)mask", "editor$2-mask"},
{"editormask(\\d+)", "editor-mask$2"},
{"subject(\\d+)link", "subject$2-link"},
{"subjectlink(\\d+)", "subject-link$2"},
};
//---------------------------< L A N G U A G E S >------------------------------------------------------------
//
// Maps language names found in |language= or |lang= (the key of each dictionary k/v pair) to MediaWiki
// language codes (the value of each k/v pair).  These k/v pairs are taken from the list at:
// [[Template:Citation_Style_documentation/language/doc#Language_names]]
//
// Every key in this table is written in lowercase.
// NOTE(review): presumably the caller lowercases the parameter value before the lookup -- confirm.
//
// The table is kept in roughly alphabetical order; a name with a parenthesized qualifier is listed before
// its unqualified base name (e.g. "adyghe (cyrillic script)" before "adyghe").
//
// Entries in a collection initializer are added with Dictionary.Add, so a duplicated key throws when the
// table is initialized.
//
static Dictionary<string, string> language_map = new Dictionary<string, string>
{
{"abaza", "abq"},
{"abkhazian", "ab"},
{"achinese", "ace"},
{"acoli", "ach"},
{"adangme", "ada"},
{"adyghe (cyrillic script)", "ady-cyrl"},
{"adyghe", "ady"},
{"afar", "aa"},
{"afrihili", "afh"},
{"afrikaans", "af"},
{"aghem", "agq"},
{"ainu", "ain"},
{"akan", "ak"},
{"akkadian", "akk"},
{"akkala sámi", "sia"},
{"akoose", "bss"},
{"alabama", "akz"},
{"albanian", "sq"},
{"aleut", "ale"},
{"algerian arabic", "arq"},
{"ambonese malay", "abs"},
{"american english", "en-us"},
{"american sign language", "ase"},
{"amharic", "am"},
{"amis", "ami"},
{"ancient egyptian", "egy"},
{"ancient greek", "grc"},
{"angika", "anp"},
{"ao naga", "njo"},
{"arabic", "ar"},
{"aragonese", "an"},
{"aramaic", "arc"},
{"araona", "aro"},
{"arapaho", "arp"},
{"arawak", "arw"},
{"armenian", "hy"},
{"aromanian", "roa-rup"},
{"arpitan", "frp"},
{"assamese", "as"},
{"asturian", "ast"},
{"asu", "asa"},
{"atikamekw", "atj"},
{"atsam", "cch"},
{"australian english", "en-au"},
{"austrian german", "de-at"},
{"avaric", "av"},
{"avestan", "ae"},
{"awadhi", "awa"},
{"aymara", "ay"},
{"azerbaijani", "az"},
{"badaga", "bfq"},
{"bafia", "ksf"},
{"bafut", "bfd"},
{"bakhtiari", "bqi"},
{"balinese", "ban"},
{"balkan romani", "rmn"},
{"baltic romani", "rml"},
{"baluchi", "bal"},
{"bambara", "bm"},
{"bamun", "bax"},
{"banjar", "bjn"},
{"basa banyumasan", "map-bms"},
{"basaa", "bas"},
{"bashkir", "ba"},
{"basque", "eu"},
{"batak mandailing", "btm"},
{"batak toba (latin script)", "bbc-latn"},
{"batak toba", "bbc"},
{"bavarian", "bar"},
{"beja", "bej"},
{"belarusian (taraškievica orthography)", "be-x-old"},
{"belarusian", "be"},
{"bemba", "bem"},
{"bena", "bez"},
{"bengali", "bn"},
{"betawi", "bew"},
{"bhojpuri", "bho"},
{"biblical hebrew", "hbo"},
{"bihari", "bh"},
{"bikol", "bik"},
{"bini", "bin"},
{"bishnupriya", "bpy"},
{"bislama", "bi"},
{"blackfoot", "bla"},
{"blin", "byn"},
{"blissymbols", "zbl"},
{"bodo", "brx"},
{"bosnian", "bs"},
{"brahui", "brh"},
{"braj", "bra"},
{"brazilian portuguese", "pt-br"},
{"breton", "br"},
{"british english", "en-gb"},
{"buginese", "bug"},
{"bulgarian", "bg"},
{"bulu", "bum"},
{"bunun", "bnn"},
{"buriat", "bua"},
{"burmese", "my"},
{"caddo", "cad"},
{"cajun french", "frc"},
{"canadian english", "en-ca"},
{"canadian french", "fr-ca"},
{"cantonese", "zh-yue"},
{"capiznon", "cps"},
{"carib", "car"},
{"carpathian romani", "rmc"},
{"catalan", "ca"},
{"cayuga", "cay"},
{"cebuano", "ceb"},
{"central atlas tamazight", "tzm"},
{"central bikol", "bcl"},
{"central dusun", "dtp"},
{"central kurdish", "ckb"},
{"central yupik", "esu"},
{"chadian arabic", "shu"},
{"chagatai", "chg"},
{"chakma", "ccp"},
{"chamorro", "ch"},
{"chavacano", "cbk-zam"},
{"chechen", "ce"},
{"cherokee", "chr"},
{"cheyenne", "chy"},
{"chibcha", "chb"},
{"chickasaw", "cic"},
{"chiga", "cgg"},
{"chilcotin", "clc"},
{"chimborazo highland quichua", "qug"},
{"chinese (china)", "zh-cn"},
{"chinese (hong kong)", "zh-hk"},
{"chinese (macau)", "zh-mo"},
{"chinese (malaysia)", "zh-my"},
{"chinese (min nan)", "zh-min-nan"},
{"chinese (singapore)", "zh-sg"},
{"chinese (taiwan)", "zh-tw"},
{"chinese", "zh"},
{"chinook jargon", "chn"},
{"chipewyan", "chp"},
{"choctaw", "cho"},
{"chukchi", "ckt"},
{"church slavic", "cu"},
{"chuukese", "chk"},
{"chuvash", "cv"},
{"classical chinese", "zh-classical"},
{"classical newari", "nwc"},
{"classical syriac", "syc"},
{"comorian", "swb"},
{"congo swahili", "sw-cd"},
{"coptic", "cop"},
{"cornish", "kw"},
{"corsican", "co"},
{"cree", "cr"},
{"crimean tatar (cyrillic script)", "crh-cyrl"},
{"crimean tatar (latin script)", "crh-latn"},
{"crimean tatar", "crh"},
{"croatian", "hr"},
{"cypriot greek", "el-cy"},
{"czech", "cs"},
{"dagbani", "dag"},
{"dakota", "dak"},
{"dalecarlian", "dlc"},
{"danish", "da"},
{"dargwa", "dar"},
{"dari", "prs"},
{"dazaga", "dzg"},
{"delaware", "del"},
{"dinka", "din"},
{"divehi", "dv"},
{"dogri", "doi"},
{"dogrib", "dgr"},
{"doteli", "dty"},
{"duala", "dua"},
{"dutch", "nl"},
{"dyula", "dyu"},
{"dzongkha", "dz"},
{"east cree", "crl"},
{"eastern balochi", "bgp"},
{"eastern canadian (aboriginal syllabics)", "ike-cans"},
{"eastern canadian (latin script)", "ike-latn"},
{"eastern cham (arabic script)", "cjm-arab"},
{"eastern cham (cham script)", "cjm-cham"},
{"eastern cham (latin script)", "cjm-latn"},
{"eastern cham", "cjm"},
{"eastern frisian", "frs"},
{"eastern mari", "mhr"},
{"eastern pwo", "kjp"},
{"eastern yiddish", "ydd"},
{"efik", "efi"},
{"egyptian arabic", "arz"},
{"ekajuk", "eka"},
{"elamite", "elx"},
{"embu", "ebu"},
{"emilian", "egl"},
{"emiliano-romagnolo", "eml"},
{"english", "en"},
{"erzya", "myv"},
{"esperanto", "eo"},
{"estonian", "et"},
{"etruscian", "ett"},
{"european portuguese", "pt-pt"},
{"european spanish", "es-es"},
{"ewe", "ee"},
{"ewondo", "ewo"},
{"extremaduran", "ext"},
{"eyak", "eya"},
{"fang", "fan"},
{"fanti", "fat"},
{"faroese", "fo"},
{"fiji hindi (latin script)", "hif-latn"},
{"fiji hindi", "hif"},
{"fijian", "fj"},
{"filipino", "fil"},
{"finnish kalo", "rmf"},
{"finnish", "fi"},
{"flemish", "nl-be"},
{"fon", "fon"},
{"frafra", "gur"},
{"french", "fr"},
{"friulian", "fur"},
{"fulah", "ff"},
// {"ga", "gaa"}, // intentionally disabled: the language name 'ga' is the same as the code for the language name Irish; this is the only such case
{"gagauz", "gag"},
{"galician", "gl"},
{"gamilaraay", "kld"},
{"gan (simplified)", "gan-hans"},
{"gan (traditional)", "gan-hant"},
{"gan chinese", "gan"},
{"ganda", "lg"},
{"gayo", "gay"},
{"gbaya", "gba"},
{"geez", "gez"},
{"georgian", "ka"},
{"german (formal address)", "de-formal"},
{"german", "de"},
{"gheg albanian", "aln"},
{"ghomala", "bbj"},
{"gilaki", "glk"},
{"gilbertese", "gil"},
{"goan konkani (devanagari script)", "gom-deva"},
{"goan konkani (latin script)", "gom-latn"},
{"goan konkani", "gom"},
{"gondi", "gon"},
{"gorontalo", "gor"},
{"gothic", "got"},
{"grebo", "grb"},
{"greek", "el"},
{"guarani", "gn"},
{"guernésiais", "nrf-gg"},
{"guianan creole", "gcr"},
{"gujarati", "gu"},
{"gusii", "guz"},
{"gwichʼin", "gwi"},
{"haida", "hai"},
{"haitian creole", "ht"},
{"hakka chinese", "hak"},
{"hausa", "ha"},
{"hawaiian", "haw"},
{"hazaragi", "haz"},
{"hebrew", "he"},
{"herero", "hz"},
{"hiligaynon", "hil"},
{"hindi", "hi"},
{"hiri motu", "ho"},
{"hittite", "hit"},
{"hmong", "hmn"},
{"hungarian", "hu"},
{"hunsrik", "hrx"},
{"hupa", "hup"},
{"iban", "iba"},
{"ibibio", "ibb"},
{"icelandic", "is"},
{"ido", "io"},
{"igbo", "ig"},
{"ilocano", "ilo"},
{"inari sami", "smn"},
{"indonesian", "id"},
{"ingrian", "izh"},
{"ingush", "inh"},
{"innu", "moe"},
{"interlingua", "ia"},
{"interlingue", "ie"},
{"inuktitut", "iu"},
{"inupiaq", "ik"},
{"iriga bicolano", "bto"},
{"irish", "ga"},
{"island carib", "crb"},
{"italian", "it"},
{"jamaican creole english", "jam"},
{"japanese (hiragana script)", "ja-hira"},
{"japanese (kana script)", "ja-hrkt"},
{"japanese (kanji script)", "ja-hani"},
{"japanese (katakana script)", "ja-kana"},
{"japanese", "ja"},
{"javanese", "jv"},
{"jinyu (simplified)", "cjy-hans"},
{"jinyu (traditional)", "cjy-hant"},
{"jinyu", "cjy"},
{"jju", "kaj"},
{"jola-fonyi", "dyo"},
{"judeo-arabic", "jrb"},
{"judeo-persian", "jpr"},
{"jutish", "jut"},
{"jèrriais", "nrf-je"},
{"kabardian (cyrillic script)", "kbd-cyrl"},
{"kabardian", "kbd"},
{"kabiye", "kbp"},
{"kabuverdianu", "kea"},
{"kabyle", "kab"},
{"kachin", "kac"},
{"kaingang", "kgp"},
{"kako", "kkj"},
{"kalaallisut", "kl"},
{"kalenjin", "kln"},
{"kalmyk", "xal"},
{"kamba", "kam"},
{"kanembu", "kbl"},
{"kannada", "kn"},
{"kanuri", "kr"},
{"kara-kalpak", "kaa"},
{"karachay-balkar", "krc"},
{"karelian", "krl"},
{"kashmiri (arabic script)", "ks-arab"},
{"kashmiri (devanagari script)", "ks-deva"},
{"kashmiri", "ks"},
{"kashubian", "csb"},
{"kawi", "kaw"},
{"kawésqar", "alc"},
{"kazakh (arabic script)", "kk-arab"},
{"kazakh (china)", "kk-cn"},
{"kazakh (cyrillic script)", "kk-cyrl"},
{"kazakh (kazakhstan)", "kk-kz"},
{"kazakh (latin script)", "kk-latn"},
{"kazakh (turkey)", "kk-tr"},
{"kazakh", "kk"},
{"kelantan-pattani malay", "mfa"},
{"kemi sámi", "sjk"},
{"kenyang", "ken"},
{"khakas", "kjh"},
{"khasi", "kha"},
{"khmer", "km"},
{"khotanese", "kho"},
{"khowar", "khw"},
{"kikuyu", "ki"},
{"kildin sami", "sjd"},
{"kimbundu", "kmb"},
{"kinaray-a", "krj"},
{"kinyarwanda", "rw"},
{"kirmanjki", "kiu"},
{"klingon", "tlh"},
{"kom", "bkm"},
{"komi-permyak", "koi"},
{"komi", "kv"},
{"kongo", "kg"},
{"konkani", "kok"},
{"korean (north korea)", "ko-kp"},
{"korean", "ko"},
{"koro", "kfo"},
{"kosraean", "kos"},
{"kotava", "avk"},
{"koyra chiini", "khq"},
{"koyraboro senni", "ses"},
{"koyukon", "koy"},
{"kpelle", "kpe"},
{"krio", "kri"},
{"kuanyama", "kj"},
{"kumyk", "kum"},
{"kurdish (arabic script)", "ku-arab"},
{"kurdish (latin script)", "ku-latn"},
{"kurdish", "ku"},
{"kurukh", "kru"},
{"kutenai", "kut"},
{"kvensk", "fkv"},
{"kwasio", "nmg"},
{"kyrgyz", "ky"},
{"kölsch", "ksh"},
{"kʼicheʼ", "quc"},
{"ladin", "lld"},
{"ladino", "lad"},
{"lahnda", "lah"},
{"lak", "lbe"},
{"laki", "lki"},
{"lakota", "lkt"},
{"lamba", "lam"},
{"langi", "lag"},
{"lao", "lo"},
{"latgalian", "ltg"},
{"latin american spanish", "es-419"},
{"latin", "la"},
{"latvian", "lv"},
{"laz", "lzz"},
{"lezghian", "lez"},
{"ligurian", "lij"},
{"limburgish", "li"},
{"lingala", "ln"},
{"lingua franca nova", "lfn"},
{"literary chinese", "lzh"},
{"lithuanian", "lt"},
{"livonian", "liv"},
{"livvi-karelian", "olo"},
{"lojban", "jbo"},
{"lombard", "lmo"},
{"louisiana creole", "lou"},
{"low german", "nds"},
{"low saxon", "nds-nl"},
{"lower silesian", "sli"},
{"lower sorbian", "dsb"},
{"lozi", "loz"},
{"luba-katanga", "lu"},
{"luba-lulua", "lua"},
{"luiseno", "lui"},
{"lule sami", "smj"},
{"lunda", "lun"},
{"luo", "luo"},
{"luxembourgish", "lb"},
{"luyia", "luy"},
{"maba", "mde"},
{"macedonian", "mk"},
{"machame", "jmc"},
{"madurese", "mad"},
{"mafa", "maf"},
{"magahi", "mag"},
{"maharashtrian konkani", "knn"},
{"main-franconian", "vmf"},
{"maithili", "mai"},
{"makasar", "mak"},
{"makhuwa-meetto", "mgh"},
{"makonde", "kde"},
{"malagasy", "mg"},
{"malay", "ms"},
{"malayalam", "ml"},
{"maltese", "mt"},
{"manchu", "mnc"},
{"mandaic", "mid"},
{"mandar", "mdr"},
{"mandingo", "man"},
{"manipuri", "mni"},
{"manx", "gv"},
{"maori", "mi"},
{"mapuche", "arn"},
{"mara", "mrh"},
{"marathi", "mr"},
{"mari", "chm"},
{"marshallese", "mh"},
{"marwari (india)", "rwr"},
{"marwari", "mwr"},
{"masai", "mas"},
{"mazanderani", "mzn"},
{"medumba", "byv"},
{"megleno-romanian (cyrillic script)", "ruq-cyrl"},
{"megleno-romanian (greek script)", "ruq-grek"},
{"megleno-romanian (latin script)", "ruq-latn"},
{"megleno-romanian", "ruq"},
{"mende", "men"},
{"mentawai", "mwv"},
{"meru", "mer"},
{"metaʼ", "mgo"},
{"mexican spanish", "es-mx"},
{"mi'kmaq", "mic"},
{"middle dutch", "dum"},
{"middle english", "enm"},
{"middle french", "frm"},
{"middle high german", "gmh"},
{"middle irish", "mga"},
{"middle low german", "gml"},
{"min dong chinese", "cdo"},
{"min nan chinese", "nan"},
{"minangkabau", "min"},
{"mingrelian", "xmf"},
{"mirandese", "mwl"},
{"mizo", "lus"},
{"modern standard arabic", "ar-001"},
{"mohawk", "moh"},
{"moksha", "mdf"},
{"moldavian", "ro-md"},
{"moldovan", "mo"},
{"mon", "mnw"},
{"mongo", "lol"},
{"mongolian", "mn"},
{"montenegrin", "cnr"},
{"monégasque", "lij-mc"},
{"morisyen", "mfe"},
{"moroccan arabic", "ary"},
{"mossi", "mos"},
{"mundang", "mua"},
{"munsee", "umu"},
{"muscogee", "mus"},
{"musi", "mui"},
{"muslim tat", "ttt"},
{"mycenaean greek", "gmy"},
{"myene", "mye"},
{"najdi arabic", "ars"},
{"nama", "naq"},
{"naskapi", "nsk"},
{"nauru", "na"},
{"navajo", "nv"},
{"ndonga", "ng"},
{"neapolitan", "nap"},
{"nederlands (informeel)", "nl-informal"},
{"nepali", "ne"},
{"newari", "new"},
{"ngambay", "sba"},
{"ngiemboon", "nnh"},
{"ngomba", "jgo"},
{"nheengatu", "yrl"},
{"nias", "nia"},
{"nigerian pidgin", "pcm"},
{"niuean", "niu"},
{"nogai", "nog"},
{"norfuk / pitkern", "pih"},
{"norman", "nrm"},
{"north ndebele", "nd"},
{"northern frisian", "frr"},
{"northern luri", "lrc"},
{"northern sami", "se"},
{"northern sotho", "nso"},
{"northern thai", "nod"},
{"norwegian bokmål", "nb"},
{"norwegian nynorsk", "nn"},
{"norwegian", "no"},
{"novial", "nov"},
{"nuer", "nus"},
{"numidian", "nxm"},
{"nyamwezi", "nym"},
{"nyanja", "ny"},
{"nyankole", "nyn"},
{"nyasa tonga", "tog"},
{"nyoro", "nyo"},
{"nyungar", "nys"},
{"nzima", "nzi"},
{"nāhuatl", "nah"},
{"n’ko", "nqo"},
{"o'odham", "ood"},
{"occitan", "oc"},
{"odia", "or"},
{"ojibwa", "oj"},
{"old english", "ang"},
{"old french", "fro"},
{"old high german", "goh"},
{"old irish", "sga"},
{"old japanese (hiragana script)", "ojp-hira"},
{"old japanese (kanji script)", "ojp-hani"},
{"old japanese", "ojp"},
{"old norse", "non"},
{"old persian", "peo"},
{"old provençal", "pro"},
{"old turkish", "otk"},
{"oromo", "om"},
{"osage", "osa"},
{"ossetic", "os"},
{"ottoman turkish", "ota"},
{"pahlavi", "pal"},
{"paiwan", "pwn"},
{"palatine german", "pfl"},
{"palauan", "pau"},
{"pali (siddham script)", "pi-sidd"},
{"pali", "pi"},
{"pampanga", "pam"},
{"pangasinan", "pag"},
{"papiamento", "pap"},
{"papora-hoanya", "ppu"},
{"pashto", "ps"},
{"pazeh", "uun"},
{"pennsylvania german", "pdc"},
{"persian", "fa"},
{"phoenician (latin script)", "phn-latn"},
{"phoenician (phoenician script)", "phn-phnx"},
{"phoenician", "phn"},
{"picard", "pcd"},
{"piedmontese", "pms"},
{"pite sami", "sje"},
{"pitjantjatjara", "pjt"},
{"plautdietsch", "pdt"},
{"pohnpeian", "pon"},
{"polish", "pl"},
{"pontic", "pnt"},
{"portuguese", "pt"},
{"prussian", "prg"},
{"pular", "fuf"},
{"punic", "xpu"},
{"punjabi", "pa"},
{"putèr", "rm-puter"},
{"puyuma", "pyu"},
{"quechua", "qu"},
{"quenya", "qya"},
{"rajasthani", "raj"},
{"rapanui", "rap"},
{"rarotongan", "rar"},
{"riffian", "rif"},
{"romagnol", "rgn"},
{"romanian", "ro"},
{"romansh", "rm"},
{"romany", "rom"},
{"rombo", "rof"},
{"rotuman", "rtm"},
{"roviana", "rug"},
{"rumantsch grischun", "rm-rumgr"},
{"rundi", "rn"},
{"russia buriat", "bxr"},
{"russian", "ru"},
{"rusyn", "rue"},
{"rwa", "rwk"},
{"saho", "ssy"},
{"saisiyat", "xsy"},
{"sakha", "sah"},
{"sakizaya", "szy"},
{"samaritan aramaic", "sam"},
{"samburu", "saq"},
{"samoan", "sm"},
{"samogitian", "sgs"},
{"sandawe", "sad"},
{"sango", "sg"},
{"sangu", "sbp"},
{"sanskrit (siddham script)", "sa-sidd"},
{"sanskrit", "sa"},
{"santali", "sat"},
{"saraiki (arabic script)", "skr-arab"},
{"saraiki", "skr"},
{"sardinian", "sc"},
{"sasak", "sas"},
{"sassarese sardinian", "sdc"},
{"saterland frisian", "stq"},
{"saurashtra", "saz"},
{"scots", "sco"},
{"scottish gaelic", "gd"},
{"selayar", "sly"},
{"selkup", "sel"},
{"sena", "seh"},
{"seneca", "see"},
{"serbian (cyrillic script)", "sr-ec"},
{"serbian (latin script)", "sr-el"},
{"serbian", "sr"},
{"serbo-croatian", "sh"},
{"serer", "srr"},
{"seri", "sei"},
{"seselwa creole french", "crs"},
{"shambala", "ksb"},
{"shan", "shn"},
{"shawiya (arabic script)", "shy-arab"},
{"shawiya (latin script)", "shy-latn"},
{"shawiya (tifinagh script)", "shy-tfng"},
{"shawiya", "shy"},
{"shona", "sn"},
{"sichuan yi", "ii"},
{"sicilian", "scn"},
{"sidamo", "sid"},
{"silesian", "szl"},
{"simple english", "simple"},
{"simplified chinese", "zh-hans"},
{"sindarin", "sjn"},
{"sindhi", "sd"},
{"sinhala", "si"},
{"sinte romani", "rmo"},
{"siraya", "fos"},
{"sirionó", "srq"},
{"skolt sami", "sms"},
{"slave", "den"},
{"slovak", "sk"},
{"slovenian", "sl"},
{"soga", "xog"},
{"sogdien", "sog"},
{"somali", "so"},
{"soninke", "snk"},
{"south azerbaijani", "azb"},
{"south ndebele", "nr"},
{"southern altai", "alt"},
{"southern balochi", "bcc"},
{"southern kurdish", "sdh"},
{"southern luri", "luz"},
{"southern sami", "sma"},
{"southern sotho", "st"},
{"spanish", "es"},
{"sranan tongo", "srn"},
{"standard moroccan tamazight", "zgh"},
{"sukuma", "suk"},
{"sumerian", "sux"},
{"sundanese", "su"},
{"surmiran", "rm-surmiran"},
{"sursilvan", "rm-sursilv"},
{"susu", "sus"},
{"sutsilvan", "rm-sutsilv"},
{"swahili", "sw"},
{"swati", "ss"},
{"swedish", "sv"},
{"swiss french", "fr-ch"},
{"swiss german", "gsw"},
{"swiss high german", "de-ch"},
{"syriac", "syr"},
{"tachelhit (latin script)", "shi-latn"},
{"tachelhit (tifinagh script)", "shi-tfng"},
{"tachelhit", "shi"},
{"tagalog", "tl"},
{"tahitian", "ty"},
{"taita", "dav"},
{"tajik (cyrillic script)", "tg-cyrl"},
{"tajik (latin script)", "tg-latn"},
{"tajik", "tg"},
{"talossan", "tzl"},
{"talysh", "tly"},
{"tamashek", "tmh"},
{"tamil", "ta"},
{"tarantino", "roa-tara"},
{"taroko", "trv"},
{"tasawaq", "twq"},
{"tatar (cyrillic script)", "tt-cyrl"},
{"tatar (latin script)", "tt-latn"},
{"tatar", "tt"},
{"tayal", "tay"},
{"taíno", "tnq"},
{"telugu", "te"},
{"ter sámi", "sjt"},
{"tereno", "ter"},
{"teso", "teo"},
{"tetum", "tet"},
{"thai", "th"},
{"thao", "ssf"},
{"tibetan", "bo"},
{"tigre", "tig"},
{"tigrinya", "ti"},
{"timne", "tem"},
{"tiv", "tiv"},
{"tlingit", "tli"},
{"tobelo", "tlb"},
{"tok pisin", "tpi"},
{"tokelau", "tkl"},
{"tongan", "to"},
{"tornedalen finnish", "fit"},
{"tosk albanian", "als"},
{"traditional chinese", "zh-hant"},
{"traveller norwegian", "rmg"},
{"tsakhur", "tkr"},
{"tsakonian", "tsd"},
{"tsimshian", "tsi"},
{"tsonga", "ts"},
{"tswana", "tn"},
{"tulu", "tcy"},
{"tumbuka", "tum"},
{"tungag", "lcm"},
{"tunisian arabic (arabic script)", "aeb-arab"},
{"tunisian arabic (latin script)", "aeb-latn"},
{"tunisian arabic", "aeb"},
{"turkish", "tr"},
{"turkmen", "tk"},
{"turoyo", "tru"},
{"tuvalu", "tvl"},
{"tuvinian", "tyv"},
{"twi", "tw"},
{"tyap", "kcg"},
{"udmurt", "udm"},
{"ugaritic", "uga"},
{"ukrainian", "uk"},
{"umbundu", "umb"},
{"ume sami", "sju"},
{"unsupported language", "mis"},
{"upper sorbian", "hsb"},
{"urdu", "ur"},
{"uyghur (arabic script)", "ug-arab"},
{"uyghur (latin script)", "ug-latn"},
{"uyghur", "ug"},
{"uzbek (cyrillic script)", "uz-cyrl"},
{"uzbek (latin script)", "uz-latn"},
{"uzbek", "uz"},
{"vai", "vai"},
{"vallader", "rm-vallader"},
{"venda", "ve"},
{"venetian", "vec"},
{"veps", "vep"},
{"vietnamese", "vi"},
{"vlax romani", "rmy"},
{"volapük", "vo"},
{"votic", "vot"},
{"vunjo", "vun"},
{"võro", "fiu-vro"},
{"wallisian", "wls"},
{"walloon", "wa"},
{"walser", "wae"},
{"waray", "war"},
{"warlpiri", "wbp"},
{"washo", "was"},
{"wayuu", "guc"},
{"welsh-romani", "rmw"},
{"welsh", "cy"},
{"west coast bajau", "bdr"},
{"west flemish", "vls"},
{"western abenaki", "abe"},
{"western armenian", "hyw"},
{"western balochi", "bgn"},
{"western cham (arabic script)", "cja-arab"},
{"western cham (cham script)", "cja-cham"},
{"western cham (latin script)", "cja-latn"},
{"western cham", "cja"},
{"western frisian", "fy"},
{"western mari", "mrj"},
{"western punjabi", "pnb"},
{"wolaytta", "wal"},
{"wolof", "wo"},
{"wu chinese", "wuu"},
{"xhosa", "xh"},
{"xiang chinese", "hsn"},
{"yangben", "yav"},
{"yao", "yao"},
{"yapese", "yap"},
{"yemba", "ybb"},
{"yiddish", "yi"},
{"yoruba", "yo"},
{"zapotec", "zap"},
{"zarma", "dje"},
{"zaza", "zza"},
{"zazaki", "diq"},
{"zeelandic", "zea"},
{"zenaga", "zen"},
{"zhuang", "za"},
{"zoroastrian dari", "gbz"},
{"zulu", "zu"},
{"zuni", "zun"},
// the remaining entries sit outside the alphabetical run above: names written in their own language or
// script, and the special-purpose codes mul / zxx / und
{"español (formal)", "es-formal"},
{"magyar (formal)", "hu-formal"},
{"multiple languages", "mul"},
{"no linguistic content", "zxx"},
{"unknown language", "und"},
{"себертатар", "sty"},
{"ᬩᬲᬩᬮᬶ", "ban-bali"},
};
//---------------------------< S K I P _ L I S T >------------------------------------------------------------
//
// List of articles that this task must not edit, for whatever reason.  The key is the article title; 
// ProcessArticle() abandons the edit when the title is present as a key (it tests with ContainsKey, so the
// bool value is not consulted there).
//
// Unlike the other tables in this file, this one is declared as an instance (non-static) field.
//
Dictionary<string, bool> skip_map = new Dictionary<string, bool>()
{
// template for new entries:
// {<article name here>, true},
{"Bist du bei mir", true}, // per reverts by Editor Francis Schonken
{"Concerto for Two Violins (Bach)", true},
};
// Monkbot_task_18_cosmetic_cs1_template_and_parameter_fixes.cs