-- Module:Language/data/ISO 639-2/make

require('Module:No globals');

--[=[-------------------------< I S O _ S Y N O N Y M _ E X T R A C T >------------------------------------

reads a local copy of data from the table at http://www.loc.gov/standards/iso639-2/php/English_list.php, extracts the ISO 639-2 (or 639-2T) codes that have equivalent ISO 639-1 codes and creates a table to translate 639-2 to 639-1. ISO-639-3 uses 639-2T codes

useful lines in the source table have the form:
	<English name>\t<all English names>\t<all French names>\t<639-2 code>\t<639-1 code>\n
where:
	<English name> is the primary English name (not used here); it is one of <all English names>, so code listings are duplicated
	<all English names> is all of the English names (not used here)
	<all French names> is all of the French names (not used here)
	<639-2 code> is the three-character ISO 639-2 or 639-2B/639-2T language code; when 639-2T is present, use that code
	<639-1 code> is the two-character ISO 639-1 language code synonym of the -2 code (if one is defined)

like this (with synonym):
	Abkhazian	Abkhazian	abkhaze	abk	ab
or (without synonym):
	Achinese	Achinese	aceh	ace

for the file date use the date listed at the bottom of the source page in yyyymmdd numeric format without hyphens or spaces

]=]

-- Build the ISO 639-2 (639-2T when a B/T pair is listed) to ISO 639-1 synonym table
-- from the unparsed text of the current page.
--
-- frame: frame object; frame.args["file-date"] supplies the date written into the
--        output header (concatenation raises an error when it is missing)
-- returns: a string holding a <pre> block whose content is a Lua 'return { ... }'
--          table constructor with one sorted ["<639-2 code>"] = "<639-1 code>"
--          entry per code
local function ISO_synonym_extract (frame)
	local page = mw.title.getCurrentTitle();									-- get a page object for this page
	local content = page:getContent();											-- get unparsed content
	local seen = {};															-- 639-2/639-2T codes already handled; used to prevent duplication
	local out_table = {};														-- output table
	local file_date = 'File-Date: ' .. frame.args["file-date"];					-- set the file date line from |file-date= (from the bottom of the source page)

	for _, line in ipairs (mw.text.split (content, '[\r\n]')) do				-- for each text line of the source
		local fields = mw.text.split (line, '\t');								-- split the line at the tabs
		local synonym = fields[5];												-- the 639-1 code column, when present

		if synonym and synonym:find ('%S') then									-- an empty or whitespace-only column means there is no 639-1 code
			local code = fields[4]:match ('%a+/(%a+)') or fields[4];			-- when 639-2B/639-2T use 639-2T else use 639-2
			if not seen[code] then												-- source lists a code once per language name; emit it only once
				seen[code] = true;												-- remember that we've handled this 639-2/639-2T code
				table.insert (out_table, '["' .. code .. '"] = "' .. synonym .. '"');	-- make new table entry
			end
		end
	end

	table.sort (out_table);

	return "<br /><pre>-- " .. file_date .. "<br />return {<br />&#9;" .. table.concat (out_table, ',<br />&#9;') .. "<br />&#9;}<br />" .. "</pre>";
end

--[[--------------------------< I S O _ 2 _ N A M E _ E X T R A C T >--------------------------------------

reads a local copy of data from the table at http://www.loc.gov/standards/iso639-2/php/English_list.php, extracts the ISO 639-2 and 639-2T codes and their associated language names

useful lines in the source table have the form:
	<English name>\t<all English names>\t<all French names>\t<639-2 code>\t<639-1 code>\n
where:
	<English name> is the primary English name (not used here); it is one of <all English names>, so code listings are duplicated
	<all English names> is all of the English names (used here)
	<all French names> is all of the French names (not used here)
	<639-2 code> is the three-character ISO 639-2 or 639-2B/639-2T language code; both are used
	<639-1 code> is the two-character ISO 639-1 language code synonym of the -2 code (not used here)

for the file date use the date listed at the bottom of the source page in yyyymmdd numeric format without hyphens or spaces

]]

-- Render one output section: a <pre> block holding the file-date comment and a
-- Lua 'return { ... }' table constructor built from the already-formatted entries.
--
-- file_date: the 'File-Date: ...' header text
-- label: section label written as a trailing comment ('639-2T' or '639-2B')
-- entries: sequence of '["code"] = {"name", ...}' strings
local function render_code_section (file_date, label, entries)
	return table.concat ({
		"<br /><pre>-- ",
		file_date,
		"<br />return {",
		string.rep ('&#9;', 18),												-- push the label comment to the right of 'return {'
		'-- ' .. label .. '<br />&#9;',
		table.concat (entries, ',<br />&#9;'),
		"<br />&#9;}<br /></pre><br /><br />",
	});
end

-- Build the ISO 639-2 code -> language names tables from the unparsed text of the
-- current page. Codes listed as a 639-2B/639-2T pair are split: the first code of
-- the pair goes to the 639-2B table, everything else goes to the 639-2T table.
--
-- frame: frame object; frame.args["file-date"] supplies the date written into the
--        output headers (concatenation raises an error when it is missing)
-- returns: a string holding two <pre> blocks (639-2T first, then 639-2B), each a
--          Lua 'return { ... }' table constructor with sorted
--          ["<code>"] = {"<name>", ...} entries
local function ISO_2_name_extract (frame)
	local page = mw.title.getCurrentTitle();									-- get a page object for this page
	local content = page:getContent();											-- get unparsed content
	local seen = {['qaa-qtz']=true};											-- codes already handled; qaa-qtz reserved for local use so not supported here
	local out_table_T = {};														-- output table for 639-2T codes
	local out_table_B = {};														-- output table for 639-2B codes
	local file_date = 'File-Date: ' .. frame.args["file-date"];					-- set the file date line from |file-date= (from the bottom of the source page)

	for _, line in ipairs (mw.text.split (content, '[\r\n]')) do				-- for each text line of the source
		local fields = mw.text.split (line, '\t');								-- split the line at the tabs

		if fields[4] then														-- if a code then continue processing; else skip this line
			local names = mw.text.split (fields[2], ' *; *');					-- split 'all English names' at the '; ' into individual names
			for i, name in ipairs (names) do
				names[i] = mw.ustring.gsub (name, '(.+)', '"%1"');				-- add double quotes around each name; only the first gsub result is kept
			end
			local name_list = table.concat (names, ', ');

			local codes = mw.text.split (fields[4], ' */ *');					-- split the code column at the '/' into -2B and -2T individual codes
			local is_pair = 2 == #codes;										-- does this language have both -2B and -2T codes?
			for i, code in ipairs (codes) do
				if not seen[code] then											-- source data has duplicates so check to see if we have already done this code
					local target = (is_pair and 1 == i) and out_table_B or out_table_T;	-- first code of a pair is the -2B code
					table.insert (target, '["' .. code .. '"] = {' .. name_list .. '}');
					seen[code] = true;											-- remember that we've done this code
				end
			end
		end
	end

	table.sort (out_table_T);
	table.sort (out_table_B);

	return render_code_section (file_date, '639-2T', out_table_T)
		.. render_code_section (file_date, '639-2B', out_table_B);
end

----&

-- module export table: makes both extractor functions available to callers of this module
return {
	ISO_synonym_extract = ISO_synonym_extract,
	ISO_2_name_extract = ISO_2_name_extract,
};