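--[[ Page header captured from the wiki rendering of this module (kept for reference; not part of the Lua code):

Пређи на садржај

Модул:Citation/CS1/Language

Извор: Викизворник

Документацију овог модула можете да направите на страници Модул:Citation/CS1/Language/док
]]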

local language = {}

-- Forward declarations for the helper functions that set_selected_modules() copies out of the utilities
-- module.  Declaring them here binds every later reference in this file to these module-level locals;
-- without the declarations the first assignment would silently create global variables.
local is_set, add_maint_cat, add_prop_cat, wrap_msg;

--[[--------------------------< L A N G U A G E   S U P P O R T >----------------------------------------------

This code is an experiment that may be useful in solving the Cyrillic/Latin nominative/dative issues that this
editor does not fully understand.  It is hoped that the data table and support code introduced here will be
useful in untangling what this editor sees as a mess that is hard to maintain and as such, hard for later editors
to understand.–ttm

]]

--[[--------------------------< T R A N S L A T I O N   T A B L E S >------------------------------------------

Maps a language code to a pair of localized language names.  The intended layout, per the original sketch, is
{<nominative form>, <dative form>}:

local iso_639_2_translation = {
	['de'] = {'немачки', 'немачком'},
	['en'] = {'nominative', 'dative'},
	['fr'] = {'nominative', 'dative'},
	};

Only the first (nominative) slot is filled in so far; the second slot of every entry holds the placeholder
string 'other'.

Each code appears exactly once.  A Lua table constructor silently keeps only the last value given for a repeated
key, so alternative English names for one code are listed together in that entry's trailing comment rather than
as separate duplicate entries.

NOTE(review): despite the table's name, every key here is a two-letter code (presumably ISO 639-1) - confirm
before renaming anything.
NOTE(review): a few names ('Идо', 'Ндонга', 'Осетски', 'Пали', 'Тагалски', 'Тсвана') are capitalized while the
rest are lower case; left untouched here - confirm which form is wanted.
NOTE(review): nothing in the code visible in this module reads this table yet.

]]

local iso_639_2_translation = {
	['aa'] = {'афарски', 'other'},	-- Afar
	['ab'] = {'абхаски', 'other'},	-- Abkhazian
	['ae'] = {'авестански', 'other'},	-- Avestan
	['af'] = {'африканс', 'other'},	-- Afrikaans
	['ak'] = {'акан', 'other'},	-- Akan
	['am'] = {'амхарски', 'other'},	-- Amharic
	['an'] = {'арагонски', 'other'},	-- Aragonese
	['ar'] = {'арапски', 'other'},	-- Arabic
	['as'] = {'асамски', 'other'},	-- Assamese
	['av'] = {'аварски', 'other'},	-- Avaric
	['ay'] = {'ајмара', 'other'},	-- Aymara
	['az'] = {'азербејџански', 'other'},	-- Azerbaijani
	['ba'] = {'башкирски', 'other'},	-- Bashkir
	['be'] = {'белоруски', 'other'},	-- Belarusian
	['bg'] = {'бугарски', 'other'},	-- Bulgarian
	['bh'] = {'бихарски', 'other'},	-- Bihari languages
	['bi'] = {'бислама', 'other'},	-- Bislama
	['bm'] = {'бамбара', 'other'},	-- Bambara
	['bn'] = {'бенгалски', 'other'},	-- Bengali
	['bo'] = {'тибетански', 'other'},	-- Tibetan
	['br'] = {'бретонски', 'other'},	-- Breton
	['bs'] = {'бошњачки', 'other'},	-- Bosnian
	['ca'] = {'каталонски', 'other'},	-- Catalan
	['ce'] = {'чеченски', 'other'},	-- Chechen
	['ch'] = {'чаморо', 'other'},	-- Chamorro
	['co'] = {'корзикански', 'other'},	-- Corsican
	['cr'] = {'кри', 'other'},	-- Cree
	['cs'] = {'чешки', 'other'},	-- Czech
	['cu'] = {'старословенски', 'other'},	-- Church Slavic
	['cv'] = {'чувашки', 'other'},	-- Chuvash
	['cy'] = {'велшки', 'other'},	-- Welsh
	['da'] = {'дански', 'other'},	-- Danish
	['de'] = {'немачки', 'other'},	-- German
	['dv'] = {'дивехијски', 'other'},	-- Divehi; Dhivehi; Maldivian
	['dz'] = {'џонга', 'other'},	-- Dzongkha
	['ee'] = {'еве', 'other'},	-- Ewe
	['el'] = {'грчки', 'other'},	-- Modern Greek
	['en'] = {'енглески', 'other'},	-- English
	['eo'] = {'есперанто', 'other'},	-- Esperanto
	['es'] = {'шпански', 'other'},	-- Spanish
	['et'] = {'естонски', 'other'},	-- Estonian
	['eu'] = {'баскијски', 'other'},	-- Basque
	['fa'] = {'персијски', 'other'},	-- Persian
	['ff'] = {'фулах', 'other'},	-- Fulah
	['fi'] = {'фински', 'other'},	-- Finnish
	['fj'] = {'фиџијски', 'other'},	-- Fijian
	['fo'] = {'фарски', 'other'},	-- Faroese
	['fr'] = {'француски', 'other'},	-- French
	['fy'] = {'западни фризијски', 'other'},	-- Western Frisian
	['ga'] = {'ирски', 'other'},	-- Irish
	['gd'] = {'шкотски гелски', 'other'},	-- Scottish Gaelic
	['gl'] = {'галицијски', 'other'},	-- Galician
	['gn'] = {'гварани', 'other'},	-- Guarani
	['gu'] = {'гуџарати', 'other'},	-- Gujarati
	['gv'] = {'мански', 'other'},	-- Manx
	['ha'] = {'хауса', 'other'},	-- Hausa
	['he'] = {'хебрејски', 'other'},	-- Hebrew
	['hi'] = {'хинди', 'other'},	-- Hindi
	['ho'] = {'хири моту', 'other'},	-- Hiri Motu
	['hr'] = {'хрватски', 'other'},	-- Croatian
	['ht'] = {'хаићански', 'other'},	-- Haitian Creole
	['hu'] = {'мађарски', 'other'},	-- Hungarian
	['hy'] = {'јерменски', 'other'},	-- Armenian
	['hz'] = {'хереро', 'other'},	-- Herero
	['ia'] = {'интерлингва', 'other'},	-- Interlingua
	['id'] = {'индонежански', 'other'},	-- Indonesian
	['ie'] = {'међујезички', 'other'},	-- Interlingue; Occidental
	['ig'] = {'игбо', 'other'},	-- Igbo
	['ii'] = {'сечуан ји', 'other'},	-- Nuosu
	['ik'] = {'унупиак', 'other'},	-- Inupiaq
	['io'] = {'Идо', 'other'},	-- Ido
	['is'] = {'исландски', 'other'},	-- Icelandic
	['it'] = {'италијански', 'other'},	-- Italian
	['iu'] = {'инуктитут', 'other'},	-- Inuktitut
	['ja'] = {'јапански', 'other'},	-- Japanese
	['jv'] = {'јавански', 'other'},	-- Javanese
	['ka'] = {'грузијски', 'other'},	-- Georgian
	['kg'] = {'конго', 'other'},	-- Kongo
	['ki'] = {'кикују', 'other'},	-- Gikuyu
	['kj'] = {'куањама', 'other'},	-- Kwanyama
	['kk'] = {'казашки', 'other'},	-- Kazakh
	['kl'] = {'калалисут', 'other'},	-- Greenlandic
	['km'] = {'кмерски', 'other'},	-- Central Khmer
	['kn'] = {'канада', 'other'},	-- Kannada
	['ko'] = {'корејски', 'other'},	-- Korean
	['kr'] = {'канури', 'other'},	-- Kanuri
	['ks'] = {'кашмирски', 'other'},	-- Kashmiri
	['ku'] = {'курдски', 'other'},	-- Kurdish
	['kv'] = {'коми', 'other'},	-- Komi
	['kw'] = {'корнволски', 'other'},	-- Cornish
	['ky'] = {'киргиски', 'other'},	-- Kyrgyz
	['la'] = {'латински', 'other'},	-- Latin
	['lb'] = {'луксембуршки', 'other'},	-- Luxembourgish
	['lg'] = {'ганда', 'other'},	-- Ganda
	['li'] = {'лимбургиш', 'other'},	-- Limburgish
	['ln'] = {'лингала', 'other'},	-- Lingala
	['lo'] = {'лаошки', 'other'},	-- Lao
	['lt'] = {'литвански', 'other'},	-- Lithuanian
	['lu'] = {'луба-катанга', 'other'},	-- Luba-Katanga
	['lv'] = {'летонски', 'other'},	-- Latvian
	['mg'] = {'малгашки', 'other'},	-- Malagasy
	['mh'] = {'маршалски', 'other'},	-- Marshallese
	['mi'] = {'маорски', 'other'},	-- Maori
	['mk'] = {'македонски', 'other'},	-- Macedonian
	['ml'] = {'малајалам', 'other'},	-- Malayalam
	['mn'] = {'монголски', 'other'},	-- Mongolian
	['mr'] = {'марати', 'other'},	-- Marathi
	['ms'] = {'малајски', 'other'},	-- Malay
	['mt'] = {'малтешки', 'other'},	-- Maltese
	['my'] = {'бурмански', 'other'},	-- Burmese
	['na'] = {'науру', 'other'},	-- Nauru
	['nb'] = {'норвешки бокмал', 'other'},	-- Norwegian Bokmål
	['nd'] = {'северни ндебеле', 'other'},	-- North Ndebele
	['ne'] = {'непалски', 'other'},	-- Nepali
	['ng'] = {'Ндонга', 'other'},	-- Ndonga
	['nl'] = {'холандски', 'other'},	-- Dutch
	['nn'] = {'норвешки нинорск', 'other'},	-- Norwegian Nynorsk
	['no'] = {'норвешки', 'other'},	-- Norwegian
	['nr'] = {'јужни ндебеле', 'other'},	-- South Ndebele
	['nv'] = {'навахо', 'other'},	-- Navajo
	['ny'] = {'њања', 'other'},	-- Nyanja
	['oc'] = {'окситански', 'other'},	-- Occitan
	['oj'] = {'ојибва', 'other'},	-- Ojibwa
	['om'] = {'оромо', 'other'},	-- Oromo
	['or'] = {'орија', 'other'},	-- Oriya
	['os'] = {'Осетски', 'other'},	-- Ossetian; Ossetic
	['pa'] = {'панџаби', 'other'},	-- Punjabi
	['pi'] = {'Пали', 'other'},	-- Pali
	['pl'] = {'пољски', 'other'},	-- Polish
	['ps'] = {'паштунски', 'other'},	-- Pashto
	['pt'] = {'португалски', 'other'},	-- Portuguese
	['qu'] = {'кечуа', 'other'},	-- Quechua
	['rm'] = {'рето-романски', 'other'},	-- Romansh
	['rn'] = {'рунди', 'other'},	-- Rundi
	['ro'] = {'румунски', 'other'},	-- Romanian
	['ru'] = {'руски', 'other'},	-- Russian
	['rw'] = {'кинјаруанда', 'other'},	-- Kinyarwanda
	['sa'] = {'санскрт', 'other'},	-- Sanskrit
	['sc'] = {'сардињаски', 'other'},	-- Sardinian
	['sd'] = {'синди', 'other'},	-- Sindhi
	['se'] = {'севернолапонски', 'other'},	-- Northern Sami
	['sg'] = {'санго', 'other'},	-- Sango
	['si'] = {'синхалски', 'other'},	-- Sinhala
	['sk'] = {'словачки', 'other'},	-- Slovak
	['sl'] = {'словеначки', 'other'},	-- Slovenian
	['sm'] = {'самоански', 'other'},	-- Samoan
	['sn'] = {'шона', 'other'},	-- Shona
	['so'] = {'сомалски', 'other'},	-- Somali
	['sq'] = {'албански', 'other'},	-- Albanian
--	['sr'] = 						-- no entry for 'sr' (presumably because it is this wiki's own language - TODO confirm)
	['ss'] = {'свати', 'other'},	-- Swati
	['st'] = {'сесото', 'other'},	-- Southern Sotho
	['su'] = {'сундански', 'other'},	-- Sundanese
	['sv'] = {'шведски', 'other'},	-- Swedish
	['sw'] = {'свахили', 'other'},	-- Swahili
	['ta'] = {'тамилски', 'other'},	-- Tamil
	['te'] = {'телугу', 'other'},	-- Telugu
	['tg'] = {'таџички', 'other'},	-- Tajik
	['th'] = {'тајландски', 'other'},	-- Thai
	['ti'] = {'тигриња', 'other'},	-- Tigrinya
	['tk'] = {'туркменски', 'other'},	-- Turkmen
	['tl'] = {'Тагалски', 'other'},	-- Tagalog
	['tn'] = {'Тсвана', 'other'},	-- Tswana
	['to'] = {'тонга', 'other'},	-- Tonga
	['tr'] = {'турски', 'other'},	-- Turkish
	['ts'] = {'тсонга', 'other'},	-- Tsonga
	['tt'] = {'татарски', 'other'},	-- Tatar
	['tw'] = {'тви', 'other'},	-- Twi
	['ty'] = {'тахићански', 'other'},	-- Tahitian
	['ug'] = {'ујгурски', 'other'},	-- Uyghur
	['uk'] = {'украјински', 'other'},	-- Ukrainian
	['ur'] = {'урду', 'other'},	-- Urdu
	['uz'] = {'узбечки', 'other'},	-- Uzbek
	['ve'] = {'венда', 'other'},	-- Venda
	['vi'] = {'вијетнамски', 'other'},	-- Vietnamese
	['vo'] = {'волапук', 'other'},	-- Volapük
	['wa'] = {'валун', 'other'},	-- Walloon
	['wo'] = {'волоф', 'other'},	-- Wolof
	['xh'] = {'коса', 'other'},	-- Xhosa
	['yi'] = {'јидиш', 'other'},	-- Yiddish
	['yo'] = {'јоруба', 'other'},	-- Yoruba
	['za'] = {'жуанг', 'other'},	-- Zhuang
	['zh'] = {'кинески', 'other'},	-- Chinese
	['zu'] = {'зулу', 'other'},	-- Zulu
	};


--[[--------------------------< G E T _ I S O 6 3 9 _ C O D E >------------------------------------------------

Looks up a language name (as written in |language=) in the list of language names that MediaWiki knows for this
wiki's language, and reports the matching name and code.

Parameters:
	lang			- the language name to look for, in whatever letter case the editor used
	this_wiki_code	- language code of the local wiki; selects the language in which the names are listed

Returns:
	name, code	- when a listed name matches and its code is two or three characters long; name is the listed
				  spelling, so the caller gets the listed capitalization
	name		- when a listed name matches but its code has any other length (for example the original notes
				  cite 'cbk-zam' for 'Chavacano de Zamboanga'); no code is returned
	lang		- unchanged, with no code, when nothing matches

The comparison is made on lower-cased copies of both names (mw.ustring.lower), so the letter case of the input
does not matter.  The list is fetched with the 'all' option; the original author's note says this is required
for North Ndebele, South Ndebele, and Ojibwa.

Adapted from code taken from Module:Check ISO 639-1.

]]

local function get_iso639_code (lang, this_wiki_code)
	local known_names = mw.language.fetchLanguageNames (this_wiki_code, 'all');	-- code -> name map of languages known to MediaWiki
	local wanted = mw.ustring.lower (lang);										-- lower-cased search key

	for candidate_code, candidate_name in pairs (known_names) do
		if mw.ustring.lower (candidate_name) == wanted then						-- case-insensitive name match
			local code_length = #candidate_code;
			if 2 == code_length or 3 == code_length then
				return candidate_name, candidate_code;							-- plain two- or three-character code: report both
			end
			return candidate_name;												-- any other code length: report the name only
		end
	end

	return lang;																-- no match: hand back the input as given, without a code
end

--[[--------------------------< L A N G U A G E _ P A R A M E T E R >------------------------------------------

Turns the value of |language= into the language annotation for a citation.

Parameter:
	lang	- the raw parameter value: one or more language codes and/or language names separated by commas,
			  for example |language=nb, French, th

Returns:
	an empty string when the rendered result equals this wiki's own language name; otherwise a space followed by
	whatever wrap_msg ('language', <names>) produces.

For each comma-separated item:
	1. an item that starts with two letters and a hyphen (an IETF-style tag) is cut down to those two letters;
	2. an item of two or three characters is tried as a code with mw.language.fetchLanguageName();
	3. if that gave no name, the item is treated as a language name and looked up with get_iso639_code();
	4. when a code is known and differs from this wiki's code, a property category is added: key
	   'foreign_lang_source' (with name and code) for two-character codes, key 'foreign_lang_source_2' (with the
	   code) otherwise; when no code is known, maintenance category key 'unknown_lang' is added;
	5. the resulting name (or the item as written, when nothing matched) is appended to the output list.

One or two names are joined with ' and '; three or more are joined with ', ' and the last one is prefixed with
'and '.

NOTE(review): the joining words are hard-coded English text.
TODO (carried over from the original): three-character codes with an IETF tag are not handled; the country part
of a tag is discarded; multiple three-character-code categories per template are not supported.

]]

local function language_parameter (lang)
	local content_language = mw.getContentLanguage();							-- language object for this wiki
	local this_wiki_code = content_language:getCode();							-- this wiki's language code
	local this_wiki_name = mw.language.fetchLanguageName (this_wiki_code, this_wiki_code);	-- this wiki's language name, in its own language

	local rendered_names = {};													-- names to be shown, in input order
	local name;																	-- kept across iterations: nil for the first item, '' afterwards

	for _, item in ipairs (mw.text.split (lang, '%s*,%s*')) do					-- one pass per comma-separated item
		local code;																-- code resolved for this item, if any

		if item:match ('^%a%a%-') then											-- IETF-style tag: two letters then a hyphen
			item = item:match ('(%a%a)%-');										-- keep just the two-letter part
		end

		local item_length = item:len();
		if 2 == item_length or 3 == item_length then							-- short enough to be a code: ask MediaWiki for its name
			name = mw.language.fetchLanguageName (item:lower(), this_wiki_code);
		end

		if is_set (name) then
			code = item:lower();												-- the item was a recognized code
		else
			name, code = get_iso639_code (item, this_wiki_code);				-- otherwise try it as a language name
		end

		if not is_set (code) then
			add_maint_cat ('unknown_lang');										-- nothing recognized
		elseif this_wiki_code ~= code then										-- recognized and not this wiki's own language
			if 2 == code:len() then
				add_prop_cat ('foreign_lang_source', {name, code});
			else
				add_prop_cat ('foreign_lang_source_2', {code});
			end
		end

		table.insert (rendered_names, name);
		name = '';																-- reset so the next item starts without a name
	end

	local count = #rendered_names;
	local joined;
	if count > 2 then
		rendered_names[count] = 'and ' .. rendered_names[count];				-- three or more: 'a, b, and c'
		joined = table.concat (rendered_names, ', ');
	else
		joined = table.concat (rendered_names, ' and ');						-- one or two: 'a' or 'a and b'
	end

	if joined == this_wiki_name then
		return '';																-- only this wiki's own language: no annotation
	end
	return " " .. wrap_msg ('language', joined);
end


--[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------

Copies the helper functions used by this module (is_set, add_maint_cat, add_prop_cat, wrap_msg) out of the
utilities module table supplied by the caller, so that this module uses the same (live or sandbox)
Module:Citation/CS1/Utilities as the other modules.

Parameter:
	utilities_page_ptr	- table exposing the fields is_set, add_maint_cat, add_prop_cat and wrap_msg

Returns nothing.  Must be called before language_parameter(), which calls all four helpers.

]]

local function set_selected_modules (utilities_page_ptr)
	local utilities = utilities_page_ptr;										-- shorter alias for the supplied module table

	is_set = utilities.is_set;
	add_maint_cat = utilities.add_maint_cat;
	add_prop_cat = utilities.add_prop_cat;
	wrap_msg = utilities.wrap_msg;
end



return {																		-- the module's public interface
	set_selected_modules = set_selected_modules,								-- call first: wires in the utilities helpers
	language_parameter = language_parameter,									-- renders the |language= annotation
	}