მოდული:ru-common

ვიქსიკონი გვერდიდან

შეგიძლიათ შექმნათ დოკუმენტაცია ამ მოდულისათვის: მოდული:ru-common/ინფო

local export = {}

local lang = require("Module:languages").getByCode("ru")

local strutils = require("Module:string utilities")
local m_ru_translit = require("Module:ru-translit")
local m_table_tools = require("Module:table tools")

local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub

local AC = u(0x0301) -- acute =  ́
local GR = u(0x0300) -- grave =  ̀
local CFLEX = u(0x0302) -- circumflex =  ̂
local BREVE = u(0x0306) -- breve  ̆
local DIA = u(0x0308) -- diaeresis =  ̈
local CARON = u(0x030C) -- caron  ̌

local PSEUDOVOWEL = u(0xFFF1) -- pseudovowel placeholder
local PSEUDOCONS = u(0xFFF2) -- pseudoconsonant placeholder

-- any accent
export.accent = AC .. GR .. DIA .. BREVE .. CARON
-- regex for any optional accent(s)
export.opt_accent = "[" .. export.accent .. "]*"
-- any composed Cyrillic vowel with grave accent
export.composed_grave_vowel = "ѐЀѝЍ"
-- any Cyrillic vowel except ёЁ
export.vowel_no_jo = "аеиоуяэыюіѣѵАЕИОУЯЭЫЮІѢѴ" .. PSEUDOVOWEL .. export.composed_grave_vowel
-- any Cyrillic vowel, including ёЁ
export.vowel = export.vowel_no_jo .. "ёЁ"
-- any vowel in transliteration
export.tr_vowel = "aeěɛiouyAEĚƐIOUY" .. PSEUDOVOWEL
-- any consonant in transliteration, omitting soft/hard sign
export.tr_cons_no_sign = "bcčdfghjklmnpqrsštvwxzžBCČDFGHJKLMNPQRSŠTVWXZŽ" .. PSEUDOCONS
-- any consonant in transliteration, including soft/hard sign
export.tr_cons = export.tr_cons_no_sign .. "ʹʺ"
-- regex for any consonant in transliteration, including soft/hard sign,
-- optionally followed by any accent
export.tr_cons_acc_re = "[" .. export.tr_cons .. "]" .. export.opt_accent
-- any Cyrillic consonant except sibilants and ц
export.cons_except_sib_c = "бдфгйклмнпрствхзьъБДФГЙКЛМНПРСТВХЗЬЪ" .. PSEUDOCONS
-- Cyrillic sibilant consonants
export.sib = "шщчжШЩЧЖ"
-- Cyrillic sibilant consonants and ц
export.sib_c = export.sib .. "цЦ"
-- any Cyrillic consonant
export.cons = export.cons_except_sib_c .. export.sib_c
-- Cyrillic velar consonants
export.velar = "кгхКГХ"
-- uppercase Cyrillic consonants
export.uppercase = "АЕИОУЯЭЫЁЮІѢѴБДФГЙКЛМНПРСТВХЗЬЪШЩЧЖЦ"

-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
	local retval = rsubn(term, foo, bar)
	return retval
end

local function ine(x)
	return x ~= "" and x or nil
end

-- this function enables the module to be called from a template;
-- FIXME, does this actually work?
function export.main(frame)
	-- FIXME: Not used. Consider deleting.
	if type(export[frame.args[1]]) == 'function' then
		return export[frame.args[1]](frame.args[2], frame.args[3])
	else
		return export[frame.args[1]][frame.args[2]]
	end
end

-- selects preposition о, об or обо for next phrase, which can start from
-- punctuation
function export.obo(phr)
	-- FIXME: Not used. Consider deleting.
	--Algorithm design is mainly inherited from w:ru:template:Обо
	local w = rmatch(phr,"[%p%s%c]*(.-)[%p%s%c]") or rmatch(phr,"[%p%s%c]*(.-)$")
	if not w then return nil end
	if string.find(" всей всём всех мне ",' '..ulower(w)..' ',1,true) then return 'обо' end
	local ws=usub(w,1,2)
	if ws==uupper(ws) then -- abbrev
		if rmatch(ws,"^[ЙУНФЫАРОЛЭСМИRYUIOASFHLXNMÖÜÄΑΕΟΥΩ]") then return 'об' else return 'о' end
	elseif rmatch(uupper(w),"^[АОЭИУЫAOIEÖÜÄΑΕΟΥΩ]") then
		return 'об'
	else
		return 'о'
	end
end

-- Apply Proto-Slavic iotation. This is the change that is affected by a
-- Slavic -j- after a consonant.
function export.iotation(stem, tr, shch)
	local combine_tr = false
	
	-- so this can be called from a template	
	if type(stem) == 'table' then
		stem, tr, shch = ine(stem.args[1]), ine(stem.args[2]), ine(stem.args[3])
		combine_tr = true
	end
	
	stem = rsub(stem, "[сх]$", "ш")
	stem = rsub(stem, "ск$", "щ")
	stem = rsub(stem, "ст$", "щ")
	stem = rsub(stem, "[кц]$", "ч")

	-- normally "т" is iotated as "ч" but there are many verbs that are iotated with "щ"
	if shch == "щ" then
		stem = rsub(stem, "т$", "щ")
	else
		stem = rsub(stem, "т$", "ч")
	end

	stem = rsub(stem, "[гдз]$", "ж")

	stem = rsub(stem, "([бвмпф])$", "%1л")

	if tr then
		tr = rsub(tr, "[sx]$", "š")
		tr = rsub(tr, "sk$", "šč")
		tr = rsub(tr, "st$", "šč")
		tr = rsub(tr, "[kc]$", "č")

		-- normally "т" is iotated as "ч" but there are many verbs that are iotated with "щ"
		if shch == "щ" then
			tr = rsub(tr, "t$", "šč")
		else
			tr = rsub(tr, "t$", "č")
		end

		tr = rsub(tr, "[gdz]$", "ž")

		tr = rsub(tr, "([bvmpf])$", "%1l")
	end

	if combine_tr then
		return export.combine_russian_tr(stem, tr)
	else
		return stem, tr
	end
end

-- Does a set of Cyrillic words in connected text need accents? We need to
-- split by word and check each one.
function export.needs_accents(text)
	local function word_needs_accents(word)
		-- A word needs accents if it is unstressed and contains more than
		-- one vowel, unless it's a prefix or suffix
		return not rfind(word, "^%-") and not rfind(word, "%-$") and
			export.is_unstressed(word) and not export.is_monosyllabic(word)
	end
	local words = rsplit(text, "%s")
	for _, word in ipairs(words) do
		if word_needs_accents(word) then
			return true
		end
	end
	return false
end

-- True if Cyrillic word is stressed (acute or diaeresis)
function export.is_stressed(word)
	-- A word that has ё in it is inherently stressed.
	-- diaeresis occurs in сѣ̈дла plural of сѣдло́
	return rfind(word, "[́̈ёЁ]")
end

-- True if Cyrillic word has no stress mark (acute or diaeresis)
function export.is_unstressed(word)
	return not export.is_stressed(word)
end

-- True if Cyrillic word is stressed on the last syllable
function export.is_ending_stressed(word)
	return rfind(word, "[ёЁ][^" .. export.vowel .. "]*$") or
		rfind(word, "[" .. export.vowel .. "][́̈][^" .. export.vowel .. "]*$")
end

-- True if a Cyrillic word has two or more stresses (acute or diaeresis)
function export.is_multi_stressed(word)
	word = rsub(word, "[ёЁ]", "е́")
	return rfind(word, "[" .. export.vowel .. "][́̈].*[" .. export.vowel .. "][́̈]")
end

-- True if Cyrillic word is stressed on the first syllable
function export.is_beginning_stressed(word)
	return rfind(word, "^[^" .. export.vowel .. "]*[ёЁ]") or
		rfind(word, "^[^" .. export.vowel .. "]*[" .. export.vowel .. "]́")
end

-- True if Cyrillic word has no vowel. Don't treat suffixes as nonsyllabic
-- even if they have no vowel, as they are generally added onto words with
-- vowels.
function export.is_nonsyllabic(word)
	return not rfind(word, "^%-") and not rfind(word, "[" .. export.vowel .. "]")
end

-- True if Cyrillic word has no more than one vowel; includes non-syllabic
-- stems such as льд-
function export.is_monosyllabic(word)
	return not rfind(word, "[" .. export.vowel .. "].*[" .. export.vowel .. "]")
end

local recomposer = {
	["и" .. BREVE] = "й",
	["И" .. BREVE] = "Й",
	["е" .. DIA] = "ё", -- WARNING: Cyrillic е and Е
	["Е" .. DIA] = "Ё",
	["e" .. CARON] = "ě", -- WARNING: Latin e and E
	["E" .. CARON] = "Ě",
	["c" .. CARON] = "č",
	["C" .. CARON] = "Č",
	["s" .. CARON] = "š",
	["S" .. CARON] = "Š",
	["z" .. CARON] = "ž",
	["Z" .. CARON] = "Ž",
	-- used in ru-pron:
	["ж" .. BREVE] = "ӂ", -- used in ru-pron
	["Ж" .. BREVE] = "Ӂ",
	["j" .. CFLEX] = "ĵ",
	["J" .. CFLEX] = "Ĵ",
	["j" .. CARON] = "ǰ",
	-- no composed uppercase equivalent of J-caron
	["ʒ" .. CARON] = "ǯ",
	["Ʒ" .. CARON] = "Ǯ",
}

-- Decompose acute, grave, etc. on letters (esp. Latin) into individivual
-- character + combining accent. But recompose Cyrillic and Latin characters
-- that we want to treat as units and get caught in the crossfire. We mostly
-- want acute and grave decomposed; perhaps should just explicitly decompose
-- those and no others.
function export.decompose(text)
	text = mw.ustring.toNFD(text)
	text = rsub(text, ".[" .. BREVE .. DIA .. CARON .. "]", recomposer)
	return text
end

function export.assert_decomposed(text)
	assert(not rfind(text, "[áéíóúýàèìòùỳäëïöüÿÁÉÍÓÚÝÀÈÌÒÙỲÄËÏÖÜŸ]"))
end

-- Transliterate text and then apply acute/grave decomposition.
function export.translit(text, no_include_monosyllabic_jo_accent)
	return export.decompose(m_ru_translit.tr(text, nil, nil, not no_include_monosyllabic_jo_accent))
end

-- Recompose acutes and graves into preceding vowels. Probably not necessary.
function export.recompose(text)
	return mw.ustring.toNFC(text)
end

local grave_decomposer = {
	["ѐ"] = "е" .. GR,
	["Ѐ"] = "Е" .. GR,
	["ѝ"] = "и" .. GR,
	["Ѝ"] = "И" .. GR,
}

-- decompose precomposed Cyrillic chars w/grave accent; not necessary for
-- acute accent as there aren't precomposed Cyrillic chars w/acute accent,
-- and undesirable for precomposed ё and Ё
function export.decompose_grave(word)
	return rsub(word, "[ѐЀѝЍ]", grave_decomposer)
end

local grave_deaccenter = {
	[GR] = "", -- grave accent
	["ѐ"] = "е", -- composed Cyrillic chars w/grave accent
	["Ѐ"] = "Е",
	["ѝ"] = "и",
	["Ѝ"] = "И",
}

local deaccenter = mw.clone(grave_deaccenter)
deaccenter[AC] = "" -- acute accent

-- Remove acute and grave accents; don't affect composed diaeresis in ёЁ or
-- uncomposed diaeresis in -ѣ̈- (as in plural сѣ̈дла of сѣдло́).
-- NOTE: Translit must already be decomposed! See comment at top.
function export.remove_accents(word, tr)
	local ru_removed = rsub(word, "[́̀ѐЀѝЍ]", deaccenter)
	if not tr then
		return ru_removed, nil
	end
	return ru_removed, rsub(tr, "[" .. AC .. GR .. "]", deaccenter)
end

-- Remove grave accents; don't affect acute or composed diaeresis in ёЁ or
-- uncomposed diaeresis in -ѣ̈- (as in plural сѣ̈дла of сѣдло́).
-- NOTE: Translit must already be decomposed! See comment at top.
function export.remove_grave_accents(word, tr)
	local ru_removed = rsub(word, "[̀ѐЀѝЍ]", grave_deaccenter)
	if not tr then
		return ru_removed, nil
	end
	return ru_removed, rsub(tr, GR, "")
end

-- Remove acute and grave accents in monosyllabic words; don't affect
-- diaeresis (composed or uncomposed) because it indicates a change in vowel
-- quality, which still applies to monosyllabic words. Don't change suffixes,
-- where a "monosyllabic" stress is still significant (e.g. -ча́т short
-- masculine of -ча́тый, vs. -́чат short masculine of -́чатый).
-- NOTE: Translit must already be decomposed! See comment at top.
function export.remove_monosyllabic_accents(word, tr)
	if export.is_monosyllabic(word) and not rfind(word, "^%-") then
		return export.remove_accents(word, tr)
	else
		return word, tr
	end
end

local destresser = mw.clone(deaccenter)
destresser["ё"] = "е"
destresser["Ё"] = "Е"
destresser["̈"] = "" -- diaeresis

-- Subfunction of split_syllables(). On input we get sections of text
-- consisting of CONSONANT - VOWEL - CONSONANT - VOWEL ... - CONSONANT,
-- where CONSONANT consists of zero or more consonants and VOWEL consists
-- of exactly one vowel plus any following accent(s); we combine these into
-- syllables as required by split_syllables().
local function combine_captures(captures)
	if #captures == 1 then
		return captures
	end
	local combined = {}
	for i = 1,(#captures-1),2 do
		table.insert(combined, captures[i] .. captures[i+1])
	end
	combined[#combined] = combined[#combined] .. captures[#captures]
	return combined
end

-- Split Russian text and transliteration into syllables. Syllables end with
-- vowel + accent(s), except for the last syllable, which includes any
-- trailing consonants.
-- NOTE: Translit must already be decomposed! See comment at top.
function export.split_syllables(ru, tr)
	-- Split into alternating consonant/vowel sequences, as described in
	-- combine_captures(). Uses capturing_split(), which is like rsplit()
	-- but also includes any capturing groups in the split pattern.
	local rusyllables = combine_captures(strutils.capturing_split(ru, "([" .. export.vowel .. "]" .. export.opt_accent .. ")"))
	local trsyllables
	if tr then
		export.assert_decomposed(tr)
		trsyllables = combine_captures(strutils.capturing_split(tr, "([" .. export.tr_vowel .. "]" .. export.opt_accent .. ")"))
		if #rusyllables ~= #trsyllables then
			error("Russian " .. ru .. " doesn't have same number of syllables as translit " .. tr)
		end
	end
	--error(table.concat(rusyllables, "/") .. "(" .. #rusyllables .. (trsyllables and (") || " .. table.concat(trsyllables, "/") .. "(" .. #trsyllables .. ")") or ""))
	return rusyllables, trsyllables
end

-- Split Russian word and transliteration into hyphen-separated components.
-- Rejoining with table.concat(..., "-") will recover the original word.
-- If the original word ends in a hyphen, that hyphen gets included with the
-- preceding component (this is the only case when an individual component has
-- a hyphen in it).
function export.split_hyphens(ru, tr)
	local rucomponents = rsplit(ru, "%-")
	if rucomponents[#rucomponents] == "" and #rucomponents > 1 then
		rucomponents[#rucomponents - 1] = rucomponents[#rucomponents - 1] .. "-"
		table.remove(rucomponents)
	end
	local trcomponents
	if tr then
		trcomponents = rsplit(tr, "%-")
		if trcomponents[#trcomponents] == "" and #trcomponents > 1 then
			trcomponents[#trcomponents - 1] = trcomponents[#trcomponents - 1] .. "-"
			table.remove(trcomponents)
		end
		if #rucomponents ~= #trcomponents then
			error("Russian " .. ru .. " doesn't have same number of hyphenated components as translit " .. tr)
		end
	end
	return rucomponents, trcomponents
end

-- Apply j correction, converting je to e after consonants, jo to o after
-- a sibilant, ju to u after hard sibilant.
-- NOTE: Translit must already be decomposed! See comment at top.
function export.j_correction(tr)
	tr = rsub(tr, "([" .. export.tr_cons_no_sign .. "]" .. export.opt_accent ..")[Jj]([EeĚě])", "%1%2")
	tr = rsub(tr, "([žščŽŠČ])[Jj]([Oo])", "%1%2")
	tr = rsub(tr, "([žšŽŠ])[Jj]([Uu])", "%1%2")
	return tr
end

local function make_unstressed_ru(ru)
	-- The following regexp has grave+acute+diaeresis after the bracket
	--
	return rsub(ru, "[̀́̈ёЁѐЀѝЍ]", destresser)
end

-- Remove all stress marks (acute, grave, diaeresis).
-- NOTE: Translit must already be decomposed! See comment at top.
function export.make_unstressed(ru, tr)
	if not tr then
		return make_unstressed_ru(ru), nil
	end
	-- In the presence of TR, we need to do things the hard way: Splitting
	-- into syllables and only converting Latin o to e opposite a ё.
	rusyl, trsyl = export.split_syllables(ru, tr)
	for i=1,#rusyl do
		if rfind(rusyl[i], "[ёЁ]") then
			trsyl[i] = rsub(trsyl[i], "[Oo]", {["O"] = "E", ["o"] = "e"})
		end
		rusyl[i] = make_unstressed_ru(rusyl[i])
		-- the following should still work as it will affect accents only
		trsyl[i] = make_unstressed_ru(trsyl[i])
	end
	-- Also need to apply j correction as otherwise we'll have je after cons, etc.
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

local function remove_jo_ru(word)
	return rsub(word, "[̈ёЁ]", destresser)
end

-- Remove diaeresis stress marks only.
-- NOTE: Translit must already be decomposed! See comment at top.
function export.remove_jo(ru, tr)
	if not tr then
		return remove_jo_ru(ru), nil
	end
	-- In the presence of TR, we need to do things the hard way: Splitting
	-- into syllables and only converting Latin o to e opposite a ё.
	rusyl, trsyl = export.split_syllables(ru, tr)
	for i=1,#rusyl do
		if rfind(rusyl[i], "[ёЁ]") then
			trsyl[i] = rsub(trsyl[i], "[Oo]", {["O"] = "E", ["o"] = "e"})
		end
		rusyl[i] = remove_jo_ru(rusyl[i])
		-- the following should still work as it will affect accents only
		trsyl[i] = make_unstressed_once_ru(trsyl[i])
	end
	-- Also need to apply j correction as otherwise we'll have je after cons, etc.
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

local function make_unstressed_once_ru(word)
	-- leave graves alone
	return rsub(word, "([́̈ёЁ])([^́̈ёЁ]*)$", function(x, rest) return destresser[x] .. rest; end, 1)
end

local function map_last_hyphenated_component(fn, ru, tr)
	if rfind(ru, "%-") then
		-- If there is a hyphen, do it the hard way by splitting into
		-- individual components and doing the last one. Otherwise we just do
		-- the whole string.
		local rucomponents, trcomponents = export.split_hyphens(ru, tr)
		local lastru, lasttr = fn(rucomponents[#rucomponents],
			trcomponents and trcomponents[#trcomponents] or nil)
		rucomponents[#rucomponents] = lastru
		ru = table.concat(rucomponents, "-")
		if trcomponents then
			trcomponents[#trcomponents] = lasttr
			tr = table.concat(trcomponents, "-")
		end
		return ru, tr
	end
	return fn(ru, tr)
end

-- Make last stressed syllable (acute or diaeresis) unstressed; leave
-- unstressed; leave graves alone; if NOCONCAT, return individual syllables.
-- NOTE: Translit must already be decomposed! See comment at top.
local function make_unstressed_once_after_hyphen_split(ru, tr, noconcat)
	if not tr then
		return make_unstressed_once_ru(ru), nil
	end
	-- In the presence of TR, we need to do things the hard way, as with
	-- make_unstressed().
	rusyl, trsyl = export.split_syllables(ru, tr)
	for i=#rusyl,1,-1 do
		local stressed = export.is_stressed(rusyl[i])
		if stressed then
			if rfind(rusyl[i], "[ёЁ]") then
				trsyl[i] = rsub(trsyl[i], "[Oo]", {["O"] = "E", ["o"] = "e"})
			end
			rusyl[i] = make_unstressed_once_ru(rusyl[i])
			-- the following should still work as it will affect accents only
			trsyl[i] = make_unstressed_once_ru(trsyl[i])
			break
		end
	end
	if noconcat then
		return rusyl, trsyl
	end
	-- Also need to apply j correction as otherwise we'll have je after cons
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

-- Make last stressed syllable (acute or diaeresis) to the right of any hyphen
-- unstressed (unless the hyphen is word-final); leave graves alone. We don't
-- destress a syllable to the left of a hyphen unless the hyphen is word-final
-- (i.e. a prefix). Otherwise e.g. the accents in the first part of words like
-- ко́е-како́й and а́льфа-лу́ч won't remain.
-- NOTE: Translit must already be decomposed! See comment at top.
function export.make_unstressed_once(ru, tr)
	return map_last_hyphenated_component(make_unstressed_once_after_hyphen_split, ru, tr)
end

local function make_unstressed_once_at_beginning_ru(word)
	-- leave graves alone
	return rsub(word, "^([^́̈ёЁ]*)([́̈ёЁ])", function(rest, x) return rest .. destresser[x]; end, 1)
end

-- Make first stressed syllable (acute or diaeresis) unstressed; leave
-- graves alone; if NOCONCAT, return individual syllables.
-- NOTE: Translit must already be decomposed! See comment at top.
function export.make_unstressed_once_at_beginning(ru, tr, noconcat)
	if not tr then
		return make_unstressed_once_at_beginning_ru(ru), nil
	end
	-- In the presence of TR, we need to do things the hard way, as with
	-- make_unstressed().
	rusyl, trsyl = export.split_syllables(ru, tr)
	for i=1,#rusyl do
		local stressed = export.is_stressed(rusyl[i])
		if stressed then
			if rfind(rusyl[i], "[ёЁ]") then
				trsyl[i] = rsub(trsyl[i], "[Oo]", {["O"] = "E", ["o"] = "e"})
			end
			rusyl[i] = make_unstressed_once_at_beginning_ru(rusyl[i])
			-- the following should still work as it will affect accents only
			trsyl[i] = make_unstressed_once_at_beginning_ru(trsyl[i])
			break
		end
	end
	if noconcat then
		return rusyl, trsyl
	end
	-- Also need to apply j correction as otherwise we'll have je after cons
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

-- Subfunction of make_ending_stressed(), make_beginning_stressed(), which
-- add an acute accent to a syllable that may already have a grave accent;
-- in such a case, remove the grave.
-- NOTE: Translit must already be decomposed! See comment at top.
function export.correct_grave_acute_clash(word, tr)
	word = rsub(word, "([̀ѐЀѝЍ])́", function(x) return grave_deaccenter[x] .. AC; end)
	word = rsub(word, AC .. GR, AC)
	if not tr then
		return word, nil
	end
	tr = rsub(tr, GR .. AC, AC)
	tr = rsub(tr, AC .. GR, AC)
	return word, tr
end

local function make_ending_stressed_ru(word)
	-- If already ending stressed, just return word so we don't mess up ё
	if export.is_ending_stressed(word) then
		return word
	end
	-- Destress the last stressed syllable
	word = make_unstressed_once_ru(word)
	-- Add an acute to the last syllable
	word = rsub(word, "([" .. export.vowel_no_jo .. "])([^" .. export.vowel .. "]*)$",
		"%1́%2")
	-- If that caused an acute and grave next to each other, remove the grave
	return export.correct_grave_acute_clash(word)
end

-- Remove the last primary stress from the word and put it on the final
-- syllable. Leave grave accents alone except in the last syllable.
-- If final syllable already has primary stress, do nothing.
-- NOTE: Translit must already be decomposed! See comment at top.
local function make_ending_stressed_after_hyphen_split(ru, tr)
	if not tr then
		return make_ending_stressed_ru(ru), nil
	end
	-- If already ending stressed, just return ru/tr so we don't mess up ё
	if export.is_ending_stressed(ru) then
		return ru, tr
	end
	-- Destress the last stressed syllable; pass in "noconcat" so we get
	-- the individual syllables back
	rusyl, trsyl = make_unstressed_once_after_hyphen_split(ru, tr, "noconcat")
	-- Add an acute to the last syllable of both Russian and translit
	rusyl[#rusyl] = rsub(rusyl[#rusyl], "([" .. export.vowel_no_jo .. "])",
		"%1" .. AC)
	trsyl[#trsyl] = rsub(trsyl[#trsyl], "([" .. export.tr_vowel .. "])",
		"%1" .. AC)
	-- If that caused an acute and grave next to each other, remove the grave
	rusyl[#rusyl], trsyl[#trsyl] =
		export.correct_grave_acute_clash(rusyl[#rusyl], trsyl[#trsyl])
	-- j correction didn't get applied in make_unstressed_once because
	-- we short-circuited it and made it return lists of syllables
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

-- Remove the last primary stress from the portion of the word to the right of
-- any hyphen (unless the hyphen is word-final) and put it on the final
-- syllable. Leave grave accents alone except in the last syllable. If final
-- syllable already has primary stress, do nothing. (See make_unstressed_once()
-- for why we don't affect stresses to the left of a hyphen.)
-- NOTE: Translit must already be decomposed! See comment at top.
function export.make_ending_stressed(ru, tr)
	return map_last_hyphenated_component(make_ending_stressed_after_hyphen_split, ru, tr)
end

local function make_beginning_stressed_ru(word)
	-- If already beginning stressed, just return word so we don't mess up ё
	if export.is_beginning_stressed(word) then
		return word
	end
	-- Destress the first stressed syllable
	word = make_unstressed_once_at_beginning_ru(word)
	-- Add an acute to the first syllable
	word = rsub(word, "^([^" .. export.vowel .. "]*)([" .. export.vowel_no_jo .. "])",
		"%1%2́")
	-- If that caused an acute and grave next to each other, remove the grave
	return export.correct_grave_acute_clash(word)
end

-- Remove the first primary stress from the word and put it on the initial
-- syllable. Leave grave accents alone except in the first syllable.
-- If initial syllable already has primary stress, do nothing.
-- NOTE: Translit must already be decomposed! See comment at top.
function export.make_beginning_stressed(ru, tr)
	if not tr then
		return make_beginning_stressed_ru(ru), nil
	end
	-- If already beginning stressed, just return ru/tr so we don't mess up ё
	if export.is_beginning_stressed(ru) then
		return ru, tr
	end
	-- Destress the first stressed syllable; pass in "noconcat" so we get
	-- the individual syllables back
	rusyl, trsyl = export.make_unstressed_once_at_beginning(ru, tr, "noconcat")
	-- Add an acute to the first syllable of both Russian and translit
	rusyl[1] = rsub(rusyl[1], "([" .. export.vowel_no_jo .. "])",
		"%1" .. AC)
	trsyl[1] = rsub(trsyl[1], "([" .. export.tr_vowel .. "])",
		"%1" .. AC)
	-- If that caused an acute and grave next to each other, remove the grave
	rusyl[1], trsyl[1] = export.correct_grave_acute_clash(rusyl[1], trsyl[1])
	-- j correction didn't get applied in make_unstressed_once_at_beginning
	-- because we short-circuited it and made it return lists of syllables
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

-- used for tracking and categorization
local trailing_letter_type = {
	["ш"] = {"sibilant", "cons"},
	["щ"] = {"sibilant", "cons"},
	["ч"] = {"sibilant", "cons"},
	["ж"] = {"sibilant", "cons"},
	["ц"] = {"c", "cons"},
	["к"] = {"velar", "cons"},
	["г"] = {"velar", "cons"},
	["х"] = {"velar", "cons"},
	["ь"] = {"soft-cons", "cons"},
	["ъ"] = {"hard-cons", "cons"},
	["й"] = {"palatal", "cons"},
	["а"] = {"vowel", "hard-vowel"},
	["я"] = {"vowel", "soft-vowel"},
	["э"] = {"vowel", "hard-vowel"},
	["е"] = {"vowel", "soft-vowel"},
	["ѣ"] = {"vowel", "soft-vowel"},
	["и"] = {"i", "vowel", "soft-vowel"},
	["і"] = {"i", "vowel", "soft-vowel"},
	["ѵ"] = {"i", "vowel", "soft-vowel"},
	["ы"] = {"vowel", "hard-vowel"},
	["о"] = {"vowel", "hard-vowel"},
	["ё"] = {"vowel", "soft-vowel"},
	["у"] = {"vowel", "hard-vowel"},
	["ю"] = {"vowel", "soft-vowel"},
}

function export.get_stem_trailing_letter_type(stem)
	local hint = ulower(usub(export.remove_accents(stem), -1))
	local hint_types = trailing_letter_type[hint] or {"hard-cons", "cons"}
	return hint_types
end

-- Reduce stem by eliminating the "epenthetic" vowel. Applies to
-- nominative singular masculine 2nd-declension hard and soft, and
-- 3rd-declension feminine in -ь (e.g. любовь). STEM should be the
-- result after calling detect_stem_type(), but with final -й if
-- present. Normally returns two arguments (STEM and TR), but can be
-- called from a template using #invoke and will return one argument
-- (STEM, or STEM//TR if TR is present). Returns nil if unable to
-- reduce.
-- NOTE: Translit must already be decomposed! See comment at top.
function export.reduce_stem(stem, tr, soft_n)
	local pre, letter, post
	local pretr, lettertr, posttr
	local combine_tr = false

	-- test cases with translit:
	-- =p.reduce_stem("фе́ез", "fɛ́jez") -> фе́йз, fɛ́jz
	-- =p.reduce_stem("фе́йез", "fɛ́jez") -> фе́йз, fɛ́jz
	-- =p.reduce_stem("фе́без", "fɛ́bez") -> фе́бз, fɛ́bz
	-- =p.reduce_stem("фе́лез", "fɛ́lez") -> фе́льз, fɛ́lʹz
	-- =p.reduce_stem("феёз", p.decompose("fɛjóz")) -> фейз, fɛjz
	-- don't worry about the next one, won't occur and translit might
	-- be wrong anyway
	-- =p.reduce_stem("фейёз", p.decompose("fɛjjóz")) -> ???
	-- =p.reduce_stem("фебёз", p.decompose("fɛbjóz")) -> фебз, fɛbz
	-- =p.reduce_stem("фелёз", p.decompose("fɛljóz")) -> фельз, fɛlʹz
	-- =p.reduce_stem("фе́бей", "fɛ́bej") -> фе́бь, fɛ́bʹ
	-- =p.reduce_stem("фебёй", p.decompose("fɛbjój")) -> фебь, fɛbʹ
	-- =p.reduce_stem("фе́ей", "fɛ́jej") -> фе́йй, fɛ́jj
	-- =p.reduce_stem("феёй", p.decompose("fɛjój")) -> фейй, fɛjj

	-- so this can be called from a template	
	if type(stem) == 'table' then
		stem, tr = ine(stem.args[1]), ine(stem.args[2])
		combine_tr = true
	end

	pre, letter, post = rmatch(stem, "^(.*)([оОеЕёЁ])́?([" .. export.cons .. "]+)$")
	if not pre then
		return nil, nil
	end
	if tr then
		-- FIXME, may not be necessary to write the posttr portion as a
		-- consonant + zero or more consonant/accent combinations -- when will
		-- we ever get an accent after a consonant? That would indicate a
		-- failure of the decompose mechanism.
		pretr, lettertr, posttr = rmatch(tr, "^(.*)([oOeE])́?([" .. export.tr_cons .. "][" .. export.tr_cons .. export.accent .. "]*)$")
		if not pretr then
			return nil, nil -- should not happen unless tr is really messed up
		end
		-- Unless Cyrillic stem ends in -й, Latin stem shouldn't end in -j,
		-- or we will get problems with cases like индонези́ец//indonɛzíjec.
		if not rfind(pre, "[йЙ]$") then
			pretr = rsub(pretr, "[jJ]$", "")
		end
	end
	if letter == "О" or letter == "о" then
		-- FIXME, what about when the accent is on the removed letter?
		if post == "й" or post == "Й" then
			-- FIXME, is this correct?
			return nil, nil
		end
		letter = ""
	else
		local is_upper = rfind(post, "[" .. export.uppercase .. "]")
		if rfind(pre, "[" .. export.vowel .. "]́?$") then
			letter = is_upper and "Й" or "й"
		elseif post == "й" or post == "Й" then
			letter = is_upper and "Ь" or "ь"
			post = ""
			if posttr then
				posttr = ""
			end
		elseif rfind(post, "[" .. export.velar .. "]$") and rfind(pre, "[" .. export.cons_except_sib_c .. "]$") or
			rfind(post, "[^йЙ" .. export.velar .. "]$") and rfind(pre, "[лЛ]$") or
			soft_n and rfind(pre, "[нН]$") then
			letter = is_upper and "Ь" or "ь"
		else
			letter = ""
		end
	end
	stem = pre .. letter .. post
	if tr then
		tr = pretr .. export.translit(letter) .. posttr
	end
	if combine_tr then
		return export.combine_russian_tr(stem, tr)
	else
		return stem, tr
	end
end

-- Generate the dereduced stem given STEM and EPENTHETIC_STRESS (which
-- indicates whether the epenthetic vowel should be stressed); this is
-- without any terminating non-syllabic ending, which is added if needed by
-- the calling function. Normally returns two arguments (STEM and TR), but
-- can be called from a template using #invoke and will return one argument
-- (STEM, or STEM//TR if TR is present). Returns nil if unable to dereduce.
-- NOTE: Translit must already be decomposed! See comment at top.
function export.dereduce_stem(stem, tr, epenthetic_stress)
	local combine_tr = false
	
	-- so this can be called from a template	
	if type(stem) == 'table' then
		stem, tr, epenthetic_stress = ine(stem.args[1]), ine(stem.args[2]), ine(stem.args[3])
		combine_tr = true
	end

	if epenthetic_stress then
		stem, tr = export.make_unstressed_once(stem, tr)
	end

	local pre, letter, post
	local pretr, lettertr, posttr
	-- FIXME!!! Deal with this special case
	--if not (z.stem_type == 'soft' and _.equals(z.stress_type, {'b', 'f'}) -- we should ignore asterix for 2*b and 2*f (so to process it just like 2b or 2f)
	--		 or _.contains(z.specific, '(2)') and _.equals(z.stem_type, {'velar', 'letter-ц', 'vowel'}))  -- and also the same for (2)-specific and 3,5,6 stem-types
	--then

	-- I think this corresponds to our -ья and -ье types, which we
	-- handle separately
	--if z.stem_type == 'vowel' then  -- 1).
	--	if _.equals(z.stress_type, {'b', 'c', 'e', 'f', "f'", "b'" }) then  -- gen_pl ending stressed  -- TODO: special vars for that
	--		z.stems['gen_pl'] = _.replace(z.stems['gen_pl'], 'ь$', 'е́')
	--	else
	--		z.stems['gen_pl'] = _.replace(z.stems['gen_pl'], 'ь$', 'и')
	--	end
	--end

	pre, letter, post = rmatch(stem, "^(.*)([" .. export.cons .. "])([" .. export.cons .. "])$")
	if tr then
		pretr, lettertr, posttr = rmatch(tr, "^(.*)(" .. export.tr_cons_acc_re .. ")(" .. export.tr_cons_acc_re .. ")$")
		if pre and not pretr then
			return nil, nil -- should not happen unless tr is really messed up
		end
	end
	if pre then
		local is_upper = rfind(post, "[" .. export.uppercase .. "]")
		local epvowel
		if rfind(letter, "[ьйЬЙ]") then
			letter = ""
			lettertr = ""
			if rfind(post, "[цЦ]$") or not epenthetic_stress then
				epvowel = is_upper and "Е" or "е"
			else
				epvowel = is_upper and "Ё" or "ё"
			end
		elseif rfind(letter, "[" .. export.cons_except_sib_c .. "]") and rfind(post, "[" .. export.velar .. "]") or
				rfind(letter, "[" .. export.velar .. "]") then
			epvowel = is_upper and "О" or "о"
		elseif post == "ц" or post == "Ц" then
			epvowel = is_upper and "Е" or "е"
		elseif epenthetic_stress then
			if rfind(letter, "[" .. export.sib .. "]") then
				epvowel = is_upper and "О́" or "о́"
			else
				epvowel = is_upper and "Ё" or "ё"
			end
		else
			epvowel = is_upper and "Е" or "е"
		end
		assert(epvowel)
		stem = pre .. letter .. epvowel .. post
		if tr then
			tr = pretr .. lettertr .. export.translit(epvowel) .. posttr
			tr = export.j_correction(tr)
		end

		if epenthetic_stress then
			stem, tr = export.make_ending_stressed(stem, tr)
		end
		if combine_tr then
			return export.combine_russian_tr(stem, tr)
		else
			return stem, tr
		end
	end
	return nil, nil
end

-- Parse an entry that potentially has final footnote symbols and initial *
-- for a hypothetical entry into initial symbols, text and final symbols.
function export.split_symbols(entry, do_subscript)
	local prefentry, finalnotes = m_table_tools.separate_notes(entry)
	local initnotes, text = rmatch(prefentry, "(%*?)(.*)$")
	return initnotes, text, finalnotes
end

--------------------------------------------------------------------------
--                        Used for manual translit                      --
--------------------------------------------------------------------------

function export.translit_no_links(text)
	return export.translit(require("Module:links").remove_links(text))
end

function export.split_russian_tr(term, dopair)
	local ru, tr
	if not rfind(term, "//") then
		ru = term
	else
		splitvals = rsplit(term, "//")
		if #splitvals ~= 2 then
			error("Must have at most one // in a Russian//translit expr: '" .. term .. "'")
		end
		ru, tr = splitvals[1], export.decompose(splitvals[2])
	end
	if dopair then
		return {ru, tr}
	else
		return ru, tr
	end
end

function export.combine_russian_tr(ru, tr)
	if type(ru) == "table" then
		ru, tr = unpack(ru)
	end
	if tr then
		return ru .. "//" .. tr
	else
		return ru
	end
end

local function concat_maybe_moving_notes(x, y, movenotes)
	if movenotes then
		local xentry, xnotes = m_table_tools.separate_notes(x)
		local yentry, ynotes = m_table_tools.separate_notes(y)
		return xentry .. yentry .. xnotes .. ynotes
	else
		return x .. y
	end
end

-- Concatenate two Russian strings RU1 and RU2 that may have corresponding
-- manual transliteration TR1 and TR2 (which should be nil if there is no
-- manual translit). If DOPAIR, return a two-item list of the combined
-- Russian and manual translit (which will be nil if both TR1 and TR2 are
-- nil); else, return two values, the combined Russian and manual translit.
-- If MOVENOTES, extract any footnote symbols at the end of RU1 and move
-- them to the end of the concatenated string, before any footnote symbols
-- for RU2; same thing goes for TR1 and TR2.
function export.concat_russian_tr(ru1, tr1, ru2, tr2, dopair, movenotes)
	local ru, tr
	if not tr1 and not tr2 then
		ru = concat_maybe_moving_notes(ru1, ru2, movenotes)
	else
		if not tr1 then
			tr1 = export.translit_no_links(ru1)
		end
		if not tr2 then
			tr2 = export.translit_no_links(ru2)
		end
		ru, tr = concat_maybe_moving_notes(ru1, ru2, movenotes), export.j_correction(concat_maybe_moving_notes(tr1, tr2, movenotes))
	end
	if dopair then
		return {ru, tr}
	else
		return ru, tr
	end
end

-- Concatenate two Russian/translit combinations (where each combination is
-- a two-element list of {RUSSIAN, TRANSLIT} where TRANSLIT may be nil) by
-- individually concatenating the Russian and translit portions, and return
-- a concatenated combination as a two-element list. If the manual translit
-- portions of both terms on entry are nil, the result will also have nil
-- manual translit. If MOVENOTES, extract any footnote symbols at the end
-- of TERM1 and move them after the concatenated string and before any
-- footnote symbols at the end of TERM2.
function export.concat_paired_russian_tr(term1, term2, movenotes)
	assert(type(term1) == "table")
	assert(type(term2) == "table")
	local ru1, tr1 = term1[1], term1[2]
	local ru2, tr2 = term2[1], term2[2]
	return export.concat_russian_tr(ru1, tr1, ru2, tr2, "dopair", movenotes)
end

function export.concat_forms(forms)
	local joined_rutr = {}
	for _, form in ipairs(forms) do
		table.insert(joined_rutr, export.combine_russian_tr(form))
	end
	return table.concat(joined_rutr, ",")
end

-- Given a list of forms, where each form is a two-element list of {RUSSIAN, TRANSLIT}, strip footnote symbols from the
-- end of the Russian and translit.
function export.strip_notes_from_forms(forms)
	local newforms = {}
	for _, form in ipairs(forms) do
		local ru, tr = form[1], form[2]
		ru, _ = m_table_tools.separate_notes(ru)
		if tr then
			tr, _ = m_table_tools.separate_notes(tr)
		end
		table.insert(newforms, {ru, tr})
	end
	return newforms
end

-- Given a list of forms, where each form is a two-element list of {RUSSIAN, TRANSLIT}, unzip into parallel lists of
-- Russian and translit. The latter list may have gaps in it.
function export.unzip_forms(forms)
	local rulist = {}
	local trlist = {}
	for i, form in ipairs(forms) do
		local ru, tr = form[1], form[2]
		rulist[i] = ru
		trlist[i] = tr
	end
	return rulist, trlist
end

-- Given parallel lists of Russian and translit (where the latter list may have gaps in it), return a list of forms,
-- where each form is a two-element list of {RUSSIAN, TRANSLIT}.
function export.zip_forms(rulist, trlist)
	local forms = {}
	for i, ru in ipairs(rulist) do
		table.insert(forms, {ru, trlist[i]})
	end
	return forms
end

local function any_forms_have_translit(forms)
	for _, form in ipairs(forms) do
		if form[2] then
			return true
		end
	end
	return false
end

-- Given a list of forms, where each form is a two-element list of {RUSSIAN, TRANSLIT}, combine forms with
-- identical Russian, concatenating the translit with a comma in between.
function export.combine_translit_of_duplicate_forms(forms)
	if #forms == 0 then
		return forms
	end

	-- Optimization to avoid creating a new list in the majority case when no translit exists.
	if not any_forms_have_translit(forms) then
		return forms
	end

	local newforms = {}
	table.insert(newforms, {forms[1][1], forms[1][2]})
	for i = 2, #forms do
		local found_duplicate = false
		for j = 1, #newforms do
			-- If the Russian of the next form is the same as that of the last one, combine their translits and modify
			-- newforms[] in-place. Otherwise add the next form to newforms[]. Make sure to clone the form rather than
			-- just appending it directly since we may modify it in-place; we don't want to side-effect `forms` as passed
			-- in.
			if forms[i][1] == newforms[j][1] then
				local tr1 = newforms[j][2]
				local tr2 = forms[i][2]
				if not tr1 and not tr2 then
					-- this shouldn't normally happen
				else
					tr1 = tr1 or export.translit_no_links(newforms[j][1])
					tr2 = tr2 or export.translit_no_links(forms[i][1])
					if tr1 == tr2 then
						-- this shouldn't normally happen
					else
						newforms[j][2] = tr1 .. ", " .. tr2
					end
				end
				found_duplicate = true
				break
			end
		end
		if not found_duplicate then
			table.insert(newforms, {forms[i][1], forms[i][2]})
		end
	end
	return newforms
end

-- Given a list of forms, where each form is a two-element list of {RUSSIAN, TRANSLIT}, split cases where two different
-- transliterations have been packed into a single translit field by creating two adjacent term/translit pairs. This is
-- the opposite operation of combine_translit_of_duplicate_forms().
function export.split_translit_of_duplicate_forms(forms)
	if #forms == 0 then
		return forms
	end

	-- Optimization to avoid creating a new list in the majority case when no translit exists.
	if not any_forms_have_translit(forms) then
		return forms
	end

	local newforms = {}
	for _, form in ipairs(forms) do
		local ru, tr = unpack(form)
		if not tr or not tr:find(",") or ru:find(",") then
			table.insert(newforms, form)
		else
			local split_trs = rsplit(tr, ",%s*")
			local default_tr = export.translit_no_links(ru)
			for _, split_tr in ipairs(split_trs) do
				if split_tr == default_tr then
					split_tr = nil
				end
				table.insert(newforms, {ru, split_tr})
			end
		end
	end
	return newforms
end

function export.strip_ending(ru, tr, ending)
	local strippedru = rsub(ru, ending .. "$", "")
	if strippedru == ru then
		error("Argument " .. ru .. " doesn't end with expected ending " .. ending)
	end
	ru = strippedru
	tr = export.strip_tr_ending(tr, ending)
	return ru, tr
end

function export.strip_tr_ending(tr, ending)
	if not tr then return nil end
	local endingtr = rsub(export.translit_no_links(ending), "^([Jj])", "%1?")
	local strippedtr = rsub(tr, endingtr .. "$", "")
	if strippedtr == tr then
		error("Translit " .. tr .. " doesn't end with expected ending " .. endingtr)
	end
	return strippedtr
end

return export