-- Module:Redirect
-- From Wikifunctions.
-- Documentation for this module may be created at Module:Redirect/doc.

-- This module provides functions for getting the target of a redirect page.

-- Export table: the functions attached to `p` form the module's public
-- interface (the table is returned at the end of the file).
local p = {}

-- Local alias of mw.ustring.char, used when decoding numeric character
-- references found in soft-redirect targets.
local char = mw.ustring.char

-- Lazy proxy for Module:Arguments' getArgs: the first call loads the module
-- and rebinds `getArgs` to the real function, so later calls skip the proxy.
-- It is declared with `local function` on purpose: with the form
-- `local getArgs = function ... end` the name is not yet in scope inside the
-- body, so the assignment there would create (and call) a *global* `getArgs`
-- instead of replacing this local, and would fail if strict globals are on.
-- Arguments and return values are those of Module:Arguments' getArgs.
local function getArgs(...)
	getArgs = require('Module:Arguments').getArgs
	return getArgs(...)
end

-- Safe wrapper around mw.title.new. The call is made through pcall so that a
-- failure (for instance exceeding the expensive function count limit) yields
-- nil instead of a script error. On success, whatever mw.title.new returned
-- is passed back unchanged.
local function getTitle(...)
	local ok, result = pcall(mw.title.new, ...)
	if not ok then
		return nil
	end
	return result
end

-- Detect a hard redirect in wiki page content (not used inside this module).
-- `text` is the raw wikitext (nil or false is treated as an empty string).
-- Returns the link target written after a leading '#REDIRECT' (matched
-- case-insensitively), without surrounding spaces/underscores or leading
-- colons, or nil when the text does not start with such a redirect.
function p.getTargetFromText(text)
	local hardRedirect = '^#[Rr][Ee][Dd][Ii][Rr][Ee][Cc][Tt][%s:]*%[%[[%s_:]*([^]|]-)[%s_]*%]%]'
	local wikitext = text or ''
	return wikitext:match(hardRedirect)
end

--[[
Detect soft redirect in wiki page content of 'Category:' pages, using template
	{{Category redirect|target}}
or one of its known aliases on Commons (there are too many!!!):
	{{Category Redirect|target}}
	{{Categoryredirect|target}}
	{{Cat redirect|target}}
	{{Catredirect|target}}
	{{Catredir|target}}
	{{Cat-redirect|target}}
	{{Cat-red|target}}
	{{Redirect category|target}}
	{{Redirect cat|target}}
	{{Seecat|target}}
	{{See cat|target}}
	{{Endashcatredirect|target}}
	{{Synonym taxon category redirect|target}}
	{{Invalid taxon category redirect|target}}
	{{Monotypic taxon category redirect|target}}
The `target` value may be prefixed by 'Category:' or not (implied). For the full list of aliases, see
	https://commons.wikimedia.org/w/index.php?title=Special:WhatLinksHere/Template:Category_redirect&hidetrans=1&hidelinks=1
A hard '#REDIRECT' link at the very start of the content is recognized as well.

Parameters:
	content  (string or nil) raw wikitext of the page; nil is treated as ''.
Returns:
	'Category:' followed by the normalized target name, or nil when no supported
	template is found, when it has no parameter 1, or when the target is empty or
	contains characters forbidden in full page names.
]]

-- Lua patterns matching a transclusion of each accepted template name and
-- capturing its parameter list; they are tried in this order.
-- Note: a literal hyphen must be written '%-' because a bare '-' after a
-- character is the lazy repetition operator in Lua patterns.
local catRedirectPatterns = {}
for i, name in ipairs{
	'[Cc]ategory[%s_]*[Rr]edirect',
	'[Cc]at[%s_]*[Rr]edirect',
	'[Cc]at%-[Rr]edirect',
	'[Cc]at[%s_]*[Rr]edir',
	'[Cc]at%-?red', -- Hyphen optional: the unhyphenated spelling was accepted before.
	'[Rr]edirect[%s_]*[Cc]ategory',
	'[Rr]edirect[%s_]*[Cc]at',
	'[Ss]ee[%s_]*[Cc]at',
	'[Ee]ndashcatredirect',
	'[Ss]ynonym[%s_]*taxon[%s_]*category[%s_]*redirect',
	'[Ii]nvalid[%s_]*taxon[%s_]*category[%s_]*redirect',
	'[Mm]onotypic[%s_]*taxon[%s_]*category[%s_]*redirect',
} do
	catRedirectPatterns[i] = '{{[%s_]*' .. name .. '[%s_]*|[%s_:]*(.-)[%s_]*}}'
end

-- Named character entities that are decoded inside a redirect target.
local namedEntities = { amp = '&', gt = '>', lt = '<', quot = '"' }

-- Tell whether a code point may be produced from a numeric character reference.
local function isDecodableCodepoint(codepoint)
	if codepoint >= 0x0009 and codepoint <= 0x000D then
		return true -- Accept some whitespace C0 controls.
	end
	return codepoint >= 0x0020 and codepoint <= 0x10FFFD -- Normal range.
		and (codepoint < 0x0080 or codepoint > 0x009F) -- Exclude C1 controls.
		and (codepoint < 0xD800 or codepoint > 0xDFFF) -- Exclude all surrogates (high and low).
		and (codepoint < 0xFDD0 or codepoint > 0xFDEF) -- Exclude non-characters in BMP.
		and codepoint % 0x10000 <= 0xFFFD -- Exclude non-characters at end of planes.
end

-- gsub callback for '&name;' / '&#nnn;' / '&#xhhh;'. `entity` is the text
-- between '&' and ';'. Returns the decoded character, or nil so that gsub
-- keeps the original text (delimiters included) for anything not recognized.
local function decodeEntity(entity)
	local codepoint
	local digits = entity:match('^#(%d+)$')
	if digits then
		codepoint = tonumber(digits)
	else
		digits = entity:match('^#[Xx](%x+)$')
		if digits then
			codepoint = tonumber(digits, 16)
		end
	end
	if codepoint then
		if isDecodableCodepoint(codepoint) then
			return char(codepoint)
		end
		return nil
	end
	return namedEntities[entity]
end

function p.getTargetFromCatRedirect(content)
	-- Basic filtering: only in the relevant content shown on target page itself
	content = (content or '')
		:gsub('<!%-%-(.-)%-%->', '') -- Discard HTML comments.
		:gsub('<includeonly%s*>(.-)</includeonly>', '') -- Discard 'includeonly' sections and their content.
		:gsub('<includeonly%s*>(.-)$', '') -- Discard unclosed 'includeonly' sections.
		:gsub('<[/]?onlyinclude%s*>', '') -- Discard 'onlyinclude' opening/closing tags.
		:gsub('<[/]?noinclude%s*>', '') -- Discard 'noinclude' opening/closing tags.
		:gsub('</?nowiki%s*/?>', '') -- Discard nowiki opening/closing/selfclosed tags.
	-- Locate the hard redirect or the template transclusion, keep the parameters only.
	-- Note: there may potentially be several instances, this should not occur.
	local paramText = content:match('^#[Rr][Ee][Dd][Ii][Rr][Ee][Cc][Tt][%s:]*%[%[[%s_:]*([^]|]-)[%s_]*%]%]')
	if not paramText then
		for _, pattern in ipairs(catRedirectPatterns) do
			paramText = content:match(pattern)
			if paramText then
				break
			end
		end
	end
	if not paramText then
		return nil
	end
	-- Parse template parameters (they may be in arbitrary order).
	local params, n = {}, 0
	for param in paramText:gmatch('([^|]+)') do
		local key
		local pos = param:find('=')
		if pos then
			key = param:sub(1, pos - 1):match('^%s*(.-)%s*$') -- split then trim
			param = param:sub(pos + 1):match('^%s*(.-)%s*$') -- split then trim
		else
			n = n + 1
			key = tostring(n)
		end
		params[key] = param
	end
	-- The target is in parameter ['1'] of the template transclusion.
	local target = params['1']
	-- Check there's an effective target parameter to the template.
	if not target then
		return nil
	end
	target = target
		-- The target parameter may contain some known character entities
		-- (their validity in page names is not checked here, just parsed).
		:gsub('&([#%d%a]+);', decodeEntity)
		-- Normalize spaces in the target according to Mediawiki pagename rules.
		:gsub('[%s_]+', ' ')
		-- The target parameter should be trimmed by the template.
		:match('^ ?(.-) ?$')
	-- The target should not be empty and not contain tag delimiters or other character forbidden in full page names.
	-- Note: templates used inside the target parameter of the soft redirect are not expanded (there should be none),
	-- as this is costly or memory intensive and slow (would require invoking a full mediawiki parser).
	if target == '' or target:find('[<>%[%]{|}]') then
		return nil
	end
	-- The 'Category:' namespace is implied (it will be prefixed by a ':' to create a link)
	return 'Category:' .. (
		target:match('^:*[Cc][Aa][Tt][Ee][Gg][Oo][Rr][Yy] *: *(.-)$') or
		target
	)
end

-- Resolve the redirect target of the title object `obj`.
--
-- * When `obj` is a hard redirect whose `redirectTarget` is available, that
--   value is returned as is (a title object according to the Scribunto
--   documentation), without loading the page content.
-- * Otherwise, for pages in the 'Category' namespace, the page content is
--   scanned for a soft-redirect template; when a target is found and a title
--   object can be created for it, that title replaces `obj`.
-- * In every remaining case a string is returned: `obj.fullText` when
--   `fulltext` is set, `obj.prefixedText` otherwise. For a page that is not a
--   redirect at all this is the page's own name (not nil).
--
-- Raises an error when the page finally considered is flagged as a redirect
-- but no target could be obtained for it.
--
-- NOTE(review): when the soft-redirect target is itself a hard redirect, its
-- redirectTarget is only used for the error check; the name returned is that
-- of the soft-redirect target, not of the final page. A second soft redirect
-- is not followed either. Confirm this is intended.
function p.getTargetFrom(obj, fulltext)
	-- Hard redirects: obj.redirectTarget gives the target directly, which is
	-- cheaper than obj:getContent() on large pages.
	local target = obj.isRedirect and obj.redirectTarget
	if target then
		return target
	end

	-- Soft redirects (categories): the template has to be detected in the
	-- page content, which may be slow and memory-hungry if not cached.
	if obj:inNamespace('Category') then
		target = p.getTargetFromCatRedirect(obj:getContent())
		local softTitle = target and getTitle(target)
		if softTitle then -- nil when getTitle failed.
			obj = softTitle -- True replacement with the target.
			target = obj.isRedirect and obj.redirectTarget
		end
	end

	if not target and obj.isRedirect then
		-- The page is a redirect but no target was found, which may reveal a
		-- bug in the redirect matching pattern, so throw an error.
		error(string.format(
			'could not parse redirect on page "%s"',
			fulltext and obj.fullText or obj.prefixedText
		))
	end

	return fulltext and obj.fullText or obj.prefixedText
end

-- Format a page reference as text.
--   target             page name (string or number) or a title-like table
--                      exposing a getContent function
--   fulltext           when truthy, return the title's fullText instead of
--                      its prefixedText
--   ensureTitleExists  when truthy, return nothing unless the title exists
-- Returns the formatted name, or nothing when no title object could be
-- obtained or the existence requirement is not met. Raises an error (reported
-- at the caller's position) for any other type of `target`.
local function fmtTitle(target, fulltext, ensureTitleExists)
	local kind = type(target)
	local titleObj
	if kind == 'table' and type(target.getContent) == 'function' then
		titleObj = target
	elseif kind == 'string' or kind == 'number' then
		titleObj = getTitle(target)
	else
		error(string.format(
			'bad argument #1 to "fmtTitle": string, number, or title object expected, got %s',
			kind
		), 2)
	end
	if not titleObj then
		return
	end
	if ensureTitleExists and not titleObj.exists then
		return
	end
	return fulltext and titleObj.fullText or titleObj.prefixedText
end

-- Gets the target of a redirect.
--   page               page name (string or number) or title object
--   rname              not used by this function; kept in the signature
--   fulltext           when truthy, names are formatted with fullText instead
--                      of prefixedText
--   ensureTitleExists  when truthy, nothing is returned unless the resulting
--                      title exists
-- Returns nothing when the page does not exist, or when it is neither a
-- redirect nor in the 'Category' namespace. Otherwise returns the formatted
-- result of p.getTargetFrom (for a category without a soft redirect that is
-- the category's own name). Raises an error, reported at the caller's
-- position, when `page` has an unsupported type.
function p.getTarget(page, rname, fulltext, ensureTitleExists)
	-- Both page names and title objects are allowed as input.
	local kind = type(page)
	local titleObj
	if kind == 'table' and type(page.getContent) == 'function' then
		titleObj = page
	elseif kind == 'string' or kind == 'number' then
		titleObj = getTitle(page)
	else
		error(string.format(
			'bad argument #1 to "getTarget": string, number, or title object expected, got %s',
			kind
		), 2)
	end
	if not (titleObj and titleObj.exists) then
		return
	end
	if not (titleObj.isRedirect or titleObj:inNamespace('Category')) then
		return
	end
	return fmtTitle(p.getTargetFrom(titleObj, fulltext), fulltext, ensureTitleExists)
end

-- Given a single page name, determines what page it redirects to and returns
-- the target page name, or the passed page name when it is not a redirect.
-- The passed page name can be given as plain text or as a page link.
--
--   rname              page name, or wikitext containing a link to the page
--   bracket            when truthy, the result is returned as a page link
--   fulltext           when truthy, names use fullText instead of prefixedText
--   ensureTitleExists  when truthy, titles that do not exist are not returned
--
-- Returns the page name as plain text or, when `bracket` is set, as a page
-- link (piped with the requested name when it differs from the result).
-- Returns nil when `rname` is not a string with a non-space character, or
-- when no title could be determined (or none that satisfies
-- `ensureTitleExists`).
function p.luaMain(rname, bracket, fulltext, ensureTitleExists)
	if type(rname) ~= 'string' or not rname:find('%S') then
		return nil
	end
	-- Extract the page name from a link, else strip leading spaces/underscores/
	-- colons and trailing spaces/underscores from a bare name (a name that
	-- contains ']' or '|' is kept unchanged).
	rname = rname:match('%[%[[%s_:]*([^%]|]-)[%s_]*%]%]')
		or rname:match('^[%s_:]*([^%]|]-)[%s_]*$')
		or rname
	-- p.getTarget takes (page, rname, fulltext, ensureTitleExists): all four
	-- arguments are passed so that the two options reach the right parameters.
	local ret = p.getTarget(rname, rname, fulltext, ensureTitleExists)
		or fmtTitle(rname, fulltext, ensureTitleExists)
	if not ret then
		return nil
	end
	-- string.format ignores the extra argument for the one-placeholder forms.
	local link = bracket and (ret == rname and '[[:%s]]' or '[[:%s|%s]]') or '%s'
	return link:format(ret, rname)
end

-- Strings accepted as an affirmative value for a boolean-like option.
local affirmativeValues = {
	['1'] = true,
	['t'] = true,
	['true'] = true,
	['y'] = true,
	['yes'] = true,
}

-- Interpret an option value: returns true when `x` is exactly one of the
-- affirmative strings above (case-sensitive), and nil for anything else.
local function use(x)
	if x and affirmativeValues[x] then
		return true
	end
	return nil
end

-- Wikitext entry point for p.luaMain; the result may be a redlink, i.e. a
-- page title that does not exist in the wiki.
-- Frame arguments (read with frameOnly = true):
--   1           page name or page link to resolve
--   2           alternative text returned when luaMain yields nil (default '')
--   bracket     optional flag: return a page link
--   fulltext    optional flag: use fullText instead of prefixedText
--   noredlinks  optional flag: only return titles that exist
function p.main(frame)
	local args = getArgs(frame, {frameOnly = true})
	local pageName, fallback = args[1], args[2] or ''
	local result = p.luaMain(
		pageName,
		use(args.bracket),
		use(args.fulltext),
		use(args.noredlinks)
	)
	return result or fallback
end

-- Variant of main that always asks luaMain to ensure the title exists, as if
-- '|noredlinks=true' were given. When luaMain yields nil, args[2] is returned
-- (the empty string when it is absent). The bracket and fulltext flags are
-- still read from the frame arguments.
function p.mainE(frame)
	local args = getArgs(frame, {frameOnly = true})
	local pageName, fallback = args[1], args[2] or ''
	local result = p.luaMain(pageName, use(args.bracket), use(args.fulltext), true)
	return result or fallback
end

-- Variant of main that returns args[2] (or the empty string) when luaMain
-- yields nil. luaMain is always called with fulltext and title existence
-- checking enabled; only the bracket flag is read from the frame arguments.
-- NOTE(review): earlier documentation described '|bracket=true|noredlinks=true'
-- as the defaults, whereas the code forces fulltext rather than bracket;
-- confirm which one is intended.
function p.mainA(frame)
	local args = getArgs(frame, {frameOnly = true})
	local pageName, fallback = args[1], args[2] or ''
	local result = p.luaMain(pageName, use(args.bracket), true, true)
	return result or fallback
end

-- Returns true if the specified page is a redirect, and false otherwise
-- (including when no title object can be obtained for `page`).
function p.luaIsRedirect(page)
	local titleObj = getTitle(page)
	-- Coerce to a real boolean: the `and true or false` idiom is safe here
	-- because the middle operand is the constant true.
	return (titleObj and titleObj.isRedirect) and true or false
end

-- Wikitext entry point for p.luaIsRedirect: returns 'yes' when the page named
-- by the first frame argument is a redirect, and the blank string otherwise.
function p.isRedirect(frame)
	local args = getArgs(frame, {frameOnly = true})
	-- 'yes' is truthy, so the and/or idiom cannot fall through by mistake.
	return p.luaIsRedirect(args[1]) and 'yes' or ''
end

return p