Module:Wikidata table

From MOASSpedia
Jump to navigation Jump to search

This module creates one or more table rows where the data in each cell is taken from Wikidata.

Usage

Function makerows

This function is simple and is intended to test performance. It is unsuitable for use in articles.

{{#invoke:Wikidata table |makerows
	|pids= P123, P234, P345, etc. (one property id per column)
	|qids = Q12345, Q23456, Q34567, etc. (one entity id per row)
}}

Function makerow

This function is under development for use in articles. It creates a single table row from one Wikidata entity using a given set of properties. The first cell is the label for the Wikidata entity and it is marked up as a row-header. An "editonwikidata" icon is appended to the first cell. The table caption, column headings and scopes, etc. should be supplied externally. See the examples for more detail.

{{#invoke:Wikidata table |makerow
	|pids= P123, P234+P345, P456, P567/P580-P582, etc. (see below)
	|qid = Q12345 (one entity id for the row)
	|c1 = locally supplied value (overrides value from Wikidata for column 1)
	|c2 = locally supplied value (overrides value from Wikidata for column 2)
	|cN = locally supplied value (overrides value from Wikidata for column N), etc.
	|c1+ = locally supplied value (appended to the value from Wikidata in column 1)
	|c2+ = locally supplied value (appended to the value from Wikidata in column 2)
	|cN+ = locally supplied value (appended to the value from Wikidata in column N), etc.
}}
Notes
A table cell can be made up of a combination of multiple properties, qualifiers and references.
Whitespace is ignored in the list of pids.
Cells are separated from one another by a comma (,).
Within a cell, multiple properties are separated by the plus sign +.
Each property can be separated from one or more qualifiers by a forward-slash /.
Each qualifier is separated by a hyphen - (although any punctuation other than , + / will work).

See also


--[[
	version dated 2021-02-22
	Module:Wikidata table
--]]

-- export table: the functions attached to it are returned at the end of the module
local p = {}

-- when true, errmsg() returns a visible "Error: ..." string; when false it returns nil
local debugging = false

local sep = ", " -- separator for multiple values of same property, changed from "<br>"
local sep2 = "<br>" -- separator for values of different properties

-- Internationalisation

-- Resolves a language object, trying in order:
--   1. the supplied language code (optional string, e.g. "en"), if it is a known tag
--   2. the result of expanding {{int:lang}} (presumably the viewer's interface language)
--   3. the content language of the wiki
local function findLang(langcode)
	local requested = mw.text.trim(langcode or "")
	if mw.language.isKnownLanguageTag(requested) then
		return mw.language.new(requested)
	end
	local expanded = mw.getCurrentFrame():preprocess('{{int:lang}}')
	if mw.language.isKnownLanguageTag(expanded) then
		return mw.language.new(expanded)
	end
	return mw.language.getContentLanguage()
end
-- language code used when choosing between monolingual texts
local currentlang = findLang().code

-- translatable strings used by this module
local i18n = {
	filespace = "File",
	editonwikidata = "Edit this on Wikidata",
	-- suffixes for ordinals: positions 1..3 are for numbers ending in 1, 2, 3
	ordinal = { "st", "nd", "rd", default = "th" },
}

-- full month names
local months = {
	"January", "February", "March", "April", "May", "June",
	"July", "August", "September", "October", "November", "December",
}
-- the same months cut down to their first three letters
local mnths = {}
for i = 1, #months do
	mnths[i] = months[i]:sub(1, 3)
end

-- makeOrdinal: turns a cardinal number into an ordinal string (1 -> "1st", 12 -> "12th")
-- The suffix rules are English-specific and need internationalising together with i18n.
-- Anything that tonumber() cannot convert is returned unchanged.
-------------------------------------------------------------------------------
p.makeOrdinal = function(cardinal)
	local card = tonumber(cardinal)
	if not card then return cardinal end
	local suffixes = i18n.ordinal
	local lasttwo = card % 100
	local ordsuffix
	if lasttwo == 11 or lasttwo == 12 or lasttwo == 13 then
		-- 11, 12, 13, 111, ... take the default suffix despite ending in 1, 2 or 3
		ordsuffix = suffixes.default
	else
		-- a final digit of 1, 2 or 3 has its own entry; every other value has none
		ordsuffix = suffixes[card % 10] or suffixes.default
	end
	return card .. ordsuffix
end

-- unit labels (as returned by mw.wikibase.getLabel for the unit's item) for which
-- a quantity is passed through the "cvt" template, mapped to the unit code given to it
local unitsymbol = {
	inch = "in",
	foot = "ft",
	yard = "yd",
	mile = "mi",
	["nautical mile"] = "nmi",
	metre = "m",
	centimetre = "cm",
	millimetre = "mm",
	kilometre = "km",
	acre = "acres",
	hectare = "ha"
}

-- prefixes for particular qualifiers
-- (text placed in front of the qualifier's value, keyed by the qualifier's property id)
local prefix = {
	P580 = "from ",
	P582 = "until ",
}

-- external ids which have a formatter url (P1630)
-- "$1" in the url stands for the value of the property
local formaturl = {
	P3563 = "https://wikidata-externalid-url.toolforge.org/?url=https%3A%2F%2Fmsi.nga.mil%2FqueryResults%3Fpublications%2Fngalol%2Flights-buoys%3Fvolume%3D%251%26featureNumber%3D%252%26includeRemovals%3Dfalse%26output%3Dhtml&exp=(%5Cd%7B3%7D)-(.*)&id=$1", -- NGA number
	P613 = "http://nearby.org.uk/coord.cgi?p=$1", -- OS grid reference
	P373 = "https://commons.wikimedia.org/wiki/Category:$1", -- Commons category
}

-- date format default to dmy
-- overwritten by a non-empty df argument to makerow/makerows;
-- "mdy" is the only other value the formatting code tests for
local df = "dmy"

-- fallbacks for common properties:
-- property-to-fallback-from = "property-to-fallback-to"
-- (consulted by makerow when the first property yields no value)
local fallback = {
	P276 = "P131",
	P571 = "P1619",
	P729 = "P571",
	P4755 = "P296"
}

-- error messages
-- returns "Error: " followed by txt while debugging is on, otherwise nil
local function errmsg(txt)
	if not debugging then
		return nil
	end
	return "Error: " .. txt
end

-- upper-cases the first character of a (possibly linked) piece of wikitext
--   piped link:    the character straight after the first "|" is capitalised
--   unpiped link:  the character straight after the leading "[[" is capitalised
--   anything else: the very first character is capitalised
-- only a lowercase letter matched by %l is changed
local function ucf(lnk)
	local pieces = mw.text.split( lnk, "|", true )
	if pieces[2] then
		pieces[2] = pieces[2]:gsub("^(%l)", mw.ustring.upper)
		return table.concat(pieces, "|")
	end
	local pattern = "^(%l)"
	if lnk:sub(1,2) == "[[" then
		pattern = "^(%[%[%l)"
	end
	-- the parentheses discard gsub's second result (the substitution count)
	return (lnk:gsub(pattern, mw.ustring.upper))
end
-- entrypoint for invoke: applies ucf to the text argument (empty string if absent)
function p.ucf(frame)
	local text = frame.args.text
	if not text then
		text = ""
	end
	return ucf(text)
end

-- rounds x to a step derived from prec and returns the result
--   prec not numeric      -> step 1e-4
--   prec greater than 1   -> step 1
--   prec less than 1e-6   -> step 1e-6
--   otherwise             -> the power of ten at or below prec
-- whole results are floored to drop any fractional part; results with a
-- magnitude under 1e-4 (zero included) come back as a string with six
-- decimal places so that they are never shown in exponent form
local function decimalprecision(x, prec)
	-- round the magnitude and put the sign back afterwards
	local sign = 1
	if x < 0 then
		sign = -1
		x = -x
	end
	if not tonumber(prec) then
		prec = 1e-4
	elseif prec > 1 then
		prec = 1
	elseif prec < 1e-6 then
		prec = 1e-6
	else
		prec = 10 ^ math.floor(math.log10(prec))
	end
	local rounded = math.floor(x / prec + 0.5) * prec * sign
	local whole = math.floor(rounded)
	if rounded == whole then
		rounded = whole
	end
	if math.abs(rounded) < 1e-4 then
		-- e.g. 9e-5 becomes "0.000090"
		return string.format("%f", rounded)
	end
	return rounded
end

-- builds the wikitext for a small pen icon linking to a Wikidata entity
--   entityID:   entity to link to; nil or "" means the entity of the current page
--   propertyID: optional, added to the link as a "#" fragment
--   langcode:   optional, added to the link as "?uselang="
local function createicon(entityID, propertyID, langcode)
	if not entityID or entityID == "" then
		entityID = mw.wikibase.getEntityIdForCurrentPage()
	end
	local caption = i18n.editonwikidata
	-- "&nbsp;<span data-bridge-edit-flow='overwrite' class='penicon'>[[" -> enable Wikidata Bridge
	local parts = {
		"&nbsp;<span class='penicon autoconfirmed-show'>[["
			.. i18n.filespace
			.. ":OOjs UI icon edit-ltr-progressive.svg |frameless |text-top |10px |alt="
			.. caption
			.. "|link=https://www.wikidata.org/wiki/" .. entityID
	}
	if langcode and langcode ~= "" then
		parts[#parts+1] = "?uselang=" .. langcode
	end
	if propertyID and propertyID ~= "" then
		parts[#parts+1] = "#" .. propertyID
	end
	parts[#parts+1] = "|" .. caption .. "]]</span>"
	return table.concat(parts)
end

-- takes a statement tuple supplied from Wikidata and returns any references
-- returns nil when the statement has no references, otherwise the concatenated
-- <ref> tags (an empty string if none of the references could be used)
-- For each reference:
--   * P854 (reference url) with a P1476 title in the current language -> {{Cite web}}
--   * P854 without any P1476 title -> the bare url
--   * P854 whose titles are all in other languages -> nothing is emitted for the url
--   * P248 (stated in) -> {{Cite Q}}
-- Snaks without a datavalue ("somevalue"/"novalue") are ignored rather than
-- indexed, which used to raise a script error.
function p._getrefs(statement)
	if not statement.references then return nil end
	-- value of the first snak for pid in one reference, or nil if there is none
	local function firstvalue(ref, pid)
		local snaks = ref.snaks and ref.snaks[pid]
		local snak = snaks and snaks[1]
		return snak and snak.datavalue and snak.datavalue.value
	end
	local rtbl = {}
	local frm = mw.getCurrentFrame()
	for _, ref in ipairs(statement.references) do
		local url = firstvalue(ref, "P854") -- reference url
		if url then
			local titlesnaks = ref.snaks.P1476 -- title (monolingual text)
			if titlesnaks then
				for _, titlesnak in ipairs(titlesnaks) do
					local title = titlesnak.datavalue and titlesnak.datavalue.value
					if title and title.language == currentlang then
						local citeweb = frm:expandTemplate{ title = "Cite web", args = { url=url, title=title.text } }
						-- name the ref after a hash of its content so identical citations share a name
						local hash = "WD" .. mw.hash.hashValue("crc32", citeweb)
						rtbl[#rtbl+1] = frm:callParserFunction{ name = "#tag:ref", args = { citeweb, name=hash } }
						break
					end
				end
			else
				local hash = "WD" .. mw.hash.hashValue("crc32", url)
				rtbl[#rtbl+1] = frm:callParserFunction{ name = "#tag:ref", args = { url, name=hash } }
			end
		end
		local statedin = firstvalue(ref, "P248") -- stated in
		if statedin and statedin.id then
			local rqid = statedin.id
			local citeq = frm:expandTemplate{ title = "Cite Q", args = { rqid } }
			rtbl[#rtbl+1] = frm:callParserFunction{ name = "#tag:ref", args = { citeq, name=rqid } }
		end
	end
	return table.concat(rtbl)
end

-- takes a qid and returns the best available name for it:
--   sitelink and label differing only in case -> [[sitelink]], with the first letter
--                                                cased to follow the label
--   sitelink and label otherwise              -> [[sitelink|label]]
--   sitelink only                             -> [[sitelink]]
--   label only                                -> the unlinked label
--   neither                                   -> the qid itself
function p._getLink(qid)
	local slink = mw.wikibase.sitelink(qid)
	local label = mw.wikibase.getLabel(qid)
	if not slink then
		return label or qid
	end
	if not label then
		return "[[" .. slink .. "]]"
	end
	if slink:lower() ~= label:lower() then
		return "[[" .. slink .. "|" .. label .. "]]"
	end
	-- same text apart from case: link directly, matching the label's case
	local target
	if label:find("^%u") then
		target = slink:gsub("^(%l)", mw.ustring.upper)
	else
		target = slink:gsub("^(%u)", mw.ustring.lower)
	end
	return "[[" .. target .. "]]"
end
-- entrypoint for #invoke getLink
-- reads the qid argument (upper-cased); returns nil when it is missing or empty
function p.getLink(frame)
	local qid = frame.args.qid
	if not qid then
		return nil
	end
	qid = qid:upper()
	if qid == "" then
		return nil
	end
	return p._getLink(qid)
end

-- takes a snak for a time property and returns the date as text
--   snak: the snak to render with mw.wikibase.renderSnak
--   prec: the precision of the time value; 7, 10 and 11 get extra treatment
--         (the existing comments label 7 as century; 10 and 11 expect
--         "<month> <year>" and "<day> <month> <year>" text respectively)
-- The rendered text is only reshaped when it has the expected layout; otherwise
-- it is returned exactly as rendered. Previously an unexpected layout (for
-- instance a month name that %a does not match) left the captures nil and the
-- concatenation raised a script error.
local function _getDate(snak, prec)
	local retval = mw.wikibase.renderSnak(snak)
	if type(retval) ~= "string" then return retval end
	if prec == 7 then -- century
		-- "<digits>.<rest>" becomes "<ordinal><rest>"
		local num, txt = retval:match("^(%d+)%.(.+)$")
		if num then
			retval = p.makeOrdinal(num) .. txt
		end
	elseif prec == 10 then
		-- "<month> <year><rest>": shorten the month to three letters
		local m, y, e = retval:match("^(%a+) (%d+)(.*)")
		if m then
			retval = m:sub(1,3) .. " " .. y .. e
		end
	elseif prec == 11 then
		-- "<day> <month> <year><rest>": shorten the month to three letters
		local d, m, y, e = retval:match("^(%d+) (%a+) (%d+)(.*)")
		if d then
			retval = d .. " " .. m:sub(1,3) .. " " .. y .. e
		end
	end
	return retval
end

-- formats the main value of one statement (whose main snak has snaktype "value")
-- returns the wikitext for the value, or nil when there is nothing to show
local function formatmainvalue(prop, pid)
	local snak = prop.mainsnak
	local dtype = snak.datatype
	local dval = snak.datavalue.value
	if dtype == "wikibase-item" then
		return p._getLink(dval.id)
	elseif dtype == "monolingualtext" then
		-- the value is a single { text = ..., language = ... } pair, not a list;
		-- statements in other languages are dropped
		if dval.language == currentlang then
			return dval.text
		end
		return nil
	elseif dtype == "commonsMedia" or dtype == "url" then
		return dval
	elseif dtype == "external-id" or dtype == "string" then
		if formaturl[pid] then
			-- percent-encode the id so that spaces cannot end the external link early,
			-- and substitute through a function so that "%" in the id is taken literally
			local encoded = mw.uri.encode(dval, "PATH")
			local url = mw.ustring.gsub(formaturl[pid], "%$1", function() return encoded end)
			return "[" .. url .. " " .. dval .. "]"
		end
		return dval
	elseif dtype == "time" then
		local date = _getDate(snak, dval.precision)
		if dval.precision == 11 and df == "mdy" and type(date) == "string" then
			-- reorder "<day> <month> <year>" only when the text has that layout
			local d, m, y, e = date:match("^(%d+) (%a+) (%d+)(.*)")
			if d then
				date = m .. " " .. d .. ", " .. y  .. e
			end
		end
		return date
	elseif dtype == "quantity" then
		local amount = tonumber(dval.amount)
		local unitqid = string.match( dval.unit, "(Q%d+)" )
		if not unitqid then
			return amount
		end
		local unit = mw.wikibase.getLabel(unitqid)
		if not unit then
			-- the unit has no label to show: let Wikibase render the whole value
			return mw.wikibase.renderSnak(snak)
		end
		local symbol = unitsymbol[unit]
		if symbol then
			return mw.getCurrentFrame():expandTemplate{ title = "cvt", args = {amount, symbol} }
		end
		return amount  .. " " .. unit
	elseif dtype == "globe-coordinate" then
		return "<span style='font-size:90%;'>"
			.. mw.wikibase.formatValue(snak)
			.. "</span>"
	end
	-- any other datatype: simple values pass through unchanged,
	-- structured ones cannot be concatenated so Wikibase renders them
	if type(dval) == "table" then
		return mw.wikibase.renderSnak(snak)
	end
	return dval
end

-- collects the formatted values of the requested qualifiers of one statement
-- quals is a string containing qualifier property ids; the result is a sequence of strings
local function formatqualifiers(prop, quals)
	local qtbl = {}
	for qpid in quals:gmatch("P%d+") do
		for _, qv in ipairs(prop.qualifiers[qpid] or {}) do
			local fqv
			if qv.datatype == "globe-coordinate" then
				fqv = mw.wikibase.formatValue(qv) -- linked
			elseif qv.datatype == "time" and qv.datavalue then
				fqv = _getDate(qv, qv.datavalue.value.precision)
				if fqv and qv.datavalue.value.precision == 11 then -- trim to month
					fqv = fqv:match("%d+ (.+)")
				end
			else
				-- also reached by time qualifiers that carry no datavalue
				fqv = mw.wikibase.renderSnak(qv) -- plaintext
			end
			if fqv and fqv ~= "" then
				qtbl[#qtbl+1] = (prefix[qpid] or "") .. fqv
			end
		end
	end
	return qtbl
end

-- takes a qid and a property id and returns the corresponding values or nil
-- maxvals greater than zero sets the maximum number of values to return
-- the string quals contains property ids of qualifiers to be returned ('-' is the separator)
-- Each value is followed by its references and, if requested, its qualifiers;
-- multiple values are joined with sep. A missing qid or pid yields nil.
function p._getWD(qid, pid, maxvals, quals)
	if not qid or not pid then return nil end
	maxvals = tonumber(maxvals) or 0
	local ret = {}
	for _, prop in ipairs(mw.wikibase.getBestStatements(qid, pid)) do
		-- the first statement without an actual value ends the scan
		if prop.mainsnak.snaktype ~= "value" then
			break
		end
		local retval = formatmainvalue(prop, pid)
		-- a statement with nothing to show contributes neither references nor qualifiers
		if retval then
			-- get references
			retval = retval .. (p._getrefs(prop) or "")
			-- get qualifiers
			if quals and prop.qualifiers then
				local qtbl = formatqualifiers(prop, quals)
				if #qtbl > 0 then
					retval = retval .. "<span style='font-size:90%;'> (" .. table.concat(qtbl, "&nbsp;") .. ")</span>"
				end
			end
			ret[#ret+1] = retval
			if maxvals > 0 and #ret >= maxvals then break end
		end
	end
	if #ret < 1 then
		return nil
	else
		return table.concat(ret, sep)
	end
end
-- entrypoint for #invoke getWD
-- arguments: qid and pid (both required, upper-cased), maxvals (number, default 0)
-- and quals (upper-cased; an empty value is passed on as nil)
function p.getWD(frame)
	local args = frame.args
	local function upperarg(name)
		return (args[name] or ""):upper()
	end
	local qid = upperarg("qid")
	if qid == "" then return nil end
	local pid = upperarg("pid")
	if pid == "" then return nil end
	local maxvals = tonumber(args.maxvals) or 0
	local quals = upperarg("quals")
	return p._getWD(qid, pid, maxvals, quals ~= "" and quals or nil)
end

-- make multiple table rows, one for each qid in args.qids,
-- with one table cell for each pid in args.pids
-- args.df, when not empty, replaces the module's date format
-- Every row has a header cell followed by exactly one <td> per pid: a property
-- without a value, including a missing image for P18, produces a cell holding
-- a single space (a missing image used to emit no cell at all, which shifted
-- the remaining cells of the row to the left).
-- Returns the rows as one string, or the result of errmsg() when qids or pids is missing.
function p._makerows(args)
	args.qids = (args.qids or ""):upper()
	if args.qids == "" then return errmsg("missing qids") end
	args.pids = (args.pids or ""):upper()
	if args.pids == "" then return errmsg("missing pids") end
	local qids, pids = {}, {}
	for qid in args.qids:gmatch("Q%d+") do
		qids[#qids+1] = qid
	end
	for pid in args.pids:gmatch("P%d+") do
		pids[#pids+1] = pid
	end
	args.df = args.df or ""
	if args.df ~= "" then df = args.df end
	-- gather the pieces and join once instead of growing a string piece by piece
	local out = {}
	for _, qid in ipairs(qids) do
		out[#out+1] = "<tr>"
		out[#out+1] = "<th scope='row'>" .. p._getLink(qid) .. "</th>"
		for _, pid in ipairs(pids) do
			local cell
			if pid == "P18" then -- image
				local img = p._getWD(qid, pid, 1)
				if img then
					cell = "[[File:" .. img .. "|100px]]"
				end
			else
				cell = p._getWD(qid, pid, 0)
			end
			out[#out+1] = "<td>" .. (cell or " ") .. "</td>"
		end
		out[#out+1] = "</tr>"
	end
	return table.concat(out)
end
-- entry point for #invoke makerows
-- merges the parent frame's arguments with the #invoke's own arguments
-- (the latter win when both supply the same key) and hands them to p._makerows
function p.makerows(frame)
	local merged = {}
	local function absorb(source)
		for key, value in pairs(source) do
			merged[key] = value
		end
	end
	absorb(frame:getParent().args)
	absorb(frame.args)
	return p._makerows(merged)
end

-- make a single table row, one cell per value passed in args.pids
-- each value may be a combination of properties and qualifiers
--   args.qid:  entity for the row; without one the row is built from local values only
--   args.pids: comma-separated cell specifications such as P12+P34/P456-P789
--   args.cN:   replaces the content of column N (column 1 is the row header)
--   args.cN+:  is appended to the content of column N
--   args.df:   when not empty, replaces the module's date format
-- Entries of a cell specification that contain no property id (for example the
-- empty entry left by a trailing "+" or a leading comma) are skipped; they used
-- to be passed on as a nil property id, which raised a script error.
-- Returns the row as a string, or the result of errmsg() when pids is missing.
function p._makerow(args)
	local qid = (args.qid or ""):upper():match("Q%d+")
	-- qid can be nil if we want a row without wikidata
	-- remove whitespace, uppercase, trap nil
	args.pids = (args.pids or ""):upper():gsub("%s", "")
	if args.pids == "" then return errmsg("missing pids") end
	-- collect any parameters c1, c2, etc. as cell replacements; c1+, c2+, etc. as addenda
	local cellrep, celladd = {}, {}
	for key, value in pairs(args) do
		if type(key) == "string" then
			local col, plus = key:match("^[Cc](%d+)(%+?)$")
			if col then
				if plus == "+" then
					celladd[tonumber(col)] = value
				else
					cellrep[tonumber(col)] = value
				end
			end
		end
	end
	-- set date format if passed
	args.df = args.df or ""
	if args.df ~= "" then df = args.df end
	-- create the html to return; pieces are gathered and joined once at the end
	local out = { "<tr>" }
	if cellrep[1] and qid then
		out[#out+1] = "<th scope='row'>" .. cellrep[1] .. createicon(qid) .. "</th>"
	elseif not qid then
		out[#out+1] = "<th>" .. (cellrep[1] or " ") .. "</th>"
	else
		out[#out+1] = "<th scope='row'>" .. ucf(p._getLink(qid)) .. (celladd[1] or "") .. createicon(qid) .. "</th>"
	end
	-- split args.pids at comma separators into sequence of cellpids (each may be like P12+P34/P456-P789)
	local cellpids = mw.text.split(args.pids, ",+")
	for c, val in ipairs(cellpids) do
		local cell
		if cellrep[c+1] then
			cell = cellrep[c+1]
		elseif not qid then
			cell = " "
		else
			-- separate multiple properties in same cell, sep=+
			local ptbl = {} -- sequence of values for one cell
			for propandquals in mw.text.gsplit(val, "+", true) do
				-- for each property, split off property from qualifiers, sep=/
				local parts = mw.text.split(propandquals, "/")
				local pid = parts[1]:match("P%d+")
				local quals = parts[2]
				if pid == "P18" then -- image
					local img = p._getWD(qid, pid, 1)
					if img then
						ptbl[#ptbl+1] = "[[File:" .. img .. "|100px]]"
					end
				elseif pid then
					local wdval = p._getWD(qid, pid, 0, quals)
					if not wdval and fallback[pid] then
						wdval = p._getWD(qid, fallback[pid], 0, quals)
					end
					if wdval then
						ptbl[#ptbl+1] = ucf(wdval)
					end
				end
			end -- of loop through multiple properties in same cell
			cell = table.concat(ptbl, sep2) .. (celladd[c+1] or "")
		end
		out[#out+1] = "<td>" .. cell .. "</td>"
	end -- of loop through all of the cells in the row
	out[#out+1] = "</tr>"
	return table.concat(out)
end
-- entry point for #invoke makerow
-- copies the parent frame's arguments, overlays the #invoke's own arguments
-- on top of them, and passes the combined table to p._makerow
function p.makerow(frame)
	local combined = {}
	local sources = { parent = frame:getParent().args, own = frame.args }
	for key, value in pairs(sources.parent) do
		combined[key] = value
	end
	for key, value in pairs(sources.own) do
		combined[key] = value
	end
	return p._makerow(combined)
end

return p