मोड्युल:JSON data
This module is intended to be used by bots or other automation tools which need to access Wiktionary data. This is not a general-purpose JSON serialisation module. Using one would be impractical, as experiments have shown that using one on the language data easily hits execution limits.
Bots may access the data either through mw:API:Expandtemplates or by requesting the raw source of a page that invokes this module, adding the &templates=expand query parameter to the request.
Make sure you only load this module once (or twice, if you need both the languages table and another table). As of 11th of September 2013, the JSON for the languages table has been measured to weigh 666543 bytes. (Family data takes 13731 bytes, while scripts take 6279 bytes.) Generating the JSON data takes a few seconds and puts a relatively high strain on the servers.
Available functions are: export_languages, export_scripts and export_families, which generate the JSON equivalents of Module:languages, Module:scripts and Module:families respectively. The structure of the data corresponds exactly to the one used in Wiktionary modules, with the caveat described below.
When export_languages is given positional arguments, the first specifies the type of languages (see Template:language data documentation) that will be listed in the data, while the following arguments list the data keys which will be exported. For the first argument, the special values TWO_LETTER, TWO_THREE_LETTER and TWO_THREE_LETTER_REGULAR can also be passed. For example, invoking the function with TWO_LETTER, canonicalName and scripts as arguments will export the canonical names and script codes for languages with two-letter codes. To conserve space, if only one key is specified, its value is listed directly in the root object (indexed by language codes) instead of being wrapped in a per-language object.
-- Table collecting the public entry points of this module; it is the value
-- returned at the end of the file.
local export = {}
-- optimisation: local variable lookup is slightly faster than global lookup
-- (these names are used by every serialisation helper below)
local tab_concat, type, tostring, pairs, ipairs = table.concat, type, tostring, pairs, ipairs
-- Lookup table mapping each character that may not appear verbatim inside a
-- JSON string to its escape sequence. It is built once at load time so that
-- escaping stays a single gsub pass with a table lookup per match.
local json_escapes = {
	['"'] = '\\"',
	['\\'] = '\\\\',
	['\b'] = '\\b',
	['\f'] = '\\f',
	['\n'] = '\\n',
	['\r'] = '\\r',
	['\t'] = '\\t',
}
for byte = 0, 31 do
	local ch = string.char(byte)
	if not json_escapes[ch] then
		json_escapes[ch] = ('\\u%04x'):format(byte)
	end
end

--- Serialises a value as a JSON string literal.
-- The value is converted with tostring() first, so non-string keys (e.g.
-- numbers) can be passed as well.
-- Quotes, backslashes and the control characters U+0000..U+001F are escaped;
-- all other bytes (including UTF-8 sequences) are emitted unchanged.
-- @param s value to serialise
-- @return the quoted and escaped JSON string
local function export_str(s)
	-- Characters matched by %c that have no table entry (e.g. DEL) are kept
	-- as they are, because gsub leaves a match untouched when the lookup
	-- yields nil. The parentheses drop gsub's second result (the match count).
	return '"' .. (tostring(s):gsub('[%c"\\]', json_escapes)) .. '"'
end
--- Serialises the sequence part of a table as a JSON array.
-- Only string and boolean elements are supported; anything else raises an
-- error. Iteration uses ipairs, so it stops at the first nil element.
-- @param tab table whose elements 1..n are serialised
-- @return JSON array text, e.g. ["a",true]
local function export_array(tab)
	local parts = {}
	for _, element in ipairs(tab) do
		local kind = type(element)
		local rendered
		if kind == 'string' then
			rendered = export_str(element)
		elseif kind == 'boolean' then
			rendered = tostring(element)
		else
			error("serialisation failed: unsupported array element type")
		end
		parts[#parts + 1] = rendered
	end
	return "[" .. tab_concat(parts, ",") .. "]"
end
--- Serialises a table as a JSON object.
-- Lua uses one table type for both arrays and objects, so a rudimentary
-- "schema" tells the serialiser how to treat nested tables:
--   schema[key] == false    -> the value is serialised as an array
--   schema[key] is a table  -> the value is serialised as an object, using
--                              that table as the nested schema
-- String and boolean values need no schema entry. Any other value type, or a
-- table value without a usable schema entry, raises an error.
-- @param tab table to serialise; nil yields the JSON literal null
-- @param schema optional schema table as described above
-- @return JSON object text
local function export_object(tab, schema)
	if tab == nil then
		return "null"
	end
	local members = {}
	for name, val in pairs(tab) do
		local kind = type(val)
		local rendered
		if kind == 'string' then
			rendered = export_str(val)
		elseif kind == 'boolean' then
			rendered = tostring(val)
		elseif kind == 'table' then
			if not schema then
				error("no schema given for array with table values")
			end
			local shape = schema[name]
			if shape == false then
				rendered = export_array(val)
			elseif type(shape) == 'table' then
				rendered = export_object(val, shape)
			else
				error("serialisation failed: table value at key '" .. name .. "' has no schema")
			end
		else
			error("serialisation failed: unsupported object value type")
		end
		members[#members + 1] = export_str(name) .. ':' .. rendered
	end
	return "{" .. tab_concat(members, ",") .. "}"
end
-- Builds the predicate deciding which language records are exported.
-- `spec` may be nil/false/"" (accept everything), a predicate function
-- (returned unchanged), or a string: one of the special names TWO_LETTER,
-- TWO_THREE_LETTER, TWO_THREE_LETTER_REGULAR, a "=code1,code2,..." list of
-- language codes, or otherwise a value compared against the record's `type`.
local function make_language_filter(spec)
	if not spec or spec == "" then
		return function() return true end
	end
	if type(spec) ~= 'string' then
		return spec
	end
	if spec == "TWO_LETTER" then
		return function(key)
			return #key == 2
		end
	elseif spec == "TWO_THREE_LETTER" then
		return function(key)
			return #key <= 3
		end
	elseif spec == "TWO_THREE_LETTER_REGULAR" then
		return function(key, value)
			return (#key <= 3) and value.type == 'regular'
		end
	elseif spec:sub(1, 1) == '=' then
		-- explicit list of codes after the leading '='
		local wanted = {}
		for code in mw.text.gsplit(spec:sub(2), ',') do
			wanted[code] = true
		end
		return function(key)
			return wanted[key]
		end
	end
	return function(_, value)
		return value.type == spec
	end
end

--- Serialises Module:languages/alldata as a JSON object keyed by the keys of
-- that data table.
-- Can be called from Lua, or via #invoke, in which case the first parameter
-- is the frame: frame.args[1] is the item filter, frame.args[2..] are the
-- keys to export and frame.args.nulls is parsed with Module:yesno to give
-- skip_nulls.
-- @param item_filter frame object, filter string (see make_language_filter),
--        predicate function(key, value), or nil/"" to export everything
-- @param key_filter optional list of record keys to export; with exactly one
--        key the value is emitted directly instead of a per-language object
-- @param skip_nulls when true, single-key exports omit languages whose value
--        for that key is nil instead of emitting null
-- @return JSON object text
function export.export_languages(item_filter, key_filter, skip_nulls)
	if type(item_filter) == "table" then
		-- invoked from wikitext: unpack the frame arguments
		local args = item_filter.args
		key_filter = {}
		local i = 2
		while args[i] do
			key_filter[#key_filter + 1] = args[i]
			i = i + 1
		end
		if #key_filter == 0 then
			key_filter = nil
		end
		skip_nulls = require('Module:yesno')(args.nulls)
		item_filter = args[1]
	end

	local accept = make_language_filter(item_filter)
	local data = mw.loadData("Module:languages/alldata")
	local items = {}
	-- false marks array-valued keys, a table marks object-valued keys
	local schema = {
		canonicalName = false,
		type = false,
		scripts = false,
		family = false,
		otherNames = false,
		ancestors = false,
		wikimedia_codes = false,
		sort_key = {
			from = false,
			to = false
		},
		entry_name = {
			from = false,
			to = false
		}
	}

	local single = key_filter and #key_filter == 1
	local only_key, only_schema
	if single then
		only_key = key_filter[1]
		only_schema = schema[only_key]
	end

	for key, value in pairs(data) do
		if accept(key, value) then
			if not key_filter then
				items[#items + 1] = export_str(key) .. ':' .. export_object(value, schema)
			elseif single then
				local item = value[only_key]
				if item == nil then
					if not skip_nulls then
						items[#items + 1] = export_str(key) .. ':null'
					end
				else
					local kind = type(item)
					local rendered
					if kind == 'string' then
						rendered = export_str(item)
					elseif kind == 'boolean' then
						-- booleans are valid in the multi-key path (export_object),
						-- so accept them here too instead of failing in ipairs
						rendered = tostring(item)
					elseif kind == 'table' then
						if only_schema then
							rendered = export_object(item, only_schema)
						else
							rendered = export_array(item)
						end
					else
						error("serialisation failed: unsupported object value type")
					end
					items[#items + 1] = export_str(key) .. ':' .. rendered
				end
			else
				-- copy just the requested keys into a scratch object
				local langobj = {}
				for _, fkey in pairs(key_filter) do
					langobj[fkey] = value[fkey]
				end
				items[#items + 1] = export_str(key) .. ':' .. export_object(langobj, schema)
			end
		end
	end
	return "{" .. tab_concat(items, ",") .. "}"
end
--- Serialises Module:scripts/data as a JSON object with one member per entry
-- of that data table.
-- @return JSON object text
function export.export_scripts()
	local data = mw.loadData("Module:scripts/data")
	-- `names` is serialised as an array; the schema is loop-invariant, so it
	-- is built once instead of once per entry
	local schema = { names = false }
	local items = {}
	for key, value in pairs(data) do
		items[#items + 1] = export_str(key) .. ':' .. export_object(value, schema)
	end
	return "{" .. tab_concat(items, ",") .. "}"
end
--- Serialises Module:families/data as a JSON object with one member per entry
-- of that data table.
-- @return JSON object text
function export.export_families()
	local data = mw.loadData("Module:families/data")
	-- `names` is serialised as an array; the schema is loop-invariant, so it
	-- is built once instead of once per entry
	local schema = { names = false }
	local items = {}
	for key, value in pairs(data) do
		items[#items + 1] = export_str(key) .. ':' .. export_object(value, schema)
	end
	return "{" .. tab_concat(items, ",") .. "}"
end
--- Serialises Module:labels/data as a JSON object with three members:
-- "labels" (one object per label), "aliases" (string to string) and
-- "deprecated" (an empty object when the data has no `deprecated` table).
-- @return JSON object text
function export.export_labels()
	local data = mw.loadData("Module:labels/data")
	-- the category lists are serialised as arrays; the schema is
	-- loop-invariant, so it is built once instead of once per label
	local label_schema = {
		plain_categories = false,
		topical_categories = false,
		pos_categories = false,
		regional_categories = false
	}
	local labels, aliases = {}, {}
	for key, value in pairs(data.labels) do
		labels[#labels + 1] = export_str(key) .. ':' .. export_object(value, label_schema)
	end
	for key, value in pairs(data.aliases) do
		aliases[#aliases + 1] = export_str(key) .. ':' .. export_str(value)
	end
	-- no schema is passed for `deprecated`, so it may only hold string and
	-- boolean values
	return ('{"labels":{%s},"aliases":{%s},"deprecated":%s}'):format(
		tab_concat(labels, ','), tab_concat(aliases, ','), export_object(data.deprecated or {})
	)
end
--- Serialises Module:workgroup ping/data as a JSON object.
-- String entries are emitted as JSON strings (presumably aliases of another
-- group; confirm against the data module). Table entries become objects with
-- `desc`, `category` and a `members` array built from the entry's sequence
-- part.
-- @return JSON object text
function export.export_wgs()
	local groups = mw.loadData('Module:workgroup ping/data')
	local out = {}
	for name, entry in pairs(groups) do
		local rendered
		if type(entry) == 'string' then
			rendered = export_str(entry)
		else
			local desc, category = entry.desc, entry.category
			-- copy the positional entries (the users) into a plain array
			local members = {}
			for _, user in ipairs(entry) do
				members[#members + 1] = user
			end
			rendered = export_object(
				{ desc = desc, category = category, members = members },
				{ members = false }
			)
		end
		out[#out + 1] = export_str(name) .. ':' .. rendered
	end
	return "{" .. tab_concat(out, ",") .. "}"
end
-- replacement for using the [[mw:API]] to do [[Special:PrefixIndex/Template:langrev/]]
-- TODO: limits?
--- Returns a JSON object mapping every language name that starts with the
-- given prefix to its language code.
-- Names are read from a record's `names` list when present; otherwise from
-- `canonicalName` plus `otherNames`, the fields this module exports for
-- language records (NOTE(review): confirm which layout
-- Module:languages/alldata currently uses).
-- @param frame frame object; frame.args[1] is the prefix to match. A missing
--        prefix is treated as the empty string and matches every name.
-- @return JSON object text
function export.complete_langname(frame)
	local m_langs = mw.loadData("Module:languages/alldata")
	local target = frame.args[1] or ""
	local target_len = #target
	local items = {}

	-- appends name:code when `name` is a string starting with the prefix
	local function consider(name, code)
		if type(name) == 'string' and name:sub(1, target_len) == target then
			items[#items + 1] = export_str(name) .. ":" .. export_str(code)
		end
	end

	for code, data in pairs(m_langs) do
		if data.names then
			for _, name in ipairs(data.names) do
				consider(name, code)
			end
		else
			consider(data.canonicalName, code)
			if type(data.otherNames) == 'table' then
				for _, name in ipairs(data.otherNames) do
					consider(name, code)
				end
			end
		end
	end
	return "{" .. tab_concat(items, ",") .. "}"
end
return export