-- Module:Collation
-- Documentation for this module may be created at Module:Collation/doc.
local export = {}

-- Custom functions for generating a sortkey that will achieve the desired sort
-- order, keyed by language code. Each entry is a pair:
--   { name of module (without the "Module:" prefix), name of exported function }
local custom_sort_functions = {
	egy = { "egy-utilities", "make_sortkey" },
}

-- Returns true when `lang` is a table that has a `getCanonicalName` function.
-- This is the only check performed; nothing else about the object is verified.
local function is_lang_object(lang)
	return type(lang) == "table" and type(lang.getCanonicalName) == "function"
end

-- Raises a "bad argument" error when `lang` does not pass is_lang_object.
--   funcName: name of the function whose argument is being validated
--   argIdx:   position of the argument, used only in the message
--   lang:     the value to validate
local function check_lang_object(funcName, argIdx, lang)
	if not is_lang_object(lang) then
		error(
			"bad argument #" .. argIdx .. " to " .. funcName
				.. ": expected language object, got " .. type(lang) .. ".",
			2
		)
	end
end

-- UTF-8-encoded characters that do not belong to the Basic Multilingual Plane
-- (that is, with code points greater than U+FFFF) have byte sequences that
-- begin with the bytes 240 to 244.
-- Returns a truthy value (the position found by string.find) when `str`
-- contains such a byte, and nil otherwise.
local function contains_non_BMP(str)
	return str:find '[\240-\244]'
end

do
	local byte, min = string.byte, math.min

	-- Byte-by-byte "less than" for two strings that avoids the built-in `<`
	-- operator on strings. Returns true when `item1` sorts strictly before
	-- `item2`: the first differing byte decides, and when one string is a
	-- prefix of the other the shorter one sorts first. Equal strings yield
	-- false, so the function is a valid strict comparator for table.sort.
	function export.laborious_comp(item1, item2)
		local l1, l2 = #item1, #item2
		for i = 1, min(l1, l2) do
			local char1, char2 = byte(item1, i, i), byte(item2, i, i)
			if char1 ~= char2 then
				return char1 < char2
			end
		end
		return l1 < l2
	end
end

-- Returns a function that maps an element (a string) to its sortkey for
-- `lang`. The returned function:
--   1. removes links via Module:links `remove_links`,
--   2. strips characters matching "[%p ]" (punctuation and spaces),
--   3. passes the result through `lang:makeEntryName`,
--   4. converts that to a sortkey with the language's custom sort function
--      (see custom_sort_functions) or, failing that, `lang:makeSortKey`.
-- Results are memoised per returned function, keyed by the original element.
-- Raises an error when `lang` is not a language object.
function export.make_sortkey_func(lang)
	check_lang_object("make_sortkey_func", 1, lang)

	local cache = {}

	-- Prefer a language-specific sortkey generator when one is registered and
	-- the named function actually exists in that module; otherwise fall back
	-- to the language object's own makeSortKey method.
	local custom_sort_function = custom_sort_functions[lang:getCode()]
	local makeSortKey = custom_sort_function
		and require("Module:" .. custom_sort_function[1])[custom_sort_function[2]]
	if not makeSortKey then
		makeSortKey = function(text)
			return lang:makeSortKey(text)
		end
	end

	local m_links = require("Module:links")

	return function (element)
		local result = cache[element]
		if result then
			return result
		end

		result = m_links.remove_links(element)
		-- Only the first return value of gsub (the new string) is kept.
		result = mw.ustring.gsub(result, "[%p ]", "")
		result = makeSortKey(lang:makeEntryName(result))

		cache[element] = result
		return result
	end
end

-- Returns a strict "less than" comparator, suitable for table.sort, that
-- orders elements by their sortkeys in `lang`.
--   lang:    language object (an error is raised otherwise)
--   non_BMP: when truthy, sortkeys are compared with export.laborious_comp
--            instead of the `<` operator
function export.make_compare_func(lang, non_BMP)
	check_lang_object("make_compare_func", 1, lang)

	local make_sortkey = export.make_sortkey_func(lang)

	-- When comparing two elements with code points outside the BMP, the
	-- less-than operator does not work correctly because of a bug in glibc.
	-- See [[phab:T193096]].
	if non_BMP then
		return function (elem1, elem2)
			return export.laborious_comp(make_sortkey(elem1), make_sortkey(elem2))
		end
	else
		return function (elem1, elem2)
			return make_sortkey(elem1) < make_sortkey(elem2)
		end
	end
end

-- Sorts the array `elems` in place.
--   elems: array of strings
--   lang:  optional language object; when it passes is_lang_object, elements
--          are ordered by their sortkeys in that language. Any other value
--          (including nil) means the raw strings are compared.
-- If any element contains a character outside the BMP, comparisons are made
-- byte by byte with export.laborious_comp rather than with `<`, whether or
-- not a language object was supplied (see [[phab:T193096]]).
-- Returns whatever table.sort returns (nothing).
function export.sort(elems, lang)
	local non_BMP = false
	for _, elem in ipairs(elems) do
		if contains_non_BMP(elem) then
			non_BMP = true
			break
		end
	end

	local compare
	if is_lang_object(lang) then
		compare = export.make_compare_func(lang, non_BMP)
	elseif non_BMP then
		-- Without a language object table.sort would use the built-in `<`,
		-- which is exactly the operator that misbehaves on non-BMP text.
		compare = export.laborious_comp
	end

	return table.sort(elems, compare)
end

-- Template entry point: sorts the positional arguments and returns them
-- joined into one string.
--   * Raises an error unless the invocation is being substed.
--   * Arguments are read from the parent frame when the invocation passes a
--     `parent` argument, otherwise from the invocation itself.
--   * The language code comes from the `lang` argument if present; otherwise
--     the first positional argument is removed from the list, trimmed, and
--     used as the code. An unrecognised code is reported through
--     Module:languages `err`.
--   * The sorted elements are joined with the `sep` argument, or "|" when
--     `sep` is absent.
function export.sort_template(frame)
	if not mw.isSubsting() then
		error("This template must be substed.")
	end

	local args
	if frame.args.parent then
		args = frame:getParent().args
	else
		args = frame.args
	end

	-- Work on a copy so that removing the language code below does not touch
	-- the frame's argument table.
	local elems = require("Module:table").shallowClone(args)

	local m_languages = require("Module:languages")
	local lang
	if args.lang then
		lang = m_languages.getByCode(args.lang) or m_languages.err(args.lang, 'lang')
	else
		local code = table.remove(elems, 1)
		code = code and mw.text.trim(code)
		lang = m_languages.getByCode(code) or m_languages.err(code, 1)
	end

	export.sort(elems, lang)

	return table.concat(elems, args.sep or "|")
end

return export