-- texdoclib-search.tlu: file searching functions for texdoc
--
-- The TeX Live Team, GPLv3, see texdoclib.tlu for details

-- Warning: Some functions here assume that M.init_databases() has been called.

-- dependencies
local kpse = require 'kpse'
local lfs = require 'lfs'
local texdoc = {
    const = require 'texdoclib-const',
    util = require 'texdoclib-util',
    alias = require 'texdoclib-alias',
    score = require 'texdoclib-score',
    config = require 'texdoclib-config',
}

-- shortcuts
local M = {}
local C = texdoc.const
local err_print = texdoc.util.err_print
local dbg_print = texdoc.util.dbg_print

-- shared by all functions in this file
local s_doclist -- the Doclist object to be populated by various functions
local s_meta    -- {[normname] = meta, ...} (populated by init_tlp_database)
local vanilla   -- is this a vanilla TL or a re-package one without tlpdb?

--------------------------- utility functions ----------------------------

-- find the TeX Live root
-- The first call expands $TEXMFROOT and then replaces get_tlroot with a
-- closure returning the remembered value, so kpse is queried only once.
local function get_tlroot()
    local tlroot = kpse.expand_path('$TEXMFROOT') -- it should be an existing one
    get_tlroot = function() return tlroot end
    return tlroot
end

-- says if file has a known extension according to ext_list
-- (or known basename according to basename_list)
-- The comparison is done on the lowercased name.
local function check_ext(file)
    file = file:lower()
    -- has a known extension?
    if texdoc.util.get_ext(file) ~= nil then
        return true
    end
    -- is the basename good?
    -- (entries of basename_list are used as Lua patterns: either the whole
    -- name, or the name up to its first dot, has to match)
    for _, b in ipairs(texdoc.config.get_value('basename_list')) do
        if file:find('^' .. b .. '$') or file:find('^' .. b .. '%.') then
            return true
        end
    end
    return false
end

---------------------- docfile and doclist objects -----------------------

--[[
doclist = {
    [1] = docfile1, [2] = docfile2, ...,
    inv = {realpath1 = index1, ...}
}

The inv subtable is such that for all i
    doclist.inv[doclist[i].realpath:lower()] == i
Paths are lowercased in order to avoid duplicates on windows.
--]]

local Doclist = {}
Doclist.__index = Doclist

-- create a new list of docfiles
function Doclist:new()
    local dl = {inv = {}}
    setmetatable(dl, self)
    return dl
end

-- add a docfile to a list
-- A docfile whose path is already known is merged into the existing entry
-- instead of being appended a second time.
function Doclist:add(df)
    -- read the virtual member once: when the getter yields nil nothing is
    -- cached, so every further access would run the getter again
    local realpath = df.realpath

    -- if no realpath information, unable to add
    -- (useful if vanilla == false)
    if not realpath then return end

    -- check the existence of the file
    if not lfs.isfile(realpath) then
        dbg_print('search', 'File %s not found. Skipping.', realpath)
        return
    end

    -- add the docfile to the list
    local key = realpath:lower()
    local index = self.inv[key]
    if index then
        self[index]:mergein(df)
    else
        dbg_print('search', 'File %s found.', realpath)
        local newindex = #self + 1
        self[newindex] = df
        self.inv[key] = newindex
    end
end

-- stops a doclist
-- (drops the inverse index; add() cannot be used on the list afterwards)
function Doclist:stop()
    self.inv = nil
end

--[[
docfile = {
    -- name and tree are mandatory
    name = filename (used for scoring only)
    tree = code of the tree, see below
    -- at least one of the following fields should exist
    matches = {pattern1, pattern2, ...} or {}
    runtodoc = true if there is a runfile -> docfile association
    tlptodoc = true if there is a tlp name -> docfile association
    -- those are virtual members, see below
    realpath = full path
    normname = normalised (path removed up to the 'doc' component)
    basename = basename
    lang = language tag from the catalogue metadata
    details = details tag from the catalogue metadata
    quality = 'good', 'bad', or 'killed' depending on score
    ext_pos = position of the extension in ext_list
    -- set for elements of a list as a side effect of sort_doclist()
    score = score
}

if tree > 0, this is the index of the tree in TEXDOCS
if tree = 0, then name is relative to TLROOT
tree = -1 if and only if file is a sty file. Here name is absolute.
--]]

-- Docfile objects inherit members from Docfile
-- and have virtual members implemented by get_() methods
-- for best cache performance, getters should not return nil
local Docfile = {}

-- Lookup order: a member of Docfile itself, then a get_<key>() getter whose
-- result is stored in the object so that the getter runs at most once
-- (unless it returned nil). Yields nil when neither exists.
function Docfile:__index(key)
    local member = Docfile[key]
    if member then
        return member
    end
    local getter = Docfile['get_' .. key]
    if getter then
        -- the parentheses keep only the first value returned by the getter
        rawset(self, key, (getter(self)))
        return rawget(self, key)
    end
end

-- create a new docfile object using initialisation info
-- required fields: name, tree
-- A 'pattern' field is not copied as such: it becomes the single element of
-- the 'matches' list.
function Docfile:new(info)
    local df = {}
    setmetatable(df, self)
    for k, v in pairs(info) do
        if k == 'pattern' then
            df.matches = {info.pattern}
        else
            df[k] = v
        end
    end
    return df
end

-- merge a second docfile object in, assuming it represents the same file
-- The matches of df are appended to ours; every other field stored in df
-- overwrites the field of the same name in self.
function Docfile:mergein(df)
    for k, v in pairs(df) do
        if k == 'matches' then
            -- self.matches always exists thanks to get_matches()
            local own = self.matches
            for _, m in ipairs(v) do
                own[#own + 1] = m
            end
        else
            self[k] = v
        end
    end
end

-- return the full path to the file
-- (may be nil in the non-vanilla case, when kpse does not find the file)
local texdocs_tree_to_path -- definition later
function Docfile:get_realpath()
    if self.tree > 0 then
        return texdocs_tree_to_path(self.tree, self.name)
    elseif self.tree == 0 then
        if vanilla then
            return get_tlroot() .. '/' .. self.name
        else
            return kpse.find_file(self.normname, 'TeX system documentation')
        end
    else
        return self.name
    end
end

-- normalise a name from the tlpdb (use for s_meta indexes)
local function reloc_tlpdb_path(name)
    -- the parentheses drop the substitution count returned by gsub
    return (name:gsub('^texmf[^/]*/doc/', ''))
end

-- return normalised name
function Docfile:get_normname()
    return (self.tree == 0) and reloc_tlpdb_path(self.name) or self.name
end

-- retrieve the lang from meta (false when unknown, never nil)
function Docfile:get_lang()
    local meta = s_meta[self.normname]
    return meta and (meta.lang or false) or false
end

-- retrieve the details from meta (false when unknown, never nil)
function Docfile:get_details()
    local meta = s_meta[self.normname]
    return meta and (meta.details or false) or false
end

-- return the base name
function Docfile:get_basename()
    -- the parentheses drop the substitution count returned by gsub
    return (self.name:gsub('.*/', ''))
end

-- for interface consistency, matches should always be a table, never nil
function Docfile:get_matches()
    return {}
end

-- from texdoclib-score.tlu
Docfile.get_quality = texdoc.score.docfile_quality

-- from texdoclib-score.tlu
function Docfile:get_ext_pos()
    return texdoc.score.ext_pos(self.basename)
end

------------------- select results from TEXDOCS trees --------------------

-- says if a file (with its path) matches a pattern
-- Patterns flagged 'original' are searched as a plain, case-insensitive
-- substring; the others are delegated to texdoc.score.is_exact().
local function matches(pattern, file)
    if pattern.original then
        return file:lower():find(pattern.name:lower(), 1, true)
    else
        return texdoc.score.is_exact(file, pattern.name)
    end
end

-- return a docfile object if file "matches", nil otherwise
-- patlist:  list of patterns to try
-- file:     basename of the file (currently unused)
-- pathfile: name of the file including its path inside the tree
-- code:     code of the tree, stored in the 'tree' field of the docfile
-- All the matching patterns end up in the 'matches' list of the result.
local function process_file(patlist, file, pathfile, code)
    local docfile
    for _, pattern in ipairs(patlist) do
        if matches(pattern, pathfile) then
            local info = {
                name = pathfile,
                tree = code,
                pattern = pattern,
            }
            if docfile then
                docfile:mergein(Docfile:new(info))
            else
                docfile = Docfile:new(info)
            end
        end
    end
    return docfile
end

-- scan a database
-- lsr_db maps names with path to basenames; every entry matching one of
-- the patterns is added to s_doclist.
local function scan_db(patlist, code, lsr_db)
    for file, basename in pairs(lsr_db) do
        local df = process_file(patlist, basename, file, code)
        if df then
            s_doclist:add(df)
        end
    end
end

--------------------- manage TEXDOCS trees of the kpse ----------------------

-- build a db from a ls-R file
-- root:  directory containing the ls-R file
-- shift: path, relative to root and ending with '/', of the subtree to keep
-- Returns {[name relative to the subtree] = basename, ...} restricted to
-- the files accepted by check_ext().
local function init_lsr_db(root, shift)
    -- open the file
    local lsr = assert(io.open(root .. '/ls-R', 'r'))
    local _ = lsr:read('*line') -- throw away first line (comment)
    -- scan it
    local db = {}
    local maybe_dir, is_doc = true, false
    local current_dir
    local l = #shift
    while true do
        local line = lsr:read('*line')
        -- a directory header can only follow an empty line
        while line == '' do
            line, maybe_dir = lsr:read('*line'), true
        end
        if line == nil then break end -- EOF
        local dir_line = maybe_dir and line:match('^%./(.*):$')
        if dir_line then
            maybe_dir = false -- next line may not be a dir
            if string.sub(dir_line .. '/', 1, l) == shift then
                is_doc = true
                current_dir = dir_line:sub(l + 1)
                -- forget an entry recorded under this very name: it came
                -- from the listing of the parent directory and denotes
                -- this directory, not a file
                db[current_dir] = nil
            elseif is_doc then
                break -- we're exiting the ./doc (or shift) dir, so it's over
            end
        elseif is_doc then
            local file = line
            if current_dir ~= '' then
                file = current_dir .. '/' .. line
            end
            if check_ext(line) then
                db[file] = line
            end
        end
    end
    lsr:close()
    return db
end

-- build a db for a tree without ls-R index
-- base:    directory to scan
-- recurse: whether subdirectories are visited too
-- Returns {[name relative to base] = basename, ...} restricted to the files
-- accepted by check_ext().
local function init_tree_db(base, recurse)
    local db = {}
    local function init_tree_db_rec(dir)
        for file in lfs.dir(base .. '/' .. dir) do
            if file ~= '.' and file ~= '..' then
                local f = (dir == '') and file or dir .. '/' .. file
                if lfs.isdir(base .. '/' .. f) then
                    if recurse then
                        init_tree_db_rec(f)
                    end
                elseif check_ext(file) then
                    db[f] = file
                end
            end
        end
    end
    init_tree_db_rec('')
    return db
end

local init_texdocs_database, get_doclist_texdocs
do -- begin scope of doc_roots
    local doc_roots
    --[[
    doc_roots[i] = {
        path = initial path,
        db = {[file1] = basename1, [file2] = basename2, ...},
    }
    --]]

    -- populate the doc_roots filename databases
    init_texdocs_database = function()
        -- find a ls-R file in a parent directory and return it or nil
        -- On success two values are returned: the directory holding ls-R
        -- and the path from there down to the given path (with a trailing
        -- '/', or '' when ls-R sits in the path itself).
        local function lsr_root(path)
            if not lfs.isdir(path) then return end
            local root, shift = path, ''
            if root:sub(-1) == '/' then
                root = root:sub(1, -2)
            end
            while root:find('/', 1, true) do
                if lfs.isfile(root .. '/ls-R') then
                    return root, shift
                end
                local last_comp = root:match('^.*/(.*)$')
                -- /!\ cannot put last_comp in a regex: can contain special char
                root = root:sub(1, -(#last_comp + 2))
                shift = last_comp .. '/' .. shift
            end
        end

        doc_roots = {}
        local kpse_texdocs = kpse.expand_var('$TEXDOCS')
        -- expand the path and turn it into a lua list
        local raw_doc_roots =
            kpse.expand_braces(kpse_texdocs):explode(C.kpse_sep)
        local max = #raw_doc_roots + 1
        for j, dir in ipairs(raw_doc_roots) do
            -- the trees are numbered in reverse order of $TEXDOCS
            local i = max - j
            local db
            -- get path, !! and // values
            local no_slashes, n_slashes = dir:gsub('//$', '')
            local recursion_allowed = (n_slashes == 1)
            local path, n_bangs = no_slashes:gsub('^!!', '')
            local index_mandatory = (n_bangs == 1)
            dbg_print('texdocs',
                'texdocs[%d] = %s (index_mandatory=%s, recursion_allowed=%s)',
                i, path, tostring(index_mandatory),
                tostring(recursion_allowed))

            -- decide if we should use a ls-R index, the filesystem, or do nothing
            local root, shift = lsr_root(path)
            if root and shift and recursion_allowed then
                dbg_print('texdocs',
                    'texdocs[%d] using index: %s (shift=%s)', i, root, shift)
                db = init_lsr_db(root, shift)
            elseif not index_mandatory and lfs.isdir(path) then
                dbg_print('texdocs',
                    'texdocs[%d] using filesystem search', i)
                db = init_tree_db(path, recursion_allowed)
            end

            -- register this in docroots (db stays nil if nothing was scanned)
            doc_roots[i] = {path = path, db = db}
        end
    end

    -- find docfiles in texdocs directories
    get_doclist_texdocs = function(patlist)
        for code, dr in ipairs(doc_roots) do
            if dr.db then
                scan_db(patlist, code, dr.db)
            end
        end
    end

    -- return the real path from a texdocs tree number + relative path
    texdocs_tree_to_path = function(tree, rel)
        return doc_roots[tree].path .. '/' .. rel
    end

end -- end scope of doc_roots

--------------------------- look for sty files ---------------------------

-- add doclist entries for sty files in patlist
-- Each pattern name is looked up with kpse; the files found are added with
-- tree = -1 and an absolute name.
local function get_doclist_sty(patlist)
    for _, pat in ipairs(patlist) do
        local file = kpse.find_file(pat.name)
        if file then
            local df = Docfile:new({
                name = file,
                tree = -1,
                pattern = pat,
            })
            s_doclist:add(df)
        end
    end
end

------------------------------- use tlpdb --------------------------------

-- tlpdb means TeX Live Package DataBase and tlp means TeX Live Package

-- return an iterator that outputs the keys in order
-- (the keys are sorted with table.sort, so they must be comparable)
local function opairs(t)
    local tkeys = {}
    -- use the counter len to avoid the cost of table.insert and to save the
    -- length of tkeys for later use in the iterator
    local len = 0
    for k, _ in pairs(t) do
        len = len + 1
        tkeys[len] = k
    end
    table.sort(tkeys)
    local i = 0
    local function iterator()
        i = i + 1
        if i > len then
            return nil
        else
            return tkeys[i], t[tkeys[i]]
        end
    end
    return iterator
end

-- return true if cache exists and is newer than original, false otherwise
-- (raises an error when the modification time of ori cannot be read)
local function good_cache(cache, ori)
    local cache_date = lfs.attributes(cache, 'modification')
    if not cache_date then return false end
    local ori_date = assert(lfs.attributes(ori, 'modification'))
    return cache_date > ori_date
end

-- return the form of a path to hand over to the file system functions
-- On Windows, when the chgstrcp table is available, the path goes through
-- chgstrcp.syscptoutf8(); otherwise it is returned unchanged.
-- NOTE(review): chgstrcp is a global that is not defined in this file;
-- presumably it is provided by the Windows runtime - confirm.
local function fs_path(path)
    if os.type == 'windows' and chgstrcp then
        return chgstrcp.syscptoutf8(path)
    end
    return path
end

-- make sure a given directory exists, or return nil plus an error string
-- Missing parent directories are created first.
local function mkdir_p(dir)
    if lfs.isdir(fs_path(dir)) then
        return true
    end
    local parent = texdoc.util.path_parent(dir)
    if parent then
        local ok, msg = mkdir_p(parent)
        if not ok then
            return nil, msg
        end
    end
    return lfs.mkdir(fs_path(dir))
end

local print_out_tlpinfo, get_doclist_tlpdb
local get_tlpinfo_from_tlpdb, get_tlpinfo_from_cache, get_tlpinfo_from_dist
do -- begin scope of tlpinfo tables
    local tlp_from_runfile -- {[runfile_basename] = {tlp1 = true, ...}, ...}
    local tlp_doclist -- {[tlp_name] = {relname1, relname2, ...}, ...}

    -- remove entries for tlp without any docfile
    local function remove_useless_tlp()
        for tlp, doclist in pairs(tlp_doclist) do
            if #doclist == 0 then
                tlp_doclist[tlp] = nil
            end
        end
        for _, tlp_set in pairs(tlp_from_runfile) do
            for tlp in pairs(tlp_set) do
                if not tlp_doclist[tlp] then
                    tlp_set[tlp] = nil
                end
            end
        end
    end

    -- populate tlpinfo tables using the given texlive.tlpdb
    -- The file is read line by line with a small state machine: a 'name '
    -- line starts a package, 'docfiles ' and 'runfiles ' lines open a
    -- section, and a section lasts as long as lines start with a space.
    get_tlpinfo_from_tlpdb = function(filename)
        s_meta, tlp_from_runfile, tlp_doclist = {}, {}, {}
        local curr_tlp
        local state = 'none'
        for line in io.lines(filename) do
            if state == 'none' and line:find('^name ') then
                -- begin a new package
                curr_tlp = line:sub(6, -1):lower()
                tlp_doclist[curr_tlp] = {}
            elseif state == 'docfiles' then
                if not line:find('^ ') then
                    state = 'none'
                else
                    local file = line:match('^ ([^ ]*)')
                    local meta = line:match('^ [^ ]* (.+)')
                    local basename = file:match('([^/]*)$')
                    if check_ext(basename) then
                        -- we've got a docfile here, add it
                        table.insert(tlp_doclist[curr_tlp], file)
                        if meta then
                            local details = meta:match('details="([^"]+)"')
                            local lang = meta:match('language="([^"]+)"')
                            s_meta[reloc_tlpdb_path(file)] = {
                                details = details,
                                lang = lang,
                            }
                        end
                    end
                end
            elseif state == 'runfiles' then
                if not line:find('^ ') then
                    state = 'none'
                else
                    -- check for interesting runfiles
                    local e = line:sub(-4, -1)
                    if e == '.tex' or e == '.sty' or e == '.cls' then
                        local f = line:match('.*/(.*)%.')
                        -- a line without any '/' yields no basename, and
                        -- nil cannot be used as a table key
                        if f then
                            tlp_from_runfile[f] = tlp_from_runfile[f] or {}
                            tlp_from_runfile[f][curr_tlp] = true
                        end
                    end
                end
            end
            -- update state
            if line:find('^docfiles ') then
                state = 'docfiles'
            elseif line:find('^runfiles ') then
                state = 'runfiles'
            end
        end
        remove_useless_tlp()
    end

    -- print out data from tlpdb in dofile()-able form
    -- The keys are written in sorted order (see opairs), so the same data
    -- always produces the same file.
    print_out_tlpinfo = function(filename)
        local fh = assert(io.open(fs_path(filename), 'w'))
        local function printf(s, ...)
            fh:write(string.format(s, ...))
        end
        -- s_meta
        printf('local s_meta = {\n')
        for k, v in opairs(s_meta) do
            printf(' [%q] = {', k)
            for i, j in opairs(v) do
                printf('[%q] = %q, ', i, j)
            end
            printf('},\n')
        end
        printf('}\n')
        -- tlp_from_runfile
        printf('local tlp_from_runfile = {\n')
        for k, v in opairs(tlp_from_runfile) do
            printf(' [%q] = {', k)
            for f in opairs(v) do
                printf('[%q]=true,', f)
            end
            printf('},\n')
        end
        printf('}\n')
        -- tlp_doclist
        printf('local tlp_doclist = {\n')
        for k, v in opairs(tlp_doclist) do
            printf(' [%q] = {\n', k)
            for _, f in ipairs(v) do
                printf(' %q,\n', f)
            end
            printf(' },\n')
        end
        printf('}\n')
        printf('return s_meta, tlp_from_runfile, tlp_doclist\n')
        fh:close()
    end

    -- get pre-hashed tlpdb info from a cache file
    get_tlpinfo_from_cache = function(filename)
        s_meta, tlp_from_runfile, tlp_doclist = dofile(filename)
    end

    -- get pre-hashed tlpdb info from a pseudo-cache file
    -- (exits with C.exit_usage when the shipped data file is not found)
    get_tlpinfo_from_dist = function()
        local f = kpse.find_file(C.data_tlpdb_name, 'texmfscripts')
        if not f then
            err_print('error',
                'No texlive.tlpdb nor shipped tlpdb data found.')
            os.exit(C.exit_usage)
        end
        -- the path is passed as an argument, not spliced into the format
        dbg_print('tlpdb',
            'Getting data from shipped tlpdb data file %s', f)
        s_meta, tlp_from_runfile, tlp_doclist = dofile(f)
    end

    -- get docfiles for pattern using specific tlpdb information
    -- pattern is used as is, both as a runfile basename and as a tlp name.
    get_doclist_tlpdb = function(pattern)
        -- runfile to tlp to docfile
        if tlp_from_runfile[pattern] then
            for tlp in pairs(tlp_from_runfile[pattern]) do
                for _, file in ipairs(tlp_doclist[tlp]) do
                    s_doclist:add(Docfile:new{
                        name = file,
                        tree = 0,
                        runtodoc = true,
                    })
                end
            end
        end
        -- tlp name to docfile
        if tlp_doclist[pattern] then
            for _, file in ipairs(tlp_doclist[pattern]) do
                s_doclist:add(Docfile:new{
                    name = file,
                    tree = 0,
                    tlptodoc = true,
                })
            end
        end
    end

    -- calculating Levenshtein distance by dynamic programming
    -- cf. http://lua-users.org/lists/lua-l/2008-01/msg00095.html
    -- Both arguments are converted with tostring() first; the distance is
    -- counted in bytes.
    function M.levenshtein(s, t)
        s, t = tostring(s), tostring(t)
        local m, n, d = #s, #t, {}
        -- d[i][j] is the distance between the first i bytes of s and the
        -- first j bytes of t
        for i = 0, m do d[i] = {[0] = i} end
        for j = 1, n do d[0][j] = j end
        for i = 1, m do
            for j = 1, n do
                local cost = s:sub(i, i) == t:sub(j, j) and 0 or 1
                d[i][j] = math.min(
                    d[i-1][j] + 1,
                    d[i][j-1] + 1,
                    d[i-1][j-1] + cost
                )
            end
        end
        return d[m][n]
    end

    -- fuzzy search by using Levenshtein distance
    -- Returns the tlp name closest to pattern and its distance, or
    -- '' and math.huge when no tlp is known.
    function M.fuzzy_search(pattern)
        local tmp_d
        local min = math.huge
        local result = ''
        for p in pairs(tlp_doclist) do
            tmp_d = M.levenshtein(pattern, p)
            if tmp_d < min then
                min, result = tmp_d, p
            end
        end
        return result, min
    end

end -- end scope of tlpinfo table

-- get tlpinfo tables initialised by whatever mean
-- Also sets 'vanilla'. Order of preference: an up-to-date cache file, the
-- texlive.tlpdb itself (a cache is then written), the shipped data file.
-- File names and messages are always passed to err_print/dbg_print as
-- arguments of a '%s' format, so that a '%' inside them stays harmless.
local function init_tlp_database()
    -- we assume TEXMFVAR always consists of an element
    local cache_file = kpse.expand_var('$TEXMFVAR/' .. C.cache_name)

    -- set vanilla and detect texlive.tlpdb to use
    local texlive_tlpdb = get_tlroot() .. '/tlpkg/texlive.tlpdb'
    vanilla = lfs.isfile(texlive_tlpdb)
    local tlpdb_found = vanilla

    local custom_texlive_tlpdb = texdoc.config.get_value('texlive_tlpdb')
    if custom_texlive_tlpdb then
        if lfs.isfile(custom_texlive_tlpdb) then
            texlive_tlpdb = custom_texlive_tlpdb
            tlpdb_found = true
        else
            err_print('warning',
                'Specified texlive.tlpdb does not exist: %s',
                texdoc.util.w32_path(custom_texlive_tlpdb))
            err_print('warning',
                'Fallback to use the texlive.tlpdb in the distribution.')
        end
    end

    -- get tlpinfo
    if tlpdb_found then
        if good_cache(cache_file, texlive_tlpdb) then
            dbg_print('tlpdb', 'Using cached data from %s', cache_file)
            get_tlpinfo_from_cache(cache_file)
        else
            dbg_print('tlpdb',
                'Getting data from tlpdb file %s', texlive_tlpdb)
            get_tlpinfo_from_tlpdb(texlive_tlpdb)
            dbg_print('tlpdb',
                'Writing data in cache file %s', cache_file)
            local ok, msg = mkdir_p(texdoc.util.path_parent(cache_file))
            if not ok then
                err_print('warning',
                    'Failed to create cache file in %s:', cache_file)
                err_print('warning', '%s', tostring(msg))
            else
                print_out_tlpinfo(cache_file)
            end
        end
    else
        dbg_print('tlpdb', 'Using shipped tlpdb data.')
        get_tlpinfo_from_dist()
    end
end

----------------------------- main function ------------------------------

-- initialise the various databases (must be called first)
function M.init_databases()
    init_texdocs_database()
    init_tlp_database()
end

-- find docfiles according to pattern
-- pattern:  the name to search for
-- no_alias: handed over to texdoc.alias.get_patterns()
-- Returns the Doclist of the results, sorted by texdoc.score.sort_doclist().
-- When nothing is found and fuzzy_level > 0, the search is run again with
-- the closest tlp name, provided its distance does not exceed fuzzy_level.
function M.get_doclist(pattern, no_alias)
    dbg_print('search', 'Searching documents for pattern "%s"', pattern)

    -- separate sty patterns from the rest
    local function normal_vs_sty(list)
        local normal, sty = {}, {}
        for _, p in ipairs(list) do
            if p.name:lower():match('%.([^/.]*)$') == 'sty' then
                table.insert(sty, p)
            else
                table.insert(normal, p)
            end
        end
        return normal, sty
    end

    local function doc_search(pattern, no_alias)
        -- get patterns (inc. aliases)
        local patterns = texdoc.alias.get_patterns(pattern, no_alias)
        local normal, sty = normal_vs_sty(patterns)

        -- get results; _texdocs search comes after _tlpdb search so that
        -- files found by both will have the priority of the _texdocs tree.
        -- (https://puszcza.gnu.org.ua/bugs/?369)
        get_doclist_sty(sty)
        get_doclist_tlpdb(pattern)
        get_doclist_texdocs(normal)
    end

    -- initialise result list
    s_doclist = Doclist:new()

    -- 1. normal search with the input pattern
    doc_search(pattern, no_alias)

    -- 2. if no result, execute fuzzy search
    local fuzzy_level = texdoc.config.get_value('fuzzy_level')
    if not s_doclist[1] and fuzzy_level > 0 then
        local f_res, f_lev = M.fuzzy_search(pattern)
        if f_lev <= fuzzy_level then
            err_print('info', 'Fuzzy search result: %s', f_res)
            dbg_print('search', 'Levenshtein distance: %s', tostring(f_lev))
            pattern = f_res
            doc_search(pattern, no_alias)
        else
            dbg_print('search', 'Fuzzy search result: %s', f_res)
            dbg_print('search', 'Levenshtein distance: %s', tostring(f_lev))
        end
    end

    -- finally, sort results
    texdoc.score.sort_doclist(s_doclist, pattern)

    return s_doclist
end

return M

-- vim: ft=lua: