-- Sample Page

require('strict')
local p = {}

--[[--------------------------< I N L I N E _ E R R O R >------------------------------------
Wraps a message in the module's visible error markup.
Input: msg (string) - text appended after the "Error in [[Module:IA Citation]]: " prefix
Output: string (a <span class="error"> element containing the prefixed message)
]]
local function inlineError(msg)
    local opening = '<span style="font-size:100%" class="error">Error in [[Module:IA Citation]]: '
    local closing = '</span>'
    return opening .. msg .. closing
end

--[[--------------------------< T R I M _ A R G >--------------------------------------------
Trims whitespace and returns nil if the resulting string is empty.
Input: arg (string or nil)
Output: the trimmed string, or nil when arg is nil, empty, or consists only of whitespace
]]
local function trimArg(arg)
    if arg == nil then return nil end

    -- Trim BEFORE testing for emptiness so that a whitespace-only value ("   ")
    -- is reported as missing instead of being passed on as an empty string.
    local trimmed = mw.text.trim(arg)
    if trimmed == "" then return nil end
    return trimmed
end

--[[--------------------------< G E T _ S A F E _ I A _ I D >--------------------------------
Extracts the Archive.org ID from a raw string or full URL.
Input: raw_id (string or nil)
Output: string or nil
    - nil when raw_id is missing
    - raw_id unchanged when it does not start with "http"
    - for URLs: the path segment following "/details/", otherwise the text after the
      last "/" (nil when the URL ends in "/")
]]
local function get_safe_ia_id(raw_id)
    if not raw_id then return nil end

    -- Anything that is not a URL is taken to be a bare identifier already.
    local looks_like_url = mw.ustring.match(raw_id, "^http") ~= nil
    if not looks_like_url then
        return raw_id
    end

    -- Prefer the segment after /details/; otherwise fall back to the last path segment.
    return mw.ustring.match(raw_id, "/details/([^/?#]+)")
        or mw.ustring.match(raw_id, "([^/]+)$")
end

--[[--------------------------< P A R S E _ C H U N K >--------------------------------------
Splits one page coordinate (e.g., "xiv (n8) [keyword]") into the pieces needed for a link.
Input: chunk (string)
Output: table {display, target, search}
    display - chunk with the "(nNN)" leaf marker and the "[term]" marker removed, trimmed
    target  - "n" .. NN when a leaf marker is present; otherwise the first run of
              alphanumerics/dots in display with trailing editorial marks
              (f, +, dashes, dots) stripped, e.g. "144ff." -> "144"
    search  - the trimmed "[term]" when present; otherwise the target
              (nil for leaf targets, which carry no logical page number)
]]
local function parse_chunk(chunk)
    local LEAF_MARK = "%s*%(n([0-9]+)%)"
    local SEARCH_MARK = "%s*%[(.-)%]"

    -- A wikitext external link "[http... label]" is reduced to its label before parsing.
    if mw.ustring.match(chunk, "^%[http") then
        chunk = mw.ustring.match(chunk, "%s+([^%]]+)%]%s*$") or chunk
    end

    local leaf = mw.ustring.match(chunk, LEAF_MARK)
    local bracketed = mw.ustring.match(chunk, SEARCH_MARK)

    -- Remove both markers from the reader-facing text. The extra parentheses discard
    -- gsub's second return value (the replacement count) so it is never passed along.
    local without_leaf = (mw.ustring.gsub(chunk, LEAF_MARK, ""))
    local without_marks = (mw.ustring.gsub(without_leaf, SEARCH_MARK, ""))
    local shown = mw.text.trim(without_marks)

    local result = { display = shown }

    if leaf then
        -- Leaf numbers address a scan position directly.
        result.target = "n" .. leaf
        result.search = bracketed and mw.text.trim(bracketed) or nil
    else
        -- First alphanumeric run (e.g., "144", "xiv", or "A1"), minus trailing marks.
        local page = mw.ustring.match(shown, "([%w%.]+)") or shown
        page = mw.ustring.gsub(page, "[f%+–—%-%.]+$", "")
        result.target = page
        result.search = bracketed and mw.text.trim(bracketed) or page
    end

    return result
end

--[[--------------------------< R E S O L V E _ S E A R C H >--------------------------------
Picks the search term for a page link.
Input: inline_search, user_ia_search, config_ia_search, display_text
Output: string or nil
Priority:
    1. inline_search, when given, is returned as-is
    2. user_ia_search (|ia-search=), when given
    3. config_ia_search (shard setting), when given
    4. the page number taken from display_text
For levels 2 and 3 the values 'none' and '' mean "no search" (nil), 'pagenum' means
"use the page number", and anything else is used literally.
]]
local function resolve_search_term(inline_search, user_ia_search, config_ia_search, display_text)
    if inline_search then
        return inline_search
    end

    -- Leading alphanumeric run of the display text, or the whole text if there is none.
    local function page_number()
        return mw.ustring.match(display_text, "^([%w]+)") or display_text
    end

    -- The user override shadows the shard setting entirely, even when it says 'none'.
    local setting = user_ia_search or config_ia_search

    if not setting or setting == 'pagenum' then
        return page_number()
    end
    if setting == 'none' or setting == '' then
        return nil
    end
    return setting
end

--[[--------------------------< M A K E _ I A _ U R L >--------------------------------------
Builds the archive.org URL that opens a book at a given page in two-page ("2up") mode.
Input: ia_id, target, search_term, display_mode
    display_mode - "full screen" selects the "stream" endpoint; anything else "details"
    search_term  - when non-empty, appended as a QUERY-encoded "?q=" parameter
Output: string (URL)
]]
local function make_ia_url(ia_id, target, search_term, display_mode)
    local endpoint = "details"
    if display_mode == "full screen" then
        endpoint = "stream"
    end

    local page_url = ("https://archive.org/%s/%s/page/%s/mode/2up"):format(endpoint, ia_id, target)

    -- Without a usable search term the plain page URL is the result.
    if not search_term or search_term == "" then
        return page_url
    end

    -- QUERY encoding keeps spaces and special characters from breaking the URL.
    return page_url .. "?q=" .. mw.uri.encode(search_term, "QUERY")
end

--[[--------------------------< B U I L D _ L I N K S >--------------------------------------
Orchestrates the conversion of a page/pages string into formatted Wikitext links.
Input:
    input           (string) raw page/pages value, e.g. "pp. 12, xiv (n8) [keyword]"
    config          (table)  shard entry; reads id, query, iasearch and iadisplay
    user_ia_search  (string or nil) |ia-search= override
    user_ia_display (string or nil) |ia-display= override
Output:
    nil    when input is missing or blank
    input  unchanged when the shard has neither a usable ID nor a query
    string otherwise: one external link (or plain display text) per comma-separated
           chunk, joined with ", "
]]
local function build_links(input, config, user_ia_search, user_ia_display)
    if not input or mw.text.trim(input) == '' then return nil end

    -- "Sloppy-proof" the shard data by ensuring empty strings are treated as nil.
    -- Everything below uses these normalized values, never the raw config fields.
    local shard_id = (config.id ~= "") and config.id or nil
    local shard_query = (config.query ~= "") and config.query or nil

    -- If neither valid ID nor Query exists, return input as plain text
    if not shard_id and not shard_query then
        return input
    end

    local safe_id = get_safe_ia_id(shard_id)

    -- Drop a leading "p." / "pp." prefix, then split on commas.
    local clean_input = mw.ustring.gsub(input, "^p+%.%s*", "")
    local chunks = mw.text.split(clean_input, "%s*,%s*")
    local results = {}

    for _, chunk in ipairs(chunks) do
        local parts = parse_chunk(chunk)
        local url = nil

        if safe_id then
            -- 1. Priority: Deep-link to specific page
            --
            -- parse_chunk substitutes the page target for parts.search when the editor
            -- wrote no "[term]". Passing that value on as the inline search would make
            -- it win unconditionally, so |ia-search= and the shard's iasearch would never
            -- be consulted. A search equal to the target is therefore treated as "not
            -- explicit"; the target (editorial marks already stripped) is handed over as
            -- the page text so the default result remains the bare page number.
            local inline_search = parts.search
            local page_text = parts.display
            if inline_search ~= nil and inline_search == parts.target then
                inline_search = nil
                page_text = parts.target
            end

            local search_term = resolve_search_term(
                inline_search,
                user_ia_search,
                config.iasearch,
                page_text
            )
            url = make_ia_url(
                safe_id,
                parts.target,
                search_term,
                user_ia_display or config.iadisplay
            )
        elseif shard_query then
            -- 2. Fallback: General Archive.org search for this book
            -- We append the page number to the query so the search targets the page
            local combined_query = shard_query .. ' "' .. parts.target .. '"'
            url = "https://archive.org/search?query=" .. mw.uri.encode(combined_query, "QUERY")
        end

        if url then
            table.insert(results, string.format("[%s %s]", url, parts.display))
        else
            -- No link could be built (e.g. unusable ID and no query): keep plain text.
            table.insert(results, parts.display)
        end
    end

    return table.concat(results, ", ")
end

--[[--------------------------< G E T _ C O N F I G >----------------------------------------
Locates and loads the data shard entry for a given book key.
Input: book_key (string)
Output: table (config), nil OR nil, string (error message)
The key is upper-cased and spaces become underscores. Keys starting with A-Z are looked up
in the shard named after that letter; every other key goes to the OTHER shard. The returned
table is a shallow, writable copy of the shard entry with empty-string id, query, iasearch
and iadisplay fields removed.
]]
local function get_config(book_key)
    local normalized_key = mw.ustring.gsub(mw.ustring.upper(book_key), " ", "_")
    local first_char = mw.ustring.sub(normalized_key, 1, 1)

    -- Pick the shard page from the first character of the key.
    local sub_path
    if mw.ustring.match(first_char, '^[A-Z]$') then
        sub_path = 'Module:IA Citation/data/' .. first_char
    else
        sub_path = 'Module:IA Citation/data/OTHER'
    end

    -- pcall turns a failed mw.loadData call into an error message for the caller.
    local loaded, library = pcall(mw.loadData, sub_path)
    if not loaded then
        return nil, 'Data shard not found: ' .. sub_path
    end

    local entry = library[normalized_key]
    if not entry then
        return nil, 'Key "' .. normalized_key .. '" not found in ' .. sub_path
    end

    -- Shallow copy into a fresh table so the fields can be normalized below.
    local config = {}
    for field, value in pairs(entry) do
        config[field] = value
    end

    -- An empty string in these fields counts exactly like a missing field.
    for _, field in ipairs({ 'id', 'query', 'iasearch', 'iadisplay' }) do
        if config[field] == "" then
            config[field] = nil
        end
    end

    -- An entry is usable if it can be linked (id, query or cite_params.url) or at least
    -- carries citation metadata; the latter permits plain-text pass-through.
    local cite_params = config.cite_params
    local linkable = config.id or config.query or (cite_params and cite_params.url)
    if not linkable and cite_params == nil then
        return nil, 'Data shard for "' .. normalized_key .. '" contains no usable ID, Query, or Metadata.'
    end

    return config, nil
end

--[[--------------------------< F I L T E R _ A R G S >--------------------------------------
Splits the incoming arguments into module controls and citation-template parameters.
Input: raw_args (table)
Output: table (internal_metadata), table (template_params)
Every value goes through trimArg; arguments for which it returns nil are dropped.
Control names (ia-search, ia-display, chapter-page) are matched case-insensitively and
stored under their lower-case name; all other arguments keep their original key.
]]
local function filter_args(raw_args)
    -- Parameters consumed by this module rather than handed to the citation template
    local internal_keys = {
        ['ia-search'] = true,
        ['ia-display'] = true,
        ['chapter-page'] = true
    }

    local metadata, pass_through = {}, {}

    for name, raw_value in pairs(raw_args) do
        local value = trimArg(raw_value)
        if value then
            local folded = mw.ustring.lower(name)
            local is_internal = internal_keys[folded]
            local bucket = is_internal and metadata or pass_through
            bucket[is_internal and folded or name] = value
        end
    end

    return metadata, pass_through
end

--[[--------------------------< G E T _ F A L L B A C K _ U R L >----------------------------
Constructs a base URL when no specific page coordinates are provided.
Input: config (table), user_ia_search (string), user_ia_display (string)
Output: string (URL), string (access status)
    - with a usable ID: the book's landing page ("stream" endpoint when the display mode
      is "full screen", otherwise "details"), optionally with a "?q=" search, plus
      config['url-access'] or "registration"
    - otherwise, with a query: an archive.org search URL and nil
    - otherwise: nil, nil (plain text mode)
]]
local function get_fallback_url(config, user_ia_search, user_ia_display)
    -- 1. Resolve the ID. An empty string counts as missing, and get_safe_ia_id may
    --    return nil for a URL it cannot parse; both cases fall through to the query
    --    instead of raising a concatenation error.
    local safe_id = nil
    if config.id and config.id ~= "" then
        safe_id = get_safe_ia_id(config.id)
    end

    if safe_id then
        local display_mode = user_ia_display or config.iadisplay
        local endpoint = (display_mode == "full screen") and "stream" or "details"
        local base_url = "https://archive.org/" .. endpoint .. "/" .. safe_id .. "/"

        -- The user override, when present, shadows the shard-level setting.
        local fallback_search = user_ia_search or config.iasearch

        -- 'none' and '' disable searching. 'pagenum' has no meaning on a landing page
        -- (there is no page number), so it is dropped as well, whether it came from the
        -- shard or from |ia-search=, rather than being sent as a literal "?q=pagenum".
        if fallback_search == 'pagenum' or fallback_search == 'none' or fallback_search == '' then
            fallback_search = nil
        end

        if fallback_search then
            base_url = base_url .. "?q=" .. mw.uri.encode(fallback_search, "QUERY")
        end

        return base_url, (config['url-access'] or "registration")
    end

    -- 2. If no usable ID, fall back to a general Archive.org Search Query
    if config.query and config.query ~= "" then
        return "https://archive.org/search?query=" .. mw.uri.encode(config.query, "QUERY"), nil
    end

    -- 3. No linking info available (Plain text mode)
    return nil, nil
end

--[[--------------------------< M A I N   O R C H E S T R A T O R >--------------------------
Decision logic for the module. Handles parameter merging, link building, and fallbacks.

The export table p defines no fixed functions. Its __index metamethod returns a new
function for whatever field name is looked up, and that field name is used as the book
key handed to get_config. Each such function takes the frame, reads the arguments of the
parent frame, and returns either an inline error string (when get_config reports an
error) or the expansion of the target citation template.
]]
setmetatable(p, {
    __index = function(t, key)
        return function(frame)
            -- 1. Argument Sanitization
            -- metadata holds the module's own controls (ia-search, ia-display,
            -- chapter-page); citeArgs holds everything destined for the template.
            local metadata, citeArgs = filter_args(frame:getParent().args)
            
            -- 2. Configuration Retrieval
            -- The looked-up field name is the book key.
            local config, err = get_config(key)
            if err then return inlineError(err) end

            -- 3. Template and Data Merging
            -- Arguments already present in citeArgs win; the shard's cite_params
            -- only fill in parameters that were not supplied.
            local targetTemplate = config.template or 'cite book'
            if config.cite_params then
                for k, v in pairs(config.cite_params) do 
                    citeArgs[k] = citeArgs[k] or v 
                end
            end

            -- 4. Process Coordinate Links (page/pages)
            -- Deep-linking remains active here as edition metadata is synchronized.
            -- The raw page text is replaced by whatever build_links returns.
            if citeArgs['page'] then 
                citeArgs['page'] = build_links(citeArgs['page'], config, metadata['ia-search'], metadata['ia-display']) 
            end
            if citeArgs['pages'] then 
                citeArgs['pages'] = build_links(citeArgs['pages'], config, metadata['ia-search'], metadata['ia-display']) 
            end

            -- 5. Process Chapter Link
            -- LEGACY OFF-SWITCH: Only generate if the user hasn't provided a manual chapter-url.
            -- Requires both |chapter-page= and a shard ID.
            if metadata['chapter-page'] and config.id and not citeArgs['chapter-url'] then
                local parts = parse_chunk(metadata['chapter-page'])
                -- All three override arguments are nil, so the search term comes from
                -- the chapter page text alone; ia-search and iasearch are not consulted.
                local dummy_search = resolve_search_term(nil, nil, nil, parts.display)
                
                citeArgs['chapter-url'] = make_ia_url(
                    get_safe_ia_id(config.id), 
                    parts.target, 
                    dummy_search, 
                    metadata['ia-display'] or config.iadisplay
                )
            end

            -- 6. Fallback Logic (Build base URL if no specific page/chapter coordinates exist)
            -- LEGACY OFF-SWITCH: If 'url' or 'title-link' already exists, we back off entirely.
            -- A chapter-url (supplied manually or generated in step 5) also disables it.
            if not citeArgs['url'] and not citeArgs['chapter-url'] and not citeArgs['title-link'] then
                -- Only provide a fallback landing page if no coordinate deep-links were built.
                if not citeArgs['page'] and not citeArgs['pages'] then
                    local url, access = get_fallback_url(config, metadata['ia-search'], metadata['ia-display'])
                    citeArgs['url'] = url
                    -- NOTE(review): this assignment replaces any url-access already in
                    -- citeArgs, and clears it when access is nil - confirm intended.
                    citeArgs['url-access'] = access
                end
            end

            -- 7. Final Output
            return frame:expandTemplate{ title = targetTemplate, args = citeArgs }
        end
    end
})

return p