-- Module:Excerpt

-- p is the table of entry points returned by this module; mRedirect supplies getTarget, used to resolve redirects
local p = {} local mRedirect = require('Module:Redirect')

-- Module-level flag: when truthy, err() raises its message instead of returning "".
-- main() assigns it from options.errors.
local errors

-- Return blank text, or raise an error message if errors were requested.
-- @param text  message to raise when the errors flag is set
-- @return      "" when the errors flag is not set
local function err(text)
	if errors then
		error(text, 2) -- level 2: attribute the error to err's caller
	end
	return ""
end

-- File extensions (lower case) accepted as images: excludes .ogg audio etc.
local imageextensions = {
	gif = true, jpg = true, jpeg = true, png = true, svg = true, tiff = true, xcf = true,
}

-- Check image for suitability.
-- @param image  wikitext containing a File:... or Image:... name, e.g. "[[File:Foo.png|thumb]]"
-- @return       true if the file has an accepted image extension and its expanded description
--               page is non-empty and does not mention "non-free"; a false value otherwise
local function checkimage(image)
	local page = mw.ustring.match(image, "([Ff]ile%s*:[^|%]]*)") -- File:(name) ...
		or mw.ustring.match(image, "([Ii]mage%s*:[^|%]]*)") -- or Image:(name) ...
	if not page then return false end

	-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf
	local extension = mw.ustring.match(page, "%.(%a+)%s*$")
	if not extension or not imageextensions[mw.ustring.lower(extension)] then return false end

	local title = mw.title.new(":" .. page) -- Read description page (for :File:Foo rather than File:Foo)
	if not title then return false end

	local redir = mRedirect.getTarget(title)
	if redir then
		title = mw.title.new(redir)
		if not title then return false end -- redirect target is not a valid title
	end

	local frame = mw.getCurrentFrame()
	-- Expand the description page by transcluding it, so that licence templates are rendered.
	-- NOTE(review): the transclusion markup was missing from the source as received (the string was "");
	-- "{{:<title>}}" is the reconstruction - confirm against the deployed module.
	local desc = frame:preprocess("{{:" .. title.prefixedText .. "}}")
	return desc ~= nil and desc ~= "" and not mw.ustring.match(desc, "[Nn]on%-free") -- hide non-free image
end

-- Attempt to parse a [[File:...]] or [[Image:...]] link, either anywhere in text (start=false)
-- or at the start only (start=true).
-- @param text   wikitext to search
-- @param start  a true flag restricts the search to the start of the string
-- @return       the complete link including any trailing white space, or nil if there is no
--               such link or its brackets are unbalanced
local function parseimage(text, start)
	local startre = ""
	if start then startre = "^" end -- a true flag restricts search to start of string

	local image = mw.ustring.match(text, startre .. "%[%[%s*[Ff]ile%s*:.*") -- [[File: ...
		or mw.ustring.match(text, startre .. "%[%[%s*[Ii]mage%s*:.*") -- or [[Image: ...
	if image then
		-- Match balanced [[...]] to handle wikilinks nested in the caption. The match is anchored:
		-- image begins with "[[", so an unanchored %b[] could otherwise skip an unbalanced link
		-- and return some later, unrelated bracket pair.
		image = mw.ustring.match(image, "^%b[]%s*")
	end
	return image
end

-- Infobox parameters that may hold the image name, in order of preference:
-- image=, or its known alternatives such as PD_image=, image_flag= (Infobox country), Cover= (Infobox album)
local imageparams = { "image", "PD_image", "image_flag", "Cover" }

-- Read template parameter |name=value from text.
-- name is a Lua pattern; returns the value with trailing white space removed, or nil if absent or blank.
local function infoboxvalue(text, name)
	-- The capture is greedy: a lazy capture at the very end of a pattern always matches "".
	local value = mw.ustring.match(text, "|%s*" .. name .. "%s*=%s*([^}|]*)")
	if not value then return nil end
	value = mw.ustring.gsub(value, "%s+$", "")
	if value == "" then return nil end
	return value
end

-- Attempt to construct a [[File:...]] block from the image parameters of an infobox.
-- @param text  wikitext of a template
-- @return      "[[File:name|caption|alt=...|size]]\n", or nil if text is not an infobox or
--              names no image
local function argimage(text)
	if not mw.ustring.match(text, "{{%s*[Ii]nfobox") then return nil end

	local image
	for _, name in ipairs(imageparams) do
		image = infoboxvalue(text, name)
		if image then break end
	end
	if not image then return nil end

	-- add in relevant optional parameters: caption, alt text and image size
	local token = "[[File:" .. image
	local caption = infoboxvalue(text, "[Cc]aption")
	if caption then token = token .. "|" .. caption end
	local alt = infoboxvalue(text, "alt")
	if alt then token = token .. "|alt=" .. alt end
	local image_size = infoboxvalue(text, "image_size")
	if image_size then token = token .. "|" .. image_size end

	return mw.ustring.gsub(token, "\n", "") .. "]]\n" -- the link must sit on one line
end

-- Lua patterns for the names of templates that should be removed from excerpts (notes, references, tags)
local unwantedtemplates = {
	"[Ee]fn", "[Ee]fn%-[lu]a", "[Ee]l[mn]", "[Rr]p?", "[Ss]fn[bmp]", "[Ss]f[bn]", "NoteTag",
	"#[Tt]ag:%s*[Rr]ef", "[Rr]efn?", "[CcDd]n", "[Cc]itation needed", "[Dd]isambiguation needed",
}

-- Help gsub to remove unwanted templates.
-- @param t  a complete template, e.g. "{{efn|...}}"
-- @return   "" if the template is unwanted (gsub will replace it by nothing),
--           else nil (gsub will keep the existing string)
local function striptemplate(t)
	for _, name in ipairs(unwantedtemplates) do
		-- %f[|}] requires the name to end where the arguments or the closing braces begin
		if mw.ustring.match(t, "^{{%s*" .. name .. "%s*%f[|}]") then
			return "" -- unwanted template: remove
		end
	end
	return nil -- not an unwanted template: keep
end

-- Insert "|arg" just before the closing "]]" of a file link.
-- A function replacement is used so that any "%" in arg is inserted literally.
local function addfilearg(image, arg)
	return (mw.ustring.gsub(image, "(%]%]%s*)$", function(tail) return "|" .. arg .. tail end))
end

-- Replace the bold title or synonym near the start of the article by a wikilink to the article.
-- NOTE(review): the ''' and [[ ]] literals were missing from the source as received; they are
-- reconstructed from the pos + 2 / pos + 3 arithmetic and the surrounding comments - confirm.
local function linkbold(text, pagename)
	local lang = mw.language.getContentLanguage()
	-- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
	-- plain search: special characters in pagename represent themselves
	local pos = mw.ustring.find(text, "'''" .. lang:ucfirst(pagename) .. "'''", 1, true)
		or mw.ustring.find(text, "'''" .. lang:lcfirst(pagename) .. "'''", 1, true)
	if pos then
		local len = mw.ustring.len(pagename)
		return mw.ustring.sub(text, 1, pos + 2) -- up to and including the opening '''
			.. "[[" .. mw.ustring.sub(text, pos + 3, pos + len + 2) .. "]]" -- link it
			.. mw.ustring.sub(text, pos + len + 3, -1)
	end

	-- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name)
	return (mw.ustring.gsub(text, "(.-'''+)(.-)'''", function(a, b)
		-- replace '''Foo''' by '''[[pagename|Foo]]''' if early in article and not wikilinked
		if mw.ustring.len(a) < 100 and not mw.ustring.find(b, "%[") then
			return a .. "[[" .. pagename .. "|" .. b .. "]]'''"
		end
		return nil -- instruct gsub to make no change
	end, 1))
end

-- Main function returns a string value: text of the lead of a page.
-- @param pagenames  sequence of page names or wikilinks; if there are several, one is picked at
--                   random. Names that cannot be read are removed from this table.
-- @param options    table of options: errors, paraflags, fileflags, fileargs, more
-- @return           wikitext of the trimmed lead section, or the result of err() on failure
local function main(pagenames, options)
	errors = options.errors -- set the module level boolean used in local function err

	if not pagenames or #pagenames < 1 then return err("No page names given") end
	local pagename
	local text
	local pagecount = #pagenames
	local firstpage = pagenames[1] or "(nil)" -- save for error message, as the name may be deleted

	-- read the page, or a random one if multiple pages were provided
	if pagecount > 1 then math.randomseed(os.time()) end
	while not text and pagecount > 0 do
		local pagenum = 1
		if pagecount > 1 then pagenum = math.random(pagecount) end -- pick a random title
		pagename = pagenames[pagenum]
		if pagename and pagename ~= "" then
			pagename = mw.ustring.match(pagename, "%[%[%s*(.-)[]|#]") or pagename -- "[[Foo|Bar]]" → "Foo"
			pagename = mw.ustring.gsub(pagename, "^%s+", "") -- strip leading ...
			pagename = mw.ustring.gsub(pagename, "%s+$", "") -- ...and trailing white space

			if pagename and pagename ~= "" then
				local title = mw.title.new(pagename) -- Find the lead section of the named page
				if not title then return err("No title for page name " .. pagename) end
				local redir = mRedirect.getTarget(title)
				if redir then title = mw.title.new(redir) end
				pagename = redir or pagename

				if title then text = title:getContent() end -- nil if the page cannot be read
			end
		end
		if not text then table.remove(pagenames, pagenum) end -- this one didn't work; try another
		pagecount = pagecount - 1 -- ensure that we exit the loop after at most #pagenames iterations
	end
	if not text then return err("Cannot read a valid page: first name is " .. firstpage) end

	text = mw.ustring.gsub(text, "<!%-%-.-%-%->", "") -- remove HTML comments
	text = mw.ustring.gsub(text, "%c%s*==.*", "") -- remove first ==Heading== and everything after it
	-- NOTE(review): the next pattern and the table-of-contents pattern below were stripped from the
	-- source as received; both are reconstructed from their comments - confirm.
	text = mw.ustring.gsub(text, "<%s*noinclude%s*>.-<%s*/%s*noinclude%s*>", "") -- remove noinclude bits
	text = mw.ustring.gsub(text, "<%s*ref[^>]-/%s*>", "") -- remove refs cited elsewhere
	text = mw.ustring.gsub(text, "<%s*ref.->.-<%s*/%s*ref%s*>", "") -- remove refs
	text = mw.ustring.gsub(text, "<%s*imagemap.->.-<%s*/%s*imagemap%s*>", "") -- remove imagemaps
	text = mw.ustring.gsub(text, "%b{}", striptemplate) -- remove unwanted templates such as references
	text = mw.ustring.gsub(text, "\n%s*{{%s*[Tt][Oo][Cc].-}}", "\n") -- remove most common tables of contents

	local allparas = true -- keep all paragraphs?
	if options.paraflags then
		for _, v in pairs(options.paraflags) do
			if v then -- if any para specifically requested, don't keep all
				allparas = false
				break
			end
		end
	end

	local maxfile = 0 -- for efficiency, stop checking images after this many have been found
	if options.fileflags then
		for k, v in pairs(options.fileflags) do
			if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags
		end
	end

	-- a basic parser to trim down the lead
	local inlead = false -- have we found some text yet?
	local out = {} -- pieces of the stripped down output text
	local files = 0 -- how many images so far
	local paras = 0 -- how many paragraphs so far

	text = mw.ustring.gsub(text, "^%s*", "") -- remove initial white space
	repeat -- loop around parsing a template, image or paragraph
		local token = mw.ustring.match(text, "^%b{}%s*") or false -- {{Template}} or {| Table |}
		if token then -- found a template
			if inlead then -- lead has already started, so keep the template within the text
				out[#out + 1] = token
			elseif files < maxfile then -- discard template, but if we are still collecting images...
				local image = parseimage(token, false) or argimage(token) -- look for embedded [[File:...]], |image=, etc.
				if image and checkimage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)
					files = files + 1 -- count the file, whether displaying it or not
					if options.fileflags and options.fileflags[files] then -- if displaying this image
						image = mw.ustring.gsub(image, "|%s*frameless%s*%f[|%]]", "") -- make image a thumbnail, not frameless etc.
						image = mw.ustring.gsub(image, "|%s*framed?%s*%f[|%]]", "")
						if not mw.ustring.match(image, "|%s*thumb%s*%f[|%]]")
							and not mw.ustring.match(image, "|%s*thumbnail%s*%f[|%]]") then
							image = addfilearg(image, "thumb")
						end
						if options.fileargs then image = addfilearg(image, options.fileargs) end
						out[#out + 1] = image
					end
				end
			end
		else -- the next token in text is not a template
			token = parseimage(text, true)
			if token then -- the next token in text looks like an image
				if files < maxfile and checkimage(token) then -- if more images are wanted and this is a wanted image
					files = files + 1
					if options.fileflags and options.fileflags[files] then
						local image = token -- copy token for manipulation by adding |right etc. without changing the original
						if options.fileargs then image = addfilearg(image, options.fileargs) end
						out[#out + 1] = image
					end
				end
			else -- got a paragraph, which ends at a file, image, blank line or end of text
				local afterend = mw.ustring.len(text) + 1
				local blankpos = mw.ustring.find(text, "\n%s*\n") or afterend -- position of next paragraph delimiter (or end of text)
				-- Find position of whichever comes first: [[File:, [[Image: or paragraph delimiter.
				-- The link searches start at position 2: a link at position 1 has already failed to
				-- parse (unbalanced brackets), and stopping there would give an empty token and
				-- silently discard the rest of the lead.
				local endpos = math.min(
					mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:", 2) or afterend,
					mw.ustring.find(text, "%[%[%s*[Ii]mage%s*:", 2) or afterend,
					blankpos)
				token = mw.ustring.sub(text, 1, endpos - 1)
				if blankpos < afterend and blankpos == endpos then -- paragraph ends with a blank line
					token = token .. mw.ustring.match(text, "\n%s*\n", blankpos)
				end
				inlead = true -- we got a paragraph, so we are inside the lead section
				paras = paras + 1
				if allparas or (options.paraflags and options.paraflags[paras]) then -- add if this paragraph wanted
					out[#out + 1] = token
				end
			end -- of "else got a paragraph"
		end -- of "else not a template"

		if token then text = mw.ustring.sub(text, mw.ustring.len(token) + 1) end -- remove parsed token from remaining text
	until not text or text == "" or not token or token == "" -- loop until all text parsed

	text = mw.ustring.gsub(table.concat(out), "\n+$", "") -- remove trailing line feeds, so the "more" link flows on one line

	text = linkbold(text, pagename)

	-- wikilink to article for more info
	-- NOTE(review): the link markup was stripped from the source as received; a plain
	-- [[pagename|more]] link is the reconstruction - confirm any extra styling.
	if options.more then text = text .. " [[" .. pagename .. "|" .. options.more .. "]]" end

	return text
end

-- Convert a comma-separated list of numbers or min-max ranges into a table of flags,
-- e.g. "1,3-5" → {[1]=true, [3]=true, [4]=true, [5]=true} (numbers not listed have no entry).
-- @param str  the list to parse; items that are neither "N" nor "N-M" are ignored
-- @return     table mapping each listed number to true
local function numberflags(str)
	local ranges = mw.text.split(str, ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}
	local flags = {}
	for _, r in ipairs(ranges) do
		local min, max = mw.ustring.match(r, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" → min=3 max=5
		if not max then
			min = mw.ustring.match(r, "^%s*(%d+)%s*$") -- "1" → min=1 max=1
			max = min
		end
		if max then
			-- convert explicitly rather than relying on string coercion in the numeric for
			for p = tonumber(min), tonumber(max) do flags[p] = true end
		end
	end
	return flags
end

-- Shared template invocation code for lead and random functions.
-- @param frame       the frame passed to #invoke
-- @param articlekey  nil to choose among all numbered arguments (random); otherwise a number, or
--                    the name of the argument that holds the number/key of the page to show
-- @return            the preprocessed excerpt, or the result of err() if no articles were given
local function invoke(frame, articlekey)
	-- args = { 1,2,... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}
	-- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)
	local args = {}
	local parent = frame:getParent()
	if parent then
		for k, v in pairs(parent.args) do args[k] = v end
	end
	for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template

	errors = args.errors -- set the flag now so that err() below honours it; main() sets it again

	local pagenames = {}
	local articlecount = #args
	if articlekey then -- 1 for lead template; "selected" for selected template
		articlekey = tonumber(articlekey) or args[articlekey]
		local articlenumber = tonumber(articlekey)
		if articlenumber then -- normalise article number into the range 1..#args
			-- return here: continuing would evaluate articlenumber % 0
			if articlecount < 1 then return err("No articles provided") end
			articlenumber = articlenumber % articlecount
			if articlenumber == 0 then articlenumber = articlecount end
			articlekey = articlenumber
		end
		pagenames = { args[articlekey] }
	else -- For random, accept any number of page names. If more than one, we'll pick one randomly
		if articlecount < 1 then return err("No articles provided") end
		for i, p in pairs(args) do
			if p and type(i) == 'number' then table.insert(pagenames, p) end
		end
	end

	local options = args -- pick up miscellaneous options: more, errors, fileargs
	options.paraflags = numberflags(args["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {1,3,4,5 = true}
	options.fileflags = numberflags(args["files"] or "") -- parse file numbers
	if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text

	local text = main(pagenames, options)
	return frame:preprocess(text)
end

-- Entry points for template callers using #invoke:

-- reads the first and only article
function p.lead(frame)
	return invoke(frame, 1)
end

-- reads any article (default for invoke with one argument)
function p.random(frame)
	return invoke(frame)
end

-- reads the article whose key is in the selected= parameter
function p.selected(frame)
	return invoke(frame, "selected")
end

return p