-- Module:Clade/converter
-- Jump to navigation
-- Jump to search
-- Documentation for this module may be created at Module:Clade/converter/doc
--require('Module:No globals') -- comment out until clade also uses noglobals
-- Table of functions exported by this module; it is returned at the end of the file.
local p = {}
-- Arguments of the parent of the current frame, read once when the module is
-- loaded. The converter functions consult these alongside the #invoke arguments.
local pargs = mw.getCurrentFrame():getParent().args
--[[ =================== parser for conversion to clade structure =============================
Function p.newickConverter()
convert Newick strings to clade format
Usage: {{#invoke:Module:Sandbox/Jts1882/CladeN|newickConverter|newickstring={{{NEWICK_STRING}}} }}
Function p.listConverter()
convert wikitext-like lists to clade format
use @ instead of * in wikitext to avoid processing
Usage: {{#invoke:Module:Clade/converter|listConverter|list={{{LIST_STRING}}} }}
]]
--- Dispatch to the Newick or list converter depending on which input was supplied.
-- A Newick string ('newickstring' on the invoke frame, or 'newick' /
-- 'newickstring' in the parent arguments) takes precedence over a list
-- ('list' on either frame). Returns nothing when neither input is present.
-- @param frame the frame object passed in by #invoke
-- @return the string produced by the selected converter
function p.cladeConverter(frame)
	local invokeArgs = frame.args

	local newickInput = invokeArgs['newickstring'] or pargs['newick'] or pargs['newickstring']
	if newickInput then
		return p.newickConverter(frame)
	end

	local listInput = invokeArgs['list'] or pargs['list']
	if listInput then
		return p.listConverter(frame)
	end
end
--[[ =================== Newick to clade parser function =============================
Function to convert Newick strings to clade format
Usage: {{#invoke:Module:Sandbox/Jts1882/CladeN|newickConverter|newickstring={{{NEWICK_STRING}}} }}
]]
--- Convert a Newick string into {{clade}} template wikitext.
-- The input is taken from 'newickstring' on the invoke frame, falling back to
-- 'newick' or 'newickstring' in the parent arguments, and is first passed
-- through processNewickString from Module:Clade.
-- When the parent argument 'option' equals 'tree' the bare clade wikitext is
-- returned; otherwise the processed Newick string and the generated clade
-- structure are each shown inside <pre> blocks.
-- @param frame the frame object passed in by #invoke
-- @return the result string
function p.newickConverter(frame)
	local rawNewick = frame.args['newickstring'] or pargs['newick'] or pargs['newickstring']
	local newick = require('Module:Clade').processNewickString(rawNewick, "")

	-- The root is level 1 and is always the single child of the outer template.
	local rootLevel, rootChild = 1, 1
	local cladeText = '{{clade' .. p.newickParseLevel(newick, rootLevel, rootChild) .. '\r}}'

	local mode = mw.getCurrentFrame():getParent().args['option'] or ''
	if mode == 'tree' then
		-- hand back the clade wikitext itself
		return cladeText
	end

	-- otherwise display both the processed Newick string and the conversion result
	local newickSection = '<div>Modified Newick string:' .. '<pre>' .. newick .. '</pre>'
	local cladeSection = 'Output of clade template structure:' .. '<pre>' .. cladeText .. '</pre></div>'
	return newickSection .. cladeSection
end
--[[ Parse one level of Newick string
This function receives a Newick string, which has two components
1. the right hand term is a clade label: |labelN=labelname
2. the left hand term in parentheses has comma-delimited child nodes, each of which can be
i. a taxon name which just needs: |N=leafname
ii. a Newick string which needs further processing through recursion
]]
--- Parse one level of a Newick string into clade template parameters.
-- The text outside the outermost parentheses becomes |labelN=, and the
-- parenthesised part becomes |N={{clade ... }} whose children are either plain
-- leaves (|i=name) or nested groups handled by recursion.
-- A string without any parenthesised group is emitted as a single leaf
-- (|N=string) instead of raising an arithmetic-on-nil error.
-- @param newickString the Newick (sub)string for this node
-- @param levelNumber  nesting depth, used only to choose the indent
-- @param childNumber  position of this node among its siblings
-- @return the clade wikitext fragment for this node
function p.newickParseLevel(newickString,levelNumber,childNumber)
	local indent = p.getIndent(levelNumber)

	-- locate the outermost parenthesised term (first "(" to last ")")
	local openPos, closePos = string.find(newickString, '%(.*%)')
	if not openPos then
		-- nothing to descend into: treat the whole string as a leaf
		return indent .. '|' .. childNumber .. '=' .. newickString
	end

	local innerTerm = string.sub(newickString, openPos + 1, closePos - 1) -- content in parentheses
	local outerTerm = (string.gsub(newickString, "%b()", ""))             -- what remains is the label

	local parts = {
		indent .. '|label' .. childNumber .. '=' .. outerTerm,
		indent .. '|' .. childNumber .. '=' .. '{{clade',
	}

	local childLevel = levelNumber + 1
	local childIndent = p.getIndent(childLevel)

	-- Protect commas inside nested parentheses from the split below by swapping
	-- them for a control character that cannot collide with label text
	-- (a visible marker such as "XXX" would corrupt labels containing it).
	-- Whitespace following those commas is dropped at the same time.
	local placeholder = "\1"
	local protected = string.gsub(innerTerm, "%b()", function (group)
		return (string.gsub(group, ",%s*", placeholder))
	end)

	for i, term in ipairs(mw.text.split(protected, ",")) do
		local restored = (string.gsub(term, placeholder, ",")) -- put the commas back
		if string.find(restored, '%(.*%)') then
			-- nested group: recurse, numbering it by its position among the siblings
			parts[#parts + 1] = p.newickParseLevel(restored, childLevel, i)
		else
			parts[#parts + 1] = childIndent .. '|' .. i .. '=' .. restored
		end
	end

	parts[#parts + 1] = childIndent .. '}}'
	return table.concat(parts)
end
--- Build the line prefix placed before each emitted clade parameter.
-- The prefix is a carriage return, followed by one space per unit of the
-- optional 'indent' argument, followed by one space for every level above 1.
-- The 'indent' argument is read from the parent arguments first and from the
-- current frame otherwise; an absent, empty or non-numeric value counts as 0
-- (previously such a value raised an error when compared with a number).
-- @param levelNumber nesting depth of the line being written (1 = root)
-- @return the prefix string
function p.getIndent(levelNumber)
	-- Convert once, up front; tonumber() yields nil for "" or non-numeric text,
	-- in which case the next source (and finally 0) is used.
	local extraIndent = tonumber(pargs['indent'])
		or tonumber(mw.getCurrentFrame().args['indent'])
		or 0

	local indent = "\r"
	while extraIndent > 0 do
		indent = indent .. " " -- an extra indent to make aligning compound trees easier
		extraIndent = extraIndent - 1
	end
	while levelNumber > 1 do
		indent = indent .. " "
		levelNumber = levelNumber - 1
	end
	return indent
end
--[[ =================== experimental list to clade parser function =============================
Function to convert wikitext-like lists to clade format
- use @ instead of * in wikitext to avoid processing
Usage: {{#invoke:Module:Clade/converter|listConverter|list={{{LIST_STRING}}} }}
]]
--- Convert a wikitext-like list (using @ as the bullet marker) into {{clade}} wikitext.
-- The list is read from 'list' on the invoke frame, falling back to 'list' in
-- the parent arguments. When the parent argument 'option' equals 'tree' the
-- bare clade wikitext is returned; otherwise it is wrapped in a <pre> block.
-- @param frame the frame object passed in by #invoke
-- @return the result string
function p.listConverter(frame)
	local listText = frame.args['list'] or mw.getCurrentFrame():getParent().args['list']

	-- The root is level 1 and is always the single child of the outer template.
	local rootLevel, rootChild = 1, 1
	local cladeText = p.getIndent(rootLevel) .. '{{clade'
		.. p.listParseLevel(listText, rootLevel, rootChild)

	local mode = mw.getCurrentFrame():getParent().args['option'] or ''
	if mode ~= 'tree' then
		-- display the converted clade structure as preformatted text
		return '<pre>' .. cladeText .. '</pre>'
	end
	-- hand back the clade wikitext itself
	return cladeText
end
--- Parse one level of an @-marked list into clade template parameters.
-- One leading @ is removed from every line. A line that is followed by lines
-- still starting with @ is the label of a subtree: those following lines are
-- rejoined and parsed recursively as |N={{clade ... }}. Any other line is
-- written as a plain leaf |N=text. The fragment ends with the closing }} of
-- the enclosing clade template.
-- @param listString  newline-separated list text for this level
-- @param levelNumber nesting depth, used only to choose the indent
-- @param childNumber accepted for symmetry with newickParseLevel; not used here
-- @return the clade wikitext fragment for this level
function p.listParseLevel(listString,levelNumber,childNumber)
	local indent = p.getIndent(levelNumber)
	local childLevel = levelNumber + 1

	local lines = mw.text.split(listString, "\n")
	for n = 1, #lines do
		lines[n] = (lines[n]:gsub("^@", "")) -- strip the first @
	end

	local parts = {}
	local i = 1     -- index of the line being examined
	local child = 1 -- number of the next child parameter to write
	while lines[i] do
		if lines[i + 1] and string.match(lines[i + 1], "^@") then
			-- the following line is deeper, so this line labels a subtree
			local label = lines[i]
			i = i + 1
			local subtree = { lines[i] }
			-- collect every consecutive deeper line into the subtree
			while lines[i + 1] and string.match(lines[i + 1], "^@") do
				i = i + 1
				subtree[#subtree + 1] = lines[i]
			end
			parts[#parts + 1] = indent .. '|label' .. child .. '=' .. label
			parts[#parts + 1] = indent .. '|' .. child .. '=' .. '{{clade'
				.. p.listParseLevel(table.concat(subtree, "\n"), childLevel, child)
		else
			-- a simple leaf
			parts[#parts + 1] = indent .. '|' .. child .. '=' .. lines[i]
		end
		i = i + 1
		child = child + 1
	end

	parts[#parts + 1] = indent .. '}}'
	return table.concat(parts)
end
return p