You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

368 lines
14 KiB

#!/usr/bin/python3
""" Generate an output file from a specification file and a template file.
See README.md for more details.
"""
import argparse
import re
class Reader:
    """ Simple base class facilitates reading a file.
    Derived class must implement handle_line() and may implement finish().

    read() loads the whole file, then calls handle_line() once per line
    (with self.line and self.lineno set) and finally calls finish().
    """

    def __init__(self, filename):
        self.filename = filename
        self.line = None    # most recently read line (keeps its trailing newline, if any)
        self.lineno = -1    # zero-based index of self.line; -1 until a line has been read

    def finish(self):
        """ Called after entire file has been read.
        The default implementation does nothing.
        """

    def handle_line(self):
        """ Called after each line has been read.
        Must be overridden; the current line is available as self.line.
        """
        # An explicit raise (rather than "assert False") still fires when
        # Python runs with -O, which strips assert statements.
        raise NotImplementedError(
            type(self).__name__ + " must implement handle_line()")

    def read(self):
        """ Read self.filename and feed it, line by line, to handle_line();
        then call finish().
        """
        with open(self.filename) as f:
            lines = f.readlines()
        for lineno, line in enumerate(lines):
            self.lineno = lineno
            self.line = line
            self.handle_line()
        self.finish()

    def context(self):
        """ Error-reporting aid: Return a string describing the location
        of the most recently read line (using a one-based line number)
        """
        return "line " + str(self.lineno + 1) + " of " + self.filename
class Specification(Reader):
    """ Reader for specification file

    After read(), self.sections maps each section name to the list of lines
    collected for it (macros already substituted), and self.defmacro maps
    each macro name to its body.
    """
    # Describes %kind state
    UNCONDITIONAL = 0    # No %kind in effect
    CONDITIONAL_OFF = 1  # %kind in effect, lines are to be ignored
    CONDITIONAL_ON = 2   # %kind in effect, lines are to be processed

    def __init__(self, filename, kind):
        super(Specification, self).__init__(filename)
        self.sections = dict()  # key is section name, value is array of strings (lines) in the section
        self.section = None  # name of current %section ("" for a %section opened while lines are ignored)
        self.section_start = None  # first line number of current %section
        self.defmacro = dict()  # key is macro name, value is string (body of macro)
        self.kind = kind
        self.kinds = None  # remember %define-kinds
        self.conditional = self.UNCONDITIONAL
        self.conditional_start = None  # first line number of current %kind

    def finish(self):
        """ Called after entire file has been read: check that no %section
        and no %kind is still open.
        """
        assert self.section is None, "\"%section " + self.section + \
            "\" not terminated by end of specification file"
        assert self.conditional is self.UNCONDITIONAL, "%kind not terminated by end of specification file"

    def macro_substitution(self):
        """ Performs macro substitution on self.line, and returns the result

        Every "%{name arg1 arg2 ...}" in the line is replaced by the body of
        macro "name", in which each "%{N}" has been replaced by the N-th
        (one-based) argument. Fails with an AssertionError for an undefined
        macro or an argument number outside 1..number of supplied arguments.
        """
        LINESEARCH = r"(%\{)(\S+?)(?=[\s}])\s*(.*?)\s*(\})"
        BODYSEARCH = r"(%\{)(\d+)(\})"
        orig = self.line
        out = ""
        match = re.search(LINESEARCH, orig)
        while match:
            # lookup macro
            key = match[2]
            assert key in self.defmacro, "Missing definition of macro %{" + key + "} at " + self.context()
            # handle macro arguments (read them and substitute for them in the macro body)
            body_orig = self.defmacro[key]
            body_out = ""
            args = []
            if match[3] != "":
                args = re.split(r"\s+", match[3])
            bodymatch = re.search(BODYSEARCH, body_orig)
            while bodymatch:
                argnum = int(bodymatch[2])
                # Arguments are numbered from 1. Accepting 0 here would index
                # args[-1] and silently substitute the last argument.
                assert argnum >= 1, "Macro argument number must be positive (at " + self.context() + ")"
                assert argnum <= len(args), "Macro argument number " + str(argnum) + " exceeds " + \
                    str(len(args)) + " supplied arguments at " + self.context()
                body_out = body_out + body_orig[:bodymatch.start(1)] + args[argnum - 1]
                body_orig = body_orig[bodymatch.end(3):]
                bodymatch = re.search(BODYSEARCH, body_orig)
            body_out = body_out + body_orig
            # perform macro substitution
            out = out + orig[:match.start(1)] + body_out
            orig = orig[match.end(4):]
            match = re.search(LINESEARCH, orig)
        out = out + orig
        return out

    def match_kind(self, patterns_string):
        """ Utility routine for %kind directive: Is self.kind found within patterns_string?

        patterns_string is a whitespace-separated list of patterns. Returns
        True as soon as one pattern matches self.kind, otherwise False.
        """
        patterns = re.split(r"\s+", patterns_string.strip())
        for pattern in patterns:
            wildcard_match = re.search(r"^(.*)\*$", pattern)
            lowest_version_match = re.search(r"^(.*)\+$", pattern)
            if wildcard_match:
                # A wildcard pattern: Ends in *, so see if it's a prefix of self.kind.
                if re.search("^" + re.escape(wildcard_match[1]), self.kind):
                    return True
            elif lowest_version_match:
                # A lowest version pattern: Ends in + and we check if self.kind is equal
                # to the kind in the pattern or to any kind which is to the right of the
                # kind in the pattern in self.kinds.
                lowest_kind = lowest_version_match[1]
                # Without a preceding %define-kinds, self.kinds is None; report
                # that as the same diagnostic instead of crashing on "in None".
                assert self.kinds is not None and lowest_kind in self.kinds, (
                    "Kind \"" + pattern + "\" at " + self.context() +
                    " wasn't defined in %define-kinds"
                )
                lowest_pos = self.kinds.index(lowest_kind)
                if self.kind in self.kinds[lowest_pos:]:
                    return True
            else:
                # An ordinary pattern: See if it matches self.kind.
                if self.kinds is not None and pattern not in self.kinds:
                    # TODO: Something similar for the wildcard case above
                    print("WARNING: kind \"" + pattern + "\" at " + self.context() +
                          " would have been rejected by %define-kinds")
                if pattern == self.kind:
                    return True
        return False

    def handle_line(self):
        """ Most of the work occurs here. Having read a line, we act on it immediately:
        skip a comment, process a directive, add a line to a section or to a multiline
        definition, etc.

        Malformed input is reported by raising AssertionError with a message
        that includes self.context().
        """
        DIRECTIVES = [
            "%define", "%define-kinds", "%else", "%insert", "%insert-indented",
            "%kind", "%/kind", "%section", "%/section"
        ]
        # Common typos: /%directive, \%directive
        matchbad = re.search(r"^[/\\]%(\S*)", self.line)
        if matchbad and "%/" + matchbad[1] in DIRECTIVES:
            print("WARNING: Probable misspelled directive at " + self.context())

        # Directive?  (A line starting with "%{" is a macro use, not a directive.)
        if self.line.startswith("%") and not self.line.startswith("%{"):
            # Check for comment
            if self.line.startswith("%%"):
                return

            # Validate directive name
            match = re.search(r"^(%\S*)", self.line)
            directive = match[1]
            if directive not in DIRECTIVES:
                # Explicit raise: "assert False" would vanish under python -O.
                raise AssertionError("Unknown directive \"" + directive + "\" on " + self.context())

            # Check for insert
            match = re.search(r"^%insert(?:-indented\s+(\S+))?\s+(\S+)\s*$", self.line)
            if match:
                assert self.section is not None, directive + " outside %section at " + self.context()
                count = match[1] or "0"
                key = match[2]
                assert re.match(r"^\d+$", count), "Bad count \"" + count + "\" on " + self.context()
                assert key in self.sections, "Unknown section \"" + key + "\" on " + self.context()
                assert key != self.section, "Cannot insert section \"" + key + "\" into itself on " + self.context()
                if self.conditional is self.CONDITIONAL_OFF:
                    return
                indent = " " * int(count)
                # Blank lines are copied as-is so they don't gain trailing spaces.
                self.sections[self.section].extend(
                    (indent + line if line.rstrip("\n") else line)
                    for line in self.sections[key])
                return

            # Check for start of section
            match = re.search(r"^%section\s+(\S+)\s*$", self.line)
            if match:
                assert self.section is None, "Nested %section is forbidden at " + self.context()
                self.section_start = self.lineno
                if self.conditional is self.CONDITIONAL_OFF:
                    # Remember that a section is open, but don't register it.
                    self.section = ""
                    return
                key = match[1]
                assert key not in self.sections, "Duplicate definition of \"" + key + "\" on " + self.context()
                self.sections[key] = []
                self.section = key
                # Non-directive lines will be added to self.sections[key] as they are read
                # until we see %/section
                return

            # Check for end of section
            if re.search(r"^%/section\s*$", self.line):
                assert self.section is not None, "%/section with no matching %section on " + self.context()
                assert self.conditional_start is None or self.conditional_start < self.section_start, \
                    "%kind not terminated by end of %section on " + self.context()
                self.section = None
                self.section_start = None
                return

            # Check for start of kind
            match = re.search(r"^%kind\s+((\S+)(\s+\S+)*)\s*$", self.line)
            if match:
                assert self.conditional is self.UNCONDITIONAL, \
                    "Nested %kind is forbidden at " + self.context()
                patterns = match[1]
                if self.match_kind(patterns):
                    self.conditional = self.CONDITIONAL_ON
                else:
                    self.conditional = self.CONDITIONAL_OFF
                self.conditional_start = self.lineno
                return

            # Check for complement of kind (else)
            if re.search(r"^%else\s*$", self.line):
                assert self.conditional is not self.UNCONDITIONAL, "%else without matching %kind on " + self.context()
                assert self.section_start is None or self.section_start < self.conditional_start, \
                    "%section not terminated by %else on " + self.context()
                if self.conditional == self.CONDITIONAL_ON:
                    self.conditional = self.CONDITIONAL_OFF
                else:
                    assert self.conditional == self.CONDITIONAL_OFF
                    self.conditional = self.CONDITIONAL_ON
                # Note that we permit
                #   %kind foo
                #   abc
                #   %else
                #   def
                #   %else
                #   ghi
                #   %/kind
                # which is equivalent to
                #   %kind foo
                #   abc
                #   ghi
                #   %else
                #   def
                #   %/kind
                # Probably not very useful, but easier to allow than to forbid.
                return

            # Check for end of kind
            if re.search(r"^%/kind\s*$", self.line):
                assert self.conditional is not self.UNCONDITIONAL, "%/kind without matching %kind on " + self.context()
                assert self.section_start is None or self.section_start < self.conditional_start, \
                    "%section not terminated by end of %kind on " + self.context()
                self.conditional = self.UNCONDITIONAL
                self.conditional_start = None
                return

            # Check for kinds definition
            match = re.search(r"^%define-kinds\s+(\S.*?)\s*$", self.line)
            if match:
                assert self.conditional is self.UNCONDITIONAL, "%define-kinds within %kind is forbidden at " + \
                    self.context()
                kinds = re.split(r"\s+", match[1])
                assert self.kind in kinds, "kind \"" + self.kind + "\" is not listed on " + self.context()
                assert self.kinds is None, "Second %define-kinds directive at " + self.context()
                self.kinds = kinds
                return

            # Check for define
            match = re.search(r"^%define\s+(\S+)(.*)$", self.line)
            if match:
                if self.conditional is self.CONDITIONAL_OFF:
                    return
                key = match[1]
                assert key not in self.defmacro, "Duplicate definition of \"" + key + "\" on " + self.context()
                tail = match[2]
                # The body is whatever follows the single whitespace character
                # after the macro name; a bare "%define name" gets an empty body.
                match = re.search(r"\s(.*)$", tail)
                if match:
                    self.defmacro[key] = match[1]
                else:
                    self.defmacro[key] = ""
                return

            # Malformed directive -- the name matched, but the syntax didn't
            # (explicit raise so the line can't fall through as text under -O)
            raise AssertionError("Malformed directive \"" + directive + "\" on " + self.context())

        if self.conditional is self.CONDITIONAL_OFF:
            pass
        elif self.section is None:
            # Treat as comment
            pass
        else:
            self.sections[self.section].append(self.macro_substitution())
class Template(Reader):
    """ Reader for template file

    After read(), self.lines holds the output lines: the template's literal
    text with each %insert / %insert-indented directive replaced by the lines
    of the named section of the specification.
    """

    def __init__(self, filename, specification):
        super(Template, self).__init__(filename)
        self.lines = []
        self.specification = specification  # supplies .sections (section name -> list of lines)

    def handle_line(self):
        """ Most of the work occurs here. Having read a line, we act on it immediately:
        skip a comment, process a directive, accumulate a line.

        Raises AssertionError for an unknown directive, a non-numeric indent
        count, or a reference to a section the specification doesn't define.
        Prints a warning for every emitted line that contains "TODO".
        """
        # Directive?
        if self.line.startswith("%"):
            # Check for comment
            if self.line.startswith("%%"):
                return

            # Check for insertion
            match = re.search(r"^%insert(?:-indented\s+(\S+))?\s+(\S+)\s*$", self.line)
            if match:
                count = match[1] or "0"
                key = match[2]
                # Use the specification handed to the constructor, not a
                # module-level global that only exists when run as a script.
                sections = self.specification.sections
                assert re.match(r"^\d+$", count), "Bad count \"" + count + "\" on " + self.context()
                assert key in sections, "Unknown section \"" + key + "\" on " + self.context()
                indent = " " * int(count)
                for line in sections[key]:
                    if re.search("TODO", line, re.IGNORECASE):
                        print("WARNING: \"TODO\" at " + self.context())
                    # Blank lines are copied as-is so they don't gain trailing spaces.
                    self.lines.append(indent + line if line.rstrip("\n") else line)
                return

            # Bad directive
            match = re.search(r"^(%\S*)", self.line)
            # Explicit raise: "assert False" would vanish under python -O and
            # let the bad directive through as literal text.
            raise AssertionError("Unknown directive \"" + match[1] + "\" on " + self.context())

        # Literal text
        if re.search("TODO", self.line, re.IGNORECASE):
            print("WARNING: \"TODO\" at " + self.context())
        self.lines.append(self.line)
if __name__ == "__main__":
    # Script entry point: parse the command line, read the specification and
    # the template, then write the generated text to the output file.
    parser = argparse.ArgumentParser(description="Create an output file by inserting sections "
                                                 "from a specification file into a template file")
    parser.add_argument("-k", "--kind", required=True,
                        help="token identifying kind of file to generate (per \"kind\" directive)")
    parser.add_argument("-o", "--output", required=True,
                        help="path to generated output file")
    parser.add_argument("-s", "--specification", required=True,
                        help="path to input specification file")
    parser.add_argument("-t", "--template", required=True,
                        help="path to input template file")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()
    if args.verbose:
        print(args)

    # Read the specification
    specification = Specification(args.specification, args.kind)
    specification.read()
    if args.verbose:
        print(specification.defmacro)

    # Read the template
    template = Template(args.template, specification)
    template.read()

    # Write the output (each entry of template.lines is already a complete
    # string, so one join over the list is enough)
    with open(args.output, "w") as f:
        f.write("".join(template.lines))
# TODO: Write test cases for malformed specification and template files
# TODO: Find a cleaner way to handle conditionals (%kind) or nesting in general;
# maybe add support for more nesting
# TODO: Could we do away with the distinction between a specification file and a
# template file and add a %include directive?