#!/usr/bin/python3 """ Generate an output file from a specification file and a template file. See README.md for more details. """ import argparse import re class Reader: """ Simple base class facilitates reading a file. Derived class must implement handle_line() and may implement finish(). """ def __init__(self, filename): self.filename = filename self.line = None # most recently read line self.lineno = -1 # zero-based def finish(self): """ Called after entire file has been read """ pass def handle_line(self): """ Called after each line has been read """ assert False def read(self): with open(self.filename) as f: lines = f.readlines() for self.lineno in range(len(lines)): self.line = lines[self.lineno] self.handle_line() self.finish() def context(self): """ Error-reporting aid: Return a string describing the location of the most recently read line """ return "line " + str(self.lineno + 1) + " of " + self.filename class Specification(Reader): """ Reader for specification file """ # Describes %kind state UNCONDITIONAL = 0 # No %kind in effect CONDITIONAL_OFF = 1 # %kind in effect, lines are to be ignored CONDITIONAL_ON = 2 # %kind in effect, lines are to be processed def __init__(self, filename, kind): super(Specification, self).__init__(filename) self.sections = dict() # key is section name, value is array of strings (lines) in the section self.section = None # name of current %section self.section_start = None # first line number of current %section self.defmacro = dict() # key is macro name, value is string (body of macro) self.kind = kind self.kinds = None # remember %define-kinds self.conditional = self.UNCONDITIONAL self.conditional_start = None # first line number of current %kind def finish(self): assert self.section is None, "\"%section " + self.section + \ "\" not terminated by end of specification file" assert self.conditional is self.UNCONDITIONAL, "%kind not terminated by end of specification file" def macro_substitution(self): """ Performs macro substitution on self.line, and returns the result """ LINESEARCH = "(%\{)(\S+?)(?=[\s}])\s*(.*?)\s*(\})" BODYSEARCH = "(%\{)(\d+)(\})" orig = self.line out = "" match = re.search(LINESEARCH, orig) while match: # lookup macro key = match[2] assert key in self.defmacro, "Missing definition of macro %{" + key + "} at " + self.context() # handle macro arguments (read them and substitute for them in the macro body) body_orig = self.defmacro[key] body_out = "" args = [] if match[3] != "": args = re.split("\s+", match[3]) bodymatch = re.search(BODYSEARCH, body_orig) while bodymatch: argnum = int(bodymatch[2]) assert argnum >= 0, "Macro argument number must be positive (at " + self.context() + ")" assert argnum <= len(args), "Macro argument number " + str(argnum) + " exceeds " + \ str(len(args)) + " supplied arguments at " + self.context() body_out = body_out + body_orig[:bodymatch.start(1)] + args[int(bodymatch[2]) - 1] body_orig = body_orig[bodymatch.end(3):] bodymatch = re.search(BODYSEARCH, body_orig) body_out = body_out + body_orig # perform macro substitution out = out + orig[:match.start(1)] + body_out orig = orig[match.end(4):] match = re.search(LINESEARCH, orig) out = out + orig return out def match_kind(self, patterns_string): """ Utility routine for %kind directive: Is self.kind found within patterns_string?""" patterns = re.split("\s+", patterns_string.strip()) for pattern in patterns: wildcard_match = re.search("^(.*)\*$", pattern) lowest_version_match = re.search("^(.*)\+$", pattern) if wildcard_match: # A wildcard pattern: Ends in *, so see if it's a prefix of self.kind. if re.search("^" + re.escape(wildcard_match[1]), self.kind): return True elif lowest_version_match: # A lowest version pattern: Ends in + and we check if self.kind is equal # to the kind in the pattern or to any kind which is to the right of the # kind in the pattern in self.kinds. assert lowest_version_match[1] in self.kinds, ( "Kind \"" + pattern + "\" at " + self.context() + " wasn't defined in %define-kinds" ) lowest_pos = self.kinds.index(pattern[:-1]) if self.kind in self.kinds[lowest_pos:]: return True else: # An ordinary pattern: See if it matches self.kind. if not self.kinds is None and not pattern in self.kinds: # TODO: Something similar for the wildcard case above print("WARNING: kind \"" + pattern + "\" at " + self.context() + " would have been rejected by %define-kinds") if pattern == self.kind: return True return False def handle_line(self): """ Most of the work occurs here. Having read a line, we act on it immediately: skip a comment, process a directive, add a line to a section or a to a multiline definition, etc. """ DIRECTIVES = [ "%define", "%define-kinds", "%else", "%insert", "%insert-indented", "%kind", "%/kind", "%section", "%/section" ] # Common typos: /%directive, \%directive matchbad = re.search("^[/\\\]%(\S*)", self.line) if matchbad and "%/" + matchbad[1] in DIRECTIVES: print("WARNING: Probable misspelled directive at " + self.context()) # Directive? if re.search("^%", self.line) and not re.search("^%{", self.line): # Check for comment if re.search("^%%", self.line): return # Validate directive name match = re.search("^(%\S*)", self.line); directive = match[1] if not directive in DIRECTIVES: assert False, "Unknown directive \"" + directive + "\" on " + self.context() # Check for insert match = re.search("^%insert(?:-indented\s+(\S+))?\s+(\S+)\s*$", self.line) if match: directive = self.line.split(" ", 1)[0] assert not self.section is None, directive + " outside %section at " + self.context() count = match[1] or "0" key = match[2] assert re.match("^\d+$", count), "Bad count \"" + count + "\" on " + self.context() assert key in self.sections, "Unknown section \"" + key + "\" on " + self.context() assert key != self.section, "Cannot insert section \"" + key + "\" into itself on " + self.context() if self.conditional is self.CONDITIONAL_OFF: return indent = " " * int(count) self.sections[self.section].extend( (indent + line if line.rstrip("\n") else line) for line in self.sections[key]) return # Check for start of section match = re.search("^%section\s+(\S+)\s*$", self.line) if match: assert self.section is None, "Nested %section is forbidden at " + self.context() self.section_start = self.lineno if self.conditional is self.CONDITIONAL_OFF: self.section = "" return key = match[1] assert not key in self.sections, "Duplicate definition of \"" + key + "\" on " + self.context() self.sections[key] = [] self.section = key # Non-directive lines will be added to self.sections[key] as they are read # until we see %/section return # Check for end of section if re.search("^%/section\s*$", self.line): assert not self.section is None, "%/section with no matching %section on " + self.context() assert self.conditional_start is None or self.conditional_start < self.section_start, \ "%kind not terminated by end of %section on " + self.context() self.section = None self.section_start = None return # Check for start of kind match = re.search("^%kind\s+((\S+)(\s+\S+)*)\s*$", self.line) if match: assert self.conditional is self.UNCONDITIONAL, \ "Nested %kind is forbidden at " + self.context() patterns = match[1] if self.match_kind(patterns): self.conditional = self.CONDITIONAL_ON else: self.conditional = self.CONDITIONAL_OFF self.conditional_start = self.lineno return # Check for complement of kind (else) if re.search("^%else\s*$", self.line): assert not self.conditional is self.UNCONDITIONAL, "%else without matching %kind on " + self.context() assert self.section_start is None or self.section_start < self.conditional_start, \ "%section not terminated by %else on " + self.context() if self.conditional == self.CONDITIONAL_ON: self.conditional = self.CONDITIONAL_OFF else: assert self.conditional == self.CONDITIONAL_OFF self.conditional = self.CONDITIONAL_ON # Note that we permit # %kind foo # abc # %else # def # %else # ghi # %/kind # which is equivalent to # %kind foo # abc # ghi # %else # def # %/kind # Probably not very useful, but easier to allow than to forbid. return # Check for end of kind if re.search("^%/kind\s*$", self.line): assert not self.conditional is self.UNCONDITIONAL, "%/kind without matching %kind on " + self.context() assert self.section_start is None or self.section_start < self.conditional_start, \ "%section not terminated by end of %kind on " + self.context() self.conditional = self.UNCONDITIONAL self.conditional_start = None return # Check for kinds definition match = re.search("^%define-kinds\s+(\S.*?)\s*$", self.line) if match: assert self.conditional is self.UNCONDITIONAL, "%define-kinds within %kind is forbidden at " + \ self.context() kinds = re.split("\s+", match[1]) assert self.kind in kinds, "kind \"" + self.kind + "\" is not listed on " + self.context() assert self.kinds is None, "Second %define-kinds directive at " + self.context() self.kinds = kinds return # Check for define match = re.search("^%define\s+(\S+)(.*)$", self.line) if match: if self.conditional is self.CONDITIONAL_OFF: return key = match[1] assert not key in self.defmacro, "Duplicate definition of \"" + key + "\" on " + self.context() tail = match[2] match = re.search("\s(.*)$", tail) if match: self.defmacro[key] = match[1] else: self.defmacro[key] = "" return # Malformed directive -- the name matched, but the syntax didn't assert False, "Malformed directive \"" + directive + "\" on " + self.context() if self.conditional is self.CONDITIONAL_OFF: pass elif self.section is None: # Treat as comment pass else: self.sections[self.section].append(self.macro_substitution()) class Template(Reader): """ Reader for template file """ def __init__(self, filename, specification): super(Template, self).__init__(filename) self.lines = [] self.specification = specification def handle_line(self): """ Most of the work occurs here. Having read a line, we act on it immediately: skip a comment, process a directive, accumulate a line. """ # Directive? if re.search("^%", self.line): # Check for comment if re.search("^%%", self.line): return # Check for insertion match = re.search("^%insert(?:-indented\s+(\S+))?\s+(\S+)\s*$", self.line) if match: count = match[1] or "0" key = match[2] assert re.match("^\d+$", count), "Bad count \"" + count + "\" on " + self.context() assert key in specification.sections, "Unknown section \"" + key + "\" on " + self.context() indent = " " * int(count) for line in specification.sections[key]: if re.search("TODO", line, re.IGNORECASE): print("WARNING: \"TODO\" at " + self.context()) self.lines.append(indent + line if line.rstrip("\n") else line) return # Bad directive match = re.search("^(%\S*)", self.line) assert False, "Unknown directive \"" + match[1] + "\" on " + self.context() # Literal text if re.search("TODO", self.line, re.IGNORECASE): print("WARNING: \"TODO\" at " + self.context()) self.lines.append(self.line) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Create an output file by inserting sections " "from a specification file into a template file") parser.add_argument("-k", "--kind", required=True, help="token identifying kind of file to generate (per \"kind\" directive)") parser.add_argument("-o", "--output", required=True, help="path to generated output file") parser.add_argument("-s", "--specification", required=True, help="path to input specification file") parser.add_argument("-t", "--template", required=True, help="path to input template file") parser.add_argument("-v", "--verbose", action="store_true") args = parser.parse_args() if args.verbose: print(args) # Read the specification specification = Specification(args.specification, args.kind) specification.read() if (args.verbose): print(specification.defmacro) # Read the template template = Template(args.template, specification) template.read() # Write the output with open(args.output, "w") as f: f.write("".join(["".join(line) for line in template.lines])) # TODO: Write test cases for malformed specification and template files # TODO: Find a cleaner way to handle conditionals (%kind) or nesting in general; # maybe add support for more nesting # TODO: Could we do away with the distinction between a specification file and a # template file and add a %include directive?