#!/usr/bin/env python
#===- gen_std.py - ------------------------------------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

"""gen_std.py is a tool to generate a lookup table (from qualified names to
include headers) for C/C++ Standard Library symbols by parsing archived HTML
files from cppreference.

Caveats and FIXMEs:
  - std literal operators and std::placeholders symbols are not indexed (see
    the comments in main()).
  - symbols with multiple variants or defined in multiple headers aren't added,
    e.g. std::move, std::swap

Usage:
  1. Install BeautifulSoup dependency, see instruction:
       https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
  2. Download cppreference offline HTML files (e.g. html_book_20181028.zip) at
       https://en.cppreference.com/w/Cppreference:Archives
  3. Unzip the zip file from step 2 to a directory CPPREF_DIR, you should get a
     "reference" directory in CPPREF_DIR
  4. Run the command:
       // Generate C++ symbols
       gen_std.py -cppreference CPPREF_DIR/reference -language=cpp > StdSymbolMap.inc
       // Generate C symbols
       gen_std.py -cppreference CPPREF_DIR/reference -language=c > CSymbolMap.inc
"""


import cppreference_parser
import argparse
import datetime
import os
import sys

# Banner emitted at the top of the generated .inc file. The two %s slots are
# filled with the upper-cased language name and the cppreference snapshot date.
CODE_PREFIX = """\
//===-- gen_std.py generated file -------------------------------*- C++ -*-===//
//
// Used to build a lookup table (qualified names => include headers) for %s
// Standard Library symbols.
//
// Automatically generated file, DO NOT EDIT!
//
// Generated from cppreference offline HTML book (modified on %s).
//===----------------------------------------------------------------------===//
"""


def ParseArg():
    """Parse the command line.

    Returns:
      The argparse namespace with two attributes:
        cppreference: path to the cppreference offline HTML directory.
        language: either 'c' or 'cpp'.

    Both options are mandatory. An unsupported -language value is rejected by
    argparse with a usage error instead of failing later inside main().
    """
    parser = argparse.ArgumentParser(description='Generate StdGen file')
    parser.add_argument('-cppreference', metavar='PATH',
                        help='path to the cppreference offline HTML directory',
                        required=True)
    parser.add_argument('-language',
                        choices=('c', 'cpp'),
                        help='Generate c or cpp symbols',
                        required=True)
    return parser.parse_args()


def main():
    """Emit the SYMBOL(...) table for the requested language on stdout.

    Symbols that have no header, or more than one candidate header, are not
    emitted; a diagnostic for each of them is written to stderr instead.
    Exits with an error message if the symbol index directory does not exist.
    """
    args = ParseArg()
    if args.language == 'cpp':
        page_root = os.path.join(args.cppreference, "en", "cpp")
        symbol_index_root = os.path.join(page_root, "symbol_index")
        parse_pages = [
            (page_root, "symbol_index.html", "std::"),
            # std sub-namespace symbols have separated pages.
            # We don't index std literal operators (e.g.
            # std::literals::chrono_literals::operator""d), these symbols
            # can't be accessed by std::.
            # FIXME: index std::placeholders symbols, placeholders.html page
            # is different (which contains one entry for _1, _2, ..., _N), we
            # need special handling.
            (symbol_index_root, "chrono.html", "std::chrono::"),
            (symbol_index_root, "filesystem.html", "std::filesystem::"),
            (symbol_index_root, "pmr.html", "std::pmr::"),
            (symbol_index_root, "regex_constants.html",
             "std::regex_constants::"),
            (symbol_index_root, "this_thread.html", "std::this_thread::"),
        ]
    else:
        # ParseArg() restricts -language to 'c' or 'cpp', so this is 'c'.
        page_root = os.path.join(args.cppreference, "en", "c")
        symbol_index_root = page_root
        parse_pages = [(page_root, "index.html", None)]

    if not os.path.exists(symbol_index_root):
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("Path %s doesn't exist!" % symbol_index_root)

    symbols = cppreference_parser.GetSymbols(parse_pages)

    # We don't have version information from the unzipped offline HTML files,
    # so we use the modified time of the language's index.html page as the
    # version.
    index_page_path = os.path.join(page_root, "index.html")
    cppreference_modified_date = datetime.datetime.fromtimestamp(
        os.stat(index_page_path).st_mtime).strftime('%Y-%m-%d')

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(CODE_PREFIX % (args.language.upper(), cppreference_modified_date))
    for symbol in symbols:
        headers = symbol.headers
        if len(headers) == 1:
            # SYMBOL(unqualified_name, namespace, header)
            print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace,
                                          headers[0]))
        elif len(headers) == 0:
            sys.stderr.write("No header found for symbol %s\n" % symbol.name)
        else:
            # FIXME: support symbols with multiple headers (e.g. std::move).
            sys.stderr.write("Ambiguous header for symbol %s: %s\n" % (
                symbol.name, ', '.join(headers)))


if __name__ == '__main__':
    main()