You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
156 lines
5.4 KiB
156 lines
5.4 KiB
#!/usr/bin/env python3
|
|
# Copyright 2020 The Pigweed Authors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
# use this file except in compliance with the License. You may obtain a copy of
|
|
# the License at
|
|
#
|
|
# https://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations under
|
|
# the License.
|
|
"""Generates a C macro for the PW tokenizer 65599 fixed length hash."""
|
|
|
|
import datetime
|
|
import os
|
|
|
|
HASH_CONSTANT = 65599
|
|
HASH_NAME = 'pw_tokenizer_65599_fixed_length'
|
|
HASH_LENGTHS = 80, 96, 128, 256
|
|
|
|
FILE_HEADER = """\
|
|
// Copyright {year} The Pigweed Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
// use this file except in compliance with the License. You may obtain a copy of
|
|
// the License at
|
|
//
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
// License for the specific language governing permissions and limitations under
|
|
// the License.
|
|
|
|
// AUTOGENERATED - DO NOT EDIT
|
|
//
|
|
// This file was generated by {script}.
|
|
// To make changes, update the script and run it to regenerate the files.
|
|
#pragma once
|
|
|
|
#include <stdint.h>
|
|
|
|
// {hash_length}-character version of the tokenizer hash function.
|
|
//
|
|
// The argument must be a string literal. It is concatenated with "" to ensure
|
|
// that this is the case.
|
|
//
|
|
// clang-format off
|
|
|
|
"""
|
|
|
|
|
|
def generate_pw_tokenizer_65599_fixed_length_hash_macro(hash_length):
|
|
"""Generate macro that hashes a string literal using a modified x65599 hash.
|
|
|
|
The macros generated by this function only operate on string literals.
|
|
|
|
Since macros can only operate on fixed-length strings, the hash macro only
|
|
hashes up to a fixed length, and characters beyond that length are ignored.
|
|
To eliminate some collisions, the length of the string is hashed as if it
|
|
were the first character.
|
|
|
|
This hash is calculated with the following equation, where s is the string
|
|
and k is the maximum hash length:
|
|
|
|
H(s, k) = len(s) + 65599 * s[0] + 65599^2 * s[1] + ... + 65599^k * s[k-1]
|
|
|
|
The hash algorithm is a modified version of the x65599 hash used by the SDBM
|
|
open source project. This hash has the following differences from x65599:
|
|
- Characters are only hashed up to a fixed maximum string length.
|
|
- Characters are hashed in reverse order.
|
|
- The string length is hashed as the first character in the string.
|
|
|
|
The code generated by this function is intentionally sparse. Each character
|
|
appears hash_length times per log message, so using fewer characters results
|
|
in faster compilation times.
|
|
|
|
Args:
|
|
hash_length: maximum string size to hash; extra characters are ignored
|
|
|
|
Returns:
|
|
the macro header file as a string
|
|
"""
|
|
|
|
first_hash_term = ('(uint32_t)(sizeof(str "") - 1 + '
|
|
'/* The argument must be a string literal. */ \\\n')
|
|
|
|
# Use this to add the aligned backslash at the end of the macro lines.
|
|
line_format = '{{:<{}}}\\\n'.format(len(first_hash_term))
|
|
|
|
lines = [
|
|
FILE_HEADER.format(script=os.path.basename(__file__),
|
|
hash_length=hash_length,
|
|
year=datetime.date.today().year)
|
|
]
|
|
|
|
lines.append(
|
|
line_format.format('#define {}_{}_HASH(str)'.format(
|
|
HASH_NAME.upper(), hash_length)))
|
|
lines.append(' ' + first_hash_term) # add indendation and the macro line
|
|
|
|
indent = ' ' * len(' (uint32_t)(')
|
|
coefficient_format = '0x{coefficient:0>8x}u'
|
|
|
|
# The string will have at least a null terminator
|
|
lines.append(
|
|
line_format.format('{}0x{:0>8x}u * (uint8_t)str[0] +'.format(
|
|
indent, HASH_CONSTANT)))
|
|
|
|
# Format string to use for the remaining terms.
|
|
term_format = (
|
|
'{indent}{coefficient} * '
|
|
'(uint8_t)({index} < sizeof(str) ? str[{index}] : 0) +').format(
|
|
indent=indent,
|
|
coefficient=coefficient_format,
|
|
index='{{index:>{}}}'.format(len(str(hash_length - 1))))
|
|
|
|
for i in range(1, hash_length):
|
|
coefficient = HASH_CONSTANT**(i + 1) % 2**32
|
|
term = term_format.format(index=i, coefficient=coefficient)
|
|
lines.append(line_format.format(term))
|
|
|
|
# Remove the extra + and \ and add the closing )
|
|
lines[-1] = lines[-1].rstrip(' +\\\n') + ')'
|
|
|
|
lines.append('\n\n// clang-format on\n')
|
|
|
|
return ''.join(lines)
|
|
|
|
|
|
def _main():
|
|
base = os.path.abspath(
|
|
os.path.join(os.path.dirname(__file__), '..', 'public', 'pw_tokenizer',
|
|
'internal'))
|
|
|
|
# Generate macros for hashes of the specified lengths.
|
|
for hash_length in HASH_LENGTHS:
|
|
path = os.path.join(
|
|
base, '{}_{}_hash_macro.h'.format(HASH_NAME, hash_length))
|
|
|
|
with open(path, 'w') as header_file:
|
|
header_file.write(
|
|
generate_pw_tokenizer_65599_fixed_length_hash_macro(
|
|
hash_length))
|
|
|
|
print('Generated {}-character hash macro at {}'.format(
|
|
hash_length, path))
|
|
|
|
|
|
if __name__ == '__main__':
|
|
_main()
|