You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

126 lines
4.1 KiB

// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
#include "pw_tokenizer/detokenize.h"
#include <algorithm>
#include "pw_tokenizer/internal/decode.h"
namespace pw::tokenizer {
namespace {
// Builds the text reported when a token has no matching database entry: the
// decoding-error prefix, the words "unknown token ", the token rendered as
// eight lowercase hexadecimal digits, and the decoding-error suffix.
std::string UnknownTokenMessage(uint32_t value) {
  constexpr char kHexDigits[] = "0123456789abcdef";
  constexpr int kNibbleCount = 8;  // A 32-bit token is eight 4-bit nibbles.

  std::string message(PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX "unknown token ");

  // Emit the nibbles from most significant to least significant.
  for (int nibble = kNibbleCount - 1; nibble >= 0; --nibble) {
    message += kHexDigits[(value >> (nibble * 4)) & 0xF];
  }

  message += PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX;
  return message;
}
// Decoding result with the date removed, for sorting.
//
// .first is the decoded format string for one candidate database entry and
// .second is that entry's date_removed value. The DetokenizedString
// constructor builds one of these per candidate, and IsBetterResult compares
// .second as its final tie-breaker.
using DecodingResult = std::pair<DecodedFormatString, uint32_t>;
// Determines if one result is better than the other if collisions occurred.
// Returns true if lhs is preferred over rhs. This logic should match the
// collision resolution logic in detokenize.py.
bool IsBetterResult(const DecodingResult& lhs, const DecodingResult& rhs) {
// Favor the result for which decoding succeeded.
if (lhs.first.ok() != rhs.first.ok()) {
return lhs.first.ok();
}
// Favor the result for which all bytes were decoded.
if ((lhs.first.remaining_bytes() == 0u) !=
(rhs.first.remaining_bytes() == 0u)) {
return lhs.first.remaining_bytes() == 0u;
}
// Favor the result with fewer decoding errors.
if (lhs.first.decoding_errors() != rhs.first.decoding_errors()) {
return lhs.first.decoding_errors() < rhs.first.decoding_errors();
}
// Favor the result that successfully decoded the most arguments.
if (lhs.first.argument_count() != rhs.first.argument_count()) {
return lhs.first.argument_count() > rhs.first.argument_count();
}
// Favor the result that was removed from the database most recently.
return lhs.second > rhs.second;
}
} // namespace
// Decodes `arguments` against every database entry that shares `token` and
// stores the decoded strings in matches_, best candidate first.
//
//   token:     the token read from the encoded message; recorded in token_.
//   entries:   the candidate database entries for that token (may be empty, in
//              which case matches_ stays empty).
//   arguments: the encoded argument bytes that follow the token.
DetokenizedString::DetokenizedString(
    uint32_t token,
    const std::span<const TokenizedStringEntry>& entries,
    const std::span<const uint8_t>& arguments)
    : token_(token), has_token_(true) {
  std::vector<DecodingResult> results;
  results.reserve(entries.size());

  for (const auto& [format, date_removed] : entries) {
    results.emplace_back(format.Format(arguments), date_removed);
  }

  // Use a stable sort so that candidates IsBetterResult cannot tell apart keep
  // the order they had in `entries`. std::sort leaves the order of equivalent
  // elements unspecified, which would make the reported best match depend on
  // the standard library implementation.
  std::stable_sort(results.begin(), results.end(), IsBetterResult);

  // Only the decoded strings are kept; the date was needed just for ranking.
  for (auto& result : results) {
    matches_.push_back(std::move(result.first));
  }
}
// Returns the value of the highest-ranked match, or an empty string when the
// token matched nothing.
std::string DetokenizedString::BestString() const {
  if (matches_.empty()) {
    return std::string();
  }
  return matches_[0].value();
}
// Like BestString(), but reports problems in the returned text:
//   - with at least one match, returns that match's value_with_errors();
//   - with no match but a known token, returns the "unknown token" message;
//   - with no token at all, returns the "missing token" decoding error.
std::string DetokenizedString::BestStringWithErrors() const {
  if (!matches_.empty()) {
    return matches_[0].value_with_errors();
  }
  if (has_token_) {
    return UnknownTokenMessage(token_);
  }
  return PW_TOKENIZER_ARG_DECODING_ERROR("missing token");
}
// Indexes the token database by token. Every database entry is appended to the
// list stored under its token in database_, so entries whose tokens collide
// end up in the same list in iteration order.
Detokenizer::Detokenizer(const TokenDatabase& database) {
  for (const auto& entry : database) {
    // operator[] creates an empty list the first time a token is seen.
    auto& entries_for_token = database_[entry.token];
    entries_for_token.emplace_back(entry.string, entry.date_removed);
  }
}
// Decodes one tokenized message.
//
// `encoded` must begin with a 4-byte token stored least-significant byte
// first; any bytes after the token are passed on as the encoded arguments.
//
// Returns a default-constructed DetokenizedString when `encoded` is too short
// to hold a token. Otherwise returns a DetokenizedString built from the token,
// the database entries registered for it (none if the token is unknown), and
// the argument bytes.
DetokenizedString Detokenizer::Detokenize(
    const std::span<const uint8_t>& encoded) const {
  // The token is missing from the encoded data; there is nothing to do.
  if (encoded.size() < sizeof(uint32_t)) {
    return DetokenizedString();
  }

  // Assemble the token in uint32_t arithmetic. Without the casts each byte is
  // promoted to (signed) int before shifting, so a top byte >= 0x80 would be
  // shifted into the sign bit, and the shifts would be undefined on a platform
  // whose int is narrower than 32 bits.
  const uint32_t token = static_cast<uint32_t>(encoded[3]) << 24 |
                         static_cast<uint32_t>(encoded[2]) << 16 |
                         static_cast<uint32_t>(encoded[1]) << 8 |
                         static_cast<uint32_t>(encoded[0]);

  const auto result = database_.find(token);

  // An unknown token is passed along with an empty set of candidate entries.
  return DetokenizedString(token,
                           result == database_.end()
                               ? std::span<TokenizedStringEntry>()
                               : std::span(result->second),
                           encoded.subspan(sizeof(token)));
}
} // namespace pw::tokenizer