v811_spc009/external/pigweed/pw_tokenizer/token_database_fuzzer.cc

// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// This file implements a basic fuzz test for the TokenDatabase class
// A database is created from fuzz data, and a random entry count (also
// derived from the fuzz data) is set. We then run iterations and 'find'
// operations on this database.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <span>
#include <vector>

#include "pw_fuzzer/asan_interface.h"
#include "pw_fuzzer/fuzzed_data_provider.h"
#include "pw_preprocessor/util.h"
#include "pw_tokenizer/token_database.h"

namespace pw::tokenizer {
namespace {

// Selects how the token header is filled in for one fuzzer run. The value is
// drawn from the fuzz data via FuzzedDataProvider::ConsumeEnum, and kMaxValue
// aliases the last real enumerator.
enum FuzzTestType : uint8_t {
  // Copy the built-in default header into the buffer.
  kValidHeader = 0,
  // Build the header out of bytes taken from the fuzz data.
  kRandomHeader = 1,
  kMaxValue = kRandomHeader,
};

// Number of bytes at the front of the buffer that hold the token header.
constexpr size_t kTokenHeaderSize{16};

// Largest fuzz input, in bytes, that this test accepts. It mirrors the
// fuzzer's default maximum input length, so it has to be updated whenever
// the fuzzer executable is launched with a '-max_len' argument.
constexpr size_t kFuzzDataSizeMax{4096};

// Byte offset and width of the 'EntryCount' field inside the token header.
constexpr size_t kEntryCountOffset{8};
constexpr size_t kEntryCountSize{4};

// Overwrites the 'EntryCount' field of the header that starts at `buffer`
// with `count`. The value's bytes are copied as they are laid out in memory
// on the host; no other bytes of `buffer` are touched.
void SetTokenEntryCountInBuffer(uint8_t* buffer, uint32_t count) {
  uint8_t* const entry_count_field = buffer + kEntryCountOffset;
  std::memcpy(entry_count_field, &count, kEntryCountSize);
}

// Walks `database` from begin() to end(), copying out each entry and reading
// its string pointer and token.
void IterateOverDatabase(TokenDatabase* const database) {
  auto cursor = database->begin();
  const auto stop = database->end();
  while (cursor != stop) {
    const TokenDatabase::Entry current = *cursor;
    // Nothing consumes the entry, so its fields are stored into volatile
    // locals; that keeps the compiler from optimizing the reads away.
    [[maybe_unused]] volatile const char* string_sink = current.string;
    [[maybe_unused]] volatile uint32_t token_sink = current.token;
    ++cursor;
  }
}

}  // namespace

// libFuzzer entry point. Assembles a token database image in a static buffer
// from the fuzz input, then performs one lookup and one full iteration on it.
//
// The fuzz input is consumed in this order:
//   1. A FuzzTestType value choosing the default header or a fuzzed header.
//   2. (kRandomHeader only) up to kTokenHeaderSize bytes for the header.
//   3. A 32-bit token to look up in the database.
//   4. An entry count in [0, kFuzzDataSizeMax], written into the header.
//   5. Every remaining byte, copied in directly after the header.
//
// Inputs larger than kFuzzDataSizeMax are skipped. Always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  constexpr size_t kBufferSizeMax = kFuzzDataSizeMax + kTokenHeaderSize;
  constexpr char kDefaultHeader[] = "TOKENS\0\0\0\0\0\0\0\0\0";
  static_assert(sizeof(kDefaultHeader) >= kTokenHeaderSize,
                "kDefaultHeader must provide kTokenHeaderSize bytes");
  static uint8_t buffer[kBufferSizeMax];

  if (size > kFuzzDataSizeMax) {
    return 0;
  }

  FuzzedDataProvider provider(data, size);

  // Initialize the token header with either a valid or invalid header
  // based on a random enum consumed from the fuzz data.
  switch (provider.ConsumeEnum<FuzzTestType>()) {
    case kValidHeader:
      std::memcpy(buffer, kDefaultHeader, kTokenHeaderSize);
      break;

    case kRandomHeader: {
      std::vector<uint8_t> random_header =
          provider.ConsumeBytes<uint8_t>(kTokenHeaderSize);
      // The provider may have fewer bytes left than requested; resize pads
      // the header with zeros so a full kTokenHeaderSize bytes are copied.
      random_header.resize(kTokenHeaderSize);
      std::memcpy(buffer, random_header.data(), kTokenHeaderSize);
      break;
    }
  }

  // Consume a 'test token' integer to look up later in the database.
  const uint32_t random_token = provider.ConsumeIntegral<uint32_t>();

  // Consume a 'token count' integer to set as our database entry count.
  const uint32_t random_token_count =
      provider.ConsumeIntegralInRange<uint32_t>(0, kFuzzDataSizeMax);

  // Consume the remaining data. Note that the data corresponding to the
  // string entries in the database are not explicitly null-terminated.
  // TODO(karthikmb): Once OSS-Fuzz updates to Clang11.0, switch to
  // provider.ConsumeData() to avoid extra memory and the memcpy call.
  const std::vector<uint8_t> consumed_bytes =
      provider.ConsumeBytes<uint8_t>(provider.remaining_bytes());

  // A short input can leave no payload at all. Indexing element 0 of an empty
  // vector is undefined behavior, so only copy when there is data to copy.
  if (!consumed_bytes.empty()) {
    std::memcpy(buffer + kTokenHeaderSize,
                consumed_bytes.data(),
                consumed_bytes.size());
  }

  SetTokenEntryCountInBuffer(buffer, random_token_count);

  // Poison the unused buffer space for this run of the fuzzer to
  // prevent the token database creator from reading too far in.
  const size_t data_size = kTokenHeaderSize + consumed_bytes.size();
  const size_t poisoned_length = kBufferSizeMax - data_size;
  void* const poisoned = buffer + data_size;

  ASAN_POISON_MEMORY_REGION(poisoned, poisoned_length);

  // We create a database from a std::span of the buffer since the string
  // entries might not be null terminated, and the creation of a database
  // from a raw buffer has an explicit null terminated string requirement
  // specified in the API.
  std::span<uint8_t> data_span(buffer, data_size);
  auto token_database = TokenDatabase::Create<std::span<uint8_t>>(data_span);
  [[maybe_unused]] volatile auto match = token_database.Find(random_token);

  IterateOverDatabase(&token_database);

  // Un-poison for the next iteration; the buffer is static and is reused.
  ASAN_UNPOISON_MEMORY_REGION(poisoned, poisoned_length);

  return 0;
}

}  // namespace pw::tokenizer
v811_spc009_project 4 months ago			`// Copyright 2020 The Pigweed Authors`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License"); you may not`
			`// use this file except in compliance with the License. You may obtain a copy of`
			`// the License at`
			`//`
			`// https://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT`
			`// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the`
			`// License for the specific language governing permissions and limitations under`
			`// the License.`

			`// This file implements a basic fuzz test for the TokenDatabase class`
			`// A database is created from fuzz data, and a random entry count (also`
			`// derived from the fuzz data) is set. We then run iterations and 'find'`
			`// operations on this database.`

			`#include <cstring>`
			`#include <span>`

			`#include "pw_fuzzer/asan_interface.h"`
			`#include "pw_fuzzer/fuzzed_data_provider.h"`
			`#include "pw_preprocessor/util.h"`
			`#include "pw_tokenizer/token_database.h"`

			`namespace pw::tokenizer {`
			`namespace {`

			`enum FuzzTestType : uint8_t {`
			`kValidHeader,`
			`kRandomHeader,`
			`kMaxValue = kRandomHeader,`
			`};`

			`constexpr size_t kTokenHeaderSize = 16;`

			`// The default max length in bytes of fuzzed data provided. Note that`
			`// this needs to change if the fuzzer executable is run with a`
			`// '-max_len' argument.`
			`constexpr size_t kFuzzDataSizeMax = 4096;`

			`// Location of the 'EntryCount' field in the token header.`
			`constexpr size_t kEntryCountOffset = 8;`
			`constexpr size_t kEntryCountSize = 4;`

			`void SetTokenEntryCountInBuffer(uint8_t* buffer, uint32_t count) {`
			`memcpy(buffer + kEntryCountOffset, &count, kEntryCountSize);`
			`}`

			`void IterateOverDatabase(TokenDatabase* const database) {`
			`for (TokenDatabase::Entry entry : *database) {`
			`// Since we don't "use" the contents of the entry, we exercise`
			`// the entry by extracting its contents into volatile variables`
			`// to prevent it from being optimized out during compilation.`
			`[[maybe_unused]] volatile const char* entry_string = entry.string;`
			`[[maybe_unused]] volatile uint32_t entry_token = entry.token;`
			`}`
			`}`

			`} // namespace`

			`extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {`
			`constexpr size_t kBufferSizeMax = kFuzzDataSizeMax + kTokenHeaderSize;`
			`constexpr char kDefaultHeader[] = "TOKENS\0\0\0\0\0\0\0\0\0";`
			`static uint8_t buffer[kBufferSizeMax];`

			`if (size > kFuzzDataSizeMax) {`
			`return 0;`
			`}`

			`FuzzedDataProvider provider(data, size);`

			`// Initialize the token header with either a valid or invalid header`
			`// based on a random enum consumed from the fuzz data.`
			`switch (provider.ConsumeEnum<FuzzTestType>()) {`
			`case kValidHeader:`
			`memcpy(buffer, kDefaultHeader, kTokenHeaderSize);`
			`break;`

			`case kRandomHeader: {`
			`std::vector<uint8_t> random_header =`
			`provider.ConsumeBytes<uint8_t>(kTokenHeaderSize);`
			`random_header.resize(kTokenHeaderSize);`
			`memcpy(buffer, &random_header[0], kTokenHeaderSize);`
			`break;`
			`}`
			`}`

			`// Consume a 'test token' integer to look up later in the database.`
			`uint32_t random_token = provider.ConsumeIntegral<uint32_t>();`

			`// Consume a 'token count' integer to set as our database entry count.`
			`uint32_t random_token_count =`
			`provider.ConsumeIntegralInRange<uint32_t>(0, kFuzzDataSizeMax);`

			`// Consume the remaining data. Note that the data corresponding to the`
			`// string entries in the database are not explicitly null-terminated.`
			`// TODO(karthikmb): Once OSS-Fuzz updates to Clang11.0, switch to`
			`// provider.ConsumeData() to avoid extra memory and the memcpy call.`
			`auto consumed_bytes =`
			`provider.ConsumeBytes<uint8_t>(provider.remaining_bytes());`
			`memcpy(buffer + kTokenHeaderSize, &consumed_bytes[0], consumed_bytes.size());`

			`SetTokenEntryCountInBuffer(buffer, random_token_count);`

			`// Poison the unused buffer space for this run of the fuzzer to`
			`// prevent the token database creator from reading too far in.`
			`size_t data_size = kTokenHeaderSize + consumed_bytes.size();`
			`size_t poisoned_length = kBufferSizeMax - data_size;`
			`void* poisoned = &buffer[data_size];`

			`ASAN_POISON_MEMORY_REGION(poisoned, poisoned_length);`

			`// We create a database from a std::span of the buffer since the string`
			`// entries might not be null terminated, and the creation of a database`
			`// from a raw buffer has an explicit null terminated string requirement`
			`// specified in the API.`
			`std::span<uint8_t> data_span(buffer, data_size);`
			`auto token_database = TokenDatabase::Create<std::span<uint8_t>>(data_span);`
			`[[maybe_unused]] volatile auto match = token_database.Find(random_token);`

			`IterateOverDatabase(&token_database);`

			`// Un-poison for the next iteration.`
			`ASAN_UNPOISON_MEMORY_REGION(poisoned, poisoned_length);`

			`return 0;`
			`}`

			`} // namespace pw::tokenizer`