v811_spc009/external/libtextclassifier/native/annotator/grammar/utils.cc

/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "annotator/grammar/utils.h"

#include "utils/grammar/utils/rules.h"

namespace libtextclassifier3 {
namespace {

using ::libtextclassifier3::GrammarModel_::RuleClassificationResultT;

}  // namespace

// Creates a Tokenizer configured from the given grammar tokenizer options.
//
// `options` must not be null (enforced with TC3_CHECK). Either codepoint range
// list may be absent from the options, in which case the corresponding
// configuration handed to the Tokenizer is left empty.
Tokenizer BuildTokenizer(const UniLib* unilib,
                         const GrammarTokenizerOptions* options) {
  TC3_CHECK(options != nullptr);

  // Look the optional range lists up once and reuse the results below.
  const auto* tokenization_ranges = options->tokenization_codepoint_config();
  const auto* internal_ranges = options->internal_tokenizer_codepoint_ranges();

  std::vector<const TokenizationCodepointRange*> tokenization_config;
  if (tokenization_ranges != nullptr) {
    tokenization_config.assign(tokenization_ranges->begin(),
                               tokenization_ranges->end());
  }

  std::vector<const CodepointRange*> internal_config;
  if (internal_ranges != nullptr) {
    internal_config.assign(internal_ranges->begin(), internal_ranges->end());
  }

  // The script-change option is only honoured when a tokenization codepoint
  // config is present; otherwise it is forced to false.
  const bool split_on_script_change =
      tokenization_ranges != nullptr ? options->tokenize_on_script_change()
                                     : false;

  return Tokenizer(options->tokenization_type(), unilib, tokenization_config,
                   internal_config, split_on_script_change,
                   /*icu_preserve_whitespace_tokens=*/false);
}

int AddRuleClassificationResult(const std::string& collection,
                                const ModeFlag& enabled_modes,
                                float priority_score, GrammarModelT* model) {
  const int result_id = model->rule_classification_result.size();
  model->rule_classification_result.emplace_back(new RuleClassificationResultT);
  RuleClassificationResultT* result =
      model->rule_classification_result.back().get();
  result->collection_name = collection;
  result->enabled_modes = enabled_modes;
  result->priority_score = priority_score;
  return result_id;
}

}  // namespace libtextclassifier3
v811_spc009_project 4 months ago			`/*`
			`* Copyright (C) 2018 The Android Open Source Project`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`#include "annotator/grammar/utils.h"`

			`#include "utils/grammar/utils/rules.h"`

			`namespace libtextclassifier3 {`
			`namespace {`

			`using ::libtextclassifier3::GrammarModel_::RuleClassificationResultT;`

			`} // namespace`

			`Tokenizer BuildTokenizer(const UniLib* unilib,`
			`const GrammarTokenizerOptions* options) {`
			`TC3_CHECK(options != nullptr);`

			`std::vector<const TokenizationCodepointRange*> codepoint_config;`
			`if (options->tokenization_codepoint_config() != nullptr) {`
			`codepoint_config.insert(codepoint_config.end(),`
			`options->tokenization_codepoint_config()->begin(),`
			`options->tokenization_codepoint_config()->end());`
			`}`
			`std::vector<const CodepointRange*> internal_codepoint_config;`
			`if (options->internal_tokenizer_codepoint_ranges() != nullptr) {`
			`internal_codepoint_config.insert(`
			`internal_codepoint_config.end(),`
			`options->internal_tokenizer_codepoint_ranges()->begin(),`
			`options->internal_tokenizer_codepoint_ranges()->end());`
			`}`

			`const bool tokenize_on_script_change =`
			`options->tokenization_codepoint_config() != nullptr &&`
			`options->tokenize_on_script_change();`
			`return Tokenizer(options->tokenization_type(), unilib, codepoint_config,`
			`internal_codepoint_config, tokenize_on_script_change,`
			`/*icu_preserve_whitespace_tokens=*/false);`
			`}`

			`int AddRuleClassificationResult(const std::string& collection,`
			`const ModeFlag& enabled_modes,`
			`float priority_score, GrammarModelT* model) {`
			`const int result_id = model->rule_classification_result.size();`
			`model->rule_classification_result.emplace_back(new RuleClassificationResultT);`
			`RuleClassificationResultT* result =`
			`model->rule_classification_result.back().get();`
			`result->collection_name = collection;`
			`result->enabled_modes = enabled_modes;`
			`result->priority_score = priority_score;`
			`return result_id;`
			`}`

			`} // namespace libtextclassifier3`