You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

373 lines
13 KiB

/*
* Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "compile/PseudolocaleGenerator.h"
#include <algorithm>
#include "ResourceTable.h"
#include "ResourceValues.h"
#include "ValueVisitor.h"
#include "compile/Pseudolocalizer.h"
#include "util/Util.h"
using ::android::ConfigDescription;
using ::android::StringPiece;
using ::android::StringPiece16;
namespace aapt {
// A single annotated range of a StyledString's text. One UnifiedSpan stands for either a Span
// object or an UntranslatableSection, so that both kinds can be sorted and walked together.
// NOTE: the member order matters; the conversion helpers in this file brace-initialize this
// struct positionally as {tag, first_char, last_char}.
struct UnifiedSpan {
// Only present for Span objects. If not present, this was an UntranslatableSection.
Maybe<std::string> tag;
// The UTF-16 index into the string where this span starts.
uint32_t first_char;
// The UTF-16 index into the string where this span ends, inclusive.
uint32_t last_char;
};
// Strict weak ordering for UnifiedSpans: ascending by first_char, with ties broken by ascending
// last_char. The tag is not part of the ordering.
inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
  if (left.first_char != right.first_char) {
    return left.first_char < right.first_char;
  }
  return left.last_char < right.last_char;
}
// Converts a StringPool::Span into a UnifiedSpan that carries the span's name as its tag and the
// same first/last character indices.
inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
  const auto& tag_name = *span.name;
  return UnifiedSpan{tag_name, span.first_char, span.last_char};
}
// Converts an UntranslatableSection into a tag-less UnifiedSpan. The section's `end` is turned
// into an inclusive last_char by subtracting one.
inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
  const uint32_t first = static_cast<uint32_t>(section.start);
  const uint32_t last_inclusive = static_cast<uint32_t>(section.end) - 1;
  return UnifiedSpan{{}, first, last_inclusive};
}
// Merges the Span and UntranslatableSections of this StyledString into a single vector of
// UnifiedSpans. This will first check that the Spans are sorted in ascending order.
static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
// Ensure the Spans are sorted and converted.
std::vector<UnifiedSpan> sorted_spans;
sorted_spans.reserve(string.value->spans.size());
std::transform(string.value->spans.begin(), string.value->spans.end(),
std::back_inserter(sorted_spans), SpanToUnifiedSpan);
// Stable sort to ensure tag sequences like "<b><i>" are preserved.
std::stable_sort(sorted_spans.begin(), sorted_spans.end());
// Ensure the UntranslatableSections are sorted and converted.
std::vector<UnifiedSpan> sorted_untranslatable_sections;
sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
std::back_inserter(sorted_untranslatable_sections),
UntranslatableSectionToUnifiedSpan);
std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
std::vector<UnifiedSpan> merged_spans;
merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
auto span_iter = sorted_spans.begin();
auto untranslatable_iter = sorted_untranslatable_sections.begin();
while (span_iter != sorted_spans.end() &&
untranslatable_iter != sorted_untranslatable_sections.end()) {
if (*span_iter < *untranslatable_iter) {
merged_spans.push_back(std::move(*span_iter));
++span_iter;
} else {
merged_spans.push_back(std::move(*untranslatable_iter));
++untranslatable_iter;
}
}
while (span_iter != sorted_spans.end()) {
merged_spans.push_back(std::move(*span_iter));
++span_iter;
}
while (untranslatable_iter != sorted_untranslatable_sections.end()) {
merged_spans.push_back(std::move(*untranslatable_iter));
++untranslatable_iter;
}
return merged_spans;
}
// Pseudolocalizes the text of `string` using `method` and returns a new StyledString, allocated
// from `pool`, whose Spans have been re-indexed to match the transformed text.
//
// The Spans and UntranslatableSections are merged into one ordered list and walked with a stack
// that tracks nesting. Text between span boundaries is pseudolocalized piece by piece; text
// inside an UntranslatableSection is copied verbatim. `string` itself is not modified.
std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
                                                         Pseudolocalizer::Method method,
                                                         StringPool* pool) {
  Pseudolocalizer localizer(method);

  // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
  // This will effectively subdivide the string into multiple sections that can be individually
  // pseudolocalized, while keeping the span indices synchronized.
  std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);

  // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
  // runtime. So we will do all our processing in UTF-16, then convert back.
  const std::u16string text16 = util::Utf8ToUtf16(string->value->value);

  // Convenient wrapper around the text that allows us to work with StringPieces.
  const StringPiece16 text(text16);

  // The new string.
  std::string new_string = localizer.Start();

  // The stack that keeps track of what nested Span we're in.
  std::vector<size_t> span_stack;

  // The current position in the original text.
  uint32_t cursor = 0u;

  // The current position in the new text.
  uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
                                             new_string.size(), false);

  // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
  bool translatable = true;

  // Consumes `length` UTF-16 units of the original text starting at `cursor`, appends them to
  // the new string (pseudolocalized only while `translatable` is set) and advances both cursors.
  const auto consume = [&](uint32_t length) {
    const StringPiece16 substr = text.substr(cursor, length);
    cursor += substr.size();

    std::string new_substr = util::Utf16ToUtf8(substr);
    if (translatable) {
      new_substr = localizer.Text(new_substr);
    }
    new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
                                       new_substr.size(), false);
    new_string += new_substr;
  };

  size_t span_idx = 0u;
  while (span_idx < merged_spans.size() || !span_stack.empty()) {
    UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
    UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];

    if (span != nullptr) {
      // last_char is inclusive, so a span that starts exactly on the parent's last character is
      // still nested inside the parent; hence `>=` rather than `>`.
      if (parent_span == nullptr || parent_span->last_char >= span->first_char) {
        // There is no parent, or this span is the child of the parent.
        // Pseudolocalize all the text until this span.
        consume(span->first_char - cursor);

        // Rewrite the first_char.
        span->first_char = new_cursor;
        if (!span->tag) {
          // An untranslatable section has begun!
          translatable = false;
        }
        span_stack.push_back(span_idx);
        ++span_idx;
        continue;
      }
    }

    if (parent_span != nullptr) {
      // There is a parent, and either this span is not a child of it, or there are no more spans.
      // Pop this off the stack, consuming the text up to and including its last character.
      consume(parent_span->last_char - cursor + 1);

      // Rewrite the last_char (inclusive).
      parent_span->last_char = new_cursor - 1;
      if (!parent_span->tag) {
        // An end to an untranslatable section. Only a tag-less span may re-enable translation;
        // closing a tagged span nested inside an untranslatable section must not.
        translatable = true;
      }
      span_stack.pop_back();
    }
  }

  // Finish the pseudolocalization at the end of the string.
  new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
  new_string += localizer.End();

  StyleString localized;
  localized.str = std::move(new_string);

  // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
  for (UnifiedSpan& span : merged_spans) {
    if (span.tag) {
      localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
    }
  }
  return util::make_unique<StyledString>(pool->MakeRef(localized));
}
namespace {
class Visitor : public ValueVisitor {
public:
// Either value or item will be populated upon visiting the value.
std::unique_ptr<Value> value;
std::unique_ptr<Item> item;
Visitor(StringPool* pool, Pseudolocalizer::Method method)
: pool_(pool), method_(method), localizer_(method) {}
void Visit(Plural* plural) override {
CloningValueTransformer cloner(pool_);
std::unique_ptr<Plural> localized = util::make_unique<Plural>();
for (size_t i = 0; i < plural->values.size(); i++) {
Visitor sub_visitor(pool_, method_);
if (plural->values[i]) {
plural->values[i]->Accept(&sub_visitor);
if (sub_visitor.item) {
localized->values[i] = std::move(sub_visitor.item);
} else {
localized->values[i] = plural->values[i]->Transform(cloner);
}
}
}
localized->SetSource(plural->GetSource());
localized->SetWeak(true);
value = std::move(localized);
}
void Visit(String* string) override {
const StringPiece original_string = *string->value;
std::string result = localizer_.Start();
// Pseudolocalize only the translatable sections.
size_t start = 0u;
for (const UntranslatableSection& section : string->untranslatable_sections) {
// Pseudolocalize the content before the untranslatable section.
const size_t len = section.start - start;
if (len > 0u) {
result += localizer_.Text(original_string.substr(start, len));
}
// Copy the untranslatable content.
result += original_string.substr(section.start, section.end - section.start);
start = section.end;
}
// Pseudolocalize the content after the last untranslatable section.
if (start != original_string.size()) {
const size_t len = original_string.size() - start;
result += localizer_.Text(original_string.substr(start, len));
}
result += localizer_.End();
std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
localized->SetSource(string->GetSource());
localized->SetWeak(true);
item = std::move(localized);
}
void Visit(StyledString* string) override {
item = PseudolocalizeStyledString(string, method_, pool_);
item->SetSource(string->GetSource());
item->SetWeak(true);
}
private:
DISALLOW_COPY_AND_ASSIGN(Visitor);
StringPool* pool_;
Pseudolocalizer::Method method_;
Pseudolocalizer localizer_;
};
// Returns a copy of `base` whose language/country are replaced by the pseudolocale for `m`:
// "en"/"XA" for kAccent and "ar"/"XB" for kBidi. Any other method yields an unmodified copy.
ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
                                              Pseudolocalizer::Method m) {
  const char* language = nullptr;
  const char* country = nullptr;
  switch (m) {
    case Pseudolocalizer::Method::kAccent:
      language = "en";
      country = "XA";
      break;
    case Pseudolocalizer::Method::kBidi:
      language = "ar";
      country = "XB";
      break;
    default:
      return base;
  }

  ConfigDescription modified = base;
  modified.language[0] = language[0];
  modified.language[1] = language[1];
  modified.country[0] = country[0];
  modified.country[1] = country[1];
  return modified;
}
// Generates the pseudolocalized variant of `original_value` for `method` and stores it in `entry`
// under the matching pseudolocale configuration. Nothing is stored when the value cannot be
// pseudolocalized or when the entry already holds a value for that configuration.
void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
                            ResourceConfigValue* original_value,
                            StringPool* pool, ResourceEntry* entry) {
  Visitor visitor(pool, method);
  original_value->value->Accept(&visitor);

  // Prefer the visitor's `value`; fall back to its `item`.
  std::unique_ptr<Value> localized_value = std::move(visitor.value);
  if (!localized_value) {
    localized_value = std::move(visitor.item);
  }
  if (!localized_value) {
    return;
  }

  const ConfigDescription pseudo_config =
      ModifyConfigForPseudoLocale(original_value->config, method);
  ResourceConfigValue* target = entry->FindOrCreateValue(pseudo_config, original_value->product);
  if (target->value) {
    // Only use auto-generated pseudo-localization if none is defined.
    return;
  }
  target->value = std::move(localized_value);
}
// A value is pseudolocalizable when its configuration does not differ from the default
// configuration in locale and the value itself reports being translatable.
static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
  const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
  const bool defines_locale = (diff & ConfigDescription::CONFIG_LOCALE) != 0;
  return !defines_locale && config_value->value->IsTranslatable();
}
} // namespace
// Walks every entry of every type in every package of `table` and, for each pseudolocalizable
// value, adds the accent and bidi pseudolocale variants (in that order). Always returns true.
bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
  const Pseudolocalizer::Method methods[] = {
      Pseudolocalizer::Method::kAccent,
      Pseudolocalizer::Method::kBidi,
  };

  for (auto& package : table->packages) {
    for (auto& type : package->types) {
      for (auto& entry : type->entries) {
        // Collect the candidates up front, before new values are added to the entry.
        const std::vector<ResourceConfigValue*> candidates =
            entry->FindValuesIf(IsPseudolocalizable);
        for (ResourceConfigValue* candidate : candidates) {
          for (const Pseudolocalizer::Method method : methods) {
            PseudolocalizeIfNeeded(method, candidate, &table->string_pool, entry.get());
          }
        }
      }
    }
  }
  return true;
}
} // namespace aapt