You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
142 lines
4.8 KiB
142 lines
4.8 KiB
/*
|
|
* Copyright (C) 2015 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <cstdlib>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include <cutils/log.h>
|
|
#include <unicode/utf.h>
|
|
#include <unicode/utf8.h>
|
|
|
|
#include "minikin/U16StringPiece.h"
|
|
|
|
namespace minikin {
|
|
|
|
// src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null.
|
|
// Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
|
|
void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
|
|
size_t* offset) {
|
|
size_t input_ix = 0;
|
|
size_t output_ix = 0;
|
|
bool seen_offset = false;
|
|
|
|
while (src[input_ix] != 0) {
|
|
switch (src[input_ix]) {
|
|
case '\'':
|
|
// single ASCII char
|
|
LOG_ALWAYS_FATAL_IF(static_cast<uint8_t>(src[input_ix]) >= 0x80);
|
|
input_ix++;
|
|
LOG_ALWAYS_FATAL_IF(src[input_ix] == 0);
|
|
LOG_ALWAYS_FATAL_IF(output_ix >= buf_size);
|
|
buf[output_ix++] = (uint16_t)src[input_ix++];
|
|
LOG_ALWAYS_FATAL_IF(src[input_ix] != '\'');
|
|
input_ix++;
|
|
break;
|
|
case 'u':
|
|
case 'U': {
|
|
// Unicode codepoint in hex syntax
|
|
input_ix++;
|
|
LOG_ALWAYS_FATAL_IF(src[input_ix] != '+');
|
|
input_ix++;
|
|
char* endptr = (char*)src + input_ix;
|
|
unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16);
|
|
size_t num_hex_digits = endptr - (src + input_ix);
|
|
|
|
// also triggers on invalid number syntax, digits = 0
|
|
LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u);
|
|
LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u);
|
|
LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu);
|
|
input_ix += num_hex_digits;
|
|
if (U16_LENGTH(codepoint) == 1) {
|
|
LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size);
|
|
buf[output_ix++] = codepoint;
|
|
} else {
|
|
// UTF-16 encoding
|
|
LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size);
|
|
buf[output_ix++] = U16_LEAD(codepoint);
|
|
buf[output_ix++] = U16_TRAIL(codepoint);
|
|
}
|
|
break;
|
|
}
|
|
case ' ':
|
|
input_ix++;
|
|
break;
|
|
case '|':
|
|
LOG_ALWAYS_FATAL_IF(seen_offset);
|
|
LOG_ALWAYS_FATAL_IF(offset == nullptr);
|
|
*offset = output_ix;
|
|
seen_offset = true;
|
|
input_ix++;
|
|
break;
|
|
default:
|
|
LOG_ALWAYS_FATAL("Unexpected Character");
|
|
}
|
|
}
|
|
LOG_ALWAYS_FATAL_IF(result_size == nullptr);
|
|
*result_size = output_ix;
|
|
LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr);
|
|
}
|
|
|
|
// Parses a string in the ParseUnicode syntax (e.g. "U+1F431 | 'h' 'i'") and returns the resulting
// UTF-16 code units. offset is forwarded to ParseUnicode unchanged: when it is non-null it
// receives the position of the "|" marker.
std::vector<uint16_t> parseUnicodeStringWithOffset(const std::string& in, size_t* offset) {
    // The output buffer is given one UTF-16 unit per input character.
    std::vector<uint16_t> result(in.size());
    size_t result_size = 0;
    ParseUnicode(result.data(), result.size(), in.c_str(), &result_size, offset);
    // Drop the unused tail so the vector holds exactly the parsed units.
    result.resize(result_size);
    return result;
}
|
|
|
|
std::vector<uint16_t> parseUnicodeString(const std::string& in) {
|
|
return parseUnicodeStringWithOffset(in, nullptr);
|
|
}
|
|
|
|
std::vector<uint16_t> utf8ToUtf16(const std::string& text) {
|
|
std::vector<uint16_t> result;
|
|
int32_t i = 0;
|
|
const int32_t textLength = static_cast<int32_t>(text.size());
|
|
uint32_t c = 0;
|
|
while (i < textLength) {
|
|
U8_NEXT(text.c_str(), i, textLength, c);
|
|
if (U16_LENGTH(c) == 1) {
|
|
result.push_back(c);
|
|
} else {
|
|
result.push_back(U16_LEAD(c));
|
|
result.push_back(U16_TRAIL(c));
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
std::string utf16ToUtf8(const U16StringPiece& u16String) {
|
|
const uint32_t textLength = u16String.size();
|
|
uint32_t i = 0;
|
|
uint32_t c = 0;
|
|
|
|
std::string out;
|
|
out.reserve(textLength * 4);
|
|
|
|
while (i < textLength) {
|
|
U16_NEXT(u16String.data(), i, textLength, c);
|
|
|
|
char buf[U8_MAX_LENGTH] = {};
|
|
uint32_t outIndex = 0;
|
|
U8_APPEND_UNSAFE(buf, outIndex, c);
|
|
out.append(buf, outIndex);
|
|
}
|
|
return out;
|
|
}
|
|
|
|
} // namespace minikin
|