You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
279 lines
11 KiB
279 lines
11 KiB
/*
|
|
* Copyright (C) 2008 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "java_lang_StringFactory.h"
|
|
|
|
#include "common_throws.h"
|
|
#include "handle_scope-inl.h"
|
|
#include "jni/jni_internal.h"
|
|
#include "mirror/object-inl.h"
|
|
#include "mirror/string-alloc-inl.h"
|
|
#include "native_util.h"
|
|
#include "nativehelper/jni_macros.h"
|
|
#include "nativehelper/scoped_local_ref.h"
|
|
#include "nativehelper/scoped_primitive_array.h"
|
|
#include "scoped_fast_native_object_access-inl.h"
|
|
#include "scoped_thread_state_change-inl.h"
|
|
|
|
namespace art {
|
|
|
|
static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data,
|
|
jint high, jint offset, jint byte_count) {
|
|
ScopedFastNativeObjectAccess soa(env);
|
|
if (UNLIKELY(java_data == nullptr)) {
|
|
ThrowNullPointerException("data == null");
|
|
return nullptr;
|
|
}
|
|
StackHandleScope<1> hs(soa.Self());
|
|
Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
|
|
int32_t data_size = byte_array->GetLength();
|
|
if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
|
|
soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
|
|
"length=%d; regionStart=%d; regionLength=%d", data_size,
|
|
offset, byte_count);
|
|
return nullptr;
|
|
}
|
|
gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
|
|
ObjPtr<mirror::String> result = mirror::String::AllocFromByteArray(soa.Self(),
|
|
byte_count,
|
|
byte_array,
|
|
offset,
|
|
high,
|
|
allocator_type);
|
|
return soa.AddLocalReference<jstring>(result);
|
|
}
|
|
|
|
// The char array passed as `java_data` must not be a null reference.
|
|
static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset,
|
|
jint char_count, jcharArray java_data) {
|
|
DCHECK(java_data != nullptr);
|
|
ScopedFastNativeObjectAccess soa(env);
|
|
StackHandleScope<1> hs(soa.Self());
|
|
Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray>(java_data)));
|
|
gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
|
|
ObjPtr<mirror::String> result = mirror::String::AllocFromCharArray(soa.Self(),
|
|
char_count,
|
|
char_array,
|
|
offset,
|
|
allocator_type);
|
|
return soa.AddLocalReference<jstring>(result);
|
|
}
|
|
|
|
static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) {
|
|
ScopedFastNativeObjectAccess soa(env);
|
|
if (UNLIKELY(to_copy == nullptr)) {
|
|
ThrowNullPointerException("toCopy == null");
|
|
return nullptr;
|
|
}
|
|
StackHandleScope<1> hs(soa.Self());
|
|
Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(to_copy)));
|
|
gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
|
|
ObjPtr<mirror::String> result = mirror::String::AllocFromString(soa.Self(),
|
|
string->GetLength(),
|
|
string,
|
|
/*offset=*/ 0,
|
|
allocator_type);
|
|
return soa.AddLocalReference<jstring>(result);
|
|
}
|
|
|
|
static jstring StringFactory_newStringFromUtf8Bytes(JNIEnv* env, jclass, jbyteArray java_data,
|
|
jint offset, jint byte_count) {
|
|
// Local Define in here
|
|
static const jchar kReplacementChar = 0xfffd;
|
|
static const int kDefaultBufferSize = 256;
|
|
static const int kTableUtf8Needed[] = {
|
|
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
|
|
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0 - 0xcf
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0 - 0xdf
|
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0 - 0xef
|
|
3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff
|
|
};
|
|
|
|
ScopedFastNativeObjectAccess soa(env);
|
|
if (UNLIKELY(java_data == nullptr)) {
|
|
ThrowNullPointerException("data == null");
|
|
return nullptr;
|
|
}
|
|
|
|
StackHandleScope<1> hs(soa.Self());
|
|
Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
|
|
int32_t data_size = byte_array->GetLength();
|
|
if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
|
|
soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
|
|
"length=%d; regionStart=%d; regionLength=%d", data_size,
|
|
offset, byte_count);
|
|
return nullptr;
|
|
}
|
|
|
|
/*
|
|
* This code converts a UTF-8 byte sequence to a Java String (UTF-16).
|
|
* It implements the W3C recommended UTF-8 decoder.
|
|
* https://www.w3.org/TR/encoding/#utf-8-decoder
|
|
*
|
|
* Unicode 3.2 Well-Formed UTF-8 Byte Sequences
|
|
* Code Points First Second Third Fourth
|
|
* U+0000..U+007F 00..7F
|
|
* U+0080..U+07FF C2..DF 80..BF
|
|
* U+0800..U+0FFF E0 A0..BF 80..BF
|
|
* U+1000..U+CFFF E1..EC 80..BF 80..BF
|
|
* U+D000..U+D7FF ED 80..9F 80..BF
|
|
* U+E000..U+FFFF EE..EF 80..BF 80..BF
|
|
* U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
|
|
* U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
|
|
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
|
*
|
|
* Please refer to Unicode as the authority.
|
|
* p.126 Table 3-7 in http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
|
|
*
|
|
* Handling Malformed Input
|
|
* The maximal subpart should be replaced by a single U+FFFD. Maximal subpart is
|
|
* the longest code unit subsequence starting at an unconvertible offset that is either
|
|
* 1) the initial subsequence of a well-formed code unit sequence, or
|
|
* 2) a subsequence of length one:
|
|
* One U+FFFD should be emitted for every sequence of bytes that is an incomplete prefix
|
|
* of a valid sequence, and with the conversion to restart after the incomplete sequence.
|
|
*
|
|
* For example, in byte sequence "41 C0 AF 41 F4 80 80 41", the maximal subparts are
|
|
* "C0", "AF", and "F4 80 80". "F4 80 80" can be the initial subsequence of "F4 80 80 80",
|
|
* but "C0" can't be the initial subsequence of any well-formed code unit sequence.
|
|
* Thus, the output should be "A\ufffd\ufffdA\ufffdA".
|
|
*
|
|
* Please refer to section "Best Practices for Using U+FFFD." in
|
|
* http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
|
|
*/
|
|
|
|
// Initial value
|
|
jchar stack_buffer[kDefaultBufferSize];
|
|
std::unique_ptr<jchar[]> allocated_buffer;
|
|
jchar* v;
|
|
if (byte_count <= kDefaultBufferSize) {
|
|
v = stack_buffer;
|
|
} else {
|
|
allocated_buffer.reset(new jchar[byte_count]);
|
|
v = allocated_buffer.get();
|
|
}
|
|
|
|
jbyte* d = byte_array->GetData();
|
|
DCHECK(d != nullptr);
|
|
|
|
int idx = offset;
|
|
int last = offset + byte_count;
|
|
int s = 0;
|
|
|
|
int code_point = 0;
|
|
int utf8_bytes_seen = 0;
|
|
int utf8_bytes_needed = 0;
|
|
int lower_bound = 0x80;
|
|
int upper_bound = 0xbf;
|
|
while (idx < last) {
|
|
int b = d[idx++] & 0xff;
|
|
if (utf8_bytes_needed == 0) {
|
|
if ((b & 0x80) == 0) { // ASCII char. 0xxxxxxx
|
|
v[s++] = (jchar) b;
|
|
continue;
|
|
}
|
|
|
|
if ((b & 0x40) == 0) { // 10xxxxxx is illegal as first byte
|
|
v[s++] = kReplacementChar;
|
|
continue;
|
|
}
|
|
|
|
// 11xxxxxx
|
|
int tableLookupIndex = b & 0x3f;
|
|
utf8_bytes_needed = kTableUtf8Needed[tableLookupIndex];
|
|
if (utf8_bytes_needed == 0) {
|
|
v[s++] = kReplacementChar;
|
|
continue;
|
|
}
|
|
|
|
// utf8_bytes_needed
|
|
// 1: b & 0x1f
|
|
// 2: b & 0x0f
|
|
// 3: b & 0x07
|
|
code_point = b & (0x3f >> utf8_bytes_needed);
|
|
if (b == 0xe0) {
|
|
lower_bound = 0xa0;
|
|
} else if (b == 0xed) {
|
|
upper_bound = 0x9f;
|
|
} else if (b == 0xf0) {
|
|
lower_bound = 0x90;
|
|
} else if (b == 0xf4) {
|
|
upper_bound = 0x8f;
|
|
}
|
|
} else {
|
|
if (b < lower_bound || b > upper_bound) {
|
|
// The bytes seen are ill-formed. Substitute them with U+FFFD
|
|
v[s++] = kReplacementChar;
|
|
code_point = 0;
|
|
utf8_bytes_needed = 0;
|
|
utf8_bytes_seen = 0;
|
|
lower_bound = 0x80;
|
|
upper_bound = 0xbf;
|
|
/*
|
|
* According to the Unicode Standard,
|
|
* "a UTF-8 conversion process is required to never consume well-formed
|
|
* subsequences as part of its error handling for ill-formed subsequences"
|
|
* The current byte could be part of well-formed subsequences. Reduce the
|
|
* index by 1 to parse it in next loop.
|
|
*/
|
|
idx--;
|
|
continue;
|
|
}
|
|
|
|
lower_bound = 0x80;
|
|
upper_bound = 0xbf;
|
|
code_point = (code_point << 6) | (b & 0x3f);
|
|
utf8_bytes_seen++;
|
|
if (utf8_bytes_needed != utf8_bytes_seen) {
|
|
continue;
|
|
}
|
|
|
|
// Encode chars from U+10000 up as surrogate pairs
|
|
if (code_point < 0x10000) {
|
|
v[s++] = (jchar) code_point;
|
|
} else {
|
|
v[s++] = (jchar) ((code_point >> 10) + 0xd7c0);
|
|
v[s++] = (jchar) ((code_point & 0x3ff) + 0xdc00);
|
|
}
|
|
|
|
utf8_bytes_seen = 0;
|
|
utf8_bytes_needed = 0;
|
|
code_point = 0;
|
|
}
|
|
}
|
|
|
|
// The bytes seen are ill-formed. Substitute them by U+FFFD
|
|
if (utf8_bytes_needed != 0) {
|
|
v[s++] = kReplacementChar;
|
|
}
|
|
|
|
ObjPtr<mirror::String> result = mirror::String::AllocFromUtf16(soa.Self(), s, v);
|
|
return soa.AddLocalReference<jstring>(result);
|
|
}
|
|
|
|
static JNINativeMethod gMethods[] = {
|
|
FAST_NATIVE_METHOD(StringFactory, newStringFromBytes, "([BIII)Ljava/lang/String;"),
|
|
FAST_NATIVE_METHOD(StringFactory, newStringFromChars, "(II[C)Ljava/lang/String;"),
|
|
FAST_NATIVE_METHOD(StringFactory, newStringFromString, "(Ljava/lang/String;)Ljava/lang/String;"),
|
|
FAST_NATIVE_METHOD(StringFactory, newStringFromUtf8Bytes, "([BII)Ljava/lang/String;"),
|
|
};
|
|
|
|
void register_java_lang_StringFactory(JNIEnv* env) {
|
|
REGISTER_NATIVE_METHODS("java/lang/StringFactory");
|
|
}
|
|
|
|
} // namespace art
|