/* * Copyright (C) 2008 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "java_lang_StringFactory.h" #include "common_throws.h" #include "handle_scope-inl.h" #include "jni/jni_internal.h" #include "mirror/object-inl.h" #include "mirror/string-alloc-inl.h" #include "native_util.h" #include "nativehelper/jni_macros.h" #include "nativehelper/scoped_local_ref.h" #include "nativehelper/scoped_primitive_array.h" #include "scoped_fast_native_object_access-inl.h" #include "scoped_thread_state_change-inl.h" namespace art { static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data, jint high, jint offset, jint byte_count) { ScopedFastNativeObjectAccess soa(env); if (UNLIKELY(java_data == nullptr)) { ThrowNullPointerException("data == null"); return nullptr; } StackHandleScope<1> hs(soa.Self()); Handle byte_array(hs.NewHandle(soa.Decode(java_data))); int32_t data_size = byte_array->GetLength(); if ((offset | byte_count) < 0 || byte_count > data_size - offset) { soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;", "length=%d; regionStart=%d; regionLength=%d", data_size, offset, byte_count); return nullptr; } gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); ObjPtr result = mirror::String::AllocFromByteArray(soa.Self(), byte_count, byte_array, offset, high, allocator_type); return soa.AddLocalReference(result); } // The char array passed as `java_data` must not be a null reference. static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset, jint char_count, jcharArray java_data) { DCHECK(java_data != nullptr); ScopedFastNativeObjectAccess soa(env); StackHandleScope<1> hs(soa.Self()); Handle char_array(hs.NewHandle(soa.Decode(java_data))); gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); ObjPtr result = mirror::String::AllocFromCharArray(soa.Self(), char_count, char_array, offset, allocator_type); return soa.AddLocalReference(result); } static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) { ScopedFastNativeObjectAccess soa(env); if (UNLIKELY(to_copy == nullptr)) { ThrowNullPointerException("toCopy == null"); return nullptr; } StackHandleScope<1> hs(soa.Self()); Handle string(hs.NewHandle(soa.Decode(to_copy))); gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); ObjPtr result = mirror::String::AllocFromString(soa.Self(), string->GetLength(), string, /*offset=*/ 0, allocator_type); return soa.AddLocalReference(result); } static jstring StringFactory_newStringFromUtf8Bytes(JNIEnv* env, jclass, jbyteArray java_data, jint offset, jint byte_count) { // Local Define in here static const jchar kReplacementChar = 0xfffd; static const int kDefaultBufferSize = 256; static const int kTableUtf8Needed[] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0 - 0xcf 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0 - 0xdf 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0 - 0xef 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff }; ScopedFastNativeObjectAccess soa(env); if (UNLIKELY(java_data == nullptr)) { ThrowNullPointerException("data == null"); return nullptr; } StackHandleScope<1> hs(soa.Self()); Handle byte_array(hs.NewHandle(soa.Decode(java_data))); int32_t data_size = byte_array->GetLength(); if ((offset | byte_count) < 0 || byte_count > data_size - offset) { soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;", "length=%d; regionStart=%d; regionLength=%d", data_size, offset, byte_count); return nullptr; } /* * This code converts a UTF-8 byte sequence to a Java String (UTF-16). * It implements the W3C recommended UTF-8 decoder. * https://www.w3.org/TR/encoding/#utf-8-decoder * * Unicode 3.2 Well-Formed UTF-8 Byte Sequences * Code Points First Second Third Fourth * U+0000..U+007F 00..7F * U+0080..U+07FF C2..DF 80..BF * U+0800..U+0FFF E0 A0..BF 80..BF * U+1000..U+CFFF E1..EC 80..BF 80..BF * U+D000..U+D7FF ED 80..9F 80..BF * U+E000..U+FFFF EE..EF 80..BF 80..BF * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF * * Please refer to Unicode as the authority. * p.126 Table 3-7 in http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf * * Handling Malformed Input * The maximal subpart should be replaced by a single U+FFFD. Maximal subpart is * the longest code unit subsequence starting at an unconvertible offset that is either * 1) the initial subsequence of a well-formed code unit sequence, or * 2) a subsequence of length one: * One U+FFFD should be emitted for every sequence of bytes that is an incomplete prefix * of a valid sequence, and with the conversion to restart after the incomplete sequence. * * For example, in byte sequence "41 C0 AF 41 F4 80 80 41", the maximal subparts are * "C0", "AF", and "F4 80 80". "F4 80 80" can be the initial subsequence of "F4 80 80 80", * but "C0" can't be the initial subsequence of any well-formed code unit sequence. * Thus, the output should be "A\ufffd\ufffdA\ufffdA". * * Please refer to section "Best Practices for Using U+FFFD." in * http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf */ // Initial value jchar stack_buffer[kDefaultBufferSize]; std::unique_ptr allocated_buffer; jchar* v; if (byte_count <= kDefaultBufferSize) { v = stack_buffer; } else { allocated_buffer.reset(new jchar[byte_count]); v = allocated_buffer.get(); } jbyte* d = byte_array->GetData(); DCHECK(d != nullptr); int idx = offset; int last = offset + byte_count; int s = 0; int code_point = 0; int utf8_bytes_seen = 0; int utf8_bytes_needed = 0; int lower_bound = 0x80; int upper_bound = 0xbf; while (idx < last) { int b = d[idx++] & 0xff; if (utf8_bytes_needed == 0) { if ((b & 0x80) == 0) { // ASCII char. 0xxxxxxx v[s++] = (jchar) b; continue; } if ((b & 0x40) == 0) { // 10xxxxxx is illegal as first byte v[s++] = kReplacementChar; continue; } // 11xxxxxx int tableLookupIndex = b & 0x3f; utf8_bytes_needed = kTableUtf8Needed[tableLookupIndex]; if (utf8_bytes_needed == 0) { v[s++] = kReplacementChar; continue; } // utf8_bytes_needed // 1: b & 0x1f // 2: b & 0x0f // 3: b & 0x07 code_point = b & (0x3f >> utf8_bytes_needed); if (b == 0xe0) { lower_bound = 0xa0; } else if (b == 0xed) { upper_bound = 0x9f; } else if (b == 0xf0) { lower_bound = 0x90; } else if (b == 0xf4) { upper_bound = 0x8f; } } else { if (b < lower_bound || b > upper_bound) { // The bytes seen are ill-formed. Substitute them with U+FFFD v[s++] = kReplacementChar; code_point = 0; utf8_bytes_needed = 0; utf8_bytes_seen = 0; lower_bound = 0x80; upper_bound = 0xbf; /* * According to the Unicode Standard, * "a UTF-8 conversion process is required to never consume well-formed * subsequences as part of its error handling for ill-formed subsequences" * The current byte could be part of well-formed subsequences. Reduce the * index by 1 to parse it in next loop. */ idx--; continue; } lower_bound = 0x80; upper_bound = 0xbf; code_point = (code_point << 6) | (b & 0x3f); utf8_bytes_seen++; if (utf8_bytes_needed != utf8_bytes_seen) { continue; } // Encode chars from U+10000 up as surrogate pairs if (code_point < 0x10000) { v[s++] = (jchar) code_point; } else { v[s++] = (jchar) ((code_point >> 10) + 0xd7c0); v[s++] = (jchar) ((code_point & 0x3ff) + 0xdc00); } utf8_bytes_seen = 0; utf8_bytes_needed = 0; code_point = 0; } } // The bytes seen are ill-formed. Substitute them by U+FFFD if (utf8_bytes_needed != 0) { v[s++] = kReplacementChar; } ObjPtr result = mirror::String::AllocFromUtf16(soa.Self(), s, v); return soa.AddLocalReference(result); } static JNINativeMethod gMethods[] = { FAST_NATIVE_METHOD(StringFactory, newStringFromBytes, "([BIII)Ljava/lang/String;"), FAST_NATIVE_METHOD(StringFactory, newStringFromChars, "(II[C)Ljava/lang/String;"), FAST_NATIVE_METHOD(StringFactory, newStringFromString, "(Ljava/lang/String;)Ljava/lang/String;"), FAST_NATIVE_METHOD(StringFactory, newStringFromUtf8Bytes, "([BII)Ljava/lang/String;"), }; void register_java_lang_StringFactory(JNIEnv* env) { REGISTER_NATIVE_METHODS("java/lang/StringFactory"); } } // namespace art