You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
150 lines
4.3 KiB
150 lines
4.3 KiB
/*
 * wchar_t helpers, version for CPython >= 3.3.
 *
 * CPython 3.3 added support for sys.maxunicode == 0x10FFFF on all
 * platforms, even ones with wchar_t limited to 2 bytes.  As such,
 * the code here works from the outside like wchar_helper.h in the
 * case Py_UNICODE_SIZE == 4, but the implementation is very different.
 */
|
|
|
|
/* One 16-bit code unit.  The helpers below treat sequences of these as
   UTF-16: code points above 0xFFFF are represented by a surrogate pair
   (0xD800-0xDBFF followed by 0xDC00-0xDFFF). */
typedef uint16_t cffi_char16_t;
|
|
/* One 32-bit code unit; the helpers below store a whole code point
   (up to 0x10FFFF) in each one. */
typedef uint32_t cffi_char32_t;
|
|
|
|
|
|
static PyObject *
|
|
_my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
|
|
{
|
|
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, w, size);
|
|
}
|
|
|
|
static PyObject *
|
|
_my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
|
|
{
|
|
/* are there any surrogate pairs, and if so, how many? */
|
|
Py_ssize_t i, count_surrogates = 0;
|
|
for (i = 0; i < size - 1; i++) {
|
|
if (0xD800 <= w[i] && w[i] <= 0xDBFF &&
|
|
0xDC00 <= w[i+1] && w[i+1] <= 0xDFFF)
|
|
count_surrogates++;
|
|
}
|
|
if (count_surrogates == 0) {
|
|
/* no, fast path */
|
|
return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, w, size);
|
|
}
|
|
else
|
|
{
|
|
PyObject *result = PyUnicode_New(size - count_surrogates, 0x10FFFF);
|
|
Py_UCS4 *data;
|
|
assert(PyUnicode_KIND(result) == PyUnicode_4BYTE_KIND);
|
|
data = PyUnicode_4BYTE_DATA(result);
|
|
|
|
for (i = 0; i < size; i++)
|
|
{
|
|
cffi_char32_t ch = w[i];
|
|
if (0xD800 <= ch && ch <= 0xDBFF && i < size - 1) {
|
|
cffi_char32_t ch2 = w[i + 1];
|
|
if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
|
|
ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
|
|
i++;
|
|
}
|
|
}
|
|
*data++ = ch;
|
|
}
|
|
return result;
|
|
}
|
|
}
|
|
|
|
static int
|
|
_my_PyUnicode_AsSingleChar16(PyObject *unicode, cffi_char16_t *result,
|
|
char *err_got)
|
|
{
|
|
cffi_char32_t ch;
|
|
if (PyUnicode_GET_LENGTH(unicode) != 1) {
|
|
sprintf(err_got, "unicode string of length %zd",
|
|
PyUnicode_GET_LENGTH(unicode));
|
|
return -1;
|
|
}
|
|
ch = PyUnicode_READ_CHAR(unicode, 0);
|
|
|
|
if (ch > 0xFFFF)
|
|
{
|
|
sprintf(err_got, "larger-than-0xFFFF character");
|
|
return -1;
|
|
}
|
|
*result = (cffi_char16_t)ch;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
_my_PyUnicode_AsSingleChar32(PyObject *unicode, cffi_char32_t *result,
|
|
char *err_got)
|
|
{
|
|
if (PyUnicode_GET_LENGTH(unicode) != 1) {
|
|
sprintf(err_got, "unicode string of length %zd",
|
|
PyUnicode_GET_LENGTH(unicode));
|
|
return -1;
|
|
}
|
|
*result = PyUnicode_READ_CHAR(unicode, 0);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Return how many 16-bit units are needed to hold 'unicode': its length,
 * plus one extra unit for every character above 0xFFFF.  Only strings of
 * PyUnicode_4BYTE_KIND are scanned; for other kinds the length is
 * returned directly.
 */
static Py_ssize_t _my_PyUnicode_SizeAsChar16(PyObject *unicode)
{
    Py_ssize_t n_codepoints = PyUnicode_GET_LENGTH(unicode);
    Py_ssize_t n_units = n_codepoints;
    Py_UCS4 *cursor;
    Py_UCS4 *stop;

    if (PyUnicode_KIND(unicode) != PyUnicode_4BYTE_KIND) {
        return n_units;
    }

    cursor = PyUnicode_4BYTE_DATA(unicode);
    stop = cursor + n_codepoints;
    while (cursor < stop) {
        if (*cursor > 0xFFFF) {
            n_units++;          /* this one takes a surrogate pair */
        }
        cursor++;
    }
    return n_units;
}
|
|
|
|
/*
 * Return how many 32-bit units are needed to hold 'unicode', which is
 * simply its length as reported by PyUnicode_GET_LENGTH().
 */
static Py_ssize_t _my_PyUnicode_SizeAsChar32(PyObject *unicode)
{
    Py_ssize_t n_codepoints = PyUnicode_GET_LENGTH(unicode);

    return n_codepoints;
}
|
|
|
|
static int _my_PyUnicode_AsChar16(PyObject *unicode,
|
|
cffi_char16_t *result,
|
|
Py_ssize_t resultlen)
|
|
{
|
|
Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
|
|
unsigned int kind = PyUnicode_KIND(unicode);
|
|
void *data = PyUnicode_DATA(unicode);
|
|
Py_ssize_t i;
|
|
|
|
for (i = 0; i < len; i++) {
|
|
cffi_char32_t ordinal = PyUnicode_READ(kind, data, i);
|
|
if (ordinal > 0xFFFF) {
|
|
if (ordinal > 0x10FFFF) {
|
|
PyErr_Format(PyExc_ValueError,
|
|
"unicode character out of range for "
|
|
"conversion to char16_t: 0x%x", (int)ordinal);
|
|
return -1;
|
|
}
|
|
ordinal -= 0x10000;
|
|
*result++ = 0xD800 | (ordinal >> 10);
|
|
*result++ = 0xDC00 | (ordinal & 0x3FF);
|
|
}
|
|
else
|
|
*result++ = ordinal;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int _my_PyUnicode_AsChar32(PyObject *unicode,
|
|
cffi_char32_t *result,
|
|
Py_ssize_t resultlen)
|
|
{
|
|
if (PyUnicode_AsUCS4(unicode, (Py_UCS4 *)result, resultlen, 0) == NULL)
|
|
return -1;
|
|
return 0;
|
|
}
|