You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
183 lines
3.6 KiB
183 lines
3.6 KiB
/* Copyright 2016 The Chromium OS Authors. All rights reserved.
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
* found in the LICENSE file.
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <sys/types.h>
|
|
|
|
#ifdef CRAS_DBUS
|
|
#include <dbus/dbus.h>
|
|
#endif
|
|
|
|
#include "cras_utf8.h"
|
|
#include "cras_util.h"
|
|
|
|
static const uint8_t kUTF8ByteOrderMask[3] = { 0xef, 0xbb, 0xbf };
|
|
|
|
typedef struct u8range {
|
|
uint8_t min;
|
|
uint8_t max;
|
|
} u8range_t;
|
|
|
|
static const u8range_t kUTF8TwoByteSeq[] = {
|
|
{ 0xc2, 0xdf },
|
|
{ 0x80, 0xbf },
|
|
{ 0, 0 },
|
|
};
|
|
|
|
static const u8range_t kUTF8ByteSeqE0[] = {
|
|
{ 0xe0, 0xe0 },
|
|
{ 0xa0, 0xbf },
|
|
{ 0x80, 0xbf },
|
|
{ 0, 0 },
|
|
};
|
|
|
|
static const u8range_t kUTF8ByteSeqE1EC[] = {
|
|
{ 0xe1, 0xec },
|
|
{ 0x80, 0xbf },
|
|
{ 0x80, 0xbf },
|
|
{ 0, 0 },
|
|
};
|
|
|
|
static const u8range_t kUTF8ByteSeqED[] = {
|
|
{ 0xed, 0xed },
|
|
{ 0x80, 0x9f },
|
|
{ 0x80, 0xbf },
|
|
{ 0, 0 },
|
|
};
|
|
|
|
static const u8range_t kUTF8ByteSeqEEEF[] = {
|
|
{ 0xee, 0xef },
|
|
{ 0x80, 0xbf },
|
|
{ 0x80, 0xbf },
|
|
{ 0, 0 },
|
|
};
|
|
|
|
static const u8range_t kUTF8ByteSeqF0[] = {
|
|
{ 0xf0, 0xf0 }, { 0x90, 0xbf }, { 0x80, 0xbf },
|
|
{ 0x80, 0xbf }, { 0, 0 },
|
|
};
|
|
|
|
static const u8range_t kUTF8ByteSeqF1F3[] = {
|
|
{ 0xf1, 0xf3 }, { 0x80, 0xbf }, { 0x80, 0xbf },
|
|
{ 0x80, 0xbf }, { 0, 0 },
|
|
};
|
|
|
|
static const u8range_t kUTF8ByteSeqF4[] = {
|
|
{ 0xf4, 0xf4 }, { 0x80, 0x8f }, { 0x80, 0xbf },
|
|
{ 0x80, 0xbf }, { 0, 0 },
|
|
};
|
|
|
|
static const u8range_t kUTF8NullRange[] = { { 0, 0 } };
|
|
|
|
typedef struct utf8seq {
|
|
const u8range_t *ranges;
|
|
} utf8seq_t;
|
|
|
|
static const utf8seq_t kUTF8Sequences[] = {
|
|
{ kUTF8TwoByteSeq }, { kUTF8ByteSeqE0 }, { kUTF8ByteSeqE1EC },
|
|
{ kUTF8ByteSeqED }, { kUTF8ByteSeqEEEF }, { kUTF8ByteSeqF0 },
|
|
{ kUTF8ByteSeqF1F3 }, { kUTF8ByteSeqF4 }, { kUTF8NullRange }
|
|
};
|
|
|
|
int valid_utf8_string(const char *string, size_t *bad_pos)
|
|
{
|
|
int bom_chars = 0;
|
|
uint8_t byte;
|
|
const char *pos = string;
|
|
int ret = 1;
|
|
const utf8seq_t *seq = NULL;
|
|
const u8range_t *range = NULL;
|
|
|
|
if (!pos) {
|
|
ret = 0;
|
|
goto error;
|
|
}
|
|
|
|
while ((byte = (uint8_t) * (pos++))) {
|
|
if (!range || range->min == 0) {
|
|
if (byte < 128) {
|
|
/* Ascii character. */
|
|
continue;
|
|
}
|
|
|
|
if (bom_chars < ARRAY_SIZE(kUTF8ByteOrderMask)) {
|
|
if (byte == kUTF8ByteOrderMask[bom_chars]) {
|
|
bom_chars++;
|
|
continue;
|
|
} else {
|
|
/* Characters not matching BOM.
|
|
* Rewind and assume that there is
|
|
* no BOM. */
|
|
bom_chars =
|
|
ARRAY_SIZE(kUTF8ByteOrderMask);
|
|
pos = string;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
/* Find the matching sequence of characters by
|
|
* matching the first character in the sequence.
|
|
*/
|
|
seq = kUTF8Sequences;
|
|
while (seq->ranges->min != 0) {
|
|
if (byte >= seq->ranges->min &&
|
|
byte <= seq->ranges->max) {
|
|
/* Matching sequence. */
|
|
break;
|
|
}
|
|
seq++;
|
|
}
|
|
|
|
if (seq->ranges->min == 0) {
|
|
/* Could not find a matching sequence. */
|
|
ret = 0;
|
|
goto error;
|
|
}
|
|
|
|
/* Found the appropriate sequence. */
|
|
range = seq->ranges + 1;
|
|
continue;
|
|
}
|
|
|
|
if (byte >= range->min && byte <= range->max) {
|
|
range++;
|
|
continue;
|
|
}
|
|
|
|
/* This character doesn't belong in UTF8. */
|
|
ret = 0;
|
|
goto error;
|
|
}
|
|
|
|
if (range && range->min != 0) {
|
|
/* Stopped in the middle of a sequence. */
|
|
ret = 0;
|
|
}
|
|
|
|
error:
|
|
if (bad_pos)
|
|
*bad_pos = pos - string - 1;
|
|
return ret;
|
|
}
|
|
|
|
#ifdef CRAS_DBUS
|
|
/* Use the DBus implementation if available to ensure that the UTF-8
|
|
* sequences match those expected by the DBus implementation. */
|
|
|
|
int is_utf8_string(const char *string)
|
|
{
|
|
return !!dbus_validate_utf8(string, NULL);
|
|
}
|
|
|
|
#else
|
|
|
|
int is_utf8_string(const char *string)
|
|
{
|
|
return valid_utf8_string(string, NULL);
|
|
}
|
|
|
|
#endif
|