You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

66 lines
1.6 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/* unicode.c - convert between Unicode and UTF-8
*
* Copyright 2020 The Android Open Source Project.
*
* Loosely based on the Plan9/Inferno unicode(1).
USE_UNICODE(NEWTOY(unicode, "<1", TOYFLAG_USR|TOYFLAG_BIN))
config UNICODE
bool "unicode"
default n
help
usage: unicode [[min]-max]
Convert between Unicode code points and UTF-8, in both directions.
*/
#define FOR_unicode
#include "toys.h"
// Print one line describing code point wc: "U+XXXX : " followed by either a
// control character mnemonic (below U+0020, or DEL for U+007F), or the UTF-8
// encoded character and, when the encoding is longer than one byte, each
// encoded byte in hex.
static void codepoint(unsigned wc) {
  // Mnemonics for U+0000..U+001F packed as 32 fixed three-character fields
  // (two-letter names are space padded), so the entry for wc is at low+wc*3.
  const char *low="NULSOHSTXETXEOTENQACKBELBS HT LF VT FF CR SO SI DLEDC1DC2DC3DC4"
    "NAKSYNETBCANEM SUBESCFS GS RS US ";
  unsigned n, i;

  printf("U+%04X : ", wc);
  if (wc < ' ') printf("%.3s", low+(wc*3));
  else if (wc == 0x7f) printf("DEL");
  else {
    // The value returned by wctoutf8 is used as the encoded length: terminate
    // the bytes written to toybuf there so they can be printed with %s.
    toybuf[n = wctoutf8(toybuf, wc)] = 0;
    printf("%s%s", toybuf, n>1 ? " :":"");
    // Convert through unsigned char: where plain char is signed, bytes >= 0x80
    // would sign-extend and print as 0xffffffd9 instead of 0xd9.
    if (n>1) for (i = 0; i < n; i++) printf(" %#02x", (unsigned char)toybuf[i]);
  }
  xputc('\n');
}
// Command entry point: treat each argument as a hex range ("660-666"), a
// single hex code point ("666"), or otherwise as a string whose characters
// are each described in turn.
void unicode_main(void)
{
  unsigned from, to;
  char next, **args;

  for (args = toys.optargs; *args; args++) {
    // unicode 660-666 => table of `U+0660 : ٠ : 0xd9 0xa0` etc.
    // The trailing %c only exists to detect junk after the range: exactly 2
    // conversions means the whole argument was "hex-hex".
    if (sscanf(*args, "%x-%x%c", &from, &to, &next) == 2) {
      // Check for the end before incrementing. Using `from <= to` as the loop
      // condition never terminates when to is 0xffffffff, because the
      // unsigned counter wraps around to 0.
      if (from <= to) for (;;) {
        codepoint(from);
        if (from == to) break;
        from++;
      }
    // unicode 666 => just `U+0666 : ٦ : 0xd9 0xa6`.
    // Exactly 1 conversion means a hex number with nothing after it.
    } else if (sscanf(*args, "%x%c", &from, &next) == 1) {
      codepoint(from);
    // unicode hello => table showing every character in the string.
    } else {
      char *s = *args;
      size_t l = strlen(s);
      wchar_t wc;
      int n;

      // Advance by the length utf8towc reports for each character. The loop
      // ends on the first non-positive return.
      // NOTE(review): presumably that means end of string or an undecodable
      // sequence, in which case the rest of the argument is silently
      // skipped -- confirm against utf8towc.
      while ((n = utf8towc(&wc, s, l)) > 0) {
        codepoint(wc);
        s += n;
        l -= n;
      }
    }
  }
}