You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

135 lines
3.9 KiB

/*
*******************************************************************************
*
* © 2016 and later: Unicode, Inc. and others.
* License & terms of use: http://www.unicode.org/copyright.html
*
*******************************************************************************
*******************************************************************************
*
* Copyright (C) 2002, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*/
#include <stdio.h>
#include <stdlib.h>
#include <unicode/ustring.h>
#include <unicode/ubrk.h>
/* Forward declaration of the sample's entry point, which is defined later in
 * this file.
 * NOTE(review): U_CFUNC presumably gives the function C linkage so that a
 * separate driver can call it -- confirm against the ICU headers. */
U_CFUNC int c_main(void);
/*
 * Print the sub-range [start, end) of a UChar string on one line, as
 *     string[start..end-1] "text"
 *
 * str    writable UChar text buffer. A terminator is written at str[end] for
 *        the duration of the call and the original value is restored before
 *        returning, so str[end] must be a valid element of the buffer.
 * start  index of the first code unit to print.
 * end    index one past the last code unit to print.
 *
 * Ranges that cannot be printed safely (NULL buffer, negative start, or
 * end < start -- for instance when a caller forwards UBRK_DONE) are ignored
 * instead of indexing outside the buffer.
 */
void printTextRange(UChar* str, int32_t start, int32_t end)
{
    char charBuf[1000];
    UChar savedEndChar;

    if (str == NULL || start < 0 || end < start) {
        return;
    }

    /* Temporarily terminate the text at 'end' so the conversion below stops
     * at the range boundary; charBuf bounds the amount that is copied. */
    savedEndChar = str[end];
    str[end] = 0;
    u_austrncpy(charBuf, str + start, (int32_t)(sizeof(charBuf) - 1));
    charBuf[sizeof(charBuf) - 1] = 0;
    /* int32_t is not guaranteed to be 'int', so cast to match "%d". */
    printf("string[%2d..%2d] \"%s\"\n", (int)start, (int)(end - 1), charBuf);
    str[end] = savedEndChar;
}
/* Walk the iterator from its first boundary towards the end, printing the
 * text between every pair of consecutive boundaries. */
void printEachForward( UBreakIterator* boundary, UChar* str) {
    int32_t rangeStart = ubrk_first(boundary);
    int32_t rangeEnd = ubrk_next(boundary);

    /* The boundary that ends one range is the start of the next one. */
    while (rangeEnd != UBRK_DONE) {
        printTextRange(str, rangeStart, rangeEnd);
        rangeStart = rangeEnd;
        rangeEnd = ubrk_next(boundary);
    }
}
/* Walk the iterator from its last boundary back towards the beginning,
 * printing the text between every pair of consecutive boundaries. */
void printEachBackward( UBreakIterator* boundary, UChar* str) {
    int32_t rangeEnd = ubrk_last(boundary);
    int32_t rangeStart = ubrk_previous(boundary);

    /* The boundary that starts one range is the end of the one before it. */
    while (rangeStart != UBRK_DONE) {
        printTextRange(str, rangeStart, rangeEnd);
        rangeEnd = rangeStart;
        rangeStart = ubrk_previous(boundary);
    }
}
/*
 * Print the first element: the text between the iterator's first boundary and
 * the boundary that follows it.
 *
 * boundary  break iterator to query; its position is changed by this call.
 * str       text buffer handed to printTextRange().
 *
 * Nothing is printed when there is no boundary after the first one.
 */
void printFirst(UBreakIterator* boundary, UChar* str) {
    int32_t start = ubrk_first(boundary);
    int32_t end = ubrk_next(boundary);

    /* UBRK_DONE is not a text index; passing it on would make
     * printTextRange() index outside the buffer. */
    if (end == UBRK_DONE) {
        return;
    }
    printTextRange(str, start, end);
}
/*
 * Print the last element: the text between the iterator's last boundary and
 * the boundary that precedes it.
 *
 * boundary  break iterator to query; its position is changed by this call.
 * str       text buffer handed to printTextRange().
 *
 * Nothing is printed when there is no boundary before the last one.
 */
void printLast(UBreakIterator* boundary, UChar* str) {
    int32_t end = ubrk_last(boundary);
    int32_t start = ubrk_previous(boundary);

    /* UBRK_DONE is not a text index; passing it on would make
     * printTextRange() index outside the buffer. */
    if (start == UBRK_DONE) {
        return;
    }
    printTextRange(str, start, end);
}
/*
 * Print the element at a specified position: the text between the first
 * boundary following 'pos' and the boundary that precedes it.
 *
 * boundary  break iterator to query; its position is changed by this call.
 * pos       offset into the text whose enclosing element is wanted.
 * str       text buffer handed to printTextRange().
 *
 * Nothing is printed when no boundary follows 'pos' or when no boundary
 * precedes the one that was found.
 */
void printAt(UBreakIterator* boundary, int32_t pos , UChar* str) {
    int32_t start;
    int32_t end = ubrk_following(boundary, pos);

    /* UBRK_DONE is not a text index; passing it on would make
     * printTextRange() index outside the buffer. */
    if (end == UBRK_DONE) {
        return;
    }

    start = ubrk_previous(boundary);
    if (start == UBRK_DONE) {
        return;
    }
    printTextRange(str, start, end);
}
/*
 * Creating and using text boundaries.
 *
 * Converts a fixed sample sentence to UChar text, then prints its sentence
 * boundaries and its word boundaries (forward and backward), followed by the
 * first word element, the last word element and the element at offset 10.
 *
 * Returns 0 on success. If a break iterator cannot be opened, an error
 * message is printed and the process exits with status 1.
 */
int c_main( void ) {
    UBreakIterator *boundary;
    char cStringToExamine[] = "Aaa bbb ccc. Ddd eee fff.";
    /* sizeof() already counts the terminating NUL of the char string. */
    UChar stringToExamine[sizeof(cStringToExamine)+1];
    UErrorCode status = U_ZERO_ERROR;

    printf("\n\n"
           "C Boundary Analysis\n"
           "-------------------\n\n");
    printf("Examining: %s\n", cStringToExamine);
    u_uastrcpy(stringToExamine, cStringToExamine);

    /* print each sentence in forward and reverse order */
    boundary = ubrk_open(UBRK_SENTENCE, "en_us", stringToExamine,
                         -1, &status);
    if (U_FAILURE(status)) {
        printf("ubrk_open error: %s\n", u_errorName(status));
        exit(1);
    }
    printf("\n----- Sentence Boundaries, forward: -----------\n");
    printEachForward(boundary, stringToExamine);
    printf("\n----- Sentence Boundaries, backward: ----------\n");
    printEachBackward(boundary, stringToExamine);
    ubrk_close(boundary);

    /* print each word in order */
    boundary = ubrk_open(UBRK_WORD, "en_us", stringToExamine,
                         u_strlen(stringToExamine), &status);
    /* Check this open as well: on failure the iterator must not be used. */
    if (U_FAILURE(status)) {
        printf("ubrk_open error: %s\n", u_errorName(status));
        exit(1);
    }
    printf("\n----- Word Boundaries, forward: -----------\n");
    printEachForward(boundary, stringToExamine);
    printf("\n----- Word Boundaries, backward: ----------\n");
    printEachBackward(boundary, stringToExamine);

    /* print first element */
    printf("\n----- first: -------------\n");
    printFirst(boundary, stringToExamine);

    /* print last element */
    printf("\n----- last: --------------\n");
    printLast(boundary, stringToExamine);

    /* print word at charpos 10 */
    printf("\n----- at pos 10: ---------\n");
    printAt(boundary, 10 , stringToExamine);

    ubrk_close(boundary);
    printf("\nEnd of C boundary analysis\n");
    return 0;
}