/* UTF8 code from http://www.cprogramming.com/ */
/* NOTE(review): the guard name _UTF8_H lies in the namespace C reserves for
   the implementation (underscore + uppercase). It is left unchanged here in
   case other files test for it. */
#ifndef _UTF8_H
#define _UTF8_H

/* Bring in every type the prototypes below mention, so this header can be
   the first include of a translation unit:
     <stdarg.h>    - va_list   (UTF8_vprintf)
     <stddef.h>    - size_t    (UTF8_memchr)
     <sys/types.h> - u_int32_t (BSD-style fixed-width name) */
#include <stdarg.h>
#include <stddef.h>
#include <sys/types.h>

/* is c the start of a utf8 sequence?
   True for every byte whose top two bits are not 10, i.e. every byte that
   is not a UTF-8 continuation byte (0x80..0xBF). */
#define isutf(c) (((c)&0xC0)!=0x80)

/* convert UTF-8 data to wide character.
   NOTE(review): sz and srcsz presumably bound dest and src; confirm the
   units and the meaning of the return value against the implementation. */
int UTF8_toucs(u_int32_t *dest, int sz, char *src, int srcsz);

/* the opposite conversion: wide characters to UTF-8 data */
int UTF8_toutf8(char *dest, int sz, u_int32_t *src, int srcsz);

/* single character to UTF-8 */
int UTF8_wc_toutf8(char *dest, u_int32_t ch);

/* character number to byte offset */
int UTF8_offset(char *str, int charnum);

/* byte offset to character number */
int UTF8_charnum(char *s, int offset);

/* return next character, updating an index variable (*i) */
u_int32_t UTF8_nextchar(char *s, int *i);

/* move to next character */
void UTF8_inc(char *s, int *i);

/* move to previous character */
void UTF8_dec(char *s, int *i);

/* returns length of next utf-8 sequence */
int UTF8_seqlen(char *s);

/* assuming src points to the character after a backslash, read an
   escape sequence, storing the result in dest and returning the number of
   input characters processed */
int UTF8_read_escape_sequence(char *src, u_int32_t *dest);

/* given a wide character, convert it to an ASCII escape sequence stored in
   buf, where buf is "sz" bytes. returns the number of characters output. */
int UTF8_escape_wchar(char *buf, int sz, u_int32_t ch);

/* convert a string "src" containing escape sequences to UTF-8 */
int UTF8_unescape(char *buf, int sz, char *src);

/* convert UTF-8 "src" to ASCII with escape sequences.
   if escape_quotes is nonzero, quote characters will be preceded by
   backslashes as well. */
int UTF8_escape(char *buf, int sz, char *src, int escape_quotes);

/* return a pointer to the first occurrence of ch in s, or NULL if not
   found. character index of found character returned in *charn. */
char *UTF8_strchr(char *s, u_int32_t ch, int *charn);

/* same as the above, but searches a buffer of a given size instead of
   a NUL-terminated string. */
char *UTF8_memchr(char *s, u_int32_t ch, size_t sz, int *charn);

/* count the number of characters in a UTF-8 string */
int UTF8_strlen(char *s);

/* NOTE(review): undocumented in the original; presumably reports whether
   the named locale uses UTF-8. Confirm against the implementation. */
int UTF8_is_locale_utf8(char *locale);

/* printf where the format string and arguments may be in UTF-8.
   you can avoid this function and just use ordinary printf() if the current
   locale is UTF-8. */
int UTF8_vprintf(char *fmt, va_list ap);
int UTF8_printf(char *fmt, ...);

#endif