--- trunk/Src/wptUTF8.cpp 2005/10/26 12:49:29 35 +++ trunk/Src/wptUTF8.cpp 2005/10/27 15:25:13 36 @@ -1,31 +1,36 @@ /* wptUTF8.cpp - UTF8 conversation - * Copyright (C) 1994, 1998-2001 Free Software Foundation, Inc. + * Copyright (C) 1994, 1998-2001 Free Software Foundation, Inc. * Copyright (C) 2002, 2004 Timo Schulz * - * This file is part of WinPT. - * - * WinPT is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * WinPT is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with WinPT; if not, write to the Free Software Foundation, + * This file is part of WinPT. + * + * WinPT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * WinPT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with WinPT; if not, write to the Free Software Foundation, * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ +#ifdef HAVE_CONFIG_H +#include +#endif + #include -#include +#include +#include #include #include #include - -#include "wptTypes.h" + +#include "wptTypes.h" #include "wptErrors.h" static u16 koi8_unicode[128] = { @@ -70,32 +75,32 @@ static const char *active_charset_name = "iso-8859-1"; static u16 *active_charset = NULL; static int no_translation = 0; - -static int -ascii_strcasecmp( const char *a, const char *b ) -{ - if( a == b ) - return 0; - - for (; *a && *b; a++, b++) { - if (*a != *b && toupper(*a) != toupper(*b)) - break; - } - - return *a == *b? 0 : (toupper (*a) - toupper (*b)); -} /* ascii_strcasecmp */ + +static int +ascii_strcasecmp( const char *a, const char *b ) +{ + if( a == b ) + return 0; + + for (; *a && *b; a++, b++) { + if (*a != *b && toupper(*a) != toupper(*b)) + break; + } + + return *a == *b? 0 : (toupper (*a) - toupper (*b)); +} /* ascii_strcasecmp */ int set_native_charset( const char *newset ) { - if( !ascii_strcasecmp( newset, "iso-8859-1" ) ) { - active_charset_name = "iso-8859-1"; + if( !ascii_strcasecmp( newset, "iso-8859-1" ) ) { + active_charset_name = "iso-8859-1"; no_translation = 0; - active_charset = NULL; + active_charset = NULL; } - else if( !ascii_strcasecmp( newset, "iso-8859-2" ) ) { - active_charset_name = "iso-8859-2"; - no_translation = 0; + else if( !ascii_strcasecmp( newset, "iso-8859-2" ) ) { + active_charset_name = "iso-8859-2"; + no_translation = 0; active_charset = latin2_unicode; } else if( !ascii_strcasecmp( newset, "koi8-r" ) ) { @@ -109,15 +114,15 @@ no_translation = 1; active_charset = NULL; } - else - return WPTERR_GENERAL; + else + return WPTERR_GENERAL; return 0; } /* set_native_charset */ const char* get_native_charset( void ) -{ +{ return active_charset_name; } /* get_native_charset */ @@ -135,10 +140,10 @@ if (no_translation) buffer = strdup( string ); - else if( active_charset ) { + else if( active_charset ) { for(s=(byte*)string; *s; s++ ) { length++; - if( *s & 0x80 ) + if( *s & 0x80 ) length += 2; /* we may need 3 bytes */ } buffer = (char *)malloc( length + 1 ); @@ -154,16 +159,16 @@ *p++ = 0x80 | ( (val >> 6) & 0x3f ); *p++ = 0x80 | ( val & 0x3f ); } - } - else + } + else *p++ = *s; - } + } *p = 0; } else { for(s=(byte*)string; *s; s++ ) { length++; - if( *s & 0x80 ) + if( *s & 0x80 ) length++; } buffer = (char*)malloc( length + 1 ); @@ -172,437 +177,437 @@ *p++ = 0xc0 | ((*s >> 6) & 3); *p++ = 0x80 | ( *s & 0x3f ); } - else + else *p++ = *s; } *p = 0; - } + } return buffer; -} /* native_to_utf8 */ - -/**************** - * Convert string, which is in UTF8 to native encoding. illegal - * encodings by some "\xnn" and quote all control characters. A - * character with value DELIM will always be quoted, it must be a - * vanilla ASCII character. - */ -char * -utf8_to_native( const char *string, size_t length, int delim ) -{ - int nleft; - int i; - byte encbuf[8]; - int encidx; - const byte *s; - size_t n; - byte *buffer = NULL, *p = NULL; - unsigned long val = 0; - size_t slen; - int resync = 0; - - /* 1. pass (p==NULL): count the extended utf-8 characters */ - /* 2. pass (p!=NULL): create string */ - for( ;; ) { - for( slen=length, nleft=encidx=0, n=0, s=(byte*)string; slen; s++, slen-- ) { - if( resync ) { - if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) { - /* still invalid */ - if( p ) { - sprintf((char*)p, "\\x%02x", *s ); - p += 4; - } - n += 4; - continue; - } - resync = 0; - } - if( !nleft ) { - if( !(*s & 0x80) ) { /* plain ascii */ - if( *s < 0x20 || *s == 0x7f || *s == delim) { - n++; - if( p ) - *p++ = '\\'; - switch( *s ) { - case '\n': n++; if( p ) *p++ = 'n'; break; - case '\r': n++; if( p ) *p++ = 'r'; break; - case '\f': n++; if( p ) *p++ = 'f'; break; - case '\v': n++; if( p ) *p++ = 'v'; break; - case '\b': n++; if( p ) *p++ = 'b'; break; - case 0 : n++; if( p ) *p++ = '0'; break; - default: - n += 3; - if ( p ) { - sprintf( (char*)p, "x%02x", *s ); - p += 3; - } - break; - } - } - else { - if( p ) *p++ = *s; - n++; - } - } - else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */ - val = *s & 0x1f; - nleft = 1; - encidx = 0; - encbuf[encidx++] = *s; - } - else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */ - val = *s & 0x0f; - nleft = 2; - encidx = 0; - encbuf[encidx++] = *s; - } - else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */ - val = *s & 0x07; - nleft = 3; - encidx = 0; - encbuf[encidx++] = *s; - } - else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */ - val = *s & 0x03; - nleft = 4; - encidx = 0; - encbuf[encidx++] = *s; - } - else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */ - val = *s & 0x01; - nleft = 5; - encidx = 0; - encbuf[encidx++] = *s; - } - else { /* invalid encoding: print as \xnn */ - if( p ) { - sprintf((char*)p, "\\x%02x", *s ); - p += 4; - } - n += 4; - resync = 1; - } - } - else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */ - if( p ) { - for(i=0; i < encidx; i++ ) { - sprintf((char*)p, "\\x%02x", encbuf[i] ); - p += 4; - } - sprintf((char*)p, "\\x%02x", *s ); - p += 4; - } - n += 4 + 4*encidx; - nleft = 0; - encidx = 0; - resync = 1; - } - else { - encbuf[encidx++] = *s; - val <<= 6; - val |= *s & 0x3f; - if( !--nleft ) { /* ready */ - if (no_translation) { - if( p ) { - for(i=0; i < encidx; i++ ) - *p++ = encbuf[i]; - } - n += encidx; - encidx = 0; - } - else if( active_charset ) { /* table lookup */ - for(i=0; i < 128; i++ ) { - if( active_charset[i] == val ) - break; - } - if( i < 128 ) { /* we can print this one */ - if( p ) *p++ = i+128; - n++; - } - else { /* we do not have a translation: print utf8 */ - if( p ) { - for(i=0; i < encidx; i++ ) { - sprintf((char*)p, "\\x%02x", encbuf[i] ); - p += 4; - } - } - n += encidx*4; - encidx = 0; - } - } - else { /* native set */ - if( val >= 0x80 && val < 256 ) { - n++; /* we can simply print this character */ - if( p ) *p++ = val; - } - else { /* we do not have a translation: print utf8 */ - if( p ) { - for(i=0; i < encidx; i++ ) { - sprintf((char*)p, "\\x%02x", encbuf[i] ); - p += 4; - } - } - n += encidx*4; - encidx = 0; - } - } - } - - } - } - if( !buffer ) { /* allocate the buffer after the first pass */ - buffer = p = (byte *)malloc( n + 1 ); - } - else { - *p = 0; /* make a string */ - return (char*)buffer; - } - } +} /* native_to_utf8 */ + +/**************** + * Convert string, which is in UTF8 to native encoding. illegal + * encodings by some "\xnn" and quote all control characters. A + * character with value DELIM will always be quoted, it must be a + * vanilla ASCII character. + */ +char * +utf8_to_native( const char *string, size_t length, int delim ) +{ + int nleft; + int i; + byte encbuf[8]; + int encidx; + const byte *s; + size_t n; + byte *buffer = NULL, *p = NULL; + unsigned long val = 0; + size_t slen; + int resync = 0; + + /* 1. pass (p==NULL): count the extended utf-8 characters */ + /* 2. pass (p!=NULL): create string */ + for( ;; ) { + for( slen=length, nleft=encidx=0, n=0, s=(byte*)string; slen; s++, slen-- ) { + if( resync ) { + if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) { + /* still invalid */ + if( p ) { + sprintf((char*)p, "\\x%02x", *s ); + p += 4; + } + n += 4; + continue; + } + resync = 0; + } + if( !nleft ) { + if( !(*s & 0x80) ) { /* plain ascii */ + if( *s < 0x20 || *s == 0x7f || *s == delim) { + n++; + if( p ) + *p++ = '\\'; + switch( *s ) { + case '\n': n++; if( p ) *p++ = 'n'; break; + case '\r': n++; if( p ) *p++ = 'r'; break; + case '\f': n++; if( p ) *p++ = 'f'; break; + case '\v': n++; if( p ) *p++ = 'v'; break; + case '\b': n++; if( p ) *p++ = 'b'; break; + case 0 : n++; if( p ) *p++ = '0'; break; + default: + n += 3; + if ( p ) { + sprintf( (char*)p, "x%02x", *s ); + p += 3; + } + break; + } + } + else { + if( p ) *p++ = *s; + n++; + } + } + else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */ + val = *s & 0x1f; + nleft = 1; + encidx = 0; + encbuf[encidx++] = *s; + } + else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */ + val = *s & 0x0f; + nleft = 2; + encidx = 0; + encbuf[encidx++] = *s; + } + else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */ + val = *s & 0x07; + nleft = 3; + encidx = 0; + encbuf[encidx++] = *s; + } + else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */ + val = *s & 0x03; + nleft = 4; + encidx = 0; + encbuf[encidx++] = *s; + } + else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */ + val = *s & 0x01; + nleft = 5; + encidx = 0; + encbuf[encidx++] = *s; + } + else { /* invalid encoding: print as \xnn */ + if( p ) { + sprintf((char*)p, "\\x%02x", *s ); + p += 4; + } + n += 4; + resync = 1; + } + } + else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */ + if( p ) { + for(i=0; i < encidx; i++ ) { + sprintf((char*)p, "\\x%02x", encbuf[i] ); + p += 4; + } + sprintf((char*)p, "\\x%02x", *s ); + p += 4; + } + n += 4 + 4*encidx; + nleft = 0; + encidx = 0; + resync = 1; + } + else { + encbuf[encidx++] = *s; + val <<= 6; + val |= *s & 0x3f; + if( !--nleft ) { /* ready */ + if (no_translation) { + if( p ) { + for(i=0; i < encidx; i++ ) + *p++ = encbuf[i]; + } + n += encidx; + encidx = 0; + } + else if( active_charset ) { /* table lookup */ + for(i=0; i < 128; i++ ) { + if( active_charset[i] == val ) + break; + } + if( i < 128 ) { /* we can print this one */ + if( p ) *p++ = i+128; + n++; + } + else { /* we do not have a translation: print utf8 */ + if( p ) { + for(i=0; i < encidx; i++ ) { + sprintf((char*)p, "\\x%02x", encbuf[i] ); + p += 4; + } + } + n += encidx*4; + encidx = 0; + } + } + else { /* native set */ + if( val >= 0x80 && val < 256 ) { + n++; /* we can simply print this character */ + if( p ) *p++ = val; + } + else { /* we do not have a translation: print utf8 */ + if( p ) { + for(i=0; i < encidx; i++ ) { + sprintf((char*)p, "\\x%02x", encbuf[i] ); + p += 4; + } + } + n += encidx*4; + encidx = 0; + } + } + } + + } + } + if( !buffer ) { /* allocate the buffer after the first pass */ + buffer = p = (byte *)malloc( n + 1 ); + } + else { + *p = 0; /* make a string */ + return (char*)buffer; + } + } +} + + +static void +conv_charset (byte *string, size_t size, int what) +{ + int i; + + if( what == 0 ) { + for( i = 0; i < size; i++, string++ ) { + switch( *string ) { + case 0xa0: *string = 0xff; break; /* nobreakspace */ + case 0xa1: *string = 0xad; break; /* exclamdown */ + case 0xa2: *string = 0xbd; break; /* cent */ + case 0xa3: *string = 0x9c; break; /* sterling */ + case 0xa4: *string = 0xcf; break; /* currency */ + case 0xa5: *string = 0xbe; break; /* yen */ + case 0xa6: *string = 0xdd; break; /* brokenbar */ + case 0xa7: *string = 0xf5; break; /* section */ + case 0xa8: *string = 0xf9; break; /* diaeresis */ + case 0xa9: *string = 0xb8; break; /* copyright */ + case 0xaa: *string = 0xa6; break; /* ordfeminine */ + case 0xab: *string = 0xae; break; /* guillemotleft */ + case 0xac: *string = 0xaa; break; /* notsign */ + case 0xad: *string = 0xf0; break; /* hyphen */ + case 0xae: *string = 0xa9; break; /* registered */ + case 0xaf: *string = 0xee; break; /* macron */ + case 0xb0: *string = 0xf8; break; /* degree */ + case 0xb1: *string = 0xf1; break; /* plusminus */ + case 0xb2: *string = 0xfd; break; /* twosuperior */ + case 0xb3: *string = 0xfc; break; /* threesuperior */ + case 0xb4: *string = 0xef; break; /* acute */ + case 0xb5: *string = 0xe6; break; /* mu */ + case 0xb6: *string = 0xf4; break; /* paragraph */ + case 0xb7: *string = 0xfa; break; /* periodcentered */ + case 0xb8: *string = 0xf7; break; /* cedilla */ + case 0xb9: *string = 0xfb; break; /* onesuperior */ + case 0xba: *string = 0xa7; break; /* masculine */ + case 0xbb: *string = 0xaf; break; /* guillemotright */ + case 0xbc: *string = 0xac; break; /* onequarter */ + case 0xbd: *string = 0xab; break; /* onehalf */ + case 0xbe: *string = 0xf3; break; /* threequarters */ + case 0xbf: *string = 0xa8; break; /* questiondown */ + case 0xc0: *string = 0xb7; break; /* Agrave */ + case 0xc1: *string = 0xb5; break; /* Aacute */ + case 0xc2: *string = 0xb6; break; /* Acircumflex */ + case 0xc3: *string = 0xc7; break; /* Atilde */ + case 0xc4: *string = 0x8e; break; /* Adiaeresis */ + case 0xc5: *string = 0x8f; break; /* Aring */ + case 0xc6: *string = 0x92; break; /* AE */ + case 0xc7: *string = 0x80; break; /* Ccedilla */ + case 0xc8: *string = 0xd4; break; /* Egrave */ + case 0xc9: *string = 0x90; break; /* Eacute */ + case 0xca: *string = 0xd2; break; /* Ecircumflex */ + case 0xcb: *string = 0xd3; break; /* Ediaeresis */ + case 0xcc: *string = 0xde; break; /* Igrave */ + case 0xcd: *string = 0xd6; break; /* Iacute */ + case 0xce: *string = 0xd7; break; /* Icircumflex */ + case 0xcf: *string = 0xd8; break; /* Idiaeresis */ + case 0xd0: *string = 0xd1; break; /* Eth */ + case 0xd1: *string = 0xa5; break; /* Ntilde */ + case 0xd2: *string = 0xe3; break; /* Ograve */ + case 0xd3: *string = 0xe0; break; /* Oacute */ + case 0xd4: *string = 0xe2; break; /* Ocircumflex */ + case 0xd5: *string = 0xe5; break; /* Otilde */ + case 0xd6: *string = 0x99; break; /* Odiaeresis */ + case 0xd7: *string = 0x9e; break; /* multiply */ + case 0xd8: *string = 0x9d; break; /* Ooblique */ + case 0xd9: *string = 0xeb; break; /* Ugrave */ + case 0xda: *string = 0xe9; break; /* Uacute */ + case 0xdb: *string = 0xea; break; /* Ucircumflex */ + case 0xdc: *string = 0x9a; break; /* Udiaeresis */ + case 0xdd: *string = 0xed; break; /* Yacute */ + case 0xde: *string = 0xe8; break; /* Thorn */ + case 0xdf: *string = 0xe1; break; /* ssharp */ + case 0xe0: *string = 0x85; break; /* agrave */ + case 0xe1: *string = 0xa0; break; /* aacute */ + case 0xe2: *string = 0x83; break; /* acircumflex */ + case 0xe3: *string = 0xc6; break; /* atilde */ + case 0xe4: *string = 0x84; break; /* adiaeresis */ + case 0xe5: *string = 0x86; break; /* aring */ + case 0xe6: *string = 0x91; break; /* ae */ + case 0xe7: *string = 0x87; break; /* ccedilla */ + case 0xe8: *string = 0x8a; break; /* egrave */ + case 0xe9: *string = 0x82; break; /* eacute */ + case 0xea: *string = 0x88; break; /* ecircumflex */ + case 0xeb: *string = 0x89; break; /* ediaeresis */ + case 0xec: *string = 0x8d; break; /* igrave */ + case 0xed: *string = 0xa1; break; /* iacute */ + case 0xee: *string = 0x8c; break; /* icircumflex */ + case 0xef: *string = 0x8b; break; /* idiaeresis */ + case 0xf0: *string = 0xd0; break; /* eth */ + case 0xf1: *string = 0xa4; break; /* ntilde */ + case 0xf2: *string = 0x95; break; /* ograve */ + case 0xf3: *string = 0xa2; break; /* oacute */ + case 0xf4: *string = 0x93; break; /* ocircumflex */ + case 0xf5: *string = 0xe4; break; /* otilde */ + case 0xf6: *string = 0x94; break; /* odiaeresis */ + case 0xf7: *string = 0xf6; break; /* division */ + case 0xf8: *string = 0x9b; break; /* oslash */ + case 0xf9: *string = 0x97; break; /* ugrave */ + case 0xfa: *string = 0xa3; break; /* uacute */ + case 0xfb: *string = 0x96; break; /* ucircumflex */ + case 0xfc: *string = 0x81; break; /* udiaeresis */ + case 0xfd: *string = 0xec; break; /* yacute */ + case 0xfe: *string = 0xe7; break; /* thorn */ + case 0xff: *string = 0x98; break; /* ydiaeresis */ + default : break; + } + } + } + else { + for( i = 0; i < size; i++, string++ ) { + switch( *string ) { + case 0xff: *string = 0xa0; break; + case 0xad: *string = 0xa1; break; + case 0xbd: *string = 0xa2; break; + case 0x9c: *string = 0xa3; break; + case 0xcf: *string = 0xa4; break; + case 0xbe: *string = 0xa5; break; + case 0xdd: *string = 0xa6; break; + case 0xf5: *string = 0xa7; break; + case 0xf9: *string = 0xa8; break; + case 0xb8: *string = 0xa9; break; + case 0xa6: *string = 0xaa; break; + case 0xae: *string = 0xab; break; + case 0xaa: *string = 0xac; break; + case 0xf0: *string = 0xad; break; + case 0xa9: *string = 0xae; break; + case 0xee: *string = 0xaf; break; + case 0xf8: *string = 0xb0; break; + case 0xf1: *string = 0xb1; break; + case 0xfd: *string = 0xb2; break; + case 0xfc: *string = 0xb3; break; + case 0xef: *string = 0xb4; break; + case 0xe6: *string = 0xb5; break; + case 0xf4: *string = 0xb6; break; + case 0xfa: *string = 0xb7; break; + case 0xf7: *string = 0xb8; break; + case 0xfb: *string = 0xb9; break; + case 0xa7: *string = 0xba; break; + case 0xaf: *string = 0xbb; break; + case 0xac: *string = 0xbc; break; + case 0xab: *string = 0xbd; break; + case 0xf3: *string = 0xbe; break; + case 0xa8: *string = 0xbf; break; + case 0xb7: *string = 0xc0; break; + case 0xb5: *string = 0xc1; break; + case 0xb6: *string = 0xc2; break; + case 0xc7: *string = 0xc3; break; + case 0x8e: *string = 0xc4; break; + case 0x8f: *string = 0xc5; break; + case 0x92: *string = 0xc6; break; + case 0x80: *string = 0xc7; break; + case 0xd4: *string = 0xc8; break; + case 0x90: *string = 0xc9; break; + case 0xd2: *string = 0xca; break; + case 0xd3: *string = 0xcb; break; + case 0xde: *string = 0xcc; break; + case 0xd6: *string = 0xcd; break; + case 0xd7: *string = 0xce; break; + case 0xd8: *string = 0xcf; break; + case 0xd1: *string = 0xd0; break; + case 0xa5: *string = 0xd1; break; + case 0xe3: *string = 0xd2; break; + case 0xe0: *string = 0xd3; break; + case 0xe2: *string = 0xd4; break; + case 0xe5: *string = 0xd5; break; + case 0x99: *string = 0xd6; break; + case 0x9e: *string = 0xd7; break; + case 0x9d: *string = 0xd8; break; + case 0xeb: *string = 0xd9; break; + case 0xe9: *string = 0xda; break; + case 0xea: *string = 0xdb; break; + case 0x9a: *string = 0xdc; break; + case 0xed: *string = 0xdd; break; + case 0xe8: *string = 0xde; break; + case 0xe1: *string = 0xdf; break; + case 0x85: *string = 0xe0; break; + case 0xa0: *string = 0xe1; break; + case 0x83: *string = 0xe2; break; + case 0xc6: *string = 0xe3; break; + case 0x84: *string = 0xe4; break; + case 0x86: *string = 0xe5; break; + case 0x91: *string = 0xe6; break; + case 0x87: *string = 0xe7; break; + case 0x8a: *string = 0xe8; break; + case 0x82: *string = 0xe9; break; + case 0x88: *string = 0xea; break; + case 0x89: *string = 0xeb; break; + case 0x8d: *string = 0xec; break; + case 0xa1: *string = 0xed; break; + case 0x8c: *string = 0xee; break; + case 0x8b: *string = 0xef; break; + case 0xd0: *string = 0xf0; break; + case 0xa4: *string = 0xf1; break; + case 0x95: *string = 0xf2; break; + case 0xa2: *string = 0xf3; break; + case 0x93: *string = 0xf4; break; + case 0xe4: *string = 0xf5; break; + case 0x94: *string = 0xf6; break; + case 0xf6: *string = 0xf7; break; + case 0x9b: *string = 0xf8; break; + case 0x97: *string = 0xf9; break; + case 0xa3: *string = 0xfa; break; + case 0x96: *string = 0xfb; break; + case 0x81: *string = 0xfc; break; + case 0xec: *string = 0xfd; break; + case 0xe7: *string = 0xfe; break; + case 0x98: *string = 0xff; break; + default : break; + } + } + } +} /* conv_charset */ + + +char * +utf8_to_wincp (const char * s, size_t len) +{ + char * decs; + decs = utf8_to_native (s, len, 0); + conv_charset ((byte *)decs, strlen (decs), 1); + return decs; } - - -static void -conv_charset (byte *string, size_t size, int what) -{ - int i; - - if( what == 0 ) { - for( i = 0; i < size; i++, string++ ) { - switch( *string ) { - case 0xa0: *string = 0xff; break; /* nobreakspace */ - case 0xa1: *string = 0xad; break; /* exclamdown */ - case 0xa2: *string = 0xbd; break; /* cent */ - case 0xa3: *string = 0x9c; break; /* sterling */ - case 0xa4: *string = 0xcf; break; /* currency */ - case 0xa5: *string = 0xbe; break; /* yen */ - case 0xa6: *string = 0xdd; break; /* brokenbar */ - case 0xa7: *string = 0xf5; break; /* section */ - case 0xa8: *string = 0xf9; break; /* diaeresis */ - case 0xa9: *string = 0xb8; break; /* copyright */ - case 0xaa: *string = 0xa6; break; /* ordfeminine */ - case 0xab: *string = 0xae; break; /* guillemotleft */ - case 0xac: *string = 0xaa; break; /* notsign */ - case 0xad: *string = 0xf0; break; /* hyphen */ - case 0xae: *string = 0xa9; break; /* registered */ - case 0xaf: *string = 0xee; break; /* macron */ - case 0xb0: *string = 0xf8; break; /* degree */ - case 0xb1: *string = 0xf1; break; /* plusminus */ - case 0xb2: *string = 0xfd; break; /* twosuperior */ - case 0xb3: *string = 0xfc; break; /* threesuperior */ - case 0xb4: *string = 0xef; break; /* acute */ - case 0xb5: *string = 0xe6; break; /* mu */ - case 0xb6: *string = 0xf4; break; /* paragraph */ - case 0xb7: *string = 0xfa; break; /* periodcentered */ - case 0xb8: *string = 0xf7; break; /* cedilla */ - case 0xb9: *string = 0xfb; break; /* onesuperior */ - case 0xba: *string = 0xa7; break; /* masculine */ - case 0xbb: *string = 0xaf; break; /* guillemotright */ - case 0xbc: *string = 0xac; break; /* onequarter */ - case 0xbd: *string = 0xab; break; /* onehalf */ - case 0xbe: *string = 0xf3; break; /* threequarters */ - case 0xbf: *string = 0xa8; break; /* questiondown */ - case 0xc0: *string = 0xb7; break; /* Agrave */ - case 0xc1: *string = 0xb5; break; /* Aacute */ - case 0xc2: *string = 0xb6; break; /* Acircumflex */ - case 0xc3: *string = 0xc7; break; /* Atilde */ - case 0xc4: *string = 0x8e; break; /* Adiaeresis */ - case 0xc5: *string = 0x8f; break; /* Aring */ - case 0xc6: *string = 0x92; break; /* AE */ - case 0xc7: *string = 0x80; break; /* Ccedilla */ - case 0xc8: *string = 0xd4; break; /* Egrave */ - case 0xc9: *string = 0x90; break; /* Eacute */ - case 0xca: *string = 0xd2; break; /* Ecircumflex */ - case 0xcb: *string = 0xd3; break; /* Ediaeresis */ - case 0xcc: *string = 0xde; break; /* Igrave */ - case 0xcd: *string = 0xd6; break; /* Iacute */ - case 0xce: *string = 0xd7; break; /* Icircumflex */ - case 0xcf: *string = 0xd8; break; /* Idiaeresis */ - case 0xd0: *string = 0xd1; break; /* Eth */ - case 0xd1: *string = 0xa5; break; /* Ntilde */ - case 0xd2: *string = 0xe3; break; /* Ograve */ - case 0xd3: *string = 0xe0; break; /* Oacute */ - case 0xd4: *string = 0xe2; break; /* Ocircumflex */ - case 0xd5: *string = 0xe5; break; /* Otilde */ - case 0xd6: *string = 0x99; break; /* Odiaeresis */ - case 0xd7: *string = 0x9e; break; /* multiply */ - case 0xd8: *string = 0x9d; break; /* Ooblique */ - case 0xd9: *string = 0xeb; break; /* Ugrave */ - case 0xda: *string = 0xe9; break; /* Uacute */ - case 0xdb: *string = 0xea; break; /* Ucircumflex */ - case 0xdc: *string = 0x9a; break; /* Udiaeresis */ - case 0xdd: *string = 0xed; break; /* Yacute */ - case 0xde: *string = 0xe8; break; /* Thorn */ - case 0xdf: *string = 0xe1; break; /* ssharp */ - case 0xe0: *string = 0x85; break; /* agrave */ - case 0xe1: *string = 0xa0; break; /* aacute */ - case 0xe2: *string = 0x83; break; /* acircumflex */ - case 0xe3: *string = 0xc6; break; /* atilde */ - case 0xe4: *string = 0x84; break; /* adiaeresis */ - case 0xe5: *string = 0x86; break; /* aring */ - case 0xe6: *string = 0x91; break; /* ae */ - case 0xe7: *string = 0x87; break; /* ccedilla */ - case 0xe8: *string = 0x8a; break; /* egrave */ - case 0xe9: *string = 0x82; break; /* eacute */ - case 0xea: *string = 0x88; break; /* ecircumflex */ - case 0xeb: *string = 0x89; break; /* ediaeresis */ - case 0xec: *string = 0x8d; break; /* igrave */ - case 0xed: *string = 0xa1; break; /* iacute */ - case 0xee: *string = 0x8c; break; /* icircumflex */ - case 0xef: *string = 0x8b; break; /* idiaeresis */ - case 0xf0: *string = 0xd0; break; /* eth */ - case 0xf1: *string = 0xa4; break; /* ntilde */ - case 0xf2: *string = 0x95; break; /* ograve */ - case 0xf3: *string = 0xa2; break; /* oacute */ - case 0xf4: *string = 0x93; break; /* ocircumflex */ - case 0xf5: *string = 0xe4; break; /* otilde */ - case 0xf6: *string = 0x94; break; /* odiaeresis */ - case 0xf7: *string = 0xf6; break; /* division */ - case 0xf8: *string = 0x9b; break; /* oslash */ - case 0xf9: *string = 0x97; break; /* ugrave */ - case 0xfa: *string = 0xa3; break; /* uacute */ - case 0xfb: *string = 0x96; break; /* ucircumflex */ - case 0xfc: *string = 0x81; break; /* udiaeresis */ - case 0xfd: *string = 0xec; break; /* yacute */ - case 0xfe: *string = 0xe7; break; /* thorn */ - case 0xff: *string = 0x98; break; /* ydiaeresis */ - default : break; - } - } - } - else { - for( i = 0; i < size; i++, string++ ) { - switch( *string ) { - case 0xff: *string = 0xa0; break; - case 0xad: *string = 0xa1; break; - case 0xbd: *string = 0xa2; break; - case 0x9c: *string = 0xa3; break; - case 0xcf: *string = 0xa4; break; - case 0xbe: *string = 0xa5; break; - case 0xdd: *string = 0xa6; break; - case 0xf5: *string = 0xa7; break; - case 0xf9: *string = 0xa8; break; - case 0xb8: *string = 0xa9; break; - case 0xa6: *string = 0xaa; break; - case 0xae: *string = 0xab; break; - case 0xaa: *string = 0xac; break; - case 0xf0: *string = 0xad; break; - case 0xa9: *string = 0xae; break; - case 0xee: *string = 0xaf; break; - case 0xf8: *string = 0xb0; break; - case 0xf1: *string = 0xb1; break; - case 0xfd: *string = 0xb2; break; - case 0xfc: *string = 0xb3; break; - case 0xef: *string = 0xb4; break; - case 0xe6: *string = 0xb5; break; - case 0xf4: *string = 0xb6; break; - case 0xfa: *string = 0xb7; break; - case 0xf7: *string = 0xb8; break; - case 0xfb: *string = 0xb9; break; - case 0xa7: *string = 0xba; break; - case 0xaf: *string = 0xbb; break; - case 0xac: *string = 0xbc; break; - case 0xab: *string = 0xbd; break; - case 0xf3: *string = 0xbe; break; - case 0xa8: *string = 0xbf; break; - case 0xb7: *string = 0xc0; break; - case 0xb5: *string = 0xc1; break; - case 0xb6: *string = 0xc2; break; - case 0xc7: *string = 0xc3; break; - case 0x8e: *string = 0xc4; break; - case 0x8f: *string = 0xc5; break; - case 0x92: *string = 0xc6; break; - case 0x80: *string = 0xc7; break; - case 0xd4: *string = 0xc8; break; - case 0x90: *string = 0xc9; break; - case 0xd2: *string = 0xca; break; - case 0xd3: *string = 0xcb; break; - case 0xde: *string = 0xcc; break; - case 0xd6: *string = 0xcd; break; - case 0xd7: *string = 0xce; break; - case 0xd8: *string = 0xcf; break; - case 0xd1: *string = 0xd0; break; - case 0xa5: *string = 0xd1; break; - case 0xe3: *string = 0xd2; break; - case 0xe0: *string = 0xd3; break; - case 0xe2: *string = 0xd4; break; - case 0xe5: *string = 0xd5; break; - case 0x99: *string = 0xd6; break; - case 0x9e: *string = 0xd7; break; - case 0x9d: *string = 0xd8; break; - case 0xeb: *string = 0xd9; break; - case 0xe9: *string = 0xda; break; - case 0xea: *string = 0xdb; break; - case 0x9a: *string = 0xdc; break; - case 0xed: *string = 0xdd; break; - case 0xe8: *string = 0xde; break; - case 0xe1: *string = 0xdf; break; - case 0x85: *string = 0xe0; break; - case 0xa0: *string = 0xe1; break; - case 0x83: *string = 0xe2; break; - case 0xc6: *string = 0xe3; break; - case 0x84: *string = 0xe4; break; - case 0x86: *string = 0xe5; break; - case 0x91: *string = 0xe6; break; - case 0x87: *string = 0xe7; break; - case 0x8a: *string = 0xe8; break; - case 0x82: *string = 0xe9; break; - case 0x88: *string = 0xea; break; - case 0x89: *string = 0xeb; break; - case 0x8d: *string = 0xec; break; - case 0xa1: *string = 0xed; break; - case 0x8c: *string = 0xee; break; - case 0x8b: *string = 0xef; break; - case 0xd0: *string = 0xf0; break; - case 0xa4: *string = 0xf1; break; - case 0x95: *string = 0xf2; break; - case 0xa2: *string = 0xf3; break; - case 0x93: *string = 0xf4; break; - case 0xe4: *string = 0xf5; break; - case 0x94: *string = 0xf6; break; - case 0xf6: *string = 0xf7; break; - case 0x9b: *string = 0xf8; break; - case 0x97: *string = 0xf9; break; - case 0xa3: *string = 0xfa; break; - case 0x96: *string = 0xfb; break; - case 0x81: *string = 0xfc; break; - case 0xec: *string = 0xfd; break; - case 0xe7: *string = 0xfe; break; - case 0x98: *string = 0xff; break; - default : break; - } - } - } -} /* conv_charset */ - - -char * -utf8_to_wincp (const char * s, size_t len) -{ - char * decs; - decs = utf8_to_native (s, len, 0); - conv_charset ((byte *)decs, strlen (decs), 1); - return decs; -} - - -char * -wincp_to_utf8 (const char * s, size_t len) -{ - char * encs; - conv_charset ((byte *)s, len, 0); - encs = native_to_utf8 (s); - return encs; -} - - -int -is_8bit_string (const char * str) -{ - size_t i; - - for (i = 0; i < strlen (str); i++) { - if (str[i] & 0x80) - return -1; - } - return 0; -} /* is_8bit_string */ + + +char * +wincp_to_utf8 (const char * s, size_t len) +{ + char * encs; + conv_charset ((byte *)s, len, 0); + encs = native_to_utf8 (s); + return encs; +} + + +int +is_8bit_string (const char * str) +{ + size_t i; + + for (i = 0; i < strlen (str); i++) { + if (str[i] & 0x80) + return -1; + } + return 0; +} /* is_8bit_string */