1 |
/* wptUTF8.cpp - UTF8 conversion |
/* wptUTF8.cpp - UTF8 conversion |
2 |
* Copyright (C) 1994, 1998-2001 Free Software Foundation, Inc. |
* Copyright (C) 2002, 2004, 2005, 2006, 2009 Timo Schulz |
|
* Copyright (C) 2002, 2004, 2005 Timo Schulz |
|
3 |
* |
* |
4 |
* This file is part of WinPT. |
* This file is part of WinPT. |
5 |
* |
* |
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
* GNU General Public License for more details. |
* GNU General Public License for more details. |
|
* |
|
|
* You should have received a copy of the GNU General Public License |
|
|
* along with WinPT; if not, write to the Free Software Foundation, |
|
|
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA |
|
15 |
*/ |
*/ |
|
|
|
16 |
#ifdef HAVE_CONFIG_H |
#ifdef HAVE_CONFIG_H |
17 |
#include <config.h> |
#include <config.h> |
18 |
#endif |
#endif |
27 |
#include "wptErrors.h" |
#include "wptErrors.h" |
28 |
|
|
29 |
|
|
30 |
static WORD latin2_unicode[128] = { |
/** |
31 |
0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087, |
* Converts the given input string, which is encoded with the locale |
32 |
0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F, |
* setting, into UTF-8 representation. |
33 |
0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097, |
*/ |
34 |
0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F, |
char* |
35 |
0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7, |
native_to_utf8 (const char *string) |
|
0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B, |
|
|
0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7, |
|
|
0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C, |
|
|
0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7, |
|
|
0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E, |
|
|
0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7, |
|
|
0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF, |
|
|
0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7, |
|
|
0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F, |
|
|
0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7, |
|
|
0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9 |
|
|
}; |
|
|
|
|
|
|
|
|
static const char *active_charset_name = "iso-8859-1"; |
|
|
static WORD *active_charset = NULL; |
|
|
static int no_translation = 0; |
|
|
|
|
|
|
|
|
static int |
|
|
ascii_strcasecmp( const char *a, const char *b ) |
|
36 |
{ |
{ |
37 |
if( a == b ) |
wchar_t *result; |
38 |
return 0; |
char *native; |
39 |
|
int n; |
40 |
for (; *a && *b; a++, b++) { |
|
41 |
if (*a != *b && toupper(*a) != toupper(*b)) |
n = MultiByteToWideChar (GetACP (), 0, string, -1, NULL, 0); |
42 |
break; |
if (n < 0) |
43 |
|
return NULL; |
44 |
|
|
45 |
|
result = new wchar_t[n+1]; |
46 |
|
if (!result) |
47 |
|
BUG (0); |
48 |
|
|
49 |
|
n = MultiByteToWideChar (GetACP (), 0, string, -1, result, n); |
50 |
|
if (n < 0) { |
51 |
|
free_if_alloc (result); |
52 |
|
return NULL; |
53 |
} |
} |
|
|
|
|
return *a == *b? 0 : (toupper (*a) - toupper (*b)); |
|
|
} |
|
54 |
|
|
55 |
int |
n = WideCharToMultiByte (CP_UTF8, 0, result, -1, NULL, 0, NULL, NULL); |
56 |
set_native_charset( const char *newset ) |
if (n < 0) |
57 |
{ |
return NULL; |
58 |
if( !ascii_strcasecmp( newset, "iso-8859-1" ) ) { |
|
59 |
active_charset_name = "iso-8859-1"; |
native = new char[n+1]; |
60 |
no_translation = 0; |
if (!native) |
61 |
active_charset = NULL; |
BUG (0); |
62 |
} |
|
63 |
else if( !ascii_strcasecmp( newset, "iso-8859-2" ) ) { |
n = WideCharToMultiByte (CP_UTF8, 0, result, -1, native, n, NULL, NULL); |
64 |
active_charset_name = "iso-8859-2"; |
if (n < 0) { |
65 |
no_translation = 0; |
free_if_alloc (result); |
66 |
active_charset = latin2_unicode; |
return NULL; |
67 |
} |
} |
|
else if( !ascii_strcasecmp (newset, "utf8" ) |
|
|
|| !ascii_strcasecmp(newset, "utf-8") ) { |
|
|
active_charset_name = "utf-8"; |
|
|
no_translation = 1; |
|
|
active_charset = NULL; |
|
|
} |
|
|
else |
|
|
return WPTERR_GENERAL; |
|
|
|
|
|
return 0; |
|
|
} |
|
68 |
|
|
69 |
const char* |
free_if_alloc (result); |
70 |
get_native_charset( void ) |
//native[n] = '\0'; |
71 |
{ |
return native; |
|
return active_charset_name; |
|
72 |
} |
} |
73 |
|
|
74 |
|
|
75 |
/**************** |
/** |
76 |
* Convert string, which is in native encoding to UTF8 and return the |
* Converts the given string, which is encoded in UTF-8, |
77 |
* newly allocated UTF8 string. |
* into the locale setting. |
78 |
*/ |
*/ |
79 |
char * |
char* |
80 |
native_to_utf8( const char *string ) |
utf8_to_native (const char *string) |
|
{ |
|
|
const byte *s; |
|
|
char *buffer; |
|
|
byte *p; |
|
|
size_t length=0; |
|
|
|
|
|
if (no_translation) |
|
|
buffer = strdup( string ); |
|
|
else if( active_charset ) { |
|
|
for(s=(byte*)string; *s; s++ ) { |
|
|
length++; |
|
|
if( *s & 0x80 ) |
|
|
length += 2; /* we may need 3 bytes */ |
|
|
} |
|
|
buffer = (char *)malloc( length + 1 ); |
|
|
for(p=(byte *)buffer, s=(byte *)string; *s; s++ ) { |
|
|
if( *s & 0x80 ) { |
|
|
WORD val = active_charset[ *s & 0x7f ]; |
|
|
if( val < 0x0800 ) { |
|
|
*p++ = 0xc0 | ( (val >> 6) & 0x1f ); |
|
|
*p++ = 0x80 | ( val & 0x3f ); |
|
|
} |
|
|
else { |
|
|
*p++ = 0xe0 | ( (val >> 12) & 0x0f ); |
|
|
*p++ = 0x80 | ( (val >> 6) & 0x3f ); |
|
|
*p++ = 0x80 | ( val & 0x3f ); |
|
|
} |
|
|
} |
|
|
else |
|
|
*p++ = *s; |
|
|
} |
|
|
*p = 0; |
|
|
} |
|
|
else { |
|
|
for(s=(byte*)string; *s; s++ ) { |
|
|
length++; |
|
|
if( *s & 0x80 ) |
|
|
length++; |
|
|
} |
|
|
buffer = (char*)malloc( length + 1 ); |
|
|
for(p=(byte*)buffer, s=(byte*)string; *s; s++ ) { |
|
|
if( *s & 0x80 ) { |
|
|
*p++ = 0xc0 | ((*s >> 6) & 3); |
|
|
*p++ = 0x80 | ( *s & 0x3f ); |
|
|
} |
|
|
else |
|
|
*p++ = *s; |
|
|
} |
|
|
*p = 0; |
|
|
} |
|
|
|
|
|
return buffer; |
|
|
} /* native_to_utf8 */ |
|
|
|
|
|
/**************** |
|
|
* Convert string, which is in UTF8, to native encoding. Replace illegal |


* encodings by some "\xnn" and quote all control characters. A |
|
|
* character with value DELIM will always be quoted, it must be a |
|
|
* vanilla ASCII character. |
|
|
*/ |
|
|
char * |
|
|
utf8_to_native( const char *string, size_t length, int delim ) |
|
81 |
{ |
{ |
82 |
int nleft; |
wchar_t *result; |
83 |
int i; |
char *native; |
84 |
byte encbuf[8]; |
int n; |
85 |
int encidx; |
|
86 |
const byte *s; |
n = MultiByteToWideChar (CP_UTF8, 0, string, -1, NULL, 0); |
87 |
size_t n; |
if (n < 0) |
88 |
byte *buffer = NULL, *p = NULL; |
return NULL; |
89 |
unsigned long val = 0; |
|
90 |
size_t slen; |
result = new wchar_t[n+1]; |
91 |
int resync = 0; |
if (!result) |
92 |
|
BUG (0); |
93 |
/* 1. pass (p==NULL): count the extended utf-8 characters */ |
|
94 |
/* 2. pass (p!=NULL): create string */ |
n = MultiByteToWideChar (CP_UTF8, 0, string, -1, result, n); |
95 |
for( ;; ) { |
if (n < 0) { |
96 |
for( slen=length, nleft=encidx=0, n=0, s=(byte*)string; slen; s++, slen-- ) { |
free_if_alloc (result); |
97 |
if( resync ) { |
return NULL; |
|
if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) { |
|
|
/* still invalid */ |
|
|
if( p ) { |
|
|
sprintf((char*)p, "\\x%02x", *s ); |
|
|
p += 4; |
|
|
} |
|
|
n += 4; |
|
|
continue; |
|
|
} |
|
|
resync = 0; |
|
|
} |
|
|
if( !nleft ) { |
|
|
if( !(*s & 0x80) ) { /* plain ascii */ |
|
|
if( *s < 0x20 || *s == 0x7f || *s == delim) { |
|
|
n++; |
|
|
if( p ) |
|
|
*p++ = '\\'; |
|
|
switch( *s ) { |
|
|
case '\n': n++; if( p ) *p++ = 'n'; break; |
|
|
case '\r': n++; if( p ) *p++ = 'r'; break; |
|
|
case '\f': n++; if( p ) *p++ = 'f'; break; |
|
|
case '\v': n++; if( p ) *p++ = 'v'; break; |
|
|
case '\b': n++; if( p ) *p++ = 'b'; break; |
|
|
case 0 : n++; if( p ) *p++ = '0'; break; |
|
|
default: |
|
|
n += 3; |
|
|
if ( p ) { |
|
|
sprintf( (char*)p, "x%02x", *s ); |
|
|
p += 3; |
|
|
} |
|
|
break; |
|
|
} |
|
|
} |
|
|
else { |
|
|
if( p ) *p++ = *s; |
|
|
n++; |
|
|
} |
|
|
} |
|
|
else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */ |
|
|
val = *s & 0x1f; |
|
|
nleft = 1; |
|
|
encidx = 0; |
|
|
encbuf[encidx++] = *s; |
|
|
} |
|
|
else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */ |
|
|
val = *s & 0x0f; |
|
|
nleft = 2; |
|
|
encidx = 0; |
|
|
encbuf[encidx++] = *s; |
|
|
} |
|
|
else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */ |
|
|
val = *s & 0x07; |
|
|
nleft = 3; |
|
|
encidx = 0; |
|
|
encbuf[encidx++] = *s; |
|
|
} |
|
|
else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */ |
|
|
val = *s & 0x03; |
|
|
nleft = 4; |
|
|
encidx = 0; |
|
|
encbuf[encidx++] = *s; |
|
|
} |
|
|
else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */ |
|
|
val = *s & 0x01; |
|
|
nleft = 5; |
|
|
encidx = 0; |
|
|
encbuf[encidx++] = *s; |
|
|
} |
|
|
else { /* invalid encoding: print as \xnn */ |
|
|
if( p ) { |
|
|
sprintf((char*)p, "\\x%02x", *s ); |
|
|
p += 4; |
|
|
} |
|
|
n += 4; |
|
|
resync = 1; |
|
|
} |
|
|
} |
|
|
else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */ |
|
|
if( p ) { |
|
|
for(i=0; i < encidx; i++ ) { |
|
|
sprintf((char*)p, "\\x%02x", encbuf[i] ); |
|
|
p += 4; |
|
|
} |
|
|
sprintf((char*)p, "\\x%02x", *s ); |
|
|
p += 4; |
|
|
} |
|
|
n += 4 + 4*encidx; |
|
|
nleft = 0; |
|
|
encidx = 0; |
|
|
resync = 1; |
|
|
} |
|
|
else { |
|
|
encbuf[encidx++] = *s; |
|
|
val <<= 6; |
|
|
val |= *s & 0x3f; |
|
|
if( !--nleft ) { /* ready */ |
|
|
if (no_translation) { |
|
|
if( p ) { |
|
|
for(i=0; i < encidx; i++ ) |
|
|
*p++ = encbuf[i]; |
|
|
} |
|
|
n += encidx; |
|
|
encidx = 0; |
|
|
} |
|
|
else if( active_charset ) { /* table lookup */ |
|
|
for(i=0; i < 128; i++ ) { |
|
|
if( active_charset[i] == val ) |
|
|
break; |
|
|
} |
|
|
if( i < 128 ) { /* we can print this one */ |
|
|
if( p ) *p++ = i+128; |
|
|
n++; |
|
|
} |
|
|
else { /* we do not have a translation: print utf8 */ |
|
|
if( p ) { |
|
|
for(i=0; i < encidx; i++ ) { |
|
|
sprintf((char*)p, "\\x%02x", encbuf[i] ); |
|
|
p += 4; |
|
|
} |
|
|
} |
|
|
n += encidx*4; |
|
|
encidx = 0; |
|
|
} |
|
|
} |
|
|
else { /* native set */ |
|
|
if( val >= 0x80 && val < 256 ) { |
|
|
n++; /* we can simply print this character */ |
|
|
if( p ) *p++ = val; |
|
|
} |
|
|
else { /* we do not have a translation: print utf8 */ |
|
|
if( p ) { |
|
|
for(i=0; i < encidx; i++ ) { |
|
|
sprintf((char*)p, "\\x%02x", encbuf[i] ); |
|
|
p += 4; |
|
|
} |
|
|
} |
|
|
n += encidx*4; |
|
|
encidx = 0; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
} |
|
|
} |
|
|
if( !buffer ) { /* allocate the buffer after the first pass */ |
|
|
buffer = p = (byte *)malloc( n + 1 ); |
|
|
} |
|
|
else { |
|
|
*p = 0; /* make a string */ |
|
|
return (char*)buffer; |
|
|
} |
|
98 |
} |
} |
|
} |
|
99 |
|
|
100 |
|
n = WideCharToMultiByte (GetACP (), 0, result, -1, NULL, 0, NULL, NULL); |
101 |
static void |
if (n < 0) |
102 |
conv_charset (byte *string, size_t size, int what) |
return NULL; |
103 |
{ |
|
104 |
int i; |
native = new char[n+1]; |
105 |
|
if (!native) |
106 |
if( what == 0 ) { |
BUG (0); |
107 |
for( i = 0; i < size; i++, string++ ) { |
|
108 |
switch( *string ) { |
n = WideCharToMultiByte (GetACP (), 0, result, -1, native, n, NULL, NULL); |
109 |
case 0xa0: *string = 0xff; break; /* nobreakspace */ |
if (n < 0) { |
110 |
case 0xa1: *string = 0xad; break; /* exclamdown */ |
free_if_alloc (result); |
111 |
case 0xa2: *string = 0xbd; break; /* cent */ |
return NULL; |
|
case 0xa3: *string = 0x9c; break; /* sterling */ |
|
|
case 0xa4: *string = 0xcf; break; /* currency */ |
|
|
case 0xa5: *string = 0xbe; break; /* yen */ |
|
|
case 0xa6: *string = 0xdd; break; /* brokenbar */ |
|
|
case 0xa7: *string = 0xf5; break; /* section */ |
|
|
case 0xa8: *string = 0xf9; break; /* diaeresis */ |
|
|
case 0xa9: *string = 0xb8; break; /* copyright */ |
|
|
case 0xaa: *string = 0xa6; break; /* ordfeminine */ |
|
|
case 0xab: *string = 0xae; break; /* guillemotleft */ |
|
|
case 0xac: *string = 0xaa; break; /* notsign */ |
|
|
case 0xad: *string = 0xf0; break; /* hyphen */ |
|
|
case 0xae: *string = 0xa9; break; /* registered */ |
|
|
case 0xaf: *string = 0xee; break; /* macron */ |
|
|
case 0xb0: *string = 0xf8; break; /* degree */ |
|
|
case 0xb1: *string = 0xf1; break; /* plusminus */ |
|
|
case 0xb2: *string = 0xfd; break; /* twosuperior */ |
|
|
case 0xb3: *string = 0xfc; break; /* threesuperior */ |
|
|
case 0xb4: *string = 0xef; break; /* acute */ |
|
|
case 0xb5: *string = 0xe6; break; /* mu */ |
|
|
case 0xb6: *string = 0xf4; break; /* paragraph */ |
|
|
case 0xb7: *string = 0xfa; break; /* periodcentered */ |
|
|
case 0xb8: *string = 0xf7; break; /* cedilla */ |
|
|
case 0xb9: *string = 0xfb; break; /* onesuperior */ |
|
|
case 0xba: *string = 0xa7; break; /* masculine */ |
|
|
case 0xbb: *string = 0xaf; break; /* guillemotright */ |
|
|
case 0xbc: *string = 0xac; break; /* onequarter */ |
|
|
case 0xbd: *string = 0xab; break; /* onehalf */ |
|
|
case 0xbe: *string = 0xf3; break; /* threequarters */ |
|
|
case 0xbf: *string = 0xa8; break; /* questiondown */ |
|
|
case 0xc0: *string = 0xb7; break; /* Agrave */ |
|
|
case 0xc1: *string = 0xb5; break; /* Aacute */ |
|
|
case 0xc2: *string = 0xb6; break; /* Acircumflex */ |
|
|
case 0xc3: *string = 0xc7; break; /* Atilde */ |
|
|
case 0xc4: *string = 0x8e; break; /* Adiaeresis */ |
|
|
case 0xc5: *string = 0x8f; break; /* Aring */ |
|
|
case 0xc6: *string = 0x92; break; /* AE */ |
|
|
case 0xc7: *string = 0x80; break; /* Ccedilla */ |
|
|
case 0xc8: *string = 0xd4; break; /* Egrave */ |
|
|
case 0xc9: *string = 0x90; break; /* Eacute */ |
|
|
case 0xca: *string = 0xd2; break; /* Ecircumflex */ |
|
|
case 0xcb: *string = 0xd3; break; /* Ediaeresis */ |
|
|
case 0xcc: *string = 0xde; break; /* Igrave */ |
|
|
case 0xcd: *string = 0xd6; break; /* Iacute */ |
|
|
case 0xce: *string = 0xd7; break; /* Icircumflex */ |
|
|
case 0xcf: *string = 0xd8; break; /* Idiaeresis */ |
|
|
case 0xd0: *string = 0xd1; break; /* Eth */ |
|
|
case 0xd1: *string = 0xa5; break; /* Ntilde */ |
|
|
case 0xd2: *string = 0xe3; break; /* Ograve */ |
|
|
case 0xd3: *string = 0xe0; break; /* Oacute */ |
|
|
case 0xd4: *string = 0xe2; break; /* Ocircumflex */ |
|
|
case 0xd5: *string = 0xe5; break; /* Otilde */ |
|
|
case 0xd6: *string = 0x99; break; /* Odiaeresis */ |
|
|
case 0xd7: *string = 0x9e; break; /* multiply */ |
|
|
case 0xd8: *string = 0x9d; break; /* Ooblique */ |
|
|
case 0xd9: *string = 0xeb; break; /* Ugrave */ |
|
|
case 0xda: *string = 0xe9; break; /* Uacute */ |
|
|
case 0xdb: *string = 0xea; break; /* Ucircumflex */ |
|
|
case 0xdc: *string = 0x9a; break; /* Udiaeresis */ |
|
|
case 0xdd: *string = 0xed; break; /* Yacute */ |
|
|
case 0xde: *string = 0xe8; break; /* Thorn */ |
|
|
case 0xdf: *string = 0xe1; break; /* ssharp */ |
|
|
case 0xe0: *string = 0x85; break; /* agrave */ |
|
|
case 0xe1: *string = 0xa0; break; /* aacute */ |
|
|
case 0xe2: *string = 0x83; break; /* acircumflex */ |
|
|
case 0xe3: *string = 0xc6; break; /* atilde */ |
|
|
case 0xe4: *string = 0x84; break; /* adiaeresis */ |
|
|
case 0xe5: *string = 0x86; break; /* aring */ |
|
|
case 0xe6: *string = 0x91; break; /* ae */ |
|
|
case 0xe7: *string = 0x87; break; /* ccedilla */ |
|
|
case 0xe8: *string = 0x8a; break; /* egrave */ |
|
|
case 0xe9: *string = 0x82; break; /* eacute */ |
|
|
case 0xea: *string = 0x88; break; /* ecircumflex */ |
|
|
case 0xeb: *string = 0x89; break; /* ediaeresis */ |
|
|
case 0xec: *string = 0x8d; break; /* igrave */ |
|
|
case 0xed: *string = 0xa1; break; /* iacute */ |
|
|
case 0xee: *string = 0x8c; break; /* icircumflex */ |
|
|
case 0xef: *string = 0x8b; break; /* idiaeresis */ |
|
|
case 0xf0: *string = 0xd0; break; /* eth */ |
|
|
case 0xf1: *string = 0xa4; break; /* ntilde */ |
|
|
case 0xf2: *string = 0x95; break; /* ograve */ |
|
|
case 0xf3: *string = 0xa2; break; /* oacute */ |
|
|
case 0xf4: *string = 0x93; break; /* ocircumflex */ |
|
|
case 0xf5: *string = 0xe4; break; /* otilde */ |
|
|
case 0xf6: *string = 0x94; break; /* odiaeresis */ |
|
|
case 0xf7: *string = 0xf6; break; /* division */ |
|
|
case 0xf8: *string = 0x9b; break; /* oslash */ |
|
|
case 0xf9: *string = 0x97; break; /* ugrave */ |
|
|
case 0xfa: *string = 0xa3; break; /* uacute */ |
|
|
case 0xfb: *string = 0x96; break; /* ucircumflex */ |
|
|
case 0xfc: *string = 0x81; break; /* udiaeresis */ |
|
|
case 0xfd: *string = 0xec; break; /* yacute */ |
|
|
case 0xfe: *string = 0xe7; break; /* thorn */ |
|
|
case 0xff: *string = 0x98; break; /* ydiaeresis */ |
|
|
default : break; |
|
|
} |
|
|
} |
|
112 |
} |
} |
113 |
else { |
|
114 |
for( i = 0; i < size; i++, string++ ) { |
free_if_alloc (result); |
115 |
switch( *string ) { |
//native[n] = '\0'; |
116 |
case 0xff: *string = 0xa0; break; |
return native; |
|
case 0xad: *string = 0xa1; break; |
|
|
case 0xbd: *string = 0xa2; break; |
|
|
case 0x9c: *string = 0xa3; break; |
|
|
case 0xcf: *string = 0xa4; break; |
|
|
case 0xbe: *string = 0xa5; break; |
|
|
case 0xdd: *string = 0xa6; break; |
|
|
case 0xf5: *string = 0xa7; break; |
|
|
case 0xf9: *string = 0xa8; break; |
|
|
case 0xb8: *string = 0xa9; break; |
|
|
case 0xa6: *string = 0xaa; break; |
|
|
case 0xae: *string = 0xab; break; |
|
|
case 0xaa: *string = 0xac; break; |
|
|
case 0xf0: *string = 0xad; break; |
|
|
case 0xa9: *string = 0xae; break; |
|
|
case 0xee: *string = 0xaf; break; |
|
|
case 0xf8: *string = 0xb0; break; |
|
|
case 0xf1: *string = 0xb1; break; |
|
|
case 0xfd: *string = 0xb2; break; |
|
|
case 0xfc: *string = 0xb3; break; |
|
|
case 0xef: *string = 0xb4; break; |
|
|
case 0xe6: *string = 0xb5; break; |
|
|
case 0xf4: *string = 0xb6; break; |
|
|
case 0xfa: *string = 0xb7; break; |
|
|
case 0xf7: *string = 0xb8; break; |
|
|
case 0xfb: *string = 0xb9; break; |
|
|
case 0xa7: *string = 0xba; break; |
|
|
case 0xaf: *string = 0xbb; break; |
|
|
case 0xac: *string = 0xbc; break; |
|
|
case 0xab: *string = 0xbd; break; |
|
|
case 0xf3: *string = 0xbe; break; |
|
|
case 0xa8: *string = 0xbf; break; |
|
|
case 0xb7: *string = 0xc0; break; |
|
|
case 0xb5: *string = 0xc1; break; |
|
|
case 0xb6: *string = 0xc2; break; |
|
|
case 0xc7: *string = 0xc3; break; |
|
|
case 0x8e: *string = 0xc4; break; |
|
|
case 0x8f: *string = 0xc5; break; |
|
|
case 0x92: *string = 0xc6; break; |
|
|
case 0x80: *string = 0xc7; break; |
|
|
case 0xd4: *string = 0xc8; break; |
|
|
case 0x90: *string = 0xc9; break; |
|
|
case 0xd2: *string = 0xca; break; |
|
|
case 0xd3: *string = 0xcb; break; |
|
|
case 0xde: *string = 0xcc; break; |
|
|
case 0xd6: *string = 0xcd; break; |
|
|
case 0xd7: *string = 0xce; break; |
|
|
case 0xd8: *string = 0xcf; break; |
|
|
case 0xd1: *string = 0xd0; break; |
|
|
case 0xa5: *string = 0xd1; break; |
|
|
case 0xe3: *string = 0xd2; break; |
|
|
case 0xe0: *string = 0xd3; break; |
|
|
case 0xe2: *string = 0xd4; break; |
|
|
case 0xe5: *string = 0xd5; break; |
|
|
case 0x99: *string = 0xd6; break; |
|
|
case 0x9e: *string = 0xd7; break; |
|
|
case 0x9d: *string = 0xd8; break; |
|
|
case 0xeb: *string = 0xd9; break; |
|
|
case 0xe9: *string = 0xda; break; |
|
|
case 0xea: *string = 0xdb; break; |
|
|
case 0x9a: *string = 0xdc; break; |
|
|
case 0xed: *string = 0xdd; break; |
|
|
case 0xe8: *string = 0xde; break; |
|
|
case 0xe1: *string = 0xdf; break; |
|
|
case 0x85: *string = 0xe0; break; |
|
|
case 0xa0: *string = 0xe1; break; |
|
|
case 0x83: *string = 0xe2; break; |
|
|
case 0xc6: *string = 0xe3; break; |
|
|
case 0x84: *string = 0xe4; break; |
|
|
case 0x86: *string = 0xe5; break; |
|
|
case 0x91: *string = 0xe6; break; |
|
|
case 0x87: *string = 0xe7; break; |
|
|
case 0x8a: *string = 0xe8; break; |
|
|
case 0x82: *string = 0xe9; break; |
|
|
case 0x88: *string = 0xea; break; |
|
|
case 0x89: *string = 0xeb; break; |
|
|
case 0x8d: *string = 0xec; break; |
|
|
case 0xa1: *string = 0xed; break; |
|
|
case 0x8c: *string = 0xee; break; |
|
|
case 0x8b: *string = 0xef; break; |
|
|
case 0xd0: *string = 0xf0; break; |
|
|
case 0xa4: *string = 0xf1; break; |
|
|
case 0x95: *string = 0xf2; break; |
|
|
case 0xa2: *string = 0xf3; break; |
|
|
case 0x93: *string = 0xf4; break; |
|
|
case 0xe4: *string = 0xf5; break; |
|
|
case 0x94: *string = 0xf6; break; |
|
|
case 0xf6: *string = 0xf7; break; |
|
|
case 0x9b: *string = 0xf8; break; |
|
|
case 0x97: *string = 0xf9; break; |
|
|
case 0xa3: *string = 0xfa; break; |
|
|
case 0x96: *string = 0xfb; break; |
|
|
case 0x81: *string = 0xfc; break; |
|
|
case 0xec: *string = 0xfd; break; |
|
|
case 0xe7: *string = 0xfe; break; |
|
|
case 0x98: *string = 0xff; break; |
|
|
default : break; |
|
|
} |
|
|
} |
|
|
} |
|
|
} /* conv_charset */ |
|
|
|
|
|
|
|
|
/* XXX: the conv_charset() call fails when the user-id was created |
|
|
with iso-8859-1 but it is assumed that CP850 (gpg console) is used. */ |
|
|
|
|
|
char* |
|
|
utf8_to_wincp (const char * s, size_t len) |
|
|
{ |
|
|
char *decs; |
|
|
decs = utf8_to_native (s, len, 0); |
|
|
conv_charset ((byte *)decs, strlen (decs), 1); |
|
|
return decs; |
|
|
} |
|
|
|
|
|
|
|
|
char* |
|
|
wincp_to_utf8 (const char * s, size_t len) |
|
|
{ |
|
|
char * encs; |
|
|
conv_charset ((byte *)s, len, 0); |
|
|
encs = native_to_utf8 (s); |
|
|
return encs; |
|
117 |
} |
} |
118 |
|
|
119 |
|
|
120 |
|
/** |
121 |
|
* Returns -1 if the given string contains any 8-bit characters. |
122 |
|
* This is a helper to decide when to use UTF8 encoding. |
123 |
|
*/ |
124 |
int |
int |
125 |
is_8bit_string (const char * str) |
is_8bit_string (const char *str) |
126 |
{ |
{ |
127 |
size_t i; |
for (size_t i = 0; i < strlen (str); i++) { |
|
|
|
|
for (i = 0; i < strlen (str); i++) { |
|
128 |
if (str[i] & 0x80) |
if (str[i] & 0x80) |
129 |
return -1; |
return -1; |
130 |
} |
} |