1 |
/* wptUTF8.cpp - UTF8 conversion |
/* wptUTF8.cpp - UTF8 conversion |
|
* Copyright (C) 1994, 1998-2001 Free Software Foundation, Inc. |
|
2 |
* Copyright (C) 2002, 2004, 2005, 2006 Timo Schulz |
* Copyright (C) 2002, 2004, 2005, 2006 Timo Schulz |
3 |
* |
* |
4 |
* This file is part of WinPT. |
* This file is part of WinPT. |
32 |
#include "wptErrors.h" |
#include "wptErrors.h" |
33 |
|
|
34 |
|
|
35 |
/* convert latin1 string @string into utf8. */ |
char* |
36 |
char * |
native_to_utf8 (const char *string) |
|
native_to_utf8( const char *string ) |
|
37 |
{ |
{ |
38 |
const byte *s; |
wchar_t *result; |
39 |
char *buffer; |
char *native; |
40 |
byte *p; |
int n; |
41 |
size_t length=0; |
|
42 |
|
n = MultiByteToWideChar (GetACP (), 0, string, -1, NULL, 0); |
43 |
for (s=(byte*)string; *s; s++) { |
if (n < 0) |
44 |
length++; |
return NULL; |
45 |
if (*s & 0x80) |
|
46 |
length++; |
result = new wchar_t[n+1]; |
47 |
|
if (!result) |
48 |
|
BUG (0); |
49 |
|
|
50 |
|
n = MultiByteToWideChar (GetACP (), 0, string, -1, result, n); |
51 |
|
if (n < 0) { |
52 |
|
free_if_alloc (result); |
53 |
|
return NULL; |
54 |
} |
} |
|
buffer = (char*)malloc (length + 1); |
|
|
for (p = (byte*)buffer, s=(byte*)string; *s; s++) { |
|
|
if (*s & 0x80) { |
|
|
*p++ = 0xc0 | ((*s >> 6) & 3); |
|
|
*p++ = 0x80 | ( *s & 0x3f ); |
|
|
} |
|
|
else |
|
|
*p++ = *s; |
|
|
} |
|
|
*p = 0; |
|
|
return buffer; |
|
|
} |
|
55 |
|
|
56 |
|
n = WideCharToMultiByte (CP_UTF8, 0, result, -1, NULL, 0, NULL, NULL); |
57 |
|
if (n < 0) |
58 |
|
return NULL; |
59 |
|
|
60 |
|
native = new char[n+1]; |
61 |
|
if (!native) |
62 |
|
BUG (0); |
63 |
|
|
64 |
|
n = WideCharToMultiByte (CP_UTF8, 0, result, -1, native, n, NULL, NULL); |
65 |
|
if (n < 0) { |
66 |
|
free_if_alloc (result); |
67 |
|
return NULL; |
68 |
|
} |
69 |
|
|
70 |
|
free_if_alloc (result); |
71 |
|
return native; |
72 |
|
} |
73 |
|
|
74 |
|
|
75 |
/* Convert utf8 string @string to native CP. */ |
/* Convert utf8 string @string to native CP. */ |
76 |
static char* |
char* |
77 |
utf8_to_native (const char *string) |
utf8_to_native (const char *string) |
78 |
{ |
{ |
79 |
wchar_t *result; |
wchar_t *result; |
80 |
char *native; |
char *native; |
81 |
int n; |
int n; |
82 |
|
|
|
/* Convert utf8 to unicode. */ |
|
83 |
n = MultiByteToWideChar (CP_UTF8, 0, string, -1, NULL, 0); |
n = MultiByteToWideChar (CP_UTF8, 0, string, -1, NULL, 0); |
84 |
if (n < 0) |
if (n < 0) |
85 |
return NULL; |
return NULL; |
94 |
return NULL; |
return NULL; |
95 |
} |
} |
96 |
|
|
|
/* Convert wide char into native char. */ |
|
|
/* |
|
97 |
n = WideCharToMultiByte (GetACP (), 0, result, -1, NULL, 0, NULL, NULL); |
n = WideCharToMultiByte (GetACP (), 0, result, -1, NULL, 0, NULL, NULL); |
98 |
if (n < 0) |
if (n < 0) |
99 |
return NULL; |
return NULL; |
|
*/ |
|
|
n = wcstombs (NULL, result, wcslen (result)); |
|
|
if (n < 0) |
|
|
return NULL; |
|
100 |
|
|
101 |
native = (char*)malloc (n+1); |
native = (char*)malloc (n+1); |
102 |
if (!native) |
if (!native) |
103 |
BUG (0); |
BUG (0); |
104 |
|
|
105 |
/* |
n = WideCharToMultiByte (GetACP (), 0, result, -1, native, n, NULL, NULL); |
|
n = WideCharToMultiByte (CP_ACP, 0, string, -1, result, n, NULL, NULL); |
|
|
if (n < 0) { |
|
|
free (result); |
|
|
return NULL; |
|
|
} |
|
|
*/ |
|
|
n = wcstombs (native, result, -1); |
|
106 |
if (n < 0) { |
if (n < 0) { |
107 |
free (result); |
free (result); |
108 |
return NULL; |
return NULL; |
109 |
} |
} |
110 |
|
|
111 |
|
free (result); |
112 |
return native; |
return native; |
113 |
} |
} |
114 |
|
|
|
/* CP850 -> CP1251 */ |
|
|
static void |
|
|
conv_charset (byte *string, size_t size, int what) |
|
|
{ |
|
|
size_t i; |
|
|
|
|
|
if( what == 0 ) { |
|
|
for( i = 0; i < size; i++, string++ ) { |
|
|
switch( *string ) { |
|
|
case 0xa0: *string = 0xff; break; /* nobreakspace */ |
|
|
case 0xa1: *string = 0xad; break; /* exclamdown */ |
|
|
case 0xa2: *string = 0xbd; break; /* cent */ |
|
|
case 0xa3: *string = 0x9c; break; /* sterling */ |
|
|
case 0xa4: *string = 0xcf; break; /* currency */ |
|
|
case 0xa5: *string = 0xbe; break; /* yen */ |
|
|
case 0xa6: *string = 0xdd; break; /* brokenbar */ |
|
|
case 0xa7: *string = 0xf5; break; /* section */ |
|
|
case 0xa8: *string = 0xf9; break; /* diaeresis */ |
|
|
case 0xa9: *string = 0xb8; break; /* copyright */ |
|
|
case 0xaa: *string = 0xa6; break; /* ordfeminine */ |
|
|
case 0xab: *string = 0xae; break; /* guillemotleft */ |
|
|
case 0xac: *string = 0xaa; break; /* notsign */ |
|
|
case 0xad: *string = 0xf0; break; /* hyphen */ |
|
|
case 0xae: *string = 0xa9; break; /* registered */ |
|
|
case 0xaf: *string = 0xee; break; /* macron */ |
|
|
case 0xb0: *string = 0xf8; break; /* degree */ |
|
|
case 0xb1: *string = 0xf1; break; /* plusminus */ |
|
|
case 0xb2: *string = 0xfd; break; /* twosuperior */ |
|
|
case 0xb3: *string = 0xfc; break; /* threesuperior */ |
|
|
case 0xb4: *string = 0xef; break; /* acute */ |
|
|
case 0xb5: *string = 0xe6; break; /* mu */ |
|
|
case 0xb6: *string = 0xf4; break; /* paragraph */ |
|
|
case 0xb7: *string = 0xfa; break; /* periodcentered */ |
|
|
case 0xb8: *string = 0xf7; break; /* cedilla */ |
|
|
case 0xb9: *string = 0xfb; break; /* onesuperior */ |
|
|
case 0xba: *string = 0xa7; break; /* masculine */ |
|
|
case 0xbb: *string = 0xaf; break; /* guillemotright */ |
|
|
case 0xbc: *string = 0xac; break; /* onequarter */ |
|
|
case 0xbd: *string = 0xab; break; /* onehalf */ |
|
|
case 0xbe: *string = 0xf3; break; /* threequarters */ |
|
|
case 0xbf: *string = 0xa8; break; /* questiondown */ |
|
|
case 0xc0: *string = 0xb7; break; /* Agrave */ |
|
|
case 0xc1: *string = 0xb5; break; /* Aacute */ |
|
|
case 0xc2: *string = 0xb6; break; /* Acircumflex */ |
|
|
case 0xc3: *string = 0xc7; break; /* Atilde */ |
|
|
case 0xc4: *string = 0x8e; break; /* Adiaeresis */ |
|
|
case 0xc5: *string = 0x8f; break; /* Aring */ |
|
|
case 0xc6: *string = 0x92; break; /* AE */ |
|
|
case 0xc7: *string = 0x80; break; /* Ccedilla */ |
|
|
case 0xc8: *string = 0xd4; break; /* Egrave */ |
|
|
case 0xc9: *string = 0x90; break; /* Eacute */ |
|
|
case 0xca: *string = 0xd2; break; /* Ecircumflex */ |
|
|
case 0xcb: *string = 0xd3; break; /* Ediaeresis */ |
|
|
case 0xcc: *string = 0xde; break; /* Igrave */ |
|
|
case 0xcd: *string = 0xd6; break; /* Iacute */ |
|
|
case 0xce: *string = 0xd7; break; /* Icircumflex */ |
|
|
case 0xcf: *string = 0xd8; break; /* Idiaeresis */ |
|
|
case 0xd0: *string = 0xd1; break; /* Eth */ |
|
|
case 0xd1: *string = 0xa5; break; /* Ntilde */ |
|
|
case 0xd2: *string = 0xe3; break; /* Ograve */ |
|
|
case 0xd3: *string = 0xe0; break; /* Oacute */ |
|
|
case 0xd4: *string = 0xe2; break; /* Ocircumflex */ |
|
|
case 0xd5: *string = 0xe5; break; /* Otilde */ |
|
|
case 0xd6: *string = 0x99; break; /* Odiaeresis */ |
|
|
case 0xd7: *string = 0x9e; break; /* multiply */ |
|
|
case 0xd8: *string = 0x9d; break; /* Ooblique */ |
|
|
case 0xd9: *string = 0xeb; break; /* Ugrave */ |
|
|
case 0xda: *string = 0xe9; break; /* Uacute */ |
|
|
case 0xdb: *string = 0xea; break; /* Ucircumflex */ |
|
|
case 0xdc: *string = 0x9a; break; /* Udiaeresis */ |
|
|
case 0xdd: *string = 0xed; break; /* Yacute */ |
|
|
case 0xde: *string = 0xe8; break; /* Thorn */ |
|
|
case 0xdf: *string = 0xe1; break; /* ssharp */ |
|
|
case 0xe0: *string = 0x85; break; /* agrave */ |
|
|
case 0xe1: *string = 0xa0; break; /* aacute */ |
|
|
case 0xe2: *string = 0x83; break; /* acircumflex */ |
|
|
case 0xe3: *string = 0xc6; break; /* atilde */ |
|
|
case 0xe4: *string = 0x84; break; /* adiaeresis */ |
|
|
case 0xe5: *string = 0x86; break; /* aring */ |
|
|
case 0xe6: *string = 0x91; break; /* ae */ |
|
|
case 0xe7: *string = 0x87; break; /* ccedilla */ |
|
|
case 0xe8: *string = 0x8a; break; /* egrave */ |
|
|
case 0xe9: *string = 0x82; break; /* eacute */ |
|
|
case 0xea: *string = 0x88; break; /* ecircumflex */ |
|
|
case 0xeb: *string = 0x89; break; /* ediaeresis */ |
|
|
case 0xec: *string = 0x8d; break; /* igrave */ |
|
|
case 0xed: *string = 0xa1; break; /* iacute */ |
|
|
case 0xee: *string = 0x8c; break; /* icircumflex */ |
|
|
case 0xef: *string = 0x8b; break; /* idiaeresis */ |
|
|
case 0xf0: *string = 0xd0; break; /* eth */ |
|
|
case 0xf1: *string = 0xa4; break; /* ntilde */ |
|
|
case 0xf2: *string = 0x95; break; /* ograve */ |
|
|
case 0xf3: *string = 0xa2; break; /* oacute */ |
|
|
case 0xf4: *string = 0x93; break; /* ocircumflex */ |
|
|
case 0xf5: *string = 0xe4; break; /* otilde */ |
|
|
case 0xf6: *string = 0x94; break; /* odiaeresis */ |
|
|
case 0xf7: *string = 0xf6; break; /* division */ |
|
|
case 0xf8: *string = 0x9b; break; /* oslash */ |
|
|
case 0xf9: *string = 0x97; break; /* ugrave */ |
|
|
case 0xfa: *string = 0xa3; break; /* uacute */ |
|
|
case 0xfb: *string = 0x96; break; /* ucircumflex */ |
|
|
case 0xfc: *string = 0x81; break; /* udiaeresis */ |
|
|
case 0xfd: *string = 0xec; break; /* yacute */ |
|
|
case 0xfe: *string = 0xe7; break; /* thorn */ |
|
|
case 0xff: *string = 0x98; break; /* ydiaeresis */ |
|
|
default : break; |
|
|
} |
|
|
} |
|
|
} |
|
|
else { |
|
|
for( i = 0; i < size; i++, string++ ) { |
|
|
switch( *string ) { |
|
|
case 0xff: *string = 0xa0; break; |
|
|
case 0xad: *string = 0xa1; break; |
|
|
case 0xbd: *string = 0xa2; break; |
|
|
case 0x9c: *string = 0xa3; break; |
|
|
case 0xcf: *string = 0xa4; break; |
|
|
case 0xbe: *string = 0xa5; break; |
|
|
case 0xdd: *string = 0xa6; break; |
|
|
case 0xf5: *string = 0xa7; break; |
|
|
case 0xf9: *string = 0xa8; break; |
|
|
case 0xb8: *string = 0xa9; break; |
|
|
case 0xa6: *string = 0xaa; break; |
|
|
case 0xae: *string = 0xab; break; |
|
|
case 0xaa: *string = 0xac; break; |
|
|
case 0xf0: *string = 0xad; break; |
|
|
case 0xa9: *string = 0xae; break; |
|
|
case 0xee: *string = 0xaf; break; |
|
|
case 0xf8: *string = 0xb0; break; |
|
|
case 0xf1: *string = 0xb1; break; |
|
|
case 0xfd: *string = 0xb2; break; |
|
|
case 0xfc: *string = 0xb3; break; |
|
|
case 0xef: *string = 0xb4; break; |
|
|
case 0xe6: *string = 0xb5; break; |
|
|
case 0xf4: *string = 0xb6; break; |
|
|
case 0xfa: *string = 0xb7; break; |
|
|
case 0xf7: *string = 0xb8; break; |
|
|
case 0xfb: *string = 0xb9; break; |
|
|
case 0xa7: *string = 0xba; break; |
|
|
case 0xaf: *string = 0xbb; break; |
|
|
case 0xac: *string = 0xbc; break; |
|
|
case 0xab: *string = 0xbd; break; |
|
|
case 0xf3: *string = 0xbe; break; |
|
|
case 0xa8: *string = 0xbf; break; |
|
|
case 0xb7: *string = 0xc0; break; |
|
|
case 0xb5: *string = 0xc1; break; |
|
|
case 0xb6: *string = 0xc2; break; |
|
|
case 0xc7: *string = 0xc3; break; |
|
|
case 0x8e: *string = 0xc4; break; |
|
|
case 0x8f: *string = 0xc5; break; |
|
|
case 0x92: *string = 0xc6; break; |
|
|
case 0x80: *string = 0xc7; break; |
|
|
case 0xd4: *string = 0xc8; break; |
|
|
case 0x90: *string = 0xc9; break; |
|
|
case 0xd2: *string = 0xca; break; |
|
|
case 0xd3: *string = 0xcb; break; |
|
|
case 0xde: *string = 0xcc; break; |
|
|
case 0xd6: *string = 0xcd; break; |
|
|
case 0xd7: *string = 0xce; break; |
|
|
case 0xd8: *string = 0xcf; break; |
|
|
case 0xd1: *string = 0xd0; break; |
|
|
case 0xa5: *string = 0xd1; break; |
|
|
case 0xe3: *string = 0xd2; break; |
|
|
case 0xe0: *string = 0xd3; break; |
|
|
case 0xe2: *string = 0xd4; break; |
|
|
case 0xe5: *string = 0xd5; break; |
|
|
case 0x99: *string = 0xd6; break; |
|
|
case 0x9e: *string = 0xd7; break; |
|
|
case 0x9d: *string = 0xd8; break; |
|
|
case 0xeb: *string = 0xd9; break; |
|
|
case 0xe9: *string = 0xda; break; |
|
|
case 0xea: *string = 0xdb; break; |
|
|
case 0x9a: *string = 0xdc; break; |
|
|
case 0xed: *string = 0xdd; break; |
|
|
case 0xe8: *string = 0xde; break; |
|
|
case 0xe1: *string = 0xdf; break; |
|
|
case 0x85: *string = 0xe0; break; |
|
|
case 0xa0: *string = 0xe1; break; |
|
|
case 0x83: *string = 0xe2; break; |
|
|
case 0xc6: *string = 0xe3; break; |
|
|
case 0x84: *string = 0xe4; break; |
|
|
case 0x86: *string = 0xe5; break; |
|
|
case 0x91: *string = 0xe6; break; |
|
|
case 0x87: *string = 0xe7; break; |
|
|
case 0x8a: *string = 0xe8; break; |
|
|
case 0x82: *string = 0xe9; break; |
|
|
case 0x88: *string = 0xea; break; |
|
|
case 0x89: *string = 0xeb; break; |
|
|
case 0x8d: *string = 0xec; break; |
|
|
case 0xa1: *string = 0xed; break; |
|
|
case 0x8c: *string = 0xee; break; |
|
|
case 0x8b: *string = 0xef; break; |
|
|
case 0xd0: *string = 0xf0; break; |
|
|
case 0xa4: *string = 0xf1; break; |
|
|
case 0x95: *string = 0xf2; break; |
|
|
case 0xa2: *string = 0xf3; break; |
|
|
case 0x93: *string = 0xf4; break; |
|
|
case 0xe4: *string = 0xf5; break; |
|
|
case 0x94: *string = 0xf6; break; |
|
|
case 0xf6: *string = 0xf7; break; |
|
|
case 0x9b: *string = 0xf8; break; |
|
|
case 0x97: *string = 0xf9; break; |
|
|
case 0xa3: *string = 0xfa; break; |
|
|
case 0x96: *string = 0xfb; break; |
|
|
case 0x81: *string = 0xfc; break; |
|
|
case 0xec: *string = 0xfd; break; |
|
|
case 0xe7: *string = 0xfe; break; |
|
|
case 0x98: *string = 0xff; break; |
|
|
default : break; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
/* XXX: the conv_charset() call fails when the user-id was created |
|
|
with iso-8859-1 but it is assumed that CP850 (gpg console) is used. */ |
|
|
|
|
|
char* |
|
|
utf8_to_wincp (const char * s, size_t len) |
|
|
{ |
|
|
char *decs; |
|
|
decs = utf8_to_native (s); |
|
|
conv_charset ((byte *)decs, strlen (decs), 1); |
|
|
return decs; |
|
|
} |
|
|
|
|
|
|
|
|
char* |
|
|
wincp_to_utf8 (const char * s, size_t len) |
|
|
{ |
|
|
char * encs; |
|
|
conv_charset ((byte *)s, len, 0); |
|
|
encs = native_to_utf8 (s); |
|
|
return encs; |
|
|
} |
|
|
|
|
115 |
|
|
116 |
|
/* Return -1 if the string contains any 8-bit characters. */ |
117 |
int |
int |
118 |
is_8bit_string (const char * str) |
is_8bit_string (const char *str) |
119 |
{ |
{ |
120 |
size_t i; |
size_t i; |
121 |
|
|