1 |
twoaday |
2 |
/* wptUTF8.cpp - UTF8 conversion |
2 |
werner |
36 |
* Copyright (C) 1994, 1998-2001 Free Software Foundation, Inc. |
3 |
twoaday |
185 |
* Copyright (C) 2002, 2004, 2005, 2006 Timo Schulz |
4 |
twoaday |
2 |
* |
5 |
werner |
36 |
* This file is part of WinPT. |
6 |
|
|
* |
7 |
|
|
* WinPT is free software; you can redistribute it and/or modify |
8 |
|
|
* it under the terms of the GNU General Public License as published by |
9 |
|
|
* the Free Software Foundation; either version 2 of the License, or |
10 |
|
|
* (at your option) any later version. |
11 |
|
|
* |
12 |
|
|
* WinPT is distributed in the hope that it will be useful, |
13 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 |
|
|
* GNU General Public License for more details. |
16 |
|
|
* |
17 |
|
|
* You should have received a copy of the GNU General Public License |
18 |
|
|
* along with WinPT; if not, write to the Free Software Foundation, |
19 |
twoaday |
2 |
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA |
20 |
|
|
*/ |
21 |
|
|
|
22 |
werner |
36 |
#ifdef HAVE_CONFIG_H |
23 |
|
|
#include <config.h> |
24 |
|
|
#endif |
25 |
|
|
|
26 |
twoaday |
2 |
#include <windows.h> |
27 |
werner |
36 |
#include <stdlib.h> |
28 |
twoaday |
2 |
#include <stdio.h> |
29 |
|
|
#include <string.h> |
30 |
|
|
#include <ctype.h> |
31 |
werner |
36 |
|
32 |
|
|
#include "wptTypes.h" |
33 |
twoaday |
2 |
#include "wptErrors.h" |
34 |
|
|
|
35 |
|
|
|
36 |
twoaday |
185 |
/* Convert the Latin-1 (ISO-8859-1) string @string into UTF-8.

   Bytes below 0x80 are copied unchanged; every byte with the high bit
   set is expanded into the corresponding two-byte UTF-8 sequence.

   Returns a newly malloc'ed, NUL-terminated string that the caller has
   to free(), or NULL if @string is NULL or the allocation fails. */
char *
native_to_utf8 (const char *string)
{
    const unsigned char *s;
    unsigned char *p;
    char *buffer;
    size_t length = 0;

    if (!string)
        return NULL;

    /* First pass: size of the encoded output (2 bytes per 8-bit char). */
    for (s = (const unsigned char *)string; *s; s++)
        length += (*s & 0x80) ? 2 : 1;

    buffer = (char *)malloc (length + 1);
    if (!buffer)
        return NULL;

    /* Second pass: emit the UTF-8 bytes. */
    p = (unsigned char *)buffer;
    for (s = (const unsigned char *)string; *s; s++) {
        if (*s & 0x80) {
            /* 110000xx 10xxxxxx: top two bits go into the lead byte,
               the low six bits into the continuation byte. */
            *p++ = (unsigned char)(0xc0 | ((*s >> 6) & 3));
            *p++ = (unsigned char)(0x80 | (*s & 0x3f));
        }
        else
            *p++ = *s;
    }
    *p = 0;
    return buffer;
}
62 |
werner |
36 |
|
63 |
twoaday |
185 |
|
64 |
|
|
|
65 |
|
|
/* Convert utf8 string @str to native CP. */ |
66 |
|
|
static char* |
67 |
|
|
utf8_to_native (const char *string) |
68 |
twoaday |
2 |
{ |
69 |
twoaday |
185 |
wchar_t *result; |
70 |
|
|
char *native; |
71 |
|
|
int n; |
72 |
twoaday |
2 |
|
73 |
twoaday |
185 |
/* Convert utf8 to unicode. */ |
74 |
|
|
n = MultiByteToWideChar (CP_UTF8, 0, string, -1, NULL, 0); |
75 |
|
|
if (n < 0) |
76 |
|
|
return NULL; |
77 |
twoaday |
2 |
|
78 |
twoaday |
185 |
result = (wchar_t*)malloc ((n+1) * sizeof *result); |
79 |
|
|
if (!result) |
80 |
|
|
BUG (0); |
81 |
twoaday |
2 |
|
82 |
twoaday |
185 |
n = MultiByteToWideChar (CP_UTF8, 0, string, -1, result, n); |
83 |
|
|
if (n < 0) { |
84 |
|
|
free (result); |
85 |
|
|
return NULL; |
86 |
|
|
} |
87 |
twoaday |
128 |
|
88 |
twoaday |
185 |
/* Convert wide char into native char. */ |
89 |
|
|
/* |
90 |
|
|
n = WideCharToMultiByte (GetACP (), 0, result, -1, NULL, 0, NULL, NULL); |
91 |
|
|
if (n < 0) |
92 |
|
|
return NULL; |
93 |
|
|
*/ |
94 |
|
|
n = wcstombs (NULL, result, wcslen (result)); |
95 |
|
|
if (n < 0) |
96 |
|
|
return NULL; |
97 |
twoaday |
2 |
|
98 |
twoaday |
185 |
native = (char*)malloc (n+1); |
99 |
|
|
if (!native) |
100 |
|
|
BUG (0); |
101 |
twoaday |
2 |
|
102 |
twoaday |
185 |
/* |
103 |
|
|
n = WideCharToMultiByte (CP_ACP, 0, string, -1, result, n, NULL, NULL); |
104 |
|
|
if (n < 0) { |
105 |
|
|
free (result); |
106 |
|
|
return NULL; |
107 |
|
|
} |
108 |
|
|
*/ |
109 |
|
|
n = wcstombs (native, result, -1); |
110 |
|
|
if (n < 0) { |
111 |
|
|
free (result); |
112 |
|
|
return NULL; |
113 |
|
|
} |
114 |
werner |
36 |
|
115 |
twoaday |
185 |
return native; |
116 |
twoaday |
2 |
} |
117 |
werner |
36 |
|
118 |
twoaday |
185 |
/* CP850 -> CP1251 */ |
119 |
werner |
36 |
static void |
120 |
|
|
conv_charset (byte *string, size_t size, int what) |
121 |
|
|
{ |
122 |
twoaday |
185 |
size_t i; |
123 |
werner |
36 |
|
124 |
|
|
if( what == 0 ) { |
125 |
|
|
for( i = 0; i < size; i++, string++ ) { |
126 |
|
|
switch( *string ) { |
127 |
|
|
case 0xa0: *string = 0xff; break; /* nobreakspace */ |
128 |
|
|
case 0xa1: *string = 0xad; break; /* exclamdown */ |
129 |
|
|
case 0xa2: *string = 0xbd; break; /* cent */ |
130 |
|
|
case 0xa3: *string = 0x9c; break; /* sterling */ |
131 |
|
|
case 0xa4: *string = 0xcf; break; /* currency */ |
132 |
|
|
case 0xa5: *string = 0xbe; break; /* yen */ |
133 |
|
|
case 0xa6: *string = 0xdd; break; /* brokenbar */ |
134 |
|
|
case 0xa7: *string = 0xf5; break; /* section */ |
135 |
|
|
case 0xa8: *string = 0xf9; break; /* diaeresis */ |
136 |
|
|
case 0xa9: *string = 0xb8; break; /* copyright */ |
137 |
|
|
case 0xaa: *string = 0xa6; break; /* ordfeminine */ |
138 |
|
|
case 0xab: *string = 0xae; break; /* guillemotleft */ |
139 |
|
|
case 0xac: *string = 0xaa; break; /* notsign */ |
140 |
|
|
case 0xad: *string = 0xf0; break; /* hyphen */ |
141 |
|
|
case 0xae: *string = 0xa9; break; /* registered */ |
142 |
|
|
case 0xaf: *string = 0xee; break; /* macron */ |
143 |
|
|
case 0xb0: *string = 0xf8; break; /* degree */ |
144 |
|
|
case 0xb1: *string = 0xf1; break; /* plusminus */ |
145 |
|
|
case 0xb2: *string = 0xfd; break; /* twosuperior */ |
146 |
|
|
case 0xb3: *string = 0xfc; break; /* threesuperior */ |
147 |
|
|
case 0xb4: *string = 0xef; break; /* acute */ |
148 |
|
|
case 0xb5: *string = 0xe6; break; /* mu */ |
149 |
|
|
case 0xb6: *string = 0xf4; break; /* paragraph */ |
150 |
|
|
case 0xb7: *string = 0xfa; break; /* periodcentered */ |
151 |
|
|
case 0xb8: *string = 0xf7; break; /* cedilla */ |
152 |
|
|
case 0xb9: *string = 0xfb; break; /* onesuperior */ |
153 |
|
|
case 0xba: *string = 0xa7; break; /* masculine */ |
154 |
|
|
case 0xbb: *string = 0xaf; break; /* guillemotright */ |
155 |
|
|
case 0xbc: *string = 0xac; break; /* onequarter */ |
156 |
|
|
case 0xbd: *string = 0xab; break; /* onehalf */ |
157 |
|
|
case 0xbe: *string = 0xf3; break; /* threequarters */ |
158 |
|
|
case 0xbf: *string = 0xa8; break; /* questiondown */ |
159 |
|
|
case 0xc0: *string = 0xb7; break; /* Agrave */ |
160 |
|
|
case 0xc1: *string = 0xb5; break; /* Aacute */ |
161 |
|
|
case 0xc2: *string = 0xb6; break; /* Acircumflex */ |
162 |
|
|
case 0xc3: *string = 0xc7; break; /* Atilde */ |
163 |
|
|
case 0xc4: *string = 0x8e; break; /* Adiaeresis */ |
164 |
|
|
case 0xc5: *string = 0x8f; break; /* Aring */ |
165 |
|
|
case 0xc6: *string = 0x92; break; /* AE */ |
166 |
|
|
case 0xc7: *string = 0x80; break; /* Ccedilla */ |
167 |
|
|
case 0xc8: *string = 0xd4; break; /* Egrave */ |
168 |
|
|
case 0xc9: *string = 0x90; break; /* Eacute */ |
169 |
|
|
case 0xca: *string = 0xd2; break; /* Ecircumflex */ |
170 |
|
|
case 0xcb: *string = 0xd3; break; /* Ediaeresis */ |
171 |
|
|
case 0xcc: *string = 0xde; break; /* Igrave */ |
172 |
|
|
case 0xcd: *string = 0xd6; break; /* Iacute */ |
173 |
|
|
case 0xce: *string = 0xd7; break; /* Icircumflex */ |
174 |
|
|
case 0xcf: *string = 0xd8; break; /* Idiaeresis */ |
175 |
|
|
case 0xd0: *string = 0xd1; break; /* Eth */ |
176 |
|
|
case 0xd1: *string = 0xa5; break; /* Ntilde */ |
177 |
|
|
case 0xd2: *string = 0xe3; break; /* Ograve */ |
178 |
|
|
case 0xd3: *string = 0xe0; break; /* Oacute */ |
179 |
|
|
case 0xd4: *string = 0xe2; break; /* Ocircumflex */ |
180 |
|
|
case 0xd5: *string = 0xe5; break; /* Otilde */ |
181 |
|
|
case 0xd6: *string = 0x99; break; /* Odiaeresis */ |
182 |
|
|
case 0xd7: *string = 0x9e; break; /* multiply */ |
183 |
|
|
case 0xd8: *string = 0x9d; break; /* Ooblique */ |
184 |
|
|
case 0xd9: *string = 0xeb; break; /* Ugrave */ |
185 |
|
|
case 0xda: *string = 0xe9; break; /* Uacute */ |
186 |
|
|
case 0xdb: *string = 0xea; break; /* Ucircumflex */ |
187 |
|
|
case 0xdc: *string = 0x9a; break; /* Udiaeresis */ |
188 |
|
|
case 0xdd: *string = 0xed; break; /* Yacute */ |
189 |
|
|
case 0xde: *string = 0xe8; break; /* Thorn */ |
190 |
|
|
case 0xdf: *string = 0xe1; break; /* ssharp */ |
191 |
|
|
case 0xe0: *string = 0x85; break; /* agrave */ |
192 |
|
|
case 0xe1: *string = 0xa0; break; /* aacute */ |
193 |
|
|
case 0xe2: *string = 0x83; break; /* acircumflex */ |
194 |
|
|
case 0xe3: *string = 0xc6; break; /* atilde */ |
195 |
|
|
case 0xe4: *string = 0x84; break; /* adiaeresis */ |
196 |
|
|
case 0xe5: *string = 0x86; break; /* aring */ |
197 |
|
|
case 0xe6: *string = 0x91; break; /* ae */ |
198 |
|
|
case 0xe7: *string = 0x87; break; /* ccedilla */ |
199 |
|
|
case 0xe8: *string = 0x8a; break; /* egrave */ |
200 |
|
|
case 0xe9: *string = 0x82; break; /* eacute */ |
201 |
|
|
case 0xea: *string = 0x88; break; /* ecircumflex */ |
202 |
|
|
case 0xeb: *string = 0x89; break; /* ediaeresis */ |
203 |
|
|
case 0xec: *string = 0x8d; break; /* igrave */ |
204 |
|
|
case 0xed: *string = 0xa1; break; /* iacute */ |
205 |
|
|
case 0xee: *string = 0x8c; break; /* icircumflex */ |
206 |
|
|
case 0xef: *string = 0x8b; break; /* idiaeresis */ |
207 |
|
|
case 0xf0: *string = 0xd0; break; /* eth */ |
208 |
|
|
case 0xf1: *string = 0xa4; break; /* ntilde */ |
209 |
|
|
case 0xf2: *string = 0x95; break; /* ograve */ |
210 |
|
|
case 0xf3: *string = 0xa2; break; /* oacute */ |
211 |
|
|
case 0xf4: *string = 0x93; break; /* ocircumflex */ |
212 |
|
|
case 0xf5: *string = 0xe4; break; /* otilde */ |
213 |
|
|
case 0xf6: *string = 0x94; break; /* odiaeresis */ |
214 |
|
|
case 0xf7: *string = 0xf6; break; /* division */ |
215 |
|
|
case 0xf8: *string = 0x9b; break; /* oslash */ |
216 |
|
|
case 0xf9: *string = 0x97; break; /* ugrave */ |
217 |
|
|
case 0xfa: *string = 0xa3; break; /* uacute */ |
218 |
|
|
case 0xfb: *string = 0x96; break; /* ucircumflex */ |
219 |
|
|
case 0xfc: *string = 0x81; break; /* udiaeresis */ |
220 |
|
|
case 0xfd: *string = 0xec; break; /* yacute */ |
221 |
|
|
case 0xfe: *string = 0xe7; break; /* thorn */ |
222 |
|
|
case 0xff: *string = 0x98; break; /* ydiaeresis */ |
223 |
|
|
default : break; |
224 |
|
|
} |
225 |
|
|
} |
226 |
|
|
} |
227 |
|
|
else { |
228 |
|
|
for( i = 0; i < size; i++, string++ ) { |
229 |
|
|
switch( *string ) { |
230 |
|
|
case 0xff: *string = 0xa0; break; |
231 |
|
|
case 0xad: *string = 0xa1; break; |
232 |
|
|
case 0xbd: *string = 0xa2; break; |
233 |
|
|
case 0x9c: *string = 0xa3; break; |
234 |
|
|
case 0xcf: *string = 0xa4; break; |
235 |
|
|
case 0xbe: *string = 0xa5; break; |
236 |
|
|
case 0xdd: *string = 0xa6; break; |
237 |
|
|
case 0xf5: *string = 0xa7; break; |
238 |
|
|
case 0xf9: *string = 0xa8; break; |
239 |
|
|
case 0xb8: *string = 0xa9; break; |
240 |
|
|
case 0xa6: *string = 0xaa; break; |
241 |
|
|
case 0xae: *string = 0xab; break; |
242 |
|
|
case 0xaa: *string = 0xac; break; |
243 |
|
|
case 0xf0: *string = 0xad; break; |
244 |
|
|
case 0xa9: *string = 0xae; break; |
245 |
|
|
case 0xee: *string = 0xaf; break; |
246 |
|
|
case 0xf8: *string = 0xb0; break; |
247 |
|
|
case 0xf1: *string = 0xb1; break; |
248 |
|
|
case 0xfd: *string = 0xb2; break; |
249 |
|
|
case 0xfc: *string = 0xb3; break; |
250 |
|
|
case 0xef: *string = 0xb4; break; |
251 |
|
|
case 0xe6: *string = 0xb5; break; |
252 |
|
|
case 0xf4: *string = 0xb6; break; |
253 |
|
|
case 0xfa: *string = 0xb7; break; |
254 |
|
|
case 0xf7: *string = 0xb8; break; |
255 |
|
|
case 0xfb: *string = 0xb9; break; |
256 |
|
|
case 0xa7: *string = 0xba; break; |
257 |
|
|
case 0xaf: *string = 0xbb; break; |
258 |
|
|
case 0xac: *string = 0xbc; break; |
259 |
|
|
case 0xab: *string = 0xbd; break; |
260 |
|
|
case 0xf3: *string = 0xbe; break; |
261 |
|
|
case 0xa8: *string = 0xbf; break; |
262 |
|
|
case 0xb7: *string = 0xc0; break; |
263 |
|
|
case 0xb5: *string = 0xc1; break; |
264 |
|
|
case 0xb6: *string = 0xc2; break; |
265 |
|
|
case 0xc7: *string = 0xc3; break; |
266 |
|
|
case 0x8e: *string = 0xc4; break; |
267 |
|
|
case 0x8f: *string = 0xc5; break; |
268 |
|
|
case 0x92: *string = 0xc6; break; |
269 |
|
|
case 0x80: *string = 0xc7; break; |
270 |
|
|
case 0xd4: *string = 0xc8; break; |
271 |
|
|
case 0x90: *string = 0xc9; break; |
272 |
|
|
case 0xd2: *string = 0xca; break; |
273 |
|
|
case 0xd3: *string = 0xcb; break; |
274 |
|
|
case 0xde: *string = 0xcc; break; |
275 |
|
|
case 0xd6: *string = 0xcd; break; |
276 |
|
|
case 0xd7: *string = 0xce; break; |
277 |
|
|
case 0xd8: *string = 0xcf; break; |
278 |
|
|
case 0xd1: *string = 0xd0; break; |
279 |
|
|
case 0xa5: *string = 0xd1; break; |
280 |
|
|
case 0xe3: *string = 0xd2; break; |
281 |
|
|
case 0xe0: *string = 0xd3; break; |
282 |
|
|
case 0xe2: *string = 0xd4; break; |
283 |
|
|
case 0xe5: *string = 0xd5; break; |
284 |
|
|
case 0x99: *string = 0xd6; break; |
285 |
|
|
case 0x9e: *string = 0xd7; break; |
286 |
|
|
case 0x9d: *string = 0xd8; break; |
287 |
|
|
case 0xeb: *string = 0xd9; break; |
288 |
|
|
case 0xe9: *string = 0xda; break; |
289 |
|
|
case 0xea: *string = 0xdb; break; |
290 |
|
|
case 0x9a: *string = 0xdc; break; |
291 |
|
|
case 0xed: *string = 0xdd; break; |
292 |
|
|
case 0xe8: *string = 0xde; break; |
293 |
|
|
case 0xe1: *string = 0xdf; break; |
294 |
|
|
case 0x85: *string = 0xe0; break; |
295 |
|
|
case 0xa0: *string = 0xe1; break; |
296 |
|
|
case 0x83: *string = 0xe2; break; |
297 |
|
|
case 0xc6: *string = 0xe3; break; |
298 |
|
|
case 0x84: *string = 0xe4; break; |
299 |
|
|
case 0x86: *string = 0xe5; break; |
300 |
|
|
case 0x91: *string = 0xe6; break; |
301 |
|
|
case 0x87: *string = 0xe7; break; |
302 |
|
|
case 0x8a: *string = 0xe8; break; |
303 |
|
|
case 0x82: *string = 0xe9; break; |
304 |
|
|
case 0x88: *string = 0xea; break; |
305 |
|
|
case 0x89: *string = 0xeb; break; |
306 |
|
|
case 0x8d: *string = 0xec; break; |
307 |
|
|
case 0xa1: *string = 0xed; break; |
308 |
|
|
case 0x8c: *string = 0xee; break; |
309 |
|
|
case 0x8b: *string = 0xef; break; |
310 |
|
|
case 0xd0: *string = 0xf0; break; |
311 |
|
|
case 0xa4: *string = 0xf1; break; |
312 |
|
|
case 0x95: *string = 0xf2; break; |
313 |
|
|
case 0xa2: *string = 0xf3; break; |
314 |
|
|
case 0x93: *string = 0xf4; break; |
315 |
|
|
case 0xe4: *string = 0xf5; break; |
316 |
|
|
case 0x94: *string = 0xf6; break; |
317 |
|
|
case 0xf6: *string = 0xf7; break; |
318 |
|
|
case 0x9b: *string = 0xf8; break; |
319 |
|
|
case 0x97: *string = 0xf9; break; |
320 |
|
|
case 0xa3: *string = 0xfa; break; |
321 |
|
|
case 0x96: *string = 0xfb; break; |
322 |
|
|
case 0x81: *string = 0xfc; break; |
323 |
|
|
case 0xec: *string = 0xfd; break; |
324 |
|
|
case 0xe7: *string = 0xfe; break; |
325 |
|
|
case 0x98: *string = 0xff; break; |
326 |
|
|
default : break; |
327 |
|
|
} |
328 |
|
|
} |
329 |
|
|
} |
330 |
twoaday |
185 |
} |
331 |
werner |
36 |
|
332 |
|
|
|
333 |
twoaday |
128 |
/* XXX: the conv_charset() call fails when the user-id was created |
334 |
|
|
with iso-8859-1 but it is assumed that CP850 (gpg console) is used. */ |
335 |
|
|
|
336 |
|
|
char* |
337 |
werner |
36 |
utf8_to_wincp (const char * s, size_t len) |
338 |
|
|
{ |
339 |
twoaday |
128 |
char *decs; |
340 |
twoaday |
185 |
decs = utf8_to_native (s); |
341 |
werner |
36 |
conv_charset ((byte *)decs, strlen (decs), 1); |
342 |
|
|
return decs; |
343 |
|
|
} |
344 |
|
|
|
345 |
|
|
|
346 |
twoaday |
128 |
char* |
347 |
werner |
36 |
wincp_to_utf8 (const char * s, size_t len) |
348 |
|
|
{ |
349 |
|
|
char * encs; |
350 |
|
|
conv_charset ((byte *)s, len, 0); |
351 |
|
|
encs = native_to_utf8 (s); |
352 |
|
|
return encs; |
353 |
|
|
} |
354 |
|
|
|
355 |
|
|
|
356 |
|
|
/* Check whether @str contains at least one byte with the high bit set.

   Returns -1 if such a byte is found and 0 otherwise (including for an
   empty or NULL string). */
int
is_8bit_string (const char * str)
{
    const unsigned char *p;

    if (!str)
        return 0;

    /* Single pass up to the terminator; no strlen() per iteration. */
    for (p = (const unsigned char *)str; *p; p++) {
        if (*p & 0x80)
            return -1;
    }
    return 0;
}