/* wptUTF8.cpp - UTF8 conversion
 *	Copyright (C) 1994, 1998-2001 Free Software Foundation, Inc.
 *	Copyright (C) 2002, 2004 Timo Schulz
 *
 * This file is part of WinPT.
 *
 * WinPT is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * WinPT is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with WinPT; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 */
21 |
|
22 |
#ifdef HAVE_CONFIG_H |
23 |
#include <config.h> |
24 |
#endif |
25 |
|
26 |
#include <windows.h> |
27 |
#include <stdlib.h> |
28 |
#include <stdio.h> |
29 |
#include <string.h> |
30 |
#include <ctype.h> |
31 |
|
32 |
#include "wptTypes.h" |
33 |
#include "wptErrors.h" |
34 |
|
35 |
/* KOI8-R to Unicode: entry [b & 0x7f] holds the code point of native
 * byte b (0x80..0xff).  The row comments give the native byte of the
 * first entry in each row. */
static u16 koi8_unicode[128] = {
    /* 0x80 */ 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524,
    /* 0x88 */ 0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
    /* 0x90 */ 0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248,
    /* 0x98 */ 0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7,
    /* 0xa0 */ 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
    /* 0xa8 */ 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e,
    /* 0xb0 */ 0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
    /* 0xb8 */ 0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9,
    /* 0xc0 */ 0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
    /* 0xc8 */ 0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
    /* 0xd0 */ 0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
    /* 0xd8 */ 0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a,
    /* 0xe0 */ 0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
    /* 0xe8 */ 0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
    /* 0xf0 */ 0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
    /* 0xf8 */ 0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a
};
53 |
|
54 |
/* ISO-8859-2 to Unicode: entry [b & 0x7f] holds the code point of native
 * byte b (0x80..0xff).  The first 32 entries map 0x80..0x9f to
 * themselves.  The row comments give the native byte of the first entry
 * in each row. */
static u16 latin2_unicode[128] = {
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xa0 */ 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
    /* 0xa8 */ 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
    /* 0xb0 */ 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
    /* 0xb8 */ 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
    /* 0xc0 */ 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
    /* 0xc8 */ 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
    /* 0xd0 */ 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
    /* 0xd8 */ 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
    /* 0xe0 */ 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
    /* 0xe8 */ 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
    /* 0xf0 */ 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
    /* 0xf8 */ 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
};
72 |
|
73 |
|
74 |
static const char *active_charset_name = "iso-8859-1"; |
75 |
static u16 *active_charset = NULL; |
76 |
static int no_translation = 0; |
77 |
|
78 |
/* Fold an ASCII lower-case letter to upper case; any other value is
 * returned unchanged.  Unlike toupper() this does not depend on the
 * locale and is defined for every byte value. */
static int
ascii_fold_upper (int c)
{
    return (c >= 'a' && c <= 'z') ? c - ('a' - 'A') : c;
}

/*
 * Compare the strings A and B, ignoring the case of ASCII letters only.
 *
 * Returns 0 when the strings are equal (or A and B are the same pointer),
 * otherwise the difference between the first pair of folded bytes that
 * differ, with the bytes taken as unsigned char.
 */
static int
ascii_strcasecmp (const char *a, const char *b)
{
    /* Work on unsigned bytes so that characters >= 0x80 never turn into
     * negative values. */
    const unsigned char *pa = (const unsigned char *)a;
    const unsigned char *pb = (const unsigned char *)b;

    if (a == b)
        return 0;

    while (*pa && *pb && ascii_fold_upper (*pa) == ascii_fold_upper (*pb)) {
        pa++;
        pb++;
    }

    return ascii_fold_upper (*pa) - ascii_fold_upper (*pb);
} /* ascii_strcasecmp */
91 |
|
92 |
int |
93 |
set_native_charset( const char *newset ) |
94 |
{ |
95 |
if( !ascii_strcasecmp( newset, "iso-8859-1" ) ) { |
96 |
active_charset_name = "iso-8859-1"; |
97 |
no_translation = 0; |
98 |
active_charset = NULL; |
99 |
} |
100 |
else if( !ascii_strcasecmp( newset, "iso-8859-2" ) ) { |
101 |
active_charset_name = "iso-8859-2"; |
102 |
no_translation = 0; |
103 |
active_charset = latin2_unicode; |
104 |
} |
105 |
else if( !ascii_strcasecmp( newset, "koi8-r" ) ) { |
106 |
active_charset_name = "koi8-r"; |
107 |
no_translation = 0; |
108 |
active_charset = koi8_unicode; |
109 |
} |
110 |
else if( !ascii_strcasecmp (newset, "utf8" ) |
111 |
|| !ascii_strcasecmp(newset, "utf-8") ) { |
112 |
active_charset_name = "utf-8"; |
113 |
no_translation = 1; |
114 |
active_charset = NULL; |
115 |
} |
116 |
else |
117 |
return WPTERR_GENERAL; |
118 |
|
119 |
return 0; |
120 |
} /* set_native_charset */ |
121 |
|
122 |
const char* |
123 |
get_native_charset( void ) |
124 |
{ |
125 |
return active_charset_name; |
126 |
} /* get_native_charset */ |
127 |
|
128 |
/**************** |
129 |
* Convert string, which is in native encoding to UTF8 and return the |
130 |
* new allocated UTF8 string. |
131 |
*/ |
132 |
char * |
133 |
native_to_utf8( const char *string ) |
134 |
{ |
135 |
const byte *s; |
136 |
char *buffer; |
137 |
byte *p; |
138 |
size_t length=0; |
139 |
|
140 |
if (no_translation) |
141 |
buffer = strdup( string ); |
142 |
else if( active_charset ) { |
143 |
for(s=(byte*)string; *s; s++ ) { |
144 |
length++; |
145 |
if( *s & 0x80 ) |
146 |
length += 2; /* we may need 3 bytes */ |
147 |
} |
148 |
buffer = (char *)malloc( length + 1 ); |
149 |
for(p=(byte *)buffer, s=(byte *)string; *s; s++ ) { |
150 |
if( *s & 0x80 ) { |
151 |
u16 val = active_charset[ *s & 0x7f ]; |
152 |
if( val < 0x0800 ) { |
153 |
*p++ = 0xc0 | ( (val >> 6) & 0x1f ); |
154 |
*p++ = 0x80 | ( val & 0x3f ); |
155 |
} |
156 |
else { |
157 |
*p++ = 0xe0 | ( (val >> 12) & 0x0f ); |
158 |
*p++ = 0x80 | ( (val >> 6) & 0x3f ); |
159 |
*p++ = 0x80 | ( val & 0x3f ); |
160 |
} |
161 |
} |
162 |
else |
163 |
*p++ = *s; |
164 |
} |
165 |
*p = 0; |
166 |
} |
167 |
else { |
168 |
for(s=(byte*)string; *s; s++ ) { |
169 |
length++; |
170 |
if( *s & 0x80 ) |
171 |
length++; |
172 |
} |
173 |
buffer = (char*)malloc( length + 1 ); |
174 |
for(p=(byte*)buffer, s=(byte*)string; *s; s++ ) { |
175 |
if( *s & 0x80 ) { |
176 |
*p++ = 0xc0 | ((*s >> 6) & 3); |
177 |
*p++ = 0x80 | ( *s & 0x3f ); |
178 |
} |
179 |
else |
180 |
*p++ = *s; |
181 |
} |
182 |
*p = 0; |
183 |
} |
184 |
|
185 |
return buffer; |
186 |
} /* native_to_utf8 */ |
187 |
|
188 |
/**************** |
189 |
* Convert string, which is in UTF8 to native encoding. illegal |
190 |
* encodings by some "\xnn" and quote all control characters. A |
191 |
* character with value DELIM will always be quoted, it must be a |
192 |
* vanilla ASCII character. |
193 |
*/ |
194 |
char * |
195 |
utf8_to_native( const char *string, size_t length, int delim ) |
196 |
{ |
197 |
int nleft; |
198 |
int i; |
199 |
byte encbuf[8]; |
200 |
int encidx; |
201 |
const byte *s; |
202 |
size_t n; |
203 |
byte *buffer = NULL, *p = NULL; |
204 |
unsigned long val = 0; |
205 |
size_t slen; |
206 |
int resync = 0; |
207 |
|
208 |
/* 1. pass (p==NULL): count the extended utf-8 characters */ |
209 |
/* 2. pass (p!=NULL): create string */ |
210 |
for( ;; ) { |
211 |
for( slen=length, nleft=encidx=0, n=0, s=(byte*)string; slen; s++, slen-- ) { |
212 |
if( resync ) { |
213 |
if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) { |
214 |
/* still invalid */ |
215 |
if( p ) { |
216 |
sprintf((char*)p, "\\x%02x", *s ); |
217 |
p += 4; |
218 |
} |
219 |
n += 4; |
220 |
continue; |
221 |
} |
222 |
resync = 0; |
223 |
} |
224 |
if( !nleft ) { |
225 |
if( !(*s & 0x80) ) { /* plain ascii */ |
226 |
if( *s < 0x20 || *s == 0x7f || *s == delim) { |
227 |
n++; |
228 |
if( p ) |
229 |
*p++ = '\\'; |
230 |
switch( *s ) { |
231 |
case '\n': n++; if( p ) *p++ = 'n'; break; |
232 |
case '\r': n++; if( p ) *p++ = 'r'; break; |
233 |
case '\f': n++; if( p ) *p++ = 'f'; break; |
234 |
case '\v': n++; if( p ) *p++ = 'v'; break; |
235 |
case '\b': n++; if( p ) *p++ = 'b'; break; |
236 |
case 0 : n++; if( p ) *p++ = '0'; break; |
237 |
default: |
238 |
n += 3; |
239 |
if ( p ) { |
240 |
sprintf( (char*)p, "x%02x", *s ); |
241 |
p += 3; |
242 |
} |
243 |
break; |
244 |
} |
245 |
} |
246 |
else { |
247 |
if( p ) *p++ = *s; |
248 |
n++; |
249 |
} |
250 |
} |
251 |
else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */ |
252 |
val = *s & 0x1f; |
253 |
nleft = 1; |
254 |
encidx = 0; |
255 |
encbuf[encidx++] = *s; |
256 |
} |
257 |
else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */ |
258 |
val = *s & 0x0f; |
259 |
nleft = 2; |
260 |
encidx = 0; |
261 |
encbuf[encidx++] = *s; |
262 |
} |
263 |
else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */ |
264 |
val = *s & 0x07; |
265 |
nleft = 3; |
266 |
encidx = 0; |
267 |
encbuf[encidx++] = *s; |
268 |
} |
269 |
else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */ |
270 |
val = *s & 0x03; |
271 |
nleft = 4; |
272 |
encidx = 0; |
273 |
encbuf[encidx++] = *s; |
274 |
} |
275 |
else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */ |
276 |
val = *s & 0x01; |
277 |
nleft = 5; |
278 |
encidx = 0; |
279 |
encbuf[encidx++] = *s; |
280 |
} |
281 |
else { /* invalid encoding: print as \xnn */ |
282 |
if( p ) { |
283 |
sprintf((char*)p, "\\x%02x", *s ); |
284 |
p += 4; |
285 |
} |
286 |
n += 4; |
287 |
resync = 1; |
288 |
} |
289 |
} |
290 |
else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */ |
291 |
if( p ) { |
292 |
for(i=0; i < encidx; i++ ) { |
293 |
sprintf((char*)p, "\\x%02x", encbuf[i] ); |
294 |
p += 4; |
295 |
} |
296 |
sprintf((char*)p, "\\x%02x", *s ); |
297 |
p += 4; |
298 |
} |
299 |
n += 4 + 4*encidx; |
300 |
nleft = 0; |
301 |
encidx = 0; |
302 |
resync = 1; |
303 |
} |
304 |
else { |
305 |
encbuf[encidx++] = *s; |
306 |
val <<= 6; |
307 |
val |= *s & 0x3f; |
308 |
if( !--nleft ) { /* ready */ |
309 |
if (no_translation) { |
310 |
if( p ) { |
311 |
for(i=0; i < encidx; i++ ) |
312 |
*p++ = encbuf[i]; |
313 |
} |
314 |
n += encidx; |
315 |
encidx = 0; |
316 |
} |
317 |
else if( active_charset ) { /* table lookup */ |
318 |
for(i=0; i < 128; i++ ) { |
319 |
if( active_charset[i] == val ) |
320 |
break; |
321 |
} |
322 |
if( i < 128 ) { /* we can print this one */ |
323 |
if( p ) *p++ = i+128; |
324 |
n++; |
325 |
} |
326 |
else { /* we do not have a translation: print utf8 */ |
327 |
if( p ) { |
328 |
for(i=0; i < encidx; i++ ) { |
329 |
sprintf((char*)p, "\\x%02x", encbuf[i] ); |
330 |
p += 4; |
331 |
} |
332 |
} |
333 |
n += encidx*4; |
334 |
encidx = 0; |
335 |
} |
336 |
} |
337 |
else { /* native set */ |
338 |
if( val >= 0x80 && val < 256 ) { |
339 |
n++; /* we can simply print this character */ |
340 |
if( p ) *p++ = val; |
341 |
} |
342 |
else { /* we do not have a translation: print utf8 */ |
343 |
if( p ) { |
344 |
for(i=0; i < encidx; i++ ) { |
345 |
sprintf((char*)p, "\\x%02x", encbuf[i] ); |
346 |
p += 4; |
347 |
} |
348 |
} |
349 |
n += encidx*4; |
350 |
encidx = 0; |
351 |
} |
352 |
} |
353 |
} |
354 |
|
355 |
} |
356 |
} |
357 |
if( !buffer ) { /* allocate the buffer after the first pass */ |
358 |
buffer = p = (byte *)malloc( n + 1 ); |
359 |
} |
360 |
else { |
361 |
*p = 0; /* make a string */ |
362 |
return (char*)buffer; |
363 |
} |
364 |
} |
365 |
} |
366 |
|
367 |
|
368 |
static void |
369 |
conv_charset (byte *string, size_t size, int what) |
370 |
{ |
371 |
int i; |
372 |
|
373 |
if( what == 0 ) { |
374 |
for( i = 0; i < size; i++, string++ ) { |
375 |
switch( *string ) { |
376 |
case 0xa0: *string = 0xff; break; /* nobreakspace */ |
377 |
case 0xa1: *string = 0xad; break; /* exclamdown */ |
378 |
case 0xa2: *string = 0xbd; break; /* cent */ |
379 |
case 0xa3: *string = 0x9c; break; /* sterling */ |
380 |
case 0xa4: *string = 0xcf; break; /* currency */ |
381 |
case 0xa5: *string = 0xbe; break; /* yen */ |
382 |
case 0xa6: *string = 0xdd; break; /* brokenbar */ |
383 |
case 0xa7: *string = 0xf5; break; /* section */ |
384 |
case 0xa8: *string = 0xf9; break; /* diaeresis */ |
385 |
case 0xa9: *string = 0xb8; break; /* copyright */ |
386 |
case 0xaa: *string = 0xa6; break; /* ordfeminine */ |
387 |
case 0xab: *string = 0xae; break; /* guillemotleft */ |
388 |
case 0xac: *string = 0xaa; break; /* notsign */ |
389 |
case 0xad: *string = 0xf0; break; /* hyphen */ |
390 |
case 0xae: *string = 0xa9; break; /* registered */ |
391 |
case 0xaf: *string = 0xee; break; /* macron */ |
392 |
case 0xb0: *string = 0xf8; break; /* degree */ |
393 |
case 0xb1: *string = 0xf1; break; /* plusminus */ |
394 |
case 0xb2: *string = 0xfd; break; /* twosuperior */ |
395 |
case 0xb3: *string = 0xfc; break; /* threesuperior */ |
396 |
case 0xb4: *string = 0xef; break; /* acute */ |
397 |
case 0xb5: *string = 0xe6; break; /* mu */ |
398 |
case 0xb6: *string = 0xf4; break; /* paragraph */ |
399 |
case 0xb7: *string = 0xfa; break; /* periodcentered */ |
400 |
case 0xb8: *string = 0xf7; break; /* cedilla */ |
401 |
case 0xb9: *string = 0xfb; break; /* onesuperior */ |
402 |
case 0xba: *string = 0xa7; break; /* masculine */ |
403 |
case 0xbb: *string = 0xaf; break; /* guillemotright */ |
404 |
case 0xbc: *string = 0xac; break; /* onequarter */ |
405 |
case 0xbd: *string = 0xab; break; /* onehalf */ |
406 |
case 0xbe: *string = 0xf3; break; /* threequarters */ |
407 |
case 0xbf: *string = 0xa8; break; /* questiondown */ |
408 |
case 0xc0: *string = 0xb7; break; /* Agrave */ |
409 |
case 0xc1: *string = 0xb5; break; /* Aacute */ |
410 |
case 0xc2: *string = 0xb6; break; /* Acircumflex */ |
411 |
case 0xc3: *string = 0xc7; break; /* Atilde */ |
412 |
case 0xc4: *string = 0x8e; break; /* Adiaeresis */ |
413 |
case 0xc5: *string = 0x8f; break; /* Aring */ |
414 |
case 0xc6: *string = 0x92; break; /* AE */ |
415 |
case 0xc7: *string = 0x80; break; /* Ccedilla */ |
416 |
case 0xc8: *string = 0xd4; break; /* Egrave */ |
417 |
case 0xc9: *string = 0x90; break; /* Eacute */ |
418 |
case 0xca: *string = 0xd2; break; /* Ecircumflex */ |
419 |
case 0xcb: *string = 0xd3; break; /* Ediaeresis */ |
420 |
case 0xcc: *string = 0xde; break; /* Igrave */ |
421 |
case 0xcd: *string = 0xd6; break; /* Iacute */ |
422 |
case 0xce: *string = 0xd7; break; /* Icircumflex */ |
423 |
case 0xcf: *string = 0xd8; break; /* Idiaeresis */ |
424 |
case 0xd0: *string = 0xd1; break; /* Eth */ |
425 |
case 0xd1: *string = 0xa5; break; /* Ntilde */ |
426 |
case 0xd2: *string = 0xe3; break; /* Ograve */ |
427 |
case 0xd3: *string = 0xe0; break; /* Oacute */ |
428 |
case 0xd4: *string = 0xe2; break; /* Ocircumflex */ |
429 |
case 0xd5: *string = 0xe5; break; /* Otilde */ |
430 |
case 0xd6: *string = 0x99; break; /* Odiaeresis */ |
431 |
case 0xd7: *string = 0x9e; break; /* multiply */ |
432 |
case 0xd8: *string = 0x9d; break; /* Ooblique */ |
433 |
case 0xd9: *string = 0xeb; break; /* Ugrave */ |
434 |
case 0xda: *string = 0xe9; break; /* Uacute */ |
435 |
case 0xdb: *string = 0xea; break; /* Ucircumflex */ |
436 |
case 0xdc: *string = 0x9a; break; /* Udiaeresis */ |
437 |
case 0xdd: *string = 0xed; break; /* Yacute */ |
438 |
case 0xde: *string = 0xe8; break; /* Thorn */ |
439 |
case 0xdf: *string = 0xe1; break; /* ssharp */ |
440 |
case 0xe0: *string = 0x85; break; /* agrave */ |
441 |
case 0xe1: *string = 0xa0; break; /* aacute */ |
442 |
case 0xe2: *string = 0x83; break; /* acircumflex */ |
443 |
case 0xe3: *string = 0xc6; break; /* atilde */ |
444 |
case 0xe4: *string = 0x84; break; /* adiaeresis */ |
445 |
case 0xe5: *string = 0x86; break; /* aring */ |
446 |
case 0xe6: *string = 0x91; break; /* ae */ |
447 |
case 0xe7: *string = 0x87; break; /* ccedilla */ |
448 |
case 0xe8: *string = 0x8a; break; /* egrave */ |
449 |
case 0xe9: *string = 0x82; break; /* eacute */ |
450 |
case 0xea: *string = 0x88; break; /* ecircumflex */ |
451 |
case 0xeb: *string = 0x89; break; /* ediaeresis */ |
452 |
case 0xec: *string = 0x8d; break; /* igrave */ |
453 |
case 0xed: *string = 0xa1; break; /* iacute */ |
454 |
case 0xee: *string = 0x8c; break; /* icircumflex */ |
455 |
case 0xef: *string = 0x8b; break; /* idiaeresis */ |
456 |
case 0xf0: *string = 0xd0; break; /* eth */ |
457 |
case 0xf1: *string = 0xa4; break; /* ntilde */ |
458 |
case 0xf2: *string = 0x95; break; /* ograve */ |
459 |
case 0xf3: *string = 0xa2; break; /* oacute */ |
460 |
case 0xf4: *string = 0x93; break; /* ocircumflex */ |
461 |
case 0xf5: *string = 0xe4; break; /* otilde */ |
462 |
case 0xf6: *string = 0x94; break; /* odiaeresis */ |
463 |
case 0xf7: *string = 0xf6; break; /* division */ |
464 |
case 0xf8: *string = 0x9b; break; /* oslash */ |
465 |
case 0xf9: *string = 0x97; break; /* ugrave */ |
466 |
case 0xfa: *string = 0xa3; break; /* uacute */ |
467 |
case 0xfb: *string = 0x96; break; /* ucircumflex */ |
468 |
case 0xfc: *string = 0x81; break; /* udiaeresis */ |
469 |
case 0xfd: *string = 0xec; break; /* yacute */ |
470 |
case 0xfe: *string = 0xe7; break; /* thorn */ |
471 |
case 0xff: *string = 0x98; break; /* ydiaeresis */ |
472 |
default : break; |
473 |
} |
474 |
} |
475 |
} |
476 |
else { |
477 |
for( i = 0; i < size; i++, string++ ) { |
478 |
switch( *string ) { |
479 |
case 0xff: *string = 0xa0; break; |
480 |
case 0xad: *string = 0xa1; break; |
481 |
case 0xbd: *string = 0xa2; break; |
482 |
case 0x9c: *string = 0xa3; break; |
483 |
case 0xcf: *string = 0xa4; break; |
484 |
case 0xbe: *string = 0xa5; break; |
485 |
case 0xdd: *string = 0xa6; break; |
486 |
case 0xf5: *string = 0xa7; break; |
487 |
case 0xf9: *string = 0xa8; break; |
488 |
case 0xb8: *string = 0xa9; break; |
489 |
case 0xa6: *string = 0xaa; break; |
490 |
case 0xae: *string = 0xab; break; |
491 |
case 0xaa: *string = 0xac; break; |
492 |
case 0xf0: *string = 0xad; break; |
493 |
case 0xa9: *string = 0xae; break; |
494 |
case 0xee: *string = 0xaf; break; |
495 |
case 0xf8: *string = 0xb0; break; |
496 |
case 0xf1: *string = 0xb1; break; |
497 |
case 0xfd: *string = 0xb2; break; |
498 |
case 0xfc: *string = 0xb3; break; |
499 |
case 0xef: *string = 0xb4; break; |
500 |
case 0xe6: *string = 0xb5; break; |
501 |
case 0xf4: *string = 0xb6; break; |
502 |
case 0xfa: *string = 0xb7; break; |
503 |
case 0xf7: *string = 0xb8; break; |
504 |
case 0xfb: *string = 0xb9; break; |
505 |
case 0xa7: *string = 0xba; break; |
506 |
case 0xaf: *string = 0xbb; break; |
507 |
case 0xac: *string = 0xbc; break; |
508 |
case 0xab: *string = 0xbd; break; |
509 |
case 0xf3: *string = 0xbe; break; |
510 |
case 0xa8: *string = 0xbf; break; |
511 |
case 0xb7: *string = 0xc0; break; |
512 |
case 0xb5: *string = 0xc1; break; |
513 |
case 0xb6: *string = 0xc2; break; |
514 |
case 0xc7: *string = 0xc3; break; |
515 |
case 0x8e: *string = 0xc4; break; |
516 |
case 0x8f: *string = 0xc5; break; |
517 |
case 0x92: *string = 0xc6; break; |
518 |
case 0x80: *string = 0xc7; break; |
519 |
case 0xd4: *string = 0xc8; break; |
520 |
case 0x90: *string = 0xc9; break; |
521 |
case 0xd2: *string = 0xca; break; |
522 |
case 0xd3: *string = 0xcb; break; |
523 |
case 0xde: *string = 0xcc; break; |
524 |
case 0xd6: *string = 0xcd; break; |
525 |
case 0xd7: *string = 0xce; break; |
526 |
case 0xd8: *string = 0xcf; break; |
527 |
case 0xd1: *string = 0xd0; break; |
528 |
case 0xa5: *string = 0xd1; break; |
529 |
case 0xe3: *string = 0xd2; break; |
530 |
case 0xe0: *string = 0xd3; break; |
531 |
case 0xe2: *string = 0xd4; break; |
532 |
case 0xe5: *string = 0xd5; break; |
533 |
case 0x99: *string = 0xd6; break; |
534 |
case 0x9e: *string = 0xd7; break; |
535 |
case 0x9d: *string = 0xd8; break; |
536 |
case 0xeb: *string = 0xd9; break; |
537 |
case 0xe9: *string = 0xda; break; |
538 |
case 0xea: *string = 0xdb; break; |
539 |
case 0x9a: *string = 0xdc; break; |
540 |
case 0xed: *string = 0xdd; break; |
541 |
case 0xe8: *string = 0xde; break; |
542 |
case 0xe1: *string = 0xdf; break; |
543 |
case 0x85: *string = 0xe0; break; |
544 |
case 0xa0: *string = 0xe1; break; |
545 |
case 0x83: *string = 0xe2; break; |
546 |
case 0xc6: *string = 0xe3; break; |
547 |
case 0x84: *string = 0xe4; break; |
548 |
case 0x86: *string = 0xe5; break; |
549 |
case 0x91: *string = 0xe6; break; |
550 |
case 0x87: *string = 0xe7; break; |
551 |
case 0x8a: *string = 0xe8; break; |
552 |
case 0x82: *string = 0xe9; break; |
553 |
case 0x88: *string = 0xea; break; |
554 |
case 0x89: *string = 0xeb; break; |
555 |
case 0x8d: *string = 0xec; break; |
556 |
case 0xa1: *string = 0xed; break; |
557 |
case 0x8c: *string = 0xee; break; |
558 |
case 0x8b: *string = 0xef; break; |
559 |
case 0xd0: *string = 0xf0; break; |
560 |
case 0xa4: *string = 0xf1; break; |
561 |
case 0x95: *string = 0xf2; break; |
562 |
case 0xa2: *string = 0xf3; break; |
563 |
case 0x93: *string = 0xf4; break; |
564 |
case 0xe4: *string = 0xf5; break; |
565 |
case 0x94: *string = 0xf6; break; |
566 |
case 0xf6: *string = 0xf7; break; |
567 |
case 0x9b: *string = 0xf8; break; |
568 |
case 0x97: *string = 0xf9; break; |
569 |
case 0xa3: *string = 0xfa; break; |
570 |
case 0x96: *string = 0xfb; break; |
571 |
case 0x81: *string = 0xfc; break; |
572 |
case 0xec: *string = 0xfd; break; |
573 |
case 0xe7: *string = 0xfe; break; |
574 |
case 0x98: *string = 0xff; break; |
575 |
default : break; |
576 |
} |
577 |
} |
578 |
} |
579 |
} /* conv_charset */ |
580 |
|
581 |
|
582 |
char * |
583 |
utf8_to_wincp (const char * s, size_t len) |
584 |
{ |
585 |
char * decs; |
586 |
decs = utf8_to_native (s, len, 0); |
587 |
conv_charset ((byte *)decs, strlen (decs), 1); |
588 |
return decs; |
589 |
} |
590 |
|
591 |
|
592 |
char * |
593 |
wincp_to_utf8 (const char * s, size_t len) |
594 |
{ |
595 |
char * encs; |
596 |
conv_charset ((byte *)s, len, 0); |
597 |
encs = native_to_utf8 (s); |
598 |
return encs; |
599 |
} |
600 |
|
601 |
|
602 |
/*
 * Check whether the NUL-terminated string STR contains a byte with the
 * high bit set.
 *
 * Returns -1 if there is such a byte and 0 if there is none; a NULL or
 * empty STR yields 0.
 */
int
is_8bit_string (const char * str)
{
    const unsigned char *p;

    if (!str)
        return 0;

    /* Single pass up to the terminator; no strlen() per iteration. */
    for (p = (const unsigned char *)str; *p; p++) {
        if (*p & 0x80)
            return -1;
    }
    return 0;
} /* is_8bit_string */