1 |
/* wptUTF8.cpp - UTF8 conversion |
2 |
* Copyright (C) 2002, 2004, 2005, 2006, 2009, 2012 Timo Schulz |
3 |
* |
4 |
* This file is part of WinPT. |
5 |
* |
6 |
* WinPT is free software; you can redistribute it and/or modify |
7 |
* it under the terms of the GNU General Public License as published by |
8 |
* the Free Software Foundation; either version 2 of the License, or |
9 |
* (at your option) any later version. |
10 |
* |
11 |
* WinPT is distributed in the hope that it will be useful, |
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
* GNU General Public License for more details. |
15 |
*/ |
16 |
#ifdef HAVE_CONFIG_H |
17 |
#include <config.h> |
18 |
#endif |
19 |
|
20 |
#include <windows.h> |
21 |
#include <stdlib.h> |
22 |
#include <stdio.h> |
23 |
#include <string.h> |
24 |
#include <ctype.h> |
25 |
|
26 |
#include "wptTypes.h" |
27 |
#include "wptErrors.h" |
28 |
|
29 |
|
30 |
/* Byte order mark that is usually used to indicate that the following |
31 |
data is encoded in UTF-8. */ |
32 |
BYTE UTF8_BOM[] = {0xEF, 0xBB, 0xBF}; |
33 |
|
34 |
|
35 |
/** |
36 |
* Convert the given intput string, which is encoded with the locale |
37 |
* setting, into UTF-8 representation. |
38 |
*/ |
39 |
char* |
40 |
native_to_utf8 (const char *string) |
41 |
{ |
42 |
int n = MultiByteToWideChar (GetACP (), 0, string, -1, NULL, 0); |
43 |
if (n < 0) |
44 |
return NULL; |
45 |
|
46 |
wchar_t *result = new wchar_t[n+1]; |
47 |
if (!result) |
48 |
BUG (0); |
49 |
|
50 |
n = MultiByteToWideChar (GetACP (), 0, string, -1, result, n); |
51 |
if (n < 0) { |
52 |
free_if_alloc (result); |
53 |
return NULL; |
54 |
} |
55 |
|
56 |
n = WideCharToMultiByte (CP_UTF8, 0, result, -1, NULL, 0, NULL, NULL); |
57 |
if (n < 0) |
58 |
return NULL; |
59 |
|
60 |
char *native = new char[n + 1]; |
61 |
if (!native) |
62 |
BUG (0); |
63 |
memset(native, 0, n + 1); |
64 |
|
65 |
n = WideCharToMultiByte (CP_UTF8, 0, result, -1, native, n, NULL, NULL); |
66 |
if (n < 0) { |
67 |
free_if_alloc (result); |
68 |
return NULL; |
69 |
} |
70 |
|
71 |
free_if_alloc (result); |
72 |
return native; |
73 |
} |
74 |
|
75 |
|
76 |
/** |
77 |
* Convert an UTF-8 string into an UTF-16 string. |
78 |
*/ |
79 |
wchar_t* |
80 |
utf8_to_utf16(const char *string, size_t *retlen) |
81 |
{ |
82 |
int n = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0); |
83 |
if (n < 0) |
84 |
return NULL; |
85 |
|
86 |
wchar_t *result = new wchar_t[n + 1]; |
87 |
if (!result) |
88 |
BUG(0); |
89 |
|
90 |
n = MultiByteToWideChar(CP_UTF8, 0, string, -1, result, n); |
91 |
if (n < 0) { |
92 |
free_if_alloc(result); |
93 |
return NULL; |
94 |
} |
95 |
|
96 |
*retlen = n; |
97 |
return result; |
98 |
} |
99 |
|
100 |
|
101 |
/** |
102 |
* Convert the given string, which is encoded in UTF-8, |
103 |
* into the locale setting. |
104 |
*/ |
105 |
char* |
106 |
utf8_to_native (const char *string) |
107 |
{ |
108 |
int n = MultiByteToWideChar (CP_UTF8, 0, string, -1, NULL, 0); |
109 |
if (n < 0) |
110 |
return NULL; |
111 |
|
112 |
wchar_t *result = new wchar_t[n+1]; |
113 |
if (!result) |
114 |
BUG (0); |
115 |
|
116 |
n = MultiByteToWideChar (CP_UTF8, 0, string, -1, result, n); |
117 |
if (n < 0) { |
118 |
free_if_alloc (result); |
119 |
return NULL; |
120 |
} |
121 |
|
122 |
n = WideCharToMultiByte (GetACP (), 0, result, -1, NULL, 0, NULL, NULL); |
123 |
if (n < 0) |
124 |
return NULL; |
125 |
|
126 |
char *native = new char[n + 1]; |
127 |
if (!native) |
128 |
BUG (0); |
129 |
memset(native, 0, n + 1); |
130 |
|
131 |
n = WideCharToMultiByte (GetACP (), 0, result, -1, native, n, NULL, NULL); |
132 |
if (n < 0) { |
133 |
free_if_alloc (result); |
134 |
return NULL; |
135 |
} |
136 |
|
137 |
free_if_alloc (result); |
138 |
return native; |
139 |
} |
140 |
|
141 |
|
142 |
/**
 * Return -1 if the given string contains any 8-bit characters,
 * i.e. any byte with the high bit (0x80) set, and 0 otherwise.
 * This is a helper to decide when to use UTF8 encoding.
 *
 * @param str  NUL-terminated string to scan; NULL is treated like an
 *             empty string.
 * @return -1 if at least one byte has the high bit set, 0 otherwise.
 */
int
is_8bit_string (const char *str)
{
    if (!str)
        return 0;

    /* Walk to the terminator in a single pass rather than calling
       strlen() in the loop condition, which rescans the string on
       every iteration. */
    for (const unsigned char *p = (const unsigned char *) str; *p; p++) {
        if (*p & 0x80)
            return -1;
    }
    return 0;
}