1 |
/* wptUTF8.cpp - UTF8 conversion |
/* wptUTF8.cpp - UTF8 conversion |
2 |
* Copyright (C) 2002, 2004, 2005, 2006 Timo Schulz |
* Copyright (C) 2002, 2004, 2005, 2006, 2009, 2012 Timo Schulz |
3 |
* |
* |
4 |
* This file is part of WinPT. |
* This file is part of WinPT. |
5 |
* |
* |
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
* GNU General Public License for more details. |
* GNU General Public License for more details. |
|
* |
|
|
* You should have received a copy of the GNU General Public License |
|
|
* along with WinPT; if not, write to the Free Software Foundation, |
|
|
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA |
|
15 |
*/ |
*/ |
|
|
|
16 |
#ifdef HAVE_CONFIG_H |
#ifdef HAVE_CONFIG_H |
17 |
#include <config.h> |
#include <config.h> |
18 |
#endif |
#endif |
27 |
#include "wptErrors.h" |
#include "wptErrors.h" |
28 |
|
|
29 |
|
|
30 |
|
/* Byte order mark that is usually used to indicate that the following |
31 |
|
data is encoded in UTF-8. */ |
32 |
|
BYTE UTF8_BOM[] = {0xEF, 0xBB, 0xBF}; |
33 |
|
|
34 |
|
|
35 |
|
/** |
36 |
|
* Convert the given intput string, which is encoded with the locale |
37 |
|
* setting, into UTF-8 representation. |
38 |
|
*/ |
39 |
char* |
char* |
40 |
native_to_utf8 (const char *string) |
native_to_utf8 (const char *string) |
41 |
{ |
{ |
42 |
wchar_t *result; |
int n = MultiByteToWideChar (GetACP (), 0, string, -1, NULL, 0); |
|
char *native; |
|
|
int n; |
|
|
|
|
|
n = MultiByteToWideChar (GetACP (), 0, string, -1, NULL, 0); |
|
43 |
if (n < 0) |
if (n < 0) |
44 |
return NULL; |
return NULL; |
45 |
|
|
46 |
result = (wchar_t*)malloc ((n+1) * sizeof *result); |
wchar_t *result = new wchar_t[n+1]; |
47 |
if (!result) |
if (!result) |
48 |
BUG (0); |
BUG (0); |
49 |
|
|
50 |
n = MultiByteToWideChar (GetACP (), 0, string, -1, result, n); |
n = MultiByteToWideChar (GetACP (), 0, string, -1, result, n); |
51 |
if (n < 0) { |
if (n < 0) { |
52 |
free (result); |
free_if_alloc (result); |
53 |
return NULL; |
return NULL; |
54 |
} |
} |
55 |
|
|
57 |
if (n < 0) |
if (n < 0) |
58 |
return NULL; |
return NULL; |
59 |
|
|
60 |
native = (char*)malloc (n+1); |
char *native = new char[n + 1]; |
61 |
if (!native) |
if (!native) |
62 |
BUG (0); |
BUG (0); |
63 |
|
memset(native, 0, n + 1); |
64 |
|
|
65 |
n = WideCharToMultiByte (CP_UTF8, 0, result, -1, native, n, NULL, NULL); |
n = WideCharToMultiByte (CP_UTF8, 0, result, -1, native, n, NULL, NULL); |
66 |
if (n < 0) { |
if (n < 0) { |
67 |
free (result); |
free_if_alloc (result); |
68 |
return NULL; |
return NULL; |
69 |
} |
} |
70 |
|
|
71 |
free (result); |
free_if_alloc (result); |
72 |
return native; |
return native; |
73 |
} |
} |
74 |
|
|
75 |
|
|
76 |
/* Convert utf8 string @str to native CP. */ |
/** |
77 |
|
* Convert an UTF-8 string into an UTF-16 string. |
78 |
|
*/ |
79 |
|
wchar_t* |
80 |
|
utf8_to_utf16(const char *string, size_t *retlen) |
81 |
|
{ |
82 |
|
int n = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0); |
83 |
|
if (n < 0) |
84 |
|
return NULL; |
85 |
|
|
86 |
|
wchar_t *result = new wchar_t[n + 1]; |
87 |
|
if (!result) |
88 |
|
BUG(0); |
89 |
|
|
90 |
|
n = MultiByteToWideChar(CP_UTF8, 0, string, -1, result, n); |
91 |
|
if (n < 0) { |
92 |
|
free_if_alloc(result); |
93 |
|
return NULL; |
94 |
|
} |
95 |
|
|
96 |
|
*retlen = n; |
97 |
|
return result; |
98 |
|
} |
99 |
|
|
100 |
|
|
101 |
|
/** |
102 |
|
* Convert the given string, which is encoded in UTF-8, |
103 |
|
* into the locale setting. |
104 |
|
*/ |
105 |
char* |
char* |
106 |
utf8_to_native (const char *string) |
utf8_to_native (const char *string) |
107 |
{ |
{ |
108 |
wchar_t *result; |
int n = MultiByteToWideChar (CP_UTF8, 0, string, -1, NULL, 0); |
|
char *native; |
|
|
int n; |
|
|
|
|
|
n = MultiByteToWideChar (CP_UTF8, 0, string, -1, NULL, 0); |
|
109 |
if (n < 0) |
if (n < 0) |
110 |
return NULL; |
return NULL; |
111 |
|
|
112 |
result = (wchar_t*)malloc ((n+1) * sizeof *result); |
wchar_t *result = new wchar_t[n+1]; |
113 |
if (!result) |
if (!result) |
114 |
BUG (0); |
BUG (0); |
115 |
|
|
116 |
n = MultiByteToWideChar (CP_UTF8, 0, string, -1, result, n); |
n = MultiByteToWideChar (CP_UTF8, 0, string, -1, result, n); |
117 |
if (n < 0) { |
if (n < 0) { |
118 |
free (result); |
free_if_alloc (result); |
119 |
return NULL; |
return NULL; |
120 |
} |
} |
121 |
|
|
123 |
if (n < 0) |
if (n < 0) |
124 |
return NULL; |
return NULL; |
125 |
|
|
126 |
native = (char*)malloc (n+1); |
char *native = new char[n + 1]; |
127 |
if (!native) |
if (!native) |
128 |
BUG (0); |
BUG (0); |
129 |
|
memset(native, 0, n + 1); |
130 |
|
|
131 |
n = WideCharToMultiByte (GetACP (), 0, result, -1, native, n, NULL, NULL); |
n = WideCharToMultiByte (GetACP (), 0, result, -1, native, n, NULL, NULL); |
132 |
if (n < 0) { |
if (n < 0) { |
133 |
free (result); |
free_if_alloc (result); |
134 |
return NULL; |
return NULL; |
135 |
} |
} |
136 |
|
|
137 |
free (result); |
free_if_alloc (result); |
138 |
return native; |
return native; |
139 |
} |
} |
140 |
|
|
141 |
|
|
142 |
|
/** |
143 |
|
* Return -1 if the given string contains any 8-bit characters. |
144 |
|
* This is a helper to decide when to use UTF8 encoding. |
145 |
|
*/ |
146 |
int |
int |
147 |
is_8bit_string (const char * str) |
is_8bit_string (const char *str) |
148 |
{ |
{ |
149 |
size_t i; |
for (size_t i = 0; i < strlen (str); i++) { |
|
|
|
|
for (i = 0; i < strlen (str); i++) { |
|
150 |
if (str[i] & 0x80) |
if (str[i] & 0x80) |
151 |
return -1; |
return -1; |
152 |
} |
} |