/[winpt]/trunk/Src/wptUTF8.cpp
ViewVC logotype

Diff of /trunk/Src/wptUTF8.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 185 by twoaday, Mon Mar 20 12:48:52 2006 UTC revision 328 by twoaday, Fri Sep 25 16:07:38 2009 UTC
# Line 1  Line 1 
1  /* wptUTF8.cpp - UTF8 conversion  /* wptUTF8.cpp - UTF8 conversion
2   *      Copyright (C) 1994, 1998-2001 Free Software Foundation, Inc.   *      Copyright (C) 2002, 2004, 2005, 2006, 2009 Timo Schulz
  *      Copyright (C) 2002, 2004, 2005, 2006 Timo Schulz  
3   *   *
4   * This file is part of WinPT.   * This file is part of WinPT.
5   *   *
# Line 13  Line 12 
12   * but WITHOUT ANY WARRANTY; without even the implied warranty of   * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   * GNU General Public License for more details.   * GNU General Public License for more details.
  *  
  * You should have received a copy of the GNU General Public License  
  * along with WinPT; if not, write to the Free Software Foundation,  
  * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA  
15   */   */
   
16  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
17  #include <config.h>  #include <config.h>
18  #endif  #endif
# Line 33  Line 27 
27  #include "wptErrors.h"  #include "wptErrors.h"
28    
29    
30  /* convert latin1 string @string into utf8. */  /**
31  char *   * Converts the given input string, which is encoded with the locale
32  native_to_utf8( const char *string )   * setting, into UTF-8 representation.
33     */
34    char*
35    native_to_utf8 (const char *string)
36  {  {
37      const byte *s;        wchar_t *result;
38      char *buffer;      char *native;
39      byte *p;        int n;
40      size_t length=0;  
41        n = MultiByteToWideChar (GetACP (), 0, string, -1, NULL, 0);
42      for (s=(byte*)string; *s; s++) {      if (n < 0)
43        length++;          return NULL;
44        if (*s & 0x80)  
45            length++;      result = new wchar_t[n+1];
46        if (!result)
47            BUG (0);
48    
49        n = MultiByteToWideChar (GetACP (), 0, string, -1, result, n);
50        if (n < 0) {
51            free_if_alloc (result);
52            return NULL;
53      }      }
     buffer = (char*)malloc (length + 1);  
     for (p = (byte*)buffer, s=(byte*)string; *s; s++) {  
       if (*s & 0x80) {  
           *p++ = 0xc0 | ((*s >> 6) & 3);  
           *p++ = 0x80 | ( *s & 0x3f );  
       }  
       else  
           *p++ = *s;  
     }  
     *p = 0;  
     return buffer;  
 }  
54    
55        n = WideCharToMultiByte (CP_UTF8, 0, result, -1, NULL, 0, NULL, NULL);
56        if (n < 0)
57            return NULL;
58    
59        native = new char[n+1];
60        if (!native)
61            BUG (0);
62    
63        n = WideCharToMultiByte (CP_UTF8, 0, result, -1, native, n, NULL, NULL);
64        if (n < 0) {
65            free_if_alloc (result);
66            return NULL;
67        }
68    
69        free_if_alloc (result);
70        //native[n] = '\0';        
71        return native;
72    }
73    
74    
75  /* Convert utf8 string @str to native CP. */  /**
76  static char*   * Converts the given string, which is encoded in UTF-8,
77     * into the locale setting.
78     */
79    char*
80  utf8_to_native (const char *string)  utf8_to_native (const char *string)
81  {  {
82      wchar_t *result;      wchar_t *result;
83      char *native;          char *native;    
84      int n;      int n;
85    
     /* Convert utf8 to unicode. */  
86      n = MultiByteToWideChar (CP_UTF8, 0, string, -1, NULL, 0);      n = MultiByteToWideChar (CP_UTF8, 0, string, -1, NULL, 0);
87      if (n < 0)      if (n < 0)
88          return NULL;          return NULL;
89    
90      result = (wchar_t*)malloc ((n+1) * sizeof *result);      result = new wchar_t[n+1];
91      if (!result)      if (!result)
92          BUG (0);          BUG (0);
93    
94      n = MultiByteToWideChar (CP_UTF8, 0, string, -1, result, n);      n = MultiByteToWideChar (CP_UTF8, 0, string, -1, result, n);
95      if (n < 0) {      if (n < 0) {
96          free (result);          free_if_alloc (result);
97          return NULL;          return NULL;
98      }      }
99    
     /* Convert wide char into native char. */  
     /*  
100      n = WideCharToMultiByte (GetACP (), 0, result, -1, NULL, 0, NULL, NULL);      n = WideCharToMultiByte (GetACP (), 0, result, -1, NULL, 0, NULL, NULL);
101      if (n < 0)      if (n < 0)
102          return NULL;          return NULL;
     */  
     n = wcstombs (NULL, result, wcslen (result));  
     if (n < 0)  
         return NULL;  
103    
104      native = (char*)malloc (n+1);      native = new char[n+1];
105      if (!native)      if (!native)
106          BUG (0);          BUG (0);
107    
108      /*      n = WideCharToMultiByte (GetACP (), 0, result, -1, native, n, NULL, NULL);
     n = WideCharToMultiByte (CP_ACP, 0, string, -1, result, n, NULL, NULL);  
     if (n < 0) {  
         free (result);  
         return NULL;  
     }  
     */    
     n = wcstombs (native, result, -1);  
109      if (n < 0) {      if (n < 0) {
110          free (result);          free_if_alloc (result);
111          return NULL;          return NULL;
112      }      }
   
     return native;  
 }  
   
 /* CP850 -> CP1251 */  
 static void  
 conv_charset (byte *string, size_t size, int what)  
 {  
     size_t i;  
113            
114      if( what == 0 ) {      free_if_alloc (result);
115          for( i = 0; i < size; i++, string++ ) {      //native[n] = '\0';
116              switch( *string ) {      return native;
             case 0xa0: *string = 0xff; break;  /* nobreakspace */  
             case 0xa1: *string = 0xad; break;  /* exclamdown */  
             case 0xa2: *string = 0xbd; break;  /* cent */  
             case 0xa3: *string = 0x9c; break;  /* sterling */  
             case 0xa4: *string = 0xcf; break;  /* currency */  
             case 0xa5: *string = 0xbe; break;  /* yen */  
             case 0xa6: *string = 0xdd; break;  /* brokenbar */  
             case 0xa7: *string = 0xf5; break;  /* section */  
             case 0xa8: *string = 0xf9; break;  /* diaeresis */  
             case 0xa9: *string = 0xb8; break;  /* copyright */  
             case 0xaa: *string = 0xa6; break;  /* ordfeminine */  
             case 0xab: *string = 0xae; break;  /* guillemotleft */  
             case 0xac: *string = 0xaa; break;  /* notsign */  
             case 0xad: *string = 0xf0; break;  /* hyphen */  
             case 0xae: *string = 0xa9; break;  /* registered */  
             case 0xaf: *string = 0xee; break;  /* macron */  
             case 0xb0: *string = 0xf8; break;  /* degree */  
             case 0xb1: *string = 0xf1; break;  /* plusminus */  
             case 0xb2: *string = 0xfd; break;  /* twosuperior */  
             case 0xb3: *string = 0xfc; break;  /* threesuperior */  
             case 0xb4: *string = 0xef; break;  /* acute */  
             case 0xb5: *string = 0xe6; break;  /* mu */  
             case 0xb6: *string = 0xf4; break;  /* paragraph */  
             case 0xb7: *string = 0xfa; break;  /* periodcentered */  
             case 0xb8: *string = 0xf7; break;  /* cedilla */  
             case 0xb9: *string = 0xfb; break;  /* onesuperior */  
             case 0xba: *string = 0xa7; break;  /* masculine */  
             case 0xbb: *string = 0xaf; break;  /* guillemotright */  
             case 0xbc: *string = 0xac; break;  /* onequarter */  
             case 0xbd: *string = 0xab; break;  /* onehalf */  
             case 0xbe: *string = 0xf3; break;  /* threequarters */  
             case 0xbf: *string = 0xa8; break;  /* questiondown */  
             case 0xc0: *string = 0xb7; break;  /* Agrave */  
             case 0xc1: *string = 0xb5; break;  /* Aacute */  
             case 0xc2: *string = 0xb6; break;  /* Acircumflex */  
             case 0xc3: *string = 0xc7; break;  /* Atilde */  
             case 0xc4: *string = 0x8e; break;  /* Adiaeresis */  
             case 0xc5: *string = 0x8f; break;  /* Aring */  
             case 0xc6: *string = 0x92; break;  /* AE */  
             case 0xc7: *string = 0x80; break;  /* Ccedilla */  
             case 0xc8: *string = 0xd4; break;  /* Egrave */  
             case 0xc9: *string = 0x90; break;  /* Eacute */  
             case 0xca: *string = 0xd2; break;  /* Ecircumflex */  
             case 0xcb: *string = 0xd3; break;  /* Ediaeresis */  
             case 0xcc: *string = 0xde; break;  /* Igrave */  
             case 0xcd: *string = 0xd6; break;  /* Iacute */  
             case 0xce: *string = 0xd7; break;  /* Icircumflex */  
             case 0xcf: *string = 0xd8; break;  /* Idiaeresis */  
             case 0xd0: *string = 0xd1; break;  /* Eth */  
             case 0xd1: *string = 0xa5; break;  /* Ntilde */  
             case 0xd2: *string = 0xe3; break;  /* Ograve */  
             case 0xd3: *string = 0xe0; break;  /* Oacute */  
             case 0xd4: *string = 0xe2; break;  /* Ocircumflex */  
             case 0xd5: *string = 0xe5; break;  /* Otilde */  
             case 0xd6: *string = 0x99; break;  /* Odiaeresis */  
             case 0xd7: *string = 0x9e; break;  /* multiply */  
             case 0xd8: *string = 0x9d; break;  /* Ooblique */  
             case 0xd9: *string = 0xeb; break;  /* Ugrave */  
             case 0xda: *string = 0xe9; break;  /* Uacute */  
             case 0xdb: *string = 0xea; break;  /* Ucircumflex */  
             case 0xdc: *string = 0x9a; break;  /* Udiaeresis */  
             case 0xdd: *string = 0xed; break;  /* Yacute */  
             case 0xde: *string = 0xe8; break;  /* Thorn */  
             case 0xdf: *string = 0xe1; break;  /* ssharp */  
             case 0xe0: *string = 0x85; break;  /* agrave */  
             case 0xe1: *string = 0xa0; break;  /* aacute */  
             case 0xe2: *string = 0x83; break;  /* acircumflex */  
             case 0xe3: *string = 0xc6; break;  /* atilde */  
             case 0xe4: *string = 0x84; break;  /* adiaeresis */  
             case 0xe5: *string = 0x86; break;  /* aring */  
             case 0xe6: *string = 0x91; break;  /* ae */  
             case 0xe7: *string = 0x87; break;  /* ccedilla */  
             case 0xe8: *string = 0x8a; break;  /* egrave */  
             case 0xe9: *string = 0x82; break;  /* eacute */  
             case 0xea: *string = 0x88; break;  /* ecircumflex */  
             case 0xeb: *string = 0x89; break;  /* ediaeresis */  
             case 0xec: *string = 0x8d; break;  /* igrave */  
             case 0xed: *string = 0xa1; break;  /* iacute */  
             case 0xee: *string = 0x8c; break;  /* icircumflex */  
             case 0xef: *string = 0x8b; break;  /* idiaeresis */  
             case 0xf0: *string = 0xd0; break;  /* eth */  
             case 0xf1: *string = 0xa4; break;  /* ntilde */  
             case 0xf2: *string = 0x95; break;  /* ograve */  
             case 0xf3: *string = 0xa2; break;  /* oacute */  
             case 0xf4: *string = 0x93; break;  /* ocircumflex */  
             case 0xf5: *string = 0xe4; break;  /* otilde */  
             case 0xf6: *string = 0x94; break;  /* odiaeresis */  
             case 0xf7: *string = 0xf6; break;  /* division */  
             case 0xf8: *string = 0x9b; break;  /* oslash */  
             case 0xf9: *string = 0x97; break;  /* ugrave */  
             case 0xfa: *string = 0xa3; break;  /* uacute */  
             case 0xfb: *string = 0x96; break;  /* ucircumflex */  
             case 0xfc: *string = 0x81; break;  /* udiaeresis */  
             case 0xfd: *string = 0xec; break;  /* yacute */  
             case 0xfe: *string = 0xe7; break;  /* thorn */  
             case 0xff: *string = 0x98; break;  /* ydiaeresis */  
             default  :  break;  
             }  
         }  
     }  
     else {  
         for( i = 0; i < size; i++, string++ ) {  
             switch( *string ) {  
             case  0xff: *string = 0xa0; break;  
             case  0xad: *string = 0xa1; break;  
             case  0xbd: *string = 0xa2; break;  
             case  0x9c: *string = 0xa3; break;  
             case  0xcf: *string = 0xa4; break;  
             case  0xbe: *string = 0xa5; break;  
             case  0xdd: *string = 0xa6; break;  
             case  0xf5: *string = 0xa7; break;  
             case  0xf9: *string = 0xa8; break;  
             case  0xb8: *string = 0xa9; break;  
             case  0xa6: *string = 0xaa; break;  
             case  0xae: *string = 0xab; break;  
             case  0xaa: *string = 0xac; break;  
             case  0xf0: *string = 0xad; break;  
             case  0xa9: *string = 0xae; break;  
             case  0xee: *string = 0xaf; break;  
             case  0xf8: *string = 0xb0; break;  
             case  0xf1: *string = 0xb1; break;  
             case  0xfd: *string = 0xb2; break;  
             case  0xfc: *string = 0xb3; break;  
             case  0xef: *string = 0xb4; break;  
             case  0xe6: *string = 0xb5; break;  
             case  0xf4: *string = 0xb6; break;  
             case  0xfa: *string = 0xb7; break;  
             case  0xf7: *string = 0xb8; break;  
             case  0xfb: *string = 0xb9; break;  
             case  0xa7: *string = 0xba; break;  
             case  0xaf: *string = 0xbb; break;  
             case  0xac: *string = 0xbc; break;  
             case  0xab: *string = 0xbd; break;  
             case  0xf3: *string = 0xbe; break;  
             case  0xa8: *string = 0xbf; break;  
             case  0xb7: *string = 0xc0; break;  
             case  0xb5: *string = 0xc1; break;  
             case  0xb6: *string = 0xc2; break;  
             case  0xc7: *string = 0xc3; break;  
             case  0x8e: *string = 0xc4; break;  
             case  0x8f: *string = 0xc5; break;  
             case  0x92: *string = 0xc6; break;  
             case  0x80: *string = 0xc7; break;  
             case  0xd4: *string = 0xc8; break;  
             case  0x90: *string = 0xc9; break;  
             case  0xd2: *string = 0xca; break;  
             case  0xd3: *string = 0xcb; break;  
             case  0xde: *string = 0xcc; break;  
             case  0xd6: *string = 0xcd; break;  
             case  0xd7: *string = 0xce; break;  
             case  0xd8: *string = 0xcf; break;  
             case  0xd1: *string = 0xd0; break;  
             case  0xa5: *string = 0xd1; break;  
             case  0xe3: *string = 0xd2; break;  
             case  0xe0: *string = 0xd3; break;  
             case  0xe2: *string = 0xd4; break;  
             case  0xe5: *string = 0xd5; break;  
             case  0x99: *string = 0xd6; break;  
             case  0x9e: *string = 0xd7; break;  
             case  0x9d: *string = 0xd8; break;  
             case  0xeb: *string = 0xd9; break;  
             case  0xe9: *string = 0xda; break;  
             case  0xea: *string = 0xdb; break;  
             case  0x9a: *string = 0xdc; break;  
             case  0xed: *string = 0xdd; break;  
             case  0xe8: *string = 0xde; break;  
             case  0xe1: *string = 0xdf; break;  
             case  0x85: *string = 0xe0; break;  
             case  0xa0: *string = 0xe1; break;  
             case  0x83: *string = 0xe2; break;  
             case  0xc6: *string = 0xe3; break;  
             case  0x84: *string = 0xe4; break;  
             case  0x86: *string = 0xe5; break;  
             case  0x91: *string = 0xe6; break;  
             case  0x87: *string = 0xe7; break;  
             case  0x8a: *string = 0xe8; break;  
             case  0x82: *string = 0xe9; break;  
             case  0x88: *string = 0xea; break;  
             case  0x89: *string = 0xeb; break;  
             case  0x8d: *string = 0xec; break;  
             case  0xa1: *string = 0xed; break;  
             case  0x8c: *string = 0xee; break;  
             case  0x8b: *string = 0xef; break;  
             case  0xd0: *string = 0xf0; break;  
             case  0xa4: *string = 0xf1; break;  
             case  0x95: *string = 0xf2; break;  
             case  0xa2: *string = 0xf3; break;  
             case  0x93: *string = 0xf4; break;  
             case  0xe4: *string = 0xf5; break;  
             case  0x94: *string = 0xf6; break;  
             case  0xf6: *string = 0xf7; break;  
             case  0x9b: *string = 0xf8; break;  
             case  0x97: *string = 0xf9; break;  
             case  0xa3: *string = 0xfa; break;  
             case  0x96: *string = 0xfb; break;  
             case  0x81: *string = 0xfc; break;  
             case  0xec: *string = 0xfd; break;  
             case  0xe7: *string = 0xfe; break;  
             case  0x98: *string = 0xff; break;  
             default  :  break;  
             }  
         }  
     }  
 }  
   
   
 /* XXX: the conv_charset() call fails when the user-id was created  
         with iso-8859-1 but it is assumed that CP850 (gpg console) is used. */  
   
 char*  
 utf8_to_wincp (const char * s, size_t len)  
 {  
     char *decs;  
     decs = utf8_to_native (s);  
     conv_charset ((byte *)decs, strlen (decs), 1);  
     return decs;  
 }  
   
   
 char*  
 wincp_to_utf8 (const char * s, size_t len)  
 {  
     char * encs;  
     conv_charset ((byte *)s, len, 0);  
     encs = native_to_utf8 (s);  
     return encs;  
117  }  }
118    
119    
120    /**
121     * Returns -1 if the given string contains any 8-bit characters.
122     * This is a helper to decide when to use UTF8 encoding.
123     */
124  int  int
125  is_8bit_string (const char * str)  is_8bit_string (const char *str)
126  {  {
127      size_t i;      for (size_t i = 0; i < strlen (str); i++) {
   
     for (i = 0; i < strlen (str); i++) {  
128          if (str[i] & 0x80)          if (str[i] & 0x80)
129              return -1;              return -1;
130      }      }

Legend:
Removed from v.185  
changed lines
  Added in v.328

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26