928fcba4c4ad5b9cc7e77400cdd9263ab723cf72
[public/netxms.git] / src / libnetxms / unicode.cpp
1 /* $Id: unicode.cpp,v 1.27 2008-01-28 18:09:38 victor Exp $ */
2 /*
3 ** NetXMS - Network Management System
4 ** Copyright (C) 2003, 2004, 2005, 2006, 2007 Victor Kirhenshtein
5 **
6 ** This program is free software; you can redistribute it and/or modify
7 ** it under the terms of the GNU General Public License as published by
8 ** the Free Software Foundation; either version 2 of the License, or
9 ** (at your option) any later version.
10 **
11 ** This program is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ** GNU General Public License for more details.
15 **
16 ** You should have received a copy of the GNU General Public License
17 ** along with this program; if not, write to the Free Software
18 ** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 **
20 ** File: unicode.cpp
21 **
22 **/
23
24 #include "libnetxms.h"
25
26
27 //
28 // Static data
29 //
30
31 static char m_cpDefault[MAX_CODEPAGE_LEN] = ICONV_DEFAULT_CODEPAGE;
32
33
34 #ifndef _WIN32
35
36 #if HAVE_ICONV_H
37 #include <iconv.h>
38 #endif
39
40
41 //
42 // UNICODE character set
43 //
44
#ifndef __DISABLE_ICONV

// The configure script probes for libiconv first and for iconv second;
// when libiconv is found HAVE_ICONV is not set correctly, so force it on.
#if HAVE_LIBICONV
#undef HAVE_ICONV
#define HAVE_ICONV 1
#endif

// Select the name under which the detected iconv knows UCS-2.
// If no name is known: hard error for UNICODE builds, otherwise a warning
// is issued and iconv usage is switched off.
#if HAVE_ICONV_UCS_2_INTERNAL
#define UCS2_CODEPAGE_NAME "UCS-2-INTERNAL"
#elif HAVE_ICONV_UCS_2
#define UCS2_CODEPAGE_NAME "UCS-2"
#elif HAVE_ICONV_UCS2
#define UCS2_CODEPAGE_NAME "UCS2"
#elif HAVE_ICONV_UCS_2BE && WORDS_BIGENDIAN
#define UCS2_CODEPAGE_NAME "UCS-2BE"
#elif defined(UNICODE)
#error Cannot determine valid UCS-2 codepage name
#else
#warning Cannot determine valid UCS-2 codepage name
#undef HAVE_ICONV
#endif

// Select the name under which the detected iconv knows UCS-4.
// If no name is known: hard error for UNICODE builds using UCS-4, otherwise
// a warning is issued and iconv usage is switched off.
#if HAVE_ICONV_UCS_4_INTERNAL
#define UCS4_CODEPAGE_NAME "UCS-4-INTERNAL"
#elif HAVE_ICONV_UCS_4
#define UCS4_CODEPAGE_NAME "UCS-4"
#elif HAVE_ICONV_UCS4
#define UCS4_CODEPAGE_NAME "UCS4"
#elif HAVE_ICONV_UCS_4BE && WORDS_BIGENDIAN
#define UCS4_CODEPAGE_NAME "UCS-4BE"
#elif defined(UNICODE) && defined(UNICODE_UCS4)
#error Cannot determine valid UCS-4 codepage name
#else
#warning Cannot determine valid UCS-4 codepage name
#undef HAVE_ICONV
#endif

// Codepage name matching the in-memory representation of WCHAR
#if defined(UNICODE_UCS4)
#define UNICODE_CODEPAGE_NAME UCS4_CODEPAGE_NAME
#else /* assume UCS-2 */
#define UNICODE_CODEPAGE_NAME UCS2_CODEPAGE_NAME
#endif

#endif /* __DISABLE_ICONV */
95
96
97 //
98 // Set application's default codepage
99 //
100
101 BOOL LIBNETXMS_EXPORTABLE SetDefaultCodepage(const char *cp)
102 {
103 BOOL rc;
104 iconv_t cd;
105
106 #if HAVE_ICONV && !defined(__DISABLE_ICONV)
107 cd = iconv_open(cp, "UTF-8");
108 if (cd != (iconv_t)(-1))
109 {
110 iconv_close(cd);
111 #endif
112 strncpy(m_cpDefault, cp, MAX_CODEPAGE_LEN);
113 m_cpDefault[MAX_CODEPAGE_LEN - 1] = 0;
114 rc = TRUE;
115 #if HAVE_ICONV && !defined(__DISABLE_ICONV)
116 }
117 else
118 {
119 rc = FALSE;
120 }
121 #endif
122 return rc;
123 }
124
125
126 //
127 // Calculate length of wide character string
128 //
129
130 #if !UNICODE_UCS2
131
132 int LIBNETXMS_EXPORTABLE ucs2_strlen(const UCS2CHAR *pStr)
133 {
134 int iLen = 0;
135 const UCS2CHAR *pCurr = pStr;
136
137 while(*pCurr++)
138 iLen++;
139 return iLen;
140 }
141
142 #endif
143
144
145 //
146 // Duplicate wide character string
147 //
148
149 #if !UNICODE_UCS2
150
151 UCS2CHAR LIBNETXMS_EXPORTABLE *ucs2_strdup(const UCS2CHAR *pStr)
152 {
153 return (UCS2CHAR *)nx_memdup(pStr, (ucs2_strlen(pStr) + 1) * sizeof(UCS2CHAR));
154 }
155
156 #endif
157
158
159 //
160 // Copy wide character string with length limitation
161 //
162
163 #if !UNICODE_UCS2
164
165 UCS2CHAR LIBNETXMS_EXPORTABLE *ucs2_strncpy(UCS2CHAR *pDst, const UCS2CHAR *pSrc, int nDstLen)
166 {
167 int nLen;
168
169 nLen = ucs2_strlen(pSrc) + 1;
170 if (nLen > nDstLen)
171 nLen = nDstLen;
172 memcpy(pDst, pSrc, nLen * sizeof(UCS2CHAR));
173 return pDst;
174 }
175
176 #endif
177
178
179 //
180 // Convert UNICODE string to single-byte string
181 //
182
183 int LIBNETXMS_EXPORTABLE WideCharToMultiByte(int iCodePage, DWORD dwFlags,
184 const WCHAR *pWideCharStr, int cchWideChar,
185 char *pByteStr, int cchByteChar,
186 char *pDefaultChar, BOOL *pbUsedDefChar)
187 {
188 #if HAVE_ICONV && !defined(__DISABLE_ICONV)
189 iconv_t cd;
190 int nRet;
191 const char *inbuf;
192 char *outbuf;
193 size_t inbytes, outbytes;
194 char cp[MAX_CODEPAGE_LEN + 16];
195
196 // Calculate required length. Because iconv cannot calculate
197 // resulting multibyte string length, assume the worst case - 3 bytes
198 // per character for UTF-8 and 2 bytes per character for other encodings
199 if (cchByteChar == 0)
200 {
201 return wcslen(pWideCharStr) * (iCodePage == CP_UTF8 ? 3 : 2) + 1;
202 }
203
204 strcpy(cp, m_cpDefault);
205 #if HAVE_ICONV_IGNORE
206 strcat(cp, "//IGNORE");
207 #endif
208 cd = iconv_open(iCodePage == CP_UTF8 ? "UTF-8" : cp, UNICODE_CODEPAGE_NAME);
209 if (cd != (iconv_t)(-1))
210 {
211 inbuf = (const char *)pWideCharStr;
212 inbytes = ((cchWideChar == -1) ? wcslen(pWideCharStr) + 1 : cchWideChar) * sizeof(WCHAR);
213 outbuf = pByteStr;
214 outbytes = cchByteChar;
215 nRet = iconv(cd, (ICONV_CONST char **)&inbuf, &inbytes, &outbuf, &outbytes);
216 iconv_close(cd);
217 if (nRet == -1)
218 {
219 if (errno == EILSEQ)
220 {
221 nRet = cchByteChar - outbytes;
222 }
223 else
224 {
225 nRet = 0;
226 }
227 }
228 if ((cchWideChar == -1) && (outbytes > 0))
229 {
230 *outbuf = 0;
231 }
232 }
233 else
234 {
235 *pByteStr = 0;
236 nRet = 0;
237 }
238 return nRet;
239
240 #else
241
242 const WCHAR *pSrc;
243 char *pDest;
244 int iPos, iSize;
245
246 if (cchByteChar == 0)
247 {
248 return wcslen(pWideCharStr) + 1;
249 }
250
251 iSize = (cchWideChar == -1) ? wcslen(pWideCharStr) : cchWideChar;
252 if (iSize >= cchByteChar)
253 iSize = cchByteChar - 1;
254 for(pSrc = pWideCharStr, iPos = 0, pDest = pByteStr; iPos < iSize; iPos++, pSrc++, pDest++)
255 *pDest = (*pSrc < 256) ? (char)(*pSrc) : '?';
256 *pDest = 0;
257 return iSize;
258
259 #endif /* HAVE_ICONV */
260 }
261
262
263 //
264 // Convert single-byte to UNICODE string
265 //
266
267 int LIBNETXMS_EXPORTABLE MultiByteToWideChar(int iCodePage, DWORD dwFlags, const char *pByteStr,
268 int cchByteChar, WCHAR *pWideCharStr, int cchWideChar)
269 {
270 #if HAVE_ICONV && !defined(__DISABLE_ICONV)
271 iconv_t cd;
272 int nRet;
273 const char *inbuf;
274 char *outbuf;
275 size_t inbytes, outbytes;
276
277 if (cchWideChar == 0)
278 {
279 return strlen(pByteStr) + 1;
280 }
281
282 cd = iconv_open(UNICODE_CODEPAGE_NAME, iCodePage == CP_UTF8 ? "UTF-8" : m_cpDefault);
283 if (cd != (iconv_t)(-1))
284 {
285 inbuf = pByteStr;
286 inbytes = (cchByteChar == -1) ? strlen(pByteStr) + 1 : cchByteChar;
287 outbuf = (char *)pWideCharStr;
288 outbytes = cchWideChar * sizeof(WCHAR);
289 nRet = iconv(cd, (ICONV_CONST char **)&inbuf, &inbytes, &outbuf, &outbytes);
290 iconv_close(cd);
291 if (nRet == -1)
292 {
293 if (errno == EILSEQ)
294 {
295 nRet = (cchWideChar * sizeof(WCHAR) - outbytes) / sizeof(WCHAR);
296 }
297 else
298 {
299 nRet = 0;
300 }
301 }
302 if (((char *)outbuf - (char *)pWideCharStr > sizeof(WCHAR)) && (*pWideCharStr == 0xFEFF))
303 {
304 // Remove UNICODE byte order indicator if presented
305 memmove(pWideCharStr, &pWideCharStr[1], (char *)outbuf - (char *)pWideCharStr - sizeof(WCHAR));
306 outbuf -= sizeof(WCHAR);
307 }
308 if ((cchByteChar == -1) && (outbytes >= sizeof(WCHAR)))
309 {
310 *((WCHAR *)outbuf) = 0;
311 }
312 }
313 else
314 {
315 *pWideCharStr = 0;
316 nRet = 0;
317 }
318 return nRet;
319
320 #else
321
322 const char *pSrc;
323 WCHAR *pDest;
324 int iPos, iSize;
325
326 if (cchWideChar == 0)
327 {
328 return strlen(pByteStr) + 1;
329 }
330
331 iSize = (cchByteChar == -1) ? strlen(pByteStr) : cchByteChar;
332 if (iSize >= cchWideChar)
333 iSize = cchWideChar - 1;
334 for(pSrc = pByteStr, iPos = 0, pDest = pWideCharStr; iPos < iSize; iPos++, pSrc++, pDest++)
335 *pDest = (WCHAR)(*pSrc);
336 *pDest = 0;
337
338 return iSize;
339 #endif
340 }
341
342 #endif /* not _WIN32 */
343
344
345 //
346 // UNICODE version of inet_addr()
347 //
348
349 DWORD LIBNETXMS_EXPORTABLE inet_addr_w(const WCHAR *pszAddr)
350 {
351 char szBuffer[256];
352
353 WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK | WC_DEFAULTCHAR,
354 pszAddr, -1, szBuffer, 256, NULL, NULL);
355 return inet_addr(szBuffer);
356 }
357
358
359 //
360 // Convert multibyte string to wide string using current codepage and
361 // allocating wide string dynamically
362 //
363
364 WCHAR LIBNETXMS_EXPORTABLE *WideStringFromMBString(const char *pszString)
365 {
366 WCHAR *pwszOut;
367 int nLen;
368
369 nLen = (int)strlen(pszString) + 1;
370 pwszOut = (WCHAR *)malloc(nLen * sizeof(WCHAR));
371 MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, pszString, -1, pwszOut, nLen);
372 return pwszOut;
373 }
374
375
376 //
377 // Convert wide string to multibyte string using current codepage and
378 // allocating multibyte string dynamically
379 //
380
381 char LIBNETXMS_EXPORTABLE *MBStringFromWideString(const WCHAR *pwszString)
382 {
383 char *pszOut;
384 int nLen;
385
386 nLen = (int)wcslen(pwszString) + 1;
387 pszOut = (char *)malloc(nLen);
388 WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK | WC_DEFAULTCHAR,
389 pwszString, -1, pszOut, nLen, NULL, NULL);
390 return pszOut;
391 }
392
393
394 //
395 // Convert wide string to UTF8 string allocating UTF8 string dynamically
396 //
397
398 char LIBNETXMS_EXPORTABLE *UTF8StringFromWideString(const WCHAR *pwszString)
399 {
400 char *pszOut;
401 int nLen;
402
403 nLen = WideCharToMultiByte(CP_UTF8, 0, pwszString, -1, NULL, 0, NULL, NULL);
404 pszOut = (char *)malloc(nLen);
405 WideCharToMultiByte(CP_UTF8, 0, pwszString, -1, pszOut, nLen, NULL, NULL);
406 return pszOut;
407 }
408
409
410 //
411 // Get OpenSSL error string as UNICODE string
412 // Buffer must be at least 256 character long
413 //
414
415 #ifdef _WITH_ENCRYPTION
416
417 WCHAR LIBNETXMS_EXPORTABLE *ERR_error_string_W(int nError, WCHAR *pwszBuffer)
418 {
419 char text[256];
420
421 memset(text, 0, sizeof(text));
422 ERR_error_string(nError, text);
423 MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, text, -1, pwszBuffer, 256);
424 return pwszBuffer;
425 }
426
427 #endif
428
429
430 #if defined(UNICODE) && defined(UNICODE_UCS4)
431
432 //
433 // Convert UCS-2 to UCS-4
434 //
435
436 size_t LIBNETXMS_EXPORTABLE ucs2_to_ucs4(const UCS2CHAR *src, size_t srcLen, WCHAR *dst, size_t dstLen)
437 {
438 iconv_t cd;
439 const char *inbuf;
440 char *outbuf;
441 size_t count, inbytes, outbytes;
442
443 cd = iconv_open(UCS4_CODEPAGE_NAME, UCS2_CODEPAGE_NAME);
444 if (cd != (iconv_t)(-1))
445 {
446 inbuf = (const char *)src;
447 inbytes = (srcLen == -1) ? ucs2_strlen(src) + 1 : srcLen;
448 outbuf = (char *)dst;
449 outbytes = dstLen * sizeof(WCHAR);
450 count = iconv(cd, (ICONV_CONST char **)&inbuf, &inbytes, &outbuf, &outbytes);
451 iconv_close(cd);
452 if (count == -1)
453 {
454 if (errno == EILSEQ)
455 {
456 count = (dstLen * sizeof(WCHAR) - outbytes) / sizeof(WCHAR);
457 }
458 else
459 {
460 count = 0;
461 }
462 }
463 if ((srcLen == -1) && (outbytes >= sizeof(WCHAR)))
464 {
465 *((WCHAR *)outbuf) = 0;
466 }
467 }
468 else
469 {
470 *dst = 0;
471 count = 0;
472 }
473 return count;
474 }
475
476
477 //
478 // Convert UCS-4 to UCS-2
479 //
480
481 size_t LIBNETXMS_EXPORTABLE ucs4_to_ucs2(const WCHAR *src, size_t srcLen, UCS2CHAR *dst, size_t dstLen)
482 {
483 iconv_t cd;
484 const char *inbuf;
485 char *outbuf;
486 size_t count, inbytes, outbytes;
487
488 cd = iconv_open(UCS2_CODEPAGE_NAME, UCS4_CODEPAGE_NAME);
489 if (cd != (iconv_t)(-1))
490 {
491 inbuf = (const char *)src;
492 inbytes = (srcLen == -1) ? wcslen(src) + 1 : srcLen;
493 outbuf = (char *)dst;
494 outbytes = dstLen * sizeof(UCS2CHAR);
495 count = iconv(cd, (ICONV_CONST char **)&inbuf, &inbytes, &outbuf, &outbytes);
496 iconv_close(cd);
497 if (count == -1)
498 {
499 if (errno == EILSEQ)
500 {
501 count = (dstLen * sizeof(UCS2CHAR) - outbytes) / sizeof(UCS2CHAR);
502 }
503 else
504 {
505 count = 0;
506 }
507 }
508 if (((char *)outbuf - (char *)dst > sizeof(UCS2CHAR)) && (*dst == 0xFEFF))
509 {
510 // Remove UNICODE byte order indicator if presented
511 memmove(dst, &dst[1], (char *)outbuf - (char *)dst - sizeof(UCS2CHAR));
512 outbuf -= sizeof(UCS2CHAR);
513 }
514 if ((srcLen == -1) && (outbytes >= sizeof(UCS2CHAR)))
515 {
516 *((UCS2CHAR *)outbuf) = 0;
517 }
518 }
519 else
520 {
521 *dst = 0;
522 count = 0;
523 }
524 return count;
525 }
526
527 #endif /* UNICODE && UNICODE_UCS4 */
528
529
530 #if !defined(_WIN32) && !defined(UNICODE)
531
532 //
533 // Convert UCS-2 to multibyte
534 //
535
536 size_t LIBNETXMS_EXPORTABLE ucs2_to_mb(const UCS2CHAR *src, size_t srcLen, char *dst, size_t dstLen)
537 {
538 #if HAVE_ICONV && !defined(__DISABLE_ICONV)
539 iconv_t cd;
540 const char *inbuf;
541 char *outbuf;
542 size_t count, inbytes, outbytes;
543
544 cd = iconv_open(m_cpDefault, UCS2_CODEPAGE_NAME);
545 if (cd != (iconv_t)(-1))
546 {
547 inbuf = (const char *)src;
548 inbytes = ((srcLen == -1) ? ucs2_strlen(src) + 1 : srcLen) * sizeof(UCS2CHAR);
549 outbuf = (char *)dst;
550 outbytes = dstLen;
551 count = iconv(cd, (ICONV_CONST char **)&inbuf, &inbytes, &outbuf, &outbytes);
552 iconv_close(cd);
553 if (count == -1)
554 {
555 if (errno == EILSEQ)
556 {
557 count = (dstLen * sizeof(char) - outbytes) / sizeof(char);
558 }
559 else
560 {
561 count = 0;
562 }
563 }
564 if ((srcLen == -1) && (outbytes >= sizeof(char)))
565 {
566 *((char *)outbuf) = 0;
567 }
568 }
569 else
570 {
571 *dst = 0;
572 count = 0;
573 }
574 return count;
575
576 #else
577
578 const UCS2CHAR *psrc;
579 char *pdst;
580 int pos, size;
581
582 size = (srcLen == -1) ? ucs2_strlen(src) : srcLen;
583 if (size >= dstLen)
584 size = dstLen - 1;
585 for(psrc = src, pos = 0, pdst = dst; pos < size; pos++, psrc++, pdst++)
586 *pdst = (*psrc < 256) ? (char)(*psrc) : '?';
587 *pdst = 0;
588 return size;
589 #endif
590 }
591
592
593 //
594 // Convert multibyte to UCS-2
595 //
596
597 size_t LIBNETXMS_EXPORTABLE mb_to_ucs2(const char *src, size_t srcLen, UCS2CHAR *dst, size_t dstLen)
598 {
599 #if HAVE_ICONV && !defined(__DISABLE_ICONV)
600 iconv_t cd;
601 const char *inbuf;
602 char *outbuf;
603 size_t count, inbytes, outbytes;
604
605 cd = iconv_open(UCS2_CODEPAGE_NAME, m_cpDefault);
606 if (cd != (iconv_t)(-1))
607 {
608 inbuf = (const char *)src;
609 inbytes = (srcLen == -1) ? strlen(src) + 1 : srcLen;
610 outbuf = (char *)dst;
611 outbytes = dstLen * sizeof(UCS2CHAR);
612 count = iconv(cd, (ICONV_CONST char **)&inbuf, &inbytes, &outbuf, &outbytes);
613 iconv_close(cd);
614 if (count == -1)
615 {
616 if (errno == EILSEQ)
617 {
618 count = (dstLen * sizeof(UCS2CHAR) - outbytes) / sizeof(UCS2CHAR);
619 }
620 else
621 {
622 count = 0;
623 }
624 }
625 if (((char *)outbuf - (char *)dst > sizeof(UCS2CHAR)) && (*dst == 0xFEFF))
626 {
627 // Remove UNICODE byte order indicator if presented
628 memmove(dst, &dst[1], (char *)outbuf - (char *)dst - sizeof(UCS2CHAR));
629 outbuf -= sizeof(UCS2CHAR);
630 }
631 if ((srcLen == -1) && (outbytes >= sizeof(UCS2CHAR)))
632 {
633 *((UCS2CHAR *)outbuf) = 0;
634 }
635 }
636 else
637 {
638 *dst = 0;
639 count = 0;
640 }
641 return count;
642
643 #else
644
645 const char *psrc;
646 UCS2CHAR *pdst;
647 int pos, size;
648
649 size = (srcLen == -1) ? strlen(src) : srcLen;
650 if (size >= dstLen)
651 size = dstLen - 1;
652 for(psrc = src, pos = 0, pdst = dst; pos < size; pos++, psrc++, pdst++)
653 *pdst = (UCS2CHAR)(*psrc);
654 *pdst = 0;
655
656 return size;
657 #endif
658 }
659
660 #endif /* !defined(_WIN32) && !defined(UNICODE) */
661
662
663 //
664 // Wide character version of some functions
665 //
666
667 #if !defined(_WIN32) && defined(UNICODE)
668
669 #if !HAVE_WFOPEN
670
671 FILE LIBNETXMS_EXPORTABLE *wfopen(const WCHAR *_name, const WCHAR *_type)
672 {
673 char *name, *type;
674 FILE *f;
675
676 name = MBStringFromWideString(_name);
677 type = MBStringFromWideString(_type);
678 f = fopen(name, type);
679 free(name);
680 free(type);
681 return f;
682 }
683
684 #endif
685
686 #if !HAVE_WOPEN
687
688 int LIBNETXMS_EXPORTABLE wopen(const WCHAR *_name, int flags, ...)
689 {
690 char *name;
691 int rc;
692
693 name = MBStringFromWideString(_name);
694 if (flags & O_CREAT)
695 {
696 va_list args;
697
698 va_start(args, flags);
699 rc = open(name, flags, va_arg(args, mode_t));
700 va_end(args);
701 }
702 else
703 {
704 rc = open(name, flags);
705 }
706 free(name);
707 return rc;
708 }
709
710 #endif
711
712 #if !HAVE_WSTAT
713
714 int wstat(const WCHAR *_path, struct stat *_sbuf)
715 {
716 char path[MAX_PATH];
717
718 WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK | WC_DEFAULTCHAR,
719 _path, -1, path, MAX_PATH, NULL, NULL);
720 return stat(path, _sbuf);
721 }
722
723 #endif
724
725 #endif
726