YAZ  5.23.1
icu_utf8.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 
11 #if HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14 
15 #if YAZ_HAVE_ICU
16 #include <yaz/xmalloc.h>
17 
18 #include <yaz/icu_I18N.h>
19 
20 #include <yaz/log.h>
21 
22 #include <string.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <assert.h>
26 
27 #include <unicode/ustring.h> /* some more string fcns*/
28 #include <unicode/uchar.h> /* char names */
29 
30 struct icu_buf_utf8 *icu_buf_utf8_create(size_t capacity)
31 {
32  struct icu_buf_utf8 *buf8
33  = (struct icu_buf_utf8 *) xmalloc(sizeof(struct icu_buf_utf8));
34 
35  buf8->utf8_len = 0;
36  buf8->utf8_cap = capacity;
37  if (capacity > 0)
38  {
39  buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
40  buf8->utf8[0] = (uint8_t) 0;
41  }
42  else
43  buf8->utf8 = 0;
44  return buf8;
45 }
46 
47 struct icu_buf_utf8 *icu_buf_utf8_clear(struct icu_buf_utf8 *buf8)
48 {
49  assert(buf8);
50  if (buf8->utf8)
51  buf8->utf8[0] = (uint8_t) 0;
52  buf8->utf8_len = 0;
53  return buf8;
54 }
55 
56 struct icu_buf_utf8 *icu_buf_utf8_resize(struct icu_buf_utf8 *buf8,
57  size_t capacity)
58 {
59  assert(buf8);
60  if (capacity > 0)
61  {
62  if (0 == buf8->utf8)
63  buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
64  else
65  buf8->utf8
66  = (uint8_t *) xrealloc(buf8->utf8, sizeof(uint8_t) * capacity);
67 
68  buf8->utf8_cap = capacity;
69  }
70  return buf8;
71 }
72 
73 const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
74 {
75  assert(src8);
76  if (src8->utf8_len == 0)
77  return "";
78 
79  if (src8->utf8_len == src8->utf8_cap)
80  src8 = icu_buf_utf8_resize(src8, src8->utf8_len * 2 + 1);
81 
82  src8->utf8[src8->utf8_len] = '\0';
83 
84  return (const char *) src8->utf8;
85 }
86 
87 void icu_buf_utf8_destroy(struct icu_buf_utf8 *buf8)
88 {
89  if (buf8)
90  xfree(buf8->utf8);
91  xfree(buf8);
92 }
93 
94 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 *dest16,
95  const char *src8cstr,
96  UErrorCode *status)
97 {
98  size_t src8cstr_len = 0;
99  int32_t utf16_len = 0;
100 
101  *status = U_ZERO_ERROR;
102  src8cstr_len = strlen(src8cstr);
103 
104  u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
105  &utf16_len,
106  src8cstr, src8cstr_len, status);
107 
108  /* check for buffer overflow, resize and retry */
109  if (*status == U_BUFFER_OVERFLOW_ERROR)
110  {
111  icu_buf_utf16_resize(dest16, utf16_len * 2);
112  *status = U_ZERO_ERROR;
113  u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
114  &utf16_len,
115  src8cstr, src8cstr_len, status);
116  }
117 
118  if (U_SUCCESS(*status) && utf16_len <= dest16->utf16_cap)
119  dest16->utf16_len = utf16_len;
120  else
121  icu_buf_utf16_clear(dest16);
122 
123  return *status;
124 }
125 
126 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8,
127  const struct icu_buf_utf16 *src16,
128  UErrorCode *status)
129 {
130  int32_t utf8_len = 0;
131 
132  u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
133  &utf8_len,
134  src16->utf16, src16->utf16_len, status);
135 
136  /* check for buffer overflow, resize and retry */
137  if (*status == U_BUFFER_OVERFLOW_ERROR)
138  {
139  icu_buf_utf8_resize(dest8, utf8_len * 2);
140  *status = U_ZERO_ERROR;
141  u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
142  &utf8_len,
143  src16->utf16, src16->utf16_len, status);
144  }
145 
146  if (U_SUCCESS(*status) && utf8_len <= dest8->utf8_cap)
147  dest8->utf8_len = utf8_len;
148  else
149  icu_buf_utf8_clear(dest8);
150 
151  return *status;
152 }
153 
154 #endif /* YAZ_HAVE_ICU */
155 
156 /*
157  * Local variables:
158  * c-basic-offset: 4
159  * c-file-style: "Stroustrup"
160  * indent-tabs-mode: nil
161  * End:
162  * vim: shiftwidth=4 tabstop=8 expandtab
163  */
164 
int32_t utf8_len
Definition: icu_I18N.h:81
#define xrealloc(o, x)
utility macro which calls xrealloc_f
Definition: xmalloc.h:47
uint8_t * utf8
Definition: icu_I18N.h:80
UChar * utf16
Definition: icu_I18N.h:54
const char * icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8, const struct icu_buf_utf16 *src16, UErrorCode *status)
Internal header for ICU utilities.
#define xfree(x)
utility macro which calls xfree_f
Definition: xmalloc.h:53
int32_t utf16_len
Definition: icu_I18N.h:55
int32_t utf8_cap
Definition: icu_I18N.h:82
struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 *buf16, size_t capacity)
struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 *buf8, size_t capacity)
int32_t utf16_cap
Definition: icu_I18N.h:56
#define xmalloc(x)
utility macro which calls xmalloc_f
Definition: xmalloc.h:49
Header for memory handling functions.
struct icu_buf_utf16 * icu_buf_utf16_clear(struct icu_buf_utf16 *buf16)
Logging utility.
void icu_buf_utf8_destroy(struct icu_buf_utf8 *buf8)
struct icu_buf_utf8 * icu_buf_utf8_clear(struct icu_buf_utf8 *buf8)
UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 *dest16, const char *src8cstr, UErrorCode *status)
struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity)