YAZ  5.23.1
iconv_decode_iso5426.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
17 #if HAVE_CONFIG_H
18 #include <config.h>
19 #endif
20 
21 #include <assert.h>
22 #include <errno.h>
23 #include <string.h>
24 
25 #include <yaz/xmalloc.h>
26 #include "iconv-p.h"
27 
/**
 * Per-conversion state of the ISO 5426 decoder.
 *
 * Holds the currently designated character sets and a small queue of
 * combining characters that were read ahead of their base character.
 */
struct decoder_data {
    int g0_mode;      /* final byte of the last G0 designation; used for input bytes < 128 */
    int g1_mode;      /* final byte of the last G1 designation; used for input bytes >= 128 */

    int comb_offset;  /* index of the next queued combining character to hand out */
    int comb_size;    /* number of combining characters currently queued */
    unsigned long comb_x[8];   /* queued combining characters */
    size_t comb_no_read[8];    /* input bytes consumed by each queued character */
};
37 
50 
51 
52 static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd,
53  struct decoder_data *data,
54  unsigned char *inp,
55  size_t inbytesleft, size_t *no_read,
56  int *comb);
57 
58 static unsigned long read_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d,
59  unsigned char *inp,
60  size_t inbytesleft, size_t *no_read)
61 {
62  struct decoder_data *data = (struct decoder_data *) d->data;
63  unsigned long x;
64  if (data->comb_offset < data->comb_size)
65  {
66  *no_read = data->comb_no_read[data->comb_offset];
67  x = data->comb_x[data->comb_offset];
68 
69  /* special case for double-diacritic combining characters,
70  INVERTED BREVE and DOUBLE TILDE.
71  We'll increment the no_read counter by 1, since we want to skip over
72  the processing of the closing ligature character
73  */
74  /* this code is no longer necessary.. our handlers code in
75  yaz_iso5426_?_conv (generated by charconv.tcl) now returns
76  0 and no_read=1 when a sequence does not match the input.
77  The SECOND HALFs in codetables.xml produces a non-existant
78  entry in the conversion trie.. Hence when met, the input byte is
79  skipped as it should (in yaz_iconv)
80  */
81 #if 0
82  if (x == 0x0361 || x == 0x0360)
83  *no_read += 1;
84 #endif
85  data->comb_offset++;
86  return x;
87  }
88 
89  data->comb_offset = 0;
90  for (data->comb_size = 0; data->comb_size < 8; data->comb_size++)
91  {
92  int comb = 0;
93 
94  if (inbytesleft == 0 && data->comb_size)
95  {
97  x = 0;
98  *no_read = 0;
99  break;
100  }
101  x = yaz_read_iso5426_comb(cd, data, inp, inbytesleft, no_read, &comb);
102  if (!comb || !x)
103  break;
104  data->comb_x[data->comb_size] = x;
105  data->comb_no_read[data->comb_size] = *no_read;
106  inp += *no_read;
107  inbytesleft = inbytesleft - *no_read;
108  }
109  return x;
110 }
111 
112 static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd,
113  struct decoder_data *data,
114  unsigned char *inp,
115  size_t inbytesleft, size_t *no_read,
116  int *comb)
117 {
118  *no_read = 0;
119  while (inbytesleft > 0 && *inp == 27)
120  {
121  int *modep = &data->g0_mode;
122  size_t inbytesleft0 = inbytesleft;
123 
124  inbytesleft--;
125  inp++;
126  if (inbytesleft == 0)
127  goto incomplete;
128  if (*inp == '$') /* set with multiple bytes */
129  {
130  inbytesleft--;
131  inp++;
132  }
133  if (inbytesleft == 0)
134  goto incomplete;
135  if (*inp == '(' || *inp == ',') /* G0 */
136  {
137  inbytesleft--;
138  inp++;
139  }
140  else if (*inp == ')' || *inp == '-') /* G1 */
141  {
142  inbytesleft--;
143  inp++;
144  modep = &data->g1_mode;
145  }
146  if (inbytesleft == 0)
147  goto incomplete;
148  if (*inp == '!') /* ANSEL is a special case */
149  {
150  inbytesleft--;
151  inp++;
152  }
153  if (inbytesleft == 0)
154  goto incomplete;
155  *modep = *inp++; /* Final character */
156  inbytesleft--;
157 
158  (*no_read) += inbytesleft0 - inbytesleft;
159  }
160  if (inbytesleft == 0)
161  return 0;
162  else if (*inp == ' ')
163  {
164  *no_read += 1;
165  return ' ';
166  }
167  else
168  {
169  unsigned long x;
170  size_t no_read_sub = 0;
171  int mode = *inp < 128 ? data->g0_mode : data->g1_mode;
172  *comb = 0;
173 
174  switch(mode)
175  {
176  case 'B': /* Basic ASCII */
177  case 's': /* ASCII */
178  x = yaz_iso5426_42_conv(inp, inbytesleft, &no_read_sub, comb,
179  127, 0);
180  break;
181  case 'E': /* ANSEL */
182  x = yaz_iso5426_45_conv(inp, inbytesleft, &no_read_sub, comb,
183  127, 128);
184  break;
185  default:
186  *no_read = 0;
188  return 0;
189  }
190  *no_read += no_read_sub;
191  return x;
192  }
193 incomplete:
194  *no_read = 0;
196  return 0;
197 }
198 
199 
201  unsigned char *inp,
202  size_t inbytesleft, size_t *no_read)
203 {
204  struct decoder_data *data = (struct decoder_data *) d->data;
205  data->g0_mode = 'B';
206  data->g1_mode = 'E';
207  data->comb_offset = data->comb_size = 0;
208  return 0;
209 }
210 
212 {
213  struct decoder_data *data = (struct decoder_data *) d->data;
214  xfree(data);
215 }
216 
219 {
220  if (!yaz_matchstr(fromcode, "ISO5426"))
222  else
223  return 0;
224  {
225  struct decoder_data *data = (struct decoder_data *)
226  xmalloc(sizeof(*data));
227  d->data = data;
230  }
231  return d;
232 }
233 
234 
235 /*
236  * Local variables:
237  * c-basic-offset: 4
238  * c-file-style: "Stroustrup"
239  * indent-tabs-mode: nil
240  * End:
241  * vim: shiftwidth=4 tabstop=8 expandtab
242  */
243 
yaz_iconv_decoder_t yaz_iso5426_decoder(const char *fromcode, yaz_iconv_decoder_t d)
size_t(* init_handle)(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inbuf, size_t inbytesleft, size_t *no_read)
Definition: iconv-p.h:83
static unsigned long read_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
yaz_conv_func_t yaz_iso5426_33_conv
yaz_conv_func_t yaz_iso5426_42_conv
yaz_conv_func_t yaz_iso5426_32_conv
Header for errno utilities.
void destroy_iso5426(yaz_iconv_decoder_t d)
yaz_conv_func_t yaz_iso5426_62_conv
unsigned long comb_x[8]
Internal header for iconv.
yaz_conv_func_t yaz_iso5426_4E_conv
yaz_conv_func_t yaz_iso5426_45_conv
#define xfree(x)
utility macro which calls xfree_f
Definition: xmalloc.h:53
yaz_conv_func_t yaz_iso5426_70_conv
#define YAZ_ICONV_EINVAL
error code: An incomplete multibyte sequence is in input buffer
Definition: yaz-iconv.h:51
yaz_conv_func_t yaz_iso5426_34_conv
#define YAZ_ICONV_EILSEQ
error code: Invalid sequence
Definition: yaz-iconv.h:49
void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
Definition: siconv.c:298
int yaz_matchstr(const char *s1, const char *s2)
match strings - independent of case and '-'
Definition: matchstr.c:42
size_t comb_no_read[8]
#define xmalloc(x)
utility macro which calls malloc_f
Definition: xmalloc.h:49
Header for memory handling functions.
yaz_conv_func_t yaz_iso5426_31_conv
unsigned long(* read_handle)(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inbuf, size_t inbytesleft, size_t *no_read)
Definition: iconv-p.h:86
void(* destroy_handle)(yaz_iconv_decoder_t d)
Definition: iconv-p.h:89
unsigned long yaz_conv_func_t(unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining, unsigned mask, int boffset)
Definition: iconv-p.h:70
static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd, struct decoder_data *data, unsigned char *inp, size_t inbytesleft, size_t *no_read, int *comb)
yaz_conv_func_t yaz_iso5426_53_conv
static size_t init_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read)
yaz_conv_func_t yaz_iso5426_51_conv
yaz_conv_func_t yaz_iso5426_67_conv