YAZ  4.2.60
iconv_decode_iso5426.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2013 Index Data
3  * See the file LICENSE for details.
4  */
17 #if HAVE_CONFIG_H
18 #include <config.h>
19 #endif
20 
21 #include <assert.h>
22 #include <errno.h>
23 #include <string.h>
24 
25 #include <yaz/xmalloc.h>
26 #include "iconv-p.h"
27 
/*
 * Decoder state kept between calls of the ISO 5426 read handler.
 */
struct decoder_data {
    /* Final byte of the latest escape sequence that designated G0; this
       set is used for input bytes below 128. */
    int g0_mode;
    /* Final byte of the latest escape sequence that designated G1; this
       set is used for input bytes of 128 and above. */
    int g1_mode;

    /* Index of the next buffered combining character to hand out. */
    int comb_offset;
    /* Number of combining characters currently held in the buffers. */
    int comb_size;
    /* Buffered combining characters (converter results). */
    unsigned long comb_x[8];
    /* Input bytes consumed by each buffered combining character. */
    size_t comb_no_read[8];
};
37 
50 
51 
52 static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd,
53  struct decoder_data *data,
54  unsigned char *inp,
55  size_t inbytesleft, size_t *no_read,
56  int *comb);
57 
58 static unsigned long read_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d,
59  unsigned char *inp,
60  size_t inbytesleft, size_t *no_read)
61 {
62  struct decoder_data *data = (struct decoder_data *) d->data;
63  unsigned long x;
64  if (data->comb_offset < data->comb_size)
65  {
66  *no_read = data->comb_no_read[data->comb_offset];
67  x = data->comb_x[data->comb_offset];
68 
69  /* special case for double-diacritic combining characters,
70  INVERTED BREVE and DOUBLE TILDE.
71  We'll increment the no_read counter by 1, since we want to skip over
72  the processing of the closing ligature character
73  */
74  /* this code is no longer necessary.. our handlers code in
75  yaz_iso5426_?_conv (generated by charconv.tcl) now returns
76  0 and no_read=1 when a sequence does not match the input.
77  The SECOND HALFs in codetables.xml produces a non-existant
78  entry in the conversion trie.. Hence when met, the input byte is
79  skipped as it should (in yaz_iconv)
80  */
81 #if 0
82  if (x == 0x0361 || x == 0x0360)
83  *no_read += 1;
84 #endif
85  data->comb_offset++;
86  return x;
87  }
88 
89  data->comb_offset = 0;
90  for (data->comb_size = 0; data->comb_size < 8; data->comb_size++)
91  {
92  int comb = 0;
93 
94  if (inbytesleft == 0 && data->comb_size)
95  {
97  x = 0;
98  *no_read = 0;
99  break;
100  }
101  x = yaz_read_iso5426_comb(cd, data, inp, inbytesleft, no_read, &comb);
102  if (!comb || !x)
103  break;
104  data->comb_x[data->comb_size] = x;
105  data->comb_no_read[data->comb_size] = *no_read;
106  inp += *no_read;
107  inbytesleft = inbytesleft - *no_read;
108  }
109  return x;
110 }
111 
#if 0
/* not used */
/*
 * Variant of read_iso5426 that tries to merge a base character with a
 * single buffered combining character through
 * yaz_iso_8859_1_lookup_x12.  When the lookup succeeds, the bytes of the
 * combining character are added to *no_read and the buffer is emptied.
 * NOTE(review): presumably the lookup yields a precomposed ISO-8859-1
 * character - confirm against its definition.
 */
static unsigned long read_iso5426s(yaz_iconv_t cd, yaz_iconv_decoder_t d,
                                   unsigned char *inp,
                                   size_t inbytesleft, size_t *no_read)
{
    struct decoder_data *state = (struct decoder_data *) d->data;
    unsigned long ch = read_iso5426(cd, d, inp, inbytesleft, no_read);

    /* only a valid character with exactly one buffered mark qualifies */
    if (ch == 0 || state->comb_size != 1)
        return ch;

    if (yaz_iso_8859_1_lookup_x12(ch, state->comb_x[0], &ch))
    {
        *no_read += state->comb_no_read[0];
        state->comb_size = 0;
    }
    return ch;
}
#endif
131 
132 static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd,
133  struct decoder_data *data,
134  unsigned char *inp,
135  size_t inbytesleft, size_t *no_read,
136  int *comb)
137 {
138  *no_read = 0;
139  while (inbytesleft > 0 && *inp == 27)
140  {
141  int *modep = &data->g0_mode;
142  size_t inbytesleft0 = inbytesleft;
143 
144  inbytesleft--;
145  inp++;
146  if (inbytesleft == 0)
147  goto incomplete;
148  if (*inp == '$') /* set with multiple bytes */
149  {
150  inbytesleft--;
151  inp++;
152  }
153  if (inbytesleft == 0)
154  goto incomplete;
155  if (*inp == '(' || *inp == ',') /* G0 */
156  {
157  inbytesleft--;
158  inp++;
159  }
160  else if (*inp == ')' || *inp == '-') /* G1 */
161  {
162  inbytesleft--;
163  inp++;
164  modep = &data->g1_mode;
165  }
166  if (inbytesleft == 0)
167  goto incomplete;
168  if (*inp == '!') /* ANSEL is a special case */
169  {
170  inbytesleft--;
171  inp++;
172  }
173  if (inbytesleft == 0)
174  goto incomplete;
175  *modep = *inp++; /* Final character */
176  inbytesleft--;
177 
178  (*no_read) += inbytesleft0 - inbytesleft;
179  }
180  if (inbytesleft == 0)
181  return 0;
182  else if (*inp == ' ')
183  {
184  *no_read += 1;
185  return ' ';
186  }
187  else
188  {
189  unsigned long x;
190  size_t no_read_sub = 0;
191  int mode = *inp < 128 ? data->g0_mode : data->g1_mode;
192  *comb = 0;
193 
194  switch(mode)
195  {
196  case 'B': /* Basic ASCII */
197  case 's': /* ASCII */
198  x = yaz_iso5426_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
199  break;
200  case 'E': /* ANSEL */
201  x = yaz_iso5426_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
202  break;
203 
204 #if 0
205  case 'g': /* Greek */
206  x = yaz_iso5426_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
207  break;
208  case 'b': /* Subscripts */
209  x = yaz_iso5426_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
210  break;
211  case 'p': /* Superscripts */
212  x = yaz_iso5426_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
213  break;
214  case '2': /* Basic Hebrew */
215  x = yaz_iso5426_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
216  break;
217  case 'N': /* Basic Cyrillic */
218  x = yaz_iso5426_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
219  break;
220  case 'Q': /* Extended Cyrillic */
221  x = yaz_iso5426_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
222  break;
223  case '3': /* Basic Arabic */
224  x = yaz_iso5426_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
225  break;
226  case '4': /* Extended Arabic */
227  x = yaz_iso5426_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
228  break;
229  case 'S': /* Greek */
230  x = yaz_iso5426_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
231  break;
232  case '1': /* Chinese, Japanese, Korean (EACC) */
233  x = yaz_iso5426_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
234  break;
235 #endif
236  default:
237  *no_read = 0;
239  return 0;
240  }
241  *no_read += no_read_sub;
242  return x;
243  }
244 incomplete:
245  *no_read = 0;
247  return 0;
248 }
249 
250 
252  unsigned char *inp,
253  size_t inbytesleft, size_t *no_read)
254 {
255  struct decoder_data *data = (struct decoder_data *) d->data;
256  data->g0_mode = 'B';
257  data->g1_mode = 'E';
258  data->comb_offset = data->comb_size = 0;
259  return 0;
260 }
261 
263 {
264  struct decoder_data *data = (struct decoder_data *) d->data;
265  xfree(data);
266 }
267 
270 {
271  if (!yaz_matchstr(fromcode, "ISO5426"))
273  else
274  return 0;
275  {
276  struct decoder_data *data = (struct decoder_data *)
277  xmalloc(sizeof(*data));
278  d->data = data;
281  }
282  return d;
283 }
284 
285 
286 /*
287  * Local variables:
288  * c-basic-offset: 4
289  * c-file-style: "Stroustrup"
290  * indent-tabs-mode: nil
291  * End:
292  * vim: shiftwidth=4 tabstop=8 expandtab
293  */
294