00001
00002
00003
00004
00011 #if HAVE_CONFIG_H
00012 #include <config.h>
00013 #endif
00014
00015 #include <assert.h>
00016 #include <errno.h>
00017 #include <string.h>
00018 #include <ctype.h>
00019
00020 #include <yaz/xmalloc.h>
00021 #include "iconv-p.h"
00022
/* Per-encoder state kept in the encoder's data slot. */
struct encoder_data
{
    /* Character held back by the writer until the next character shows
       whether the two combine into one Latin-1 code; 0 when nothing is
       pending. */
    unsigned long compose_char;
};
00027
00028
00029
/*
 * Pairs of characters (x1 followed by x2) that are equivalent to the single
 * ISO-8859-1 code y.  x1 is an ASCII letter and x2 a Unicode combining mark:
 *   0x0300 grave, 0x0301 acute, 0x0302 circumflex, 0x0303 tilde,
 *   0x0308 diaeresis, 0x030a ring above, 0x0327 cedilla.
 * The list ends with an all-zero entry; the code that scans it stops at the
 * first entry whose x1 is 0.
 *
 * The table is only ever read, so it is declared const.
 */
static const struct {
    unsigned long x1, x2;
    unsigned y;
} latin1_comb[] = {
    { 'A', 0x0300, 0xc0},
    { 'A', 0x0301, 0xc1},
    { 'A', 0x0302, 0xc2},
    { 'A', 0x0303, 0xc3},
    { 'A', 0x0308, 0xc4},
    { 'A', 0x030a, 0xc5},

    { 'C', 0x0327, 0xc7},
    { 'E', 0x0300, 0xc8},
    { 'E', 0x0301, 0xc9},
    { 'E', 0x0302, 0xca},
    { 'E', 0x0308, 0xcb},
    { 'I', 0x0300, 0xcc},
    { 'I', 0x0301, 0xcd},
    { 'I', 0x0302, 0xce},
    { 'I', 0x0308, 0xcf},
    { 'N', 0x0303, 0xd1},
    { 'O', 0x0300, 0xd2},
    { 'O', 0x0301, 0xd3},
    { 'O', 0x0302, 0xd4},
    { 'O', 0x0303, 0xd5},
    { 'O', 0x0308, 0xd6},

    { 'U', 0x0300, 0xd9},
    { 'U', 0x0301, 0xda},
    { 'U', 0x0302, 0xdb},
    { 'U', 0x0308, 0xdc},
    { 'Y', 0x0301, 0xdd},

    { 'a', 0x0300, 0xe0},
    { 'a', 0x0301, 0xe1},
    { 'a', 0x0302, 0xe2},
    { 'a', 0x0303, 0xe3},
    { 'a', 0x0308, 0xe4},
    { 'a', 0x030a, 0xe5},

    { 'c', 0x0327, 0xe7},
    { 'e', 0x0300, 0xe8},
    { 'e', 0x0301, 0xe9},
    { 'e', 0x0302, 0xea},
    { 'e', 0x0308, 0xeb},
    { 'i', 0x0300, 0xec},
    { 'i', 0x0301, 0xed},
    { 'i', 0x0302, 0xee},
    { 'i', 0x0308, 0xef},

    { 'n', 0x0303, 0xf1},
    { 'o', 0x0300, 0xf2},
    { 'o', 0x0301, 0xf3},
    { 'o', 0x0302, 0xf4},
    { 'o', 0x0303, 0xf5},
    { 'o', 0x0308, 0xf6},

    { 'u', 0x0300, 0xf9},
    { 'u', 0x0301, 0xfa},
    { 'u', 0x0302, 0xfb},
    { 'u', 0x0308, 0xfc},
    { 'y', 0x0301, 0xfd},

    { 'y', 0x0308, 0xff},

    { 0, 0, 0}
};
00100
00101 int yaz_iso_8859_1_lookup_y(unsigned long v,
00102 unsigned long *x1, unsigned long *x2)
00103 {
00104 if (v >= 0xc0 && v <= 0xff)
00105 {
00106 int i;
00107 for (i = 0; latin1_comb[i].x1; i++)
00108 {
00109 if (v == latin1_comb[i].y)
00110 {
00111 *x1 = latin1_comb[i].x1;
00112 *x2 = latin1_comb[i].x2;
00113 return 1;
00114 }
00115 }
00116 }
00117 return 0;
00118 }
00119
00120 int yaz_iso_8859_1_lookup_x12(unsigned long x1, unsigned long x2,
00121 unsigned long *y)
00122 {
00123
00124 int i;
00125 for (i = 0; latin1_comb[i].x1; i++)
00126 if (x2 == latin1_comb[i].x2 && x1 == latin1_comb[i].x1)
00127 {
00128 *y = latin1_comb[i].y;
00129 return 1;
00130 }
00131 return 0;
00132 }
00133
00134 static size_t write_iso_8859_1(yaz_iconv_t cd, yaz_iconv_encoder_t e,
00135 unsigned long x,
00136 char **outbuf, size_t *outbytesleft)
00137 {
00138 struct encoder_data *w = (struct encoder_data *) e->data;
00139
00140
00141
00142
00143
00144
00145
00146 unsigned char *outp = (unsigned char *) *outbuf;
00147
00148 if (w->compose_char)
00149 {
00150 int i;
00151 for (i = 0; latin1_comb[i].x1; i++)
00152 if (w->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
00153 {
00154 x = latin1_comb[i].y;
00155 break;
00156 }
00157 if (*outbytesleft < 1)
00158 {
00159 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
00160 return (size_t)(-1);
00161 }
00162 if (!latin1_comb[i].x1)
00163 {
00164 *outp++ = (unsigned char) w->compose_char;
00165 (*outbytesleft)--;
00166 *outbuf = (char *) outp;
00167 }
00168
00169 w->compose_char = 0;
00170 }
00171
00172 if (x > 32 && x < 127 && w->compose_char == 0)
00173 {
00174 w->compose_char = x;
00175 return 0;
00176 }
00177 else if (x > 255 || x < 1)
00178 {
00179 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
00180 return (size_t) -1;
00181 }
00182 else if (*outbytesleft < 1)
00183 {
00184 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
00185 return (size_t)(-1);
00186 }
00187 *outp++ = (unsigned char) x;
00188 (*outbytesleft)--;
00189 *outbuf = (char *) outp;
00190 return 0;
00191 }
00192
00193 static size_t flush_iso_8859_1(yaz_iconv_t cd, yaz_iconv_encoder_t e,
00194 char **outbuf, size_t *outbytesleft)
00195 {
00196 struct encoder_data *w = (struct encoder_data *) e->data;
00197 if (w->compose_char)
00198 {
00199 unsigned char *outp = (unsigned char *) *outbuf;
00200 if (*outbytesleft < 1)
00201 {
00202 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
00203 return (size_t)(-1);
00204 }
00205 *outp++ = (unsigned char) w->compose_char;
00206 (*outbytesleft)--;
00207 *outbuf = (char *) outp;
00208 w->compose_char = 0;
00209 }
00210 return 0;
00211 }
00212
00213
00214 void init_iso_8859_1(yaz_iconv_encoder_t e)
00215 {
00216 struct encoder_data *w = (struct encoder_data *) e->data;
00217 w->compose_char = 0;
00218 }
00219
00220 void destroy_iso_8859_1(yaz_iconv_encoder_t e)
00221 {
00222 xfree(e->data);
00223 }
00224
00225 yaz_iconv_encoder_t yaz_iso_8859_1_encoder(const char *tocode,
00226 yaz_iconv_encoder_t e)
00227
00228 {
00229 if (!yaz_matchstr(tocode, "iso88591"))
00230 {
00231 struct encoder_data *data = (struct encoder_data *)
00232 xmalloc(sizeof(*data));
00233 e->data = data;
00234 e->write_handle = write_iso_8859_1;
00235 e->flush_handle = flush_iso_8859_1;
00236 e->init_handle = init_iso_8859_1;
00237 e->destroy_handle = destroy_iso_8859_1;
00238 return e;
00239 }
00240 return 0;
00241 }
00242
00243 static unsigned long read_ISO8859_1(yaz_iconv_t cd,
00244 yaz_iconv_decoder_t d,
00245 unsigned char *inp,
00246 size_t inbytesleft, size_t *no_read)
00247 {
00248 unsigned long x = inp[0];
00249 *no_read = 1;
00250 return x;
00251 }
00252
00253 yaz_iconv_decoder_t yaz_iso_8859_1_decoder(const char *fromcode,
00254 yaz_iconv_decoder_t d)
00255
00256 {
00257 if (!yaz_matchstr(fromcode, "iso88591"))
00258 {
00259 d->read_handle = read_ISO8859_1;
00260 return d;
00261 }
00262 return 0;
00263 }
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274