YAZ  5.34.0
marc_read_iso2709.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14 
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18 
19 #include <stdio.h>
20 #include <string.h>
21 #include <yaz/marcdisp.h>
22 #include <yaz/wrbuf.h>
23 #include <yaz/yaz-util.h>
24 
25 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
26 {
27  int entry_p;
28  int record_length;
29  int indicator_length;
30  int identifier_length;
31  int end_of_directory;
32  int base_address;
33  int length_data_entry;
34  int length_starting;
35  int length_implementation;
36 
37  yaz_marc_reset(mt);
38 
39  if (!atoi_n_check(buf, 5, &record_length))
40  {
41  yaz_marc_cprintf(mt, "Bad leader");
42  return -1;
43  }
44  if (record_length < 25)
45  {
46  yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
47  return -1;
48  }
49  /* ballout if bsize is known and record_length is less than that */
50  if (bsize != -1 && record_length > bsize)
51  {
52  yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
53  record_length, bsize);
54  return -1;
55  }
56  if (yaz_marc_get_debug(mt))
57  yaz_marc_cprintf(mt, "Record length %5d", record_length);
58 
59  yaz_marc_set_leader(mt, buf,
60  &indicator_length,
61  &identifier_length,
62  &base_address,
63  &length_data_entry,
64  &length_starting,
65  &length_implementation);
66 
67  /* First pass. determine length of directory & base of data */
68  for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
69  {
70  /* length of directory entry */
71  int l = 3 + length_data_entry + length_starting;
72  if (entry_p + l >= record_length)
73  {
74  yaz_marc_cprintf(mt, "Directory offset %d: end of record."
75  " Missing FS char", entry_p);
76  return -1;
77  }
78  if (yaz_marc_get_debug(mt))
79  {
80  WRBUF hex = wrbuf_alloc();
81 
82  wrbuf_puts(hex, "Tag ");
83  wrbuf_write_escaped(hex, buf + entry_p, 3);
84  wrbuf_puts(hex, ", length ");
85  wrbuf_write_escaped(hex, buf + entry_p + 3,
86  length_data_entry);
87  wrbuf_puts(hex, ", starting ");
88  wrbuf_write_escaped(hex, buf + entry_p + 3 + length_data_entry,
89  length_starting);
90  yaz_marc_cprintf(mt, "Directory offset %d: %s",
91  entry_p, wrbuf_cstr(hex));
92  wrbuf_destroy(hex);
93  }
94  /* Check for digits in length+starting info */
95  while (--l >= 3)
96  if (!yaz_isdigit(buf[entry_p + l]))
97  break;
98  if (l >= 3)
99  {
100  WRBUF hex = wrbuf_alloc();
101  /* Not all digits, so stop directory scan */
102  wrbuf_write_escaped(hex, buf + entry_p,
103  length_data_entry + length_starting + 3);
104  yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
105  " length and/or length starting (%s)", entry_p,
106  wrbuf_cstr(hex));
107  wrbuf_destroy(hex);
108  break;
109  }
110  entry_p += 3 + length_data_entry + length_starting;
111  }
112  end_of_directory = entry_p;
113  if (base_address != entry_p+1)
114  {
115  yaz_marc_cprintf(mt, "Base address not at end of directory,"
116  " base %d, end %d", base_address, entry_p+1);
117  }
118 
119  /* Second pass. parse control - and datafields */
120  for (entry_p = 24; entry_p != end_of_directory; )
121  {
122  int data_length;
123  int data_offset;
124  int end_offset;
125  int i;
126  char tag[4];
127  int identifier_flag = 0;
128  int entry_p0 = entry_p;
129 
130  memcpy (tag, buf+entry_p, 3);
131  entry_p += 3;
132  tag[3] = '\0';
133  data_length = atoi_n(buf+entry_p, length_data_entry);
134  entry_p += length_data_entry;
135  data_offset = atoi_n(buf+entry_p, length_starting);
136  entry_p += length_starting;
137  i = data_offset + base_address;
138  end_offset = i+data_length-1;
139 
140  if (data_length <= 0 || data_offset < 0)
141  break;
142 
143  if (yaz_marc_get_debug(mt))
144  {
145  yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
146  " data-offset %d",
147  tag, entry_p0, data_length, data_offset);
148  }
149  if (end_offset >= record_length)
150  {
151  yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
152  entry_p0, end_offset, record_length);
153  break;
154  }
155 
156  if (memcmp (tag, "00", 2))
157  identifier_flag = 1; /* if not 00X assume subfields */
158  else if (indicator_length < 4 && indicator_length > 0)
159  {
160  /* Danmarc 00X have subfields */
161  if (buf[i + indicator_length] == ISO2709_IDFS)
162  identifier_flag = 1;
163  else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
164  identifier_flag = 2;
165  }
166 
167  if (identifier_flag)
168  {
169  /* datafield */
170  i += identifier_flag-1;
171  if (indicator_length)
172  {
173  int j, i_start = i;
174  for (j = 0; j < indicator_length; j++)
175  i += yaz_marc_sizeof_char(mt, buf + i);
176  yaz_marc_add_datafield(mt, tag, buf + i_start,
177  i - i_start);
178  }
179  while (i < end_offset &&
180  buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
181  {
182  int code_offset = i+1;
183 
184  i ++;
185  while (i < end_offset &&
186  buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
187  buf[i] != ISO2709_FS)
188  i++;
189  if (i > code_offset)
190  yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
191  }
192  }
193  else
194  {
195  /* controlfield */
196  int i0 = i;
197  while (i < end_offset &&
198  buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
199  i++;
200  yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
201  }
202  if (i < end_offset)
203  {
204  yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
205  data_length);
206  }
207  if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
208  {
209  yaz_marc_cprintf(mt, "No separator at end of field length=%d",
210  data_length);
211  }
212  }
213  return record_length;
214 }
215 
216 /*
217  * Local variables:
218  * c-basic-offset: 4
219  * c-file-style: "Stroustrup"
220  * indent-tabs-mode: nil
221  * End:
222  * vim: shiftwidth=4 tabstop=8 expandtab
223  */
224 
int atoi_n_check(const char *buf, int size, int *val)
like atoi_n but checks for proper formatting
Definition: atoin.c:32
int atoi_n(const char *buf, int len)
like atoi(3) except that it reads exactly len characters
Definition: atoin.c:19
int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
read ISO2709/MARC record from buffer
void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt,...)
adds MARC annotation - printf interface
Definition: marcdisp.c:188
size_t yaz_marc_sizeof_char(yaz_marc_t mt, const char *buf)
Definition: marcdisp.c:478
void yaz_marc_add_subfield(yaz_marc_t mt, const char *code_data, size_t code_data_len)
adds subfield to MARC structure
Definition: marcdisp.c:316
int yaz_marc_get_debug(yaz_marc_t mt)
gets debug level for MARC system
Definition: marcdisp.c:199
void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, const char *indicator, size_t indicator_len)
adds datafield to MARC structure using strings
Definition: marcdisp.c:233
void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, int *indicator_length, int *identifier_length, int *base_address, int *length_data_entry, int *length_starting, int *length_implementation)
sets leader, validates it, and returns important values
Definition: marcdisp.c:356
void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, const char *data, size_t data_len)
adds controlfield to MARC structure
Definition: marcdisp.c:212
void yaz_marc_reset(yaz_marc_t mt)
clears memory and MARC record
Definition: marcdisp.c:483
MARC conversion.
#define ISO2709_FS
MARC control char: field separator (30 Dec, 1E Hex)
Definition: marcdisp.h:148
#define ISO2709_IDFS
MARC control char: identifier-field separator (31 Dec, 1F Hex)
Definition: marcdisp.h:150
#define ISO2709_RS
MARC control char: record separator (29 Dec, 1D Hex)
Definition: marcdisp.h:146
string buffer
Definition: wrbuf.h:43
the internals of a yaz_marc_t handle
Definition: marcdisp.c:86
void wrbuf_destroy(WRBUF b)
destroy WRBUF and its buffer
Definition: wrbuf.c:38
WRBUF wrbuf_alloc(void)
construct WRBUF
Definition: wrbuf.c:25
void wrbuf_write_escaped(WRBUF b, const char *str, size_t len)
writes buffer to WRBUF and escape non-ASCII characters
Definition: wrbuf.c:309
const char * wrbuf_cstr(WRBUF b)
returns WRBUF content as C-string
Definition: wrbuf.c:281
void wrbuf_puts(WRBUF b, const char *buf)
appends C-string to WRBUF
Definition: wrbuf.c:89
Header for WRBUF (growing buffer)
#define yaz_isdigit(x)
Definition: yaz-iconv.h:86
Header for common YAZ utilities.