YAZ  5.23.1
marc_read_line.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14 
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18 
19 #include <assert.h>
20 #include <stdio.h>
21 #include <string.h>
22 
23 #include <yaz/marcdisp.h>
24 #include <yaz/wrbuf.h>
25 #include <yaz/yaz-util.h>
26 
27 static int yaz_gets(int (*getbyte)(void *client_data),
28  void (*ungetbyte)(int b, void *client_data),
29  void *client_data,
30  WRBUF w)
31 {
32  size_t sz = 0;
33  int ch = getbyte(client_data);
34 
35  while (ch != '\0' && ch != '\r' && ch != '\n')
36  {
37  wrbuf_putc(w, ch);
38  sz++;
39  ch = getbyte(client_data);
40  }
41  if (ch == '\r')
42  {
43  ch = getbyte(client_data);
44  if (ch != '\n' && ch != '\0')
45  ungetbyte(ch, client_data);
46  }
47  else if (ch == '\n')
48  {
49  ch = getbyte(client_data);
50  if (ch != '\r' && ch != '\0')
51  ungetbyte(ch, client_data);
52  }
53  if (sz)
54  {
55  return 1;
56  }
57  return 0;
58 }
59 
60 static int yaz_marc_line_gets(int (*getbyte)(void *client_data),
61  void (*ungetbyte)(int b, void *client_data),
62  void *client_data,
63  WRBUF w)
64 {
65  int more;
66 
67  wrbuf_rewind(w);
68  more = yaz_gets(getbyte, ungetbyte, client_data, w);
69  if (!more)
70  return 0;
71 
72  while (more)
73  {
74  int i;
75  for (i = 0; i<4; i++)
76  {
77  int ch = getbyte(client_data);
78  if (ch != ' ')
79  {
80  if (ch)
81  ungetbyte(ch, client_data);
82  return 1;
83  }
84  }
85  if (wrbuf_len(w) > 60 && wrbuf_buf(w)[wrbuf_len(w)-1] == '=')
86  wrbuf_cut_right(w, 1);
87  else
88  wrbuf_puts(w, " ");
89  more = yaz_gets(getbyte, ungetbyte, client_data, w);
90  }
91  return 1;
92 }
93 
94 
96  int (*getbyte)(void *client_data),
97  void (*ungetbyte)(int b, void *client_data),
98  void *client_data)
99 {
100  int indicator_length;
101  int identifier_length;
102  int base_address;
103  int length_data_entry;
104  int length_starting;
105  int length_implementation;
106  int marker_ch = 0;
107  int marker_skip = 0;
108  int header_created = 0;
109  WRBUF wrbuf_line = wrbuf_alloc();
110 
111  yaz_marc_reset(mt);
112 
113  while (yaz_marc_line_gets(getbyte, ungetbyte, client_data, wrbuf_line))
114  {
115  const char *line = wrbuf_cstr(wrbuf_line);
116  int val;
117  size_t line_len = strlen(line);
118  if (line_len == 0) /* empty line indicates end of record */
119  {
120  if (header_created)
121  break;
122  }
123  else if (line[0] == '$') /* indicates beginning/end of record */
124  {
125  if (header_created)
126  break;
127  }
128  else if (line[0] == '(') /* annotation, skip it */
129  ;
130  else if (line_len == 24 && atoi_n_check(line, 5, &val))
131  {
132  /* deal with header lines: 00366nam 22001698a 4500
133  */
134 
135  if (header_created)
136  break;
137  yaz_marc_set_leader(mt, line,
138  &indicator_length,
139  &identifier_length,
140  &base_address,
141  &length_data_entry,
142  &length_starting,
143  &length_implementation);
144  header_created = 1;
145  }
146  else if (line_len > 4 && line[0] != ' ' && line[1] != ' '
147  && line[2] != ' ' && line[3] == ' ' )
148  {
149  /* deal with data/control lines: 245 12 ........ */
150  char tag[4];
151  const char *datafield_start = line+6;
152  marker_ch = 0;
153  marker_skip = 0;
154 
155  memcpy(tag, line, 3);
156  tag[3] = '\0';
157  if (line_len >= 8) /* control - or datafield ? */
158  {
159  if (*datafield_start == ' ')
160  datafield_start++; /* skip blank after indicator */
161 
162  if (strchr("$_*", *datafield_start))
163  {
164  marker_ch = *datafield_start;
165  if (datafield_start[2] == ' ')
166  marker_skip = 1; /* subfields has blank before data */
167  }
168  }
169  if (!header_created)
170  {
171  const char *leader = "01000cam 2200265 i 4500";
172 
173  yaz_marc_set_leader(mt, leader,
174  &indicator_length,
175  &identifier_length,
176  &base_address,
177  &length_data_entry,
178  &length_starting,
179  &length_implementation);
180  header_created = 1;
181  }
182 
183  if (marker_ch == 0)
184  { /* control field */
185  yaz_marc_add_controlfield(mt, tag, line+4, strlen(line+4));
186  }
187  else
188  { /* data field */
189  const char *indicator = line+4;
190  int indicator_len = 2;
191  const char *cp = datafield_start;
192 
193  yaz_marc_add_datafield(mt, tag, indicator, indicator_len);
194  for (;;)
195  {
196  const char *next;
197  size_t len;
198 
199  assert(cp[0] == marker_ch);
200  cp++;
201  next = cp;
202  while ((next = strchr(next, marker_ch)))
203  {
204  if ((next[1] >= 'A' && next[1] <= 'Z')
205  ||(next[1] >= 'a' && next[1] <= 'z')
206  ||(next[1] >= '0' && next[1] <= '9'))
207  {
208  if (!marker_skip)
209  break;
210  else if (next[2] == ' ')
211  break;
212  }
213  next++;
214  }
215  len = strlen(cp);
216  if (next)
217  len = next - cp - marker_skip;
218 
219  if (marker_skip)
220  {
221  /* remove ' ' after subfield marker */
222  char *cp_blank = strchr(cp, ' ');
223  if (cp_blank)
224  {
225  len--;
226  while (cp_blank != cp)
227  {
228  cp_blank[0] = cp_blank[-1];
229  cp_blank--;
230  }
231  cp++;
232  }
233  }
234  yaz_marc_add_subfield(mt, cp, len);
235  if (!next)
236  break;
237  cp = next;
238  }
239  }
240  }
241  else
242  {
243  yaz_marc_cprintf(mt, "Ignoring line: %s", line);
244  }
245  }
246  wrbuf_destroy(wrbuf_line);
247  if (!header_created)
248  return -1;
249  return 0;
250 }
251 
252 /*
253  * Local variables:
254  * c-basic-offset: 4
255  * c-file-style: "Stroustrup"
256  * indent-tabs-mode: nil
257  * End:
258  * vim: shiftwidth=4 tabstop=8 expandtab
259  */
260 
MARC conversion.
void yaz_marc_add_subfield(yaz_marc_t mt, const char *code_data, size_t code_data_len)
adds subfield to MARC structure
Definition: marcdisp.c:316
#define wrbuf_buf(b)
Definition: wrbuf.h:251
Header for WRBUF (growing buffer)
const char * wrbuf_cstr(WRBUF b)
returns WRBUF content as C-string
Definition: wrbuf.c:281
void wrbuf_puts(WRBUF b, const char *buf)
appends C-string to WRBUF
Definition: wrbuf.c:89
Header for common YAZ utilities.
the internals of a yaz_marc_t handle
Definition: marcdisp.c:86
string buffer
Definition: wrbuf.h:42
int atoi_n_check(const char *buf, int size, int *val)
like atoi_n but checks for proper formatting
Definition: atoin.c:32
void wrbuf_cut_right(WRBUF b, size_t no_to_remove)
cut size of WRBUF
Definition: wrbuf.c:297
void wrbuf_rewind(WRBUF b)
empty WRBUF content (length of buffer set to 0)
Definition: wrbuf.c:47
void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag, const char *data, size_t data_len)
adds controlfield to MARC structure
Definition: marcdisp.c:212
void wrbuf_destroy(WRBUF b)
destroy WRBUF and its buffer
Definition: wrbuf.c:38
void yaz_marc_reset(yaz_marc_t mt)
clears memory and MARC record
Definition: marcdisp.c:483
void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, int *indicator_length, int *identifier_length, int *base_address, int *length_data_entry, int *length_starting, int *length_implementation)
sets leader, validates it, and returns important values
Definition: marcdisp.c:356
void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag, const char *indicator, size_t indicator_len)
adds datafield to MARC structure using strings
Definition: marcdisp.c:233
static int yaz_marc_line_gets(int(*getbyte)(void *client_data), void(*ungetbyte)(int b, void *client_data), void *client_data, WRBUF w)
void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt,...)
adds MARC annotation - printf interface
Definition: marcdisp.c:188
#define wrbuf_putc(b, c)
Definition: wrbuf.h:268
#define wrbuf_len(b)
Definition: wrbuf.h:250
static int yaz_gets(int(*getbyte)(void *client_data), void(*ungetbyte)(int b, void *client_data), void *client_data, WRBUF w)
int yaz_marc_read_line(yaz_marc_t mt, int(*getbyte)(void *client_data), void(*ungetbyte)(int b, void *client_data), void *client_data)
read MARC lineformat from stream
WRBUF wrbuf_alloc(void)
construct WRBUF
Definition: wrbuf.c:25