IDZEBRA  2.1.2
d1_marc.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 /* converts data1 tree to ISO2709/MARC record */
21 
22 #if HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 #include <assert.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
29 #include <yaz/log.h>
30 #include <yaz/oid_db.h>
31 #include <yaz/marcdisp.h>
32 #include <yaz/readconf.h>
33 #include <yaz/xmalloc.h>
34 #include <yaz/tpath.h>
35 #include <idzebra/data1.h>
36 
38 {
39  FILE *f;
40  NMEM mem = data1_nmem_get (dh);
41  data1_marctab *res = (data1_marctab *)nmem_malloc(mem, sizeof(*res));
42  char line[512], *argv[50];
43  int lineno = 0;
44  int argc;
45 
46  if (!(f = data1_path_fopen(dh, file, "r")))
47  return 0;
48 
49  res->name = 0;
50  res->oid = 0;
51  res->next = 0;
52  res->length_data_entry = 4;
53  res->length_starting = 5;
54  res->length_implementation = 0;
55  strcpy(res->future_use, "4");
56 
57  strcpy(res->record_status, "n");
58  strcpy(res->implementation_codes, " ");
59  res->indicator_length = 2;
60  res->identifier_length = 2;
61  res->force_indicator_length = -1;
62  res->force_identifier_length = -1;
63  strcpy(res->user_systems, "z ");
64 
65  while ((argc = readconf_line(f, &lineno, line, 512, argv, 50)))
66  if (!strcmp(*argv, "name"))
67  {
68  if (argc != 2)
69  {
70  yaz_log(YLOG_WARN, "%s:%d:Missing arg for %s", file, lineno,
71  *argv);
72  continue;
73  }
74  res->name = nmem_strdup(mem, argv[1]);
75  }
76  else if (!strcmp(*argv, "reference"))
77  {
78  if (argc != 2)
79  {
80  yaz_log(YLOG_WARN, "%s:%d: Missing arg for %s", file, lineno,
81  *argv);
82  continue;
83  }
84  res->oid = yaz_string_to_oid_nmem(yaz_oid_std(),
85  CLASS_TAGSET, argv[1],
86  mem);
87  if (!res->oid)
88  {
89  yaz_log(YLOG_WARN, "%s:%d: Unknown tagset reference '%s'",
90  file, lineno, argv[1]);
91  continue;
92  }
93  }
94  else if (!strcmp(*argv, "length-data-entry"))
95  {
96  if (argc != 2)
97  {
98  yaz_log(YLOG_WARN, "%s:%d: Missing arg for %s", file, lineno,
99  *argv);
100  continue;
101  }
102  res->length_data_entry = atoi(argv[1]);
103  }
104  else if (!strcmp(*argv, "length-starting"))
105  {
106  if (argc != 2)
107  {
108  yaz_log(YLOG_WARN, "%s:%d: Missing arg for %s", file, lineno,
109  *argv);
110  continue;
111  }
112  res->length_starting = atoi(argv[1]);
113  }
114  else if (!strcmp(*argv, "length-implementation"))
115  {
116  if (argc != 2)
117  {
118  yaz_log(YLOG_WARN, "%s:%d: Missing arg for %s", file, lineno,
119  *argv);
120  continue;
121  }
122  res->length_implementation = atoi(argv[1]);
123  }
124  else if (!strcmp(*argv, "future-use"))
125  {
126  if (argc != 2)
127  {
128  yaz_log(YLOG_WARN, "%s:%d: Missing arg for %s", file, lineno,
129  *argv);
130  continue;
131  }
132  strncpy(res->future_use, argv[1], 2);
133  }
134  else if (!strcmp(*argv, "force-indicator-length"))
135  {
136  if (argc != 2)
137  {
138  yaz_log(YLOG_WARN, "%s:%d: Missing arg for %s", file, lineno,
139  *argv);
140  continue;
141  }
142  res->force_indicator_length = atoi(argv[1]);
143  }
144  else if (!strcmp(*argv, "force-identifier-length"))
145  {
146  if (argc != 2)
147  {
148  yaz_log(YLOG_WARN, "%s:%d: Missing arg for %s", file, lineno,
149  *argv);
150  continue;
151  }
152  res->force_identifier_length = atoi(argv[1]);
153  }
154  else if (!strcmp(*argv, "implementation-codes"))
155  {
156  if (argc != 2)
157  {
158  yaz_log(YLOG_WARN, "%s:%d: Missing arg for %s", file, lineno,
159  *argv);
160  continue;
161  }
162  /* up to 4 characters .. space pad */
163  if (strlen(argv[1]) > 4)
164  yaz_log(YLOG_WARN, "%s:%d: Max 4 characters for "
165  "implementation-codes", file, lineno);
166  else
167  memcpy(res->implementation_codes, argv[1], strlen(argv[1]));
168  }
169  else
170  yaz_log(YLOG_WARN, "%s:%d: Unknown directive '%s'", file, lineno,
171  *argv);
172 
173  fclose(f);
174  return res;
175 }
176 
177 
178 static void get_data2(data1_node *n, int *len, char *dst, size_t max)
179 {
180  *len = 0;
181 
182  while (n)
183  {
184  if (n->which == DATA1N_data)
185  {
186  if (dst && *len < max)
187  {
188  size_t copy_len = max - *len;
189  if (copy_len > n->u.data.len)
190  copy_len = n->u.data.len;
191  memcpy(dst + *len, n->u.data.data, copy_len);
192  }
193  *len += n->u.data.len;
194  }
195  if (n->which == DATA1N_tag && *len == 0)
196  n = n->child;
197  else if (n->which == DATA1N_data)
198  n = n->next;
199  else
200  break;
201  }
202 }
203 
/*
 * Write val as exactly len decimal characters at p (no NUL terminator).
 *
 * The number is right-aligned and zero padded on the left.  If val has
 * more digits than len, only the least significant len characters are
 * written.  For len == 1 the single character val + '0' is stored.
 * A non-positive len writes nothing.
 */
static void memint (char *p, int val, int len)
{
    char buf[16];
    int n;

    if (len <= 0)
        return;
    if (len == 1)
    {
        *p = val + '0';
        return;
    }
    /* bounded formatting: a 10-digit or negative value no longer
       overruns the scratch buffer */
    n = snprintf (buf, sizeof(buf), "%08d", val);
    if (n < 0)
        n = 0;
    else if (n >= (int) sizeof(buf))
        n = (int) sizeof(buf) - 1;

    if (n >= len)
        memcpy (p, buf + n - len, len);     /* keep the trailing digits */
    else
    {
        /* field is wider than the formatted number: pad on the left */
        memset (p, '0', len - n);
        memcpy (p + len - n, buf, n);
    }
}
216 
217 /* check for indicator. non MARCXML only */
218 static int is_indicator (data1_marctab *p, data1_node *subf)
219 {
220  if (p->indicator_length != 2 ||
221  (subf && subf->which == DATA1N_tag && strlen(subf->u.tag.tag) == 2))
222  return 1;
223  return 0;
224 }
225 
226 static int nodetomarc(data1_handle dh,
227  data1_marctab *p, data1_node *n, int selected,
228  char **buf, int *size)
229 {
230  char leader[24];
231 
232  int len = 26;
233  int dlen;
234  int base_address = 25;
235  int entry_p, data_p;
236  char *op;
237  data1_node *field, *subf;
238 
239 #if 0
240  data1_pr_tree(dh, n, stdout);
241 #endif
242  yaz_log (YLOG_DEBUG, "nodetomarc");
243 
244  memcpy (leader+5, p->record_status, 1);
245  memcpy (leader+6, p->implementation_codes, 4);
246  memint (leader+10, p->indicator_length, 1);
247  memint (leader+11, p->identifier_length, 1);
248  memcpy (leader+17, p->user_systems, 3);
249  memint (leader+20, p->length_data_entry, 1);
250  memint (leader+21, p->length_starting, 1);
251  memint (leader+22, p->length_implementation, 1);
252  memcpy (leader+23, p->future_use, 1);
253 
254  for (field = n->child; field; field = field->next)
255  {
256  int control_field = 0; /* 00X fields - usually! */
257  int marc_xml = 0;
258 
259  if (field->which != DATA1N_tag)
260  continue;
261  if (selected && !field->u.tag.node_selected)
262  continue;
263 
264  subf = field->child;
265  if (!subf)
266  continue;
267 
268  if (!yaz_matchstr(field->u.tag.tag, "mc?"))
269  continue;
270  else if (!strcmp(field->u.tag.tag, "leader"))
271  {
272  int dlen = 0;
273  get_data2(subf, &dlen, leader, 24);
274  continue;
275  }
276  else if (!strcmp(field->u.tag.tag, "controlfield"))
277  {
278  control_field = 1;
279  marc_xml = 1;
280  }
281  else if (!strcmp(field->u.tag.tag, "datafield"))
282  {
283  control_field = 0;
284  marc_xml = 1;
285  }
286  else if (subf->which == DATA1N_data)
287  {
288  control_field = 1;
289  marc_xml = 0;
290  }
291  else
292  {
293  control_field = 0;
294  marc_xml = 0;
295  }
296 
297  len += 4 + p->length_data_entry + p->length_starting
299  base_address += 3 + p->length_data_entry + p->length_starting
301 
302  if (!control_field)
303  len += p->indicator_length;
304 
305  /* we'll allow no indicator if length is not 2 */
306  /* select when old XML format, since indicator is an element */
307  if (marc_xml == 0 && is_indicator (p, subf))
308  subf = subf->child;
309 
310  for (; subf; subf = subf->next)
311  {
312  if (!control_field)
313  {
314  if (marc_xml && subf->which != DATA1N_tag)
315  continue; /* we skip comments, cdata .. */
316  len += p->identifier_length;
317  }
318  get_data2(subf, &dlen, 0, 0);
319  len += dlen;
320  }
321  }
322 
323  if (!*buf)
324  *buf = (char *)xmalloc(*size = len);
325  else if (*size <= len)
326  *buf = (char *)xrealloc(*buf, *size = len);
327 
328  op = *buf;
329 
330  /* we know the base address now */
331  memint (leader+12, base_address, 5);
332 
333  /* copy temp leader to real output buf op */
334  memcpy (op, leader, 24);
335  memint (op, len, 5);
336 
337  entry_p = 24;
338  data_p = base_address;
339 
340  for (field = n->child; field; field = field->next)
341  {
342  int control_field = 0;
343  int marc_xml = 0;
344  const char *tag = 0;
345 
346  int data_0 = data_p;
347  char indicator_data[6];
348 
349  memset (indicator_data, ' ', sizeof(indicator_data)-1);
350  indicator_data[sizeof(indicator_data)-1] = '\0';
351 
352  if (field->which != DATA1N_tag)
353  continue;
354 
355  if (selected && !field->u.tag.node_selected)
356  continue;
357 
358  subf = field->child;
359  if (!subf)
360  continue;
361 
362  if (!yaz_matchstr(field->u.tag.tag, "mc?"))
363  continue;
364  else if (!strcmp(field->u.tag.tag, "leader"))
365  continue;
366  else if (!strcmp(field->u.tag.tag, "controlfield"))
367  {
368  control_field = 1;
369  marc_xml = 1;
370  }
371  else if (!strcmp(field->u.tag.tag, "datafield"))
372  {
373  control_field = 0;
374  marc_xml = 1;
375  }
376  else if (subf->which == DATA1N_data)
377  {
378  control_field = 1;
379  marc_xml = 0;
380  }
381  else
382  {
383  control_field = 0;
384  marc_xml = 0;
385  }
386  if (marc_xml == 0 && is_indicator (p, subf))
387  {
388  strncpy(indicator_data, subf->u.tag.tag, sizeof(indicator_data)-1);
389  subf = subf->child;
390  }
391  else if (marc_xml == 1 && !control_field)
392  {
393  data1_xattr *xa;
394  for (xa = field->u.tag.attributes; xa; xa = xa->next)
395  {
396  if (!strcmp(xa->name, "ind1"))
397  indicator_data[0] = xa->value[0];
398  if (!strcmp(xa->name, "ind2"))
399  indicator_data[1] = xa->value[0];
400  if (!strcmp(xa->name, "ind3"))
401  indicator_data[2] = xa->value[0];
402  }
403  }
404  if (!control_field)
405  {
406  memcpy (op + data_p, indicator_data, p->indicator_length);
407  data_p += p->indicator_length;
408  }
409  for (; subf; subf = subf->next)
410  {
411  if (!control_field)
412  {
413  const char *identifier = "a";
414  if (marc_xml)
415  {
416  data1_xattr *xa;
417  if (subf->which != DATA1N_tag)
418  continue;
419  if (strcmp(subf->u.tag.tag, "subfield"))
420  yaz_log(YLOG_WARN, "Unhandled tag %s",
421  subf->u.tag.tag);
422 
423  for (xa = subf->u.tag.attributes; xa; xa = xa->next)
424  if (!strcmp(xa->name, "code"))
425  identifier = xa->value;
426  }
427  else if (subf->which != DATA1N_tag)
428  yaz_log(YLOG_WARN, "Malformed fields for marc output.");
429  else
430  identifier = subf->u.tag.tag;
431  op[data_p] = ISO2709_IDFS;
432  memcpy (op + data_p+1, identifier, p->identifier_length-1);
433  data_p += p->identifier_length;
434  }
435  get_data2(subf, &dlen, op + data_p, 100000);
436  data_p += dlen;
437  }
438  op[data_p++] = ISO2709_FS;
439 
440  if (marc_xml)
441  {
442  data1_xattr *xa;
443  for (xa = field->u.tag.attributes; xa; xa = xa->next)
444  if (!strcmp(xa->name, "tag"))
445  tag = xa->value;
446  }
447  else
448  tag = field->u.tag.tag;
449 
450  if (!tag || strlen(tag) != 3)
451  tag = "000";
452  memcpy (op + entry_p, tag, 3);
453 
454  entry_p += 3;
455  memint (op + entry_p, data_p - data_0, p->length_data_entry);
456  entry_p += p->length_data_entry;
457  memint (op + entry_p, data_0 - base_address, p->length_starting);
458  entry_p += p->length_starting;
459  entry_p += p->length_implementation;
460  }
461  op[entry_p++] = ISO2709_FS;
462  assert (entry_p == base_address);
463  op[data_p++] = ISO2709_RS;
464  assert (data_p == len);
465  return len;
466 }
467 
469  int selected, int *len)
470 {
471  int *size;
472  char **buf = data1_get_map_buf (dh, &size);
473 
474  n = data1_get_root_tag (dh, n);
475  if (!n)
476  return 0;
477  *len = nodetomarc(dh, p, n, selected, buf, size);
478  return *buf;
479 }
480 /*
481  * Local variables:
482  * c-basic-offset: 4
483  * c-file-style: "Stroustrup"
484  * indent-tabs-mode: nil
485  * End:
486  * vim: shiftwidth=4 tabstop=8 expandtab
487  */
488 
int identifier_length
Definition: data1.h:145
int force_indicator_length
Definition: data1.h:153
#define DATA1N_tag
Definition: data1.h:276
static int nodetomarc(data1_handle dh, data1_marctab *p, data1_node *n, int selected, char **buf, int *size)
Definition: d1_marc.c:226
static int is_indicator(data1_marctab *p, data1_node *subf)
Definition: d1_marc.c:218
char * name
Definition: data1.h:260
int force_identifier_length
Definition: data1.h:154
struct data1_marctab * next
Definition: data1.h:156
struct data1_xattr * next
Definition: data1.h:262
int indicator_length
Definition: data1.h:144
void data1_pr_tree(data1_handle dh, data1_node *n, FILE *out)
Definition: d1_prtree.c:134
char * value
Definition: data1.h:261
data1_node * data1_get_root_tag(data1_handle dh, data1_node *n)
Definition: d1_read.c:35
int length_data_entry
Definition: data1.h:148
FILE * data1_path_fopen(data1_handle dh, const char *file, const char *mode)
Definition: d1_handle.c:151
char record_status[2]
Definition: data1.h:142
char * data1_nodetomarc(data1_handle dh, data1_marctab *p, data1_node *n, int selected, int *len)
Definition: d1_marc.c:468
char * tag
Definition: data1.h:296
char ** data1_get_map_buf(data1_handle dp, int **lenp)
Definition: d1_handle.c:118
union data1_node::@2 u
struct data1_node * child
Definition: data1.h:341
char user_systems[4]
Definition: data1.h:146
int which
Definition: data1.h:285
int length_starting
Definition: data1.h:149
struct data1_node * next
Definition: data1.h:340
char implementation_codes[5]
Definition: data1.h:143
char * data
Definition: data1.h:307
Odr_oid * oid
Definition: data1.h:140
static void get_data2(data1_node *n, int *len, char *dst, size_t max)
Definition: d1_marc.c:178
NMEM data1_nmem_get(data1_handle dh)
Definition: d1_handle.c:66
static void memint(char *p, int val, int len)
Definition: d1_marc.c:204
int length_implementation
Definition: data1.h:150
char future_use[2]
Definition: data1.h:151
#define DATA1N_data
Definition: data1.h:278
char * name
Definition: data1.h:139
data1_marctab * data1_read_marctab(data1_handle dh, const char *file)
Definition: d1_marc.c:37