YAZ  4.2.57
marcdisp.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2013 Index Data
3  * See the file LICENSE for details.
4  */
5 
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14 
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18 
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <yaz/marcdisp.h>
24 #include <yaz/wrbuf.h>
25 #include <yaz/yaz-util.h>
26 #include <yaz/nmem_xml.h>
27 #include <yaz/snprintf.h>
28 
29 #if YAZ_HAVE_XML2
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #endif
33 
38 };
39 
42 {
47 };
48 
51  char *tag;
52  char *indicator;
54 };
55 
58  char *tag;
59  char *data;
60 };
61 
64  char *comment;
65 };
66 
68 struct yaz_marc_node {
70  union {
73  char *comment;
74  char *leader;
75  } u;
77 };
78 
81  char *code_data;
83 };
84 
86 struct yaz_marc_t_ {
90  int debug;
94  char subfield_str[8];
95  char endline_str[8];
96  char *leader_spec;
100 };
101 
103 {
104  yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
106  mt->debug = 0;
107  mt->write_using_libxml2 = 0;
109  mt->m_wr = wrbuf_alloc();
110  mt->iconv_cd = 0;
111  mt->leader_spec = 0;
112  strcpy(mt->subfield_str, " $");
113  strcpy(mt->endline_str, "\n");
114 
115  mt->nmem = nmem_create();
116  yaz_marc_reset(mt);
117  return mt;
118 }
119 
121 {
122  if (!mt)
123  return ;
124  nmem_destroy(mt->nmem);
125  wrbuf_destroy(mt->m_wr);
126  xfree(mt->leader_spec);
127  xfree(mt);
128 }
129 
131 {
132  return mt->nmem;
133 }
134 
135 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
136 {
137  wrbuf_iconv_reset(wr, mt->iconv_cd);
138 }
139 
140 static int marc_exec_leader(const char *leader_spec, char *leader,
141  size_t size);
142 #if YAZ_HAVE_XML2
143 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
144  const char *ns,
145  const char *format,
146  const char *type);
147 #endif
148 
150 {
151  struct yaz_marc_node *n = (struct yaz_marc_node *)
152  nmem_malloc(mt->nmem, sizeof(*n));
153  n->next = 0;
154  *mt->nodes_pp = n;
155  mt->nodes_pp = &n->next;
156  return n;
157 }
158 
159 #if YAZ_HAVE_XML2
160 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
161  const xmlNode *ptr_data)
162 {
163  struct yaz_marc_node *n = yaz_marc_add_node(mt);
165  n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
166  n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
167 }
168 
170  const xmlNode *ptr_data)
171 {
172  struct yaz_marc_node *n = yaz_marc_add_node(mt);
174  n->u.controlfield.tag = tag;
175  n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
176 }
177 
178 #endif
179 
180 
182 {
183  struct yaz_marc_node *n = yaz_marc_add_node(mt);
184  n->which = YAZ_MARC_COMMENT;
185  n->u.comment = nmem_strdup(mt->nmem, comment);
186 }
187 
188 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
189 {
190  va_list ap;
191  char buf[200];
192 
193  va_start(ap, fmt);
194  yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
195  yaz_marc_add_comment(mt, buf);
196  va_end (ap);
197 }
198 
200 {
201  return mt->debug;
202 }
203 
204 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
205 {
206  struct yaz_marc_node *n = yaz_marc_add_node(mt);
207  n->which = YAZ_MARC_LEADER;
208  n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
209  marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
210 }
211 
212 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
213  const char *data, size_t data_len)
214 {
215  struct yaz_marc_node *n = yaz_marc_add_node(mt);
217  n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
218  n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
219  if (mt->debug)
220  {
221  size_t i;
222  char msg[80];
223 
224  sprintf(msg, "controlfield:");
225  for (i = 0; i < 16 && i < data_len; i++)
226  sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
227  if (i < data_len)
228  sprintf(msg + strlen(msg), " ..");
229  yaz_marc_add_comment(mt, msg);
230  }
231 }
232 
233 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
234  const char *indicator, size_t indicator_len)
235 {
236  struct yaz_marc_node *n = yaz_marc_add_node(mt);
238  n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
239  n->u.datafield.indicator =
240  nmem_strdupn(mt->nmem, indicator, indicator_len);
241  n->u.datafield.subfields = 0;
242 
243  /* make subfield_pp the current (last one) */
244  mt->subfield_pp = &n->u.datafield.subfields;
245 }
246 
254  yaz_marc_t mt, WRBUF buffer,
255  const char *attribute_name, char *code_data, size_t code_len)
256 {
257  /* TODO Map special codes to something possible for XML ELEMENT names */
258 
259  int encode = 0;
260  int index = 0;
261  int success = 0;
262  for (index = 0; index < code_len; index++)
263  {
264  if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
265  (code_data[index] >= 'a' && code_data[index] <= 'z') ||
266  (code_data[index] >= 'A' && code_data[index] <= 'Z')))
267  encode = 1;
268  }
269  /* Add as attribute */
270  if (encode && attribute_name)
271  wrbuf_printf(buffer, " %s=\"", attribute_name);
272 
273  if (!encode || attribute_name)
274  wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
275  else
276  success = -1;
277 
278  if (encode && attribute_name)
279  wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/
280  return success;
281 }
282 
283 #if YAZ_HAVE_XML2
284 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
285  const char *indicator, size_t indicator_len)
286 {
287  struct yaz_marc_node *n = yaz_marc_add_node(mt);
289  n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
290  n->u.datafield.indicator =
291  nmem_strdupn(mt->nmem, indicator, indicator_len);
292  n->u.datafield.subfields = 0;
293 
294  /* make subfield_pp the current (last one) */
295  mt->subfield_pp = &n->u.datafield.subfields;
296 }
297 
298 void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
299 {
300  struct yaz_marc_node *n = yaz_marc_add_node(mt);
302  n->u.datafield.tag = tag_value;
303  n->u.datafield.indicator = indicators;
304  n->u.datafield.subfields = 0;
305 
306  /* make subfield_pp the current (last one) */
307  mt->subfield_pp = &n->u.datafield.subfields;
308 }
309 
310 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
311 {
312  n->u.datafield.indicator = indicator;
313 }
314 
315 #endif
316 
318  const char *code_data, size_t code_data_len)
319 {
320  if (mt->debug)
321  {
322  size_t i;
323  char msg[80];
324 
325  sprintf(msg, "subfield:");
326  for (i = 0; i < 16 && i < code_data_len; i++)
327  sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
328  if (i < code_data_len)
329  sprintf(msg + strlen(msg), " ..");
330  yaz_marc_add_comment(mt, msg);
331  }
332 
333  if (mt->subfield_pp)
334  {
335  struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
336  nmem_malloc(mt->nmem, sizeof(*n));
337  n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
338  n->next = 0;
339  /* mark subfield_pp to point to this one, so we append here next */
340  *mt->subfield_pp = n;
341  mt->subfield_pp = &n->next;
342  }
343 }
344 
345 static void check_ascii(yaz_marc_t mt, char *leader, int offset,
346  int ch_default)
347 {
348  if (leader[offset] < ' ' || leader[offset] > 127)
349  {
350  yaz_marc_cprintf(mt,
351  "Leader character at offset %d is non-ASCII. "
352  "Setting value to '%c'", offset, ch_default);
353  leader[offset] = ch_default;
354  }
355 }
356 
357 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
358  int *indicator_length,
359  int *identifier_length,
360  int *base_address,
361  int *length_data_entry,
362  int *length_starting,
363  int *length_implementation)
364 {
365  char leader[24];
366 
367  memcpy(leader, leader_c, 24);
368 
369  check_ascii(mt, leader, 5, 'a');
370  check_ascii(mt, leader, 6, 'a');
371  check_ascii(mt, leader, 7, 'a');
372  check_ascii(mt, leader, 8, '#');
373  check_ascii(mt, leader, 9, '#');
374  if (!atoi_n_check(leader+10, 1, indicator_length))
375  {
376  yaz_marc_cprintf(mt,
377  "Indicator length at offset 10 should hold a digit."
378  " Assuming 2");
379  leader[10] = '2';
380  *indicator_length = 2;
381  }
382  if (!atoi_n_check(leader+11, 1, identifier_length))
383  {
384  yaz_marc_cprintf(mt,
385  "Identifier length at offset 11 should hold a digit."
386  " Assuming 2");
387  leader[11] = '2';
388  *identifier_length = 2;
389  }
390  if (!atoi_n_check(leader+12, 5, base_address))
391  {
392  yaz_marc_cprintf(mt,
393  "Base address at offsets 12..16 should hold a number."
394  " Assuming 0");
395  *base_address = 0;
396  }
397  check_ascii(mt, leader, 17, '#');
398  check_ascii(mt, leader, 18, '#');
399  check_ascii(mt, leader, 19, '#');
400  if (!atoi_n_check(leader+20, 1, length_data_entry))
401  {
402  yaz_marc_cprintf(mt,
403  "Length data entry at offset 20 should hold a digit."
404  " Assuming 4");
405  *length_data_entry = 4;
406  leader[20] = '4';
407  }
408  if (!atoi_n_check(leader+21, 1, length_starting))
409  {
410  yaz_marc_cprintf(mt,
411  "Length starting at offset 21 should hold a digit."
412  " Assuming 5");
413  *length_starting = 5;
414  leader[21] = '5';
415  }
416  if (!atoi_n_check(leader+22, 1, length_implementation))
417  {
418  yaz_marc_cprintf(mt,
419  "Length implementation at offset 22 should hold a digit."
420  " Assuming 0");
421  *length_implementation = 0;
422  leader[22] = '0';
423  }
424  check_ascii(mt, leader, 23, '0');
425 
426  if (mt->debug)
427  {
428  yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
429  yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
430  yaz_marc_cprintf(mt, "Base address %5d", *base_address);
431  yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
432  yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
433  yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
434  }
435  yaz_marc_add_leader(mt, leader, 24);
436 }
437 
438 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
439 {
440  strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
441  mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
442 }
443 
444 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
445 {
446  strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
447  mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
448 }
449 
450 /* try to guess how many bytes the identifier really is! */
451 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
452 {
453  if (mt->iconv_cd)
454  {
455  size_t i;
456  for (i = 1; i<5; i++)
457  {
458  char outbuf[12];
459  size_t outbytesleft = sizeof(outbuf);
460  char *outp = outbuf;
461  const char *inp = buf;
462 
463  size_t inbytesleft = i;
464  size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
465  &outp, &outbytesleft);
466  yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft);
467  if (r != (size_t) (-1))
468  return i; /* got a complete sequence */
469  }
470  return 1; /* giving up */
471  }
472  return 1; /* we don't know */
473 }
474 
476 {
477  nmem_reset(mt->nmem);
478  mt->nodes = 0;
479  mt->nodes_pp = &mt->nodes;
480  mt->subfield_pp = 0;
481 }
482 
484 {
485  struct yaz_marc_node *n;
486  int identifier_length;
487  const char *leader = 0;
488 
489  for (n = mt->nodes; n; n = n->next)
490  if (n->which == YAZ_MARC_LEADER)
491  {
492  leader = n->u.leader;
493  break;
494  }
495 
496  if (!leader)
497  return -1;
498  if (!atoi_n_check(leader+11, 1, &identifier_length))
499  return -1;
500 
501  for (n = mt->nodes; n; n = n->next)
502  {
503  switch(n->which)
504  {
505  case YAZ_MARC_COMMENT:
506  wrbuf_iconv_write(wr, mt->iconv_cd,
507  n->u.comment, strlen(n->u.comment));
508  wrbuf_puts(wr, "\n");
509  break;
510  default:
511  break;
512  }
513  }
514  return 0;
515 }
516 
517 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
518  int identifier_length)
519 {
520  /* if identifier length is 2 (most MARCs) or less (probably an error),
521  the code is a single character .. However we've
522  seen multibyte codes, so see how big it really is */
523  if (identifier_length > 2)
524  return identifier_length - 1;
525  else
526  return cdata_one_character(mt, data);
527 }
528 
530 {
531  struct yaz_marc_node *n;
532  int identifier_length;
533  const char *leader = 0;
534 
535  for (n = mt->nodes; n; n = n->next)
536  if (n->which == YAZ_MARC_LEADER)
537  {
538  leader = n->u.leader;
539  break;
540  }
541 
542  if (!leader)
543  return -1;
544  if (!atoi_n_check(leader+11, 1, &identifier_length))
545  return -1;
546 
547  for (n = mt->nodes; n; n = n->next)
548  {
549  struct yaz_marc_subfield *s;
550  switch(n->which)
551  {
552  case YAZ_MARC_DATAFIELD:
553  wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
554  n->u.datafield.indicator);
555  for (s = n->u.datafield.subfields; s; s = s->next)
556  {
557  size_t using_code_len = get_subfield_len(mt, s->code_data,
558  identifier_length);
559 
560  wrbuf_puts (wr, mt->subfield_str);
562  using_code_len);
563  wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
564  wrbuf_iconv_puts(wr, mt->iconv_cd,
565  s->code_data + using_code_len);
566  marc_iconv_reset(mt, wr);
567  }
568  wrbuf_puts (wr, mt->endline_str);
569  break;
571  wrbuf_printf(wr, "%s", n->u.controlfield.tag);
572  wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
574  marc_iconv_reset(mt, wr);
575  wrbuf_puts (wr, mt->endline_str);
576  break;
577  case YAZ_MARC_COMMENT:
578  wrbuf_puts(wr, "(");
579  wrbuf_iconv_write(wr, mt->iconv_cd,
580  n->u.comment, strlen(n->u.comment));
581  marc_iconv_reset(mt, wr);
582  wrbuf_puts(wr, ")\n");
583  break;
584  case YAZ_MARC_LEADER:
585  wrbuf_printf(wr, "%s\n", n->u.leader);
586  }
587  }
588  wrbuf_puts(wr, "\n");
589  return 0;
590 }
591 
593 {
595  {
596  switch(mt->output_format)
597  {
598  case YAZ_MARC_MARCXML:
599  case YAZ_MARC_TURBOMARC:
600  wrbuf_printf(wr, "</collection>\n");
601  break;
602  case YAZ_MARC_XCHANGE:
603  wrbuf_printf(wr, "</collection>\n");
604  break;
605  }
606  }
607  return 0;
608 }
609 
611 {
613 }
614 
616 {
617  switch(mt->output_format)
618  {
619  case YAZ_MARC_LINE:
620  return yaz_marc_write_line(mt, wr);
621  case YAZ_MARC_MARCXML:
622  return yaz_marc_write_marcxml(mt, wr);
623  case YAZ_MARC_TURBOMARC:
624  return yaz_marc_write_turbomarc(mt, wr);
625  case YAZ_MARC_XCHANGE:
626  return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
627  case YAZ_MARC_ISO2709:
628  return yaz_marc_write_iso2709(mt, wr);
629  case YAZ_MARC_CHECK:
630  return yaz_marc_write_check(mt, wr);
631  }
632  return -1;
633 }
634 
/*
 * XML element and attribute names, indexed by the "turbo" flag used by
 * the XML writers in this file: entry 0 is the long name, entry 1 the
 * short name used when turbo is 1.
 */
static const char *record_name[2] = {
    "record", "r"
};
static const char *leader_name[2] = {
    "leader", "l"
};
static const char *controlfield_name[2] = {
    "controlfield", "c"
};
static const char *datafield_name[2] = {
    "datafield", "d"
};
/* prefix only; the writers append the 1-based indicator number */
static const char *indicator_name[2] = {
    "ind", "i"
};
static const char *subfield_name[2] = {
    "subfield", "s"
};
641 
653  const char *ns,
654  const char *format,
655  const char *type,
656  int turbo)
657 {
658  struct yaz_marc_node *n;
659  int identifier_length;
660  const char *leader = 0;
661 
662  for (n = mt->nodes; n; n = n->next)
663  if (n->which == YAZ_MARC_LEADER)
664  {
665  leader = n->u.leader;
666  break;
667  }
668 
669  if (!leader)
670  return -1;
671  if (!atoi_n_check(leader+11, 1, &identifier_length))
672  return -1;
673 
674  if (mt->enable_collection != no_collection)
675  {
677  {
678  wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
680  }
681  wrbuf_printf(wr, "<%s", record_name[turbo]);
682  }
683  else
684  {
685  wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
686  }
687  if (format)
688  wrbuf_printf(wr, " format=\"%.80s\"", format);
689  if (type)
690  wrbuf_printf(wr, " type=\"%.80s\"", type);
691  wrbuf_printf(wr, ">\n");
692  for (n = mt->nodes; n; n = n->next)
693  {
694  struct yaz_marc_subfield *s;
695 
696  switch(n->which)
697  {
698  case YAZ_MARC_DATAFIELD:
699 
700  wrbuf_printf(wr, " <%s", datafield_name[turbo]);
701  if (!turbo)
702  wrbuf_printf(wr, " tag=\"");
704  strlen(n->u.datafield.tag));
705  if (!turbo)
706  wrbuf_printf(wr, "\"");
707  if (n->u.datafield.indicator)
708  {
709  int i;
710  for (i = 0; n->u.datafield.indicator[i]; i++)
711  {
712  wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
714  n->u.datafield.indicator+i, 1);
715  wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
716  }
717  }
718  wrbuf_printf(wr, ">\n");
719  for (s = n->u.datafield.subfields; s; s = s->next)
720  {
721  size_t using_code_len = get_subfield_len(mt, s->code_data,
722  identifier_length);
723  wrbuf_printf(wr, " <%s", subfield_name[turbo]);
724  if (!turbo)
725  {
726  wrbuf_printf(wr, " code=\"");
728  s->code_data, using_code_len);
729  wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
730  }
731  else
732  {
733  element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
734  wrbuf_puts(wr, ">");
735  }
737  s->code_data + using_code_len,
738  strlen(s->code_data + using_code_len));
739  marc_iconv_reset(mt, wr);
740  wrbuf_printf(wr, "</%s", subfield_name[turbo]);
741  if (turbo)
742  element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
743  wrbuf_puts(wr, ">\n");
744  }
745  wrbuf_printf(wr, " </%s", datafield_name[turbo]);
746  /* TODO Not CDATA */
747  if (turbo)
749  strlen(n->u.datafield.tag));
750  wrbuf_printf(wr, ">\n");
751  break;
753  wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
754  if (!turbo)
755  {
756  wrbuf_printf(wr, " tag=\"");
758  strlen(n->u.controlfield.tag));
759  wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
760  }
761  else
762  {
763  /* TODO convert special */
765  strlen(n->u.controlfield.tag));
766  wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
767  }
769  n->u.controlfield.data,
770  strlen(n->u.controlfield.data));
771  marc_iconv_reset(mt, wr);
772  wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
773  /* TODO convert special */
774  if (turbo)
776  strlen(n->u.controlfield.tag));
777  wrbuf_puts(wr, ">\n");
778  break;
779  case YAZ_MARC_COMMENT:
780  wrbuf_printf(wr, "<!-- ");
781  wrbuf_puts(wr, n->u.comment);
782  wrbuf_printf(wr, " -->\n");
783  break;
784  case YAZ_MARC_LEADER:
785  wrbuf_printf(wr, " <%s>", leader_name[turbo]);
787  0 , /* no charset conversion for leader */
788  n->u.leader, strlen(n->u.leader));
789  wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
790  }
791  }
792  wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
793  return 0;
794 }
795 
797  const char *ns,
798  const char *format,
799  const char *type,
800  int turbo)
801 {
802  if (mt->write_using_libxml2)
803  {
804 #if YAZ_HAVE_XML2
805  int ret;
806  xmlNode *root_ptr;
807 
808  if (!turbo)
809  ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
810  else
811  ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type);
812  if (ret == 0)
813  {
814  xmlChar *buf_out;
815  xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
816  int len_out;
817 
818  xmlDocSetRootElement(doc, root_ptr);
819  xmlDocDumpMemory(doc, &buf_out, &len_out);
820 
821  wrbuf_write(wr, (const char *) buf_out, len_out);
822  wrbuf_puts(wr, "");
823  xmlFree(buf_out);
824  xmlFreeDoc(doc);
825  }
826  return ret;
827 #else
828  return -1;
829 #endif
830  }
831  else
832  return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo);
833 }
834 
836 {
837  /* set leader 09 to 'a' for UNICODE */
838  /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
839  if (!mt->leader_spec)
840  yaz_marc_modify_leader(mt, 9, "a");
841  return yaz_marc_write_marcxml_ns(mt, wr,
842  "http://www.loc.gov/MARC21/slim",
843  0, 0, 0);
844 }
845 
847 {
848  /* set leader 09 to 'a' for UNICODE */
849  /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
850  if (!mt->leader_spec)
851  yaz_marc_modify_leader(mt, 9, "a");
852  return yaz_marc_write_marcxml_ns(mt, wr,
853  "http://www.indexdata.com/turbomarc", 0, 0, 1);
854 }
855 
857  const char *format,
858  const char *type)
859 {
860  return yaz_marc_write_marcxml_ns(mt, wr,
861  "info:lc/xmlns/marcxchange-v1",
862  0, 0, 0);
863 }
864 
865 #if YAZ_HAVE_XML2
866 
868  xmlNode *record_ptr,
869  xmlNsPtr ns_record, WRBUF wr_cdata,
870  int identifier_length)
871 {
872  xmlNode *ptr;
873  struct yaz_marc_subfield *s;
875 
876  /* TODO consider if safe */
877  char field[10];
878  field[0] = 'd';
879  strncpy(field + 1, n->u.datafield.tag, 3);
880  field[4] = '\0';
881  ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
882 
883  if (n->u.datafield.indicator)
884  {
885  int i;
886  for (i = 0; n->u.datafield.indicator[i]; i++)
887  {
888  char ind_str[6];
889  char ind_val[2];
890 
891  ind_val[0] = n->u.datafield.indicator[i];
892  ind_val[1] = '\0';
893  sprintf(ind_str, "%s%d", indicator_name[1], i+1);
894  xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
895  }
896  }
897  for (s = n->u.datafield.subfields; s; s = s->next)
898  {
899  int not_written;
900  xmlNode *ptr_subfield;
901  size_t using_code_len = get_subfield_len(mt, s->code_data,
902  identifier_length);
903  wrbuf_rewind(wr_cdata);
904  wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
905  marc_iconv_reset(mt, wr_cdata);
906 
907  wrbuf_rewind(subfield_name);
908  wrbuf_puts(subfield_name, "s");
909  not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
910  ptr_subfield = xmlNewTextChild(ptr, ns_record,
911  BAD_CAST wrbuf_cstr(subfield_name),
912  BAD_CAST wrbuf_cstr(wr_cdata));
913  if (not_written)
914  {
915  /* Generate code attribute value and add */
916  wrbuf_rewind(wr_cdata);
917  wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
918  xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
919  }
920  }
921  wrbuf_destroy(subfield_name);
922 }
923 
924 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
925  const char *ns,
926  const char *format,
927  const char *type)
928 {
929  struct yaz_marc_node *n;
930  int identifier_length;
931  const char *leader = 0;
932  xmlNode *record_ptr;
933  xmlNsPtr ns_record;
934  WRBUF wr_cdata = 0;
935 
936  for (n = mt->nodes; n; n = n->next)
937  if (n->which == YAZ_MARC_LEADER)
938  {
939  leader = n->u.leader;
940  break;
941  }
942 
943  if (!leader)
944  return -1;
945  if (!atoi_n_check(leader+11, 1, &identifier_length))
946  return -1;
947 
948  wr_cdata = wrbuf_alloc();
949 
950  record_ptr = xmlNewNode(0, BAD_CAST "r");
951  *root_ptr = record_ptr;
952 
953  ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
954  xmlSetNs(record_ptr, ns_record);
955 
956  if (format)
957  xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
958  if (type)
959  xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
960  for (n = mt->nodes; n; n = n->next)
961  {
962  xmlNode *ptr;
963 
964  char field[10];
965  field[0] = 'c';
966  field[4] = '\0';
967 
968  switch(n->which)
969  {
970  case YAZ_MARC_DATAFIELD:
971  add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
972  break;
974  wrbuf_rewind(wr_cdata);
975  wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
976  marc_iconv_reset(mt, wr_cdata);
977 
978  strncpy(field + 1, n->u.controlfield.tag, 3);
979  ptr = xmlNewTextChild(record_ptr, ns_record,
980  BAD_CAST field,
981  BAD_CAST wrbuf_cstr(wr_cdata));
982  break;
983  case YAZ_MARC_COMMENT:
984  ptr = xmlNewComment(BAD_CAST n->u.comment);
985  xmlAddChild(record_ptr, ptr);
986  break;
987  case YAZ_MARC_LEADER:
988  xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l",
989  BAD_CAST n->u.leader);
990  break;
991  }
992  }
993  wrbuf_destroy(wr_cdata);
994  return 0;
995 }
996 
997 
998 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
999  const char *ns,
1000  const char *format,
1001  const char *type)
1002 {
1003  struct yaz_marc_node *n;
1004  int identifier_length;
1005  const char *leader = 0;
1006  xmlNode *record_ptr;
1007  xmlNsPtr ns_record;
1008  WRBUF wr_cdata = 0;
1009 
1010  for (n = mt->nodes; n; n = n->next)
1011  if (n->which == YAZ_MARC_LEADER)
1012  {
1013  leader = n->u.leader;
1014  break;
1015  }
1016 
1017  if (!leader)
1018  return -1;
1019  if (!atoi_n_check(leader+11, 1, &identifier_length))
1020  return -1;
1021 
1022  wr_cdata = wrbuf_alloc();
1023 
1024  record_ptr = xmlNewNode(0, BAD_CAST "record");
1025  *root_ptr = record_ptr;
1026 
1027  ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1028  xmlSetNs(record_ptr, ns_record);
1029 
1030  if (format)
1031  xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1032  if (type)
1033  xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1034  for (n = mt->nodes; n; n = n->next)
1035  {
1036  struct yaz_marc_subfield *s;
1037  xmlNode *ptr;
1038 
1039  switch(n->which)
1040  {
1041  case YAZ_MARC_DATAFIELD:
1042  ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1043  xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1044  if (n->u.datafield.indicator)
1045  {
1046  int i;
1047  for (i = 0; n->u.datafield.indicator[i]; i++)
1048  {
1049  char ind_str[6];
1050  char ind_val[2];
1051 
1052  sprintf(ind_str, "ind%d", i+1);
1053  ind_val[0] = n->u.datafield.indicator[i];
1054  ind_val[1] = '\0';
1055  xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1056  }
1057  }
1058  for (s = n->u.datafield.subfields; s; s = s->next)
1059  {
1060  xmlNode *ptr_subfield;
1061  size_t using_code_len = get_subfield_len(mt, s->code_data,
1062  identifier_length);
1063  wrbuf_rewind(wr_cdata);
1064  wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1065  s->code_data + using_code_len);
1066  marc_iconv_reset(mt, wr_cdata);
1067  ptr_subfield = xmlNewTextChild(
1068  ptr, ns_record,
1069  BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
1070 
1071  wrbuf_rewind(wr_cdata);
1072  wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1073  s->code_data, using_code_len);
1074  xmlNewProp(ptr_subfield, BAD_CAST "code",
1075  BAD_CAST wrbuf_cstr(wr_cdata));
1076  }
1077  break;
1078  case YAZ_MARC_CONTROLFIELD:
1079  wrbuf_rewind(wr_cdata);
1080  wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1081  marc_iconv_reset(mt, wr_cdata);
1082 
1083  ptr = xmlNewTextChild(record_ptr, ns_record,
1084  BAD_CAST "controlfield",
1085  BAD_CAST wrbuf_cstr(wr_cdata));
1086 
1087  xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1088  break;
1089  case YAZ_MARC_COMMENT:
1090  ptr = xmlNewComment(BAD_CAST n->u.comment);
1091  xmlAddChild(record_ptr, ptr);
1092  break;
1093  case YAZ_MARC_LEADER:
1094  xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1095  BAD_CAST n->u.leader);
1096  break;
1097  }
1098  }
1099  wrbuf_destroy(wr_cdata);
1100  return 0;
1101 }
1102 
1103 #endif
1104 
1106 {
1107  struct yaz_marc_node *n;
1108  int indicator_length;
1109  int identifier_length;
1110  int length_data_entry;
1111  int length_starting;
1112  int length_implementation;
1113  int data_offset = 0;
1114  const char *leader = 0;
1115  WRBUF wr_dir, wr_head, wr_data_tmp;
1116  int base_address;
1117 
1118  for (n = mt->nodes; n; n = n->next)
1119  if (n->which == YAZ_MARC_LEADER)
1120  leader = n->u.leader;
1121 
1122  if (!leader)
1123  return -1;
1124  if (!atoi_n_check(leader+10, 1, &indicator_length))
1125  return -1;
1126  if (!atoi_n_check(leader+11, 1, &identifier_length))
1127  return -1;
1128  if (!atoi_n_check(leader+20, 1, &length_data_entry))
1129  return -1;
1130  if (!atoi_n_check(leader+21, 1, &length_starting))
1131  return -1;
1132  if (!atoi_n_check(leader+22, 1, &length_implementation))
1133  return -1;
1134 
1135  wr_data_tmp = wrbuf_alloc();
1136  wr_dir = wrbuf_alloc();
1137  for (n = mt->nodes; n; n = n->next)
1138  {
1139  int data_length = 0;
1140  struct yaz_marc_subfield *s;
1141 
1142  switch(n->which)
1143  {
1144  case YAZ_MARC_DATAFIELD:
1145  wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1146  data_length += indicator_length;
1147  wrbuf_rewind(wr_data_tmp);
1148  for (s = n->u.datafield.subfields; s; s = s->next)
1149  {
1150  /* write dummy IDFS + content */
1151  wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1152  wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1153  marc_iconv_reset(mt, wr_data_tmp);
1154  }
1155  /* write dummy FS (makes MARC-8 to become ASCII) */
1156  wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1157  marc_iconv_reset(mt, wr_data_tmp);
1158  data_length += wrbuf_len(wr_data_tmp);
1159  break;
1160  case YAZ_MARC_CONTROLFIELD:
1161  wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1162 
1163  wrbuf_rewind(wr_data_tmp);
1164  wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1165  n->u.controlfield.data);
1166  marc_iconv_reset(mt, wr_data_tmp);
1167  wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1168  marc_iconv_reset(mt, wr_data_tmp);
1169  data_length += wrbuf_len(wr_data_tmp);
1170  break;
1171  case YAZ_MARC_COMMENT:
1172  break;
1173  case YAZ_MARC_LEADER:
1174  break;
1175  }
1176  if (data_length)
1177  {
1178  wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1179  wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1180  data_offset += data_length;
1181  }
1182  }
1183  /* mark end of directory */
1184  wrbuf_putc(wr_dir, ISO2709_FS);
1185 
1186  /* base address of data (comes after leader+directory) */
1187  base_address = 24 + wrbuf_len(wr_dir);
1188 
1189  wr_head = wrbuf_alloc();
1190 
1191  /* write record length */
1192  wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1193  /* from "original" leader */
1194  wrbuf_write(wr_head, leader+5, 7);
1195  /* base address of data */
1196  wrbuf_printf(wr_head, "%05d", base_address);
1197  /* from "original" leader */
1198  wrbuf_write(wr_head, leader+17, 7);
1199 
1200  wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1201  wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1202  wrbuf_destroy(wr_head);
1203  wrbuf_destroy(wr_dir);
1204  wrbuf_destroy(wr_data_tmp);
1205 
1206  for (n = mt->nodes; n; n = n->next)
1207  {
1208  struct yaz_marc_subfield *s;
1209 
1210  switch(n->which)
1211  {
1212  case YAZ_MARC_DATAFIELD:
1213  wrbuf_write(wr, n->u.datafield.indicator, indicator_length);
1214  for (s = n->u.datafield.subfields; s; s = s->next)
1215  {
1216  wrbuf_putc(wr, ISO2709_IDFS);
1217  wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1218  marc_iconv_reset(mt, wr);
1219  }
1220  wrbuf_putc(wr, ISO2709_FS);
1221  break;
1222  case YAZ_MARC_CONTROLFIELD:
1224  marc_iconv_reset(mt, wr);
1225  wrbuf_putc(wr, ISO2709_FS);
1226  break;
1227  case YAZ_MARC_COMMENT:
1228  break;
1229  case YAZ_MARC_LEADER:
1230  break;
1231  }
1232  }
1233  wrbuf_printf(wr, "%c", ISO2709_RS);
1234  return 0;
1235 }
1236 
1237 
1238 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1239 {
1240  int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1241  if (r <= 0)
1242  return r;
1243  s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1244  if (s != 0)
1245  return -1; /* error */
1246  return r; /* OK, return length > 0 */
1247 }
1248 
1249 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1250  const char **result, size_t *rsize)
1251 {
1252  int r;
1253 
1254  wrbuf_rewind(mt->m_wr);
1255  r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1256  if (result)
1257  *result = wrbuf_cstr(mt->m_wr);
1258  if (rsize)
1259  *rsize = wrbuf_len(mt->m_wr);
1260  return r;
1261 }
1262 
1263 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1264 {
1265  mt->output_format = xmlmode;
1266 }
1267 
1268 void yaz_marc_debug(yaz_marc_t mt, int level)
1269 {
1270  if (mt)
1271  mt->debug = level;
1272 }
1273 
1275 {
1276  mt->iconv_cd = cd;
1277 }
1278 
1280 {
1281  return mt->iconv_cd;
1282 }
1283 
1284 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1285 {
1286  struct yaz_marc_node *n;
1287  char *leader = 0;
1288  for (n = mt->nodes; n; n = n->next)
1289  if (n->which == YAZ_MARC_LEADER)
1290  {
1291  leader = n->u.leader;
1292  memcpy(leader+off, str, strlen(str));
1293  break;
1294  }
1295 }
1296 
1297 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1298 {
1299  xfree(mt->leader_spec);
1300  mt->leader_spec = 0;
1301  if (leader_spec)
1302  {
1303  char dummy_leader[24];
1304  if (marc_exec_leader(leader_spec, dummy_leader, 24))
1305  return -1;
1306  mt->leader_spec = xstrdup(leader_spec);
1307  }
1308  return 0;
1309 }
1310 
/*
 * Apply a leader specification to a leader buffer.
 *
 * leader_spec  comma-separated list of "pos=value" items, or null (in
 *              which case nothing is done and 0 is returned). A value is
 *              either a quoted string ('abc'), copied to leader starting
 *              at pos, or a decimal number, stored as a single byte at
 *              pos. At most 20 characters of each value are read.
 * leader       buffer to patch
 * size         number of bytes available in leader
 *
 * Returns 0 on success and -1 when an item cannot be parsed, has a value
 * that is neither quoted nor numeric, lacks the closing quote, or would
 * write outside leader[0 .. size-1]. Items before the failing one have
 * already been applied at that point.
 */
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *cp = leader_spec;
    while (cp)
    {
        char val[21];
        int pos;
        int no_read = 0, no = 0;

        /* %n reports how much of the item was consumed */
        no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
        if (no < 2 || no_read < 3)
            return -1;
        if (pos < 0 || (size_t) pos >= size)
            return -1;

        if (*val == '\'')
        {
            const char *vp = strchr(val+1, '\'');
            size_t len;

            if (!vp)
                return -1;
            len = vp-val-1;
            if (len + pos > size)
                return -1;
            memcpy(leader + pos, val+1, len);
        }
        else if (*val >= '0' && *val <= '9')
        {
            /* strtol, unlike atoi, has defined behaviour when the digits
               do not fit; the result is reduced to one byte as before */
            long ch = strtol(val, 0, 10);
            leader[pos] = (char) (unsigned char) ch;
        }
        else
            return -1;
        cp += no_read;
        if (*cp != ',')
            break;

        cp++;
    }
    return 0;
}
1353 
1354 int yaz_marc_decode_formatstr(const char *arg)
1355 {
1356  int mode = -1;
1357  if (!strcmp(arg, "marc"))
1358  mode = YAZ_MARC_ISO2709;
1359  if (!strcmp(arg, "marcxml"))
1360  mode = YAZ_MARC_MARCXML;
1361  if (!strcmp(arg, "turbomarc"))
1362  mode = YAZ_MARC_TURBOMARC;
1363  if (!strcmp(arg, "marcxchange"))
1364  mode = YAZ_MARC_XCHANGE;
1365  if (!strcmp(arg, "line"))
1366  mode = YAZ_MARC_LINE;
1367  return mode;
1368 }
1369 
1371 {
1372  mt->write_using_libxml2 = enable;
1373 }
1374 
1375 /*
1376  * Local variables:
1377  * c-basic-offset: 4
1378  * c-file-style: "Stroustrup"
1379  * indent-tabs-mode: nil
1380  * End:
1381  * vim: shiftwidth=4 tabstop=8 expandtab
1382  */
1383