YAZ  5.23.1
marc_read_xml.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14 
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18 
19 #include <stdio.h>
20 #include <string.h>
21 #include <yaz/marcdisp.h>
22 #include <yaz/wrbuf.h>
23 #include <yaz/yaz-util.h>
24 #include <yaz/nmem_xml.h>
25 
26 #if YAZ_HAVE_XML2
27 #include <libxml/tree.h>
28 #endif
29 
30 #if YAZ_HAVE_XML2
31 static int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
32 {
33  NMEM nmem = yaz_marc_get_nmem(mt);
34  for (; ptr; ptr = ptr->next)
35  {
36  if (ptr->type == XML_ELEMENT_NODE)
37  {
38  if (!strcmp((const char *) ptr->name, "subfield"))
39  {
40  size_t ctrl_data_len = 0;
41  char *ctrl_data_buf = 0;
42  const xmlNode *p = 0, *ptr_code = 0;
43  struct _xmlAttr *attr;
44  for (attr = ptr->properties; attr; attr = attr->next)
45  if (!strcmp((const char *)attr->name, "code"))
46  ptr_code = attr->children;
47  else
48  {
50  mt, "Bad attribute '%.80s' for 'subfield'",
51  attr->name);
52  return -1;
53  }
54  if (!ptr_code)
55  {
57  mt, "Missing attribute 'code' for 'subfield'" );
58  return -1;
59  }
60  if (ptr_code->type == XML_TEXT_NODE)
61  {
62  ctrl_data_len =
63  strlen((const char *)ptr_code->content);
64  }
65  else
66  {
68  mt, "Missing value for 'code' in 'subfield'" );
69  return -1;
70  }
71  for (p = ptr->children; p ; p = p->next)
72  if (p->type == XML_TEXT_NODE)
73  ctrl_data_len += strlen((const char *)p->content);
74  ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
75  strcpy(ctrl_data_buf, (const char *)ptr_code->content);
76  for (p = ptr->children; p ; p = p->next)
77  if (p->type == XML_TEXT_NODE)
78  strcat(ctrl_data_buf, (const char *)p->content);
79  yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
80  }
81  else
82  {
84  mt, "Expected element 'subfield', got '%.80s'", ptr->name);
85  return -1;
86  }
87  }
88  }
89  return 0;
90 }
91 
92 static char *element_attribute_value_extract(const xmlNode *ptr,
93  const char *attribute_name,
94  NMEM nmem)
95 {
96  const char *name = (const char *) ptr->name;
97  size_t length = strlen(name);
98  xmlAttr *attr;
99  if (length > 1 )
100  return nmem_strdup(nmem, name+1);
101  // TODO Extract from attribute where matches attribute_name
102  for (attr = ptr->properties; attr; attr = attr->next)
103  if (!strcmp((const char *)attr->name, attribute_name))
104  return nmem_text_node_cdata(attr->children, nmem);
105  return 0;
106 }
107 
108 static void get_indicator_value(yaz_marc_t mt, const xmlNode *ptr,
109  char *res, int turbo, int indicator_length)
110 {
111  int i;
112  res[0] = '\0';
113  for (i = 1; i <= indicator_length; i++)
114  {
115  struct _xmlAttr *attr;
116  char attrname[12];
117  sprintf(attrname, "%s%d", turbo ? "i" : "ind", i);
118  for (attr = ptr->properties; attr; attr = attr->next)
119  {
120  if (!strcmp((const char *)attr->name, attrname) &&
121  attr->children && attr->children->type == XML_TEXT_NODE &&
122  attr->children->content &&
123  strlen((const char *) attr->children->content) < 5)
124  {
125  strcat(res, (const char *)attr->children->content);
126  break;
127  }
128  }
129  if (!attr)
130  strcat(res, " ");
131  }
132 }
133 
134 static int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
135 {
136  for (; ptr; ptr = ptr->next)
137  {
138  if (ptr->type == XML_ELEMENT_NODE)
139  {
140  if (!strncmp((const char *) ptr->name, "s", 1))
141  {
142  NMEM nmem = yaz_marc_get_nmem(mt);
143  xmlNode *p;
144  size_t ctrl_data_len = 0;
145  char *ctrl_data_buf = 0;
146  const char *tag_value = element_attribute_value_extract(ptr, "code", nmem);
147  if (!tag_value)
148  {
150  mt, "Missing 'code' value for 'subfield'" );
151  return -1;
152  }
153 
154  ctrl_data_len = strlen((const char *) tag_value);
155  // Extract (length) from CDATA
156  for (p = ptr->children; p ; p = p->next)
157  if (p->type == XML_TEXT_NODE)
158  ctrl_data_len += strlen((const char *)p->content);
159  // Allocate memory for code value (1 character (can be multi-byte) and data
160  ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
161  // Build a string with "<Code><data>"
162  strcpy(ctrl_data_buf, (const char *) tag_value);
163  for (p = ptr->children; p ; p = p->next)
164  if (p->type == XML_TEXT_NODE)
165  strcat(ctrl_data_buf, (const char *)p->content);
166  yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
167  }
168  else
169  {
171  mt, "Expected element 'subfield', got '%.80s'", ptr->name);
172  return -1;
173  }
174  }
175  }
176  return 0;
177 }
178 
179 
180 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p,
181  int *indicator_length)
182 {
183  int identifier_length;
184  int base_address;
185  int length_data_entry;
186  int length_starting;
187  int length_implementation;
188  const char *leader = 0;
189  const xmlNode *ptr = *ptr_p;
190 
191  for(; ptr; ptr = ptr->next)
192  if (ptr->type == XML_ELEMENT_NODE)
193  {
194  if ( !strcmp( (const char *) ptr->name, "leader") ||
195  (!strncmp((const char *) ptr->name, "l", 1) ))
196  {
197  xmlNode *p = ptr->children;
198  for(; p; p = p->next)
199  if (p->type == XML_TEXT_NODE)
200  leader = (const char *) p->content;
201  ptr = ptr->next;
202  }
203  break;
204  }
205  if (!leader)
206  {
207  yaz_marc_cprintf(mt, "Missing leader. Inserting fake leader");
208  leader = "00000nam a22000000a 4500";
209  }
210  if (strlen(leader) != 24)
211  {
212  yaz_marc_cprintf(mt, "Bad length %d of leader data."
213  " Must have length of 24 characters", strlen(leader));
214  return -1;
215  }
216  yaz_marc_set_leader(mt, leader,
217  indicator_length,
218  &identifier_length,
219  &base_address,
220  &length_data_entry,
221  &length_starting,
222  &length_implementation);
223  *ptr_p = ptr;
224  return 0;
225 }
226 
227 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr,
228  int indicator_length)
229 {
230  for(; ptr; ptr = ptr->next)
231  if (ptr->type == XML_ELEMENT_NODE)
232  {
233  if (!strcmp( (const char *) ptr->name, "controlfield"))
234  {
235  const xmlNode *ptr_tag = 0;
236  struct _xmlAttr *attr;
237  for (attr = ptr->properties; attr; attr = attr->next)
238  if (!strcmp((const char *)attr->name, "tag"))
239  ptr_tag = attr->children;
240  else
241  {
243  mt, "Bad attribute '%.80s' for 'controlfield'",
244  attr->name);
245  return -1;
246  }
247  if (!ptr_tag)
248  {
250  mt, "Missing attribute 'tag' for 'controlfield'" );
251  return -1;
252  }
253  yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
254  }
255  else if (!strcmp((const char *) ptr->name, "datafield"))
256  {
257  char indstr[48];
258  const xmlNode *ptr_tag = 0;
259  struct _xmlAttr *attr;
260 
261  get_indicator_value(mt, ptr, indstr, 0, indicator_length);
262  for (attr = ptr->properties; attr; attr = attr->next)
263  if (!strcmp((const char *)attr->name, "tag"))
264  ptr_tag = attr->children;
265  else if (!strncmp((const char *)attr->name, "ind", 3))
266  ;
267  else
268  {
270  mt, "Bad attribute '%.80s' for 'datafield'",
271  attr->name);
272  }
273  if (!ptr_tag)
274  {
276  mt, "Missing attribute 'tag' for 'datafield'" );
277  return -1;
278  }
279  yaz_marc_add_datafield_xml(mt, ptr_tag,
280  indstr, indicator_length);
281  if (yaz_marc_read_xml_subfields(mt, ptr->children))
282  return -1;
283  }
284  else
285  {
286  yaz_marc_cprintf(mt,
287  "Expected element controlfield or datafield,"
288  " got %.80s", ptr->name);
289  return -1;
290  }
291  }
292  return 0;
293 }
294 
295 
296 static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr,
297  int indicator_length)
298 {
299  for(; ptr; ptr = ptr->next)
300  if (ptr->type == XML_ELEMENT_NODE)
301  {
302  if (!strncmp( (const char *) ptr->name, "c", 1))
303  {
304  NMEM nmem = yaz_marc_get_nmem(mt);
305  char *tag_value = element_attribute_value_extract(ptr, "tag", nmem);
306  if (!tag_value)
307  {
309  mt, "Missing attribute 'tag' for 'controlfield'" );
310  return -1;
311  }
312  yaz_marc_add_controlfield_xml2(mt, tag_value, ptr->children);
313  }
314  else if (!strncmp((const char *) ptr->name, "d",1))
315  {
316  struct _xmlAttr *attr;
317  NMEM nmem = yaz_marc_get_nmem(mt);
318  char *tag_value;
319  char *indstr = nmem_malloc(nmem, indicator_length * 5);
320  tag_value = element_attribute_value_extract(ptr, "tag", nmem);
321  if (!tag_value)
322  {
324  mt, "Missing attribute 'tag' for 'datafield'" );
325  return -1;
326  }
327  get_indicator_value(mt, ptr, indstr, 1, indicator_length);
328  for (attr = ptr->properties; attr; attr = attr->next)
329  if (strlen((const char *)attr->name) == 2 &&
330  attr->name[0] == 'i')
331  ;
332  else
333  {
335  mt, "Bad attribute '%.80s' for 'd'", attr->name);
336  }
337  yaz_marc_add_datafield_xml2(mt, tag_value, indstr);
338  if (yaz_marc_read_turbo_xml_subfields(mt, ptr->children /*, indstr */))
339  return -1;
340  }
341  else
342  {
343  yaz_marc_cprintf(mt,
344  "Expected element controlfield or datafield,"
345  " got %.80s", ptr->name);
346  return -1;
347  }
348  }
349  return 0;
350 }
351 
352 
353 #endif
354 
355 #if YAZ_HAVE_XML2
356 int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
357 {
358  int indicator_length = 0;
359  int format = 0;
360  yaz_marc_reset(mt);
361 
362  for(; ptr; ptr = ptr->next)
363  if (ptr->type == XML_ELEMENT_NODE)
364  {
365  if (!strcmp((const char *) ptr->name, "record"))
366  {
367  format = YAZ_MARC_MARCXML;
368  break;
369  }
370  else if (!strcmp((const char *) ptr->name, "r"))
371  {
372  format = YAZ_MARC_TURBOMARC;
373  break;
374  }
375  else
376  {
378  mt, "Unknown element '%.80s' in MARC XML reader",
379  ptr->name);
380  return -1;
381  }
382  }
383  if (!ptr)
384  {
385  yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
386  return -1;
387  }
388  /* ptr points to record node now */
389  ptr = ptr->children;
390  if (yaz_marc_read_xml_leader(mt, &ptr, &indicator_length))
391  return -1;
392 
393  switch (format)
394  {
395  case YAZ_MARC_MARCXML:
396  return yaz_marc_read_xml_fields(mt, ptr, indicator_length);
397  case YAZ_MARC_TURBOMARC:
398  return yaz_marc_read_turbo_xml_fields(mt, ptr, indicator_length);
399  }
400  return -1;
401 }
402 #endif
403 
404 
405 /*
406  * Local variables:
407  * c-basic-offset: 4
408  * c-file-style: "Stroustrup"
409  * indent-tabs-mode: nil
410  * End:
411  * vim: shiftwidth=4 tabstop=8 expandtab
412  */
413 
MARC conversion.
Header for Nibble Memory functions + Libxml2 specific stuff.
void yaz_marc_add_subfield(yaz_marc_t mt, const char *code_data, size_t code_data_len)
adds subfield to MARC structure
Definition: marcdisp.c:316
Header for WRBUF (growing buffer)
static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p, int *indicator_length)
char * name
Definition: initopt.c:18
void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
adds datafield to MARC structure using xml Nodes
Definition: marcdisp.c:297
Header for common YAZ utilities.
the internals of a yaz_marc_t handle
Definition: marcdisp.c:86
void * nmem_malloc(NMEM n, size_t size)
allocates memory block on NMEM handle
Definition: nmem.c:145
static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr, int indicator_length)
void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, const char *indicator, size_t indicator_len)
adds datafield to MARC structure using xml Nodes
Definition: marcdisp.c:284
void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag, const xmlNode *ptr_data)
adds controlfield to MARC structure using xml Nodes for data
Definition: marcdisp.c:169
NMEM yaz_marc_get_nmem(yaz_marc_t mt)
returns memory for MARC handle
Definition: marcdisp.c:130
static int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
static void get_indicator_value(yaz_marc_t mt, const xmlNode *ptr, char *res, int turbo, int indicator_length)
static char * element_attribute_value_extract(const xmlNode *ptr, const char *attribute_name, NMEM nmem)
Definition: marc_read_xml.c:92
char * nmem_strdup(NMEM mem, const char *src)
allocates string on NMEM handle (similar strdup)
Definition: nmemsdup.c:18
#define YAZ_MARC_TURBOMARC
Output format: Turbo MARC Index Data format (XML based)
Definition: marcdisp.h:78
#define YAZ_MARC_MARCXML
Output format: MARCXML.
Definition: marcdisp.h:70
void yaz_marc_reset(yaz_marc_t mt)
clears memory and MARC record
Definition: marcdisp.c:483
void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c, int *indicator_length, int *identifier_length, int *base_address, int *length_data_entry, int *length_starting, int *length_implementation)
sets leader, validates it, and returns important values
Definition: marcdisp.c:356
void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag, const xmlNode *ptr_data)
adds controlfield to MARC structure using xml Nodes
Definition: marcdisp.c:160
void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt,...)
adds MARC annotation - printf interface
Definition: marcdisp.c:188
char * nmem_text_node_cdata(const xmlNode *ptr_cdata, NMEM nmem)
copies TEXT Libxml2 node data to NMEM
Definition: nmemsdup.c:145
static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr, int indicator_length)
int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
parses MARCXML/MarcXchange/TurboMARC record from xmlNode pointer
static int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
Definition: marc_read_xml.c:31