pazpar2  1.13.0
marcmap.c
Go to the documentation of this file.
1 /* This file is part of Pazpar2.
2  Copyright (C) Index Data
3 
4 Pazpar2 is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
24 #if HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 
32 #include <libxml/parser.h>
33 #include <libxml/tree.h>
34 
35 #include <yaz/nmem.h>
36 
37 #include "marcmap.h"
38 #include "marchash.h"
39 
40 struct marcmap *marcmap_load(const char *filename, NMEM nmem)
41 {
42  struct marcmap *mm;
43  struct marcmap *mmhead;
44  FILE *fp;
45  int c;
46  char buf[256];
47  int len;
48  int field;
49  int newrec;
50 
51  len = 0;
52  field = 0;
53  newrec = 1;
54  mm = NULL;
55  mmhead = NULL;
56  fp = fopen(filename, "r");
57  if (!fp)
58  return mmhead;
59 
60  while ((c = getc(fp) ) != EOF)
61  {
62  // allocate some space
63  if (newrec)
64  {
65  if (mm != NULL)
66  {
67  mm->next = nmem_malloc(nmem, sizeof(struct marcmap));
68  mm = mm->next;
69  }
70  // first one!
71  else
72  { mm = nmem_malloc(nmem, sizeof(struct marcmap));
73  mmhead = mm;
74  }
75  newrec = 0;
76  }
77  // whitespace saves and moves on
78  if (c == ' ' || c == '\n' || c == '\t')
79  {
80  buf[len] = '\0';
81  len++;
82  // first field, marc
83  if (field == 0)
84  {
85  // allow blank lines
86  if (!(len <3))
87  {
88  mm->field = nmem_malloc(nmem, len * sizeof(char));
89  strncpy(mm->field, buf, len);
90  }
91  }
92  // second, marc subfield, just a char
93  else if (field == 1)
94  {
95  mm->subfield = buf[len-2];
96  }
97  // third, pz fieldname
98  else if (field == 2)
99  {
100  mm->pz = nmem_malloc(nmem, len * sizeof(char));
101  strncpy(mm->pz, buf, len);
102  }
103 
104  // new line, new record
105  if (c == '\n')
106  {
107  field = 0;
108  newrec = 1;
109  }
110  else
111  {
112  field++;
113  }
114  len = 0;
115  }
116  else
117  {
118  buf[len] = c;
119  len++;
120  }
121  }
122  mm->next = NULL;
123  return mmhead;
124 }
125 
126 xmlDoc *marcmap_apply(struct marcmap *marcmap, xmlDoc *xml_in)
127 {
128  char mergekey[1024];
129  char medium[32];
130  char *s;
131  NMEM nmem;
132  xmlNsPtr ns_pz;
133  xmlDocPtr xml_out;
134  xmlNodePtr xml_out_root;
135  xmlNodePtr rec_node;
136  xmlNodePtr meta_node;
137  struct marchash *marchash;
138  struct marcfield *field;
139  struct marcsubfield *subfield;
140  struct marcmap *mmcur;
141 
142  xml_out = xmlNewDoc(BAD_CAST "1.0");
143  xml_out->encoding = xmlCharStrdup("UTF-8");
144  xml_out_root = xmlNewNode(NULL, BAD_CAST "record");
145  xmlDocSetRootElement(xml_out, xml_out_root);
146  ns_pz = xmlNewNs(xml_out_root, BAD_CAST "http://www.indexdata.com/pazpar2/1.0", BAD_CAST "pz");
147  xmlSetNs(xml_out_root, ns_pz);
148  nmem = nmem_create();
149  rec_node = xmlDocGetRootElement(xml_in);
150  marchash = marchash_create(nmem);
151  marchash_ingest_marcxml(marchash, rec_node);
152 
153  mmcur = marcmap;
154  while (mmcur != NULL)
155  {
156  field = 0;
157  while ((field = marchash_get_field(marchash, mmcur->field, field)) != 0)
158  {
159  // field value
160  if ((mmcur->subfield == '$') && (s = field->val))
161  {
162  meta_node = xmlNewChild(xml_out_root, ns_pz, BAD_CAST "metadata", BAD_CAST s);
163  xmlSetProp(meta_node, BAD_CAST "type", BAD_CAST mmcur->pz);
164  }
165  // catenate all subfields
166  else if ((mmcur->subfield == '*') && (s = marchash_catenate_subfields(field, " ", nmem)))
167  {
168  meta_node = xmlNewChild(xml_out_root, ns_pz, BAD_CAST "metadata", BAD_CAST s);
169  xmlSetProp(meta_node, BAD_CAST "type", BAD_CAST mmcur->pz);
170  }
171  // subfield value
172  else if (mmcur->subfield)
173  {
174  subfield = 0;
175  while ((subfield =
177  field, subfield)) != 0)
178  {
179  if ((s = subfield->val) != 0)
180  {
181  meta_node = xmlNewChild(xml_out_root, ns_pz, BAD_CAST "metadata", BAD_CAST s);
182  xmlSetProp(meta_node, BAD_CAST "type", BAD_CAST mmcur->pz);
183  }
184  }
185  }
186 
187  }
188  mmcur = mmcur->next;
189  }
190 
191  // hard coded mappings
192 
193  // medium
194  if ((field = marchash_get_field(marchash, "245", NULL)) && (subfield = marchash_get_subfield('h', field, NULL)))
195  {
196  strncpy(medium, subfield->val, 32);
197  }
198  else if ((field = marchash_get_field(marchash, "900", NULL)) && (subfield = marchash_get_subfield('a', field, NULL)))
199  strcpy(medium, "electronic resource");
200  else if ((field = marchash_get_field(marchash, "900", NULL)) && (subfield = marchash_get_subfield('b', field, NULL)))
201  strcpy(medium, "electronic resource");
202  else if ((field = marchash_get_field(marchash, "773", NULL)) && (subfield = marchash_get_subfield('t', field, NULL)))
203  strcpy(medium, "article");
204  else
205  strcpy(medium, "book");
206 
207  meta_node = xmlNewChild(xml_out_root, ns_pz, BAD_CAST "metadata", BAD_CAST medium);
208  xmlSetProp(meta_node, BAD_CAST "type", BAD_CAST "medium");
209 
210  // merge key
211  memset(mergekey, 0, 1024);
212  strcpy(mergekey, "title ");
213  if ((field = marchash_get_field(marchash, "245", NULL)) && (subfield = marchash_get_subfield('a', field, NULL)))
214  strncat(mergekey, subfield->val, 1023 - strlen(mergekey));
215  strncat(mergekey, " author ", 1023 - strlen(mergekey));
216  if ((field = marchash_get_field(marchash, "100", NULL)) && (subfield = marchash_get_subfield('a', field, NULL)))
217  strncat(mergekey, subfield->val, 1023 - strlen(mergekey));
218  strncat(mergekey, " medium ", 1023 - strlen(mergekey));
219  strncat(mergekey, medium, 1023 - strlen(mergekey));
220 
221 // xmlSetProp(xml_out_root, BAD_CAST "mergekey", BAD_CAST mergekey);
222 
223  nmem_destroy(nmem);
224  return xml_out;
225 }
226 
227 /*
228  * Local variables:
229  * c-basic-offset: 4
230  * c-file-style: "Stroustrup"
231  * indent-tabs-mode: nil
232  * End:
233  * vim: shiftwidth=4 tabstop=8 expandtab
234  */
struct marcmap * next
Definition: marcmap.h:28
struct marcsubfield * marchash_get_subfield(char key, struct marcfield *field, struct marcsubfield *last)
Definition: marchash.c:225
xmlDoc * marcmap_apply(struct marcmap *marcmap, xmlDoc *xml_in)
Definition: marcmap.c:126
char * pz
Definition: marcmap.h:27
char subfield
Definition: marcmap.h:26
char * field
Definition: marcmap.h:25
char * val
Definition: marchash.h:34
struct marcfield * marchash_get_field(struct marchash *marchash, const char *key, struct marcfield *last)
Definition: marchash.c:208
void marchash_ingest_marcxml(struct marchash *marchash, xmlNodePtr rec_node)
Definition: marchash.c:84
char * marchash_catenate_subfields(struct marcfield *field, const char *delim, NMEM nmem)
Definition: marchash.c:243
struct marcmap * marcmap_load(const char *filename, NMEM nmem)
Definition: marcmap.c:40
char * val
Definition: marchash.h:42
struct marchash * marchash_create(NMEM nmem)
Definition: marchash.c:75