IDZEBRA  2.1.2
d1_absyn.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 #include <yaz/log.h>
29 #include <yaz/oid_db.h>
30 #include <idzebra/data1.h>
31 #include <idzebra/recctrl.h>
32 #include <zebra_xpath.h>
33 #include <d1_absyn.h>
34 
35 #define D1_MAX_NESTING 128
36 
38  NMEM nmem;
39  int size;
40  struct data1_hash_entry **ar;
41 };
42 
/* One entry in a data1_hash_table bucket chain. */
struct data1_hash_entry {
    void *clientData;              /* value handed back by data1_hash_lookup */
    char *str;                     /* key string */
    struct data1_hash_entry *next; /* next entry in the same bucket, or 0 */
};
48 
49 unsigned data1_hash_calc(struct data1_hash_table *ht, const char *str)
50 {
51  unsigned v = 0;
52  assert(str);
53  while (*str)
54  {
55  if (*str >= 'a' && *str <= 'z')
56  v = v*65509 + *str -'a'+10;
57  else if (*str >= 'A' && *str <= 'Z')
58  v = v*65509 + *str -'A'+10;
59  else if (*str >= '0' && *str <= '9')
60  v = v*65509 + *str -'0';
61  str++;
62  }
63  return v % ht->size;
64 }
65 
67 {
68  int i;
69  struct data1_hash_table *ht = nmem_malloc(nmem, sizeof(*ht));
70  ht->nmem = nmem;
71  ht->size = size;
72  if (ht->size <= 0)
73  ht->size = 29;
74  ht->ar = nmem_malloc(nmem, sizeof(*ht->ar) * ht->size);
75  for (i = 0; i<ht->size; i++)
76  ht->ar[i] = 0;
77  return ht;
78 }
79 
80 void data1_hash_insert(struct data1_hash_table *ht, const char *str,
81  void *clientData, int copy)
82 {
83  char *dstr = copy ? nmem_strdup(ht->nmem, str) : (char*) str;
84  if (strchr(str, '?') || strchr(str, '.'))
85  {
86  int i;
87  for (i = 0; i<ht->size; i++)
88  {
89  struct data1_hash_entry **he = &ht->ar[i];
90  for (; *he && strcmp(str, (*he)->str); he = &(*he)->next)
91  ;
92  if (!*he)
93  {
94  *he = nmem_malloc(ht->nmem, sizeof(**he));
95  (*he)->str = dstr;
96  (*he)->next = 0;
97  }
98  (*he)->clientData = clientData;
99  }
100  }
101  else
102  {
103  struct data1_hash_entry **he = &ht->ar[data1_hash_calc(ht, str)];
104  for (; *he && strcmp(str, (*he)->str); he = &(*he)->next)
105  ;
106  if (!*he)
107  {
108  *he = nmem_malloc(ht->nmem, sizeof(**he));
109  (*he)->str = dstr;
110  (*he)->next = 0;
111  }
112  (*he)->clientData = clientData;
113  }
114 }
115 
116 void *data1_hash_lookup(struct data1_hash_table *ht, const char *str)
117 {
118  struct data1_hash_entry **he = &ht->ar[data1_hash_calc(ht, str)];
119 
120  for (; *he && yaz_matchstr(str, (*he)->str); he = &(*he)->next)
121  ;
122  if (*he)
123  return (*he)->clientData;
124  return 0;
125 }
126 
/* A "systag" directive from an abstract syntax file: a name/value pair. */
struct data1_systag {
    char *name;                /* first argument of the directive */
    char *value;               /* second argument of the directive */
    struct data1_systag *next; /* next systag of the same absyn, or 0 */
};
132 
134 {
135  char *name;
138 };
139 
141 {
142  char *name;
145 };
146 
148 {
149  data1_element *e = nmem_malloc(data1_nmem_get(dh), sizeof(*e));
150  e->name = 0;
151  e->tag = 0;
152  e->termlists = 0;
153  e->next = e->children = 0;
154  e->sub_name = 0;
155  e->hash = 0;
156  return e;
157 }
158 
160 {
162 
163  while (p)
164  {
165  if (!yaz_matchstr (name, p->name))
166  return p->absyn;
167  p = p->next;
168  }
169  return 0;
170 }
171 /* *ostrich*
172  We need to destroy DFAs, in xp_element (xelm) definitions
173  pop, 2002-12-13
174 */
175 
177 {
179 
180  while (p)
181  {
182  data1_absyn *abs = p->absyn;
183  if (abs)
184  {
185  data1_xpelement *xpe = abs->xp_elements;
186  while (xpe) {
187  yaz_log (YLOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr);
188  if (xpe->dfa)
189  dfa_delete (&xpe->dfa);
190  xpe = xpe->next;
191  }
192  }
193  p = p->next;
194  }
195 }
196 
197 
198 void data1_absyn_trav (data1_handle dh, void *handle,
199  void (*fh)(data1_handle dh, void *h, data1_absyn *a))
200 {
202 
203  while (p)
204  {
205  (*fh)(dh, handle, p->absyn);
206  p = p->next;
207  }
208 }
209 
210 static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
211  enum DATA1_XPATH_INDEXING en);
212 
213 static data1_absyn *data1_absyn_add(data1_handle dh, const char *name,
214  enum DATA1_XPATH_INDEXING en)
215 {
216  char fname[512];
217  NMEM mem = data1_nmem_get (dh);
218 
219  data1_absyn_cache p = (data1_absyn_cache)nmem_malloc (mem, sizeof(*p));
221 
222  sprintf(fname, "%.500s.abs", name);
223  p->absyn = data1_read_absyn(dh, fname, en);
224  p->name = nmem_strdup(mem, name);
225  p->next = *pp;
226  *pp = p;
227  return p->absyn;
228 }
229 
230 data1_absyn *data1_get_absyn (data1_handle dh, const char *name,
231  enum DATA1_XPATH_INDEXING en)
232 {
233  data1_absyn *absyn;
234 
235  if (!(absyn = data1_absyn_search (dh, name)))
236  absyn = data1_absyn_add (dh, name, en);
237  return absyn;
238 }
239 
241 {
243 
244  while (p)
245  {
246  if (!yaz_matchstr (name, p->name))
247  return p->attset;
248  p = p->next;
249  }
250  return 0;
251 }
252 
254 {
256 
257  while (p)
258  {
259  if (p->attset->oid && !oid_oidcmp(oid, p->attset->oid))
260  return p->attset;
261  p = p->next;
262  }
263  return 0;
264 }
265 
267 {
268  NMEM mem = data1_nmem_get (dh);
269  data1_attset *attset;
270 
271  attset = data1_read_attset (dh, name);
272  if (!attset)
273  yaz_log (YLOG_WARN|YLOG_ERRNO, "Couldn't load attribute set %s", name);
274  else
275  {
277  nmem_malloc (mem, sizeof(*p));
279 
280  attset->name = p->name = nmem_strdup(mem, name);
281  p->attset = attset;
282  p->next = *pp;
283  *pp = p;
284  }
285  return attset;
286 }
287 
289 {
290  data1_attset *attset;
291 
292  if (!(attset = data1_attset_search_name (dh, name)))
293  attset = data1_attset_add (dh, name);
294  return attset;
295 }
296 
298  const char *name)
299 {
300  data1_esetname *r;
301 
302  for (r = a->esetnames; r; r = r->next)
303  if (!data1_matchstr(r->name, name))
304  return r;
305  return 0;
306 }
307 
308 /* we have multiple versions of data1_getelementbyname */
309 #define DATA1_GETELEMENTBYTAGNAME_VERSION 1
310 
312  data1_element *parent,
313  const char *tagname)
314 {
315  data1_element *r;
316  struct data1_hash_table *ht;
317 
318  /* It's now possible to have a data1 tree with no abstract syntax */
319  if ( !abs )
320  return 0;
321 
322  if (!parent)
323  r = abs->main_elements;
324  else
325  r = parent->children;
326 
327 #if DATA1_GETELEMENTBYTAGNAME_VERSION==1
328  /* using hash search */
329  if (!r)
330  return 0;
331 
332  ht = r->hash;
333  if (!ht)
334  {
335  /* build hash table (the first time) */
336  ht = r->hash = data1_hash_open(29, data1_nmem_get(dh));
337  for (; r; r = r->next)
338  {
339  data1_name *n;
340 
341  for (n = r->tag->names; n; n = n->next)
342  data1_hash_insert(ht, n->name, r, 0);
343  }
344  }
345  return data1_hash_lookup(ht, tagname);
346 #else
347  /* using linear search */
348  for (; r; r = r->next)
349  {
350  data1_name *n;
351 
352  for (n = r->tag->names; n; n = n->next)
353  if (!data1_matchstr(tagname, n->name))
354  return r;
355  }
356  return 0;
357 #endif
358 }
359 
361  const char *name)
362 {
363  data1_element *r;
364 
365  /* It's now possible to have a data1 tree with no abstract syntax */
366  if ( !absyn )
367  return 0;
368  for (r = absyn->main_elements; r; r = r->next)
369  if (!data1_matchstr(r->name, name))
370  return r;
371  return 0;
372 }
373 
374 
376 {
377  /* It's now possible to have a data1 tree with no abstract syntax */
378  if ( !absyn )
379  return;
380 
381  for (; e; e = e->next)
382  {
383  if (!e->sub_name)
384  {
385  if (e->children)
386  fix_element_ref (dh, absyn, e->children);
387  }
388  else
389  {
390  data1_sub_elements *sub_e = absyn->sub_elements;
391  while (sub_e && strcmp (e->sub_name, sub_e->name))
392  sub_e = sub_e->next;
393  if (sub_e)
394  e->children = sub_e->elements;
395  else
396  yaz_log (YLOG_WARN, "Unresolved reference to sub-elements %s",
397  e->sub_name);
398  }
399  }
400 }
401 /* *ostrich*
402 
403  New function, a bit dummy now... I've seen it in zrpn.c... We should build
404  more clever regexps...
405 
406 
407  //a -> ^a/.*$
408  //a/b -> ^b/a/.*$
409  /a -> ^a/$
410  /a/b -> ^b/a/$
411 
412  / -> none
413 
414  pop, 2002-12-13
415 
416  Now [] predicates are supported
417 
418  pop, 2003-01-17
419 
420  */
421 
422 static const char * mk_xpath_regexp (data1_handle dh, const char *expr)
423 {
424  const char *p = expr;
425  int abs = 1;
426  int e = 0;
427  char *stack[32];
428  char *res_p, *res = 0;
429  size_t res_size = 1;
430 
431  if (*p != '/')
432  return ("");
433  p++;
434  if (*p == '/')
435  {
436  abs =0;
437  p++;
438  }
439  while (*p)
440  {
441  int is_predicate = 0;
442  char *s;
443  int i, j;
444  for (i = 0; *p && !strchr("/",*p); i++, p++)
445  ;
446  res_size += (i+3); /* we'll add / between later .. */
447  stack[e] = (char *) nmem_malloc(data1_nmem_get(dh), i+1);
448  s = stack[e];
449  for (j = 0; j < i; j++)
450  {
451  const char *pp = p-i+j;
452  if (*pp == '[')
453  is_predicate=1;
454  else if (*pp == ']')
455  is_predicate=0;
456  else
457  {
458  if (!is_predicate) {
459  if (*pp == '*')
460  *s++ = '.';
461  *s++ = *pp;
462  }
463  }
464  }
465  *s = 0;
466  e++;
467  if (*p)
468  p++;
469  }
470  res_p = res = nmem_malloc(data1_nmem_get(dh), res_size + 10);
471 
472  if (stack[e-1][0] == '@') /* path/@attr spec (leaf is attribute) */
473  strcpy(res_p, "/");
474  else
475  strcpy(res_p, "[^@]*/"); /* path .. (index all cdata below it) */
476  res_p = res_p + strlen(res_p);
477  while (--e >= 0) {
478  sprintf(res_p, "%s/", stack[e]);
479  res_p += strlen(stack[e]) + 1;
480  }
481  if (!abs)
482  {
483  sprintf(res_p, ".*");
484  res_p += 2;
485  }
486  sprintf (res_p, "$");
487  res_p++;
488  yaz_log(YLOG_DEBUG, "Got regexp: %s", res);
489  return res;
490 }
491 
493  char *cp, const char *file, int lineno,
494  const char *element_name, data1_absyn *res,
495  int xpelement,
496  data1_attset *attset)
497 {
498  data1_termlist **tp = *tpp;
499  while(1)
500  {
501  char attname[512], structure[512];
502  char *source;
503  int r, i;
504  int level = 0;
505  structure[0] = '\0';
506  for (i = 0; cp[i] && i<sizeof(attname)-1; i++)
507  if (strchr(":,", cp[i]))
508  break;
509  else
510  attname[i] = cp[i];
511  if (i == 0)
512  {
513  if (*cp)
514  yaz_log(YLOG_WARN,
515  "%s:%d: Syntax error in termlistspec '%s'",
516  file, lineno, cp);
517  break;
518  }
519  attname[i] = '\0';
520  r = 1;
521  cp += i;
522  if (*cp == ':')
523  cp++;
524 
525  for (i = 0; cp[i] && i<sizeof(structure)-1; i++)
526  if (level == 0 && strchr(",", cp[i]))
527  break;
528  else
529  {
530  structure[i] = cp[i];
531  if (cp[i] == '(')
532  level++;
533  else if (cp[i] == ')')
534  level--;
535  }
536  structure[i] = '\0';
537  if (i)
538  r = 2;
539  cp += i;
540  if (*cp)
541  cp++; /* skip , */
542 
543  *tp = (data1_termlist *)
544  nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
545  (*tp)->next = 0;
546 
547  if (*attname == '!')
548  {
549  if (!xpelement && element_name)
550  strcpy(attname, element_name);
551  else if (xpelement)
552  strcpy(attname, ZEBRA_XPATH_CDATA);
553  }
554  if (attset)
555  {
556  if (!data1_getattbyname(dh, attset, attname))
557  {
558  yaz_log(YLOG_WARN, "Index '%s' not found in attset(s)",
559  attname);
560  }
561  }
562 
563  (*tp)->index_name = nmem_strdup(data1_nmem_get(dh), attname);
564  assert (*(*tp)->index_name != '!');
565  if (r == 2 && (source = strchr(structure, ':')))
566  *source++ = '\0'; /* cut off structure .. */
567  else
568  source = "data"; /* ok: default is leaf data */
569  (*tp)->source = (char *)
570  nmem_strdup (data1_nmem_get (dh), source);
571 
572  if (r < 2) /* is the structure qualified? */
573  (*tp)->structure = "w";
574  else
575  (*tp)->structure = (char *)
576  nmem_strdup (data1_nmem_get (dh), structure);
577  tp = &(*tp)->next;
578  }
579 
580  *tpp = tp;
581  return 0;
582 }
583 
584 /* quinn
585  * Converts a 'melm' field[$subfield] pattern to a simple xpath
586  */
587 static int melm2xpath(char *melm, char *buf)
588 {
589  char *dollar;
590  char *field = melm;
591  char *subfield;
592  char *fieldtype;
593  if ((dollar = strchr(melm, '$'))) {
594  *dollar = '\0';
595  subfield = ++dollar;
596  } else
597  subfield = "";
598  if (field[0] == '0' && field[1] == '0')
599  fieldtype = "controlfield";
600  else
601  fieldtype = "datafield";
602  sprintf(buf, "/*/%s[@tag=\"%s\"]", fieldtype, field);
603  if (*subfield)
604  sprintf(buf + strlen(buf), "/subfield[@code=\"%s\"]", subfield);
605  else if (field[0] != '0' || field[1] != '0')
606  strcat(buf, "/subfield");
607  yaz_log(YLOG_DEBUG, "Created xpath: '%s'", buf);
608  return 0;
609 }
610 
611 const char *data1_systag_lookup(data1_absyn *absyn, const char *tag,
612  const char *default_value)
613 {
614  struct data1_systag *p = absyn->systags;
615  for (; p; p = p->next)
616  if (!strcmp(p->name, tag))
617  return p->value;
618  return default_value;
619 }
620 
#define l_isspace(c) ((c) == '\t' || (c) == ' ' || (c) == '\n' || (c) == '\r')

/*
 * Read the next non-blank, non-comment line from f and split it into
 * whitespace separated tokens.
 *
 * f       stream to read from
 * lineno  incremented once for every fgets() call that returns data
 * line    buffer of len bytes receiving the line; tokens are
 *         NUL-terminated in place inside it
 * argv    receives pointers to the tokens
 * num     capacity of argv; at most num tokens are stored and any
 *         further tokens on the line are dropped
 *
 * Whitespace inside "..." or [...] does not split a token; the quote and
 * bracket characters stay part of the token.  A token starting with '#'
 * begins a trailing comment and ends the line.
 *
 * Returns the number of tokens stored, or 0 at end of file.
 */
int read_absyn_line(FILE *f, int *lineno, char *line, int len,
                    char *argv[], int num)
{
    char *p;
    int argc;
    int quoted = 0;

    /* skip blank lines and lines whose first non-space char is '#' */
    while ((p = fgets(line, len, f)))
    {
        (*lineno)++;
        while (*p && l_isspace(*p))
            p++;
        if (*p && *p != '#')
            break;
    }
    if (!p)
        return 0;

    /* argc < num: never write past the caller's argv[] */
    for (argc = 0; *p && argc < num; argc++)
    {
        if (*p == '#')  /* trailing comment */
            break;
        argv[argc] = p;
        while (*p && !(l_isspace(*p) && !quoted))
        {
            if (*p == '"')
                quoted = 1 - quoted;
            if (*p == '[')
                quoted = 1;
            if (*p == ']')
                quoted = 0;
            p++;
        }
        if (*p)
        {
            *(p++) = '\0';
            while (*p && l_isspace(*p))
                p++;
        }
    }
    return argc;
}
661 
663 {
664  if (root->u.root.absyn)
665  return root->u.root.absyn->marc;
666  return 0;
667 }
668 
670  data1_node *root)
671 {
672  if (root->u.root.absyn)
673  return root->u.root.absyn->main_elements;
674  return 0;
675 }
676 
677 static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
678  enum DATA1_XPATH_INDEXING default_xpath)
679 {
680  data1_sub_elements *cur_elements = NULL;
681  data1_xpelement **cur_xpelement = NULL;
682  data1_attset *attset_list = data1_empty_attset(dh);
683  data1_attset_child **attset_childp = &attset_list->children;
684 
685  data1_absyn *res = 0;
686  FILE *f;
688  data1_esetname **esetpp;
689  data1_maptab **maptabp;
690  data1_marctab **marcp;
691  data1_termlist *all = 0;
692  data1_tagset **tagset_childp;
693  struct data1_systag **systagsp;
694  int level = 0;
695  int lineno = 0;
696  int argc;
697  char *argv[50], line[512];
698 
699  f = data1_path_fopen(dh, file, "r");
700 
701  res = (data1_absyn *) nmem_malloc(data1_nmem_get(dh), sizeof(*res));
702  res->name = 0;
703  res->oid = 0;
704  res->tagset = 0;
705  res->encoding = 0;
706  res->xpath_indexing =
707  (f ? DATA1_XPATH_INDEXING_DISABLE : default_xpath);
708  res->systags = 0;
709  systagsp = &res->systags;
710  tagset_childp = &res->tagset;
711 
712  res->varset = 0;
713  res->esetnames = 0;
714  esetpp = &res->esetnames;
715  res->maptabs = 0;
716  maptabp = &res->maptabs;
717  res->marc = 0;
718  marcp = &res->marc;
719  res->sub_elements = NULL;
720  res->main_elements = NULL;
721  res->xp_elements = NULL;
722  cur_xpelement = &res->xp_elements;
723 
724  while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50)))
725  {
726  char *cmd = *argv;
727  if (!strcmp(cmd, "elm") || !strcmp(cmd, "element"))
728  {
729  data1_element *new_element;
730  int i;
731  char *p, *sub_p, *path, *name, *termlists;
732  int type, value;
733  data1_termlist **tp;
734 
735  if (argc < 4)
736  {
737  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to elm", file, lineno);
738  continue;
739  }
740  path = argv[1];
741  name = argv[2];
742  termlists = argv[3];
743 
744  if (!cur_elements)
745  {
746  cur_elements = (data1_sub_elements *)
747  nmem_malloc(data1_nmem_get(dh), sizeof(*cur_elements));
748  cur_elements->next = res->sub_elements;
749  cur_elements->elements = NULL;
750  cur_elements->name = "main";
751  res->sub_elements = cur_elements;
752 
753  level = 0;
754  ppl[level] = &cur_elements->elements;
755  }
756  p = path;
757  for (i = 1;; i++)
758  {
759  char *e;
760 
761  if ((e = strchr(p, '/')))
762  p = e+1;
763  else
764  break;
765  }
766  if (i > level+1)
767  {
768  yaz_log(YLOG_WARN, "%s:%d: Bad level increase", file, lineno);
769  fclose(f);
770  return 0;
771  }
772  level = i;
773  new_element = *ppl[level-1] = data1_mk_element(dh);
774 
775  tp = &new_element->termlists;
776  ppl[level-1] = &new_element->next;
777  ppl[level] = &new_element->children;
778 
779  /* consider subtree (if any) ... */
780  if ((sub_p = strchr (p, ':')) && sub_p[1])
781  {
782  *sub_p++ = '\0';
783  new_element->sub_name =
784  nmem_strdup (data1_nmem_get(dh), sub_p);
785  }
786  /* well-defined tag */
787  if (sscanf(p, "(%d,%d)", &type, &value) == 2)
788  {
789  if (!res->tagset)
790  {
791  yaz_log(YLOG_WARN, "%s:%d: No tagset loaded", file, lineno);
792  fclose(f);
793  return 0;
794  }
795  if (!(new_element->tag = data1_gettagbynum (dh, res->tagset,
796  type, value)))
797  {
798  yaz_log(YLOG_WARN, "%s:%d: Couldn't find tag %s in tagset",
799  file, lineno, p);
800  fclose(f);
801  return 0;
802  }
803  }
804  /* private tag */
805  else if (*p)
806  {
807  data1_tag *nt =
808  new_element->tag = (data1_tag *)
809  nmem_malloc(data1_nmem_get (dh),
810  sizeof(*new_element->tag));
811  nt->which = DATA1T_string;
812  nt->value.string = nmem_strdup(data1_nmem_get (dh), p);
813  nt->names = (data1_name *)
814  nmem_malloc(data1_nmem_get(dh),
815  sizeof(*new_element->tag->names));
816  nt->names->name = nt->value.string;
817  nt->names->next = 0;
818  nt->kind = DATA1K_string;
819  nt->next = 0;
820  nt->tagset = 0;
821  }
822  else
823  {
824  yaz_log(YLOG_WARN, "%s:%d: Bad element", file, lineno);
825  fclose(f);
826  return 0;
827  }
828  /* parse termList definitions */
829  p = termlists;
830  if (*p != '-')
831  {
832  if (parse_termlists (dh, &tp, p, file, lineno, name, res, 0,
833  attset_list))
834  {
835  fclose (f);
836  return 0;
837  }
838  *tp = all; /* append any ALL entries to the list */
839  }
840  new_element->name = nmem_strdup(data1_nmem_get (dh), name);
841  }
842  /* *ostrich*
843  New code to support xelm directive
844  for each xelm a dfa is built. xelms are stored in res->xp_elements
845 
846  maybe we should use a simple sscanf instead of dfa?
847 
848  pop, 2002-12-13
849 
850  Now [] predicates are supported. regexps and xpath structure is
851  a bit redundant, however it's comfortable later...
852 
853  pop, 2003-01-17
854  */
855 
856  else if (!strcmp(cmd, "xelm") || !strcmp(cmd, "melm")) {
857 
858  int i;
859  char *p, *xpath_expr, *termlists;
860  const char *regexp;
861  struct DFA *dfa = 0;
862  data1_termlist **tp;
863  char melm_xpath[128];
864  data1_xpelement *xp_ele = 0;
865  data1_xpelement *last_match = 0;
866 
867  if (argc != 3)
868  {
869  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to %s",
870  file, lineno, cmd);
871  continue;
872  }
873 
874  if (!strcmp(cmd, "melm")) {
875  if (melm2xpath(argv[1], melm_xpath) < 0)
876  continue;
877  xpath_expr = melm_xpath;
878  } else {
879  xpath_expr = argv[1];
880  }
881  termlists = argv[2];
882  regexp = mk_xpath_regexp(dh, xpath_expr);
883 
884 #if OPTIMIZE_MELM
885  /* get last of existing regulars with same regexp */
886  for (xp_ele = res->xp_elements; xp_ele; xp_ele = xp_ele->next)
887  if (!strcmp(xp_ele->regexp, regexp))
888  last_match = xp_ele;
889 #endif
890  if (!last_match)
891  {
892  /* new regular expression . Parse + generate */
893  const char *regexp_ptr = regexp;
894 
895  dfa = dfa_init();
896  i = dfa_parse (dfa, &regexp_ptr);
897  if (i || *regexp_ptr) {
898  yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno);
899  dfa_delete (&dfa);
900  continue;
901  }
902  }
903  *cur_xpelement = (data1_xpelement *)
904  nmem_malloc(data1_nmem_get(dh), sizeof(**cur_xpelement));
905  (*cur_xpelement)->next = 0;
906  (*cur_xpelement)->match_next = 0;
907  if (last_match)
908  last_match->match_next = *cur_xpelement;
909 #if OPTIMIZE_MELM
910  (*cur_xpelement)->regexp = regexp;
911 #endif
912  (*cur_xpelement)->xpath_expr = nmem_strdup(data1_nmem_get (dh),
913  xpath_expr);
914 
915  if (dfa)
916  dfa_mkstate (dfa);
917  (*cur_xpelement)->dfa = dfa;
918 
919 #ifdef ENHANCED_XELM
920  (*cur_xpelement)->xpath_len =
922  xpath_expr,
923  (*cur_xpelement)->xpath, XPATH_STEP_COUNT,
924  data1_nmem_get(dh));
925 #endif
926  (*cur_xpelement)->termlists = 0;
927  tp = &(*cur_xpelement)->termlists;
928 
929  /* parse termList definitions */
930  p = termlists;
931  if (*p != '-')
932  {
933  if (parse_termlists (dh, &tp, p, file, lineno,
934  xpath_expr, res, 1, attset_list))
935  {
936  fclose (f);
937  return 0;
938  }
939  *tp = all; /* append any ALL entries to the list */
940  }
941  cur_xpelement = &(*cur_xpelement)->next;
942  }
943  else if (!strcmp(cmd, "section"))
944  {
945  char *name;
946 
947  if (argc < 2)
948  {
949  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to section",
950  file, lineno);
951  continue;
952  }
953  name = argv[1];
954 
955  cur_elements = (data1_sub_elements *)
956  nmem_malloc(data1_nmem_get(dh), sizeof(*cur_elements));
957  cur_elements->next = res->sub_elements;
958  cur_elements->elements = NULL;
959  cur_elements->name = nmem_strdup (data1_nmem_get(dh), name);
960  res->sub_elements = cur_elements;
961 
962  level = 0;
963  ppl[level] = &cur_elements->elements;
964  }
965  else if (!strcmp(cmd, "xpath"))
966  {
967  if (argc != 2)
968  {
969  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to 'xpath' directive",
970  file, lineno);
971  continue;
972  }
973  if (!strcmp(argv[1], "enable"))
975  else if (!strcmp (argv[1], "disable"))
977  else
978  {
979  yaz_log(YLOG_WARN, "%s:%d: Expecting disable/enable "
980  "after 'xpath' directive", file, lineno);
981  }
982  }
983  else if (!strcmp(cmd, "all"))
984  {
985  data1_termlist **tp = &all;
986  if (all)
987  {
988  yaz_log(YLOG_WARN, "%s:%d: Too many 'all' directives - ignored",
989  file, lineno);
990  continue;
991  }
992  if (argc != 2)
993  {
994  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to 'all' directive",
995  file, lineno);
996  continue;
997  }
998  if (parse_termlists (dh, &tp, argv[1], file, lineno, 0, res, 0,
999  attset_list))
1000  {
1001  fclose (f);
1002  return 0;
1003  }
1004  }
1005  else if (!strcmp(cmd, "name"))
1006  {
1007  if (argc != 2)
1008  {
1009  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to name directive",
1010  file, lineno);
1011  continue;
1012  }
1013  res->name = nmem_strdup(data1_nmem_get(dh), argv[1]);
1014  }
1015  else if (!strcmp(cmd, "reference"))
1016  {
1017  char *name;
1018 
1019  if (argc != 2)
1020  {
1021  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to reference",
1022  file, lineno);
1023  continue;
1024  }
1025  name = argv[1];
1026  res->oid = yaz_string_to_oid_nmem(yaz_oid_std(),
1027  CLASS_SCHEMA, name,
1028  data1_nmem_get(dh));
1029  if (!res->oid)
1030  {
1031  yaz_log(YLOG_WARN, "%s:%d: Unknown tagset ref '%s'",
1032  file, lineno, name);
1033  continue;
1034  }
1035  }
1036  else if (!strcmp(cmd, "attset"))
1037  {
1038  char *name;
1039  data1_attset *attset;
1040 
1041  if (argc != 2)
1042  {
1043  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to attset",
1044  file, lineno);
1045  continue;
1046  }
1047  name = argv[1];
1048  if (!(attset = data1_get_attset (dh, name)))
1049  {
1050  yaz_log(YLOG_WARN, "%s:%d: Couldn't find attset %s",
1051  file, lineno, name);
1052  continue;
1053  }
1054  *attset_childp = (data1_attset_child *)
1055  nmem_malloc (data1_nmem_get(dh), sizeof(**attset_childp));
1056  (*attset_childp)->child = attset;
1057  (*attset_childp)->next = 0;
1058  attset_childp = &(*attset_childp)->next;
1059  }
1060  else if (!strcmp(cmd, "tagset"))
1061  {
1062  char *name;
1063  int type = 0;
1064  if (argc < 2)
1065  {
1066  yaz_log(YLOG_WARN, "%s:%d: Bad # of args to tagset",
1067  file, lineno);
1068  continue;
1069  }
1070  name = argv[1];
1071  if (argc == 3)
1072  type = atoi(argv[2]);
1073  *tagset_childp = data1_read_tagset (dh, name, type);
1074  if (!(*tagset_childp))
1075  {
1076  yaz_log(YLOG_WARN, "%s:%d: Couldn't load tagset %s",
1077  file, lineno, name);
1078  continue;
1079  }
1080  tagset_childp = &(*tagset_childp)->next;
1081  }
1082  else if (!strcmp(cmd, "varset"))
1083  {
1084  char *name;
1085 
1086  if (argc != 2)
1087  {
1088  yaz_log(YLOG_WARN, "%s:%d: Bad # of args in varset",
1089  file, lineno);
1090  continue;
1091  }
1092  name = argv[1];
1093  if (!(res->varset = data1_read_varset (dh, name)))
1094  {
1095  yaz_log(YLOG_WARN, "%s:%d: Couldn't load Varset %s",
1096  file, lineno, name);
1097  continue;
1098  }
1099  }
1100  else if (!strcmp(cmd, "esetname"))
1101  {
1102  char *name, *fname;
1103 
1104  if (argc != 3)
1105  {
1106  yaz_log(YLOG_WARN, "%s:%d: Bad # of args in esetname",
1107  file, lineno);
1108  continue;
1109  }
1110  name = argv[1];
1111  fname = argv[2];
1112 
1113  *esetpp = (data1_esetname *)
1114  nmem_malloc(data1_nmem_get(dh), sizeof(**esetpp));
1115  (*esetpp)->name = nmem_strdup(data1_nmem_get(dh), name);
1116  (*esetpp)->next = 0;
1117  if (*fname == '@')
1118  (*esetpp)->spec = 0;
1119  else if (!((*esetpp)->spec = data1_read_espec1 (dh, fname)))
1120  {
1121  yaz_log(YLOG_WARN, "%s:%d: Espec-1 read failed for %s",
1122  file, lineno, fname);
1123  continue;
1124  }
1125  esetpp = &(*esetpp)->next;
1126  }
1127  else if (!strcmp(cmd, "maptab"))
1128  {
1129  char *name;
1130 
1131  if (argc != 2)
1132  {
1133  yaz_log(YLOG_WARN, "%s:%d: Bad # of args for maptab",
1134  file, lineno);
1135  continue;
1136  }
1137  name = argv[1];
1138  if (!(*maptabp = data1_read_maptab (dh, name)))
1139  {
1140  yaz_log(YLOG_WARN, "%s:%d: Couldn't load maptab %s",
1141  file, lineno, name);
1142  continue;
1143  }
1144  maptabp = &(*maptabp)->next;
1145  }
1146  else if (!strcmp(cmd, "marc"))
1147  {
1148  char *name;
1149 
1150  if (argc != 2)
1151  {
1152  yaz_log(YLOG_WARN, "%s:%d: Bad # or args for marc",
1153  file, lineno);
1154  continue;
1155  }
1156  name = argv[1];
1157  if (!(*marcp = data1_read_marctab (dh, name)))
1158  {
1159  yaz_log(YLOG_WARN, "%s:%d: Couldn't read marctab %s",
1160  file, lineno, name);
1161  continue;
1162  }
1163  marcp = &(*marcp)->next;
1164  }
1165  else if (!strcmp(cmd, "encoding"))
1166  {
1167  if (argc != 2)
1168  {
1169  yaz_log(YLOG_WARN, "%s:%d: Bad # or args for encoding",
1170  file, lineno);
1171  continue;
1172  }
1173  res->encoding = nmem_strdup (data1_nmem_get(dh), argv[1]);
1174  }
1175  else if (!strcmp(cmd, "systag"))
1176  {
1177  if (argc != 3)
1178  {
1179  yaz_log(YLOG_WARN, "%s:%d: Bad # or args for systag",
1180  file, lineno);
1181  continue;
1182  }
1183  *systagsp = nmem_malloc (data1_nmem_get(dh), sizeof(**systagsp));
1184 
1185  (*systagsp)->name = nmem_strdup(data1_nmem_get(dh), argv[1]);
1186  (*systagsp)->value = nmem_strdup(data1_nmem_get(dh), argv[2]);
1187  systagsp = &(*systagsp)->next;
1188  }
1189  else
1190  {
1191  yaz_log(YLOG_WARN, "%s:%d: Unknown directive '%s'", file,
1192  lineno, cmd);
1193  continue;
1194  }
1195  }
1196  if (f)
1197  fclose(f);
1198 
1199  for (cur_elements = res->sub_elements; cur_elements;
1200  cur_elements = cur_elements->next)
1201  {
1202  if (!strcmp (cur_elements->name, "main"))
1203  res->main_elements = cur_elements->elements;
1204  fix_element_ref (dh, res, cur_elements->elements);
1205  }
1206  *systagsp = 0;
1207  return res;
1208 }
1209 
1210 /*
1211  * Local variables:
1212  * c-basic-offset: 4
1213  * c-file-style: "Stroustrup"
1214  * indent-tabs-mode: nil
1215  * End:
1216  * vim: shiftwidth=4 tabstop=8 expandtab
1217  */
1218 
data1_datatype kind
Definition: data1.h:212
char * name
Definition: d1_absyn.h:49
struct DFA * dfa_init(void)
Definition: dfa.c:1090
void dfa_mkstate(struct DFA *)
Definition: dfa.c:1146
data1_tag * data1_gettagbynum(data1_handle dh, data1_tagset *s, int type, int value)
Definition: d1_tagset.c:64
#define DATA1T_string
Definition: data1.h:205
data1_name * names
Definition: data1.h:203
struct data1_sub_elements * next
Definition: data1.h:255
struct data1_attset_cache_info * data1_attset_cache
Definition: data1.h:119
struct data1_systag * systags
Definition: d1_absyn.h:59
data1_esetname * data1_getesetbyname(data1_handle dh, data1_absyn *a, const char *name)
Definition: d1_absyn.c:297
char * name
Definition: data1.h:70
Definition: d1_absyn.c:43
data1_attset * attset
Definition: d1_absyn.c:143
data1_varset * data1_read_varset(data1_handle dh, const char *file)
Definition: d1_varset.c:56
data1_attset * data1_attset_search_id(data1_handle dh, const Odr_oid *oid)
Definition: d1_absyn.c:253
static const char * mk_xpath_regexp(data1_handle dh, const char *expr)
Definition: d1_absyn.c:422
struct data1_xpelement * next
Definition: d1_absyn.h:39
int zebra_parse_xpath_str(const char *xpath_string, struct xpath_location_step *xpath, int max, NMEM mem)
Definition: xpath.c:162
struct data1_marctab * next
Definition: data1.h:156
#define data1_matchstr(s1, s2)
Definition: data1.h:36
static int melm2xpath(char *melm, char *buf)
Definition: d1_absyn.c:587
data1_attset * data1_read_attset(data1_handle dh, const char *file)
Definition: d1_attset.c:63
data1_maptab * data1_read_maptab(data1_handle dh, const char *file)
Definition: d1_map.c:42
Odr_oid * oid
Definition: data1.h:71
data1_varset * varset
Definition: d1_absyn.h:52
struct data1_xpelement * xp_elements
Definition: d1_absyn.h:58
data1_element * data1_getelementbytagname(data1_handle dh, data1_absyn *abs, data1_element *parent, const char *tagname)
Definition: d1_absyn.c:311
char * name
Definition: data1.h:114
data1_esetname * esetnames
Definition: d1_absyn.h:53
struct data1_element * children
Definition: data1.h:248
char * name
Definition: data1.h:244
char * xpath_expr
Definition: d1_absyn.h:32
data1_termlist * termlists
Definition: data1.h:246
FILE * data1_path_fopen(data1_handle dh, const char *file, const char *mode)
Definition: d1_handle.c:151
int read_absyn_line(FILE *f, int *lineno, char *line, int len, char *argv[], int num)
Definition: d1_absyn.c:623
struct data1_absyn_cache_info * data1_absyn_cache
Definition: data1.h:118
data1_attset * data1_get_attset(data1_handle dh, const char *name)
Definition: d1_absyn.c:288
void fix_element_ref(data1_handle dh, data1_absyn *absyn, data1_element *e)
Definition: d1_absyn.c:375
struct data1_hash_entry ** ar
Definition: d1_absyn.c:40
data1_absyn * absyn
Definition: d1_absyn.c:136
static int parse_termlists(data1_handle dh, data1_termlist ***tpp, char *cp, const char *file, int lineno, const char *element_name, data1_absyn *res, int xpelement, data1_attset *attset)
Definition: d1_absyn.c:492
struct data1_tagset * tagset
Definition: data1.h:214
data1_attset * data1_attset_search_name(data1_handle dh, const char *name)
Definition: d1_absyn.c:240
void * data1_hash_lookup(struct data1_hash_table *ht, const char *str)
Definition: d1_absyn.c:116
data1_marctab * marc
Definition: d1_absyn.h:55
void dfa_delete(struct DFA **)
Definition: dfa.c:1156
const char * data1_systag_lookup(data1_absyn *absyn, const char *tag, const char *default_value)
Definition: d1_absyn.c:611
data1_element * elements
Definition: data1.h:256
char * name
Definition: data1.h:161
Z_Espec1 * data1_read_espec1(data1_handle dh, const char *file)
Definition: d1_espec.c:213
union data1_node::@2 u
data1_tagset * tagset
Definition: d1_absyn.h:51
struct data1_termlist * next
Definition: data1.h:235
struct data1_node::@2::@3 root
void * clientData
Definition: d1_absyn.c:44
static data1_absyn * data1_read_absyn(data1_handle dh, const char *file, enum DATA1_XPATH_INDEXING en)
Definition: d1_absyn.c:677
data1_attset * child
Definition: data1.h:64
struct data1_esetname * next
Definition: data1.h:163
data1_attset_cache * data1_attset_cache_get(data1_handle dh)
Definition: d1_handle.c:76
data1_element * data1_getelementbyname(data1_handle dh, data1_absyn *absyn, const char *name)
Definition: d1_absyn.c:360
struct data1_hash_table * data1_hash_open(int size, NMEM nmem)
Definition: d1_absyn.c:66
data1_element * main_elements
Definition: d1_absyn.h:57
data1_attset_child * children
Definition: data1.h:73
data1_tagset * next
Definition: data1.h:227
data1_marctab * data1_absyn_getmarctab(data1_handle dh, data1_node *root)
Definition: d1_absyn.c:662
#define D1_MAX_NESTING
Definition: d1_absyn.c:35
void data1_absyn_destroy(data1_handle dh)
Definition: d1_absyn.c:176
data1_marctab * data1_read_marctab(data1_handle dh, const char *file)
Definition: d1_marc.c:37
data1_attset * data1_empty_attset(data1_handle dh)
Definition: d1_attset.c:50
const char * regexp
Definition: d1_absyn.h:41
#define l_isspace(c)
Definition: d1_absyn.c:621
data1_sub_elements * sub_elements
Definition: d1_absyn.h:56
int which
Definition: data1.h:206
unsigned data1_hash_calc(struct data1_hash_table *ht, const char *str)
Definition: d1_absyn.c:49
#define ZEBRA_XPATH_CDATA
Definition: recctrl.h:39
union data1_tag::@1 value
enum DATA1_XPATH_INDEXING xpath_indexing
Definition: d1_absyn.h:61
data1_attset_cache next
Definition: d1_absyn.c:144
data1_element * data1_mk_element(data1_handle dh)
Definition: d1_absyn.c:147
data1_attset * data1_attset_add(data1_handle dh, const char *name)
Definition: d1_absyn.c:266
struct data1_element * next
Definition: data1.h:249
data1_absyn_cache next
Definition: d1_absyn.c:137
data1_att * data1_getattbyname(data1_handle dh, data1_attset *s, const char *name)
Definition: d1_attset.c:31
int dfa_parse(struct DFA *, const char **)
Definition: dfa.c:1119
data1_attset_child * next
Definition: data1.h:65
struct DFA * dfa
Definition: d1_absyn.h:37
struct data1_xpelement * match_next
Definition: d1_absyn.h:44
char * sub_name
Definition: data1.h:247
NMEM data1_nmem_get(data1_handle dh)
Definition: d1_handle.c:66
data1_tag * tag
Definition: data1.h:245
DATA1_XPATH_INDEXING
Definition: data1.h:347
data1_maptab * maptabs
Definition: d1_absyn.h:54
Odr_oid * oid
Definition: d1_absyn.h:50
struct data1_name * next
Definition: data1.h:115
void data1_hash_insert(struct data1_hash_table *ht, const char *str, void *clientData, int copy)
Definition: d1_absyn.c:80
data1_absyn * data1_get_absyn(data1_handle dh, const char *name, enum DATA1_XPATH_INDEXING en)
Definition: d1_absyn.c:230
data1_attset * next
Definition: data1.h:74
data1_element * data1_absyn_getelements(data1_handle dh, data1_node *root)
Definition: d1_absyn.c:669
char * value
Definition: d1_absyn.c:129
struct data1_hash_table * hash
Definition: data1.h:250
struct data1_systag * next
Definition: d1_absyn.c:130
struct data1_tag * next
Definition: data1.h:215
data1_absyn_cache * data1_absyn_cache_get(data1_handle dh)
Definition: d1_handle.c:71
static data1_absyn * data1_absyn_add(data1_handle dh, const char *name, enum DATA1_XPATH_INDEXING en)
Definition: d1_absyn.c:213
char * encoding
Definition: d1_absyn.h:60
char * name
Definition: d1_absyn.c:128
data1_absyn * data1_absyn_search(data1_handle dh, const char *name)
Definition: d1_absyn.c:159
#define XPATH_STEP_COUNT
Definition: zebra_xpath.h:25
struct data1_maptab * next
Definition: data1.h:108
char * str
Definition: d1_absyn.c:45
void data1_absyn_trav(data1_handle dh, void *handle, void(*fh)(data1_handle dh, void *h, data1_absyn *a))
Definition: d1_absyn.c:198
struct data1_hash_entry * next
Definition: d1_absyn.c:46
Definition: dfa.h:53
char * string
Definition: data1.h:210
data1_tagset * data1_read_tagset(data1_handle dh, const char *file, int type)
Definition: d1_tagset.c:120