IDZEBRA  2.1.2
recgrs.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #include <sys/types.h>
26 #include <ctype.h>
27 
28 #include <yaz/log.h>
29 #include <yaz/oid_db.h>
30 #include <yaz/diagbib1.h>
31 
32 #include <d1_absyn.h>
33 #include <idzebra/recgrs.h>
34 
35 #define GRS_MAX_WORD 512
36 
37 struct source_parser {
38  int len;
39  const char *tok;
40  const char *src;
41  int lookahead;
42  NMEM nmem;
43 };
44 
45 static int sp_lex(struct source_parser *sp)
46 {
47  while (*sp->src == ' ')
48  (sp->src)++;
49  sp->tok = sp->src;
50  sp->len = 0;
51  while (*sp->src && !strchr("<>();,-: ", *sp->src))
52  {
53  sp->src++;
54  sp->len++;
55  }
56  if (sp->len)
57  sp->lookahead = 't';
58  else
59  {
60  sp->lookahead = *sp->src;
61  if (*sp->src)
62  sp->src++;
63  }
64  return sp->lookahead;
65 }
66 
67 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
68 
69 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
70 {
71  int start, len;
72  RecWord tmp_w;
73 
74  /* ( */
75  sp_lex(sp);
76  if (sp->lookahead != '(')
77  return 0;
78  sp_lex(sp); /* skip ( */
79 
80  /* 1st arg: string */
81  if (!sp_expr(sp, n, wrd))
82  return 0;
83 
84  if (sp->lookahead != ',')
85  return 0;
86  sp_lex(sp); /* skip , */
87 
88  /* 2nd arg: start */
89  if (!sp_expr(sp, n, &tmp_w))
90  return 0;
91  start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
92 
93  if (sp->lookahead == ',')
94  {
95  sp_lex(sp); /* skip , */
96 
97  /* 3rd arg: length */
98  if (!sp_expr(sp, n, &tmp_w))
99  return 0;
100  len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
101  }
102  else
103  len = wrd->term_len;
104 
105  /* ) */
106  if (sp->lookahead != ')')
107  return 0;
108  sp_lex(sp);
109 
110  if (wrd->term_buf)
111  {
112  if (start >= wrd->term_len)
113  wrd->term_len = 0;
114  else
115  {
116  wrd->term_len -= start;
117  wrd->term_buf += start;
118 
119  if (wrd->term_len > len)
120  wrd->term_len = len;
121  }
122  }
123  return 1;
124 }
125 
126 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
127 {
128  char num_str[20];
129  int min_pos = -1;
130  sp_lex(sp);
131  if (sp->lookahead != '(')
132  return 0;
133  sp_lex(sp); /* skip ( */
134  if (!sp_expr(sp, n, wrd))
135  return 0;
136  while (sp->lookahead == ',')
137  {
138  RecWord search_w;
139  int i;
140  sp_lex(sp); /* skip , */
141 
142  if (!sp_expr(sp, n, &search_w))
143  return 0;
144  for (i = 0; i<wrd->term_len; i++)
145  {
146  int j;
147  for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
148  if (wrd->term_buf[i+j] != search_w.term_buf[j])
149  break;
150  if (j == search_w.term_len) /* match ? */
151  {
152  if (min_pos == -1 || i < min_pos)
153  min_pos = i;
154  break;
155  }
156  }
157  }
158  if (sp->lookahead != ')')
159  return 0;
160  sp_lex(sp);
161  if (min_pos == -1)
162  min_pos = 0; /* the default if not found */
163  sprintf(num_str, "%d", min_pos);
164  wrd->term_buf = nmem_strdup(sp->nmem, num_str);
165  wrd->term_len = strlen(wrd->term_buf);
166  return 1;
167 }
168 
169 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
170 {
171  if (sp->lookahead != 't')
172  return 0;
173  if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
174  {
175  if (n->which == DATA1N_data)
176  {
177  wrd->term_buf = n->u.data.data;
178  wrd->term_len = n->u.data.len;
179  }
180  sp_lex(sp);
181  }
182  else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
183  {
184  if (n->which == DATA1N_tag)
185  {
186  wrd->term_buf = n->u.tag.tag;
187  wrd->term_len = strlen(n->u.tag.tag);
188  }
189  sp_lex(sp);
190  }
191  else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
192  {
193  RecWord tmp_w;
194  sp_lex(sp);
195  if (sp->lookahead != '(')
196  return 0;
197  sp_lex(sp);
198 
199  if (!sp_expr(sp, n, &tmp_w))
200  return 0;
201 
202  wrd->term_buf = "";
203  wrd->term_len = 0;
204  if (n->which == DATA1N_tag)
205  {
206  data1_xattr *p = n->u.tag.attributes;
207  while (p && strlen(p->name) != tmp_w.term_len &&
208  memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
209  p = p->next;
210  if (p)
211  {
212  wrd->term_buf = p->value;
213  wrd->term_len = strlen(p->value);
214  }
215  }
216  if (sp->lookahead != ')')
217  return 0;
218  sp_lex(sp);
219  }
220  else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
221  {
222  return sp_first(sp, n, wrd);
223  }
224  else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
225  {
226  return sp_range(sp, n, wrd);
227  }
228  else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
229  {
230  char *b;
231  wrd->term_len = sp->len;
232  b = nmem_malloc(sp->nmem, sp->len);
233  memcpy(b, sp->tok, sp->len);
234  wrd->term_buf = b;
235  sp_lex(sp);
236  }
237  else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
238  {
239  char *b;
240  wrd->term_len = sp->len - 2;
241  b = nmem_malloc(sp->nmem, wrd->term_len);
242  memcpy(b, sp->tok+1, wrd->term_len);
243  wrd->term_buf = b;
244  sp_lex(sp);
245  }
246  else
247  {
248  wrd->term_buf = "";
249  wrd->term_len = 0;
250  sp_lex(sp);
251  }
252  return 1;
253 }
254 
256 {
257  struct source_parser *sp = xmalloc(sizeof(*sp));
258 
259  sp->nmem = nmem_create();
260  return sp;
261 }
262 
263 static void source_parser_destroy(struct source_parser *sp)
264 {
265  if (!sp)
266  return;
267  nmem_destroy(sp->nmem);
268  xfree(sp);
269 }
270 
271 static int sp_parse(struct source_parser *sp,
272  data1_node *n, RecWord *wrd, const char *src)
273 {
274  sp->len = 0;
275  sp->tok = 0;
276  sp->src = src;
277  sp->lookahead = 0;
278  nmem_reset(sp->nmem);
279 
280  sp_lex(sp);
281  return sp_expr(sp, n, wrd);
282 }
283 
285 {
286  int res = 1;
287  char *attname;
288  data1_xattr *attr;
289 
290  if (!p) {
291  return 1;
292  } else {
293  if (p->which == XPATH_PREDICATE_RELATION) {
294  if (p->u.relation.name[0]) {
295  if (*p->u.relation.name != '@') {
296  yaz_log(YLOG_WARN,
297  " Only attributes (@) are supported in xelm xpath predicates");
298  yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
299  return 1;
300  }
301  attname = p->u.relation.name + 1;
302  res = 0;
303  /* looking for the attribute with a specified name */
304  for (attr = n->u.tag.attributes; attr; attr = attr->next) {
305  if (!strcmp(attr->name, attname)) {
306  if (p->u.relation.op[0]) {
307  if (*p->u.relation.op != '=') {
308  yaz_log(YLOG_WARN,
309  "Only '=' relation is supported (%s)",p->u.relation.op);
310  yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
311  res = 1; break;
312  } else {
313  if (!strcmp(attr->value, p->u.relation.value)) {
314  res = 1; break;
315  }
316  }
317  } else {
318  /* attribute exists, no value specified */
319  res = 1; break;
320  }
321  }
322  }
323  return res;
324  } else {
325  return 1;
326  }
327  }
328  else if (p->which == XPATH_PREDICATE_BOOLEAN) {
329  if (!strcmp(p->u.boolean.op,"and")) {
330  return d1_check_xpath_predicate(n, p->u.boolean.left)
331  && d1_check_xpath_predicate(n, p->u.boolean.right);
332  }
333  else if (!strcmp(p->u.boolean.op,"or")) {
334  return (d1_check_xpath_predicate(n, p->u.boolean.left)
335  || d1_check_xpath_predicate(n, p->u.boolean.right));
336  } else {
337  yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
338  return 1;
339  }
340  }
341  }
342  return 0;
343 }
344 
345 
346 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
347 {
348  struct DFA_state *s = dfaar[0]; /* start state */
349  struct DFA_tran *t;
350  int i;
351  const char *p = text;
352  unsigned char c;
353 
354  for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
355  {
356  if (c >= t->ch[0] && c <= t->ch[1])
357  {
358  while (i >= 0)
359  {
360  /* move to next state and return if we get a match */
361  s = dfaar[t->to];
362  if (s->rule_no)
363  return 1;
364  /* next char */
365  if (!c)
366  return 0;
367  c = *p++;
368  for (t = s->trans, i = s->tran_no; --i >= 0; t++)
369  if (c >= t->ch[0] && c <= t->ch[1])
370  break;
371  }
372  }
373  }
374  return 0;
375 }
376 
377 /* *ostrich*
378 
379 New function, looking for xpath "element" definitions in abs, by
380 tagpath, using a kind of ugly regexp search. The DFA was built while
381 parsing abs, so here we just go through them and try to match
382 against the given tagpath. The first matching entry is returned.
383 
384 pop, 2002-12-13
385 
386 Added support for enhanced xelm. Now [] predicates are considered
387 as well, when selecting indexing rules... (why the hell it's called
388 termlist???)
389 
390 pop, 2003-01-17
391 
392 */
393 
395 {
396  data1_absyn *abs = n->root->u.root.absyn;
397 
398  data1_xpelement *xpe = 0;
399  data1_node *nn;
400 #ifdef ENHANCED_XELM
401  struct xpath_location_step *xp;
402 #endif
403  char *pexpr = xmalloc(strlen(tagpath)+5);
404 
405  sprintf(pexpr, "/%s\n", tagpath);
406 
407  for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
408  xpe->match_state = -1; /* don't know if it matches yet */
409 
410  for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
411  {
412  int i;
413  int ok = xpe->match_state;
414  if (ok == -1)
415  { /* don't know whether there is a match yet */
416  data1_xpelement *xpe1;
417 
418  assert(xpe->dfa);
419  ok = dfa_match_first(xpe->dfa->states, pexpr);
420 
421 #if OPTIMIZE_MELM
422  /* mark this and following ones with same regexp */
423  for (xpe1 = xpe; xpe1; xpe1 = xpe1->match_next)
424  xpe1->match_state = ok;
425 #endif
426  }
427  assert(ok == 0 || ok == 1);
428  if (ok) {
429 #ifdef ENHANCED_XELM
430  /* we have to check the perdicates up to the root node */
431  xp = xpe->xpath;
432 
433  /* find the first tag up in the node structure */
434  for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
435  ;
436 
437  /* go from inside out in the node structure, while going
438  backwards trough xpath location steps ... */
439  for (i = xpe->xpath_len - 1; i>0; i--)
440  {
441  if (!d1_check_xpath_predicate(nn, xp[i].predicate))
442  {
443  ok = 0;
444  break;
445  }
446 
447  if (nn->which == DATA1N_tag)
448  nn = nn->parent;
449  }
450 #endif
451  if (ok)
452  break;
453  }
454  }
455 
456  xfree(pexpr);
457 
458  if (xpe) {
459  return xpe->termlists;
460  } else {
461  return NULL;
462  }
463 }
464 
465 /* use
466  1 start element (tag)
467  2 end element
468  3 start attr (and attr-exact)
469  4 end attr
470 
471  1016 cdata
472  1015 attr data
473 
474  *ostrich*
475 
476  Now, if there is a matching xelm described in abs, for the
477  indexed element or the attribute, then the data is handled according
478  to those definitions...
479 
480  modified by pop, 2002-12-13
481 */
482 
483 /* add xpath index for an attribute */
484 static void index_xpath_attr(char *tag_path, char *name, char *value,
485  char *structure, struct recExtractCtrl *p,
486  RecWord *wrd)
487 {
489  wrd->index_type = "0";
490  wrd->term_buf = tag_path;
491  wrd->term_len = strlen(tag_path);
492  (*p->tokenAdd)(wrd);
493 
494  if (value) {
496  wrd->index_type = "w";
497  wrd->term_buf = value;
498  wrd->term_len = strlen(value);
499  (*p->tokenAdd)(wrd);
500  }
502  wrd->index_type = "0";
503  wrd->term_buf = tag_path;
504  wrd->term_len = strlen(tag_path);
505  (*p->tokenAdd)(wrd);
506 }
507 
508 
509 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
510 {
511  size_t flen = 0;
512  data1_node *nn;
513 
514  /* we have to fetch the whole path to the data tag */
515  for (nn = n; nn; nn = nn->parent)
516  {
517  if (nn->which == DATA1N_tag)
518  {
519  size_t tlen = strlen(nn->u.tag.tag);
520  if (tlen + flen > (max - 2))
521  break;
522  memcpy(tag_path_full + flen, nn->u.tag.tag, tlen);
523  flen += tlen;
524  tag_path_full[flen++] = '/';
525  }
526  else
527  if (nn->which == DATA1N_root)
528  break;
529  }
530  tag_path_full[flen] = 0;
531 }
532 
533 
534 static void index_xpath(struct source_parser *sp, data1_node *n,
535  struct recExtractCtrl *p,
536  int level, RecWord *wrd,
537  char *xpath_index,
538  int xpath_is_start
539  )
540 {
541  int i;
542  char tag_path_full[1024];
543  int termlist_only = 1;
544  data1_termlist *tl;
545 
546  if (!n->root->u.root.absyn
547  ||
548  n->root->u.root.absyn->xpath_indexing == DATA1_XPATH_INDEXING_ENABLE)
549  {
550  termlist_only = 0;
551  }
552 
553 
554  switch (n->which)
555  {
556  case DATA1N_data:
557  wrd->term_buf = n->u.data.data;
558  wrd->term_len = n->u.data.len;
559 
560  mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
561 
562  /* If we have a matching termlist... */
563  if (n->root->u.root.absyn &&
564  (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
565  {
566  zint max_seqno = 0;
567  for (; tl; tl = tl->next)
568  {
569  /* need to copy recword because it may be changed */
570  RecWord wrd_tl;
571  wrd->index_type = tl->structure;
572  memcpy(&wrd_tl, wrd, sizeof(*wrd));
573  if (tl->source)
574  sp_parse(sp, n, &wrd_tl, tl->source);
575 
576  /* this is just the old fashioned attribute based index */
577  wrd_tl.index_name = tl->index_name;
578  if (p->flagShowRecords)
579  {
580  int i;
581  printf("%*sIdx: [%s]", (level + 1) * 4, "",
582  tl->structure);
583  printf("%s %s", tl->index_name, tl->source);
584  printf(" XData:\"");
585  for (i = 0; i<wrd_tl.term_len && i < 40; i++)
586  fputc(wrd_tl.term_buf[i], stdout);
587  fputc('"', stdout);
588  if (wrd_tl.term_len > 40)
589  printf(" ...");
590  fputc('\n', stdout);
591  }
592  else
593  {
594  (*p->tokenAdd)(&wrd_tl);
595  }
596  if (wrd_tl.seqno > max_seqno)
597  max_seqno = wrd_tl.seqno;
598  }
599  if (max_seqno)
600  wrd->seqno = max_seqno;
601 
602  }
603  /* xpath indexing is done, if there was no termlist given,
604  or no ! in the termlist, and default indexing is enabled... */
605  if (!p->flagShowRecords && !termlist_only)
606  {
607  wrd->index_name = xpath_index;
608  wrd->index_type = "w";
609  (*p->tokenAdd)(wrd);
610  }
611  break;
612  case DATA1N_tag:
613  mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
614 
615  wrd->index_type = "0";
616  wrd->term_buf = tag_path_full;
617  wrd->term_len = strlen(tag_path_full);
618  wrd->index_name = xpath_index;
619  if (p->flagShowRecords)
620  {
621  printf("%*s tag=", (level + 1) * 4, "");
622  for (i = 0; i<wrd->term_len && i < 40; i++)
623  fputc(wrd->term_buf[i], stdout);
624  if (i == 40)
625  printf(" ..");
626  printf("\n");
627  }
628  else
629  {
630  data1_xattr *xp;
631 
632  if (!termlist_only)
633  (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
634 
635  if (xpath_is_start == 1) /* only for the starting tag... */
636  {
637 #define MAX_ATTR_COUNT 50
639 
640  int i = 0;
641  for (xp = n->u.tag.attributes; xp; xp = xp->next) {
642  char comb[512];
643  char attr_tag_path_full[1024];
644 
645  /* this could be cached as well */
646  sprintf(attr_tag_path_full, "@%s/%s",
647  xp->name, tag_path_full);
648 
649  tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
650 
651  if (!termlist_only)
652  {
653  /* attribute (no value) */
654  wrd->index_type = "0";
656  wrd->term_buf = xp->name;
657  wrd->term_len = strlen(xp->name);
658 
659  wrd->seqno--;
660  (*p->tokenAdd)(wrd);
661 
662  if (xp->value
663  &&
664  strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2)
665  {
666  /* attribute value exact */
667  strcpy(comb, xp->name);
668  strcat(comb, "=");
669  strcat(comb, xp->value);
670 
672  wrd->index_type = "0";
673  wrd->term_buf = comb;
674  wrd->term_len = strlen(comb);
675  wrd->seqno--;
676 
677  (*p->tokenAdd)(wrd);
678  }
679  }
680  i++;
681  }
682 
683  i = 0;
684  for (xp = n->u.tag.attributes; xp; xp = xp->next) {
685  data1_termlist *tl;
686  char attr_tag_path_full[1024];
687  int xpdone = 0;
688 
689  sprintf(attr_tag_path_full, "@%s/%s",
690  xp->name, tag_path_full);
691  if ((tl = tll[i]))
692  {
693  /* If there is a termlist given (=xelm directive) */
694  for (; tl; tl = tl->next)
695  {
696  if (!tl->index_name)
697  {
698  /* add xpath index for the attribute */
699  index_xpath_attr(attr_tag_path_full, xp->name,
700  xp->value, tl->structure,
701  p, wrd);
702  xpdone = 1;
703  } else {
704  /* index attribute value (only path/@attr) */
705  if (xp->value)
706  {
707  wrd->index_name = tl->index_name;
708  wrd->index_type = tl->structure;
709  wrd->term_buf = xp->value;
710  wrd->term_len = strlen(xp->value);
711  (*p->tokenAdd)(wrd);
712  }
713  }
714  }
715  }
716  /* if there was no termlist for the given path,
717  or the termlist didn't have a ! element, index
718  the attribute as "w" */
719  if (!xpdone && !termlist_only)
720  {
721  index_xpath_attr(attr_tag_path_full, xp->name,
722  xp->value, "w", p, wrd);
723  }
724  i++;
725  }
726  }
727  }
728  }
729 }
730 
731 static void index_termlist(struct source_parser *sp, data1_node *par,
732  data1_node *n,
733  struct recExtractCtrl *p, int level, RecWord *wrd)
734 {
735  data1_termlist *tlist = 0;
736  /*
737  * cycle up towards the root until we find a tag with an att..
738  * this has the effect of indexing locally defined tags with
739  * the attribute of their ancestor in the record.
740  */
741 
742  while (!par->u.tag.element)
743  if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
744  break;
745  if (!par || !(tlist = par->u.tag.element->termlists))
746  return;
747  for (; tlist; tlist = tlist->next)
748  {
749  /* consider source */
750  wrd->term_buf = 0;
751  assert(tlist->source);
752  sp_parse(sp, n, wrd, tlist->source);
753 
754  if (wrd->term_buf && wrd->term_len)
755  {
756  if (p->flagShowRecords)
757  {
758  int i;
759  printf("%*sIdx: [%s]", (level + 1) * 4, "",
760  tlist->structure);
761  printf("%s %s", tlist->index_name, tlist->source);
762  printf(" XData:\"");
763  for (i = 0; i<wrd->term_len && i < 40; i++)
764  fputc(wrd->term_buf[i], stdout);
765  fputc('"', stdout);
766  if (wrd->term_len > 40)
767  printf(" ...");
768  fputc('\n', stdout);
769  }
770  else
771  {
772  wrd->index_type = tlist->structure;
773  wrd->index_name = tlist->index_name;
774  (*p->tokenAdd)(wrd);
775  }
776  }
777  }
778 }
779 
780 static int dumpkeys_r(struct source_parser *sp,
781  data1_node *n, struct recExtractCtrl *p, int level,
782  RecWord *wrd)
783 {
784  for (; n; n = n->next)
785  {
786  if (p->flagShowRecords) /* display element description to user */
787  {
788  if (n->which == DATA1N_root)
789  {
790  printf("%*s", level * 4, "");
791  printf("Record type: '%s'\n", n->u.root.type);
792  }
793  else if (n->which == DATA1N_tag)
794  {
795  data1_element *e;
796 
797  printf("%*s", level * 4, "");
798  if (!(e = n->u.tag.element))
799  printf("Local tag: '%s'\n", n->u.tag.tag);
800  else
801  {
802  printf("Elm: '%s' ", e->name);
803  if (e->tag)
804  {
805  data1_tag *t = e->tag;
806 
807  printf("TagNam: '%s' ", t->names->name);
808  printf("(");
809  if (t->tagset)
810  printf("%s[%d],", t->tagset->name, t->tagset->type);
811  else
812  printf("?,");
813  if (t->which == DATA1T_numeric)
814  printf("%d)", t->value.numeric);
815  else
816  printf("'%s')", t->value.string);
817  }
818  printf("\n");
819  }
820  }
821  }
822 
823  if (n->which == DATA1N_tag)
824  {
825  index_termlist(sp, n, n, p, level, wrd);
826  /* index start tag */
827  if (n->root->u.root.absyn)
828  index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
829  1 /* is start */);
830  }
831 
832  if (n->child)
833  if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
834  return -1;
835 
836 
837  if (n->which == DATA1N_data)
838  {
839  data1_node *par = get_parent_tag(p->dh, n);
840 
841  if (p->flagShowRecords)
842  {
843  printf("%*s", level * 4, "");
844  printf("Data: ");
845  if (n->u.data.len > 256)
846  printf("'%.170s ... %.70s'\n", n->u.data.data,
847  n->u.data.data + n->u.data.len-70);
848  else if (n->u.data.len > 0)
849  printf("'%.*s'\n", n->u.data.len, n->u.data.data);
850  else
851  printf("NULL\n");
852  }
853 
854  if (par)
855  index_termlist(sp, par, n, p, level, wrd);
856 
857  index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
858  0 /* is start */);
859  }
860 
861  if (n->which == DATA1N_tag)
862  {
863  /* index end tag */
864  index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
865  0 /* is start */);
866  }
867 
868  if (p->flagShowRecords && n->which == DATA1N_root)
869  {
870  printf("%*s-------------\n\n", level * 4, "");
871  }
872  }
873  return 0;
874 }
875 
876 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
877 {
878  struct source_parser *sp = source_parser_create();
879  int r = dumpkeys_r(sp, n, p, 0, wrd);
881  return r;
882 }
883 
885 {
886  RecWord wrd;
887 
888  if (n->u.root.absyn && n->u.root.absyn->oid)
889  (*p->schemaAdd)(p, n->u.root.absyn->oid);
890  (*p->init)(p, &wrd);
891 
892  /* data1_pr_tree(p->dh, n, stdout); */
893 
894  return dumpkeys(n, p, &wrd);
895 }
896 
897 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
898  NMEM mem,
899  data1_node *(*grs_read)(struct grs_read_info *))
900 {
901  data1_node *n;
902  struct grs_read_info gri;
903  RecWord wrd;
904 
905  gri.stream = p->stream;
906  gri.mem = mem;
907  gri.dh = p->dh;
908  gri.clientData = clientData;
909 
910  n = (*grs_read)(&gri);
911  if (!n)
912  return RECCTRL_EXTRACT_EOF;
913  if (n->u.root.absyn && n->u.root.absyn->oid)
914  (*p->schemaAdd)(p, n->u.root.absyn->oid);
915  data1_concat_text(p->dh, mem, n);
916 
917  /* ensure our data1 tree is UTF-8 */
918  data1_iconv(p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
919 
920 
922 
923 #if 0
924  data1_pr_tree(p->dh, n, stdout);
925 #endif
926 
927  (*p->init)(p, &wrd);
928  if (dumpkeys(n, p, &wrd) < 0)
929  {
931  }
932  return RECCTRL_EXTRACT_OK;
933 }
934 
935 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
936  data1_node *(*grs_read)(struct grs_read_info *))
937 {
938  int ret;
939  NMEM mem = nmem_create();
940  ret = grs_extract_sub(clientData, p, mem, grs_read);
941  nmem_destroy(mem);
942  return ret;
943 }
944 
945 /*
946  * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
947  */
948 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
949  char **addinfo, ODR o)
950 {
951  data1_esetname *eset;
952  Z_Espec1 *espec = 0;
953  Z_ElementSpec *p;
954 
955  switch (c->which)
956  {
957  case Z_RecordComp_simple:
958  if (c->u.simple->which != Z_ElementSetNames_generic)
959  return 26; /* only generic form supported. Fix this later */
960  if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
961  c->u.simple->u.generic)))
962  {
963  yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
964  *addinfo = odr_strdup(o, c->u.simple->u.generic);
965  return 25; /* invalid esetname */
966  }
967  yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
968  c->u.simple->u.generic);
969  espec = eset->spec;
970  break;
971  case Z_RecordComp_complex:
972  if (c->u.complex->generic)
973  {
974  /* insert check for schema */
975  if ((p = c->u.complex->generic->elementSpec))
976  {
977  switch (p->which)
978  {
979  case Z_ElementSpec_elementSetName:
980  if (!(eset =
981  data1_getesetbyname(dh, n->u.root.absyn,
982  p->u.elementSetName)))
983  {
984  yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
985  p->u.elementSetName);
986  *addinfo = odr_strdup(o, p->u.elementSetName);
987  return 25; /* invalid esetname */
988  }
989  yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
990  p->u.elementSetName);
991  espec = eset->spec;
992  break;
993  case Z_ElementSpec_externalSpec:
994  if (p->u.externalSpec->which == Z_External_espec1)
995  {
996  yaz_log(YLOG_DEBUG, "Got Espec-1");
997  espec = p->u.externalSpec-> u.espec1;
998  }
999  else
1000  {
1001  yaz_log(YLOG_LOG, "Unknown external espec.");
1002  return 25; /* bad. what is proper diagnostic? */
1003  }
1004  break;
1005  }
1006  }
1007  }
1008  else
1009  return 26; /* fix */
1010  }
1011  if (espec)
1012  {
1013  yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1014  return data1_doespec1(dh, n, espec);
1015  }
1016  else
1017  {
1018  yaz_log(YLOG_DEBUG, "Element: all match");
1019  return -1;
1020  }
1021 }
1022 
1023 /* Add Zebra info in separate namespace ...
1024  <root
1025  ...
1026  <metadata xmlns="http://www.indexdata.dk/zebra/">
1027  <size>359</size>
1028  <localnumber>447</localnumber>
1029  <filename>records/genera.xml</filename>
1030  </metadata>
1031  </root>
1032 */
1033 
1034 static void zebra_xml_metadata(struct recRetrieveCtrl *p, data1_node *top,
1035  NMEM mem)
1036 {
1037  const char *idzebra_ns[3];
1038  const char *i2 = "\n ";
1039  const char *i4 = "\n ";
1040  data1_node *n;
1041 
1042  idzebra_ns[0] = "xmlns";
1043  idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1044  idzebra_ns[2] = 0;
1045 
1046  data1_mk_text(p->dh, mem, i2, top);
1047 
1048  n = data1_mk_tag(p->dh, mem, "idzebra", idzebra_ns, top);
1049 
1050  data1_mk_text(p->dh, mem, "\n", top);
1051 
1052  data1_mk_text(p->dh, mem, i4, n);
1053 
1054  data1_mk_tag_data_int(p->dh, n, "size", p->recordSize, mem);
1055 
1056  if (p->score != -1)
1057  {
1058  data1_mk_text(p->dh, mem, i4, n);
1059  data1_mk_tag_data_int(p->dh, n, "score", p->score, mem);
1060  }
1061  data1_mk_text(p->dh, mem, i4, n);
1062  data1_mk_tag_data_zint(p->dh, n, "localnumber", p->localno, mem);
1063  if (p->fname)
1064  {
1065  data1_mk_text(p->dh, mem, i4, n);
1066  data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1067  }
1068  data1_mk_text(p->dh, mem, i2, n);
1069 }
1070 
1071 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1072  data1_node *(*grs_read)(struct grs_read_info *))
1073 {
1074  data1_node *node = 0, *onode = 0, *top;
1075  data1_node *dnew;
1076  data1_maptab *map;
1077  int res, selected = 0;
1078  NMEM mem;
1079  struct grs_read_info gri;
1080  const char *tagname;
1081 
1082  const Odr_oid *requested_schema = 0;
1083  data1_marctab *marctab;
1084  int dummy;
1085 
1086  mem = nmem_create();
1087  gri.stream = p->stream;
1088  gri.mem = mem;
1089  gri.dh = p->dh;
1090  gri.clientData = clientData;
1091 
1092  yaz_log(YLOG_DEBUG, "grs_retrieve");
1093  node = (*grs_read)(&gri);
1094  if (!node)
1095  {
1096  p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1097  nmem_destroy(mem);
1098  return 0;
1099  }
1100  data1_concat_text(p->dh, mem, node);
1101 
1102  data1_remove_idzebra_subtree(p->dh, node);
1103 
1104 #if 0
1105  data1_pr_tree(p->dh, node, stdout);
1106 #endif
1107  top = data1_get_root_tag(p->dh, node);
1108 
1109  yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1110  tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1111  if (tagname &&
1112  (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1113  {
1114  dnew->u.data.what = DATA1I_text;
1115  dnew->u.data.data = dnew->lbuf;
1116  sprintf(dnew->u.data.data, "%d", p->recordSize);
1117  dnew->u.data.len = strlen(dnew->u.data.data);
1118  }
1119 
1120  tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1121  if (tagname && p->score >= 0 &&
1122  (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1123  {
1124  yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1125  dnew->u.data.what = DATA1I_num;
1126  dnew->u.data.data = dnew->lbuf;
1127  sprintf(dnew->u.data.data, "%d", p->score);
1128  dnew->u.data.len = strlen(dnew->u.data.data);
1129  }
1130 
1131  tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1132  "localControlNumber");
1133  if (tagname && p->localno > 0 &&
1134  (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1135  {
1136  yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1137  dnew->u.data.what = DATA1I_text;
1138  dnew->u.data.data = dnew->lbuf;
1139 
1140  sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1141  dnew->u.data.len = strlen(dnew->u.data.data);
1142  }
1143 
1144  if (!p->input_format)
1145  { /* SUTRS is default input_format */
1146  p->input_format = yaz_oid_recsyn_sutrs;
1147  }
1148  assert(p->input_format);
1149 
1150  if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
1151  zebra_xml_metadata(p, top, mem);
1152 
1153 #if 0
1154  data1_pr_tree(p->dh, node, stdout);
1155 #endif
1156  if (p->comp && p->comp->which == Z_RecordComp_complex &&
1157  p->comp->u.complex->generic &&
1158  p->comp->u.complex->generic->which == Z_Schema_oid &&
1159  p->comp->u.complex->generic->schema.oid)
1160  {
1161  requested_schema = p->comp->u.complex->generic->schema.oid;
1162  }
1163  /* If schema has been specified, map if possible, then check that
1164  * we got the right one
1165  */
1166  if (requested_schema)
1167  {
1168  yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1169  for (map = node->u.root.absyn->maptabs; map; map = map->next)
1170  {
1171  if (!oid_oidcmp(map->oid, requested_schema))
1172  {
1173  onode = node;
1174  if (!(node = data1_map_record(p->dh, onode, map, mem)))
1175  {
1176  p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1177  nmem_destroy(mem);
1178  return 0;
1179  }
1180  break;
1181  }
1182  }
1183  if (node->u.root.absyn
1184  && oid_oidcmp(requested_schema, node->u.root.absyn->oid))
1185  {
1186  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1187  nmem_destroy(mem);
1188  return 0;
1189  }
1190  }
1191  /*
1192  * Does the requested format match a known syntax-mapping? (this reflects
1193  * the overlap of schema and formatting which is inherent in the MARC
1194  * family)
1195  */
1196  yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1197  if (node->u.root.absyn)
1198  for (map = node->u.root.absyn->maptabs; map; map = map->next)
1199  {
1200  if (!oid_oidcmp(map->oid, p->input_format))
1201  {
1202  onode = node;
1203  if (!(node = data1_map_record(p->dh, onode, map, mem)))
1204  {
1205  p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1206  nmem_destroy(mem);
1207  return 0;
1208  }
1209  break;
1210  }
1211  }
1212  yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1213  if (node->u.root.absyn && node->u.root.absyn->oid
1214  && !oid_oidcmp(p->input_format, yaz_oid_recsyn_grs_1))
1215  {
1216  char oid_str[OID_STR_MAX];
1217  char *dot_str = oid_oid_to_dotstring(node->u.root.absyn->oid, oid_str);
1218 
1219  if (dot_str && (dnew = data1_mk_tag_data_wd(p->dh, top,
1220  "schemaIdentifier", mem)))
1221  {
1222  dnew->u.data.what = DATA1I_oid;
1223  dnew->u.data.data = (char *) nmem_strdup(mem, dot_str);
1224  dnew->u.data.len = strlen(dot_str);
1225  }
1226  }
1227 
1228  yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1229  if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1230  p->odr)) > 0)
1231  {
1232  p->diagnostic = res;
1233  nmem_destroy(mem);
1234  return 0;
1235  }
1236  else if (p->comp && !res)
1237  selected = 1;
1238 
1239 #if 0
1240  data1_pr_tree(p->dh, node, stdout);
1241 #endif
1242  yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1243 
1244  p->output_format = p->input_format;
1245 
1246  assert(p->input_format);
1247  if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
1248  {
1249 #if 0
1250  data1_pr_tree(p->dh, node, stdout);
1251 #endif
1252  /* default output encoding for XML is UTF-8 */
1253  data1_iconv(p->dh, mem, node,
1254  p->encoding ? p->encoding : "UTF-8",
1255  data1_get_encoding(p->dh, node));
1256 
1257  if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1258  &p->rec_len)))
1259  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1260  else
1261  {
1262  char *new_buf = (char*) odr_malloc(p->odr, p->rec_len);
1263  memcpy(new_buf, p->rec_buf, p->rec_len);
1264  p->rec_buf = new_buf;
1265  }
1266  }
1267  else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_grs_1))
1268  {
1269  data1_iconv(p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1270  dummy = 0;
1271  if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1272  p->odr, &dummy)))
1273  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1274  else
1275  p->rec_len = -1;
1276  }
1277  else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_explain))
1278  {
1279  /* ensure our data1 tree is UTF-8 */
1280  data1_iconv(p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1281 
1282  if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1283  p->odr)))
1284  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1285  else
1286  p->rec_len = -1;
1287  }
1288  else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_summary))
1289  {
1290  /* ensure our data1 tree is UTF-8 */
1291  data1_iconv(p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1292  if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1293  p->odr)))
1294  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1295  else
1296  p->rec_len = -1;
1297  }
1298  else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_sutrs))
1299  {
1300  if (p->encoding)
1301  data1_iconv(p->dh, mem, node, p->encoding,
1302  data1_get_encoding(p->dh, node));
1303  if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1304  &p->rec_len)))
1305  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1306  else
1307  {
1308  char *new_buf = (char*) odr_malloc(p->odr, p->rec_len);
1309  memcpy(new_buf, p->rec_buf, p->rec_len);
1310  p->rec_buf = new_buf;
1311  }
1312  }
1313  else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_soif))
1314  {
1315  if (p->encoding)
1316  data1_iconv(p->dh, mem, node, p->encoding,
1317  data1_get_encoding(p->dh, node));
1318  if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1319  &p->rec_len)))
1320  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1321  else
1322  {
1323  char *new_buf = (char*) odr_malloc(p->odr, p->rec_len);
1324  memcpy(new_buf, p->rec_buf, p->rec_len);
1325  p->rec_buf = new_buf;
1326  }
1327  }
1328  else
1329  {
1330  if (!node->u.root.absyn)
1331  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1332  else
1333  {
1334  for (marctab = node->u.root.absyn->marc; marctab;
1335  marctab = marctab->next)
1336  if (marctab->oid && !oid_oidcmp(marctab->oid, p->input_format))
1337  break;
1338  if (!marctab)
1339  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1340  else
1341  {
1342  if (p->encoding)
1343  data1_iconv(p->dh, mem, node, p->encoding,
1344  data1_get_encoding(p->dh, node));
1345  if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1346  selected, &p->rec_len)))
1347  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1348  else
1349  {
1350  char *new_buf = (char*) odr_malloc(p->odr, p->rec_len);
1351  memcpy(new_buf, p->rec_buf, p->rec_len);
1352  p->rec_buf = new_buf;
1353  }
1354  }
1355  }
1356  }
1357  nmem_destroy(mem);
1358  return 0;
1359 }
1360 
1361 /*
1362  * Local variables:
1363  * c-basic-offset: 4
1364  * c-file-style: "Stroustrup"
1365  * indent-tabs-mode: nil
1366  * End:
1367  * vim: shiftwidth=4 tabstop=8 expandtab
1368  */
1369 
char * addinfo
Definition: recctrl.h:138
unsigned short to
Definition: dfa.h:32
Definition: dfa.h:30
#define RECCTRL_EXTRACT_OK
Definition: recctrl.h:163
void(* tokenAdd)(RecWord *w)
Definition: recctrl.h:105
static int dfa_match_first(struct DFA_state **dfaar, const char *text)
Definition: recgrs.c:346
#define DATA1N_tag
Definition: data1.h:276
data1_name * names
Definition: data1.h:203
char * structure
Definition: data1.h:233
const char * src
Definition: recgrs.c:40
int data1_iconv(data1_handle dh, NMEM m, data1_node *n, const char *tocode, const char *fromcode)
Definition: d1_read.c:1088
const char * index_name
Definition: recctrl.h:54
char lbuf[DATA1_LOCALDATA]
Definition: data1.h:339
#define DATA1N_root
Definition: data1.h:274
Z_BriefBib * data1_nodetosummary(data1_handle dh, data1_node *n, int select, ODR o)
Definition: d1_sumout.c:54
static void index_xpath(struct source_parser *sp, data1_node *n, struct recExtractCtrl *p, int level, RecWord *wrd, char *xpath_index, int xpath_is_start)
Definition: recgrs.c:534
#define XPATH_PREDICATE_BOOLEAN
Definition: zebra_xpath.h:35
int term_len
Definition: recctrl.h:58
static void index_xpath_attr(char *tag_path, char *name, char *value, char *structure, struct recExtractCtrl *p, RecWord *wrd)
Definition: recgrs.c:484
char * name
Definition: data1.h:260
Z_GenericRecord * data1_nodetogr(data1_handle dh, data1_node *n, int select, ODR o, int *len)
Definition: d1_grs.c:376
#define ZEBRA_XPATH_ELM_END
Definition: recctrl.h:36
int zebra_grs_extract(void *clientData, struct recExtractCtrl *p, data1_node *(*grs_read)(struct grs_read_info *))
Definition: recgrs.c:935
data1_node * data1_mk_tag_data_text(data1_handle dh, data1_node *at, const char *tag, const char *str, NMEM nmem)
Definition: d1_read.c:506
static void zebra_xml_metadata(struct recRetrieveCtrl *p, data1_node *top, NMEM mem)
Definition: recgrs.c:1034
char * encoding
Definition: recctrl.h:125
struct data1_xpelement * next
Definition: d1_absyn.h:39
struct data1_marctab * next
Definition: data1.h:156
char * index_name
Definition: data1.h:232
struct data1_xattr * next
Definition: data1.h:262
void data1_pr_tree(data1_handle dh, data1_node *n, FILE *out)
Definition: d1_prtree.c:134
static void source_parser_destroy(struct source_parser *sp)
Definition: recgrs.c:263
#define DATA1I_oid
Definition: data1.h:318
void * clientData
Definition: recgrs.h:29
#define RECCTRL_EXTRACT_ERROR_GENERIC
Definition: recctrl.h:165
char * value
Definition: data1.h:261
struct DFA_state ** states
Definition: dfa.h:55
struct data1_xpelement * xp_elements
Definition: d1_absyn.h:58
struct xpath_predicate::@8::@10 boolean
data1_node * data1_get_root_tag(data1_handle dh, data1_node *n)
Definition: d1_read.c:35
data1_node * data1_mk_tag_data_zint(data1_handle dh, data1_node *at, const char *tag, zint num, NMEM nmem)
Definition: d1_read.c:457
char * name
Definition: data1.h:114
static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
Definition: recgrs.c:169
union xpath_predicate::@8 u
data1_termlist * termlists
Definition: d1_absyn.h:38
unsigned char ch[2]
Definition: dfa.h:31
char * name
Definition: data1.h:244
#define RECCTRL_EXTRACT_EOF
Definition: recctrl.h:164
Z_Espec1 * spec
Definition: data1.h:162
data1_node * data1_mk_tag(data1_handle dh, NMEM nmem, const char *tag, const char **attr, data1_node *at)
Definition: d1_read.c:294
static void index_termlist(struct source_parser *sp, data1_node *par, data1_node *n, struct recExtractCtrl *p, int level, RecWord *wrd)
Definition: recgrs.c:731
Z_ExplainRecord * data1_nodetoexplain(data1_handle dh, data1_node *n, int select, ODR o)
Definition: d1_expout.c:1349
struct xpath_predicate::@8::@9 relation
static int sp_lex(struct source_parser *sp)
Definition: recgrs.c:45
static int grs_extract_sub(void *clientData, struct recExtractCtrl *p, NMEM mem, data1_node *(*grs_read)(struct grs_read_info *))
Definition: recgrs.c:897
const char * data1_get_encoding(data1_handle dh, data1_node *n)
Definition: d1_read.c:1069
zint seqno
Definition: recctrl.h:60
int numeric
Definition: data1.h:209
data1_handle dh
Definition: recctrl.h:112
struct data1_tagset * tagset
Definition: data1.h:214
int data1_doespec1(data1_handle dh, data1_node *n, Z_Espec1 *e)
Definition: d1_doespec.c:356
struct xpath_predicate * predicate
Definition: zebra_xpath.h:46
const Odr_oid * output_format
Definition: recctrl.h:134
#define DATA1I_text
Definition: data1.h:314
char * data1_nodetobuf(data1_handle dh, data1_node *n, int select, int *len)
Definition: d1_sutrs.c:141
void(* schemaAdd)(struct recExtractCtrl *p, Odr_oid *oid)
Definition: recctrl.h:111
struct DFA_tran * trans
Definition: dfa.h:45
struct xpath_location_step xpath[XPATH_STEP_COUNT]
Definition: d1_absyn.h:34
#define ZEBRA_XPATH_ATTR_CDATA
Definition: recctrl.h:45
const char * index_type
Definition: recctrl.h:52
char * tag
Definition: data1.h:296
data1_node * get_parent_tag(data1_handle dh, data1_node *n)
Definition: d1_read.c:52
union data1_node::@2 u
struct data1_termlist * next
Definition: data1.h:235
int lookahead
Definition: recgrs.c:41
struct data1_node::@2::@3 root
static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
Definition: recgrs.c:69
void(* init)(struct recExtractCtrl *p, RecWord *w)
Definition: recctrl.h:103
char * data1_nodetomarc(data1_handle dh, data1_marctab *p, data1_node *n, int selected, int *len)
Definition: d1_marc.c:468
data1_node * data1_map_record(data1_handle dh, data1_node *n, data1_maptab *map, NMEM m)
Definition: d1_map.c:324
static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
Definition: recgrs.c:509
struct data1_node * child
Definition: data1.h:341
const char * data1_systag_lookup(data1_absyn *absyn, const char *tag, const char *default_value)
Definition: d1_absyn.c:611
static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c, char **addinfo, ODR o)
Definition: recgrs.c:948
data1_node * data1_mk_tag_data_int(data1_handle dh, data1_node *at, const char *tag, int num, NMEM nmem)
Definition: d1_read.c:473
const Odr_oid * input_format
Definition: recctrl.h:123
const char * tok
Definition: recgrs.c:39
static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
Definition: recgrs.c:126
char * data1_nodetoidsgml(data1_handle dh, data1_node *n, int select, int *len)
Definition: d1_write.c:230
#define DATA1I_num
Definition: data1.h:316
int which
Definition: data1.h:206
data1_esetname * data1_getesetbyname(data1_handle dh, data1_absyn *a, const char *name)
Definition: d1_absyn.c:297
#define ZEBRA_XPATH_CDATA
Definition: recctrl.h:39
union data1_tag::@1 value
data1_node * data1_mk_text(data1_handle dh, NMEM mem, const char *buf, data1_node *parent)
Definition: d1_read.c:349
int which
Definition: data1.h:285
Z_RecordComposition * comp
Definition: recctrl.h:124
struct data1_node * parent
Definition: data1.h:343
data1_handle dh
Definition: recctrl.h:131
struct ZebraRecStream * stream
Definition: recctrl.h:119
Odr_oid * oid
Definition: data1.h:105
data1_node * data1_mk_tag_data_wd(data1_handle dh, data1_node *at, const char *tagname, NMEM m)
Definition: d1_read.c:437
int type
Definition: data1.h:222
static int dumpkeys_r(struct source_parser *sp, data1_node *n, struct recExtractCtrl *p, int level, RecWord *wrd)
Definition: recgrs.c:780
#define DATA1T_numeric
Definition: data1.h:204
Definition: dfa.h:42
long zint
Zebra integer.
Definition: util.h:66
const char * term_buf
Definition: recctrl.h:56
char * name
Definition: data1.h:223
struct data1_node * next
Definition: data1.h:340
char * data
Definition: data1.h:307
#define ZEBRA_XPATH_ELM_BEGIN
Definition: recctrl.h:33
void * rec_buf
Definition: recctrl.h:135
short tran_no
Definition: dfa.h:48
Odr_oid * oid
Definition: data1.h:140
char * source
Definition: data1.h:234
struct DFA * dfa
Definition: d1_absyn.h:37
struct data1_xpelement * match_next
Definition: d1_absyn.h:44
int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
Definition: recgrs.c:284
data1_handle dh
Definition: recgrs.h:31
data1_tag * tag
Definition: data1.h:245
record extract for indexing
Definition: recctrl.h:101
void data1_remove_idzebra_subtree(data1_handle dh, data1_node *n)
Definition: d1_utils.c:65
int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, data1_node *(*grs_read)(struct grs_read_info *))
Definition: recgrs.c:1071
#define XPATH_PREDICATE_RELATION
Definition: zebra_xpath.h:29
char * fname
Definition: recctrl.h:130
NMEM nmem
Definition: recgrs.c:42
void data1_concat_text(data1_handle dh, NMEM m, data1_node *n)
Definition: d1_read.c:1137
int flagShowRecords
Definition: recctrl.h:108
static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
Definition: recgrs.c:876
#define ZEBRA_XPATH_ATTR_NAME
Definition: recctrl.h:42
NMEM mem
Definition: recgrs.h:30
static struct source_parser * source_parser_create(void)
Definition: recgrs.c:255
struct ZebraRecStream * stream
Definition: recctrl.h:102
static int sp_parse(struct source_parser *sp, data1_node *n, RecWord *wrd, const char *src)
Definition: recgrs.c:271
struct ZebraRecStream * stream
Definition: recgrs.h:28
int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
Definition: recgrs.c:884
#define DATA1N_data
Definition: data1.h:278
short rule_no
Definition: dfa.h:49
data1_termlist * xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
Definition: recgrs.c:394
struct data1_maptab * next
Definition: data1.h:108
char * data1_nodetosoif(data1_handle dh, data1_node *n, int select, int *len)
Definition: d1_soif.c:75
#define MAX_ATTR_COUNT
char * string
Definition: data1.h:210
#define ZINT_FORMAT
Definition: util.h:72