IDZEBRA  2.2.7
recgrs.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #include <sys/types.h>
26 #include <ctype.h>
27 
28 #include <yaz/log.h>
29 #include <yaz/oid_db.h>
30 #include <yaz/diagbib1.h>
31 #include <yaz/wrbuf.h>
32 #include <yaz/snprintf.h>
33 
34 #include <d1_absyn.h>
35 #include <idzebra/recgrs.h>
36 
37 #define GRS_MAX_WORD 512
38 
/* State of the small recursive-descent parser that evaluates termlist
   "source" expressions (driven by sp_parse, tokenised by sp_lex). */
39 struct source_parser {
40  int len; /* byte length of the current text token; 0 when the lookahead is a delimiter or end of input */
41  const char *tok; /* start of the current token inside the source string (not NUL-terminated) */
42  const char *src; /* read position: first character sp_lex has not consumed yet */
43  int lookahead; /* 't' for a text token, otherwise the delimiter character just read, or 0 at end of input */
44  NMEM nmem; /* pool used for term buffers built while parsing; reset at the start of each sp_parse */
45 };
46 
47 static int sp_lex(struct source_parser *sp)
48 {
49  while (*sp->src == ' ')
50  (sp->src)++;
51  sp->tok = sp->src;
52  sp->len = 0;
53  while (*sp->src && !strchr("<>();,-: ", *sp->src))
54  {
55  sp->src++;
56  sp->len++;
57  }
58  if (sp->len)
59  sp->lookahead = 't';
60  else
61  {
62  sp->lookahead = *sp->src;
63  if (*sp->src)
64  sp->src++;
65  }
66  return sp->lookahead;
67 }
68 
69 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
70 
71 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
72 {
73  int start, len;
74  RecWord tmp_w;
75 
76  /* ( */
77  sp_lex(sp);
78  if (sp->lookahead != '(')
79  return 0;
80  sp_lex(sp); /* skip ( */
81 
82  /* 1st arg: string */
83  if (!sp_expr(sp, n, wrd))
84  return 0;
85 
86  if (sp->lookahead != ',')
87  return 0;
88  sp_lex(sp); /* skip , */
89 
90  /* 2nd arg: start */
91  if (!sp_expr(sp, n, &tmp_w))
92  return 0;
93  start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
94 
95  if (sp->lookahead == ',')
96  {
97  sp_lex(sp); /* skip , */
98 
99  /* 3rd arg: length */
100  if (!sp_expr(sp, n, &tmp_w))
101  return 0;
102  len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
103  }
104  else
105  len = wrd->term_len;
106 
107  /* ) */
108  if (sp->lookahead != ')')
109  return 0;
110  sp_lex(sp);
111 
112  if (wrd->term_buf)
113  {
114  if (start >= wrd->term_len)
115  wrd->term_len = 0;
116  else
117  {
118  wrd->term_len -= start;
119  wrd->term_buf += start;
120 
121  if (wrd->term_len > len)
122  wrd->term_len = len;
123  }
124  }
125  return 1;
126 }
127 
128 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
129 {
130  char num_str[20];
131  int min_pos = -1;
132  sp_lex(sp);
133  if (sp->lookahead != '(')
134  return 0;
135  sp_lex(sp); /* skip ( */
136  if (!sp_expr(sp, n, wrd))
137  return 0;
138  while (sp->lookahead == ',')
139  {
140  RecWord search_w;
141  int i;
142  sp_lex(sp); /* skip , */
143 
144  if (!sp_expr(sp, n, &search_w))
145  return 0;
146  for (i = 0; i<wrd->term_len; i++)
147  {
148  int j;
149  for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
150  if (wrd->term_buf[i+j] != search_w.term_buf[j])
151  break;
152  if (j == search_w.term_len) /* match ? */
153  {
154  if (min_pos == -1 || i < min_pos)
155  min_pos = i;
156  break;
157  }
158  }
159  }
160  if (sp->lookahead != ')')
161  return 0;
162  sp_lex(sp);
163  if (min_pos == -1)
164  min_pos = 0; /* the default if not found */
165  yaz_snprintf(num_str, sizeof(num_str), "%d", min_pos);
166  wrd->term_buf = nmem_strdup(sp->nmem, num_str);
167  wrd->term_len = strlen(wrd->term_buf);
168  return 1;
169 }
170 
171 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
172 {
173  if (sp->lookahead != 't')
174  return 0;
175  if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
176  {
177  if (n->which == DATA1N_data)
178  {
179  wrd->term_buf = n->u.data.data;
180  wrd->term_len = n->u.data.len;
181  }
182  sp_lex(sp);
183  }
184  else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
185  {
186  if (n->which == DATA1N_tag)
187  {
188  wrd->term_buf = n->u.tag.tag;
189  wrd->term_len = strlen(n->u.tag.tag);
190  }
191  sp_lex(sp);
192  }
193  else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
194  {
195  RecWord tmp_w;
196  sp_lex(sp);
197  if (sp->lookahead != '(')
198  return 0;
199  sp_lex(sp);
200 
201  if (!sp_expr(sp, n, &tmp_w))
202  return 0;
203 
204  wrd->term_buf = "";
205  wrd->term_len = 0;
206  if (n->which == DATA1N_tag)
207  {
208  data1_xattr *p = n->u.tag.attributes;
209  while (p && strlen(p->name) != tmp_w.term_len &&
210  memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
211  p = p->next;
212  if (p)
213  {
214  wrd->term_buf = p->value;
215  wrd->term_len = strlen(p->value);
216  }
217  }
218  if (sp->lookahead != ')')
219  return 0;
220  sp_lex(sp);
221  }
222  else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
223  {
224  return sp_first(sp, n, wrd);
225  }
226  else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
227  {
228  return sp_range(sp, n, wrd);
229  }
230  else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
231  {
232  char *b;
233  wrd->term_len = sp->len;
234  b = nmem_malloc(sp->nmem, sp->len);
235  memcpy(b, sp->tok, sp->len);
236  wrd->term_buf = b;
237  sp_lex(sp);
238  }
239  else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
240  {
241  char *b;
242  wrd->term_len = sp->len - 2;
243  b = nmem_malloc(sp->nmem, wrd->term_len);
244  memcpy(b, sp->tok+1, wrd->term_len);
245  wrd->term_buf = b;
246  sp_lex(sp);
247  }
248  else
249  {
250  wrd->term_buf = "";
251  wrd->term_len = 0;
252  sp_lex(sp);
253  }
254  return 1;
255 }
256 
258 {
259  struct source_parser *sp = xmalloc(sizeof(*sp));
260 
261  sp->nmem = nmem_create();
262  return sp;
263 }
264 
265 static void source_parser_destroy(struct source_parser *sp)
266 {
267  if (!sp)
268  return;
269  nmem_destroy(sp->nmem);
270  xfree(sp);
271 }
272 
273 static int sp_parse(struct source_parser *sp,
274  data1_node *n, RecWord *wrd, const char *src)
275 {
276  sp->len = 0;
277  sp->tok = 0;
278  sp->src = src;
279  sp->lookahead = 0;
280  nmem_reset(sp->nmem);
281 
282  sp_lex(sp);
283  return sp_expr(sp, n, wrd);
284 }
285 
287 {
288  int res = 1;
289  char *attname;
290  data1_xattr *attr;
291 
292  if (!p) {
293  return 1;
294  } else {
295  if (p->which == XPATH_PREDICATE_RELATION) {
296  if (p->u.relation.name[0]) {
297  if (*p->u.relation.name != '@') {
298  yaz_log(YLOG_WARN,
299  " Only attributes (@) are supported in xelm xpath predicates");
300  yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
301  return 1;
302  }
303  attname = p->u.relation.name + 1;
304  res = 0;
305  /* looking for the attribute with a specified name */
306  for (attr = n->u.tag.attributes; attr; attr = attr->next) {
307  if (!strcmp(attr->name, attname)) {
308  if (p->u.relation.op[0]) {
309  if (*p->u.relation.op != '=') {
310  yaz_log(YLOG_WARN,
311  "Only '=' relation is supported (%s)",p->u.relation.op);
312  yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
313  res = 1; break;
314  } else {
315  if (!strcmp(attr->value, p->u.relation.value)) {
316  res = 1; break;
317  }
318  }
319  } else {
320  /* attribute exists, no value specified */
321  res = 1; break;
322  }
323  }
324  }
325  return res;
326  } else {
327  return 1;
328  }
329  }
330  else if (p->which == XPATH_PREDICATE_BOOLEAN) {
331  if (!strcmp(p->u.boolean.op,"and")) {
332  return d1_check_xpath_predicate(n, p->u.boolean.left)
333  && d1_check_xpath_predicate(n, p->u.boolean.right);
334  }
335  else if (!strcmp(p->u.boolean.op,"or")) {
336  return (d1_check_xpath_predicate(n, p->u.boolean.left)
337  || d1_check_xpath_predicate(n, p->u.boolean.right));
338  } else {
339  yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
340  return 1;
341  }
342  }
343  }
344  return 0;
345 }
346 
347 
348 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
349 {
350  struct DFA_state *s = dfaar[0]; /* start state */
351  struct DFA_tran *t;
352  int i;
353  const char *p = text;
354  unsigned char c;
355 
356  for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
357  {
358  if (c >= t->ch[0] && c <= t->ch[1])
359  {
360  while (i >= 0)
361  {
362  /* move to next state and return if we get a match */
363  s = dfaar[t->to];
364  if (s->rule_no)
365  return 1;
366  /* next char */
367  if (!c)
368  return 0;
369  c = *p++;
370  for (t = s->trans, i = s->tran_no; --i >= 0; t++)
371  if (c >= t->ch[0] && c <= t->ch[1])
372  break;
373  }
374  }
375  }
376  return 0;
377 }
378 
379 /* *ostrich*
380 
381 New function, looking for xpath "element" definitions in abs, by
382 tagpath, using a kind of ugly regexp search. The DFA was built while
383 parsing abs, so here we just go through them and try to match
384 against the given tagpath. The first matching entry is returned.
385 
386 pop, 2002-12-13
387 
388 Added support for enhanced xelm. Now [] predicates are considered
389 as well, when selecting indexing rules... (why the hell it's called
390 termlist???)
391 
392 pop, 2003-01-17
393 
394 */
395 
397 {
398  data1_absyn *abs = n->root->u.root.absyn;
399 
400  data1_xpelement *xpe = 0;
401  data1_node *nn;
402 #ifdef ENHANCED_XELM
403  struct xpath_location_step *xp;
404 #endif
405  WRBUF pexpr = wrbuf_alloc();
406 
407  wrbuf_printf(pexpr, "/%s\n", tagpath);
408 
409  for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
410  xpe->match_state = -1; /* don't know if it matches yet */
411 
412  for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
413  {
414  int i;
415  int ok = xpe->match_state;
416  if (ok == -1)
417  { /* don't know whether there is a match yet */
418  data1_xpelement *xpe1;
419 
420  assert(xpe->dfa);
421  ok = dfa_match_first(xpe->dfa->states, wrbuf_cstr(pexpr));
422 
423 #if OPTIMIZE_MELM
424  /* mark this and following ones with same regexp */
425  for (xpe1 = xpe; xpe1; xpe1 = xpe1->match_next)
426  xpe1->match_state = ok;
427 #endif
428  }
429  assert(ok == 0 || ok == 1);
430  if (ok) {
431 #ifdef ENHANCED_XELM
432  /* we have to check the perdicates up to the root node */
433  xp = xpe->xpath;
434 
435  /* find the first tag up in the node structure */
436  for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
437  ;
438 
439  /* go from inside out in the node structure, while going
440  backwards trough xpath location steps ... */
441  for (i = xpe->xpath_len - 1; i>0; i--)
442  {
443  if (!d1_check_xpath_predicate(nn, xp[i].predicate))
444  {
445  ok = 0;
446  break;
447  }
448 
449  if (nn->which == DATA1N_tag)
450  nn = nn->parent;
451  }
452 #endif
453  if (ok)
454  break;
455  }
456  }
457 
458  wrbuf_destroy(pexpr);
459 
460  if (xpe)
461  return xpe->termlists;
462  else
463  return NULL;
464 }
465 
466 /* use
467  1 start element (tag)
468  2 end element
469  3 start attr (and attr-exact)
470  4 end attr
471 
472  1016 cdata
473  1015 attr data
474 
475  *ostrich*
476 
477  Now, if there is a matching xelm described in abs, for the
478  indexed element or the attribute, then the data is handled according
479  to those definitions...
480 
481  modified by pop, 2002-12-13
482 */
483 
484 /* add xpath index for an attribute
   Emits, through p->tokenAdd, the tag path as a type "0" term, then - when
   value is non-NULL - the value as a type "w" term, then the tag path as a
   type "0" term again.  The name and structure parameters are not used by
   the statements visible here.
   NOTE(review): the embedded line numbers skip 489, 496 and 502, so one
   statement before each of the three term set-ups is missing from this
   listing (presumably an index_name assignment - confirm against the
   upstream file). */
485 static void index_xpath_attr(char *tag_path, char *name, char *value,
486  char *structure, struct recExtractCtrl *p,
487  RecWord *wrd)
488 {
490  wrd->index_type = "0";
491  wrd->term_buf = tag_path;
492  wrd->term_len = strlen(tag_path);
493  (*p->tokenAdd)(wrd);
494 
    /* the attribute value is only indexed when there is one */
495  if (value) {
497  wrd->index_type = "w";
498  wrd->term_buf = value;
499  wrd->term_len = strlen(value);
500  (*p->tokenAdd)(wrd);
501  }
503  wrd->index_type = "0";
504  wrd->term_buf = tag_path;
505  wrd->term_len = strlen(tag_path);
506  (*p->tokenAdd)(wrd);
507 }
508 
509 
510 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
511 {
512  size_t flen = 0;
513  data1_node *nn;
514 
515  /* we have to fetch the whole path to the data tag */
516  for (nn = n; nn; nn = nn->parent)
517  {
518  if (nn->which == DATA1N_tag)
519  {
520  size_t tlen = strlen(nn->u.tag.tag);
521  if (tlen + flen > (max - 2))
522  break;
523  memcpy(tag_path_full + flen, nn->u.tag.tag, tlen);
524  flen += tlen;
525  tag_path_full[flen++] = '/';
526  }
527  else
528  if (nn->which == DATA1N_root)
529  break;
530  }
531  tag_path_full[flen] = 0;
532 }
533 
534 
/*
 * Emit the index tokens for one data or tag node.
 *
 * termlist_only is cleared when the record has no abstract syntax or its
 * xpath_indexing is DATA1_XPATH_INDEXING_ENABLE; only then are the generic
 * tokens added under xpath_index in addition to what matching termlists
 * request.
 *
 * Data node: the node text is the term.  For every termlist found for the
 * node's tag path a copy of *wrd is made, the termlist's source expression
 * (if any) is applied, and the copy is printed (p->flagShowRecords) or
 * handed to p->tokenAdd.
 *
 * Tag node: the tag path is the term (type "0").  When xpath_is_start is 1
 * the attributes of the tag are indexed as well.
 *
 * NOTE(review): the embedded line numbers skip 639, 656 and 672, so three
 * statements are missing from this listing (presumably the declaration of
 * tll and two assignments - confirm against the upstream file).
 */
535 static void index_xpath(struct source_parser *sp, data1_node *n,
536  struct recExtractCtrl *p,
537  int level, RecWord *wrd,
538  char *xpath_index,
539  int xpath_is_start
540  )
541 {
542  int i;
543  char tag_path_full[1024];
544  int termlist_only = 1;
545  data1_termlist *tl;
546 
547  if (!n->root->u.root.absyn
548  ||
549  n->root->u.root.absyn->xpath_indexing == DATA1_XPATH_INDEXING_ENABLE)
550  {
551  termlist_only = 0;
552  }
553 
554 
555  switch (n->which)
556  {
557  case DATA1N_data:
558  wrd->term_buf = n->u.data.data;
559  wrd->term_len = n->u.data.len;
560 
561  mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
562 
563  /* If we have a matching termlist... */
564  if (n->root->u.root.absyn &&
565  (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
566  {
567  zint max_seqno = 0;
568  for (; tl; tl = tl->next)
569  {
570  /* need to copy recword because it may be changed */
571  RecWord wrd_tl;
572  wrd->index_type = tl->structure;
573  memcpy(&wrd_tl, wrd, sizeof(*wrd));
574  if (tl->source)
575  sp_parse(sp, n, &wrd_tl, tl->source);
576 
577  /* this is just the old fashioned attribute based index */
578  wrd_tl.index_name = tl->index_name;
579  if (p->flagShowRecords)
580  {
    /* this i shadows the function-level i */
581  int i;
582  printf("%*sIdx: [%s]", (level + 1) * 4, "",
583  tl->structure);
    /* NOTE(review): tl->source is tested for NULL a few lines up but
       is passed to %s unguarded here */
584  printf("%s %s", tl->index_name, tl->source);
585  printf(" XData:\"");
586  for (i = 0; i<wrd_tl.term_len && i < 40; i++)
587  fputc(wrd_tl.term_buf[i], stdout);
588  fputc('"', stdout);
589  if (wrd_tl.term_len > 40)
590  printf(" ...");
591  fputc('\n', stdout);
592  }
593  else
594  {
595  (*p->tokenAdd)(&wrd_tl);
596  }
    /* remember the highest sequence number seen on the copies */
597  if (wrd_tl.seqno > max_seqno)
598  max_seqno = wrd_tl.seqno;
599  }
600  if (max_seqno)
601  wrd->seqno = max_seqno;
602 
603  }
604  /* xpath indexing is done, if there was no termlist given,
605  or no ! in the termlist, and default indexing is enabled... */
606  if (!p->flagShowRecords && !termlist_only)
607  {
608  wrd->index_name = xpath_index;
609  wrd->index_type = "w";
610  (*p->tokenAdd)(wrd);
611  }
612  break;
613  case DATA1N_tag:
614  mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
615 
616  wrd->index_type = "0";
617  wrd->term_buf = tag_path_full;
618  wrd->term_len = strlen(tag_path_full);
619  wrd->index_name = xpath_index;
620  if (p->flagShowRecords)
621  {
622  printf("%*s tag=", (level + 1) * 4, "");
623  for (i = 0; i<wrd->term_len && i < 40; i++)
624  fputc(wrd->term_buf[i], stdout);
625  if (i == 40)
626  printf(" ..");
627  printf("\n");
628  }
629  else
630  {
631  data1_xattr *xp;
632 
633  if (!termlist_only)
634  (*p->tokenAdd)(wrd); /* index element path (AKA tag path) */
635 
636  if (xpath_is_start == 1) /* only for the starting tag... */
637  {
638 #define MAX_ATTR_COUNT 50
640 
    /* first pass: look up the termlist of every attribute and, unless
       termlist_only, add the attribute name and name=value tokens.
       NOTE(review): tll[i] below is written once per attribute and i is
       never checked against MAX_ATTR_COUNT; if tll is declared with that
       size (declaration not visible here) a tag with more attributes
       overruns it - confirm and bound the loops. */
641  int i = 0;
642  for (xp = n->u.tag.attributes; xp; xp = xp->next) {
643  char comb[512];
644  char attr_tag_path_full[1026];
645 
646  /* this could be cached as well */
647  yaz_snprintf(attr_tag_path_full, sizeof(attr_tag_path_full),
648  "@%s/%s", xp->name, tag_path_full);
649 
650  tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
651 
652  if (!termlist_only)
653  {
654  /* attribute (no value) */
655  wrd->index_type = "0";
657  wrd->term_buf = xp->name;
658  wrd->term_len = strlen(xp->name);
659 
660  wrd->seqno--;
661  (*p->tokenAdd)(wrd);
662 
    /* the combined "name=value" term is skipped when it would not
       fit in comb */
663  if (xp->value
664  &&
665  strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2)
666  {
667  /* attribute value exact */
668  strcpy(comb, xp->name);
669  strcat(comb, "=");
670  strcat(comb, xp->value);
671 
673  wrd->index_type = "0";
674  wrd->term_buf = comb;
675  wrd->term_len = strlen(comb);
676  wrd->seqno--;
677 
678  (*p->tokenAdd)(wrd);
679  }
680  }
681  i++;
682  }
683 
    /* second pass: apply the termlists collected in tll[] */
684  i = 0;
685  for (xp = n->u.tag.attributes; xp; xp = xp->next) {
686  data1_termlist *tl;
687  char attr_tag_path_full[1026];
688  int xpdone = 0;
689 
690  yaz_snprintf(attr_tag_path_full, sizeof(attr_tag_path_full),
691  "@%s/%s", xp->name, tag_path_full);
692  if ((tl = tll[i]))
693  {
694  /* If there is a termlist given (=xelm directive) */
695  for (; tl; tl = tl->next)
696  {
697  if (!tl->index_name)
698  {
699  /* add xpath index for the attribute */
700  index_xpath_attr(attr_tag_path_full, xp->name,
701  xp->value, tl->structure,
702  p, wrd);
703  xpdone = 1;
704  } else {
705  /* index attribute value (only path/@attr) */
706  if (xp->value)
707  {
708  wrd->index_name = tl->index_name;
709  wrd->index_type = tl->structure;
710  wrd->term_buf = xp->value;
711  wrd->term_len = strlen(xp->value);
712  (*p->tokenAdd)(wrd);
713  }
714  }
715  }
716  }
717  /* if there was no termlist for the given path,
718  or the termlist didn't have a ! element, index
719  the attribute as "w" */
720  if (!xpdone && !termlist_only)
721  {
722  index_xpath_attr(attr_tag_path_full, xp->name,
723  xp->value, "w", p, wrd);
724  }
725  i++;
726  }
727  }
728  }
729  }
730 }
731 
732 static void index_termlist(struct source_parser *sp, data1_node *par,
733  data1_node *n,
734  struct recExtractCtrl *p, int level, RecWord *wrd)
735 {
736  data1_termlist *tlist = 0;
737  /*
738  * cycle up towards the root until we find a tag with an att..
739  * this has the effect of indexing locally defined tags with
740  * the attribute of their ancestor in the record.
741  */
742 
743  while (!par->u.tag.element)
744  if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
745  break;
746  if (!par || !(tlist = par->u.tag.element->termlists))
747  return;
748  for (; tlist; tlist = tlist->next)
749  {
750  /* consider source */
751  wrd->term_buf = 0;
752  assert(tlist->source);
753  sp_parse(sp, n, wrd, tlist->source);
754 
755  if (wrd->term_buf && wrd->term_len)
756  {
757  if (p->flagShowRecords)
758  {
759  int i;
760  printf("%*sIdx: [%s]", (level + 1) * 4, "",
761  tlist->structure);
762  printf("%s %s", tlist->index_name, tlist->source);
763  printf(" XData:\"");
764  for (i = 0; i<wrd->term_len && i < 40; i++)
765  fputc(wrd->term_buf[i], stdout);
766  fputc('"', stdout);
767  if (wrd->term_len > 40)
768  printf(" ...");
769  fputc('\n', stdout);
770  }
771  else
772  {
773  wrd->index_type = tlist->structure;
774  wrd->index_name = tlist->index_name;
775  (*p->tokenAdd)(wrd);
776  }
777  }
778  }
779 }
780 
781 static int dumpkeys_r(struct source_parser *sp,
782  data1_node *n, struct recExtractCtrl *p, int level,
783  RecWord *wrd)
784 {
785  for (; n; n = n->next)
786  {
787  if (p->flagShowRecords) /* display element description to user */
788  {
789  if (n->which == DATA1N_root)
790  {
791  printf("%*s", level * 4, "");
792  printf("Record type: '%s'\n", n->u.root.type);
793  }
794  else if (n->which == DATA1N_tag)
795  {
796  data1_element *e;
797 
798  printf("%*s", level * 4, "");
799  if (!(e = n->u.tag.element))
800  printf("Local tag: '%s'\n", n->u.tag.tag);
801  else
802  {
803  printf("Elm: '%s' ", e->name);
804  if (e->tag)
805  {
806  data1_tag *t = e->tag;
807 
808  printf("TagNam: '%s' ", t->names->name);
809  printf("(");
810  if (t->tagset)
811  printf("%s[%d],", t->tagset->name, t->tagset->type);
812  else
813  printf("?,");
814  if (t->which == DATA1T_numeric)
815  printf("%d)", t->value.numeric);
816  else
817  printf("'%s')", t->value.string);
818  }
819  printf("\n");
820  }
821  }
822  }
823 
824  if (n->which == DATA1N_tag)
825  {
826  index_termlist(sp, n, n, p, level, wrd);
827  /* index start tag */
828  if (n->root->u.root.absyn)
829  index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
830  1 /* is start */);
831  }
832 
833  if (n->child)
834  if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
835  return -1;
836 
837 
838  if (n->which == DATA1N_data)
839  {
840  data1_node *par = get_parent_tag(p->dh, n);
841 
842  if (p->flagShowRecords)
843  {
844  printf("%*s", level * 4, "");
845  printf("Data: ");
846  if (n->u.data.len > 256)
847  printf("'%.170s ... %.70s'\n", n->u.data.data,
848  n->u.data.data + n->u.data.len-70);
849  else if (n->u.data.len > 0)
850  printf("'%.*s'\n", n->u.data.len, n->u.data.data);
851  else
852  printf("NULL\n");
853  }
854 
855  if (par)
856  index_termlist(sp, par, n, p, level, wrd);
857 
858  index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
859  0 /* is start */);
860  }
861 
862  if (n->which == DATA1N_tag)
863  {
864  /* index end tag */
865  index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
866  0 /* is start */);
867  }
868 
869  if (p->flagShowRecords && n->which == DATA1N_root)
870  {
871  printf("%*s-------------\n\n", level * 4, "");
872  }
873  }
874  return 0;
875 }
876 
877 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
878 {
879  struct source_parser *sp = source_parser_create();
880  int r = dumpkeys_r(sp, n, p, 0, wrd);
882  return r;
883 }
884 
886 {
887  RecWord wrd;
888 
889  if (n->u.root.absyn && n->u.root.absyn->oid)
890  (*p->schemaAdd)(p, n->u.root.absyn->oid);
891  (*p->init)(p, &wrd);
892 
893  /* data1_pr_tree(p->dh, n, stdout); */
894 
895  return dumpkeys(n, p, &wrd);
896 }
897 
/*
 * Read one record with grs_read and index it.
 *
 * The reader is handed the stream, memory pool and data1 handle through a
 * grs_read_info.  Returns RECCTRL_EXTRACT_EOF when the reader delivers no
 * tree; otherwise the schema OID (if any) is registered, text nodes are
 * concatenated, the tree is converted to UTF-8 and passed to dumpkeys.
 *
 * NOTE(review): the embedded line numbers skip 922 and 931.  As shown, the
 * branch taken when dumpkeys fails is empty and the function still returns
 * RECCTRL_EXTRACT_OK - presumably an error return was lost from this
 * listing; confirm against the upstream file.
 */
898 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
899  NMEM mem,
900  data1_node *(*grs_read)(struct grs_read_info *))
901 {
902  data1_node *n;
903  struct grs_read_info gri;
904  RecWord wrd;
905 
906  gri.stream = p->stream;
907  gri.mem = mem;
908  gri.dh = p->dh;
909  gri.clientData = clientData;
910 
911  n = (*grs_read)(&gri);
912  if (!n)
913  return RECCTRL_EXTRACT_EOF;
914  if (n->u.root.absyn && n->u.root.absyn->oid)
915  (*p->schemaAdd)(p, n->u.root.absyn->oid);
916  data1_concat_text(p->dh, mem, n);
917 
918  /* ensure our data1 tree is UTF-8 */
919  data1_iconv(p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
920 
921 
923 
924 #if 0
925  data1_pr_tree(p->dh, n, stdout);
926 #endif
927 
    /* let the caller initialise the word record before any token is added */
928  (*p->init)(p, &wrd);
929  if (dumpkeys(n, p, &wrd) < 0)
930  {
932  }
933  return RECCTRL_EXTRACT_OK;
934 }
935 
937  data1_node *(*grs_read)(struct grs_read_info *))
938 {
939  int ret;
940  NMEM mem = nmem_create();
941  ret = grs_extract_sub(clientData, p, mem, grs_read);
942  nmem_destroy(mem);
943  return ret;
944 }
945 
946 /*
947  * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
948  */
949 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
950  char **addinfo, ODR o)
951 {
952  data1_esetname *eset;
953  Z_Espec1 *espec = 0;
954  Z_ElementSpec *p;
955 
956  switch (c->which)
957  {
958  case Z_RecordComp_simple:
959  if (c->u.simple->which != Z_ElementSetNames_generic)
960  return 26; /* only generic form supported. Fix this later */
961  if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
962  c->u.simple->u.generic)))
963  {
964  yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
965  *addinfo = odr_strdup(o, c->u.simple->u.generic);
966  return 25; /* invalid esetname */
967  }
968  yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
969  c->u.simple->u.generic);
970  espec = eset->spec;
971  break;
972  case Z_RecordComp_complex:
973  if (c->u.complex->generic)
974  {
975  /* insert check for schema */
976  if ((p = c->u.complex->generic->elementSpec))
977  {
978  switch (p->which)
979  {
980  case Z_ElementSpec_elementSetName:
981  if (!(eset =
982  data1_getesetbyname(dh, n->u.root.absyn,
983  p->u.elementSetName)))
984  {
985  yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
986  p->u.elementSetName);
987  *addinfo = odr_strdup(o, p->u.elementSetName);
988  return 25; /* invalid esetname */
989  }
990  yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
991  p->u.elementSetName);
992  espec = eset->spec;
993  break;
994  case Z_ElementSpec_externalSpec:
995  if (p->u.externalSpec->which == Z_External_espec1)
996  {
997  yaz_log(YLOG_DEBUG, "Got Espec-1");
998  espec = p->u.externalSpec-> u.espec1;
999  }
1000  else
1001  {
1002  yaz_log(YLOG_LOG, "Unknown external espec.");
1003  return 25; /* bad. what is proper diagnostic? */
1004  }
1005  break;
1006  }
1007  }
1008  }
1009  else
1010  return 26; /* fix */
1011  }
1012  if (espec)
1013  {
1014  yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1015  return data1_doespec1(dh, n, espec);
1016  }
1017  else
1018  {
1019  yaz_log(YLOG_DEBUG, "Element: all match");
1020  return -1;
1021  }
1022 }
1023 
1024 /* Add Zebra info in separate namespace ...
1025  <root
1026  ...
1027  <idzebra xmlns="http://www.indexdata.dk/zebra/">
1028  <size>359</size>
1029  <localnumber>447</localnumber>
1030  <filename>records/genera.xml</filename>
1031  </idzebra>
1032  </root>
1033 */
1034 
/*
 * Append an <idzebra> element, carrying the Zebra namespace as its xmlns
 * attribute, to the tag node top.  Its children are "size", "score" (only
 * when p->score is not -1), "localnumber" and "filename" (only when
 * p->fname is set), each preceded by a whitespace text node.  All nodes
 * are allocated from mem.
 *
 * NOTE(review): i2 and i4 are identical in this listing although their
 * names suggest 2- and 4-space indents; the whitespace inside the literals
 * may have been collapsed - confirm against the upstream file.
 */
1035 static void zebra_xml_metadata(struct recRetrieveCtrl *p, data1_node *top,
1036  NMEM mem)
1037 {
1038  const char *idzebra_ns[3];
1039  const char *i2 = "\n ";
1040  const char *i4 = "\n ";
1041  data1_node *n;
1042 
    /* attribute list for the new tag: name, value, then a 0 terminator */
1043  idzebra_ns[0] = "xmlns";
1044  idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1045  idzebra_ns[2] = 0;
1046 
1047  data1_mk_text(p->dh, mem, i2, top);
1048 
1049  n = data1_mk_tag(p->dh, mem, "idzebra", idzebra_ns, top);
1050 
1051  data1_mk_text(p->dh, mem, "\n", top);
1052 
1053  data1_mk_text(p->dh, mem, i4, n);
1054 
1055  data1_mk_tag_data_int(p->dh, n, "size", p->recordSize, mem);
1056 
1057  if (p->score != -1)
1058  {
1059  data1_mk_text(p->dh, mem, i4, n);
1060  data1_mk_tag_data_int(p->dh, n, "score", p->score, mem);
1061  }
1062  data1_mk_text(p->dh, mem, i4, n);
1063  data1_mk_tag_data_zint(p->dh, n, "localnumber", p->localno, mem);
1064  if (p->fname)
1065  {
1066  data1_mk_text(p->dh, mem, i4, n);
1067  data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1068  }
1069  data1_mk_text(p->dh, mem, i2, n);
1070 }
1071 
1073  data1_node *(*grs_read)(struct grs_read_info *))
1074 {
1075  data1_node *node = 0, *onode = 0, *top;
1076  data1_node *dnew;
1077  data1_maptab *map;
1078  int res, selected = 0;
1079  NMEM mem;
1080  struct grs_read_info gri;
1081  const char *tagname;
1082 
1083  const Odr_oid *requested_schema = 0;
1084  data1_marctab *marctab;
1085  int dummy;
1086 
1087  mem = nmem_create();
1088  gri.stream = p->stream;
1089  gri.mem = mem;
1090  gri.dh = p->dh;
1091  gri.clientData = clientData;
1092 
1093  yaz_log(YLOG_DEBUG, "grs_retrieve");
1094  node = (*grs_read)(&gri);
1095  if (!node)
1096  {
1097  p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1098  nmem_destroy(mem);
1099  return 0;
1100  }
1101  data1_concat_text(p->dh, mem, node);
1102 
1103  data1_remove_idzebra_subtree(p->dh, node);
1104 
1105 #if 0
1106  data1_pr_tree(p->dh, node, stdout);
1107 #endif
1108  top = data1_get_root_tag(p->dh, node);
1109 
1110  yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1111  tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1112  if (tagname &&
1113  (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1114  {
1115  data1_set_data_zint(p->dh, dnew, mem, p->recordSize);
1116  }
1117 
1118  tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1119  if (tagname && p->score >= 0 &&
1120  (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1121  {
1122  yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1123  data1_set_data_zint(p->dh, dnew, mem, p->score);
1124  }
1125 
1126  tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1127  "localControlNumber");
1128  if (tagname && p->localno > 0 &&
1129  (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1130  {
1131  yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1132  data1_set_data_zint(p->dh, dnew, mem, p->localno);
1133  }
1134 
1135  if (!p->input_format)
1136  { /* SUTRS is default input_format */
1137  p->input_format = yaz_oid_recsyn_sutrs;
1138  }
1139  assert(p->input_format);
1140 
1141  if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
1142  zebra_xml_metadata(p, top, mem);
1143 
1144 #if 0
1145  data1_pr_tree(p->dh, node, stdout);
1146 #endif
1147  if (p->comp && p->comp->which == Z_RecordComp_complex &&
1148  p->comp->u.complex->generic &&
1149  p->comp->u.complex->generic->which == Z_Schema_oid &&
1150  p->comp->u.complex->generic->schema.oid)
1151  {
1152  requested_schema = p->comp->u.complex->generic->schema.oid;
1153  }
1154  /* If schema has been specified, map if possible, then check that
1155  * we got the right one
1156  */
1157  if (requested_schema)
1158  {
1159  yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1160  for (map = node->u.root.absyn->maptabs; map; map = map->next)
1161  {
1162  if (!oid_oidcmp(map->oid, requested_schema))
1163  {
1164  onode = node;
1165  if (!(node = data1_map_record(p->dh, onode, map, mem)))
1166  {
1167  p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1168  nmem_destroy(mem);
1169  return 0;
1170  }
1171  break;
1172  }
1173  }
1174  if (node->u.root.absyn
1175  && oid_oidcmp(requested_schema, node->u.root.absyn->oid))
1176  {
1177  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1178  nmem_destroy(mem);
1179  return 0;
1180  }
1181  }
1182  /*
1183  * Does the requested format match a known syntax-mapping? (this reflects
1184  * the overlap of schema and formatting which is inherent in the MARC
1185  * family)
1186  */
1187  yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1188  if (node->u.root.absyn)
1189  for (map = node->u.root.absyn->maptabs; map; map = map->next)
1190  {
1191  if (!oid_oidcmp(map->oid, p->input_format))
1192  {
1193  onode = node;
1194  if (!(node = data1_map_record(p->dh, onode, map, mem)))
1195  {
1196  p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1197  nmem_destroy(mem);
1198  return 0;
1199  }
1200  break;
1201  }
1202  }
1203  yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1204  if (node->u.root.absyn && node->u.root.absyn->oid
1205  && !oid_oidcmp(p->input_format, yaz_oid_recsyn_grs_1))
1206  {
1207  char oid_str[OID_STR_MAX];
1208  char *dot_str = oid_oid_to_dotstring(node->u.root.absyn->oid, oid_str);
1209 
1210  if (dot_str && (dnew = data1_mk_tag_data_wd(p->dh, top,
1211  "schemaIdentifier", mem)))
1212  {
1213  dnew->u.data.what = DATA1I_oid;
1214  dnew->u.data.data = (char *) nmem_strdup(mem, dot_str);
1215  dnew->u.data.len = strlen(dot_str);
1216  }
1217  }
1218 
1219  yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1220  if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1221  p->odr)) > 0)
1222  {
1223  p->diagnostic = res;
1224  nmem_destroy(mem);
1225  return 0;
1226  }
1227  else if (p->comp && !res)
1228  selected = 1;
1229 
1230 #if 0
1231  data1_pr_tree(p->dh, node, stdout);
1232 #endif
1233  yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1234 
1235  p->output_format = p->input_format;
1236 
1237  assert(p->input_format);
1238  if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
1239  {
1240 #if 0
1241  data1_pr_tree(p->dh, node, stdout);
1242 #endif
1243  /* default output encoding for XML is UTF-8 */
1244  data1_iconv(p->dh, mem, node,
1245  p->encoding ? p->encoding : "UTF-8",
1246  data1_get_encoding(p->dh, node));
1247 
1248  if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1249  &p->rec_len)))
1250  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1251  else
1252  {
1253  char *new_buf = (char*) odr_malloc(p->odr, p->rec_len);
1254  memcpy(new_buf, p->rec_buf, p->rec_len);
1255  p->rec_buf = new_buf;
1256  }
1257  }
1258  else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_grs_1))
1259  {
1260  data1_iconv(p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1261  dummy = 0;
1262  if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1263  p->odr, &dummy)))
1264  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1265  else
1266  p->rec_len = -1;
1267  }
1268  else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_explain))
1269  {
1270  /* ensure our data1 tree is UTF-8 */
1271  data1_iconv(p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1272 
1273  if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1274  p->odr)))
1275  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1276  else
1277  p->rec_len = -1;
1278  }
1279  else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_summary))
1280  {
1281  /* ensure our data1 tree is UTF-8 */
1282  data1_iconv(p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1283  if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1284  p->odr)))
1285  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1286  else
1287  p->rec_len = -1;
1288  }
1289  else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_sutrs))
1290  {
1291  if (p->encoding)
1292  data1_iconv(p->dh, mem, node, p->encoding,
1293  data1_get_encoding(p->dh, node));
1294  if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1295  &p->rec_len)))
1296  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1297  else
1298  {
1299  char *new_buf = (char*) odr_malloc(p->odr, p->rec_len);
1300  memcpy(new_buf, p->rec_buf, p->rec_len);
1301  p->rec_buf = new_buf;
1302  }
1303  }
1304  else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_soif))
1305  {
1306  if (p->encoding)
1307  data1_iconv(p->dh, mem, node, p->encoding,
1308  data1_get_encoding(p->dh, node));
1309  if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1310  &p->rec_len)))
1311  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1312  else
1313  {
1314  char *new_buf = (char*) odr_malloc(p->odr, p->rec_len);
1315  memcpy(new_buf, p->rec_buf, p->rec_len);
1316  p->rec_buf = new_buf;
1317  }
1318  }
1319  else
1320  {
1321  if (!node->u.root.absyn)
1322  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1323  else
1324  {
1325  for (marctab = node->u.root.absyn->marc; marctab;
1326  marctab = marctab->next)
1327  if (marctab->oid && !oid_oidcmp(marctab->oid, p->input_format))
1328  break;
1329  if (!marctab)
1330  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1331  else
1332  {
1333  if (p->encoding)
1334  data1_iconv(p->dh, mem, node, p->encoding,
1335  data1_get_encoding(p->dh, node));
1336  if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1337  selected, &p->rec_len)))
1338  p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX;
1339  else
1340  {
1341  char *new_buf = (char*) odr_malloc(p->odr, p->rec_len);
1342  memcpy(new_buf, p->rec_buf, p->rec_len);
1343  p->rec_buf = new_buf;
1344  }
1345  }
1346  }
1347  }
1348  nmem_destroy(mem);
1349  return 0;
1350 }
1351 
1352 /*
1353  * Local variables:
1354  * c-basic-offset: 4
1355  * c-file-style: "Stroustrup"
1356  * indent-tabs-mode: nil
1357  * End:
1358  * vim: shiftwidth=4 tabstop=8 expandtab
1359  */
1360 
data1_node * data1_mk_tag(data1_handle dh, NMEM nmem, const char *tag, const char **attr, data1_node *at)
Definition: d1_read.c:295
int data1_iconv(data1_handle dh, NMEM m, data1_node *n, const char *tocode, const char *fromcode)
Definition: d1_read.c:1058
data1_node * data1_mk_tag_data_text(data1_handle dh, data1_node *at, const char *tag, const char *str, NMEM nmem)
Definition: d1_read.c:526
void data1_concat_text(data1_handle dh, NMEM m, data1_node *n)
Definition: d1_read.c:1107
data1_node * data1_mk_tag_data_wd(data1_handle dh, data1_node *at, const char *tagname, NMEM m)
Definition: d1_read.c:461
char * data1_nodetosoif(data1_handle dh, data1_node *n, int select, int *len)
Definition: d1_soif.c:74
char * data1_nodetoidsgml(data1_handle dh, data1_node *n, int select, int *len)
Definition: d1_write.c:230
#define DATA1I_oid
Definition: data1.h:318
void data1_set_data_zint(data1_handle dh, data1_node *res, NMEM m, zint num)
Definition: d1_read.c:403
data1_node * data1_get_root_tag(data1_handle dh, data1_node *n)
Definition: d1_read.c:36
void data1_pr_tree(data1_handle dh, data1_node *n, FILE *out)
Definition: d1_prtree.c:134
#define DATA1N_tag
Definition: data1.h:276
#define DATA1N_data
Definition: data1.h:278
#define DATA1N_root
Definition: data1.h:274
Z_ExplainRecord * data1_nodetoexplain(data1_handle dh, data1_node *n, int select, ODR o)
Definition: d1_expout.c:1352
char * data1_nodetomarc(data1_handle dh, data1_marctab *p, data1_node *n, int selected, int *len)
Definition: d1_marc.c:469
char * data1_nodetobuf(data1_handle dh, data1_node *n, int select, int *len)
Definition: d1_sutrs.c:136
data1_esetname * data1_getesetbyname(data1_handle dh, data1_absyn *a, const char *name)
Definition: d1_absyn.c:298
Z_BriefBib * data1_nodetosummary(data1_handle dh, data1_node *n, int select, ODR o)
Definition: d1_sumout.c:55
data1_node * get_parent_tag(data1_handle dh, data1_node *n)
Definition: d1_read.c:53
data1_node * data1_mk_tag_data_zint(data1_handle dh, data1_node *at, const char *tag, zint num, NMEM nmem)
Definition: d1_read.c:481
data1_node * data1_map_record(data1_handle dh, data1_node *n, data1_maptab *map, NMEM m)
Definition: d1_map.c:322
Z_GenericRecord * data1_nodetogr(data1_handle dh, data1_node *n, int select, ODR o, int *len)
Definition: d1_grs.c:376
int data1_doespec1(data1_handle dh, data1_node *n, Z_Espec1 *e)
Definition: d1_doespec.c:356
@ DATA1_XPATH_INDEXING_ENABLE
Definition: data1.h:349
void data1_remove_idzebra_subtree(data1_handle dh, data1_node *n)
Definition: d1_utils.c:64
const char * data1_get_encoding(data1_handle dh, data1_node *n)
Definition: d1_read.c:1039
const char * data1_systag_lookup(data1_absyn *absyn, const char *tag, const char *default_value)
Definition: d1_absyn.c:610
data1_node * data1_mk_text(data1_handle dh, NMEM mem, const char *buf, data1_node *parent)
Definition: d1_read.c:347
#define DATA1T_numeric
Definition: data1.h:204
data1_node * data1_mk_tag_data_int(data1_handle dh, data1_node *at, const char *tag, int num, NMEM nmem)
Definition: d1_read.c:494
#define ZEBRA_XPATH_ELM_END
Definition: recctrl.h:36
#define ZEBRA_XPATH_ATTR_CDATA
Definition: recctrl.h:45
#define RECCTRL_EXTRACT_EOF
Definition: recctrl.h:164
#define ZEBRA_XPATH_CDATA
Definition: recctrl.h:39
#define ZEBRA_XPATH_ELM_BEGIN
Definition: recctrl.h:33
#define ZEBRA_XPATH_ATTR_NAME
Definition: recctrl.h:42
#define RECCTRL_EXTRACT_ERROR_GENERIC
Definition: recctrl.h:165
#define RECCTRL_EXTRACT_OK
Definition: recctrl.h:163
static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
Definition: recgrs.c:171
static void index_termlist(struct source_parser *sp, data1_node *par, data1_node *n, struct recExtractCtrl *p, int level, RecWord *wrd)
Definition: recgrs.c:732
static int dumpkeys_r(struct source_parser *sp, data1_node *n, struct recExtractCtrl *p, int level, RecWord *wrd)
Definition: recgrs.c:781
#define MAX_ATTR_COUNT
int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
Definition: recgrs.c:286
static void source_parser_destroy(struct source_parser *sp)
Definition: recgrs.c:265
static int dfa_match_first(struct DFA_state **dfaar, const char *text)
Definition: recgrs.c:348
static struct source_parser * source_parser_create(void)
Definition: recgrs.c:257
static int sp_lex(struct source_parser *sp)
Definition: recgrs.c:47
static int sp_parse(struct source_parser *sp, data1_node *n, RecWord *wrd, const char *src)
Definition: recgrs.c:273
data1_termlist * xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
Definition: recgrs.c:396
static void index_xpath_attr(char *tag_path, char *name, char *value, char *structure, struct recExtractCtrl *p, RecWord *wrd)
Definition: recgrs.c:485
static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
Definition: recgrs.c:510
static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c, char **addinfo, ODR o)
Definition: recgrs.c:949
static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
Definition: recgrs.c:71
int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, data1_node *(*grs_read)(struct grs_read_info *))
Definition: recgrs.c:1072
static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
Definition: recgrs.c:877
int zebra_grs_extract(void *clientData, struct recExtractCtrl *p, data1_node *(*grs_read)(struct grs_read_info *))
Definition: recgrs.c:936
static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
Definition: recgrs.c:128
static void zebra_xml_metadata(struct recRetrieveCtrl *p, data1_node *top, NMEM mem)
Definition: recgrs.c:1035
static int grs_extract_sub(void *clientData, struct recExtractCtrl *p, NMEM mem, data1_node *(*grs_read)(struct grs_read_info *))
Definition: recgrs.c:898
static void index_xpath(struct source_parser *sp, data1_node *n, struct recExtractCtrl *p, int level, RecWord *wrd, char *xpath_index, int xpath_is_start)
Definition: recgrs.c:535
int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
Definition: recgrs.c:885
Definition: dfa.h:42
short rule_no
Definition: dfa.h:49
short tran_no
Definition: dfa.h:48
struct DFA_tran * trans
Definition: dfa.h:45
Definition: dfa.h:30
unsigned short to
Definition: dfa.h:32
unsigned char ch[2]
Definition: dfa.h:31
struct DFA_state ** states
Definition: dfa.h:55
const char * term_buf
Definition: recctrl.h:56
const char * index_type
Definition: recctrl.h:52
zint seqno
Definition: recctrl.h:60
int term_len
Definition: recctrl.h:58
const char * index_name
Definition: recctrl.h:54
struct data1_xpelement * xp_elements
Definition: d1_absyn.h:58
data1_tag * tag
Definition: data1.h:245
char * name
Definition: data1.h:244
Z_Espec1 * spec
Definition: data1.h:162
Odr_oid * oid
Definition: data1.h:105
struct data1_maptab * next
Definition: data1.h:108
struct data1_marctab * next
Definition: data1.h:156
Odr_oid * oid
Definition: data1.h:140
char * name
Definition: data1.h:114
struct data1_node::@2::@3 root
struct data1_node * parent
Definition: data1.h:343
struct data1_node * child
Definition: data1.h:341
char * tag
Definition: data1.h:296
char * data
Definition: data1.h:307
struct data1_node * next
Definition: data1.h:340
union data1_node::@2 u
int which
Definition: data1.h:285
int which
Definition: data1.h:206
int numeric
Definition: data1.h:209
struct data1_tagset * tagset
Definition: data1.h:214
data1_name * names
Definition: data1.h:203
union data1_tag::@1 value
char * string
Definition: data1.h:210
char * name
Definition: data1.h:223
int type
Definition: data1.h:222
char * structure
Definition: data1.h:233
struct data1_termlist * next
Definition: data1.h:235
char * index_name
Definition: data1.h:232
char * source
Definition: data1.h:234
char * value
Definition: data1.h:261
char * name
Definition: data1.h:260
struct data1_xattr * next
Definition: data1.h:262
data1_termlist * termlists
Definition: d1_absyn.h:38
struct xpath_location_step xpath[XPATH_STEP_COUNT]
Definition: d1_absyn.h:34
struct data1_xpelement * match_next
Definition: d1_absyn.h:44
struct DFA * dfa
Definition: d1_absyn.h:37
struct data1_xpelement * next
Definition: d1_absyn.h:39
data1_handle dh
Definition: recgrs.h:31
struct ZebraRecStream * stream
Definition: recgrs.h:28
void * clientData
Definition: recgrs.h:29
NMEM mem
Definition: recgrs.h:30
record extract for indexing
Definition: recctrl.h:101
int flagShowRecords
Definition: recctrl.h:108
void(* init)(struct recExtractCtrl *p, RecWord *w)
Definition: recctrl.h:103
void(* tokenAdd)(RecWord *w)
Definition: recctrl.h:105
void(* schemaAdd)(struct recExtractCtrl *p, Odr_oid *oid)
Definition: recctrl.h:111
data1_handle dh
Definition: recctrl.h:112
struct ZebraRecStream * stream
Definition: recctrl.h:102
const Odr_oid * input_format
Definition: recctrl.h:123
data1_handle dh
Definition: recctrl.h:131
char * addinfo
Definition: recctrl.h:138
Z_RecordComposition * comp
Definition: recctrl.h:124
struct ZebraRecStream * stream
Definition: recctrl.h:119
const Odr_oid * output_format
Definition: recctrl.h:134
char * encoding
Definition: recctrl.h:125
char * fname
Definition: recctrl.h:130
void * rec_buf
Definition: recctrl.h:135
const char * src
Definition: recgrs.c:42
int lookahead
Definition: recgrs.c:43
NMEM nmem
Definition: recgrs.c:44
const char * tok
Definition: recgrs.c:41
struct xpath_predicate * predicate
Definition: zebra_xpath.h:46
union xpath_predicate::@8 u
struct xpath_predicate::@8::@9 relation
struct xpath_predicate::@8::@10 boolean
long zint
Zebra integer.
Definition: util.h:66
#define XPATH_PREDICATE_BOOLEAN
Definition: zebra_xpath.h:35
#define XPATH_PREDICATE_RELATION
Definition: zebra_xpath.h:29