IDZEBRA  2.1.2
rpnsearch.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32 
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39 
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42 
43 #define TERMSET_DISABLE 1
44 
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47  struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48  const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50  if (out && *out)
51  {
52  const char *outp = *out;
53  yaz_log(YLOG_LOG, "---");
54  while (*outp)
55  {
56  yaz_log(YLOG_LOG, "%02X", *outp);
57  outp++;
58  }
59  }
60 #endif
61  return out;
62 }
63 
65  struct rpn_char_map_info *map_info)
66 {
67  map_info->zm = zm;
68  if (zebra_maps_is_icu(zm))
69  dict_grep_cmap(reg->dict, 0, 0);
70  else
71  dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73 
74 #define TERM_COUNT
75 
76 struct grep_info {
77 #ifdef TERM_COUNT
78  int *term_no;
79 #endif
83  int trunc_max;
85  const char *index_type;
87 };
88 
89 static int add_isam_p(const char *name, const char *info,
90  struct grep_info *p)
91 {
92  if (!log_level_set)
93  {
94  log_level_rpn = yaz_log_module_level("rpn");
95  log_level_set = 1;
96  }
97  /* we may have to stop this madness.. NOTE: -1 so that if
98  truncmax == trunxlimit we do *not* generate result sets */
99  if (p->isam_p_indx >= p->trunc_max - 1)
100  return 1;
101 
102  if (p->isam_p_indx == p->isam_p_size)
103  {
104  ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT
106  int *new_term_no;
107 #endif
108  p->isam_p_size = 2*p->isam_p_size + 100;
109  new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110  p->isam_p_size);
111  if (p->isam_p_buf)
112  {
113  memcpy(new_isam_p_buf, p->isam_p_buf,
114  p->isam_p_indx * sizeof(*p->isam_p_buf));
115  xfree(p->isam_p_buf);
116  }
117  p->isam_p_buf = new_isam_p_buf;
118 
119 #ifdef TERM_COUNT
120  new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121  if (p->term_no)
122  {
123  memcpy(new_term_no, p->isam_p_buf,
124  p->isam_p_indx * sizeof(*p->term_no));
125  xfree(p->term_no);
126  }
127  p->term_no = new_term_no;
128 #endif
129  }
130  assert(*info == sizeof(*p->isam_p_buf));
131  memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132 
133  if (p->termset)
134  {
135  const char *db;
136  char term_tmp[IT_MAX_WORD];
137  int ord = 0;
138  const char *index_name;
139  int len = key_SU_decode(&ord, (const unsigned char *) name);
140 
141  zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142  yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
144  ord, 0 /* index_type */, &db, &index_name);
145  yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
146 
147  resultSetAddTerm(p->zh, p->termset, name[len], db,
148  index_name, term_tmp);
149  }
150  (p->isam_p_indx)++;
151  return 0;
152 }
153 
/* Adapter with an opaque client pointer: views p as a struct grep_info
   and forwards to add_isam_p(), returning its result. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    return add_isam_p(name, info, gi);
}
158 
159 static int term_pre(zebra_map_t zm, const char **src,
160  const char *ct1, int first)
161 {
162  const char *s1, *s0 = *src;
163  const char **map;
164 
165  /* skip white space */
166  while (*s0)
167  {
168  if (ct1 && strchr(ct1, *s0))
169  break;
170  s1 = s0;
171  map = zebra_maps_input(zm, &s1, strlen(s1), first);
172  if (**map != *CHR_SPACE)
173  break;
174  s0 = s1;
175  }
176  *src = s0;
177  return *s0;
178 }
179 
180 
/* esc_str: render in_buf as a readable dump for logging.
 * Each input byte becomes the 5 characters "XX:c " where XX is the byte
 * in upper-case hex and c is the byte itself, or '?' when it is outside
 * the printable ASCII range 32..126.
 * out_buf/out_size: destination; out_size must exceed 20
 * in_buf/in_size:   bytes to dump
 * When the output grows beyond out_size-20 characters, ".." is appended
 * and the remaining input is dropped. The result is NUL terminated.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* characters written so far, excluding the NUL */
    int k;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    *out_buf = '\0';
    for (k = 0; k < in_size; k++)
    {
        int c = in_buf[k] & 0xff;
        int pc = (c < 32 || c > 126) ? '?' : c;
        /* used <= out_size-20 here, so there is always room for one entry */
        int n = snprintf(out_buf + used, out_size - used, "%02X:%c ", c, pc);

        if (n < 0)
            break;
        used += (size_t) n;
        if (used > out_size-20)
        {
            /* at most out_size-15 used; ".." plus NUL still fits */
            memcpy(out_buf + used, "..", 3);
            break;
        }
    }
}
206 
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
208 
209 static void add_non_space(const char *start, const char *end,
210  WRBUF term_dict,
211  WRBUF display_term,
212  const char **map, int q_map_match)
213 {
214  size_t sz = end - start;
215 
216  wrbuf_write(display_term, start, sz);
217  if (!q_map_match)
218  {
219  while (start < end)
220  {
221  if (strchr(REGEX_CHARS, *start))
222  wrbuf_putc(term_dict, '\\');
223  wrbuf_putc(term_dict, *start);
224  start++;
225  }
226  }
227  else
228  {
229  char tmpbuf[80];
230  esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231 
232  wrbuf_puts(term_dict, map[0]);
233  }
234 }
235 
236 
/* ICU sort keys seem to be laid out as
     basechars \x01 accents \x01 length
   Only the basechars part is kept, so that callers can right truncate
   on it. Accents are ignored, which may produce false hits.

   buf: sort key; i: its length.
   Returns the index of the second \x01 counted from the end, or 0
   when fewer than two separators are found.
*/
static size_t icu_basechars(const char *buf, size_t i)
{
    int section;

    /* walk back over two \x01-terminated sections: length, then accents */
    for (section = 0; section < 2; section++)
    {
        while (i > 0)
        {
            i--;
            if (buf[i] == '\x01')
                break;
        }
    }
    return i; /* what precedes index i is basechars */
}
250 
251 static int term_102_icu(zebra_map_t zm,
252  const char **src, WRBUF term_dict, int space_split,
253  WRBUF display_term)
254 {
255  int no_terms = 0;
256  const char *s0 = *src, *s1;
257  while (*s0 == ' ')
258  s0++;
259  s1 = s0;
260  for (;;)
261  {
262  if (*s1 == ' ' && space_split)
263  break;
264  else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
265  s1++;
266  else
267  {
268  /* EOF or regex reserved char */
269  if (s0 != s1)
270  {
271  const char *res_buf = 0;
272  size_t res_len = 0;
273  const char *display_buf;
274  size_t display_len;
275 
276  zebra_map_tokenize_start(zm, s0, s1 - s0);
277 
278  if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
279  &display_buf, &display_len))
280  {
281  size_t i;
282  res_len = icu_basechars(res_buf, res_len);
283  for (i = 0; i < res_len; i++)
284  {
285  if (strchr(REGEX_CHARS "\\", res_buf[i]))
286  wrbuf_putc(term_dict, '\\');
287  if (res_buf[i] < 32)
288  wrbuf_putc(term_dict, '\x01');
289 
290  wrbuf_putc(term_dict, res_buf[i]);
291  }
292  wrbuf_write(display_term, display_buf, display_len);
293 
294  no_terms++;
295  }
296  }
297  if (*s1 == '\0')
298  break;
299 
300  wrbuf_putc(term_dict, *s1);
301  wrbuf_putc(display_term, *s1);
302 
303  s1++;
304  s0 = s1;
305  }
306  }
307  if (no_terms)
308  wrbuf_puts(term_dict, "\x01\x01.*");
309  *src = s1;
310  return no_terms;
311 }
312 
313 static int term_100_icu(zebra_map_t zm,
314  const char **src, WRBUF term_dict,
315  WRBUF display_term,
316  int mode,
317  size_t token_number)
318 {
319  size_t i;
320  const char *res_buf = 0;
321  size_t res_len = 0;
322  const char *display_buf;
323  size_t display_len;
324 
325  zebra_map_tokenize_start(zm, *src, strlen(*src));
326  for (i = 0; i <= token_number; i++)
327  {
328  if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
329  &display_buf, &display_len))
330  return 0;
331  }
332  wrbuf_write(display_term, display_buf, display_len);
333  if (mode)
334  {
335  res_len = icu_basechars(res_buf, res_len);
336  }
337  if (mode & 2)
338  wrbuf_puts(term_dict, ".*");
339  for (i = 0; i < res_len; i++)
340  {
341  if (strchr(REGEX_CHARS "\\", res_buf[i]))
342  wrbuf_putc(term_dict, '\\');
343  if (res_buf[i] < 32)
344  wrbuf_putc(term_dict, '\x01');
345 
346  wrbuf_putc(term_dict, res_buf[i]);
347  }
348  if (mode & 1)
349  wrbuf_puts(term_dict, ".*");
350  else if (mode)
351  wrbuf_puts(term_dict, "\x01\x01.*");
352  return 1;
353 }
354 
355 /* term_100: handle term, where trunc = none(no operators at all) */
356 static int term_100(zebra_map_t zm,
357  const char **src, WRBUF term_dict, int space_split,
358  WRBUF display_term)
359 {
360  const char *s0;
361  const char **map;
362  int i = 0;
363 
364  const char *space_start = 0;
365  const char *space_end = 0;
366 
367  if (!term_pre(zm, src, 0, !space_split))
368  return 0;
369  s0 = *src;
370  while (*s0)
371  {
372  const char *s1 = s0;
373  int q_map_match = 0;
374  map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
375  if (space_split)
376  {
377  if (**map == *CHR_SPACE)
378  break;
379  }
380  else /* complete subfield only. */
381  {
382  if (**map == *CHR_SPACE)
383  { /* save space mapping for later .. */
384  space_start = s1;
385  space_end = s0;
386  continue;
387  }
388  else if (space_start)
389  { /* reload last space */
390  while (space_start < space_end)
391  {
392  if (strchr(REGEX_CHARS, *space_start))
393  wrbuf_putc(term_dict, '\\');
394  wrbuf_putc(display_term, *space_start);
395  wrbuf_putc(term_dict, *space_start);
396  space_start++;
397 
398  }
399  /* and reset */
400  space_start = space_end = 0;
401  }
402  }
403  i++;
404 
405  add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
406  }
407  *src = s0;
408  return i;
409 }
410 
411 /* term_101: handle term, where trunc = Process # */
412 static int term_101(zebra_map_t zm,
413  const char **src, WRBUF term_dict, int space_split,
414  WRBUF display_term)
415 {
416  const char *s0;
417  const char **map;
418  int i = 0;
419 
420  if (!term_pre(zm, src, "#", !space_split))
421  return 0;
422  s0 = *src;
423  while (*s0)
424  {
425  if (*s0 == '#')
426  {
427  i++;
428  wrbuf_puts(term_dict, ".*");
429  wrbuf_putc(display_term, *s0);
430  s0++;
431  }
432  else
433  {
434  const char *s1 = s0;
435  int q_map_match = 0;
436  map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
437  if (space_split && **map == *CHR_SPACE)
438  break;
439 
440  i++;
441  add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
442  }
443  }
444  *src = s0;
445  return i;
446 }
447 
448 /* term_103: handle term, where trunc = re-2 (regular expressions) */
449 static int term_103(zebra_map_t zm, const char **src,
450  WRBUF term_dict, int *errors, int space_split,
451  WRBUF display_term)
452 {
453  int i = 0;
454  const char *s0;
455  const char **map;
456 
457  if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
458  return 0;
459  s0 = *src;
460  if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
461  isdigit(((const unsigned char *)s0)[1]))
462  {
463  *errors = s0[1] - '0';
464  s0 += 3;
465  if (*errors > 3)
466  *errors = 3;
467  }
468  while (*s0)
469  {
470  if (strchr("^\\()[].*+?|-", *s0))
471  {
472  wrbuf_putc(display_term, *s0);
473  wrbuf_putc(term_dict, *s0);
474  s0++;
475  i++;
476  }
477  else
478  {
479  const char *s1 = s0;
480  int q_map_match = 0;
481  map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
482  if (space_split && **map == *CHR_SPACE)
483  break;
484 
485  i++;
486  add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
487  }
488  }
489  *src = s0;
490 
491  return i;
492 }
493 
494 /* term_103: handle term, where trunc = re-1 (regular expressions) */
495 static int term_102(zebra_map_t zm, const char **src,
496  WRBUF term_dict, int space_split, WRBUF display_term)
497 {
498  return term_103(zm, src, term_dict, NULL, space_split, display_term);
499 }
500 
501 
502 /* term_104: handle term, process ?n * # */
503 static int term_104(zebra_map_t zm, const char **src,
504  WRBUF term_dict, int space_split, WRBUF display_term)
505 {
506  const char *s0;
507  const char **map;
508  int i = 0;
509 
510  if (!term_pre(zm, src, "?*#", !space_split))
511  return 0;
512  s0 = *src;
513  while (*s0)
514  {
515  if (*s0 == '?')
516  {
517  i++;
518  wrbuf_putc(display_term, *s0);
519  s0++;
520  if (*s0 >= '0' && *s0 <= '9')
521  {
522  int limit = 0;
523  while (*s0 >= '0' && *s0 <= '9')
524  {
525  limit = limit * 10 + (*s0 - '0');
526  wrbuf_putc(display_term, *s0);
527  s0++;
528  }
529  if (limit > 20)
530  limit = 20;
531  while (--limit >= 0)
532  {
533  wrbuf_puts(term_dict, ".?");
534  }
535  }
536  else
537  {
538  wrbuf_puts(term_dict, ".*");
539  }
540  }
541  else if (*s0 == '*')
542  {
543  i++;
544  wrbuf_puts(term_dict, ".*");
545  wrbuf_putc(display_term, *s0);
546  s0++;
547  }
548  else if (*s0 == '#')
549  {
550  i++;
551  wrbuf_puts(term_dict, ".");
552  wrbuf_putc(display_term, *s0);
553  s0++;
554  }
555  else
556  {
557  const char *s1 = s0;
558  int q_map_match = 0;
559  map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
560  if (space_split && **map == *CHR_SPACE)
561  break;
562 
563  i++;
564  add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
565  }
566  }
567  *src = s0;
568  return i;
569 }
570 
571 /* term_105/106: handle term, process * ! and possibly right_truncate */
572 static int term_105(zebra_map_t zm, const char **src,
573  WRBUF term_dict, int space_split,
574  WRBUF display_term, int right_truncate)
575 {
576  const char *s0;
577  const char **map;
578  int i = 0;
579 
580  if (!term_pre(zm, src, "\\*!", !space_split))
581  return 0;
582  s0 = *src;
583  while (*s0)
584  {
585  if (*s0 == '*')
586  {
587  i++;
588  wrbuf_puts(term_dict, ".*");
589  wrbuf_putc(display_term, *s0);
590  s0++;
591  }
592  else if (*s0 == '!')
593  {
594  i++;
595  wrbuf_putc(term_dict, '.');
596  wrbuf_putc(display_term, *s0);
597  s0++;
598  }
599  else if (*s0 == '\\')
600  {
601  i++;
602  wrbuf_puts(term_dict, "\\\\");
603  wrbuf_putc(display_term, *s0);
604  s0++;
605  }
606  else
607  {
608  const char *s1 = s0;
609  int q_map_match = 0;
610  map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
611  if (space_split && **map == *CHR_SPACE)
612  break;
613 
614  i++;
615  add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
616  }
617  }
618  if (right_truncate)
619  wrbuf_puts(term_dict, ".*");
620  *src = s0;
621  return i;
622 }
623 
624 
625 /* gen_regular_rel - generate regular expression from relation
626  * val: border value (inclusive)
627  * islt: 1 if <=; 0 if >=.
628  */
629 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
630 {
631  char dst_buf[20*5*20]; /* assuming enough for expansion */
632  char *dst = dst_buf;
633  int dst_p;
634  int w, d, i;
635  int pos = 0;
636  char numstr[20];
637 
638  yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
639  if (val >= 0)
640  {
641  if (islt)
642  strcpy(dst, "(-[0-9]+|(");
643  else
644  strcpy(dst, "((");
645  }
646  else
647  {
648  if (!islt)
649  {
650  strcpy(dst, "([0-9]+|-(");
651  islt = 1;
652  }
653  else
654  {
655  strcpy(dst, "(-(");
656  islt = 0;
657  }
658  val = -val;
659  }
660  dst_p = strlen(dst);
661  sprintf(numstr, "%d", val);
662  for (w = strlen(numstr); --w >= 0; pos++)
663  {
664  d = numstr[w];
665  if (pos > 0)
666  {
667  if (islt)
668  {
669  if (d == '0')
670  continue;
671  d--;
672  }
673  else
674  {
675  if (d == '9')
676  continue;
677  d++;
678  }
679  }
680 
681  strcpy(dst + dst_p, numstr);
682  dst_p = strlen(dst) - pos - 1;
683 
684  if (islt)
685  {
686  if (d != '0')
687  {
688  dst[dst_p++] = '[';
689  dst[dst_p++] = '0';
690  dst[dst_p++] = '-';
691  dst[dst_p++] = d;
692  dst[dst_p++] = ']';
693  }
694  else
695  dst[dst_p++] = d;
696  }
697  else
698  {
699  if (d != '9')
700  {
701  dst[dst_p++] = '[';
702  dst[dst_p++] = d;
703  dst[dst_p++] = '-';
704  dst[dst_p++] = '9';
705  dst[dst_p++] = ']';
706  }
707  else
708  dst[dst_p++] = d;
709  }
710  for (i = 0; i < pos; i++)
711  {
712  dst[dst_p++] = '[';
713  dst[dst_p++] = '0';
714  dst[dst_p++] = '-';
715  dst[dst_p++] = '9';
716  dst[dst_p++] = ']';
717  }
718  dst[dst_p++] = '|';
719  }
720  dst[dst_p] = '\0';
721  if (islt)
722  {
723  /* match everything less than 10^(pos-1) */
724  strcat(dst, "0*");
725  for (i = 1; i < pos; i++)
726  strcat(dst, "[0-9]?");
727  }
728  else
729  {
730  /* match everything greater than 10^pos */
731  for (i = 0; i <= pos; i++)
732  strcat(dst, "[0-9]");
733  strcat(dst, "[0-9]*");
734  }
735  strcat(dst, "))");
736  wrbuf_puts(term_dict, dst);
737 }
738 
739 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
740 {
741  const char *src = wrbuf_cstr(wsrc);
742  if (src[*indx] == '\\')
743  {
744  wrbuf_putc(term_p, src[*indx]);
745  (*indx)++;
746  }
747  wrbuf_putc(term_p, src[*indx]);
748  (*indx)++;
749 }
750 
751 /*
752  * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
753  * ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
754  * >= abc ([b-].*|a[c-].*|ab[c-].*)
755  * ([^-a].*|a[^-b].*|ab[c-].*)
756  * < abc ([-0].*|a[-a].*|ab[-b].*)
757  * ([^a-].*|a[^b-].*|ab[^c-].*)
758  * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
759  * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
760  */
761 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
762  const char **term_sub, WRBUF term_dict,
763  const Odr_oid *attributeSet,
764  zebra_map_t zm, int space_split,
765  WRBUF display_term,
766  int *error_code)
767 {
768  AttrType relation;
769  int relation_value;
770  int i;
771  WRBUF term_component = wrbuf_alloc();
772 
773  attr_init_APT(&relation, zapt, 2);
774  relation_value = attr_find(&relation, NULL);
775 
776  *error_code = 0;
777  yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
778  switch (relation_value)
779  {
780  case 1:
781  if (!term_100(zm, term_sub, term_component, space_split, display_term))
782  {
783  wrbuf_destroy(term_component);
784  return 0;
785  }
786  yaz_log(log_level_rpn, "Relation <");
787 
788  wrbuf_putc(term_dict, '(');
789  for (i = 0; i < wrbuf_len(term_component); )
790  {
791  int j = 0;
792 
793  if (i)
794  wrbuf_putc(term_dict, '|');
795  while (j < i)
796  string_rel_add_char(term_dict, term_component, &j);
797 
798  wrbuf_putc(term_dict, '[');
799 
800  wrbuf_putc(term_dict, '^');
801 
802  wrbuf_putc(term_dict, 1);
803  wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
804 
805  string_rel_add_char(term_dict, term_component, &i);
806  wrbuf_putc(term_dict, '-');
807 
808  wrbuf_putc(term_dict, ']');
809  wrbuf_putc(term_dict, '.');
810  wrbuf_putc(term_dict, '*');
811  }
812  wrbuf_putc(term_dict, ')');
813  break;
814  case 2:
815  if (!term_100(zm, term_sub, term_component, space_split, display_term))
816  {
817  wrbuf_destroy(term_component);
818  return 0;
819  }
820  yaz_log(log_level_rpn, "Relation <=");
821 
822  wrbuf_putc(term_dict, '(');
823  for (i = 0; i < wrbuf_len(term_component); )
824  {
825  int j = 0;
826 
827  while (j < i)
828  string_rel_add_char(term_dict, term_component, &j);
829  wrbuf_putc(term_dict, '[');
830 
831  wrbuf_putc(term_dict, '^');
832 
833  wrbuf_putc(term_dict, 1);
834  wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
835 
836  string_rel_add_char(term_dict, term_component, &i);
837  wrbuf_putc(term_dict, '-');
838 
839  wrbuf_putc(term_dict, ']');
840  wrbuf_putc(term_dict, '.');
841  wrbuf_putc(term_dict, '*');
842 
843  wrbuf_putc(term_dict, '|');
844  }
845  for (i = 0; i < wrbuf_len(term_component); )
846  string_rel_add_char(term_dict, term_component, &i);
847  wrbuf_putc(term_dict, ')');
848  break;
849  case 5:
850  if (!term_100(zm, term_sub, term_component, space_split, display_term))
851  {
852  wrbuf_destroy(term_component);
853  return 0;
854  }
855  yaz_log(log_level_rpn, "Relation >");
856 
857  wrbuf_putc(term_dict, '(');
858  for (i = 0; i < wrbuf_len(term_component); )
859  {
860  int j = 0;
861 
862  while (j < i)
863  string_rel_add_char(term_dict, term_component, &j);
864  wrbuf_putc(term_dict, '[');
865 
866  wrbuf_putc(term_dict, '^');
867  wrbuf_putc(term_dict, '-');
868  string_rel_add_char(term_dict, term_component, &i);
869 
870  wrbuf_putc(term_dict, ']');
871  wrbuf_putc(term_dict, '.');
872  wrbuf_putc(term_dict, '*');
873 
874  wrbuf_putc(term_dict, '|');
875  }
876  for (i = 0; i < wrbuf_len(term_component); )
877  string_rel_add_char(term_dict, term_component, &i);
878  wrbuf_putc(term_dict, '.');
879  wrbuf_putc(term_dict, '+');
880  wrbuf_putc(term_dict, ')');
881  break;
882  case 4:
883  if (!term_100(zm, term_sub, term_component, space_split, display_term))
884  {
885  wrbuf_destroy(term_component);
886  return 0;
887  }
888  yaz_log(log_level_rpn, "Relation >=");
889 
890  wrbuf_putc(term_dict, '(');
891  for (i = 0; i < wrbuf_len(term_component); )
892  {
893  int j = 0;
894 
895  if (i)
896  wrbuf_putc(term_dict, '|');
897  while (j < i)
898  string_rel_add_char(term_dict, term_component, &j);
899  wrbuf_putc(term_dict, '[');
900 
901  if (i < wrbuf_len(term_component)-1)
902  {
903  wrbuf_putc(term_dict, '^');
904  wrbuf_putc(term_dict, '-');
905  string_rel_add_char(term_dict, term_component, &i);
906  }
907  else
908  {
909  string_rel_add_char(term_dict, term_component, &i);
910  wrbuf_putc(term_dict, '-');
911  }
912  wrbuf_putc(term_dict, ']');
913  wrbuf_putc(term_dict, '.');
914  wrbuf_putc(term_dict, '*');
915  }
916  wrbuf_putc(term_dict, ')');
917  break;
918  case 3:
919  case 102:
920  case -1:
921  if (!**term_sub)
922  return 1;
923  yaz_log(log_level_rpn, "Relation =");
924  if (!term_100(zm, term_sub, term_component, space_split, display_term))
925  {
926  wrbuf_destroy(term_component);
927  return 0;
928  }
929  wrbuf_puts(term_dict, "(");
930  wrbuf_puts(term_dict, wrbuf_cstr(term_component));
931  wrbuf_puts(term_dict, ")");
932  break;
933  case 103:
934  yaz_log(log_level_rpn, "Relation always matches");
935  /* skip to end of term (we don't care what it is) */
936  while (**term_sub != '\0')
937  (*term_sub)++;
938  break;
939  default:
940  *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
941  wrbuf_destroy(term_component);
942  return 0;
943  }
944  wrbuf_destroy(term_component);
945  return 1;
946 }
947 
948 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
949  const char **term_sub,
950  WRBUF term_dict,
951  const Odr_oid *attributeSet, NMEM stream,
952  struct grep_info *grep_info,
953  const char *index_type, int complete_flag,
954  WRBUF display_term,
955  const char *xpath_use,
956  struct ord_list **ol,
957  zebra_map_t zm, size_t token_number);
958 
960  Z_AttributesPlusTerm *zapt,
961  zint *hits_limit_value,
962  const char **term_ref_id_str,
963  NMEM nmem)
964 {
965  AttrType term_ref_id_attr;
966  AttrType hits_limit_attr;
967  int term_ref_id_int;
968  zint hits_limit_from_attr;
969 
970  attr_init_APT(&hits_limit_attr, zapt, 11);
971  hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
972 
973  attr_init_APT(&term_ref_id_attr, zapt, 10);
974  term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
975  if (term_ref_id_int >= 0)
976  {
977  char *res = nmem_malloc(nmem, 20);
978  sprintf(res, "%d", term_ref_id_int);
979  *term_ref_id_str = res;
980  }
981  if (hits_limit_from_attr != -1)
982  *hits_limit_value = hits_limit_from_attr;
983 
984  yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
985  *term_ref_id_str ? *term_ref_id_str : "none",
986  *hits_limit_value);
987  return ZEBRA_OK;
988 }
989 
993  Z_AttributesPlusTerm *zapt,
994  const char **term_sub,
995  const Odr_oid *attributeSet,
996  zint hits_limit, NMEM stream,
997  struct grep_info *grep_info,
998  const char *index_type, int complete_flag,
999  const char *rank_type,
1000  const char *xpath_use,
1001  NMEM rset_nmem,
1002  RSET *rset,
1003  struct rset_key_control *kc,
1004  zebra_map_t zm,
1005  size_t token_number)
1006 {
1007  ZEBRA_RES res;
1008  struct ord_list *ol;
1009  zint hits_limit_value = hits_limit;
1010  const char *term_ref_id_str = 0;
1011  WRBUF term_dict = wrbuf_alloc();
1012  WRBUF display_term = wrbuf_alloc();
1013  *rset = 0;
1014  zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1015  stream);
1016  grep_info->isam_p_indx = 0;
1017  res = string_term(zh, zapt, term_sub, term_dict,
1018  attributeSet, stream, grep_info,
1019  index_type, complete_flag,
1020  display_term, xpath_use, &ol, zm, token_number);
1021  wrbuf_destroy(term_dict);
1022  if (res == ZEBRA_OK && *term_sub)
1023  {
1024  yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1025  *rset = rset_trunc(zh, grep_info->isam_p_buf,
1026  grep_info->isam_p_indx, wrbuf_buf(display_term),
1027  wrbuf_len(display_term), rank_type,
1028  1 /* preserve pos */,
1029  zapt->term->which, rset_nmem,
1030  kc, kc->scope, ol, index_type, hits_limit_value,
1031  term_ref_id_str);
1032  if (!*rset)
1033  res = ZEBRA_FAIL;
1034  }
1035  wrbuf_destroy(display_term);
1036  return res;
1037 }
1038 
1039 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1040  const char **term_sub,
1041  WRBUF term_dict,
1042  const Odr_oid *attributeSet, NMEM stream,
1043  struct grep_info *grep_info,
1044  const char *index_type, int complete_flag,
1045  WRBUF display_term,
1046  const char *xpath_use,
1047  struct ord_list **ol,
1048  zebra_map_t zm, size_t token_number)
1049 {
1050  int r;
1051  AttrType truncation;
1052  int truncation_value;
1053  const char *termp;
1054  struct rpn_char_map_info rcmi;
1055 
1056  int space_split = complete_flag ? 0 : 1;
1057  int ord = -1;
1058  int regex_range = 0;
1059  int max_pos, prefix_len = 0;
1060  int relation_error;
1061  char ord_buf[32];
1062  int ord_len, i;
1063 
1064  *ol = ord_list_create(stream);
1065 
1066  rpn_char_map_prepare(zh->reg, zm, &rcmi);
1067  attr_init_APT(&truncation, zapt, 5);
1068  truncation_value = attr_find(&truncation, NULL);
1069  yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1070 
1071  termp = *term_sub; /* start of term for each database */
1072 
1073  if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1074  attributeSet, &ord) != ZEBRA_OK)
1075  {
1076  *term_sub = 0;
1077  return ZEBRA_FAIL;
1078  }
1079 
1080  wrbuf_rewind(term_dict); /* new dictionary regexp term */
1081 
1082  *ol = ord_list_append(stream, *ol, ord);
1083  ord_len = key_SU_encode(ord, ord_buf);
1084 
1085  wrbuf_putc(term_dict, '(');
1086 
1087  for (i = 0; i < ord_len; i++)
1088  {
1089  wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1090  wrbuf_putc(term_dict, ord_buf[i]);
1091  }
1092  wrbuf_putc(term_dict, ')');
1093 
1094  prefix_len = wrbuf_len(term_dict);
1095 
1096  if (zebra_maps_is_icu(zm))
1097  {
1098  int relation_value;
1099  AttrType relation;
1100 
1101  attr_init_APT(&relation, zapt, 2);
1102  relation_value = attr_find(&relation, NULL);
1103  if (relation_value == 103) /* always matches */
1104  termp += strlen(termp); /* move to end of term */
1105  else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1106  {
1107  /* ICU case */
1108  switch (truncation_value)
1109  {
1110  case -1: /* not specified */
1111  case 100: /* do not truncate */
1112  if (!term_100_icu(zm, &termp, term_dict, display_term, 0, token_number))
1113  {
1114  *term_sub = 0;
1115  return ZEBRA_OK;
1116  }
1117  break;
1118  case 102:
1119  if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1120  {
1121  *term_sub = 0;
1122  return ZEBRA_OK;
1123  }
1124  break;
1125  case 1: /* right truncation */
1126  if (!term_100_icu(zm, &termp, term_dict, display_term, 1, token_number))
1127  {
1128  *term_sub = 0;
1129  return ZEBRA_OK;
1130  }
1131  break;
1132  case 2:
1133  if (!term_100_icu(zm, &termp, term_dict, display_term, 2, token_number))
1134  {
1135  *term_sub = 0;
1136  return ZEBRA_OK;
1137  }
1138  break;
1139  case 3:
1140  if (!term_100_icu(zm, &termp, term_dict, display_term, 3, token_number))
1141  {
1142  *term_sub = 0;
1143  return ZEBRA_OK;
1144  }
1145  break;
1146  default:
1148  YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1149  truncation_value);
1150  return ZEBRA_FAIL;
1151  }
1152  }
1153  else
1154  {
1156  YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1157  relation_value);
1158  return ZEBRA_FAIL;
1159  }
1160  }
1161  else
1162  {
1163  /* non-ICU case. using string.chr and friends */
1164  switch (truncation_value)
1165  {
1166  case -1: /* not specified */
1167  case 100: /* do not truncate */
1168  if (!string_relation(zh, zapt, &termp, term_dict,
1169  attributeSet,
1170  zm, space_split, display_term,
1171  &relation_error))
1172  {
1173  if (relation_error)
1174  {
1175  zebra_setError(zh, relation_error, 0);
1176  return ZEBRA_FAIL;
1177  }
1178  *term_sub = 0;
1179  return ZEBRA_OK;
1180  }
1181  break;
1182  case 1: /* right truncation */
1183  wrbuf_putc(term_dict, '(');
1184  if (!term_100(zm, &termp, term_dict, space_split, display_term))
1185  {
1186  *term_sub = 0;
1187  return ZEBRA_OK;
1188  }
1189  wrbuf_puts(term_dict, ".*)");
1190  break;
1191  case 2: /* left truncation */
1192  wrbuf_puts(term_dict, "(.*");
1193  if (!term_100(zm, &termp, term_dict, space_split, display_term))
1194  {
1195  *term_sub = 0;
1196  return ZEBRA_OK;
1197  }
1198  wrbuf_putc(term_dict, ')');
1199  break;
1200  case 3: /* left&right truncation */
1201  wrbuf_puts(term_dict, "(.*");
1202  if (!term_100(zm, &termp, term_dict, space_split, display_term))
1203  {
1204  *term_sub = 0;
1205  return ZEBRA_OK;
1206  }
1207  wrbuf_puts(term_dict, ".*)");
1208  break;
1209  case 101: /* process # in term */
1210  wrbuf_putc(term_dict, '(');
1211  if (!term_101(zm, &termp, term_dict, space_split, display_term))
1212  {
1213  *term_sub = 0;
1214  return ZEBRA_OK;
1215  }
1216  wrbuf_puts(term_dict, ")");
1217  break;
1218  case 102: /* Regexp-1 */
1219  wrbuf_putc(term_dict, '(');
1220  if (!term_102(zm, &termp, term_dict, space_split, display_term))
1221  {
1222  *term_sub = 0;
1223  return ZEBRA_OK;
1224  }
1225  wrbuf_putc(term_dict, ')');
1226  break;
1227  case 103: /* Regexp-2 */
1228  regex_range = 1;
1229  wrbuf_putc(term_dict, '(');
1230  if (!term_103(zm, &termp, term_dict, &regex_range,
1231  space_split, display_term))
1232  {
1233  *term_sub = 0;
1234  return ZEBRA_OK;
1235  }
1236  wrbuf_putc(term_dict, ')');
1237  break;
1238  case 104: /* process ?n * # term */
1239  wrbuf_putc(term_dict, '(');
1240  if (!term_104(zm, &termp, term_dict, space_split, display_term))
1241  {
1242  *term_sub = 0;
1243  return ZEBRA_OK;
1244  }
1245  wrbuf_putc(term_dict, ')');
1246  break;
1247  case 105: /* process * ! in term and right truncate */
1248  wrbuf_putc(term_dict, '(');
1249  if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1250  {
1251  *term_sub = 0;
1252  return ZEBRA_OK;
1253  }
1254  wrbuf_putc(term_dict, ')');
1255  break;
1256  case 106: /* process * ! in term */
1257  wrbuf_putc(term_dict, '(');
1258  if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1259  {
1260  *term_sub = 0;
1261  return ZEBRA_OK;
1262  }
1263  wrbuf_putc(term_dict, ')');
1264  break;
1265  default:
1267  YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1268  truncation_value);
1269  return ZEBRA_FAIL;
1270  }
1271  }
1272  if (1)
1273  {
1274  char buf[1000];
1275  const char *input = wrbuf_cstr(term_dict) + prefix_len;
1276  esc_str(buf, sizeof(buf), input, strlen(input));
1277  }
1278  {
1279  WRBUF pr_wr = wrbuf_alloc();
1280 
1281  wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1282  yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1283  wrbuf_destroy(pr_wr);
1284  }
1285  r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1286  grep_info, &max_pos,
1287  ord_len /* number of "exact" chars */,
1288  grep_handle);
1289  if (r == 1)
1291  else if (r)
1292  yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1293  *term_sub = termp;
1294  yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1295  return ZEBRA_OK;
1296 }
1297 
1298 
1299 
1300 static void grep_info_delete(struct grep_info *grep_info)
1301 {
1302 #ifdef TERM_COUNT
1303  xfree(grep_info->term_no);
1304 #endif
1305  xfree(grep_info->isam_p_buf);
1306 }
1307 
1309  Z_AttributesPlusTerm *zapt,
1310  struct grep_info *grep_info,
1311  const char *index_type)
1312 {
1313 #ifdef TERM_COUNT
1314  grep_info->term_no = 0;
1315 #endif
1316  grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1317  grep_info->isam_p_size = 0;
1318  grep_info->isam_p_buf = NULL;
1319  grep_info->zh = zh;
1320  grep_info->index_type = index_type;
1321  grep_info->termset = 0;
1322  if (zapt)
1323  {
1324  AttrType truncmax;
1325  int truncmax_value;
1326 
1327  attr_init_APT(&truncmax, zapt, 13);
1328  truncmax_value = attr_find(&truncmax, NULL);
1329  if (truncmax_value != -1)
1330  grep_info->trunc_max = truncmax_value;
1331  }
1332  if (zapt)
1333  {
1334  AttrType termset;
1335  int termset_value_numeric;
1336  const char *termset_value_string;
1337 
1338  attr_init_APT(&termset, zapt, 8);
1339  termset_value_numeric =
1340  attr_find_ex(&termset, NULL, &termset_value_string);
1341  if (termset_value_numeric != -1)
1342  {
1343 #if TERMSET_DISABLE
1344  zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1345  return ZEBRA_FAIL;
1346 #else
1347  char resname[32];
1348  const char *termset_name = 0;
1349  if (termset_value_numeric != -2)
1350  {
1351 
1352  sprintf(resname, "%d", termset_value_numeric);
1353  termset_name = resname;
1354  }
1355  else
1356  termset_name = termset_value_string;
1357  yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1358  grep_info->termset = resultSetAdd(zh, termset_name, 1);
1359  if (!grep_info->termset)
1360  {
1361  zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1362  return ZEBRA_FAIL;
1363  }
1364 #endif
1365  }
1366  }
1367  return ZEBRA_OK;
1368 }
1369 
1371  Z_AttributesPlusTerm *zapt,
1372  const char *termz,
1373  const Odr_oid *attributeSet,
1374  zint hits_limit,
1375  NMEM stream,
1376  const char *index_type, int complete_flag,
1377  const char *rank_type,
1378  const char *xpath_use,
1379  NMEM rset_nmem,
1380  RSET **result_sets, int *num_result_sets,
1381  struct rset_key_control *kc,
1382  zebra_map_t zm)
1383 {
1384  struct grep_info grep_info;
1385  const char *termp = termz;
1386  int alloc_sets = 0;
1387 
1388  *num_result_sets = 0;
1389  if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1390  return ZEBRA_FAIL;
1391  while (1)
1392  {
1393  ZEBRA_RES res;
1394 
1395  if (alloc_sets == *num_result_sets)
1396  {
1397  int add = 10;
1398  RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1399  sizeof(*rnew));
1400  if (alloc_sets)
1401  memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1402  alloc_sets = alloc_sets + add;
1403  *result_sets = rnew;
1404  }
1405  res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1406  stream, &grep_info,
1407  index_type, complete_flag,
1408  rank_type,
1409  xpath_use, rset_nmem,
1410  &(*result_sets)[*num_result_sets],
1411  kc, zm,
1412  *num_result_sets);
1413  if (res != ZEBRA_OK)
1414  {
1415  int i;
1416  for (i = 0; i < *num_result_sets; i++)
1417  rset_delete((*result_sets)[i]);
1418  grep_info_delete(&grep_info);
1419  return res;
1420  }
1421  if ((*result_sets)[*num_result_sets] == 0)
1422  break;
1423  (*num_result_sets)++;
1424 
1425  if (!*termp)
1426  break;
1427  }
1428  grep_info_delete(&grep_info);
1429  return ZEBRA_OK;
1430 }
1431 
1449  Z_AttributesPlusTerm *zapt,
1450  const char *termz,
1451  const Odr_oid *attributeSet,
1452  zint hits_limit,
1453  NMEM stream,
1454  const char *index_type, int complete_flag,
1455  const char *rank_type,
1456  const char *xpath_use,
1457  NMEM rset_nmem,
1458  RSET **result_sets, int *num_result_sets,
1459  struct rset_key_control *kc)
1460 {
1461  zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1462  return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1463  stream, index_type, complete_flag,
1464  rank_type, xpath_use,
1465  rset_nmem, result_sets, num_result_sets,
1466  kc, zm);
1467 }
1468 
/* Look up one term in a "group" index and return an ISAM-backed result set
   for it, or 0 when the index or the dictionary entry does not exist.

   NOTE(review): the line with the function name and first parameter, and the
   line that declares `cat`, are absent from this listing (apparently lost in
   extraction).  The name is presumably search_group and the first parameter
   the Zebra handle `zh` - confirm against the original file.

   unit       appended to ZEBRA_GROUP_INDEX_NAME to form the index name
   term       dictionary term, appended verbatim after the encoded ordinal
   rset_nmem  passed through to zebra_create_rset_isam()
   kc         key control; kc->scope is passed through as well */
1470  const char *unit,
1471  const char *term,
1472  NMEM rset_nmem,
1473  struct rset_key_control *kc)
1474 {
/* index name = ZEBRA_GROUP_INDEX_NAME followed by unit */
1476  WRBUF w = wrbuf_alloc();
1477  wrbuf_puts(w, ZEBRA_GROUP_INDEX_NAME);
1478  wrbuf_puts(w, unit);
1479  int ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "0",
1480  wrbuf_cstr(w));
1481  wrbuf_destroy(w);
/* -1 means the lookup found no such index */
1482  if (ord == -1)
1483  return 0;
/* dictionary key = encoded ordinal followed by the term */
1484  char ord_buf[32];
1485  int ord_len = key_SU_encode(ord, ord_buf);
1486  char term_dict[100];
1487  memcpy(term_dict, ord_buf, ord_len);
/* NOTE(review): strcpy is unbounded; a term longer than the space left in the
   100-byte term_dict overflows it - confirm that callers bound the length */
1488  strcpy(term_dict + ord_len, term);
1489  char *val = dict_lookup(zh->reg->dict, term_dict);
1490  if (!val)
1491  return 0;
/* entry layout as used here: one length byte, then the ISAM_P value */
1492  ISAM_P isam_p;
1493  assert(*val == sizeof(ISAM_P));
1494  memcpy(&isam_p, val+1, sizeof(isam_p));
1495  return zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1496  isam_p, 0);
1497 }
1498 
/* Produce a result set for the position attribute (type 3) of zapt, if one
   applies.

   NOTE(review): the line with the function name and first parameter is absent
   from this listing; the callers invoke it as search_position(zh, zapt, ...)
   and store the result in a ZEBRA_RES.  The `if` line that guards the second
   error block (listing line 1536) is missing too, so the condition tested
   there cannot be read from here - it presumably involves `zm`; confirm.

   Position values 3 and -1 (attribute absent) need no set: ZEBRA_OK is
   returned and *rset is left untouched.  Values 1 and 2 continue to the
   lookup.  Any other value sets YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE and
   returns ZEBRA_FAIL.

   On the lookup path *rset is assigned only when the dictionary holds an
   entry for the index ordinal followed by FIRST_IN_FIELD_STR; otherwise it
   is left as the caller initialised it. */
1502  Z_AttributesPlusTerm *zapt,
1503  const Odr_oid *attributeSet,
1504  const char *index_type,
1505  NMEM rset_nmem,
1506  RSET *rset,
1507  struct rset_key_control *kc)
1508 {
1509  int position_value;
1510  AttrType position;
1511  int ord = -1;
1512  char ord_buf[32];
1513  char term_dict[100];
1514  int ord_len;
1515  char *val;
1516  ISAM_P isam_p;
1517  zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1518 
1519  attr_init_APT(&position, zapt, 3);
1520  position_value = attr_find(&position, NULL);
1521  switch(position_value)
1522  {
1523  case 3:
1524  case -1:
1525  return ZEBRA_OK;
1526  case 1:
1527  case 2:
1528  break;
1529  default:
1530  zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1531  position_value);
1532  return ZEBRA_FAIL;
1533  }
1534 
1535 
/* NOTE(review): the condition line for this block is missing in the listing */
1537  {
1538  zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1539  position_value);
1540  return ZEBRA_FAIL;
1541  }
1542 
1543  if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1544  attributeSet, &ord) != ZEBRA_OK)
1545  {
1546  return ZEBRA_FAIL;
1547  }
/* dictionary key = encoded ordinal followed by FIRST_IN_FIELD_STR */
1548  ord_len = key_SU_encode(ord, ord_buf);
1549  memcpy(term_dict, ord_buf, ord_len);
1550  strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1551  val = dict_lookup(zh->reg->dict, term_dict);
1552  if (val)
1553  {
/* entry layout as used here: one length byte, then the ISAM_P value */
1554  assert(*val == sizeof(ISAM_P));
1555  memcpy(&isam_p, val+1, sizeof(isam_p));
1556 
1557  *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1558  isam_p, 0);
1559  }
1560  return ZEBRA_OK;
1561 }
1562 
1566  Z_AttributesPlusTerm *zapt,
1567  const char *termz_org,
1568  const Odr_oid *attributeSet,
1569  zint hits_limit,
1570  NMEM stream,
1571  const char *index_type,
1572  int complete_flag,
1573  const char *rank_type,
1574  const char *xpath_use,
1575  NMEM rset_nmem,
1576  RSET *rset,
1577  struct rset_key_control *kc)
1578 {
1579  RSET *result_sets = 0;
1580  int num_result_sets = 0;
1581  ZEBRA_RES res =
1582  search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1583  stream, index_type, complete_flag,
1584  rank_type, xpath_use,
1585  rset_nmem,
1586  &result_sets, &num_result_sets, kc);
1587 
1588  if (res != ZEBRA_OK)
1589  return res;
1590 
1591  if (num_result_sets > 0)
1592  {
1593  RSET first_set = 0;
1594  res = search_position(zh, zapt, attributeSet,
1595  index_type,
1596  rset_nmem, &first_set,
1597  kc);
1598  if (res != ZEBRA_OK)
1599  {
1600  int i;
1601  for (i = 0; i < num_result_sets; i++)
1602  rset_delete(result_sets[i]);
1603  return res;
1604  }
1605  if (first_set)
1606  {
1607  RSET *nsets = nmem_malloc(stream,
1608  sizeof(RSET) * (num_result_sets+1));
1609  nsets[0] = first_set;
1610  memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1611  result_sets = nsets;
1612  num_result_sets++;
1613  }
1614  }
1615  if (num_result_sets == 0)
1616  *rset = rset_create_null(rset_nmem, kc, 0);
1617  else if (num_result_sets == 1)
1618  *rset = result_sets[0];
1619  else
1620  *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1621  num_result_sets, result_sets,
1622  1 /* ordered */, 0 /* exclusion */,
1623  3 /* relation */, 1 /* distance */);
1624  if (!*rset)
1625  return ZEBRA_FAIL;
1626  return ZEBRA_OK;
1627 }
1628 
1632  Z_AttributesPlusTerm *zapt,
1633  const char *termz_org,
1634  const Odr_oid *attributeSet,
1635  zint hits_limit,
1636  NMEM stream,
1637  const char *index_type,
1638  int complete_flag,
1639  const char *rank_type,
1640  const char *xpath_use,
1641  NMEM rset_nmem,
1642  RSET *rset,
1643  struct rset_key_control *kc)
1644 {
1645  RSET *result_sets = 0;
1646  int num_result_sets = 0;
1647  int i;
1648  ZEBRA_RES res =
1649  search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1650  stream, index_type, complete_flag,
1651  rank_type, xpath_use,
1652  rset_nmem,
1653  &result_sets, &num_result_sets, kc);
1654  if (res != ZEBRA_OK)
1655  return res;
1656 
1657  for (i = 0; i < num_result_sets; i++)
1658  {
1659  RSET first_set = 0;
1660  res = search_position(zh, zapt, attributeSet,
1661  index_type,
1662  rset_nmem, &first_set,
1663  kc);
1664  if (res != ZEBRA_OK)
1665  {
1666  for (i = 0; i < num_result_sets; i++)
1667  rset_delete(result_sets[i]);
1668  return res;
1669  }
1670 
1671  if (first_set)
1672  {
1673  RSET tmp_set[2];
1674 
1675  tmp_set[0] = first_set;
1676  tmp_set[1] = result_sets[i];
1677 
1678  result_sets[i] = rset_create_prox(
1679  rset_nmem, kc, kc->scope,
1680  2, tmp_set,
1681  1 /* ordered */, 0 /* exclusion */,
1682  3 /* relation */, 1 /* distance */);
1683  }
1684  }
1685  if (num_result_sets == 0)
1686  *rset = rset_create_null(rset_nmem, kc, 0);
1687  else if (num_result_sets == 1)
1688  *rset = result_sets[0];
1689  else
1690  *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1691  num_result_sets, result_sets);
1692  if (!*rset)
1693  return ZEBRA_FAIL;
1694  return ZEBRA_OK;
1695 }
1696 
1700  Z_AttributesPlusTerm *zapt,
1701  const char *termz_org,
1702  const Odr_oid *attributeSet,
1703  zint hits_limit,
1704  NMEM stream,
1705  const char *index_type,
1706  int complete_flag,
1707  const char *rank_type,
1708  const char *xpath_use,
1709  NMEM rset_nmem,
1710  RSET *rset,
1711  struct rset_key_control *kc)
1712 {
1713  RSET *result_sets = 0;
1714  int num_result_sets = 0;
1715  int i;
1716  ZEBRA_RES res =
1717  search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1718  stream, index_type, complete_flag,
1719  rank_type, xpath_use,
1720  rset_nmem,
1721  &result_sets, &num_result_sets,
1722  kc);
1723  if (res != ZEBRA_OK)
1724  return res;
1725  for (i = 0; i < num_result_sets; i++)
1726  {
1727  RSET first_set = 0;
1728  res = search_position(zh, zapt, attributeSet,
1729  index_type,
1730  rset_nmem, &first_set,
1731  kc);
1732  if (res != ZEBRA_OK)
1733  {
1734  for (i = 0; i < num_result_sets; i++)
1735  rset_delete(result_sets[i]);
1736  return res;
1737  }
1738 
1739  if (first_set)
1740  {
1741  RSET tmp_set[2];
1742 
1743  tmp_set[0] = first_set;
1744  tmp_set[1] = result_sets[i];
1745 
1746  result_sets[i] = rset_create_prox(
1747  rset_nmem, kc, kc->scope,
1748  2, tmp_set,
1749  1 /* ordered */, 0 /* exclusion */,
1750  3 /* relation */, 1 /* distance */);
1751  }
1752  }
1753 
1754 
1755  if (num_result_sets == 0)
1756  *rset = rset_create_null(rset_nmem, kc, 0);
1757  else if (num_result_sets == 1)
1758  *rset = result_sets[0];
1759  else
1760  *rset = rset_create_and(rset_nmem, kc, kc->scope,
1761  num_result_sets, result_sets);
1762  if (!*rset)
1763  return ZEBRA_FAIL;
1764  return ZEBRA_OK;
1765 }
1766 
/* Append to term_dict a pattern for a numeric comparison selected by the
   relation attribute (type 2) of zapt, then run dict_lookup_grep() with it.

   term_sub      in/out: current position in the term string
   term_dict     in/out: pattern buffer; the caller has already put a prefix
                 in it and this function appends the numeric part
   attributeSet  not used in this function
   grep_info     passed to dict_lookup_grep() together with grep_handle
   max_pos       out: passed to dict_lookup_grep()
   zm            map handed to term_100()
   display_term  handed to term_100()
   error_code    out: 0, or YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for an
                 unsupported relation value

   Returns 0 when term_100() yields nothing or the relation is unsupported
   (distinguish the two via *error_code), 1 otherwise.

   NOTE(review): the statement that belongs to `if (r == 1)` (listing line
   1857) is absent from this listing - restore it from the original file. */
1767 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1768  const char **term_sub,
1769  WRBUF term_dict,
1770  const Odr_oid *attributeSet,
1771  struct grep_info *grep_info,
1772  int *max_pos,
1773  zebra_map_t zm,
1774  WRBUF display_term,
1775  int *error_code)
1776 {
1777  AttrType relation;
1778  int relation_value;
1779  int term_value;
1780  int r;
1781  WRBUF term_num = wrbuf_alloc();
1782 
1783  *error_code = 0;
1784  attr_init_APT(&relation, zapt, 2);
1785  relation_value = attr_find(&relation, NULL);
1786 
1787  yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1788 
/* Every comparison case reads the term text with term_100(), converts it
   with atoi() and emits a pattern.  "<" and ">" are expressed as "<=" and
   ">=" on the neighbouring integer. */
1789  switch (relation_value)
1790  {
1791  case 1:
1792  yaz_log(log_level_rpn, "Relation <");
1793  if (!term_100(zm, term_sub, term_num, 1, display_term))
1794  {
1795  wrbuf_destroy(term_num);
1796  return 0;
1797  }
1798  term_value = atoi(wrbuf_cstr(term_num));
1799  gen_regular_rel(term_dict, term_value-1, 1);
1800  break;
1801  case 2:
1802  yaz_log(log_level_rpn, "Relation <=");
1803  if (!term_100(zm, term_sub, term_num, 1, display_term))
1804  {
1805  wrbuf_destroy(term_num);
1806  return 0;
1807  }
1808  term_value = atoi(wrbuf_cstr(term_num));
1809  gen_regular_rel(term_dict, term_value, 1);
1810  break;
1811  case 4:
1812  yaz_log(log_level_rpn, "Relation >=");
1813  if (!term_100(zm, term_sub, term_num, 1, display_term))
1814  {
1815  wrbuf_destroy(term_num);
1816  return 0;
1817  }
1818  term_value = atoi(wrbuf_cstr(term_num));
1819  gen_regular_rel(term_dict, term_value, 0);
1820  break;
1821  case 5:
1822  yaz_log(log_level_rpn, "Relation >");
1823  if (!term_100(zm, term_sub, term_num, 1, display_term))
1824  {
1825  wrbuf_destroy(term_num);
1826  return 0;
1827  }
1828  term_value = atoi(wrbuf_cstr(term_num));
1829  gen_regular_rel(term_dict, term_value+1, 0);
1830  break;
/* no relation attribute (-1), 102 and 3 are all handled as equality */
1831  case -1:
1832  case 102:
1833  case 3:
1834  yaz_log(log_level_rpn, "Relation =");
1835  if (!term_100(zm, term_sub, term_num, 1, display_term))
1836  {
1837  wrbuf_destroy(term_num);
1838  return 0;
1839  }
1840  term_value = atoi(wrbuf_cstr(term_num));
/* equality pattern: any number of leading zeros, then the value */
1841  wrbuf_printf(term_dict, "(0*%d)", term_value);
1842  break;
/* 103: nothing is appended to term_dict; the rest of the term is skipped */
1843  case 103:
1844  /* term_tmp untouched.. */
1845  while (**term_sub != '\0')
1846  (*term_sub)++;
1847  break;
1848  default:
1849  *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1850  wrbuf_destroy(term_num);
1851  return 0;
1852  }
1853  r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1854  0, grep_info, max_pos, 0, grep_handle);
1855 
/* NOTE(review): the body of this `if` is missing from the listing */
1856  if (r == 1)
1858  else if (r)
1859  yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1860  yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1861  wrbuf_destroy(term_num);
1862  return 1;
1863 }
1864 
1865 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1866  const char **term_sub,
1867  WRBUF term_dict,
1868  const Odr_oid *attributeSet, NMEM stream,
1869  struct grep_info *grep_info,
1870  const char *index_type, int complete_flag,
1871  WRBUF display_term,
1872  const char *xpath_use,
1873  struct ord_list **ol)
1874 {
1875  const char *termp;
1876  struct rpn_char_map_info rcmi;
1877  int max_pos;
1878  int relation_error = 0;
1879  int ord, ord_len, i;
1880  char ord_buf[32];
1881  zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1882 
1883  *ol = ord_list_create(stream);
1884 
1885  rpn_char_map_prepare(zh->reg, zm, &rcmi);
1886 
1887  termp = *term_sub;
1888 
1889  if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1890  attributeSet, &ord) != ZEBRA_OK)
1891  {
1892  return ZEBRA_FAIL;
1893  }
1894 
1895  wrbuf_rewind(term_dict);
1896 
1897  *ol = ord_list_append(stream, *ol, ord);
1898 
1899  ord_len = key_SU_encode(ord, ord_buf);
1900 
1901  wrbuf_putc(term_dict, '(');
1902  for (i = 0; i < ord_len; i++)
1903  {
1904  wrbuf_putc(term_dict, 1);
1905  wrbuf_putc(term_dict, ord_buf[i]);
1906  }
1907  wrbuf_putc(term_dict, ')');
1908 
1909  if (!numeric_relation(zh, zapt, &termp, term_dict,
1910  attributeSet, grep_info, &max_pos, zm,
1911  display_term, &relation_error))
1912  {
1913  if (relation_error)
1914  {
1915  zebra_setError(zh, relation_error, 0);
1916  return ZEBRA_FAIL;
1917  }
1918  *term_sub = 0;
1919  return ZEBRA_OK;
1920  }
1921  *term_sub = termp;
1922  yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1923  return ZEBRA_OK;
1924 }
1925 
1926 
1928  Z_AttributesPlusTerm *zapt,
1929  const char *termz,
1930  const Odr_oid *attributeSet,
1931  zint hits_limit,
1932  NMEM stream,
1933  const char *index_type,
1934  int complete_flag,
1935  const char *rank_type,
1936  const char *xpath_use,
1937  NMEM rset_nmem,
1938  RSET *rset,
1939  struct rset_key_control *kc)
1940 {
1941  const char *termp = termz;
1942  RSET *result_sets = 0;
1943  int num_result_sets = 0;
1944  ZEBRA_RES res;
1945  struct grep_info grep_info;
1946  int alloc_sets = 0;
1947  zint hits_limit_value = hits_limit;
1948  const char *term_ref_id_str = 0;
1949 
1950  zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1951  stream);
1952 
1953  yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1954  if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1955  return ZEBRA_FAIL;
1956  while (1)
1957  {
1958  struct ord_list *ol;
1959  WRBUF term_dict = wrbuf_alloc();
1960  WRBUF display_term = wrbuf_alloc();
1961  if (alloc_sets == num_result_sets)
1962  {
1963  int add = 10;
1964  RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1965  sizeof(*rnew));
1966  if (alloc_sets)
1967  memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1968  alloc_sets = alloc_sets + add;
1969  result_sets = rnew;
1970  }
1971  yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1972  grep_info.isam_p_indx = 0;
1973  res = numeric_term(zh, zapt, &termp, term_dict,
1974  attributeSet, stream, &grep_info,
1975  index_type, complete_flag,
1976  display_term, xpath_use, &ol);
1977  wrbuf_destroy(term_dict);
1978  if (res == ZEBRA_FAIL || termp == 0)
1979  {
1980  wrbuf_destroy(display_term);
1981  break;
1982  }
1983  yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1984  result_sets[num_result_sets] =
1985  rset_trunc(zh, grep_info.isam_p_buf,
1986  grep_info.isam_p_indx, wrbuf_buf(display_term),
1987  wrbuf_len(display_term), rank_type,
1988  0 /* preserve position */,
1989  zapt->term->which, rset_nmem,
1990  kc, kc->scope, ol, index_type,
1991  hits_limit_value,
1992  term_ref_id_str);
1993  wrbuf_destroy(display_term);
1994  if (!result_sets[num_result_sets])
1995  break;
1996  num_result_sets++;
1997  if (!*termp)
1998  break;
1999  }
2000  grep_info_delete(&grep_info);
2001 
2002  if (res != ZEBRA_OK)
2003  return res;
2004  if (num_result_sets == 0)
2005  *rset = rset_create_null(rset_nmem, kc, 0);
2006  else if (num_result_sets == 1)
2007  *rset = result_sets[0];
2008  else
2009  *rset = rset_create_and(rset_nmem, kc, kc->scope,
2010  num_result_sets, result_sets);
2011  if (!*rset)
2012  return ZEBRA_FAIL;
2013  return ZEBRA_OK;
2014 }
2015 
2017  Z_AttributesPlusTerm *zapt,
2018  const char *termz,
2019  const Odr_oid *attributeSet,
2020  NMEM stream,
2021  const char *rank_type, NMEM rset_nmem,
2022  RSET *rset,
2023  struct rset_key_control *kc)
2024 {
2025  Record rec;
2026  zint sysno = atozint(termz);
2027 
2028  if (sysno <= 0)
2029  sysno = 0;
2030  rec = rec_get(zh->reg->records, sysno);
2031  if (!rec)
2032  sysno = 0;
2033 
2034  rec_free(&rec);
2035 
2036  if (sysno <= 0)
2037  {
2038  *rset = rset_create_null(rset_nmem, kc, 0);
2039  }
2040  else
2041  {
2042  RSFD rsfd;
2043  struct it_key key;
2044  *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2045  res_get(zh->res, "setTmpDir"), 0);
2046  rsfd = rset_open(*rset, RSETF_WRITE);
2047 
2048  key.mem[0] = sysno;
2049  key.mem[1] = 1;
2050  key.len = 2;
2051  rset_write(rsfd, &key);
2052  rset_close(rsfd);
2053  }
2054  return ZEBRA_OK;
2055 }
2056 
2057 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2058  const Odr_oid *attributeSet, NMEM stream,
2059  Z_SortKeySpecList *sort_sequence,
2060  const char *rank_type,
2061  NMEM rset_nmem,
2062  RSET *rset,
2063  struct rset_key_control *kc)
2064 {
2065  int i;
2066  int sort_relation_value;
2067  AttrType sort_relation_type;
2068  Z_SortKeySpec *sks;
2069  Z_SortKey *sk;
2070  char termz[20];
2071 
2072  attr_init_APT(&sort_relation_type, zapt, 7);
2073  sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2074 
2075  if (!sort_sequence->specs)
2076  {
2077  sort_sequence->num_specs = 10;
2078  sort_sequence->specs = (Z_SortKeySpec **)
2079  nmem_malloc(stream, sort_sequence->num_specs *
2080  sizeof(*sort_sequence->specs));
2081  for (i = 0; i < sort_sequence->num_specs; i++)
2082  sort_sequence->specs[i] = 0;
2083  }
2084  if (zapt->term->which != Z_Term_general)
2085  i = 0;
2086  else
2087  i = atoi_n((char *) zapt->term->u.general->buf,
2088  zapt->term->u.general->len);
2089  if (i >= sort_sequence->num_specs)
2090  i = 0;
2091  sprintf(termz, "%d", i);
2092 
2093  sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2094  sks->sortElement = (Z_SortElement *)
2095  nmem_malloc(stream, sizeof(*sks->sortElement));
2096  sks->sortElement->which = Z_SortElement_generic;
2097  sk = sks->sortElement->u.generic = (Z_SortKey *)
2098  nmem_malloc(stream, sizeof(*sk));
2099  sk->which = Z_SortKey_sortAttributes;
2100  sk->u.sortAttributes = (Z_SortAttributes *)
2101  nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2102 
2103  sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2104  sk->u.sortAttributes->list = zapt->attributes;
2105 
2106  sks->sortRelation = (Odr_int *)
2107  nmem_malloc(stream, sizeof(*sks->sortRelation));
2108  if (sort_relation_value == 1)
2109  *sks->sortRelation = Z_SortKeySpec_ascending;
2110  else if (sort_relation_value == 2)
2111  *sks->sortRelation = Z_SortKeySpec_descending;
2112  else
2113  *sks->sortRelation = Z_SortKeySpec_ascending;
2114 
2115  sks->caseSensitivity = (Odr_int *)
2116  nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2117  *sks->caseSensitivity = 0;
2118 
2119  sks->which = Z_SortKeySpec_null;
2120  sks->u.null = odr_nullval ();
2121  sort_sequence->specs[i] = sks;
2122  *rset = rset_create_null(rset_nmem, kc, 0);
2123  return ZEBRA_OK;
2124 }
2125 
2126 
2127 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2128  const Odr_oid *attributeSet,
2129  struct xpath_location_step *xpath, int max,
2130  NMEM mem)
2131 {
2132  const Odr_oid *curAttributeSet = attributeSet;
2133  AttrType use;
2134  const char *use_string = 0;
2135 
2136  attr_init_APT(&use, zapt, 1);
2137  attr_find_ex(&use, &curAttributeSet, &use_string);
2138 
2139  if (!use_string || *use_string != '/')
2140  return -1;
2141 
2142  return zebra_parse_xpath_str(use_string, xpath, max, mem);
2143 }
2144 
2145 
2146 
/* Build a result set for every dictionary entry that matches the pattern
   `term` in the index identified by (index_type, xpath_use).

   stream     not used in this function
   index_type index type for the ordinal lookup; also passed to rset_trunc()
   term       pattern appended after the encoded ordinal prefix; also used
              as the term name given to rset_trunc()
   xpath_use  index name for the ordinal lookup

   Returns a null result set when grep_info_prepare() fails or the index is
   unknown (ord < 0), otherwise the set produced by rset_trunc().

   NOTE(review): one argument line of the zebraExplain_lookup_attr_str() call
   (listing line 2155) is absent from this listing - restore it from the
   original file. */
2147 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2148  const char *index_type, const char *term,
2149  const char *xpath_use,
2150  NMEM rset_nmem,
2151  struct rset_key_control *kc)
2152 {
2153  struct grep_info grep_info;
2154  int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2156  index_type, xpath_use);
/* zapt is 0 here, so only the resource defaults are applied */
2157  if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2158  return rset_create_null(rset_nmem, kc, 0);
2159 
2160  if (ord < 0)
2161  return rset_create_null(rset_nmem, kc, 0);
2162  else
2163  {
2164  int i, max_pos;
2165  char ord_buf[32];
2166  RSET rset;
2167  WRBUF term_dict = wrbuf_alloc();
2168  int ord_len = key_SU_encode(ord, ord_buf);
2169  int term_type = Z_Term_characterString;
2170  const char *flags = "void";
2171 
/* prefix: every ordinal byte is preceded by a byte with value 1 */
2172  wrbuf_putc(term_dict, '(');
2173  for (i = 0; i < ord_len; i++)
2174  {
2175  wrbuf_putc(term_dict, 1);
2176  wrbuf_putc(term_dict, ord_buf[i]);
2177  }
2178  wrbuf_putc(term_dict, ')');
2179  wrbuf_puts(term_dict, term);
2180 
2181  grep_info.isam_p_indx = 0;
/* the return value of dict_lookup_grep() is not checked here */
2182  dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2183  &grep_info, &max_pos, 0, grep_handle);
2184  yaz_log(YLOG_DEBUG, "%s %d positions", term,
2185  grep_info.isam_p_indx);
2186  rset = rset_trunc(zh, grep_info.isam_p_buf,
2187  grep_info.isam_p_indx, term, strlen(term),
2188  flags, 1, term_type, rset_nmem,
2189  kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2190  0 /* term_ref_id_str */);
2191  grep_info_delete(&grep_info);
2192  wrbuf_destroy(term_dict);
2193  return rset;
2194  }
2195 }
2196 
/* Restrict a term result set to the element path described by xpath[].

   NOTE(review): the line with the function name and first parameter (listing
   line 2198) is absent from this listing; the callers invoke it as
   rpn_search_xpath(zh, stream, rank_type, *rset, xpath_len, xpath,
   rset_nmem, rset, kc).  One argument line is also missing from each of the
   three xpath_trunc() calls below (listing lines 2296, 2314, 2322) -
   restore all of them from the original file.

   rset       incoming term result set, or 0 meaning "always matches"
   xpath_len  number of location steps; a negative value means there is no
              X-Path and rset is passed through unchanged
   xpath      the location steps
   rset_out   receives the resulting set

   Always returns ZEBRA_OK. */
2197 static
2199  NMEM stream, const char *rank_type, RSET rset,
2200  int xpath_len, struct xpath_location_step *xpath,
2201  NMEM rset_nmem,
2202  RSET *rset_out,
2203  struct rset_key_control *kc)
2204 {
2205  int i;
2206  int always_matches = rset ? 0 : 1;
2207 
2208  if (xpath_len < 0)
2209  {
2210  *rset_out = rset;
2211  return ZEBRA_OK;
2212  }
2213 
2214  yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2215  for (i = 0; i < xpath_len; i++)
2216  {
2217  yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2218 
2219  }
2220 
2221  /*
2222  //a -> a/.*
2223  //a/b -> b/a/.*
2224  /a -> a/
2225  /a/b -> b/a/
2226 
2227  / -> none
2228 
2229  a[@attr = value]/b[@other = othervalue]
2230 
2231  /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2232  /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2233  /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2234  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2235  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2236  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2237 
2238  */
2239 
/* clear the dictionary's character map handler for the lookups below */
2240  dict_grep_cmap(zh->reg->dict, 0, 0);
2241 
2242  {
2243  int level = xpath_len;
2244  int first_path = 1;
2245 
/* walk the steps from the last one towards the first */
2246  while (--level >= 0)
2247  {
2248  WRBUF xpath_rev = wrbuf_alloc();
/* note: this `i` shadows the function-level `i` */
2249  int i;
2250  RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2251 
/* build the reversed path pattern from steps level..1; xpath[0] is never
   part of it */
2252  for (i = level; i >= 1; --i)
2253  {
2254  const char *cp = xpath[i].part;
2255  if (*cp)
2256  {
2257  for (; *cp; cp++)
2258  {
/* '*' in a step matches within one path component only */
2259  if (*cp == '*')
2260  wrbuf_puts(xpath_rev, "[^/]*");
2261  else if (*cp == ' ')
2262  wrbuf_puts(xpath_rev, "\001 ");
2263  else
2264  wrbuf_putc(xpath_rev, *cp);
2265 
2266  /* wrbuf_putc does not null-terminate , but
2267  wrbuf_puts below ensures it does.. so xpath_rev
2268  is OK iff length is > 0 */
2269  }
2270  wrbuf_puts(xpath_rev, "/");
2271  }
2272  else if (i == 1) /* // case */
2273  wrbuf_puts(xpath_rev, ".*");
2274  }
/* a relation predicate with a non-empty name on this step yields an
   attribute set, looked up as "name[=value]" */
2275  if (xpath[level].predicate &&
2276  xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2277  xpath[level].predicate->u.relation.name[0])
2278  {
2279  WRBUF wbuf = wrbuf_alloc();
/* name+1 skips the first character of the predicate name */
2280  wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2281  if (xpath[level].predicate->u.relation.value)
2282  {
2283  const char *cp = xpath[level].predicate->u.relation.value;
2284  wrbuf_putc(wbuf, '=');
2285 
/* backslash-escape characters listed in REGEX_CHARS so the value is
   matched literally */
2286  while (*cp)
2287  {
2288  if (strchr(REGEX_CHARS, *cp))
2289  wrbuf_putc(wbuf, '\\');
2290  wrbuf_putc(wbuf, *cp);
2291  cp++;
2292  }
2293  }
2294  rset_attr = xpath_trunc(
2295  zh, stream, "0", wrbuf_cstr(wbuf),
2297  rset_nmem, kc);
2298  wrbuf_destroy(wbuf);
2299  }
2300  else
2301  {
/* steps without a predicate are only processed for the first
   (innermost) pass; later ones are skipped */
2302  if (!first_path)
2303  {
2304  wrbuf_destroy(xpath_rev);
2305  continue;
2306  }
2307  }
2308  yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2309  wrbuf_cstr(xpath_rev));
2310  if (wrbuf_len(xpath_rev))
2311  {
2312  rset_start_tag = xpath_trunc(zh, stream, "0",
2313  wrbuf_cstr(xpath_rev),
2315  rset_nmem, kc);
/* with no term set, the start-tag set itself becomes the result */
2316  if (always_matches)
2317  rset = rset_start_tag;
2318  else
2319  {
2320  rset_end_tag = xpath_trunc(zh, stream, "0",
2321  wrbuf_cstr(xpath_rev),
2323  rset_nmem, kc);
2324 
/* wrap the current set between the start and end tag sets, passing the
   attribute set (possibly 0) along */
2325  rset = rset_create_between(rset_nmem, kc, kc->scope,
2326  rset_start_tag, rset, NULL,
2327  rset_end_tag, rset_attr);
2328  }
2329  }
2330  wrbuf_destroy(xpath_rev);
2331  first_path = 0;
2332  }
2333  }
2334  *rset_out = rset;
2335  return ZEBRA_OK;
2336 }
2337 
2338 #define MAX_XPATH_STEPS 10
2339 
2341  Z_AttributesPlusTerm *zapt,
2342  const Odr_oid *attributeSet,
2343  zint hits_limit, NMEM stream,
2344  Z_SortKeySpecList *sort_sequence,
2345  NMEM rset_nmem,
2346  RSET *rset,
2347  struct rset_key_control *kc);
2348 
2349 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2350  const Odr_oid *attributeSet,
2351  zint hits_limit, NMEM stream,
2352  Z_SortKeySpecList *sort_sequence,
2353  int num_bases, const char **basenames,
2354  NMEM rset_nmem,
2355  RSET *rset,
2356  struct rset_key_control *kc)
2357 {
2358  RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2359  ZEBRA_RES res = ZEBRA_OK;
2360  int i;
2361  for (i = 0; i < num_bases; i++)
2362  {
2363 
2364  if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2365  {
2366  zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2367  basenames[i]);
2368  res = ZEBRA_FAIL;
2369  break;
2370  }
2371  res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2372  sort_sequence,
2373  rset_nmem, rsets+i, kc);
2374  if (res != ZEBRA_OK)
2375  break;
2376  }
2377  if (res != ZEBRA_OK)
2378  { /* must clean up the already created sets */
2379  while (--i >= 0)
2380  rset_delete(rsets[i]);
2381  *rset = 0;
2382  }
2383  else
2384  {
2385  if (num_bases == 1)
2386  *rset = rsets[0];
2387  else if (num_bases == 0)
2388  *rset = rset_create_null(rset_nmem, kc, 0);
2389  else
2390  *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2391  num_bases, rsets);
2392  }
2393  return res;
2394 }
2395 
2397  Z_AttributesPlusTerm *zapt,
2398  const Odr_oid *attributeSet,
2399  zint hits_limit, NMEM stream,
2400  Z_SortKeySpecList *sort_sequence,
2401  NMEM rset_nmem,
2402  RSET *rset,
2403  struct rset_key_control *kc)
2404 {
2405  ZEBRA_RES res = ZEBRA_OK;
2406  const char *index_type;
2407  char *search_type = NULL;
2408  char rank_type[128];
2409  int complete_flag;
2410  int sort_flag;
2411  char termz[IT_MAX_WORD+1];
2412  int xpath_len;
2413  const char *xpath_use = 0;
2414  struct xpath_location_step xpath[MAX_XPATH_STEPS];
2415 
2416  if (!log_level_set)
2417  {
2418  log_level_rpn = yaz_log_module_level("rpn");
2419  log_level_set = 1;
2420  }
2421  zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2422  rank_type, &complete_flag, &sort_flag);
2423 
2424  yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2425  yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2426  yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2427  yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2428 
2429  if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2430  return ZEBRA_FAIL;
2431 
2432  if (sort_flag)
2433  return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2434  rank_type, rset_nmem, rset, kc);
2435  /* consider if an X-Path query is used */
2436  xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2437  xpath, MAX_XPATH_STEPS, stream);
2438  if (xpath_len >= 0)
2439  {
2440  if (xpath[xpath_len-1].part[0] == '@')
2441  xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2442  else
2443  xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2444 
2445  if (1)
2446  {
2447  AttrType relation;
2448  int relation_value;
2449 
2450  attr_init_APT(&relation, zapt, 2);
2451  relation_value = attr_find(&relation, NULL);
2452 
2453  if (relation_value == 103) /* alwaysmatches */
2454  {
2455  *rset = 0; /* signal no "term" set */
2456  return rpn_search_xpath(zh, stream, rank_type, *rset,
2457  xpath_len, xpath, rset_nmem, rset, kc);
2458  }
2459  }
2460  }
2461 
2462  /* search using one of the various search type strategies
2463  termz is our UTF-8 search term
2464  attributeSet is top-level default attribute set
2465  stream is ODR for search
2466  reg_id is the register type
2467  complete_flag is 1 for complete subfield, 0 for incomplete
2468  xpath_use is use-attribute to be used for X-Path search, 0 for none
2469  */
2470  if (!strcmp(search_type, "phrase"))
2471  {
2472  res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2473  stream,
2474  index_type, complete_flag, rank_type,
2475  xpath_use,
2476  rset_nmem,
2477  rset, kc);
2478  }
2479  else if (!strcmp(search_type, "and-list"))
2480  {
2481  res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2482  stream,
2483  index_type, complete_flag, rank_type,
2484  xpath_use,
2485  rset_nmem,
2486  rset, kc);
2487  }
2488  else if (!strcmp(search_type, "or-list"))
2489  {
2490  res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2491  stream,
2492  index_type, complete_flag, rank_type,
2493  xpath_use,
2494  rset_nmem,
2495  rset, kc);
2496  }
2497  else if (!strcmp(search_type, "local"))
2498  {
2499  res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2500  rank_type, rset_nmem, rset, kc);
2501  }
2502  else if (!strcmp(search_type, "numeric"))
2503  {
2504  res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2505  stream,
2506  index_type, complete_flag, rank_type,
2507  xpath_use,
2508  rset_nmem,
2509  rset, kc);
2510  }
2511  else
2512  {
2513  zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2514  res = ZEBRA_FAIL;
2515  }
2516  if (res != ZEBRA_OK)
2517  return res;
2518  if (!*rset)
2519  return ZEBRA_FAIL;
2520  return rpn_search_xpath(zh, stream, rank_type, *rset,
2521  xpath_len, xpath, rset_nmem, rset, kc);
2522 }
2523 
2524 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2525  const Odr_oid *attributeSet,
2526  zint hits_limit,
2527  NMEM stream, NMEM rset_nmem,
2528  Z_SortKeySpecList *sort_sequence,
2529  int num_bases, const char **basenames,
2530  RSET **result_sets, int *num_result_sets,
2531  Z_Operator *parent_op,
2532  struct rset_key_control *kc);
2533 
2535  zint *approx_limit)
2536 {
2537  ZEBRA_RES res = ZEBRA_OK;
2538  if (zs->which == Z_RPNStructure_complex)
2539  {
2540  if (res == ZEBRA_OK)
2541  res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2542  approx_limit);
2543  if (res == ZEBRA_OK)
2544  res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2545  approx_limit);
2546  }
2547  else if (zs->which == Z_RPNStructure_simple)
2548  {
2549  if (zs->u.simple->which == Z_Operand_APT)
2550  {
2551  Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2552  AttrType global_hits_limit_attr;
2553  int l;
2554 
2555  attr_init_APT(&global_hits_limit_attr, zapt, 12);
2556 
2557  l = attr_find(&global_hits_limit_attr, NULL);
2558  if (l != -1)
2559  *approx_limit = l;
2560  }
2561  }
2562  return res;
2563 }
2564 
2566  const Odr_oid *attributeSet,
2567  zint hits_limit,
2568  NMEM stream, NMEM rset_nmem,
2569  Z_SortKeySpecList *sort_sequence,
2570  int num_bases, const char **basenames,
2571  RSET *result_set)
2572 {
2573  RSET *result_sets = 0;
2574  int num_result_sets = 0;
2575  ZEBRA_RES res;
2576  struct rset_key_control *kc = zebra_key_control_create(zh);
2577 
2578  res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2579  stream, rset_nmem,
2580  sort_sequence,
2581  num_bases, basenames,
2582  &result_sets, &num_result_sets,
2583  0 /* no parent op */,
2584  kc);
2585  if (res != ZEBRA_OK)
2586  {
2587  int i;
2588  for (i = 0; i < num_result_sets; i++)
2589  rset_delete(result_sets[i]);
2590  *result_set = 0;
2591  }
2592  else
2593  {
2594  assert(num_result_sets == 1);
2595  assert(result_sets);
2596  assert(*result_sets);
2597  *result_set = *result_sets;
2598  }
2599  (*kc->dec)(kc);
2600  return res;
2601 }
2602 
2604  const Odr_oid *attributeSet, zint hits_limit,
2605  NMEM stream, NMEM rset_nmem,
2606  Z_SortKeySpecList *sort_sequence,
2607  int num_bases, const char **basenames,
2608  RSET **result_sets, int *num_result_sets,
2609  Z_Operator *parent_op,
2610  struct rset_key_control *kc)
2611 {
2612  *num_result_sets = 0;
2613  if (zs->which == Z_RPNStructure_complex)
2614  {
2615  ZEBRA_RES res;
2616  Z_Operator *zop = zs->u.complex->roperator;
2617  RSET *result_sets_l = 0;
2618  int num_result_sets_l = 0;
2619  RSET *result_sets_r = 0;
2620  int num_result_sets_r = 0;
2621 
2622  res = rpn_search_structure(zh, zs->u.complex->s1,
2623  attributeSet, hits_limit, stream, rset_nmem,
2624  sort_sequence,
2625  num_bases, basenames,
2626  &result_sets_l, &num_result_sets_l,
2627  zop, kc);
2628  if (res != ZEBRA_OK)
2629  {
2630  int i;
2631  for (i = 0; i < num_result_sets_l; i++)
2632  rset_delete(result_sets_l[i]);
2633  return res;
2634  }
2635  res = rpn_search_structure(zh, zs->u.complex->s2,
2636  attributeSet, hits_limit, stream, rset_nmem,
2637  sort_sequence,
2638  num_bases, basenames,
2639  &result_sets_r, &num_result_sets_r,
2640  zop, kc);
2641  if (res != ZEBRA_OK)
2642  {
2643  int i;
2644  for (i = 0; i < num_result_sets_l; i++)
2645  rset_delete(result_sets_l[i]);
2646  for (i = 0; i < num_result_sets_r; i++)
2647  rset_delete(result_sets_r[i]);
2648  return res;
2649  }
2650 
2651  /* make a new list of result for all children */
2652  *num_result_sets = num_result_sets_l + num_result_sets_r;
2653  *result_sets = nmem_malloc(stream, *num_result_sets *
2654  sizeof(**result_sets));
2655  memcpy(*result_sets, result_sets_l,
2656  num_result_sets_l * sizeof(**result_sets));
2657  memcpy(*result_sets + num_result_sets_l, result_sets_r,
2658  num_result_sets_r * sizeof(**result_sets));
2659 
2660  if (!parent_op || parent_op->which != zop->which
2661  || (zop->which != Z_Operator_and &&
2662  zop->which != Z_Operator_or))
2663  {
2664  /* parent node different from this one (or non-present) */
2665  /* we must combine result sets now */
2666  RSET rset;
2667  switch (zop->which)
2668  {
2669  case Z_Operator_and:
2670  rset = rset_create_and(rset_nmem, kc,
2671  kc->scope,
2672  *num_result_sets, *result_sets);
2673  break;
2674  case Z_Operator_or:
2675  rset = rset_create_or(rset_nmem, kc,
2676  kc->scope, 0, /* termid */
2677  *num_result_sets, *result_sets);
2678  break;
2679  case Z_Operator_and_not:
2680  rset = rset_create_not(rset_nmem, kc,
2681  kc->scope,
2682  (*result_sets)[0],
2683  (*result_sets)[1]);
2684  break;
2685  case Z_Operator_prox:
2686  if (zop->u.prox->which != Z_ProximityOperator_known)
2687  {
2688  zebra_setError(zh,
2689  YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2690  0);
2691  return ZEBRA_FAIL;
2692  }
2693  if (*zop->u.prox->u.known == Z_ProxUnit_word)
2694  {
2695  rset = rset_create_prox(rset_nmem, kc,
2696  kc->scope,
2697  *num_result_sets, *result_sets,
2698  *zop->u.prox->ordered,
2699  (!zop->u.prox->exclusion ?
2700  0 : *zop->u.prox->exclusion),
2701  *zop->u.prox->relationType,
2702  *zop->u.prox->distance );
2703  }
2704  else if (*zop->u.prox->u.known >= 3 &&
2705  *zop->u.prox->u.known <= 10 &&
2706  *num_result_sets == 2)
2707  {
2708  /* Z39.50 known proximity units */
2709  static const char *units[] = {
2710  "sentence", /* (3) */
2711  "paragraph", /* (4) */
2712  "section", /* (5) */
2713  "chapter", /* (6) */
2714  "document", /* (7) */
2715  "element", /* (8) */
2716  "subelement", /* (9) */
2717  "elementType" /* (10) */
2718  };
2719  const char *unit = units[*zop->u.prox->u.known - 3];
2720  RSET begin_set = search_group(zh, unit, "begin",
2721  rset_nmem, kc);
2722  RSET end_set = search_group(zh, unit, "end",
2723  rset_nmem, kc);
2724  if (begin_set && end_set)
2725  {
2726  rset = rset_create_between(
2727  rset_nmem, kc, kc->scope,
2728  begin_set,
2729  (*result_sets[0]), (*result_sets)[1], end_set,
2730  0 /* rset_attr */);
2731  }
2732  else
2733  {
2734  if (begin_set)
2735  rset_delete(begin_set);
2736  if (end_set)
2737  rset_delete(end_set);
2739  YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2740  *zop->u.prox->u.known);
2741  return ZEBRA_FAIL;
2742 
2743  }
2744  }
2745  else
2746  {
2748  YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2749  *zop->u.prox->u.known);
2750  return ZEBRA_FAIL;
2751  }
2752  break;
2753  default:
2754  zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2755  return ZEBRA_FAIL;
2756  }
2757  *num_result_sets = 1;
2758  *result_sets = nmem_malloc(stream, *num_result_sets *
2759  sizeof(**result_sets));
2760  (*result_sets)[0] = rset;
2761  }
2762  }
2763  else if (zs->which == Z_RPNStructure_simple)
2764  {
2765  RSET rset;
2766  ZEBRA_RES res;
2767 
2768  if (zs->u.simple->which == Z_Operand_APT)
2769  {
2770  yaz_log(YLOG_DEBUG, "rpn_search_APT");
2771  res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2772  attributeSet, hits_limit,
2773  stream, sort_sequence,
2774  num_bases, basenames, rset_nmem, &rset,
2775  kc);
2776  if (res != ZEBRA_OK)
2777  return res;
2778  }
2779  else if (zs->u.simple->which == Z_Operand_resultSetId)
2780  {
2781  yaz_log(YLOG_DEBUG, "rpn_search_ref");
2782  rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2783  if (!rset)
2784  {
2785  zebra_setError(zh,
2786  YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2787  zs->u.simple->u.resultSetId);
2788  return ZEBRA_FAIL;
2789  }
2790  rset_dup(rset);
2791  }
2792  else
2793  {
2794  zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2795  return ZEBRA_FAIL;
2796  }
2797  *num_result_sets = 1;
2798  *result_sets = nmem_malloc(stream, *num_result_sets *
2799  sizeof(**result_sets));
2800  (*result_sets)[0] = rset;
2801  }
2802  else
2803  {
2804  zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2805  return ZEBRA_FAIL;
2806  }
2807  return ZEBRA_OK;
2808 }
2809 
2810 
2811 
2812 /*
2813  * Local variables:
2814  * c-basic-offset: 4
2815  * c-file-style: "Stroustrup"
2816  * indent-tabs-mode: nil
2817  * End:
2818  * vim: shiftwidth=4 tabstop=8 expandtab
2819  */
2820 
RSET rset_create_prox(NMEM nmem, struct rset_key_control *kcontrol, int scope, int rset_no, RSET *rset, int ordered, int exclusion, int relation, int distance)
Definition: rsprox.c:72
static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, NMEM stream, const char *rank_type, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
Definition: rpnsearch.c:2016
Record rec_get(Records p, zint sysno)
gets record - with given system number
Definition: records.c:927
void zebra_setError(ZebraHandle zh, int code, const char *addinfo)
Definition: zebraapi.c:2754
static ZEBRA_RES search_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc, zebra_map_t zm, size_t token_number)
search for term (which may be truncated)
Definition: rpnsearch.c:992
int len
Definition: it_key.h:31
#define rset_write(rfd, buf)
Definition: rset.h:220
static void gen_regular_rel(WRBUF term_dict, int val, int islt)
Definition: rpnsearch.c:629
const char * res_get_def(Res r, const char *name, const char *def)
Definition: res.c:313
#define ZEBRA_OK
Definition: util.h:82
RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
Definition: zsets.c:1075
const char * res_get(Res r, const char *name)
Definition: res.c:294
int zebra_maps_is_first_in_field(zebra_map_t zm)
Definition: zebramap.c:491
struct rset_key_control * zebra_key_control_create(ZebraHandle zh)
Definition: kcontrol.c:57
zint ISAM_P
Definition: isamc.h:28
int zebra_term_untrans(ZebraHandle zh, const char *index_type, char *dst, const char *src)
Definition: untrans.c:31
struct rset rset
ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, const char **basenames, RSET *result_set)
Definition: rpnsearch.c:2565
void dict_grep_cmap(Dict dict, void *vp, const char **(*cmap)(void *vp, const char **from, int len))
install character mapping handler for dict_lookup_grep
Definition: lookgrep.c:445
static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
returns result set for or-list search
Definition: rpnsearch.c:1631
static void grep_info_delete(struct grep_info *grep_info)
Definition: rpnsearch.c:1300
#define ZEBRA_XPATH_ELM_END
Definition: recctrl.h:36
void zebra_set_partial_result(ZebraHandle zh)
Definition: zebraapi.c:1063
RSET rset_dup(RSET rs)
Duplicate an RSET.
Definition: rset.c:255
RSET rset_trunc(ZebraHandle zh, ISAM_P *isam_p, int no, const char *term, int length_term, const char *flags, int preserve_position, int term_type, NMEM rset_nmem, struct rset_key_control *kctrl, int scope, struct ord_list *ol, const char *index_type, zint hits_limit, const char *term_ref_id)
Definition: trunc.c:403
int attr_find(AttrType *src, const Odr_oid **attribute_set_oid)
Definition: attrfind.c:99
const char * index_type
Definition: rpnsearch.c:85
struct zebra_register * reg
Definition: index.h:174
ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt, char *termz)
Definition: zaptterm.c:32
#define ZEBRA_FAIL
Definition: util.h:81
int zebra_maps_attr(zebra_maps_t zms, Z_AttributesPlusTerm *zapt, const char **reg_id, char **search_type, char *rank_type, int *complete_flag, int *sort_flag)
Definition: zebramap.c:514
int zebra_parse_xpath_str(const char *xpath_string, struct xpath_location_step *xpath, int max, NMEM mem)
Definition: xpath.c:162
static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, struct grep_info *grep_info, int *max_pos, zebra_map_t zm, WRBUF display_term, int *error_code)
Definition: rpnsearch.c:1767
static ZEBRA_RES grep_info_prepare(ZebraHandle zh, Z_AttributesPlusTerm *zapt, struct grep_info *grep_info, const char *index_type)
Definition: rpnsearch.c:1308
#define rset_open(rs, wflag)
Definition: rset.h:202
static int log_level_set
Definition: rpnsearch.c:40
ZEBRA_RES zebra_apt_get_ord(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *index_type, const char *xpath_use, const Odr_oid *curAttributeSet, int *ord)
Definition: attribute.c:135
static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
returns result set for phrase search
Definition: rpnsearch.c:1565
int isam_p_size
Definition: rpnsearch.c:81
Records records
Definition: index.h:138
void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
Definition: rpnsearch.c:739
static int term_100_icu(zebra_map_t zm, const char **src, WRBUF term_dict, WRBUF display_term, int mode, size_t token_number)
Definition: rpnsearch.c:313
static int add_isam_p(const char *name, const char *info, struct grep_info *p)
Definition: rpnsearch.c:89
void rec_free(Record *recpp)
frees record (from memory)
Definition: records.c:1043
static ZEBRA_RES search_terms_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET **result_sets, int *num_result_sets, struct rset_key_control *kc)
Create result set(s) for list of terms.
Definition: rpnsearch.c:1448
const char * CHR_SPACE
Definition: charmap.c:49
ZebraSet termset
Definition: rpnsearch.c:86
zebra_map_t zm
Definition: index.h:400
static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, NMEM rset_nmem, Z_SortKeySpecList *sort_sequence, int num_bases, const char **basenames, RSET **result_sets, int *num_result_sets, Z_Operator *parent_op, struct rset_key_control *kc)
Definition: rpnsearch.c:2603
zint atozint(const char *src)
Definition: zint.c:55
union xpath_predicate::@8 u
#define IT_MAX_WORD
Definition: it_key.h:27
ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, zint *hits_limit_value, const char **term_ref_id_str, NMEM nmem)
Definition: rpnsearch.c:959
static int term_103(zebra_map_t zm, const char **src, WRBUF term_dict, int *errors, int space_split, WRBUF display_term)
Definition: rpnsearch.c:449
struct ord_list * ord_list_create(NMEM nmem)
Definition: rset.c:301
void rset_delete(RSET rs)
Destructor RSETs.
Definition: rset.c:218
ZebraHandle zh
Definition: rpnsearch.c:84
ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
Definition: zsets.c:215
int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, zinfo_index_category_t cat, const char *index_type, const char *str)
lookup ordinal from string index + index type
Definition: zinfo.c:1354
struct xpath_predicate::@8::@9 relation
static RSET xpath_trunc(ZebraHandle zh, NMEM stream, const char *index_type, const char *term, const char *xpath_use, NMEM rset_nmem, struct rset_key_control *kc)
Definition: rpnsearch.c:2147
static const char ** rpn_char_map_handler(void *vp, const char **from, int len)
Definition: rpnsearch.c:45
void zebra_setError_zint(ZebraHandle zh, int code, zint i)
Definition: zebraapi.c:2763
const char ** zebra_maps_input(zebra_map_t zm, const char **from, int len, int first)
Definition: zebramap.c:398
int zebra_maps_is_icu(zebra_map_t zm)
Definition: zebramap.c:740
int attr_find_ex(AttrType *src, const Odr_oid **attribute_set_oid, const char **string_value)
Definition: attrfind.c:45
ZebraExplainInfo zei
Definition: index.h:139
int zebra_map_tokenize_start(zebra_map_t zm, const char *buf, size_t len)
Definition: zebramap.c:701
struct xpath_predicate * predicate
Definition: zebra_xpath.h:46
static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, WRBUF display_term, const char *xpath_use, struct ord_list **ol)
Definition: rpnsearch.c:1865
static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
Definition: rpnsearch.c:1927
RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol, int scope, int no_rsets, RSET *rsets)
Definition: rsmultiandor.c:280
int key_SU_decode(int *ch, const unsigned char *out)
Definition: su_codec.c:64
int zebra_map_tokenize_next(zebra_map_t zm, const char **result_buf, size_t *result_len, const char **display_buf, size_t *display_len)
Definition: zebramap.c:657
#define ZEBRA_XPATH_ATTR_CDATA
Definition: recctrl.h:45
zebra_maps_t zebra_maps
Definition: index.h:143
void attr_init_APT(AttrType *src, Z_AttributesPlusTerm *zapt, int type)
Definition: attrfind.c:27
void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type, const char *db, const char *index_name, const char *term)
Definition: zsets.c:188
static void add(void *set_handle, int seqno, TERMID term)
Definition: rank1.c:168
static int term_pre(zebra_map_t zm, const char **src, const char *ct1, int first)
Definition: rpnsearch.c:159
static ZEBRA_RES rpn_search_xpath(ZebraHandle zh, NMEM stream, const char *rank_type, RSET rset, int xpath_len, struct xpath_location_step *xpath, NMEM rset_nmem, RSET *rset_out, struct rset_key_control *kc)
Definition: rpnsearch.c:2198
static void end(struct zebra_register *reg, void *set_handle)
Definition: rank1.c:156
#define FIRST_IN_FIELD_CHAR
Definition: index.h:416
zebra_map_t zebra_map_get_or_add(zebra_maps_t zms, const char *id)
Definition: zebramap.c:363
void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm, struct rpn_char_map_info *map_info)
Definition: rpnsearch.c:64
static ZEBRA_RES search_terms_chrmap(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET **result_sets, int *num_result_sets, struct rset_key_control *kc, zebra_map_t zm)
Definition: rpnsearch.c:1370
static ZEBRA_RES rpn_search_database(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
Definition: rpnsearch.c:2396
RSET rset_create_null(NMEM nmem, struct rset_key_control *kcontrol, TERMID term)
Definition: rsnull.c:47
RSET zebra_create_rset_isam(ZebraHandle zh, NMEM rset_nmem, struct rset_key_control *kctl, int scope, ISAM_P pos, TERMID termid)
Definition: rset_isam.c:32
static ZEBRA_RES search_position(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, const char *index_type, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
limit a search by position - returns result set
Definition: rpnsearch.c:1501
static void add_non_space(const char *start, const char *end, WRBUF term_dict, WRBUF display_term, const char **map, int q_map_match)
Definition: rpnsearch.c:209
static int term_102_icu(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term)
Definition: rpnsearch.c:251
int trunc_max
Definition: rpnsearch.c:83
int zebraExplain_curDatabase(ZebraExplainInfo zei, const char *database)
Definition: zinfo.c:791
ZebraService zs
Definition: zebrash.c:63
void(* dec)(struct rset_key_control *kc)
Definition: rset.h:138
char * dict_lookup(Dict dict, const char *p)
lookup item in dictionary
Definition: lookup.c:100
zint mem[IT_KEY_LEVEL_MAX]
Definition: it_key.h:32
#define ZEBRA_XPATH_CDATA
Definition: recctrl.h:39
ISAM_P * isam_p_buf
Definition: rpnsearch.c:80
#define FIRST_IN_FIELD_STR
Definition: index.h:415
static int log_level_rpn
Definition: rpnsearch.c:41
Dict dict
Definition: index.h:132
RSET rset_create_between(NMEM nmem, struct rset_key_control *kcontrol, int scope, RSET rset_l, RSET rset_m1, RSET rset_m2, RSET rset_r, RSET rset_attr)
Definition: rsbetween.c:101
zinfo_index_category_t
Definition: zinfo.h:37
int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord, const char **index_type, const char **db, const char **string_index)
Definition: zinfo.c:1479
int key_SU_encode(int ch, char *out)
Definition: su_codec.c:31
static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, NMEM stream, struct grep_info *grep_info, const char *index_type, int complete_flag, WRBUF display_term, const char *xpath_use, struct ord_list **ol, zebra_map_t zm, size_t token_number)
Definition: rpnsearch.c:1039
static RSET search_group(ZebraHandle zh, const char *unit, const char *term, NMEM rset_nmem, struct rset_key_control *kc)
Definition: rpnsearch.c:1469
static void esc_str(char *out_buf, size_t out_size, const char *in_buf, int in_size)
Definition: rpnsearch.c:181
int isam_p_indx
Definition: rpnsearch.c:82
const char ** zebra_maps_search(zebra_map_t zm, const char **from, int len, int *q_map_match)
Definition: zebramap.c:411
static int grep_handle(char *name, const char *info, void *p)
Definition: rpnsearch.c:154
static int term_102(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term)
Definition: rpnsearch.c:495
static int term_105(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term, int right_truncate)
Definition: rpnsearch.c:572
long zint
Zebra integer.
Definition: util.h:66
ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs, zint *approx_limit)
Definition: rpnsearch.c:2534
RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol, int scope, TERMID termid, int no_rsets, RSET *rsets)
Definition: rsmultiandor.c:273
static int term_101(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term)
Definition: rpnsearch.c:412
#define ZEBRA_XPATH_ELM_BEGIN
Definition: recctrl.h:33
Definition: rset.h:73
Definition: rset.h:150
static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, Z_SortKeySpecList *sort_sequence, int num_bases, const char **basenames, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
Definition: rpnsearch.c:2349
struct ord_list * ord_list_append(NMEM nmem, struct ord_list *list, int ord)
Definition: rset.c:306
int dict_lookup_grep(Dict dict, const char *p, int range, void *client, int *max_pos, int init_pos, int(*f)(char *name, const char *info, void *client))
regular expression search with error correction
Definition: lookgrep.c:374
static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, NMEM stream, Z_SortKeySpecList *sort_sequence, const char *rank_type, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
Definition: rpnsearch.c:2057
#define XPATH_PREDICATE_RELATION
Definition: zebra_xpath.h:29
#define ZEBRA_GROUP_INDEX_NAME
Definition: recctrl.h:47
RSET rset_create_temp(NMEM nmem, struct rset_key_control *kcontrol, int scope, const char *temp_path, TERMID term)
Definition: rstemp.c:84
Definition: it_key.h:30
short ZEBRA_RES
Common return type for Zebra API.
Definition: util.h:80
void rset_close(RSFD rfd)
Closes a result set RFD handle.
Definition: rset.c:98
#define ZEBRA_XPATH_ATTR_NAME
Definition: recctrl.h:42
static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz_org, const Odr_oid *attributeSet, zint hits_limit, NMEM stream, const char *index_type, int complete_flag, const char *rank_type, const char *xpath_use, NMEM rset_nmem, RSET *rset, struct rset_key_control *kc)
returns result set for and-list search
Definition: rpnsearch.c:1699
Definition: rset.h:35
static int term_104(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term)
Definition: rpnsearch.c:503
RSET rset_create_not(NMEM nmem, struct rset_key_control *kcontrol, int scope, RSET rset_l, RSET rset_r)
Definition: rsbool.c:92
static size_t icu_basechars(const char *buf, size_t i)
Definition: rpnsearch.c:242
#define REGEX_CHARS
Definition: rpnsearch.c:207
static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const Odr_oid *attributeSet, struct xpath_location_step *xpath, int max, NMEM mem)
Definition: rpnsearch.c:2127
static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char **term_sub, WRBUF term_dict, const Odr_oid *attributeSet, zebra_map_t zm, int space_split, WRBUF display_term, int *error_code)
Definition: rpnsearch.c:761
static int term_100(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term)
Definition: rpnsearch.c:356
#define RSETF_WRITE
Definition: rset.h:200
int * term_no
Definition: rpnsearch.c:78
#define ZINT_FORMAT
Definition: util.h:72