IDZEBRA  2.0.54
extract.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) 2004-2013 Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
24 #if HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 #include <stdio.h>
28 #include <assert.h>
29 #include <ctype.h>
30 #ifdef WIN32
31 #include <io.h>
32 #endif
33 #if HAVE_UNISTD_H
34 #include <unistd.h>
35 #endif
36 #include <fcntl.h>
37 
38 
39 #include "index.h"
40 #include "orddict.h"
41 #include <direntz.h>
42 #include <charmap.h>
43 #include <yaz/snprintf.h>
44 
45 static int log_level_extract = 0;
46 static int log_level_details = 0;
47 static int log_level_initialized = 0;
48 
49 /* 1 if we eliminate identical delete/insert keys */
50 /* eventually the 0-case code will be removed */
51 #define FLUSH2 1
52 
53 #if FLUSH2
55  zebra_rec_keys_t ins_keys,
56  zint ins_rank,
57  zebra_rec_keys_t del_keys,
58  zint del_rank);
59 #else
60 static void extract_flush_record_keys(ZebraHandle zh, zint sysno,
61  int cmd,
62  zebra_rec_keys_t reckeys,
63  zint staticrank);
64 #endif
65 
/* Looks up the YAZ log levels of the "extract" and "indexdetails" modules
   and stores them in the file-level variables log_level_extract and
   log_level_details.
   NOTE(review): listing lines 68 and 70 are absent from this extract;
   presumably they test and set log_level_initialized so that the lookup
   runs only once -- confirm against the original file. */
66 static void zebra_init_log_level(void)
67 {
69  {
71 
72  log_level_extract = yaz_log_module_level("extract");
73  log_level_details = yaz_log_module_level("indexdetails");
74  }
75 }
76 
77 static WRBUF wrbuf_hex_str(const char *cstr)
78 {
79  size_t i;
80  WRBUF w = wrbuf_alloc();
81  for (i = 0; cstr[i]; i++)
82  {
83  if (cstr[i] < ' ' || cstr[i] > 126)
84  wrbuf_printf(w, "\\%02X", cstr[i] & 0xff);
85  else
86  wrbuf_putc(w, cstr[i]);
87  }
88  return w;
89 }
90 
91 
92 static void extract_flush_sort_keys(ZebraHandle zh, zint sysno,
93  int cmd, zebra_rec_keys_t skp);
94 static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid);
95 static void extract_token_add(RecWord *p);
96 
/* NOTE(review): the function header (listing line 97), the condition
   (line 99) and the remaining yaz_log argument (line 102) are absent from
   this extract.  The call check_log_limit(zh) in the following function
   suggests this is that function -- confirm.
   The visible part emits the "More than %d file log entries. Omitting rest"
   message inside a conditional block. */
98 {
100  {
101  yaz_log(YLOG_LOG, "More than %d file log entries. Omitting rest",
103  }
104 }
105 
/* NOTE(review): the function header (listing line 106) and the tail of the
   yaz_log call (lines 113-115) are absent from this extract.  Calls of the
   form logRecord(zh) elsewhere in the file suggest this is that function --
   confirm.
   Visible behaviour: calls check_log_limit(zh), increments
   zh->records_processed and, whenever that counter is a multiple of 1000,
   writes a "Records: ... i/u/d" progress line to the log. */
107 {
108  check_log_limit(zh);
109  ++zh->records_processed;
110  if (!(zh->records_processed % 1000))
111  {
112  yaz_log(YLOG_LOG, "Records: "ZINT_FORMAT" i/u/d "
116  }
117 }
118 
119 static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
120 {
121  ctrl->flagShowRecords = !zh->m_flag_rw;
122 }
123 
124 
125 static void extract_add_index_string(RecWord *p,
127  const char *str, int length);
128 
129 static void extract_set_store_data_prepare(struct recExtractCtrl *p);
130 
131 static void extract_init(struct recExtractCtrl *p, RecWord *w)
132 {
133  w->seqno = 1;
134  w->index_name = "any";
135  w->index_type = "w";
136  w->extractCtrl = p;
137  w->record_id = 0;
138  w->section_id = 0;
139  w->segment = 0;
140 }
141 
145 };
146 
/* NOTE(review): the first header line (listing line 147) is absent from
   this extract.  The later call parse_complete_field(p, zm, buf), whose
   result is stored in an int, suggests that name and return type -- confirm.

   Feeds p->term_buf (p->term_len bytes) through zebra_maps_input() and
   copies the mapped characters into buf.  Input mapped to CHR_SPACE is
   skipped and replaced by one CHR_SPACE between words, a mapping equal to
   CHR_CUT resets the output position to 0, and copying stops once
   IT_MAX_WORD bytes have been stored.
   Returns the number of bytes stored in buf.  buf is not NUL-terminated
   by this function. */
148  char *buf)
149 {
150  const char *b = p->term_buf;
151  const char **map = 0;
152  int i = 0, remain = p->term_len;
153 
154  if (remain > 0)
155  map = zebra_maps_input(zm, &b, remain, 1);
156  while (remain > 0 && i < IT_MAX_WORD)
157  {
158  while (map && *map && **map == *CHR_SPACE)
159  {
160  remain = p->term_len - (b - p->term_buf); /* bytes not yet consumed */
161 
162  if (remain > 0)
163  {
164  int first = i ? 0 : 1; /* first position */
165  map = zebra_maps_input(zm, &b, remain, first);
166  }
167  else
168  map = 0;
169  }
170  if (!map)
171  break;
172 
173  if (i && i < IT_MAX_WORD)
174  buf[i++] = *CHR_SPACE; /* single separator, only after earlier output */
175  while (map && *map && **map != *CHR_SPACE)
176  {
177  const char *cp = *map;
178 
179  if (**map == *CHR_CUT)
180  {
181  i = 0; /* discard everything stored so far */
182  }
183  else
184  {
185  if (i >= IT_MAX_WORD)
186  break;
187  while (i < IT_MAX_WORD && *cp)
188  buf[i++] = *(cp++);
189  }
190  remain = p->term_len - (b - p->term_buf);
191  if (remain > 0)
192  {
193  map = zebra_maps_input(zm, &b, remain, 0);
194  }
195  else
196  map = 0;
197  }
198  }
199  return i;
200 }
201 
202 static void snippet_add_complete_field(RecWord *p, int ord,
203  zebra_map_t zm)
204 {
205  struct snip_rec_info *h = p->extractCtrl->handle;
206  char buf[IT_MAX_WORD+1];
207  int i = parse_complete_field(p, zm, buf);
208 
209  if (!i)
210  return;
211 
212  if (p->term_len && p->term_buf && zebra_maps_is_index(zm))
213  zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
214  p->term_buf, p->term_len);
215  p->seqno++;
216 }
217 
/* NOTE(review): the function header (listing line 218) and listing line 268
   (presumably the condition guarding the "first in field marker" block) are
   absent from this extract.  The call snippet_add_incomplete_field(p, ch, zm)
   later in the file gives the name and argument order.

   Walks p->term_buf through zebra_maps_input() and splits it into
   alternating runs: input mapped to CHR_SPACE and input mapped to anything
   else.  When the map is an index map, each non-empty space run is appended
   to h->snippets with third argument 1 and each word run with third
   argument 0; p->seqno is incremented after every word run.  The function
   returns early when a word scan consumes nothing (start == last). */
219 {
220  struct snip_rec_info *h = p->extractCtrl->handle;
221  const char *b = p->term_buf;
222  int remain = p->term_len;
223  int first = 1;
224  const char **map = 0;
225  const char *start = b;
226  const char *last = b;
227 
228  if (remain > 0)
229  map = zebra_maps_input(zm, &b, remain, 0);
230 
231  while (map)
232  {
233  int remain; /* NOTE(review): shadows the outer remain declared above */
234 
235  /* Skip spaces */
236  while (map && *map && **map == *CHR_SPACE)
237  {
238  remain = p->term_len - (b - p->term_buf);
239  last = b;
240  if (remain > 0)
241  map = zebra_maps_input(zm, &b, remain, 0);
242  else
243  map = 0;
244  }
245  if (!map)
246  break;
247  if (start != last && zebra_maps_is_index(zm))
248  {
249  zebra_snippets_appendn(h->snippets, p->seqno, 1, ord,
250  start, last - start);
251  }
252  start = last;
253  while (map && *map && **map != *CHR_SPACE)
254  {
255  remain = p->term_len - (b - p->term_buf);
256  last = b;
257  if (remain > 0)
258  map = zebra_maps_input(zm, &b, remain, 0);
259  else
260  map = 0;
261  }
262  if (start == last)
263  return ;
264 
265  if (first)
266  {
267  first = 0;
269  {
270  /* first in field marker */
271  p->seqno++;
272  }
273  }
274  if (start != last && zebra_maps_is_index(zm))
275  zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
276  start, last - start);
277  start = last;
278  p->seqno++;
279  }
280 
281 }
282 
/* Snippet handler for ICU-based maps.
   Iterates over the tokens delivered by zebra_map_tokenize_next() and, when
   the map is an index map, appends each token's display form
   (display_buf, display_len) to h->snippets under ordinal ord.  p->seqno is
   incremented once per token.  res_buf/res_len are filled in by the
   tokenizer but not used here.
   NOTE(review): listing line 293 is absent from this extract; presumably it
   starts tokenization of p->term_buf -- confirm against the original. */
283 static void snippet_add_icu(RecWord *p, int ord, zebra_map_t zm)
284 {
285  struct snip_rec_info *h = p->extractCtrl->handle;
286 
287  const char *res_buf = 0;
288  size_t res_len = 0;
289 
290  const char *display_buf = 0;
291  size_t display_len = 0;
292 
294  while (zebra_map_tokenize_next(zm, &res_buf, &res_len,
295  &display_buf, &display_len))
296  {
297  if (zebra_maps_is_index(zm))
298  zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
299  display_buf, display_len);
300  p->seqno++;
301  }
302 }
303 
/* Token callback used during snippet extraction (installed as
   extractCtrl.tokenAdd further down in this file).
   When a map zm is available, dispatches the word to the ICU handler, the
   complete-field handler or the incomplete-field handler depending on
   zebra_maps_is_icu() / zebra_maps_is_complete().
   NOTE(review): listing lines 308 and 313-314 are absent from this extract;
   they presumably declare zm and the ordinal ch used below -- confirm. */
304 static void snippet_token_add(RecWord *p)
305 {
306  struct snip_rec_info *h = p->extractCtrl->handle;
307  ZebraHandle zh = h->zh;
309 
310  if (zm)
311  {
312  ZebraExplainInfo zei = zh->reg->zei;
315 
316  if (zebra_maps_is_icu(zm))
317  snippet_add_icu(p, ch, zm);
318  else
319  {
320  if (zebra_maps_is_complete(zm))
321  snippet_add_complete_field(p, ch, zm);
322  else
323  snippet_add_incomplete_field(p, ch, zm);
324  }
325  }
326 }
327 
328 static void snippet_schema_add(
329  struct recExtractCtrl *p, Odr_oid *oid)
330 {
331 
332 }
333 
/* NOTE(review): the first header line (listing line 334) is absent from
   this extract, so the function name and the declarations of zh and sn are
   not visible here -- confirm against the original file.

   Runs a record filter in snippet mode: builds a recExtractCtrl whose token
   and schema callbacks are snippet_token_add / snippet_schema_add, whose
   handle is a local snip_rec_info holding zh and sn, with action
   action_insert and no setStoreData callback, and then invokes
   rt->extract(recTypeClientData, &extractCtrl).  The filter's return value
   is ignored. */
335  struct ZebraRecStream *stream,
336  RecType rt, void *recTypeClientData)
337 {
338  struct recExtractCtrl extractCtrl;
339  struct snip_rec_info info;
340 
341  extractCtrl.stream = stream;
342  extractCtrl.first_record = 1;
343  extractCtrl.init = extract_init;
344  extractCtrl.tokenAdd = snippet_token_add;
345  extractCtrl.schemaAdd = snippet_schema_add;
346  assert(zh->reg);
347  assert(zh->reg->dh);
348 
349  extractCtrl.dh = zh->reg->dh;
350 
351  info.zh = zh;
352  info.snippets = sn;
353  extractCtrl.handle = &info; /* info lives on this stack frame only */
354  extractCtrl.match_criteria[0] = '\0';
355  extractCtrl.staticrank = 0;
356  extractCtrl.action = action_insert;
357 
358  init_extractCtrl(zh, &extractCtrl);
359 
360  extractCtrl.setStoreData = 0;
361 
362  (*rt->extract)(recTypeClientData, &extractCtrl);
363 }
364 
/* NOTE(review): the first header line (listing line 365) and listing line
   372 (presumably the declaration of cat) are absent from this extract.
   The call searchRecordKey(zh, reckeys, attname_str, ws, 32) later in the
   file gives the name and argument order.

   Collects the terms stored in reckeys for the index named index_name.
   All ws_length slots of ws are first set to NULL.  The index ordinal is
   looked up for index types "0", "p" and "w" in that order; if none
   exists the function returns with ws all NULL.  Otherwise every key whose
   first component equals that ordinal has its term pointer stored in
   ws[seqno - startSeq], where startSeq is the sequence number of the first
   matching key and offsets outside 0..ws_length-1 are ignored.
   NOTE(review): ws receives the pointers produced by zebra_rec_keys_read();
   how long they stay valid is not visible here -- confirm. */
366  zebra_rec_keys_t reckeys,
367  const char *index_name,
368  const char **ws, int ws_length)
369 {
370  int i;
371  int ch = -1;
373 
374  for (i = 0; i<ws_length; i++)
375  ws[i] = NULL;
376 
377  if (ch < 0) /* always true here: ch was initialised to -1 */
378  ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "0", index_name);
379  if (ch < 0)
380  ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "p", index_name);
381  if (ch < 0)
382  ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "w", index_name);
383 
384  if (ch < 0)
385  return ;
386 
387  if (zebra_rec_keys_rewind(reckeys))
388  {
389  zint startSeq = -1;
390  const char *str;
391  size_t slen;
392  struct it_key key;
393  zint seqno;
394  while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
395  {
396  assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
397 
398  seqno = key.mem[key.len-1]; /* last key component is used as the sequence number */
399 
400  if (key.mem[0] == ch)
401  {
402  zint woff;
403 
404  if (startSeq == -1)
405  startSeq = seqno;
406  woff = seqno - startSeq;
407  if (woff >= 0 && woff < ws_length)
408  ws[woff] = str;
409  }
410  }
411  }
412 }
413 
414 #define FILE_MATCH_BLANK "\t "
415 
/* NOTE(review): the first header line (listing line 416) is absent from
   this extract; the name get_match_from_spec is taken from the debug log
   message below and from the call site later in the file.

   Builds a match string for a record from the match specification spec.
   The specification is a blank-separated sequence of elements:
     (attset,attname) or (attname)  - the terms stored in reckeys for index
                                      attname, preceded by one ' '
     $group $database $filename $type - the corresponding handle value or
                                      fname; unknown names add nothing
     "text" or 'text'               - the literal text
   Each element is followed by a byte with value 1 in the result.
   Returns a pointer to a static buffer, or NULL after logging a warning
   when the specification is malformed, a parenthesised element yields no
   terms, or the specification is empty.

   NOTE(review): issues visible in this block, left unchanged here:
   - the result lives in a function-static buffer, so it is overwritten by
     the next call and the function is not reentrant;
   - strchr(FILE_MATCH_BLANK, *s) also succeeds when *s is '\0' (the
     terminator counts as part of the searched string), so the blank-skipping
     loops that lack a "*s &&" guard can step past the end of spec;
   - in the quoted-literal loop s is only advanced while tmpString has room,
     so a literal of 63 or more characters never terminates the loop;
   - none of the strcpy() calls into dstBuf checks the remaining space. */
417  zebra_rec_keys_t reckeys,
418  const char *fname, const char *spec)
419 {
420  static char dstBuf[2048]; /* static here ??? */
421  char *dst = dstBuf;
422  const char *s = spec;
423 
424  while (1)
425  {
426  for (; *s && strchr(FILE_MATCH_BLANK, *s); s++)
427  ;
428  if (!*s)
429  break;
430  if (*s == '(')
431  {
432  const char *ws[32];
433  char attset_str[64], attname_str[64];
434  int i;
435  int first = 1;
436 
437  for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
438  ;
439  for (i = 0; *s && *s != ',' && *s != ')' &&
440  !strchr(FILE_MATCH_BLANK, *s); s++)
441  if (i+1 < sizeof(attset_str))
442  attset_str[i++] = *s;
443  attset_str[i] = '\0';
444 
445  for (; strchr(FILE_MATCH_BLANK, *s); s++)
446  ;
447  if (*s != ',')
448  strcpy(attname_str, attset_str); /* single name given: it is used as the index name */
449  else
450  {
451  for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
452  ;
453  for (i = 0; *s && *s != ')' &&
454  !strchr(FILE_MATCH_BLANK, *s); s++)
455  if (i+1 < sizeof(attname_str))
456  attname_str[i++] = *s;
457  attname_str[i] = '\0';
458  }
459  if (*s != ')')
460  {
461  yaz_log(YLOG_WARN, "Missing ) in match criteria %s in group %s",
462  spec, zh->m_group ? zh->m_group : "none");
463  return NULL;
464  }
465  s++;
466 
467  searchRecordKey(zh, reckeys, attname_str, ws, 32);
468  if (0) /* for debugging */
469  {
470  for (i = 0; i<32; i++)
471  {
472  if (ws[i])
473  {
474  WRBUF w = wrbuf_hex_str(ws[i]);
475  yaz_log(YLOG_LOG, "ws[%d] = %s", i, wrbuf_cstr(w));
476  wrbuf_destroy(w);
477  }
478  }
479  }
480 
481  for (i = 0; i<32; i++)
482  if (ws[i])
483  {
484  if (first)
485  {
486  *dst++ = ' ';
487  first = 0;
488  }
489  strcpy(dst, ws[i]);
490  dst += strlen(ws[i]);
491  }
492  if (first)
493  {
494  yaz_log(YLOG_WARN, "Record didn't contain match"
495  " fields in (%s,%s)", attset_str, attname_str);
496  return NULL;
497  }
498  }
499  else if (*s == '$')
500  {
501  int spec_len;
502  char special[64];
503  const char *spec_src = NULL;
504  const char *s1 = ++s;
505  while (*s1 && !strchr(FILE_MATCH_BLANK, *s1))
506  s1++;
507 
508  spec_len = s1 - s;
509  if (spec_len > sizeof(special)-1)
510  spec_len = sizeof(special)-1;
511  memcpy(special, s, spec_len);
512  special[spec_len] = '\0';
513  s = s1;
514 
515  if (!strcmp(special, "group"))
516  spec_src = zh->m_group;
517  else if (!strcmp(special, "database"))
518  spec_src = zh->basenames[0];
519  else if (!strcmp(special, "filename")) {
520  spec_src = fname;
521  }
522  else if (!strcmp(special, "type"))
523  spec_src = zh->m_record_type;
524  else
525  spec_src = NULL;
526  if (spec_src)
527  {
528  strcpy(dst, spec_src);
529  dst += strlen(spec_src);
530  }
531  }
532  else if (*s == '\"' || *s == '\'')
533  {
534  int stopMarker = *s++;
535  char tmpString[64];
536  int i = 0;
537 
538  while (*s && *s != stopMarker)
539  {
540  if (i+1 < sizeof(tmpString))
541  tmpString[i++] = *s++;
542  }
543  if (*s)
544  s++;
545  tmpString[i] = '\0';
546  strcpy(dst, tmpString);
547  dst += strlen(tmpString);
548  }
549  else
550  {
551  yaz_log(YLOG_WARN, "Syntax error in match criteria %s in group %s",
552  spec, zh->m_group ? zh->m_group : "none");
553  return NULL;
554  }
555  *dst++ = 1; /* element separator byte */
556  }
557  if (dst == dstBuf)
558  {
559  yaz_log(YLOG_WARN, "No match criteria for record %s in group %s",
560  fname, zh->m_group ? zh->m_group : "none");
561  return NULL;
562  }
563  *dst = '\0';
564 
565  if (0) /* for debugging */
566  {
567  WRBUF w = wrbuf_hex_str(dstBuf);
568  yaz_log(YLOG_LOG, "get_match_from_spec %s", wrbuf_cstr(w));
569  wrbuf_destroy(w);
570  }
571 
572  return dstBuf;
573 }
574 
576  const char *fname;
578  struct recordGroup *rGroup;
579 };
580 
/* Prepares an index entry for the "_ALLRECORDS" index (type "w") for one
   record: the word is initialised with extract_init(), given the supplied
   record_id, and its seqno is set to sysno (see the comment in the body).
   NOTE(review): listing line 604 is absent from this extract; the trailing
   arguments ("", 0) presumably belong to a call that adds the word with an
   empty term, e.g. extract_add_index_string -- confirm. */
591 static void all_matches_add(struct recExtractCtrl *ctrl, zint record_id,
592  zint sysno)
593 {
594  RecWord word;
595  extract_init(ctrl, &word);
596  word.record_id = record_id;
597  /* we use the seqno as placeholder for a way to get back to
598  record database from _ALLRECORDS.. This is used if a custom
599  RECORD was defined */
600  word.seqno = sysno;
601  word.index_name = "_ALLRECORDS";
602  word.index_type = "w";
603 
605  "", 0);
606 }
607 
608 /* forward declaration */
610  struct ZebraRecStream *stream,
611  enum zebra_recctrl_action_t action,
612  const char *recordType,
613  zint *sysno,
614  const char *match_criteria,
615  const char *fname,
617  void *recTypeClientData);
618 
619 
/* Extracts (indexes, updates or deletes) the records contained in file
   fname.

   zh     - handle; its m_group, m_record_type, m_record_id, res, path_reg
            and counters are read and partly updated here
   sysno  - optional record number; when non-NULL and action is
            action_delete or action_a_delete the file is not opened
   fname  - file name, resolved against zh->path_reg when that is set and
            fname is not absolute
   action - requested action, passed on to zebra_extract_records_stream()

   When the handle carries no record type / record id, both are looked up in
   the resources under "<group.>recordType.<ext>" and "<group.>recordId.<ext>",
   where <ext> is the text after the last '.' of the final path component.
   A file with no record type is counted as skipped and 0 is returned.
   Returns ZEBRA_FAIL when the record type is unknown or the file cannot be
   opened, otherwise the result of zebra_extract_records_stream().

   NOTE(review): listing lines 629, 633 and 676 are absent from this extract
   (presumably the declaration of recType, a log-level initialisation call
   and the record-type lookup call) -- confirm.
   NOTE(review): gprefix, ext, ext_res and full_rep are fixed-size buffers
   filled with sprintf/strcpy/strcat without length checks.
   NOTE(review): the "No such record type" exit returns without restoring
   zh->m_record_type to original_record_type, unlike the later exits. */
620 ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname,
621  enum zebra_recctrl_action_t action)
622 {
623  ZEBRA_RES r = ZEBRA_OK;
624  int i, fd;
625  char gprefix[128];
626  char ext[128];
627  char ext_res[128];
628  const char *original_record_type = 0;
630  void *recTypeClientData;
631  struct ZebraRecStream stream, *streamp;
632 
634 
635  if (!zh->m_group || !*zh->m_group)
636  *gprefix = '\0';
637  else
638  sprintf(gprefix, "%s.", zh->m_group);
639 
640  yaz_log(log_level_extract, "zebra_extract_file %s", fname);
641 
642  /* determine file extension */
643  *ext = '\0';
644  for (i = strlen(fname); --i >= 0; )
645  if (fname[i] == '/')
646  break;
647  else if (fname[i] == '.')
648  {
649  strcpy(ext, fname+i+1);
650  break;
651  }
652  /* determine file type - depending on extension */
653  original_record_type = zh->m_record_type;
654  if (!zh->m_record_type)
655  {
656  sprintf(ext_res, "%srecordType.%s", gprefix, ext);
657  zh->m_record_type = res_get(zh->res, ext_res);
658  }
659  if (!zh->m_record_type)
660  {
661  check_log_limit(zh);
662  if (zh->records_processed + zh->records_skipped
663  < zh->m_file_verbose_limit)
664  yaz_log(YLOG_LOG, "? %s", fname);
665  zh->records_skipped++;
666  return 0;
667  }
668  /* determine match criteria */
669  if (!zh->m_record_id)
670  {
671  sprintf(ext_res, "%srecordId.%s", gprefix, ext);
672  zh->m_record_id = res_get(zh->res, ext_res);
673  }
674 
675  if (!(recType =
677  &recTypeClientData)))
678  {
679  yaz_log(YLOG_WARN, "No such record type: %s", zh->m_record_type);
680  return ZEBRA_FAIL;
681  }
682 
683  switch(recType->version)
684  {
685  case 0:
686  break;
687  default:
688  yaz_log(YLOG_WARN, "Bad filter version: %s", zh->m_record_type);
689  }
690  if (sysno && (action == action_delete || action == action_a_delete))
691  {
692  streamp = 0; /* deleting by record number: no input is read */
693  }
694  else
695  {
696  char full_rep[1024];
697 
698  if (zh->path_reg && !yaz_is_abspath(fname))
699  {
700  strcpy(full_rep, zh->path_reg);
701  strcat(full_rep, "/");
702  strcat(full_rep, fname);
703  }
704  else
705  strcpy(full_rep, fname);
706 
707  if ((fd = open(full_rep, O_BINARY|O_RDONLY)) == -1)
708  {
709  yaz_log(YLOG_WARN|YLOG_ERRNO, "open %s", full_rep);
710  zh->m_record_type = original_record_type;
711  return ZEBRA_FAIL;
712  }
713  streamp = &stream;
714  zebra_create_stream_fd(streamp, fd, 0);
715  }
716  r = zebra_extract_records_stream(zh, streamp,
717  action,
718  zh->m_record_type,
719  sysno,
720  0, /*match_criteria */
721  fname,
722  recType, recTypeClientData);
723  if (streamp)
724  stream.destroy(streamp);
725  zh->m_record_type = original_record_type; /* undo the temporary per-extension type */
726  return r;
727 }
728 
729 /*
730  If sysno is provided, then it's used to identify the record.
731  If not, and match_criteria is provided, then sysno is guessed
732  If not, and a record is provided, then sysno is got from there
733 
734  */
735 
/* NOTE(review): the first header line (listing line 736) is absent from
   this extract, so the function name and the declaration of zh are not
   visible here -- confirm against the original file.

   Extracts records from the memory buffer buf (buf_size bytes).
   The record filter is chosen from recordType when that is a non-empty
   string, otherwise from zh->m_record_type; ZEBRA_FAIL is returned (with a
   warning) when neither is set or the filter cannot be found.  The buffer
   is wrapped in a memory stream, handed to zebra_extract_records_stream()
   together with action, sysno, match_criteria and fname, and the stream is
   destroyed before that call's result is returned. */
737  const char *buf, size_t buf_size,
738  enum zebra_recctrl_action_t action,
739  const char *recordType,
740  zint *sysno,
741  const char *match_criteria,
742  const char *fname)
743 {
744  struct ZebraRecStream stream;
745  ZEBRA_RES res;
746  void *clientData;
747  RecType recType = 0;
748 
749  if (recordType && *recordType)
750  {
751  yaz_log(log_level_extract,
752  "Record type explicitly specified: %s", recordType);
753  recType = recType_byName(zh->reg->recTypes, zh->res, recordType,
754  &clientData);
755  }
756  else
757  {
758  if (!(zh->m_record_type))
759  {
760  yaz_log(YLOG_WARN, "No such record type defined");
761  return ZEBRA_FAIL;
762  }
763  yaz_log(log_level_extract, "Get record type from rgroup: %s",
764  zh->m_record_type);
765  recType = recType_byName(zh->reg->recTypes, zh->res,
766  zh->m_record_type, &clientData);
767  recordType = zh->m_record_type; /* used for the warning below and passed on */
768  }
769 
770  if (!recType)
771  {
772  yaz_log(YLOG_WARN, "No such record type: %s", recordType);
773  return ZEBRA_FAIL;
774  }
775 
776  zebra_create_stream_mem(&stream, buf, buf_size);
777 
778  res = zebra_extract_records_stream(zh, &stream,
779  action,
780  recordType,
781  sysno,
782  match_criteria,
783  fname,
784  recType, clientData);
785  stream.destroy(&stream);
786  return res;
787 }
788 
/* NOTE(review): this listing has many gaps inside this function.  Absent
   listing lines: 789 (first header line), 796, 812, 816-818, 862, 866, 870,
   994, 1028-1029, 1044, 1051, 1136, 1148, 1155, 1161, 1169-1170 and 1194.
   The name zebra_extract_record_stream is taken from the call in the loop
   function further down.  Several case labels, declarations (for example
   delkeys and sortKeys) and call heads are therefore not visible; the
   comments below describe only what can be read here.

   Processes one record taken from stream (or identified by *sysno when no
   stream is given) and applies action to the registers.

   Outline of the visible flow:
   1. Make zh->basenames[0] the current explain database, creating it when
      it does not exist yet.
   2. With a stream: run recType->extract() on it.  Filter results other
      than RECCTRL_EXTRACT_OK end the call (ZEBRA_FAIL, or ZEBRA_OK with
      *more = 1 for the "skip" case).  On success the stream is positioned
      at the end of the record and a match string set by the filter
      overrides match_criteria.
   3. *more is set to 1.  In test mode (zh->m_flag_rw == 0) the call ends
      here with ZEBRA_OK.
   4. Without a caller-supplied sysno, a match string is taken from
      match_criteria or built by get_match_from_spec() and looked up in the
      match dictionary to find an existing record number.
   5. *sysno == 0: the record is new; delete/replace requests are rejected
      (action_a_delete returns ZEBRA_OK quietly), otherwise a record is
      created and its keys are flushed.  *sysno != 0: the record exists;
      insert is rejected, delete removes keys and record, anything else
      replaces the old keys with the new ones.
   6. The record's file type, file name, delete keys, sort keys, stored
      data, database name and offset are updated and the record is written
      with rec_put().

   NOTE(review): extractCtrl is only initialised inside the if (stream)
   branch, yet extractCtrl.staticrank is read later regardless; likewise
   stream->tellf() is called in the m_store_data branch without checking
   stream.  Confirm that callers never reach those paths with stream == 0. */
790  struct ZebraRecStream *stream,
791  enum zebra_recctrl_action_t action,
792  const char *recordType,
793  zint *sysno,
794  const char *match_criteria,
795  const char *fname,
797  void *recTypeClientData,
798  int *more)
799 
800 {
801  zint sysno0 = 0;
802  RecordAttr *recordAttr;
803  struct recExtractCtrl extractCtrl;
804  int r;
805  const char *matchStr = 0;
806  Record rec;
807  off_t start_offset = 0, end_offset = 0;
808  const char *pr_fname = fname; /* filename to print .. */
809  int show_progress = zh->records_processed + zh->records_skipped
810  < zh->m_file_verbose_limit ? 1:0;
811 
813 
814  if (!pr_fname)
815  pr_fname = "<no file>"; /* make it printable if file is omitted */
816 
819 
820  if (zebraExplain_curDatabase(zh->reg->zei, zh->basenames[0]))
821  {
822  if (zebraExplain_newDatabase(zh->reg->zei, zh->basenames[0],
823  zh->m_explain_database))
824  return ZEBRA_FAIL;
825  }
826 
827  if (stream)
828  {
829  off_t null_offset = 0;
830  extractCtrl.stream = stream;
831 
832  start_offset = stream->tellf(stream);
833 
834  extractCtrl.first_record = start_offset ? 0 : 1;
835 
836  stream->endf(stream, &null_offset);;
837 
838  extractCtrl.init = extract_init;
839  extractCtrl.tokenAdd = extract_token_add;
840  extractCtrl.schemaAdd = extract_schema_add;
841  extractCtrl.dh = zh->reg->dh;
842  extractCtrl.handle = zh;
843  extractCtrl.match_criteria[0] = '\0';
844  extractCtrl.staticrank = 0;
845  extractCtrl.action = action;
846 
847  init_extractCtrl(zh, &extractCtrl);
848 
849  extract_set_store_data_prepare(&extractCtrl);
850 
851  r = (*recType->extract)(recTypeClientData, &extractCtrl);
852 
853  if (action == action_update)
854  {
855  action = extractCtrl.action; /* the filter may have refined a generic update */
856  }
857 
858  switch (r)
859  {
860  case RECCTRL_EXTRACT_EOF:
861  return ZEBRA_FAIL;
863  /* error occured during extraction ... */
864  yaz_log(YLOG_WARN, "extract error: generic");
865  return ZEBRA_FAIL;
867  /* error occured during extraction ... */
868  yaz_log(YLOG_WARN, "extract error: no such filter");
869  return ZEBRA_FAIL;
871  if (show_progress)
872  yaz_log(YLOG_LOG, "skip %s %s " ZINT_FORMAT,
873  recordType, pr_fname, (zint) start_offset);
874  *more = 1;
875 
876  end_offset = stream->endf(stream, 0);
877  if (end_offset)
878  stream->seekf(stream, end_offset);
879 
880  return ZEBRA_OK;
881  case RECCTRL_EXTRACT_OK:
882  break;
883  default:
884  yaz_log(YLOG_WARN, "extract error: unknown error: %d", r);
885  return ZEBRA_FAIL;
886  }
887  end_offset = stream->endf(stream, 0);
888  if (end_offset)
889  stream->seekf(stream, end_offset);
890  else
891  end_offset = stream->tellf(stream);
892 
893  if (extractCtrl.match_criteria[0])
894  match_criteria = extractCtrl.match_criteria;
895  }
896 
897  *more = 1;
898 
899  if (zh->m_flag_rw == 0)
900  {
901  yaz_log(YLOG_LOG, "test %s %s " ZINT_FORMAT, recordType,
902  pr_fname, (zint) start_offset);
903  /* test mode .. Do not perform match */
904  return ZEBRA_OK;
905  }
906 
907  if (!sysno)
908  {
909  sysno = &sysno0;
910 
911  if (match_criteria && *match_criteria)
912  matchStr = match_criteria;
913  else
914  {
915  if (zh->m_record_id && *zh->m_record_id)
916  {
917  matchStr = get_match_from_spec(zh, zh->reg->keys, pr_fname,
918  zh->m_record_id);
919  if (!matchStr)
920  {
921  yaz_log(YLOG_LOG, "error %s %s " ZINT_FORMAT, recordType,
922  pr_fname, (zint) start_offset);
923  return ZEBRA_FAIL;
924  }
925  if (0 && matchStr)
926  {
927  WRBUF w = wrbuf_alloc();
928  size_t i;
929  for (i = 0; i < strlen(matchStr); i++)
930  {
931  wrbuf_printf(w, "%02X", matchStr[i] & 0xff);
932  }
933  yaz_log(YLOG_LOG, "Got match %s", wrbuf_cstr(w));
934  wrbuf_destroy(w);
935  }
936  }
937  }
938  if (matchStr)
939  {
940  int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
941  char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord,
942  matchStr);
943 
944 
945  if (log_level_extract)
946  {
947  WRBUF w = wrbuf_hex_str(matchStr);
948  yaz_log(log_level_extract, "matchStr: %s", wrbuf_cstr(w));
949  wrbuf_destroy(w);
950  }
951  if (rinfo)
952  {
953  assert(*rinfo == sizeof(*sysno)); /* first byte of the entry holds the payload length */
954  memcpy(sysno, rinfo+1, sizeof(*sysno));
955  }
956  }
957  }
958 
959  if (! *sysno)
960  {
961  /* new record AKA does not exist already */
962  if (action == action_delete)
963  {
964  yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
965  pr_fname, (zint) start_offset);
966  yaz_log(YLOG_WARN, "cannot delete record above (seems new)");
967  return ZEBRA_FAIL;
968  }
969  else if (action == action_a_delete)
970  {
971  if (show_progress)
972  yaz_log(YLOG_LOG, "adelete %s %s " ZINT_FORMAT, recordType,
973  pr_fname, (zint) start_offset);
974  return ZEBRA_OK;
975  }
976  else if (action == action_replace)
977  {
978  yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
979  pr_fname, (zint) start_offset);
980  yaz_log(YLOG_WARN, "cannot update record above (seems new)");
981  return ZEBRA_FAIL;
982  }
983  if (show_progress)
984  yaz_log(YLOG_LOG, "add %s %s " ZINT_FORMAT, recordType, pr_fname,
985  (zint) start_offset);
986  rec = rec_new(zh->reg->records);
987 
988  *sysno = rec->sysno;
989 
990 
991  if (stream)
992  {
993  all_matches_add(&extractCtrl,
995  *sysno);
996  }
997 
998 
999  recordAttr = rec_init_attr(zh->reg->zei, rec);
1000  if (extractCtrl.staticrank < 0)
1001  {
1002  yaz_log(YLOG_WARN, "Negative staticrank for record. Set to 0");
1003  extractCtrl.staticrank = 0;
1004  }
1005 
1006  if (matchStr)
1007  {
1008  int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
1009  dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
1010  sizeof(*sysno), sysno);
1011  }
1012 
1013  extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
1014 #if FLUSH2
1015  extract_flush_record_keys2(zh, *sysno,
1016  zh->reg->keys, extractCtrl.staticrank,
1017  0, recordAttr->staticrank);
1018 #else
1019  extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys,
1020  extractCtrl.staticrank);
1021 #endif
1022  recordAttr->staticrank = extractCtrl.staticrank;
1023  zh->records_inserted++;
1024  }
1025  else
1026  {
1027  /* record already exists */
1030  if (action == action_insert)
1031  {
1032  yaz_log(YLOG_LOG, "skipped %s %s " ZINT_FORMAT,
1033  recordType, pr_fname, (zint) start_offset);
1034  logRecord(zh);
1035  return ZEBRA_FAIL;
1036  }
1037 
1038  rec = rec_get(zh->reg->records, *sysno);
1039  assert(rec);
1040 
1041  if (stream)
1042  {
1043  all_matches_add(&extractCtrl,
1045  *sysno);
1046  }
1047 
1048  recordAttr = rec_init_attr(zh->reg->zei, rec);
1049 
1050  /* decrease total size */
1052  - recordAttr->recordSize);
1053 
1054  zebra_rec_keys_set_buf(delkeys,
1055  rec->info[recInfo_delKeys],
1056  rec->size[recInfo_delKeys],
1057  0);
1058  zebra_rec_keys_set_buf(sortKeys,
1059  rec->info[recInfo_sortKeys],
1060  rec->size[recInfo_sortKeys],
1061  0);
1062 
1063  extract_flush_sort_keys(zh, *sysno, 0, sortKeys);
1064 #if !FLUSH2
1065  extract_flush_record_keys(zh, *sysno, 0, delkeys,
1066  recordAttr->staticrank);
1067 #endif
1068  if (action == action_delete || action == action_a_delete)
1069  {
1070  /* record going to be deleted */
1071 #if FLUSH2
1072  extract_flush_record_keys2(zh, *sysno, 0, recordAttr->staticrank,
1073  delkeys, recordAttr->staticrank);
1074 #endif
1075  if (zebra_rec_keys_empty(delkeys))
1076  {
1077  yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
1078  pr_fname, (zint) start_offset);
1079  yaz_log(YLOG_WARN, "cannot delete file above, "
1080  "storeKeys false (3)");
1081  }
1082  else
1083  {
1084  if (show_progress)
1085  yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
1086  pr_fname, (zint) start_offset);
1087  zh->records_deleted++;
1088  if (matchStr)
1089  {
1090  int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
1091  dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
1092  }
1093  rec_del(zh->reg->records, &rec);
1094  }
1095  zebra_rec_keys_close(delkeys);
1096  zebra_rec_keys_close(sortKeys);
1097  rec_free(&rec);
1098  logRecord(zh);
1099  return ZEBRA_OK;
1100  }
1101  else
1102  { /* update or special_update */
1103  if (show_progress)
1104  yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
1105  pr_fname, (zint) start_offset);
1106  extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
1107 
1108 #if FLUSH2
1109  extract_flush_record_keys2(zh, *sysno,
1110  zh->reg->keys, extractCtrl.staticrank,
1111  delkeys, recordAttr->staticrank);
1112 #else
1113  extract_flush_record_keys(zh, *sysno, 1,
1114  zh->reg->keys, extractCtrl.staticrank);
1115 #endif
1116  recordAttr->staticrank = extractCtrl.staticrank;
1117  zh->records_updated++;
1118  }
1119  zebra_rec_keys_close(delkeys);
1120  zebra_rec_keys_close(sortKeys);
1121  }
1122  /* update file type */
1123  xfree(rec->info[recInfo_fileType]);
1124  rec->info[recInfo_fileType] =
1125  rec_strdup(recordType, &rec->size[recInfo_fileType]);
1126 
1127  /* update filename */
1128  xfree(rec->info[recInfo_filename]);
1129  rec->info[recInfo_filename] =
1130  rec_strdup(fname, &rec->size[recInfo_filename]);
1131 
1132  /* update delete keys */
1133  xfree(rec->info[recInfo_delKeys]);
1134  if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1)
1135  {
1137  &rec->info[recInfo_delKeys],
1138  &rec->size[recInfo_delKeys]);
1139  }
1140  else
1141  {
1142  rec->info[recInfo_delKeys] = NULL;
1143  rec->size[recInfo_delKeys] = 0;
1144  }
1145  /* update sort keys */
1146  xfree(rec->info[recInfo_sortKeys]);
1147 
1149  &rec->info[recInfo_sortKeys],
1150  &rec->size[recInfo_sortKeys]);
1151 
1152  if (stream)
1153  {
1154  recordAttr->recordSize = end_offset - start_offset;
1156  recordAttr->recordSize);
1157  }
1158 
1159  /* set run-number for this record */
1160  recordAttr->runNumber =
1162 
1163  /* update store data */
1164  xfree(rec->info[recInfo_storeData]);
1165 
1166  /* update store data */
1167  if (zh->store_data_buf)
1168  {
1171  zh->store_data_buf = 0;
1172  recordAttr->recordSize = zh->store_data_size;
1173  }
1174  else if (zh->m_store_data)
1175  {
1176  off_t cur_offset = stream->tellf(stream);
1177 
1178  rec->size[recInfo_storeData] = recordAttr->recordSize;
1179  rec->info[recInfo_storeData] = (char *)
1180  xmalloc(recordAttr->recordSize);
1181  stream->seekf(stream, start_offset);
1182  stream->readf(stream, rec->info[recInfo_storeData],
1183  recordAttr->recordSize);
1184  stream->seekf(stream, cur_offset); /* restore the read position */
1185  }
1186  else
1187  {
1188  rec->info[recInfo_storeData] = NULL;
1189  rec->size[recInfo_storeData] = 0;
1190  }
1191  /* update database name */
1192  xfree(rec->info[recInfo_databaseName]);
1193  rec->info[recInfo_databaseName] =
1195 
1196  /* update offset */
1197  recordAttr->recordOffset = start_offset;
1198 
1199  /* commit this record */
1200  rec_put(zh->reg->records, &rec);
1201  logRecord(zh);
1202  return ZEBRA_OK;
1203 }
1204 
/* NOTE(review): listing lines 1217 (first header line) and 1219 (presumably
   the action parameter) are absent from this extract; the name
   zebra_extract_records_stream is taken from its callers and the forward
   declaration earlier in the file.

   Processes records from stream one at a time by calling
   zebra_extract_record_stream() in a loop.  The loop ends
   - with ZEBRA_OK when a call leaves more at 0 (whatever that call
     returned),
   - with the failing result when a call returns something other than
     ZEBRA_OK,
   - after the first record when the caller supplied sysno. */
1218  struct ZebraRecStream *stream,
1220  const char *recordType,
1221  zint *sysno,
1222  const char *match_criteria,
1223  const char *fname,
1224  RecType recType,
1225  void *recTypeClientData)
1226 {
1227  ZEBRA_RES res = ZEBRA_OK;
1228  while (1)
1229  {
1230  int more = 0;
1231  res = zebra_extract_record_stream(zh, stream,
1232  action,
1233  recordType,
1234  sysno,
1235  match_criteria,
1236  fname,
1237  recType, recTypeClientData, &more);
1238  if (!more)
1239  {
1240  res = ZEBRA_OK; /* the result of the final call is discarded */
1241  break;
1242  }
1243  if (res != ZEBRA_OK)
1244  break;
1245  if (sysno)
1246  break;
1247  }
1248  return res;
1249 }
1250 
/* NOTE(review): the function header (listing line 1251) and listing lines
   1265-1266, 1290, 1292, 1294, 1298, 1306, 1324 and 1329 are absent from
   this extract.  The function name is not visible here; the body uses
   handle, rec and n as parameters -- confirm name and signature against
   the original file.

   Re-indexes the record rec from the data tree n: the tree (when non-NULL)
   is run through grs_extract_tree() with the normal extract callbacks, the
   record's previously stored delete/sort keys (if any) are flushed out, the
   freshly extracted keys in zh->reg->keys / zh->reg->sortKeys are flushed
   in, and the record's stored delete keys and sort keys are replaced.
   Returns ZEBRA_OK.

   NOTE(review): when the database of the record is not the current one the
   code calls abort() immediately, which makes the zebraExplain_newDatabase()
   call after it unreachable. */
1252 {
1253  ZebraHandle zh = (ZebraHandle) handle;
1254  struct recExtractCtrl extractCtrl;
1255 
1256  if (zebraExplain_curDatabase(zh->reg->zei,
1257  rec->info[recInfo_databaseName]))
1258  {
1259  abort();
1260  if (zebraExplain_newDatabase(zh->reg->zei,
1261  rec->info[recInfo_databaseName], 0))
1262  abort();
1263  }
1264 
1267 
1268  extractCtrl.init = extract_init;
1269  extractCtrl.tokenAdd = extract_token_add;
1270  extractCtrl.schemaAdd = extract_schema_add;
1271  extractCtrl.dh = zh->reg->dh;
1272 
1273  init_extractCtrl(zh, &extractCtrl);
1274 
1275  extractCtrl.flagShowRecords = 0; /* overrides the value set by init_extractCtrl */
1276  extractCtrl.match_criteria[0] = '\0';
1277  extractCtrl.staticrank = 0;
1278  extractCtrl.action = action_update;
1279 
1280  extractCtrl.handle = handle;
1281  extractCtrl.first_record = 1;
1282 
1283  extract_set_store_data_prepare(&extractCtrl);
1284 
1285  if (n)
1286  grs_extract_tree(&extractCtrl, n);
1287 
1288  if (rec->size[recInfo_delKeys])
1289  {
1291 
1293 
1295  rec->size[recInfo_delKeys],
1296  0);
1297 #if FLUSH2
1299  zh->reg->keys, 0, delkeys, 0);
1300 #else
1301  extract_flush_record_keys(zh, rec->sysno, 0, delkeys, 0);
1302  extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0);
1303 #endif
1304  zebra_rec_keys_close(delkeys);
1305 
1307  rec->size[recInfo_sortKeys],
1308  0);
1309 
1310  extract_flush_sort_keys(zh, rec->sysno, 0, sortkeys);
1311  zebra_rec_keys_close(sortkeys);
1312  }
1313  else
1314  {
1315 #if FLUSH2
1316  extract_flush_record_keys2(zh, rec->sysno, zh->reg->keys, 0, 0, 0);
1317 #else
1318  extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0);
1319 #endif
1320  }
1321  extract_flush_sort_keys(zh, rec->sysno, 1, zh->reg->sortKeys);
1322 
1323  xfree(rec->info[recInfo_delKeys]);
1325  &rec->info[recInfo_delKeys],
1326  &rec->size[recInfo_delKeys]);
1327 
1328  xfree(rec->info[recInfo_sortKeys]);
1330  &rec->info[recInfo_sortKeys],
1331  &rec->size[recInfo_sortKeys]);
1332  return ZEBRA_OK;
1333 }
1334 
/* NOTE(review): the first header line (listing line 1335) is absent from
   this extract; the name zebra_it_key_str_dump and the leading arguments
   (zh, key) are taken from the call in the following function.

   Writes one index key to the log at the given level.  The line consists of
   the key's numeric components, the index type and index name looked up
   from the ordinal in key->mem[0], and the term.  Terms whose first byte is
   below CHR_BASE_CHAR are shown as a symbolic name ("alwaysmatches",
   "firstinfield", "unknown", "space" or "?") followed by the decimal value
   of each of the slen bytes; other terms are shown as the quoted string
   produced by zebra_term_untrans_iconv().

   NOTE(review): keystr and dst_buf are 200-byte buffers filled by repeated
   sprintf() without length checks; dst_buf in particular grows with slen. */
1336  const char *str, size_t slen, NMEM nmem, int level)
1337 {
1338  char keystr[200]; /* room for zints to print */
1339  char *dst_term = 0;
1340  int ord = CAST_ZINT_TO_INT(key->mem[0]);
1341  const char *index_type;
1342  int i;
1343  const char *string_index;
1344 
1345  zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
1346  0/* db */, &string_index);
1347  assert(index_type);
1348  zebra_term_untrans_iconv(zh, nmem, index_type,
1349  &dst_term, str);
1350  *keystr = '\0';
1351  for (i = 0; i < key->len; i++)
1352  {
1353  sprintf(keystr + strlen(keystr), ZINT_FORMAT " ", key->mem[i]);
1354  }
1355 
1356  if (*str < CHR_BASE_CHAR)
1357  {
1358  int i; /* NOTE(review): shadows the outer i */
1359  char dst_buf[200]; /* room for special chars */
1360 
1361  strcpy(dst_buf , "?");
1362 
1363  if (!strcmp(str, ""))
1364  strcpy(dst_buf, "alwaysmatches");
1365  if (!strcmp(str, FIRST_IN_FIELD_STR))
1366  strcpy(dst_buf, "firstinfield");
1367  else if (!strcmp(str, CHR_UNKNOWN))
1368  strcpy(dst_buf, "unknown");
1369  else if (!strcmp(str, CHR_SPACE))
1370  strcpy(dst_buf, "space");
1371 
1372  for (i = 0; i<slen; i++)
1373  {
1374  sprintf(dst_buf + strlen(dst_buf), " %d", str[i] & 0xff);
1375  }
1376  yaz_log(level, "%s%s %s %s", keystr, index_type,
1377  string_index, dst_buf);
1378 
1379  }
1380  else
1381  yaz_log(level, "%s%s %s \"%s\"", keystr, index_type,
1382  string_index, dst_term);
1383 }
1384 
1386  zebra_rec_keys_t reckeys,
1387  int level)
1388 {
1389  if (zebra_rec_keys_rewind(reckeys))
1390  {
1391  size_t slen;
1392  const char *str;
1393  struct it_key key;
1394  NMEM nmem = nmem_create();
1395 
1396  while(zebra_rec_keys_read(reckeys, &str, &slen, &key))
1397  {
1398  zebra_it_key_str_dump(zh, &key, str, slen, nmem, level);
1399  nmem_reset(nmem);
1400  }
1401  nmem_destroy(nmem);
1402  }
1403 }
1404 
1406  zebra_rec_keys_t reckeys)
1407 {
1408  ZebraExplainInfo zei = zh->reg->zei;
1409  struct ord_stat {
1410  int no;
1411  int ord;
1412  struct ord_stat *next;
1413  };
1414 
1415  if (zebra_rec_keys_rewind(reckeys))
1416  {
1417  struct ord_stat *ord_list = 0;
1418  struct ord_stat *p;
1419  size_t slen;
1420  const char *str;
1421  struct it_key key_in;
1422  while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1423  {
1424  int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
1425 
1426  for (p = ord_list; p ; p = p->next)
1427  if (p->ord == ord)
1428  {
1429  p->no++;
1430  break;
1431  }
1432  if (!p)
1433  {
1434  p = xmalloc(sizeof(*p));
1435  p->no = 1;
1436  p->ord = ord;
1437  p->next = ord_list;
1438  ord_list = p;
1439  }
1440  }
1441 
1442  p = ord_list;
1443  while (p)
1444  {
1445  struct ord_stat *p1 = p;
1446 
1447  if (is_insert)
1448  zebraExplain_ord_adjust_occurrences(zei, p->ord, p->no, 1);
1449  else
1450  zebraExplain_ord_adjust_occurrences(zei, p->ord, - p->no, -1);
1451  p = p->next;
1452  xfree(p1);
1453  }
1454  }
1455 }
1456 
1457 #if FLUSH2
1459  ZebraHandle zh, zint sysno,
1460  zebra_rec_keys_t ins_keys, zint ins_rank,
1461  zebra_rec_keys_t del_keys, zint del_rank)
1462 {
1463  ZebraExplainInfo zei = zh->reg->zei;
1464  int normal = 0;
1465  int optimized = 0;
1466 
1467  if (!zh->reg->key_block)
1468  {
1469  int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8"));
1470  const char *key_tmp_dir = res_get_def(zh->res, "keyTmpDir", ".");
1471  int use_threads = atoi(res_get_def(zh->res, "threads", "1"));
1472  zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads);
1473  }
1474 
1475  if (ins_keys)
1476  {
1477  extract_rec_keys_adjust(zh, 1, ins_keys);
1478  if (!del_keys)
1480  zebra_rec_keys_rewind(ins_keys);
1481  }
1482  if (del_keys)
1483  {
1484  extract_rec_keys_adjust(zh, 0, del_keys);
1485  if (!ins_keys)
1487  zebra_rec_keys_rewind(del_keys);
1488  }
1489 
1490  while (1)
1491  {
1492  size_t del_slen;
1493  const char *del_str;
1494  struct it_key del_key_in;
1495  int del = 0;
1496 
1497  size_t ins_slen;
1498  const char *ins_str;
1499  struct it_key ins_key_in;
1500  int ins = 0;
1501 
1502  if (del_keys)
1503  del = zebra_rec_keys_read(del_keys, &del_str, &del_slen,
1504  &del_key_in);
1505  if (ins_keys)
1506  ins = zebra_rec_keys_read(ins_keys, &ins_str, &ins_slen,
1507  &ins_key_in);
1508 
1509  if (del && ins && ins_rank == del_rank
1510  && !key_compare(&del_key_in, &ins_key_in)
1511  && ins_slen == del_slen && !memcmp(del_str, ins_str, del_slen))
1512  {
1513  optimized++;
1514  continue;
1515  }
1516  if (!del && !ins)
1517  break;
1518 
1519  normal++;
1520  if (del)
1521  key_block_write(zh->reg->key_block, sysno,
1522  &del_key_in, 0, del_str, del_slen,
1523  del_rank, zh->m_staticrank);
1524  if (ins)
1525  key_block_write(zh->reg->key_block, sysno,
1526  &ins_key_in, 1, ins_str, ins_slen,
1527  ins_rank, zh->m_staticrank);
1528  }
1529  yaz_log(log_level_extract, "normal=%d optimized=%d", normal, optimized);
1530 }
1531 #else
1532 static void extract_flush_record_keys(
1533  ZebraHandle zh, zint sysno, int cmd,
1534  zebra_rec_keys_t reckeys,
1535  zint staticrank)
1536 {
1537  ZebraExplainInfo zei = zh->reg->zei;
1538 
1539  extract_rec_keys_adjust(zh, cmd, reckeys);
1540 
1541  if (log_level_details)
1542  {
1543  yaz_log(log_level_details, "Keys for record " ZINT_FORMAT " %s",
1544  sysno, cmd ? "insert" : "delete");
1545  extract_rec_keys_log(zh, cmd, reckeys, log_level_details);
1546  }
1547 
1548  if (!zh->reg->key_block)
1549  {
1550  int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8"));
1551  const char *key_tmp_dir = res_get_def(zh->res, "keyTmpDir", ".");
1552  int use_threads = atoi(res_get_def(zh->res, "threads", "1"));
1553  zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads);
1554  }
1555  zebraExplain_recordCountIncrement(zei, cmd ? 1 : -1);
1556 
1557 #if 0
1558  yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " cmd=%d", sysno, cmd);
1559  print_rec_keys(zh, reckeys);
1560 #endif
1561  if (zebra_rec_keys_rewind(reckeys))
1562  {
1563  size_t slen;
1564  const char *str;
1565  struct it_key key_in;
1566  while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1567  {
1568  key_block_write(zh->reg->key_block, sysno,
1569  &key_in, cmd, str, slen,
1570  staticrank, zh->m_staticrank);
1571  }
1572  }
1573 }
1574 #endif
1575 
1577  zebra_rec_keys_t reckeys,
1578  zebra_snippets *snippets)
1579 {
1580  NMEM nmem = nmem_create();
1581  if (zebra_rec_keys_rewind(reckeys))
1582  {
1583  const char *str;
1584  size_t slen;
1585  struct it_key key;
1586  while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
1587  {
1588  char *dst_term = 0;
1589  int ord;
1590  zint seqno;
1591  const char *index_type;
1592 
1593  assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
1594  seqno = key.mem[key.len-1];
1595  ord = CAST_ZINT_TO_INT(key.mem[0]);
1596 
1597  zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
1598  0/* db */, 0 /* string_index */);
1599  assert(index_type);
1600  zebra_term_untrans_iconv(zh, nmem, index_type,
1601  &dst_term, str);
1602  zebra_snippets_append(snippets, seqno, 0, ord, dst_term);
1603  nmem_reset(nmem);
1604  }
1605  }
1606  nmem_destroy(nmem);
1607  return ZEBRA_OK;
1608 }
1609 
1611 {
1612  yaz_log(YLOG_LOG, "print_rec_keys");
1613  if (zebra_rec_keys_rewind(reckeys))
1614  {
1615  const char *str;
1616  size_t slen;
1617  struct it_key key;
1618  while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
1619  {
1620  char dst_buf[IT_MAX_WORD];
1621  zint seqno;
1622  const char *index_type;
1623  int ord = CAST_ZINT_TO_INT(key.mem[0]);
1624  const char *db = 0;
1625  assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
1626 
1627  zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, 0);
1628 
1629  seqno = key.mem[key.len-1];
1630 
1631  zebra_term_untrans(zh, index_type, dst_buf, str);
1632 
1633  yaz_log(YLOG_LOG, "ord=%d seqno=" ZINT_FORMAT
1634  " term=%s", ord, seqno, dst_buf);
1635  }
1636  }
1637 }
1638 
1640  const char *str, int length)
1641 {
1642  struct it_key key;
1643  ZebraHandle zh = p->extractCtrl->handle;
1644  ZebraExplainInfo zei = zh->reg->zei;
1645  int ch, i;
1646 
1647  ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
1648  if (ch < 0)
1649  ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
1650 
1651  i = 0;
1652  key.mem[i++] = ch;
1653  key.mem[i++] = p->record_id;
1654  key.mem[i++] = p->section_id;
1655 
1656  if (zh->m_segment_indexing)
1657  key.mem[i++] = p->segment;
1658  key.mem[i++] = p->seqno;
1659  key.len = i;
1660 
1661  zebra_rec_keys_write(zh->reg->keys, str, length, &key);
1662 }
1663 
1664 static void extract_add_sort_string(RecWord *p, const char *str, int length)
1665 {
1666  struct it_key key;
1667  ZebraHandle zh = p->extractCtrl->handle;
1668  ZebraExplainInfo zei = zh->reg->zei;
1669  int ch;
1671 
1672  ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
1673  if (ch < 0)
1674  ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
1675  key.len = 3;
1676  key.mem[0] = ch;
1677  key.mem[1] = p->record_id;
1678  key.mem[2] = p->section_id;
1679 
1680  zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
1681 }
1682 
1684  const char *str, int length)
1685 {
1686  char valz[40];
1687  struct recExtractCtrl *ctrl = p->extractCtrl;
1688 
1689  if (length > sizeof(valz)-1)
1690  length = sizeof(valz)-1;
1691 
1692  memcpy(valz, str, length);
1693  valz[length] = '\0';
1694  ctrl->staticrank = atozint(valz);
1695 }
1696 
1698  const char *string, int length)
1699 {
1700  assert(length > 0);
1701 
1702  if (!p->index_name)
1703  return;
1704  if (log_level_details)
1705  {
1706 
1707  WRBUF w = wrbuf_alloc();
1708 
1709  wrbuf_write_escaped(w, string, length);
1710  yaz_log(log_level_details, "extract_add_string: %s", wrbuf_cstr(w));
1711  wrbuf_destroy(w);
1712  }
1713  if (zebra_maps_is_index(zm))
1714  {
1716  string, length);
1718  {
1719  RecWord word;
1720  memcpy(&word, p, sizeof(word));
1721 
1722  word.seqno = 1;
1724  &word, zinfo_index_category_alwaysmatches, "", 0);
1725  }
1726  }
1727  else if (zebra_maps_is_sort(zm))
1728  {
1729  extract_add_sort_string(p, string, length);
1730  }
1731  else if (zebra_maps_is_staticrank(zm))
1732  {
1733  extract_add_staticrank_string(p, string, length);
1734  }
1735 }
1736 
1738 {
1739  const char *b = p->term_buf;
1740  int remain = p->term_len;
1741  int first = 1;
1742  const char **map = 0;
1743 
1744  if (remain > 0)
1745  map = zebra_maps_input(zm, &b, remain, 0);
1746 
1747  while (map)
1748  {
1749  char buf[IT_MAX_WORD+1];
1750  int i, remain;
1751 
1752  /* Skip spaces */
1753  while (map && *map && **map == *CHR_SPACE)
1754  {
1755  remain = p->term_len - (b - p->term_buf);
1756  if (remain > 0)
1757  map = zebra_maps_input(zm, &b, remain, 0);
1758  else
1759  map = 0;
1760  }
1761  if (!map)
1762  break;
1763  i = 0;
1764  while (map && *map && **map != *CHR_SPACE)
1765  {
1766  const char *cp = *map;
1767 
1768  while (i < IT_MAX_WORD && *cp)
1769  buf[i++] = *(cp++);
1770  remain = p->term_len - (b - p->term_buf);
1771  if (remain > 0)
1772  map = zebra_maps_input(zm, &b, remain, 0);
1773  else
1774  map = 0;
1775  }
1776  if (!i)
1777  return;
1778 
1779  if (first)
1780  {
1781  first = 0;
1783  {
1784  /* first in field marker */
1786  p->seqno++;
1787  }
1788  }
1789  extract_add_string(p, zm, buf, i);
1790  p->seqno++;
1791  }
1792 }
1793 
1795 {
1796  char buf[IT_MAX_WORD+1];
1797  int i = parse_complete_field(p, zm, buf);
1798  if (!i)
1799  return;
1800  extract_add_string(p, zm, buf, i);
1801  p->seqno++;
1802 }
1803 
1805 {
1806  const char *res_buf = 0;
1807  size_t res_len = 0;
1808 
1810  while (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0))
1811  {
1812  if (res_len > IT_MAX_WORD)
1813  {
1814  yaz_log(YLOG_LOG, "Truncating long term %ld", (long) res_len);
1815  res_len = IT_MAX_WORD;
1816  }
1817  extract_add_string(p, zm, res_buf, res_len);
1818  p->seqno++;
1819  }
1820 }
1821 
1822 
1839 {
1840  ZebraHandle zh = p->extractCtrl->handle;
1842 
1843  if (log_level_details)
1844  {
1845  yaz_log(log_level_details, "extract_token_add "
1846  "type=%s index=%s seqno=" ZINT_FORMAT " s=%.*s",
1847  p->index_type, p->index_name,
1848  p->seqno, p->term_len, p->term_buf);
1849  }
1850  if (zebra_maps_is_icu(zm))
1851  {
1852  extract_add_icu(p, zm);
1853  }
1854  else
1855  {
1856  if (zebra_maps_is_complete(zm))
1858  else
1860  }
1861 }
1862 
1864  void *buf, size_t sz)
1865 {
1866  ZebraHandle zh = (ZebraHandle) p->handle;
1867 
1868  xfree(zh->store_data_buf);
1869  zh->store_data_buf = 0;
1870  zh->store_data_size = 0;
1871  if (buf && sz)
1872  {
1873  zh->store_data_buf = xmalloc(sz);
1874  zh->store_data_size = sz;
1875  memcpy(zh->store_data_buf, buf, sz);
1876  }
1877 }
1878 
1880 {
1881  ZebraHandle zh = (ZebraHandle) p->handle;
1882  xfree(zh->store_data_buf);
1883  zh->store_data_buf = 0;
1884  zh->store_data_size = 0;
1886 }
1887 
1888 static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid)
1889 {
1890  ZebraHandle zh = (ZebraHandle) p->handle;
1891  zebraExplain_addSchema(zh->reg->zei, oid);
1892 }
1893 
1895  int cmd, zebra_rec_keys_t reckeys)
1896 {
1897 #if 0
1898  yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT,
1899  cmd, sysno);
1900  extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG);
1901 #endif
1902 
1903  if (zebra_rec_keys_rewind(reckeys))
1904  {
1905  zebra_sort_index_t si = zh->reg->sort_index;
1906  size_t slen;
1907  const char *str;
1908  struct it_key key_in;
1909 
1910  NMEM nmem = nmem_create();
1911  struct sort_add_ent {
1912  int ord;
1913  int cmd;
1914  struct sort_add_ent *next;
1915  WRBUF wrbuf;
1916  zint sysno;
1917  zint section_id;
1918  };
1919  struct sort_add_ent *sort_ent_list = 0;
1920 
1921  while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1922  {
1923  int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
1924  zint filter_sysno = key_in.mem[1];
1925  zint section_id = key_in.mem[2];
1926 
1927  struct sort_add_ent **e = &sort_ent_list;
1928  for (; *e; e = &(*e)->next)
1929  if ((*e)->ord == ord && section_id == (*e)->section_id)
1930  break;
1931  if (!*e)
1932  {
1933  *e = nmem_malloc(nmem, sizeof(**e));
1934  (*e)->next = 0;
1935  (*e)->wrbuf = wrbuf_alloc();
1936  (*e)->ord = ord;
1937  (*e)->cmd = cmd;
1938  (*e)->sysno = filter_sysno ? filter_sysno : sysno;
1939  (*e)->section_id = section_id;
1940  }
1941 
1942  wrbuf_write((*e)->wrbuf, str, slen);
1943  wrbuf_putc((*e)->wrbuf, '\0');
1944  }
1945  if (sort_ent_list)
1946  {
1947  zint last_sysno = 0;
1948  struct sort_add_ent *e = sort_ent_list;
1949  for (; e; e = e->next)
1950  {
1951  if (last_sysno != e->sysno)
1952  {
1953  zebra_sort_sysno(si, e->sysno);
1954  last_sysno = e->sysno;
1955  }
1956  zebra_sort_type(si, e->ord);
1957  if (e->cmd == 1)
1958  zebra_sort_add(si, e->section_id, e->wrbuf);
1959  else
1960  zebra_sort_delete(si, e->section_id);
1961  wrbuf_destroy(e->wrbuf);
1962  }
1963  }
1964  nmem_destroy(nmem);
1965  }
1966 }
1967 
1968 /*
1969  * Local variables:
1970  * c-basic-offset: 4
1971  * c-file-style: "Stroustrup"
1972  * indent-tabs-mode: nil
1973  * End:
1974  * vim: shiftwidth=4 tabstop=8 expandtab
1975  */
1976