IDZEBRA  2.1.2
extract.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
24 #if HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 #include <stdio.h>
28 #include <assert.h>
29 #include <ctype.h>
30 #ifdef WIN32
31 #include <io.h>
32 #endif
33 #if HAVE_UNISTD_H
34 #include <unistd.h>
35 #endif
36 #include <fcntl.h>
37 
38 
39 #include "index.h"
40 #include "orddict.h"
41 #include <direntz.h>
42 #include <charmap.h>
43 #include <yaz/snprintf.h>
44 
45 static int log_level_extract = 0;
46 static int log_level_details = 0;
47 static int log_level_initialized = 0;
48 
49 /* 1 if we eliminate identical delete/insert keys */
50 /* eventually the 0-case code will be removed */
51 #define FLUSH2 1
52 
53 #if FLUSH2
55  zebra_rec_keys_t ins_keys,
56  zint ins_rank,
57  zebra_rec_keys_t del_keys,
58  zint del_rank);
59 #else
60 static void extract_flush_record_keys(ZebraHandle zh, zint sysno,
61  int cmd,
62  zebra_rec_keys_t reckeys,
63  zint staticrank);
64 #endif
65 
66 static void zebra_init_log_level(void)
67 {
69  {
71 
72  log_level_extract = yaz_log_module_level("extract");
73  log_level_details = yaz_log_module_level("indexdetails");
74  }
75 }
76 
77 static WRBUF wrbuf_hex_str(const char *cstr)
78 {
79  size_t i;
80  WRBUF w = wrbuf_alloc();
81  for (i = 0; cstr[i]; i++)
82  {
83  if (cstr[i] < ' ' || cstr[i] > 126)
84  wrbuf_printf(w, "\\%02X", cstr[i] & 0xff);
85  else
86  wrbuf_putc(w, cstr[i]);
87  }
88  return w;
89 }
90 
91 
92 static void extract_flush_sort_keys(ZebraHandle zh, zint sysno,
93  int cmd, zebra_rec_keys_t skp);
94 static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid);
95 static void extract_token_add(RecWord *p);
96 
98 {
100  {
101  yaz_log(YLOG_LOG, "More than %d file log entries. Omitting rest",
103  }
104 }
105 
106 static void logRecord(ZebraHandle zh)
107 {
108  check_log_limit(zh);
109  ++zh->records_processed;
110  if (!(zh->records_processed % 1000))
111  {
112  yaz_log(YLOG_LOG, "Records: "ZINT_FORMAT" i/u/d "
116  }
117 }
118 
119 static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
120 {
121  ctrl->flagShowRecords = !zh->m_flag_rw;
122 }
123 
124 
125 static void extract_add_index_string(RecWord *p,
127  const char *str, int length);
128 
129 static void extract_set_store_data_prepare(struct recExtractCtrl *p);
130 
131 static void extract_init(struct recExtractCtrl *p, RecWord *w)
132 {
133  w->seqno = 1;
134  w->index_name = "any";
135  w->index_type = "w";
136  w->extractCtrl = p;
137  w->record_id = 0;
138  w->section_id = 0;
139  w->segment = 0;
140 }
141 
145 };
146 
148  char *buf)
149 {
150  const char *b = p->term_buf;
151  const char **map = 0;
152  int i = 0, remain = p->term_len;
153 
154  if (remain > 0)
155  map = zebra_maps_input(zm, &b, remain, 1);
156  while (remain > 0 && i < IT_MAX_WORD)
157  {
158  while (map && *map && **map == *CHR_SPACE)
159  {
160  remain = p->term_len - (b - p->term_buf);
161 
162  if (remain > 0)
163  {
164  int first = i ? 0 : 1; /* first position */
165  map = zebra_maps_input(zm, &b, remain, first);
166  }
167  else
168  map = 0;
169  }
170  if (!map)
171  break;
172 
173  if (i && i < IT_MAX_WORD)
174  buf[i++] = *CHR_SPACE;
175  while (map && *map && **map != *CHR_SPACE)
176  {
177  const char *cp = *map;
178 
179  if (**map == *CHR_CUT)
180  {
181  i = 0;
182  }
183  else
184  {
185  if (i >= IT_MAX_WORD)
186  break;
187  while (i < IT_MAX_WORD && *cp)
188  buf[i++] = *(cp++);
189  }
190  remain = p->term_len - (b - p->term_buf);
191  if (remain > 0)
192  {
193  map = zebra_maps_input(zm, &b, remain, 0);
194  }
195  else
196  map = 0;
197  }
198  }
199  return i;
200 }
201 
202 static void snippet_add_complete_field(RecWord *p, int ord,
203  zebra_map_t zm)
204 {
205  struct snip_rec_info *h = p->extractCtrl->handle;
206  char buf[IT_MAX_WORD+1];
207  int i = parse_complete_field(p, zm, buf);
208 
209  if (!i)
210  return;
211 
212  if (p->term_len && p->term_buf && zebra_maps_is_index(zm))
213  zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
214  p->term_buf, p->term_len);
215  p->seqno++;
216 }
217 
219 {
220  struct snip_rec_info *h = p->extractCtrl->handle;
221  const char *b = p->term_buf;
222  int remain = p->term_len;
223  int first = 1;
224  const char **map = 0;
225  const char *start = b;
226  const char *last = b;
227 
228  if (remain > 0)
229  map = zebra_maps_input(zm, &b, remain, 0);
230 
231  while (map)
232  {
233  int remain;
234 
235  /* Skip spaces */
236  while (map && *map && **map == *CHR_SPACE)
237  {
238  remain = p->term_len - (b - p->term_buf);
239  last = b;
240  if (remain > 0)
241  map = zebra_maps_input(zm, &b, remain, 0);
242  else
243  map = 0;
244  }
245  if (!map)
246  break;
247  if (start != last && zebra_maps_is_index(zm))
248  {
249  zebra_snippets_appendn(h->snippets, p->seqno, 1, ord,
250  start, last - start);
251  }
252  start = last;
253  while (map && *map && **map != *CHR_SPACE)
254  {
255  remain = p->term_len - (b - p->term_buf);
256  last = b;
257  if (remain > 0)
258  map = zebra_maps_input(zm, &b, remain, 0);
259  else
260  map = 0;
261  }
262  if (start == last)
263  return ;
264 
265  if (first)
266  {
267  first = 0;
269  {
270  /* first in field marker */
271  p->seqno++;
272  }
273  }
274  if (start != last && zebra_maps_is_index(zm))
275  zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
276  start, last - start);
277  start = last;
278  p->seqno++;
279  }
280 
281 }
282 
283 static void snippet_add_icu(RecWord *p, int ord, zebra_map_t zm)
284 {
285  struct snip_rec_info *h = p->extractCtrl->handle;
286 
287  const char *res_buf = 0;
288  size_t res_len = 0;
289 
290  const char *display_buf = 0;
291  size_t display_len = 0;
292 
294  while (zebra_map_tokenize_next(zm, &res_buf, &res_len,
295  &display_buf, &display_len))
296  {
297  if (zebra_maps_is_index(zm))
298  zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
299  display_buf, display_len);
300  p->seqno++;
301  }
302 }
303 
304 static void snippet_token_add(RecWord *p)
305 {
306  struct snip_rec_info *h = p->extractCtrl->handle;
307  ZebraHandle zh = h->zh;
309 
310  if (zm)
311  {
312  ZebraExplainInfo zei = zh->reg->zei;
315 
316  if (zebra_maps_is_icu(zm))
317  snippet_add_icu(p, ch, zm);
318  else
319  {
320  if (zebra_maps_is_complete(zm))
321  snippet_add_complete_field(p, ch, zm);
322  else
323  snippet_add_incomplete_field(p, ch, zm);
324  }
325  }
326 }
327 
328 static void snippet_schema_add(
329  struct recExtractCtrl *p, Odr_oid *oid)
330 {
331 
332 }
333 
335  struct ZebraRecStream *stream,
336  RecType rt, void *recTypeClientData)
337 {
338  struct recExtractCtrl extractCtrl;
339  struct snip_rec_info info;
340 
341  extractCtrl.stream = stream;
342  extractCtrl.first_record = 1;
343  extractCtrl.init = extract_init;
344  extractCtrl.tokenAdd = snippet_token_add;
345  extractCtrl.schemaAdd = snippet_schema_add;
346  assert(zh->reg);
347  assert(zh->reg->dh);
348 
349  extractCtrl.dh = zh->reg->dh;
350 
351  info.zh = zh;
352  info.snippets = sn;
353  extractCtrl.handle = &info;
354  extractCtrl.match_criteria[0] = '\0';
355  extractCtrl.staticrank = 0;
356  extractCtrl.action = action_insert;
357 
358  init_extractCtrl(zh, &extractCtrl);
359 
360  extractCtrl.setStoreData = 0;
361 
362  (*rt->extract)(recTypeClientData, &extractCtrl);
363 }
364 
366  zebra_rec_keys_t reckeys,
367  const char *index_name,
368  const char **ws, int ws_length)
369 {
370  int i;
371  int ch = -1;
373 
374  for (i = 0; i<ws_length; i++)
375  ws[i] = NULL;
376 
377  if (ch < 0)
378  ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "0", index_name);
379  if (ch < 0)
380  ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "p", index_name);
381  if (ch < 0)
382  ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, "w", index_name);
383 
384  if (ch < 0)
385  return ;
386 
387  if (zebra_rec_keys_rewind(reckeys))
388  {
389  zint startSeq = -1;
390  const char *str;
391  size_t slen;
392  struct it_key key;
393  zint seqno;
394  while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
395  {
396  assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
397 
398  seqno = key.mem[key.len-1];
399 
400  if (key.mem[0] == ch)
401  {
402  zint woff;
403 
404  if (startSeq == -1)
405  startSeq = seqno;
406  woff = seqno - startSeq;
407  if (woff >= 0 && woff < ws_length)
408  ws[woff] = str;
409  }
410  }
411  }
412 }
413 
414 #define FILE_MATCH_BLANK "\t "
415 
417  zebra_rec_keys_t reckeys,
418  const char *fname, const char *spec)
419 {
420  static char dstBuf[2048]; /* static here ??? */
421  char *dst = dstBuf;
422  const char *s = spec;
423 
424  while (1)
425  {
426  for (; *s && strchr(FILE_MATCH_BLANK, *s); s++)
427  ;
428  if (!*s)
429  break;
430  if (*s == '(')
431  {
432  const char *ws[32];
433  char attset_str[64], attname_str[64];
434  int i;
435  int first = 1;
436 
437  for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
438  ;
439  for (i = 0; *s && *s != ',' && *s != ')' &&
440  !strchr(FILE_MATCH_BLANK, *s); s++)
441  if (i+1 < sizeof(attset_str))
442  attset_str[i++] = *s;
443  attset_str[i] = '\0';
444 
445  for (; strchr(FILE_MATCH_BLANK, *s); s++)
446  ;
447  if (*s != ',')
448  strcpy(attname_str, attset_str);
449  else
450  {
451  for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
452  ;
453  for (i = 0; *s && *s != ')' &&
454  !strchr(FILE_MATCH_BLANK, *s); s++)
455  if (i+1 < sizeof(attname_str))
456  attname_str[i++] = *s;
457  attname_str[i] = '\0';
458  }
459  if (*s != ')')
460  {
461  yaz_log(YLOG_WARN, "Missing ) in match criteria %s in group %s",
462  spec, zh->m_group ? zh->m_group : "none");
463  return NULL;
464  }
465  s++;
466 
467  searchRecordKey(zh, reckeys, attname_str, ws, 32);
468  if (0) /* for debugging */
469  {
470  for (i = 0; i<32; i++)
471  {
472  if (ws[i])
473  {
474  WRBUF w = wrbuf_hex_str(ws[i]);
475  yaz_log(YLOG_LOG, "ws[%d] = %s", i, wrbuf_cstr(w));
476  wrbuf_destroy(w);
477  }
478  }
479  }
480 
481  for (i = 0; i<32; i++)
482  if (ws[i])
483  {
484  if (first)
485  {
486  *dst++ = ' ';
487  first = 0;
488  }
489  strcpy(dst, ws[i]);
490  dst += strlen(ws[i]);
491  }
492  if (first)
493  {
494  yaz_log(YLOG_WARN, "Record didn't contain match"
495  " fields in (%s,%s)", attset_str, attname_str);
496  return NULL;
497  }
498  }
499  else if (*s == '$')
500  {
501  int spec_len;
502  char special[64];
503  const char *spec_src = NULL;
504  const char *s1 = ++s;
505  while (*s1 && !strchr(FILE_MATCH_BLANK, *s1))
506  s1++;
507 
508  spec_len = s1 - s;
509  if (spec_len > sizeof(special)-1)
510  spec_len = sizeof(special)-1;
511  memcpy(special, s, spec_len);
512  special[spec_len] = '\0';
513  s = s1;
514 
515  if (!strcmp(special, "group"))
516  spec_src = zh->m_group;
517  else if (!strcmp(special, "database"))
518  spec_src = zh->basenames[0];
519  else if (!strcmp(special, "filename")) {
520  spec_src = fname;
521  }
522  else if (!strcmp(special, "type"))
523  spec_src = zh->m_record_type;
524  else
525  spec_src = NULL;
526  if (spec_src)
527  {
528  strcpy(dst, spec_src);
529  dst += strlen(spec_src);
530  }
531  }
532  else if (*s == '\"' || *s == '\'')
533  {
534  int stopMarker = *s++;
535  char tmpString[64];
536  int i = 0;
537 
538  while (*s && *s != stopMarker)
539  {
540  if (i+1 < sizeof(tmpString))
541  tmpString[i++] = *s++;
542  }
543  if (*s)
544  s++;
545  tmpString[i] = '\0';
546  strcpy(dst, tmpString);
547  dst += strlen(tmpString);
548  }
549  else
550  {
551  yaz_log(YLOG_WARN, "Syntax error in match criteria %s in group %s",
552  spec, zh->m_group ? zh->m_group : "none");
553  return NULL;
554  }
555  *dst++ = 1;
556  }
557  if (dst == dstBuf)
558  {
559  yaz_log(YLOG_WARN, "No match criteria for record %s in group %s",
560  fname, zh->m_group ? zh->m_group : "none");
561  return NULL;
562  }
563  *dst = '\0';
564 
565  if (0) /* for debugging */
566  {
567  WRBUF w = wrbuf_hex_str(dstBuf);
568  yaz_log(YLOG_LOG, "get_match_from_spec %s", wrbuf_cstr(w));
569  wrbuf_destroy(w);
570  }
571 
572  return dstBuf;
573 }
574 
576  const char *fname;
578  struct recordGroup *rGroup;
579 };
580 
591 static void all_matches_add(struct recExtractCtrl *ctrl, zint record_id,
592  zint sysno)
593 {
594  RecWord word;
595  extract_init(ctrl, &word);
596  word.record_id = record_id;
597  /* we use the seqno as placeholder for a way to get back to
598  record database from _ALLRECORDS.. This is used if a custom
599  RECORD was defined */
600  word.seqno = sysno;
601  word.index_name = "_ALLRECORDS";
602  word.index_type = "w";
603 
605  "", 0);
606 }
607 
608 /* forward declaration */
610  struct ZebraRecStream *stream,
611  enum zebra_recctrl_action_t action,
612  const char *recordType,
613  zint *sysno,
614  const char *match_criteria,
615  const char *fname,
617  void *recTypeClientData);
618 
619 
620 ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname,
621  enum zebra_recctrl_action_t action)
622 {
623  ZEBRA_RES r = ZEBRA_OK;
624  int i, fd;
625  char gprefix[128];
626  char ext[128];
627  char ext_res[128];
628  const char *original_record_type = 0;
629  RecType recType;
630  void *recTypeClientData;
631  struct ZebraRecStream stream, *streamp;
632 
634 
635  if (!zh->m_group || !*zh->m_group)
636  *gprefix = '\0';
637  else
638  sprintf(gprefix, "%s.", zh->m_group);
639 
640  yaz_log(log_level_extract, "zebra_extract_file %s", fname);
641 
642  /* determine file extension */
643  *ext = '\0';
644  for (i = strlen(fname); --i >= 0; )
645  if (fname[i] == '/')
646  break;
647  else if (fname[i] == '.')
648  {
649  strcpy(ext, fname+i+1);
650  break;
651  }
652  /* determine file type - depending on extension */
653  original_record_type = zh->m_record_type;
654  if (!zh->m_record_type)
655  {
656  sprintf(ext_res, "%srecordType.%s", gprefix, ext);
657  zh->m_record_type = res_get(zh->res, ext_res);
658  }
659  if (!zh->m_record_type)
660  {
661  check_log_limit(zh);
662  if (zh->records_processed + zh->records_skipped
663  < zh->m_file_verbose_limit)
664  yaz_log(YLOG_LOG, "? %s", fname);
665  zh->records_skipped++;
666  return 0;
667  }
668  /* determine match criteria */
669  if (!zh->m_record_id)
670  {
671  sprintf(ext_res, "%srecordId.%s", gprefix, ext);
672  zh->m_record_id = res_get(zh->res, ext_res);
673  }
674 
675  if (!(recType =
677  &recTypeClientData)))
678  {
679  yaz_log(YLOG_WARN, "No such record type: %s", zh->m_record_type);
680  return ZEBRA_FAIL;
681  }
682 
683  switch(recType->version)
684  {
685  case 0:
686  break;
687  default:
688  yaz_log(YLOG_WARN, "Bad filter version: %s", zh->m_record_type);
689  }
690  if (sysno && (action == action_delete || action == action_a_delete))
691  {
692  streamp = 0;
693  }
694  else
695  {
696  char full_rep[1024];
697 
698  if (zh->path_reg && !yaz_is_abspath(fname))
699  {
700  strcpy(full_rep, zh->path_reg);
701  strcat(full_rep, "/");
702  strcat(full_rep, fname);
703  }
704  else
705  strcpy(full_rep, fname);
706 
707  if ((fd = open(full_rep, O_BINARY|O_RDONLY)) == -1)
708  {
709  yaz_log(YLOG_WARN|YLOG_ERRNO, "open %s", full_rep);
710  zh->m_record_type = original_record_type;
711  return ZEBRA_FAIL;
712  }
713  streamp = &stream;
714  zebra_create_stream_fd(streamp, fd, 0);
715  }
716  r = zebra_extract_records_stream(zh, streamp,
717  action,
718  zh->m_record_type,
719  sysno,
720  0, /*match_criteria */
721  fname,
722  recType, recTypeClientData);
723  if (streamp)
724  stream.destroy(streamp);
725  zh->m_record_type = original_record_type;
726  return r;
727 }
728 
729 /*
730  If sysno is provided, then it's used to identify the record.
731  If not, and match_criteria is provided, then sysno is guessed
732  If not, and a record is provided, then sysno is got from there
733 
734  */
735 
737  const char *buf, size_t buf_size,
738  enum zebra_recctrl_action_t action,
739  const char *recordType,
740  zint *sysno,
741  const char *match_criteria,
742  const char *fname)
743 {
744  struct ZebraRecStream stream;
745  ZEBRA_RES res;
746  void *clientData;
747  RecType recType = 0;
748 
749  if (recordType && *recordType)
750  {
751  yaz_log(log_level_extract,
752  "Record type explicitly specified: %s", recordType);
753  recType = recType_byName(zh->reg->recTypes, zh->res, recordType,
754  &clientData);
755  }
756  else
757  {
758  if (!(zh->m_record_type))
759  {
760  yaz_log(YLOG_WARN, "No such record type defined");
761  return ZEBRA_FAIL;
762  }
763  yaz_log(log_level_extract, "Get record type from rgroup: %s",
764  zh->m_record_type);
765  recType = recType_byName(zh->reg->recTypes, zh->res,
766  zh->m_record_type, &clientData);
767  recordType = zh->m_record_type;
768  }
769 
770  if (!recType)
771  {
772  yaz_log(YLOG_WARN, "No such record type: %s", recordType);
773  return ZEBRA_FAIL;
774  }
775 
776  zebra_create_stream_mem(&stream, buf, buf_size);
777 
778  res = zebra_extract_records_stream(zh, &stream,
779  action,
780  recordType,
781  sysno,
782  match_criteria,
783  fname,
784  recType, clientData);
785  stream.destroy(&stream);
786  return res;
787 }
788 
790  struct ZebraRecStream *stream,
791  enum zebra_recctrl_action_t action,
792  const char *recordType,
793  zint *sysno,
794  const char *match_criteria,
795  const char *fname,
796  RecType recType,
797  void *recTypeClientData,
798  int *more)
799 
800 {
801  zint sysno0 = 0;
802  RecordAttr *recordAttr;
803  struct recExtractCtrl extractCtrl;
804  int r;
805  const char *matchStr = 0;
806  Record rec;
807  off_t start_offset = 0, end_offset = 0;
808  const char *pr_fname = fname; /* filename to print .. */
809  int show_progress = zh->records_processed + zh->records_skipped
810  < zh->m_file_verbose_limit ? 1:0;
811 
813 
814  if (!pr_fname)
815  pr_fname = "<no file>"; /* make it printable if file is omitted */
816 
819 
820  if (zebraExplain_curDatabase(zh->reg->zei, zh->basenames[0]))
821  {
822  if (zebraExplain_newDatabase(zh->reg->zei, zh->basenames[0],
823  zh->m_explain_database))
824  return ZEBRA_FAIL;
825  }
826 
827  if (stream)
828  {
829  off_t null_offset = 0;
830  extractCtrl.stream = stream;
831 
832  start_offset = stream->tellf(stream);
833 
834  extractCtrl.first_record = start_offset ? 0 : 1;
835 
836  stream->endf(stream, &null_offset);;
837 
838  extractCtrl.init = extract_init;
839  extractCtrl.tokenAdd = extract_token_add;
840  extractCtrl.schemaAdd = extract_schema_add;
841  extractCtrl.dh = zh->reg->dh;
842  extractCtrl.handle = zh;
843  extractCtrl.match_criteria[0] = '\0';
844  extractCtrl.staticrank = 0;
845  extractCtrl.action = action;
846 
847  init_extractCtrl(zh, &extractCtrl);
848 
849  extract_set_store_data_prepare(&extractCtrl);
850 
851  r = (*recType->extract)(recTypeClientData, &extractCtrl);
852 
853  if (action == action_update)
854  {
855  action = extractCtrl.action;
856  }
857 
858  switch (r)
859  {
860  case RECCTRL_EXTRACT_EOF:
861  return ZEBRA_FAIL;
863  /* error occured during extraction ... */
864  yaz_log(YLOG_WARN, "extract error: generic");
865  return ZEBRA_FAIL;
867  /* error occured during extraction ... */
868  yaz_log(YLOG_WARN, "extract error: no such filter");
869  return ZEBRA_FAIL;
871  if (show_progress)
872  yaz_log(YLOG_LOG, "skip %s %s " ZINT_FORMAT,
873  recordType, pr_fname, (zint) start_offset);
874  *more = 1;
875 
876  end_offset = stream->endf(stream, 0);
877  if (end_offset)
878  stream->seekf(stream, end_offset);
879 
880  return ZEBRA_OK;
881  case RECCTRL_EXTRACT_OK:
882  break;
883  default:
884  yaz_log(YLOG_WARN, "extract error: unknown error: %d", r);
885  return ZEBRA_FAIL;
886  }
887  end_offset = stream->endf(stream, 0);
888  if (end_offset)
889  stream->seekf(stream, end_offset);
890  else
891  end_offset = stream->tellf(stream);
892 
893  if (extractCtrl.match_criteria[0])
894  match_criteria = extractCtrl.match_criteria;
895  }
896 
897  *more = 1;
898 
899  if (zh->m_flag_rw == 0)
900  {
901  yaz_log(YLOG_LOG, "test %s %s " ZINT_FORMAT, recordType,
902  pr_fname, (zint) start_offset);
903  /* test mode .. Do not perform match */
904  return ZEBRA_OK;
905  }
906 
907  if (!sysno)
908  {
909  sysno = &sysno0;
910 
911  if (match_criteria && *match_criteria)
912  matchStr = match_criteria;
913  else
914  {
915  if (zh->m_record_id && *zh->m_record_id)
916  {
917  matchStr = get_match_from_spec(zh, zh->reg->keys, pr_fname,
918  zh->m_record_id);
919  if (!matchStr)
920  {
921  yaz_log(YLOG_LOG, "error %s %s " ZINT_FORMAT, recordType,
922  pr_fname, (zint) start_offset);
923  return ZEBRA_FAIL;
924  }
925  if (0 && matchStr)
926  {
927  WRBUF w = wrbuf_alloc();
928  size_t i;
929  for (i = 0; i < strlen(matchStr); i++)
930  {
931  wrbuf_printf(w, "%02X", matchStr[i] & 0xff);
932  }
933  yaz_log(YLOG_LOG, "Got match %s", wrbuf_cstr(w));
934  wrbuf_destroy(w);
935  }
936  }
937  }
938  if (matchStr)
939  {
940  int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
941  char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord,
942  matchStr);
943 
944 
945  if (log_level_extract)
946  {
947  WRBUF w = wrbuf_hex_str(matchStr);
948  yaz_log(log_level_extract, "matchStr: %s", wrbuf_cstr(w));
949  wrbuf_destroy(w);
950  }
951  if (rinfo)
952  {
953  assert(*rinfo == sizeof(*sysno));
954  memcpy(sysno, rinfo+1, sizeof(*sysno));
955  }
956  }
957  }
958 
959  if (! *sysno)
960  {
961  /* new record AKA does not exist already */
962  if (action == action_delete)
963  {
964  yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
965  pr_fname, (zint) start_offset);
966  yaz_log(YLOG_WARN, "cannot delete record above (seems new)");
967  return ZEBRA_FAIL;
968  }
969  else if (action == action_a_delete)
970  {
971  if (show_progress)
972  yaz_log(YLOG_LOG, "adelete %s %s " ZINT_FORMAT, recordType,
973  pr_fname, (zint) start_offset);
974  return ZEBRA_OK;
975  }
976  else if (action == action_replace)
977  {
978  yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
979  pr_fname, (zint) start_offset);
980  yaz_log(YLOG_WARN, "cannot update record above (seems new)");
981  return ZEBRA_FAIL;
982  }
983  if (show_progress)
984  yaz_log(YLOG_LOG, "add %s %s " ZINT_FORMAT, recordType, pr_fname,
985  (zint) start_offset);
986  rec = rec_new(zh->reg->records);
987 
988  *sysno = rec->sysno;
989 
990 
991  if (stream)
992  {
993  all_matches_add(&extractCtrl,
995  *sysno);
996  }
997 
998 
999  recordAttr = rec_init_attr(zh->reg->zei, rec);
1000  if (extractCtrl.staticrank < 0)
1001  {
1002  yaz_log(YLOG_WARN, "Negative staticrank for record. Set to 0");
1003  extractCtrl.staticrank = 0;
1004  }
1005 
1006  if (matchStr)
1007  {
1008  int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
1009  dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
1010  sizeof(*sysno), sysno);
1011  }
1012 
1013  extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
1014 #if FLUSH2
1015  extract_flush_record_keys2(zh, *sysno,
1016  zh->reg->keys, extractCtrl.staticrank,
1017  0, recordAttr->staticrank);
1018 #else
1019  extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys,
1020  extractCtrl.staticrank);
1021 #endif
1022  recordAttr->staticrank = extractCtrl.staticrank;
1023  zh->records_inserted++;
1024  }
1025  else
1026  {
1027  /* record already exists */
1030  if (action == action_insert)
1031  {
1032  yaz_log(YLOG_LOG, "skipped %s %s " ZINT_FORMAT,
1033  recordType, pr_fname, (zint) start_offset);
1034  logRecord(zh);
1035  return ZEBRA_FAIL;
1036  }
1037 
1038  rec = rec_get(zh->reg->records, *sysno);
1039  assert(rec);
1040 
1041  if (stream)
1042  {
1043  all_matches_add(&extractCtrl,
1045  *sysno);
1046  }
1047 
1048  recordAttr = rec_init_attr(zh->reg->zei, rec);
1049 
1050  /* decrease total size */
1052  - recordAttr->recordSize);
1053 
1054  zebra_rec_keys_set_buf(delkeys,
1055  rec->info[recInfo_delKeys],
1056  rec->size[recInfo_delKeys],
1057  0);
1058  zebra_rec_keys_set_buf(sortKeys,
1059  rec->info[recInfo_sortKeys],
1060  rec->size[recInfo_sortKeys],
1061  0);
1062 
1063  extract_flush_sort_keys(zh, *sysno, 0, sortKeys);
1064 #if !FLUSH2
1065  extract_flush_record_keys(zh, *sysno, 0, delkeys,
1066  recordAttr->staticrank);
1067 #endif
1068  if (action == action_delete || action == action_a_delete)
1069  {
1070  /* record going to be deleted */
1071 #if FLUSH2
1072  extract_flush_record_keys2(zh, *sysno, 0, recordAttr->staticrank,
1073  delkeys, recordAttr->staticrank);
1074 #endif
1075  if (zebra_rec_keys_empty(delkeys))
1076  {
1077  yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
1078  pr_fname, (zint) start_offset);
1079  yaz_log(YLOG_WARN, "cannot delete file above, "
1080  "storeKeys false (3)");
1081  }
1082  else
1083  {
1084  if (show_progress)
1085  yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
1086  pr_fname, (zint) start_offset);
1087  zh->records_deleted++;
1088  if (matchStr)
1089  {
1090  int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
1091  dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
1092  }
1093  rec_del(zh->reg->records, &rec);
1094  }
1095  zebra_rec_keys_close(delkeys);
1096  zebra_rec_keys_close(sortKeys);
1097  rec_free(&rec);
1098  logRecord(zh);
1099  return ZEBRA_OK;
1100  }
1101  else
1102  { /* update or special_update */
1103  if (show_progress)
1104  yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
1105  pr_fname, (zint) start_offset);
1106  extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
1107 
1108 #if FLUSH2
1109  extract_flush_record_keys2(zh, *sysno,
1110  zh->reg->keys, extractCtrl.staticrank,
1111  delkeys, recordAttr->staticrank);
1112 #else
1113  extract_flush_record_keys(zh, *sysno, 1,
1114  zh->reg->keys, extractCtrl.staticrank);
1115 #endif
1116  recordAttr->staticrank = extractCtrl.staticrank;
1117  zh->records_updated++;
1118  }
1119  zebra_rec_keys_close(delkeys);
1120  zebra_rec_keys_close(sortKeys);
1121  }
1122  /* update file type */
1123  xfree(rec->info[recInfo_fileType]);
1124  rec->info[recInfo_fileType] =
1125  rec_strdup(recordType, &rec->size[recInfo_fileType]);
1126 
1127  /* update filename */
1128  xfree(rec->info[recInfo_filename]);
1129  rec->info[recInfo_filename] =
1130  rec_strdup(fname, &rec->size[recInfo_filename]);
1131 
1132  /* update delete keys */
1133  xfree(rec->info[recInfo_delKeys]);
1134  if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1)
1135  {
1137  &rec->info[recInfo_delKeys],
1138  &rec->size[recInfo_delKeys]);
1139  }
1140  else
1141  {
1142  rec->info[recInfo_delKeys] = NULL;
1143  rec->size[recInfo_delKeys] = 0;
1144  }
1145  /* update sort keys */
1146  xfree(rec->info[recInfo_sortKeys]);
1147 
1149  &rec->info[recInfo_sortKeys],
1150  &rec->size[recInfo_sortKeys]);
1151 
1152  if (stream)
1153  {
1154  recordAttr->recordSize = end_offset - start_offset;
1156  recordAttr->recordSize);
1157  }
1158 
1159  /* set run-number for this record */
1160  recordAttr->runNumber =
1162 
1163  /* update store data */
1164  xfree(rec->info[recInfo_storeData]);
1165 
1166  /* update store data */
1167  if (zh->store_data_buf)
1168  {
1171  zh->store_data_buf = 0;
1172  recordAttr->recordSize = zh->store_data_size;
1173  }
1174  else if (zh->m_store_data)
1175  {
1176  off_t cur_offset = stream->tellf(stream);
1177 
1178  rec->size[recInfo_storeData] = recordAttr->recordSize;
1179  rec->info[recInfo_storeData] = (char *)
1180  xmalloc(recordAttr->recordSize);
1181  stream->seekf(stream, start_offset);
1182  stream->readf(stream, rec->info[recInfo_storeData],
1183  recordAttr->recordSize);
1184  stream->seekf(stream, cur_offset);
1185  }
1186  else
1187  {
1188  rec->info[recInfo_storeData] = NULL;
1189  rec->size[recInfo_storeData] = 0;
1190  }
1191  /* update database name */
1192  xfree(rec->info[recInfo_databaseName]);
1193  rec->info[recInfo_databaseName] =
1195 
1196  /* update offset */
1197  recordAttr->recordOffset = start_offset;
1198 
1199  /* commit this record */
1200  rec_put(zh->reg->records, &rec);
1201  logRecord(zh);
1202  return ZEBRA_OK;
1203 }
1204 
1218  struct ZebraRecStream *stream,
1220  const char *recordType,
1221  zint *sysno,
1222  const char *match_criteria,
1223  const char *fname,
1224  RecType recType,
1225  void *recTypeClientData)
1226 {
1227  ZEBRA_RES res = ZEBRA_OK;
1228  while (1)
1229  {
1230  int more = 0;
1231  res = zebra_extract_record_stream(zh, stream,
1232  action,
1233  recordType,
1234  sysno,
1235  match_criteria,
1236  fname,
1237  recType, recTypeClientData, &more);
1238  if (!more)
1239  {
1240  res = ZEBRA_OK;
1241  break;
1242  }
1243  if (res != ZEBRA_OK)
1244  break;
1245  if (sysno)
1246  break;
1247  }
1248  return res;
1249 }
1250 
1252 {
1253  ZebraHandle zh = (ZebraHandle) handle;
1254  struct recExtractCtrl extractCtrl;
1255 
1256  if (zebraExplain_curDatabase(zh->reg->zei,
1257  rec->info[recInfo_databaseName]))
1258  {
1259  abort();
1260  if (zebraExplain_newDatabase(zh->reg->zei,
1261  rec->info[recInfo_databaseName], 0))
1262  abort();
1263  }
1264 
1267 
1268  extractCtrl.init = extract_init;
1269  extractCtrl.tokenAdd = extract_token_add;
1270  extractCtrl.schemaAdd = extract_schema_add;
1271  extractCtrl.dh = zh->reg->dh;
1272 
1273  init_extractCtrl(zh, &extractCtrl);
1274 
1275  extractCtrl.flagShowRecords = 0;
1276  extractCtrl.match_criteria[0] = '\0';
1277  extractCtrl.staticrank = 0;
1278  extractCtrl.action = action_update;
1279 
1280  extractCtrl.handle = handle;
1281  extractCtrl.first_record = 1;
1282 
1283  extract_set_store_data_prepare(&extractCtrl);
1284 
1285  if (n)
1286  grs_extract_tree(&extractCtrl, n);
1287 
1288  if (rec->size[recInfo_delKeys])
1289  {
1291 
1293 
1295  rec->size[recInfo_delKeys],
1296  0);
1297 #if FLUSH2
1299  zh->reg->keys, 0, delkeys, 0);
1300 #else
1301  extract_flush_record_keys(zh, rec->sysno, 0, delkeys, 0);
1302  extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0);
1303 #endif
1304  zebra_rec_keys_close(delkeys);
1305 
1307  rec->size[recInfo_sortKeys],
1308  0);
1309 
1310  extract_flush_sort_keys(zh, rec->sysno, 0, sortkeys);
1311  zebra_rec_keys_close(sortkeys);
1312  }
1313  else
1314  {
1315 #if FLUSH2
1316  extract_flush_record_keys2(zh, rec->sysno, zh->reg->keys, 0, 0, 0);
1317 #else
1318  extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0);
1319 #endif
1320  }
1321  extract_flush_sort_keys(zh, rec->sysno, 1, zh->reg->sortKeys);
1322 
1323  xfree(rec->info[recInfo_delKeys]);
1325  &rec->info[recInfo_delKeys],
1326  &rec->size[recInfo_delKeys]);
1327 
1328  xfree(rec->info[recInfo_sortKeys]);
1330  &rec->info[recInfo_sortKeys],
1331  &rec->size[recInfo_sortKeys]);
1332  return ZEBRA_OK;
1333 }
1334 
1336  const char *str, size_t slen, NMEM nmem, int level)
1337 {
1338  char keystr[200]; /* room for zints to print */
1339  int ord = CAST_ZINT_TO_INT(key->mem[0]);
1340  const char *index_type;
1341  int i;
1342  const char *string_index;
1343 
1344  zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
1345  0/* db */, &string_index);
1346  assert(index_type);
1347  *keystr = '\0';
1348  for (i = 0; i < key->len; i++)
1349  {
1350  sprintf(keystr + strlen(keystr), ZINT_FORMAT " ", key->mem[i]);
1351  }
1352 
1353  if (*str < CHR_BASE_CHAR)
1354  {
1355  int i;
1356  char dst_buf[200]; /* room for special chars */
1357 
1358  strcpy(dst_buf , "?");
1359 
1360  if (!strcmp(str, ""))
1361  strcpy(dst_buf, "alwaysmatches");
1362  if (!strcmp(str, FIRST_IN_FIELD_STR))
1363  strcpy(dst_buf, "firstinfield");
1364  else if (!strcmp(str, CHR_UNKNOWN))
1365  strcpy(dst_buf, "unknown");
1366  else if (!strcmp(str, CHR_SPACE))
1367  strcpy(dst_buf, "space");
1368 
1369  for (i = 0; i<slen; i++)
1370  {
1371  sprintf(dst_buf + strlen(dst_buf), " %d", str[i] & 0xff);
1372  }
1373  yaz_log(level, "%s%s %s %s", keystr, index_type,
1374  string_index, dst_buf);
1375  }
1376  else
1377  {
1378  char *dst_term = 0;
1379  zebra_term_untrans_iconv(zh, nmem, index_type, &dst_term, str);
1380  if (dst_term)
1381  yaz_log(level, "%s%s %s \"%s\"", keystr, index_type,
1382  string_index, dst_term);
1383  else
1384  {
1385  WRBUF w = wrbuf_alloc();
1386  wrbuf_write_escaped(w, str, strlen(str));
1387  yaz_log(level, "%s%s %s %s", keystr, index_type,
1388  string_index, wrbuf_cstr(w));
1389  wrbuf_destroy(w);
1390  }
1391  }
1392 }
1393 
1394 void extract_rec_keys_log(ZebraHandle zh, int is_insert,
1395  zebra_rec_keys_t reckeys,
1396  int level)
1397 {
1398  if (zebra_rec_keys_rewind(reckeys))
1399  {
1400  size_t slen;
1401  const char *str;
1402  struct it_key key;
1403  NMEM nmem = nmem_create();
1404 
1405  while(zebra_rec_keys_read(reckeys, &str, &slen, &key))
1406  {
1407  zebra_it_key_str_dump(zh, &key, str, slen, nmem, level);
1408  nmem_reset(nmem);
1409  }
1410  nmem_destroy(nmem);
1411  }
1412 }
1413 
1414 void extract_rec_keys_adjust(ZebraHandle zh, int is_insert,
1415  zebra_rec_keys_t reckeys)
1416 {
1417  ZebraExplainInfo zei = zh->reg->zei;
1418  struct ord_stat {
1419  int no;
1420  int ord;
1421  struct ord_stat *next;
1422  };
1423 
1424  if (zebra_rec_keys_rewind(reckeys))
1425  {
1426  struct ord_stat *ord_list = 0;
1427  struct ord_stat *p;
1428  size_t slen;
1429  const char *str;
1430  struct it_key key_in;
1431  while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1432  {
1433  int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
1434 
1435  for (p = ord_list; p ; p = p->next)
1436  if (p->ord == ord)
1437  {
1438  p->no++;
1439  break;
1440  }
1441  if (!p)
1442  {
1443  p = xmalloc(sizeof(*p));
1444  p->no = 1;
1445  p->ord = ord;
1446  p->next = ord_list;
1447  ord_list = p;
1448  }
1449  }
1450 
1451  p = ord_list;
1452  while (p)
1453  {
1454  struct ord_stat *p1 = p;
1455 
1456  if (is_insert)
1457  zebraExplain_ord_adjust_occurrences(zei, p->ord, p->no, 1);
1458  else
1459  zebraExplain_ord_adjust_occurrences(zei, p->ord, - p->no, -1);
1460  p = p->next;
1461  xfree(p1);
1462  }
1463  }
1464 }
1465 
1466 #if FLUSH2
/*
 * Parameter list and body of extract_flush_record_keys2: writes the insert
 * keys and delete keys of record sysno to the key block, skipping pairs that
 * would cancel each other out.
 *
 * NOTE(review): listing number 1467 (the line with the return type and
 * function name) is absent from this copy, as are numbers 1488 and 1495
 * below - restore them from the upstream source before building.
 */
1468  ZebraHandle zh, zint sysno,
1469  zebra_rec_keys_t ins_keys, zint ins_rank,
1470  zebra_rec_keys_t del_keys, zint del_rank)
1471 {
1472  ZebraExplainInfo zei = zh->reg->zei;
1473  int normal = 0;
1474  int optimized = 0;
1475 
/* Create the key block on first use from the memmax (MB), keyTmpDir and
   threads resources; defaults are "8", "." and "1". */
1476  if (!zh->reg->key_block)
1477  {
1478  int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8"));
1479  const char *key_tmp_dir = res_get_def(zh->res, "keyTmpDir", ".");
1480  int use_threads = atoi(res_get_def(zh->res, "threads", "1"));
1481  zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads);
1482  }
1483 
/* Adjust occurrence statistics for both key sets, then rewind them so the
   loop below reads each set from its start. */
1484  if (ins_keys)
1485  {
1486  extract_rec_keys_adjust(zh, 1, ins_keys);
/* NOTE(review): listing number 1488 is missing after this `if`; as shown the
   condition would govern the rewind below, and zei is otherwise unused in
   the visible lines - confirm against upstream. */
1487  if (!del_keys)
1489  zebra_rec_keys_rewind(ins_keys);
1490  }
1491  if (del_keys)
1492  {
1493  extract_rec_keys_adjust(zh, 0, del_keys);
/* NOTE(review): listing number 1495 is missing after this `if` - same
   concern as above. */
1494  if (!ins_keys)
1496  zebra_rec_keys_rewind(del_keys);
1497  }
1498 
/* Read one delete key and one insert key per iteration (both lists advance
   in lockstep) until both are exhausted. */
1499  while (1)
1500  {
1501  size_t del_slen;
1502  const char *del_str;
1503  struct it_key del_key_in;
1504  int del = 0;
1505 
1506  size_t ins_slen;
1507  const char *ins_str;
1508  struct it_key ins_key_in;
1509  int ins = 0;
1510 
1511  if (del_keys)
1512  del = zebra_rec_keys_read(del_keys, &del_str, &del_slen,
1513  &del_key_in);
1514  if (ins_keys)
1515  ins = zebra_rec_keys_read(ins_keys, &ins_str, &ins_slen,
1516  &ins_key_in);
1517 
/* Same rank, key_compare() == 0 and identical term bytes: the delete and
   the insert cancel out, so neither is written. */
1518  if (del && ins && ins_rank == del_rank
1519  && !key_compare(&del_key_in, &ins_key_in)
1520  && ins_slen == del_slen && !memcmp(del_str, ins_str, del_slen))
1521  {
1522  optimized++;
1523  continue;
1524  }
1525  if (!del && !ins)
1526  break;
1527 
/* Otherwise write whatever was read: cmd 0 for the delete key, cmd 1 for
   the insert key. */
1528  normal++;
1529  if (del)
1530  key_block_write(zh->reg->key_block, sysno,
1531  &del_key_in, 0, del_str, del_slen,
1532  del_rank, zh->m_staticrank);
1533  if (ins)
1534  key_block_write(zh->reg->key_block, sysno,
1535  &ins_key_in, 1, ins_str, ins_slen,
1536  ins_rank, zh->m_staticrank);
1537  }
1538  yaz_log(log_level_extract, "normal=%d optimized=%d", normal, optimized);
1539 }
1540 #else
1541 static void extract_flush_record_keys(
1542  ZebraHandle zh, zint sysno, int cmd,
1543  zebra_rec_keys_t reckeys,
1544  zint staticrank)
1545 {
1546  ZebraExplainInfo zei = zh->reg->zei;
1547 
1548  extract_rec_keys_adjust(zh, cmd, reckeys);
1549 
1550  if (log_level_details)
1551  {
1552  yaz_log(log_level_details, "Keys for record " ZINT_FORMAT " %s",
1553  sysno, cmd ? "insert" : "delete");
1554  extract_rec_keys_log(zh, cmd, reckeys, log_level_details);
1555  }
1556 
1557  if (!zh->reg->key_block)
1558  {
1559  int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8"));
1560  const char *key_tmp_dir = res_get_def(zh->res, "keyTmpDir", ".");
1561  int use_threads = atoi(res_get_def(zh->res, "threads", "1"));
1562  zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads);
1563  }
1564  zebraExplain_recordCountIncrement(zei, cmd ? 1 : -1);
1565 
1566 #if 0
1567  yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " cmd=%d", sysno, cmd);
1568  print_rec_keys(zh, reckeys);
1569 #endif
1570  if (zebra_rec_keys_rewind(reckeys))
1571  {
1572  size_t slen;
1573  const char *str;
1574  struct it_key key_in;
1575  while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1576  {
1577  key_block_write(zh->reg->key_block, sysno,
1578  &key_in, cmd, str, slen,
1579  staticrank, zh->m_staticrank);
1580  }
1581  }
1582 }
1583 #endif
1584 
1586  zebra_rec_keys_t reckeys,
1588 {
1589  NMEM nmem = nmem_create();
1590  if (zebra_rec_keys_rewind(reckeys))
1591  {
1592  const char *str;
1593  size_t slen;
1594  struct it_key key;
1595  while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
1596  {
1597  char *dst_term = 0;
1598  int ord;
1599  zint seqno;
1600  const char *index_type;
1601 
1602  assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
1603  seqno = key.mem[key.len-1];
1604  ord = CAST_ZINT_TO_INT(key.mem[0]);
1605 
1606  zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
1607  0/* db */, 0 /* string_index */);
1608  assert(index_type);
1609  zebra_term_untrans_iconv(zh, nmem, index_type,
1610  &dst_term, str);
1611  zebra_snippets_append(snippets, seqno, 0, ord, dst_term);
1612  nmem_reset(nmem);
1613  }
1614  }
1615  nmem_destroy(nmem);
1616  return ZEBRA_OK;
1617 }
1618 
1620 {
1621  yaz_log(YLOG_LOG, "print_rec_keys");
1622  if (zebra_rec_keys_rewind(reckeys))
1623  {
1624  const char *str;
1625  size_t slen;
1626  struct it_key key;
1627  while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
1628  {
1629  char dst_buf[IT_MAX_WORD];
1630  zint seqno;
1631  const char *index_type;
1632  int ord = CAST_ZINT_TO_INT(key.mem[0]);
1633  const char *db = 0;
1634  assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
1635 
1636  zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, 0);
1637 
1638  seqno = key.mem[key.len-1];
1639 
1640  zebra_term_untrans(zh, index_type, dst_buf, str);
1641 
1642  yaz_log(YLOG_LOG, "ord=%d seqno=" ZINT_FORMAT
1643  " term=%s", ord, seqno, dst_buf);
1644  }
1645  }
1646 }
1647 
1649  const char *str, int length)
1650 {
1651  struct it_key key;
1652  ZebraHandle zh = p->extractCtrl->handle;
1653  ZebraExplainInfo zei = zh->reg->zei;
1654  int ch, i;
1655 
1656  ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
1657  if (ch < 0)
1658  ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
1659 
1660  i = 0;
1661  key.mem[i++] = ch;
1662  key.mem[i++] = p->record_id;
1663  key.mem[i++] = p->section_id;
1664 
1665  if (zh->m_segment_indexing)
1666  key.mem[i++] = p->segment;
1667  key.mem[i++] = p->seqno;
1668  key.len = i;
1669 
1670  zebra_rec_keys_write(zh->reg->keys, str, length, &key);
1671 }
1672 
/*
 * Stores one sort term for the record being extracted: resolves (or
 * registers) the ordinal for the word's index type and name, builds a
 * three-component key (ordinal, record_id, section_id) and writes it with
 * the term bytes to the handle's sort key list.
 */
1673 static void extract_add_sort_string(RecWord *p, const char *str, int length)
1674 {
1675  struct it_key key;
1676  ZebraHandle zh = p->extractCtrl->handle;
1677  ZebraExplainInfo zei = zh->reg->zei;
1678  int ch;
/* NOTE(review): listing number 1679 is missing here and `cat`, used below,
   has no visible declaration - presumably it is declared on that line;
   restore it from the upstream source. */
1680 
1681  ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
1682  if (ch < 0)
1683  ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
1684  key.len = 3;
1685  key.mem[0] = ch;
1686  key.mem[1] = p->record_id;
1687  key.mem[2] = p->section_id;
1688 
1689  zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
1690 }
1691 
1693  const char *str, int length)
1694 {
1695  char valz[40];
1696  struct recExtractCtrl *ctrl = p->extractCtrl;
1697 
1698  if (length > sizeof(valz)-1)
1699  length = sizeof(valz)-1;
1700 
1701  memcpy(valz, str, length);
1702  valz[length] = '\0';
1703  ctrl->staticrank = atozint(valz);
1704 }
1705 
/*
 * Tail of the parameter list and body of extract_add_string: routes one
 * already-normalised term to the index, sort or static-rank handler
 * depending on what the zebra map `zm` is configured for.
 *
 * NOTE(review): listing numbers 1706 (start of the signature), 1724, 1726
 * and 1732 are absent from this copy; the index branch below is therefore
 * incomplete - restore from the upstream source.
 */
1707  const char *string, int length)
1708 {
1709  assert(length > 0);
1710 
/* words without an index name are ignored */
1711  if (!p->index_name)
1712  return;
1713  if (log_level_details)
1714  {
1715 
1716  WRBUF w = wrbuf_alloc();
1717 
1718  wrbuf_write_escaped(w, string, length);
1719  yaz_log(log_level_details, "extract_add_string: %s", wrbuf_cstr(w));
1720  wrbuf_destroy(w);
1721  }
1722  if (zebra_maps_is_index(zm))
1723  {
/* NOTE(review): the call these arguments belong to (listing number 1724)
   and the condition guarding the block below (1726) are missing. */
1725  string, length);
1727  {
/* work on a copy of the word so that forcing seqno to 1 for the
   always-matches entry does not disturb the caller's word */
1728  RecWord word;
1729  memcpy(&word, p, sizeof(word));
1730 
1731  word.seqno = 1;
/* NOTE(review): the callee name (listing number 1732) is missing. */
1733  &word, zinfo_index_category_alwaysmatches, "", 0);
1734  }
1735  }
1736  else if (zebra_maps_is_sort(zm))
1737  {
1738  extract_add_sort_string(p, string, length);
1739  }
1740  else if (zebra_maps_is_staticrank(zm))
1741  {
1742  extract_add_staticrank_string(p, string, length);
1743  }
1744 }
1745 
/*
 * Body of extract_add_incomplete_field: splits p->term_buf into words using
 * zebra_maps_input() and adds each word via extract_add_string(),
 * incrementing p->seqno after every word.
 *
 * NOTE(review): listing numbers 1746 (the signature), 1791 and 1794 are
 * absent from this copy - restore from the upstream source.
 */
1747 {
1748  const char *b = p->term_buf;
1749  int remain = p->term_len;
1750  int first = 1;
1751  const char **map = 0;
1752 
1753  if (remain > 0)
1754  map = zebra_maps_input(zm, &b, remain, 0);
1755 
1756  while (map)
1757  {
1758  char buf[IT_MAX_WORD+1];
/* note: this `remain` shadows the one declared at function scope */
1759  int i, remain;
1760 
1761  /* Skip spaces */
1762  while (map && *map && **map == *CHR_SPACE)
1763  {
1764  remain = p->term_len - (b - p->term_buf);
1765  if (remain > 0)
1766  map = zebra_maps_input(zm, &b, remain, 0);
1767  else
1768  map = 0;
1769  }
1770  if (!map)
1771  break;
/* Collect mapped output up to the next space; bytes beyond IT_MAX_WORD are
   dropped while the input is still consumed. */
1772  i = 0;
1773  while (map && *map && **map != *CHR_SPACE)
1774  {
1775  const char *cp = *map;
1776 
1777  while (i < IT_MAX_WORD && *cp)
1778  buf[i++] = *(cp++);
1779  remain = p->term_len - (b - p->term_buf);
1780  if (remain > 0)
1781  map = zebra_maps_input(zm, &b, remain, 0);
1782  else
1783  map = 0;
1784  }
/* an empty word ends processing of the whole field */
1785  if (!i)
1786  return;
1787 
1788  if (first)
1789  {
1790  first = 0;
/* NOTE(review): the condition (listing number 1791) and the statement that
   emits the marker (1794) are missing from this block. */
1792  {
1793  /* first in field marker */
1795  p->seqno++;
1796  }
1797  }
1798  extract_add_string(p, zm, buf, i);
1799  p->seqno++;
1800  }
1801 }
1802 
1804 {
1805  char buf[IT_MAX_WORD+1];
1806  int i = parse_complete_field(p, zm, buf);
1807  if (!i)
1808  return;
1809  extract_add_string(p, zm, buf, i);
1810  p->seqno++;
1811 }
1812 
/*
 * Body of extract_add_icu: adds every token delivered by
 * zebra_map_tokenize_next() via extract_add_string(), incrementing p->seqno
 * after each one.
 *
 * NOTE(review): listing numbers 1813 (the signature) and 1818 are absent
 * from this copy; presumably 1818 starts the tokenizer on the word's term
 * buffer - restore from the upstream source.
 */
1814 {
1815  const char *res_buf = 0;
1816  size_t res_len = 0;
1817 
1819  while (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0))
1820  {
/* tokens longer than IT_MAX_WORD are cut to that length and the original
   length is logged */
1821  if (res_len > IT_MAX_WORD)
1822  {
1823  yaz_log(YLOG_LOG, "Truncating long term %ld", (long) res_len);
1824  res_len = IT_MAX_WORD;
1825  }
1826  extract_add_string(p, zm, res_buf, res_len);
1827  p->seqno++;
1828  }
1829 }
1830 
1831 
/*
 * Body of extract_token_add, the top-level indexing handler: logs the
 * incoming word when detail logging is enabled, then dispatches it to the
 * ICU tokenizer path or to the complete/incomplete field path.
 *
 * NOTE(review): listing numbers 1832-1847 (doc comment and signature), 1850,
 * 1866 and 1868 are absent from this copy.  `zm` has no visible declaration
 * and both arms of the complete/incomplete test are empty as shown -
 * restore from the upstream source.
 */
1848 {
1849  ZebraHandle zh = p->extractCtrl->handle;
1851 
1852  if (log_level_details)
1853  {
1854  yaz_log(log_level_details, "extract_token_add "
1855  "type=%s index=%s seqno=" ZINT_FORMAT " s=%.*s",
1856  p->index_type, p->index_name,
1857  p->seqno, p->term_len, p->term_buf);
1858  }
1859  if (zebra_maps_is_icu(zm))
1860  {
1861  extract_add_icu(p, zm);
1862  }
1863  else
1864  {
/* NOTE(review): the statements for each arm (1866, 1868) are missing. */
1865  if (zebra_maps_is_complete(zm))
1867  else
1869  }
1870 }
1871 
1873  void *buf, size_t sz)
1874 {
1875  ZebraHandle zh = (ZebraHandle) p->handle;
1876 
1877  xfree(zh->store_data_buf);
1878  zh->store_data_buf = 0;
1879  zh->store_data_size = 0;
1880  if (buf && sz)
1881  {
1882  zh->store_data_buf = xmalloc(sz);
1883  zh->store_data_size = sz;
1884  memcpy(zh->store_data_buf, buf, sz);
1885  }
1886 }
1887 
/*
 * Body of extract_set_store_data_prepare: frees any record data kept on the
 * handle and resets the stored buffer and size to empty.
 *
 * NOTE(review): listing numbers 1888 (the signature) and 1894 are absent
 * from this copy - restore from the upstream source.
 */
1889 {
1890  ZebraHandle zh = (ZebraHandle) p->handle;
1891  xfree(zh->store_data_buf);
1892  zh->store_data_buf = 0;
1893  zh->store_data_size = 0;
1895 }
1896 
1897 static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid)
1898 {
1899  ZebraHandle zh = (ZebraHandle) p->handle;
1900  zebraExplain_addSchema(zh->reg->zei, oid);
1901 }
1902 
1904  int cmd, zebra_rec_keys_t reckeys)
1905 {
1906 #if 0
1907  yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT,
1908  cmd, sysno);
1909  extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG);
1910 #endif
1911 
1912  if (zebra_rec_keys_rewind(reckeys))
1913  {
1914  zebra_sort_index_t si = zh->reg->sort_index;
1915  size_t slen;
1916  const char *str;
1917  struct it_key key_in;
1918 
1919  NMEM nmem = nmem_create();
1920  struct sort_add_ent {
1921  int ord;
1922  int cmd;
1923  struct sort_add_ent *next;
1924  WRBUF wrbuf;
1925  zint sysno;
1926  zint section_id;
1927  };
1928  struct sort_add_ent *sort_ent_list = 0;
1929 
1930  while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1931  {
1932  int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
1933  zint filter_sysno = key_in.mem[1];
1934  zint section_id = key_in.mem[2];
1935 
1936  struct sort_add_ent **e = &sort_ent_list;
1937  for (; *e; e = &(*e)->next)
1938  if ((*e)->ord == ord && section_id == (*e)->section_id)
1939  break;
1940  if (!*e)
1941  {
1942  *e = nmem_malloc(nmem, sizeof(**e));
1943  (*e)->next = 0;
1944  (*e)->wrbuf = wrbuf_alloc();
1945  (*e)->ord = ord;
1946  (*e)->cmd = cmd;
1947  (*e)->sysno = filter_sysno ? filter_sysno : sysno;
1948  (*e)->section_id = section_id;
1949  }
1950 
1951  wrbuf_write((*e)->wrbuf, str, slen);
1952  wrbuf_putc((*e)->wrbuf, '\0');
1953  }
1954  if (sort_ent_list)
1955  {
1956  zint last_sysno = 0;
1957  struct sort_add_ent *e = sort_ent_list;
1958  for (; e; e = e->next)
1959  {
1960  if (last_sysno != e->sysno)
1961  {
1962  zebra_sort_sysno(si, e->sysno);
1963  last_sysno = e->sysno;
1964  }
1965  zebra_sort_type(si, e->ord);
1966  if (e->cmd == 1)
1967  zebra_sort_add(si, e->section_id, e->wrbuf);
1968  else
1969  zebra_sort_delete(si, e->section_id);
1970  wrbuf_destroy(e->wrbuf);
1971  }
1972  }
1973  nmem_destroy(nmem);
1974  }
1975 }
1976 
1977 /*
1978  * Local variables:
1979  * c-basic-offset: 4
1980  * c-file-style: "Stroustrup"
1981  * indent-tabs-mode: nil
1982  * End:
1983  * vim: shiftwidth=4 tabstop=8 expandtab
1984  */
1985 
static void all_matches_add(struct recExtractCtrl *ctrl, zint record_id, zint sysno)
add the always-matches index entry and map to real record ID
Definition: extract.c:591
int key_compare(const void *p1, const void *p2)
Definition: it_key.c:73
Record rec_get(Records p, zint sysno)
gets record - with given system number
Definition: records.c:927
#define RECCTRL_EXTRACT_SKIP
Definition: recctrl.h:167
int len
Definition: it_key.h:31
#define RECCTRL_EXTRACT_OK
Definition: recctrl.h:163
static int log_level_details
Definition: extract.c:46
void(* tokenAdd)(RecWord *w)
Definition: recctrl.h:105
void zebra_rec_keys_write(zebra_rec_keys_t keys, const char *str, size_t slen, const struct it_key *key)
Definition: reckeys.c:188
char * rec_strdup(const char *s, size_t *len)
Definition: records.c:1079
size_t store_data_size
Definition: index.h:229
static WRBUF wrbuf_hex_str(const char *cstr)
Definition: extract.c:77
static char * get_match_from_spec(ZebraHandle zh, zebra_rec_keys_t reckeys, const char *fname, const char *spec)
Definition: extract.c:416
void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys)
Definition: extract.c:1619
static void extract_add_complete_field(RecWord *p, zebra_map_t zm)
Definition: extract.c:1803
zebra_snippets * snippets
Definition: extract.c:144
off_t recordOffset
Definition: zinfo.h:107
enum zebra_recctrl_action_t action
Definition: recctrl.h:114
const char * res_get_def(Res r, const char *name, const char *def)
Definition: res.c:313
const char * index_name
Definition: recctrl.h:54
void zebra_rec_keys_get_buf(zebra_rec_keys_t p, char **buf, size_t *sz)
Definition: reckeys.c:133
int dict_delete_ord(Dict d, int ord, const char *p)
Definition: orddict.c:59
#define ZEBRA_OK
Definition: util.h:82
int m_store_keys
Definition: index.h:223
ZebraHandle zh
Definition: extract.c:143
const char * res_get(Res r, const char *name)
Definition: res.c:294
int zebra_maps_is_first_in_field(zebra_map_t zm)
Definition: zebramap.c:491
struct recExtractCtrl * extractCtrl
Definition: recctrl.h:67
int zebra_term_untrans(ZebraHandle zh, const char *index_type, char *dst, const char *src)
Definition: untrans.c:31
int recordSize
Definition: zinfo.h:106
char * path_reg
Definition: index.h:182
int recordOffset
Definition: extract.c:577
static void snippet_token_add(RecWord *p)
Definition: extract.c:304
int term_len
Definition: recctrl.h:58
void extract_rec_keys_log(ZebraHandle zh, int is_insert, zebra_rec_keys_t reckeys, int level)
Definition: extract.c:1394
void(* setStoreData)(struct recExtractCtrl *p, void *buf, size_t size)
Definition: recctrl.h:106
char * dict_lookup_ord(Dict d, int ord, const char *str)
Definition: orddict.c:42
int zebra_maps_is_staticrank(zebra_map_t zm)
Definition: zebramap.c:470
void zebra_create_stream_mem(struct ZebraRecStream *stream, const char *buf, size_t sz)
Definition: stream.c:123
int m_staticrank
Definition: index.h:205
ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, const char *buf, size_t buf_size, enum zebra_recctrl_action_t action, const char *recordType, zint *sysno, const char *match_criteria, const char *fname)
Definition: extract.c:736
#define FIRST_IN_FIELD_LEN
Definition: index.h:417
zint section_id
Definition: recctrl.h:66
int zebra_maps_is_sort(zebra_map_t zm)
Definition: zebramap.c:477
int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
Definition: recgrs.c:884
zint zebraExplain_runNumberIncrement(ZebraExplainInfo zei, int adjust_num)
Definition: zinfo.c:1586
struct zebra_register * reg
Definition: index.h:174
#define ZEBRA_FAIL
Definition: util.h:81
char ** basenames
Definition: index.h:178
static int log_level_extract
Definition: extract.c:45
data1_handle dh
Definition: index.h:142
static void extract_add_staticrank_string(RecWord *p, const char *str, int length)
Definition: extract.c:1692
void * handle
Definition: recctrl.h:113
ZebraHandle zh
Definition: zebrash.c:64
int zebra_maps_is_alwaysmatches(zebra_map_t zm)
Definition: zebramap.c:484
const char * m_record_type
Definition: index.h:221
zint staticrank
Definition: recctrl.h:110
zint records_deleted
Definition: index.h:210
#define FILE_MATCH_BLANK
Definition: extract.c:414
static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
Definition: extract.c:119
static void extract_add_index_string(RecWord *p, zinfo_index_category_t cat, const char *str, int length)
Definition: extract.c:1648
#define RECCTRL_EXTRACT_ERROR_GENERIC
Definition: recctrl.h:165
zint records_processed
Definition: index.h:211
Records records
Definition: index.h:138
zint records_updated
Definition: index.h:209
static ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, struct ZebraRecStream *stream, enum zebra_recctrl_action_t action, const char *recordType, zint *sysno, const char *match_criteria, const char *fname, RecType recType, void *recTypeClientData, int *more)
Definition: extract.c:789
int zebra_rec_keys_rewind(zebra_rec_keys_t keys)
Definition: reckeys.c:240
int zebraExplain_add_attr_str(ZebraExplainInfo zei, zinfo_index_category_t cat, const char *index_type, const char *index_name)
Definition: zinfo.c:1546
void rec_free(Record *recpp)
frees record (from memory)
Definition: records.c:1043
const char * CHR_SPACE
Definition: charmap.c:49
const char * CHR_CUT
Definition: charmap.c:50
static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid)
Definition: extract.c:1897
#define RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER
Definition: recctrl.h:166
void extract_snippet(ZebraHandle zh, zebra_snippets *sn, struct ZebraRecStream *stream, RecType rt, void *recTypeClientData)
Definition: extract.c:334
zint atozint(const char *src)
Definition: zint.c:55
#define IT_MAX_WORD
Definition: it_key.h:27
const char * m_group
Definition: index.h:219
zint zebra_rec_keys_get_custom_record_id(zebra_rec_keys_t keys)
Definition: reckeys.c:286
#define RECCTRL_EXTRACT_EOF
Definition: recctrl.h:164
int zebraExplain_lookup_attr_str(ZebraExplainInfo zei, zinfo_index_category_t cat, const char *index_type, const char *str)
lookup ordinal from string index + index type
Definition: zinfo.c:1354
ZEBRA_RES zebra_rec_keys_to_snippets1(ZebraHandle zh, zebra_rec_keys_t reckeys, zebra_snippets *snippets)
Definition: extract.c:1585
zint staticrank
Definition: zinfo.h:109
Dict matchDict
Definition: index.h:133
void zebra_rec_keys_reset(zebra_rec_keys_t keys)
Definition: reckeys.c:230
void zebra_sort_add(zebra_sort_index_t si, zint section_id, WRBUF w)
adds multi-map content to sort file
Definition: sortidx.c:392
zebra_rec_keys_t sortKeys
Definition: index.h:151
zint record_id
Definition: recctrl.h:64
void zebraExplain_recordBytesIncrement(ZebraExplainInfo zei, int adjust_num)
Definition: zinfo.c:1564
const char ** zebra_maps_input(zebra_map_t zm, const char **from, int len, int first)
Definition: zebramap.c:398
int zebra_maps_is_icu(zebra_map_t zm)
Definition: zebramap.c:740
size_t size[REC_NO_INFO]
Definition: recindex.h:35
ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n)
Definition: extract.c:1251
zint seqno
Definition: recctrl.h:60
data1_handle dh
Definition: recctrl.h:112
ZebraExplainInfo zei
Definition: index.h:139
int zebra_map_tokenize_start(zebra_map_t zm, const char *buf, size_t len)
Definition: zebramap.c:701
void zebra_rec_keys_close(zebra_rec_keys_t p)
Definition: reckeys.c:143
static void extract_token_add(RecWord *p)
top-level indexing handler for recctrl system
Definition: extract.c:1847
int zebraExplain_newDatabase(ZebraExplainInfo zei, const char *database, int explain_database)
Definition: zinfo.c:883
zebra_key_block_t key_block
Definition: index.h:153
int(* extract)(void *clientData, struct recExtractCtrl *ctrl)
Definition: recctrl.h:157
static void zebra_init_log_level(void)
Definition: extract.c:66
int m_store_data
Definition: index.h:222
static void extract_flush_sort_keys(ZebraHandle zh, zint sysno, int cmd, zebra_rec_keys_t skp)
Definition: extract.c:1903
#define CAST_ZINT_TO_INT(x)
Definition: util.h:96
void(* schemaAdd)(struct recExtractCtrl *p, Odr_oid *oid)
Definition: recctrl.h:111
void key_block_write(zebra_key_block_t p, zint sysno, struct it_key *key_in, int cmd, const char *str_buf, size_t str_len, zint staticrank, int static_rank_enable)
Definition: key_block.c:258
int zebra_maps_is_complete(zebra_map_t zm)
Definition: zebramap.c:449
static void extract_flush_record_keys2(ZebraHandle zh, zint sysno, zebra_rec_keys_t ins_keys, zint ins_rank, zebra_rec_keys_t del_keys, zint del_rank)
Definition: extract.c:1467
int zebra_map_tokenize_next(zebra_map_t zm, const char **result_buf, size_t *result_len, const char **display_buf, size_t *display_len)
Definition: zebramap.c:657
static void extract_init(struct recExtractCtrl *p, RecWord *w)
Definition: extract.c:131
int dict_insert_ord(Dict d, int ord, const char *p, int userlen, void *userinfo)
Definition: orddict.c:50
static void searchRecordKey(ZebraHandle zh, zebra_rec_keys_t reckeys, const char *index_name, const char **ws, int ws_length)
Definition: extract.c:365
const char * CHR_UNKNOWN
Definition: charmap.c:48
const char * index_type
Definition: recctrl.h:52
zebra_rec_keys_t zebra_rec_keys_open(void)
Definition: reckeys.c:88
void zebraExplain_addSchema(ZebraExplainInfo zei, Odr_oid *oid)
Definition: zinfo.c:1557
static void extract_add_incomplete_field(RecWord *p, zebra_map_t zm)
Definition: extract.c:1746
zebra_maps_t zebra_maps
Definition: index.h:143
ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, struct ZebraRecStream *stream, enum zebra_recctrl_action_t action, const char *recordType, zint *sysno, const char *match_criteria, const char *fname, RecType recType, void *recTypeClientData)
extracts records from stream
Definition: extract.c:1217
char * info[REC_NO_INFO]
Definition: recindex.h:34
void(* init)(struct recExtractCtrl *p, RecWord *w)
Definition: recctrl.h:103
int m_flag_rw
Definition: index.h:225
static void snippet_add_complete_field(RecWord *p, int ord, zebra_map_t zm)
Definition: extract.c:202
int zebra_sort_type(zebra_sort_index_t si, int type)
sets type for sort usage
Definition: sortidx.c:234
off_t(* endf)(struct ZebraRecStream *s, off_t *offset)
set and get of record position
Definition: recctrl.h:81
zebra_map_t zebra_map_get_or_add(zebra_maps_t zms, const char *id)
Definition: zebramap.c:363
int zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, const char *index_type, char **dst, const char *src)
Definition: untrans.c:64
static int parse_complete_field(RecWord *p, zebra_map_t zm, char *buf)
Definition: extract.c:147
int zebraExplain_ord_adjust_occurrences(ZebraExplainInfo zei, int ord, int term_delta, int doc_delta)
Definition: zinfo.c:1436
int(* readf)(struct ZebraRecStream *s, char *buf, size_t count)
read function
Definition: recctrl.h:75
static void extract_set_store_data_cb(struct recExtractCtrl *p, void *buf, size_t sz)
Definition: extract.c:1872
int zebraExplain_curDatabase(ZebraExplainInfo zei, const char *database)
Definition: zinfo.c:791
static void extract_add_string(RecWord *p, zebra_map_t zm, const char *string, int length)
Definition: extract.c:1706
zebra_sort_index_t sort_index
Definition: index.h:134
int version
Definition: recctrl.h:152
int m_explain_database
Definition: index.h:224
char match_criteria[256]
Definition: recctrl.h:109
static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm)
Definition: extract.c:218
void zebra_snippets_append(zebra_snippets *l, zint seqno, int ws, int ord, const char *term)
Definition: snippet.c:51
void zebra_sort_sysno(zebra_sort_index_t si, zint sysno)
sets sort system number for read / add / delete
Definition: sortidx.c:339
zint mem[IT_KEY_LEVEL_MAX]
Definition: it_key.h:32
void zebra_sort_delete(zebra_sort_index_t si, zint section_id)
delete sort entry
Definition: sortidx.c:355
struct recordGroup * rGroup
Definition: extract.c:578
off_t(* seekf)(struct ZebraRecStream *s, off_t offset)
seek function
Definition: recctrl.h:77
zebra_rec_keys_t keys
Definition: index.h:150
void zebra_rec_keys_set_buf(zebra_rec_keys_t p, char *buf, size_t sz, int copy_buf)
Definition: reckeys.c:109
#define FIRST_IN_FIELD_STR
Definition: index.h:415
static void snippet_schema_add(struct recExtractCtrl *p, Odr_oid *oid)
Definition: extract.c:328
RecTypes recTypes
Definition: index.h:145
Record rec_new(Records p)
creates new record (to be written to file storage)
Definition: records.c:990
zinfo_index_category_t
Definition: zinfo.h:37
int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord, const char **index_type, const char **db, const char **string_index)
Definition: zinfo.c:1479
static void logRecord(ZebraHandle zh)
Definition: extract.c:106
RecType recType_byName(RecTypes rts, Res res, const char *name, void **clientDataP)
Definition: recctrl.c:264
int first_record
Definition: recctrl.h:107
zint sysno
Definition: recindex.h:32
zint runNumber
Definition: zinfo.h:108
int zebraExplain_get_database_ord(ZebraExplainInfo zei)
Definition: zinfo.c:1621
static void extract_add_icu(RecWord *p, zebra_map_t zm)
Definition: extract.c:1813
void extract_rec_keys_adjust(ZebraHandle zh, int is_insert, zebra_rec_keys_t reckeys)
Definition: extract.c:1414
long zint
Zebra integer.
Definition: util.h:66
const char * term_buf
Definition: recctrl.h:56
void(* destroy)(struct ZebraRecStream *s)
close and destroy stream
Definition: recctrl.h:83
static void extract_add_sort_string(RecWord *p, const char *str, int length)
Definition: extract.c:1673
int zebra_rec_keys_read(zebra_rec_keys_t keys, const char **str, size_t *slen, struct it_key *key)
Definition: reckeys.c:259
#define CHR_BASE_CHAR
Definition: charmap.h:33
zebra_key_block_t key_block_create(int mem, const char *key_tmp_dir, int use_threads)
Definition: key_block.c:190
int fd
Definition: tstlockscope.c:38
const char * m_record_id
Definition: index.h:220
off_t(* tellf)(struct ZebraRecStream *s)
tell function
Definition: recctrl.h:79
int m_file_verbose_limit
Definition: index.h:226
static void extract_set_store_data_prepare(struct recExtractCtrl *p)
Definition: extract.c:1888
record extract for indexing
Definition: recctrl.h:101
int zebra_maps_is_index(zebra_map_t zm)
Definition: zebramap.c:463
#define O_BINARY
Definition: agrep.c:46
#define IT_KEY_LEVEL_MAX
Definition: it_key.h:29
int flagShowRecords
Definition: recctrl.h:108
RecordAttr * rec_init_attr(ZebraExplainInfo zei, Record rec)
Definition: zinfo.c:1595
Definition: it_key.h:30
short ZEBRA_RES
Common return type for Zebra API.
Definition: util.h:80
ZEBRA_RES rec_put(Records p, Record *recpp)
puts record (writes into file storage)
Definition: records.c:1022
Definition: rset.h:35
zint segment
Definition: recctrl.h:62
zebra_recctrl_action_t
Definition: recctrl.h:87
const char * fname
Definition: extract.c:576
ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, enum zebra_recctrl_action_t action)
Definition: extract.c:620
void zebraExplain_recordCountIncrement(ZebraExplainInfo zei, int adjust_num)
Definition: zinfo.c:1575
struct ZebraRecStream * stream
Definition: recctrl.h:102
void zebra_snippets_appendn(zebra_snippets *l, zint seqno, int ws, int ord, const char *term, size_t term_len)
Definition: snippet.c:57
static void check_log_limit(ZebraHandle zh)
Definition: extract.c:97
zint records_inserted
Definition: index.h:208
zint records_skipped
Definition: index.h:212
int zebra_rec_keys_empty(zebra_rec_keys_t keys)
Definition: reckeys.c:252
void * store_data_buf
Definition: index.h:228
static void snippet_add_icu(RecWord *p, int ord, zebra_map_t zm)
Definition: extract.c:283
static int log_level_initialized
Definition: extract.c:47
void zebra_create_stream_fd(struct ZebraRecStream *stream, int fd, off_t start_offset)
Definition: stream.c:140
ZEBRA_RES rec_del(Records p, Record *recpp)
marks record for deletion (on file storage)
Definition: records.c:1000
record reader stream
Definition: recctrl.h:71
void zebra_it_key_str_dump(ZebraHandle zh, struct it_key *key, const char *str, size_t slen, NMEM nmem, int level)
Definition: extract.c:1335
struct zebra_session * ZebraHandle
a Zebra Handle - (session)
Definition: api.h:71
#define ZINT_FORMAT
Definition: util.h:72