IDZEBRA  2.2.7
isamb.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdlib.h>
24 #include <string.h>
25 #include <assert.h>
26 #include <yaz/log.h>
27 #include <yaz/snprintf.h>
28 #include <yaz/xmalloc.h>
29 #include <idzebra/isamb.h>
30 
31 #ifndef ISAMB_DEBUG
32 #define ISAMB_DEBUG 0
33 #endif
34 
35 
36 #define ISAMB_MAJOR_VERSION 3
37 #define ISAMB_MINOR_VERSION_NO_ROOT 0
38 #define ISAMB_MINOR_VERSION_WITH_ROOT 1
39 
40 struct ISAMB_head {
46  int block_max;
48 };
49 
50 /* if 1, upper nodes items are encoded; 0 if not encoded */
51 #define INT_ENCODE 1
52 
53 /* maximum size of encoded buffer */
54 #define DST_ITEM_MAX 5000
55 
56 /* max page size for _any_ isamb use */
57 #define ISAMB_MAX_PAGE 32768
58 
59 #define ISAMB_MAX_LEVEL 10
60 /* approx 2*max page + max size of item */
61 #define DST_BUF_SIZE (2*ISAMB_MAX_PAGE+DST_ITEM_MAX+100)
62 
63 /* should be maximum block size of multiple thereof */
64 #define ISAMB_CACHE_ENTRY_SIZE ISAMB_MAX_PAGE
65 
66 /* CAT_MAX: _must_ be power of 2 */
67 #define CAT_MAX 4
68 #define CAT_MASK (CAT_MAX-1)
69 /* CAT_NO: <= CAT_MAX */
70 #define CAT_NO 4
71 
72 /* Smallest block size */
73 #define ISAMB_MIN_SIZE 32
74 /* Size factor */
75 #define ISAMB_FAC_SIZE 4
76 
77 /* ISAMB_PTR_CODEC = 1 var, =0 fixed */
78 #define ISAMB_PTR_CODEC 1
79 
82  unsigned char *buf;
83  int dirty;
84  int hits;
86 };
87 
88 struct ISAMB_file {
91  struct ISAMB_head head;
93 };
94 
95 struct ISAMB_s {
98 
99  struct ISAMB_file *file;
100  int no_cat;
101  int cache; /* 0 = no cache, 1 = use cache, -1 = dummy isam (for testing only) */
102  int log_io; /* log level for bf_read/bf_write calls */
103  int log_freelist; /* log level for freelist handling */
104  zint skipped_numbers; /* on a leaf node */
106  zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1 = higher etc */
107  zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */
110  int enable_int_count; /* whether we count nodes (or not) */
111  int cache_size; /* size of blocks to cache (if cache=1) */
114 };
115 
116 struct ISAMB_block {
118  int cat;
119  int size;
120  int leaf;
121  int dirty;
122  int deleted;
123  int offset;
124  zint no_items; /* number of nodes in this + children */
125  char *bytes;
126  char *cbuf;
127  unsigned char *buf;
129  int log_rw;
130 };
131 
132 struct ISAMB_PP_s {
135  int level;
136  int maxlevel; /* total depth */
139  zint skipped_numbers; /* on a leaf node */
141  zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1 = higher etc */
142  zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */
143  struct ISAMB_block **block;
144  int scope; /* on what level we forward */
145 };
146 
147 
148 #define encode_item_len encode_ptr
149 #if ISAMB_PTR_CODEC
150 static void encode_ptr(char **dst, zint pos)
151 {
152  unsigned char *bp = (unsigned char*) *dst;
153 
154  while (pos > 127)
155  {
156  *bp++ = (unsigned char) (128 | (pos & 127));
157  pos = pos >> 7;
158  }
159  *bp++ = (unsigned char) pos;
160  *dst = (char *) bp;
161 }
162 #else
163 static void encode_ptr(char **dst, zint pos)
164 {
165  memcpy(*dst, &pos, sizeof(pos));
166  (*dst) += sizeof(pos);
167 }
168 #endif
169 
170 #define decode_item_len decode_ptr
171 #if ISAMB_PTR_CODEC
172 static void decode_ptr(const char **src, zint *pos)
173 {
174  zint d = 0;
175  unsigned char c;
176  unsigned r = 0;
177 
178  while (((c = *(const unsigned char *)((*src)++)) & 128))
179  {
180  d += ((zint) (c & 127) << r);
181  r += 7;
182  }
183  d += ((zint) c << r);
184  *pos = d;
185 }
186 #else
187 static void decode_ptr(const char **src, zint *pos)
188 {
189  memcpy(pos, *src, sizeof(*pos));
190  (*src) += sizeof(*pos);
191 }
192 #endif
193 
194 
196 {
197  b->enable_int_count = v;
198 }
199 
201 {
202  b->cache_size = v;
203 }
204 
205 ISAMB isamb_open2(BFiles bfs, const char *name, int writeflag, ISAMC_M *method,
206  int cache, int no_cat, int *sizes, int use_root_ptr)
207 {
208  ISAMB isamb = xmalloc(sizeof(*isamb));
209  int i;
210 
211  assert(no_cat <= CAT_MAX);
212 
213  isamb->bfs = bfs;
214  isamb->method = (ISAMC_M *) xmalloc(sizeof(*method));
215  memcpy(isamb->method, method, sizeof(*method));
216  isamb->no_cat = no_cat;
217  isamb->log_io = 0;
218  isamb->log_freelist = 0;
219  isamb->cache = cache;
220  isamb->skipped_numbers = 0;
221  isamb->returned_numbers = 0;
222  isamb->number_of_int_splits = 0;
223  isamb->number_of_leaf_splits = 0;
224  isamb->enable_int_count = 1;
225  isamb->cache_size = 40;
226 
227  if (use_root_ptr)
229  else
231 
232  isamb->root_ptr = 0;
233 
234  for (i = 0; i<ISAMB_MAX_LEVEL; i++)
235  isamb->skipped_nodes[i] = isamb->accessed_nodes[i] = 0;
236 
237  if (cache == -1)
238  {
239  yaz_log(YLOG_WARN, "isamb_open %s. Degraded TEST mode", name);
240  }
241  else
242  {
243  assert(cache == 0 || cache == 1);
244  }
245  isamb->file = xmalloc(sizeof(*isamb->file) * isamb->no_cat);
246 
247  for (i = 0; i < isamb->no_cat; i++)
248  {
249  isamb->file[i].bf = 0;
250  isamb->file[i].head_dirty = 0;
251  isamb->file[i].cache_entries = 0;
252  }
253 
254  for (i = 0; i < isamb->no_cat; i++)
255  {
256  char fname[DST_BUF_SIZE];
257  char hbuf[DST_BUF_SIZE];
258 
259  yaz_snprintf(fname, sizeof(fname), "%s%c", name, i+'A');
260  if (cache)
261  isamb->file[i].bf = bf_open(bfs, fname, ISAMB_CACHE_ENTRY_SIZE,
262  writeflag);
263  else
264  isamb->file[i].bf = bf_open(bfs, fname, sizes[i], writeflag);
265 
266  if (!isamb->file[i].bf)
267  {
268  isamb_close(isamb);
269  return 0;
270  }
271 
272  /* fill-in default values (for empty isamb) */
273  isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/sizes[i]+1;
274  isamb->file[i].head.last_block = isamb->file[i].head.first_block;
275  isamb->file[i].head.block_size = sizes[i];
276  assert(sizes[i] <= ISAMB_CACHE_ENTRY_SIZE);
277 #if ISAMB_PTR_CODEC
278  if (i == isamb->no_cat-1 || sizes[i] > 128)
279  isamb->file[i].head.block_offset = 8;
280  else
281  isamb->file[i].head.block_offset = 4;
282 #else
283  isamb->file[i].head.block_offset = 11;
284 #endif
285  isamb->file[i].head.block_max =
286  sizes[i] - isamb->file[i].head.block_offset;
287  isamb->file[i].head.free_list = 0;
288  if (bf_read(isamb->file[i].bf, 0, 0, 0, hbuf))
289  {
290  /* got header assume "isamb"major minor len can fit in 16 bytes */
291  zint zint_tmp;
292  int major, minor, len, pos = 0;
293  int left;
294  const char *src = 0;
295  if (memcmp(hbuf, "isamb", 5))
296  {
297  yaz_log(YLOG_WARN, "bad isamb header for file %s", fname);
298  isamb_close(isamb);
299  return 0;
300  }
301  if (sscanf(hbuf+5, "%d %d %d", &major, &minor, &len) != 3)
302  {
303  yaz_log(YLOG_WARN, "bad isamb header for file %s", fname);
304  isamb_close(isamb);
305  return 0;
306  }
307  if (major != ISAMB_MAJOR_VERSION)
308  {
309  yaz_log(YLOG_WARN, "bad major version for file %s %d, must be %d",
310  fname, major, ISAMB_MAJOR_VERSION);
311  isamb_close(isamb);
312  return 0;
313  }
314  for (left = len - sizes[i]; left > 0; left = left - sizes[i])
315  {
316  pos++;
317  if (!bf_read(isamb->file[i].bf, pos, 0, 0, hbuf + pos*sizes[i]))
318  {
319  yaz_log(YLOG_WARN, "truncated isamb header for "
320  "file=%s len=%d pos=%d",
321  fname, len, pos);
322  isamb_close(isamb);
323  return 0;
324  }
325  }
326  src = hbuf + 16;
327  decode_ptr(&src, &isamb->file[i].head.first_block);
328  decode_ptr(&src, &isamb->file[i].head.last_block);
329  decode_ptr(&src, &zint_tmp);
330  isamb->file[i].head.block_size = (int) zint_tmp;
331  decode_ptr(&src, &zint_tmp);
332  isamb->file[i].head.block_max = (int) zint_tmp;
333  decode_ptr(&src, &isamb->file[i].head.free_list);
335  decode_ptr(&src, &isamb->root_ptr);
336  }
337  assert(isamb->file[i].head.block_size >= isamb->file[i].head.block_offset);
338  /* must rewrite the header if root ptr is in use (bug #1017) */
339  if (use_root_ptr && writeflag)
340  isamb->file[i].head_dirty = 1;
341  else
342  isamb->file[i].head_dirty = 0;
343  assert(isamb->file[i].head.block_size == sizes[i]);
344  }
345 #if ISAMB_DEBUG
346  yaz_log(YLOG_WARN, "isamb debug enabled. Things will be slower than usual");
347 #endif
348  return isamb;
349 }
350 
351 ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method,
352  int cache)
353 {
354  int sizes[CAT_NO];
355  int i, b_size = ISAMB_MIN_SIZE;
356 
357  for (i = 0; i<CAT_NO; i++)
358  {
359  sizes[i] = b_size;
360  b_size = b_size * ISAMB_FAC_SIZE;
361  }
362  return isamb_open2(bfs, name, writeflag, method, cache,
363  CAT_NO, sizes, 0);
364 }
365 
366 static void flush_blocks(ISAMB b, int cat)
367 {
368  while (b->file[cat].cache_entries)
369  {
370  struct ISAMB_cache_entry *ce_this = b->file[cat].cache_entries;
371  b->file[cat].cache_entries = ce_this->next;
372 
373  if (ce_this->dirty)
374  {
375  yaz_log(b->log_io, "bf_write: flush_blocks");
376  bf_write(b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf);
377  }
378  xfree(ce_this->buf);
379  xfree(ce_this);
380  }
381 }
382 
383 static int cache_block(ISAMB b, ISAM_P pos, unsigned char *userbuf, int wr)
384 {
385  int cat = (int) (pos&CAT_MASK);
386  int off = (int) (((pos/CAT_MAX) &
387  (ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size - 1))
388  * b->file[cat].head.block_size);
390  int no = 0;
391  struct ISAMB_cache_entry **ce, *ce_this = 0, **ce_last = 0;
392 
393  if (!b->cache)
394  return 0;
395 
396  assert(ISAMB_CACHE_ENTRY_SIZE >= b->file[cat].head.block_size);
397  for (ce = &b->file[cat].cache_entries; *ce; ce = &(*ce)->next, no++)
398  {
399  ce_last = ce;
400  if ((*ce)->pos == norm)
401  {
402  ce_this = *ce;
403  *ce = (*ce)->next; /* remove from list */
404 
405  ce_this->next = b->file[cat].cache_entries; /* move to front */
406  b->file[cat].cache_entries = ce_this;
407 
408  if (wr)
409  {
410  memcpy(ce_this->buf + off, userbuf,
411  b->file[cat].head.block_size);
412  ce_this->dirty = 1;
413  }
414  else
415  memcpy(userbuf, ce_this->buf + off,
416  b->file[cat].head.block_size);
417  return 1;
418  }
419  }
420  if (no >= b->cache_size)
421  {
422  assert(ce_last && *ce_last);
423  ce_this = *ce_last;
424  *ce_last = 0; /* remove the last entry from list */
425  if (ce_this->dirty)
426  {
427  yaz_log(b->log_io, "bf_write: cache_block");
428  bf_write(b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf);
429  }
430  xfree(ce_this->buf);
431  xfree(ce_this);
432  }
433  ce_this = xmalloc(sizeof(*ce_this));
434  ce_this->next = b->file[cat].cache_entries;
435  b->file[cat].cache_entries = ce_this;
436  ce_this->buf = xmalloc(ISAMB_CACHE_ENTRY_SIZE);
437  ce_this->pos = norm;
438  yaz_log(b->log_io, "bf_read: cache_block");
439  if (!bf_read(b->file[cat].bf, norm, 0, 0, ce_this->buf))
440  memset(ce_this->buf, 0, ISAMB_CACHE_ENTRY_SIZE);
441  if (wr)
442  {
443  memcpy(ce_this->buf + off, userbuf, b->file[cat].head.block_size);
444  ce_this->dirty = 1;
445  }
446  else
447  {
448  ce_this->dirty = 0;
449  memcpy(userbuf, ce_this->buf + off, b->file[cat].head.block_size);
450  }
451  return 1;
452 }
453 
454 
455 void isamb_close(ISAMB isamb)
456 {
457  int i;
458  for (i = 0; isamb->accessed_nodes[i]; i++)
459  yaz_log(YLOG_DEBUG, "isamb_close level leaf-%d: "ZINT_FORMAT" read, "
460  ZINT_FORMAT" skipped",
461  i, isamb->accessed_nodes[i], isamb->skipped_nodes[i]);
462  yaz_log(YLOG_DEBUG, "isamb_close returned "ZINT_FORMAT" values, "
463  "skipped "ZINT_FORMAT,
464  isamb->skipped_numbers, isamb->returned_numbers);
465 
466  for (i = 0; i<isamb->no_cat; i++)
467  {
468  flush_blocks(isamb, i);
469  if (isamb->file[i].head_dirty)
470  {
471  char hbuf[DST_BUF_SIZE];
472  int major = ISAMB_MAJOR_VERSION;
473  int len = 16;
474  char *dst = hbuf + 16;
475  int pos = 0, left;
476  int b_size = isamb->file[i].head.block_size;
477 
478  encode_ptr(&dst, isamb->file[i].head.first_block);
479  encode_ptr(&dst, isamb->file[i].head.last_block);
480  encode_ptr(&dst, isamb->file[i].head.block_size);
481  encode_ptr(&dst, isamb->file[i].head.block_max);
482  encode_ptr(&dst, isamb->file[i].head.free_list);
483 
485  encode_ptr(&dst, isamb->root_ptr);
486 
487  memset(dst, '\0', b_size); /* ensure no random bytes are written */
488 
489  len = dst - hbuf;
490 
491  /* print exactly 16 bytes (including trailing 0) */
492  yaz_snprintf(hbuf, 16, "isamb%02d %02d %02d\r\n", major,
493  isamb->minor_version, len);
494 
495  bf_write(isamb->file[i].bf, pos, 0, 0, hbuf);
496 
497  for (left = len - b_size; left > 0; left = left - b_size)
498  {
499  pos++;
500  bf_write(isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size);
501  }
502  }
503  if (isamb->file[i].bf)
504  bf_close (isamb->file[i].bf);
505  }
506  xfree(isamb->file);
507  xfree(isamb->method);
508  xfree(isamb);
509 }
510 
511 /* open_block: read one block at pos.
512  Decode leading sys bytes .. consisting of
513  Offset:Meaning
514  0: leader byte, != 0 leaf, == 0, non-leaf
515  1-2: used size of block
516  3-7*: number of items and all children
517 
518  * Reserve 5 bytes for large block sizes. 1 for small ones .. Number
519  of items. We can thus have at most 2^40 nodes.
520 */
522 {
523  int cat = (int) (pos&CAT_MASK);
524  const char *src;
525  int offset = b->file[cat].head.block_offset;
526  struct ISAMB_block *p;
527  if (!pos)
528  return 0;
529  p = xmalloc(sizeof(*p));
530  p->pos = pos;
531  p->cat = (int) (pos & CAT_MASK);
532  p->buf = xmalloc(b->file[cat].head.block_size);
533  p->cbuf = 0;
534 
535  if (!cache_block (b, pos, p->buf, 0))
536  {
537  yaz_log(b->log_io, "bf_read: open_block");
538  if (bf_read(b->file[cat].bf, pos/CAT_MAX, 0, 0, p->buf) != 1)
539  {
540  yaz_log(YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld",
541  (long) pos, (long) pos/CAT_MAX);
542  zebra_exit("isamb:open_block");
543  }
544  }
545  p->bytes = (char *)p->buf + offset;
546  p->leaf = p->buf[0];
547  p->size = (p->buf[1] + 256 * p->buf[2]) - offset;
548  if (p->size < 0)
549  {
550  yaz_log(YLOG_FATAL, "Bad block size %d in pos=" ZINT_FORMAT "\n",
551  p->size, pos);
552  }
553  assert(p->size >= 0);
554  src = (char*) p->buf + 3;
555  decode_ptr(&src, &p->no_items);
556 
557  p->offset = 0;
558  p->dirty = 0;
559  p->deleted = 0;
560  p->decodeClientData = (*b->method->codec.start)();
561  return p;
562 }
563 
564 struct ISAMB_block *new_block(ISAMB b, int leaf, int cat)
565 {
566  struct ISAMB_block *p;
567 
568  p = xmalloc(sizeof(*p));
569  p->buf = xmalloc(b->file[cat].head.block_size);
570 
571  if (!b->file[cat].head.free_list)
572  {
573  zint block_no;
574  block_no = b->file[cat].head.last_block++;
575  p->pos = block_no * CAT_MAX + cat;
576  if (b->log_freelist)
577  yaz_log(b->log_freelist, "got block "
578  ZINT_FORMAT " from last %d:" ZINT_FORMAT, p->pos,
579  cat, p->pos/CAT_MAX);
580  }
581  else
582  {
583  p->pos = b->file[cat].head.free_list;
584  assert((p->pos & CAT_MASK) == cat);
585  if (!cache_block(b, p->pos, p->buf, 0))
586  {
587  yaz_log(b->log_io, "bf_read: new_block");
588  if (!bf_read(b->file[cat].bf, p->pos/CAT_MAX, 0, 0, p->buf))
589  {
590  yaz_log(YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld",
591  (long) p->pos/CAT_MAX, (long) p->pos/CAT_MAX);
592  zebra_exit("isamb:new_block");
593  }
594  }
595  if (b->log_freelist)
596  yaz_log(b->log_freelist, "got block "
597  ZINT_FORMAT " from freelist %d:" ZINT_FORMAT, p->pos,
598  cat, p->pos/CAT_MAX);
599  memcpy(&b->file[cat].head.free_list, p->buf, sizeof(zint));
600  }
601  p->cat = cat;
602  b->file[cat].head_dirty = 1;
603  memset(p->buf, 0, b->file[cat].head.block_size);
604  p->bytes = (char*)p->buf + b->file[cat].head.block_offset;
605  p->leaf = leaf;
606  p->size = 0;
607  p->dirty = 1;
608  p->deleted = 0;
609  p->offset = 0;
610  p->no_items = 0;
611  p->decodeClientData = (*b->method->codec.start)();
612  return p;
613 }
614 
615 struct ISAMB_block *new_leaf(ISAMB b, int cat)
616 {
617  return new_block(b, 1, cat);
618 }
619 
620 
621 struct ISAMB_block *new_int(ISAMB b, int cat)
622 {
623  return new_block(b, 0, cat);
624 }
625 
626 static void check_block(ISAMB b, struct ISAMB_block *p)
627 {
628  assert(b); /* mostly to make the compiler shut up about unused b */
629  if (p->leaf)
630  {
631  ;
632  }
633  else
634  {
635  /* sanity check */
636  char *startp = p->bytes;
637  const char *src = startp;
638  char *endp = p->bytes + p->size;
639  ISAM_P pos;
640  void *c1 = (*b->method->codec.start)();
641 
642  decode_ptr(&src, &pos);
643  assert((pos&CAT_MASK) == p->cat);
644  while (src != endp)
645  {
646 #if INT_ENCODE
647  char file_item_buf[DST_ITEM_MAX];
648  char *file_item = file_item_buf;
649  (*b->method->codec.reset)(c1);
650  (*b->method->codec.decode)(c1, &file_item, &src);
651 #else
652  zint item_len;
653  decode_item_len(&src, &item_len);
654  assert(item_len > 0 && item_len < 128);
655  src += item_len;
656 #endif
657  decode_ptr(&src, &pos);
658  if ((pos&CAT_MASK) != p->cat)
659  {
660  assert((pos&CAT_MASK) == p->cat);
661  }
662  }
663  (*b->method->codec.stop)(c1);
664  }
665 }
666 
667 void close_block(ISAMB b, struct ISAMB_block *p)
668 {
669  if (!p)
670  return;
671  if (p->deleted)
672  {
673  yaz_log(b->log_freelist, "release block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT,
674  p->pos, p->cat, p->pos/CAT_MAX);
675  memcpy(p->buf, &b->file[p->cat].head.free_list, sizeof(zint));
676  b->file[p->cat].head.free_list = p->pos;
677  b->file[p->cat].head_dirty = 1;
678  if (!cache_block(b, p->pos, p->buf, 1))
679  {
680  yaz_log(b->log_io, "bf_write: close_block (deleted)");
681  bf_write(b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf);
682  }
683  }
684  else if (p->dirty)
685  {
686  int offset = b->file[p->cat].head.block_offset;
687  int size = p->size + offset;
688  char *dst = (char*)p->buf + 3;
689  assert(p->size >= 0);
690 
691  /* memset becuase encode_ptr usually does not write all bytes */
692  memset(p->buf, 0, b->file[p->cat].head.block_offset);
693  p->buf[0] = p->leaf;
694  p->buf[1] = size & 255;
695  p->buf[2] = size >> 8;
696  encode_ptr(&dst, p->no_items);
697  check_block(b, p);
698  if (!cache_block(b, p->pos, p->buf, 1))
699  {
700  yaz_log(b->log_io, "bf_write: close_block");
701  bf_write(b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf);
702  }
703  }
704  (*b->method->codec.stop)(p->decodeClientData);
705  xfree(p->buf);
706  xfree(p);
707 }
708 
709 int insert_sub(ISAMB b, struct ISAMB_block **p,
710  void *new_item, int *mode,
711  ISAMC_I *stream,
712  struct ISAMB_block **sp,
713  void *sub_item, int *sub_size,
714  const void *max_item);
715 
716 int insert_int(ISAMB b, struct ISAMB_block *p, void *lookahead_item,
717  int *mode,
718  ISAMC_I *stream, struct ISAMB_block **sp,
719  void *split_item, int *split_size, const void *last_max_item)
720 {
721  char *startp = p->bytes;
722  const char *src = startp;
723  char *endp = p->bytes + p->size;
724  ISAM_P pos;
725  struct ISAMB_block *sub_p1 = 0, *sub_p2 = 0;
726  char sub_item[DST_ITEM_MAX];
727  int sub_size;
728  int more = 0;
729  zint diff_terms = 0;
730  void *c1 = (*b->method->codec.start)();
731 
732  *sp = 0;
733 
734  assert(p->size >= 0);
735  decode_ptr(&src, &pos);
736  while (src != endp)
737  {
738  int d;
739  const char *src0 = src;
740 #if INT_ENCODE
741  char file_item_buf[DST_ITEM_MAX];
742  char *file_item = file_item_buf;
743  (*b->method->codec.reset)(c1);
744  (*b->method->codec.decode)(c1, &file_item, &src);
745  d = (*b->method->compare_item)(file_item_buf, lookahead_item);
746  if (d > 0)
747  {
748  sub_p1 = open_block(b, pos);
749  assert(sub_p1);
750  diff_terms -= sub_p1->no_items;
751  more = insert_sub(b, &sub_p1, lookahead_item, mode,
752  stream, &sub_p2,
753  sub_item, &sub_size, file_item_buf);
754  diff_terms += sub_p1->no_items;
755  src = src0;
756  break;
757  }
758 #else
759  zint item_len;
760  decode_item_len(&src, &item_len);
761  d = (*b->method->compare_item)(src, lookahead_item);
762  if (d > 0)
763  {
764  sub_p1 = open_block(b, pos);
765  assert(sub_p1);
766  diff_terms -= sub_p1->no_items;
767  more = insert_sub(b, &sub_p1, lookahead_item, mode,
768  stream, &sub_p2,
769  sub_item, &sub_size, src);
770  diff_terms += sub_p1->no_items;
771  src = src0;
772  break;
773  }
774  src += item_len;
775 #endif
776  decode_ptr(&src, &pos);
777  }
778  if (!sub_p1)
779  {
780  /* we reached the end. So lookahead > last item */
781  sub_p1 = open_block(b, pos);
782  assert(sub_p1);
783  diff_terms -= sub_p1->no_items;
784  more = insert_sub(b, &sub_p1, lookahead_item, mode, stream, &sub_p2,
785  sub_item, &sub_size, last_max_item);
786  diff_terms += sub_p1->no_items;
787  }
788  if (sub_p2)
789  diff_terms += sub_p2->no_items;
790  if (diff_terms)
791  {
792  p->dirty = 1;
793  p->no_items += diff_terms;
794  }
795  if (sub_p2)
796  {
797  /* there was a split - must insert pointer in this one */
798  char dst_buf[DST_BUF_SIZE];
799  char *dst = dst_buf;
800 #if INT_ENCODE
801  const char *sub_item_ptr = sub_item;
802 #endif
803  assert(sub_size < DST_ITEM_MAX && sub_size > 1);
804 
805  memcpy(dst, startp, src - startp);
806 
807  dst += src - startp;
808 
809 #if INT_ENCODE
810  (*b->method->codec.reset)(c1);
811  (*b->method->codec.encode)(c1, &dst, &sub_item_ptr);
812 #else
813  encode_item_len(&dst, sub_size); /* sub length and item */
814  memcpy(dst, sub_item, sub_size);
815  dst += sub_size;
816 #endif
817 
818  encode_ptr(&dst, sub_p2->pos); /* pos */
819 
820  if (endp - src) /* remaining data */
821  {
822  memcpy(dst, src, endp - src);
823  dst += endp - src;
824  }
825  p->size = dst - dst_buf;
826  assert(p->size >= 0);
827 
828  if (p->size <= b->file[p->cat].head.block_max)
829  {
830  /* it fits OK in this block */
831  memcpy(startp, dst_buf, dst - dst_buf);
832 
833  close_block(b, sub_p2);
834  }
835  else
836  {
837  /* must split _this_ block as well .. */
838  struct ISAMB_block *sub_p3;
839 #if INT_ENCODE
840  char file_item_buf[DST_ITEM_MAX];
841  char *file_item = file_item_buf;
842 #else
843  zint split_size_tmp;
844 #endif
845  zint no_items_first_half = 0;
846  int p_new_size;
847  const char *half;
848  src = dst_buf;
849  endp = dst;
850 
852 
853  p->dirty = 1;
854  close_block(b, sub_p2);
855 
856  half = src + b->file[p->cat].head.block_size/2;
857  decode_ptr(&src, &pos);
858 
859  if (b->enable_int_count)
860  {
861  /* read sub block so we can get no_items for it */
862  sub_p3 = open_block(b, pos);
863  no_items_first_half += sub_p3->no_items;
864  close_block(b, sub_p3);
865  }
866 
867  while (src <= half)
868  {
869 #if INT_ENCODE
870  file_item = file_item_buf;
871  (*b->method->codec.reset)(c1);
872  (*b->method->codec.decode)(c1, &file_item, &src);
873 #else
874  decode_item_len(&src, &split_size_tmp);
875  *split_size = (int) split_size_tmp;
876  src += *split_size;
877 #endif
878  decode_ptr(&src, &pos);
879 
880  if (b->enable_int_count)
881  {
882  /* read sub block so we can get no_items for it */
883  sub_p3 = open_block(b, pos);
884  no_items_first_half += sub_p3->no_items;
885  close_block(b, sub_p3);
886  }
887  }
888  /* p is first half */
889  p_new_size = src - dst_buf;
890  memcpy(p->bytes, dst_buf, p_new_size);
891 
892 #if INT_ENCODE
893  file_item = file_item_buf;
894  (*b->method->codec.reset)(c1);
895  (*b->method->codec.decode)(c1, &file_item, &src);
896  *split_size = file_item - file_item_buf;
897  memcpy(split_item, file_item_buf, *split_size);
898 #else
899  decode_item_len(&src, &split_size_tmp);
900  *split_size = (int) split_size_tmp;
901  memcpy(split_item, src, *split_size);
902  src += *split_size;
903 #endif
904  /* *sp is second half */
905  *sp = new_int(b, p->cat);
906  (*sp)->size = endp - src;
907  memcpy((*sp)->bytes, src, (*sp)->size);
908 
909  p->size = p_new_size;
910 
911  /* adjust no_items in first&second half */
912  (*sp)->no_items = p->no_items - no_items_first_half;
913  p->no_items = no_items_first_half;
914  }
915  p->dirty = 1;
916  }
917  close_block(b, sub_p1);
918  (*b->method->codec.stop)(c1);
919  return more;
920 }
921 
922 int insert_leaf(ISAMB b, struct ISAMB_block **sp1, void *lookahead_item,
923  int *lookahead_mode, ISAMC_I *stream,
924  struct ISAMB_block **sp2,
925  void *sub_item, int *sub_size,
926  const void *max_item)
927 {
928  struct ISAMB_block *p = *sp1;
929  char *endp = 0;
930  const char *src = 0;
931  char dst_buf[DST_BUF_SIZE], *dst = dst_buf;
932  int new_size;
933  void *c1 = (*b->method->codec.start)();
934  void *c2 = (*b->method->codec.start)();
935  int more = 1;
936  int quater = b->file[b->no_cat-1].head.block_max / 4;
937  char *mid_cut = dst_buf + quater * 2;
938  char *tail_cut = dst_buf + quater * 3;
939  char *maxp = dst_buf + b->file[b->no_cat-1].head.block_max;
940  char *half1 = 0;
941  char *half2 = 0;
942  char cut_item_buf[DST_ITEM_MAX];
943  int cut_item_size = 0;
944  int no_items = 0; /* number of items (total) */
945  int no_items_1 = 0; /* number of items (first half) */
946  int inserted_dst_bytes = 0;
947 
948  if (p && p->size)
949  {
950  char file_item_buf[DST_ITEM_MAX];
951  char *file_item = file_item_buf;
952 
953  src = p->bytes;
954  endp = p->bytes + p->size;
955  (*b->method->codec.decode)(c1, &file_item, &src);
956  while (1)
957  {
958  const char *dst_item = 0; /* resulting item to be inserted */
959  char *lookahead_next;
960  char *dst_0 = dst;
961  int d = -1;
962 
963  if (lookahead_item)
964  d = (*b->method->compare_item)(file_item_buf, lookahead_item);
965 
966  /* d now holds comparison between existing file item and
967  lookahead item
968  d = 0: equal
969  d > 0: lookahead before file
970  d < 0: lookahead after file
971  */
972  if (d > 0)
973  {
974  /* lookahead must be inserted */
975  dst_item = lookahead_item;
976  /* if this is not an insertion, it's really bad .. */
977  if (!*lookahead_mode)
978  {
979  yaz_log(YLOG_WARN, "isamb: Inconsistent register (1)");
980  assert(*lookahead_mode);
981  }
982  }
983  else if (d == 0 && *lookahead_mode == 2)
984  {
985  /* For mode == 2, we insert the new key anyway - even
986  though the comparison is 0. */
987  dst_item = lookahead_item;
988  p->dirty = 1;
989  }
990  else
991  dst_item = file_item_buf;
992 
993  if (d == 0 && !*lookahead_mode)
994  {
995  /* it's a deletion and they match so there is nothing
996  to be inserted anyway .. But mark the thing dirty
997  (file item was part of input.. The item will not be
998  part of output */
999  p->dirty = 1;
1000  }
1001  else if (!half1 && dst > mid_cut)
1002  {
1003  /* we have reached the splitting point for the first time */
1004  const char *dst_item_0 = dst_item;
1005  half1 = dst; /* candidate for splitting */
1006 
1007  /* encode the resulting item */
1008  (*b->method->codec.encode)(c2, &dst, &dst_item);
1009 
1010  cut_item_size = dst_item - dst_item_0;
1011  assert(cut_item_size > 0);
1012  memcpy(cut_item_buf, dst_item_0, cut_item_size);
1013 
1014  half2 = dst;
1015  no_items_1 = no_items;
1016  no_items++;
1017  }
1018  else
1019  {
1020  /* encode the resulting item */
1021  (*b->method->codec.encode)(c2, &dst, &dst_item);
1022  no_items++;
1023  }
1024 
1025  /* now move "pointers" .. result has been encoded .. */
1026  if (d > 0)
1027  {
1028  /* we must move the lookahead pointer */
1029 
1030  inserted_dst_bytes += (dst - dst_0);
1031  if (inserted_dst_bytes >= quater)
1032  /* no more room. Mark lookahead as "gone".. */
1033  lookahead_item = 0;
1034  else
1035  {
1036  /* move it really.. */
1037  lookahead_next = lookahead_item;
1038  if (!(*stream->read_item)(stream->clientData,
1039  &lookahead_next,
1040  lookahead_mode))
1041  {
1042  /* end of stream reached: no "more" and no lookahead */
1043  lookahead_item = 0;
1044  more = 0;
1045  }
1046  if (lookahead_item && max_item &&
1047  (*b->method->compare_item)(max_item, lookahead_item) <= 0)
1048  {
1049  /* the lookahead goes beyond what we allow in this
1050  leaf. Mark it as "gone" */
1051  lookahead_item = 0;
1052  }
1053 
1054  p->dirty = 1;
1055  }
1056  }
1057  else if (d == 0)
1058  {
1059  /* exact match .. move both pointers */
1060 
1061  lookahead_next = lookahead_item;
1062  if (!(*stream->read_item)(stream->clientData,
1063  &lookahead_next, lookahead_mode))
1064  {
1065  lookahead_item = 0;
1066  more = 0;
1067  }
1068  if (src == endp)
1069  break; /* end of file stream reached .. */
1070  file_item = file_item_buf; /* move file pointer */
1071  (*b->method->codec.decode)(c1, &file_item, &src);
1072  }
1073  else
1074  {
1075  /* file pointer must be moved */
1076  if (src == endp)
1077  break;
1078  file_item = file_item_buf;
1079  (*b->method->codec.decode)(c1, &file_item, &src);
1080  }
1081  }
1082  }
1083 
1084  /* this loop runs when we are "appending" to a leaf page. That is
1085  either it's empty (new) or all file items have been read in
1086  previous loop */
1087 
1088  maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater;
1089  while (lookahead_item)
1090  {
1091  char *dst_item;
1092  const char *src = lookahead_item;
1093  char *dst_0 = dst;
1094 
1095  /* if we have a lookahead item, we stop if we exceed the value of it */
1096  if (max_item &&
1097  (*b->method->compare_item)(max_item, lookahead_item) <= 0)
1098  {
1099  /* stop if we have reached the value of max item */
1100  break;
1101  }
1102  if (!*lookahead_mode)
1103  {
1104  /* this is append. So a delete is bad */
1105  yaz_log(YLOG_WARN, "isamb: Inconsistent register (2)");
1106  assert(*lookahead_mode);
1107  }
1108  else if (!half1 && dst > tail_cut)
1109  {
1110  const char *src_0 = src;
1111  half1 = dst; /* candidate for splitting */
1112 
1113  (*b->method->codec.encode)(c2, &dst, &src);
1114 
1115  cut_item_size = src - src_0;
1116  assert(cut_item_size > 0);
1117  memcpy(cut_item_buf, src_0, cut_item_size);
1118 
1119  no_items_1 = no_items;
1120  half2 = dst;
1121  }
1122  else
1123  (*b->method->codec.encode)(c2, &dst, &src);
1124 
1125  if (dst > maxp)
1126  {
1127  dst = dst_0;
1128  break;
1129  }
1130  no_items++;
1131  if (p)
1132  p->dirty = 1;
1133  dst_item = lookahead_item;
1134  if (!(*stream->read_item)(stream->clientData, &dst_item,
1135  lookahead_mode))
1136  {
1137  lookahead_item = 0;
1138  more = 0;
1139  }
1140  }
1141  new_size = dst - dst_buf;
1142  if (p && p->cat != b->no_cat-1 &&
1143  new_size > b->file[p->cat].head.block_max)
1144  {
1145  /* non-btree block will be removed */
1146  p->deleted = 1;
1147  close_block(b, p);
1148  /* delete it too!! */
1149  p = 0; /* make a new one anyway */
1150  }
1151  if (!p)
1152  { /* must create a new one */
1153  int i;
1154  for (i = 0; i < b->no_cat; i++)
1155  if (new_size <= b->file[i].head.block_max)
1156  break;
1157  if (i == b->no_cat)
1158  i = b->no_cat - 1;
1159  p = new_leaf(b, i);
1160  }
1161  if (new_size > b->file[p->cat].head.block_max)
1162  {
1163  char *first_dst;
1164  const char *cut_item = cut_item_buf;
1165 
1166  assert(half1);
1167  assert(half2);
1168 
1169  assert(cut_item_size > 0);
1170 
1171  /* first half */
1172  p->size = half1 - dst_buf;
1173  assert(p->size <= b->file[p->cat].head.block_max);
1174  memcpy(p->bytes, dst_buf, half1 - dst_buf);
1175  p->no_items = no_items_1;
1176 
1177  /* second half */
1178  *sp2 = new_leaf(b, p->cat);
1179 
1180  (*b->method->codec.reset)(c2);
1181 
1182  b->number_of_leaf_splits++;
1183 
1184  first_dst = (*sp2)->bytes;
1185 
1186  (*b->method->codec.encode)(c2, &first_dst, &cut_item);
1187 
1188  memcpy(first_dst, half2, dst - half2);
1189 
1190  (*sp2)->size = (first_dst - (*sp2)->bytes) + (dst - half2);
1191  assert((*sp2)->size <= b->file[p->cat].head.block_max);
1192  (*sp2)->no_items = no_items - no_items_1;
1193  (*sp2)->dirty = 1;
1194  p->dirty = 1;
1195  memcpy(sub_item, cut_item_buf, cut_item_size);
1196  *sub_size = cut_item_size;
1197  }
1198  else
1199  {
1200  memcpy(p->bytes, dst_buf, dst - dst_buf);
1201  p->size = new_size;
1202  p->no_items = no_items;
1203  }
1204  (*b->method->codec.stop)(c1);
1205  (*b->method->codec.stop)(c2);
1206  *sp1 = p;
1207  return more;
1208 }
1209 
1210 int insert_sub(ISAMB b, struct ISAMB_block **p, void *new_item,
1211  int *mode,
1212  ISAMC_I *stream,
1213  struct ISAMB_block **sp,
1214  void *sub_item, int *sub_size,
1215  const void *max_item)
1216 {
1217  if (!*p || (*p)->leaf)
1218  return insert_leaf(b, p, new_item, mode, stream, sp, sub_item,
1219  sub_size, max_item);
1220  else
1221  return insert_int(b, *p, new_item, mode, stream, sp, sub_item,
1222  sub_size, max_item);
1223 }
1224 
1226 {
1227  struct ISAMB_block *p1;
1228 
1229  if (!pos)
1230  return 0;
1231  p1 = open_block(b, pos);
1232  p1->deleted = 1;
1233  if (!p1->leaf)
1234  {
1235  zint sub_p;
1236  const char *src = p1->bytes + p1->offset;
1237 #if INT_ENCODE
1238  void *c1 = (*b->method->codec.start)();
1239 #endif
1240  decode_ptr(&src, &sub_p);
1241  isamb_unlink(b, sub_p);
1242 
1243  while (src != p1->bytes + p1->size)
1244  {
1245 #if INT_ENCODE
1246  char file_item_buf[DST_ITEM_MAX];
1247  char *file_item = file_item_buf;
1248  (*b->method->codec.reset)(c1);
1249  (*b->method->codec.decode)(c1, &file_item, &src);
1250 #else
1251  zint item_len;
1252  decode_item_len(&src, &item_len);
1253  src += item_len;
1254 #endif
1255  decode_ptr(&src, &sub_p);
1256  isamb_unlink(b, sub_p);
1257  }
1258 #if INT_ENCODE
1259  (*b->method->codec.stop)(c1);
1260 #endif
1261  }
1262  close_block(b, p1);
1263  return 0;
1264 }
1265 
1266 void isamb_merge(ISAMB b, ISAM_P *pos, ISAMC_I *stream)
1267 {
1268  char item_buf[DST_ITEM_MAX];
1269  char *item_ptr;
1270  int i_mode;
1271  int more;
1272  int must_delete = 0;
1273 
1274  if (b->cache < 0)
1275  {
1276  int more = 1;
1277  while (more)
1278  {
1279  item_ptr = item_buf;
1280  more =
1281  (*stream->read_item)(stream->clientData, &item_ptr, &i_mode);
1282  }
1283  *pos = 1;
1284  return;
1285  }
1286  item_ptr = item_buf;
1287  more = (*stream->read_item)(stream->clientData, &item_ptr, &i_mode);
1288  while (more)
1289  {
1290  struct ISAMB_block *p = 0, *sp = 0;
1291  char sub_item[DST_ITEM_MAX];
1292  int sub_size;
1293 
1294  if (*pos)
1295  p = open_block(b, *pos);
1296  more = insert_sub(b, &p, item_buf, &i_mode, stream, &sp,
1297  sub_item, &sub_size, 0);
1298  if (sp)
1299  { /* increase level of tree by one */
1300  struct ISAMB_block *p2 = new_int(b, p->cat);
1301  char *dst = p2->bytes + p2->size;
1302 #if INT_ENCODE
1303  void *c1 = (*b->method->codec.start)();
1304  const char *sub_item_ptr = sub_item;
1305 #endif
1306 
1307  encode_ptr(&dst, p->pos);
1308  assert(sub_size < DST_ITEM_MAX && sub_size > 1);
1309 #if INT_ENCODE
1310  (*b->method->codec.reset)(c1);
1311  (*b->method->codec.encode)(c1, &dst, &sub_item_ptr);
1312 #else
1313  encode_item_len(&dst, sub_size);
1314  memcpy(dst, sub_item, sub_size);
1315  dst += sub_size;
1316 #endif
1317  encode_ptr(&dst, sp->pos);
1318 
1319  p2->size = dst - p2->bytes;
1320  p2->no_items = p->no_items + sp->no_items;
1321  *pos = p2->pos; /* return new super page */
1322  close_block(b, sp);
1323  close_block(b, p2);
1324 #if INT_ENCODE
1325  (*b->method->codec.stop)(c1);
1326 #endif
1327  }
1328  else
1329  {
1330  *pos = p->pos; /* return current one (again) */
1331  }
1332  if (p->no_items == 0)
1333  must_delete = 1;
1334  else
1335  must_delete = 0;
1336  close_block(b, p);
1337  }
1338  if (must_delete)
1339  {
1340  isamb_unlink(b, *pos);
1341  *pos = 0;
1342  }
1343 }
1344 
1345 ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAM_P pos, int *level, int scope)
1346 {
1347  ISAMB_PP pp = xmalloc(sizeof(*pp));
1348  int i;
1349 
1350  assert(pos);
1351 
1352  pp->isamb = isamb;
1353  pp->block = xmalloc(ISAMB_MAX_LEVEL * sizeof(*pp->block));
1354 
1355  pp->pos = pos;
1356  pp->level = 0;
1357  pp->maxlevel = 0;
1358  pp->total_size = 0;
1359  pp->no_blocks = 0;
1360  pp->skipped_numbers = 0;
1361  pp->returned_numbers = 0;
1362  pp->scope = scope;
1363  for (i = 0; i<ISAMB_MAX_LEVEL; i++)
1364  pp->skipped_nodes[i] = pp->accessed_nodes[i] = 0;
1365  while (1)
1366  {
1367  struct ISAMB_block *p = open_block(isamb, pos);
1368  const char *src = p->bytes + p->offset;
1369  pp->block[pp->level] = p;
1370 
1371  pp->total_size += p->size;
1372  pp->no_blocks++;
1373  if (p->leaf)
1374  break;
1375  decode_ptr(&src, &pos);
1376  p->offset = src - p->bytes;
1377  pp->level++;
1378  pp->accessed_nodes[pp->level]++;
1379  }
1380  pp->block[pp->level+1] = 0;
1381  pp->maxlevel = pp->level;
1382  if (level)
1383  *level = pp->level;
1384  return pp;
1385 }
1386 
1388 {
1389  return isamb_pp_open_x(isamb, pos, 0, scope);
1390 }
1391 
1393 {
1394  int i;
1395  if (!pp)
1396  return;
1397  yaz_log(YLOG_DEBUG, "isamb_pp_close lev=%d returned "ZINT_FORMAT" values, "
1398  "skipped "ZINT_FORMAT,
1399  pp->maxlevel, pp->skipped_numbers, pp->returned_numbers);
1400  for (i = pp->maxlevel; i>=0; i--)
1401  if (pp->skipped_nodes[i] || pp->accessed_nodes[i])
1402  yaz_log(YLOG_DEBUG, "isamb_pp_close level leaf-%d: "
1403  ZINT_FORMAT" read, "ZINT_FORMAT" skipped", i,
1404  pp->accessed_nodes[i], pp->skipped_nodes[i]);
1407  for (i = pp->maxlevel; i>=0; i--)
1408  {
1409  pp->isamb->accessed_nodes[i] += pp->accessed_nodes[i];
1410  pp->isamb->skipped_nodes[i] += pp->skipped_nodes[i];
1411  }
1412  if (size)
1413  *size = pp->total_size;
1414  if (blocks)
1415  *blocks = pp->no_blocks;
1416  for (i = 0; i <= pp->level; i++)
1417  close_block(pp->isamb, pp->block[i]);
1418  xfree(pp->block);
1419  xfree(pp);
1420 }
1421 
1422 int isamb_block_info(ISAMB isamb, int cat)
1423 {
1424  if (cat >= 0 && cat < isamb->no_cat)
1425  return isamb->file[cat].head.block_size;
1426  return -1;
1427 }
1428 
1430 {
1431  isamb_pp_close_x(pp, 0, 0);
1432 }
1433 
1434 /* simple recursive dumper .. */
1435 static void isamb_dump_r(ISAMB b, ISAM_P pos, void (*pr)(const char *str),
1436  int level)
1437 {
1438  char buf[1024];
1439  char prefix_str[1024];
1440  if (pos)
1441  {
1442  struct ISAMB_block *p = open_block(b, pos);
1443  yaz_snprintf(prefix_str, sizeof(prefix_str),
1444  "%*s " ZINT_FORMAT " cat=%d size=%d max=%d items="
1445  ZINT_FORMAT, level*2, "", pos, p->cat, p->size,
1446  b->file[p->cat].head.block_max, p->no_items);
1447  (*pr)(prefix_str);
1448  yaz_snprintf(prefix_str, sizeof(prefix_str),
1449  "%*s " ZINT_FORMAT, level*2, "", pos);
1450  if (p->leaf)
1451  {
1452  while (p->offset < p->size)
1453  {
1454  const char *src = p->bytes + p->offset;
1455  char *dst = buf;
1456  (*b->method->codec.decode)(p->decodeClientData, &dst, &src);
1457  (*b->method->log_item)(YLOG_DEBUG, buf, prefix_str);
1458  p->offset = src - (char*) p->bytes;
1459  }
1460  assert(p->offset == p->size);
1461  }
1462  else
1463  {
1464  const char *src = p->bytes + p->offset;
1465  ISAM_P sub;
1466 
1467  decode_ptr(&src, &sub);
1468  p->offset = src - (char*) p->bytes;
1469 
1470  isamb_dump_r(b, sub, pr, level+1);
1471 
1472  while (p->offset < p->size)
1473  {
1474 #if INT_ENCODE
1475  char file_item_buf[DST_ITEM_MAX];
1476  char *file_item = file_item_buf;
1477  void *c1 = (*b->method->codec.start)();
1478  (*b->method->codec.decode)(c1, &file_item, &src);
1479  (*b->method->codec.stop)(c1);
1480  (*b->method->log_item)(YLOG_DEBUG, file_item_buf, prefix_str);
1481 #else
1482  zint item_len;
1483  decode_item_len(&src, &item_len);
1484  (*b->method->log_item)(YLOG_DEBUG, src, prefix_str);
1485  src += item_len;
1486 #endif
1487  decode_ptr(&src, &sub);
1488 
1489  p->offset = src - (char*) p->bytes;
1490 
1491  isamb_dump_r(b, sub, pr, level+1);
1492  }
1493  }
1494  close_block(b, p);
1495  }
1496 }
1497 
1498 void isamb_dump(ISAMB b, ISAM_P pos, void (*pr)(const char *str))
1499 {
1500  isamb_dump_r(b, pos, pr, 0);
1501 }
1502 
1504 {
1505  return isamb_pp_forward(pp, buf, 0);
1506 }
1507 
1508 
1509 void isamb_pp_pos(ISAMB_PP pp, double *current, double *total)
1510 { /* return an estimate of the current position and of the total number of */
1511  /* occureences in the isam tree, based on the current leaf */
1512  assert(total);
1513  assert(current);
1514 
1515  /* if end-of-stream PP may not be leaf */
1516 
1517  *total = (double) (pp->block[0]->no_items);
1518  *current = (double) pp->returned_numbers;
1519 #if ISAMB_DEBUG
1520  yaz_log(YLOG_LOG, "isamb_pp_pos returning: cur= %0.1f tot=%0.1f rn="
1521  ZINT_FORMAT, *current, *total, pp->returned_numbers);
1522 #endif
1523 }
1524 
1525 int isamb_pp_forward(ISAMB_PP pp, void *buf, const void *untilb)
1526 {
1527  char *dst = buf;
1528  const char *src;
1529  struct ISAMB_block *p = pp->block[pp->level];
1530  ISAMB b = pp->isamb;
1531  if (!p)
1532  return 0;
1533  again:
1534  while (p->offset == p->size)
1535  {
1536  ISAM_P pos;
1537 #if INT_ENCODE
1538  const char *src_0;
1539  void *c1;
1540  char file_item_buf[DST_ITEM_MAX];
1541  char *file_item = file_item_buf;
1542 #else
1543  zint item_len;
1544 #endif
1545  while (p->offset == p->size)
1546  {
1547  if (pp->level == 0)
1548  return 0;
1549  close_block(pp->isamb, pp->block[pp->level]);
1550  pp->block[pp->level] = 0;
1551  (pp->level)--;
1552  p = pp->block[pp->level];
1553  assert(!p->leaf);
1554  }
1555 
1556  assert(!p->leaf);
1557  src = p->bytes + p->offset;
1558 
1559 #if INT_ENCODE
1560  c1 = (*b->method->codec.start)();
1561  (*b->method->codec.decode)(c1, &file_item, &src);
1562 #else
1563  decode_ptr(&src, &item_len);
1564  src += item_len;
1565 #endif
1566  decode_ptr(&src, &pos);
1567  p->offset = src - (char*) p->bytes;
1568 
1569  src = p->bytes + p->offset;
1570 
1571  while(1)
1572  {
1573  if (!untilb || p->offset == p->size)
1574  break;
1575  assert(p->offset < p->size);
1576 #if INT_ENCODE
1577  src_0 = src;
1578  file_item = file_item_buf;
1579  (*b->method->codec.reset)(c1);
1580  (*b->method->codec.decode)(c1, &file_item, &src);
1581  if ((*b->method->compare_item)(untilb, file_item_buf) < pp->scope)
1582  {
1583  src = src_0;
1584  break;
1585  }
1586 #else
1587  decode_item_len(&src, &item_len);
1588  if ((*b->method->compare_item)(untilb, src) < pp->scope)
1589  break;
1590  src += item_len;
1591 #endif
1592  decode_ptr(&src, &pos);
1593  p->offset = src - (char*) p->bytes;
1594  }
1595 
1596  pp->level++;
1597 
1598  while (1)
1599  {
1600  pp->block[pp->level] = p = open_block(pp->isamb, pos);
1601 
1602  pp->total_size += p->size;
1603  pp->no_blocks++;
1604 
1605  if (p->leaf)
1606  {
1607  break;
1608  }
1609 
1610  src = p->bytes + p->offset;
1611  while(1)
1612  {
1613  decode_ptr(&src, &pos);
1614  p->offset = src - (char*) p->bytes;
1615 
1616  if (!untilb || p->offset == p->size)
1617  break;
1618  assert(p->offset < p->size);
1619 #if INT_ENCODE
1620  src_0 = src;
1621  file_item = file_item_buf;
1622  (*b->method->codec.reset)(c1);
1623  (*b->method->codec.decode)(c1, &file_item, &src);
1624  if ((*b->method->compare_item)(untilb, file_item_buf) < pp->scope)
1625  {
1626  src = src_0;
1627  break;
1628  }
1629 #else
1630  decode_ptr(&src, &item_len);
1631  if ((*b->method->compare_item)(untilb, src) <= pp->scope)
1632  break;
1633  src += item_len;
1634 #endif
1635  }
1636  pp->level++;
1637  }
1638 #if INT_ENCODE
1639  (*b->method->codec.stop)(c1);
1640 #endif
1641  }
1642  assert(p->offset < p->size);
1643  assert(p->leaf);
1644  while(1)
1645  {
1646  char *dst0 = dst;
1647  src = p->bytes + p->offset;
1648  (*pp->isamb->method->codec.decode)(p->decodeClientData, &dst, &src);
1649  p->offset = src - (char*) p->bytes;
1650  if (!untilb || (*pp->isamb->method->compare_item)(untilb, dst0) < pp->scope)
1651  break;
1652  dst = dst0;
1653  if (p->offset == p->size) goto again;
1654  }
1655  pp->returned_numbers++;
1656  return 1;
1657 }
1658 
1660 {
1661  return b->number_of_int_splits;
1662 }
1663 
1665 {
1666  return b->number_of_leaf_splits;
1667 }
1668 
1670 {
1671  return b->root_ptr;
1672 }
1673 
1674 void isamb_set_root_ptr(ISAMB b, zint root_ptr)
1675 {
1676  b->root_ptr = root_ptr;
1677 }
1678 
1679 
1680 /*
1681  * Local variables:
1682  * c-basic-offset: 4
1683  * c-file-style: "Stroustrup"
1684  * indent-tabs-mode: nil
1685  * End:
1686  * vim: shiftwidth=4 tabstop=8 expandtab
1687  */
1688 
int bf_read(BFile bf, zint no, int offset, int nbytes, void *buf)
read from block file (may call exit)
Definition: bfile.c:205
void bf_close(BFile bf)
closes a Block file (may call exit)
Definition: bfile.c:139
BFile bf_open(BFiles bfs, const char *name, int block_size, int wflag)
opens and returns a Block file handle
Definition: bfile.c:150
int bf_write(BFile bf, zint no, int offset, int nbytes, const void *buf)
writes block of bytes to file (may call exit)
Definition: bfile.c:232
void isamb_pp_pos(ISAMB_PP pp, double *current, double *total)
Definition: isamb.c:1509
ISAMB_PP isamb_pp_open(ISAMB isamb, ISAM_P pos, int scope)
Definition: isamb.c:1387
struct ISAMB_block * new_leaf(ISAMB b, int cat)
Definition: isamb.c:615
void isamb_pp_close_x(ISAMB_PP pp, zint *size, zint *blocks)
Definition: isamb.c:1392
static struct ISAMB_block * open_block(ISAMB b, ISAM_P pos)
Definition: isamb.c:521
static int cache_block(ISAMB b, ISAM_P pos, unsigned char *userbuf, int wr)
Definition: isamb.c:383
void isamb_set_int_count(ISAMB b, int v)
Definition: isamb.c:195
#define ISAMB_MAJOR_VERSION
Definition: isamb.c:36
zint isamb_get_leaf_splits(ISAMB b)
Definition: isamb.c:1664
ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, int cache)
Definition: isamb.c:351
#define ISAMB_FAC_SIZE
Definition: isamb.c:75
int isamb_block_info(ISAMB isamb, int cat)
Definition: isamb.c:1422
zint isamb_get_int_splits(ISAMB b)
Definition: isamb.c:1659
#define CAT_MAX
Definition: isamb.c:67
ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAM_P pos, int *level, int scope)
Definition: isamb.c:1345
static void isamb_dump_r(ISAMB b, ISAM_P pos, void(*pr)(const char *str), int level)
Definition: isamb.c:1435
int isamb_pp_forward(ISAMB_PP pp, void *buf, const void *untilb)
Definition: isamb.c:1525
#define CAT_NO
Definition: isamb.c:70
int insert_leaf(ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, int *lookahead_mode, ISAMC_I *stream, struct ISAMB_block **sp2, void *sub_item, int *sub_size, const void *max_item)
Definition: isamb.c:922
#define ISAMB_DEBUG
Definition: isamb.c:32
#define CAT_MASK
Definition: isamb.c:68
struct ISAMB_block * new_block(ISAMB b, int leaf, int cat)
Definition: isamb.c:564
int insert_int(ISAMB b, struct ISAMB_block *p, void *lookahead_item, int *mode, ISAMC_I *stream, struct ISAMB_block **sp, void *split_item, int *split_size, const void *last_max_item)
Definition: isamb.c:716
struct ISAMB_block * new_int(ISAMB b, int cat)
Definition: isamb.c:621
#define decode_item_len
Definition: isamb.c:170
#define DST_ITEM_MAX
Definition: isamb.c:54
zint isamb_get_root_ptr(ISAMB b)
Definition: isamb.c:1669
static void flush_blocks(ISAMB b, int cat)
Definition: isamb.c:366
void isamb_dump(ISAMB b, ISAM_P pos, void(*pr)(const char *str))
Definition: isamb.c:1498
void isamb_close(ISAMB isamb)
Definition: isamb.c:455
static void encode_ptr(char **dst, zint pos)
Definition: isamb.c:150
void isamb_set_root_ptr(ISAMB b, zint root_ptr)
Definition: isamb.c:1674
void close_block(ISAMB b, struct ISAMB_block *p)
Definition: isamb.c:667
void isamb_merge(ISAMB b, ISAM_P *pos, ISAMC_I *stream)
Definition: isamb.c:1266
#define ISAMB_MINOR_VERSION_WITH_ROOT
Definition: isamb.c:38
ISAMB isamb_open2(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, int cache, int no_cat, int *sizes, int use_root_ptr)
Definition: isamb.c:205
#define DST_BUF_SIZE
Definition: isamb.c:61
#define ISAMB_MIN_SIZE
Definition: isamb.c:73
void isamb_pp_close(ISAMB_PP pp)
Definition: isamb.c:1429
#define ISAMB_MINOR_VERSION_NO_ROOT
Definition: isamb.c:37
#define ISAMB_CACHE_ENTRY_SIZE
Definition: isamb.c:64
void isamb_set_cache_size(ISAMB b, int v)
Definition: isamb.c:200
int isamb_unlink(ISAMB b, ISAM_P pos)
Definition: isamb.c:1225
#define ISAMB_MAX_LEVEL
Definition: isamb.c:59
static void decode_ptr(const char **src, zint *pos)
Definition: isamb.c:172
#define encode_item_len
Definition: isamb.c:148
int insert_sub(ISAMB b, struct ISAMB_block **p, void *new_item, int *mode, ISAMC_I *stream, struct ISAMB_block **sp, void *sub_item, int *sub_size, const void *max_item)
Definition: isamb.c:1210
int isamb_pp_read(ISAMB_PP pp, void *buf)
Definition: isamb.c:1503
static void check_block(ISAMB b, struct ISAMB_block *p)
Definition: isamb.c:626
zint ISAM_P
Definition: isamc.h:28
ISAM_P pos
Definition: isamb.c:134
zint accessed_nodes[ISAMB_MAX_LEVEL]
Definition: isamb.c:142
zint skipped_numbers
Definition: isamb.c:139
zint skipped_nodes[ISAMB_MAX_LEVEL]
Definition: isamb.c:141
int level
Definition: isamb.c:135
struct ISAMB_block ** block
Definition: isamb.c:143
zint returned_numbers
Definition: isamb.c:140
int scope
Definition: isamb.c:144
ISAMB isamb
Definition: isamb.c:133
zint total_size
Definition: isamb.c:137
int maxlevel
Definition: isamb.c:136
zint no_blocks
Definition: isamb.c:138
int deleted
Definition: isamb.c:122
char * bytes
Definition: isamb.c:125
char * cbuf
Definition: isamb.c:126
unsigned char * buf
Definition: isamb.c:127
int cat
Definition: isamb.c:118
int dirty
Definition: isamb.c:121
int log_rw
Definition: isamb.c:129
int leaf
Definition: isamb.c:120
int size
Definition: isamb.c:119
int offset
Definition: isamb.c:123
ISAM_P pos
Definition: isamb.c:117
void * decodeClientData
Definition: isamb.c:128
zint no_items
Definition: isamb.c:124
Definition: isamb.c:80
int dirty
Definition: isamb.c:83
ISAM_P pos
Definition: isamb.c:81
int hits
Definition: isamb.c:84
struct ISAMB_cache_entry * next
Definition: isamb.c:85
unsigned char * buf
Definition: isamb.c:82
BFile bf
Definition: isamb.c:89
struct ISAMB_cache_entry * cache_entries
Definition: isamb.c:92
struct ISAMB_head head
Definition: isamb.c:91
int head_dirty
Definition: isamb.c:90
zint first_block
Definition: isamb.c:41
zint last_block
Definition: isamb.c:42
int block_offset
Definition: isamb.c:47
zint no_items
Definition: isamb.c:44
int block_max
Definition: isamb.c:46
int block_size
Definition: isamb.c:45
zint free_list
Definition: isamb.c:43
Definition: isamb.c:95
int log_freelist
Definition: isamb.c:103
int cache
Definition: isamb.c:101
int no_cat
Definition: isamb.c:100
int minor_version
Definition: isamb.c:112
zint returned_numbers
Definition: isamb.c:105
zint root_ptr
Definition: isamb.c:113
zint number_of_int_splits
Definition: isamb.c:108
zint number_of_leaf_splits
Definition: isamb.c:109
zint skipped_numbers
Definition: isamb.c:104
struct ISAMB_file * file
Definition: isamb.c:99
zint accessed_nodes[ISAMB_MAX_LEVEL]
Definition: isamb.c:107
int cache_size
Definition: isamb.c:111
int enable_int_count
Definition: isamb.c:110
zint skipped_nodes[ISAMB_MAX_LEVEL]
Definition: isamb.c:106
ISAMC_M * method
Definition: isamb.c:97
int log_io
Definition: isamb.c:102
BFiles bfs
Definition: isamb.c:96
int(* read_item)(void *clientData, char **dst, int *insertMode)
Definition: isamc.h:53
void * clientData
Definition: isamc.h:54
int(* compare_item)(const void *a, const void *b)
Definition: isamc.h:43
ISAM_CODEC codec
Definition: isamc.h:46
void(* log_item)(int logmask, const void *p, const char *txt)
Definition: isamc.h:44
void(* decode)(void *p, char **dst, const char **src)
Definition: isam-codec.h:26
void(* stop)(void *p)
Definition: isam-codec.h:25
void(* encode)(void *p, char **dst, const char **src)
Definition: isam-codec.h:27
void(* reset)(void *p)
Definition: isam-codec.h:28
void *(* start)(void)
Definition: isam-codec.h:24
const char * scope
Definition: tstlockscope.c:40
long zint
Zebra integer.
Definition: util.h:66
void zebra_exit(const char *msg)
Definition: exit.c:26
#define ZINT_FORMAT
Definition: util.h:72