27 #include <yaz/snprintf.h>
28 #include <yaz/xmalloc.h>
36 #define ISAMB_MAJOR_VERSION 3
37 #define ISAMB_MINOR_VERSION_NO_ROOT 0
38 #define ISAMB_MINOR_VERSION_WITH_ROOT 1
54 #define DST_ITEM_MAX 5000
57 #define ISAMB_MAX_PAGE 32768
59 #define ISAMB_MAX_LEVEL 10
61 #define DST_BUF_SIZE (2*ISAMB_MAX_PAGE+DST_ITEM_MAX+100)
64 #define ISAMB_CACHE_ENTRY_SIZE ISAMB_MAX_PAGE
68 #define CAT_MASK (CAT_MAX-1)
73 #define ISAMB_MIN_SIZE 32
75 #define ISAMB_FAC_SIZE 4
78 #define ISAMB_PTR_CODEC 1
148 #define encode_item_len encode_ptr
152 unsigned char *bp = (
unsigned char*) *dst;
156 *bp++ = (
unsigned char) (128 | (
pos & 127));
159 *bp++ = (
unsigned char)
pos;
165 memcpy(*dst, &
pos,
sizeof(
pos));
166 (*dst) +=
sizeof(
pos);
170 #define decode_item_len decode_ptr
178 while (((c = *(
const unsigned char *)((*src)++)) & 128))
180 d += ((
zint) (c & 127) << r);
183 d += ((
zint) c << r);
189 memcpy(
pos, *src,
sizeof(*
pos));
190 (*src) +=
sizeof(*pos);
206 int cache,
int no_cat,
int *sizes,
int use_root_ptr)
208 ISAMB isamb = xmalloc(
sizeof(*isamb));
215 memcpy(isamb->
method, method,
sizeof(*method));
219 isamb->
cache = cache;
239 yaz_log(YLOG_WARN,
"isamb_open %s. Degraded TEST mode", name);
243 assert(cache == 0 || cache == 1);
247 for (i = 0; i < isamb->
no_cat; i++)
254 for (i = 0; i < isamb->
no_cat; i++)
259 yaz_snprintf(fname,
sizeof(fname),
"%s%c", name, i+
'A');
264 isamb->
file[i].
bf =
bf_open(bfs, fname, sizes[i], writeflag);
278 if (i == isamb->
no_cat-1 || sizes[i] > 128)
292 int major, minor, len,
pos = 0;
295 if (memcmp(hbuf,
"isamb", 5))
297 yaz_log(YLOG_WARN,
"bad isamb header for file %s", fname);
301 if (sscanf(hbuf+5,
"%d %d %d", &major, &minor, &len) != 3)
303 yaz_log(YLOG_WARN,
"bad isamb header for file %s", fname);
309 yaz_log(YLOG_WARN,
"bad major version for file %s %d, must be %d",
314 for (left = len - sizes[i]; left > 0; left = left - sizes[i])
319 yaz_log(YLOG_WARN,
"truncated isamb header for "
320 "file=%s len=%d pos=%d",
339 if (use_root_ptr && writeflag)
346 yaz_log(YLOG_WARN,
"isamb debug enabled. Things will be slower than usual");
357 for (i = 0; i<
CAT_NO; i++)
362 return isamb_open2(bfs, name, writeflag, method, cache,
375 yaz_log(b->
log_io,
"bf_write: flush_blocks");
400 if ((*ce)->pos == norm)
410 memcpy(ce_this->
buf + off, userbuf,
415 memcpy(userbuf, ce_this->
buf + off,
422 assert(ce_last && *ce_last);
427 yaz_log(b->
log_io,
"bf_write: cache_block");
433 ce_this = xmalloc(
sizeof(*ce_this));
438 yaz_log(b->
log_io,
"bf_read: cache_block");
459 yaz_log(YLOG_DEBUG,
"isamb_close level leaf-%d: "ZINT_FORMAT" read, "
462 yaz_log(YLOG_DEBUG,
"isamb_close returned "ZINT_FORMAT" values, "
466 for (i = 0; i<isamb->
no_cat; i++)
474 char *dst = hbuf + 16;
487 memset(dst,
'\0', b_size);
492 yaz_snprintf(hbuf, 16,
"isamb%02d %02d %02d\r\n", major,
497 for (left = len - b_size; left > 0; left = left - b_size)
529 p = xmalloc(
sizeof(*p));
537 yaz_log(b->
log_io,
"bf_read: open_block");
540 yaz_log(YLOG_FATAL,
"isamb: read fail for pos=%ld block=%ld",
550 yaz_log(YLOG_FATAL,
"Bad block size %d in pos=" ZINT_FORMAT "\n",
553 assert(p->
size >= 0);
554 src = (
char*) p->
buf + 3;
568 p = xmalloc(
sizeof(*p));
587 yaz_log(b->
log_io,
"bf_read: new_block");
590 yaz_log(YLOG_FATAL,
"isamb: read fail for pos=%ld block=%ld",
636 char *startp = p->
bytes;
637 const char *src = startp;
648 char *file_item = file_item_buf;
654 assert(item_len > 0 && item_len < 128);
680 yaz_log(b->
log_io,
"bf_write: close_block (deleted)");
688 char *dst = (
char*)p->
buf + 3;
689 assert(p->
size >= 0);
700 yaz_log(b->
log_io,
"bf_write: close_block");
710 void *new_item,
int *mode,
713 void *sub_item,
int *sub_size,
714 const void *max_item);
719 void *split_item,
int *split_size,
const void *last_max_item)
721 char *startp = p->
bytes;
722 const char *src = startp;
734 assert(p->
size >= 0);
739 const char *src0 = src;
742 char *file_item = file_item_buf;
751 more =
insert_sub(b, &sub_p1, lookahead_item, mode,
753 sub_item, &sub_size, file_item_buf);
767 more =
insert_sub(b, &sub_p1, lookahead_item, mode,
769 sub_item, &sub_size, src);
784 more =
insert_sub(b, &sub_p1, lookahead_item, mode, stream, &sub_p2,
785 sub_item, &sub_size, last_max_item);
789 diff_terms += sub_p2->no_items;
801 const char *sub_item_ptr = sub_item;
803 assert(sub_size < DST_ITEM_MAX && sub_size > 1);
805 memcpy(dst, startp, src - startp);
814 memcpy(dst, sub_item, sub_size);
822 memcpy(dst, src, endp - src);
825 p->
size = dst - dst_buf;
826 assert(p->
size >= 0);
831 memcpy(startp, dst_buf, dst - dst_buf);
841 char *file_item = file_item_buf;
845 zint no_items_first_half = 0;
863 no_items_first_half += sub_p3->
no_items;
870 file_item = file_item_buf;
875 *split_size = (int) split_size_tmp;
884 no_items_first_half += sub_p3->
no_items;
889 p_new_size = src - dst_buf;
890 memcpy(p->
bytes, dst_buf, p_new_size);
893 file_item = file_item_buf;
896 *split_size = file_item - file_item_buf;
897 memcpy(split_item, file_item_buf, *split_size);
900 *split_size = (int) split_size_tmp;
901 memcpy(split_item, src, *split_size);
906 (*sp)->size = endp - src;
907 memcpy((*sp)->bytes, src, (*sp)->size);
909 p->
size = p_new_size;
912 (*sp)->no_items = p->
no_items - no_items_first_half;
923 int *lookahead_mode,
ISAMC_I *stream,
925 void *sub_item,
int *sub_size,
926 const void *max_item)
937 char *mid_cut = dst_buf + quater * 2;
938 char *tail_cut = dst_buf + quater * 3;
943 int cut_item_size = 0;
946 int inserted_dst_bytes = 0;
951 char *file_item = file_item_buf;
958 const char *dst_item = 0;
959 char *lookahead_next;
975 dst_item = lookahead_item;
977 if (!*lookahead_mode)
979 yaz_log(YLOG_WARN,
"isamb: Inconsistent register (1)");
980 assert(*lookahead_mode);
983 else if (d == 0 && *lookahead_mode == 2)
987 dst_item = lookahead_item;
991 dst_item = file_item_buf;
993 if (d == 0 && !*lookahead_mode)
1001 else if (!half1 && dst > mid_cut)
1004 const char *dst_item_0 = dst_item;
1010 cut_item_size = dst_item - dst_item_0;
1011 assert(cut_item_size > 0);
1012 memcpy(cut_item_buf, dst_item_0, cut_item_size);
1030 inserted_dst_bytes += (dst - dst_0);
1031 if (inserted_dst_bytes >= quater)
1037 lookahead_next = lookahead_item;
1046 if (lookahead_item && max_item &&
1061 lookahead_next = lookahead_item;
1063 &lookahead_next, lookahead_mode))
1070 file_item = file_item_buf;
1078 file_item = file_item_buf;
1089 while (lookahead_item)
1092 const char *src = lookahead_item;
1102 if (!*lookahead_mode)
1105 yaz_log(YLOG_WARN,
"isamb: Inconsistent register (2)");
1106 assert(*lookahead_mode);
1108 else if (!half1 && dst > tail_cut)
1110 const char *src_0 = src;
1115 cut_item_size = src - src_0;
1116 assert(cut_item_size > 0);
1117 memcpy(cut_item_buf, src_0, cut_item_size);
1133 dst_item = lookahead_item;
1141 new_size = dst - dst_buf;
1154 for (i = 0; i < b->
no_cat; i++)
1155 if (new_size <= b->file[i].head.block_max)
1164 const char *cut_item = cut_item_buf;
1169 assert(cut_item_size > 0);
1172 p->
size = half1 - dst_buf;
1174 memcpy(p->
bytes, dst_buf, half1 - dst_buf);
1184 first_dst = (*sp2)->bytes;
1188 memcpy(first_dst, half2, dst - half2);
1190 (*sp2)->size = (first_dst - (*sp2)->bytes) + (dst - half2);
1192 (*sp2)->no_items =
no_items - no_items_1;
1195 memcpy(sub_item, cut_item_buf, cut_item_size);
1196 *sub_size = cut_item_size;
1200 memcpy(p->
bytes, dst_buf, dst - dst_buf);
1214 void *sub_item,
int *sub_size,
1215 const void *max_item)
1217 if (!*p || (*p)->
leaf)
1218 return insert_leaf(b, p, new_item, mode, stream, sp, sub_item,
1219 sub_size, max_item);
1221 return insert_int(b, *p, new_item, mode, stream, sp, sub_item,
1222 sub_size, max_item);
1247 char *file_item = file_item_buf;
1272 int must_delete = 0;
1279 item_ptr = item_buf;
1286 item_ptr = item_buf;
1296 more =
insert_sub(b, &p, item_buf, &i_mode, stream, &sp,
1297 sub_item, &sub_size, 0);
1304 const char *sub_item_ptr = sub_item;
1308 assert(sub_size < DST_ITEM_MAX && sub_size > 1);
1314 memcpy(dst, sub_item, sub_size);
1347 ISAMB_PP pp = xmalloc(
sizeof(*pp));
1397 yaz_log(YLOG_DEBUG,
"isamb_pp_close lev=%d returned "ZINT_FORMAT" values, "
1402 yaz_log(YLOG_DEBUG,
"isamb_pp_close level leaf-%d: "
1416 for (i = 0; i <= pp->
level; i++)
1424 if (
cat >= 0 && cat < isamb->no_cat)
1439 char prefix_str[1024];
1443 yaz_snprintf(prefix_str,
sizeof(prefix_str),
1444 "%*s " ZINT_FORMAT " cat=%d size=%d max=%d items="
1448 yaz_snprintf(prefix_str,
sizeof(prefix_str),
1476 char *file_item = file_item_buf;
1520 yaz_log(YLOG_LOG,
"isamb_pp_pos returning: cur= %0.1f tot=%0.1f rn="
1541 char *file_item = file_item_buf;
1578 file_item = file_item_buf;
1621 file_item = file_item_buf;
int bf_read(BFile bf, zint no, int offset, int nbytes, void *buf)
read from block file (may call exit)
void bf_close(BFile bf)
closes a Block file (may call exit)
BFile bf_open(BFiles bfs, const char *name, int block_size, int wflag)
opens and returns a Block file handle
int bf_write(BFile bf, zint no, int offset, int nbytes, const void *buf)
writes block of bytes to file (may call exit)
void isamb_pp_pos(ISAMB_PP pp, double *current, double *total)
ISAMB_PP isamb_pp_open(ISAMB isamb, ISAM_P pos, int scope)
struct ISAMB_block * new_leaf(ISAMB b, int cat)
void isamb_pp_close_x(ISAMB_PP pp, zint *size, zint *blocks)
static struct ISAMB_block * open_block(ISAMB b, ISAM_P pos)
static int cache_block(ISAMB b, ISAM_P pos, unsigned char *userbuf, int wr)
void isamb_set_int_count(ISAMB b, int v)
#define ISAMB_MAJOR_VERSION
zint isamb_get_leaf_splits(ISAMB b)
ISAMB isamb_open(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, int cache)
int isamb_block_info(ISAMB isamb, int cat)
zint isamb_get_int_splits(ISAMB b)
ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAM_P pos, int *level, int scope)
static void isamb_dump_r(ISAMB b, ISAM_P pos, void(*pr)(const char *str), int level)
int isamb_pp_forward(ISAMB_PP pp, void *buf, const void *untilb)
int insert_leaf(ISAMB b, struct ISAMB_block **sp1, void *lookahead_item, int *lookahead_mode, ISAMC_I *stream, struct ISAMB_block **sp2, void *sub_item, int *sub_size, const void *max_item)
struct ISAMB_block * new_block(ISAMB b, int leaf, int cat)
int insert_int(ISAMB b, struct ISAMB_block *p, void *lookahead_item, int *mode, ISAMC_I *stream, struct ISAMB_block **sp, void *split_item, int *split_size, const void *last_max_item)
struct ISAMB_block * new_int(ISAMB b, int cat)
zint isamb_get_root_ptr(ISAMB b)
static void flush_blocks(ISAMB b, int cat)
void isamb_dump(ISAMB b, ISAM_P pos, void(*pr)(const char *str))
void isamb_close(ISAMB isamb)
static void encode_ptr(char **dst, zint pos)
void isamb_set_root_ptr(ISAMB b, zint root_ptr)
void close_block(ISAMB b, struct ISAMB_block *p)
void isamb_merge(ISAMB b, ISAM_P *pos, ISAMC_I *stream)
#define ISAMB_MINOR_VERSION_WITH_ROOT
ISAMB isamb_open2(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, int cache, int no_cat, int *sizes, int use_root_ptr)
void isamb_pp_close(ISAMB_PP pp)
#define ISAMB_MINOR_VERSION_NO_ROOT
#define ISAMB_CACHE_ENTRY_SIZE
void isamb_set_cache_size(ISAMB b, int v)
int isamb_unlink(ISAMB b, ISAM_P pos)
static void decode_ptr(const char **src, zint *pos)
int insert_sub(ISAMB b, struct ISAMB_block **p, void *new_item, int *mode, ISAMC_I *stream, struct ISAMB_block **sp, void *sub_item, int *sub_size, const void *max_item)
int isamb_pp_read(ISAMB_PP pp, void *buf)
static void check_block(ISAMB b, struct ISAMB_block *p)
zint accessed_nodes[ISAMB_MAX_LEVEL]
zint skipped_nodes[ISAMB_MAX_LEVEL]
struct ISAMB_block ** block
struct ISAMB_cache_entry * next
struct ISAMB_cache_entry * cache_entries
zint number_of_int_splits
zint number_of_leaf_splits
zint accessed_nodes[ISAMB_MAX_LEVEL]
zint skipped_nodes[ISAMB_MAX_LEVEL]
int(* read_item)(void *clientData, char **dst, int *insertMode)
int(* compare_item)(const void *a, const void *b)
void(* log_item)(int logmask, const void *p, const char *txt)
void(* decode)(void *p, char **dst, const char **src)
void(* encode)(void *p, char **dst, const char **src)
void zebra_exit(const char *msg)