IDZEBRA  2.2.7
sortidx.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 
21 #if HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 #include <assert.h>
25 #include <string.h>
26 
27 #include <yaz/log.h>
28 #include <yaz/xmalloc.h>
29 #include <yaz/snprintf.h>
30 #include <idzebra/isamb.h>
31 #include <idzebra/bfile.h>
32 #include <sortidx.h>
33 #include "recindex.h"
34 
35 #define SORT_MAX_TERM 110
36 #define SORT_MAX_MULTI 4096
37 
38 #define SORT_IDX_BLOCKSIZE 64
39 
40 struct sort_term {
45 };
46 
47 
48 static void sort_term_log_item(int level, const void *b, const char *txt)
49 {
50  struct sort_term a1;
51 
52  memcpy(&a1, b, sizeof(a1));
53 
54  yaz_log(level, "%s " ZINT_FORMAT " " ZINT_FORMAT " %.*s", txt, a1.sysno,
55  a1.section_id, (int) a1.length-1, a1.term);
56 }
57 
58 static int sort_term_compare(const void *a, const void *b)
59 {
60  struct sort_term a1, b1;
61 
62  memcpy(&a1, a, sizeof(a1));
63  memcpy(&b1, b, sizeof(b1));
64 
65  if (a1.sysno > b1.sysno)
66  return 1;
67  else if (a1.sysno < b1.sysno)
68  return -1;
69  if (a1.section_id > b1.section_id)
70  return 1;
71  else if (a1.section_id < b1.section_id)
72  return -1;
73 
74  return 0;
75 }
76 
/*
 * Codec start handler. The sort codecs keep no state between items,
 * so no handle is allocated and a null pointer is returned.
 */
static void *sort_term_code_start(void)
{
    return NULL;
}
81 
82 static void sort_term_encode1(void *p, char **dst, const char **src)
83 {
84  struct sort_term a1;
85 
86  memcpy(&a1, *src, sizeof(a1));
87  *src += sizeof(a1);
88 
89  zebra_zint_encode(dst, a1.sysno); /* encode record id */
90  strcpy(*dst, a1.term); /* then sort term, 0 terminated */
91  *dst += strlen(a1.term) + 1;
92 }
93 
94 static void sort_term_encode2(void *p, char **dst, const char **src)
95 {
96  struct sort_term a1;
97 
98  memcpy(&a1, *src, sizeof(a1));
99  *src += sizeof(a1);
100 
101  zebra_zint_encode(dst, a1.sysno);
102  zebra_zint_encode(dst, a1.section_id);
103  zebra_zint_encode(dst, a1.length); /* encode length */
104  memcpy(*dst, a1.term, a1.length);
105  *dst += a1.length;
106 }
107 
108 static void sort_term_decode1(void *p, char **dst, const char **src)
109 {
110  struct sort_term a1;
111  size_t slen;
112 
113  zebra_zint_decode(src, &a1.sysno);
114  a1.section_id = 0;
115 
116  strcpy(a1.term, *src);
117  slen = 1 + strlen(a1.term);
118  *src += slen;
119  a1.length = slen;
120 
121  memcpy(*dst, &a1, sizeof(a1));
122  *dst += sizeof(a1);
123 }
124 
125 static void sort_term_decode2(void *p, char **dst, const char **src)
126 {
127  struct sort_term a1;
128 
129  zebra_zint_decode(src, &a1.sysno);
130  zebra_zint_decode(src, &a1.section_id);
131  zebra_zint_decode(src, &a1.length);
132 
133  memcpy(a1.term, *src, a1.length);
134  *src += a1.length;
135 
136  memcpy(*dst, &a1, sizeof(a1));
137  *dst += sizeof(a1);
138 }
139 
/* Codec reset handler: there is no codec state, so nothing is done. */
static void sort_term_code_reset(void *p)
{
    (void) p;
}
143 
/* Codec stop handler: there is no codec state, so nothing is released. */
static void sort_term_code_stop(void *p)
{
    (void) p;
}
147 
149  int no;
151  struct sort_term st;
152 };
153 
154 static int sort_term_code_read(void *vp, char **dst, int *insertMode)
155 {
156  struct sort_term_stream *s = (struct sort_term_stream *) vp;
157 
158  if (s->no == 0)
159  return 0;
160 
161  (s->no)--;
162 
163  *insertMode = s->insert_flag;
164  memcpy(*dst, &s->st, sizeof(s->st));
165  *dst += sizeof(s->st);
166  return 1;
167 }
168 
169 struct sortFileHead {
171 };
172 
173 struct sortFile {
174  int id;
175  union {
178  } u;
181  struct sortFile *next;
182  struct sortFileHead head;
185 };
186 
191  int type;
192  char *entry_buf;
194  struct sortFile *files;
195 };
196 
197 zebra_sort_index_t zebra_sort_open(BFiles bfs, int write_flag, int type)
198 {
199  zebra_sort_index_t si = (zebra_sort_index_t) xmalloc(sizeof(*si));
200  si->bfs = bfs;
201  si->write_flag = write_flag;
202  si->current_file = NULL;
203  si->files = NULL;
204  si->type = type;
205  si->entry_buf = (char *) xmalloc(SORT_IDX_ENTRYSIZE);
206  return si;
207 }
208 
210 {
211  struct sortFile *sf = si->files;
212  while (sf)
213  {
214  struct sortFile *sf_next = sf->next;
215  switch(si->type)
216  {
218  bf_close(sf->u.bf);
219  break;
222  if (sf->isam_pp)
223  isamb_pp_close(sf->isam_pp);
224  isamb_set_root_ptr(sf->u.isamb, sf->isam_p);
225  isamb_close(sf->u.isamb);
226  break;
227  }
228  xfree(sf);
229  sf = sf_next;
230  }
231  xfree(si->entry_buf);
232  xfree(si);
233 }
234 
236 {
237  int isam_block_size = 4096;
238 
239  ISAMC_M method;
240  char fname[80];
241  struct sortFile *sf;
242 
244  method.log_item = sort_term_log_item;
247  method.codec.stop = sort_term_code_stop;
248 
249  if (si->current_file && si->current_file->id == id)
250  return 0;
251  for (sf = si->files; sf; sf = sf->next)
252  if (sf->id == id)
253  {
254  si->current_file = sf;
255  return 0;
256  }
257  sf = (struct sortFile *) xmalloc(sizeof(*sf));
258  sf->id = id;
259 
260  switch(si->type)
261  {
263  sf->u.bf = NULL;
264  yaz_snprintf(fname, sizeof(fname), "sort%d", id);
265  yaz_log(YLOG_DEBUG, "sort idx %s wr=%d", fname, si->write_flag);
266  sf->u.bf = bf_open(si->bfs, fname, SORT_IDX_BLOCKSIZE, si->write_flag);
267  if (!sf->u.bf)
268  {
269  xfree(sf);
270  return -1;
271  }
272  if (!bf_read(sf->u.bf, 0, 0, sizeof(sf->head), &sf->head))
273  {
274  sf->head.sysno_max = 0;
275  if (!si->write_flag)
276  {
277  bf_close(sf->u.bf);
278  xfree(sf);
279  return -1;
280  }
281  }
282  break;
284  method.codec.encode = sort_term_encode1;
285  method.codec.decode = sort_term_decode1;
286 
287  yaz_snprintf(fname, sizeof(fname), "sortb%d", id);
288  sf->u.isamb = isamb_open2(si->bfs, fname, si->write_flag, &method,
289  /* cache */ 0,
290  /* no_cat */ 1, &isam_block_size,
291  /* use_root_ptr */ 1);
292  if (!sf->u.isamb)
293  {
294  xfree(sf);
295  return -1;
296  }
297  else
298  {
299  sf->isam_p = isamb_get_root_ptr(sf->u.isamb);
300  }
301  break;
303  isam_block_size = 32768;
304  method.codec.encode = sort_term_encode2;
305  method.codec.decode = sort_term_decode2;
306 
307  yaz_snprintf(fname, sizeof(fname), "sortm%d", id);
308  sf->u.isamb = isamb_open2(si->bfs, fname, si->write_flag, &method,
309  /* cache */ 0,
310  /* no_cat */ 1, &isam_block_size,
311  /* use_root_ptr */ 1);
312  if (!sf->u.isamb)
313  {
314  xfree(sf);
315  return -1;
316  }
317  else
318  {
319  sf->isam_p = isamb_get_root_ptr(sf->u.isamb);
320  }
321  break;
322  }
323  sf->isam_pp = 0;
324  sf->no_inserted = 0;
325  sf->no_deleted = 0;
326  sf->next = si->files;
327  si->current_file = si->files = sf;
328  return 0;
329 }
330 
331 static void zebra_sortf_rewind(struct sortFile *sf)
332 {
333  if (sf->isam_pp)
334  isamb_pp_close(sf->isam_pp);
335  sf->isam_pp = 0;
336  sf->no_inserted = 0;
337  sf->no_deleted = 0;
338 }
339 
341 {
342  zint new_sysno = rec_sysno_to_int(sysno);
343  struct sortFile *sf;
344 
345  for (sf = si->files; sf; sf = sf->next)
346  {
347  if (sf->no_inserted || sf->no_deleted)
348  zebra_sortf_rewind(sf);
349  else if (sf->isam_pp && new_sysno <= si->sysno)
350  zebra_sortf_rewind(sf);
351  }
352  si->sysno = new_sysno;
353 }
354 
355 
357 {
358  struct sortFile *sf = si->current_file;
359 
360  if (!sf || !sf->u.bf)
361  return;
362  switch(si->type)
363  {
365  memset(si->entry_buf, 0, SORT_IDX_ENTRYSIZE);
366  bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf);
367  break;
370  assert(sf->u.isamb);
371  if (sf->no_deleted == 0)
372  {
373  struct sort_term_stream s;
374  ISAMC_I isamc_i;
375 
376  s.st.sysno = si->sysno;
377  s.st.section_id = section_id;
378  s.st.length = 0;
379  s.st.term[0] = '\0';
380 
381  s.no = 1;
382  s.insert_flag = 0;
383  isamc_i.clientData = &s;
384  isamc_i.read_item = sort_term_code_read;
385 
386  isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
387  sf->no_deleted++;
388  }
389  break;
390  }
391 }
392 
393 void zebra_sort_add(zebra_sort_index_t si, zint section_id, WRBUF wrbuf)
394 {
395  struct sortFile *sf = si->current_file;
396  int len;
397 
398  if (!sf || !sf->u.bf)
399  return;
400  switch(si->type)
401  {
403  /* take first entry from wrbuf - itself is 0-terminated */
404  len = strlen(wrbuf_buf(wrbuf));
405  if (len > SORT_IDX_ENTRYSIZE)
406  len = SORT_IDX_ENTRYSIZE;
407 
408  memcpy(si->entry_buf, wrbuf_buf(wrbuf), len);
409  if (len < SORT_IDX_ENTRYSIZE-len)
410  memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len);
411  bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf);
412  break;
414  assert(sf->u.isamb);
415 
416  if (sf->no_inserted == 0)
417  {
418  struct sort_term_stream s;
419  ISAMC_I isamc_i;
420  /* take first entry from wrbuf - itself is 0-terminated */
421 
422  len = wrbuf_len(wrbuf);
423  if (len > SORT_MAX_TERM)
424  {
425  len = SORT_MAX_TERM;
426  wrbuf_buf(wrbuf)[len-1] = '\0';
427  }
428  memcpy(s.st.term, wrbuf_buf(wrbuf), len);
429  s.st.length = len;
430  s.st.sysno = si->sysno;
431  s.st.section_id = 0;
432  s.no = 1;
433  s.insert_flag = 1;
434  isamc_i.clientData = &s;
435  isamc_i.read_item = sort_term_code_read;
436 
437  isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
438  sf->no_inserted++;
439  }
440  break;
442  assert(sf->u.isamb);
443  if (sf->no_inserted == 0)
444  {
445  struct sort_term_stream s;
446  ISAMC_I isamc_i;
447  len = wrbuf_len(wrbuf);
448  if (len > SORT_MAX_MULTI)
449  {
450  len = SORT_MAX_MULTI;
451  wrbuf_buf(wrbuf)[len-1] = '\0';
452  }
453  memcpy(s.st.term, wrbuf_buf(wrbuf), len);
454  s.st.length = len;
455  s.st.sysno = si->sysno;
456  s.st.section_id = section_id;
457  s.no = 1;
458  s.insert_flag = 1;
459  isamc_i.clientData = &s;
460  isamc_i.read_item = sort_term_code_read;
461 
462  isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
463  sf->no_inserted++;
464  }
465  break;
466  }
467 }
468 
469 
470 int zebra_sort_read(zebra_sort_index_t si, zint *section_id, WRBUF w)
471 {
472  int r;
473  struct sortFile *sf = si->current_file;
474  char tbuf[SORT_IDX_ENTRYSIZE];
475 
476  assert(sf);
477  assert(sf->u.bf);
478 
479  switch(si->type)
480  {
482  r = bf_read(sf->u.bf, si->sysno+1, 0, 0, tbuf);
483  if (r && *tbuf)
484  {
485  wrbuf_puts(w, tbuf);
486  wrbuf_putc(w, '\0');
487  return 1;
488  }
489  break;
492  if (sf->isam_p)
493  {
494 
495  if (!sf->isam_pp)
496  sf->isam_pp = isamb_pp_open(sf->u.isamb, sf->isam_p, 1);
497  if (sf->isam_pp)
498  {
499  struct sort_term st, st_untilbuf;
500 
501  st_untilbuf.sysno = si->sysno;
502  st_untilbuf.section_id = 0;
503  st_untilbuf.length = 0;
504  st_untilbuf.term[0] = '\0';
505  r = isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf);
506  if (r && st.sysno == si->sysno)
507  {
508  wrbuf_write(w, st.term, st.length);
509  if (section_id)
510  *section_id = st.section_id;
511  return 1;
512  }
513  }
514  }
515  break;
516  }
517  return 0;
518 }
519 /*
520  * Local variables:
521  * c-basic-offset: 4
522  * c-file-style: "Stroustrup"
523  * indent-tabs-mode: nil
524  * End:
525  * vim: shiftwidth=4 tabstop=8 expandtab
526  */
527 
Zebra Block File Layer.
int bf_read(BFile bf, zint no, int offset, int nbytes, void *buf)
read from block file (may call exit)
Definition: bfile.c:205
void bf_close(BFile bf)
closes a Block file (may call exit)
Definition: bfile.c:139
BFile bf_open(BFiles bfs, const char *name, int block_size, int wflag)
opens and returns a Block file handle
Definition: bfile.c:150
int bf_write(BFile bf, zint no, int offset, int nbytes, const void *buf)
writes block of bytes to file (may call exit)
Definition: bfile.c:232
ISAMB_PP isamb_pp_open(ISAMB isamb, ISAM_P pos, int scope)
Definition: isamb.c:1387
zint isamb_get_root_ptr(ISAMB b)
Definition: isamb.c:1669
void isamb_close(ISAMB isamb)
Definition: isamb.c:455
void isamb_set_root_ptr(ISAMB b, zint root_ptr)
Definition: isamb.c:1674
int isamb_pp_forward(ISAMB_PP pp, void *buf, const void *untilbuf)
Definition: isamb.c:1525
ISAMB isamb_open2(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, int cache, int no_cat, int *sizes, int use_root_ptr)
Definition: isamb.c:205
void isamb_pp_close(ISAMB_PP pp)
Definition: isamb.c:1429
void isamb_merge(ISAMB b, ISAM_P *pos, ISAMC_I *data)
Definition: isamb.c:1266
zint ISAM_P
Definition: isamc.h:28
zint rec_sysno_to_int(zint sysno)
Definition: records.c:130
int zebra_sort_read(zebra_sort_index_t si, zint *section_id, WRBUF w)
reads sort entry
Definition: sortidx.c:470
void zebra_sort_close(zebra_sort_index_t si)
frees sort handle
Definition: sortidx.c:209
static void zebra_sortf_rewind(struct sortFile *sf)
Definition: sortidx.c:331
static void sort_term_code_reset(void *p)
Definition: sortidx.c:140
static void * sort_term_code_start(void)
Definition: sortidx.c:77
static int sort_term_compare(const void *a, const void *b)
Definition: sortidx.c:58
zebra_sort_index_t zebra_sort_open(BFiles bfs, int write_flag, int type)
creates sort handle
Definition: sortidx.c:197
int zebra_sort_type(zebra_sort_index_t si, int id)
sets type for sort usage
Definition: sortidx.c:235
#define SORT_MAX_TERM
Definition: sortidx.c:35
static void sort_term_code_stop(void *p)
Definition: sortidx.c:144
static void sort_term_encode1(void *p, char **dst, const char **src)
Definition: sortidx.c:82
#define SORT_MAX_MULTI
Definition: sortidx.c:36
static void sort_term_log_item(int level, const void *b, const char *txt)
Definition: sortidx.c:48
static int sort_term_code_read(void *vp, char **dst, int *insertMode)
Definition: sortidx.c:154
#define SORT_IDX_BLOCKSIZE
Definition: sortidx.c:38
static void sort_term_decode2(void *p, char **dst, const char **src)
Definition: sortidx.c:125
static void sort_term_decode1(void *p, char **dst, const char **src)
Definition: sortidx.c:108
void zebra_sort_sysno(zebra_sort_index_t si, zint sysno)
sets sort system number for read / add / delete
Definition: sortidx.c:340
void zebra_sort_delete(zebra_sort_index_t si, zint section_id)
delete sort entry
Definition: sortidx.c:356
static void sort_term_encode2(void *p, char **dst, const char **src)
Definition: sortidx.c:94
void zebra_sort_add(zebra_sort_index_t si, zint section_id, WRBUF wrbuf)
adds multi-map content to sort file
Definition: sortidx.c:393
#define ZEBRA_SORT_TYPE_ISAMB
Definition: sortidx.h:37
struct zebra_sort_index * zebra_sort_index_t
sort index handle
Definition: sortidx.h:34
#define ZEBRA_SORT_TYPE_FLAT
Definition: sortidx.h:36
#define ZEBRA_SORT_TYPE_MULTI
Definition: sortidx.h:38
#define SORT_IDX_ENTRYSIZE
Definition: sortidx.h:29
Definition: isamb.c:95
int(* compare_item)(const void *a, const void *b)
Definition: isamc.h:43
ISAM_CODEC codec
Definition: isamc.h:46
void(* log_item)(int logmask, const void *p, const char *txt)
Definition: isamc.h:44
void(* decode)(void *p, char **dst, const char **src)
Definition: isam-codec.h:26
void(* stop)(void *p)
Definition: isam-codec.h:25
void(* encode)(void *p, char **dst, const char **src)
Definition: isam-codec.h:27
void(* reset)(void *p)
Definition: isam-codec.h:28
void *(* start)(void)
Definition: isam-codec.h:24
zint sysno_max
Definition: sortidx.c:170
ISAM_P isam_p
Definition: sortidx.c:179
ISAMB_PP isam_pp
Definition: sortidx.c:180
BFile bf
Definition: sortidx.c:176
int no_inserted
Definition: sortidx.c:183
struct sortFileHead head
Definition: sortidx.c:182
int no_deleted
Definition: sortidx.c:184
int id
Definition: sortidx.c:174
union sortFile::@25 u
struct sortFile * next
Definition: sortidx.c:181
ISAMB isamb
Definition: sortidx.c:177
struct sort_term st
Definition: sortidx.c:151
zint section_id
Definition: sortidx.c:42
zint sysno
Definition: sortidx.c:41
char term[SORT_MAX_MULTI]
Definition: sortidx.c:44
zint length
Definition: sortidx.c:43
char * entry_buf
Definition: sortidx.c:192
struct sortFile * current_file
Definition: sortidx.c:193
struct sortFile * files
Definition: sortidx.c:194
long zint
Zebra integer.
Definition: util.h:66
void zebra_zint_decode(const char **src, zint *pos)
Definition: zint.c:39
#define ZINT_FORMAT
Definition: util.h:72
void zebra_zint_encode(char **dst, zint pos)
Definition: zint.c:26