IDZEBRA  2.1.2
sortidx.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 
21 #if HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 #include <assert.h>
25 #include <string.h>
26 
27 #include <yaz/log.h>
28 #include <yaz/xmalloc.h>
29 #include <idzebra/isamb.h>
30 #include <idzebra/bfile.h>
31 #include <sortidx.h>
32 #include "recindex.h"
33 
34 #define SORT_MAX_TERM 110
35 #define SORT_MAX_MULTI 4096
36 
37 #define SORT_IDX_BLOCKSIZE 64
38 
39 struct sort_term {
44 };
45 
46 
47 static void sort_term_log_item(int level, const void *b, const char *txt)
48 {
49  struct sort_term a1;
50 
51  memcpy(&a1, b, sizeof(a1));
52 
53  yaz_log(level, "%s " ZINT_FORMAT " " ZINT_FORMAT " %.*s", txt, a1.sysno,
54  a1.section_id, (int) a1.length-1, a1.term);
55 }
56 
57 static int sort_term_compare(const void *a, const void *b)
58 {
59  struct sort_term a1, b1;
60 
61  memcpy(&a1, a, sizeof(a1));
62  memcpy(&b1, b, sizeof(b1));
63 
64  if (a1.sysno > b1.sysno)
65  return 1;
66  else if (a1.sysno < b1.sysno)
67  return -1;
68  if (a1.section_id > b1.section_id)
69  return 1;
70  else if (a1.section_id < b1.section_id)
71  return -1;
72 
73  return 0;
74 }
75 
/*
 * Codec start hook: this codec keeps no per-stream state, so the handle
 * it returns is always a null pointer.
 */
static void *sort_term_code_start(void)
{
    return NULL;
}
80 
81 static void sort_term_encode1(void *p, char **dst, const char **src)
82 {
83  struct sort_term a1;
84 
85  memcpy(&a1, *src, sizeof(a1));
86  *src += sizeof(a1);
87 
88  zebra_zint_encode(dst, a1.sysno); /* encode record id */
89  strcpy(*dst, a1.term); /* then sort term, 0 terminated */
90  *dst += strlen(a1.term) + 1;
91 }
92 
93 static void sort_term_encode2(void *p, char **dst, const char **src)
94 {
95  struct sort_term a1;
96 
97  memcpy(&a1, *src, sizeof(a1));
98  *src += sizeof(a1);
99 
100  zebra_zint_encode(dst, a1.sysno);
101  zebra_zint_encode(dst, a1.section_id);
102  zebra_zint_encode(dst, a1.length); /* encode length */
103  memcpy(*dst, a1.term, a1.length);
104  *dst += a1.length;
105 }
106 
107 static void sort_term_decode1(void *p, char **dst, const char **src)
108 {
109  struct sort_term a1;
110  size_t slen;
111 
112  zebra_zint_decode(src, &a1.sysno);
113  a1.section_id = 0;
114 
115  strcpy(a1.term, *src);
116  slen = 1 + strlen(a1.term);
117  *src += slen;
118  a1.length = slen;
119 
120  memcpy(*dst, &a1, sizeof(a1));
121  *dst += sizeof(a1);
122 }
123 
124 static void sort_term_decode2(void *p, char **dst, const char **src)
125 {
126  struct sort_term a1;
127 
128  zebra_zint_decode(src, &a1.sysno);
129  zebra_zint_decode(src, &a1.section_id);
130  zebra_zint_decode(src, &a1.length);
131 
132  memcpy(a1.term, *src, a1.length);
133  *src += a1.length;
134 
135  memcpy(*dst, &a1, sizeof(a1));
136  *dst += sizeof(a1);
137 }
138 
/* Codec reset hook: nothing to reset, the codec is stateless. */
static void sort_term_code_reset(void *p)
{
    (void) p;
}
142 
/* Codec stop hook: nothing to release, the codec is stateless. */
static void sort_term_code_stop(void *p)
{
    (void) p;
}
146 
148  int no;
150  struct sort_term st;
151 };
152 
153 static int sort_term_code_read(void *vp, char **dst, int *insertMode)
154 {
155  struct sort_term_stream *s = (struct sort_term_stream *) vp;
156 
157  if (s->no == 0)
158  return 0;
159 
160  (s->no)--;
161 
162  *insertMode = s->insert_flag;
163  memcpy(*dst, &s->st, sizeof(s->st));
164  *dst += sizeof(s->st);
165  return 1;
166 }
167 
168 struct sortFileHead {
170 };
171 
172 struct sortFile {
173  int id;
174  union {
177  } u;
180  struct sortFile *next;
181  struct sortFileHead head;
184 };
185 
190  int type;
191  char *entry_buf;
193  struct sortFile *files;
194 };
195 
196 zebra_sort_index_t zebra_sort_open(BFiles bfs, int write_flag, int type)
197 {
198  zebra_sort_index_t si = (zebra_sort_index_t) xmalloc(sizeof(*si));
199  si->bfs = bfs;
200  si->write_flag = write_flag;
201  si->current_file = NULL;
202  si->files = NULL;
203  si->type = type;
204  si->entry_buf = (char *) xmalloc(SORT_IDX_ENTRYSIZE);
205  return si;
206 }
207 
209 {
210  struct sortFile *sf = si->files;
211  while (sf)
212  {
213  struct sortFile *sf_next = sf->next;
214  switch(si->type)
215  {
217  bf_close(sf->u.bf);
218  break;
221  if (sf->isam_pp)
222  isamb_pp_close(sf->isam_pp);
223  isamb_set_root_ptr(sf->u.isamb, sf->isam_p);
224  isamb_close(sf->u.isamb);
225  break;
226  }
227  xfree(sf);
228  sf = sf_next;
229  }
230  xfree(si->entry_buf);
231  xfree(si);
232 }
233 
235 {
236  int isam_block_size = 4096;
237 
238  ISAMC_M method;
239  char fname[80];
240  struct sortFile *sf;
241 
243  method.log_item = sort_term_log_item;
246  method.codec.stop = sort_term_code_stop;
247 
248  if (si->current_file && si->current_file->id == id)
249  return 0;
250  for (sf = si->files; sf; sf = sf->next)
251  if (sf->id == id)
252  {
253  si->current_file = sf;
254  return 0;
255  }
256  sf = (struct sortFile *) xmalloc(sizeof(*sf));
257  sf->id = id;
258 
259  switch(si->type)
260  {
262  sf->u.bf = NULL;
263  sprintf(fname, "sort%d", id);
264  yaz_log(YLOG_DEBUG, "sort idx %s wr=%d", fname, si->write_flag);
265  sf->u.bf = bf_open(si->bfs, fname, SORT_IDX_BLOCKSIZE, si->write_flag);
266  if (!sf->u.bf)
267  {
268  xfree(sf);
269  return -1;
270  }
271  if (!bf_read(sf->u.bf, 0, 0, sizeof(sf->head), &sf->head))
272  {
273  sf->head.sysno_max = 0;
274  if (!si->write_flag)
275  {
276  bf_close(sf->u.bf);
277  xfree(sf);
278  return -1;
279  }
280  }
281  break;
283  method.codec.encode = sort_term_encode1;
284  method.codec.decode = sort_term_decode1;
285 
286  sprintf(fname, "sortb%d", id);
287  sf->u.isamb = isamb_open2(si->bfs, fname, si->write_flag, &method,
288  /* cache */ 0,
289  /* no_cat */ 1, &isam_block_size,
290  /* use_root_ptr */ 1);
291  if (!sf->u.isamb)
292  {
293  xfree(sf);
294  return -1;
295  }
296  else
297  {
298  sf->isam_p = isamb_get_root_ptr(sf->u.isamb);
299  }
300  break;
302  isam_block_size = 32768;
303  method.codec.encode = sort_term_encode2;
304  method.codec.decode = sort_term_decode2;
305 
306  sprintf(fname, "sortm%d", id);
307  sf->u.isamb = isamb_open2(si->bfs, fname, si->write_flag, &method,
308  /* cache */ 0,
309  /* no_cat */ 1, &isam_block_size,
310  /* use_root_ptr */ 1);
311  if (!sf->u.isamb)
312  {
313  xfree(sf);
314  return -1;
315  }
316  else
317  {
318  sf->isam_p = isamb_get_root_ptr(sf->u.isamb);
319  }
320  break;
321  }
322  sf->isam_pp = 0;
323  sf->no_inserted = 0;
324  sf->no_deleted = 0;
325  sf->next = si->files;
326  si->current_file = si->files = sf;
327  return 0;
328 }
329 
330 static void zebra_sortf_rewind(struct sortFile *sf)
331 {
332  if (sf->isam_pp)
333  isamb_pp_close(sf->isam_pp);
334  sf->isam_pp = 0;
335  sf->no_inserted = 0;
336  sf->no_deleted = 0;
337 }
338 
340 {
341  zint new_sysno = rec_sysno_to_int(sysno);
342  struct sortFile *sf;
343 
344  for (sf = si->files; sf; sf = sf->next)
345  {
346  if (sf->no_inserted || sf->no_deleted)
347  zebra_sortf_rewind(sf);
348  else if (sf->isam_pp && new_sysno <= si->sysno)
349  zebra_sortf_rewind(sf);
350  }
351  si->sysno = new_sysno;
352 }
353 
354 
356 {
357  struct sortFile *sf = si->current_file;
358 
359  if (!sf || !sf->u.bf)
360  return;
361  switch(si->type)
362  {
364  memset(si->entry_buf, 0, SORT_IDX_ENTRYSIZE);
365  bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf);
366  break;
369  assert(sf->u.isamb);
370  if (sf->no_deleted == 0)
371  {
372  struct sort_term_stream s;
373  ISAMC_I isamc_i;
374 
375  s.st.sysno = si->sysno;
377  s.st.length = 0;
378  s.st.term[0] = '\0';
379 
380  s.no = 1;
381  s.insert_flag = 0;
382  isamc_i.clientData = &s;
383  isamc_i.read_item = sort_term_code_read;
384 
385  isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
386  sf->no_deleted++;
387  }
388  break;
389  }
390 }
391 
393 {
394  struct sortFile *sf = si->current_file;
395  int len;
396 
397  if (!sf || !sf->u.bf)
398  return;
399  switch(si->type)
400  {
402  /* take first entry from wrbuf - itself is 0-terminated */
403  len = strlen(wrbuf_buf(wrbuf));
404  if (len > SORT_IDX_ENTRYSIZE)
405  len = SORT_IDX_ENTRYSIZE;
406 
407  memcpy(si->entry_buf, wrbuf_buf(wrbuf), len);
408  if (len < SORT_IDX_ENTRYSIZE-len)
409  memset(si->entry_buf+len, 0, SORT_IDX_ENTRYSIZE-len);
410  bf_write(sf->u.bf, si->sysno+1, 0, 0, si->entry_buf);
411  break;
413  assert(sf->u.isamb);
414 
415  if (sf->no_inserted == 0)
416  {
417  struct sort_term_stream s;
418  ISAMC_I isamc_i;
419  /* take first entry from wrbuf - itself is 0-terminated */
420 
421  len = wrbuf_len(wrbuf);
422  if (len > SORT_MAX_TERM)
423  {
424  len = SORT_MAX_TERM;
425  wrbuf_buf(wrbuf)[len-1] = '\0';
426  }
427  memcpy(s.st.term, wrbuf_buf(wrbuf), len);
428  s.st.length = len;
429  s.st.sysno = si->sysno;
430  s.st.section_id = 0;
431  s.no = 1;
432  s.insert_flag = 1;
433  isamc_i.clientData = &s;
434  isamc_i.read_item = sort_term_code_read;
435 
436  isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
437  sf->no_inserted++;
438  }
439  break;
441  assert(sf->u.isamb);
442  if (sf->no_inserted == 0)
443  {
444  struct sort_term_stream s;
445  ISAMC_I isamc_i;
446  len = wrbuf_len(wrbuf);
447  if (len > SORT_MAX_MULTI)
448  {
449  len = SORT_MAX_MULTI;
450  wrbuf_buf(wrbuf)[len-1] = '\0';
451  }
452  memcpy(s.st.term, wrbuf_buf(wrbuf), len);
453  s.st.length = len;
454  s.st.sysno = si->sysno;
456  s.no = 1;
457  s.insert_flag = 1;
458  isamc_i.clientData = &s;
459  isamc_i.read_item = sort_term_code_read;
460 
461  isamb_merge(sf->u.isamb, &sf->isam_p, &isamc_i);
462  sf->no_inserted++;
463  }
464  break;
465  }
466 }
467 
468 
470 {
471  int r;
472  struct sortFile *sf = si->current_file;
473  char tbuf[SORT_IDX_ENTRYSIZE];
474 
475  assert(sf);
476  assert(sf->u.bf);
477 
478  switch(si->type)
479  {
481  r = bf_read(sf->u.bf, si->sysno+1, 0, 0, tbuf);
482  if (r && *tbuf)
483  {
484  wrbuf_puts(w, tbuf);
485  wrbuf_putc(w, '\0');
486  return 1;
487  }
488  break;
491  if (sf->isam_p)
492  {
493 
494  if (!sf->isam_pp)
495  sf->isam_pp = isamb_pp_open(sf->u.isamb, sf->isam_p, 1);
496  if (sf->isam_pp)
497  {
498  struct sort_term st, st_untilbuf;
499 
500  st_untilbuf.sysno = si->sysno;
501  st_untilbuf.section_id = 0;
502  st_untilbuf.length = 0;
503  st_untilbuf.term[0] = '\0';
504  r = isamb_pp_forward(sf->isam_pp, &st, &st_untilbuf);
505  if (r && st.sysno == si->sysno)
506  {
507  wrbuf_write(w, st.term, st.length);
508  if (section_id)
509  *section_id = st.section_id;
510  return 1;
511  }
512  }
513  }
514  break;
515  }
516  return 0;
517 }
518 /*
519  * Local variables:
520  * c-basic-offset: 4
521  * c-file-style: "Stroustrup"
522  * indent-tabs-mode: nil
523  * End:
524  * vim: shiftwidth=4 tabstop=8 expandtab
525  */
526 
static int sort_term_compare(const void *a, const void *b)
Definition: sortidx.c:57
int zebra_sort_type(zebra_sort_index_t si, int id)
sets type for sort usage
Definition: sortidx.c:234
#define ZEBRA_SORT_TYPE_MULTI
Definition: sortidx.h:38
zint section_id
Definition: sortidx.c:41
BFile bf
Definition: sortidx.c:175
static void sort_term_log_item(int level, const void *b, const char *txt)
Definition: sortidx.c:47
static void sort_term_decode2(void *p, char **dst, const char **src)
Definition: sortidx.c:124
zebra_sort_index_t zebra_sort_open(BFiles bfs, int write_flag, int type)
creates sort handle
Definition: sortidx.c:196
void zebra_sort_add(zebra_sort_index_t si, zint section_id, WRBUF wrbuf)
adds multi-map content to sort file
Definition: sortidx.c:392
zint ISAM_P
Definition: isamc.h:28
static void * sort_term_code_start(void)
Definition: sortidx.c:76
struct zebra_sort_index * zebra_sort_index_t
sort index handle
Definition: sortidx.h:34
zint length
Definition: sortidx.c:42
Zebra Block File Layer.
char term[SORT_MAX_MULTI]
Definition: sortidx.c:43
#define SORT_IDX_BLOCKSIZE
Definition: sortidx.c:37
int isamb_pp_forward(ISAMB_PP pp, void *buf, const void *untilbuf)
Definition: isamb.c:1523
void isamb_pp_close(ISAMB_PP pp)
Definition: isamb.c:1428
zint rec_sysno_to_int(zint sysno)
Definition: records.c:129
void(* reset)(void *p)
Definition: isam-codec.h:28
struct sortFile * files
Definition: sortidx.c:193
void zebra_sort_close(zebra_sort_index_t si)
frees sort handle
Definition: sortidx.c:208
int zebra_sort_read(zebra_sort_index_t si, zint *section_id, WRBUF w)
reads sort entry
Definition: sortidx.c:469
zint sysno
Definition: sortidx.c:40
void(* decode)(void *p, char **dst, const char **src)
Definition: isam-codec.h:26
static void sort_term_code_reset(void *p)
Definition: sortidx.c:139
static void sort_term_decode1(void *p, char **dst, const char **src)
Definition: sortidx.c:107
int no_inserted
Definition: sortidx.c:182
static void sort_term_encode2(void *p, char **dst, const char **src)
Definition: sortidx.c:93
ISAMB isamb_open2(BFiles bfs, const char *name, int writeflag, ISAMC_M *method, int cache, int no_cat, int *sizes, int use_root_ptr)
Definition: isamb.c:204
union sortFile::@25 u
int no_deleted
Definition: sortidx.c:183
void(* log_item)(int logmask, const void *p, const char *txt)
Definition: isamc.h:44
void zebra_zint_decode(const char **src, zint *pos)
Definition: zint.c:39
void zebra_sort_sysno(zebra_sort_index_t si, zint sysno)
sets sort system number for read / add / delete
Definition: sortidx.c:339
#define ZEBRA_SORT_TYPE_FLAT
Definition: sortidx.h:36
ISAMB_PP isamb_pp_open(ISAMB isamb, ISAM_P pos, int scope)
Definition: isamb.c:1386
#define ZEBRA_SORT_TYPE_ISAMB
Definition: sortidx.h:37
static void sort_term_code_stop(void *p)
Definition: sortidx.c:143
void bf_close(BFile bf)
closes a Block file (may call exit)
Definition: bfile.c:151
struct sortFileHead head
Definition: sortidx.c:181
Definition: isamb.c:94
ISAM_CODEC codec
Definition: isamc.h:46
#define SORT_MAX_TERM
Definition: sortidx.c:34
#define SORT_MAX_MULTI
Definition: sortidx.c:35
void isamb_merge(ISAMB b, ISAM_P *pos, ISAMC_I *data)
Definition: isamb.c:1265
void zebra_zint_encode(char **dst, zint pos)
Definition: zint.c:26
void *(* start)(void)
Definition: isam-codec.h:24
static int sort_term_code_read(void *vp, char **dst, int *insertMode)
Definition: sortidx.c:153
int bf_read(BFile bf, zint no, int offset, int nbytes, void *buf)
read from block file (may call exit)
Definition: bfile.c:319
ISAM_P isam_p
Definition: sortidx.c:178
void zebra_sort_delete(zebra_sort_index_t si, zint section_id)
delete sort entry
Definition: sortidx.c:355
struct sortFile * current_file
Definition: sortidx.c:192
void(* stop)(void *p)
Definition: isam-codec.h:25
zint sysno_max
Definition: sortidx.c:169
ISAMB_PP isam_pp
Definition: sortidx.c:179
long zint
Zebra integer.
Definition: util.h:66
int(* compare_item)(const void *a, const void *b)
Definition: isamc.h:43
struct sort_term st
Definition: sortidx.c:150
ISAMB isamb
Definition: sortidx.c:176
int bf_write(BFile bf, zint no, int offset, int nbytes, const void *buf)
writes block of bytes to file (may call exit)
Definition: bfile.c:346
struct sortFile * next
Definition: sortidx.c:180
zint isamb_get_root_ptr(ISAMB b)
Definition: isamb.c:1667
static void zebra_sortf_rewind(struct sortFile *sf)
Definition: sortidx.c:330
void isamb_close(ISAMB isamb)
Definition: isamb.c:454
void isamb_set_root_ptr(ISAMB b, zint root_ptr)
Definition: isamb.c:1672
#define SORT_IDX_ENTRYSIZE
Definition: sortidx.h:29
char * entry_buf
Definition: sortidx.c:191
int id
Definition: sortidx.c:173
BFile bf_open(BFiles bfs, const char *name, int block_size, int wflag)
opens and returns a Block file handle
Definition: bfile.c:261
static void sort_term_encode1(void *p, char **dst, const char **src)
Definition: sortidx.c:81
void(* encode)(void *p, char **dst, const char **src)
Definition: isam-codec.h:27
#define ZINT_FORMAT
Definition: util.h:72