IDZEBRA  2.1.2
insert.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 
21 
22 #if HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 #include <string.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <assert.h>
29 
30 #include "dict-p.h"
31 
32 #define CHECK 0
33 
34 static int dict_ins(Dict dict, const Dict_char *str,
35  Dict_ptr back_ptr, int userlen, void *userinfo);
36 static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
37  Dict_ptr subptr, char *userinfo);
38 
39 
40 static Dict_ptr new_page(Dict dict, Dict_ptr back_ptr, void **pp)
41 {
42  void *p;
43  Dict_ptr ptr = dict->head.last;
44  if (!dict->head.freelist)
45  {
46  dict_bf_newp(dict->dbf, dict->head.last, &p, dict->head.page_size);
47  (dict->head.last)++;
48  }
49  else
50  {
51  ptr = dict->head.freelist;
52  dict_bf_readp(dict->dbf, ptr, &p);
53  dict->head.freelist = DICT_backptr(p);
54  }
55  assert(p);
56  DICT_type(p) = 0;
57  DICT_backptr(p) = back_ptr;
58  DICT_nodir(p) = 0;
60  DICT_bsize(p) = dict->head.page_size;
61  if (pp)
62  *pp = p;
63  return ptr;
64 }
65 
66 static int split_page(Dict dict, Dict_ptr ptr, void *p)
67 {
68  void *subp;
69  char *info_here;
70  Dict_ptr subptr;
71  int i, j;
72  short *indxp, *best_indxp = NULL;
73  Dict_char best_char = 0;
74  Dict_char prev_char = 0;
75  int best_no = -1, no_current = 1;
76 
77  dict->no_split++;
78  /* determine splitting char... */
79  indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
80  for (i = DICT_nodir(p); --i >= 0; --indxp)
81  {
82  if (*indxp > 0) /* tail string here! */
83  {
84  Dict_char dc;
85 
86  memcpy(&dc, (char*) p + *indxp, sizeof(dc));
87  if (best_no < 0)
88  { /* first entry met */
89  best_char = prev_char = dc;
90  best_no = 1;
91  best_indxp = indxp;
92  }
93  else if (prev_char == dc)
94  { /* same char prefix. update */
95  if (++no_current > best_no)
96  { /* best entry so far */
97  best_no = no_current;
98  best_char = dc;
99  best_indxp = indxp;
100  }
101  }
102  else
103  { /* new char prefix. restore */
104  prev_char = dc;
105  no_current = 1;
106  }
107  }
108  }
109  assert(best_no >= 0); /* we didn't find any tail string entry at all! */
110 
111  j = best_indxp - (short*) p;
112  subptr = new_page(dict, ptr, &subp);
113  /* scan entries to see if there is a string with */
114  /* length 1. info_here indicates if such entry exist */
115  info_here = NULL;
116  for (i=0; i<best_no; i++, j++)
117  {
118  char *info, *info1;
119  int slen;
120  Dict_char dc;
121 
122  info = (char*) p + ((short*) p)[j];
123  /* entry start */
124  memcpy(&dc, info, sizeof(dc));
125  assert(dc == best_char);
126  slen = 1+dict_strlen((Dict_char*) info);
127 
128  assert(slen > 1);
129  if (slen == 2)
130  {
131  assert(!info_here);
132  info_here = info+slen*sizeof(Dict_char);
133  }
134  else
135  {
136  info1 = info+slen*sizeof(Dict_char); /* info start */
137  dict_ins(dict, (Dict_char*) (info+sizeof(Dict_char)),
138  subptr, *info1, info1+1);
139  dict_bf_readp(dict->dbf, ptr, &p);
140  }
141  }
142  /* now clean the page ... */
143  clean_page(dict, ptr, p, &best_char, subptr, info_here);
144  return 0;
145 }
146 
147 static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
148  Dict_ptr subptr, char *userinfo)
149 {
150  char *np = (char *) xmalloc(dict->head.page_size);
151  int i, slen, no = 0;
152  short *indxp1, *indxp2;
153  char *info1, *info2;
154 
155  DICT_bsize(np) = dict->head.page_size;
156  indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
157  indxp2 = (short*) ((char*) np+DICT_bsize(np));
158  info2 = (char*) np + DICT_infoffset;
159  for (i = DICT_nodir(p); --i >= 0; --indxp1)
160  {
161  if (*indxp1 > 0) /* tail string here! */
162  {
163  /* string (Dict_char *) DICT_EOS terminated */
164  /* unsigned char length of information */
165  /* char * information */
166 
167  info1 = (char*) p + *indxp1;
168  if (out && memcmp(out, info1, sizeof(Dict_char)) == 0)
169  {
170  if (subptr == 0)
171  continue;
172  *--indxp2 = -(info2 - np);
173  memcpy(info2, &subptr, sizeof(Dict_ptr));
174  info2 += sizeof(Dict_ptr);
175  memcpy(info2, out, sizeof(Dict_char));
176  info2 += sizeof(Dict_char);
177  if (userinfo)
178  {
179  memcpy(info2, userinfo, *userinfo+1);
180  info2 += *userinfo + 1;
181  }
182  else
183  *info2++ = 0;
184  subptr = 0;
185  ++no;
186  continue;
187  }
188  *--indxp2 = info2 - np;
189  slen = (dict_strlen((Dict_char*) info1)+1)*sizeof(Dict_char);
190  memcpy(info2, info1, slen);
191  info1 += slen;
192  info2 += slen;
193  }
194  else
195  {
196  /* Dict_ptr subptr */
197  /* Dict_char sub char */
198  /* unsigned char length of information */
199  /* char * information */
200 
201  assert(*indxp1 < 0);
202  *--indxp2 = -(info2 - np);
203  info1 = (char*) p - *indxp1;
204  memcpy(info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
205  info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
206  info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
207  }
208  slen = *info1+1;
209  memcpy(info2, info1, slen);
210  info2 += slen;
211  ++no;
212  }
213 #if 1
214  memcpy((char*)p+DICT_infoffset,
215  (char*)np+DICT_infoffset,
216  info2 - ((char*)np+DICT_infoffset));
217  memcpy((char*)p + ((char*)indxp2 - (char*)np),
218  indxp2,
219  ((char*) np+DICT_bsize(p)) - (char*)indxp2);
220 #else
221  memcpy((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
222  DICT_pagesize(dict)-DICT_infoffset);
223 #endif
224  DICT_size(p) = info2 - np;
225  DICT_type(p) = 0;
226  DICT_nodir(p) = no;
227  xfree(np);
228  dict_bf_touch(dict->dbf, ptr);
229 }
230 
231 
232 
233 /* return 0 if new */
234 /* return 1 if before but change of info */
235 /* return 2 if same as before */
236 
237 static int dict_ins(Dict dict, const Dict_char *str,
238  Dict_ptr ptr, int userlen, void *userinfo)
239 {
240  int hi, lo, mid, slen, cmp = 1;
241  short *indxp;
242  char *info;
243  void *p;
244 
245  dict_bf_readp(dict->dbf, ptr, &p);
246 
247  assert(p);
248  assert(ptr);
249 
250  mid = lo = 0;
251  hi = DICT_nodir(p)-1;
252  indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
253  while (lo <= hi)
254  {
255  mid = (lo+hi)/2;
256  if (indxp[-mid] > 0)
257  {
258  /* string (Dict_char *) DICT_EOS terminated */
259  /* unsigned char length of information */
260  /* char * information */
261  info = (char*)p + indxp[-mid];
262  cmp = dict_strcmp((Dict_char*) info, str);
263  if (!cmp)
264  {
265  info += (dict_strlen((Dict_char*) info)+1)*sizeof(Dict_char);
266  /* consider change of userinfo length... */
267  if (*info == userlen)
268  {
269  /* change of userinfo ? */
270  if (memcmp(info+1, userinfo, userlen))
271  {
272  dict_bf_touch(dict->dbf, ptr);
273  memcpy(info+1, userinfo, userlen);
274  return 1;
275  }
276  /* same userinfo */
277  return 2;
278  }
279  else if (*info > userlen)
280  {
281  /* room for new userinfo */
282  DICT_type(p) = 1;
283  *info = userlen;
284  dict_bf_touch(dict->dbf, ptr);
285  memcpy(info+1, userinfo, userlen);
286  return 1;
287  }
288  break;
289  }
290  }
291  else
292  {
293  Dict_char dc;
294  Dict_ptr subptr;
295 
296  /* Dict_ptr subptr */
297  /* Dict_char sub char */
298  /* unsigned char length of information */
299  /* char * information */
300  info = (char*)p - indxp[-mid];
301  memcpy(&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
302  cmp = dc- *str;
303  if (!cmp)
304  {
305  memcpy(&subptr, info, sizeof(Dict_ptr));
306  if (*++str == DICT_EOS)
307  {
308  /* finish of string. Store userinfo here... */
309 
310  int xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
311  if (xlen == userlen)
312  {
313  if (memcmp(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
314  userinfo, userlen))
315  {
316  dict_bf_touch(dict->dbf, ptr);
317  memcpy(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
318  userinfo, userlen);
319  return 1;
320  }
321  return 2;
322  }
323  else if (xlen > userlen)
324  {
325  DICT_type(p) = 1;
326  info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
327  memcpy(info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
328  userinfo, userlen);
329  dict_bf_touch(dict->dbf, ptr);
330  return 1;
331  }
332  /* xlen < userlen, expanding needed ... */
333  if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
334  userlen >=
335  DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short))
336  {
337  /* not enough room - split needed ... */
338  if (DICT_type(p) == 1)
339  {
340  clean_page(dict, ptr, p, NULL, 0, NULL);
341  return dict_ins(dict, str-1, ptr,
342  userlen, userinfo);
343  }
344  if (split_page(dict, ptr, p))
345  {
346  yaz_log(YLOG_FATAL, "Unable to split page %d\n", ptr);
347  assert(0);
348  }
349  return dict_ins(dict, str-1, ptr, userlen, userinfo);
350  }
351  else
352  { /* enough room - no split needed ... */
353  info = (char*)p + DICT_size(p);
354  memcpy(info, &subptr, sizeof(subptr));
355  memcpy(info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
356  info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
357  memcpy(info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
358  userinfo, userlen);
359  indxp[-mid] = -DICT_size(p);
360  DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
361  +1+userlen;
362  DICT_type(p) = 1;
363  dict_bf_touch(dict->dbf, ptr);
364  }
365  if (xlen)
366  return 1;
367  return 0;
368  }
369  else
370  {
371  if (subptr == 0)
372  {
373  subptr = new_page(dict, ptr, NULL);
374  memcpy(info, &subptr, sizeof(subptr));
375  dict_bf_touch(dict->dbf, ptr);
376  }
377  return dict_ins(dict, str, subptr, userlen, userinfo);
378  }
379  }
380  }
381  if (cmp < 0)
382  lo = mid+1;
383  else
384  hi = mid-1;
385  }
386  indxp = indxp-mid;
387  if (lo>hi && cmp < 0)
388  --indxp;
389  slen = (dict_strlen(str)+1)*sizeof(Dict_char);
390  if (DICT_size(p)+slen+userlen >=
391  (int)(DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)))/* overflow? */
392  {
393  if (DICT_type(p))
394  {
395  clean_page(dict, ptr, p, NULL, 0, NULL);
396  return dict_ins(dict, str, ptr, userlen, userinfo);
397  }
398  split_page(dict, ptr, p);
399  return dict_ins(dict, str, ptr, userlen, userinfo);
400  }
401  if (cmp)
402  {
403  short *indxp1;
404  (DICT_nodir(p))++;
405  indxp1 = (short*)((char*) p + DICT_bsize(p)
406  - DICT_nodir(p)*sizeof(short));
407  for (; indxp1 != indxp; indxp1++)
408  indxp1[0] = indxp1[1];
409 #if CHECK
410  indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
411  for (i = DICT_nodir (p); --i >= 0; --indxp1)
412  {
413  if (*indxp1 < 0)
414  {
415  info = (char*)p - *indxp1;
416  assert (info[sizeof(Dict_ptr)] > ' ');
417  }
418  }
419 #endif
420  }
421  else
422  DICT_type(p) = 1;
423  info = (char*)p + DICT_size(p);
424  memcpy(info, str, slen);
425  info += slen;
426  *info++ = userlen;
427  memcpy(info, userinfo, userlen);
428  info += userlen;
429 
430  *indxp = DICT_size(p);
431  DICT_size(p) = info- (char*) p;
432  dict_bf_touch(dict->dbf, ptr);
433  if (cmp)
434  return 0;
435  return 1;
436 }
437 
438 int dict_insert(Dict dict, const char *str, int userlen, void *userinfo)
439 {
440  if (!dict->rw)
441  return -1;
442  dict->no_insert++;
443  if (!dict->head.root)
444  {
445  void *p;
446  dict->head.root = new_page(dict, 0, &p);
447  if (!dict->head.root)
448  return -1;
449  }
450  return dict_ins(dict, (const Dict_char *) str, dict->head.root,
451  userlen, userinfo);
452 }
453 
454 /*
455  * Local variables:
456  * c-basic-offset: 4
457  * c-file-style: "Stroustrup"
458  * indent-tabs-mode: nil
459  * End:
460  * vim: shiftwidth=4 tabstop=8 expandtab
461  */
462 
zint no_split
Definition: dict-p.h:78
#define DICT_backptr(x)
Definition: dict-p.h:104
int dict_strcmp(const Dict_char *s1, const Dict_char *s2)
Definition: open.c:108
#define DICT_infoffset
Definition: dict-p.h:108
struct Dict_head head
Definition: dict-p.h:83
int dict_strlen(const Dict_char *s)
Definition: open.c:118
#define DICT_bsize(x)
Definition: dict-p.h:105
static void clean_page(Dict dict, Dict_ptr ptr, void *p, Dict_char *out, Dict_ptr subptr, char *userinfo)
Definition: insert.c:147
#define DICT_EOS
Definition: dict-p.h:102
zint no_insert
Definition: dict-p.h:80
int dict_bf_newp(Dict_BFile bf, int no, void **bufp, int nbytes)
Definition: drdwr.c:226
static Dict dict
Definition: dicttest.c:34
static Dict_ptr new_page(Dict dict, Dict_ptr back_ptr, void **pp)
Definition: insert.c:40
unsigned Dict_ptr
Definition: dict-p.h:34
Dict_BFile dbf
Definition: dict-p.h:74
#define DICT_type(x)
Definition: dict-p.h:103
int rw
Definition: dict-p.h:73
static int split_page(Dict dict, Dict_ptr ptr, void *p)
Definition: insert.c:66
int dict_bf_readp(Dict_BFile bf, int no, void **bufp)
Definition: drdwr.c:188
#define DICT_nodir(x)
Definition: dict-p.h:106
int dict_bf_touch(Dict_BFile bf, int no)
Definition: drdwr.c:244
Dict_ptr root
Definition: dict-p.h:40
Dict_ptr freelist
Definition: dict-p.h:40
#define DICT_size(x)
Definition: dict-p.h:107
int page_size
Definition: dict-p.h:38
Dict_ptr last
Definition: dict-p.h:40
int dict_insert(Dict dict, const char *str, int userlen, void *userinfo)
insert item into dictionary
Definition: insert.c:438
static int dict_ins(Dict dict, const Dict_char *str, Dict_ptr back_ptr, int userlen, void *userinfo)
Definition: insert.c:237
unsigned char Dict_char
Definition: dict-p.h:33