IDZEBRA  2.2.7
dicttest.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26 #include <ctype.h>
27 
28 #include <idzebra/dict.h>
29 #include <idzebra/util.h>
30 #include <idzebra/res.h>
31 #include <yaz/yaz-util.h>
32 #include <yaz/snprintf.h>
33 
34 char *prog; /* program name; main sets it from argv[0] and uses it in the usage text and for yaz_log_init */
35 static Dict dict; /* dictionary handle; main opens it with dict_open and releases it with dict_close */
36 
37 static int look_hits; /* incremented by grep_handler on every call; main zeroes it before each per-word dict_lookup_grep */
38 
39 static int grep_handler (char *name, const char *info, void *client)
40 {
41  look_hits++;
42  printf("%s\n", name);
43  return 0;
44 }
45 
/*
 * Callback passed to dict_scan.
 *
 * Prints the reported term on a line of its own.  The info, pos and
 * client arguments are not used.  Always returns 0.
 */
static int scan_handler (char *name, const char *info, int pos, void *client)
{
    (void) info;
    (void) pos;
    (void) client;

    printf("%s\n", name);
    return 0;
}
51 
52 int main (int argc, char **argv)
53 {
54  Res my_resource = 0;
55  BFiles bfs;
56  const char *name = NULL;
57  const char *inputfile = NULL;
58  const char *config = NULL;
59  const char *delete_term = NULL;
60  int scan_the_thing = 0;
61  int do_delete = 0;
62  int range = -1;
63  int srange = 0;
64  int rw = 0;
65  int infosize = 4;
66  int cache = 10;
67  int ret;
68  int unique = 0;
69  char *grep_pattern = NULL;
70  char *arg;
71  int no_of_iterations = 0;
72  int no_of_new = 0, no_of_same = 0, no_of_change = 0;
73  int no_of_hits = 0, no_of_misses = 0, no_not_found = 0, no_of_deleted = 0;
74  int max_pos;
75 
76  prog = argv[0];
77  if (argc < 2)
78  {
79  fprintf(stderr, "usage:\n "
80  " %s [-d] [-D t] [-S] [-r n] [-p n] [-u] [-g pat] [-s n] "
81  "[-v n] [-i f] [-w] [-c n] config file\n\n",
82  prog);
83  fprintf(stderr, " -d delete instead of insert\n");
84  fprintf(stderr, " -D t delete subtree instead of insert\n");
85  fprintf(stderr, " -r n set regular match range\n");
86  fprintf(stderr, " -p n set regular match start range\n");
87  fprintf(stderr, " -u report if keys change during insert\n");
88  fprintf(stderr, " -g p try pattern n (see -r)\n");
89  fprintf(stderr, " -s n set info size to n (instead of 4)\n");
90  fprintf(stderr, " -v n set logging level\n");
91  fprintf(stderr, " -i f read file with words\n");
92  fprintf(stderr, " -w insert/delete instead of lookup\n");
93  fprintf(stderr, " -c n cache size (number of pages)\n");
94  fprintf(stderr, " -S scan the dictionary\n");
95  exit(1);
96  }
97  while ((ret = options ("D:Sdr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2)
98  {
99  if (ret == 0)
100  {
101  if (!config)
102  config = arg;
103  else if (!name)
104  name = arg;
105  else
106  {
107  yaz_log (YLOG_FATAL, "too many files specified\n");
108  exit (1);
109  }
110  }
111  else if (ret == 'D')
112  {
113  delete_term = arg;
114  }
115  else if (ret == 'd')
116  do_delete = 1;
117  else if (ret == 'g')
118  {
119  grep_pattern = arg;
120  }
121  else if (ret == 'r')
122  {
123  range = atoi (arg);
124  }
125  else if (ret == 'p')
126  {
127  srange = atoi (arg);
128  }
129  else if (ret == 'u')
130  {
131  unique = 1;
132  }
133  else if (ret == 'c')
134  {
135  cache = atoi(arg);
136  if (cache<2)
137  cache = 2;
138  }
139  else if (ret == 'w')
140  rw = 1;
141  else if (ret == 'i')
142  inputfile = arg;
143  else if (ret == 'S')
144  scan_the_thing = 1;
145  else if (ret == 's')
146  {
147  infosize = atoi(arg);
148  }
149  else if (ret == 'v')
150  {
151  yaz_log_init (yaz_log_mask_str(arg), prog, NULL);
152  }
153  else
154  {
155  yaz_log (YLOG_FATAL, "Unknown option '-%s'", arg);
156  exit (1);
157  }
158  }
159  if (!config || !name)
160  {
161  yaz_log (YLOG_FATAL, "no config and/or dictionary specified");
162  exit (1);
163  }
164  my_resource = res_open(0, 0);
165  if (!my_resource)
166  {
167  yaz_log (YLOG_FATAL, "cannot open resource `%s'", config);
168  exit (1);
169  }
170  res_read_file(my_resource, config);
171 
172  bfs = bfs_create (res_get(my_resource, "register"), 0);
173  if (!bfs)
174  {
175  yaz_log (YLOG_FATAL, "bfs_create fail");
176  exit (1);
177  }
178  dict = dict_open (bfs, name, cache, rw, 0, 4096);
179  if (!dict)
180  {
181  yaz_log (YLOG_FATAL, "dict_open fail of `%s'", name);
182  exit (1);
183  }
184  if (inputfile)
185  {
186  FILE *ipf;
187  char ipf_buf[1024];
188  int line = 1;
189  char infobytes[120];
190  memset(infobytes, 0, sizeof(infobytes));
191 
192  if (!(ipf = fopen(inputfile, "r")))
193  {
194  yaz_log (YLOG_FATAL|YLOG_ERRNO, "cannot open %s", inputfile);
195  exit (1);
196  }
197 
198  while (fgets (ipf_buf, 1023, ipf))
199  {
200  char *ipf_ptr = ipf_buf;
201  yaz_snprintf(infobytes, sizeof(infobytes), "%d", line);
202  for (;*ipf_ptr && *ipf_ptr != '\n';ipf_ptr++)
203  {
204  if (isalpha(*ipf_ptr) || *ipf_ptr == '_')
205  {
206  int i = 1;
207  while (ipf_ptr[i] && (isalnum(ipf_ptr[i]) ||
208  ipf_ptr[i] == '_'))
209  i++;
210  if (ipf_ptr[i])
211  ipf_ptr[i++] = '\0';
212  if (rw)
213  {
214  if (do_delete)
215  switch (dict_delete (dict, ipf_ptr))
216  {
217  case 0:
218  no_not_found++;
219  break;
220  case 1:
221  no_of_deleted++;
222  }
223  else
224  switch(dict_insert (dict, ipf_ptr,
225  infosize, infobytes))
226  {
227  case 0:
228  no_of_new++;
229  break;
230  case 1:
231  no_of_change++;
232  if (unique)
233  yaz_log (YLOG_LOG, "%s change\n", ipf_ptr);
234  break;
235  case 2:
236  if (unique)
237  yaz_log (YLOG_LOG, "%s duplicate\n", ipf_ptr);
238  no_of_same++;
239  break;
240  }
241  }
242  else if(range < 0)
243  {
244  char *cp;
245 
246  cp = dict_lookup (dict, ipf_ptr);
247  if (cp && *cp)
248  no_of_hits++;
249  else
250  no_of_misses++;
251  }
252  else
253  {
254  look_hits = 0;
255  dict_lookup_grep (dict, ipf_ptr, range, NULL,
256  &max_pos, srange, grep_handler);
257  if (look_hits)
258  no_of_hits++;
259  else
260  no_of_misses++;
261  }
262  ++no_of_iterations;
263  if ((no_of_iterations % 10000) == 0)
264  {
265  printf ("."); fflush(stdout);
266  }
267  ipf_ptr += (i-1);
268  }
269  }
270  ++line;
271  }
272  fclose (ipf);
273  }
274  if (rw && delete_term)
275  {
276  yaz_log (YLOG_LOG, "dict_delete_subtree %s", delete_term);
277  dict_delete_subtree (dict, delete_term, 0, 0);
278  }
279  if (grep_pattern)
280  {
281  if (range < 0)
282  range = 0;
283  yaz_log (YLOG_LOG, "Grepping '%s'", grep_pattern);
284  dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos,
285  srange, grep_handler);
286  }
287  if (rw)
288  {
289  yaz_log (YLOG_LOG, "Iterations.... %d", no_of_iterations);
290  if (do_delete)
291  {
292  yaz_log (YLOG_LOG, "No of deleted. %d", no_of_deleted);
293  yaz_log (YLOG_LOG, "No not found.. %d", no_not_found);
294  }
295  else
296  {
297  yaz_log (YLOG_LOG, "No of new..... %d", no_of_new);
298  yaz_log (YLOG_LOG, "No of change.. %d", no_of_change);
299  }
300  }
301  else
302  {
303  yaz_log (YLOG_LOG, "Lookups....... %d", no_of_iterations);
304  yaz_log (YLOG_LOG, "No of hits.... %d", no_of_hits);
305  yaz_log (YLOG_LOG, "No of misses.. %d", no_of_misses);
306  }
307  if (scan_the_thing)
308  {
309  char term_dict[1024];
310 
311  int before = 1000000;
312  int after = 1000000;
313  yaz_log (YLOG_LOG, "dict_scan");
314  term_dict[0] = 1;
315  term_dict[1] = 0;
316  dict_scan (dict, term_dict, &before, &after, 0, scan_handler);
317  }
318  dict_close (dict);
319  bfs_destroy (bfs);
320  res_close (my_resource);
321  return 0;
322 }
323 /*
324  * Local variables:
325  * c-basic-offset: 4
326  * c-file-style: "Stroustrup"
327  * indent-tabs-mode: nil
328  * End:
329  * vim: shiftwidth=4 tabstop=8 expandtab
330  */
331 
BFiles bfs_create(const char *spec, const char *base)
creates a Block files collection
Definition: bfile.c:56
void bfs_destroy(BFiles bfiles)
destroys a block files handle
Definition: bfile.c:73
Zebra dictionary.
char * dict_lookup(Dict dict, const char *p)
lookup item in dictionary
Definition: lookup.c:100
int dict_delete(Dict dict, const char *p)
deletes item from dictionary
Definition: delete.c:260
Dict dict_open(BFiles bfs, const char *name, int cache, int rw, int compact_flag, int page_size)
open dictionary
Definition: open.c:50
int dict_lookup_grep(Dict dict, const char *p, int range, void *client, int *max_pos, int init_pos, int(*f)(char *name, const char *info, void *client))
regular expression search with error correction
Definition: lookgrep.c:374
int dict_delete_subtree(Dict dict, const char *p, void *client, int(*f)(const char *info, void *client))
delete items with a given prefix from dictionary
Definition: delete.c:266
int dict_insert(Dict dict, const char *p, int userlen, void *userinfo)
insert item into dictionary
Definition: insert.c:439
int dict_scan(Dict dict, char *str, int *before, int *after, void *client, int(*f)(char *name, const char *info, int pos, void *client))
dictionary scan
Definition: scan.c:242
int dict_close(Dict dict)
closes dictionary
Definition: close.c:32
int main(int argc, char **argv)
Definition: dicttest.c:52
static int look_hits
Definition: dicttest.c:37
static int scan_handler(char *name, const char *info, int pos, void *client)
Definition: dicttest.c:46
static int grep_handler(char *name, const char *info, void *client)
Definition: dicttest.c:39
static Dict dict
Definition: dicttest.c:35
char * prog
Definition: dicttest.c:34
void res_close(Res r)
Definition: res.c:261
ZEBRA_RES res_read_file(Res r, const char *fname)
Definition: res.c:146
Res res_open(Res res_def, Res over_res)
Definition: res.c:234
const char * res_get(Res r, const char *name)
Definition: res.c:294
Definition: res.c:46