IDZEBRA  2.1.2
dicttest.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26 #include <ctype.h>
27 
28 #include <idzebra/dict.h>
29 #include <idzebra/util.h>
30 #include <idzebra/res.h>
31 #include <yaz/yaz-util.h>
32 
33 char *prog; /* program name; main() sets it to argv[0] and uses it in the usage text and for yaz_log_init() */
34 static Dict dict; /* dictionary handle; opened with dict_open() and closed with dict_close() in main() */
35 
36 static int look_hits; /* incremented by grep_handler(); main() resets it to 0 before each per-word grep lookup */
37 
38 static int grep_handler (char *name, const char *info, void *client)
39 {
40  look_hits++;
41  printf ("%s\n", name);
42  return 0;
43 }
44 
/* Callback handed to dict_scan(): writes the visited term on a line of
   its own.  The info, pos and client arguments are not used.
   Always returns 0. */
static int scan_handler (char *name, const char *info, int pos, void *client)
{
    (void) info;
    (void) pos;
    (void) client;

    puts (name);
    return 0;
}
50 
51 int main (int argc, char **argv)
52 {
53  Res my_resource = 0;
54  BFiles bfs;
55  const char *name = NULL;
56  const char *inputfile = NULL;
57  const char *config = NULL;
58  const char *delete_term = NULL;
59  int scan_the_thing = 0;
60  int do_delete = 0;
61  int range = -1;
62  int srange = 0;
63  int rw = 0;
64  int infosize = 4;
65  int cache = 10;
66  int ret;
67  int unique = 0;
68  char *grep_pattern = NULL;
69  char *arg;
70  int no_of_iterations = 0;
71  int no_of_new = 0, no_of_same = 0, no_of_change = 0;
72  int no_of_hits = 0, no_of_misses = 0, no_not_found = 0, no_of_deleted = 0;
73  int max_pos;
74 
75  prog = argv[0];
76  if (argc < 2)
77  {
78  fprintf (stderr, "usage:\n "
79  " %s [-d] [-D t] [-S] [-r n] [-p n] [-u] [-g pat] [-s n] "
80  "[-v n] [-i f] [-w] [-c n] config file\n\n",
81  prog);
82  fprintf (stderr, " -d delete instead of insert\n");
83  fprintf (stderr, " -D t delete subtree instead of insert\n");
84  fprintf (stderr, " -r n set regular match range\n");
85  fprintf (stderr, " -p n set regular match start range\n");
86  fprintf (stderr, " -u report if keys change during insert\n");
87  fprintf (stderr, " -g p try pattern n (see -r)\n");
88  fprintf (stderr, " -s n set info size to n (instead of 4)\n");
89  fprintf (stderr, " -v n set logging level\n");
90  fprintf (stderr, " -i f read file with words\n");
91  fprintf (stderr, " -w insert/delete instead of lookup\n");
92  fprintf (stderr, " -c n cache size (number of pages)\n");
93  fprintf (stderr, " -S scan the dictionary\n");
94  exit (1);
95  }
96  while ((ret = options ("D:Sdr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2)
97  {
98  if (ret == 0)
99  {
100  if (!config)
101  config = arg;
102  else if (!name)
103  name = arg;
104  else
105  {
106  yaz_log (YLOG_FATAL, "too many files specified\n");
107  exit (1);
108  }
109  }
110  else if (ret == 'D')
111  {
112  delete_term = arg;
113  }
114  else if (ret == 'd')
115  do_delete = 1;
116  else if (ret == 'g')
117  {
118  grep_pattern = arg;
119  }
120  else if (ret == 'r')
121  {
122  range = atoi (arg);
123  }
124  else if (ret == 'p')
125  {
126  srange = atoi (arg);
127  }
128  else if (ret == 'u')
129  {
130  unique = 1;
131  }
132  else if (ret == 'c')
133  {
134  cache = atoi(arg);
135  if (cache<2)
136  cache = 2;
137  }
138  else if (ret == 'w')
139  rw = 1;
140  else if (ret == 'i')
141  inputfile = arg;
142  else if (ret == 'S')
143  scan_the_thing = 1;
144  else if (ret == 's')
145  {
146  infosize = atoi(arg);
147  }
148  else if (ret == 'v')
149  {
150  yaz_log_init (yaz_log_mask_str(arg), prog, NULL);
151  }
152  else
153  {
154  yaz_log (YLOG_FATAL, "Unknown option '-%s'", arg);
155  exit (1);
156  }
157  }
158  if (!config || !name)
159  {
160  yaz_log (YLOG_FATAL, "no config and/or dictionary specified");
161  exit (1);
162  }
163  my_resource = res_open(0, 0);
164  if (!my_resource)
165  {
166  yaz_log (YLOG_FATAL, "cannot open resource `%s'", config);
167  exit (1);
168  }
169  res_read_file(my_resource, config);
170 
171  bfs = bfs_create (res_get(my_resource, "register"), 0);
172  if (!bfs)
173  {
174  yaz_log (YLOG_FATAL, "bfs_create fail");
175  exit (1);
176  }
177  dict = dict_open (bfs, name, cache, rw, 0, 4096);
178  if (!dict)
179  {
180  yaz_log (YLOG_FATAL, "dict_open fail of `%s'", name);
181  exit (1);
182  }
183  if (inputfile)
184  {
185  FILE *ipf;
186  char ipf_buf[1024];
187  int line = 1;
188  char infobytes[120];
189  memset (infobytes, 0, 120);
190 
191  if (!(ipf = fopen(inputfile, "r")))
192  {
193  yaz_log (YLOG_FATAL|YLOG_ERRNO, "cannot open %s", inputfile);
194  exit (1);
195  }
196 
197  while (fgets (ipf_buf, 1023, ipf))
198  {
199  char *ipf_ptr = ipf_buf;
200  sprintf (infobytes, "%d", line);
201  for (;*ipf_ptr && *ipf_ptr != '\n';ipf_ptr++)
202  {
203  if (isalpha(*ipf_ptr) || *ipf_ptr == '_')
204  {
205  int i = 1;
206  while (ipf_ptr[i] && (isalnum(ipf_ptr[i]) ||
207  ipf_ptr[i] == '_'))
208  i++;
209  if (ipf_ptr[i])
210  ipf_ptr[i++] = '\0';
211  if (rw)
212  {
213  if (do_delete)
214  switch (dict_delete (dict, ipf_ptr))
215  {
216  case 0:
217  no_not_found++;
218  break;
219  case 1:
220  no_of_deleted++;
221  }
222  else
223  switch(dict_insert (dict, ipf_ptr,
224  infosize, infobytes))
225  {
226  case 0:
227  no_of_new++;
228  break;
229  case 1:
230  no_of_change++;
231  if (unique)
232  yaz_log (YLOG_LOG, "%s change\n", ipf_ptr);
233  break;
234  case 2:
235  if (unique)
236  yaz_log (YLOG_LOG, "%s duplicate\n", ipf_ptr);
237  no_of_same++;
238  break;
239  }
240  }
241  else if(range < 0)
242  {
243  char *cp;
244 
245  cp = dict_lookup (dict, ipf_ptr);
246  if (cp && *cp)
247  no_of_hits++;
248  else
249  no_of_misses++;
250  }
251  else
252  {
253  look_hits = 0;
254  dict_lookup_grep (dict, ipf_ptr, range, NULL,
255  &max_pos, srange, grep_handler);
256  if (look_hits)
257  no_of_hits++;
258  else
259  no_of_misses++;
260  }
261  ++no_of_iterations;
262  if ((no_of_iterations % 10000) == 0)
263  {
264  printf ("."); fflush(stdout);
265  }
266  ipf_ptr += (i-1);
267  }
268  }
269  ++line;
270  }
271  fclose (ipf);
272  }
273  if (rw && delete_term)
274  {
275  yaz_log (YLOG_LOG, "dict_delete_subtree %s", delete_term);
276  dict_delete_subtree (dict, delete_term, 0, 0);
277  }
278  if (grep_pattern)
279  {
280  if (range < 0)
281  range = 0;
282  yaz_log (YLOG_LOG, "Grepping '%s'", grep_pattern);
283  dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos,
284  srange, grep_handler);
285  }
286  if (rw)
287  {
288  yaz_log (YLOG_LOG, "Iterations.... %d", no_of_iterations);
289  if (do_delete)
290  {
291  yaz_log (YLOG_LOG, "No of deleted. %d", no_of_deleted);
292  yaz_log (YLOG_LOG, "No not found.. %d", no_not_found);
293  }
294  else
295  {
296  yaz_log (YLOG_LOG, "No of new..... %d", no_of_new);
297  yaz_log (YLOG_LOG, "No of change.. %d", no_of_change);
298  }
299  }
300  else
301  {
302  yaz_log (YLOG_LOG, "Lookups....... %d", no_of_iterations);
303  yaz_log (YLOG_LOG, "No of hits.... %d", no_of_hits);
304  yaz_log (YLOG_LOG, "No of misses.. %d", no_of_misses);
305  }
306  if (scan_the_thing)
307  {
308  char term_dict[1024];
309 
310  int before = 1000000;
311  int after = 1000000;
312  yaz_log (YLOG_LOG, "dict_scan");
313  term_dict[0] = 1;
314  term_dict[1] = 0;
315  dict_scan (dict, term_dict, &before, &after, 0, scan_handler);
316  }
317  dict_close (dict);
318  bfs_destroy (bfs);
319  res_close (my_resource);
320  return 0;
321 }
322 /*
323  * Local variables:
324  * c-basic-offset: 4
325  * c-file-style: "Stroustrup"
326  * indent-tabs-mode: nil
327  * End:
328  * vim: shiftwidth=4 tabstop=8 expandtab
329  */
330 
const char * res_get(Res r, const char *name)
Definition: res.c:294
int main(int argc, char **argv)
Definition: dicttest.c:51
Zebra dictionary.
int dict_close(Dict dict)
closes dictionary
Definition: close.c:32
char * prog
Definition: dicttest.c:33
BFiles bfs_create(const char *spec, const char *base)
creates a Block files collection
Definition: bfile.c:63
static int grep_handler(char *name, const char *info, void *client)
Definition: dicttest.c:38
int dict_insert(Dict dict, const char *p, int userlen, void *userinfo)
insert item into dictionary
Definition: insert.c:438
Definition: res.c:46
static Dict dict
Definition: dicttest.c:34
int dict_delete_subtree(Dict dict, const char *p, void *client, int(*f)(const char *info, void *client))
delete items with a given prefix from dictionary
Definition: delete.c:266
int dict_scan(Dict dict, char *str, int *before, int *after, void *client, int(*f)(char *name, const char *info, int pos, void *client))
dictionary scan
Definition: scan.c:242
Res res_open(Res res_def, Res over_res)
Definition: res.c:234
char * dict_lookup(Dict dict, const char *p)
lookup item in dictionary
Definition: lookup.c:100
static int scan_handler(char *name, const char *info, int pos, void *client)
Definition: dicttest.c:45
int rw
Definition: dict-p.h:73
void bfs_destroy(BFiles bfiles)
destroys a block files handle
Definition: bfile.c:80
void res_close(Res r)
Definition: res.c:261
Dict dict_open(BFiles bfs, const char *name, int cache, int rw, int compact_flag, int page_size)
open dictionary
Definition: open.c:50
int dict_lookup_grep(Dict dict, const char *p, int range, void *client, int *max_pos, int init_pos, int(*f)(char *name, const char *info, void *client))
regular expression search with error correction
Definition: lookgrep.c:374
ZEBRA_RES res_read_file(Res r, const char *fname)
Definition: res.c:146
static int look_hits
Definition: dicttest.c:36
int dict_delete(Dict dict, const char *p)
deletes item from dictionary
Definition: delete.c:260