IDZEBRA  2.2.7
invstat.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 
21 #if HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 #include <stdio.h>
25 #include <assert.h>
26 #include <string.h>
27 
28 #include "index.h"
29 
30 struct inv_stat_info {
35  int isam_bounds[20];
37  char tmp[128];
38  int isamb_levels[10][5];
41  unsigned long cksum;
42  int dumpwords;
43 };
44 
/*
 * The type to use for singletons that have no block and no block type.
 */
#define SINGLETON_TYPE 8
47 
48 static void print_dict_item (ZebraHandle zh, const char *s, zint count,
49  int firstsys, int firstseq, int lastsys, int lastseq )
50 {
51  char dst[IT_MAX_WORD+1];
52  int ord;
53  int len = key_SU_decode(&ord, (const unsigned char *) s);
54  const char *index_type;
55  const char *db = 0;
56 
57  if (!zh)
58  *dst = '\0';
59  else
60  {
61  zebraExplain_lookup_ord (zh->reg->zei, ord, &index_type, &db, 0);
62 
63  zebra_term_untrans(zh, index_type, dst, s + len);
64  }
65  printf("%02d:%10" ZINT_FORMAT0 " %s %d.%d - %d.%d\n", ord, count, dst,
66  firstsys, firstseq, lastsys, lastseq);
67 }
68 
69 static int inv_stat_handle (char *name, const char *info, int pos,
70  void *client)
71 {
72  zint occur = 0;
73  int i = 0;
74  struct inv_stat_info *stat_info = (struct inv_stat_info*) client;
75  ISAM_P isam_p;
76  int firstsys=-1;
77  int firstseq=-1;
78  int lastsys=-1;
79  int lastseq=-1;
80 
81  stat_info->no_dict_entries++;
82  stat_info->no_dict_bytes += strlen(name);
83 
84  assert (*info == sizeof(ISAM_P));
85  memcpy (&isam_p, info+1, sizeof(ISAM_P));
86 
87  if (stat_info->zh->reg->isams)
88  {
89  ISAMS_PP pp;
90  int occurx = 0;
91  struct it_key key;
92 
93  pp = isams_pp_open (stat_info->zh->reg->isams, isam_p);
94  occur = isams_pp_num (pp);
95  while (isams_pp_read(pp, &key))
96  {
97  occurx++;
98  }
99  assert (occurx == occur);
100  stat_info->no_isam_entries[0] += occur;
101  isams_pp_close (pp);
102  }
103  if (stat_info->zh->reg->isamc)
104  {
105  ISAMC_PP pp;
106  zint occurx = 0;
107  struct it_key key;
108 
109  pp = isamc_pp_open (stat_info->zh->reg->isamc, isam_p);
110  occur = isamc_pp_num (pp);
111  while (isamc_pp_read(pp, &key))
112  {
113  occurx++;
114  }
115  assert (occurx == occur);
116  stat_info->no_isam_entries[isamc_type(isam_p)] += occur;
117  isamc_pp_close (pp);
118  }
119  if (stat_info->zh->reg->isamb)
120  {
121  ISAMB_PP pp;
122  struct it_key key;
123  int cat = CAST_ZINT_TO_INT(isam_p & 3);
124  int level;
125  zint size;
126  zint blocks;
127 
128  pp = isamb_pp_open_x(stat_info->zh->reg->isamb, isam_p, &level, 0);
129 
130  while (isamb_pp_read(pp, &key))
131  {
132  occur++;
133  }
134  isamb_pp_close_x (pp, &size, &blocks);
135  stat_info->isamb_blocks[cat] += blocks;
136  stat_info->isamb_sizes[cat] += size;
137  if (level > 4)
138  level = 4;
139  stat_info->isamb_levels[cat][level] ++;
140  stat_info->no_isam_entries[cat] += occur;
141  }
142  i=0;
143  while (occur > stat_info->isam_bounds[i] && stat_info->isam_bounds[i])
144  i++;
145  ++(stat_info->isam_occurrences[i]);
146  if (stat_info->dumpwords)
147  print_dict_item(stat_info->zh, name, occur,
148  firstsys, firstseq, lastsys, lastseq);
149  return 0;
150 }
151 
152 static void show_bfs_stats(BFiles bfs)
153 {
154  int i = 0;
155  const char *directory = 0;
156  double used_bytes, max_bytes;
157  printf("Register:\n");
158  while (bfs_register_directory_stat(bfs, i, &directory,
159  &used_bytes, &max_bytes))
160  {
161  printf ("%s %10.0lf %10.0lf\n", directory, used_bytes, max_bytes);
162  i++;
163  }
164  i = 0;
165  printf("Shadow:\n");
166  while (bfs_shadow_directory_stat(bfs, i, &directory,
167  &used_bytes, &max_bytes))
168  {
169  printf ("%s %10.0lf %10.0lf\n", directory, used_bytes, max_bytes);
170  i++;
171  }
172 }
173 
175 {
176  int i, prev;
177  int before = 0;
178  zint occur;
179  int after = 1000000000;
180  struct inv_stat_info stat_info;
181  char term_dict[2*IT_MAX_WORD+2];
182 
183  if (zebra_begin_read (zh))
184  return 1;
185 
187 
188  stat_info.zh = zh;
189  stat_info.dumpwords=dumpdict;
190 
191  term_dict[0] = 1;
192  term_dict[1] = 0;
193 
194  for (i = 0; i<=SINGLETON_TYPE; i++)
195  stat_info.no_isam_entries[i] = 0;
196  stat_info.no_dict_entries = 0;
197  stat_info.no_dict_bytes = 0;
198  stat_info.isam_bounds[0] = 1;
199  stat_info.isam_bounds[1] = 2;
200  stat_info.isam_bounds[2] = 3;
201  stat_info.isam_bounds[3] = 6;
202  stat_info.isam_bounds[4] = 10;
203  stat_info.isam_bounds[5] = 20;
204  stat_info.isam_bounds[6] = 30;
205  stat_info.isam_bounds[7] = 50;
206  stat_info.isam_bounds[8] = 100;
207  stat_info.isam_bounds[9] = 200;
208  stat_info.isam_bounds[10] = 5000;
209  stat_info.isam_bounds[11] = 10000;
210  stat_info.isam_bounds[12] = 20000;
211  stat_info.isam_bounds[13] = 50000;
212  stat_info.isam_bounds[14] = 100000;
213  stat_info.isam_bounds[15] = 200000;
214  stat_info.isam_bounds[16] = 500000;
215  stat_info.isam_bounds[17] = 1000000;
216  stat_info.isam_bounds[18] = 0;
217 
218  stat_info.cksum = 0;
219 
220  for (i = 0; i<20; i++)
221  stat_info.isam_occurrences[i] = 0;
222 
223  for (i = 0; i<10; i++)
224  {
225  int j;
226  for (j = 0; j<5; j++)
227  stat_info.isamb_levels[i][j] = 0;
228  stat_info.isamb_sizes[i] = 0;
229  stat_info.isamb_blocks[i] = 0;
230  }
231 
232  dict_scan (zh->reg->dict, term_dict, &before, &after, &stat_info,
234 
235  if (zh->reg->isamc)
236  {
237  fprintf (stdout, " Blocks Occur Size KB Bytes/Entry\n");
238  for (i = 0; isamc_block_used (zh->reg->isamc, i) >= 0; i++)
239  {
240  fprintf (stdout, " %8" ZINT_FORMAT0 " %8" ZINT_FORMAT0,
241  isamc_block_used (zh->reg->isamc, i),
242  stat_info.no_isam_entries[i]);
243 
244  if (stat_info.no_isam_entries[i])
245  fprintf(stdout, " %8d %f",
246  (int) ((1023.0 + (double)
247  isamc_block_used(zh->reg->isamc, i) *
248  isamc_block_size(zh->reg->isamc,i))/1024),
249  ((double) isamc_block_used(zh->reg->isamc, i) *
251  stat_info.no_isam_entries[i]);
252  fprintf (stdout, "\n");
253  }
254  }
255 
256  if (zh->reg->isamb)
257  {
258  for (i = 0; i<4; i++)
259  {
260  int j;
261  int bsize = isamb_block_info(zh->reg->isamb, i);
262  if (bsize < 0)
263  break;
264  fprintf (stdout, "Category %d\n", i);
265  fprintf (stdout, "Block size %d\n", bsize);
266  fprintf (stdout, "Blocks: " ZINT_FORMAT "\n", stat_info.isamb_blocks[i]);
267  fprintf (stdout, "Size: " ZINT_FORMAT "\n", stat_info.isamb_sizes[i]);
268  fprintf (stdout, "Entries: " ZINT_FORMAT "\n",
269  stat_info.no_isam_entries[i]);
270  fprintf (stdout, "Total " ZINT_FORMAT "\n", stat_info.isamb_blocks[i]*
271  bsize);
272  for (j = 0; j<5; j++)
273  if (stat_info.isamb_levels[i][j])
274  fprintf (stdout, "Level%d %d\n", j,
275  stat_info.isamb_levels[i][j]);
276  fprintf (stdout, "\n");
277  }
278  }
279  fprintf (stdout, "Checksum %08lX\n", stat_info.cksum);
280 
281  fprintf (stdout, "Distinct words %d\n", stat_info.no_dict_entries);
282  occur = 0;
283  for (i = 0; i<9; i++)
284  occur += stat_info.no_isam_entries[i];
285  fprintf (stdout, "Word pos " ZINT_FORMAT "\n", occur);
286  fprintf (stdout, " Occurrences Words\n");
287  prev = 1;
288  for (i = 0; stat_info.isam_bounds[i]; i++)
289  {
290  int here = stat_info.isam_bounds[i];
291  fprintf (stdout, "%7d-%-7d %7d\n",
292  prev, here, stat_info.isam_occurrences[i]);
293  prev = here+1;
294  }
295  fprintf (stdout, "%7d- %7d\n",
296  prev, stat_info.isam_occurrences[i]);
297  rec_prstat(zh->reg->records, 0);
298  xmalloc_trav("unfreed");
299  zebra_end_read (zh);
300  return 0;
301 }
302 
303 /*
304  * Local variables:
305  * c-basic-offset: 4
306  * c-file-style: "Stroustrup"
307  * indent-tabs-mode: nil
308  * End:
309  * vim: shiftwidth=4 tabstop=8 expandtab
310  */
311 
struct BFiles_struct * zebra_get_bfs(ZebraHandle zh)
Definition: zebraapi.c:2733
int bfs_register_directory_stat(BFiles bfs, int no, const char **directory, double *used_bytes, double *max_bytes)
Definition: bfile.c:338
int bfs_shadow_directory_stat(BFiles bfs, int no, const char **directory, double *used_bytes, double *max_bytes)
Definition: bfile.c:346
int dict_scan(Dict dict, char *str, int *before, int *after, void *client, int(*f)(char *name, const char *info, int pos, void *client))
dictionary scan
Definition: scan.c:242
ZEBRA_RES zebra_end_read(ZebraHandle zh)
Definition: zebraapi.c:1676
int zebra_term_untrans(ZebraHandle zh, const char *index_type, char *dst, const char *src)
Definition: untrans.c:31
ZEBRA_RES zebra_begin_read(ZebraHandle zh)
Definition: zebraapi.c:1671
int zebra_register_statistics(ZebraHandle zh, int dumpdict)
Definition: invstat.c:174
#define SINGLETON_TYPE
Definition: invstat.c:45
static int inv_stat_handle(char *name, const char *info, int pos, void *client)
Definition: invstat.c:69
static void print_dict_item(ZebraHandle zh, const char *s, zint count, int firstsys, int firstseq, int lastsys, int lastseq)
Definition: invstat.c:48
static void show_bfs_stats(BFiles bfs)
Definition: invstat.c:152
void isamb_pp_close_x(ISAMB_PP pp, zint *size, zint *blocks)
Definition: isamb.c:1392
int isamb_block_info(ISAMB isamb, int cat)
Definition: isamb.c:1422
ISAMB_PP isamb_pp_open_x(ISAMB isamb, ISAM_P pos, int *level, int scope)
Definition: isamb.c:1345
int isamb_pp_read(ISAMB_PP pp, void *buf)
Definition: isamb.c:1503
int isamc_block_size(ISAMC is, int type)
Definition: isamc.c:179
zint isamc_pp_num(ISAMC_PP pp)
Definition: isamc.c:587
#define isamc_type(x)
Definition: isamc.h:90
int isamc_pp_read(ISAMC_PP pp, void *buf)
Definition: isamc.c:511
void isamc_pp_close(ISAMC_PP pp)
Definition: isamc.c:458
ISAMC_PP isamc_pp_open(ISAMC is, ISAM_P pos)
Definition: isamc.c:467
zint isamc_block_used(ISAMC is, int type)
Definition: isamc.c:172
zint ISAM_P
Definition: isamc.h:28
int isams_pp_num(ISAMS_PP pp)
Definition: isams.c:222
int isams_pp_read(ISAMS_PP pp, void *buf)
Definition: isams.c:227
void isams_pp_close(ISAMS_PP pp)
Definition: isams.c:215
ISAMS_PP isams_pp_open(ISAMS is, ISAM_P pos)
Definition: isams.c:187
int key_SU_decode(int *ch, const unsigned char *out)
Definition: su_codec.c:64
#define IT_MAX_WORD
Definition: it_key.h:27
void rec_prstat(Records p, int verbose)
Definition: records.c:1095
zint isamb_sizes[10]
Definition: invstat.c:39
int isamb_levels[10][5]
Definition: invstat.c:38
int dumpwords
Definition: invstat.c:42
zint isamb_blocks[10]
Definition: invstat.c:40
int no_dict_bytes
Definition: invstat.c:34
int isam_occurrences[20]
Definition: invstat.c:36
int no_dict_entries
Definition: invstat.c:33
char tmp[128]
Definition: invstat.c:37
int isam_bounds[20]
Definition: invstat.c:35
ZebraHandle zh
Definition: invstat.c:31
zint no_isam_entries[9]
Definition: invstat.c:32
unsigned long cksum
Definition: invstat.c:41
it_key
Definition: it_key.h:30
ZebraExplainInfo zei
Definition: index.h:139
ISAMS isams
Definition: index.h:129
ISAMB isamb
Definition: index.h:131
Records records
Definition: index.h:138
ISAMC isamc
Definition: index.h:130
Dict dict
Definition: index.h:132
struct zebra_register * reg
Definition: index.h:174
long zint
Zebra integer.
Definition: util.h:66
#define ZINT_FORMAT
Definition: util.h:72
#define CAST_ZINT_TO_INT(x)
Definition: util.h:96
#define ZINT_FORMAT0
Definition: util.h:67
int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord, const char **index_type, const char **db, const char **string_index)
Definition: zinfo.c:1478