IDZEBRA  2.1.3
mod_safari.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <yaz/oid_db.h>
24 #include <stdio.h>
25 #include <assert.h>
26 #include <ctype.h>
27 
28 #include <idzebra/util.h>
29 #include <idzebra/recctrl.h>
30 
/* Per-instance state of the safari record filter. */
struct filter_info {
    /* 0 for the plain line format (set by filter_init), 1 when every
       record line also carries a segment number (set by filter_init2). */
    int segments;
};
34 
35 static void *filter_init(Res res, RecType recType)
36 {
37  struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
38  tinfo->segments = 0;
39  return tinfo;
40 }
41 
42 static void *filter_init2(Res res, RecType recType)
43 {
44  struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
45  tinfo->segments = 1;
46  return tinfo;
47 }
48 
49 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
50 {
51  return ZEBRA_OK;
52 }
53 
/*
 * Destroy handler: releases the struct filter_info that filter_init or
 * filter_init2 allocated and that is handed in as clientData.
 */
static void filter_destroy(void *clientData)
{
    xfree(clientData);
}
59 
/* Small buffered reader on top of the record stream of an extract call. */
struct fi_info {
    /* extract control block whose stream is being read */
    struct recExtractCtrl *p;
    /* read buffer; fi_open allocates 4096 bytes for it */
    char *buf;
    /* index in buf of the next byte fi_getchar will hand out */
    int offset;
    /* result of the most recent readf call (bytes available in buf);
       a value <= 0 means the stream is exhausted */
    int max;
};
66 
67 static struct fi_info *fi_open(struct recExtractCtrl *p)
68 {
69  struct fi_info *fi = (struct fi_info *) xmalloc (sizeof(*fi));
70 
71  fi->p = p;
72  fi->buf = (char *) xmalloc (4096);
73  fi->offset = 1;
74  fi->max = 1;
75  return fi;
76 }
77 
78 static int fi_getchar(struct fi_info *fi, char *dst)
79 {
80  if (fi->offset >= fi->max)
81  {
82  if (fi->max <= 0)
83  return 0;
84  fi->max = fi->p->stream->readf(fi->p->stream, fi->buf, 4096);
85  fi->offset = 0;
86  if (fi->max <= 0)
87  return 0;
88  }
89  *dst = fi->buf[(fi->offset)++];
90  return 1;
91 }
92 
/*
 * Reads one line from the stream into dst.
 *
 * fi   buffered reader created by fi_open
 * dst  destination; must have room for max + 1 bytes because the
 *      terminating NUL is stored after at most max characters
 * max  maximum number of characters kept; the rest of an over-long
 *      line is read and discarded
 *
 * The line terminator '\n' is consumed but not stored.  Returns 1 when
 * a line was read and 0 when the stream ended before any character was
 * seen.  A final line that is not terminated by '\n' is returned as a
 * regular line instead of being dropped.
 */
static int fi_gets(struct fi_info *fi, char *dst, int max)
{
    int l = 0;
    int got_any = 0;
    char dstbyte;

    while (fi_getchar(fi, &dstbyte))
    {
        if (dstbyte == '\n')
        {
            dst[l] = '\0';
            return 1;
        }
        got_any = 1;
        if (l < max)
            dst[l++] = dstbyte;
    }
    /* end of stream: hand out whatever was collected for the last,
       unterminated line; report end of input only when there was none */
    dst[l] = '\0';
    return got_any;
}
109 
110 static void fi_close (struct fi_info *fi)
111 {
112  xfree (fi->buf);
113  xfree (fi);
114 }
115 
116 static int filter_extract(void *clientData, struct recExtractCtrl *p)
117 {
118  struct filter_info *tinfo = clientData;
119  char line[512];
120  RecWord recWord;
121  int ret = RECCTRL_EXTRACT_OK;
122  struct fi_info *fi = fi_open(p);
123 
124 #if 0
125  yaz_log(YLOG_LOG, "filter_extract off=%ld",
126  (long) (*fi->p->tellf)(fi->p->fh));
127 #endif
128  (*p->init)(p, &recWord);
129 
130  if (!fi_gets(fi, line, sizeof(line)-1))
131  ret = RECCTRL_EXTRACT_EOF;
132  else
133  {
134  sscanf(line, "%255s", p->match_criteria);
135  while (fi_gets(fi, line, sizeof(line)-1))
136  {
137  int nor = 0;
138  char field[40];
139  const char *cp = line;
140  char type_cstr[2];
141 #if 0
142  yaz_log(YLOG_LOG, "safari line: %s", line);
143 #endif
144  type_cstr[1] = '\0';
145  if (*cp >= '0' && *cp <= '9')
146  type_cstr[0] = '0'; /* the default is 0 (raw) */
147  else
148  type_cstr[0] = *cp++; /* type given */
149  type_cstr[1] = '\0';
150 
151  recWord.index_type = type_cstr;
152  if (tinfo->segments)
153  {
154  if (sscanf(cp, ZINT_FORMAT " " ZINT_FORMAT " " ZINT_FORMAT
155  ZINT_FORMAT " %39s %n",
156  &recWord.record_id, &recWord.section_id,
157  &recWord.segment,
158  &recWord.seqno,
159  field, &nor) < 5)
160  {
161  yaz_log(YLOG_WARN, "Bad safari record line: %s", line);
163  break;
164  }
165  }
166  else
167  {
168  if (sscanf(cp, ZINT_FORMAT " " ZINT_FORMAT " " ZINT_FORMAT " %39s %n",
169  &recWord.record_id, &recWord.section_id, &recWord.seqno,
170  field, &nor) < 4)
171  {
172  yaz_log(YLOG_WARN, "Bad safari record line: %s", line);
174  break;
175  }
176  }
177  for (cp = cp + nor; *cp == ' '; cp++)
178  ;
179  recWord.index_name = field;
180  recWord.term_buf = cp;
181  recWord.term_len = strlen(cp);
182  (*p->tokenAdd)(&recWord);
183  }
184  }
185  fi_close(fi);
186  return ret;
187 }
188 
189 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
190 {
191  int r, filter_ptr = 0;
192  static char *filter_buf = NULL;
193  static int filter_size = 0;
194  int make_header = 1;
195  int make_body = 1;
196  const char *elementSetName = NULL;
197  int no_lines = 0;
198 
199  if (p->comp && p->comp->which == Z_RecordComp_simple &&
200  p->comp->u.simple->which == Z_ElementSetNames_generic)
201  elementSetName = p->comp->u.simple->u.generic;
202 
203  if (elementSetName)
204  {
205  /* don't make header for the R(aw) element set name */
206  if (!strcmp(elementSetName, "R"))
207  {
208  make_header = 0;
209  make_body = 1;
210  }
211  /* only make header for the H(eader) element set name */
212  else if (!strcmp(elementSetName, "H"))
213  {
214  make_header = 1;
215  make_body = 0;
216  }
217  }
218  while (1)
219  {
220  if (filter_ptr + 4096 >= filter_size)
221  {
222  char *nb;
223 
224  filter_size = 2*filter_size + 8192;
225  nb = (char *) xmalloc (filter_size);
226  if (filter_buf)
227  {
228  memcpy (nb, filter_buf, filter_ptr);
229  xfree (filter_buf);
230  }
231  filter_buf = nb;
232  }
233  if (make_header && filter_ptr == 0)
234  {
235  if (p->score >= 0)
236  {
237  sprintf (filter_buf, "Rank: %d\n", p->score);
238  filter_ptr = strlen(filter_buf);
239  }
240  sprintf (filter_buf + filter_ptr, "Local Number: " ZINT_FORMAT "\n",
241  p->localno);
242  filter_ptr = strlen(filter_buf);
243  if (p->fname)
244  {
245  sprintf (filter_buf + filter_ptr, "Filename: %s\n", p->fname);
246  filter_ptr = strlen(filter_buf);
247  }
248  strcpy(filter_buf+filter_ptr++, "\n");
249  }
250  if (!make_body)
251  break;
252  r = p->stream->readf(p->stream, filter_buf + filter_ptr, 4096);
253  if (r <= 0)
254  break;
255  filter_ptr += r;
256  }
257  filter_buf[filter_ptr] = '\0';
258  if (elementSetName)
259  {
260  if (!strcmp (elementSetName, "B"))
261  no_lines = 4;
262  if (!strcmp (elementSetName, "M"))
263  no_lines = 20;
264  }
265  if (no_lines)
266  {
267  char *p = filter_buf;
268  int i = 0;
269 
270  while (++i <= no_lines && (p = strchr (p, '\n')))
271  p++;
272  if (p)
273  {
274  p[1] = '\0';
275  filter_ptr = p-filter_buf;
276  }
277  }
278  p->output_format = yaz_oid_recsyn_sutrs;
279  p->rec_buf = filter_buf;
280  p->rec_len = filter_ptr;
281  return 0;
282 }
283 
284 static struct recType filter_type = {
285  0,
286  "safari",
287  filter_init,
292 };
293 
294 static struct recType filter_type2 = {
295  0,
296  "safari2",
297  filter_init2,
302 };
303 
304 RecType
305 #if IDZEBRA_STATIC_SAFARI
306 idzebra_filter_safari
307 #else
309 #endif
310 
311 [] = {
312  &filter_type,
313  &filter_type2,
314  0,
315 };
316 /*
317  * Local variables:
318  * c-basic-offset: 4
319  * c-file-style: "Stroustrup"
320  * indent-tabs-mode: nil
321  * End:
322  * vim: shiftwidth=4 tabstop=8 expandtab
323  */
324 
RecType idzebra_filter[]
Definition: mod_safari.c:311
#define RECCTRL_EXTRACT_OK
Definition: recctrl.h:163
void(* tokenAdd)(RecWord *w)
Definition: recctrl.h:105
const char * index_name
Definition: recctrl.h:54
#define ZEBRA_OK
Definition: util.h:82
static void * filter_init2(Res res, RecType recType)
Definition: mod_safari.c:42
int term_len
Definition: recctrl.h:58
static int fi_getchar(struct fi_info *fi, char *dst)
Definition: mod_safari.c:78
static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
Definition: mod_safari.c:49
zint section_id
Definition: recctrl.h:66
static void fi_close(struct fi_info *fi)
Definition: mod_safari.c:110
int segments
Definition: mod_safari.c:32
#define RECCTRL_EXTRACT_ERROR_GENERIC
Definition: recctrl.h:165
int max
Definition: mod_safari.c:64
int offset
Definition: mod_safari.c:63
#define RECCTRL_EXTRACT_EOF
Definition: recctrl.h:164
zint record_id
Definition: recctrl.h:64
char * buf
Definition: mod_safari.c:62
zint seqno
Definition: recctrl.h:60
Definition: res.c:46
const Odr_oid * output_format
Definition: recctrl.h:134
static void * filter_init(Res res, RecType recType)
Definition: mod_safari.c:35
const char * index_type
Definition: recctrl.h:52
void(* init)(struct recExtractCtrl *p, RecWord *w)
Definition: recctrl.h:103
int(* readf)(struct ZebraRecStream *s, char *buf, size_t count)
read function
Definition: recctrl.h:75
static void filter_destroy(void *clientData)
Definition: mod_safari.c:54
char match_criteria[256]
Definition: recctrl.h:109
struct recExtractCtrl * p
Definition: mod_safari.c:61
Z_RecordComposition * comp
Definition: recctrl.h:124
struct ZebraRecStream * stream
Definition: recctrl.h:119
const char * term_buf
Definition: recctrl.h:56
static int filter_retrieve(void *clientData, struct recRetrieveCtrl *p)
Definition: mod_safari.c:189
void * rec_buf
Definition: recctrl.h:135
record extract for indexing
Definition: recctrl.h:101
char * fname
Definition: recctrl.h:130
static struct fi_info * fi_open(struct recExtractCtrl *p)
Definition: mod_safari.c:67
short ZEBRA_RES
Common return type for Zebra API.
Definition: util.h:80
static int filter_extract(void *clientData, struct recExtractCtrl *p)
Definition: mod_safari.c:116
zint segment
Definition: recctrl.h:62
struct ZebraRecStream * stream
Definition: recctrl.h:102
static struct recType filter_type
Definition: mod_safari.c:284
static struct recType filter_type2
Definition: mod_safari.c:294
static int fi_gets(struct fi_info *fi, char *dst, int max)
Definition: mod_safari.c:93
#define ZINT_FORMAT
Definition: util.h:72