YAZ  5.23.1
ccltoken.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12 
13 #include <string.h>
14 #include <stdlib.h>
15 #include <yaz/yaz-iconv.h>
16 #include "cclp.h"
17 
18 /*
19  * token_cmp: Compare token with keyword(s)
20  * kw: Keyword list. Each keyword is separated by space.
21  * token: CCL token.
22  * return: 1 if token string matches one of the keywords in list;
23  * 0 otherwise.
24  */
25 static int token_cmp(CCL_parser cclp, const char **kw, struct ccl_token *token)
26 {
27  const char **aliases;
28  int case_sensitive = cclp->ccl_case_sensitive;
29  int i;
30 
31  aliases = ccl_qual_search_special(cclp->bibset, "case");
32  if (aliases)
33  case_sensitive = atoi(aliases[0]);
34 
35  for (i = 0; kw[i]; i++)
36  {
37  if (token->len == strlen(kw[i]))
38  {
39  if (case_sensitive)
40  {
41  if (!memcmp(kw[i], token->name, token->len))
42  return 1;
43  }
44  else
45  {
46  if (!ccl_memicmp(kw[i], token->name, token->len))
47  return 1;
48  }
49  }
50  }
51  return 0;
52 }
53 
54 /*
55  * ccl_tokenize: tokenize CCL command string.
56  * return: CCL token list.
57  */
58 struct ccl_token *ccl_parser_tokenize(CCL_parser cclp, const char *command)
59 {
60  const char **aliases;
61  const unsigned char *cp = (const unsigned char *) command;
62  struct ccl_token *first = NULL;
63  struct ccl_token *last = NULL;
64  cclp->start_pos = command;
65 
66  while (1)
67  {
68  const unsigned char *cp0 = cp;
69  while (*cp && strchr(" \t\r\n", *cp))
70  cp++;
71  if (!first)
72  {
73  first = last = (struct ccl_token *)xmalloc(sizeof(*first));
74  ccl_assert(first);
75  last->prev = NULL;
76  }
77  else
78  {
79  last->next = (struct ccl_token *)xmalloc(sizeof(*first));
80  ccl_assert(last->next);
81  last->next->prev = last;
82  last = last->next;
83  }
84  last->ws_prefix_buf = (const char *) cp0;
85  last->ws_prefix_len = cp - cp0;
86  last->next = NULL;
87  last->name = (const char *) cp;
88  last->len = 1;
89  switch (*cp++)
90  {
91  case '\0':
92  last->kind = CCL_TOK_EOL;
93  return first;
94  case '(':
95  last->kind = CCL_TOK_LP;
96  break;
97  case ')':
98  last->kind = CCL_TOK_RP;
99  break;
100  case ',':
101  last->kind = CCL_TOK_COMMA;
102  break;
103  case '%':
104  case '!':
105  last->kind = CCL_TOK_PROX;
106  while (yaz_isdigit(*cp))
107  {
108  ++ last->len;
109  cp++;
110  }
111  break;
112  case '>':
113  case '<':
114  case '=':
115  if (*cp == '=' || *cp == '<' || *cp == '>')
116  {
117  cp++;
118  last->kind = CCL_TOK_REL;
119  ++ last->len;
120  }
121  else if (cp[-1] == '=')
122  last->kind = CCL_TOK_EQ;
123  else
124  last->kind = CCL_TOK_REL;
125  break;
126  default:
127  --cp;
128  --last->len;
129 
130  last->kind = CCL_TOK_TERM;
131  last->name = (const char *) cp;
132  while (*cp && !strchr("(),%!><= \t\n\r", *cp))
133  {
134  if (*cp == '\\' && cp[1])
135  {
136  cp++;
137  ++ last->len;
138  }
139  else if (*cp == '"')
140  {
141  while (*cp)
142  {
143  cp++;
144  ++ last->len;
145  if (*cp == '\\' && cp[1])
146  {
147  cp++;
148  ++ last->len;
149  }
150  else if (*cp == '"')
151  break;
152  }
153  }
154  if (!*cp)
155  break;
156  cp++;
157  ++ last->len;
158  }
159  aliases = ccl_qual_search_special(cclp->bibset, "and");
160  if (!aliases)
161  aliases = cclp->ccl_token_and;
162  if (token_cmp(cclp, aliases, last))
163  last->kind = CCL_TOK_AND;
164 
165  aliases = ccl_qual_search_special(cclp->bibset, "or");
166  if (!aliases)
167  aliases = cclp->ccl_token_or;
168  if (token_cmp(cclp, aliases, last))
169  last->kind = CCL_TOK_OR;
170 
171  aliases = ccl_qual_search_special(cclp->bibset, "not");
172  if (!aliases)
173  aliases = cclp->ccl_token_not;
174  if (token_cmp(cclp, aliases, last))
175  last->kind = CCL_TOK_NOT;
176 
177  aliases = ccl_qual_search_special(cclp->bibset, "set");
178  if (!aliases)
179  aliases = cclp->ccl_token_set;
180 
181  if (token_cmp(cclp, aliases, last))
182  last->kind = CCL_TOK_SET;
183  }
184  }
185  return first;
186 }
187 
188 struct ccl_token *ccl_token_add(struct ccl_token *at)
189 {
190  struct ccl_token *n = (struct ccl_token *)xmalloc(sizeof(*n));
191  ccl_assert(n);
192  n->next = at->next;
193  n->prev = at;
194  at->next = n;
195  if (n->next)
196  n->next->prev = n;
197 
198  n->kind = CCL_TOK_TERM;
199  n->name = 0;
200  n->len = 0;
201  n->ws_prefix_buf = 0;
202  n->ws_prefix_len = 0;
203  return n;
204 }
205 
206 /*
207  * ccl_token_del: delete CCL tokens
208  */
209 void ccl_token_del(struct ccl_token *list)
210 {
211  struct ccl_token *list1;
212 
213  while (list)
214  {
215  list1 = list->next;
216  xfree(list);
217  list = list1;
218  }
219 }
220 
/*
 * create_ar: build a NULL-terminated array holding copies of one or two
 * strings.
 * v1: first string (always stored).
 * v2: optional second string; pass 0 to store only v1.
 * return: array with room for three pointers; entries are duplicated
 *  with xstrdup. Release with destroy_ar.
 */
static const char **create_ar(const char *v1, const char *v2)
{
    const char **ar = (const char **) xmalloc(3 * sizeof(*ar));
    int n = 0;

    ar[n++] = xstrdup(v1);
    if (v2)
        ar[n++] = xstrdup(v2);
    ar[n] = 0;                  /* terminator */
    return ar;
}
234 
/*
 * destroy_ar: free a NULL-terminated string array and its strings.
 * a: array to release; a NULL pointer is ignored.
 */
static void destroy_ar(const char **a)
{
    const char **entry;

    if (!a)
        return;
    for (entry = a; *entry; entry++)
        xfree((char *) *entry);
    xfree((char **) a);
}
245 
247 {
248  CCL_parser p = (CCL_parser)xmalloc(sizeof(*p));
249  if (!p)
250  return p;
251  p->look_token = NULL;
252  p->error_code = 0;
253  p->error_pos = NULL;
254  p->bibset = bibset;
255 
256  p->ccl_token_and = create_ar("and", 0);
257  p->ccl_token_or = create_ar("or", 0);
258  p->ccl_token_not = create_ar("not", "andnot");
259  p->ccl_token_set = create_ar("set", 0);
260  p->ccl_case_sensitive = 1;
261 
262  return p;
263 }
264 
266 {
267  if (!p)
268  return;
273  xfree(p);
274 }
275 
276 void ccl_parser_set_case(CCL_parser p, int case_sensitivity_flag)
277 {
278  if (p)
279  p->ccl_case_sensitive = case_sensitivity_flag;
280 }
281 
282 int ccl_parser_get_error(CCL_parser cclp, int *pos)
283 {
284  if (pos && cclp->error_code)
285  *pos = cclp->error_pos - cclp->start_pos;
286  return cclp->error_code;
287 }
288 
289 /*
290  * Local variables:
291  * c-basic-offset: 4
292  * c-file-style: "Stroustrup"
293  * indent-tabs-mode: nil
294  * End:
295  * vim: shiftwidth=4 tabstop=8 expandtab
296  */
297 
#define CCL_TOK_OR
Definition: cclp.h:43
void ccl_token_del(struct ccl_token *list)
Definition: ccltoken.c:209
const char ** ccl_token_set
Definition: cclp.h:82
#define CCL_TOK_PROX
Definition: cclp.h:38
const char * ws_prefix_buf
Definition: cclp.h:56
const char * start_pos
Definition: cclp.h:68
size_t ws_prefix_len
Definition: cclp.h:57
struct ccl_token * look_token
Definition: cclp.h:63
#define CCL_TOK_LP
Definition: cclp.h:39
Definition: cql.c:96
#define xstrdup(s)
utility macro which calls xstrdup_f
Definition: xmalloc.h:55
#define CCL_TOK_EQ
Definition: cclp.h:37
static const char ** create_ar(const char *v1, const char *v2)
Definition: ccltoken.c:221
const char ** ccl_token_not
Definition: cclp.h:80
#define CCL_TOK_AND
Definition: cclp.h:42
struct ccl_token * next
Definition: cclp.h:54
CCL_parser ccl_parser_create(CCL_bibset bibset)
Definition: ccltoken.c:246
struct ccl_token * ccl_parser_tokenize(CCL_parser cclp, const char *command)
Definition: ccltoken.c:58
Definition: cclp.h:50
CCL_bibset bibset
Definition: cclp.h:73
int error_code
Definition: cclp.h:66
#define CCL_TOK_NOT
Definition: cclp.h:44
int ccl_case_sensitive
Definition: cclp.h:84
#define CCL_TOK_RP
Definition: cclp.h:40
#define xfree(x)
utility macro which calls xfree_f
Definition: xmalloc.h:53
char kind
Definition: cclp.h:51
int ccl_parser_get_error(CCL_parser cclp, int *pos)
Definition: ccltoken.c:282
struct ccl_token * ccl_token_add(struct ccl_token *at)
Definition: ccltoken.c:188
void ccl_parser_set_case(CCL_parser p, int case_sensitivity_flag)
Definition: ccltoken.c:276
#define CCL_TOK_EOL
Definition: cclp.h:34
static void destroy_ar(const char **a)
Definition: ccltoken.c:235
#define CCL_TOK_SET
Definition: cclp.h:45
size_t len
Definition: cclp.h:52
const char ** ccl_token_or
Definition: cclp.h:78
#define xmalloc(x)
utility macro which calls malloc_f
Definition: xmalloc.h:49
#define CCL_TOK_TERM
Definition: cclp.h:35
static int token_cmp(CCL_parser cclp, const char **kw, struct ccl_token *token)
Definition: ccltoken.c:25
Header for YAZ iconv interface.
int ccl_memicmp(const char *s1, const char *s2, size_t n)
Definition: cclstr.c:45
CCL header with private definitions.
struct ccl_token * prev
Definition: cclp.h:55
#define yaz_isdigit(x)
Definition: yaz-iconv.h:86
#define CCL_TOK_COMMA
Definition: cclp.h:41
#define ccl_assert(x)
Definition: ccl.h:314
const char ** ccl_token_and
Definition: cclp.h:76
const char * error_pos
Definition: cclp.h:70
void ccl_parser_destroy(CCL_parser p)
Definition: ccltoken.c:265
struct ccl_parser * CCL_parser
CCL parser.
Definition: ccl.h:149
const char * name
Definition: cclp.h:53
#define CCL_TOK_REL
Definition: cclp.h:36
const char ** ccl_qual_search_special(CCL_bibset b, const char *name)
Definition: cclqual.c:401