YAZ  4.2.57
tokenizer.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2013 Index Data
3  * See the file LICENSE for details.
4  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12 
13 #include <assert.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <yaz/log.h>
17 #include <yaz/wrbuf.h>
18 #include <yaz/tokenizer.h>
19 
20 struct yaz_tok_parse {
23  int look;
24 
28 };
29 
/** \brief Tokenizer configuration; shared between parsers through ref_count. */
struct yaz_tok_cfg {
    int ref_count;            /* number of holders; storage is released when it reaches 0 */
    char *comment;            /* bytes that start a comment (tokenizing stops there) */
    char *white_space;        /* bytes skipped before a token and ending an unquoted string */
    char *single_tokens;      /* bytes that are each returned as a token of their own */
    char *quote_tokens_begin; /* bytes that open a quoted string */
    char *quote_tokens_end;   /* closing byte for the opener at the same index */
};
38 
39 void yaz_tok_cfg_single_tokens(yaz_tok_cfg_t t, const char *simple)
40 {
41  xfree(t->single_tokens);
42  t->single_tokens = xstrdup(simple);
43 }
44 
46 {
47  yaz_tok_cfg_t t = (yaz_tok_cfg_t) xmalloc(sizeof(*t));
48  t->white_space = xstrdup(" \t\r\n");
49  t->single_tokens = xstrdup("");
50  t->quote_tokens_begin = xstrdup("\"");
51  t->quote_tokens_end = xstrdup("\"");
52  t->comment = xstrdup("#");
53  t->ref_count = 1;
54  return t;
55 }
56 
58 {
59  t->ref_count--;
60  if (t->ref_count == 0)
61  {
62  xfree(t->white_space);
63  xfree(t->single_tokens);
66  xfree(t->comment);
67  xfree(t);
68  }
69 }
70 
/**
 * \brief Reader callback that takes bytes from a NUL-terminated buffer.
 * \param vp address of the cursor; *vp points at the next byte to deliver
 * \return the byte at the cursor (converted from plain char), or 0 at the
 *         terminating NUL
 *
 * The cursor advances only when a non-zero byte is delivered, so once the
 * NUL is reached every further call returns 0 again.
 */
static int read_buf(void **vp)
{
    /* go through *vp itself rather than viewing the void * object as a
       const char *, which the effective-type rules do not permit */
    const char *cp = (const char *) *vp;
    int ch = *cp;

    if (ch)
        *vp = (void *) (cp + 1);
    return ch;
}
82 
84 {
85  return yaz_tok_parse_create(t, read_buf, (void *) buf);
86 }
87 
88 static int get_byte(yaz_tok_parse_t tp)
89 {
90  int ch = tp->unget_byte;
91  assert(tp->get_byte_func);
92  if (ch)
93  tp->unget_byte = 0;
94  else
95  ch = tp->get_byte_func(&tp->get_byte_data);
96  return ch;
97 }
98 
99 static void unget_byte(yaz_tok_parse_t tp, int ch)
100 {
101  tp->unget_byte = ch;
102 }
103 
106  void *vp)
107 {
108  yaz_tok_parse_t tp = (yaz_tok_parse_t) xmalloc(sizeof(*tp));
109 
110  tp->cfg = t;
111  tp->cfg->ref_count++;
112  tp->get_byte_func = h;
113  tp->get_byte_data = vp;
114 
115  tp->look = YAZ_TOK_ERROR;
116  tp->unget_byte = 0;
117 
118  tp->wr_string = wrbuf_alloc();
119  return tp;
120 }
121 
122 
124 {
127  xfree(tp);
128 }
129 
131 {
132  yaz_tok_cfg_t t = tp->cfg;
133  const char *cp;
134  int ch = get_byte(tp);
135 
136  /* skip white space */
137  while (ch && strchr(t->white_space, ch))
138  ch = get_byte(tp);
139  if (!ch)
140  ch = YAZ_TOK_EOF;
141  else if (strchr(t->comment, ch))
142  ch = YAZ_TOK_EOF;
143  else if ((cp = strchr(t->single_tokens, ch)))
144  ch = *cp; /* single token match */
145  else if ((cp = strchr(t->quote_tokens_begin, ch)))
146  { /* quoted string */
147  int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin];
148  ch = get_byte(tp);
149  wrbuf_rewind(tp->wr_string);
150  while (ch && ch != end_ch)
151  wrbuf_putc(tp->wr_string, ch);
152  if (!ch)
153  ch = YAZ_TOK_ERROR;
154  else
155  ch = YAZ_TOK_QSTRING;
156  }
157  else
158  { /* unquoted string */
159  wrbuf_rewind(tp->wr_string);
160  while (ch && !strchr(t->white_space, ch)
161  && !strchr(t->single_tokens, ch)
162  && !strchr(t->comment, ch))
163  {
164  wrbuf_putc(tp->wr_string, ch);
165  ch = get_byte(tp);
166  }
167  unget_byte(tp, ch);
168  ch = YAZ_TOK_STRING;
169  }
170  tp->look = ch;
171  return ch;
172 }
173 
175 {
176  return wrbuf_cstr(tp->wr_string);
177 }
178 
179 /*
180  * Local variables:
181  * c-basic-offset: 4
182  * c-file-style: "Stroustrup"
183  * indent-tabs-mode: nil
184  * End:
185  * vim: shiftwidth=4 tabstop=8 expandtab
186  */
187