YAZ
4.2.57
Main Page
Data Structures
Files
File List
Globals
src
tokenizer.c
Go to the documentation of this file.
1
/* This file is part of the YAZ toolkit.
2
* Copyright (C) 1995-2013 Index Data
3
* See the file LICENSE for details.
4
*/
9
#if HAVE_CONFIG_H
10
#include <
config.h
>
11
#endif
12
13
#include <assert.h>
14
#include <stdio.h>
15
#include <string.h>
16
#include <
yaz/log.h
>
17
#include <
yaz/wrbuf.h
>
18
#include <
yaz/tokenizer.h
>
19
20
struct
yaz_tok_parse
{
21
int
unget_byte
;
22
WRBUF
wr_string
;
23
int
look
;
24
25
yaz_tok_cfg_t
cfg
;
26
yaz_tok_get_byte_t
get_byte_func
;
27
void
*
get_byte_data
;
28
};
29
30
/** \brief reference-counted tokenizer configuration (character classes) */
struct yaz_tok_cfg {
    /** number of holders; the configuration is freed when this hits 0 */
    int ref_count;
    /** characters that start a comment */
    char *comment;
    /** characters that separate tokens and are skipped */
    char *white_space;
    /** characters that each form a token of their own */
    char *single_tokens;
    /** characters that open a quoted string */
    char *quote_tokens_begin;
    /** closing quote characters; entry i closes the quote opened by
        entry i of quote_tokens_begin */
    char *quote_tokens_end;
};
38
39
/** \brief replaces the set of single-character tokens
    \param t tokenizer configuration
    \param simple string holding every character that is a token by itself

    The string is copied; the previously stored set is released first.
*/
void yaz_tok_cfg_single_tokens(yaz_tok_cfg_t t, const char *simple)
{
    char *previous = t->single_tokens;

    xfree(previous);
    t->single_tokens = xstrdup(simple);
}
44
45
yaz_tok_cfg_t
yaz_tok_cfg_create
(
void
)
46
{
47
yaz_tok_cfg_t
t = (
yaz_tok_cfg_t
)
xmalloc
(
sizeof
(*t));
48
t->
white_space
=
xstrdup
(
" \t\r\n"
);
49
t->
single_tokens
=
xstrdup
(
""
);
50
t->
quote_tokens_begin
=
xstrdup
(
"\""
);
51
t->
quote_tokens_end
=
xstrdup
(
"\""
);
52
t->
comment
=
xstrdup
(
"#"
);
53
t->
ref_count
= 1;
54
return
t;
55
}
56
57
/** \brief drops one reference to a tokenizer configuration
    \param t tokenizer configuration

    The character sets and the configuration itself are released only
    when the last reference goes away.
*/
void yaz_tok_cfg_destroy(yaz_tok_cfg_t t)
{
    if (--t->ref_count != 0)
        return; /* still referenced elsewhere */

    xfree(t->white_space);
    xfree(t->single_tokens);
    xfree(t->quote_tokens_begin);
    xfree(t->quote_tokens_end);
    xfree(t->comment);
    xfree(t);
}
70
71
/** \brief byte source that reads from a NUL-terminated string
    \param vp address of the read cursor (really a const char *)
    \returns the byte at the cursor, or 0 at the end of the string

    The cursor moves forward one position for every non-zero byte that is
    returned; it stays on the terminating NUL, so further calls keep
    returning 0.
*/
static int read_buf(void **vp)
{
    const char **cursor = (const char **) vp;
    int byte = **cursor;

    if (byte != 0)
        ++*cursor;
    return byte;
}
82
83
/** \brief creates a tokenizer that reads from a NUL-terminated string
    \param t tokenizer configuration
    \param buf string to tokenize
    \returns new tokenizer handle

    The string is not copied here; read_buf() walks it in place.
*/
yaz_tok_parse_t yaz_tok_parse_buf(yaz_tok_cfg_t t, const char *buf)
{
    void *cursor = (void *) buf;

    return yaz_tok_parse_create(t, read_buf, cursor);
}
87
88
/** \brief fetches the next input byte for a tokenizer
    \param tp tokenizer handle
    \returns the pending pushed-back byte if there is one (it is consumed),
             otherwise whatever the byte-source callback returns
*/
static int get_byte(yaz_tok_parse_t tp)
{
    int pending = tp->unget_byte;

    assert(tp->get_byte_func);
    if (pending == 0)
        return tp->get_byte_func(&tp->get_byte_data);

    tp->unget_byte = 0;
    return pending;
}
98
99
/** \brief pushes one byte back so that the next get_byte() returns it
    \param tp tokenizer handle
    \param ch byte to push back

    Only a single byte can be pending; a later call overwrites it.
    Pushing back 0 leaves nothing pending, because get_byte() treats 0
    as "no byte stored".
*/
static void unget_byte(yaz_tok_parse_t tp, int ch)
{
    tp->unget_byte = ch;
}
103
104
yaz_tok_parse_t
yaz_tok_parse_create
(
yaz_tok_cfg_t
t,
105
yaz_tok_get_byte_t
h,
106
void
*vp)
107
{
108
yaz_tok_parse_t
tp = (
yaz_tok_parse_t
)
xmalloc
(
sizeof
(*tp));
109
110
tp->
cfg
= t;
111
tp->
cfg
->
ref_count
++;
112
tp->
get_byte_func
= h;
113
tp->
get_byte_data
= vp;
114
115
tp->
look
=
YAZ_TOK_ERROR
;
116
tp->
unget_byte
= 0;
117
118
tp->
wr_string
=
wrbuf_alloc
();
119
return
tp;
120
}
121
122
123
/** \brief destroys a tokenizer handle
    \param tp tokenizer handle

    Releases the string buffer, gives up the handle's reference to its
    configuration and frees the handle itself.
*/
void yaz_tok_parse_destroy(yaz_tok_parse_t tp)
{
    wrbuf_destroy(tp->wr_string);
    yaz_tok_cfg_destroy(tp->cfg);
    xfree(tp);
}
129
130
/** \brief advances the tokenizer to the next token
    \param tp tokenizer handle
    \returns token code, which is also stored in tp->look

    Leading white-space bytes are skipped. The first remaining byte then
    selects the token kind:
    - zero byte (end of input) or a comment character: YAZ_TOK_EOF
    - a single-token character: that character itself
    - a quote-begin character: the bytes up to the matching quote-end
      character are collected and YAZ_TOK_QSTRING is returned;
      YAZ_TOK_ERROR is returned if the input ends before the closing quote
    - anything else: bytes are collected until white space, a single-token
      character, a comment character or end of input, and YAZ_TOK_STRING
      is returned

    Collected text ends up in tp->wr_string.
*/
int yaz_tok_move(yaz_tok_parse_t tp)
{
    yaz_tok_cfg_t t = tp->cfg;
    const char *cp;
    int ch = get_byte(tp);

    /* skip white space */
    while (ch && strchr(t->white_space, ch))
        ch = get_byte(tp);
    if (!ch)
        ch = YAZ_TOK_EOF;
    else if (strchr(t->comment, ch))
        ch = YAZ_TOK_EOF;
    else if ((cp = strchr(t->single_tokens, ch)))
        ch = *cp;  /* single token match */
    else if ((cp = strchr(t->quote_tokens_begin, ch)))
    {   /* quoted string */
        /* the closing quote sits at the same index as the opening one */
        int end_ch = t->quote_tokens_end[cp - t->quote_tokens_begin];
        ch = get_byte(tp);
        wrbuf_rewind(tp->wr_string);
        while (ch && ch != end_ch)
        {
            wrbuf_putc(tp->wr_string, ch);
            /* fetch the next byte; without this the loop never ends and
               keeps appending the same byte */
            ch = get_byte(tp);
        }
        if (!ch)
            ch = YAZ_TOK_ERROR;  /* input ended inside the quotes */
        else
            ch = YAZ_TOK_QSTRING;
    }
    else
    {   /* unquoted string */
        wrbuf_rewind(tp->wr_string);
        while (ch && !strchr(t->white_space, ch)
               && !strchr(t->single_tokens, ch)
               && !strchr(t->comment, ch))
        {
            wrbuf_putc(tp->wr_string, ch);
            ch = get_byte(tp);
        }
        /* the terminating byte belongs to the next token */
        unget_byte(tp, ch);
        ch = YAZ_TOK_STRING;
    }
    tp->look = ch;
    return ch;
}
173
174
/** \brief returns the text of the current string token
    \param tp tokenizer handle
    \returns C string view of the handle's string buffer, i.e. the text
             collected by the last yaz_tok_move() that produced a string
             or quoted-string token
*/
const char *yaz_tok_parse_string(yaz_tok_parse_t tp)
{
    WRBUF text = tp->wr_string;

    return wrbuf_cstr(text);
}
178
179
/*
180
* Local variables:
181
* c-basic-offset: 4
182
* c-file-style: "Stroustrup"
183
* indent-tabs-mode: nil
184
* End:
185
* vim: shiftwidth=4 tabstop=8 expandtab
186
*/
187
Generated on Wed May 15 2013 14:55:31 for YAZ by
1.8.1.2