YAZ
4.2.57
Main Page
Data Structures
Files
File List
Globals
src
ccl_stop_words.c
Go to the documentation of this file.
1
/* This file is part of the YAZ toolkit.
 * Copyright (C) 1995-2013 Index Data
 * See the file LICENSE for details.
 */
9
#if HAVE_CONFIG_H
10
#include <
config.h
>
11
#endif
12
13
#include <stdio.h>
14
#include <string.h>
15
#include <
yaz/ccl.h
>
16
#include <
yaz/nmem.h
>
17
18
/*
 * One removed stop word, kept as a node of a singly linked list.
 */
struct ccl_stop_info {
    char *qualname;             /* qualifier name the word was found under; 0 if none */
    char *term;                 /* NUL-terminated copy of the removed word */
    struct ccl_stop_info *next; /* following list node; 0 at the end of the list */
};
23
24
struct
ccl_stop_words
{
25
char
*
blank_chars
;
26
NMEM
nmem
;
/* memory for removed items */
27
struct
ccl_stop_info
*
removed_items
;
28
};
29
30
/*
 * Records one removed stop word at the end of csw->removed_items.
 *
 * csw    stop-word handle; all storage is taken from csw->nmem
 * qname  qualifier name to copy, or 0 (stored as 0 in that case)
 * t      start of the word; need not be NUL-terminated
 * len    number of bytes of t that make up the word
 */
static void append_removed_item(ccl_stop_words_t csw,
                                const char *qname,
                                const char *t, size_t len)
{
    struct ccl_stop_info **tail;
    struct ccl_stop_info *item = (struct ccl_stop_info *)
        nmem_malloc(csw->nmem, sizeof(*item));

    item->qualname = qname ? nmem_strdup(csw->nmem, qname) : 0;

    /* private, NUL-terminated copy of the len bytes at t */
    item->term = (char *) nmem_malloc(csw->nmem, len + 1);
    memcpy(item->term, t, len);
    item->term[len] = '\0';
    item->next = 0;

    /* walk to the slot holding the terminating 0 and hook the item in there */
    for (tail = &csw->removed_items; *tail; tail = &(*tail)->next)
        ;
    *tail = item;
}
52
53
ccl_stop_words_t
ccl_stop_words_create
(
void
)
54
{
55
NMEM
nmem =
nmem_create
();
56
ccl_stop_words_t
csw = (
ccl_stop_words_t
)
xmalloc
(
sizeof
(*csw));
57
csw->
nmem
= nmem;
58
csw->
removed_items
= 0;
59
csw->
blank_chars
=
xstrdup
(
" \r\n\t"
);
60
return
csw;
61
}
62
63
/*
 * Releases a stop-word handle: its NMEM, its blank-character string and
 * the handle itself. Passing 0 is allowed and does nothing.
 */
void ccl_stop_words_destroy(ccl_stop_words_t csw)
{
    if (!csw)
        return;

    nmem_destroy(csw->nmem);
    xfree(csw->blank_chars);
    xfree(csw);
}
72
73
struct
ccl_rpn_node
*
ccl_remove_stop_r
(
ccl_stop_words_t
csw,
74
CCL_bibset
bibset,
75
struct
ccl_rpn_node
*
p
)
76
{
77
struct
ccl_rpn_node
*left, *right;
78
switch
(p->
kind
)
79
{
80
case
CCL_RPN_AND
:
81
case
CCL_RPN_OR
:
82
case
CCL_RPN_NOT
:
83
case
CCL_RPN_PROX
:
84
left =
ccl_remove_stop_r
(csw, bibset, p->
u
.
p
[0]);
85
right =
ccl_remove_stop_r
(csw, bibset, p->
u
.
p
[1]);
86
if
(!left || !right)
87
{
88
/* we must delete our binary node and return child (if any) */
89
p->
u
.
p
[0] = 0;
90
p->
u
.
p
[1] = 0;
91
ccl_rpn_delete
(p);
92
if
(left)
93
return
left;
94
else
95
return
right;
96
}
97
break
;
98
case
CCL_RPN_SET
:
99
break
;
100
case
CCL_RPN_TERM
:
101
if
(p->
u
.
t
.term)
102
{
103
int
found = 1;
104
while
(found)
105
{
106
char
*cp = p->
u
.
t
.term;
107
found = 0;
108
while
(1)
109
{
110
while
(*cp && strchr(csw->
blank_chars
, *cp))
111
cp++;
112
if
(!*cp)
113
break
;
114
else
115
{
116
char
*cp0 = cp;
117
while
(*cp && !strchr(csw->
blank_chars
, *cp))
118
cp++;
119
if
(cp != cp0)
120
{
121
size_t
len = cp - cp0;
122
if
(
ccl_search_stop
(bibset, p->
u
.
t
.qual,
123
cp0, len))
124
{
125
append_removed_item
(csw, p->
u
.
t
.qual,
126
cp0, len);
127
while
(*cp && strchr(csw->
blank_chars
, *cp))
128
cp++;
129
memmove(cp0, cp, strlen(cp)+1);
130
found = 1;
131
break
;
132
}
133
}
134
}
135
}
136
}
137
}
138
/* chop right blanks .. and see if term it gets empty */
139
if
(p->
u
.
t
.term && csw->
removed_items
)
140
{
141
char
*cp = p->
u
.
t
.term + strlen(p->
u
.
t
.term);
142
while
(1)
143
{
144
if
(cp == p->
u
.
t
.term)
145
{
146
/* term is empty / blank */
147
ccl_rpn_delete
(p);
148
return
0;
149
}
150
if
(!strchr(csw->
blank_chars
, cp[-1]))
151
break
;
152
/* chop right */
153
cp[-1] = 0;
154
--cp;
155
}
156
}
157
break
;
158
}
159
return
p
;
160
}
161
162
int
ccl_stop_words_tree
(
ccl_stop_words_t
csw,
163
CCL_bibset
bibset,
struct
ccl_rpn_node
**
t
)
164
{
165
struct
ccl_rpn_node
*r;
166
167
/* remove list items */
168
nmem_reset
(csw->
nmem
);
169
csw->
removed_items
= 0;
170
171
r =
ccl_remove_stop_r
(csw, bibset, *t);
172
*t = r;
173
if
(csw->
removed_items
)
174
return
1;
175
return
0;
176
}
177
178
/*
 * Looks up the idx'th removed stop word (0 = first) in csw's list.
 *
 * csw       stop-word handle
 * idx       zero-based position in the removed-item list
 * qualname  receives the item's qualifier name; may receive 0, since
 *           items recorded without a qualifier store 0
 * term      receives the removed word
 *
 * The strings handed back are the list's own copies, not duplicates
 * made for the caller.
 *
 * Returns 1 if the item exists and *qualname / *term were set; 0 if idx
 * is negative or past the end of the list (outputs are left untouched).
 */
int ccl_stop_words_info(ccl_stop_words_t csw, int idx,
                        const char **qualname, const char **term)
{
    struct ccl_stop_info *csi;
    int i;

    /* without this check a negative idx would skip the walk below and
       report the first item as a match */
    if (idx < 0)
        return 0;

    csi = csw->removed_items;
    for (i = 0; csi && i < idx; i++)
        csi = csi->next;

    if (!csi)
        return 0;

    *qualname = csi->qualname;
    *term = csi->term;
    return 1;
}
196
197
/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */
205
Generated on Wed May 15 2013 14:55:30 for YAZ by Doxygen 1.8.1.2