YAZ  5.34.0
ccl_stop_words.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12 
13 #include <stdio.h>
14 #include <string.h>
15 #include <yaz/ccl.h>
16 #include <yaz/nmem.h>
17 
/** \brief one removed stop word; element of a singly linked list */
struct ccl_stop_info {
    char *qualname;             /* qualifier name; NULL when none was given */
    char *term;                 /* the removed word, NUL-terminated */
    struct ccl_stop_info *next; /* next removed item; NULL at end of list */
};
23 
25  char *blank_chars;
26  NMEM nmem; /* memory for removed items */
28 };
29 
31  const char *qname,
32  const char *t, size_t len)
33 {
34  struct ccl_stop_info *csi = (struct ccl_stop_info *)
35  nmem_malloc(csw->nmem, sizeof(*csi));
36  struct ccl_stop_info **csip = &csw->removed_items;
37  if (qname)
38  csi->qualname = nmem_strdup(csw->nmem, qname);
39  else
40  csi->qualname = 0;
41 
42  csi->term = (char *) nmem_malloc(csw->nmem, len+1);
43  memcpy(csi->term, t, len);
44  csi->term[len] = '\0';
45  csi->next = 0;
46 
47  while (*csip)
48  csip = &(*csip)->next;
49 
50  *csip = csi;
51 }
52 
54 {
55  NMEM nmem = nmem_create();
56  ccl_stop_words_t csw = (ccl_stop_words_t) xmalloc(sizeof(*csw));
57  csw->nmem = nmem;
58  csw->removed_items = 0;
59  csw->blank_chars = xstrdup(" \r\n\t");
60  return csw;
61 }
62 
64 {
65  if (csw)
66  {
67  nmem_destroy(csw->nmem);
68  xfree(csw->blank_chars);
69  xfree(csw);
70  }
71 }
72 
74  CCL_bibset bibset,
75  struct ccl_rpn_node *p)
76 {
77  struct ccl_rpn_node *left, *right;
78  switch (p->kind)
79  {
80  case CCL_RPN_AND:
81  case CCL_RPN_OR:
82  case CCL_RPN_NOT:
83  case CCL_RPN_PROX:
84  left = ccl_remove_stop_r(csw, bibset, p->u.p[0]);
85  right = ccl_remove_stop_r(csw, bibset, p->u.p[1]);
86  if (!left || !right)
87  {
88  /* we must delete our binary node and return child (if any) */
89  p->u.p[0] = 0;
90  p->u.p[1] = 0;
92  if (left)
93  return left;
94  else
95  return right;
96  }
97  break;
98  case CCL_RPN_SET:
99  break;
100  case CCL_RPN_TERM:
101  if (p->u.t.term)
102  {
103  int found = 1;
104  while (found)
105  {
106  char *cp = p->u.t.term;
107  found = 0;
108  while (1)
109  {
110  while (*cp && strchr(csw->blank_chars, *cp))
111  cp++;
112  if (!*cp)
113  break;
114  else
115  {
116  char *cp0 = cp;
117  while (*cp && !strchr(csw->blank_chars, *cp))
118  cp++;
119  if (cp != cp0)
120  {
121  size_t len = cp - cp0;
122  if (ccl_search_stop(bibset, p->u.t.qual,
123  cp0, len))
124  {
125  append_removed_item(csw, p->u.t.qual,
126  cp0, len);
127  while (*cp && strchr(csw->blank_chars, *cp))
128  cp++;
129  memmove(cp0, cp, strlen(cp)+1);
130  found = 1;
131  break;
132  }
133  }
134  }
135  }
136  }
137  }
138  /* chop right blanks .. and see if term it gets empty */
139  if (p->u.t.term && csw->removed_items)
140  {
141  char *cp = p->u.t.term + strlen(p->u.t.term);
142  while (1)
143  {
144  if (cp == p->u.t.term)
145  {
146  /* term is empty / blank */
147  ccl_rpn_delete(p);
148  return 0;
149  }
150  if (!strchr(csw->blank_chars, cp[-1]))
151  break;
152  /* chop right */
153  cp[-1] = 0;
154  --cp;
155  }
156  }
157  break;
158  }
159  return p;
160 }
161 
163  CCL_bibset bibset, struct ccl_rpn_node **t)
164 {
165  struct ccl_rpn_node *r;
166 
167  /* remove list items */
168  nmem_reset(csw->nmem);
169  csw->removed_items = 0;
170 
171  r = ccl_remove_stop_r(csw, bibset, *t);
172  *t = r;
173  if (csw->removed_items)
174  return 1;
175  return 0;
176 }
177 
179  const char **qualname, const char **term)
180 {
181  struct ccl_stop_info *csi = csw->removed_items;
182  int i = 0;
183  while (csi && i < idx)
184  {
185  csi = csi->next;
186  i++;
187  }
188  if (csi)
189  {
190  *qualname = csi->qualname;
191  *term = csi->term;
192  return 1;
193  }
194  return 0;
195 }
196 
197 /*
198  * Local variables:
199  * c-basic-offset: 4
200  * c-file-style: "Stroustrup"
201  * indent-tabs-mode: nil
202  * End:
203  * vim: shiftwidth=4 tabstop=8 expandtab
204  */
205 
Header with public definitions for CCL.
struct ccl_stop_words * ccl_stop_words_t
stop words handle (pimpl)
Definition: ccl.h:288
@ CCL_RPN_AND
Definition: ccl.h:119
@ CCL_RPN_TERM
Definition: ccl.h:122
@ CCL_RPN_PROX
Definition: ccl.h:124
@ CCL_RPN_NOT
Definition: ccl.h:121
@ CCL_RPN_SET
Definition: ccl.h:123
@ CCL_RPN_OR
Definition: ccl.h:120
int ccl_stop_words_tree(ccl_stop_words_t csw, CCL_bibset bibset, struct ccl_rpn_node **t)
removes stop words from RPN tree
void ccl_stop_words_destroy(ccl_stop_words_t csw)
destroys stop words handle
ccl_stop_words_t ccl_stop_words_create(void)
creates stop words handle
static void append_removed_item(ccl_stop_words_t csw, const char *qname, const char *t, size_t len)
int ccl_stop_words_info(ccl_stop_words_t csw, int idx, const char **qualname, const char **term)
returns information about removed "stop" words
struct ccl_rpn_node * ccl_remove_stop_r(ccl_stop_words_t csw, CCL_bibset bibset, struct ccl_rpn_node *p)
void ccl_rpn_delete(struct ccl_rpn_node *rpn)
Definition: cclfind.c:141
int ccl_search_stop(CCL_bibset bibset, const char *qname, const char *src_str, size_t src_len)
Definition: cclqual.c:413
void nmem_reset(NMEM n)
releases memory associated with an NMEM handle
Definition: nmem.c:129
NMEM nmem_create(void)
returns new NMEM handle
Definition: nmem.c:181
void * nmem_malloc(NMEM n, size_t size)
allocates memory block on NMEM handle
Definition: nmem.c:145
void nmem_destroy(NMEM n)
destroys NMEM handle and memory associated with it
Definition: nmem.c:204
Header for Nibble Memory functions.
char * nmem_strdup(NMEM mem, const char *src)
allocates string on NMEM handle (similar to strdup)
Definition: nmemsdup.c:18
RPN tree structure node.
Definition: ccl.h:128
struct ccl_rpn_node * p[3]
Boolean including proximity 0=left, 1=right, 2=prox parms.
Definition: ccl.h:133
struct ccl_rpn_node::@8::@9 t
Attributes + Term.
enum ccl_rpn_kind kind
node type, one of CCL_RPN_AND, CCL_RPN_OR, etc
Definition: ccl.h:130
union ccl_rpn_node::@8 u
char * term
Definition: ccl.h:136
struct ccl_stop_info * next
struct ccl_stop_info * removed_items
#define xstrdup(s)
utility macro which calls xstrdup_f
Definition: xmalloc.h:55
#define xfree(x)
utility macro which calls xfree_f
Definition: xmalloc.h:53
#define xmalloc(x)
utility macro which calls xmalloc_f
Definition: xmalloc.h:49