YAZ  5.34.0
icu.h
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data.
3  * All rights reserved.
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of Index Data nor the names of its contributors
13  * may be used to endorse or promote products derived from this
14  * software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
33 #ifndef YAZ_ICU_H
34 #define YAZ_ICU_H
35 
36 #include <yaz/yconfig.h>
37 
38 #include <yaz/xmltypes.h>
39 
40 #include <unicode/utypes.h>
41 
42 YAZ_BEGIN_CDECL
43 
45 typedef struct icu_chain *yaz_icu_chain_t;
46 
48 YAZ_EXPORT void icu_chain_destroy(yaz_icu_chain_t chain);
49 
56 YAZ_EXPORT yaz_icu_chain_t icu_chain_xml_config(const xmlNode *xml_node,
57  int sort,
58  UErrorCode *status);
66 YAZ_EXPORT int icu_chain_assign_cstr(yaz_icu_chain_t chain,
67  const char *src8cstr,
68  UErrorCode *status);
69 
79 YAZ_EXPORT int icu_chain_next_token(yaz_icu_chain_t chain,
80  UErrorCode *status);
81 
86 YAZ_EXPORT int icu_chain_token_number(yaz_icu_chain_t chain);
87 
94 YAZ_EXPORT const char * icu_chain_token_display(yaz_icu_chain_t chain);
95 
102 YAZ_EXPORT const char * icu_chain_token_norm(yaz_icu_chain_t chain);
103 
110 YAZ_EXPORT const char * icu_chain_token_sortkey(yaz_icu_chain_t chain);
111 
117 YAZ_EXPORT void icu_chain_get_org_info(yaz_icu_chain_t chain,
118  size_t *start, size_t *len);
119 
126 YAZ_EXPORT void icu_chain_get_org_info2(yaz_icu_chain_t chain,
127  size_t *start, size_t *len,
128  const char **cstr);
129 
131 typedef struct icu_iter *yaz_icu_iter_t;
132 
137 YAZ_EXPORT
138 yaz_icu_iter_t icu_iter_create(struct icu_chain *chain);
139 
146 YAZ_EXPORT
147 void icu_iter_first(yaz_icu_iter_t iter, const char *src8cstr);
148 
154 YAZ_EXPORT
155 int icu_iter_next(yaz_icu_iter_t iter);
156 
160 YAZ_EXPORT
161 void icu_iter_destroy(yaz_icu_iter_t iter);
162 
167 YAZ_EXPORT
168 const char * icu_iter_get_norm(yaz_icu_iter_t iter);
169 
174 YAZ_EXPORT
175 const char * icu_iter_get_sortkey(yaz_icu_iter_t iter);
176 
181 YAZ_EXPORT
182 const char * icu_iter_get_display(yaz_icu_iter_t iter);
183 
188 YAZ_EXPORT
189 int icu_iter_get_token_number(yaz_icu_iter_t iter);
190 
196 YAZ_EXPORT
197 void icu_iter_get_org_info(yaz_icu_iter_t iter, size_t *start, size_t *len);
198 
205 YAZ_EXPORT
206 void icu_iter_get_org_info2(yaz_icu_iter_t iter, size_t *start, size_t *len,
207  const char **cstr);
208 
209 YAZ_END_CDECL
210 
211 #endif /* YAZ_ICU_H */
212 
213 /*
214  * Local variables:
215  * c-basic-offset: 4
216  * c-file-style: "Stroustrup"
217  * indent-tabs-mode: nil
218  * End:
219  * vim: shiftwidth=4 tabstop=8 expandtab
220  */
221 
struct icu_iter * yaz_icu_iter_t
ICU tokenizer iterator type (opaque)
Definition: icu.h:131
const char * icu_iter_get_display(yaz_icu_iter_t iter)
returns ICU display string
void icu_chain_destroy(yaz_icu_chain_t chain)
destroys ICU chain
struct icu_chain * yaz_icu_chain_t
opaque ICU chain
Definition: icu.h:45
int icu_iter_get_token_number(yaz_icu_iter_t iter)
returns ICU token count for iterator
void icu_iter_destroy(yaz_icu_iter_t iter)
destroy ICU tokenizer iterator
const char * icu_iter_get_sortkey(yaz_icu_iter_t iter)
returns ICU sortkey string
const char * icu_iter_get_norm(yaz_icu_iter_t iter)
returns ICU normalized token
void icu_chain_get_org_info(yaz_icu_chain_t chain, size_t *start, size_t *len)
returns token as it relates to original text (legacy)
void icu_chain_get_org_info2(yaz_icu_chain_t chain, size_t *start, size_t *len, const char **cstr)
returns token as it relates to original text (2nd version)
yaz_icu_chain_t icu_chain_xml_config(const xmlNode *xml_node, int sort, UErrorCode *status)
constructs ICU chain from XML specification
int icu_chain_assign_cstr(yaz_icu_chain_t chain, const char *src8cstr, UErrorCode *status)
pass string to ICU for parsing/tokenization/etc
void icu_iter_first(yaz_icu_iter_t iter, const char *src8cstr)
starts iteration over string
const char * icu_chain_token_norm(yaz_icu_chain_t chain)
returns normalized token of last token processed
int icu_chain_token_number(yaz_icu_chain_t chain)
returns token number of last token processed
yaz_icu_iter_t icu_iter_create(struct icu_chain *chain)
create ICU tokenizer iterator from chain
int icu_iter_next(yaz_icu_iter_t iter)
iterates over one token
void icu_iter_get_org_info(yaz_icu_iter_t iter, size_t *start, size_t *len)
returns ICU original token start (offset) and length (legacy)
int icu_chain_next_token(yaz_icu_chain_t chain, UErrorCode *status)
returns one token (if any)
void icu_iter_get_org_info2(yaz_icu_iter_t iter, size_t *start, size_t *len, const char **cstr)
returns ICU original token start (offset) and length
const char * icu_chain_token_display(yaz_icu_chain_t chain)
returns display token of last token processed
const char * icu_chain_token_sortkey(yaz_icu_chain_t chain)
returns sortkey token of last token processed
Define xmlNode and xmlDocPtr if Libxml2 is present.
Header with fundamental macros.
#define YAZ_BEGIN_CDECL
Definition: yconfig.h:56
#define YAZ_END_CDECL
Definition: yconfig.h:57