40 #include <unicode/utypes.h>
118 size_t *start,
size_t *len);
127 size_t *start,
size_t *len,
struct icu_iter * yaz_icu_iter_t
ICU tokenizer iterator type (opaque)
const char * icu_iter_get_display(yaz_icu_iter_t iter)
returns ICU display string
void icu_chain_destroy(yaz_icu_chain_t chain)
destroys ICU chain
struct icu_chain * yaz_icu_chain_t
opaque ICU chain
int icu_iter_get_token_number(yaz_icu_iter_t iter)
returns ICU token count for iterator
void icu_iter_destroy(yaz_icu_iter_t iter)
destroys ICU tokenizer iterator
const char * icu_iter_get_sortkey(yaz_icu_iter_t iter)
returns ICU sortkey string
const char * icu_iter_get_norm(yaz_icu_iter_t iter)
returns ICU normalized token
void icu_chain_get_org_info(yaz_icu_chain_t chain, size_t *start, size_t *len)
returns token as it relates to original text (legacy)
void icu_chain_get_org_info2(yaz_icu_chain_t chain, size_t *start, size_t *len, const char **cstr)
returns token as it relates to original text (2nd version)
yaz_icu_chain_t icu_chain_xml_config(const xmlNode *xml_node, int sort, UErrorCode *status)
constructs ICU chain from XML specification
int icu_chain_assign_cstr(yaz_icu_chain_t chain, const char *src8cstr, UErrorCode *status)
passes string to ICU for parsing, tokenization, etc.
void icu_iter_first(yaz_icu_iter_t iter, const char *src8cstr)
starts iteration over string
const char * icu_chain_token_norm(yaz_icu_chain_t chain)
returns normalized token of last token processed
int icu_chain_token_number(yaz_icu_chain_t chain)
returns token number of last token processed
yaz_icu_iter_t icu_iter_create(struct icu_chain *chain)
creates ICU tokenizer iterator from chain
int icu_iter_next(yaz_icu_iter_t iter)
iterates over one token
void icu_iter_get_org_info(yaz_icu_iter_t iter, size_t *start, size_t *len)
returns ICU original token start (offset) and length (legacy)
int icu_chain_next_token(yaz_icu_chain_t chain, UErrorCode *status)
returns one token (if any)
void icu_iter_get_org_info2(yaz_icu_iter_t iter, size_t *start, size_t *len, const char **cstr)
returns ICU original token start (offset) and length
const char * icu_chain_token_display(yaz_icu_chain_t chain)
returns display token of last token processed
const char * icu_chain_token_sortkey(yaz_icu_chain_t chain)
returns sortkey token of last token processed
Define xmlNode and xmlDocPtr if Libxml2 is present.
Header with fundamental macros.