YAZ
5.34.0
|
ICU utilities. More...
Go to the source code of this file.
Typedefs | |
typedef struct icu_chain * | yaz_icu_chain_t |
opaque ICU chain More... | |
typedef struct icu_iter * | yaz_icu_iter_t |
ICU tokenizer iterator type (opaque) More... | |
Functions | |
void | icu_chain_destroy (yaz_icu_chain_t chain) |
destroys ICU chain More... | |
yaz_icu_chain_t | icu_chain_xml_config (const xmlNode *xml_node, int sort, UErrorCode *status) |
constructs ICU chain from XML specification More... | |
int | icu_chain_assign_cstr (yaz_icu_chain_t chain, const char *src8cstr, UErrorCode *status) |
pass string to ICU for parsing/tokenization/etc More... | |
int | icu_chain_next_token (yaz_icu_chain_t chain, UErrorCode *status) |
returns one token (if any) More... | |
int | icu_chain_token_number (yaz_icu_chain_t chain) |
returns token number of last token processed More... | |
const char * | icu_chain_token_display (yaz_icu_chain_t chain) |
returns display token of last token processed More... | |
const char * | icu_chain_token_norm (yaz_icu_chain_t chain) |
returns normalized token of last token processed More... | |
const char * | icu_chain_token_sortkey (yaz_icu_chain_t chain) |
returns sortkey token of last token processed More... | |
void | icu_chain_get_org_info (yaz_icu_chain_t chain, size_t *start, size_t *len) |
returns token as it relates to original text (legacy) More... | |
void | icu_chain_get_org_info2 (yaz_icu_chain_t chain, size_t *start, size_t *len, const char **cstr) |
returns token as it relates to original text (2nd version) More... | |
yaz_icu_iter_t | icu_iter_create (struct icu_chain *chain) |
create ICU tokenizer iterator from chain More... | |
void | icu_iter_first (yaz_icu_iter_t iter, const char *src8cstr) |
starts iteration over string More... | |
int | icu_iter_next (yaz_icu_iter_t iter) |
iterates over one token More... | |
void | icu_iter_destroy (yaz_icu_iter_t iter) |
destroy ICU tokenizer iterator More... | |
const char * | icu_iter_get_norm (yaz_icu_iter_t iter) |
returns ICU normalized token More... | |
const char * | icu_iter_get_sortkey (yaz_icu_iter_t iter) |
returns ICU sortkey string More... | |
const char * | icu_iter_get_display (yaz_icu_iter_t iter) |
returns ICU display string More... | |
int | icu_iter_get_token_number (yaz_icu_iter_t iter) |
returns ICU token count for iterator More... | |
void | icu_iter_get_org_info (yaz_icu_iter_t iter, size_t *start, size_t *len) |
returns ICU original token start (offset) and length (legacy) More... | |
void | icu_iter_get_org_info2 (yaz_icu_iter_t iter, size_t *start, size_t *len, const char **cstr) |
returns ICU original token start (offset) and length More... | |
ICU utilities.
Definition in file icu.h.
typedef struct icu_chain* yaz_icu_chain_t |
typedef struct icu_iter* yaz_icu_iter_t |
int icu_chain_assign_cstr | ( | yaz_icu_chain_t | chain, |
const char * | src8cstr, | ||
UErrorCode * | status | ||
) |
pass string to ICU for parsing/tokenization/etc
chain | ICU chain to be used for parsing |
src8cstr | input C string (null-terminated) |
status | may include ICU error on failure |
0 | failure |
1 | success |
void icu_chain_destroy | ( | yaz_icu_chain_t | chain | ) |
destroys ICU chain
void icu_chain_get_org_info | ( | yaz_icu_chain_t | chain, |
size_t * | start, | ||
size_t * | len | ||
) |
returns token as it relates to original text (legacy)
chain | ICU chain |
start | offset in original text |
len | number of uchars in original text |
void icu_chain_get_org_info2 | ( | yaz_icu_chain_t | chain, |
size_t * | start, | ||
size_t * | len, | ||
const char ** | cstr | ||
) |
returns token as it relates to original text (2nd version)
chain | ICU chain |
start | offset in original text |
len | number of uchars in original text |
cstr | if non-null, the original string is stored there |
int icu_chain_next_token | ( | yaz_icu_chain_t | chain, |
UErrorCode * | status | ||
) |
returns one token (if any)
chain | ICU chain |
status | may include ICU error on failure |
0 | error or end-of-tokens (no more tokens) |
>0 | token number (1, 2, 3, ..) |
This function tries to move to the "next" token in the assigned C string, or returns 0 if no more tokens are to be found.
const char* icu_chain_token_display | ( | yaz_icu_chain_t | chain | ) |
returns display token of last token processed
chain | ICU chain |
const char* icu_chain_token_norm | ( | yaz_icu_chain_t | chain | ) |
returns normalized token of last token processed
chain | ICU chain |
int icu_chain_token_number | ( | yaz_icu_chain_t | chain | ) |
returns token number of last token processed
chain | ICU chain |
const char* icu_chain_token_sortkey | ( | yaz_icu_chain_t | chain | ) |
returns sortkey token of last token processed
chain | ICU chain |
yaz_icu_chain_t icu_chain_xml_config | ( | const xmlNode * | xml_node, |
int | sort, | ||
UErrorCode * | status | ||
) |
constructs ICU chain from XML specification
xml_node | icu_chain XML node - with attribute locale in it |
sort | 1 if ICU chain is to deal with sort keys; 0 otherwise |
status | May include ICU error code on failure |
yaz_icu_iter_t icu_iter_create | ( | struct icu_chain * | chain | ) |
create ICU tokenizer iterator from chain
chain | ICU chain |
void icu_iter_destroy | ( | yaz_icu_iter_t | iter | ) |
destroy ICU tokenizer iterator
iter | ICU tokenizer iterator |
void icu_iter_first | ( | yaz_icu_iter_t | iter, |
const char * | src8cstr | ||
) |
starts iteration over string
iter | ICU tokenizer iterator |
src8cstr | input string (0-terminated) |
Call icu_iter_next to iterate over each token.
const char* icu_iter_get_display | ( | yaz_icu_iter_t | iter | ) |
returns ICU display string
iter | ICU tokenizer iterator |
const char* icu_iter_get_norm | ( | yaz_icu_iter_t | iter | ) |
returns ICU normalized token
iter | ICU tokenizer iterator |
void icu_iter_get_org_info | ( | yaz_icu_iter_t | iter, |
size_t * | start, | ||
size_t * | len | ||
) |
returns ICU original token start (offset) and length (legacy)
iter | ICU tokenizer iterator |
start | offset of last token in original text |
len | length of last token in original text |
void icu_iter_get_org_info2 | ( | yaz_icu_iter_t | iter, |
size_t * | start, | ||
size_t * | len, | ||
const char ** | cstr | ||
) |
returns ICU original token start (offset) and length
iter | ICU tokenizer iterator |
start | offset of last token in original text |
len | length of last token in original text |
cstr | if non-null: original string |
const char* icu_iter_get_sortkey | ( | yaz_icu_iter_t | iter | ) |
returns ICU sortkey string
iter | ICU tokenizer iterator |
int icu_iter_get_token_number | ( | yaz_icu_iter_t | iter | ) |
returns ICU token count for iterator
iter | ICU tokenizer iterator |
int icu_iter_next | ( | yaz_icu_iter_t | iter | ) |
iterates over one token
iter | ICU tokenizer iterator |
0 | no more tokens (EOF) |
1 | got one token (use icu_iter_get..-functions) |