YAZ  5.23.1
Typedefs | Functions
icu.h File Reference

ICU utilities. More...

#include <yaz/yconfig.h>
#include <yaz/xmltypes.h>
#include <unicode/utypes.h>

Go to the source code of this file.

Typedefs

typedef struct icu_chain * yaz_icu_chain_t
 opaque ICU chain More...
 
typedef struct icu_iter * yaz_icu_iter_t
 ICU tokenizer iterator type (opaque) More...
 

Functions

void icu_chain_destroy (yaz_icu_chain_t chain)
 destroys ICU chain More...
 
yaz_icu_chain_t icu_chain_xml_config (const xmlNode *xml_node, int sort, UErrorCode *status)
 constructs ICU chain from XML specification More...
 
int icu_chain_assign_cstr (yaz_icu_chain_t chain, const char *src8cstr, UErrorCode *status)
 pass string to ICU for parsing/tokenization/etc More...
 
int icu_chain_next_token (yaz_icu_chain_t chain, UErrorCode *status)
 returns one token (if any) More...
 
int icu_chain_token_number (yaz_icu_chain_t chain)
 returns token number of last token processed More...
 
const char * icu_chain_token_display (yaz_icu_chain_t chain)
 returns display token of last token processed More...
 
const char * icu_chain_token_norm (yaz_icu_chain_t chain)
 returns normalized token of last token processed More...
 
const char * icu_chain_token_sortkey (yaz_icu_chain_t chain)
 returns sortkey token of last token processed More...
 
void icu_chain_get_org_info (yaz_icu_chain_t chain, size_t *start, size_t *len)
 returns token as it relates to original text (legacy) More...
 
void icu_chain_get_org_info2 (yaz_icu_chain_t chain, size_t *start, size_t *len, const char **cstr)
 returns token as it relates to original text (2nd version) More...
 
yaz_icu_iter_t icu_iter_create (struct icu_chain *chain)
 create ICU tokenizer iterator from chain More...
 
void icu_iter_first (yaz_icu_iter_t iter, const char *src8cstr)
 starts iteration over string More...
 
int icu_iter_next (yaz_icu_iter_t iter)
 iterates over one token More...
 
void icu_iter_destroy (yaz_icu_iter_t iter)
 destroy ICU tokenizer iterator More...
 
const char * icu_iter_get_norm (yaz_icu_iter_t iter)
 returns ICU normalized token More...
 
const char * icu_iter_get_sortkey (yaz_icu_iter_t iter)
 returns ICU sortkey string More...
 
const char * icu_iter_get_display (yaz_icu_iter_t iter)
 returns ICU display string More...
 
int icu_iter_get_token_number (yaz_icu_iter_t iter)
 returns ICU token count for iterator More...
 
void icu_iter_get_org_info (yaz_icu_iter_t iter, size_t *start, size_t *len)
 returns ICU original token start (offset) and length (legacy) More...
 
void icu_iter_get_org_info2 (yaz_icu_iter_t iter, size_t *start, size_t *len, const char **cstr)
 returns ICU original token start (offset) and length More...
 

Detailed Description

ICU utilities.

Definition in file icu.h.

Typedef Documentation

typedef struct icu_chain* yaz_icu_chain_t

opaque ICU chain

Definition at line 45 of file icu.h.

typedef struct icu_iter* yaz_icu_iter_t

ICU tokenizer iterator type (opaque)

Definition at line 131 of file icu.h.

Function Documentation

int icu_chain_assign_cstr ( yaz_icu_chain_t  chain,
const char *  src8cstr,
UErrorCode *  status 
)

pass string to ICU for parsing/tokenization/etc

Parameters
chainICU chain to be used for parsing
src8cstrinput C string (null-terminated)
statusmay include ICU error on failure
Return values
0failure
1success
void icu_chain_destroy ( yaz_icu_chain_t  chain)

destroys ICU chain

void icu_chain_get_org_info ( yaz_icu_chain_t  chain,
size_t *  start,
size_t *  len 
)

returns token as it relates to original text (legacy)

Parameters
chainICU chain
startoffset in original text
lennumber of uchars in original text
void icu_chain_get_org_info2 ( yaz_icu_chain_t  chain,
size_t *  start,
size_t *  len,
const char **  cstr 
)

returns token as it relates to original text (2nd version)

Parameters
chainICU chain
startoffset in original text
lennumber of uchars in original text
cstrif non-null, the original string is stored there
int icu_chain_next_token ( yaz_icu_chain_t  chain,
UErrorCode *  status 
)

returns one token (if any)

Parameters
chainICU chain
statusmay include ICU error on failure
Return values
0error or end-of-tokens (no more tokens)
>0token number (1, 2, 3, ..)

This function tries to move to the next token in the assigned C string, or returns 0 if no more tokens are found.

const char* icu_chain_token_display ( yaz_icu_chain_t  chain)

returns display token of last token processed

Parameters
chainICU chain
Returns
display token string (C string). This function returns the display string for the last token returned by icu_chain_next_token.
const char* icu_chain_token_norm ( yaz_icu_chain_t  chain)

returns normalized token of last token processed

Parameters
chainICU chain
Returns
normalized token string (C string). This function returns the normalized string for the last token returned by icu_chain_next_token.
int icu_chain_token_number ( yaz_icu_chain_t  chain)

returns token number of last token processed

Parameters
chainICU chain

Returns
token number (numbered from 1)
const char* icu_chain_token_sortkey ( yaz_icu_chain_t  chain)

returns sortkey token of last token processed

Parameters
chainICU chain
Returns
sortkey token string (C string). This function returns the sortkey string for the last token returned by icu_chain_next_token.
yaz_icu_chain_t icu_chain_xml_config ( const xmlNode *  xml_node,
int  sort,
UErrorCode *  status 
)

constructs ICU chain from XML specification

Parameters
xml_nodeicu_chain XML node - with attribute locale in it
sort1 if ICU chain is to deal with sort keys; 0 otherwise
statusMay include ICU error code on failure
Returns
chain ptr or NULL on failure in which case status may hold info
yaz_icu_iter_t icu_iter_create ( struct icu_chain *  chain)

create ICU tokenizer iterator from chain

Parameters
chainICU chain
Returns
ICU iterator
void icu_iter_destroy ( yaz_icu_iter_t  iter)

destroy ICU tokenizer iterator

Parameters
iterICU tokenizer iterator
void icu_iter_first ( yaz_icu_iter_t  iter,
const char *  src8cstr 
)

starts iteration over string

Parameters
iterICU tokenizer iterator
src8cstrinput string (0-terminated)

Call icu_iter_next to iterate over each token.

const char* icu_iter_get_display ( yaz_icu_iter_t  iter)

returns ICU display string

Parameters
iterICU tokenizer iterator
Returns
string (0-terminated)
const char* icu_iter_get_norm ( yaz_icu_iter_t  iter)

returns ICU normalized token

Parameters
iterICU tokenizer iterator
Returns
string (0-terminated)
void icu_iter_get_org_info ( yaz_icu_iter_t  iter,
size_t *  start,
size_t *  len 
)

returns ICU original token start (offset) and length (legacy)

Parameters
iterICU tokenizer iterator
startoffset of last token in original text
lenlength of last token in original text
void icu_iter_get_org_info2 ( yaz_icu_iter_t  iter,
size_t *  start,
size_t *  len,
const char **  cstr 
)

returns ICU original token start (offset) and length

Parameters
iterICU tokenizer iterator
startoffset of last token in original text
lenlength of last token in original text
cstrif non-null: original string
const char* icu_iter_get_sortkey ( yaz_icu_iter_t  iter)

returns ICU sortkey string

Parameters
iterICU tokenizer iterator
Returns
string (0-terminated)
int icu_iter_get_token_number ( yaz_icu_iter_t  iter)

returns ICU token count for iterator

Parameters
iterICU tokenizer iterator
Returns
token count (1, 2, 3...)
int icu_iter_next ( yaz_icu_iter_t  iter)

iterates over one token

Parameters
iterICU tokenizer iterator
Return values
0no more tokens (EOF)
1got one token (use the icu_iter_get_* functions to inspect it)