28 #include <yaz/xmalloc.h>
29 #include <yaz/wrbuf.h>
31 #include <yaz/yaz-version.h>
32 #include <yaz/xml_get.h>
49 static pp2_charset_t pp2_charset_create_icu(
struct icu_chain *icu_chn);
58 size_t *start,
size_t *len);
60 struct icu_chain * icu_chn;
70 size_t *start,
size_t *len);
77 size_t *start,
size_t *len);
150 for (pce = pft->
list; pce; pce = pce->
next)
151 if (!strcmp(default_id, pce->
name))
156 pce = xmalloc(
sizeof(*pce));
157 pce->
name = xstrdup(default_id);
170 xmlNode *xml_node,
const char *
id)
177 if (strcmp((
const char *) xml_node->name,
"icu_chain"))
179 yaz_log(YLOG_WARN,
"Wrapper element <%s> deprecated", xml_node->name);
180 yaz_log(YLOG_LOG,
"Use <icu_chain id=\"%s\">.. only", xml_node->name);
181 xml_node = xml_node->children;
182 while (xml_node && xml_node->type != XML_ELEMENT_NODE)
183 xml_node = xml_node->next;
187 yaz_log(YLOG_FATAL,
"Missing icu_chain element");
195 id = yaz_xml_get_prop(xml_node,
"id");
198 yaz_log(YLOG_WARN,
"Missing id for icu_chain");
215 UErrorCode status = U_ZERO_ERROR;
216 struct icu_chain *chain = 0;
217 chain = icu_chain_xml_config(xml_node, 1, &status);
218 if (!chain || U_FAILURE(status))
220 yaz_log(YLOG_FATAL,
"Could not parse ICU chain config:\n"
221 "<%s>\n ... \n</%s>",
222 xml_node->name, xml_node->name);
225 return pp2_charset_create_icu(chain);
227 yaz_log(YLOG_FATAL,
"Error: ICU support requested with element:\n"
228 "<%s>\n ... \n</%s>",
229 xml_node->name, xml_node->name);
231 "But no ICU support is compiled into the YAZ library.");
258 pp2_charset_t pp2_charset_create_icu(
struct icu_chain *icu_chn)
263 pct->icu_chn = icu_chn;
264 pct->icu_sts = U_ZERO_ERROR;
277 icu_chain_destroy(pct->icu_chn);
286 for (pce = pft->
list; pce; pce = pce->
next)
287 if (!strcmp(
id, pce->
name))
307 prt->iter = icu_iter_create(
pct->icu_chn);
315 const char *buf,
int skip_article)
321 char *pout = firstword;
322 char articles[] =
"the den der die des an a ";
324 for (; *p && *p !=
' ' && pout - firstword < (
sizeof(firstword)-2); p++)
325 *pout++ = tolower(*(
unsigned char *)p);
328 if (strstr(articles, firstword))
341 icu_iter_first(prt->iter, buf);
351 icu_iter_destroy(prt->iter);
/**
 * Map a character code to itself when it lies in the range 'a'..'z',
 * and to -1 otherwise.
 *
 * This is a function rather than a function-like macro so that the
 * argument is evaluated exactly once: callers pass expressions such as
 * tolower(...) and the macro form expanded them up to three times.
 *
 * \param c character code to classify
 * \return c when 'a' <= c <= 'z', otherwise -1
 */
static int raw_char(int c)
{
    return (c >= 'a' && c <= 'z') ? c : -1;
}
388 const char *cp = prt->
cp;
393 while (*cp && (c =
raw_char(tolower(*(
const unsigned char *)cp))) < 0)
405 while (*cp && (c =
raw_char(tolower(*cp))) >= 0)
421 char *tmp = xstrdup(prt->
last_cp);
443 size_t *start,
size_t *len)
451 const char *cp = prt->
cp;
464 if (icu_iter_next(prt->iter))
466 return icu_iter_get_norm(prt->iter);
473 return icu_iter_get_sortkey(prt->iter);
478 return icu_iter_get_display(prt->iter);
483 icu_iter_get_org_info(prt->iter, start, len);
const char * pp2_charset_token_next(pp2_charset_token_t prt)
static const char * pp2_charset_token_null(pp2_charset_token_t prt)
int pp2_charset_fact_define(pp2_charset_fact_t pft, xmlNode *xml_node, const char *id)
pp2_charset_fact_t pp2_charset_fact_create(void)
pp2_charset_token_t pp2_charset_token_create(pp2_charset_fact_t pft, const char *id)
void pp2_charset_fact_incref(pp2_charset_fact_t pft)
void pp2_get_org(pp2_charset_token_t prt, size_t *start, size_t *len)
static const char * pp2_charset_token_a_to_z(pp2_charset_token_t prt)
static pp2_charset_token_t pp2_charset_tokenize(pp2_charset_t pct)
static const char * pp2_get_sort_ascii(pp2_charset_token_t prt)
const char * pp2_get_display(pp2_charset_token_t prt)
static const char * pp2_get_display_ascii(pp2_charset_token_t prt)
static pp2_charset_t pp2_charset_create(void)
static void pp2_charset_destroy(pp2_charset_t pct)
void pp2_charset_fact_destroy(pp2_charset_fact_t pft)
void pp2_charset_token_first(pp2_charset_token_t prt, const char *buf, int skip_article)
void pp2_charset_token_destroy(pp2_charset_token_t prt)
static void pp2_get_org_ascii(pp2_charset_token_t prt, size_t *start, size_t *len)
static pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node)
static pp2_charset_t pp2_charset_create_a_to_z(void)
struct pp2_charset_s * pp2_charset_t
static int pp2_charset_fact_add(pp2_charset_fact_t pft, pp2_charset_t pct, const char *default_id)
const char * pp2_get_sort(pp2_charset_token_t prt)
Pazpar2 Character set facilities.
char * normalize7bit_mergekey(char *buf)
struct pp2_charset_entry * next
struct pp2_charset_entry * list
const char *(* token_next_handler)(pp2_charset_token_t prt)
const char *(* get_sort_handler)(pp2_charset_token_t prt)
const char *(* get_display_handler)(pp2_charset_token_t prt)
void(* get_org_handler)(pp2_charset_token_t ptr, size_t *start, size_t *len)