31 #include <yaz/snprintf.h>
63 #define STATE_HASH 199
64 #define POSET_CHUNK 100
119 struct Tnode *t1, *t2, *tn;
121 if (!(t1 =
expr_2 (parse_info)))
126 if (!(t2 =
expr_2 (parse_info)))
140 struct Tnode *t1, *t2, *tn;
142 if (!(t1 =
expr_3 (parse_info)))
151 if (!(t2 =
expr_3 (parse_info)))
165 struct Tnode *t1, *tn;
167 if (!(t1 =
expr_4 (parse_info)))
206 if (!(t1 =
expr_1 (parse_info)))
249 int start_anchor_flag = 0;
250 struct Tnode *t1, *t2, *tn;
253 parse_info->
expr_ptr = * (
const unsigned char **) s;
259 start_anchor_flag = 1;
266 t1->
u.
ch[1] = t1->
u.
ch[0] =
'\n';
276 t2->
u.
ch[1] = t2->
u.
ch[0] =
'\n';
291 t2->
u.
ch[0] = -(++parse_info->
rule);
292 t2->
u.
ch[1] = start_anchor_flag ? 0 : -(parse_info->
rule);
311 *s = (
const char *) parse_info->
expr_ptr;
319 else if (*parse_info->
expr_ptr !=
'\\')
360 int i, ch0, esc0, cc = 0;
365 if (!esc0 && ch0 ==
'^')
377 if (!esc0 && ch0 ==
']')
379 if (!esc0 && ch0 ==
'-')
394 if (parse_info->
cmap)
398 const char *mcp = mapfrom;
408 if (!esc1 && ch1 ==
'-')
413 if (!esc1 && ch1 ==
']')
422 else if (parse_info->
cmap)
426 const char *mcp = mapfrom;
428 mapto = (*parse_info->
cmap) (parse_info->
cmap_data, &mcp, 1);
432 for (i = ch0; ++i <= ch1;)
453 const char *cp0 = (
const char *) (parse_info->
expr_ptr-1);
454 int i = 0, len = strlen(cp0);
456 if (cp0[0] == 1 && cp0[1])
459 parse_info->
look_ch = ((
unsigned char *) cp0)[1];
462 if (!parse_info->
cmap)
465 mapto = (*parse_info->
cmap) (parse_info->
cmap_data, &cp0, len);
468 parse_info->
expr_ptr = (
const unsigned char *) cp0;
469 parse_info->
look_ch = ((
unsigned char **) mapto)[i][0];
470 yaz_log (YLOG_DEBUG,
"map from %c to %d", parse_info->
expr_ptr[-1], parse_info->
look_ch);
478 if (parse_info->
look_ch ==
'\"')
486 else if (parse_info->
look_ch ==
'[')
491 for (cc = parse_info->
charMap; *cc; cc += 2)
492 if (*cc == (
int) (parse_info->
look_ch))
512 if (c < 32 || c >= 127)
525 yaz_snprintf(s + 1,
sizeof(s) - 1,
"x%02x", c);
555 for (t = parse_info->
start; (t1 = t);)
574 parse_info->
start = tnew;
577 parse_info->
end = tnew;
599 tn0->
u.
ch[1] = ch1-1;
615 tn1->
u.
ch[1] = ch1-1;
730 printf (
"#%d (n#%d)", -n->
u.
ch[0], -n->
u.
ch[1]);
731 else if (n->
u.
ch[1] > n->
u.
ch[0])
735 if (n->
u.
ch[1] > n->
u.
ch[0]+1)
746 printf (
"\n nullable : %c\n", n->
nullable ?
'1' :
'0');
747 printf (
" firstpos :");
749 printf (
" lastpos :");
760 for (i = parse_info->
position+1; --i >= 0; fa++)
772 struct Tnode **posar;
778 posar = parse_info->
posar;
786 poset = parse_info->
poset;
792 for (tran_set = dfa_from->
set; tran_set; tran_set = tran_set->
next)
793 if ((c = posar[tran_set->
value]->
u.
ch[0]) >= 0 && c <= max_char)
794 *pos_i++ = tran_set->
value;
799 c = posar[tran_set->
value]->
u.
ch[1];
807 for (char_1 = 0; char_1 <= max_char; char_1++)
810 for (pos_i =
pos; (i = *pos_i) != -1; ++pos_i)
811 if (posar[i]->
u.ch[1] >= char_1
812 && (c=posar[i]->
u.
ch[0]) < char_0)
820 if (char_0 > max_char)
826 for (pos_i =
pos; (i = *pos_i) != -1; ++pos_i)
828 if ((c=posar[i]->
u.ch[0]) > char_0 && c <= char_1)
830 else if ((c=posar[i]->
u.ch[1]) >= char_0 && c < char_1)
832 if (posar[i]->
u.ch[1] >= char_0 && posar[i]->
u.
ch[0] <= char_0)
833 tran_set =
union_DFASet (poset, tran_set, followpos[i]);
850 int prev_no, i, c, no;
852 for (no=0; no < dfas->
no; ++no)
855 assert (s->
no == no);
856 printf (
"DFA state %d", no);
867 if (prev_no != tran->
to)
871 printf (
" goto %d on [", tran->
to);
874 for (c = tran->
ch[0]; c <= tran->
ch[1]; c++)
887 printf (
"%d/%d tree nodes used, %ld bytes each\n",
890 printf (
"%ld/%ld character sets, %ld bytes each\n",
891 i/k, j/k, (
long) k*
sizeof(
BSetWord));
893 printf (
"%ld/%ld poset items, %d bytes each\n", i, j, k);
894 printf (
"%d DFA states\n", dfas->
no);
901 printf (
"\nfollowsets:\n");
902 for (i=1; i <= parse_info->
position; i++)
908 if (posar[i]->
u.ch[0] < 0)
909 printf (
"#%d", -posar[i]->
u.ch[0]);
910 else if (posar[i]->
u.ch[1] > posar[i]->
u.
ch[0])
914 if (posar[i]->
u.ch[1] > posar[i]->
u.
ch[0]+1)
947 for (cp =
cmap; *cp; cp += 2)
949 size = cp -
cmap + 1;
966 for (cc = dfa->
charMap; *cc; cc += 2)
969 while ((cc[0] = cc[2]))
985 for (cc = dfa->
charMap; *cc; cc += 2)
1008 static int thompson_chars[] =
1034 parse_info->
rule = 0;
1035 parse_info->
root = NULL;
1044 parse_info->
start = parse_info->
end = NULL;
1047 parse_info->
cmap = NULL;
1054 assert (parse_info);
1067 assert (poset_chunk > 10);
1073 assert (parse_info->
root);
1110 const char **(*cmap)(
void *vp,
const char **from,
int len))
1130 do_parse (parse_info, pattern, &top);
1162 if ((*dfap)->parse_info)
1164 if ((*dfap)->state_info)
BSetHandle * mk_BSetHandle(int size, int chunk)
int trav_BSet(BSetHandle *sh, BSet src, unsigned member)
void add_BSet(BSetHandle *sh, BSet dst, unsigned member)
void res_BSet(BSetHandle *sh, BSet dst)
BSet mk_BSet(BSetHandle **shp)
void rm_BSetHandle(BSetHandle **shp)
int inf_BSetHandle(BSetHandle *sh, long *used, long *alloc)
int travi_BSet(BSetHandle *sh, BSet src, unsigned member)
void com_BSet(BSetHandle *sh, BSet dst)
int dfa_get_last_rule(struct DFA *dfa)
static void del_followpos(struct DFA_parse *parse_info)
static void init_followpos(struct DFA_parse *parse_info)
static void add_follow(struct DFA_parse *parse_info, DFASet lastpos, DFASet firstpos)
static void term_Tnode(struct DFA_parse *parse_info)
void dfa_parse_cmap_add(struct DFA *d, int from, int to)
void dfa_parse_cmap_thompson(struct DFA *d)
void dfa_parse_cmap_del(struct DFA *d, int from)
static void mk_dfa_tran(struct DFA_parse *parse_info, struct DFA_states *dfas)
static void pr_tran(struct DFA_parse *parse_info, struct DFA_states *dfas)
static void lex(struct DFA_parse *parse_info)
static int lex_sub(struct DFA_parse *parse_info)
void dfa_anyset_includes_nl(struct DFA *dfa)
static int read_charset(struct DFA_parse *parse_info)
static struct Tnode * mk_Tnode_cset(struct DFA_parse *parse_info, BSet charset)
static struct DFA_parse * dfa_parse_init(void)
int dfa_parse(struct DFA *dfa, const char **pattern)
static const char * str_char(unsigned c)
void dfa_mkstate(struct DFA *dfa)
void dfa_parse_cmap_new(struct DFA *d, const int *cmap)
static void del_pos(struct DFA_parse *parse_info)
static struct Tnode * expr_2(struct DFA_parse *parse_info)
static int nextchar_set(struct DFA_parse *parse_info, int *esc)
static void init_pos(struct DFA_parse *parse_info)
static struct Tnode * expr_1(struct DFA_parse *parse_info)
static void out_char(int c)
static struct DFA_states * mk_dfas(struct DFA_parse *dfap, int poset_chunk)
static int nextchar(struct DFA_parse *parse_info, int *esc)
static void pr_verbose(struct DFA_parse *parse_info, struct DFA_states *dfas)
static struct Tnode * expr_4(struct DFA_parse *parse_info)
void dfa_parse_cmap_clean(struct DFA *d)
static void do_parse(struct DFA_parse *parse_info, const char **s, struct Tnode **tnp)
static struct Tnode * expr_3(struct DFA_parse *parse_info)
static int map_l_char(struct DFA_parse *parse_info)
static void dfa_trav(struct DFA_parse *parse_info, struct Tnode *n)
void dfa_delete(struct DFA **dfap)
static struct Tnode * mk_Tnode(struct DFA_parse *parse_info)
void dfa_set_cmap(struct DFA *dfa, void *vp, const char **(*cmap)(void *vp, const char **from, int len))
struct DFA * dfa_init(void)
static void pr_followpos(struct DFA_parse *parse_info)
static void rm_dfa_parse(struct DFA_parse **dfap)
void sort_DFA_states(struct DFA_states *dfas)
void add_DFA_tran(struct DFA_states *, struct DFA_state *, int, int, int)
int rm_DFA_states(struct DFA_states **dfasp)
struct DFA_state * get_DFA_state(struct DFA_states *dfas)
int add_DFA_state(struct DFA_states *dfas, DFASet *s, struct DFA_state **sp)
int init_DFA_states(struct DFA_states **dfasp, DFASetType st, int hash)
DFASetType mk_DFASetType(int chunk)
DFASet add_DFASet(DFASetType st, DFASet s, int value)
DFASet cp_DFASet(DFASetType st, DFASet s)
void pr_DFASet(DFASetType st, DFASet s)
DFASet mk_DFASet(DFASetType st)
DFASet merge_DFASet(DFASetType st, DFASet s1, DFASet s2)
DFASet union_DFASet(DFASetType st, DFASet s1, DFASet s2)
DFASet rm_DFASet(DFASetType st, DFASet s)
int inf_DFASetType(DFASetType st, long *used, long *allocated)
DFASetType rm_DFASetType(DFASetType st)
void * imalloc(size_t size)
struct DFASetElement_ * next
const char **(* cmap)(void *vp, const char **from, int len)
const unsigned char * expr_ptr
struct DFA_state ** sortarray
struct DFA_states * state_info
struct DFA_state ** states
struct DFA_parse * parse_info