61 #define STATE_HASH 199
62 #define POSET_CHUNK 100
117 struct Tnode *t1, *t2, *tn;
119 if (!(t1 =
expr_2 (parse_info)))
124 if (!(t2 =
expr_2 (parse_info)))
138 struct Tnode *t1, *t2, *tn;
140 if (!(t1 =
expr_3 (parse_info)))
149 if (!(t2 =
expr_3 (parse_info)))
163 struct Tnode *t1, *tn;
165 if (!(t1 =
expr_4 (parse_info)))
204 if (!(t1 =
expr_1 (parse_info)))
247 int start_anchor_flag = 0;
248 struct Tnode *t1, *t2, *tn;
251 parse_info->
expr_ptr = * (
const unsigned char **) s;
257 start_anchor_flag = 1;
264 t1->
u.
ch[1] = t1->
u.
ch[0] =
'\n';
274 t2->
u.
ch[1] = t2->
u.
ch[0] =
'\n';
289 t2->
u.
ch[0] = -(++parse_info->
rule);
290 t2->
u.
ch[1] = start_anchor_flag ? 0 : -(parse_info->
rule);
309 *s = (
const char *) parse_info->
expr_ptr;
317 else if (*parse_info->
expr_ptr !=
'\\')
358 int i, ch0, esc0, cc = 0;
363 if (!esc0 && ch0 ==
'^')
375 if (!esc0 && ch0 ==
']')
377 if (!esc0 && ch0 ==
'-')
392 if (parse_info->
cmap)
396 const char *mcp = mapfrom;
406 if (!esc1 && ch1 ==
'-')
411 if (!esc1 && ch1 ==
']')
420 else if (parse_info->
cmap)
424 const char *mcp = mapfrom;
426 mapto = (*parse_info->
cmap) (parse_info->
cmap_data, &mcp, 1);
430 for (i = ch0; ++i <= ch1;)
451 const char *cp0 = (
const char *) (parse_info->
expr_ptr-1);
452 int i = 0, len = strlen(cp0);
454 if (cp0[0] == 1 && cp0[1])
457 parse_info->
look_ch = ((
unsigned char *) cp0)[1];
460 if (!parse_info->
cmap)
463 mapto = (*parse_info->
cmap) (parse_info->
cmap_data, &cp0, len);
466 parse_info->
expr_ptr = (
const unsigned char *) cp0;
467 parse_info->
look_ch = ((
unsigned char **) mapto)[i][0];
468 yaz_log (YLOG_DEBUG,
"map from %c to %d", parse_info->
expr_ptr[-1], parse_info->
look_ch);
476 if (parse_info->
look_ch ==
'\"')
484 else if (parse_info->
look_ch ==
'[')
489 for (cc = parse_info->
charMap; *cc; cc += 2)
490 if (*cc == (
int) (parse_info->
look_ch))
510 if (c < 32 || c >= 127)
523 sprintf (s+1,
"x%02x", c);
553 for (t = parse_info->
start; (t1 = t);)
572 parse_info->
start = tnew;
575 parse_info->
end = tnew;
597 tn0->
u.
ch[1] = ch1-1;
613 tn1->
u.
ch[1] = ch1-1;
644 lastpos = lastpos->
next;
728 printf (
"#%d (n#%d)", -n->
u.
ch[0], -n->
u.
ch[1]);
729 else if (n->
u.
ch[1] > n->
u.
ch[0])
733 if (n->
u.
ch[1] > n->
u.
ch[0]+1)
744 printf (
"\n nullable : %c\n", n->
nullable ?
'1' :
'0');
745 printf (
" firstpos :");
747 printf (
" lastpos :");
758 for (i = parse_info->
position+1; --i >= 0; fa++)
770 struct Tnode **posar;
776 posar = parse_info->
posar;
784 poset = parse_info->
poset;
790 for (tran_set = dfa_from->
set; tran_set; tran_set = tran_set->
next)
791 if ((c = posar[tran_set->
value]->
u.
ch[0]) >= 0 && c <= max_char)
792 *pos_i++ = tran_set->
value;
797 c = posar[tran_set->
value]->
u.
ch[1];
805 for (char_1 = 0; char_1 <= max_char; char_1++)
808 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
809 if (posar[i]->
u.ch[1] >= char_1
810 && (c=posar[i]->
u.
ch[0]) < char_0)
818 if (char_0 > max_char)
824 for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
826 if ((c=posar[i]->
u.ch[0]) > char_0 && c <= char_1)
828 else if ((c=posar[i]->
u.ch[1]) >= char_0 && c < char_1)
830 if (posar[i]->
u.ch[1] >= char_0 && posar[i]->
u.
ch[0] <= char_0)
831 tran_set =
union_DFASet (poset, tran_set, followpos[i]);
848 int prev_no, i, c, no;
850 for (no=0; no < dfas->
no; ++no)
853 assert (s->
no == no);
854 printf (
"DFA state %d", no);
865 if (prev_no != tran->
to)
869 printf (
" goto %d on [", tran->
to);
872 for (c = tran->
ch[0]; c <= tran->
ch[1]; c++)
885 printf (
"%d/%d tree nodes used, %ld bytes each\n",
888 printf (
"%ld/%ld character sets, %ld bytes each\n",
889 i/k, j/k, (
long) k*
sizeof(
BSetWord));
891 printf (
"%ld/%ld poset items, %d bytes each\n", i, j, k);
892 printf (
"%d DFA states\n", dfas->
no);
899 printf (
"\nfollowsets:\n");
900 for (i=1; i <= parse_info->
position; i++)
906 if (posar[i]->
u.ch[0] < 0)
907 printf (
"#%d", -posar[i]->
u.ch[0]);
908 else if (posar[i]->
u.ch[1] > posar[i]->
u.
ch[0])
912 if (posar[i]->
u.ch[1] > posar[i]->
u.
ch[0]+1)
945 for (cp = cmap; *cp; cp += 2)
947 size = cp - cmap + 1;
964 for (cc = dfa->
charMap; *cc; cc += 2)
967 while ((cc[0] = cc[2]))
983 for (cc = dfa->
charMap; *cc; cc += 2)
1006 static int thompson_chars[] =
1032 parse_info->
rule = 0;
1033 parse_info->
root = NULL;
1042 parse_info->
start = parse_info->
end = NULL;
1045 parse_info->
cmap = NULL;
1052 assert (parse_info);
1065 assert (poset_chunk > 10);
1071 assert (parse_info->
root);
1108 const char **(*cmap)(
void *vp,
const char **from,
int len))
1128 do_parse (parse_info, pattern, &top);
1160 if ((*dfap)->parse_info)
1162 if ((*dfap)->state_info)