IDZEBRA  2.1.2
mod_grs_regx.c
Go to the documentation of this file.
1 /* This file is part of the Zebra server.
2  Copyright (C) Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <assert.h>
26 #include <string.h>
27 #include <ctype.h>
28 
29 #include <yaz/tpath.h>
30 #include <idzebra/util.h>
31 #include <dfa.h>
32 #include <idzebra/recgrs.h>
33 
34 #if HAVE_TCL_H
35 #include <tcl.h>
36 
37 #if MAJOR_VERSION >= 8
38 #define HAVE_TCL_OBJECTS
39 #endif
40 #endif
41 
42 #define REGX_DEBUG 0
43 
44 #define F_WIN_EOF 2000000000
45 #define F_WIN_READ 1
46 
47 #define REGX_EOF 0
48 #define REGX_PATTERN 1
49 #define REGX_BODY 2
50 #define REGX_BEGIN 3
51 #define REGX_END 4
52 #define REGX_CODE 5
53 #define REGX_CONTEXT 6
54 #define REGX_INIT 7
55 
/* A piece of action code attached to a rule.  regxCodeMk fills it in and
 * regxCodeDel releases it. */
56 struct regxCode {
57  char *str; /* NUL-terminated private copy of the code text */
58 #if HAVE_TCL_OBJECTS
59  Tcl_Obj *tcl_obj; /* Tcl string object built from the same text */
60 #endif
61 };
62 
/* One element of an action list.  `which` holds the REGX_* token that
 * actionListMk stored for it and selects the active member of `u`.
 * NOTE(review): this listing skips embedded line 72; other functions in
 * this file follow a `next` link on this struct, so that member is
 * presumably declared on the missing line - confirm against the real file. */
63 struct lexRuleAction {
64  int which;
65  union {
66  struct {
67  struct DFA *dfa; /* REGX_PATTERN */
68  int body; /* set from the "body" marker seen before the pattern */
69  } pattern;
70  struct regxCode *code; /* REGX_CODE */
71  } u;
73 };
74 
/* Per-rule data.  NOTE(review): this listing skips embedded line 77;
 * readOneSpec uses `info.actionList`, so that member is presumably declared
 * on the missing line - confirm against the real file. */
75 struct lexRuleInfo {
76  int no; /* rule number, taken from the owning context's ruleNo counter */
78 };
79 
/* Node in a context's singly linked list of rules (readOneSpec pushes new
 * rules at the head). */
80 struct lexRule {
81  struct lexRuleInfo info;
82  struct lexRule *next;
83 };
84 
/* A named set of rules.  Contexts are chained through `next`.
 * NOTE(review): this listing skips embedded lines 89 and 93-95; the code
 * refers to fastRule, beginActionList, endActionList and initActionList,
 * which are presumably declared on those lines. */
85 struct lexContext {
86  char *name; /* heap copy of the context name */
87  struct DFA *dfa; /* DFA that the context's rule patterns are parsed into */
88  struct lexRule *rules; /* most recently added rule first */
90  int ruleNo; /* next rule number to hand out; starts at 1 */
91  int initFlag; /* cleared by lexContextCreate */
92 
96  struct lexContext *next;
97 };
98 
/* Growable text buffer; lexSpec keeps one per nesting level and execData
 * appends element data to it. */
99 struct lexConcatBuf {
100  int max; /* allocated size of buf in bytes (0 when buf is unset) */
101  char *buf;
102 };
103 
/* State of one loaded filter specification plus the input window used
 * while a record is being read.
 * NOTE(review): this listing skips several embedded lines (106, 108-110,
 * 114, 118, 128, 130, 132); the code refers to members such as context,
 * context_stack, context_stack_size, context_stack_top, dh, stream,
 * concatBuf, d1_stack and stop_flag that are presumably declared there. */
104 struct lexSpec {
105  char *name; /* heap copy of the spec name; base name of the .flt/.tflt file */
107 
111 
112  int lineNo; /* line counter maintained by readFileSpec */
113  NMEM m; /* memory handle passed to nmem_malloc / data1_mk_* */
115 #if HAVE_TCL_H
116  Tcl_Interp *tcl_interp; /* 0 when no interpreter is attached */
117 #endif
119  off_t (*f_win_ef)(struct ZebraRecStream *s, off_t *);
120 
121  int f_win_start; /* first byte of buffer is this file offset */
122  int f_win_end; /* last byte of buffer is this offset - 1 */
123  int f_win_size; /* size of buffer */
124  char *f_win_buf; /* buffer itself */
125  int (*f_win_rf)(struct ZebraRecStream *, char *, size_t); /* read callback used to fill the window */
126  off_t (*f_win_sf)(struct ZebraRecStream *, off_t); /* seek callback used before a refill */
127 
129  int maxLevel; /* number of entries allocated for concatBuf and d1_stack */
131  int d1_level; /* current index into d1_stack */
133 
134  int *arg_start; /* start offset of each $n argument */
135  int *arg_end; /* end offset of each $n argument */
136  int arg_no; /* number of valid arg_start/arg_end entries */
137  int ptr; /* input offset; cmd_tcl_unread rewinds it */
138 };
139 
/* Pairs a specification with a type string.  The code that uses it is not
 * part of this listing excerpt. */
140 struct lexSpecs {
141  struct lexSpec *spec;
142  char type[256];
143 };
144 
/*
 * Return a pointer to the input bytes in [start_pos, end_pos), refilling
 * the read window in spec when necessary.
 *
 * *size receives the number of bytes actually available at the returned
 * pointer; it never exceeds end_pos - start_pos and is smaller when the
 * read callback delivers less.
 */
145 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
146  int *size)
147 {
148  int i, r, off = start_pos - spec->f_win_start;
149 
 /* Case 1: the whole range is already inside the window. */
150  if (off >= 0 && end_pos <= spec->f_win_end)
151  {
152  *size = end_pos - start_pos;
153  return spec->f_win_buf + off;
154  }
 /* Case 2: the start lies outside the window - seek there and refill the
  * window from scratch (allocating the buffer on first use). */
155  if (off < 0 || start_pos >= spec->f_win_end)
156  {
157  (*spec->f_win_sf)(spec->stream, start_pos);
158  spec->f_win_start = start_pos;
159 
160  if (!spec->f_win_buf)
161  spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
162  *size = (*spec->f_win_rf)(spec->stream, spec->f_win_buf,
163  spec->f_win_size);
164  spec->f_win_end = spec->f_win_start + *size;
165 
166  if (*size > end_pos - start_pos)
167  *size = end_pos - start_pos;
168  return spec->f_win_buf;
169  }
 /* Case 3: the start is inside the window but the end is beyond it.
  * Move the bytes from start_pos onwards to the front of the buffer and
  * read more data directly after them. */
170  for (i = 0; i<spec->f_win_end - start_pos; i++)
171  spec->f_win_buf[i] = spec->f_win_buf[i + off];
172  r = (*spec->f_win_rf)(spec->stream,
173  spec->f_win_buf + i,
174  spec->f_win_size - i);
175  spec->f_win_start = start_pos;
176  spec->f_win_end += r;
177  *size = i + r;
178  if (*size > end_pos - start_pos)
179  *size = end_pos - start_pos;
180  return spec->f_win_buf;
181 }
182 
183 static int f_win_advance (struct lexSpec *spec, int *pos)
184 {
185  int size;
186  char *buf;
187 
188  if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
189  return spec->f_win_buf[(*pos)++ - spec->f_win_start];
190  if (*pos == F_WIN_EOF)
191  return 0;
192  buf = f_win_get (spec, *pos, *pos+1, &size);
193  if (size == 1)
194  {
195  (*pos)++;
196  return *buf;
197  }
198  *pos = F_WIN_EOF;
199  return 0;
200 }
201 
202 static void regxCodeDel (struct regxCode **pp)
203 {
204  struct regxCode *p = *pp;
205  if (p)
206  {
207 #if HAVE_TCL_OBJECTS
208  if (p->tcl_obj)
209  Tcl_DecrRefCount (p->tcl_obj);
210 #endif
211  xfree (p->str);
212  xfree (p);
213  *pp = NULL;
214  }
215 }
216 
217 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
218 {
219  struct regxCode *p;
220 
221  p = (struct regxCode *) xmalloc (sizeof(*p));
222  p->str = (char *) xmalloc (len+1);
223  memcpy (p->str, buf, len);
224  p->str[len] = '\0';
225 #if HAVE_TCL_OBJECTS
226  p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
227  if (p->tcl_obj)
228  Tcl_IncrRefCount (p->tcl_obj);
229 #endif
230  *pp = p;
231 }
232 
/*
 * Create a DFA whose parser character map is adjusted for filter specs:
 * the entries for space and tab are removed and '/' is added with value 0.
 * (Presumably this makes blanks ordinary characters and lets '/' end a
 * pattern - see the dfa module for the exact semantics.)
 */
static struct DFA *lexSpecDFA (void)
{
    struct DFA *fresh = dfa_init ();

    dfa_parse_cmap_del (fresh, ' ');
    dfa_parse_cmap_del (fresh, '\t');
    dfa_parse_cmap_add (fresh, '/', 0);
    return fresh;
}
243 
244 static void actionListDel (struct lexRuleAction **rap)
245 {
246  struct lexRuleAction *ra1, *ra;
247 
248  for (ra = *rap; ra; ra = ra1)
249  {
250  ra1 = ra->next;
251  switch (ra->which)
252  {
253  case REGX_PATTERN:
254  dfa_delete (&ra->u.pattern.dfa);
255  break;
256  case REGX_CODE:
257  regxCodeDel (&ra->u.code);
258  break;
259  }
260  xfree (ra);
261  }
262  *rap = NULL;
263 }
264 
265 static struct lexContext *lexContextCreate (const char *name)
266 {
267  struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
268 
269  p->name = xstrdup (name);
270  p->ruleNo = 1;
271  p->initFlag = 0;
272  p->dfa = lexSpecDFA ();
273  p->rules = NULL;
274  p->fastRule = NULL;
275  p->beginActionList = NULL;
276  p->endActionList = NULL;
277  p->initActionList = NULL;
278  p->next = NULL;
279  return p;
280 }
281 
/*
 * Free a context: its DFA, fast-rule table, rule nodes, name and the
 * context structure itself.
 * NOTE(review): this listing skips embedded lines 291 and 294-296, which
 * presumably release the per-rule and per-context action lists - confirm
 * against the real file before editing this function.
 */
282 static void lexContextDestroy (struct lexContext *p)
283 {
284  struct lexRule *rp, *rp1;
285 
286  dfa_delete (&p->dfa);
287  xfree (p->fastRule);
288  for (rp = p->rules; rp; rp = rp1)
289  {
290  rp1 = rp->next; /* save the link before rp is freed */
292  xfree (rp);
293  }
297  xfree (p->name);
298  xfree (p);
299 }
300 
301 static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
302 {
303  struct lexSpec *p;
304  int i;
305 
306  p = (struct lexSpec *) xmalloc (sizeof(*p));
307  p->name = (char *) xmalloc (strlen(name)+1);
308  strcpy (p->name, name);
309 
310 #if HAVE_TCL_H
311  p->tcl_interp = 0;
312 #endif
313  p->dh = dh;
314  p->context = NULL;
315  p->context_stack_size = 100;
316  p->context_stack = (struct lexContext **)
317  xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
318  p->f_win_buf = NULL;
319 
320  p->maxLevel = 128;
321  p->concatBuf = (struct lexConcatBuf *)
322  xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
323  for (i = 0; i < p->maxLevel; i++)
324  {
325  p->concatBuf[i].max = 0;
326  p->concatBuf[i].buf = 0;
327  }
328  p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
329  p->d1_level = 0;
330  return p;
331 }
332 
333 static void lexSpecDestroy (struct lexSpec **pp)
334 {
335  struct lexSpec *p;
336  struct lexContext *lt;
337  int i;
338 
339  assert (pp);
340  p = *pp;
341  if (!p)
342  return ;
343 
344  for (i = 0; i < p->maxLevel; i++)
345  xfree (p->concatBuf[i].buf);
346  xfree (p->concatBuf);
347 
348  lt = p->context;
349  while (lt)
350  {
351  struct lexContext *lt_next = lt->next;
352  lexContextDestroy (lt);
353  lt = lt_next;
354  }
355 #if HAVE_TCL_OBJECTS
356  if (p->tcl_interp)
357  Tcl_DeleteInterp (p->tcl_interp);
358 #endif
359  xfree (p->name);
360  xfree (p->f_win_buf);
361  xfree (p->context_stack);
362  xfree (p->d1_stack);
363  xfree (p);
364  *pp = NULL;
365 }
366 
367 static int readParseToken (const char **cpp, int *len)
368 {
369  const char *cp = *cpp;
370  char cmd[32];
371  int i, level;
372 
373  while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
374  cp++;
375  switch (*cp)
376  {
377  case '\0':
378  return 0;
379  case '/':
380  *cpp = cp+1;
381  return REGX_PATTERN;
382  case '{':
383  *cpp = cp+1;
384  level = 1;
385  while (*++cp)
386  {
387  if (*cp == '{')
388  level++;
389  else if (*cp == '}')
390  {
391  level--;
392  if (level == 0)
393  break;
394  }
395  }
396  *len = cp - *cpp;
397  return REGX_CODE;
398  default:
399  i = 0;
400  while (1)
401  {
402  if (*cp >= 'a' && *cp <= 'z')
403  cmd[i] = *cp;
404  else if (*cp >= 'A' && *cp <= 'Z')
405  cmd[i] = *cp + 'a' - 'A';
406  else
407  break;
408  if (i < (int) sizeof(cmd)-2)
409  i++;
410  cp++;
411  }
412  cmd[i] = '\0';
413  if (i == 0)
414  {
415  yaz_log (YLOG_WARN, "bad character %d %c", *cp, *cp);
416  cp++;
417  while (*cp && *cp != ' ' && *cp != '\t' &&
418  *cp != '\n' && *cp != '\r')
419  cp++;
420  *cpp = cp;
421  return 0;
422  }
423  *cpp = cp;
424  if (!strcmp (cmd, "begin"))
425  return REGX_BEGIN;
426  else if (!strcmp (cmd, "end"))
427  return REGX_END;
428  else if (!strcmp (cmd, "body"))
429  return REGX_BODY;
430  else if (!strcmp (cmd, "context"))
431  return REGX_CONTEXT;
432  else if (!strcmp (cmd, "init"))
433  return REGX_INIT;
434  else
435  {
436  yaz_log (YLOG_WARN, "bad command %s", cmd);
437  return 0;
438  }
439  }
440 }
441 
442 static int actionListMk (struct lexSpec *spec, const char *s,
443  struct lexRuleAction **ap)
444 {
445  int r, tok, len;
446  int bodyMark = 0;
447  const char *s0;
448 
449  while ((tok = readParseToken (&s, &len)))
450  {
451  switch (tok)
452  {
453  case REGX_BODY:
454  bodyMark = 1;
455  continue;
456  case REGX_CODE:
457  *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
458  (*ap)->which = tok;
459  regxCodeMk (&(*ap)->u.code, s, len);
460  s += len+1;
461  break;
462  case REGX_PATTERN:
463  *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
464  (*ap)->which = tok;
465  (*ap)->u.pattern.body = bodyMark;
466  bodyMark = 0;
467  (*ap)->u.pattern.dfa = lexSpecDFA ();
468  s0 = s;
469  r = dfa_parse ((*ap)->u.pattern.dfa, &s);
470  if (r || *s != '/')
471  {
472  int pos = s - s0;
473  xfree (*ap);
474  *ap = NULL;
475  yaz_log(YLOG_WARN, "regular expression error '%.*s'", pos, s0);
476  return -1;
477  }
478  else
479  {
480  int pos = s - s0;
481  if (debug_dfa_tran)
482  printf("pattern: %.*s\n", pos, s0);
483  dfa_mkstate((*ap)->u.pattern.dfa);
484  s++;
485  }
486  break;
487  case REGX_BEGIN:
488  yaz_log (YLOG_WARN, "cannot use BEGIN here");
489  continue;
490  case REGX_INIT:
491  yaz_log (YLOG_WARN, "cannot use INIT here");
492  continue;
493  case REGX_END:
494  *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
495  (*ap)->which = tok;
496  break;
497  }
498  ap = &(*ap)->next;
499  }
500  *ap = NULL;
501  return 0;
502 }
503 
/*
 * Process one logical line of a filter specification.
 *
 * "context {name}" creates a new context and makes it the head of
 * spec->context.  "begin", "end" and "init" lines set the matching action
 * list of the head context, and a "/pattern/ actions" line adds a rule to
 * it.  When no context exists yet one called "main" is created.
 *
 * Returns -1 when a rule pattern fails to parse and 0 in every other case,
 * including a missing context name.  The result of actionListMk is not
 * checked.
 * NOTE(review): this listing skips embedded lines 535, 539 and 543, which
 * presumably clear the old action list before it is rebuilt - confirm
 * against the real file.
 */
504 int readOneSpec (struct lexSpec *spec, const char *s)
505 {
506  int len, r, tok;
507  struct lexRule *rp;
508  struct lexContext *lc;
509 
510  tok = readParseToken (&s, &len);
511  if (tok == REGX_CONTEXT)
512  {
513  char context_name[32];
514  tok = readParseToken (&s, &len);
515  if (tok != REGX_CODE)
516  {
517  yaz_log (YLOG_WARN, "missing name after CONTEXT keyword");
518  return 0;
519  }
 /* names longer than 31 characters are truncated */
520  if (len > 31)
521  len = 31;
522  memcpy (context_name, s, len);
523  context_name[len] = '\0';
524  lc = lexContextCreate (context_name);
525  lc->next = spec->context;
526  spec->context = lc;
527  return 0;
528  }
529  if (!spec->context)
530  spec->context = lexContextCreate ("main");
531 
532  switch (tok)
533  {
534  case REGX_BEGIN:
536  actionListMk (spec, s, &spec->context->beginActionList);
537  break;
538  case REGX_END:
540  actionListMk (spec, s, &spec->context->endActionList);
541  break;
542  case REGX_INIT:
544  actionListMk (spec, s, &spec->context->initActionList);
545  break;
546  case REGX_PATTERN:
547 #if REGX_DEBUG
548  yaz_log (YLOG_LOG, "rule %d %s", spec->context->ruleNo, s);
549 #endif
 /* the rule pattern is parsed into the context's shared DFA */
550  r = dfa_parse (spec->context->dfa, &s);
551  if (r)
552  {
553  yaz_log (YLOG_WARN, "regular expression error. r=%d", r);
554  return -1;
555  }
556  if (*s != '/')
557  {
558  yaz_log (YLOG_WARN, "expects / at end of pattern. got %c", *s);
559  return -1;
560  }
561  s++;
 /* new rule takes the next rule number and goes to the list head */
562  rp = (struct lexRule *) xmalloc (sizeof(*rp));
563  rp->info.no = spec->context->ruleNo++;
564  rp->next = spec->context->rules;
565  spec->context->rules = rp;
566  actionListMk (spec, s, &rp->info.actionList);
567  }
568  return 0;
569 }
570 
/*
 * Load the filter specification file for spec.
 *
 * With a Tcl interpreter attached "<name>.tflt" is tried first; otherwise,
 * or when that file cannot be opened, "<name>.flt" is used.  The file is
 * split into logical lines which are handed to readOneSpec, then each
 * context gets its fast-rule table and its DFA states are built.
 *
 * Returns -1 when no file can be opened, otherwise 0.
 * NOTE(review): `errors` is never changed after initialisation because the
 * result of readOneSpec is ignored, so the final error return is dead.
 * NOTE(review): fname is filled with sprintf from spec->name without a
 * length check; a name longer than about 250 bytes would overflow it.
 */
571 int readFileSpec (struct lexSpec *spec)
572 {
573  struct lexContext *lc;
574  int c, i, errors = 0;
575  FILE *spec_inf = 0;
576  WRBUF lineBuf;
577  char fname[256];
578 
579 #if HAVE_TCL_H
580  if (spec->tcl_interp)
581  {
582  sprintf (fname, "%s.tflt", spec->name);
583  spec_inf = data1_path_fopen (spec->dh, fname, "r");
584  }
585 #endif
586  if (!spec_inf)
587  {
588  sprintf (fname, "%s.flt", spec->name);
589  spec_inf = data1_path_fopen (spec->dh, fname, "r");
590  }
591  if (!spec_inf)
592  {
593  yaz_log (YLOG_ERRNO|YLOG_WARN, "cannot read spec file %s", spec->name);
594  return -1;
595  }
596  yaz_log (YLOG_LOG, "reading regx filter %s", fname);
597 #if HAVE_TCL_H
598  if (spec->tcl_interp)
599  yaz_log (YLOG_LOG, "Tcl enabled");
600 #endif
601 
 /* disabled DFA debugging switches (embedded line 605 is missing from
  * this listing) */
602 #if 0
603  debug_dfa_trav = 0;
604  debug_dfa_tran = 1;
606  dfa_verbose = 1;
607 #endif
608 
609  lineBuf = wrbuf_alloc();
610  spec->lineNo = 0;
611  c = getc (spec_inf);
612  while (c != EOF)
613  {
614  wrbuf_rewind (lineBuf);
 /* A line that starts with '#', a blank, a tab, CR or LF at this point
  * is skipped up to and including its newline. */
615  if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
616  {
617  while (c != '\n' && c != EOF)
618  c = getc (spec_inf);
619  spec->lineNo++;
620  if (c == '\n')
621  c = getc (spec_inf);
622  }
623  else
624  {
625  int addLine = 0;
626 
 /* Collect one logical line: physical lines that start with a blank
  * or tab continue the previous one; CR characters are dropped. */
627  while (1)
628  {
629  int c1 = c;
630  wrbuf_putc(lineBuf, c);
631  c = getc (spec_inf);
632  while (c == '\r')
633  c = getc (spec_inf);
634  if (c == EOF)
635  break;
636  if (c1 == '\n')
637  {
638  if (c != ' ' && c != '\t')
639  break;
640  addLine++;
641  }
642  }
643  wrbuf_putc(lineBuf, '\0');
644  readOneSpec (spec, wrbuf_buf(lineBuf));
645  spec->lineNo += addLine;
646  }
647  }
648  fclose (spec_inf);
649  wrbuf_destroy(lineBuf);
650 
 /* Build, per context, a table from rule number to rule info.  Rule
  * numbers start at 1, so ruleNo entries cover every assigned number. */
651  for (lc = spec->context; lc; lc = lc->next)
652  {
653  struct lexRule *rp;
654  lc->fastRule = (struct lexRuleInfo **)
655  xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
656  for (i = 0; i < lc->ruleNo; i++)
657  lc->fastRule[i] = NULL;
658  for (rp = lc->rules; rp; rp = rp->next)
659  lc->fastRule[rp->info.no] = &rp->info;
660  dfa_mkstate (lc->dfa);
661  }
662  if (errors)
663  return -1;
664 
665  return 0;
666 }
667 
/* Compiled out: file-scope pointer to a current specification; nothing in
 * the visible code refers to it. */
668 #if 0
669 static struct lexSpec *curLexSpec = NULL;
670 #endif
671 
/*
 * Add elen bytes from ebuf to the record under construction.
 *
 * With attribute_str set, the bytes become (or are appended to) the value
 * of that attribute on the tag node at d1_level-1; nothing happens when
 * that node is not a tag.  Without an attribute, the bytes are appended to
 * the text data node at the current level, creating that node when the
 * current slot does not already hold a data node.  The text is gathered in
 * the level's concat buffer; tagDataRelease later copies it into the node.
 *
 * Empty data and calls made while d1_level <= 1 are ignored.
 */
672 static void execData (struct lexSpec *spec,
673  const char *ebuf, int elen, int formatted_text,
674  const char *attribute_str, int attribute_len)
675 {
676  struct data1_node *res, *parent;
677  int org_len;
678 
679  if (elen == 0) /* shouldn't happen, but it does! */
680  return ;
681 #if REGX_DEBUG
682  if (elen > 80)
683  yaz_log (YLOG_LOG, "data(%d bytes) %.40s ... %.*s", elen,
684  ebuf, 40, ebuf + elen-40);
685  else if (elen == 1 && ebuf[0] == '\n')
686  {
687  yaz_log (YLOG_LOG, "data(new line)");
688  }
689  else if (elen > 0)
690  yaz_log (YLOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf);
691  else
692  yaz_log (YLOG_LOG, "data(%d bytes)", elen);
693 #endif
694 
695  if (spec->d1_level <= 1)
696  return;
697 
698  parent = spec->d1_stack[spec->d1_level -1];
699  assert (parent);
700 
701  if (attribute_str)
702  {
703  data1_xattr **ap;
704  res = parent;
705  if (res->which != DATA1N_tag)
706  return;
707  /* sweep through existing attributes.. */
708  for (ap = &res->u.tag.attributes; *ap; ap = &(*ap)->next)
709  if (strlen((*ap)->name) == attribute_len &&
710  !memcmp((*ap)->name, attribute_str, attribute_len))
711  break;
712  if (!*ap)
713  {
714  /* new attribute. Create it with name + value */
715  *ap = nmem_malloc(spec->m, sizeof(**ap));
716 
717  (*ap)->name = nmem_malloc(spec->m, attribute_len+1);
718  memcpy((*ap)->name, attribute_str, attribute_len);
719  (*ap)->name[attribute_len] = '\0';
720 
721  (*ap)->value = nmem_malloc(spec->m, elen+1);
722  memcpy((*ap)->value, ebuf, elen);
723  (*ap)->value[elen] = '\0';
724  (*ap)->next = 0;
725  }
726  else
727  {
728  /* append to value if attribute already exists */
729  char *nv = nmem_malloc(spec->m, elen + 1 + strlen((*ap)->value));
730  strcpy(nv, (*ap)->value);
731  memcpy (nv + strlen(nv), ebuf, elen);
732  nv[strlen(nv)+elen] = '\0';
733  (*ap)->value = nv;
734  }
735  }
736  else
737  {
 /* reuse the data node already in the current slot, if there is one */
738  if ((res = spec->d1_stack[spec->d1_level]) &&
739  res->which == DATA1N_data)
740  org_len = res->u.data.len;
741  else
742  {
743  org_len = 0;
744 
745  res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
746  res->u.data.what = DATA1I_text;
747  res->u.data.len = 0;
748  res->u.data.formatted_text = formatted_text;
749  res->u.data.data = 0;
750 
751  if (spec->d1_stack[spec->d1_level])
752  spec->d1_stack[spec->d1_level]->next = res;
753  spec->d1_stack[spec->d1_level] = res;
754  }
 /* grow the level's concat buffer (with 256 bytes of slack) when the
  * combined text would not fit; existing text is carried over */
755  if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
756  {
757  char *old_buf, *new_buf;
758 
759  spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
760  new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
761  if ((old_buf = spec->concatBuf[spec->d1_level].buf))
762  {
763  memcpy (new_buf, old_buf, org_len);
764  xfree (old_buf);
765  }
766  spec->concatBuf[spec->d1_level].buf = new_buf;
767  }
768  memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
769  res->u.data.len += elen;
770  }
771 }
772 
/*
 * Add plain element data: forwards to execData with no attribute name and
 * an attribute length of 0.
 */
static void execDataP (struct lexSpec *spec,
                       const char *ebuf, int elen, int formatted_text)
{
    const char *no_attribute = 0;

    execData (spec, ebuf, elen, formatted_text, no_attribute, 0);
}
778 
779 static void tagDataRelease (struct lexSpec *spec)
780 {
781  data1_node *res;
782 
783  if ((res = spec->d1_stack[spec->d1_level]) &&
784  res->which == DATA1N_data &&
785  res->u.data.what == DATA1I_text)
786  {
787  assert (!res->u.data.data);
788  assert (res->u.data.len > 0);
789  if (res->u.data.len > DATA1_LOCALDATA)
790  res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
791  else
792  res->u.data.data = res->lbuf;
793  memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
794  res->u.data.len);
795  }
796 }
797 
798 static void variantBegin (struct lexSpec *spec,
799  const char *class_str, int class_len,
800  const char *type_str, int type_len,
801  const char *value_str, int value_len)
802 {
803  struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
804  char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
805  data1_vartype *tp;
806  int i;
807  data1_node *res;
808 
809  if (spec->d1_level == 0)
810  {
811  yaz_log (YLOG_WARN, "in variant begin. No record type defined");
812  return ;
813  }
814  if (class_len >= DATA1_MAX_SYMBOL)
815  class_len = DATA1_MAX_SYMBOL-1;
816  memcpy (tclass, class_str, class_len);
817  tclass[class_len] = '\0';
818 
819  if (type_len >= DATA1_MAX_SYMBOL)
820  type_len = DATA1_MAX_SYMBOL-1;
821  memcpy (ttype, type_str, type_len);
822  ttype[type_len] = '\0';
823 
824 #if REGX_DEBUG
825  yaz_log (YLOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype,
826  spec->d1_level);
827 #endif
828 
829  if (!(tp =
830  data1_getvartypeby_absyn(spec->dh, parent->root->u.root.absyn,
831  tclass, ttype)))
832  return;
833 
834  if (parent->which != DATA1N_variant)
835  {
836  res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
837  if (spec->d1_stack[spec->d1_level])
838  tagDataRelease (spec);
839  spec->d1_stack[spec->d1_level] = res;
840  spec->d1_stack[++(spec->d1_level)] = NULL;
841  }
842  for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--)
843  if (spec->d1_stack[i]->u.variant.type == tp)
844  {
845  spec->d1_level = i;
846  break;
847  }
848 
849 #if REGX_DEBUG
850  yaz_log (YLOG_LOG, "variant node(%d)", spec->d1_level);
851 #endif
852  parent = spec->d1_stack[spec->d1_level-1];
853  res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
854  res->u.variant.type = tp;
855 
856  if (value_len >= DATA1_LOCALDATA)
857  value_len =DATA1_LOCALDATA-1;
858  memcpy (res->lbuf, value_str, value_len);
859  res->lbuf[value_len] = '\0';
860 
861  res->u.variant.value = res->lbuf;
862 
863  if (spec->d1_stack[spec->d1_level])
864  tagDataRelease (spec);
865  spec->d1_stack[spec->d1_level] = res;
866  spec->d1_stack[++(spec->d1_level)] = NULL;
867 }
868 
/*
 * Trim white space from both ends of the *len bytes at *tag.
 *
 * On return *tag points at the first non-space byte and *len is the length
 * of the trimmed text (0 when the text was all white space).  The text is
 * not modified.  A NULL *tag with *len == 0 is accepted and left untouched.
 */
static void tagStrip (const char **tag, int *len)
{
    int i;

    /* isspace() is only defined for unsigned char values and EOF, so bytes
     * are converted before the call (plain char may be negative). */
    for (i = *len; i > 0 && isspace((unsigned char) (*tag)[i-1]); --i)
        ;
    *len = i;
    for (i = 0; i < *len && isspace((unsigned char) (*tag)[i]); i++)
        ;
    if (i > 0)
    {
        *tag += i;
        *len -= i;
    }
}
881 
882 static void tagBegin (struct lexSpec *spec,
883  const char *tag, int len)
884 {
885  if (spec->d1_level == 0)
886  {
887  yaz_log (YLOG_WARN, "in element begin. No record type defined");
888  return ;
889  }
890  tagStrip (&tag, &len);
891  if (spec->d1_stack[spec->d1_level])
892  tagDataRelease (spec);
893 
894 #if REGX_DEBUG
895  yaz_log (YLOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level);
896 #endif
897 
898  spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
899  spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]);
900  spec->d1_stack[++(spec->d1_level)] = NULL;
901 }
902 
903 static void tagEnd (struct lexSpec *spec, int min_level,
904  const char *tag, int len)
905 {
906  tagStrip (&tag, &len);
907  while (spec->d1_level > min_level)
908  {
909  tagDataRelease (spec);
910  (spec->d1_level)--;
911  if (spec->d1_level == 0)
912  break;
913  if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) &&
914  (!tag ||
915  (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) ==
916  (size_t) len &&
917  !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len))))
918  break;
919  }
920 #if REGX_DEBUG
921  yaz_log (YLOG_LOG, "end tag(%d)", spec->d1_level);
922 #endif
923 }
924 
925 
/*
 * Search the input from offset *pptr for text matching dfa.
 *
 * Returns 1 when a match is found, with *mptr set to the offset where the
 * match starts and *pptr to the offset just after it.  Returns 0 when the
 * end of input is reached without a match; *pptr and *mptr are then left
 * unchanged.  The `greedy` parameter is not used in this function.
 */
926 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
927  struct DFA *dfa, int greedy)
928 {
929  struct DFA_state *state = dfa->states[0];
930  struct DFA_tran *t;
931  unsigned char c = 0;
932  unsigned char c_prev = 0;
933  int ptr = *pptr; /* current pointer */
934  int start_ptr = *pptr; /* first char of match */
935  int last_ptr = 0; /* last char of match */
936  int last_rule = 0; /* rule number of current match */
937  int restore_ptr = 0;
938  int i;
939 
 /* re-read the character just before the start so that c_prev is known
  * for the first attempt */
940  if (ptr)
941  {
942  --ptr;
943  c = f_win_advance (spec, &ptr);
944  }
945  while (1)
946  {
 /* at the initial state: remember the preceding character and the
  * position to fall back to if this attempt fails */
947  if (dfa->states[0] == state)
948  {
949  c_prev = c;
950  restore_ptr = ptr;
951  }
952  c = f_win_advance (spec, &ptr);
953 
954  if (ptr == F_WIN_EOF)
955  {
956  if (last_rule)
957  {
958  *mptr = start_ptr;
959  *pptr = last_ptr;
960  return 1;
961  }
962  break;
963  }
964 
 /* scan the state's transitions, last to first by count, for one whose
  * character range contains c */
965  t = state->trans;
966  i = state->tran_no;
967  while (1)
968  if (--i < 0) /* no transition for character c */
969  {
970  if (last_rule)
971  {
972  *mptr = start_ptr; /* match starts here */
973  *pptr = last_ptr; /* match end here (+1) */
974  return 1;
975  }
 /* no match from this start: go back to the initial state and
  * retry one character after the previous start */
976  state = dfa->states[0];
977 
978  ptr = restore_ptr;
979  c = f_win_advance (spec, &ptr);
980 
981  start_ptr = ptr;
982 
983  break;
984  }
985  else if (c >= t->ch[0] && c <= t->ch[1])
986  {
987  state = dfa->states[t->to];
 /* rule_no is only honoured when the character before the match
  * start was a newline; otherwise rule_nno is used */
988  if (state->rule_no && c_prev == '\n')
989  {
990  last_rule = state->rule_no;
991  last_ptr = ptr;
992  }
993  else if (state->rule_nno)
994  {
995  last_rule = state->rule_nno;
996  last_ptr = ptr;
997  }
998  break;
999  }
1000  else
1001  t++;
1002  }
1003  return 0;
1004 }
1005 
/*
 * Read the next token of an action script from *src and advance *src.
 *
 * Returns:
 *   0  end of script (*src unchanged);
 *   1  statement separator: a newline or ';';
 *   2  ordinary token in *tokBuf / *tokLen - a "$n" argument reference
 *      (resolved through f_win_get, empty when there are no arguments, n
 *      clamped to the last argument), a double-quoted string without its
 *      quotes, or a bare word;
 *   3  option token: a word that starts with '-'.
 * Blanks and tabs before the token are skipped.
 */
1006 static int execTok (struct lexSpec *spec, const char **src,
1007  const char **tokBuf, int *tokLen)
1008 {
1009  const char *s = *src;
1010 
1011  while (*s == ' ' || *s == '\t')
1012  s++;
1013  if (!*s)
1014  return 0;
1015  if (*s == '$' && s[1] >= '0' && s[1] <= '9')
1016  {
1017  int n = 0;
1018  s++;
1019  while (*s >= '0' && *s <= '9')
1020  n = n*10 + (*s++ -'0');
1021  if (spec->arg_no == 0)
1022  {
1023  *tokBuf = "";
1024  *tokLen = 0;
1025  }
1026  else
1027  {
1028  if (n >= spec->arg_no)
1029  n = spec->arg_no-1;
1030  *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],
1031  tokLen);
1032  }
1033  }
1034  else if (*s == '\"')
1035  {
1036  *tokBuf = ++s;
1037  while (*s && *s != '\"')
1038  s++;
1039  *tokLen = s - *tokBuf;
 /* skip the closing quote when there is one */
1040  if (*s)
1041  s++;
1042  *src = s;
1043  }
1044  else if (*s == '\n' || *s == ';')
1045  {
1046  *src = s+1;
1047  return 1;
1048  }
1049  else if (*s == '-')
1050  {
1051  *tokBuf = s++;
1052  while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1053  *s != ';')
1054  s++;
1055  *tokLen = s - *tokBuf;
1056  *src = s;
1057  return 3;
1058  }
1059  else
1060  {
1061  *tokBuf = s++;
1062  while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1063  *s != ';')
1064  s++;
1065  *tokLen = s - *tokBuf;
1066  }
1067  *src = s;
1068  return 2;
1069 }
1070 
/*
 * Copy at most 63 bytes of the len-byte text at src into str and terminate
 * it with NUL.  str must have room for 64 bytes.  Returns str.
 */
static char *regxStrz (const char *src, int len, char *str)
{
    const int limit = 63;
    const int count = (len > limit) ? limit : len;

    memcpy (str, src, count);
    str[count] = '\0';
    return str;
}
1079 
1080 #if HAVE_TCL_H
1081 static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
1082  int argc, const char **argv)
1083 {
1084  struct lexSpec *spec = (struct lexSpec *) clientData;
1085  if (argc < 2)
1086  return TCL_ERROR;
1087  if (!strcmp(argv[1], "record") && argc == 3)
1088  {
1089  const char *absynName = argv[2];
1090  data1_node *res;
1091 
1092 #if REGX_DEBUG
1093  yaz_log (YLOG_LOG, "begin record %s", absynName);
1094 #endif
1095  res = data1_mk_root (spec->dh, spec->m, absynName);
1096 
1097  spec->d1_level = 0;
1098 
1099  spec->d1_stack[spec->d1_level++] = res;
1100 
1101  res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1102 
1103  spec->d1_stack[spec->d1_level++] = res;
1104 
1105  spec->d1_stack[spec->d1_level] = NULL;
1106  }
1107  else if (!strcmp(argv[1], "element") && argc == 3)
1108  {
1109  tagBegin (spec, argv[2], strlen(argv[2]));
1110  }
1111  else if (!strcmp (argv[1], "variant") && argc == 5)
1112  {
1113  variantBegin (spec, argv[2], strlen(argv[2]),
1114  argv[3], strlen(argv[3]),
1115  argv[4], strlen(argv[4]));
1116  }
1117  else if (!strcmp (argv[1], "context") && argc == 3)
1118  {
1119  struct lexContext *lc = spec->context;
1120 #if REGX_DEBUG
1121  yaz_log (YLOG_LOG, "begin context %s",argv[2]);
1122 #endif
1123  while (lc && strcmp (argv[2], lc->name))
1124  lc = lc->next;
1125  if (lc)
1126  {
1127  spec->context_stack[++(spec->context_stack_top)] = lc;
1128  }
1129  else
1130  yaz_log (YLOG_WARN, "unknown context %s", argv[2]);
1131  }
1132  else
1133  return TCL_ERROR;
1134  return TCL_OK;
1135 }
1136 
1137 static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
1138  int argc, const char **argv)
1139 {
1140  struct lexSpec *spec = (struct lexSpec *) clientData;
1141  if (argc < 2)
1142  return TCL_ERROR;
1143 
1144  if (!strcmp (argv[1], "record"))
1145  {
1146  while (spec->d1_level)
1147  {
1148  tagDataRelease (spec);
1149  (spec->d1_level)--;
1150  }
1151 #if REGX_DEBUG
1152  yaz_log (YLOG_LOG, "end record");
1153 #endif
1154  spec->stop_flag = 1;
1155  }
1156  else if (!strcmp (argv[1], "element"))
1157  {
1158  int min_level = 2;
1159  const char *element = 0;
1160  if (argc >= 3 && !strcmp(argv[2], "-record"))
1161  {
1162  min_level = 0;
1163  if (argc == 4)
1164  element = argv[3];
1165  }
1166  else
1167  if (argc == 3)
1168  element = argv[2];
1169  tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
1170  if (spec->d1_level <= 1)
1171  {
1172 #if REGX_DEBUG
1173  yaz_log (YLOG_LOG, "end element end records");
1174 #endif
1175  spec->stop_flag = 1;
1176  }
1177  }
1178  else if (!strcmp (argv[1], "context"))
1179  {
1180 #if REGX_DEBUG
1181  yaz_log (YLOG_LOG, "end context");
1182 #endif
1183  if (spec->context_stack_top)
1184  (spec->context_stack_top)--;
1185  }
1186  else
1187  return TCL_ERROR;
1188  return TCL_OK;
1189 }
1190 
1191 static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
1192  int argc, const char **argv)
1193 {
1194  int argi = 1;
1195  int textFlag = 0;
1196  const char *element = 0;
1197  const char *attribute = 0;
1198  struct lexSpec *spec = (struct lexSpec *) clientData;
1199 
1200  while (argi < argc)
1201  {
1202  if (!strcmp("-text", argv[argi]))
1203  {
1204  textFlag = 1;
1205  argi++;
1206  }
1207  else if (!strcmp("-element", argv[argi]))
1208  {
1209  argi++;
1210  if (argi < argc)
1211  element = argv[argi++];
1212  }
1213  else if (!strcmp("-attribute", argv[argi]))
1214  {
1215  argi++;
1216  if (argi < argc)
1217  attribute = argv[argi++];
1218  }
1219  else
1220  break;
1221  }
1222  if (element)
1223  tagBegin (spec, element, strlen(element));
1224 
1225  while (argi < argc)
1226  {
1227 #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
1228  Tcl_DString ds;
1229  char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
1230  execData (spec, native, strlen(native), textFlag, attribute,
1231  attribute ? strlen(attribute) : 0);
1232  Tcl_DStringFree (&ds);
1233 #else
1234  execData (spec, argv[argi], strlen(argv[argi]), textFlag, attribute,
1235  attribute ? strlen(attribute) : 0);
1236 #endif
1237  argi++;
1238  }
1239  if (element)
1240  tagEnd (spec, 2, NULL, 0);
1241  return TCL_OK;
1242 }
1243 
1244 static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
1245  int argc, const char **argv)
1246 {
1247  struct lexSpec *spec = (struct lexSpec *) clientData;
1248  int argi = 1;
1249  int offset = 0;
1250  int no;
1251 
1252  while (argi < argc)
1253  {
1254  if (!strcmp("-offset", argv[argi]))
1255  {
1256  argi++;
1257  if (argi < argc)
1258  {
1259  offset = atoi(argv[argi]);
1260  argi++;
1261  }
1262  }
1263  else
1264  break;
1265  }
1266  if (argi != argc-1)
1267  return TCL_ERROR;
1268  no = atoi(argv[argi]);
1269  if (no >= spec->arg_no)
1270  no = spec->arg_no - 1;
1271  spec->ptr = spec->arg_start[no] + offset;
1272  return TCL_OK;
1273 }
1274 
/*
 * Run a piece of Tcl action code.
 *
 * Each match argument i is first published as the Tcl variable named by
 * its decimal index ("0", "1", ...).  The code is then evaluated at global
 * level, through its Tcl object when object support is compiled in and
 * from its string otherwise.  A Tcl error is logged with line, result and
 * errorInfo; it is not reported to the caller.
 */
1275 static void execTcl (struct lexSpec *spec, struct regxCode *code)
1276 {
1277  int i;
1278  int ret;
1279  for (i = 0; i < spec->arg_no; i++)
1280  {
1281  char var_name[10], *var_buf;
1282  int var_len, ch;
1283 
1284  sprintf (var_name, "%d", i);
1285  var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],
1286  &var_len);
1287  if (var_buf)
1288  {
 /* Temporarily NUL-terminate the argument inside the window buffer
  * and restore the overwritten byte afterwards.
  * NOTE(review): this touches the byte just past the returned span;
  * confirm the window buffer always has room for it. */
1289  ch = var_buf[var_len];
1290  var_buf[var_len] = '\0';
1291  Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0);
1292  var_buf[var_len] = ch;
1293  }
1294  }
1295 #if HAVE_TCL_OBJECTS
1296  ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
1297 #else
1298  ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
1299 #endif
1300  if (ret != TCL_OK)
1301  {
1302  const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
1303  yaz_log(YLOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
1304 #if TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION <= 5
1305  spec->tcl_interp->errorLine,
1306  spec->tcl_interp->result,
1307 #else
1308  Tcl_GetErrorLine(spec->tcl_interp),
1309  Tcl_GetStringResult(spec->tcl_interp),
1310 #endif
1311  err ? err : "[NO ERRORINFO]");
1312  }
1313 }
1314 /* HAVE_TCL_H */
1315 #endif
1316 
1317 static void execCode (struct lexSpec *spec, struct regxCode *code)
1318 {
1319  const char *s = code->str;
1320  int cmd_len, r;
1321  const char *cmd_str;
1322 
1323  r = execTok (spec, &s, &cmd_str, &cmd_len);
1324  while (r)
1325  {
1326  char *p, ptmp[64];
1327 
1328  if (r == 1)
1329  {
1330  r = execTok (spec, &s, &cmd_str, &cmd_len);
1331  continue;
1332  }
1333  p = regxStrz (cmd_str, cmd_len, ptmp);
1334  if (!strcmp (p, "begin"))
1335  {
1336  r = execTok (spec, &s, &cmd_str, &cmd_len);
1337  if (r < 2)
1338  {
1339  yaz_log (YLOG_WARN, "missing keyword after 'begin'");
1340  continue;
1341  }
1342  p = regxStrz (cmd_str, cmd_len, ptmp);
1343  if (!strcmp (p, "record"))
1344  {
1345  r = execTok (spec, &s, &cmd_str, &cmd_len);
1346  if (r < 2)
1347  continue;
1348  if (spec->d1_level <= 1)
1349  {
1350  static char absynName[64];
1351  data1_node *res;
1352 
1353  if (cmd_len > 63)
1354  cmd_len = 63;
1355  memcpy (absynName, cmd_str, cmd_len);
1356  absynName[cmd_len] = '\0';
1357 #if REGX_DEBUG
1358  yaz_log (YLOG_LOG, "begin record %s", absynName);
1359 #endif
1360  res = data1_mk_root (spec->dh, spec->m, absynName);
1361 
1362  spec->d1_level = 0;
1363 
1364  spec->d1_stack[spec->d1_level++] = res;
1365 
1366  res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1367 
1368  spec->d1_stack[spec->d1_level++] = res;
1369 
1370  spec->d1_stack[spec->d1_level] = NULL;
1371  }
1372  r = execTok (spec, &s, &cmd_str, &cmd_len);
1373  }
1374  else if (!strcmp (p, "element"))
1375  {
1376  r = execTok (spec, &s, &cmd_str, &cmd_len);
1377  if (r < 2)
1378  continue;
1379  tagBegin (spec, cmd_str, cmd_len);
1380  r = execTok (spec, &s, &cmd_str, &cmd_len);
1381  }
1382  else if (!strcmp (p, "variant"))
1383  {
1384  int class_len;
1385  const char *class_str = NULL;
1386  int type_len;
1387  const char *type_str = NULL;
1388  int value_len;
1389  const char *value_str = NULL;
1390  r = execTok (spec, &s, &cmd_str, &cmd_len);
1391  if (r < 2)
1392  continue;
1393  class_str = cmd_str;
1394  class_len = cmd_len;
1395  r = execTok (spec, &s, &cmd_str, &cmd_len);
1396  if (r < 2)
1397  continue;
1398  type_str = cmd_str;
1399  type_len = cmd_len;
1400 
1401  r = execTok (spec, &s, &cmd_str, &cmd_len);
1402  if (r < 2)
1403  continue;
1404  value_str = cmd_str;
1405  value_len = cmd_len;
1406 
1407  variantBegin (spec, class_str, class_len,
1408  type_str, type_len, value_str, value_len);
1409 
1410 
1411  r = execTok (spec, &s, &cmd_str, &cmd_len);
1412  }
1413  else if (!strcmp (p, "context"))
1414  {
1415  if (r > 1)
1416  {
1417  struct lexContext *lc = spec->context;
1418  r = execTok (spec, &s, &cmd_str, &cmd_len);
1419  p = regxStrz (cmd_str, cmd_len, ptmp);
1420 #if REGX_DEBUG
1421  yaz_log (YLOG_LOG, "begin context %s", p);
1422 #endif
1423  while (lc && strcmp (p, lc->name))
1424  lc = lc->next;
1425  if (lc)
1426  spec->context_stack[++(spec->context_stack_top)] = lc;
1427  else
1428  yaz_log (YLOG_WARN, "unknown context %s", p);
1429 
1430  }
1431  r = execTok (spec, &s, &cmd_str, &cmd_len);
1432  }
1433  else
1434  {
1435  yaz_log (YLOG_WARN, "bad keyword '%s' after begin", p);
1436  }
1437  }
1438  else if (!strcmp (p, "end"))
1439  {
1440  r = execTok (spec, &s, &cmd_str, &cmd_len);
1441  if (r < 2)
1442  {
1443  yaz_log (YLOG_WARN, "missing keyword after 'end'");
1444  continue;
1445  }
1446  p = regxStrz (cmd_str, cmd_len, ptmp);
1447  if (!strcmp (p, "record"))
1448  {
1449  while (spec->d1_level)
1450  {
1451  tagDataRelease (spec);
1452  (spec->d1_level)--;
1453  }
1454  r = execTok (spec, &s, &cmd_str, &cmd_len);
1455 #if REGX_DEBUG
1456  yaz_log (YLOG_LOG, "end record");
1457 #endif
1458  spec->stop_flag = 1;
1459  }
1460  else if (!strcmp (p, "element"))
1461  {
1462  int min_level = 2;
1463  while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1464  {
1465  if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
1466  min_level = 0;
1467  }
1468  if (r > 2)
1469  {
1470  tagEnd (spec, min_level, cmd_str, cmd_len);
1471  r = execTok (spec, &s, &cmd_str, &cmd_len);
1472  }
1473  else
1474  tagEnd (spec, min_level, NULL, 0);
1475  if (spec->d1_level <= 1)
1476  {
1477 #if REGX_DEBUG
1478  yaz_log (YLOG_LOG, "end element end records");
1479 #endif
1480  spec->stop_flag = 1;
1481  }
1482 
1483  }
1484  else if (!strcmp (p, "context"))
1485  {
1486 #if REGX_DEBUG
1487  yaz_log (YLOG_LOG, "end context");
1488 #endif
1489  if (spec->context_stack_top)
1490  (spec->context_stack_top)--;
1491  r = execTok (spec, &s, &cmd_str, &cmd_len);
1492  }
1493  else
1494  yaz_log (YLOG_WARN, "bad keyword '%s' after end", p);
1495  }
1496  else if (!strcmp (p, "data"))
1497  {
1498  int textFlag = 0;
1499  int element_len;
1500  const char *element_str = NULL;
1501  int attribute_len;
1502  const char *attribute_str = NULL;
1503 
1504  while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1505  {
1506  if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1507  textFlag = 1;
1508  else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1509  {
1510  r = execTok (spec, &s, &element_str, &element_len);
1511  if (r < 2)
1512  break;
1513  }
1514  else if (cmd_len==10 && !memcmp ("-attribute", cmd_str,
1515  cmd_len))
1516  {
1517  r = execTok (spec, &s, &attribute_str, &attribute_len);
1518  if (r < 2)
1519  break;
1520  }
1521  else
1522  yaz_log (YLOG_WARN, "bad data option: %.*s",
1523  cmd_len, cmd_str);
1524  }
1525  if (r != 2)
1526  {
1527  yaz_log (YLOG_WARN, "missing data item after data");
1528  continue;
1529  }
1530  if (element_str)
1531  tagBegin (spec, element_str, element_len);
1532  do
1533  {
1534  execData (spec, cmd_str, cmd_len, textFlag,
1535  attribute_str, attribute_len);
1536  r = execTok (spec, &s, &cmd_str, &cmd_len);
1537  } while (r > 1);
1538  if (element_str)
1539  tagEnd (spec, 2, NULL, 0);
1540  }
1541  else if (!strcmp (p, "unread"))
1542  {
1543  int no, offset;
1544  r = execTok (spec, &s, &cmd_str, &cmd_len);
1545  if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1546  {
1547  r = execTok (spec, &s, &cmd_str, &cmd_len);
1548  if (r < 2)
1549  {
1550  yaz_log (YLOG_WARN, "missing number after -offset");
1551  continue;
1552  }
1553  p = regxStrz (cmd_str, cmd_len, ptmp);
1554  offset = atoi (p);
1555  r = execTok (spec, &s, &cmd_str, &cmd_len);
1556  }
1557  else
1558  offset = 0;
1559  if (r < 2)
1560  {
1561  yaz_log (YLOG_WARN, "missing index after unread command");
1562  continue;
1563  }
1564  if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1565  {
1566  yaz_log (YLOG_WARN, "bad index after unread command");
1567  continue;
1568  }
1569  else
1570  {
1571  no = *cmd_str - '0';
1572  if (no >= spec->arg_no)
1573  no = spec->arg_no - 1;
1574  spec->ptr = spec->arg_start[no] + offset;
1575  }
1576  r = execTok (spec, &s, &cmd_str, &cmd_len);
1577  }
1578  else if (!strcmp (p, "context"))
1579  {
1580  if (r > 1)
1581  {
1582  struct lexContext *lc = spec->context;
1583  r = execTok (spec, &s, &cmd_str, &cmd_len);
1584  p = regxStrz (cmd_str, cmd_len, ptmp);
1585 
1586  while (lc && strcmp (p, lc->name))
1587  lc = lc->next;
1588  if (lc)
1589  spec->context_stack[spec->context_stack_top] = lc;
1590  else
1591  yaz_log (YLOG_WARN, "unknown context %s", p);
1592 
1593  }
1594  r = execTok (spec, &s, &cmd_str, &cmd_len);
1595  }
1596  else
1597  {
1598  yaz_log (YLOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
1599  r = execTok (spec, &s, &cmd_str, &cmd_len);
1600  continue;
1601  }
1602  if (r > 1)
1603  {
1604  yaz_log (YLOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1605  do {
1606  r = execTok (spec, &s, &cmd_str, &cmd_len);
1607  } while (r > 1);
1608  }
1609  }
1610 }
1611 
1612 
1613 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1614  int start_ptr, int *pptr)
1615 {
1616  int sptr;
1617  int arg_start[20];
1618  int arg_end[20];
1619  int arg_no = 1;
1620 
1621  if (!ap)
1622  return 1;
1623  arg_start[0] = start_ptr;
1624  arg_end[0] = *pptr;
1625  spec->arg_start = arg_start;
1626  spec->arg_end = arg_end;
1627 
1628  while (ap)
1629  {
1630  switch (ap->which)
1631  {
1632  case REGX_PATTERN:
1633  if (ap->u.pattern.body)
1634  {
1635  arg_start[arg_no] = *pptr;
1636  if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0))
1637  {
1638  arg_end[arg_no] = F_WIN_EOF;
1639  arg_no++;
1640  arg_start[arg_no] = F_WIN_EOF;
1641  arg_end[arg_no] = F_WIN_EOF;
1642  yaz_log(YLOG_DEBUG, "Pattern match rest of record");
1643  *pptr = F_WIN_EOF;
1644  }
1645  else
1646  {
1647  arg_end[arg_no] = sptr;
1648  arg_no++;
1649  arg_start[arg_no] = sptr;
1650  arg_end[arg_no] = *pptr;
1651  }
1652  }
1653  else
1654  {
1655  arg_start[arg_no] = *pptr;
1656  if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1))
1657  return 1;
1658  if (sptr != arg_start[arg_no])
1659  return 1;
1660  arg_end[arg_no] = *pptr;
1661  }
1662  arg_no++;
1663  break;
1664  case REGX_CODE:
1665  spec->arg_no = arg_no;
1666  spec->ptr = *pptr;
1667 #if HAVE_TCL_H
1668  if (spec->tcl_interp)
1669  execTcl(spec, ap->u.code);
1670  else
1671  execCode (spec, ap->u.code);
1672 #else
1673  execCode (spec, ap->u.code);
1674 #endif
1675  *pptr = spec->ptr;
1676  if (spec->stop_flag)
1677  return 0;
1678  break;
1679  case REGX_END:
1680  arg_start[arg_no] = *pptr;
1681  arg_end[arg_no] = F_WIN_EOF;
1682  arg_no++;
1683  *pptr = F_WIN_EOF;
1684  }
1685  ap = ap->next;
1686  }
1687  return 1;
1688 }
1689 
1690 static int execRule (struct lexSpec *spec, struct lexContext *context,
1691  int ruleNo, int start_ptr, int *pptr)
1692 {
1693 #if REGX_DEBUG
1694  yaz_log (YLOG_LOG, "exec rule %d", ruleNo);
1695 #endif
1696  return execAction (spec, context->fastRule[ruleNo]->actionList,
1697  start_ptr, pptr);
1698 }
1699 
1700 int lexNode (struct lexSpec *spec, int *ptr)
1701 {
1702  struct lexContext *context = spec->context_stack[spec->context_stack_top];
1703  struct DFA_state *state = context->dfa->states[0];
1704  struct DFA_tran *t;
1705  unsigned char c;
1706  unsigned char c_prev = '\n';
1707  int i;
1708  int last_rule = 0; /* rule number of current match */
1709  int last_ptr = *ptr; /* last char of match */
1710  int start_ptr = *ptr; /* first char of match */
1711  int skip_ptr = *ptr; /* first char of run */
1712  int more = 0;
1713 
1714  while (1)
1715  {
1716  c = f_win_advance (spec, ptr);
1717  if (*ptr == F_WIN_EOF)
1718  {
1719  /* end of file met */
1720  if (last_rule)
1721  {
1722  /* there was a match */
1723  if (skip_ptr < start_ptr)
1724  {
1725  /* deal with chars that didn't match */
1726  int size;
1727  char *buf;
1728  buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1729  execDataP (spec, buf, size, 0);
1730  }
1731  /* restore pointer */
1732  *ptr = last_ptr;
1733  /* execute rule */
1734  if (!execRule (spec, context, last_rule, start_ptr, ptr))
1735  return more;
1736  /* restore skip pointer */
1737  skip_ptr = *ptr;
1738  last_rule = 0;
1739  }
1740  else if (skip_ptr < *ptr)
1741  {
1742  /* deal with chars that didn't match */
1743  int size;
1744  char *buf;
1745  buf = f_win_get (spec, skip_ptr, *ptr, &size);
1746  execDataP (spec, buf, size, 0);
1747  }
1748  state = context->dfa->states[0];
1749  if (*ptr == F_WIN_EOF)
1750  return more;
1751  }
1752  t = state->trans;
1753  i = state->tran_no;
1754  while (1)
1755  if (--i < 0)
1756  { /* no transition for character c ... */
1757  if (last_rule)
1758  {
1759  if (skip_ptr < start_ptr)
1760  {
1761  /* deal with chars that didn't match */
1762  int size;
1763  char *buf;
1764  buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1765  execDataP (spec, buf, size, 0);
1766  }
1767  /* restore pointer */
1768  *ptr = last_ptr;
1769  if (!execRule (spec, context, last_rule, start_ptr, ptr))
1770  {
1771  if (spec->f_win_ef && *ptr != F_WIN_EOF)
1772  {
1773  off_t end_offset = *ptr;
1774 #if REGX_DEBUG
1775  yaz_log (YLOG_LOG, "regx: endf ptr=%d", *ptr);
1776 #endif
1777  (*spec->f_win_ef)(spec->stream, &end_offset);
1778  }
1779  return more;
1780  }
1781  context = spec->context_stack[spec->context_stack_top];
1782  skip_ptr = *ptr;
1783  last_rule = 0;
1784  last_ptr = start_ptr = *ptr;
1785  if (start_ptr > 0)
1786  {
1787  --start_ptr;
1788  c_prev = f_win_advance (spec, &start_ptr);
1789  }
1790  }
1791  else
1792  {
1793  c_prev = f_win_advance (spec, &start_ptr);
1794  *ptr = start_ptr;
1795  }
1796  state = context->dfa->states[0];
1797  break;
1798  }
1799  else if (c >= t->ch[0] && c <= t->ch[1])
1800  { /* transition ... */
1801  state = context->dfa->states[t->to];
1802  if (state->rule_no)
1803  {
1804  if (c_prev == '\n')
1805  {
1806  last_rule = state->rule_no;
1807  last_ptr = *ptr;
1808  }
1809  else if (state->rule_nno)
1810  {
1811  last_rule = state->rule_nno;
1812  last_ptr = *ptr;
1813  }
1814  more = 1;
1815  }
1816  break;
1817  }
1818  else
1819  t++;
1820  }
1821  return more;
1822 }
1823 
1824 static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
1825  const char *context_name)
1826 {
1827  struct lexContext *lt = spec->context;
1828  int ptr = offset;
1829  int ret;
1830 
1831  spec->stop_flag = 0;
1832  spec->d1_level = 0;
1833  spec->context_stack_top = 0;
1834  while (lt)
1835  {
1836  if (!strcmp (lt->name, context_name))
1837  break;
1838  lt = lt->next;
1839  }
1840  if (!lt)
1841  {
1842  yaz_log (YLOG_WARN, "cannot find context %s", context_name);
1843  return NULL;
1844  }
1845  spec->context_stack[spec->context_stack_top] = lt;
1846  spec->d1_stack[spec->d1_level] = NULL;
1847 #if 1
1848  if (!lt->initFlag)
1849  {
1850  lt->initFlag = 1;
1851  execAction (spec, lt->initActionList, ptr, &ptr);
1852  }
1853 #endif
1854  execAction (spec, lt->beginActionList, ptr, &ptr);
1855 
1856  ret = lexNode (spec, &ptr);
1857  while (spec->d1_level)
1858  {
1859  tagDataRelease (spec);
1860  (spec->d1_level)--;
1861  }
1862  if (!ret)
1863  return 0;
1864  execAction (spec, lt->endActionList, ptr, &ptr);
1865  return spec->d1_stack[0];
1866 }
1867 
1868 void grs_destroy(void *clientData)
1869 {
1870  struct lexSpecs *specs = (struct lexSpecs *) clientData;
1871  if (specs->spec)
1872  {
1873  lexSpecDestroy(&specs->spec);
1874  }
1875  xfree (specs);
1876 }
1877 
1879 {
1880  struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
1881  specs->spec = 0;
1882  strcpy(specs->type, "");
1883  return specs;
1884 }
1885 
1886 
1887 ZEBRA_RES grs_config(void *clientData, Res res, const char *args)
1888 {
1889  struct lexSpecs *specs = (struct lexSpecs *) clientData;
1890  if (strlen(args) < sizeof(specs->type))
1891  strcpy(specs->type, args);
1892  return ZEBRA_OK;
1893 }
1894 
1896 {
1897  int res;
1898  struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1899  struct lexSpec **curLexSpec = &specs->spec;
1900  off_t start_offset;
1901 
1902 #if REGX_DEBUG
1903  yaz_log (YLOG_LOG, "grs_read_regx");
1904 #endif
1905  if (!*curLexSpec || strcmp ((*curLexSpec)->name, specs->type))
1906  {
1907  if (*curLexSpec)
1908  lexSpecDestroy (curLexSpec);
1909  *curLexSpec = lexSpecCreate (specs->type, p->dh);
1910  res = readFileSpec (*curLexSpec);
1911  if (res)
1912  {
1913  lexSpecDestroy (curLexSpec);
1914  return NULL;
1915  }
1916  }
1917  (*curLexSpec)->dh = p->dh;
1918  start_offset = p->stream->tellf(p->stream);
1919  if (start_offset == 0)
1920  {
1921  (*curLexSpec)->f_win_start = 0;
1922  (*curLexSpec)->f_win_end = 0;
1923  (*curLexSpec)->f_win_rf = p->stream->readf;
1924  (*curLexSpec)->f_win_sf = p->stream->seekf;
1925  (*curLexSpec)->stream = p->stream;
1926  (*curLexSpec)->f_win_ef = p->stream->endf;
1927  (*curLexSpec)->f_win_size = 500000;
1928  }
1929  (*curLexSpec)->m = p->mem;
1930  return lexRoot (*curLexSpec, start_offset, "main");
1931 }
1932 
1933 static int extract_regx(void *clientData, struct recExtractCtrl *ctrl)
1934 {
1935  return zebra_grs_extract(clientData, ctrl, grs_read_regx);
1936 }
1937 
1938 static int retrieve_regx(void *clientData, struct recRetrieveCtrl *ctrl)
1939 {
1940  return zebra_grs_retrieve(clientData, ctrl, grs_read_regx);
1941 }
1942 
1943 static struct recType regx_type = {
1944  0,
1945  "grs.regx",
1946  grs_init,
1947  grs_config,
1948  grs_destroy,
1949  extract_regx,
1950  retrieve_regx,
1951 };
1952 
1953 
1954 #if HAVE_TCL_H
1955 data1_node *grs_read_tcl (struct grs_read_info *p)
1956 {
1957  int res;
1958  struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1959  struct lexSpec **curLexSpec = &specs->spec;
1960  off_t start_offset;
1961 
1962 #if REGX_DEBUG
1963  yaz_log (YLOG_LOG, "grs_read_tcl");
1964 #endif
1965  if (!*curLexSpec || strcmp ((*curLexSpec)->name, specs->type))
1966  {
1967  Tcl_Interp *tcl_interp;
1968  if (*curLexSpec)
1969  lexSpecDestroy (curLexSpec);
1970  *curLexSpec = lexSpecCreate (specs->type, p->dh);
1971  Tcl_FindExecutable("");
1972  tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
1973  Tcl_Init(tcl_interp);
1974  Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
1975  Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
1976  Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);
1977  Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread,
1978  *curLexSpec, 0);
1979  res = readFileSpec (*curLexSpec);
1980  if (res)
1981  {
1982  lexSpecDestroy (curLexSpec);
1983  return NULL;
1984  }
1985  }
1986  (*curLexSpec)->dh = p->dh;
1987  start_offset = p->stream->tellf(p->stream);
1988  if (start_offset == 0)
1989  {
1990  (*curLexSpec)->f_win_start = 0;
1991  (*curLexSpec)->f_win_end = 0;
1992  (*curLexSpec)->f_win_rf = p->stream->readf;
1993  (*curLexSpec)->f_win_sf = p->stream->seekf;
1994  (*curLexSpec)->stream = p->stream;
1995  (*curLexSpec)->f_win_ef = p->stream->endf;
1996  (*curLexSpec)->f_win_size = 500000;
1997  }
1998  (*curLexSpec)->m = p->mem;
1999  return lexRoot (*curLexSpec, start_offset, "main");
2000 }
2001 
2002 static int extract_tcl(void *clientData, struct recExtractCtrl *ctrl)
2003 {
2004  return zebra_grs_extract(clientData, ctrl, grs_read_tcl);
2005 }
2006 
2007 static int retrieve_tcl(void *clientData, struct recRetrieveCtrl *ctrl)
2008 {
2009  return zebra_grs_retrieve(clientData, ctrl, grs_read_tcl);
2010 }
2011 
2012 static struct recType tcl_type = {
2013  0,
2014  "grs.tcl",
2015  grs_init,
2016  grs_config,
2017  grs_destroy,
2018  extract_tcl,
2019  retrieve_tcl,
2020 };
2021 
2022 #endif
2023 
2024 RecType
2025 #if IDZEBRA_STATIC_GRS_REGX
2026 idzebra_filter_grs_regx
2027 #else
2029 #endif
2030 
2031 [] = {
2032  &regx_type,
2033 #if HAVE_TCL_H
2034  &tcl_type,
2035 #endif
2036  0,
2037 };
2038 /*
2039  * Local variables:
2040  * c-basic-offset: 4
2041  * c-file-style: "Stroustrup"
2042  * indent-tabs-mode: nil
2043  * End:
2044  * vim: shiftwidth=4 tabstop=8 expandtab
2045  */
2046 
unsigned short to
Definition: dfa.h:32
Definition: dfa.h:30
int stop_flag
Definition: mod_grs_regx.c:132
static int retrieve_regx(void *clientData, struct recRetrieveCtrl *ctrl)
struct DFA * dfa_init(void)
Definition: dfa.c:1090
void dfa_mkstate(struct DFA *)
Definition: dfa.c:1146
char * name
Definition: mod_grs_regx.c:105
#define DATA1N_tag
Definition: data1.h:276
static int execRule(struct lexSpec *spec, struct lexContext *context, int ruleNo, int start_ptr, int *pptr)
data1_node * data1_mk_root(data1_handle dh, NMEM nmem, const char *name)
Definition: d1_read.c:173
#define ZEBRA_OK
Definition: util.h:82
char lbuf[DATA1_LOCALDATA]
Definition: data1.h:339
#define REGX_END
Definition: mod_grs_regx.c:51
static void lexSpecDestroy(struct lexSpec **pp)
Definition: mod_grs_regx.c:333
off_t(* f_win_sf)(struct ZebraRecStream *, off_t)
Definition: mod_grs_regx.c:126
struct lexContext * context
Definition: mod_grs_regx.c:106
static struct lexSpec * lexSpecCreate(const char *name, data1_handle dh)
Definition: mod_grs_regx.c:301
static void execDataP(struct lexSpec *spec, const char *ebuf, int elen, int formatted_text)
Definition: mod_grs_regx.c:773
struct lexSpec * spec
Definition: mod_grs_regx.c:141
struct lexRuleInfo ** fastRule
Definition: mod_grs_regx.c:89
char * str
Definition: mod_grs_regx.c:57
static int execTok(struct lexSpec *spec, const char **src, const char **tokBuf, int *tokLen)
#define DATA1_MAX_SYMBOL
Definition: data1.h:38
int f_win_end
Definition: mod_grs_regx.c:122
int zebra_grs_extract(void *clientData, struct recExtractCtrl *p, data1_node *(*grs_read)(struct grs_read_info *))
Definition: recgrs.c:935
data1_node * data1_mk_tag_n(data1_handle dh, NMEM nmem, const char *tag, size_t len, const char **attr, data1_node *at)
Definition: d1_read.c:258
int lineNo
Definition: mod_grs_regx.c:112
static char * f_win_get(struct lexSpec *spec, off_t start_pos, off_t end_pos, int *size)
Definition: mod_grs_regx.c:145
static void tagBegin(struct lexSpec *spec, const char *tag, int len)
Definition: mod_grs_regx.c:882
struct lexRuleAction * initActionList
Definition: mod_grs_regx.c:95
struct data1_xattr * next
Definition: data1.h:262
short rule_nno
Definition: dfa.h:50
int d1_level
Definition: mod_grs_regx.c:131
static void tagStrip(const char **tag, int *len)
Definition: mod_grs_regx.c:869
struct DFA * dfa
Definition: mod_grs_regx.c:67
void * clientData
Definition: recgrs.h:29
static void execData(struct lexSpec *spec, const char *ebuf, int elen, int formatted_text, const char *attribute_str, int attribute_len)
Definition: mod_grs_regx.c:672
struct DFA_state ** states
Definition: dfa.h:55
char * name
Definition: mod_grs_regx.c:86
int context_stack_size
Definition: mod_grs_regx.c:109
#define REGX_PATTERN
Definition: mod_grs_regx.c:48
void * grs_init(Res res, RecType recType)
static struct DFA * lexSpecDFA(void)
Definition: mod_grs_regx.c:233
char * f_win_buf
Definition: mod_grs_regx.c:124
struct lexRule * rules
Definition: mod_grs_regx.c:88
unsigned char ch[2]
Definition: dfa.h:31
FILE * data1_path_fopen(data1_handle dh, const char *file, const char *mode)
Definition: d1_handle.c:151
data1_node * data1_mk_tag(data1_handle dh, NMEM nmem, const char *tag, const char **attr, data1_node *at)
Definition: d1_read.c:294
static data1_node * lexRoot(struct lexSpec *spec, off_t offset, const char *context_name)
int f_win_size
Definition: mod_grs_regx.c:123
struct ZebraRecStream * stream
Definition: mod_grs_regx.c:118
static struct lexContext * lexContextCreate(const char *name)
Definition: mod_grs_regx.c:265
Definition: res.c:46
int context_stack_top
Definition: mod_grs_regx.c:110
RecType idzebra_filter[]
static void regxCodeMk(struct regxCode **pp, const char *buf, int len)
Definition: mod_grs_regx.c:217
#define DATA1I_text
Definition: data1.h:314
data1_node * data1_mk_node2(data1_handle dh, NMEM m, int type, data1_node *parent)
Definition: d1_read.c:145
struct DFA_tran * trans
Definition: dfa.h:45
int * arg_end
Definition: mod_grs_regx.c:135
void dfa_delete(struct DFA **)
Definition: dfa.c:1156
static void execCode(struct lexSpec *spec, struct regxCode *code)
static void variantBegin(struct lexSpec *spec, const char *class_str, int class_len, const char *type_str, int type_len, const char *value_str, int value_len)
Definition: mod_grs_regx.c:798
int readFileSpec(struct lexSpec *spec)
Definition: mod_grs_regx.c:571
char * tag
Definition: data1.h:296
#define REGX_CODE
Definition: mod_grs_regx.c:52
union data1_node::@2 u
int f_win_start
Definition: mod_grs_regx.c:121
struct lexRuleAction::@22::@23 pattern
struct data1_node::@2::@3 root
struct DFA * dfa
Definition: mod_grs_regx.c:87
off_t(* endf)(struct ZebraRecStream *s, off_t *offset)
set and get of record position
Definition: recctrl.h:81
int readOneSpec(struct lexSpec *spec, const char *s)
Definition: mod_grs_regx.c:504
struct lexRuleInfo info
Definition: mod_grs_regx.c:81
int(* readf)(struct ZebraRecStream *s, char *buf, size_t count)
read function
Definition: recctrl.h:75
struct data1_node::@2::@6 variant
int arg_no
Definition: mod_grs_regx.c:136
struct lexRuleAction * endActionList
Definition: mod_grs_regx.c:94
unsigned formatted_text
Definition: data1.h:322
int lexNode(struct lexSpec *spec, int *ptr)
#define F_WIN_EOF
Definition: mod_grs_regx.c:44
static void tagEnd(struct lexSpec *spec, int min_level, const char *tag, int len)
Definition: mod_grs_regx.c:903
off_t(* f_win_ef)(struct ZebraRecStream *s, off_t *)
Definition: mod_grs_regx.c:119
struct lexConcatBuf * concatBuf
Definition: mod_grs_regx.c:128
static void lexContextDestroy(struct lexContext *p)
Definition: mod_grs_regx.c:282
ZEBRA_RES grs_config(void *clientData, Res res, const char *args)
int which
Definition: data1.h:285
int dfa_verbose
Definition: dfa.c:67
off_t(* seekf)(struct ZebraRecStream *s, off_t offset)
seek function
Definition: recctrl.h:77
struct data1_node * parent
Definition: data1.h:343
data1_node ** d1_stack
Definition: mod_grs_regx.c:130
struct lexRuleAction * next
Definition: mod_grs_regx.c:72
static char * regxStrz(const char *src, int len, char *str)
int debug_dfa_followpos
Definition: dfa.c:66
int(* f_win_rf)(struct ZebraRecStream *, char *, size_t)
Definition: mod_grs_regx.c:125
static int readParseToken(const char **cpp, int *len)
Definition: mod_grs_regx.c:367
data1_handle dh
Definition: mod_grs_regx.c:114
static void tagDataRelease(struct lexSpec *spec)
Definition: mod_grs_regx.c:779
#define REGX_CONTEXT
Definition: mod_grs_regx.c:53
data1_node * grs_read_regx(struct grs_read_info *p)
struct lexContext ** context_stack
Definition: mod_grs_regx.c:108
Definition: dfa.h:42
#define REGX_DEBUG
Definition: mod_grs_regx.c:42
int dfa_parse(struct DFA *, const char **)
Definition: dfa.c:1119
struct data1_node * next
Definition: data1.h:340
char * data
Definition: data1.h:307
struct lexRule * next
Definition: mod_grs_regx.c:82
data1_vartype * data1_getvartypeby_absyn(data1_handle dh, data1_absyn *absyn, char *zclass, char *type)
Definition: d1_varset.c:50
short tran_no
Definition: dfa.h:48
#define DATA1N_variant
Definition: data1.h:280
int debug_dfa_tran
Definition: dfa.c:65
static int tryMatch(struct lexSpec *spec, int *pptr, int *mptr, struct DFA *dfa, int greedy)
Definition: mod_grs_regx.c:926
int len
Definition: data1.h:308
void dfa_parse_cmap_del(struct DFA *d, int from)
Definition: dfa.c:958
static void regxCodeDel(struct regxCode **pp)
Definition: mod_grs_regx.c:202
struct lexRuleAction * beginActionList
Definition: mod_grs_regx.c:93
data1_handle dh
Definition: recgrs.h:31
char type[256]
Definition: mod_grs_regx.c:142
void grs_destroy(void *clientData)
off_t(* tellf)(struct ZebraRecStream *s)
tell function
Definition: recctrl.h:79
int debug_dfa_trav
Definition: dfa.c:64
static void actionListDel(struct lexRuleAction **rap)
Definition: mod_grs_regx.c:244
record extract for indexing
Definition: recctrl.h:101
void dfa_parse_cmap_add(struct DFA *d, int from, int to)
Definition: dfa.c:976
static int execAction(struct lexSpec *spec, struct lexRuleAction *ap, int start_ptr, int *pptr)
union lexRuleAction::@22 u
short ZEBRA_RES
Common return type for Zebra API.
Definition: util.h:80
static int extract_regx(void *clientData, struct recExtractCtrl *ctrl)
NMEM mem
Definition: recgrs.h:30
static int actionListMk(struct lexSpec *spec, const char *s, struct lexRuleAction **ap)
Definition: mod_grs_regx.c:442
int maxLevel
Definition: mod_grs_regx.c:129
int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, data1_node *(*grs_read)(struct grs_read_info *))
Definition: recgrs.c:1071
#define REGX_BEGIN
Definition: mod_grs_regx.c:50
struct ZebraRecStream * stream
Definition: recgrs.h:28
int * arg_start
Definition: mod_grs_regx.c:134
struct regxCode * code
Definition: mod_grs_regx.c:70
#define DATA1N_data
Definition: data1.h:278
struct lexRuleAction * actionList
Definition: mod_grs_regx.c:77
static int f_win_advance(struct lexSpec *spec, int *pos)
Definition: mod_grs_regx.c:183
short rule_no
Definition: dfa.h:49
#define DATA1_LOCALDATA
Definition: data1.h:338
#define REGX_INIT
Definition: mod_grs_regx.c:54
#define REGX_BODY
Definition: mod_grs_regx.c:49
struct lexContext * next
Definition: mod_grs_regx.c:96
record reader stream
Definition: recctrl.h:71
Definition: dfa.h:53
static struct recType regx_type