27 #include <yaz/matchstr.h>
29 #define SPACECHR " \t\r\n\f"
39 const char *text_start,
const char *text_end);
41 const char *tag,
int tag_len,
const char *cp);
43 const char *name,
int len,
46 const char *cp,
int *attr_len,
47 const char **value,
int *val_len,
int *tr);
57 mp::HTMLParser::Rep::Rep()
63 mp::HTMLParser::Rep::~Rep()
83 m_p->parse_str(event, str);
88 return (c >=
'a' && c <=
'z') || (c >=
'A' && c <=
'Z');
94 while (cp[i] && strchr(
SPACECHR, cp[i]))
102 for (i = 0; cp[i] && !strchr(
SPACECHR "/><=", cp[i]); i++)
108 const char *cp,
int *attr_len,
109 const char **value,
int *val_len,
123 if (cp[i] ==
'\"' || cp[i] ==
'\'')
127 while (cp[i] != *tr && cp[i])
137 while (cp[i] && !strchr(
SPACECHR ">", cp[i]))
149 const char *name,
int len,
153 while (cp[i] && !strchr(
"/><", cp[i]))
155 const char *attr_name = cp + i;
161 int nor = skipAttribute(event, cp+i, &attr_len, &value, &val_len, &tr);
170 printf(
"------ attr %.*s", attr_len, attr_name);
172 printf(
"=%.*s", val_len, value);
175 event.attribute(name, len, attr_name, attr_len, value, val_len, x);
181 const char *tag,
int tag_len,
const char *cp)
185 for (; cp[i] && !strchr(
"/><", cp[i]); i++)
190 printf(
"------ text %.*s\n", i, cp);
201 printf(
"------ any tag %s %.*s\n",
202 close_it ?
"close" :
"end", tag_len, tag);
203 event.anyTagEnd(tag, tag_len, close_it);
210 const char *text_start,
const char *text_end)
212 if (text_end - text_start)
215 printf(
"------ text %.*s\n",
216 (
int) (text_end - text_start), text_start);
217 event.text(text_start, text_end-text_start);
223 const char *text_start = cp;
229 if (nest && *cp ==
'!')
232 tagText(event, text_start, cp - 1);
233 if (cp[1] ==
'-' && cp[2] ==
'-')
235 for (i = 3; cp[i]; i++)
236 if (cp[i] ==
'-' && cp[i+1] ==
'-' && cp[i+2] ==
'>')
239 event.openTagStart(cp, i);
245 for (i = 1; cp[i] && cp[i] !=
'>'; i++)
247 event.openTagStart(cp, i);
250 printf(
"------ dtd %.*s\n", i, cp);
251 i += tagEnd(event, cp, i, cp + i);
255 else if (nest && *cp ==
'?')
258 tagText(event, text_start, cp - 1);
259 for (i = 1; cp[i] && cp[i] !=
'>'; i++)
261 event.openTagStart(cp, i);
263 printf(
"------ pi %.*s\n", i, cp);
264 i += tagEnd(event, cp, i, cp + i);
268 else if (*cp ==
'/' &&
isAlpha(cp[1]))
276 if (i == 6 && !yaz_strncasecmp(cp,
"script", i))
279 if (cp[ws + 6] ==
'>')
285 tagText(event, text_start, cp - 2);
286 event.closeTag(cp, i);
288 printf(
"------ tag close %.*s\n", i, cp);
289 i += tagEnd(event, cp, i, cp + i);
296 tagText(event, text_start, cp - 1);
298 event.openTagStart(cp, i);
300 printf(
"------ tag open %.*s\n", i, cp);
301 j = tagAttrs(event, cp, i, cp + i);
302 j += tagEnd(event, cp, i, cp + i + j);
304 if (i == 6 && !yaz_strncasecmp(cp,
"script", i))
311 tagText(event, text_start, cp);
314 mp::HTMLParserEvent::~HTMLParserEvent()
static int skipName(const char *cp)
static int isAlpha(int c)
static int skipSpace(const char *cp)