20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
30 #include <boost/regex.hpp>
31 #include <boost/lexical_cast.hpp>
32 #include <boost/algorithm/string.hpp>
37 namespace yf = mp::filter;
48 const std::map<std::string, std::string> & vars)
const;
56 std::map<std::string, std::string> &vars,
57 std::string &txt,
bool anchor,
58 std::list<boost::regex> &skip_list);
68 bool exec(std::map<std::string, std::string> &vars,
69 std::string &txt,
bool anchor,
70 std::list<boost::regex> &skip_list)
const;
79 std::map<std::string, RulePtr > &rules);
81 std::map<std::string, std::string> &vars,
82 std::list<boost::regex> & skip_list)
const;
83 void parse(
int verbose, std::string &content,
84 std::map<std::string, std::string> & vars,
85 std::list<boost::regex> & skip_list )
const;
93 std::list<boost::regex> &skip_list, std::string bind_addr);
95 std::map<std::string, std::string> & vars, std::string bind_addr)
const;
97 std::map<std::string, std::string> & vars)
const;
99 const char *content_type,
100 char **content_buf,
int *content_len,
101 std::map<std::string, std::string> & vars,
102 std::list<boost::regex> & skip_list )
const;
106 void anyTagEnd(
const char *tag,
int tag_len,
int close_it);
107 void attribute(
const char *tag,
int tag_len,
108 const char *attr,
int attr_len,
109 const char *value,
int val_len,
111 void closeTag(
const char *tag,
int tag_len);
112 void text(
const char *value,
int len);
115 std::stack<std::list<Within>::const_iterator>
s_within;
116 std::map<std::string, std::string> &
m_vars;
120 std::map<std::string, std::string> &vars,
121 std::list<boost::regex> & skip_list );
128 yf::HttpRewrite::HttpRewrite() :
133 yf::HttpRewrite::~HttpRewrite()
137 void yf::HttpRewrite::process(mp::Package & package)
const
139 yaz_log(YLOG_LOG,
"HttpRewrite begins....");
140 Z_GDU *gdu = package.request().get();
142 std::map<std::string, std::string> vars;
145 std::list<boost::regex> skip_list;
147 if (gdu && gdu->which == Z_GDU_HTTP_Request)
149 Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
151 std::string bind_addr = package.origin(). get_bind_address();
152 req_phase->rewrite_reqline(o, hreq, vars, bind_addr);
153 res_phase->read_skip_headers(hreq, skip_list, bind_addr);
154 yaz_log(YLOG_LOG,
">> Request headers");
155 req_phase->rewrite_headers(o, hreq->headers, vars);
156 req_phase->rewrite_body(o,
157 z_HTTP_header_lookup(hreq->headers,
159 &hreq->content_buf, &hreq->content_len,
161 package.request() = gdu;
164 gdu = package.response().get();
165 if (gdu && gdu->which == Z_GDU_HTTP_Response)
167 Z_HTTP_Response *hres = gdu->u.HTTP_Response;
168 yaz_log(YLOG_LOG,
"Response code %d", hres->code);
170 yaz_log(YLOG_LOG,
"<< Respose headers");
171 res_phase->rewrite_headers(o, hres->headers, vars);
172 res_phase->rewrite_body(o,
173 z_HTTP_header_lookup(hres->headers,
175 &hres->content_buf, &hres->content_len,
177 package.response() = gdu;
182 void yf::HttpRewrite::Phase::read_skip_headers(Z_HTTP_Request *hreq,
183 std::list<boost::regex> &skip_list,
184 std::string bind_addr )
186 std::string url(hreq->path);
187 if ( url.substr(0,7) !=
"http://" && url.substr(0,8) !=
"https://")
190 const char *host = z_HTTP_header_lookup(hreq->headers,
"Host");
192 if (bind_addr.find(
"ssl:") == 0) {
198 url = proto +
"://" + std::string(host) + hreq->path ;
201 while (
const char *hv = z_HTTP_header_remove( &(hreq->headers),
202 "X-Metaproxy-SkipLink") )
204 yaz_log(YLOG_LOG,
"Found SkipLink '%s'", hv );
205 const char *p = strchr(hv,
' ');
208 std::string page(hv,p);
209 std::string link(p+1);
210 boost::regex pagere(page);
211 if ( boost::regex_search(url, pagere) )
213 yaz_log(YLOG_LOG,
"SkipLink '%s' matches URL %s",
214 page.c_str(), url.c_str() );
215 boost::regex linkre(link);
216 skip_list.push_back(linkre);
220 yaz_log(YLOG_LOG,
"SkipLink ignored, '%s' does not match '%s'",
221 url.c_str(), page.c_str() );
227 void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o,
228 Z_HTTP_Request *hreq,
229 std::map<std::string, std::string> & vars,
230 std::string bind_addr)
const
233 if (bind_addr.find(
"ssl:") == 0) {
238 yaz_log(YLOG_LOG,
"rewrite_reqline: p='%s' ba='%s'",
239 hreq->path, proto.c_str() );
241 if ((strstr(hreq->path,
"http://") == hreq->path) ||
242 (strstr(hreq->path,
"https://") == hreq->path) )
244 yaz_log(YLOG_LOG,
"Path in the method line is absolute, "
245 "possibly a proxy request");
250 const char *host = z_HTTP_header_lookup(hreq->headers,
"Host");
254 path = proto +
"://";
259 std::list<Content>::const_iterator cit = content_list.begin();
260 for (; cit != content_list.end(); cit++)
261 if (cit->type ==
"headers")
264 if (cit == content_list.end())
267 std::list<Within>::const_iterator it = cit->within_list.begin();
268 for (; it != cit->within_list.end(); it++)
271 yaz_log(YLOG_LOG,
"Proxy request URL is %s", path.c_str());
272 std::list<boost::regex> dummy_skip_list;
273 if (it->exec(vars, path,
true, dummy_skip_list))
275 yaz_log(YLOG_LOG,
"Rewritten request URL is %s", path.c_str());
276 hreq->path = odr_strdup(o, path.c_str());
281 void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
282 Z_HTTP_Header *headers,
283 std::map<std::string, std::string> & vars )
const
285 std::list<Content>::const_iterator cit = content_list.begin();
286 for (; cit != content_list.end(); cit++)
287 if (cit->type ==
"headers")
290 if (cit == content_list.end())
293 for (Z_HTTP_Header *header = headers; header; header = header->next)
295 std::list<Within>::const_iterator it = cit->within_list.begin();
296 for (; it != cit->within_list.end(); it++)
298 if (!it->header.empty() &&
299 regex_match(header->name, it->header))
302 std::string hval(header->value);
303 std::list<boost::regex> dummy_skip_list;
304 if (it->exec(vars, hval,
true, dummy_skip_list))
306 header->value = odr_strdup(o, hval.c_str());
313 void yf::HttpRewrite::Phase::rewrite_body(
315 const char *content_type,
318 std::map<std::string, std::string> & vars,
319 std::list<boost::regex> & skip_list )
const
321 if (*content_len == 0)
324 yaz_log(YLOG_LOG,
"rewrite_body: null content_type, can not rewrite");
327 std::list<Content>::const_iterator cit = content_list.begin();
328 for (; cit != content_list.end(); cit++)
330 yaz_log(YLOG_LOG,
"rewrite_body: content_type=%s type=%s",
331 content_type, cit->type.c_str());
332 if (cit->type !=
"headers"
333 && regex_match(content_type, cit->content_re))
336 if (cit == content_list.end()) {
337 yaz_log(YLOG_LOG,
"rewrite_body: No content rule matched %s, not rewriting",
343 for (i = 0; i < *content_len; i++)
344 if ((*content_buf)[i] == 0) {
345 yaz_log(YLOG_LOG,
"rewrite_body: Looks like binary stuff, not rewriting");
349 std::string content(*content_buf, *content_len);
350 cit->parse(m_verbose, content, vars, skip_list);
351 *content_buf = odr_strdup(o, content.c_str());
352 *content_len = strlen(*content_buf);
355 yf::HttpRewrite::Event::Event(
const Content *p,
356 std::map<std::string, std::string> & vars,
357 std::list<boost::regex> & skip_list
358 ) : m_content(p), m_vars(vars), m_skips(skip_list)
363 yf::HttpRewrite::Event::~Event()
368 const char *yf::HttpRewrite::Event::result()
370 return wrbuf_cstr(m_w);
373 void yf::HttpRewrite::Event::openTagStart(
const char *tag,
int tag_len)
375 wrbuf_putc(m_w,
'<');
376 wrbuf_write(m_w, tag, tag_len);
378 std::string t(tag, tag_len);
379 std::list<Within>::const_iterator it = m_content->within_list.begin();
380 for (; it != m_content->within_list.end(); it++)
382 if (!it->tag.empty() && regex_match(t, it->tag))
384 if (!it->attr.empty() && regex_match(
"#text", it->attr))
393 void yf::HttpRewrite::Event::anyTagEnd(
const char *tag,
int tag_len,
398 if (!s_within.empty())
400 std::list<Within>::const_iterator it = s_within.top();
401 std::string t(tag, tag_len);
402 if (regex_match(t, it->tag))
407 wrbuf_putc(m_w,
'/');
408 wrbuf_putc(m_w,
'>');
411 void yf::HttpRewrite::Event::attribute(
const char *tag,
int tag_len,
412 const char *attr,
int attr_len,
413 const char *value,
int val_len,
416 std::list<Within>::const_iterator it = m_content->within_list.begin();
419 for (; it != m_content->within_list.end(); it++)
421 std::string t(tag, tag_len);
422 if (it->tag.empty() || regex_match(t, it->tag))
424 std::string a(attr, attr_len);
425 if (!it->attr.empty() && regex_match(a, it->attr))
432 wrbuf_putc(m_w,
' ');
433 wrbuf_write(m_w, attr, attr_len);
436 wrbuf_puts(m_w,
"=");
437 wrbuf_puts(m_w, sep);
442 std::string s(value, val_len);
443 it->exec(m_vars, s,
true, m_skips);
444 wrbuf_puts(m_w, s.c_str());
447 wrbuf_write(m_w, value, val_len);
448 wrbuf_puts(m_w, sep);
452 void yf::HttpRewrite::Event::closeTag(
const char *tag,
int tag_len)
454 if (!s_within.empty())
456 std::list<Within>::const_iterator it = s_within.top();
457 std::string t(tag, tag_len);
458 if (regex_match(t, it->tag))
461 wrbuf_puts(m_w,
"</");
462 wrbuf_write(m_w, tag, tag_len);
465 void yf::HttpRewrite::Event::text(
const char *value,
int len)
467 std::list<Within>::const_iterator it = m_content->within_list.end();
468 if (!s_within.empty())
470 if (it != m_content->within_list.end())
472 std::string s(value, len);
473 it->exec(m_vars, s,
false, m_skips);
474 wrbuf_puts(m_w, s.c_str());
477 wrbuf_write(m_w, value, len);
481 std::string &content,
482 std::map<std::string, std::string> &vars,
483 mp::filter::HttpRewrite::RulePtr ruleptr,
485 std::list<boost::regex> &skip_list)
487 bool replace =
false;
489 const char *cp = content.c_str();
490 const char *cp0 = cp;
493 if (html_context && !strncmp(cp,
""", 6))
496 res.append(cp0, cp - cp0);
500 if (!strncmp(cp,
""", 6))
508 std::string s(cp0, cp - cp0);
509 if (ruleptr->test_patterns(vars, s,
true, skip_list))
514 else if (*cp ==
'"' || *cp ==
'\'')
518 res.append(cp0, cp - cp0);
522 if (cp[-1] !=
'\\' && *cp == m)
530 std::string s(cp0, cp - cp0);
531 if (ruleptr->test_patterns(vars, s,
true, skip_list))
536 else if (*cp ==
'/' && cp[1] ==
'/')
538 while (cp[1] && cp[1] !=
'\n')
543 res.append(cp0, cp - cp0);
548 bool yf::HttpRewrite::Within::exec(
549 std::map<std::string, std::string> & vars,
550 std::string & txt,
bool anchor,
551 std::list<boost::regex> & skip_list)
const
553 if (type ==
"quoted-literal")
559 return rule->test_patterns(vars, txt, anchor, skip_list);
563 bool yf::HttpRewrite::Rule::test_patterns(
564 std::map<std::string, std::string> & vars,
565 std::string & txt,
bool anchor,
566 std::list<boost::regex> & skip_list )
568 bool replaces =
false;
571 std::string::const_iterator start, end;
576 std::list<Replace>::iterator bit = replace_list.end();
578 bool match_one =
false;
580 std::list<Replace>::iterator it = replace_list.begin();
581 for (; it != replace_list.end(); it++)
583 if (it->start_anchor && !first)
586 if (regex_search(start, end, what, it->re))
588 if (!match_one || what[0].first < bwhat[0].first)
602 for (i = 1; i < bwhat.size(); ++i)
605 std::map<int, std::string>::const_iterator git
606 = bit->group_index.find(i);
607 if (git != bit->group_index.end())
609 vars[git->second] = bwhat[i];
614 bool skipthis =
false;
615 std::list<boost::regex>::iterator si = skip_list.begin();
616 for ( ; si != skip_list.end(); si++) {
617 if ( boost::regex_search(bwhat.str(0), *si) )
624 std::string rvalue = bit->sub_vars(vars);
625 out.append(start, bwhat[0].first);
628 yaz_log(YLOG_LOG,
"! Not rewriting '%s', skiplist match",
629 bwhat.str(0).c_str() );
630 out.append(bwhat.str(0).c_str());
634 yaz_log(YLOG_LOG,
"! Rewritten '%s' to '%s'",
635 bwhat.str(0).c_str(), rvalue.c_str());
638 start = bwhat[0].second;
640 out.append(start, end);
645 void yf::HttpRewrite::Replace::parse_groups(std::string pattern)
649 const std::string &str = pattern;
651 start_anchor = str[0] ==
'^';
652 yaz_log(YLOG_LOG,
"Parsing groups from '%s'", str.c_str());
653 for (
size_t i = 0; i < str.size(); ++i)
656 if (!esc && str[i] ==
'\\')
661 if (!esc && str[i] ==
'(')
664 if (i+1 < str.size() && str[i+1] ==
'?')
667 if (i+1 < str.size() && str[i+1] ==
':')
669 if (gnum > 0) gnum--;
675 if (i+1 < str.size() && str[i+1] ==
'P')
677 if (i+1 < str.size() && str[i+1] ==
'<')
682 while (++i < str.size())
684 if (str[i] ==
'>') { term =
true;
break; }
685 if (!isalnum(str[i]))
686 throw mp::filter::FilterException
687 (
"Only alphanumeric chars allowed, found "
691 + boost::lexical_cast<std::string>(i));
695 throw mp::filter::FilterException
696 (
"Unterminated group name '" + gname
697 +
" in '" + str +
"'");
698 group_index[gnum] = gname;
699 yaz_log(YLOG_LOG,
"Found named group '%s' at $%d",
700 gname.c_str(), gnum);
709 std::string yf::HttpRewrite::Replace::sub_vars(
710 const std::map<std::string, std::string> & vars)
const
714 const std::string & in = recipe;
715 for (
size_t i = 0; i < in.size(); ++i)
717 if (!esc && in[i] ==
'\\')
722 if (!esc && in[i] ==
'$')
724 if (i+1 < in.size() && in[i+1] ==
'{')
729 while (++i < in.size())
731 if (in[i] ==
'}') { term =
true;
break; }
734 if (!term)
throw mp::filter::FilterException
735 (
"Unterminated var ref in '"+in+
"' at "
736 + boost::lexical_cast<std::string>(i));
737 std::map<std::string, std::string>::const_iterator it
739 if (it != vars.end())
746 throw mp::filter::FilterException
747 (
"Malformed or trimmed var ref in '"
748 +in+
"' at "+boost::lexical_cast<std::string>(i));
759 yf::HttpRewrite::Phase::Phase() : m_verbose(0)
763 void yf::HttpRewrite::Content::parse(
765 std::string &content,
766 std::map<std::string, std::string> &vars,
767 std::list<boost::regex> & skip_list )
const
772 Event ev(
this, vars, skip_list);
776 parser.
parse(ev, content.c_str());
779 if (type ==
"quoted-literal")
781 quoted_literal(content, vars, skip_list);
785 void yf::HttpRewrite::Content::quoted_literal(
786 std::string &content,
787 std::map<std::string, std::string> &vars,
788 std::list<boost::regex> & skip_list )
const
790 std::list<Within>::const_iterator it = within_list.begin();
791 if (it != within_list.end())
795 void yf::HttpRewrite::Content::configure(
796 const xmlNode *ptr, std::map<std::string, RulePtr > &rules)
798 for (; ptr; ptr = ptr->next)
800 if (ptr->type != XML_ELEMENT_NODE)
802 if (!strcmp((
const char *) ptr->name,
"within"))
804 static const char *names[7] =
805 {
"header",
"attr",
"tag",
"rule",
"reqline",
"type", 0 };
806 std::string values[6];
807 mp::xml::parse_attr(ptr, names, values);
809 if (values[0].length() > 0)
810 w.
header.assign(values[0], boost::regex_constants::icase);
811 if (values[1].length() > 0)
812 w.
attr.assign(values[1], boost::regex_constants::icase);
813 if (values[2].length() > 0)
814 w.
tag.assign(values[2], boost::regex_constants::icase);
816 std::vector<std::string> rulenames;
817 boost::split(rulenames, values[3], boost::is_any_of(
","));
818 if (rulenames.size() == 0)
820 throw mp::filter::FilterException
821 (
"Empty rule in '" + values[3] +
822 "' in http_rewrite filter");
824 else if (rulenames.size() == 1)
826 std::map<std::string,RulePtr>::const_iterator it =
827 rules.find(rulenames[0]);
828 if (it == rules.end())
829 throw mp::filter::FilterException
830 (
"Reference to non-existing rule '" + rulenames[0] +
831 "' in http_rewrite filter");
839 for (i = 0; i < rulenames.size(); i++)
841 std::map<std::string,RulePtr>::const_iterator it =
842 rules.find(rulenames[i]);
843 if (it == rules.end())
844 throw mp::filter::FilterException
845 (
"Reference to non-existing rule '" + rulenames[i] +
846 "' in http_rewrite filter");
848 std::list<Replace>::iterator rit =
849 subRule->replace_list.begin();
850 for (; rit != subRule->replace_list.end(); rit++)
851 rule->replace_list.push_back(*rit);
857 if (w.
type.empty() || w.
type ==
"quoted-literal")
860 throw mp::filter::FilterException
861 (
"within type must be quoted-literal or none in "
862 " in http_rewrite filter");
863 within_list.push_back(w);
868 void yf::HttpRewrite::configure_phase(
const xmlNode *ptr,
Phase &phase)
870 static const char *names[2] = {
"verbose", 0 };
871 std::string values[1];
873 mp::xml::parse_attr(ptr, names, values);
875 phase.
m_verbose = atoi(values[0].c_str());
877 std::map<std::string, RulePtr > rules;
878 for (ptr = ptr->children; ptr; ptr = ptr->next)
880 if (ptr->type != XML_ELEMENT_NODE)
882 else if (!strcmp((
const char *) ptr->name,
"rule"))
884 static const char *names[2] = {
"name", 0 };
885 std::string values[1];
886 values[0] =
"default";
887 mp::xml::parse_attr(ptr, names, values);
890 for (xmlNode *p = ptr->children; p; p = p->next)
892 if (p->type != XML_ELEMENT_NODE)
894 if (!strcmp((
const char *) p->name,
"rewrite"))
898 const struct _xmlAttr *attr;
899 for (attr = p->properties; attr; attr = attr->next)
901 if (!strcmp((
const char *) attr->name,
"from"))
902 from = mp::xml::get_text(attr->children);
903 else if (!strcmp((
const char *) attr->name,
"to"))
904 replace.
recipe = mp::xml::get_text(attr->children);
906 throw mp::filter::FilterException
908 + std::string((
const char *) attr->name)
909 +
" in rewrite section of http_rewrite");
911 yaz_log(YLOG_LOG,
"Found rewrite rule from '%s' to '%s'",
912 from.c_str(), replace.
recipe.c_str());
916 rule->replace_list.push_back(replace);
920 throw mp::filter::FilterException
922 + std::string((
const char *) p->name)
923 +
" in http_rewrite filter");
925 rules[values[0]] = rule;
927 else if (!strcmp((
const char *) ptr->name,
"content"))
929 static const char *names[3] =
930 {
"type",
"mime", 0 };
931 std::string values[2];
932 mp::xml::parse_attr(ptr, names, values);
933 if (values[0].empty())
935 throw mp::filter::FilterException
936 (
"Missing attribute, type for for element "
937 + std::string((
const char *) ptr->name)
938 +
" in http_rewrite filter");
943 if (!values[1].empty())
944 c.
content_re.assign(values[1], boost::regex::icase);
950 throw mp::filter::FilterException
952 + std::string((
const char *) ptr->name)
953 +
" in http_rewrite filter");
958 void yf::HttpRewrite::configure(
const xmlNode * ptr,
bool test_only,
961 for (ptr = ptr->children; ptr; ptr = ptr->next)
963 if (ptr->type != XML_ELEMENT_NODE)
965 else if (!strcmp((
const char *) ptr->name,
"request"))
969 else if (!strcmp((
const char *) ptr->name,
"response"))
975 throw mp::filter::FilterException
977 + std::string((
const char *) ptr->name)
978 +
" in http_rewrite filter");
985 return new mp::filter::HttpRewrite;
void configure(const xmlNode *ptr, std::map< std::string, RulePtr > &rules)
void quoted_literal(std::string &content, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list) const
void parse(int verbose, std::string &content, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list) const
std::list< Within > within_list
static bool embed_quoted_literal(std::string &content, std::map< std::string, std::string > &vars, mp::filter::HttpRewrite::RulePtr ruleptr, bool html_context, std::list< boost::regex > &skip_list)
struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite
static mp::filter::Base * filter_creator()