metaproxy  1.21.0
filter_http_rewrite.cpp
Go to the documentation of this file.
1 /* This file is part of Metaproxy.
2  Copyright (C) Index Data
3 
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 
19 #include "config.hpp"
20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
23 #include "filter_http_rewrite.hpp"
24 #include "html_parser.hpp"
25 
26 #include <yaz/zgdu.h>
27 #include <yaz/log.h>
28 
29 #include <stack>
30 #include <boost/regex.hpp>
31 #include <boost/lexical_cast.hpp>
32 #include <boost/algorithm/string.hpp>
33 
34 #include <map>
35 
36 namespace mp = metaproxy_1;
37 namespace yf = mp::filter;
38 
39 namespace metaproxy_1 {
40  namespace filter {
42  public:
44  boost::regex re;
45  std::string recipe;
46  std::map<int, std::string> group_index;
47  std::string sub_vars(
48  const std::map<std::string, std::string> & vars) const;
49  void parse_groups(std::string pattern);
50  };
51 
53  public:
54  std::list<Replace> replace_list;
55  bool test_patterns(
56  std::map<std::string, std::string> &vars,
57  std::string &txt, bool anchor,
58  std::list<boost::regex> &skip_list);
59  };
61  public:
62  boost::regex header;
63  boost::regex attr;
64  boost::regex tag;
65  std::string type;
66  bool reqline;
68  bool exec(std::map<std::string, std::string> &vars,
69  std::string &txt, bool anchor,
70  std::list<boost::regex> &skip_list) const;
71  };
72 
74  public:
75  std::string type;
76  boost::regex content_re;
77  std::list<Within> within_list;
78  void configure(const xmlNode *ptr,
79  std::map<std::string, RulePtr > &rules);
80  void quoted_literal(std::string &content,
81  std::map<std::string, std::string> &vars,
82  std::list<boost::regex> & skip_list) const;
83  void parse(int verbose, std::string &content,
84  std::map<std::string, std::string> & vars,
85  std::list<boost::regex> & skip_list ) const;
86  };
88  public:
89  Phase();
90  int m_verbose;
91  std::list<Content> content_list;
92  void read_skip_headers(Z_HTTP_Request *hreq,
93  std::list<boost::regex> &skip_list, std::string bind_addr);
94  void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq,
95  std::map<std::string, std::string> & vars, std::string bind_addr) const;
96  void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers,
97  std::map<std::string, std::string> & vars) const;
98  void rewrite_body(mp::odr & o,
99  const char *content_type,
100  char **content_buf, int *content_len,
101  std::map<std::string, std::string> & vars,
102  std::list<boost::regex> & skip_list ) const;
103  };
105  void openTagStart(const char *tag, int tag_len);
106  void anyTagEnd(const char *tag, int tag_len, int close_it);
107  void attribute(const char *tag, int tag_len,
108  const char *attr, int attr_len,
109  const char *value, int val_len,
110  const char *sep);
111  void closeTag(const char *tag, int tag_len);
112  void text(const char *value, int len);
114  WRBUF m_w;
115  std::stack<std::list<Within>::const_iterator> s_within;
116  std::map<std::string, std::string> &m_vars;
117  std::list<boost::regex> & m_skips;
118  public:
119  Event(const Content *p,
120  std::map<std::string, std::string> &vars,
121  std::list<boost::regex> & skip_list );
122  ~Event();
123  const char *result();
124  };
125  }
126 }
127 
128 yf::HttpRewrite::HttpRewrite() :
129  req_phase(new Phase), res_phase(new Phase)
130 {
131 }
132 
133 yf::HttpRewrite::~HttpRewrite()
134 {
135 }
136 
137 void yf::HttpRewrite::process(mp::Package & package) const
138 {
139  yaz_log(YLOG_LOG, "HttpRewrite begins....");
140  Z_GDU *gdu = package.request().get();
141  //map of request/response vars
142  std::map<std::string, std::string> vars;
143  //we have an http req
144 
145  std::list<boost::regex> skip_list;
146 
147  if (gdu && gdu->which == Z_GDU_HTTP_Request)
148  {
149  Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
150  mp::odr o;
151  std::string bind_addr = package.origin(). get_bind_address();
152  req_phase->rewrite_reqline(o, hreq, vars, bind_addr);
153  res_phase->read_skip_headers(hreq, skip_list, bind_addr);
154  yaz_log(YLOG_LOG, ">> Request headers");
155  req_phase->rewrite_headers(o, hreq->headers, vars);
156  req_phase->rewrite_body(o,
157  z_HTTP_header_lookup(hreq->headers,
158  "Content-Type"),
159  &hreq->content_buf, &hreq->content_len,
160  vars, skip_list);
161  package.request() = gdu;
162  }
163  package.move();
164  gdu = package.response().get();
165  if (gdu && gdu->which == Z_GDU_HTTP_Response)
166  {
167  Z_HTTP_Response *hres = gdu->u.HTTP_Response;
168  yaz_log(YLOG_LOG, "Response code %d", hres->code);
169  mp::odr o;
170  yaz_log(YLOG_LOG, "<< Respose headers");
171  res_phase->rewrite_headers(o, hres->headers, vars);
172  res_phase->rewrite_body(o,
173  z_HTTP_header_lookup(hres->headers,
174  "Content-Type"),
175  &hres->content_buf, &hres->content_len,
176  vars, skip_list);
177  package.response() = gdu;
178  }
179 }
180 
181 // Read (and remove) the X-Metaproxy-SkipLink headers
182 void yf::HttpRewrite::Phase::read_skip_headers(Z_HTTP_Request *hreq,
183  std::list<boost::regex> &skip_list,
184  std::string bind_addr )
185 {
186  std::string url(hreq->path);
187  if ( url.substr(0,7) != "http://" && url.substr(0,8) != "https://")
188  { // path was relative, as it often is
189  // make absolute, so we can match the page regex against it
190  const char *host = z_HTTP_header_lookup(hreq->headers, "Host");
191  std::string proto;
192  if (bind_addr.find("ssl:") == 0) {
193  proto = "https";
194  } else {
195  proto = "http";
196  }
197  if (host)
198  url = proto + "://" + std::string(host) + hreq->path ;
199  }
200 
201  while ( const char *hv = z_HTTP_header_remove( &(hreq->headers),
202  "X-Metaproxy-SkipLink") )
203  {
204  yaz_log(YLOG_LOG,"Found SkipLink '%s'", hv );
205  const char *p = strchr(hv,' ');
206  if (!p)
207  continue; // should not happen
208  std::string page(hv,p);
209  std::string link(p+1);
210  boost::regex pagere(page);
211  if ( boost::regex_search(url, pagere) )
212  {
213  yaz_log(YLOG_LOG,"SkipLink '%s' matches URL %s",
214  page.c_str(), url.c_str() );
215  boost::regex linkre(link);
216  skip_list.push_back(linkre);
217  }
218  else
219  {
220  yaz_log(YLOG_LOG,"SkipLink ignored, '%s' does not match '%s'",
221  url.c_str(), page.c_str() );
222  }
223  }
224 }
225 
226 
227 void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o,
228  Z_HTTP_Request *hreq,
229  std::map<std::string, std::string> & vars,
230  std::string bind_addr) const
231 {
232  std::string proto;
233  if (bind_addr.find("ssl:") == 0) {
234  proto = "https";
235  } else {
236  proto = "http";
237  }
238  yaz_log(YLOG_LOG,"rewrite_reqline: p='%s' ba='%s'",
239  hreq->path, proto.c_str() );
240  std::string path;
241  if ((strstr(hreq->path, "http://") == hreq->path) ||
242  (strstr(hreq->path, "https://") == hreq->path) )
243  {
244  yaz_log(YLOG_LOG, "Path in the method line is absolute, "
245  "possibly a proxy request"); // the usual case with cf_proxy
246  path = hreq->path;
247  }
248  else
249  {
250  const char *host = z_HTTP_header_lookup(hreq->headers, "Host");
251  if (!host)
252  return;
253 
254  path = proto + "://";
255  path += host;
256  path += hreq->path;
257  }
258 
259  std::list<Content>::const_iterator cit = content_list.begin();
260  for (; cit != content_list.end(); cit++)
261  if (cit->type == "headers")
262  break;
263 
264  if (cit == content_list.end())
265  return;
266 
267  std::list<Within>::const_iterator it = cit->within_list.begin();
268  for (; it != cit->within_list.end(); it++)
269  if (it->reqline)
270  {
271  yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
272  std::list<boost::regex> dummy_skip_list; // no skips here!
273  if (it->exec(vars, path, true, dummy_skip_list))
274  {
275  yaz_log(YLOG_LOG, "Rewritten request URL is %s", path.c_str());
276  hreq->path = odr_strdup(o, path.c_str());
277  }
278  }
279 }
280 
281 void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
282  Z_HTTP_Header *headers,
283  std::map<std::string, std::string> & vars ) const
284 {
285  std::list<Content>::const_iterator cit = content_list.begin();
286  for (; cit != content_list.end(); cit++)
287  if (cit->type == "headers")
288  break;
289 
290  if (cit == content_list.end())
291  return;
292 
293  for (Z_HTTP_Header *header = headers; header; header = header->next)
294  {
295  std::list<Within>::const_iterator it = cit->within_list.begin();
296  for (; it != cit->within_list.end(); it++)
297  {
298  if (!it->header.empty() &&
299  regex_match(header->name, it->header))
300  {
301  // Match and replace only the header value
302  std::string hval(header->value);
303  std::list<boost::regex> dummy_skip_list; // no skips here!
304  if (it->exec(vars, hval, true, dummy_skip_list))
305  {
306  header->value = odr_strdup(o, hval.c_str());
307  }
308  }
309  }
310  }
311 }
312 
313 void yf::HttpRewrite::Phase::rewrite_body(
314  mp::odr &o,
315  const char *content_type,
316  char **content_buf,
317  int *content_len,
318  std::map<std::string, std::string> & vars,
319  std::list<boost::regex> & skip_list ) const
320 {
321  if (*content_len == 0)
322  return;
323  if (!content_type) {
324  yaz_log(YLOG_LOG, "rewrite_body: null content_type, can not rewrite");
325  return;
326  }
327  std::list<Content>::const_iterator cit = content_list.begin();
328  for (; cit != content_list.end(); cit++)
329  {
330  yaz_log(YLOG_LOG, "rewrite_body: content_type=%s type=%s",
331  content_type, cit->type.c_str());
332  if (cit->type != "headers"
333  && regex_match(content_type, cit->content_re))
334  break;
335  }
336  if (cit == content_list.end()) {
337  yaz_log(YLOG_LOG,"rewrite_body: No content rule matched %s, not rewriting",
338  content_type );
339  return;
340  }
341 
342  int i;
343  for (i = 0; i < *content_len; i++)
344  if ((*content_buf)[i] == 0) {
345  yaz_log(YLOG_LOG,"rewrite_body: Looks like binary stuff, not rewriting");
346  return; // binary content. skip
347  }
348 
349  std::string content(*content_buf, *content_len);
350  cit->parse(m_verbose, content, vars, skip_list);
351  *content_buf = odr_strdup(o, content.c_str());
352  *content_len = strlen(*content_buf);
353 }
354 
355 yf::HttpRewrite::Event::Event(const Content *p,
356  std::map<std::string, std::string> & vars,
357  std::list<boost::regex> & skip_list
358  ) : m_content(p), m_vars(vars), m_skips(skip_list)
359 {
360  m_w = wrbuf_alloc();
361 }
362 
363 yf::HttpRewrite::Event::~Event()
364 {
365  wrbuf_destroy(m_w);
366 }
367 
368 const char *yf::HttpRewrite::Event::result()
369 {
370  return wrbuf_cstr(m_w);
371 }
372 
373 void yf::HttpRewrite::Event::openTagStart(const char *tag, int tag_len)
374 {
375  wrbuf_putc(m_w, '<');
376  wrbuf_write(m_w, tag, tag_len);
377 
378  std::string t(tag, tag_len);
379  std::list<Within>::const_iterator it = m_content->within_list.begin();
380  for (; it != m_content->within_list.end(); it++)
381  {
382  if (!it->tag.empty() && regex_match(t, it->tag))
383  {
384  if (!it->attr.empty() && regex_match("#text", it->attr))
385  {
386  s_within.push(it);
387  return;
388  }
389  }
390  }
391 }
392 
393 void yf::HttpRewrite::Event::anyTagEnd(const char *tag, int tag_len,
394  int close_it)
395 {
396  if (close_it)
397  {
398  if (!s_within.empty())
399  {
400  std::list<Within>::const_iterator it = s_within.top();
401  std::string t(tag, tag_len);
402  if (regex_match(t, it->tag))
403  s_within.pop();
404  }
405  }
406  if (close_it)
407  wrbuf_putc(m_w, '/');
408  wrbuf_putc(m_w, '>');
409 }
410 
411 void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len,
412  const char *attr, int attr_len,
413  const char *value, int val_len,
414  const char *sep)
415 {
416  std::list<Within>::const_iterator it = m_content->within_list.begin();
417  bool subst = false;
418 
419  for (; it != m_content->within_list.end(); it++)
420  {
421  std::string t(tag, tag_len);
422  if (it->tag.empty() || regex_match(t, it->tag))
423  {
424  std::string a(attr, attr_len);
425  if (!it->attr.empty() && regex_match(a, it->attr))
426  subst = true;
427  }
428  if (subst)
429  break;
430  }
431 
432  wrbuf_putc(m_w, ' ');
433  wrbuf_write(m_w, attr, attr_len);
434  if (value)
435  {
436  wrbuf_puts(m_w, "=");
437  wrbuf_puts(m_w, sep);
438 
439  std::string output;
440  if (subst)
441  {
442  std::string s(value, val_len);
443  it->exec(m_vars, s, true, m_skips);
444  wrbuf_puts(m_w, s.c_str());
445  }
446  else
447  wrbuf_write(m_w, value, val_len);
448  wrbuf_puts(m_w, sep);
449  }
450 }
451 
452 void yf::HttpRewrite::Event::closeTag(const char *tag, int tag_len)
453 {
454  if (!s_within.empty())
455  {
456  std::list<Within>::const_iterator it = s_within.top();
457  std::string t(tag, tag_len);
458  if (regex_match(t, it->tag))
459  s_within.pop();
460  }
461  wrbuf_puts(m_w, "</");
462  wrbuf_write(m_w, tag, tag_len);
463 }
464 
465 void yf::HttpRewrite::Event::text(const char *value, int len)
466 {
467  std::list<Within>::const_iterator it = m_content->within_list.end();
468  if (!s_within.empty())
469  it = s_within.top();
470  if (it != m_content->within_list.end())
471  {
472  std::string s(value, len);
473  it->exec(m_vars, s, false, m_skips);
474  wrbuf_puts(m_w, s.c_str());
475  }
476  else
477  wrbuf_write(m_w, value, len);
478 }
479 
481  std::string &content,
482  std::map<std::string, std::string> &vars,
483  mp::filter::HttpRewrite::RulePtr ruleptr,
484  bool html_context,
485  std::list<boost::regex> &skip_list)
486 {
487  bool replace = false;
488  std::string res;
489  const char *cp = content.c_str();
490  const char *cp0 = cp;
491  while (*cp)
492  {
493  if (html_context && !strncmp(cp, "&quot;", 6))
494  {
495  cp += 6;
496  res.append(cp0, cp - cp0);
497  cp0 = cp;
498  while (*cp)
499  {
500  if (!strncmp(cp, "&quot;", 6))
501  break;
502  if (*cp == '\n')
503  break;
504  cp++;
505  }
506  if (!*cp)
507  break;
508  std::string s(cp0, cp - cp0);
509  if (ruleptr->test_patterns(vars, s, true, skip_list))
510  replace = true;
511  cp0 = cp;
512  res.append(s);
513  }
514  else if (*cp == '"' || *cp == '\'')
515  {
516  int m = *cp;
517  cp++;
518  res.append(cp0, cp - cp0);
519  cp0 = cp;
520  while (*cp)
521  {
522  if (cp[-1] != '\\' && *cp == m)
523  break;
524  if (*cp == '\n')
525  break;
526  cp++;
527  }
528  if (!*cp)
529  break;
530  std::string s(cp0, cp - cp0);
531  if (ruleptr->test_patterns(vars, s, true, skip_list))
532  replace = true;
533  cp0 = cp;
534  res.append(s);
535  }
536  else if (*cp == '/' && cp[1] == '/')
537  {
538  while (cp[1] && cp[1] != '\n')
539  cp++;
540  }
541  cp++;
542  }
543  res.append(cp0, cp - cp0);
544  content = res;
545  return replace;
546 }
547 
548 bool yf::HttpRewrite::Within::exec(
549  std::map<std::string, std::string> & vars,
550  std::string & txt, bool anchor,
551  std::list<boost::regex> & skip_list) const
552 {
553  if (type == "quoted-literal")
554  {
555  return embed_quoted_literal(txt, vars, rule, true, skip_list);
556  }
557  else
558  {
559  return rule->test_patterns(vars, txt, anchor, skip_list);
560  }
561 }
562 
563 bool yf::HttpRewrite::Rule::test_patterns(
564  std::map<std::string, std::string> & vars,
565  std::string & txt, bool anchor,
566  std::list<boost::regex> & skip_list )
567 {
568  bool replaces = false;
569  bool first = anchor;
570  std::string out;
571  std::string::const_iterator start, end;
572  start = txt.begin();
573  end = txt.end();
574  while (1)
575  {
576  std::list<Replace>::iterator bit = replace_list.end();
577  boost::smatch bwhat;
578  bool match_one = false;
579  {
580  std::list<Replace>::iterator it = replace_list.begin();
581  for (; it != replace_list.end(); it++)
582  {
583  if (it->start_anchor && !first)
584  continue;
585  boost::smatch what;
586  if (regex_search(start, end, what, it->re))
587  {
588  if (!match_one || what[0].first < bwhat[0].first)
589  {
590  bwhat = what;
591  bit = it;
592  }
593  match_one = true;
594  }
595  }
596  if (!match_one)
597  break;
598  }
599  first = false;
600  replaces = true;
601  size_t i;
602  for (i = 1; i < bwhat.size(); ++i)
603  {
604  //check if the group is named
605  std::map<int, std::string>::const_iterator git
606  = bit->group_index.find(i);
607  if (git != bit->group_index.end())
608  { //it is
609  vars[git->second] = bwhat[i];
610  }
611 
612  }
613  // Compare against skip_list
614  bool skipthis = false;
615  std::list<boost::regex>::iterator si = skip_list.begin();
616  for ( ; si != skip_list.end(); si++) {
617  if ( boost::regex_search(bwhat.str(0), *si) )
618  {
619  skipthis = true;
620  break;
621  }
622  }
623  //prepare replacement string
624  std::string rvalue = bit->sub_vars(vars);
625  out.append(start, bwhat[0].first);
626  if ( skipthis )
627  {
628  yaz_log(YLOG_LOG,"! Not rewriting '%s', skiplist match",
629  bwhat.str(0).c_str() );
630  out.append(bwhat.str(0).c_str());
631  }
632  else
633  {
634  yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
635  bwhat.str(0).c_str(), rvalue.c_str());
636  out.append(rvalue);
637  }
638  start = bwhat[0].second; //move search forward
639  }
640  out.append(start, end);
641  txt = out;
642  return replaces;
643 }
644 
645 void yf::HttpRewrite::Replace::parse_groups(std::string pattern)
646 {
647  int gnum = 0;
648  bool esc = false;
649  const std::string &str = pattern;
650  std::string res;
651  start_anchor = str[0] == '^';
652  yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
653  for (size_t i = 0; i < str.size(); ++i)
654  {
655  res += str[i];
656  if (!esc && str[i] == '\\')
657  {
658  esc = true;
659  continue;
660  }
661  if (!esc && str[i] == '(') //group starts
662  {
663  gnum++;
664  if (i+1 < str.size() && str[i+1] == '?') //group with attrs
665  {
666  i++;
667  if (i+1 < str.size() && str[i+1] == ':') //non-capturing
668  {
669  if (gnum > 0) gnum--;
670  res += str[i];
671  i++;
672  res += str[i];
673  continue;
674  }
675  if (i+1 < str.size() && str[i+1] == 'P') //optional, python
676  i++;
677  if (i+1 < str.size() && str[i+1] == '<') //named
678  {
679  i++;
680  std::string gname;
681  bool term = false;
682  while (++i < str.size())
683  {
684  if (str[i] == '>') { term = true; break; }
685  if (!isalnum(str[i]))
686  throw mp::filter::FilterException
687  ("Only alphanumeric chars allowed, found "
688  " in '"
689  + str
690  + "' at "
691  + boost::lexical_cast<std::string>(i));
692  gname += str[i];
693  }
694  if (!term)
695  throw mp::filter::FilterException
696  ("Unterminated group name '" + gname
697  + " in '" + str +"'");
698  group_index[gnum] = gname;
699  yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
700  gname.c_str(), gnum);
701  }
702  }
703  }
704  esc = false;
705  }
706  re = res;
707 }
708 
709 std::string yf::HttpRewrite::Replace::sub_vars(
710  const std::map<std::string, std::string> & vars) const
711 {
712  std::string out;
713  bool esc = false;
714  const std::string & in = recipe;
715  for (size_t i = 0; i < in.size(); ++i)
716  {
717  if (!esc && in[i] == '\\')
718  {
719  esc = true;
720  continue;
721  }
722  if (!esc && in[i] == '$') //var
723  {
724  if (i+1 < in.size() && in[i+1] == '{') //ref prefix
725  {
726  ++i;
727  std::string name;
728  bool term = false;
729  while (++i < in.size())
730  {
731  if (in[i] == '}') { term = true; break; }
732  name += in[i];
733  }
734  if (!term) throw mp::filter::FilterException
735  ("Unterminated var ref in '"+in+"' at "
736  + boost::lexical_cast<std::string>(i));
737  std::map<std::string, std::string>::const_iterator it
738  = vars.find(name);
739  if (it != vars.end())
740  {
741  out += it->second;
742  }
743  }
744  else
745  {
746  throw mp::filter::FilterException
747  ("Malformed or trimmed var ref in '"
748  +in+"' at "+boost::lexical_cast<std::string>(i));
749  }
750  continue;
751  }
752  //passthru
753  out += in[i];
754  esc = false;
755  }
756  return out;
757 }
758 
759 yf::HttpRewrite::Phase::Phase() : m_verbose(0)
760 {
761 }
762 
763 void yf::HttpRewrite::Content::parse(
764  int verbose,
765  std::string &content,
766  std::map<std::string, std::string> &vars,
767  std::list<boost::regex> & skip_list ) const
768 {
769  if (type == "html")
770  {
771  HTMLParser parser;
772  Event ev(this, vars, skip_list);
773 
774  parser.set_verbose(verbose);
775 
776  parser.parse(ev, content.c_str());
777  content = ev.result();
778  }
779  if (type == "quoted-literal")
780  {
781  quoted_literal(content, vars, skip_list);
782  }
783 }
784 
785 void yf::HttpRewrite::Content::quoted_literal(
786  std::string &content,
787  std::map<std::string, std::string> &vars,
788  std::list<boost::regex> & skip_list ) const
789 {
790  std::list<Within>::const_iterator it = within_list.begin();
791  if (it != within_list.end())
792  embed_quoted_literal(content, vars, it->rule, false, skip_list);
793 }
794 
795 void yf::HttpRewrite::Content::configure(
796  const xmlNode *ptr, std::map<std::string, RulePtr > &rules)
797 {
798  for (; ptr; ptr = ptr->next)
799  {
800  if (ptr->type != XML_ELEMENT_NODE)
801  continue;
802  if (!strcmp((const char *) ptr->name, "within"))
803  {
804  static const char *names[7] =
805  { "header", "attr", "tag", "rule", "reqline", "type", 0 };
806  std::string values[6];
807  mp::xml::parse_attr(ptr, names, values);
808  Within w;
809  if (values[0].length() > 0)
810  w.header.assign(values[0], boost::regex_constants::icase);
811  if (values[1].length() > 0)
812  w.attr.assign(values[1], boost::regex_constants::icase);
813  if (values[2].length() > 0)
814  w.tag.assign(values[2], boost::regex_constants::icase);
815 
816  std::vector<std::string> rulenames;
817  boost::split(rulenames, values[3], boost::is_any_of(","));
818  if (rulenames.size() == 0)
819  {
820  throw mp::filter::FilterException
821  ("Empty rule in '" + values[3] +
822  "' in http_rewrite filter");
823  }
824  else if (rulenames.size() == 1)
825  {
826  std::map<std::string,RulePtr>::const_iterator it =
827  rules.find(rulenames[0]);
828  if (it == rules.end())
829  throw mp::filter::FilterException
830  ("Reference to non-existing rule '" + rulenames[0] +
831  "' in http_rewrite filter");
832  w.rule = it->second;
833 
834  }
835  else
836  {
837  RulePtr rule(new Rule);
838  size_t i;
839  for (i = 0; i < rulenames.size(); i++)
840  {
841  std::map<std::string,RulePtr>::const_iterator it =
842  rules.find(rulenames[i]);
843  if (it == rules.end())
844  throw mp::filter::FilterException
845  ("Reference to non-existing rule '" + rulenames[i] +
846  "' in http_rewrite filter");
847  RulePtr subRule = it->second;
848  std::list<Replace>::iterator rit =
849  subRule->replace_list.begin();
850  for (; rit != subRule->replace_list.end(); rit++)
851  rule->replace_list.push_back(*rit);
852  }
853  w.rule = rule;
854  }
855  w.reqline = values[4] == "1";
856  w.type = values[5];
857  if (w.type.empty() || w.type == "quoted-literal")
858  ;
859  else
860  throw mp::filter::FilterException
861  ("within type must be quoted-literal or none in "
862  " in http_rewrite filter");
863  within_list.push_back(w);
864  }
865  }
866 }
867 
868 void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase)
869 {
870  static const char *names[2] = { "verbose", 0 };
871  std::string values[1];
872  values[0] = "0";
873  mp::xml::parse_attr(ptr, names, values);
874 
875  phase.m_verbose = atoi(values[0].c_str());
876 
877  std::map<std::string, RulePtr > rules;
878  for (ptr = ptr->children; ptr; ptr = ptr->next)
879  {
880  if (ptr->type != XML_ELEMENT_NODE)
881  continue;
882  else if (!strcmp((const char *) ptr->name, "rule"))
883  {
884  static const char *names[2] = { "name", 0 };
885  std::string values[1];
886  values[0] = "default";
887  mp::xml::parse_attr(ptr, names, values);
888 
889  RulePtr rule(new Rule);
890  for (xmlNode *p = ptr->children; p; p = p->next)
891  {
892  if (p->type != XML_ELEMENT_NODE)
893  continue;
894  if (!strcmp((const char *) p->name, "rewrite"))
895  {
896  Replace replace;
897  std::string from;
898  const struct _xmlAttr *attr;
899  for (attr = p->properties; attr; attr = attr->next)
900  {
901  if (!strcmp((const char *) attr->name, "from"))
902  from = mp::xml::get_text(attr->children);
903  else if (!strcmp((const char *) attr->name, "to"))
904  replace.recipe = mp::xml::get_text(attr->children);
905  else
906  throw mp::filter::FilterException
907  ("Bad attribute "
908  + std::string((const char *) attr->name)
909  + " in rewrite section of http_rewrite");
910  }
911  yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
912  from.c_str(), replace.recipe.c_str());
913  if (!from.empty())
914  {
915  replace.parse_groups(from);
916  rule->replace_list.push_back(replace);
917  }
918  }
919  else
920  throw mp::filter::FilterException
921  ("Bad element "
922  + std::string((const char *) p->name)
923  + " in http_rewrite filter");
924  }
925  rules[values[0]] = rule;
926  }
927  else if (!strcmp((const char *) ptr->name, "content"))
928  {
929  static const char *names[3] =
930  { "type", "mime", 0 };
931  std::string values[2];
932  mp::xml::parse_attr(ptr, names, values);
933  if (values[0].empty())
934  {
935  throw mp::filter::FilterException
936  ("Missing attribute, type for for element "
937  + std::string((const char *) ptr->name)
938  + " in http_rewrite filter");
939  }
940  Content c;
941 
942  c.type = values[0];
943  if (!values[1].empty())
944  c.content_re.assign(values[1], boost::regex::icase);
945  c.configure(ptr->children, rules);
946  phase.content_list.push_back(c);
947  }
948  else
949  {
950  throw mp::filter::FilterException
951  ("Bad element "
952  + std::string((const char *) ptr->name)
953  + " in http_rewrite filter");
954  }
955  }
956 }
957 
958 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
959  const char *path)
960 {
961  for (ptr = ptr->children; ptr; ptr = ptr->next)
962  {
963  if (ptr->type != XML_ELEMENT_NODE)
964  continue;
965  else if (!strcmp((const char *) ptr->name, "request"))
966  {
967  configure_phase(ptr, *req_phase);
968  }
969  else if (!strcmp((const char *) ptr->name, "response"))
970  {
971  configure_phase(ptr, *res_phase);
972  }
973  else
974  {
975  throw mp::filter::FilterException
976  ("Bad element "
977  + std::string((const char *) ptr->name)
978  + " in http_rewrite filter");
979  }
980  }
981 }
982 
983 static mp::filter::Base* filter_creator()
984 {
985  return new mp::filter::HttpRewrite;
986 }
987 
988 extern "C" {
989  struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
990  0,
991  "http_rewrite",
993  };
994 }
995 
996 
997 /*
998  * Local variables:
999  * c-basic-offset: 4
1000  * c-file-style: "Stroustrup"
1001  * indent-tabs-mode: nil
1002  * End:
1003  * vim: shiftwidth=4 tabstop=8 expandtab
1004  */
1005 
void parse(HTMLParserEvent &event, const char *str) const
Definition: html_parser.cpp:81
void configure(const xmlNode *ptr, std::map< std::string, RulePtr > &rules)
void quoted_literal(std::string &content, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list) const
void parse(int verbose, std::string &content, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list) const
void anyTagEnd(const char *tag, int tag_len, int close_it)
std::stack< std::list< Within >::const_iterator > s_within
Event(const Content *p, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list)
void closeTag(const char *tag, int tag_len)
void openTagStart(const char *tag, int tag_len)
void text(const char *value, int len)
std::map< std::string, std::string > & m_vars
void attribute(const char *tag, int tag_len, const char *attr, int attr_len, const char *value, int val_len, const char *sep)
void rewrite_reqline(mp::odr &o, Z_HTTP_Request *hreq, std::map< std::string, std::string > &vars, std::string bind_addr) const
void rewrite_body(mp::odr &o, const char *content_type, char **content_buf, int *content_len, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list) const
void read_skip_headers(Z_HTTP_Request *hreq, std::list< boost::regex > &skip_list, std::string bind_addr)
void rewrite_headers(mp::odr &o, Z_HTTP_Header *headers, std::map< std::string, std::string > &vars) const
std::string sub_vars(const std::map< std::string, std::string > &vars) const
bool test_patterns(std::map< std::string, std::string > &vars, std::string &txt, bool anchor, std::list< boost::regex > &skip_list)
bool exec(std::map< std::string, std::string > &vars, std::string &txt, bool anchor, std::list< boost::regex > &skip_list) const
void configure_phase(const xmlNode *ptr, Phase &phase)
boost::shared_ptr< Rule > RulePtr
boost::scoped_ptr< Phase > res_phase
boost::scoped_ptr< Phase > req_phase
static bool embed_quoted_literal(std::string &content, std::map< std::string, std::string > &vars, mp::filter::HttpRewrite::RulePtr ruleptr, bool html_context, std::list< boost::regex > &skip_list)
struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite
static mp::filter::Base * filter_creator()