metaproxy  1.13.0
filter_http_rewrite.cpp
Go to the documentation of this file.
1 /* This file is part of Metaproxy.
2  Copyright (C) Index Data
3 
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 
19 #include "config.hpp"
20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22 #include <metaproxy/util.hpp>
23 #include "filter_http_rewrite.hpp"
24 #include "html_parser.hpp"
25 
26 #include <yaz/zgdu.h>
27 #include <yaz/log.h>
28 
29 #include <stack>
30 #include <boost/regex.hpp>
31 #include <boost/lexical_cast.hpp>
32 #include <boost/algorithm/string.hpp>
33 
34 #include <map>
35 
36 namespace mp = metaproxy_1;
37 namespace yf = mp::filter;
38 
39 namespace metaproxy_1 {
40  namespace filter {
42  public:
44  boost::regex re;
45  std::string recipe;
46  std::map<int, std::string> group_index;
47  std::string sub_vars(
48  const std::map<std::string, std::string> & vars) const;
49  void parse_groups(std::string pattern);
50  };
51 
53  public:
54  std::list<Replace> replace_list;
55  bool test_patterns(
56  std::map<std::string, std::string> &vars,
57  std::string &txt, bool anchor,
58  std::list<boost::regex> &skip_list);
59  };
61  public:
62  boost::regex header;
63  boost::regex attr;
64  boost::regex tag;
65  std::string type;
66  bool reqline;
68  bool exec(std::map<std::string, std::string> &vars,
69  std::string &txt, bool anchor,
70  std::list<boost::regex> &skip_list) const;
71  };
72 
74  public:
75  std::string type;
76  boost::regex content_re;
77  std::list<Within> within_list;
78  void configure(const xmlNode *ptr,
79  std::map<std::string, RulePtr > &rules);
80  void quoted_literal(std::string &content,
81  std::map<std::string, std::string> &vars,
82  std::list<boost::regex> & skip_list) const;
83  void parse(int verbose, std::string &content,
84  std::map<std::string, std::string> & vars,
85  std::list<boost::regex> & skip_list ) const;
86  };
88  public:
89  Phase();
90  int m_verbose;
91  std::list<Content> content_list;
92  void read_skip_headers(Z_HTTP_Request *hreq,
93  std::list<boost::regex> &skip_list);
94  void rewrite_reqline(mp::odr & o, Z_HTTP_Request *hreq,
95  std::map<std::string, std::string> & vars) const;
96  void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers,
97  std::map<std::string, std::string> & vars) const;
98  void rewrite_body(mp::odr & o,
99  const char *content_type,
100  char **content_buf, int *content_len,
101  std::map<std::string, std::string> & vars,
102  std::list<boost::regex> & skip_list ) const;
103  };
105  void openTagStart(const char *tag, int tag_len);
106  void anyTagEnd(const char *tag, int tag_len, int close_it);
107  void attribute(const char *tag, int tag_len,
108  const char *attr, int attr_len,
109  const char *value, int val_len,
110  const char *sep);
111  void closeTag(const char *tag, int tag_len);
112  void text(const char *value, int len);
114  WRBUF m_w;
115  std::stack<std::list<Within>::const_iterator> s_within;
116  std::map<std::string, std::string> &m_vars;
117  std::list<boost::regex> & m_skips;
118  public:
119  Event(const Content *p,
120  std::map<std::string, std::string> &vars,
121  std::list<boost::regex> & skip_list );
122  ~Event();
123  const char *result();
124  };
125  }
126 }
127 
128 yf::HttpRewrite::HttpRewrite() :
129  req_phase(new Phase), res_phase(new Phase)
130 {
131 }
132 
133 yf::HttpRewrite::~HttpRewrite()
134 {
135 }
136 
137 void yf::HttpRewrite::process(mp::Package & package) const
138 {
139  yaz_log(YLOG_LOG, "HttpRewrite begins....");
140  Z_GDU *gdu = package.request().get();
141  //map of request/response vars
142  std::map<std::string, std::string> vars;
143  //we have an http req
144 
145  std::list<boost::regex> skip_list;
146 
147  if (gdu && gdu->which == Z_GDU_HTTP_Request)
148  {
149  Z_HTTP_Request *hreq = gdu->u.HTTP_Request;
150  mp::odr o;
151  req_phase->rewrite_reqline(o, hreq, vars);
152  res_phase->read_skip_headers(hreq, skip_list);
153  yaz_log(YLOG_LOG, ">> Request headers");
154  req_phase->rewrite_headers(o, hreq->headers, vars);
155  req_phase->rewrite_body(o,
156  z_HTTP_header_lookup(hreq->headers,
157  "Content-Type"),
158  &hreq->content_buf, &hreq->content_len,
159  vars, skip_list);
160  package.request() = gdu;
161  }
162  package.move();
163  gdu = package.response().get();
164  if (gdu && gdu->which == Z_GDU_HTTP_Response)
165  {
166  Z_HTTP_Response *hres = gdu->u.HTTP_Response;
167  yaz_log(YLOG_LOG, "Response code %d", hres->code);
168  mp::odr o;
169  yaz_log(YLOG_LOG, "<< Respose headers");
170  res_phase->rewrite_headers(o, hres->headers, vars);
171  res_phase->rewrite_body(o,
172  z_HTTP_header_lookup(hres->headers,
173  "Content-Type"),
174  &hres->content_buf, &hres->content_len,
175  vars, skip_list);
176  package.response() = gdu;
177  }
178 }
179 
180 // Read (and remove) the X-Metaproxy-SkipLink headers
181 void yf::HttpRewrite::Phase::read_skip_headers(Z_HTTP_Request *hreq,
182  std::list<boost::regex> &skip_list )
183 {
184  std::string url(hreq->path);
185  if ( url.substr(0,7) != "http://" )
186  { // path was relative, as it usually is
187  const char *host = z_HTTP_header_lookup(hreq->headers, "Host");
188  if (host)
189  url = "http://" + std::string(host) + hreq->path ;
190  }
191 
192  while ( const char *hv = z_HTTP_header_remove( &(hreq->headers),
193  "X-Metaproxy-SkipLink") )
194  {
195  yaz_log(YLOG_LOG,"Found SkipLink '%s'", hv );
196  const char *p = strchr(hv,' ');
197  if (!p)
198  continue; // should not happen
199  std::string page(hv,p);
200  std::string link(p+1);
201  boost::regex pagere(page);
202  if ( boost::regex_search(url, pagere) )
203  {
204  yaz_log(YLOG_LOG,"SkipLink '%s' matches URL %s",
205  page.c_str(), url.c_str() );
206  boost::regex linkre(link);
207  skip_list.push_back(linkre);
208  }
209  else
210  {
211  yaz_log(YLOG_LOG,"SkipLink ignored, '%s' does not match '%s'",
212  url.c_str(), page.c_str() );
213  }
214  }
215 }
216 
217 
218 void yf::HttpRewrite::Phase::rewrite_reqline (mp::odr & o,
219  Z_HTTP_Request *hreq,
220  std::map<std::string, std::string> & vars) const
221 {
222  //rewrite the request line
223  std::string path;
224  if (strstr(hreq->path, "http://") == hreq->path)
225  {
226  yaz_log(YLOG_LOG, "Path in the method line is absolute, "
227  "possibly a proxy request");
228  path += hreq->path;
229  }
230  else
231  {
232  //TODO what about proto
233  const char *host = z_HTTP_header_lookup(hreq->headers, "Host");
234  if (!host)
235  return;
236 
237  path += "http://";
238  path += host;
239  path += hreq->path;
240  }
241 
242  std::list<Content>::const_iterator cit = content_list.begin();
243  for (; cit != content_list.end(); cit++)
244  if (cit->type == "headers")
245  break;
246 
247  if (cit == content_list.end())
248  return;
249 
250  std::list<Within>::const_iterator it = cit->within_list.begin();
251  for (; it != cit->within_list.end(); it++)
252  if (it->reqline)
253  {
254  yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
255  std::list<boost::regex> dummy_skip_list; // no skips here!
256  if (it->exec(vars, path, true, dummy_skip_list))
257  {
258  yaz_log(YLOG_LOG, "Rewritten request URL is %s", path.c_str());
259  hreq->path = odr_strdup(o, path.c_str());
260  }
261  }
262 }
263 
264 void yf::HttpRewrite::Phase::rewrite_headers(mp::odr & o,
265  Z_HTTP_Header *headers,
266  std::map<std::string, std::string> & vars ) const
267 {
268  std::list<Content>::const_iterator cit = content_list.begin();
269  for (; cit != content_list.end(); cit++)
270  if (cit->type == "headers")
271  break;
272 
273  if (cit == content_list.end())
274  return;
275 
276  for (Z_HTTP_Header *header = headers; header; header = header->next)
277  {
278  std::list<Within>::const_iterator it = cit->within_list.begin();
279  for (; it != cit->within_list.end(); it++)
280  {
281  if (!it->header.empty() &&
282  regex_match(header->name, it->header))
283  {
284  // Match and replace only the header value
285  std::string hval(header->value);
286  std::list<boost::regex> dummy_skip_list; // no skips here!
287  if (it->exec(vars, hval, true, dummy_skip_list))
288  {
289  header->value = odr_strdup(o, hval.c_str());
290  }
291  }
292  }
293  }
294 }
295 
296 void yf::HttpRewrite::Phase::rewrite_body(
297  mp::odr &o,
298  const char *content_type,
299  char **content_buf,
300  int *content_len,
301  std::map<std::string, std::string> & vars,
302  std::list<boost::regex> & skip_list ) const
303 {
304  if (*content_len == 0)
305  return;
306  if (!content_type) {
307  yaz_log(YLOG_LOG, "rewrite_body: null content_type, can not rewrite");
308  return;
309  }
310  std::list<Content>::const_iterator cit = content_list.begin();
311  for (; cit != content_list.end(); cit++)
312  {
313  yaz_log(YLOG_LOG, "rewrite_body: content_type=%s type=%s",
314  content_type, cit->type.c_str());
315  if (cit->type != "headers"
316  && regex_match(content_type, cit->content_re))
317  break;
318  }
319  if (cit == content_list.end()) {
320  yaz_log(YLOG_LOG,"rewrite_body: No content rule matched %s, not rewriting",
321  content_type );
322  return;
323  }
324 
325  int i;
326  for (i = 0; i < *content_len; i++)
327  if ((*content_buf)[i] == 0) {
328  yaz_log(YLOG_LOG,"rewrite_body: Looks like binary stuff, not rewriting");
329  return; // binary content. skip
330  }
331 
332  std::string content(*content_buf, *content_len);
333  cit->parse(m_verbose, content, vars, skip_list);
334  *content_buf = odr_strdup(o, content.c_str());
335  *content_len = strlen(*content_buf);
336 }
337 
338 yf::HttpRewrite::Event::Event(const Content *p,
339  std::map<std::string, std::string> & vars,
340  std::list<boost::regex> & skip_list
341  ) : m_content(p), m_vars(vars), m_skips(skip_list)
342 {
343  m_w = wrbuf_alloc();
344 }
345 
346 yf::HttpRewrite::Event::~Event()
347 {
348  wrbuf_destroy(m_w);
349 }
350 
351 const char *yf::HttpRewrite::Event::result()
352 {
353  return wrbuf_cstr(m_w);
354 }
355 
356 void yf::HttpRewrite::Event::openTagStart(const char *tag, int tag_len)
357 {
358  wrbuf_putc(m_w, '<');
359  wrbuf_write(m_w, tag, tag_len);
360 
361  std::string t(tag, tag_len);
362  std::list<Within>::const_iterator it = m_content->within_list.begin();
363  for (; it != m_content->within_list.end(); it++)
364  {
365  if (!it->tag.empty() && regex_match(t, it->tag))
366  {
367  if (!it->attr.empty() && regex_match("#text", it->attr))
368  {
369  s_within.push(it);
370  return;
371  }
372  }
373  }
374 }
375 
376 void yf::HttpRewrite::Event::anyTagEnd(const char *tag, int tag_len,
377  int close_it)
378 {
379  if (close_it)
380  {
381  if (!s_within.empty())
382  {
383  std::list<Within>::const_iterator it = s_within.top();
384  std::string t(tag, tag_len);
385  if (regex_match(t, it->tag))
386  s_within.pop();
387  }
388  }
389  if (close_it)
390  wrbuf_putc(m_w, '/');
391  wrbuf_putc(m_w, '>');
392 }
393 
394 void yf::HttpRewrite::Event::attribute(const char *tag, int tag_len,
395  const char *attr, int attr_len,
396  const char *value, int val_len,
397  const char *sep)
398 {
399  std::list<Within>::const_iterator it = m_content->within_list.begin();
400  bool subst = false;
401 
402  for (; it != m_content->within_list.end(); it++)
403  {
404  std::string t(tag, tag_len);
405  if (it->tag.empty() || regex_match(t, it->tag))
406  {
407  std::string a(attr, attr_len);
408  if (!it->attr.empty() && regex_match(a, it->attr))
409  subst = true;
410  }
411  if (subst)
412  break;
413  }
414 
415  wrbuf_putc(m_w, ' ');
416  wrbuf_write(m_w, attr, attr_len);
417  if (value)
418  {
419  wrbuf_puts(m_w, "=");
420  wrbuf_puts(m_w, sep);
421 
422  std::string output;
423  if (subst)
424  {
425  std::string s(value, val_len);
426  it->exec(m_vars, s, true, m_skips);
427  wrbuf_puts(m_w, s.c_str());
428  }
429  else
430  wrbuf_write(m_w, value, val_len);
431  wrbuf_puts(m_w, sep);
432  }
433 }
434 
435 void yf::HttpRewrite::Event::closeTag(const char *tag, int tag_len)
436 {
437  if (!s_within.empty())
438  {
439  std::list<Within>::const_iterator it = s_within.top();
440  std::string t(tag, tag_len);
441  if (regex_match(t, it->tag))
442  s_within.pop();
443  }
444  wrbuf_puts(m_w, "</");
445  wrbuf_write(m_w, tag, tag_len);
446 }
447 
448 void yf::HttpRewrite::Event::text(const char *value, int len)
449 {
450  std::list<Within>::const_iterator it = m_content->within_list.end();
451  if (!s_within.empty())
452  it = s_within.top();
453  if (it != m_content->within_list.end())
454  {
455  std::string s(value, len);
456  it->exec(m_vars, s, false, m_skips);
457  wrbuf_puts(m_w, s.c_str());
458  }
459  else
460  wrbuf_write(m_w, value, len);
461 }
462 
464  std::string &content,
465  std::map<std::string, std::string> &vars,
466  mp::filter::HttpRewrite::RulePtr ruleptr,
467  bool html_context,
468  std::list<boost::regex> &skip_list)
469 {
470  bool replace = false;
471  std::string res;
472  const char *cp = content.c_str();
473  const char *cp0 = cp;
474  while (*cp)
475  {
476  if (html_context && !strncmp(cp, "&quot;", 6))
477  {
478  cp += 6;
479  res.append(cp0, cp - cp0);
480  cp0 = cp;
481  while (*cp)
482  {
483  if (!strncmp(cp, "&quot;", 6))
484  break;
485  if (*cp == '\n')
486  break;
487  cp++;
488  }
489  if (!*cp)
490  break;
491  std::string s(cp0, cp - cp0);
492  if (ruleptr->test_patterns(vars, s, true, skip_list))
493  replace = true;
494  cp0 = cp;
495  res.append(s);
496  }
497  else if (*cp == '"' || *cp == '\'')
498  {
499  int m = *cp;
500  cp++;
501  res.append(cp0, cp - cp0);
502  cp0 = cp;
503  while (*cp)
504  {
505  if (cp[-1] != '\\' && *cp == m)
506  break;
507  if (*cp == '\n')
508  break;
509  cp++;
510  }
511  if (!*cp)
512  break;
513  std::string s(cp0, cp - cp0);
514  if (ruleptr->test_patterns(vars, s, true, skip_list))
515  replace = true;
516  cp0 = cp;
517  res.append(s);
518  }
519  else if (*cp == '/' && cp[1] == '/')
520  {
521  while (cp[1] && cp[1] != '\n')
522  cp++;
523  }
524  cp++;
525  }
526  res.append(cp0, cp - cp0);
527  content = res;
528  return replace;
529 }
530 
531 bool yf::HttpRewrite::Within::exec(
532  std::map<std::string, std::string> & vars,
533  std::string & txt, bool anchor,
534  std::list<boost::regex> & skip_list) const
535 {
536  if (type == "quoted-literal")
537  {
538  return embed_quoted_literal(txt, vars, rule, true, skip_list);
539  }
540  else
541  {
542  return rule->test_patterns(vars, txt, anchor, skip_list);
543  }
544 }
545 
546 bool yf::HttpRewrite::Rule::test_patterns(
547  std::map<std::string, std::string> & vars,
548  std::string & txt, bool anchor,
549  std::list<boost::regex> & skip_list )
550 {
551  bool replaces = false;
552  bool first = anchor;
553  std::string out;
554  std::string::const_iterator start, end;
555  start = txt.begin();
556  end = txt.end();
557  while (1)
558  {
559  std::list<Replace>::iterator bit = replace_list.end();
560  boost::smatch bwhat;
561  bool match_one = false;
562  {
563  std::list<Replace>::iterator it = replace_list.begin();
564  for (; it != replace_list.end(); it++)
565  {
566  if (it->start_anchor && !first)
567  continue;
568  boost::smatch what;
569  if (regex_search(start, end, what, it->re))
570  {
571  if (!match_one || what[0].first < bwhat[0].first)
572  {
573  bwhat = what;
574  bit = it;
575  }
576  match_one = true;
577  }
578  }
579  if (!match_one)
580  break;
581  }
582  first = false;
583  replaces = true;
584  size_t i;
585  for (i = 1; i < bwhat.size(); ++i)
586  {
587  //check if the group is named
588  std::map<int, std::string>::const_iterator git
589  = bit->group_index.find(i);
590  if (git != bit->group_index.end())
591  { //it is
592  vars[git->second] = bwhat[i];
593  }
594 
595  }
596  // Compare against skip_list
597  bool skipthis = false;
598  std::list<boost::regex>::iterator si = skip_list.begin();
599  for ( ; si != skip_list.end(); si++) {
600  if ( boost::regex_search(bwhat.str(0), *si) )
601  {
602  skipthis = true;
603  break;
604  }
605  }
606  //prepare replacement string
607  std::string rvalue = bit->sub_vars(vars);
608  out.append(start, bwhat[0].first);
609  if ( skipthis )
610  {
611  yaz_log(YLOG_LOG,"! Not rewriting '%s', skiplist match",
612  bwhat.str(0).c_str() );
613  out.append(bwhat.str(0).c_str());
614  }
615  else
616  {
617  yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
618  bwhat.str(0).c_str(), rvalue.c_str());
619  out.append(rvalue);
620  }
621  start = bwhat[0].second; //move search forward
622  }
623  out.append(start, end);
624  txt = out;
625  return replaces;
626 }
627 
628 void yf::HttpRewrite::Replace::parse_groups(std::string pattern)
629 {
630  int gnum = 0;
631  bool esc = false;
632  const std::string &str = pattern;
633  std::string res;
634  start_anchor = str[0] == '^';
635  yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str());
636  for (size_t i = 0; i < str.size(); ++i)
637  {
638  res += str[i];
639  if (!esc && str[i] == '\\')
640  {
641  esc = true;
642  continue;
643  }
644  if (!esc && str[i] == '(') //group starts
645  {
646  gnum++;
647  if (i+1 < str.size() && str[i+1] == '?') //group with attrs
648  {
649  i++;
650  if (i+1 < str.size() && str[i+1] == ':') //non-capturing
651  {
652  if (gnum > 0) gnum--;
653  res += str[i];
654  i++;
655  res += str[i];
656  continue;
657  }
658  if (i+1 < str.size() && str[i+1] == 'P') //optional, python
659  i++;
660  if (i+1 < str.size() && str[i+1] == '<') //named
661  {
662  i++;
663  std::string gname;
664  bool term = false;
665  while (++i < str.size())
666  {
667  if (str[i] == '>') { term = true; break; }
668  if (!isalnum(str[i]))
669  throw mp::filter::FilterException
670  ("Only alphanumeric chars allowed, found "
671  " in '"
672  + str
673  + "' at "
674  + boost::lexical_cast<std::string>(i));
675  gname += str[i];
676  }
677  if (!term)
678  throw mp::filter::FilterException
679  ("Unterminated group name '" + gname
680  + " in '" + str +"'");
681  group_index[gnum] = gname;
682  yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
683  gname.c_str(), gnum);
684  }
685  }
686  }
687  esc = false;
688  }
689  re = res;
690 }
691 
692 std::string yf::HttpRewrite::Replace::sub_vars(
693  const std::map<std::string, std::string> & vars) const
694 {
695  std::string out;
696  bool esc = false;
697  const std::string & in = recipe;
698  for (size_t i = 0; i < in.size(); ++i)
699  {
700  if (!esc && in[i] == '\\')
701  {
702  esc = true;
703  continue;
704  }
705  if (!esc && in[i] == '$') //var
706  {
707  if (i+1 < in.size() && in[i+1] == '{') //ref prefix
708  {
709  ++i;
710  std::string name;
711  bool term = false;
712  while (++i < in.size())
713  {
714  if (in[i] == '}') { term = true; break; }
715  name += in[i];
716  }
717  if (!term) throw mp::filter::FilterException
718  ("Unterminated var ref in '"+in+"' at "
719  + boost::lexical_cast<std::string>(i));
720  std::map<std::string, std::string>::const_iterator it
721  = vars.find(name);
722  if (it != vars.end())
723  {
724  out += it->second;
725  }
726  }
727  else
728  {
729  throw mp::filter::FilterException
730  ("Malformed or trimmed var ref in '"
731  +in+"' at "+boost::lexical_cast<std::string>(i));
732  }
733  continue;
734  }
735  //passthru
736  out += in[i];
737  esc = false;
738  }
739  return out;
740 }
741 
742 yf::HttpRewrite::Phase::Phase() : m_verbose(0)
743 {
744 }
745 
746 void yf::HttpRewrite::Content::parse(
747  int verbose,
748  std::string &content,
749  std::map<std::string, std::string> &vars,
750  std::list<boost::regex> & skip_list ) const
751 {
752  if (type == "html")
753  {
754  HTMLParser parser;
755  Event ev(this, vars, skip_list);
756 
757  parser.set_verbose(verbose);
758 
759  parser.parse(ev, content.c_str());
760  content = ev.result();
761  }
762  if (type == "quoted-literal")
763  {
764  quoted_literal(content, vars, skip_list);
765  }
766 }
767 
768 void yf::HttpRewrite::Content::quoted_literal(
769  std::string &content,
770  std::map<std::string, std::string> &vars,
771  std::list<boost::regex> & skip_list ) const
772 {
773  std::list<Within>::const_iterator it = within_list.begin();
774  if (it != within_list.end())
775  embed_quoted_literal(content, vars, it->rule, false, skip_list);
776 }
777 
778 void yf::HttpRewrite::Content::configure(
779  const xmlNode *ptr, std::map<std::string, RulePtr > &rules)
780 {
781  for (; ptr; ptr = ptr->next)
782  {
783  if (ptr->type != XML_ELEMENT_NODE)
784  continue;
785  if (!strcmp((const char *) ptr->name, "within"))
786  {
787  static const char *names[7] =
788  { "header", "attr", "tag", "rule", "reqline", "type", 0 };
789  std::string values[6];
790  mp::xml::parse_attr(ptr, names, values);
791  Within w;
792  if (values[0].length() > 0)
793  w.header.assign(values[0], boost::regex_constants::icase);
794  if (values[1].length() > 0)
795  w.attr.assign(values[1], boost::regex_constants::icase);
796  if (values[2].length() > 0)
797  w.tag.assign(values[2], boost::regex_constants::icase);
798 
799  std::vector<std::string> rulenames;
800  boost::split(rulenames, values[3], boost::is_any_of(","));
801  if (rulenames.size() == 0)
802  {
803  throw mp::filter::FilterException
804  ("Empty rule in '" + values[3] +
805  "' in http_rewrite filter");
806  }
807  else if (rulenames.size() == 1)
808  {
809  std::map<std::string,RulePtr>::const_iterator it =
810  rules.find(rulenames[0]);
811  if (it == rules.end())
812  throw mp::filter::FilterException
813  ("Reference to non-existing rule '" + rulenames[0] +
814  "' in http_rewrite filter");
815  w.rule = it->second;
816 
817  }
818  else
819  {
820  RulePtr rule(new Rule);
821  size_t i;
822  for (i = 0; i < rulenames.size(); i++)
823  {
824  std::map<std::string,RulePtr>::const_iterator it =
825  rules.find(rulenames[i]);
826  if (it == rules.end())
827  throw mp::filter::FilterException
828  ("Reference to non-existing rule '" + rulenames[i] +
829  "' in http_rewrite filter");
830  RulePtr subRule = it->second;
831  std::list<Replace>::iterator rit =
832  subRule->replace_list.begin();
833  for (; rit != subRule->replace_list.end(); rit++)
834  rule->replace_list.push_back(*rit);
835  }
836  w.rule = rule;
837  }
838  w.reqline = values[4] == "1";
839  w.type = values[5];
840  if (w.type.empty() || w.type == "quoted-literal")
841  ;
842  else
843  throw mp::filter::FilterException
844  ("within type must be quoted-literal or none in "
845  " in http_rewrite filter");
846  within_list.push_back(w);
847  }
848  }
849 }
850 
851 void yf::HttpRewrite::configure_phase(const xmlNode *ptr, Phase &phase)
852 {
853  static const char *names[2] = { "verbose", 0 };
854  std::string values[1];
855  values[0] = "0";
856  mp::xml::parse_attr(ptr, names, values);
857 
858  phase.m_verbose = atoi(values[0].c_str());
859 
860  std::map<std::string, RulePtr > rules;
861  for (ptr = ptr->children; ptr; ptr = ptr->next)
862  {
863  if (ptr->type != XML_ELEMENT_NODE)
864  continue;
865  else if (!strcmp((const char *) ptr->name, "rule"))
866  {
867  static const char *names[2] = { "name", 0 };
868  std::string values[1];
869  values[0] = "default";
870  mp::xml::parse_attr(ptr, names, values);
871 
872  RulePtr rule(new Rule);
873  for (xmlNode *p = ptr->children; p; p = p->next)
874  {
875  if (p->type != XML_ELEMENT_NODE)
876  continue;
877  if (!strcmp((const char *) p->name, "rewrite"))
878  {
879  Replace replace;
880  std::string from;
881  const struct _xmlAttr *attr;
882  for (attr = p->properties; attr; attr = attr->next)
883  {
884  if (!strcmp((const char *) attr->name, "from"))
885  from = mp::xml::get_text(attr->children);
886  else if (!strcmp((const char *) attr->name, "to"))
887  replace.recipe = mp::xml::get_text(attr->children);
888  else
889  throw mp::filter::FilterException
890  ("Bad attribute "
891  + std::string((const char *) attr->name)
892  + " in rewrite section of http_rewrite");
893  }
894  yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
895  from.c_str(), replace.recipe.c_str());
896  if (!from.empty())
897  {
898  replace.parse_groups(from);
899  rule->replace_list.push_back(replace);
900  }
901  }
902  else
903  throw mp::filter::FilterException
904  ("Bad element "
905  + std::string((const char *) p->name)
906  + " in http_rewrite filter");
907  }
908  rules[values[0]] = rule;
909  }
910  else if (!strcmp((const char *) ptr->name, "content"))
911  {
912  static const char *names[3] =
913  { "type", "mime", 0 };
914  std::string values[2];
915  mp::xml::parse_attr(ptr, names, values);
916  if (values[0].empty())
917  {
918  throw mp::filter::FilterException
919  ("Missing attribute, type for for element "
920  + std::string((const char *) ptr->name)
921  + " in http_rewrite filter");
922  }
923  Content c;
924 
925  c.type = values[0];
926  if (!values[1].empty())
927  c.content_re.assign(values[1], boost::regex::icase);
928  c.configure(ptr->children, rules);
929  phase.content_list.push_back(c);
930  }
931  else
932  {
933  throw mp::filter::FilterException
934  ("Bad element "
935  + std::string((const char *) ptr->name)
936  + " in http_rewrite filter");
937  }
938  }
939 }
940 
941 void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
942  const char *path)
943 {
944  for (ptr = ptr->children; ptr; ptr = ptr->next)
945  {
946  if (ptr->type != XML_ELEMENT_NODE)
947  continue;
948  else if (!strcmp((const char *) ptr->name, "request"))
949  {
950  configure_phase(ptr, *req_phase);
951  }
952  else if (!strcmp((const char *) ptr->name, "response"))
953  {
954  configure_phase(ptr, *res_phase);
955  }
956  else
957  {
958  throw mp::filter::FilterException
959  ("Bad element "
960  + std::string((const char *) ptr->name)
961  + " in http_rewrite1 filter");
962  }
963  }
964 }
965 
966 static mp::filter::Base* filter_creator()
967 {
968  return new mp::filter::HttpRewrite;
969 }
970 
971 extern "C" {
972  struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
973  0,
974  "http_rewrite",
976  };
977 }
978 
979 
980 /*
981  * Local variables:
982  * c-basic-offset: 4
983  * c-file-style: "Stroustrup"
984  * indent-tabs-mode: nil
985  * End:
986  * vim: shiftwidth=4 tabstop=8 expandtab
987  */
988 
boost::shared_ptr< Rule > RulePtr
void configure(const xmlNode *ptr, bool test_only, const char *path)
struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite
std::map< std::string, std::string > & m_vars
void configure_phase(const xmlNode *ptr, Phase &phase)
Event(const Content *p, std::map< std::string, std::string > &vars, std::list< boost::regex > &skip_list)
static bool embed_quoted_literal(std::string &content, std::map< std::string, std::string > &vars, mp::filter::HttpRewrite::RulePtr ruleptr, bool html_context, std::list< boost::regex > &skip_list)
std::string sub_vars(const std::map< std::string, std::string > &vars) const
boost::scoped_ptr< Phase > req_phase
boost::scoped_ptr< Phase > res_phase
void parse_groups(std::string pattern)
std::stack< std::list< Within >::const_iterator > s_within
static mp::filter::Base * filter_creator()