YAZ  5.23.1
url.c
Go to the documentation of this file.
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12 
13 #include <yaz/url.h>
14 #include <yaz/comstack.h>
15 #include <yaz/log.h>
16 #include <yaz/wrbuf.h>
17 #include <yaz/cookie.h>
18 #include <yaz/poll.h>
19 
20 struct yaz_url {
23  char *proxy;
26  int verbose;
30 };
31 
33 {
34  yaz_url_t p = xmalloc(sizeof(*p));
37  p->proxy = 0;
38  p->max_redirects = 10;
39  p->w_error = wrbuf_alloc();
40  p->verbose = 0;
41  p->timeout_sec = 30;
42  p->timeout_ns = 0;
44  return p;
45 }
46 
48 {
49  if (p)
50  {
51  odr_destroy(p->odr_in);
52  odr_destroy(p->odr_out);
53  xfree(p->proxy);
56  xfree(p);
57  }
58 }
59 
60 void yaz_url_set_proxy(yaz_url_t p, const char *proxy)
61 {
62  xfree(p->proxy);
63  p->proxy = 0;
64  if (proxy && *proxy)
65  p->proxy = xstrdup(proxy);
66 }
67 
69 {
70  p->max_redirects = num;
71 }
72 
74 {
75  p->verbose = num;
76 }
77 
78 void yaz_url_set_timeout(yaz_url_t p, int sec, int ns)
79 {
80  p->timeout_sec = sec;
81  p->timeout_ns = ns;
82 }
83 
84 static void extract_user_pass(NMEM nmem,
85  const char *uri,
86  char **uri_lean, char **http_user,
87  char **http_pass)
88 {
89  const char *cp1 = strchr(uri, '/');
90  *uri_lean = 0;
91  *http_user = 0;
92  *http_pass = 0;
93  if (cp1 && cp1 > uri)
94  {
95  cp1--;
96 
97  if (!strncmp(cp1, "://", 3))
98  {
99  const char *cp3 = 0;
100  const char *cp2 = cp1 + 3;
101  while (*cp2 && *cp2 != '/' && *cp2 != '@')
102  {
103  if (*cp2 == ':')
104  cp3 = cp2;
105  cp2++;
106  }
107  if (*cp2 == '@' && cp3)
108  {
109  *uri_lean = nmem_malloc(nmem, strlen(uri) + 1);
110  memcpy(*uri_lean, uri, cp1 + 3 - uri);
111  strcpy(*uri_lean + (cp1 + 3 - uri), cp2 + 1);
112 
113  *http_user = nmem_strdupn(nmem, cp1 + 3, cp3 - (cp1 + 3));
114  *http_pass = nmem_strdupn(nmem, cp3 + 1, cp2 - (cp3 + 1));
115  }
116  }
117  }
118  if (*uri_lean == 0)
119  *uri_lean = nmem_strdup(nmem, uri);
120 }
121 
123 {
124  return wrbuf_cstr(p->w_error);
125 }
126 
127 static void log_warn(yaz_url_t p)
128 {
129  yaz_log(YLOG_WARN, "yaz_url: %s", wrbuf_cstr(p->w_error));
130 }
131 
133  const char *method,
134  Z_HTTP_Header *user_headers,
135  const char *buf, size_t len)
136 {
137  Z_HTTP_Response *res = 0;
138  int number_of_redirects = 0;
139 
140  odr_reset(p->odr_out);
142  wrbuf_rewind(p->w_error);
143  while (1)
144  {
145  void *add;
146  COMSTACK conn = 0;
147  int code;
148  const char *location = 0;
149  char *http_user = 0;
150  char *http_pass = 0;
151  char *uri_lean = 0;
152  int proxy_mode = 0;
153 
154  extract_user_pass(p->odr_out->mem, uri, &uri_lean,
155  &http_user, &http_pass);
156  conn = cs_create_host2(uri_lean, 0, &add, p->proxy, &proxy_mode);
157  if (!conn)
158  {
159  wrbuf_printf(p->w_error, "Can not resolve URL %s", uri);
160  log_warn(p);
161  }
162  else
163  {
164  int ret;
165  Z_GDU *gdu =
166  z_get_HTTP_Request_uri(p->odr_out, uri_lean, 0, proxy_mode);
167  gdu->u.HTTP_Request->method = odr_strdup(p->odr_out, method);
169  for ( ; user_headers; user_headers = user_headers->next)
170  {
171  /* prefer new Host over user-supplied Host */
172  if (!strcmp(user_headers->name, "Host"))
173  ;
174  /* prefer user-supplied User-Agent over YAZ' own */
175  else if (!strcmp(user_headers->name, "User-Agent"))
177  user_headers->name, user_headers->value);
178  else
180  user_headers->name, user_headers->value);
181  }
182  if (http_user && http_pass)
184  &gdu->u.HTTP_Request->headers,
185  http_user, http_pass);
186  res = 0;
187  if (buf && len)
188  {
189  gdu->u.HTTP_Request->content_buf = (char *) buf;
190  gdu->u.HTTP_Request->content_len = len;
191  }
192  if (!z_GDU(p->odr_out, &gdu, 0, 0))
193  {
194  wrbuf_printf(p->w_error, "Can not encode HTTP request for URL %s",
195  uri);
196  log_warn(p);
197  return 0;
198  }
199  ret = cs_connect(conn, add);
200  if (ret < 0) /* error */
201  {
202  wrbuf_printf(p->w_error, "Can not connect to URL %s", uri);
203  log_warn(p);
204  }
205  else
206  {
207  char *netbuffer = 0;
208  int netlen = 0;
209  int len_out;
210  char *buf_out = odr_getbuf(p->odr_out, &len_out, 0);
211  int state = 0; /* 0=connect phase, 1=send, 2=recv */
212 
213  if (p->verbose)
214  fwrite(buf_out, 1, len_out, stdout);
215  if (!strcmp(gdu->u.HTTP_Request->method, "HEAD"))
216  cs_set_head_only(conn, 1);
217  if (ret == 0)
218  state = 1; /* connect complete, so send phase */
219  while (1)
220  {
221  if (ret == 1) /* incomplete , wait */
222  {
223  struct yaz_poll_fd yp;
225  yaz_poll_add(input_mask, yaz_poll_except);
226  if (conn->io_pending & CS_WANT_WRITE)
227  yaz_poll_add(input_mask, yaz_poll_write);
228  if (conn->io_pending & CS_WANT_READ)
229  yaz_poll_add(input_mask, yaz_poll_read);
230  yp.fd = cs_fileno(conn);
231  yp.input_mask = input_mask;
232  ret = yaz_poll(&yp, 1, p->timeout_sec, p->timeout_ns);
233  if (ret == 0)
234  {
235  wrbuf_printf(p->w_error, "timeout URL %s", uri);
236  break;
237  }
238  else if (ret < 0)
239  {
240  wrbuf_printf(p->w_error, "poll error URL %s", uri);
241  break;
242  }
243  }
244  if (state == 0) /* connect phase */
245  {
246  ret = cs_rcvconnect(conn);
247  if (ret < 0)
248  {
249  wrbuf_printf(p->w_error, "cs_rcvconnect failed for URL %s", uri);
250  log_warn(p);
251  break;
252  }
253  else if (ret == 0)
254  state = 1;
255  }
256  else if (state == 1) /* write request phase */
257  {
258  ret = cs_put(conn, buf_out, len_out);
259  if (ret < 0)
260  {
261  wrbuf_printf(p->w_error, "cs_put fail for URL %s", uri);
262  log_warn(p);
263  }
264  else if (ret == 0)
265  {
266  state = 2;
267  }
268  }
269  else if (state == 2) /* read response phase */
270  {
271  ret = cs_get(conn, &netbuffer, &netlen);
272  if (ret <= 0)
273  {
274  wrbuf_printf(p->w_error, "cs_get failed for URL %s", uri);
275  log_warn(p);
276  break;
277  }
278  else if (ret > 1)
279  {
280  Z_GDU *gdu;
281  if (p->verbose)
282  fwrite(netbuffer, 1, ret, stdout);
283  odr_setbuf(p->odr_in, netbuffer, ret, 0);
284  if (!z_GDU(p->odr_in, &gdu, 0, 0)
285  || gdu->which != Z_GDU_HTTP_Response)
286  {
287  wrbuf_printf(p->w_error, "HTTP decoding fail for "
288  "URL %s", uri);
289  log_warn(p);
290  }
291  else
292  {
293  res = gdu->u.HTTP_Response;
294  break;
295  }
296  }
297  }
298  }
299  xfree(netbuffer);
300  }
301  cs_close(conn);
302  }
303  if (!res)
304  break;
305  code = res->code;
306  location = z_HTTP_header_lookup(res->headers, "Location");
307  if (++number_of_redirects <= p->max_redirects &&
308  location && (code == 301 || code == 302 || code == 307))
309  {
310  int host_change = 0;
311  const char *nlocation = yaz_check_location(p->odr_in, uri,
312  location, &host_change);
313 
314  odr_reset(p->odr_out);
315  uri = odr_strdup(p->odr_out, nlocation);
316  }
317  else
318  break;
319  yaz_cookies_response(p->cookies, res);
320  odr_reset(p->odr_in);
321  }
322  return res;
323 }
324 
325 /*
326  * Local variables:
327  * c-basic-offset: 4
328  * c-file-style: "Stroustrup"
329  * indent-tabs-mode: nil
330  * End:
331  * vim: shiftwidth=4 tabstop=8 expandtab
332  */
333 
char * value
Definition: zgdu.h:44
ODR odr_in
Definition: url.c:21
Z_HTTP_Header * headers
Definition: zgdu.h:60
const char * yaz_url_get_error(yaz_url_t p)
get last error from yaz_url_exec
Definition: url.c:122
union Z_GDU::@135 u
int code
Definition: zgdu.h:58
char * odr_strdup(ODR o, const char *str)
Definition: odr_mem.c:36
Header for WRBUF (growing buffer)
void z_HTTP_header_add_basic_auth(ODR o, Z_HTTP_Header **hp, const char *username, const char *password)
Definition: http.c:168
#define cs_close(handle)
Definition: comstack.h:99
#define ODR_ENCODE
Definition: odr.h:96
char * odr_getbuf(ODR o, int *len, int *size)
Definition: odr.c:277
const char * wrbuf_cstr(WRBUF b)
returns WRBUF content as C-string
Definition: wrbuf.c:281
WRBUF w_error
Definition: url.c:25
int timeout_ns
Definition: url.c:28
select/poll fd info
Definition: poll.h:50
yaz_cookies_t cookies
Definition: url.c:29
char * content_buf
Definition: zgdu.h:53
#define cs_get(handle, buf, size)
Definition: comstack.h:91
#define xstrdup(s)
utility macro which calls xstrdup_f
Definition: xmalloc.h:55
static void extract_user_pass(NMEM nmem, const char *uri, char **uri_lean, char **http_user, char **http_pass)
Definition: url.c:84
#define CS_WANT_WRITE
Definition: comstack.h:115
void * nmem_malloc(NMEM n, size_t size)
allocates memory block on NMEM handle
Definition: nmem.c:145
string buffer
Definition: wrbuf.h:42
void yaz_url_set_verbose(yaz_url_t p, int num)
sets verbose level 0=none, >0 verbose
Definition: url.c:73
#define yaz_poll_add(var, value)
Definition: poll.h:76
Poll, select wrappers.
int which
Definition: zgdu.h:69
int max_redirects
Definition: url.c:24
void wrbuf_rewind(WRBUF b)
empty WRBUF content (length of buffer set to 0)
Definition: wrbuf.c:47
int cs_set_head_only(COMSTACK cs, int head_only)
Definition: tcpip.c:1694
void odr_setbuf(ODR o, char *buf, int len, int can_grow)
Definition: odr.c:267
Z_HTTP_Response * yaz_url_exec(yaz_url_t p, const char *uri, const char *method, Z_HTTP_Header *user_headers, const char *buf, size_t len)
executes the actual HTTP request (including redirects, etc)
Definition: url.c:132
#define cs_put(handle, buf, size)
Definition: comstack.h:90
void odr_reset(ODR o)
Definition: odr.c:226
void yaz_url_set_proxy(yaz_url_t p, const char *proxy)
sets proxy for URL fetcher
Definition: url.c:60
#define xfree(x)
utility macro which calls xfree_f
Definition: xmalloc.h:53
Definition: url.c:20
void wrbuf_printf(WRBUF b, const char *fmt,...)
writes printf result to WRBUF
Definition: wrbuf.c:178
Z_HTTP_Header * next
Definition: zgdu.h:45
void wrbuf_destroy(WRBUF b)
destroy WRBUF and its buffer
Definition: wrbuf.c:38
Header for COMSTACK.
void z_HTTP_header_add(ODR o, Z_HTTP_Header **hp, const char *n, const char *v)
Definition: http.c:189
char * nmem_strdup(NMEM mem, const char *src)
allocates string on NMEM handle (similar strdup)
Definition: nmemsdup.c:18
#define cs_connect(handle, address)
Definition: comstack.h:93
#define cs_rcvconnect(handle)
Definition: comstack.h:94
unsigned io_pending
Definition: comstack.h:63
NMEM mem
Definition: odr.h:130
void odr_destroy(ODR o)
Definition: odr.c:253
#define cs_fileno(handle)
Definition: comstack.h:104
Definition: odr.h:124
int content_len
Definition: zgdu.h:54
void yaz_url_destroy(yaz_url_t p)
destroys a URL fetcher
Definition: url.c:47
int verbose
Definition: url.c:26
#define CS_WANT_READ
Definition: comstack.h:114
int fd
Definition: poll.h:56
Z_HTTP_Response * HTTP_Response
Definition: zgdu.h:73
#define Z_GDU_HTTP_Response
Definition: zgdu.h:67
const char * z_HTTP_header_lookup(const Z_HTTP_Header *hp, const char *n)
Definition: http.c:233
#define xmalloc(x)
utility macro which calls malloc_f
Definition: xmalloc.h:49
enum yaz_poll_mask input_mask
Definition: poll.h:52
#define YLOG_WARN
log level: warning
Definition: log.h:46
static void log_warn(yaz_url_t p)
Definition: url.c:127
void yaz_url_set_timeout(yaz_url_t p, int sec, int ns)
sets I/O timeout
Definition: url.c:78
char * name
Definition: zgdu.h:43
void yaz_log(int level, const char *fmt,...)
Writes log message.
Definition: log.c:485
char * nmem_strdupn(NMEM mem, const char *src, size_t n)
allocates string of certain size on NMEM handle
Definition: nmemsdup.c:33
char * method
Definition: zgdu.h:49
char * proxy
Definition: url.c:23
const char * yaz_check_location(ODR odr, const char *uri, const char *location, int *host_change)
Definition: http.c:659
Definition: zgdu.h:68
ODR odr_createmem(int direction)
Definition: odr.c:200
Z_HTTP_Header * headers
Definition: zgdu.h:52
void yaz_url_set_max_redirects(yaz_url_t p, int num)
sets maximum number of redirects
Definition: url.c:68
int z_GDU(ODR o, Z_GDU **p, int opt, const char *name)
Definition: zgdu.c:17
Z_GDU * z_get_HTTP_Request_uri(ODR odr, const char *uri, const char *args, int use_full_uri)
Definition: http.c:291
Logging utility.
COMSTACK cs_create_host2(const char *vhost, int blocking, void **vp, const char *proxy_host, int *proxy_mode)
Definition: comstack.c:179
yaz_url_t yaz_url_create(void)
creates a URL fetcher handle
Definition: url.c:32
ODR odr_out
Definition: url.c:22
int timeout_sec
Definition: url.c:27
URL fetch utility.
WRBUF wrbuf_alloc(void)
construct WRBUF
Definition: wrbuf.c:25
Z_HTTP_Request * HTTP_Request
Definition: zgdu.h:72
#define ODR_DECODE
Definition: odr.h:95
int yaz_poll(struct yaz_poll_fd *fds, int num_fds, int sec, int nsec)
poll wrapper for poll or select
Definition: poll.c:161
void z_HTTP_header_set(ODR o, Z_HTTP_Header **hp, const char *n, const char *v)
Definition: http.c:200
yaz_poll_mask
select/poll masks .. timeout is "output" only
Definition: poll.h:41