pazpar2  1.6.30
session.c
Go to the documentation of this file.
1 /* This file is part of Pazpar2.
2  Copyright (C) 2006-2013 Index Data
3 
4 Pazpar2 is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
24 #if HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 
28 #include <time.h>
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #if HAVE_SYS_TIME_H
33 #include <sys/time.h>
34 #endif
35 #if HAVE_UNISTD_H
36 #include <unistd.h>
37 #endif
38 #ifdef WIN32
39 #include <windows.h>
40 #endif
41 #include <signal.h>
42 #include <ctype.h>
43 #include <assert.h>
44 #include <math.h>
45 
46 #include <yaz/marcdisp.h>
47 #include <yaz/comstack.h>
48 #include <yaz/tcpip.h>
49 #include <yaz/proto.h>
50 #include <yaz/readconf.h>
51 #include <yaz/pquery.h>
52 #include <yaz/otherinfo.h>
53 #include <yaz/yaz-util.h>
54 #include <yaz/nmem.h>
55 #include <yaz/query-charset.h>
56 #include <yaz/querytowrbuf.h>
57 #include <yaz/oid_db.h>
58 #include <yaz/snprintf.h>
59 
60 #define USE_TIMING 1
61 #if USE_TIMING
62 #include <yaz/timing.h>
63 #endif
64 
65 #include "ppmutex.h"
66 #include "parameters.h"
67 #include "session.h"
68 #include "eventl.h"
69 #include "http.h"
70 #include "termlists.h"
71 #include "reclists.h"
72 #include "relevance.h"
73 #include "database.h"
74 #include "client.h"
75 #include "settings.h"
76 #include "normalize7bit.h"
77 
78 #include <libxml/tree.h>
79 
80 #define MAX_CHUNK 15
81 
82 #define MAX(a,b) ((a)>(b)?(a):(b))
83 
84 // Note: Some things in this structure will eventually move to configuration
86 {
87  0, // dump_records
88  0, // debug_mode
89  0, // predictable sessions
90 };
91 
/* Node of a singly linked list of clients; a session keeps such lists
   for its cached and its active clients. */
struct client_list {
    struct client *client;     /* client referenced by this node */
    struct client_list *next;  /* following node, 0 at the end of the list */
};
96 
97 /* session counting (1) , disable client counting (0) */
98 static YAZ_MUTEX g_session_mutex = 0;
99 static int no_sessions = 0;
100 static int no_session_total = 0;
101 
102 static int session_use(int delta)
103 {
104  int sessions;
105  if (!g_session_mutex)
106  yaz_mutex_create(&g_session_mutex);
107  yaz_mutex_enter(g_session_mutex);
108  no_sessions += delta;
109  if (delta > 0)
110  no_session_total += delta;
111  sessions = no_sessions;
112  yaz_mutex_leave(g_session_mutex);
113  yaz_log(YLOG_DEBUG, "%s sessions=%d", delta == 0 ? "" : (delta > 0 ? "INC" : "DEC"), no_sessions);
114  return sessions;
115 }
116 
/* Return the current session count without modifying it. */
int sessions_count(void)
{
    int current = session_use(0);

    return current;
}
121 
123 {
124  int total = 0;
125  if (!g_session_mutex)
126  return 0;
127  yaz_mutex_enter(g_session_mutex);
128  total = no_session_total;
129  yaz_mutex_leave(g_session_mutex);
130  return total;
131 }
132 
133 static void log_xml_doc(xmlDoc *doc)
134 {
135  FILE *lf = yaz_log_file();
136  xmlChar *result = 0;
137  int len = 0;
138 #if LIBXML_VERSION >= 20600
139  xmlDocDumpFormatMemory(doc, &result, &len, 1);
140 #else
141  xmlDocDumpMemory(doc, &result, &len);
142 #endif
143  if (lf && len)
144  {
145  (void) fwrite(result, 1, len, lf);
146  fprintf(lf, "\n");
147  }
148  xmlFree(result);
149 }
150 
151 static void session_enter(struct session *s, const char *caller)
152 {
153  if (caller)
154  session_log(s, YLOG_DEBUG, "Session lock by %s", caller);
155  yaz_mutex_enter(s->session_mutex);
156 }
157 
158 static void session_leave(struct session *s, const char *caller)
159 {
160  yaz_mutex_leave(s->session_mutex);
161  if (caller)
162  session_log(s, YLOG_DEBUG, "Session unlock by %s", caller);
163 }
164 
165 static void session_normalize_facet(struct session *s, const char *type,
166  const char *value,
167  WRBUF display_wrbuf,
168  WRBUF facet_wrbuf)
169 {
170  struct conf_service *service = s->service;
172  const char *facet_component;
173  int i;
174  const char *icu_chain_id = 0;
175 
176  for (i = 0; i < service->num_metadata; i++)
177  if (!strcmp((service->metadata + i)->name, type))
178  icu_chain_id = (service->metadata + i)->facetrule;
179  if (!icu_chain_id)
180  icu_chain_id = "facet";
181  prt = pp2_charset_token_create(service->charsets, icu_chain_id);
182  if (!prt)
183  {
184  session_log(s, YLOG_FATAL,
185  "Unknown ICU chain '%s' for facet of type '%s'",
186  icu_chain_id, type);
187  wrbuf_destroy(facet_wrbuf);
188  wrbuf_destroy(display_wrbuf);
189  return;
190  }
191  pp2_charset_token_first(prt, value, 0);
192  while ((facet_component = pp2_charset_token_next(prt)))
193  {
194  const char *display_component;
195  if (*facet_component)
196  {
197  if (wrbuf_len(facet_wrbuf))
198  wrbuf_puts(facet_wrbuf, " ");
199  wrbuf_puts(facet_wrbuf, facet_component);
200  }
201  display_component = pp2_get_display(prt);
202  if (display_component)
203  {
204  if (wrbuf_len(display_wrbuf))
205  wrbuf_puts(display_wrbuf, " ");
206  wrbuf_puts(display_wrbuf, display_component);
207  }
208  }
210 }
211 
212 void add_facet(struct session *s, const char *type, const char *value, int count)
213 {
214  WRBUF facet_wrbuf = wrbuf_alloc();
215  WRBUF display_wrbuf = wrbuf_alloc();
216 
217  session_normalize_facet(s, type, value, display_wrbuf, facet_wrbuf);
218 
219  if (wrbuf_len(facet_wrbuf))
220  {
221  int i;
222  for (i = 0; i < s->num_termlists; i++)
223  if (!strcmp(s->termlists[i].name, type))
224  break;
225  if (i == s->num_termlists)
226  {
227  if (i == SESSION_MAX_TERMLISTS)
228  {
229  session_log(s, YLOG_FATAL, "Too many termlists");
230  wrbuf_destroy(facet_wrbuf);
231  wrbuf_destroy(display_wrbuf);
232  return;
233  }
234 
235  s->termlists[i].name = nmem_strdup(s->nmem, type);
237  s->num_termlists = i + 1;
238  }
239 
240 #if 0
241  session_log(s, YLOG_LOG, "Facets for %s: %s norm:%s (%d)", type, value, wrbuf_cstr(facet_wrbuf), count);
242 #endif
243  termlist_insert(s->termlists[i].termlist, wrbuf_cstr(display_wrbuf),
244  wrbuf_cstr(facet_wrbuf), count);
245  }
246  wrbuf_destroy(facet_wrbuf);
247  wrbuf_destroy(display_wrbuf);
248 }
249 
250 static xmlDoc *record_to_xml(struct session *se,
251  struct session_database *sdb, const char *rec)
252 {
253  struct database *db = sdb->database;
254  xmlDoc *rdoc = 0;
255 
256  rdoc = xmlParseMemory(rec, strlen(rec));
257 
258  if (!rdoc)
259  {
260  session_log(se, YLOG_WARN, "Non-wellformed XML");
261  return 0;
262  }
263 
264  if (global_parameters.dump_records)
265  {
266  session_log(se, YLOG_LOG, "Un-normalized record from %s", db->id);
267  log_xml_doc(rdoc);
268  }
269 
270  return rdoc;
271 }
272 
273 #define MAX_XSLT_ARGS 16
274 
275 // Add static values from session database settings if applicable
277  struct conf_service *service,
278  char **parms,
279  NMEM nmem)
280 {
281  int i;
282  int nparms = 0;
283  int offset = 0;
284 
285  for (i = 0; i < service->num_metadata; i++)
286  {
287  struct conf_metadata *md = &service->metadata[i];
288  int setting;
289 
290  if (md->setting == Metadata_setting_parameter &&
291  (setting = settings_lookup_offset(service, md->name)) >= 0)
292  {
293  const char *val = session_setting_oneval(sdb, setting);
294  if (val && nparms < MAX_XSLT_ARGS)
295  {
296  char *buf;
297  int len = strlen(val);
298  buf = nmem_malloc(nmem, len + 3);
299  buf[0] = '\'';
300  strcpy(buf + 1, val);
301  buf[len+1] = '\'';
302  buf[len+2] = '\0';
303  parms[offset++] = md->name;
304  parms[offset++] = buf;
305  nparms++;
306  }
307  }
308  }
309  parms[offset] = 0;
310 }
311 
312 // Add static values from session database settings if applicable
313 static void insert_settings_values(struct session_database *sdb, xmlDoc *doc,
314  struct conf_service *service)
315 {
316  int i;
317 
318  for (i = 0; i < service->num_metadata; i++)
319  {
320  struct conf_metadata *md = &service->metadata[i];
321  int offset;
322 
323  if (md->setting == Metadata_setting_postproc &&
324  (offset = settings_lookup_offset(service, md->name)) >= 0)
325  {
326  const char *val = session_setting_oneval(sdb, offset);
327  if (val)
328  {
329  xmlNode *r = xmlDocGetRootElement(doc);
330  xmlNode *n = xmlNewTextChild(r, 0, (xmlChar *) "metadata",
331  (xmlChar *) val);
332  xmlSetProp(n, (xmlChar *) "type", (xmlChar *) md->name);
333  }
334  }
335  }
336 }
337 
338 static xmlDoc *normalize_record(struct session *se,
339  struct session_database *sdb,
340  struct conf_service *service,
341  const char *rec, NMEM nmem)
342 {
343  xmlDoc *rdoc = record_to_xml(se, sdb, rec);
344 
345  if (rdoc)
346  {
347  char *parms[MAX_XSLT_ARGS*2+1];
348 
349  insert_settings_parameters(sdb, service, parms, nmem);
350 
351  if (normalize_record_transform(sdb->map, &rdoc, (const char **)parms))
352  {
353  session_log(se, YLOG_WARN, "Normalize failed");
354  }
355  else
356  {
357  insert_settings_values(sdb, rdoc, service);
358 
359  if (global_parameters.dump_records)
360  {
361  session_log(se, YLOG_LOG, "Normalized record from %s",
362  sdb->database->id);
363  log_xml_doc(rdoc);
364  }
365  }
366  }
367  return rdoc;
368 }
369 
371  struct session_database *db,
372  WRBUF w)
373 {
374  if (db->settings)
375  {
376  int i, num = db->num_settings;
377  for (i = 0; i < num; i++)
378  {
379  struct setting *s = db->settings[i];
380  for (;s ; s = s->next)
381  {
382  wrbuf_puts(w, "<set name=\"");
383  wrbuf_xmlputs(w, s->name);
384  wrbuf_puts(w, "\" value=\"");
385  wrbuf_xmlputs(w, s->value);
386  wrbuf_puts(w, "\"/>");
387  }
388  if (db->settings[i])
389  wrbuf_puts(w, "\n");
390  }
391  }
392 }
393 
394 // Retrieve first defined value for 'name' for given database.
395 // Will be extended to take into account user associated with session
396 const char *session_setting_oneval(struct session_database *db, int offset)
397 {
398  if (offset >= db->num_settings || !db->settings[offset])
399  return "";
400  return db->settings[offset]->value;
401 }
402 
403 // Prepare XSLT stylesheets for record normalization
404 // Structures are allocated on the session_wide nmem to avoid having
405 // to recompute this for every search. This would lead
406 // to leaking if a single session was to repeatedly change the PZ_XSLT
407 // setting. However, this is not a realistic use scenario.
408 static int prepare_map(struct session *se, struct session_database *sdb)
409 {
410  if (sdb->settings && !sdb->map)
411  {
412  const char *s;
413 
414  if (sdb->settings[PZ_XSLT] &&
415  (s = session_setting_oneval(sdb, PZ_XSLT)))
416  {
417  char auto_stylesheet[256];
418 
419  if (!strcmp(s, "auto"))
420  {
421  const char *request_syntax = session_setting_oneval(
422  sdb, PZ_REQUESTSYNTAX);
423  if (request_syntax)
424  {
425  char *cp;
426  yaz_snprintf(auto_stylesheet, sizeof(auto_stylesheet),
427  "%s.xsl", request_syntax);
428  for (cp = auto_stylesheet; *cp; cp++)
429  {
430  /* deliberately only consider ASCII */
431  if (*cp > 32 && *cp < 127)
432  *cp = tolower(*cp);
433  }
434  s = auto_stylesheet;
435  }
436  else
437  {
438  session_log(se, YLOG_WARN,
439  "No pz:requestsyntax for auto stylesheet");
440  }
441  }
443  se->service, s);
444  if (!sdb->map)
445  return -1;
446  }
447  }
448  return 0;
449 }
450 
451 // called if watch should be removed because http_channel is to be destroyed
452 static void session_watch_cancel(void *data, struct http_channel *c,
453  void *data2)
454 {
455  struct session_watchentry *ent = data;
456 
457  ent->fun = 0;
458  ent->data = 0;
459  ent->obs = 0;
460 }
461 
462 // set watch. Returns 0=OK, -1 if watch is already set
463 int session_set_watch(struct session *s, int what,
464  session_watchfun fun, void *data,
465  struct http_channel *chan)
466 {
467  int ret;
468  session_enter(s, "session_set_watch");
469  if (s->watchlist[what].fun)
470  ret = -1;
471  else
472  {
473 
474  s->watchlist[what].fun = fun;
475  s->watchlist[what].data = data;
476  s->watchlist[what].obs = http_add_observer(chan, &s->watchlist[what],
478  ret = 0;
479  }
480  session_leave(s, "session_set_watch");
481  return ret;
482 }
483 
484 void session_alert_watch(struct session *s, int what)
485 {
486  assert(s);
487  session_enter(s, "session_alert_watch");
488  if (s->watchlist[what].fun)
489  {
490  /* our watch is no longer associated with http_channel */
491  void *data;
493 
495  fun = s->watchlist[what].fun;
496  data = s->watchlist[what].data;
497 
498  /* reset watch before fun is invoked - in case fun wants to set
499  it again */
500  s->watchlist[what].fun = 0;
501  s->watchlist[what].data = 0;
502  s->watchlist[what].obs = 0;
503 
504  session_leave(s, "session_alert_watch");
505  session_log(s, YLOG_DEBUG,
506  "Alert Watch: %d calling function: %p", what, fun);
507  fun(data);
508  }
509  else
510  session_leave(s,"session_alert_watch");
511 }
512 
513 //callback for grep_databases
514 static void select_targets_callback(struct session *se,
515  struct session_database *db)
516 {
517  struct client *cl;
518  struct client_list *l;
519 
520  for (l = se->clients_cached; l; l = l->next)
521  if (client_get_database(l->client) == db)
522  break;
523 
524  if (l)
525  cl = l->client;
526  else
527  {
528  cl = client_create(db->database->id);
529  client_set_database(cl, db);
530 
531  l = xmalloc(sizeof(*l));
532  l->client = cl;
533  l->next = se->clients_cached;
534  se->clients_cached = l;
535  }
536  /* set session always. If may be 0 if client is not active */
537  client_set_session(cl, se);
538 
539  l = xmalloc(sizeof(*l));
540  l->client = cl;
541  l->next = se->clients_active;
542  se->clients_active = l;
543 }
544 
545 static void session_reset_active_clients(struct session *se,
546  struct client_list *new_list)
547 {
548  struct client_list *l;
549 
550  session_enter(se, "session_reset_active_clients");
551  l = se->clients_active;
552  se->clients_active = new_list;
553  session_leave(se, "session_reset_active_clients");
554 
555  while (l)
556  {
557  struct client_list *l_next = l->next;
558 
559  client_lock(l->client);
560  client_set_session(l->client, 0); /* mark client inactive */
561  client_unlock(l->client);
562 
563  xfree(l);
564  l = l_next;
565  }
566 }
567 
568 static void session_remove_cached_clients(struct session *se)
569 {
570  struct client_list *l;
571 
573 
574  session_enter(se, "session_remove_cached_clients");
575  l = se->clients_cached;
576  se->clients_cached = 0;
577  session_leave(se, "session_remove_cached_clients");
578 
579  while (l)
580  {
581  struct client_list *l_next = l->next;
582  client_lock(l->client);
583  client_set_session(l->client, 0);
585  client_unlock(l->client);
587  xfree(l);
588  l = l_next;
589  }
590 }
591 
592 // Associates a set of clients with a session;
593 // Note: Session-databases represent databases with per-session
594 // setting overrides
595 static int select_targets(struct session *se, const char *filter)
596 {
598 }
599 
601 {
602  struct client_list *l;
603  int res = 0;
604 
605  for (l = s->clients_active; l; l = l->next)
606  if (client_is_active(l->client))
607  res++;
608 
609  return res;
610 }
611 
613 {
614  struct client_list *l;
615  int res = 0;
616 
617  for (l = s->clients_active; l; l = l->next)
619  res++;
620  session_log(s, YLOG_DEBUG, "Has %d active preferred clients.", res);
621  return res == 0;
622 }
623 
624 static void session_clear_set(struct session *se, struct reclist_sortparms *sp)
625 {
627  if (nmem_total(se->nmem))
628  session_log(se, YLOG_DEBUG, "NMEN operation usage %zd",
629  nmem_total(se->nmem));
630  nmem_reset(se->nmem);
631  se->total_records = se->total_merged = 0;
632  se->num_termlists = 0;
633 
634  /* reset list of sorted results and clear to relevance search */
635  se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results));
636  se->sorted_results->name = nmem_strdup(se->nmem, sp->name);
638  se->sorted_results->type = sp->type;
639  se->sorted_results->next = 0;
640 
641  session_log(se, YLOG_DEBUG, "clear_set session_sort: field=%s increasing=%d type=%d configured",
642  sp->name, sp->increasing, sp->type);
643 
644  se->reclist = reclist_create(se->nmem);
645 }
646 
647 static void session_sort_unlocked(struct session *se, struct reclist_sortparms *sp)
648 {
649  struct reclist_sortparms *sr;
650  struct client_list *l;
651  const char *field = sp->name;
652  int increasing = sp->increasing;
653  int type = sp->type;
654  int clients_research = 0;
655 
656  session_log(se, YLOG_DEBUG, "session_sort field=%s increasing=%d type=%d",
657  field, increasing, type);
658  /* see if we already have sorted for this criteria */
659  for (sr = se->sorted_results; sr; sr = sr->next)
660  {
661  if (!reclist_sortparms_cmp(sr, sp))
662  break;
663  }
664  if (sr)
665  {
666  session_log(se, YLOG_DEBUG, "search_sort: field=%s increasing=%d type=%d already fetched",
667  field, increasing, type);
668  return;
669  }
670  session_log(se, YLOG_DEBUG, "search_sort: field=%s increasing=%d type=%d must fetch",
671  field, increasing, type);
672 
673  // We need to reset reclist on every sort that changes the records, not just for position
674  // So if just one client requires new searching, we need to clear set.
675  // Ask each of the client if sorting requires re-search due to native sort
676  // If it does it will require us to
677  for (l = se->clients_active; l; l = l->next)
678  {
679  struct client *cl = l->client;
680  // Assume no re-search is required.
681  client_parse_init(cl, 1);
682  clients_research += client_parse_sort(cl, sp);
683  }
684  if (clients_research) {
685  session_log(se, YLOG_DEBUG,
686  "Reset results due to %d clients researching",
687  clients_research);
688  session_clear_set(se, sp);
689  }
690  else {
691  // A new sorting based on same record set
692  sr = nmem_malloc(se->nmem, sizeof(*sr));
693  sr->name = nmem_strdup(se->nmem, field);
694  sr->increasing = increasing;
695  sr->type = type;
696  sr->next = se->sorted_results;
697  se->sorted_results = sr;
698  session_log(se, YLOG_DEBUG, "No research/ingesting done");
699  return ;
700  }
701  session_log(se, YLOG_DEBUG, "Re- search/ingesting for clients due to change in sort order");
702 
703  for (l = se->clients_active; l; l = l->next)
704  {
705  struct client *cl = l->client;
706  if (client_get_state(cl) == Client_Connecting ||
707  client_get_state(cl) == Client_Idle ||
710  }
711  else {
712  session_log(se, YLOG_DEBUG,
713  "Client %s: No re-start/ingest in show. Wrong client state: %d",
715  }
716 
717  }
718 }
719 
/*
 * Public entry point for sorting: forwards to session_sort_unlocked().
 * The session lock is not taken here.
 * NOTE(review): presumably callers already hold the session lock -- confirm.
 */
void session_sort(struct session *se, struct reclist_sortparms *sp)
{
    session_sort_unlocked(se, sp);
}
725 
726 
728  const char *query,
729  const char *startrecs,
730  const char *maxrecs,
731  const char *filter,
732  const char *limit,
733  const char **addinfo,
734  struct reclist_sortparms *sp)
735 {
736  int live_channels = 0;
737  int no_working = 0;
738  int no_failed_query = 0;
739  int no_failed_limit = 0;
740  struct client_list *l, *l0;
741 
742  session_log(se, YLOG_DEBUG, "Search");
743 
744  *addinfo = 0;
745 
746  if (se->settings_modified) {
748  }
749  else
751 
752  session_enter(se, "session_search");
753  se->settings_modified = 0;
754 
755  session_clear_set(se, sp);
757 
758  live_channels = select_targets(se, filter);
759  if (!live_channels)
760  {
761  session_leave(se, "session_search");
762  return PAZPAR2_NO_TARGETS;
763  }
764 
766  se->facet_limits = facet_limits_create(limit);
767  if (!se->facet_limits)
768  {
769  *addinfo = "limit";
770  session_leave(se, "session_search");
772  }
773 
774  l0 = se->clients_active;
775  se->clients_active = 0;
776  session_leave(se, "session_search");
777 
778  for (l = l0; l; l = l->next)
779  {
780  int parse_ret;
781  struct client *cl = l->client;
782  client_parse_init(cl, 1);
783  if (prepare_map(se, client_get_database(cl)) < 0)
784  continue;
785 
786  parse_ret = client_parse_query(cl, query, se->facet_limits);
787  if (parse_ret == -1)
788  no_failed_query++;
789  else if (parse_ret == -2)
790  no_failed_limit++;
791  else if (parse_ret < 0)
792  no_working++; /* other error, such as bad CCL map */
793  else
794  {
795  client_parse_range(cl, startrecs, maxrecs);
796  client_parse_sort(cl, sp);
798  no_working++;
799  }
800  }
802 
803  if (no_working == 0)
804  {
805  if (no_failed_query > 0)
806  {
807  *addinfo = "query";
809  }
810  else if (no_failed_limit > 0)
811  {
812  *addinfo = "limit";
814  }
815  else
816  return PAZPAR2_NO_TARGETS;
817  }
818  session_log(se, YLOG_LOG, "session_start_search done");
819  return PAZPAR2_NO_ERROR;
820 }
821 
822 // Creates a new session_database object for a database
823 static void session_init_databases_fun(void *context, struct database *db)
824 {
825  struct session *se = (struct session *) context;
826  struct session_database *new = nmem_malloc(se->session_nmem, sizeof(*new));
827  int i;
828 
829  new->database = db;
830 
831  new->map = 0;
832  assert(db->settings);
833  new->settings = nmem_malloc(se->session_nmem,
834  sizeof(struct settings *) * db->num_settings);
835  new->num_settings = db->num_settings;
836  for (i = 0; i < db->num_settings; i++)
837  {
838  struct setting *setting = db->settings[i];
839  new->settings[i] = setting;
840  }
841  new->next = se->databases;
842  se->databases = new;
843 }
844 
845 // Doesn't free memory associated with sdb -- nmem takes care of that
847 {
848  sdb->map = 0;
849 }
850 
851 // Initialize session_database list -- this represents this session's view
852 // of the database list -- subject to modification by the settings ws command
854 {
855  se->databases = 0;
857 }
858 
859 // Probably session_init_databases_fun should be refactored instead of
860 // called here.
862  char *id)
863 {
865  session_init_databases_fun((void*) se, db);
866 
867  // New sdb is head of se->databases list
868  return se->databases;
869 }
870 
871 // Find an existing session database. If not found, load it
873  char *id)
874 {
875  struct session_database *sdb;
876 
877  for (sdb = se->databases; sdb; sdb = sdb->next)
878  if (!strcmp(sdb->database->id, id))
879  return sdb;
880  return load_session_database(se, id);
881 }
882 
883 // Apply a session override to a database
884 void session_apply_setting(struct session *se, char *dbname, char *setting,
885  char *value)
886 {
887  struct session_database *sdb = find_session_database(se, dbname);
888  struct conf_service *service = se->service;
889  struct setting *new = nmem_malloc(se->session_nmem, sizeof(*new));
890  int offset = settings_create_offset(service, setting);
891 
892  expand_settings_array(&sdb->settings, &sdb->num_settings, offset,
893  se->session_nmem);
894  new->precedence = 0;
895  new->target = dbname;
896  new->name = setting;
897  new->value = value;
898  new->next = sdb->settings[offset];
899  sdb->settings[offset] = new;
900 
901  se->settings_modified = 1;
902 
903  // Force later recompute of settings-driven data structures
904  // (happens when a search starts and client connections are prepared)
905  switch (offset)
906  {
907  case PZ_XSLT:
908  if (sdb->map)
909  {
910  sdb->map = 0;
911  }
912  break;
913  }
914 }
915 
916 void session_destroy(struct session *se)
917 {
918  struct session_database *sdb;
919  session_log(se, YLOG_DEBUG, "Destroying");
920  session_use(-1);
922 
923  for (sdb = se->databases; sdb; sdb = sdb->next)
928  if (nmem_total(se->nmem))
929  session_log(se, YLOG_DEBUG, "NMEN operation usage %zd", nmem_total(se->nmem));
930  if (nmem_total(se->session_nmem))
931  session_log(se, YLOG_DEBUG, "NMEN session usage %zd", nmem_total(se->session_nmem));
933  nmem_destroy(se->nmem);
935  yaz_mutex_destroy(&se->session_mutex);
936 }
937 
939  size_t session_nmem;
940  if (session == 0)
941  return 0;
942  session_enter(session, "session_get_memory_status");
943  session_nmem = nmem_total(session->nmem);
944  session_leave(session, "session_get_memory_status");
945  return session_nmem;
946 }
947 
948 
950  unsigned session_id)
951 {
952  int i;
953  struct session *session = nmem_malloc(nmem, sizeof(*session));
954 
955  char tmp_str[50];
956 
957  sprintf(tmp_str, "session#%u", session_id);
958 
959  session->session_id = session_id;
960  session_log(session, YLOG_DEBUG, "New");
961  session->service = service;
962  session->relevance = 0;
963  session->total_records = 0;
966  session->num_termlists = 0;
967  session->reclist = reclist_create(nmem);
968  session->clients_active = 0;
969  session->clients_cached = 0;
970  session->settings_modified = 0;
971  session->session_nmem = nmem;
972  session->nmem = nmem_create();
973  session->databases = 0;
974  session->sorted_results = 0;
975  session->facet_limits = 0;
976 
977  for (i = 0; i <= SESSION_WATCH_MAX; i++)
978  {
979  session->watchlist[i].data = 0;
980  session->watchlist[i].fun = 0;
981  }
983  session->session_mutex = 0;
984  pazpar2_mutex_create(&session->session_mutex, tmp_str);
985  session_use(1);
986  return session;
987 }
988 
989 const char * client_get_suggestions_xml(struct client *cl, WRBUF wrbuf);
990 
991 static struct hitsbytarget *hitsbytarget_nb(struct session *se,
992  int *count, NMEM nmem)
993 {
994  struct hitsbytarget *res = 0;
995  struct client_list *l;
996  size_t sz = 0;
997 
998  for (l = se->clients_active; l; l = l->next)
999  sz++;
1000 
1001  res = nmem_malloc(nmem, sizeof(*res) * sz);
1002  *count = 0;
1003  for (l = se->clients_active; l; l = l->next)
1004  {
1005  struct client *cl = l->client;
1006  WRBUF w = wrbuf_alloc();
1008  PZ_NAME);
1009 
1010  res[*count].id = client_get_id(cl);
1011  res[*count].name = *name ? name : "Unknown";
1012  res[*count].hits = client_get_hits(cl);
1013  res[*count].approximation = client_get_approximation(cl);
1014  res[*count].records = client_get_num_records(cl);
1015  res[*count].filtered = client_get_num_records_filtered(cl);
1016  res[*count].diagnostic =
1017  client_get_diagnostic(cl, &res[*count].message,
1018  &res[*count].addinfo);
1019  res[*count].state = client_get_state_str(cl);
1020  res[*count].connected = client_get_connection(cl) ? 1 : 0;
1022  res[*count].settings_xml = nmem_strdup(nmem, wrbuf_cstr(w));
1023  wrbuf_rewind(w);
1024  wrbuf_puts(w, "");
1025  res[*count].suggestions_xml = nmem_strdup(nmem, client_get_suggestions_xml(cl, w));
1026  wrbuf_destroy(w);
1027  (*count)++;
1028  }
1029  return res;
1030 }
1031 
1032 struct hitsbytarget *get_hitsbytarget(struct session *se, int *count, NMEM nmem)
1033 {
1034  struct hitsbytarget *p;
1035  session_enter(se, "get_hitsbytarget");
1036  p = hitsbytarget_nb(se, count, nmem);
1037  session_leave(se, "get_hitsbytarget");
1038  return p;
1039 }
1040 
1041 // Compares two hitsbytarget nodes by hitcount
1042 static int cmp_ht(const void *p1, const void *p2)
1043 {
1044  const struct hitsbytarget *h1 = p1;
1045  const struct hitsbytarget *h2 = p2;
1046  return h2->hits - h1->hits;
1047 }
1048 
1049 // Compares two hitsbytarget nodes by hitcount
1050 static int cmp_ht_approx(const void *p1, const void *p2)
1051 {
1052  const struct hitsbytarget *h1 = p1;
1053  const struct hitsbytarget *h2 = p2;
1054  return h2->approximation - h1->approximation;
1055 }
1056 
1057 static int targets_termlist_nb(WRBUF wrbuf, struct session *se, int num,
1058  NMEM nmem, int version)
1059 {
1060  struct hitsbytarget *ht;
1061  int count, i;
1062 
1063  ht = hitsbytarget_nb(se, &count, nmem);
1064  if (version >= 2)
1065  qsort(ht, count, sizeof(struct hitsbytarget), cmp_ht_approx);
1066  else
1067  qsort(ht, count, sizeof(struct hitsbytarget), cmp_ht);
1068  for (i = 0; i < count && i < num && ht[i].hits > 0; i++)
1069  {
1070 
1071  // do only print terms which have display names
1072 
1073  wrbuf_puts(wrbuf, "<term>\n");
1074 
1075  wrbuf_puts(wrbuf, "<id>");
1076  wrbuf_xmlputs(wrbuf, ht[i].id);
1077  wrbuf_puts(wrbuf, "</id>\n");
1078 
1079  wrbuf_puts(wrbuf, "<name>");
1080  if (!ht[i].name || !ht[i].name[0])
1081  wrbuf_xmlputs(wrbuf, "NO TARGET NAME");
1082  else
1083  wrbuf_xmlputs(wrbuf, ht[i].name);
1084  wrbuf_puts(wrbuf, "</name>\n");
1085 
1086  wrbuf_printf(wrbuf, "<frequency>" ODR_INT_PRINTF "</frequency>\n",
1087  ht[i].hits);
1088 
1089  if (version >= 2) {
1090  // Should not print if we know it isn't a approximation.
1091  wrbuf_printf(wrbuf, "<approximation>" ODR_INT_PRINTF "</approximation>\n", ht[i].approximation);
1092  wrbuf_printf(wrbuf, "<records>%d</records>\n", ht[i].records - ht[i].filtered);
1093  wrbuf_printf(wrbuf, "<filtered>%d</filtered>\n", ht[i].filtered);
1094  }
1095 
1096  wrbuf_puts(wrbuf, "<state>");
1097  wrbuf_xmlputs(wrbuf, ht[i].state);
1098  wrbuf_puts(wrbuf, "</state>\n");
1099 
1100  wrbuf_printf(wrbuf, "<diagnostic>%d</diagnostic>\n",
1101  ht[i].diagnostic);
1102  wrbuf_puts(wrbuf, "</term>\n");
1103  }
1104  return count;
1105 }
1106 
1107 void perform_termlist(struct http_channel *c, struct session *se,
1108  const char *name, int num, int version)
1109 {
1110  int i, j;
1111  NMEM nmem_tmp = nmem_create();
1112  char **names;
1113  int num_names = 0;
1114 
1115  if (!name)
1116  name = "*";
1117 
1118  nmem_strsplit(nmem_tmp, ",", name, &names, &num_names);
1119 
1120  session_enter(se, "perform_termlist");
1121 
1122  for (j = 0; j < num_names; j++)
1123  {
1124  const char *tname;
1125  int must_generate_empty = 1; /* bug 5350 */
1126 
1127  for (i = 0; i < se->num_termlists; i++)
1128  {
1129  tname = se->termlists[i].name;
1130  if (!strcmp(names[j], tname) || !strcmp(names[j], "*"))
1131  {
1132  struct termlist_score **p = 0;
1133  int len;
1134 
1135  wrbuf_puts(c->wrbuf, "<list name=\"");
1136  wrbuf_xmlputs(c->wrbuf, tname);
1137  wrbuf_puts(c->wrbuf, "\">\n");
1138  must_generate_empty = 0;
1139 
1140  p = termlist_highscore(se->termlists[i].termlist, &len,
1141  nmem_tmp);
1142  if (p)
1143  {
1144  int i;
1145  for (i = 0; i < len && i < num; i++)
1146  {
1147  // prevent sending empty term elements
1148  if (!p[i]->display_term || !p[i]->display_term[0])
1149  continue;
1150 
1151  wrbuf_puts(c->wrbuf, "<term>");
1152  wrbuf_puts(c->wrbuf, "<name>");
1153  wrbuf_xmlputs(c->wrbuf, p[i]->display_term);
1154  wrbuf_puts(c->wrbuf, "</name>");
1155 
1156  wrbuf_printf(c->wrbuf,
1157  "<frequency>%d</frequency>",
1158  p[i]->frequency);
1159  wrbuf_puts(c->wrbuf, "</term>\n");
1160  }
1161  }
1162  wrbuf_puts(c->wrbuf, "</list>\n");
1163  }
1164  }
1165  tname = "xtargets";
1166  if (!strcmp(names[j], tname) || !strcmp(names[j], "*"))
1167  {
1168  wrbuf_puts(c->wrbuf, "<list name=\"");
1169  wrbuf_xmlputs(c->wrbuf, tname);
1170  wrbuf_puts(c->wrbuf, "\">\n");
1171 
1172  targets_termlist_nb(c->wrbuf, se, num, c->nmem, version);
1173  wrbuf_puts(c->wrbuf, "</list>\n");
1174  must_generate_empty = 0;
1175  }
1176  if (must_generate_empty)
1177  {
1178  wrbuf_puts(c->wrbuf, "<list name=\"");
1179  wrbuf_xmlputs(c->wrbuf, names[j]);
1180  wrbuf_puts(c->wrbuf, "\"/>\n");
1181  }
1182  }
1183  session_leave(se, "perform_termlist");
1184  nmem_destroy(nmem_tmp);
1185 }
1186 
#ifdef MISSING_HEADERS
/* Log the nmem memory figures (in use / free) at YLOG_LOG level.
   Only compiled when MISSING_HEADERS is defined. */
void report_nmem_stats(void)
{
    size_t in_use;
    size_t is_free;
    long used_l;
    long free_l;

    nmem_get_memory_in_use(&in_use);
    nmem_get_memory_free(&is_free);

    used_l = (long) in_use;
    free_l = (long) is_free;
    yaz_log(YLOG_LOG, "nmem stat: use=%ld free=%ld", used_l, free_l);
}
#endif
1199 
1200 struct record_cluster *show_single_start(struct session *se, const char *id,
1201  struct record_cluster **prev_r,
1202  struct record_cluster **next_r)
1203 {
1204  struct record_cluster *r = 0;
1205 
1206  session_enter(se, "show_single_start");
1207  *prev_r = 0;
1208  *next_r = 0;
1209  reclist_limit(se->reclist, se);
1210 
1211  reclist_enter(se->reclist);
1212  while ((r = reclist_read_record(se->reclist)))
1213  {
1214  if (!strcmp(r->recid, id))
1215  {
1216  *next_r = reclist_read_record(se->reclist);
1217  break;
1218  }
1219  *prev_r = r;
1220  }
1221  reclist_leave(se->reclist);
1222  if (!r)
1223  session_leave(se, "show_single_start");
1224  return r;
1225 }
1226 
/*
 * Call session_leave() on `se`. `rec` is accepted but not used.
 * show_single_start() returns without session_leave() when it finds a
 * record, so this is presumably the matching release for that case.
 */
void show_single_stop(struct session *se, struct record_cluster *rec)
{
    (void) rec;
    session_leave(se, "show_single_stop");
}
1231 
1232 
1234 {
1235  struct client_list *l;
1236  int ret = 0;
1237 
1238  for (l = se->clients_active; l; l = l->next)
1239  {
1240  struct client *cl = l->client;
1241  if (client_get_state(cl) == Client_Idle)
1242  {
1243  if (client_fetch_more(cl))
1244  {
1245  session_log(se, YLOG_LOG, "%s: more to fetch",
1246  client_get_id(cl));
1247  ret = 1;
1248  }
1249  else
1250  {
1251  session_log(se, YLOG_LOG, "%s: no more to fetch",
1252  client_get_id(cl));
1253  }
1254  }
1255  else
1256  {
1257  session_log(se, YLOG_LOG, "%s: no fetch due to state=%s",
1259  }
1260 
1261  }
1262  return ret;
1263 }
1264 
1266  struct reclist_sortparms *sp,
1267  int start, int *num, int *total,
1268  Odr_int *sumhits, Odr_int *approx_hits,
1269  void (*show_records_ready)(void *data),
1270  struct http_channel *chan)
1271 {
1272  struct record_cluster **recs = 0;
1273  struct reclist_sortparms *spp;
1274  struct client_list *l;
1275  int i;
1276 #if USE_TIMING
1277  yaz_timing_t t = yaz_timing_create();
1278 #endif
1279  session_enter(se, "show_range_start");
1280  *sumhits = 0;
1281  *approx_hits = 0;
1282  *total = 0;
1283  reclist_limit(se->reclist, se);
1284  if (se->relevance)
1285  {
1286  for (spp = sp; spp; spp = spp->next)
1287  if (spp->type == Metadata_sortkey_relevance)
1288  {
1290  break;
1291  }
1292  for (l = se->clients_active; l; l = l->next) {
1293  *sumhits += client_get_hits(l->client);
1294  *approx_hits += client_get_approximation(l->client);
1295  }
1296  }
1297  reclist_sort(se->reclist, sp);
1298 
1299  reclist_enter(se->reclist);
1300  *total = reclist_get_num_records(se->reclist);
1301 
1302  for (l = se->clients_active; l; l = l->next)
1304 
1305  for (i = 0; i < start; i++)
1306  {
1307  struct record_cluster *r = reclist_read_record(se->reclist);
1308  if (!r)
1309  {
1310  *num = 0;
1311  break;
1312  }
1313  else
1314  {
1315  struct record *rec = r->records;
1316  for (;rec; rec = rec->next)
1318  }
1319  }
1320  recs = nmem_malloc(se->nmem, (*num > 0 ? *num : 1) * sizeof(*recs));
1321  for (i = 0; i < *num; i++)
1322  {
1323  struct record_cluster *r = reclist_read_record(se->reclist);
1324  if (!r)
1325  {
1326  *num = i;
1327  break;
1328  }
1329  else
1330  {
1331  struct record *rec = r->records;
1332  for (;rec; rec = rec->next)
1334  recs[i] = r;
1335  }
1336  }
1337  reclist_leave(se->reclist);
1338 #if USE_TIMING
1339  yaz_timing_stop(t);
1340  session_log(se, YLOG_LOG, "show %6.5f %3.2f %3.2f",
1341  yaz_timing_get_real(t), yaz_timing_get_user(t),
1342  yaz_timing_get_sys(t));
1343  yaz_timing_destroy(&t);
1344 #endif
1345 
1346  if (!session_fetch_more(se))
1347  session_log(se, YLOG_LOG, "can not fetch more");
1348  else
1349  {
1350  show_range_stop(se, recs);
1351  session_log(se, YLOG_LOG, "fetching more in progress");
1353  show_records_ready, chan, chan))
1354  {
1355  session_log(se, YLOG_WARN, "Ignoring show block");
1356  session_enter(se, "show_range_start");
1357  }
1358  else
1359  {
1360  session_log(se, YLOG_LOG, "session watch OK");
1361  return 0;
1362  }
1363  }
1364  return recs;
1365 }
1366 
/*
 * Call session_leave() on `se`. `recs` is accepted but not used.
 */
void show_range_stop(struct session *se, struct record_cluster **recs)
{
    (void) recs;
    session_leave(se, "show_range_stop");
}
1371 
1372 void statistics(struct session *se, struct statistics *stat)
1373 {
1374  struct client_list *l;
1375  int count = 0;
1376 
1377  memset(stat, 0, sizeof(*stat));
1378  stat->num_hits = 0;
1379  for (l = se->clients_active; l; l = l->next)
1380  {
1381  struct client *cl = l->client;
1382  if (!client_get_connection(cl))
1383  stat->num_no_connection++;
1384  stat->num_hits += client_get_hits(cl);
1385  switch (client_get_state(cl))
1386  {
1387  case Client_Connecting: stat->num_connecting++; break;
1388  case Client_Working: stat->num_working++; break;
1389  case Client_Idle: stat->num_idle++; break;
1390  case Client_Failed: stat->num_failed++; break;
1391  case Client_Error: stat->num_error++; break;
1392  default: break;
1393  }
1394  count++;
1395  }
1396  stat->num_records = se->total_records;
1397 
1398  stat->num_clients = count;
1399 }
1400 
1402  NMEM nmem, const char *value, enum conf_metadata_type type,
1403  struct _xmlAttr *attr)
1404 {
1405  struct record_metadata *rec_md = record_metadata_create(nmem);
1406  struct record_metadata_attr **attrp = &rec_md->attributes;
1407 
1408  for (; attr; attr = attr->next)
1409  {
1410  if (attr->children && attr->children->content)
1411  {
1412  if (strcmp((const char *) attr->name, "type")
1413  && strcmp((const char *) attr->name, "empty"))
1414  { /* skip the "type" + "empty" attribute..
1415  The "Type" is already part of the element in output
1416  (md-%s) and so repeating it here is redundant */
1417  *attrp = nmem_malloc(nmem, sizeof(**attrp));
1418  (*attrp)->name =
1419  nmem_strdup(nmem, (const char *) attr->name);
1420  (*attrp)->value =
1421  nmem_strdup(nmem, (const char *) attr->children->content);
1422  attrp = &(*attrp)->next;
1423  }
1424  }
1425  }
1426  *attrp = 0;
1427 
1428  if (type == Metadata_type_generic)
1429  {
1430  char *p = nmem_strdup(nmem, value);
1431 
1432  p = normalize7bit_generic(p, " ,/.:([");
1433 
1434  rec_md->data.text.disp = p;
1435  rec_md->data.text.sort = 0;
1436  }
1437  else if (type == Metadata_type_year || type == Metadata_type_date)
1438  {
1439  int first, last;
1440  int longdate = 0;
1441 
1442  if (type == Metadata_type_date)
1443  longdate = 1;
1444  if (extract7bit_dates((char *) value, &first, &last, longdate) < 0)
1445  return 0;
1446 
1447  rec_md->data.number.min = first;
1448  rec_md->data.number.max = last;
1449  }
1450  else
1451  return 0;
1452  return rec_md;
1453 }
1454 
1455 static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
1456  struct conf_service *service, WRBUF norm_wr)
1457 {
1458  xmlNode *n;
1459  int no_found = 0;
1460  for (n = root->children; n; n = n->next)
1461  {
1462  if (n->type != XML_ELEMENT_NODE)
1463  continue;
1464  if (!strcmp((const char *) n->name, "metadata"))
1465  {
1466  xmlChar *type = xmlGetProp(n, (xmlChar *) "type");
1467  if (type == NULL) {
1468  yaz_log(YLOG_FATAL, "Missing type attribute on metadata element. Skipping!");
1469  }
1470  else if (!strcmp(name, (const char *) type))
1471  {
1472  xmlChar *value = xmlNodeListGetString(doc, n->children, 1);
1473  if (value)
1474  {
1475  const char *norm_str;
1476  pp2_charset_token_t prt =
1477  pp2_charset_token_create(service->charsets, "mergekey");
1478 
1479  pp2_charset_token_first(prt, (const char *) value, 0);
1480  if (wrbuf_len(norm_wr) > 0)
1481  wrbuf_puts(norm_wr, " ");
1482  wrbuf_puts(norm_wr, name);
1483  while ((norm_str =
1484  pp2_charset_token_next(prt)))
1485  {
1486  if (*norm_str)
1487  {
1488  wrbuf_puts(norm_wr, " ");
1489  wrbuf_puts(norm_wr, norm_str);
1490  }
1491  }
1492  xmlFree(value);
1494  no_found++;
1495  }
1496  }
1497  xmlFree(type);
1498  }
1499  }
1500  return no_found;
1501 }
1502 
1503 static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no,
1504  struct conf_service *service, NMEM nmem)
1505 {
1506  char *mergekey_norm = 0;
1507  xmlNode *root = xmlDocGetRootElement(doc);
1508  WRBUF norm_wr = wrbuf_alloc();
1509 
1510  /* consider mergekey from XSL first */
1511  xmlChar *mergekey = xmlGetProp(root, (xmlChar *) "mergekey");
1512  if (mergekey)
1513  {
1514  const char *norm_str;
1515  pp2_charset_token_t prt =
1516  pp2_charset_token_create(service->charsets, "mergekey");
1517 
1518  pp2_charset_token_first(prt, (const char *) mergekey, 0);
1519  while ((norm_str = pp2_charset_token_next(prt)))
1520  {
1521  if (*norm_str)
1522  {
1523  if (wrbuf_len(norm_wr))
1524  wrbuf_puts(norm_wr, " ");
1525  wrbuf_puts(norm_wr, norm_str);
1526  }
1527  }
1529  xmlFree(mergekey);
1530  }
1531  else
1532  {
1533  /* no mergekey defined in XSL. Look for mergekey metadata instead */
1534  int field_id;
1535  for (field_id = 0; field_id < service->num_metadata; field_id++)
1536  {
1537  struct conf_metadata *ser_md = &service->metadata[field_id];
1538  if (ser_md->mergekey != Metadata_mergekey_no)
1539  {
1540  int r = get_mergekey_from_doc(doc, root, ser_md->name,
1541  service, norm_wr);
1542  if (r == 0 && ser_md->mergekey == Metadata_mergekey_required)
1543  {
1544  /* no mergekey on this one and it is required..
1545  Generate unique key instead */
1546  wrbuf_rewind(norm_wr);
1547  break;
1548  }
1549  }
1550  }
1551  }
1552 
1553  /* generate unique key if none is not generated already or is empty */
1554  if (wrbuf_len(norm_wr) == 0)
1555  {
1556  wrbuf_printf(norm_wr, "position: %s-%d",
1557  client_get_id(cl), record_no);
1558  }
1559  else
1560  {
1561  const char *lead = "content: ";
1562  wrbuf_insert(norm_wr, 0, lead, strlen(lead));
1563  }
1564  if (wrbuf_len(norm_wr) > 0)
1565  mergekey_norm = nmem_strdup(nmem, wrbuf_cstr(norm_wr));
1566  wrbuf_destroy(norm_wr);
1567  return mergekey_norm;
1568 }
1569 
1580 static int check_record_filter(xmlNode *root, struct session_database *sdb)
1581 {
1582  int match = 0;
1583  xmlNode *n;
1584  const char *s;
1586 
1587  if (!s || !*s)
1588  return 1;
1589 
1590  for (n = root->children; n; n = n->next)
1591  {
1592  if (n->type != XML_ELEMENT_NODE)
1593  continue;
1594  if (!strcmp((const char *) n->name, "metadata"))
1595  {
1596  xmlChar *type = xmlGetProp(n, (xmlChar *) "type");
1597  if (type)
1598  {
1599  size_t len;
1600  int substring;
1601  const char *eq;
1602 
1603  if ((eq = strchr(s, '=')))
1604  substring = 0;
1605  else if ((eq = strchr(s, '~')))
1606  substring = 1;
1607  if (eq)
1608  len = eq - s;
1609  else
1610  len = strlen(s);
1611  if (len == strlen((const char *)type) &&
1612  !memcmp((const char *) type, s, len))
1613  {
1614  xmlChar *value = xmlNodeGetContent(n);
1615  if (value && *value)
1616  {
1617  if (!eq ||
1618  (substring && strstr((const char *) value, eq+1)) ||
1619  (!substring && !strcmp((const char *) value, eq + 1)))
1620  match = 1;
1621  }
1622  xmlFree(value);
1623  }
1624  xmlFree(type);
1625  }
1626  }
1627  }
1628  return match;
1629 }
1630 
1631 
1632 static int ingest_to_cluster(struct client *cl,
1633  xmlDoc *xdoc,
1634  xmlNode *root,
1635  int record_no,
1636  const char *mergekey_norm);
1637 
1647 int ingest_record(struct client *cl, const char *rec,
1648  int record_no, NMEM nmem)
1649 {
1650  struct session *se = client_get_session(cl);
1651  int ret = 0;
1652  struct session_database *sdb = client_get_database(cl);
1653  struct conf_service *service = se->service;
1654  xmlDoc *xdoc = normalize_record(se, sdb, service, rec, nmem);
1655  xmlNode *root;
1656  const char *mergekey_norm;
1657 
1658  if (!xdoc)
1659  return -1;
1660 
1661  root = xmlDocGetRootElement(xdoc);
1662 
1663  if (!check_record_filter(root, sdb))
1664  {
1665  session_log(se, YLOG_LOG, "Filtered out record no %d from %s", record_no, sdb->database->id);
1666  xmlFreeDoc(xdoc);
1667  return -2;
1668  }
1669 
1670  mergekey_norm = get_mergekey(xdoc, cl, record_no, service, nmem);
1671  if (!mergekey_norm)
1672  {
1673  session_log(se, YLOG_WARN, "Got no mergekey");
1674  xmlFreeDoc(xdoc);
1675  return -1;
1676  }
1677  session_enter(se, "ingest_record");
1678  if (client_get_session(cl) == se && se->relevance)
1679  ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekey_norm);
1680  session_leave(se, "ingest_record");
1681 
1682  xmlFreeDoc(xdoc);
1683  return ret;
1684 }
1685 
1686 // struct conf_metadata *ser_md = &service->metadata[md_field_id];
1687 // struct record_metadata *rec_md = record->metadata[md_field_id];
1688 static int match_metadata_local(struct conf_metadata *ser_md,
1689  struct record_metadata *rec_md0,
1690  char **values, int num_v)
1691 {
1692  int i;
1693  struct record_metadata *rec_md = rec_md0;
1694  for (i = 0; i < num_v; )
1695  {
1696  if (rec_md)
1697  {
1698  if (ser_md->type == Metadata_type_year
1699  || ser_md->type == Metadata_type_date)
1700  {
1701  int y = atoi(values[i]);
1702  if (y >= rec_md->data.number.min
1703  && y <= rec_md->data.number.max)
1704  break;
1705  }
1706  else
1707  {
1708  yaz_log(YLOG_DEBUG, "cmp: '%s' '%s'", rec_md->data.text.disp, values[i]);
1709  if (!strcmp(rec_md->data.text.disp, values[i]))
1710  {
1711  // Value equals, should not be filtered.
1712  break;
1713  }
1714  }
1715  rec_md = rec_md->next;
1716  }
1717  else
1718  {
1719  rec_md = rec_md0;
1720  i++;
1721  }
1722  }
1723  return i < num_v ? 1 : 0;
1724 }
1725 
1727 {
1728  int i;
1729  struct conf_service *service = se->service;
1730  int ret = 1;
1731  const char *name;
1732  const char *value;
1733  NMEM nmem_tmp = nmem_create();
1734 
1735  for (i = 0; (name = facet_limits_get(se->facet_limits, i, &value)); i++)
1736  {
1737  int j;
1738  for (j = 0; j < service->num_metadata; j++)
1739  {
1740  struct conf_metadata *md = service->metadata + j;
1741  if (!strcmp(md->name, name) && md->limitcluster)
1742  {
1743  char **values = 0;
1744  int num = 0;
1745  int md_field_id =
1747  md->limitcluster);
1748 
1749  if (md_field_id < 0)
1750  {
1751  ret = 0;
1752  break;
1753  }
1754 
1755  nmem_strsplit_escape2(nmem_tmp, "|", value, &values,
1756  &num, 1, '\\', 1);
1757 
1758  if (!match_metadata_local(&service->metadata[md_field_id],
1759  rec->metadata[md_field_id],
1760  values, num))
1761  {
1762  ret = 0;
1763  break;
1764  }
1765  }
1766  }
1767  }
1768  nmem_destroy(nmem_tmp);
1769  return ret;
1770 }
1771 
1772 // Skip record on non-zero
1773 static int check_limit_local(struct client *cl,
1774  struct record *record,
1775  int record_no)
1776 {
1777  int skip_record = 0;
1778  struct session *se = client_get_session(cl);
1779  struct conf_service *service = se->service;
1780  NMEM nmem_tmp = nmem_create();
1781  struct session_database *sdb = client_get_database(cl);
1782  int l = 0;
1783  while (!skip_record)
1784  {
1785  int md_field_id;
1786  char **values = 0;
1787  int num_v = 0;
1788  const char *name =
1789  client_get_facet_limit_local(cl, sdb, &l, nmem_tmp,
1790  &num_v, &values);
1791  if (!name)
1792  break;
1793 
1794  if (!strcmp(name, "*"))
1795  {
1796  for (md_field_id = 0; md_field_id < service->num_metadata;
1797  md_field_id++)
1798  {
1800  &service->metadata[md_field_id],
1801  record->metadata[md_field_id],
1802  values, num_v))
1803  break;
1804  }
1805  if (md_field_id == service->num_metadata)
1806  skip_record = 1;
1807  }
1808  else
1809  {
1810  md_field_id = conf_service_metadata_field_id(service, name);
1811  if (md_field_id < 0)
1812  {
1813  skip_record = 1;
1814  break;
1815  }
1816  if (!match_metadata_local(
1817  &service->metadata[md_field_id],
1818  record->metadata[md_field_id],
1819  values, num_v))
1820  {
1821  skip_record = 1;
1822  }
1823  }
1824  }
1825  nmem_destroy(nmem_tmp);
1826  return skip_record;
1827 }
1828 
1829 static int ingest_to_cluster(struct client *cl,
1830  xmlDoc *xdoc,
1831  xmlNode *root,
1832  int record_no,
1833  const char *mergekey_norm)
1834 {
1835  xmlNode *n;
1836  xmlChar *type = 0;
1837  xmlChar *value = 0;
1838  struct session *se = client_get_session(cl);
1839  struct conf_service *service = se->service;
1840  int term_factor = 1;
1841  struct record_cluster *cluster;
1842  struct record_metadata **metadata0;
1843  struct session_database *sdb = client_get_database(cl);
1844  struct record *record = record_create(se->nmem,
1845  service->num_metadata,
1846  service->num_sortkeys, cl,
1847  record_no);
1848 
1849  for (n = root->children; n; n = n->next)
1850  {
1851  if (type)
1852  xmlFree(type);
1853  if (value)
1854  xmlFree(value);
1855  type = value = 0;
1856 
1857  if (n->type != XML_ELEMENT_NODE)
1858  continue;
1859  if (!strcmp((const char *) n->name, "metadata"))
1860  {
1861  struct conf_metadata *ser_md = 0;
1862  struct record_metadata **wheretoput = 0;
1863  struct record_metadata *rec_md = 0;
1864  int md_field_id = -1;
1865 
1866  type = xmlGetProp(n, (xmlChar *) "type");
1867  value = xmlNodeListGetString(xdoc, n->children, 1);
1868  if (!type)
1869  continue;
1870  if (!value || !*value)
1871  {
1872  xmlChar *empty = xmlGetProp(n, (xmlChar *) "empty");
1873  if (!empty)
1874  continue;
1875  if (value)
1876  xmlFree(value);
1877  value = empty;
1878  }
1879  md_field_id
1880  = conf_service_metadata_field_id(service, (const char *) type);
1881  if (md_field_id < 0)
1882  {
1884  {
1885  session_log(se, YLOG_WARN,
1886  "Ignoring unknown metadata element: %s", type);
1887  }
1889  continue;
1890  }
1891 
1892  ser_md = &service->metadata[md_field_id];
1893 
1894  // non-merged metadata
1895  rec_md = record_metadata_init(se->nmem, (const char *) value,
1896  ser_md->type, n->properties);
1897  if (!rec_md)
1898  {
1899  session_log(se, YLOG_WARN, "bad metadata data '%s' "
1900  "for element '%s'", value, type);
1901  continue;
1902  }
1903  wheretoput = &record->metadata[md_field_id];
1904  while (*wheretoput)
1905  wheretoput = &(*wheretoput)->next;
1906  *wheretoput = rec_md;
1907  }
1908  }
1909 
1910  if (check_limit_local(cl, record, record_no))
1911  {
1912  session_log(se, YLOG_LOG, "Facet filtered out record no %d from %s",
1913  record_no, sdb->database->id);
1914  if (type)
1915  xmlFree(type);
1916  if (value)
1917  xmlFree(value);
1918  return -2;
1919  }
1920  cluster = reclist_insert(se->reclist, service, record,
1921  mergekey_norm, &se->total_merged);
1922  if (!cluster)
1923  return 0; // complete match with existing record
1924 
1925  {
1926  const char *use_term_factor_str =
1928  if (use_term_factor_str && use_term_factor_str[0] == '1')
1929  {
1930  int maxrecs = client_get_maxrecs(cl);
1931  int hits = (int) client_get_hits(cl);
1932  term_factor = MAX(hits, maxrecs) / MAX(1, maxrecs);
1933  assert(term_factor >= 1);
1934  session_log(se, YLOG_DEBUG, "Using term factor: %d (%d / %d)",
1935  term_factor, MAX(hits, maxrecs), MAX(1, maxrecs));
1936  }
1937  }
1938 
1939  if (global_parameters.dump_records)
1940  session_log(se, YLOG_LOG, "Cluster id %s from %s (#%d)", cluster->recid,
1941  sdb->database->id, record_no);
1942 
1943 
1944  relevance_newrec(se->relevance, cluster);
1945 
1946  // original metadata, to check if first existence of a field
1947  metadata0 = xmalloc(sizeof(*metadata0) * service->num_metadata);
1948  memcpy(metadata0, cluster->metadata,
1949  sizeof(*metadata0) * service->num_metadata);
1950 
1951  // now parsing XML record and adding data to cluster or record metadata
1952  for (n = root->children; n; n = n->next)
1953  {
1954  pp2_charset_token_t prt;
1955  if (type)
1956  xmlFree(type);
1957  if (value)
1958  xmlFree(value);
1959  type = value = 0;
1960 
1961  if (n->type != XML_ELEMENT_NODE)
1962  continue;
1963  if (!strcmp((const char *) n->name, "metadata"))
1964  {
1965  struct conf_metadata *ser_md = 0;
1966  struct conf_sortkey *ser_sk = 0;
1967  struct record_metadata **wheretoput = 0;
1968  struct record_metadata *rec_md = 0;
1969  int md_field_id = -1;
1970  int sk_field_id = -1;
1971  const char *rank;
1972  xmlChar *xml_rank;
1973 
1974  type = xmlGetProp(n, (xmlChar *) "type");
1975  value = xmlNodeListGetString(xdoc, n->children, 1);
1976 
1977  if (!type || !value || !*value)
1978  continue;
1979 
1980  md_field_id
1981  = conf_service_metadata_field_id(service, (const char *) type);
1982  if (md_field_id < 0)
1983  continue;
1984 
1985  ser_md = &service->metadata[md_field_id];
1986 
1987  if (ser_md->sortkey_offset >= 0)
1988  {
1989  sk_field_id = ser_md->sortkey_offset;
1990  ser_sk = &service->sortkeys[sk_field_id];
1991  }
1992 
1993  // merged metadata
1994  rec_md = record_metadata_init(se->nmem, (const char *) value,
1995  ser_md->type, 0);
1996 
1997  // see if the field was not in cluster already (from beginning)
1998 
1999  if (!rec_md)
2000  continue;
2001 
2002  xml_rank = xmlGetProp(n, (xmlChar *) "rank");
2003  rank = xml_rank ? (const char *) xml_rank : ser_md->rank;
2004 
2005  wheretoput = &cluster->metadata[md_field_id];
2006 
2007  if (ser_md->merge == Metadata_merge_first)
2008  {
2009  if (!metadata0[md_field_id])
2010  {
2011  while (*wheretoput)
2012  wheretoput = &(*wheretoput)->next;
2013  *wheretoput = rec_md;
2014  }
2015  }
2016  else if (ser_md->merge == Metadata_merge_unique)
2017  {
2018  while (*wheretoput)
2019  {
2020  if (!strcmp((const char *) (*wheretoput)->data.text.disp,
2021  rec_md->data.text.disp))
2022  break;
2023  wheretoput = &(*wheretoput)->next;
2024  }
2025  if (!*wheretoput)
2026  *wheretoput = rec_md;
2027  }
2028  else if (ser_md->merge == Metadata_merge_longest)
2029  {
2030  if (!*wheretoput
2031  || strlen(rec_md->data.text.disp)
2032  > strlen((*wheretoput)->data.text.disp))
2033  {
2034  *wheretoput = rec_md;
2035  if (ser_sk)
2036  {
2037  const char *sort_str = 0;
2038  int skip_article =
2040 
2041  if (!cluster->sortkeys[sk_field_id])
2042  cluster->sortkeys[sk_field_id] =
2043  nmem_malloc(se->nmem,
2044  sizeof(union data_types));
2045 
2046  prt =
2047  pp2_charset_token_create(service->charsets, "sort");
2048 
2049  pp2_charset_token_first(prt, rec_md->data.text.disp,
2050  skip_article);
2051 
2053 
2054  sort_str = pp2_get_sort(prt);
2055 
2056  cluster->sortkeys[sk_field_id]->text.disp =
2057  rec_md->data.text.disp;
2058  if (!sort_str)
2059  {
2060  sort_str = rec_md->data.text.disp;
2061  session_log(se, YLOG_WARN,
2062  "Could not make sortkey. Bug #1858");
2063  }
2064  cluster->sortkeys[sk_field_id]->text.sort =
2065  nmem_strdup(se->nmem, sort_str);
2067  }
2068  }
2069  }
2070  else if (ser_md->merge == Metadata_merge_all)
2071  {
2072  while (*wheretoput)
2073  wheretoput = &(*wheretoput)->next;
2074  *wheretoput = rec_md;
2075  }
2076  else if (ser_md->merge == Metadata_merge_range)
2077  {
2078  if (!*wheretoput)
2079  {
2080  *wheretoput = rec_md;
2081  if (ser_sk)
2082  cluster->sortkeys[sk_field_id]
2083  = &rec_md->data;
2084  }
2085  else
2086  {
2087  int this_min = rec_md->data.number.min;
2088  int this_max = rec_md->data.number.max;
2089  if (this_min < (*wheretoput)->data.number.min)
2090  (*wheretoput)->data.number.min = this_min;
2091  if (this_max > (*wheretoput)->data.number.max)
2092  (*wheretoput)->data.number.max = this_max;
2093  }
2094  }
2095 
2096  // ranking of _all_ fields enabled ...
2097  if (rank)
2098  {
2099  relevance_countwords(se->relevance, cluster,
2100  (char *) value, rank, ser_md->name);
2101  }
2102 
2103  // construct facets ... unless the client already has reported them
2104  if (ser_md->termlist && !client_has_facet(cl, (char *) type))
2105  {
2106  if (ser_md->type == Metadata_type_year)
2107  {
2108  char year[64];
2109  sprintf(year, "%d", rec_md->data.number.max);
2110 
2111  add_facet(se, (char *) type, year, term_factor);
2112  if (rec_md->data.number.max != rec_md->data.number.min)
2113  {
2114  sprintf(year, "%d", rec_md->data.number.min);
2115  add_facet(se, (char *) type, year, term_factor);
2116  }
2117  }
2118  else
2119  add_facet(se, (char *) type, (char *) value, term_factor);
2120  }
2121 
2122  // cleaning up
2123  if (xml_rank)
2124  xmlFree(xml_rank);
2125  xmlFree(type);
2126  xmlFree(value);
2127  type = value = 0;
2128  }
2129  else
2130  {
2132  session_log(se, YLOG_WARN,
2133  "Unexpected element in internal record: %s", n->name);
2135  }
2136  }
2137  if (type)
2138  xmlFree(type);
2139  if (value)
2140  xmlFree(value);
2141 
2142  xfree(metadata0);
2143  relevance_donerecord(se->relevance, cluster);
2144  se->total_records++;
2145 
2146  return 0;
2147 }
2148 
2149 void session_log(struct session *s, int level, const char *fmt, ...)
2150 {
2151  char buf[1024];
2152  va_list ap;
2153  va_start(ap, fmt);
2154 
2155  yaz_vsnprintf(buf, sizeof(buf)-30, fmt, ap);
2156  yaz_log(level, "Session %u: %s", s ? s->session_id : 0, buf);
2157 
2158  va_end(ap);
2159 }
2160 
2161 /*
2162  * Local variables:
2163  * c-basic-offset: 4
2164  * c-file-style: "Stroustrup"
2165  * indent-tabs-mode: nil
2166  * End:
2167  * vim: shiftwidth=4 tabstop=8 expandtab
2168  */
2169