pazpar2  1.14.1
relevance.c
Go to the documentation of this file.
1 /* This file is part of Pazpar2.
2  Copyright (C) Index Data
3 
4 Pazpar2 is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 
24 #include <assert.h>
25 #include <math.h>
26 #include <stdlib.h>
27 
28 #include "relevance.h"
29 #include "session.h"
30 #include "client.h"
31 #include "settings.h"
32 
33 #ifdef WIN32
34 #define log2(x) (log(x)/log(2))
35 #endif
36 
// Per-query relevance ranking state, created by relevance_create_ccl and
// released by relevance_destroy.
// NOTE(review): this listing omits the members at original lines 39-40, 43-45
// and 48. The cross-reference index names them as doc_frequency_vec,
// term_frequency_vec_tmp, entries, prt, rank_cluster and length_divide.
37 struct relevance
38 {
41  int *term_pos; // per-slot position marker, reset and updated by relevance_countwords
42  int vec_len; // number of vector slots: starts at 1, grows by one per query term
46  double follow_factor; // multiplier for the bonus given when a term follows the previous term
47  double lead_decay; // scales how fast a term's weight drops with its position in the field
49  NMEM nmem; // allocator that owns this struct and everything hanging off it
50  struct norm_client *norm; // head of the per-client normalization list (see findnorm)
51 };
52 
// One normalized query term. Entries form a singly linked list that
// pull_terms appends to, in query order.
53 struct word_entry {
54  const char *norm_str; // normalized token; compared against tokens from record fields
55  const char *display_str; // the query word the token was derived from; used in explain output
56  int termno; // slot of this term in the frequency vectors (1-based)
57  char *ccl_field; // copy of the term's CCL qualifier; NOTE(review): looks like it can be NULL - confirm
58  struct word_entry *next; // next term, or 0 at the end of the list
59 };
60 
61 // Structure to keep data for norm_client scores from one client
// Entries are chained from relevance.norm; findnorm creates them on demand.
// NOTE(review): this listing omits the struct header (original line 62) and
// the members at original lines 69 and 73; the cross-reference index names
// them as "int scorefield" and "struct norm_record *records".
63 {
64  int num; // number of the client
65  float max; // largest raw score seen so far for this client
66  float min; // smallest raw score seen so far for this client
67  int count; // number of records collected in the records list
68  const char *native_score; // textual score source; compared with "position" and "internal" in findnorm
70  float a,b; // Rn = a*R + b
71  struct client *client; // the client these statistics belong to
72  struct norm_client *next; // next client entry, or 0
74 };
75 
// Special values of norm_client.scorefield selecting where the raw score of
// a client's records is taken from (see setup_norm_record).
76 const int scorefield_none = -1; // Do not normalize anything, use tf/idf as is
77  // This is the old behavior, and the default
78 const int scorefield_internal = -2; // use our tf/idf, but normalize it
79 const int scorefield_position = -3; // fake a score based on the position
80 // Positive numbers indicate the field to be used for scoring.
// (setup_norm_record uses such a value as an index into record->metadata.)
81 
82 // A structure for each (sub)record. There is one list for each client
// The list head is norm_client.records; setup_norm_record prepends to it.
// NOTE(review): this listing omits the struct header (original line 83) and
// the member at original line 87; the cross-reference index names it as
// "struct record_cluster *clust".
84 {
85  struct record *record; // the (sub)record this entry scores
86  float score; // raw, not yet normalized, score of the record
88  struct norm_record *next; // next entry for the same client, or 0
89 };
90 
91 // Find the norm_client entry for this client, or create one if not there
// A new entry is allocated from rel->nmem, numbered one higher than the
// current list head (or 1 for the first), and pushed on the front of
// rel->norm. Returns the existing or the new entry.
// NOTE(review): this listing omits original lines 112-113, 115, 118, 120, 122
// and 126. The assignments to sdb, n->native_score and n->scorefield are
// among the missing lines, so the if/else chain below shows conditions only.
92 struct norm_client *findnorm( struct relevance *rel, struct client* client)
93 {
94  struct norm_client *n = rel->norm;
95  struct session_database *sdb;
// look for an existing entry first
96  while (n) {
97  if (n->client == client )
98  return n;
99  n = n->next;
100  }
101  n = nmem_malloc(rel->nmem, sizeof(struct norm_client) );
102  if ( rel->norm )
103  n->num = rel->norm->num +1;
104  else
105  n->num = 1;
106  n->count = 0;
107  n->max = 0.0;
108  n->min = 0.0;
109  n->client = client;
110  n->next = rel->norm;
111  rel->norm = n;
114  n->records = 0;
// NOTE(review): native_score is handed to "%s" here, before the NULL test on
// the following line - confirm it cannot be NULL at this point
116  yaz_log(YLOG_LOG,"Normalizing: Client %d uses '%s'", n->num, n->native_score );
117  if ( ! n->native_score || ! *n->native_score ) // not specified
119  else if ( strcmp(n->native_score,"position") == 0 )
121  else if ( strcmp(n->native_score,"internal") == 0 )
123  else
124  { // Get the field index for the score
125  struct session *se = client_get_session(client);
127  }
128  yaz_log(YLOG_LOG,"Normalizing: Client %d uses '%s' = %d",
129  n->num, n->native_score, n->scorefield );
130  return n;
131 }
132 
133 
134 // Add all records from a cluster into the list for that client, for normalizing later
135 static void setup_norm_record( struct relevance *rel, struct record_cluster *clust)
136 {
137  struct record *record;
138  for (record = clust->records; record; record = record->next)
139  {
140  struct norm_client *norm = findnorm(rel, record->client);
141  struct norm_record *rp;
142  if ( norm->scorefield == scorefield_none)
143  break; // not interested in normalizing this client
144  rp = nmem_malloc(rel->nmem, sizeof(struct norm_record) );
145  norm->count ++;
146  rp->next = norm->records;
147  norm->records = rp;
148  rp->clust = clust;
149  rp->record = record;
150  if ( norm->scorefield == scorefield_position )
151  rp->score = 1.0 / record->position;
152  else if ( norm->scorefield == scorefield_internal )
153  rp->score = clust->relevance_score; // the tf/idf for the whole cluster
154  // TODO - Get them for each record, merge later!
155  else
156  {
157  struct record_metadata *md = record->metadata[norm->scorefield];
158  rp->score = md->data.fnumber;
159  }
160  yaz_log(YLOG_LOG,"Got score for %d/%d : %f ",
161  norm->num, record->position, rp->score );
162  record -> score = rp->score;
163  if ( norm->count == 1 )
164  {
165  norm->max = rp->score;
166  norm->min = rp->score;
167  } else {
168  if ( rp->score > norm->max )
169  norm->max = rp->score;
170  if ( rp->score < norm->min )
171  norm->min = rp->score;
172  }
173  }
174 }
175 
176 // Calculate the squared sum of residuals, that is the difference from
177 // normalized values to the target curve, which is 1/n
178 static double squaresum( struct norm_record *rp, double a, double b)
179 {
180  double sum = 0.0;
181  for ( ; rp; rp = rp->next )
182  {
183  double target = 1.0 / rp->record->position;
184  double normscore = rp->score * a + b;
185  double diff = target - normscore;
186  sum += diff * diff;
187  }
188  return sum;
189 }
190 
191 // For each client, normalize scores
// For every entry in rel->norm this fits a linear mapping Rn = a*R + b by a
// step-wise search that minimizes squaresum() over the client's records,
// then writes the mapped score to each record and (scaled by 10000) to the
// record's cluster.
// NOTE(review): this listing omits original line 207, the second half of
// the condition that decides whether the fit is run at all.
192 static void normalize_scores(struct relevance *rel)
193 {
194  const int maxiterations = 1000;
195  const double enough = 100.0; // sets the number of decimals we are happy with
196  const double stepchange = 0.5; // reduction of the step size when finding middle
197  // 0.5 seems to be magical, much better than 0.4 or 0.6
198  struct norm_client *norm;
199  for ( norm = rel->norm; norm; norm = norm->next )
200  {
201  yaz_log(YLOG_LOG,"Normalizing client %d: scorefield=%d count=%d range=%f %f = %f",
202  norm->num, norm->scorefield, norm->count, norm->min,
203  norm->max, norm->max-norm->min);
204  norm->a = 1.0; // default normalizing factors, no change
205  norm->b = 0.0;
206  if ( norm->scorefield != scorefield_none &&
208  { // have something to normalize
209  double range = norm->max - norm->min;
210  int it = 0;
211  double a,b; // params to optimize
212  double as,bs; // step sizes
213  double chi;
214  char *branch = "?";
215  // initial guesses for the parameters
216  // Rmax = a * rmax + b # want to be 1.0
217  // Rmin = a * rmin + b # want to be 0.0
218  // Rmax - Rmin = a ( rmax - rmin ) # subtracting equations
219  // 1.0 - 0.0 = a ( rmax - rmin )
220  // a = 1 / range
221  // Rmin = a * rmin + b
222  // b = Rmin - a * rmin
223  // = 0.0 - 1/range * rmin
224  // = - rmin / range
225 
// NOTE(review): when norm->max is itself (close to) zero the fallback below
// still leaves range near zero and the divisions that follow blow up - confirm
226  if ( range < 1e-6 ) // practically zero
227  range = norm->max;
228  a = 1.0 / range;
229  b = -1.0 * norm->min / range;
230  // b = fabs(norm->min) / range;
231  as = a / 10;
232  bs = fabs(b) / 10;
233  chi = squaresum( norm->records, a,b);
234  yaz_log(YLOG_LOG,"Initial done: it=%d: a=%f / %f b=%f / %f chi = %f",
235  0, a, as, b, bs, chi );
236  while (it++ < maxiterations) // safeguard against things not converging
237  {
// evaluate the four neighbours one step away along each parameter
238  double aplus = squaresum(norm->records, a+as, b);
239  double aminus= squaresum(norm->records, a-as, b);
240  double bplus = squaresum(norm->records, a, b+bs);
241  double bminus= squaresum(norm->records, a, b-bs);
242  double prevchi = chi;
// move to the neighbour that is strictly best and better than the current
// point, and enlarge the step that produced it
243  if ( aplus < chi && aplus < aminus && aplus < bplus && aplus < bminus)
244  {
245  a = a + as;
246  chi = aplus;
247  as = as * (1.0 + stepchange);
248  branch = "aplus ";
249  }
250  else if ( aminus < chi && aminus < aplus && aminus < bplus && aminus < bminus)
251  {
252  a = a - as;
253  chi = aminus;
254  as = as * (1.0 + stepchange);
255  branch = "aminus";
256  }
257  else if ( bplus < chi && bplus < aplus && bplus < aminus && bplus < bminus)
258  {
259  b = b + bs;
260  chi = bplus;
261  bs = bs * (1.0 + stepchange);
262  branch = "bplus ";
263  }
264  else if ( bminus < chi && bminus < aplus && bminus < bplus && bminus < aminus)
265  {
266  b = b - bs;
267  chi = bminus;
268  branch = "bminus";
269  bs = bs * (1.0+stepchange);
270  }
271  else
272  { // a,b is the best so far, adjust one step size
273  // which one? The one that has the greatest effect to chi
274  // That is, the average of plus and minus is further away from chi
275  double adif = 0.5 * ( aplus + aminus ) - prevchi;
276  double bdif = 0.5 * ( bplus + bminus ) - prevchi;
277  if ( fabs(adif) > fabs(bdif) )
278  {
279  as = as * ( 1.0 - stepchange);
280  branch = "step a";
281  }
282  else
283  {
284  bs = bs * ( 1.0 - stepchange);
285  branch = "step b";
286  }
287  }
288  yaz_log(YLOG_LOG,"Fitting %s it=%d: a=%g %g b=%g %g chi=%g ap=%g am=%g, bp=%g bm=%g p=%g",
289  branch, it, a, as, b, bs, chi,
290  aplus, aminus, bplus, bminus, prevchi );
291  norm->a = a;
292  norm->b = b;
// stop once both steps are below 1/enough of their parameter's magnitude
// NOTE(review): with b == 0 (and therefore bs == 0) the second comparison is
// 0 < 0, so the loop can only end through maxiterations - confirm intended
293  if ( fabs(as) * enough < fabs(a) &&
294  fabs(bs) * enough < fabs(b) ) {
295  break; // not changing much any more
296 
297  }
298  }
299  yaz_log(YLOG_LOG,"Fitting done: it=%d: a=%g / %g b=%g / %g chi = %g",
300  it-1, a, as, b, bs, chi );
301  }
302 
303  if ( norm->scorefield != scorefield_none )
304  { // distribute the normalized scores to the records
305  struct norm_record *nr = norm->records;
306  for ( ; nr ; nr = nr->next ) {
307  double r = nr->score;
308  r = norm->a * r + norm -> b;
309  nr->clust->relevance_score = 10000 * r;
310  nr->record->score = r;
311  yaz_log(YLOG_LOG,"Normalized %f * %f + %f = %f",
312  nr->score, norm->a, norm->b, r );
313  // TODO - This keeps overwriting the cluster score in random order!
314  // Need to merge results better
315  }
316  }
317  } // client loop
318 }
319 
320 
321 static struct word_entry *word_entry_match(struct relevance *r,
322  const char *norm_str,
323  const char *rank, int *weight)
324 {
325  int i = 1;
326  struct word_entry *entries = r->entries;
327  for (; entries; entries = entries->next, i++)
328  {
329  if (*norm_str && !strcmp(norm_str, entries->norm_str))
330  {
331  const char *cp = 0;
332  int no_read = 0;
333  sscanf(rank, "%d%n", weight, &no_read);
334  rank += no_read;
335  while (*rank == ' ')
336  rank++;
337  if (no_read > 0 && (cp = strchr(rank, ' ')))
338  {
339  if ((cp - rank) == strlen(entries->ccl_field) &&
340  memcmp(entries->ccl_field, rank, cp - rank) == 0)
341  *weight = atoi(cp + 1);
342  }
343  return entries;
344  }
345  }
346  return 0;
347 }
348 
350  const char *words, const char *name,
351  WRBUF w_snippet)
352 {
// Copies the tokens of words to w_snippet (XML-escaped) and wraps every run
// of consecutive tokens that match a query term in <match>...</match>.
// Returns the number of such runs.
// NOTE(review): the function header (original line 349) is not part of this
// listing; the cross-reference index gives it as
// int relevance_snippet(struct relevance *r, const char *words, ...).
353  int no = 0;
354  const char *norm_str;
355  int highlight = 0;
356 
357  pp2_charset_token_first(r->prt, words, 0);
358  while ((norm_str = pp2_charset_token_next(r->prt)))
359  {
360  size_t org_start, org_len;
361  struct word_entry *entries = r->entries;
// NOTE(review): i is never initialized and never read, only incremented
362  int i;
363 
364  pp2_get_org(r->prt, &org_start, &org_len);
// entries ends up non-null exactly when the token equals some query term
365  for (; entries; entries = entries->next, i++)
366  {
367  if (*norm_str && !strcmp(norm_str, entries->norm_str))
368  break;
369  }
370  if (entries)
371  {
372  if (!highlight)
373  {
// first matching token of a run: open the tag and count the run
374  highlight = 1;
375  wrbuf_puts(w_snippet, "<match>");
376  no++;
377  }
378  }
379  else
380  {
381  if (highlight)
382  {
383  highlight = 0;
384  wrbuf_puts(w_snippet, "</match>");
385  }
386  }
// emit the original text of the token, located via pp2_get_org
387  wrbuf_xmlputs_n(w_snippet, words + org_start, org_len);
388  }
// close a run that extends to the end of the input
389  if (highlight)
390  wrbuf_puts(w_snippet, "</match>");
391  if (no)
392  {
393  yaz_log(YLOG_DEBUG, "SNIPPET match: %s", wrbuf_cstr(w_snippet));
394  }
395  return no;
396 }
397 
398 void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
399  const char *words, const char *rank,
400  const char *name)
401 {
402  int *w = r->term_frequency_vec_tmp;
403  const char *norm_str;
404  int i, length = 0;
405  double lead_decay = r->lead_decay;
406  struct word_entry *e;
407  WRBUF wr = cluster->relevance_explain1;
408  int printed_about_field = 0;
409 
410  pp2_charset_token_first(r->prt, words, 0);
411  for (e = r->entries, i = 1; i < r->vec_len; i++, e = e->next)
412  {
413  w[i] = 0;
414  r->term_pos[i] = 0;
415  }
416 
417  assert(rank);
418  while ((norm_str = pp2_charset_token_next(r->prt)))
419  {
420  int local_weight = 0;
421  e = word_entry_match(r, norm_str, rank, &local_weight);
422  if (e)
423  {
424  int res = e->termno;
425  int j;
426 
427  if (!printed_about_field)
428  {
429  printed_about_field = 1;
430  wrbuf_printf(wr, "field=%s content=", name);
431  if (strlen(words) > 50)
432  {
433  wrbuf_xmlputs_n(wr, words, 49);
434  wrbuf_puts(wr, " ...");
435  }
436  else
437  wrbuf_xmlputs(wr, words);
438  wrbuf_puts(wr, ";\n");
439  }
440  assert(res < r->vec_len);
441  w[res] += local_weight / (1 + log2(1 + lead_decay * length));
442  wrbuf_printf(wr, "%s: w[%d] += w(%d) / "
443  "(1+log2(1+lead_decay(%f) * length(%d)));\n",
444  e->display_str, res, local_weight, lead_decay, length);
445  j = res - 1;
446  if (j > 0 && r->term_pos[j])
447  {
448  int d = length + 1 - r->term_pos[j];
449  wrbuf_printf(wr, "%s: w[%d] += w[%d](%d) * follow(%f) / "
450  "(1+log2(d(%d));\n",
451  e->display_str, res, res, w[res],
452  r->follow_factor, d);
453  w[res] += w[res] * r->follow_factor / (1 + log2(d));
454  }
455  for (j = 0; j < r->vec_len; j++)
456  r->term_pos[j] = j < res ? 0 : length + 1;
457  }
458  length++;
459  }
460 
461  for (e = r->entries, i = 1; i < r->vec_len; i++, e = e->next)
462  {
463  if (length == 0 || w[i] == 0)
464  continue;
465  wrbuf_printf(wr, "%s: tf[%d] += w[%d](%d)", e->display_str, i, i, w[i]);
466  switch (r->length_divide)
467  {
468  case 0:
469  cluster->term_frequency_vecf[i] += (double) w[i];
470  break;
471  case 1:
472  wrbuf_printf(wr, " / log2(1+length(%d))", length);
473  cluster->term_frequency_vecf[i] +=
474  (double) w[i] / log2(1 + length);
475  break;
476  case 2:
477  wrbuf_printf(wr, " / length(%d)", length);
478  cluster->term_frequency_vecf[i] += (double) w[i] / length;
479  }
480  cluster->term_frequency_vec[i] += w[i];
481  wrbuf_printf(wr, " (%f);\n", cluster->term_frequency_vecf[i]);
482  }
483 
484  cluster->term_frequency_vec[0] += length;
485 }
486 
487 static void pull_terms(struct relevance *res, struct ccl_rpn_node *n)
488 {
489  char **words;
490  int numwords;
491  char *ccl_field;
492  int i;
493 
494  switch (n->kind)
495  {
496  case CCL_RPN_AND:
497  case CCL_RPN_OR:
498  case CCL_RPN_NOT:
499  case CCL_RPN_PROX:
500  pull_terms(res, n->u.p[0]);
501  pull_terms(res, n->u.p[1]);
502  break;
503  case CCL_RPN_TERM:
504  nmem_strsplit(res->nmem, " ", n->u.t.term, &words, &numwords);
505  for (i = 0; i < numwords; i++)
506  {
507  const char *norm_str;
508 
509  ccl_field = nmem_strdup_null(res->nmem, n->u.t.qual);
510 
511  pp2_charset_token_first(res->prt, words[i], 0);
512  while ((norm_str = pp2_charset_token_next(res->prt)))
513  {
514  struct word_entry **e = &res->entries;
515  while (*e)
516  e = &(*e)->next;
517  *e = nmem_malloc(res->nmem, sizeof(**e));
518  (*e)->norm_str = nmem_strdup(res->nmem, norm_str);
519  (*e)->ccl_field = ccl_field;
520  (*e)->termno = res->vec_len++;
521  (*e)->display_str = nmem_strdup(res->nmem, words[i]);
522  (*e)->next = 0;
523  }
524  }
525  break;
526  default:
527  break;
528  }
529 }
530 void relevance_clear(struct relevance *r)
531 {
532  if (r)
533  {
534  int i;
535  for (i = 0; i < r->vec_len; i++)
536  r->doc_frequency_vec[i] = 0;
537  }
538 }
539 
541  struct ccl_rpn_node *query,
542  int rank_cluster,
543  double follow_factor, double lead_decay,
544  int length_divide)
545 {
// Builds the relevance state for one query: creates a private NMEM, collects
// the query terms with pull_terms and sizes the work vectors to the number
// of terms plus one. Returns the new state; release it with relevance_destroy.
// NOTE(review): the function header (original line 540) and original lines
// 553, 555 and 564 are not part of this listing; the stores of
// follow_factor and length_divide and the left-hand side of the
// "worker array" assignment are among them.
546  NMEM nmem = nmem_create();
// the state itself lives in the NMEM it owns
547  struct relevance *res = nmem_malloc(nmem, sizeof(*res));
548 
549  res->nmem = nmem;
550  res->entries = 0;
// slot 0 of the vectors is reserved, so term numbers start at 1
551  res->vec_len = 1;
552  res->rank_cluster = rank_cluster;
554  res->lead_decay = lead_decay;
556  res->norm = 0;
557  res->prt = pp2_charset_token_create(pft, "relevance");
558 
// fills res->entries and leaves res->vec_len at number of terms + 1
559  pull_terms(res, query);
560 
561  res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
562 
563  // worker array
565  nmem_malloc(res->nmem,
566  res->vec_len * sizeof(*res->term_frequency_vec_tmp));
567 
568  res->term_pos =
569  nmem_malloc(res->nmem, res->vec_len * sizeof(*res->term_pos));
570 
// zero the document frequency counters
571  relevance_clear(res);
572  return res;
573 }
574 
575 void relevance_destroy(struct relevance **rp)
576 {
577  if (*rp)
578  {
579  pp2_charset_token_destroy((*rp)->prt);
580  nmem_destroy((*rp)->nmem);
581  *rp = 0;
582  }
583 }
584 
585 void relevance_mergerec(struct relevance *r, struct record_cluster *dst,
586  const struct record_cluster *src)
587 {
588  int i;
589 
590  for (i = 0; i < r->vec_len; i++)
591  dst->term_frequency_vec[i] += src->term_frequency_vec[i];
592 
593  for (i = 0; i < r->vec_len; i++)
594  dst->term_frequency_vecf[i] += src->term_frequency_vecf[i];
595 }
596 
597 void relevance_newrec(struct relevance *r, struct record_cluster *rec)
598 {
599  int i;
600 
601  // term frequency [1,..] . [0] is total length of all fields
602  rec->term_frequency_vec =
603  nmem_malloc(r->nmem,
604  r->vec_len * sizeof(*rec->term_frequency_vec));
605  for (i = 0; i < r->vec_len; i++)
606  rec->term_frequency_vec[i] = 0;
607 
608  // term frequency divided by length of field [1,...]
609  rec->term_frequency_vecf =
610  nmem_malloc(r->nmem,
611  r->vec_len * sizeof(*rec->term_frequency_vecf));
612  for (i = 0; i < r->vec_len; i++)
613  rec->term_frequency_vecf[i] = 0.0;
614 }
615 
616 void relevance_donerecord(struct relevance *r, struct record_cluster *cluster)
617 {
618  int i;
619 
620  for (i = 1; i < r->vec_len; i++)
621  if (cluster->term_frequency_vec[i] > 0)
622  r->doc_frequency_vec[i]++;
623 
624  r->doc_frequency_vec[0]++;
625 }
626 
627 
628 
629 // Prepare for a relevance-sorted read
// Computes an idf value per query term, derives a tf*idf score for each
// cluster and stores it in relevance_score, registers the cluster's records
// for normalization (setup_norm_record) and finally runs normalize_scores.
// Each cluster's relevance_explain2 buffer is rewritten with a trace.
// NOTE(review): the function header (original line 630) and original lines
// 635, 655, 703 and 705 are not part of this listing; the declaration and
// fetch of rec is among them. The cross-reference index gives the signature
// as void relevance_prepare_read(struct relevance *rel, struct reclist *reclist).
631 {
632  int i;
// scratch vector, freed at the end; slot 0 is never written or read here
633  float *idfvec = xmalloc(rel->vec_len * sizeof(float));
634 
636 
637  // Calculate document frequency vector for each term.
638  for (i = 1; i < rel->vec_len; i++)
639  {
640  if (!rel->doc_frequency_vec[i])
641  idfvec[i] = 0;
642  else
643  {
644  /* add one to the numerator of idf(t,D) to ensure a value > 0 */
645  idfvec[i] = log((float) (1 + rel->doc_frequency_vec[0]) /
646  rel->doc_frequency_vec[i]);
647  }
648  }
649  // Calculate relevance for each document (cluster)
650  while (1)
651  {
652  int relevance = 0;
653  WRBUF w;
654  struct word_entry *e = rel->entries;
656  if (!rec)
657  break;
658  w = rec->relevance_explain2;
659  wrbuf_rewind(w);
660  wrbuf_puts(w, "relevance = 0;\n");
// e walks the term list in step with i, for the display string only
661  for (i = 1; i < rel->vec_len; i++)
662  {
663  float termfreq = (float) rec->term_frequency_vecf[i];
664  int add = 100000 * termfreq * idfvec[i];
665 
666  wrbuf_printf(w, "idf[%d] = log(((1 + total(%d))/termoccur(%d));\n",
667  i, rel->doc_frequency_vec[0],
668  rel->doc_frequency_vec[i]);
669  wrbuf_printf(w, "%s: relevance += 100000 * tf[%d](%f) * "
670  "idf[%d](%f) (%d);\n",
671  e->display_str, i, termfreq, i, idfvec[i], add);
672  relevance += add;
673  e = e->next;
674  }
// without rank_cluster the summed score is averaged over the cluster's records
675  if (!rel->rank_cluster)
676  {
677  struct record *record;
678  int cluster_size = 0;
679 
680  for (record = rec->records; record; record = record->next)
681  cluster_size++;
682 
683  wrbuf_printf(w, "score = relevance(%d)/cluster_size(%d);\n",
684  relevance, cluster_size);
// NOTE(review): integer division by zero if a cluster can have no records - confirm
685  relevance /= cluster_size;
686  }
687  else
688  {
689  wrbuf_printf(w, "score = relevance(%d);\n", relevance);
690  }
691  rec->relevance_score = relevance;
692 
693  // Build the normalizing structures
694  // List of (sub)records for each target
695  setup_norm_record( rel, rec );
696 
697  } // cluster loop
698 
699  normalize_scores(rel);
700 
701  // TODO - Calculate the cluster scores from individual records
702  // At the moment the record scoring puts one of them in the cluster...
704 
706  xfree(idfvec);
707 
708 }
709 
710 /*
711  * Local variables:
712  * c-basic-offset: 4
713  * c-file-style: "Stroustrup"
714  * indent-tabs-mode: nil
715  * End:
716  * vim: shiftwidth=4 tabstop=8 expandtab
717  */
718 
const char * pp2_charset_token_next(pp2_charset_token_t prt)
Definition: charsets.c:360
pp2_charset_token_t pp2_charset_token_create(pp2_charset_fact_t pft, const char *id)
Definition: charsets.c:282
void pp2_get_org(pp2_charset_token_t prt, size_t *start, size_t *len)
Definition: charsets.c:376
void pp2_charset_token_first(pp2_charset_token_t prt, const char *buf, int skip_article)
Definition: charsets.c:314
void pp2_charset_token_destroy(pp2_charset_token_t prt)
Definition: charsets.c:346
struct session * client_get_session(struct client *cl)
Definition: client.c:256
struct session_database * client_get_database(struct client *cl)
Definition: client.c:251
Z39.50 client.
char * name
int conf_service_metadata_field_id(struct conf_service *service, const char *name)
void reclist_enter(struct reclist *l)
Definition: reclists.c:343
void reclist_leave(struct reclist *l)
Definition: reclists.c:350
struct record_cluster * reclist_read_record(struct reclist *l)
Definition: reclists.c:331
void reclist_rewind(struct reclist *l)
Definition: reclists.c:356
struct relevance * relevance_create_ccl(pp2_charset_fact_t pft, struct ccl_rpn_node *query, int rank_cluster, double follow_factor, double lead_decay, int length_divide)
Definition: relevance.c:540
void relevance_clear(struct relevance *r)
Definition: relevance.c:530
const int scorefield_position
Definition: relevance.c:79
static void normalize_scores(struct relevance *rel)
Definition: relevance.c:192
const int scorefield_internal
Definition: relevance.c:78
static void pull_terms(struct relevance *res, struct ccl_rpn_node *n)
Definition: relevance.c:487
void relevance_donerecord(struct relevance *r, struct record_cluster *cluster)
Definition: relevance.c:616
void relevance_destroy(struct relevance **rp)
Definition: relevance.c:575
void relevance_newrec(struct relevance *r, struct record_cluster *rec)
Definition: relevance.c:597
const int scorefield_none
Definition: relevance.c:76
static double squaresum(struct norm_record *rp, double a, double b)
Definition: relevance.c:178
int relevance_snippet(struct relevance *r, const char *words, const char *name, WRBUF w_snippet)
Definition: relevance.c:349
void relevance_countwords(struct relevance *r, struct record_cluster *cluster, const char *words, const char *rank, const char *name)
Definition: relevance.c:398
void relevance_mergerec(struct relevance *r, struct record_cluster *dst, const struct record_cluster *src)
Definition: relevance.c:585
static void setup_norm_record(struct relevance *rel, struct record_cluster *clust)
Definition: relevance.c:135
static struct word_entry * word_entry_match(struct relevance *r, const char *norm_str, const char *rank, int *weight)
Definition: relevance.c:321
void relevance_prepare_read(struct relevance *rel, struct reclist *reclist)
Definition: relevance.c:630
struct norm_client * findnorm(struct relevance *rel, struct client *client)
Definition: relevance.c:92
const char * session_setting_oneval(struct session_database *db, int offset)
Definition: session.c:433
#define PZ_NATIVE_SCORE
Definition: settings.h:59
Represents client state for a connection to one search target.
Definition: client.c:99
float a
Definition: relevance.c:70
struct norm_client * next
Definition: relevance.c:72
float max
Definition: relevance.c:65
struct client * client
Definition: relevance.c:71
float b
Definition: relevance.c:70
float min
Definition: relevance.c:66
struct norm_record * records
Definition: relevance.c:73
int scorefield
Definition: relevance.c:69
const char * native_score
Definition: relevance.c:68
struct record_cluster * clust
Definition: relevance.c:87
float score
Definition: relevance.c:86
struct norm_record * next
Definition: relevance.c:88
struct record * record
Definition: relevance.c:85
WRBUF relevance_explain2
Definition: record.h:99
float * term_frequency_vecf
Definition: record.h:94
WRBUF relevance_explain1
Definition: record.h:98
int relevance_score
Definition: record.h:92
int * term_frequency_vec
Definition: record.h:93
struct record * records
Definition: record.h:100
union data_types data
Definition: record.h:49
Definition: record.h:60
int position
Definition: record.h:69
struct record * next
Definition: record.h:67
struct client * client
Definition: record.h:61
struct record_metadata ** metadata
Definition: record.h:63
double score
Definition: record.h:71
pp2_charset_token_t prt
Definition: relevance.c:44
int * doc_frequency_vec
Definition: relevance.c:39
double lead_decay
Definition: relevance.c:47
int * term_pos
Definition: relevance.c:41
struct norm_client * norm
Definition: relevance.c:50
struct word_entry * entries
Definition: relevance.c:43
int vec_len
Definition: relevance.c:42
int * term_frequency_vec_tmp
Definition: relevance.c:40
int length_divide
Definition: relevance.c:48
NMEM nmem
Definition: relevance.c:49
double follow_factor
Definition: relevance.c:46
int rank_cluster
Definition: relevance.c:45
struct conf_service * service
Definition: session.h:93
Definition: relevance.c:53
const char * norm_str
Definition: relevance.c:54
const char * display_str
Definition: relevance.c:55
int termno
Definition: relevance.c:56
char * ccl_field
Definition: relevance.c:57
struct word_entry * next
Definition: relevance.c:58
double fnumber
Definition: record.h:38