IDZEBRA  2.2.7
Data Structures | Macros | Functions | Variables
extract.c File Reference

Indexes records and extracts tokens for indexing and sorting. More...

#include <stdio.h>
#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include "index.h"
#include "orddict.h"
#include <direntz.h>
#include <charmap.h>
#include <yaz/snprintf.h>

Go to the source code of this file.

Data Structures

struct  snip_rec_info
 
struct  recordLogInfo
 

Macros

#define FILE_MATCH_BLANK   "\t "
 

Functions

static void extract_flush_record_keys2 (ZebraHandle zh, zint sysno, zebra_rec_keys_t ins_keys, zint ins_rank, zebra_rec_keys_t del_keys, zint del_rank)
 
static void zebra_init_log_level (void)
 
static WRBUF wrbuf_hex_str (const char *cstr)
 
static void extract_flush_sort_keys (ZebraHandle zh, zint sysno, int cmd, zebra_rec_keys_t skp)
 
static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid)
 
static void extract_token_add (RecWord *p)
 top-level indexing handler for recctrl system More...
 
static void check_log_limit (ZebraHandle zh)
 
static void logRecord (ZebraHandle zh)
 
static void init_extractCtrl (ZebraHandle zh, struct recExtractCtrl *ctrl)
 
static void extract_add_index_string (RecWord *p, zinfo_index_category_t cat, const char *str, int length)
 
static void extract_set_store_data_prepare (struct recExtractCtrl *p)
 
static void extract_init (struct recExtractCtrl *p, RecWord *w)
 
static int parse_complete_field (RecWord *p, zebra_map_t zm, char *buf)
 
static void snippet_add_complete_field (RecWord *p, int ord, zebra_map_t zm)
 
static void snippet_add_incomplete_field (RecWord *p, int ord, zebra_map_t zm)
 
static void snippet_add_icu (RecWord *p, int ord, zebra_map_t zm)
 
static void snippet_token_add (RecWord *p)
 
static void snippet_schema_add (struct recExtractCtrl *p, Odr_oid *oid)
 
void extract_snippet (ZebraHandle zh, zebra_snippets *sn, struct ZebraRecStream *stream, RecType rt, void *recTypeClientData)
 
static void searchRecordKey (ZebraHandle zh, zebra_rec_keys_t reckeys, const char *index_name, const char **ws, int ws_length)
 
static char * get_match_from_spec (ZebraHandle zh, zebra_rec_keys_t reckeys, const char *fname, const char *spec)
 
static void all_matches_add (struct recExtractCtrl *ctrl, zint record_id, zint sysno)
 add the always-matches index entry and map to real record ID More...
 
ZEBRA_RES zebra_extract_records_stream (ZebraHandle zh, struct ZebraRecStream *stream, enum zebra_recctrl_action_t action, const char *recordType, zint *sysno, const char *match_criteria, const char *fname, RecType recType, void *recTypeClientData)
 extracts records from stream More...
 
ZEBRA_RES zebra_extract_file (ZebraHandle zh, zint *sysno, const char *fname, enum zebra_recctrl_action_t action)
 
ZEBRA_RES zebra_buffer_extract_record (ZebraHandle zh, const char *buf, size_t buf_size, enum zebra_recctrl_action_t action, const char *recordType, zint *sysno, const char *match_criteria, const char *fname)
 
static ZEBRA_RES zebra_extract_record_stream (ZebraHandle zh, struct ZebraRecStream *stream, enum zebra_recctrl_action_t action, const char *recordType, zint *sysno, const char *match_criteria, const char *fname, RecType recType, void *recTypeClientData, int *more)
 
ZEBRA_RES zebra_extract_explain (void *handle, Record rec, data1_node *n)
 
void zebra_it_key_str_dump (ZebraHandle zh, struct it_key *key, const char *str, size_t slen, NMEM nmem, int level)
 
void extract_rec_keys_log (ZebraHandle zh, int is_insert, zebra_rec_keys_t reckeys, int level)
 
void extract_rec_keys_adjust (ZebraHandle zh, int is_insert, zebra_rec_keys_t reckeys)
 
ZEBRA_RES zebra_rec_keys_to_snippets1 (ZebraHandle zh, zebra_rec_keys_t reckeys, zebra_snippets *snippets)
 
void print_rec_keys (ZebraHandle zh, zebra_rec_keys_t reckeys)
 
static void extract_add_sort_string (RecWord *p, const char *str, int length)
 
static void extract_add_staticrank_string (RecWord *p, const char *str, int length)
 
static void extract_add_string (RecWord *p, zebra_map_t zm, const char *string, int length)
 
static void extract_add_incomplete_field (RecWord *p, zebra_map_t zm)
 
static void extract_add_complete_field (RecWord *p, zebra_map_t zm)
 
static void extract_add_icu (RecWord *p, zebra_map_t zm)
 
static void extract_set_store_data_cb (struct recExtractCtrl *p, void *buf, size_t sz)
 

Variables

static int log_level_extract = 0
 
static int log_level_details = 0
 
static int log_level_initialized = 0
 

Detailed Description

Indexes records and extracts tokens for indexing and sorting.

Definition in file extract.c.

Macro Definition Documentation

◆ FILE_MATCH_BLANK

#define FILE_MATCH_BLANK   "\t "

Definition at line 403 of file extract.c.

Function Documentation

◆ all_matches_add()

static void all_matches_add ( struct recExtractCtrl *  ctrl,
zint  record_id,
zint  sysno 
)
static

add the always-matches index entry and map to real record ID

Parameters
ctrl: record control
record_id: custom record ID
sysno: system record ID

This function serves two purposes: it adds the always-matches entry, and it makes a pointer from the custom record ID (if defined) back to the system record ID (sysno). See zebra_recid_to_sysno.

Definition at line 580 of file extract.c.

References extract_add_index_string(), extract_init(), RecWord::index_name, RecWord::index_type, RecWord::record_id, RecWord::seqno, and zinfo_index_category_alwaysmatches.

Referenced by zebra_extract_record_stream().

◆ check_log_limit()

static void check_log_limit ( ZebraHandle  zh)
static

◆ extract_add_complete_field()

static void extract_add_complete_field ( RecWord *  p,
zebra_map_t  zm 
)
static

Definition at line 1723 of file extract.c.

References extract_add_string(), IT_MAX_WORD, parse_complete_field(), and RecWord::seqno.

Referenced by extract_token_add().

◆ extract_add_icu()

static void extract_add_icu ( RecWord *  p,
zebra_map_t  zm 
)
static

◆ extract_add_incomplete_field()

static void extract_add_incomplete_field ( RecWord *  p,
zebra_map_t  zm 
)
static

◆ extract_add_index_string()

static void extract_add_index_string ( RecWord *  p,
zinfo_index_category_t  cat,
const char *  str,
int  length 
)
static

◆ extract_add_sort_string()

static void extract_add_sort_string ( RecWord *  p,
const char *  str,
int  length 
)
static

◆ extract_add_staticrank_string()

static void extract_add_staticrank_string ( RecWord *  p,
const char *  str,
int  length 
)
static

Definition at line 1612 of file extract.c.

References atozint(), RecWord::extractCtrl, and recExtractCtrl::staticrank.

Referenced by extract_add_string().

◆ extract_add_string()

static void extract_add_string ( RecWord *  p,
zebra_map_t  zm,
const char *  string,
int  length 
)
static

◆ extract_flush_record_keys2()

static void extract_flush_record_keys2 ( ZebraHandle  zh,
zint  sysno,
zebra_rec_keys_t  ins_keys,
zint  ins_rank,
zebra_rec_keys_t  del_keys,
zint  del_rank 
)
static

◆ extract_flush_sort_keys()

static void extract_flush_sort_keys ( ZebraHandle  zh,
zint  sysno,
int  cmd,
zebra_rec_keys_t  skp 
)
static

◆ extract_init()

static void extract_init ( struct recExtractCtrl *  p,
RecWord *  w 
)
static

◆ extract_rec_keys_adjust()

void extract_rec_keys_adjust ( ZebraHandle  zh,
int  is_insert,
zebra_rec_keys_t  reckeys 
)

◆ extract_rec_keys_log()

void extract_rec_keys_log ( ZebraHandle  zh,
int  is_insert,
zebra_rec_keys_t  reckeys,
int  level 
)

◆ extract_schema_add()

static void extract_schema_add ( struct recExtractCtrl *  p,
Odr_oid *  oid 
)
static

◆ extract_set_store_data_cb()

static void extract_set_store_data_cb ( struct recExtractCtrl *  p,
void *  buf,
size_t  sz 
)
static

◆ extract_set_store_data_prepare()

static void extract_set_store_data_prepare ( struct recExtractCtrl *  p)
static

◆ extract_snippet()

void extract_snippet ( ZebraHandle  zh,
zebra_snippets *  sn,
struct ZebraRecStream *  stream,
RecType  rt,
void *  recTypeClientData 
)

◆ extract_token_add()

static void extract_token_add ( RecWord *  p)
static

top-level indexing handler for recctrl system

Parameters
p: token data to be indexed

Call sequence: extract_token_add → extract_add_complete_field / extract_add_incomplete_field / extract_add_icu → extract_add_string →

extract_add_index_string or extract_add_sort_string or extract_add_staticrank_string

Definition at line 1767 of file extract.c.

References extract_add_complete_field(), extract_add_icu(), extract_add_incomplete_field(), RecWord::extractCtrl, recExtractCtrl::handle, RecWord::index_name, RecWord::index_type, log_level_details, zebra_session::reg, RecWord::seqno, RecWord::term_buf, RecWord::term_len, zebra_map_get_or_add(), zebra_register::zebra_maps, zebra_maps_is_complete(), zebra_maps_is_icu(), and ZINT_FORMAT.

Referenced by zebra_extract_explain(), and zebra_extract_record_stream().

◆ get_match_from_spec()

static char* get_match_from_spec ( ZebraHandle  zh,
zebra_rec_keys_t  reckeys,
const char *  fname,
const char *  spec 
)
static

◆ init_extractCtrl()

static void init_extractCtrl ( ZebraHandle  zh,
struct recExtractCtrl *  ctrl 
)
static

◆ logRecord()

static void logRecord ( ZebraHandle  zh)
static

◆ parse_complete_field()

static int parse_complete_field ( RecWord *  p,
zebra_map_t  zm,
char *  buf 
)
static

◆ print_rec_keys()

void print_rec_keys ( ZebraHandle  zh,
zebra_rec_keys_t  reckeys 
)

◆ searchRecordKey()

static void searchRecordKey ( ZebraHandle  zh,
zebra_rec_keys_t  reckeys,
const char *  index_name,
const char **  ws,
int  ws_length 
)
static

◆ snippet_add_complete_field()

static void snippet_add_complete_field ( RecWord *  p,
int  ord,
zebra_map_t  zm 
)
static

◆ snippet_add_icu()

static void snippet_add_icu ( RecWord *  p,
int  ord,
zebra_map_t  zm 
)
static

◆ snippet_add_incomplete_field()

static void snippet_add_incomplete_field ( RecWord *  p,
int  ord,
zebra_map_t  zm 
)
static

◆ snippet_schema_add()

static void snippet_schema_add ( struct recExtractCtrl *  p,
Odr_oid *  oid 
)
static

Definition at line 317 of file extract.c.

Referenced by extract_snippet().

◆ snippet_token_add()

static void snippet_token_add ( RecWord *  p)
static

◆ wrbuf_hex_str()

static WRBUF wrbuf_hex_str ( const char *  cstr)
static

Definition at line 66 of file extract.c.

Referenced by get_match_from_spec(), and zebra_extract_record_stream().

◆ zebra_buffer_extract_record()

ZEBRA_RES zebra_buffer_extract_record ( ZebraHandle  zh,
const char *  buf,
size_t  buf_size,
enum zebra_recctrl_action_t  action,
const char *  recordType,
zint *  sysno,
const char *  match_criteria,
const char *  fname 
)

◆ zebra_extract_explain()

ZEBRA_RES zebra_extract_explain ( void *  handle,
Record  rec,
data1_node *  n 
)

◆ zebra_extract_file()

ZEBRA_RES zebra_extract_file ( ZebraHandle  zh,
zint *  sysno,
const char *  fname,
enum zebra_recctrl_action_t  action 
)

◆ zebra_extract_record_stream()

static ZEBRA_RES zebra_extract_record_stream ( ZebraHandle  zh,
struct ZebraRecStream *  stream,
enum zebra_recctrl_action_t  action,
const char *  recordType,
zint *  sysno,
const char *  match_criteria,
const char *  fname,
RecType  recType,
void *  recTypeClientData,
int *  more 
)
static

Definition at line 778 of file extract.c.

References recExtractCtrl::action, action_a_delete, action_delete, action_insert, action_replace, action_update, all_matches_add(), zebra_session::basenames, recExtractCtrl::dh, zebra_register::dh, dict_delete_ord(), dict_insert_ord(), dict_lookup_ord(), ZebraRecStream::endf, recType::extract, extract_flush_record_keys2(), extract_flush_sort_keys(), extract_init(), extract_schema_add(), extract_set_store_data_prepare(), extract_token_add(), recExtractCtrl::first_record, get_match_from_spec(), recExtractCtrl::handle, record_info::info, recExtractCtrl::init, init_extractCtrl(), zebra_register::keys, log_level_extract, logRecord(), zebra_session::m_explain_database, zebra_session::m_file_verbose_limit, zebra_session::m_flag_rw, zebra_session::m_record_id, zebra_session::m_store_data, zebra_session::m_store_keys, recExtractCtrl::match_criteria, zebra_register::matchDict, ZebraRecStream::readf, rec_del(), rec_free(), rec_get(), rec_init_attr(), rec_new(), rec_put(), rec_strdup(), RECCTRL_EXTRACT_EOF, RECCTRL_EXTRACT_ERROR_GENERIC, RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER, RECCTRL_EXTRACT_OK, RECCTRL_EXTRACT_SKIP, recInfo_databaseName, recInfo_delKeys, recInfo_filename, recInfo_fileType, recInfo_sortKeys, recInfo_storeData, RecordAttr::recordOffset, zebra_register::records, zebra_session::records_deleted, zebra_session::records_inserted, zebra_session::records_processed, zebra_session::records_skipped, zebra_session::records_updated, RecordAttr::recordSize, zebra_session::reg, RecordAttr::runNumber, recExtractCtrl::schemaAdd, ZebraRecStream::seekf, record_info::size, zebra_register::sortKeys, recExtractCtrl::staticrank, RecordAttr::staticrank, zebra_session::store_data_buf, zebra_session::store_data_size, recExtractCtrl::stream, record_info::sysno, ZebraRecStream::tellf, recExtractCtrl::tokenAdd, wrbuf_hex_str(), ZEBRA_FAIL, zebra_init_log_level(), ZEBRA_OK, zebra_rec_keys_close(), zebra_rec_keys_empty(), zebra_rec_keys_get_buf(), zebra_rec_keys_get_custom_record_id(), 
zebra_rec_keys_open(), zebra_rec_keys_reset(), zebra_rec_keys_set_buf(), zebraExplain_curDatabase(), zebraExplain_get_database_ord(), zebraExplain_newDatabase(), zebraExplain_recordBytesIncrement(), zebraExplain_runNumberIncrement(), zebra_register::zei, and ZINT_FORMAT.

Referenced by zebra_extract_records_stream().

◆ zebra_extract_records_stream()

ZEBRA_RES zebra_extract_records_stream ( ZebraHandle  zh,
struct ZebraRecStream *  stream,
enum zebra_recctrl_action_t  action,
const char *  recordType,
zint *  sysno,
const char *  match_criteria,
const char *  fname,
RecType  recType,
void *  recTypeClientData 
)

extracts records from stream

Parameters
zh: Zebra Handle
stream: stream that we read from
action: (action_insert, action_replace, action_delete, ..)
recordType: Record filter type "grs.xml", etc.
sysno: pointer to sysno if already known; NULL otherwise
match_criteria: (NULL if not already given)
fname: filename that we read from (for logging purposes only)
recType: record type
recTypeClientData: client data for record type
Returns
ZEBRA_OK for success; ZEBRA_FAIL for failure

Definition at line 1190 of file extract.c.

References recExtractCtrl::action, recExtractCtrl::match_criteria, recExtractCtrl::stream, zebra_extract_record_stream(), and ZEBRA_OK.

Referenced by zebra_buffer_extract_record(), and zebra_extract_file().

◆ zebra_init_log_level()

static void zebra_init_log_level ( void  )
static

◆ zebra_it_key_str_dump()

void zebra_it_key_str_dump ( ZebraHandle  zh,
struct it_key *  key,
const char *  str,
size_t  slen,
NMEM  nmem,
int  level 
)

◆ zebra_rec_keys_to_snippets1()

ZEBRA_RES zebra_rec_keys_to_snippets1 ( ZebraHandle  zh,
zebra_rec_keys_t  reckeys,
zebra_snippets *  snippets 
)

Variable Documentation

◆ log_level_details

int log_level_details = 0
static

Definition at line 46 of file extract.c.

Referenced by extract_add_string(), extract_token_add(), and zebra_init_log_level().

◆ log_level_extract

int log_level_extract = 0
static

◆ log_level_initialized

int log_level_initialized = 0
static

Definition at line 47 of file extract.c.

Referenced by zebra_init_log_level().