pazpar2  1.14.1
normalize7bit.c
Go to the documentation of this file.
1 /* This file is part of Pazpar2.
2  Copyright (C) Index Data
3 
4 Pazpar2 is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
18 */
19 
24 #if HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 
28 #include <stdlib.h>
29 #include <string.h>
30 #include <ctype.h>
31 
32 #include "normalize7bit.h"
33 
34 
// Trims a string in place and returns a pointer to the trimmed text.
//
// Leading whitespace (as classified by isspace()) is skipped by advancing
// the returned pointer; trailing characters that occur in rm_chars are
// overwritten with '\0'.
//
// str:      NUL-terminated, writable string; modified in place.
// rm_chars: NUL-terminated set of characters to strip from the end.
// returns:  pointer into str at the first non-whitespace character.
//
// Note: the first remaining character is never stripped, even if it is a
// member of rm_chars, so a non-empty trimmed string never becomes empty.
char *normalize7bit_generic(char *str, const char *rm_chars)
{
    char *p = str;
    size_t len;

    while (*p && isspace((unsigned char) *p))
        p++;

    // Track a length rather than a pointer to the last character, so that
    // an empty string never yields a pointer before the start of the buffer.
    len = strlen(p);
    while (len > 1 && strchr(rm_chars, p[len - 1]))
        p[--len] = '\0';

    return p;
}
46 
// Rewrites buf in place into a merge key: every run of alphanumeric
// characters (as classified by isalnum()) is kept and lower-cased, and
// consecutive runs are separated by exactly one space. All other
// characters are dropped, and the result has no leading or trailing space.
//
// buf:     NUL-terminated, writable string; modified in place. The result
//          is never longer than the input.
// returns: buf. If the input contains no alphanumeric character the result
//          is the empty string.
char *normalize7bit_mergekey(char *buf)
{
    const char *in = buf;
    char *out = buf;

    for (;;)
    {
        // Skip separators in front of the next word.
        while (*in && !isalnum((unsigned char) *in))
            in++;
        if (!*in)
            break;

        // Emit the separator only between words. At least one input
        // character was skipped before any later word, so out never
        // overtakes in.
        if (out != buf)
            *out++ = ' ';

        while (isalnum((unsigned char) *in))
            *out++ = (char) tolower((unsigned char) *in++);
    }

    // Always terminate, including when no word was found at all.
    *out = '\0';
    return buf;
}
69 
// Scans buf for runs of decimal digits that look like dates and records
// the lowest and highest value seen.
//
// longdate == 0: only runs of exactly 4 digits (YYYY) are considered.
// longdate != 0: runs of 4 to 8 digits are considered; a 4-digit run is
//                multiplied by 10000 so that it compares against YYYYMMDD
//                values, while runs of 5 to 8 digits are used unscaled.
//
// *first receives the lowest value and *last the highest; both are -1 when
// no qualifying run is found. The return value is *first.
int extract7bit_dates(const char *buf, int *first, int *last, int longdate)
{
    const char *scan = buf;

    *first = -1;
    *last = -1;

    while (*scan)
    {
        const char *run;
        int ndigits;
        int value;

        if (!isdigit((unsigned char) *scan))
        {
            scan++;
            continue;
        }

        // Consume the whole digit run before deciding whether it counts.
        run = scan;
        ndigits = 0;
        do
        {
            scan++;
            ndigits++;
        }
        while (isdigit((unsigned char) *scan));

        if (longdate)
        {
            if (ndigits < 4 || ndigits > 8)
                continue;
        }
        else if (ndigits != 4)
            continue;

        value = atoi(run);
        if (longdate && ndigits == 4)
            value *= 10000; // should really suffix 0101?

        if (*first < 0 || value < *first)
            *first = value;
        if (*last < 0 || value > *last)
            *last = value;
    }

    return *first;
}
101 
102 
103 
104 /*
105  * Local variables:
106  * c-basic-offset: 4
107  * c-file-style: "Stroustrup"
108  * indent-tabs-mode: nil
109  * End:
110  * vim: shiftwidth=4 tabstop=8 expandtab
111  */
112 
char * normalize7bit_generic(char *str, const char *rm_chars)
Removes leading whitespace. Removes suffix chars in rm_chars.
Definition: normalize7bit.c:36
int extract7bit_dates(const char *buf, int *first, int *last, int longdate)
Definition: normalize7bit.c:73
char * normalize7bit_mergekey(char *buf)
Definition: normalize7bit.c:47