-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathStringDictionary.h
164 lines (137 loc) · 5.42 KB
/
StringDictionary.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
/* StringDictionary.h
* Copyright (C) 2014, Francisco Claude & Rodrigo Canovas & Miguel A. Martinez-Prieto
* all rights reserved.
*
* Abstract class for implementing Compressed String Dictionaries.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*
* Contacting the authors:
* Francisco Claude: [email protected]
* Rodrigo Canovas: [email protected]
* Miguel A. Martinez-Prieto: [email protected]
*/
#ifndef _STRINGDICTIONARY_H
#define _STRINGDICTIONARY_H
using namespace std;
#include <libcdsBasics.h>
using namespace cds_utils;
#include "iterators/IteratorDictID.h"
#include "iterators/IteratorDictString.h"
#include "utils/Utils.h"
/** Abstract base class for compressed string dictionaries.

Declares the operations every concrete dictionary must implement
(the pure virtual members below): mapping strings to IDs, IDs to
strings, prefix/substring/rank queries, size reporting and
serialization. maxLength(), numElements() and load() are only
declared here; their definitions are outside this header.

NOTE(review): several operations return raw pointers (uchar*,
IteratorDictID*, IteratorDictString*, StringDictionary*). This header
does not state who owns them -- presumably the caller must release
them; confirm against the concrete implementations.
*/
class StringDictionary
{
public:
/** Retrieves the ID corresponding to the given string.
@param str: the string to be located.
@param strLen: the string length.
@returns the ID (or NORESULT if it is not in the dictionary).
*/
virtual uint locate(uchar *str, uint strLen)=0;
/** Obtains the string associated with the given ID.
NOTE(review): the ID is taken as size_t here while locate() returns
uint -- confirm the intended ID width.
@param id: the ID to be extracted.
@param strLen: pointer through which the length of the extracted
string is reported.
@returns the requested string (or NULL if it is not in the
dictionary).
*/
virtual uchar* extract(size_t id, uint *strLen)=0;
/** Locates the IDs of all elements prefixed by the given
string.
@param str: the prefix to be searched.
@param strLen: the prefix length.
@returns an iterator for direct scanning of all the IDs.
*/
virtual IteratorDictID* locatePrefix(uchar *str, uint strLen)=0;
/** Locates the IDs of all elements containing the given
substring.
@param str: the substring to be searched.
@param strLen: the substring length.
@returns an iterator for direct scanning of all the IDs.
*/
virtual IteratorDictID* locateSubstr(uchar *str, uint strLen)=0;
/** Retrieves the ID of the element with the given rank in
alphabetical order.
@param rank: the alphabetical ranking.
@returns the ID.
*/
virtual uint locateRank(uint rank)=0;
/** Extracts all elements prefixed by the given string.
@param str: the prefix to be searched.
@param strLen: the prefix length.
@returns an iterator for direct scanning of all the strings.
*/
virtual IteratorDictString* extractPrefix(uchar *str, uint strLen)=0;
/** Extracts all elements containing the given substring.
@param str: the substring to be searched.
@param strLen: the substring length.
@returns an iterator for direct scanning of all the strings.
*/
virtual IteratorDictString* extractSubstr(uchar *str, uint strLen)=0;
/** Obtains the string with the given rank in alphabetical
order.
@param rank: the alphabetical rank of the string to be extracted.
@param strLen: pointer through which the length of the extracted
string is reported.
@returns the requested string (or NULL if it is not in the
dictionary).
*/
virtual uchar* extractRank(uint rank, uint *strLen)=0;
/** Extracts all strings in the dictionary sorted in
alphabetical order.
@returns an iterator for direct scanning of all the strings.
*/
virtual IteratorDictString* extractTable()=0;
/** Computes the size of the structure in bytes.
@returns the dictionary size in bytes.
*/
virtual size_t getSize()=0;
/** Retrieves the length of the longest string in the
dictionary. Non-virtual; defined outside this header.
@returns the length.
*/
uint maxLength();
/** Retrieves the number of elements in the dictionary.
Non-virtual; defined outside this header.
@returns the number of elements.
*/
size_t numElements();
/** Stores the dictionary into an ofstream.
@param out: the ofstream.
*/
virtual void save(ofstream &out)=0;
/** Loads a dictionary from an ifstream.
@param in: the ifstream.
@param opt: loading option; some dictionaries accept different
options for loading (its meaning is not defined in this header).
@returns the loaded dictionary.
*/
static StringDictionary *load(ifstream &in, uint opt);
/** Generic destructor. Virtual so that concrete dictionaries can be
destroyed through a StringDictionary pointer. */
virtual ~StringDictionary() {};
protected:
uint32_t type; //!< Dictionary type.
uint64_t elements; //!< Number of strings in the dictionary.
uint32_t maxlength; //!< Length of the longest string in the dictionary.
};
#include "StringDictionaryHASHHF.h"
#include "StringDictionaryHASHUFFDAC.h"
#include "StringDictionaryHASHRPF.h"
#include "StringDictionaryHASHRPDAC.h"
#include "StringDictionaryPFC.h"
#include "StringDictionaryRPFC.h"
#include "StringDictionaryHTFC.h"
#include "StringDictionaryHHTFC.h"
#include "StringDictionaryRPHTFC.h"
#include "StringDictionaryRPDAC.h"
#include "StringDictionaryFMINDEX.h"
#include "StringDictionaryXBW.h"
#endif