diff options
Diffstat (limited to '')
-rw-r--r-- | linden/indra/newview/rlvmultistringsearch.h | 191 |
1 files changed, 191 insertions, 0 deletions
diff --git a/linden/indra/newview/rlvmultistringsearch.h b/linden/indra/newview/rlvmultistringsearch.h new file mode 100644 index 0000000..43b0172 --- /dev/null +++ b/linden/indra/newview/rlvmultistringsearch.h | |||
@@ -0,0 +1,191 @@ | |||
1 | #ifndef RLV_MULTISTRINGSEARCH_H | ||
2 | #define RLV_MULTISTRINGSEARCH_H | ||
3 | |||
4 | // ============================================================================ | ||
5 | // Template classes for our state machine (2 dimensional array of type T) | ||
6 | |||
7 | // STL vector | ||
8 | template<typename T> class RlvMultiStringSearchFSM_STL | ||
9 | { | ||
10 | public: | ||
11 | /* | ||
12 | * Constructor/destructor | ||
13 | */ | ||
14 | |||
15 | // Initialize the FSM with an initial capacity of 'nCapacity' states | ||
16 | RlvMultiStringSearchFSM_STL(size_t nCapacity) | ||
17 | { | ||
18 | m_arFSM.reserve(nCapacity); | ||
19 | |||
20 | T* pT; | ||
21 | for (size_t idx = 0; idx < nCapacity; idx++) | ||
22 | { | ||
23 | // The width of each row is determined by the alphabet we're using (in this case UTF-8 | ||
24 | // so while every character might consist of multiple bytes there are | ||
25 | // still only 256 'columns' in the state machine) | ||
26 | pT = new T[256](); | ||
27 | |||
28 | // The above *should* initialize to 0 but since we can't account for every compiler doing it :( | ||
29 | memset(pT, 0, sizeof(T) * 256); | ||
30 | |||
31 | m_arFSM.push_back(pT); | ||
32 | } | ||
33 | }; | ||
34 | |||
35 | ~RlvMultiStringSearchFSM_STL() | ||
36 | { | ||
37 | // Free any memory we previously allocated | ||
38 | for (int idx = 0, cnt = m_arFSM.size(); idx < cnt; idx++) | ||
39 | delete[] m_arFSM[idx]; | ||
40 | } | ||
41 | |||
42 | /* | ||
43 | * Operators | ||
44 | */ | ||
45 | // ASSERTION: nState < m_arFSM.size() at all times | ||
46 | // In other words: do *NOT* go out of bounds on the array (no memory will have allocated for that non-existing state) | ||
47 | // (There probably should be a check for that even in release but it seems wasteful, just don't do it :p) | ||
48 | inline T* operator[](size_t nState) | ||
49 | { | ||
50 | //#ifdef _DEBUG | ||
51 | // assert( nState < m_arFSM.size() ); | ||
52 | //#endif // _DEBUG | ||
53 | |||
54 | return m_arFSM[nState]; | ||
55 | } | ||
56 | inline const T* operator[](size_t nState) const | ||
57 | { | ||
58 | //#ifdef _DEBUG | ||
59 | // assert( nState < m_arFSM.size() ); | ||
60 | //#endif // _DEBUG | ||
61 | |||
62 | return m_arFSM[nState]; | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * Public member functions | ||
67 | */ | ||
68 | |||
69 | size_t getSize() const { return m_arFSM.size(); } | ||
70 | |||
71 | void resize(size_t nNewCapacity) | ||
72 | { | ||
73 | // Get our current capacity (only rows > capacity need memory allocated) | ||
74 | size_t nCurCapacity = m_arFSM.capacity(); | ||
75 | |||
76 | // Only expand, never shrink | ||
77 | if (nNewCapacity <= nCurCapacity) | ||
78 | { | ||
79 | //#ifdef _DEBUG | ||
80 | // assert(false); | ||
81 | //#endif //_DEBUG | ||
82 | |||
83 | return; | ||
84 | } | ||
85 | m_arFSM.resize(nNewCapacity); | ||
86 | |||
87 | // For each new state we added, allocate memory for the columns | ||
88 | for(size_t idx = nCurCapacity; idx < nNewCapacity; idx++) | ||
89 | // The memset is redundant (or rather *should* be) but since we can't account for every compiler doing it :( | ||
90 | m_arFSM[idx] = (T*)memset(new T[256](), 0, sizeof(T) * 256); | ||
91 | } | ||
92 | |||
93 | protected: | ||
94 | /* | ||
95 | * Member variables | ||
96 | */ | ||
97 | std::vector<T*> m_arFSM; | ||
98 | }; | ||
99 | |||
100 | // ============================================================================ | ||
101 | |||
102 | struct RlvMultiStringSearchMatch | ||
103 | { | ||
104 | int idxMatch; // Starting character index into the string of the matched keyword (-1 if no match) | ||
105 | int lenMatch; // Length of the matched keyword (undefined if no match) | ||
106 | U16 nParam; // User supplied parameter for the matched keyword (undefined if no match) | ||
107 | |||
108 | RlvMultiStringSearchMatch() : idxMatch(-1) {} | ||
109 | }; | ||
110 | |||
111 | // ============================================================================ | ||
112 | // The actual search class | ||
113 | |||
114 | class RlvMultiStringSearch | ||
115 | { | ||
116 | public: | ||
117 | /* | ||
118 | * Constructor/destructor | ||
119 | */ | ||
120 | RlvMultiStringSearch(); | ||
121 | //~RlvMultiStringSearch(); | ||
122 | |||
123 | /* | ||
124 | * Public member functions | ||
125 | */ | ||
126 | |||
127 | // Add a keyword to the state machine (if it already exists then it will simply overwrite the existing parameter) | ||
128 | void addKeyword(const std::string& strKeyword, U16 nParam); | ||
129 | |||
130 | BOOL getExactMatchParam(const std::string& strText, U16& nParam) const | ||
131 | { | ||
132 | RlvMultiStringSearchMatch match; | ||
133 | if (findFirst(strText, match)) | ||
134 | { | ||
135 | // We have an exact match if the starting index is 0 | ||
136 | // and the length of the match matches the length of the string | ||
137 | if ( (0 == match.idxMatch) && (match.lenMatch == (int)strText.length()) ) | ||
138 | { | ||
139 | nParam = match.nParam; | ||
140 | return TRUE; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | return FALSE; // Fall-through: no (exact) match | ||
145 | } | ||
146 | |||
147 | // Finds the first occurance of any keyword in the supplied string | ||
148 | bool findFirst(const std::string& strText, RlvMultiStringSearchMatch& match) const; | ||
149 | // Finds the next occurance of any keyword in the supplied string | ||
150 | bool findNext(const std::string& strText, int idxCh, RlvMultiStringSearchMatch& match) const; | ||
151 | // Finds all occurances of any keyword in the supplied string | ||
152 | std::vector<RlvMultiStringSearchMatch> findAll(const std::string& strText); | ||
153 | // Finds the last occurance of any keyword in the supplied string (non-optimized) | ||
154 | bool findLast(const std::string& strText, RlvMultiStringSearchMatch& match) const; | ||
155 | |||
156 | protected: | ||
157 | // Finds the next occurance of any keyword in the supplied string | ||
158 | bool findNext(const char* pstrText, int idxCh, int cntCh, RlvMultiStringSearchMatch& match, bool fWordMatch = true) const; | ||
159 | |||
160 | /* | ||
161 | * Member variables | ||
162 | */ | ||
163 | RlvMultiStringSearchFSM_STL<U32> m_FSM; // Our finite state machine (4 bytes * 256 = 1Kb of memory/state) | ||
164 | // HIWORD(U32) = 16-bits of user data | ||
165 | // LOWORD(U32) = ABBBBBBBBBBBBBBB | ||
166 | // A = termination bit | ||
167 | // If (set) and (B == 0): match | ||
168 | // If (set) and (B != 0): match, but might only be a substring of another keyword | ||
169 | // B = next state (0..32767) | ||
170 | // If (B == 0): false lead -> backtrack | ||
171 | // If (B != 0): partial keyword match, next state | ||
172 | size_t m_cntState; // The number of states in the FSM (= the number of *used* rows in the array) | ||
173 | }; | ||
174 | |||
175 | // ============================================================================ | ||
176 | // Inlined member functions | ||
177 | // | ||
178 | |||
179 | inline bool RlvMultiStringSearch::findFirst(const std::string& strText, RlvMultiStringSearchMatch& match) const | ||
180 | { | ||
181 | return findNext(strText.c_str(), 0, strText.length(), match); | ||
182 | } | ||
183 | |||
184 | inline bool RlvMultiStringSearch::findNext(const std::string& strText, int idxCh, RlvMultiStringSearchMatch& match) const | ||
185 | { | ||
186 | return findNext(strText.c_str(), idxCh, strText.length(), match); | ||
187 | } | ||
188 | |||
189 | // ============================================================================ | ||
190 | |||
191 | #endif // RLV_MULTISTRINGSEARCH_H | ||