aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/linden/indra/newview/rlvmultistringsearch.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--linden/indra/newview/rlvmultistringsearch.cpp196
1 files changed, 196 insertions, 0 deletions
diff --git a/linden/indra/newview/rlvmultistringsearch.cpp b/linden/indra/newview/rlvmultistringsearch.cpp
new file mode 100644
index 0000000..0aa9889
--- /dev/null
+++ b/linden/indra/newview/rlvmultistringsearch.cpp
@@ -0,0 +1,196 @@
1#include "llviewerprecompiledheaders.h"
2
3#include "rlvmultistringsearch.h"
4
5// ====================================================================================
6
7#ifndef RLV_LOWORD
8 #define RLV_LOWORD(x) ( (U16)( ((U32)x) & 0xFFFF) )
9#endif // RLV_LOWORD
10
11#ifndef RLV_HIWORD
12 #define RLV_HIWORD(x) ( (U16)( (((U32)x) >> 16) & 0xFFFF) )
13#endif // RLV_HIWORD
14
15// ====================================================================================
16
17// (TODO-RLV: oops, forgot I was experimenting with word matching, get rid of that again?)
18#define isLetter(ch) \
19 ( ( (ch >= 'a') && (ch <= 'z') ) || ( (ch >= 'A') && (ch <= 'Z') ) )
20
21RlvMultiStringSearch::RlvMultiStringSearch()
22 : m_FSM(256), // Start our FSM with room for 256 states (enough for all attachment point names)
23 m_cntState(0)
24{
25}
26
27void RlvMultiStringSearch::addKeyword(const std::string& strKeyword, U16 nParam) {
28 U16 nCurState = 0; // Always start the loop at state 0
29
30 //
31 // Make sure there are enough unused rows to accomodate the worst case (== strKeyword.length() new states)
32 //
33 size_t nMaxState = m_FSM.getSize();
34 if (m_cntState + strKeyword.length() > nMaxState)
35 // Allocate enough new rows (in batches of 256 rows)
36 m_FSM.resize(nMaxState + ((strKeyword.length() / 256) + 1) * 256);
37
38 //
39 // Walk the string character by character
40 //
41 for (int idxCh = 0, cntCh = strKeyword.length(); idxCh < cntCh; idxCh++) {
42 // Look up the next state for current character
43 unsigned char ch = strKeyword[idxCh];
44 U16 nState = RLV_LOWORD(m_FSM[nCurState][ch]);
45
46 // If we're at the last character in the keyword then set the termination bit
47 if (cntCh - 1 == idxCh)
48 {
49 // (Only set the termination bit for the state because this keyword might be a substring of another keyword)
50 m_FSM[nCurState][ch] = (nParam << 16) | (nState | 0x8000);
51 }
52 else if ( (nState & 0x7FFF) == 0 ) // If the new state is 0 then we're creating a new path
53 {
54 // (Preserve the termination bit because another keyword might be a substring of this one)
55 nState = ++m_cntState | (nState & 0x8000);
56
57 // Store the new path in the FSM
58 //m_FSM[nCurState][ch] = (nParam << 16) | nState;
59 m_FSM[nCurState][ch] |= nState;
60 }
61
62 nCurState = nState & 0x7FFF; // Mask out the termination bit since we never need it for the current state
63 }
64}
65
66// (Iterating over a "const char*" is *significantly* faster than "std::string")
67bool RlvMultiStringSearch::findNext(const char* pstrText, int idxCh, int cntCh, RlvMultiStringSearchMatch& match, bool fWordMatch) const
68{
69 U16 nCurState = 0; // Always start the loop at state 0
70 U32 nLastMatch = 0; // Holds the state of the last (possibly partial) keyword match
71
72 //
73 // Walk the string character by character
74 //
75 for (; idxCh < cntCh; idxCh++)
76 {
77 // Keep track of the current state in case we need to backtrack
78 U16 nPrevState = nCurState;
79
80 // If we're currently in state 0, save the current character index (for backtracking or as keyword index match)
81 if (nCurState == 0)
82 match.idxMatch = idxCh;
83
84 // Look up the current character in the FSM
85 unsigned char ch = (unsigned char)pstrText[idxCh];
86 U32 nCell = m_FSM[nCurState & 0x7FFF][ch];
87
88 // If the termination bit is set then we found a keyword substring match
89 // If the next state is non-zero then we can't stop yet because the matched keyword might be a substring of another keyword
90 if (nCell & 0x8000)
91 {
92 if ( 0 == (nCell & 0x7FFF) )
93 {
94 // Termination bit with 'next state' equal to 0: matched keyword which isn't also a substring of any other keyword
95 match.lenMatch = idxCh - match.idxMatch + 1;
96 match.nParam = RLV_HIWORD(nCell);
97
98 // Rudimentary word matching: check if the match is a 'word'
99 if
100 (
101 (!fWordMatch) ||
102 (
103 ( (0 == match.idxMatch) || (!isLetter(pstrText[match.idxMatch - 1])) ) && // Start of string OR non-letter
104 ( (!isLetter(pstrText[match.idxMatch + match.lenMatch])) )
105 )
106 )
107 {
108 return true;
109 }
110
111 // Not a word, but there's no need to backtrack: we can move on from the character after the current one
112 nCell = 0; // Will set nCurState == 0 further down
113 match.idxMatch = idxCh; // Makes sure we move on to the next character instead of backtracking
114 }
115 else
116 {
117 nLastMatch = nCell;
118
119 // In case it turns out that we need to backtrack and return this match, save the length of this match
120 match.lenMatch = idxCh - match.idxMatch + 1;
121 }
122 }
123
124 nCurState = RLV_LOWORD(nCell);
125
126 // If our new state is 0, but our previous state wasn't, then we followed a false lead and need to backtrack
127 if ( (nPrevState != 0) && (nCurState == 0) )
128 {
129 // * if nLastMatch == 0 then we need to backtrack and keep going
130 // * if nLastMatch != 0 then we previously encountered a keyword match so return that one
131 if (nLastMatch) {
132 // Rudimentary word matching: check if the match is a 'word'
133 if
134 (
135 (!fWordMatch) ||
136 (
137 ( (0 == match.idxMatch) || (!isLetter(pstrText[match.idxMatch - 1])) ) && // Start of string OR non-letter
138 ( (!isLetter(pstrText[match.idxMatch + match.lenMatch])) )
139 )
140 )
141 {
142 match.nParam = RLV_HIWORD(nLastMatch);
143 return true;
144 } else
145 // Not a word match, so throw away this partial match and backtrack
146 nLastMatch = 0;
147 }
148
149 idxCh = match.idxMatch;
150 }
151 }
152
153 // We encountered a match, but while investigating whether it was a substring of another keyword we ran out of characters
154 if (nLastMatch)
155 {
156 // Rudimentary word matching: check if we started at the beginning of a word (we know the one behind us is '\0')
157 if ( (!fWordMatch) || ( (0 == match.idxMatch) || (!isLetter(pstrText[match.idxMatch - 1])) ) )
158 {
159 match.nParam = RLV_HIWORD(nLastMatch);
160 return true;
161 }
162 }
163
164 // None of the keywords is contained in the string: return failure
165 match.idxMatch = -1;
166 return false;
167}
168
169bool RlvMultiStringSearch::findLast(const std::string& strText, RlvMultiStringSearchMatch& match) const {
170 RlvMultiStringSearchMatch matchTemp;
171 match.idxMatch = -1; // (Needed to make the return work in case we don't find anything)
172 matchTemp.lenMatch = 0; // (Needed to make the first loop iteration start at 0)
173
174 // Iterating over a "const char*" is *significantly* faster than "std::string"
175 const char* pstrText = strText.c_str();
176 int lenText = strText.length();
177
178 while (findNext(pstrText, matchTemp.idxMatch + matchTemp.lenMatch + 1, lenText, matchTemp))
179 match = matchTemp;
180
181 return (match.idxMatch != -1);
182}
183
184std::vector<RlvMultiStringSearchMatch> RlvMultiStringSearch::findAll(const std::string& strText) {
185 std::vector<RlvMultiStringSearchMatch> arMatch;
186
187 RlvMultiStringSearchMatch match;
188 match.lenMatch = 0; // (Needed to make the first loop iteration start at 0)
189
190 while (findNext(strText, match.idxMatch + match.lenMatch + 1, match))
191 arMatch.push_back(match);
192
193 return arMatch;
194}
195
196// ====================================================================================