diff options
Diffstat (limited to '')
-rw-r--r-- | linden/indra/newview/rlvmultistringsearch.cpp | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/linden/indra/newview/rlvmultistringsearch.cpp b/linden/indra/newview/rlvmultistringsearch.cpp new file mode 100644 index 0000000..0aa9889 --- /dev/null +++ b/linden/indra/newview/rlvmultistringsearch.cpp | |||
@@ -0,0 +1,196 @@ | |||
1 | #include "llviewerprecompiledheaders.h" | ||
2 | |||
3 | #include "rlvmultistringsearch.h" | ||
4 | |||
5 | // ==================================================================================== | ||
6 | |||
7 | #ifndef RLV_LOWORD | ||
8 | #define RLV_LOWORD(x) ( (U16)( ((U32)x) & 0xFFFF) ) | ||
9 | #endif // RLV_LOWORD | ||
10 | |||
11 | #ifndef RLV_HIWORD | ||
12 | #define RLV_HIWORD(x) ( (U16)( (((U32)x) >> 16) & 0xFFFF) ) | ||
13 | #endif // RLV_HIWORD | ||
14 | |||
15 | // ==================================================================================== | ||
16 | |||
17 | // (TODO-RLV: oops, forgot I was experimenting with word matching, get rid of that again?) | ||
18 | #define isLetter(ch) \ | ||
19 | ( ( (ch >= 'a') && (ch <= 'z') ) || ( (ch >= 'A') && (ch <= 'Z') ) ) | ||
20 | |||
21 | RlvMultiStringSearch::RlvMultiStringSearch() | ||
22 | : m_FSM(256), // Start our FSM with room for 256 states (enough for all attachment point names) | ||
23 | m_cntState(0) | ||
24 | { | ||
25 | } | ||
26 | |||
27 | void RlvMultiStringSearch::addKeyword(const std::string& strKeyword, U16 nParam) { | ||
28 | U16 nCurState = 0; // Always start the loop at state 0 | ||
29 | |||
30 | // | ||
31 | // Make sure there are enough unused rows to accomodate the worst case (== strKeyword.length() new states) | ||
32 | // | ||
33 | size_t nMaxState = m_FSM.getSize(); | ||
34 | if (m_cntState + strKeyword.length() > nMaxState) | ||
35 | // Allocate enough new rows (in batches of 256 rows) | ||
36 | m_FSM.resize(nMaxState + ((strKeyword.length() / 256) + 1) * 256); | ||
37 | |||
38 | // | ||
39 | // Walk the string character by character | ||
40 | // | ||
41 | for (int idxCh = 0, cntCh = strKeyword.length(); idxCh < cntCh; idxCh++) { | ||
42 | // Look up the next state for current character | ||
43 | unsigned char ch = strKeyword[idxCh]; | ||
44 | U16 nState = RLV_LOWORD(m_FSM[nCurState][ch]); | ||
45 | |||
46 | // If we're at the last character in the keyword then set the termination bit | ||
47 | if (cntCh - 1 == idxCh) | ||
48 | { | ||
49 | // (Only set the termination bit for the state because this keyword might be a substring of another keyword) | ||
50 | m_FSM[nCurState][ch] = (nParam << 16) | (nState | 0x8000); | ||
51 | } | ||
52 | else if ( (nState & 0x7FFF) == 0 ) // If the new state is 0 then we're creating a new path | ||
53 | { | ||
54 | // (Preserve the termination bit because another keyword might be a substring of this one) | ||
55 | nState = ++m_cntState | (nState & 0x8000); | ||
56 | |||
57 | // Store the new path in the FSM | ||
58 | //m_FSM[nCurState][ch] = (nParam << 16) | nState; | ||
59 | m_FSM[nCurState][ch] |= nState; | ||
60 | } | ||
61 | |||
62 | nCurState = nState & 0x7FFF; // Mask out the termination bit since we never need it for the current state | ||
63 | } | ||
64 | } | ||
65 | |||
66 | // (Iterating over a "const char*" is *significantly* faster than "std::string") | ||
67 | bool RlvMultiStringSearch::findNext(const char* pstrText, int idxCh, int cntCh, RlvMultiStringSearchMatch& match, bool fWordMatch) const | ||
68 | { | ||
69 | U16 nCurState = 0; // Always start the loop at state 0 | ||
70 | U32 nLastMatch = 0; // Holds the state of the last (possibly partial) keyword match | ||
71 | |||
72 | // | ||
73 | // Walk the string character by character | ||
74 | // | ||
75 | for (; idxCh < cntCh; idxCh++) | ||
76 | { | ||
77 | // Keep track of the current state in case we need to backtrack | ||
78 | U16 nPrevState = nCurState; | ||
79 | |||
80 | // If we're currently in state 0, save the current character index (for backtracking or as keyword index match) | ||
81 | if (nCurState == 0) | ||
82 | match.idxMatch = idxCh; | ||
83 | |||
84 | // Look up the current character in the FSM | ||
85 | unsigned char ch = (unsigned char)pstrText[idxCh]; | ||
86 | U32 nCell = m_FSM[nCurState & 0x7FFF][ch]; | ||
87 | |||
88 | // If the termination bit is set then we found a keyword substring match | ||
89 | // If the next state is non-zero then we can't stop yet because the matched keyword might be a substring of another keyword | ||
90 | if (nCell & 0x8000) | ||
91 | { | ||
92 | if ( 0 == (nCell & 0x7FFF) ) | ||
93 | { | ||
94 | // Termination bit with 'next state' equal to 0: matched keyword which isn't also a substring of any other keyword | ||
95 | match.lenMatch = idxCh - match.idxMatch + 1; | ||
96 | match.nParam = RLV_HIWORD(nCell); | ||
97 | |||
98 | // Rudimentary word matching: check if the match is a 'word' | ||
99 | if | ||
100 | ( | ||
101 | (!fWordMatch) || | ||
102 | ( | ||
103 | ( (0 == match.idxMatch) || (!isLetter(pstrText[match.idxMatch - 1])) ) && // Start of string OR non-letter | ||
104 | ( (!isLetter(pstrText[match.idxMatch + match.lenMatch])) ) | ||
105 | ) | ||
106 | ) | ||
107 | { | ||
108 | return true; | ||
109 | } | ||
110 | |||
111 | // Not a word, but there's no need to backtrack: we can move on from the character after the current one | ||
112 | nCell = 0; // Will set nCurState == 0 further down | ||
113 | match.idxMatch = idxCh; // Makes sure we move on to the next character instead of backtracking | ||
114 | } | ||
115 | else | ||
116 | { | ||
117 | nLastMatch = nCell; | ||
118 | |||
119 | // In case it turns out that we need to backtrack and return this match, save the length of this match | ||
120 | match.lenMatch = idxCh - match.idxMatch + 1; | ||
121 | } | ||
122 | } | ||
123 | |||
124 | nCurState = RLV_LOWORD(nCell); | ||
125 | |||
126 | // If our new state is 0, but our previous state wasn't, then we followed a false lead and need to backtrack | ||
127 | if ( (nPrevState != 0) && (nCurState == 0) ) | ||
128 | { | ||
129 | // * if nLastMatch == 0 then we need to backtrack and keep going | ||
130 | // * if nLastMatch != 0 then we previously encountered a keyword match so return that one | ||
131 | if (nLastMatch) { | ||
132 | // Rudimentary word matching: check if the match is a 'word' | ||
133 | if | ||
134 | ( | ||
135 | (!fWordMatch) || | ||
136 | ( | ||
137 | ( (0 == match.idxMatch) || (!isLetter(pstrText[match.idxMatch - 1])) ) && // Start of string OR non-letter | ||
138 | ( (!isLetter(pstrText[match.idxMatch + match.lenMatch])) ) | ||
139 | ) | ||
140 | ) | ||
141 | { | ||
142 | match.nParam = RLV_HIWORD(nLastMatch); | ||
143 | return true; | ||
144 | } else | ||
145 | // Not a word match, so throw away this partial match and backtrack | ||
146 | nLastMatch = 0; | ||
147 | } | ||
148 | |||
149 | idxCh = match.idxMatch; | ||
150 | } | ||
151 | } | ||
152 | |||
153 | // We encountered a match, but while investigating whether it was a substring of another keyword we ran out of characters | ||
154 | if (nLastMatch) | ||
155 | { | ||
156 | // Rudimentary word matching: check if we started at the beginning of a word (we know the one behind us is '\0') | ||
157 | if ( (!fWordMatch) || ( (0 == match.idxMatch) || (!isLetter(pstrText[match.idxMatch - 1])) ) ) | ||
158 | { | ||
159 | match.nParam = RLV_HIWORD(nLastMatch); | ||
160 | return true; | ||
161 | } | ||
162 | } | ||
163 | |||
164 | // None of the keywords is contained in the string: return failure | ||
165 | match.idxMatch = -1; | ||
166 | return false; | ||
167 | } | ||
168 | |||
169 | bool RlvMultiStringSearch::findLast(const std::string& strText, RlvMultiStringSearchMatch& match) const { | ||
170 | RlvMultiStringSearchMatch matchTemp; | ||
171 | match.idxMatch = -1; // (Needed to make the return work in case we don't find anything) | ||
172 | matchTemp.lenMatch = 0; // (Needed to make the first loop iteration start at 0) | ||
173 | |||
174 | // Iterating over a "const char*" is *significantly* faster than "std::string" | ||
175 | const char* pstrText = strText.c_str(); | ||
176 | int lenText = strText.length(); | ||
177 | |||
178 | while (findNext(pstrText, matchTemp.idxMatch + matchTemp.lenMatch + 1, lenText, matchTemp)) | ||
179 | match = matchTemp; | ||
180 | |||
181 | return (match.idxMatch != -1); | ||
182 | } | ||
183 | |||
184 | std::vector<RlvMultiStringSearchMatch> RlvMultiStringSearch::findAll(const std::string& strText) { | ||
185 | std::vector<RlvMultiStringSearchMatch> arMatch; | ||
186 | |||
187 | RlvMultiStringSearchMatch match; | ||
188 | match.lenMatch = 0; // (Needed to make the first loop iteration start at 0) | ||
189 | |||
190 | while (findNext(strText, match.idxMatch + match.lenMatch + 1, match)) | ||
191 | arMatch.push_back(match); | ||
192 | |||
193 | return arMatch; | ||
194 | } | ||
195 | |||
196 | // ==================================================================================== | ||