aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/linden/indra/newview/rlvmultistringsearch.h
blob: 43b0172f25164e766c0236b499ceb82e049bf116 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#ifndef RLV_MULTISTRINGSEARCH_H
#define RLV_MULTISTRINGSEARCH_H

// ============================================================================
// Template classes for our state machine (2 dimensional array of type T)

// STL vector
template<typename T> class RlvMultiStringSearchFSM_STL
{
public:
	/*
	 * Constructor/destructor
	 */

	// Initialize the FSM with 'nCapacity' states (rows), every cell of every row set to 0
	RlvMultiStringSearchFSM_STL(size_t nCapacity)
	{
		m_arFSM.reserve(nCapacity);
		for (size_t idx = 0; idx < nCapacity; idx++)
			m_arFSM.push_back(allocRow());
	}

	// Deep copy: every row is duplicated so that both objects own (and later free) their own memory
	// (the implicitly generated copy would share the row pointers and free each of them twice)
	RlvMultiStringSearchFSM_STL(const RlvMultiStringSearchFSM_STL& rhs)
	{
		const size_t cntRow = rhs.m_arFSM.size();

		m_arFSM.reserve(cntRow);
		for (size_t idxRow = 0; idxRow < cntRow; idxRow++)
		{
			T* pRow = allocRow();

			const T* pSrcRow = rhs.m_arFSM[idxRow];
			if (pSrcRow)
			{
				for (size_t idxCol = 0; idxCol < ROW_WIDTH; idxCol++)
					pRow[idxCol] = pSrcRow[idxCol];
			}

			m_arFSM.push_back(pRow);
		}
	}

	~RlvMultiStringSearchFSM_STL()
	{
		// Free any memory we previously allocated
		for (size_t idx = 0, cnt = m_arFSM.size(); idx < cnt; idx++)
			delete[] m_arFSM[idx];
	}

	/*
	 * Operators
	 */

	// Copy-and-swap: the temporary takes over our old rows and frees them when it goes out of scope
	RlvMultiStringSearchFSM_STL& operator=(const RlvMultiStringSearchFSM_STL& rhs)
	{
		if (this != &rhs)
		{
			RlvMultiStringSearchFSM_STL tmp(rhs);
			m_arFSM.swap(tmp.m_arFSM);
		}
		return *this;
	}

	// Returns the row (an array of 256 cells) that belongs to state 'nState'
	// ASSERTION: nState < getSize() at all times
	// In other words: do *NOT* go out of bounds on the array (no memory will have been allocated for that non-existing state)
	// (There is deliberately no bounds check, not even in release builds: call resize() first if more states are needed)
	inline T* operator[](size_t nState)
	{
		//#ifdef _DEBUG
		//	assert( nState < m_arFSM.size() );
		//#endif // _DEBUG

		return m_arFSM[nState];
	}
	inline const T* operator[](size_t nState) const
	{
		//#ifdef _DEBUG
		//	assert( nState < m_arFSM.size() );
		//#endif // _DEBUG

		return m_arFSM[nState];
	}

	/*
	 * Public member functions
	 */

	// Returns the number of states (rows) that currently have memory allocated
	size_t getSize() const { return m_arFSM.size(); }

	// Grows the FSM to 'nNewCapacity' states; the existing rows are left untouched and every new row is zero-filled
	// (only expands, never shrinks: a request that isn't larger than getSize() is a no-op)
	void resize(size_t nNewCapacity)
	{
		// Measure the number of rows we actually have, *not* the vector's capacity(): the vector is free to
		// over-allocate and any row between size() and capacity() has never had its columns allocated
		const size_t nCurSize = m_arFSM.size();
		if (nNewCapacity <= nCurSize)
			return;

		// New slots start out as NULL pointers so the destructor stays safe even if an allocation below throws
		m_arFSM.resize(nNewCapacity);

		// For each new state we added, allocate memory for the columns
		for (size_t idx = nCurSize; idx < nNewCapacity; idx++)
			m_arFSM[idx] = allocRow();
	}

protected:
	// The width of each row is determined by the alphabet we're using (in this case UTF-8
	// so while every character might consist of multiple bytes there are
	// still only 256 'columns' in the state machine)
	enum { ROW_WIDTH = 256 };

	// Allocates one zero-filled row
	static T* allocRow()
	{
		T* pRow = new T[ROW_WIDTH]();

		// The above *should* initialize to 0 but since we can't account for every compiler doing it :(
		memset(pRow, 0, sizeof(T) * ROW_WIDTH);

		return pRow;
	}

	/*
	 * Member variables
	 */
	std::vector<T*> m_arFSM;	// One heap allocated row of ROW_WIDTH cells per state (owned by this object)
};

// ============================================================================

struct RlvMultiStringSearchMatch
{
	int idxMatch;	// Starting character index into the string of the matched keyword (-1 if no match)
	int lenMatch;	// Length of the matched keyword (undefined if no match)
	U16 nParam;		// User supplied parameter for the matched keyword (undefined if no match)

	RlvMultiStringSearchMatch() : idxMatch(-1) {}
};

// ============================================================================
// The actual search class

class RlvMultiStringSearch
{
public:
	/*
	 * Constructor/destructor
	 */
	RlvMultiStringSearch();
	//~RlvMultiStringSearch();

	/*
	 * Public member functions
	 */

	// Registers 'strKeyword' in the state machine together with its user parameter 'nParam'
	// (adding a keyword that already exists simply overwrites its existing parameter)
	void addKeyword(const std::string& strKeyword, U16 nParam);

	// Checks whether 'strText' as a whole is matched: returns TRUE and copies the match's parameter into 'nParam'
	// if the first match found spans the entire string, otherwise returns FALSE and leaves 'nParam' untouched
	BOOL getExactMatchParam(const std::string& strText, U16& nParam) const
	{
		RlvMultiStringSearchMatch match;

		// Nothing matched at all -> no exact match either
		if (!findFirst(strText, match))
			return FALSE;

		// An exact match has to start at index 0 *and* be just as long as the string itself
		const bool fStartsAtFront = (0 == match.idxMatch);
		const bool fSpansText = (static_cast<int>(strText.length()) == match.lenMatch);
		if ( (!fStartsAtFront) || (!fSpansText) )
			return FALSE;

		nParam = match.nParam;
		return TRUE;
	}

	// Finds the first occurrence of any keyword in the supplied string
	bool findFirst(const std::string& strText, RlvMultiStringSearchMatch& match) const;
	// Finds the next occurrence of any keyword in the supplied string (the search starts at character index 'idxCh')
	bool findNext(const std::string& strText, int idxCh, RlvMultiStringSearchMatch& match) const;
	// Finds all occurrences of any keyword in the supplied string
	std::vector<RlvMultiStringSearchMatch> findAll(const std::string& strText);
	// Finds the last occurrence of any keyword in the supplied string (non-optimized)
	bool findLast(const std::string& strText, RlvMultiStringSearchMatch& match) const;

protected:
	// Finds the next occurrence of any keyword in the supplied character buffer
	// (the inline findFirst()/findNext() wrappers pass the string's length as 'cntCh')
	// NOTE(review): 'fWordMatch' presumably restricts matches to whole words - confirm against the implementation
	bool findNext(const char* pstrText, int idxCh, int cntCh, RlvMultiStringSearchMatch& match, bool fWordMatch = true) const;

	/*
	 * Member variables
	 */
	RlvMultiStringSearchFSM_STL<U32> m_FSM;	// Our finite state machine (4 bytes * 256 = 1Kb of memory/state)
											// HIWORD(U32) = 16-bits of user data
											// LOWORD(U32) = ABBBBBBBBBBBBBBB
											//		A = termination bit
											//			If (set) and (B == 0): match
											//			If (set) and (B != 0): match, but might only be a substring of another keyword
											//		B = next state (0..32767)
											//			If (B == 0): false lead -> backtrack
											//			If (B != 0): partial keyword match, next state
	size_t	m_cntState;						// The number of states in the FSM (= the number of *used* rows in the array)
};

// ============================================================================
// Inlined member functions
//

// Finds the first occurrence of any keyword in 'strText': this is simply a search that starts at character index 0
inline bool RlvMultiStringSearch::findFirst(const std::string& strText, RlvMultiStringSearchMatch& match) const
{
	const char* pstrText = strText.c_str();
	const int cntCh = static_cast<int>(strText.length());

	return findNext(pstrText, 0, cntCh, match);
}

inline bool RlvMultiStringSearch::findNext(const std::string& strText, int idxCh, RlvMultiStringSearchMatch& match) const
{
	return findNext(strText.c_str(), idxCh, strText.length(), match);
}

// ============================================================================

#endif // RLV_MULTISTRINGSEARCH_H