#include <pattern.hpp>
Public Methods | |
| Pattern (std::string pat) | |
| construct a Pattern object to search (case-independently). More... | |
| bool | operator() (const std::string& String_to_Test) |
Private Types | |
| enum | { endstreamch = 0x0d } |
Private Methods | |
| char | rch () |
get the next character (or 'endstreamch') from the pattern
| |
| char | nextitem () |
get the next 'item' (remove squoted characters)
| |
| void | setexits (unsigned short list ,unsigned short val) |
| replace each element of a list starting at 'list' with val. More... | |
| unsigned short | join (unsigned short list ,unsigned short val) |
append a value (or list) to a list
| |
| unsigned short | expand (unsigned short altp) |
| collect stuff until we get an endstreamch or an unmatched ). More... | |
| unsigned short | primative (void) |
| get a primitive from the pattern. More... | |
Private Attributes | |
| std::string | the_Pattern |
| the actual pattern we're searching for. More... | |
| unsigned short | patp |
| working pointer for the match function. More... | |
| char | ch |
| current character from the_Pattern. More... | |
| usv | Compiled_Pattern |
| holds 'next state' info. More... | |
In usage, two kinds of calls are made: the first is to the constructor, to 'set' the pattern; then (possibly) multiple calls are made to operator() to check strings against the pattern. For example:
Pattern magic("#?test#?"); // look for "test" anywhere in the string
if (magic("this is a test")) // this would test true
    cout << "Yes, a match.\n";
if (magic("a rather contrived example")) // this would test false
    cout << "This is never gonna print\n";
else
    cout << "No copy of \"test\" in that string\n.";
Of course, both calls may be collapsed into a single "call" as follows:
if (Pattern("#?test#?")("This is a test, isn't it?"))
    cout << "Yes, and successful at that!\n";
#include <vector>
#include <algorithm>
...
std::vector<string> text_stuff;
... // code to fill text_stuff
...
// get an iterator which "points" to the first string in the vector
std::vector<string>::const_iterator iter(text_stuff.begin());
// output all strings which contain the pattern "this matches"
while (text_stuff.end() != (iter = find(iter, text_stuff.end(), Pattern("#?this'smatches#?"))))
{
cout << *iter; // output string which 'matches'
++iter; // so we start search after this string
}
Definition at line 86 of file pattern.hpp.
|
|
Definition at line 93 of file pattern.hpp. 00093 {endstreamch = 0x0d}
|
|
|
construct a Pattern object to search (case-independently).
The following metacharacters are used in the pattern string
? = matches any single character except newline
# = zero or more occurrences of following item
% = Matches the null string
() = enclose multiple items to be considered a single item
'a = any alphabetic character
'd = any digit
'n = any alphanumeric
's = any white space character
' = escape (match following metacharacter literally, '$ = $)
| = or (a|b) = a or b
` = or (a`b) = a or b (added to allow easier input from MS command lines)
all other characters must be matched exactly
Definition at line 234 of file pattern.hpp. 00235 : the_Pattern(pat), patp(0), Compiled_Pattern(pat.size()+1) 00236 { 00237 rch(); //! prime the pump 00238 setexits(expand(0), 0); //! generate Compiled_Pattern 00239 } |
|
|
collect stuff until we get an endstreamch or an unmatched ).
Definition at line 142 of file pattern.hpp. Referenced by Pattern(), and primative(). 00143 {
00144 unsigned short exits = 0;
00145 unsigned short a;
00146
00147 for (;;)
00148 {
00149 a = primative();
00150 if ((ch == '`') || (ch == '|') || (ch == ')') || (ch == endstreamch))
00151 {
00152 exits = join(exits, a);
00153
00154 if ( ( ch != '`' ) && ( ch != '|' ) )
00155 {
00156 return(exits);
00157 }
00158
00159 Compiled_Pattern[altp] = patp;
00160 altp = patp;
00161 nextitem();
00162 }
00163 else // must be just an 'item'
00164 {
00165 setexits(a, patp); //
00166 }
00167 }
00168 }
|
|
|
append a value (or list) to a list
Definition at line 127 of file pattern.hpp. Referenced by expand(). 00129 {
00130 if (list == 0) return(val);
00131 unsigned short save_head(list);
00132
00133 //! follow the list to the end
00134 while (Compiled_Pattern[list] != 0) list = Compiled_Pattern[list];
00135
00136 //! add new element to the end
00137 Compiled_Pattern[list] = val;
00138 return(save_head);
00139 }
|
|
|
get the next 'item' (remove squoted characters)
Definition at line 106 of file pattern.hpp. |
|
|
Definition at line 241 of file pattern.hpp. 00242 {
00243 StateSet a_states, b_states;
00244 unsigned short Position_to_Test(0);
00245 bool success(false); //!<working variable that holds success
00246 a_states.reserve(the_Pattern.size());
00247 b_states.reserve(the_Pattern.size());
00248 a_states.Put(1);
00249 a_states.Put(Compiled_Pattern[0]);
00250 for(;;)
00251 {
00252 //!for_all a_states .....
00253 for (StateSet::iterator i = a_states.begin(); i != a_states.end(); ++i)
00254 {
00255 unsigned short p = *i;
00256 unsigned short q = Compiled_Pattern[p];
00257 switch (the_Pattern[p-1])
00258 {
00259 case '#':
00260 a_states.Put(p+1);
00261 case '%':
00262 success |= a_states.Put(q);
00263 break;
00264 case '(':
00265 case '`':
00266 case '|':
00267 a_states.Put(p+1);
00268 a_states.Put(q);
00269 break;
00270 default:
00271 break;
00272 } // end switch (the_Pattern[p-1])
00273 } // end for (StateSet::iterator i = a_states.begin(); i != a_states.end(); ++i)
00274
00275 //! are we done? test for ONLY two (2) ways out of here
00276 if (Position_to_Test >= String_to_Test.size()) return success;
00277 if (!a_states.size()) return false;
00278
00279 //!Check the next character in the "input" string
00280 success = false;
00281 b_states.clear();
00282 a_states.swap(b_states);
00283 ch = tolower(String_to_Test[Position_to_Test++]); //! to make everything 'case independent'
00284
00285 //!for_all b_states .....
00286 for (i = b_states.begin(); i != b_states.end(); ++i)
00287 {
00288 unsigned short p = *i;
00289 switch (char k = tolower(the_Pattern[p-1])) //! to make everything 'case independent'
00290 {
00291 case '#':
00292 case '`':
00293 case '|':
00294 case '%':
00295 case '(':
00296 continue; // nothing to check here
00297 case '\'': // AHA!! one of our 'special' characters
00298 switch (k = tolower(the_Pattern[p])) //! to make everything 'case independent'
00299 {
00300 case 'a':
00301 if (isalpha(ch))
00302 k = ch; // so they match later
00303 break;
00304 case 'd':
00305 if (isdigit(ch))
00306 k = ch; // so they match later
00307 else
00308 k = '0'; // so they canNOT match
00309 break;
00310 case 'n':
00311 if (isalnum(ch))
00312 k = ch; // so they match later
00313 break;
00314 case 's':
00315 if (isspace(ch))
00316 k = ch; // so they match later
00317 else
00318 k = ' '; // so they canNOT match
00319 default:; //! nothing to do for 'non-special' characters
00320 } // end switch (k = tolower(the_Pattern[p]))
00321
00322 //!This is the important part... does the 'pattern' match the 'input string'
00323 default:
00324 if (ch == k)
00325 case '?': //! remember "?" matches anything
00326 success |= a_states.Put(Compiled_Pattern[p]);
00327 continue;
00328 } // end switch (char k = tolower(the_Pattern[p-1]))
00329 } // end for (i = b_states.begin(); i != b_states.end(); ++i)
00330 } // end for (;;) ...... no way out here
00331 }
|
|
|
get a primitive from the pattern.
Definition at line 179 of file pattern.hpp. Referenced by expand(). 00180 {
00181 unsigned short a;
00182 char op;
00183
00184 a = patp; // save where we're starting
00185 op = ch; // save current ch for later switching
00186 nextitem(); // get the next item (we're gonna consume this one)
00187 switch (op)
00188 {
00189 case '(':
00190 a = expand(a);
00191 if (ch != ')')
00192 throw std::runtime_error("bad pattern: expected )");
00193 nextitem(); // ch == ')' or we would have thrown
00194 return(a);
00195
00196 case endstreamch:
00197 case ')':
00198 case '`':
00199 case '|':
00200 throw std::runtime_error("bad pattern: unexpected ), `, |, or endstreamch");
00201
00202 case '#': // 0 or more of
00203 setexits(primative(), a); // loop back to try it again
00204 default:
00205 return(a);
00206 }
00207 }
|
|
|
get the next character (or 'endstreamch') from the pattern
Definition at line 97 of file pattern.hpp. Referenced by Pattern(), and nextitem(). 00098 {
00099 if (patp>=the_Pattern.size()) // if we're past the end
00100 return ch=endstreamch; // return a marker
00101 return ch=the_Pattern[patp++]; // otherwise return the character AND bump the pointer
00102 }
|
|
|
replace each element of a list starting at 'list' with val.
Definition at line 114 of file pattern.hpp. Referenced by Pattern(), expand(), and primative(). 00116 {
00117 while (list != 0)
00118 {
00119 unsigned short next(Compiled_Pattern[list]);
00120 Compiled_Pattern[list] = val;
00121 list = next;
00122 }
00123 }
|
|
|
holds 'next state' info.
Definition at line 92 of file pattern.hpp. |
|
|
current character from the_Pattern.
Definition at line 91 of file pattern.hpp. |
|
|
working pointer for the match function.
Definition at line 90 of file pattern.hpp. |
|
|
the actual pattern we're searching for.
Definition at line 89 of file pattern.hpp. |
1.2.3 written by Dimitri van Heesch,
© 1997-2000