#include <regexp.h>
PCRE was developed by Philip Hazel <ph10@cam.ac.uk> at the University of Cambridge. The following is what he wrote:
The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5, with just a few differences. The current implementation corresponds to Perl 5.005, with some additional features from later versions.
A regular expression is a pattern that is matched against a subject string from left to right. Most characters stand for themselves in a pattern, and match the corresponding characters in the subject. As a trivial example, the pattern
The quick brown fox
matches a portion of a subject string that is identical to itself. The power of regular expressions comes from the ability to include alternatives and repetitions in the pattern. These are encoded in the pattern by the use of meta-characters, which do not stand for themselves but instead are interpreted in some special way.
There are two different sets of meta-characters: those that are recognized anywhere in the pattern except within square brackets, and those that are recognized in square brackets. Outside square brackets, the meta-characters are as follows:
Definition at line 85 of file regexp.h.
Public Types | |
| enum | { anchored = PCRE_ANCHORED, caseless = PCRE_CASELESS, dollarend = PCRE_DOLLAR_ENDONLY, dotall = PCRE_DOTALL, extended = PCRE_EXTENDED, multiline = PCRE_MULTILINE, ungreedy = PCRE_UNGREEDY } |
Public Methods | |
| RegExp (unsigned opts=0) | |
| Constructs a regexp. | |
| RegExp (const std::string &s, unsigned opts=0) | |
| Constructs a regexp. | |
| RegExp (const char *s, unsigned opts=0) | |
| Constructs a regexp. | |
| RegExp (const RegExp &r) | |
| A copy constructor. | |
| ~RegExp () | |
| Destroys the object. | |
| const RegExp & | operator= (const std::string &s) |
| Assignment. | |
| const RegExp & | operator= (const char *s) |
| Assignment. | |
| const RegExp & | operator= (const RegExp &r) |
| Assignment. | |
| unsigned | options () |
| Returns the options. | |
| void | options (unsigned opts) |
| Sets the options. | |
| int | find (const std::string &s, std::vector< std::string > *subs=0, const unsigned offset=0) const |
| int | ngmatch (const std::string &s) const |
| std::string | replace (const std::string &s, const std::string &rep, const unsigned offset=0) const |
| std::vector< std::pair< int, int > > | match (const std::string &s, unsigned offset=0) const |
| std::vector< std::pair< int, int > > | gmatch (const std::string &s) const |
| std::vector< std::string > | split (const std::string &s, bool emptyfields=true, unsigned maxfields=0) const |
Static Public Methods | |
| std::string | substr (const std::string &s, const std::vector< std::pair< int, int > > &marks, unsigned index) |
| std::vector< std::string > | substr (const std::string &s, const std::vector< std::pair< int, int > > &marks) |
Private Methods | |
| void | compile (const std::string &s) |
| void | copy (const RegExp &r) |
Private Attributes | |
| pcre * | re |
| unsigned | _opts |
|
|
Definition at line 88 of file regexp.h.
|
|
|
Constructs a regexp.
|
|
||||||||||||
|
Constructs a regexp.
|
|
||||||||||||
|
Constructs a regexp.
|
|
|
A copy constructor.
|
|
|
Destroys the object.
Definition at line 103 of file regexp.h. References re. |
|
|
Definition at line 128 of file regexp.h. Referenced by operator=(), and RegExp().
|
|
|
Definition at line 138 of file regexp.h. References _opts, matvec::check_ptr(), and re. Referenced by operator=(), and RegExp().
00139 {
00140 if (this == &r) return;
00141 size_t size;
00142 pcre_fullinfo(r.re, 0, PCRE_INFO_SIZE, &size);
00143 if ( re ) delete[] re;
00144 if(size>0){
00145 re = (pcre *) new char[size];
00146 }
00147 else {
00148 re = 0;
00149 }
00150 check_ptr(re);
00151 memcpy(re, r.re, size);
00152 _opts = r._opts;
00153 return;
00154 }
|
|
||||||||||||||||
|
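Searches for the regexp in s, starting at offset, and returns the index of the first occurrence, or a negative PCRE error code if nothing matches. If subs is given, the matched text and each captured substring are appended to it; it is cleared instead when nothing matches. Definition at line 270 of file regexp.h. References matvec::check_ptr(), and re.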
00271 {
00272 if ( !re ) throw exception("find on uninitialized expression");
00273
00274 size_t msize;
00275 pcre_fullinfo(re, 0, PCRE_INFO_CAPTURECOUNT, &msize);
00276 msize = 3*(msize+1);
00277 int *m = new int[msize];
00278 check_ptr(m);
00279 int result = pcre_exec(re, 0, s.c_str(), s.length(), offset, 0, m, msize);
00280 int ret;
00281 if (result >= 0) {
00282 ret = m[0];
00283 } else {
00284 ret = result;
00285 }
00286
00287 if (subs) {
00288 if (result > 0) {
00289 subs->reserve(result);
00290 for (int j,i = 0; i < result ; i++) {
00291 j = i*2;
00292 if (m[j] == -1) {
00293 subs->push_back("");
00294 } else {
00295 subs->push_back(s.substr(m[j], m[j+1] - m[j]));
00296 }
00297 }
00298 } else {
00299 subs->clear();
00300 }
00301 }
00302 delete[] m;
00303 return ret;
00304 }
|
|
|
Finds all matches of the regular expression in the string and returns a vector of pair<int,int> with one element for each match. Captured substrings for each match are not reported. The strings corresponding to the individual matches can be retrieved using the substr() functions. Definition at line 194 of file regexp.h. References re. Referenced by split().
00195 {
00196 if ( !re ) throw exception("gmatch on uninitialized expression");
00197
00198 int m[3];
00199 vector<pair<int,int> > marks;
00200
00201 const char * str = s.c_str();
00202 unsigned offset = 0, len = s.length();
00203 while ( offset < len && pcre_exec(re, 0, str, len, offset, 0, m, 3) >= 0 ) {
00204 marks.push_back(make_pair(m[0], m[1]));
00205 offset = m[1];
00206 }
00207 return marks;
00208 }
|
|
||||||||||||
|
Matches a string against the compiled regular expression. It takes the string and an optional starting offset, whose default value is 0. An exception is thrown if the RegExp is uninitialized. It returns a vector of pair<int,int>. If the returned vector is empty, the string did not match. If the returned vector (let's call it v) is not empty, then the v[0] pair contains the offsets of the first and the last-plus-one characters in the match. The rest of the elements, v[i], contain the same information for the captured substrings. If a certain subpattern in the expression did not participate in the match, the corresponding vector element will contain the pair (-1, -1). Definition at line 169 of file regexp.h. References matvec::check_ptr(), and re.
00170 {
00171 if ( !re ) throw exception("match on uninitialized expression");
00172
00173 size_t msize;
00174 pcre_fullinfo(re, 0, PCRE_INFO_CAPTURECOUNT, &msize);
00175 msize = 3*(msize+1);
00176 int *m = new int[msize];
00177 check_ptr(m);
00178 int result;
00179 vector<pair<int,int> > marks;
00180
00181 result = pcre_exec(re, 0, s.c_str(), s.length(), offset, 0, m, msize);
00182 for ( int i = 0, *p = m ; i < result ; i++, p+=2 ) marks.push_back(make_pair(p[0], p[1]));
00183 delete[] m;
00184 return marks;
00185 }
|
|
|
Returns the number of matches. Definition at line 309 of file regexp.h. References re.
00310 {
00311 if ( !re ) throw exception("nmatch on uninitialized expression");
00312
00313 int ret = 0;
00314 int m[3];
00315 const char *str = s.c_str();
00316 unsigned offset = 0, len = s.length();
00317 while ( offset < len && pcre_exec(re, 0, str, len, offset, 0, m, 3) >= 0 ) {
00318 ret++;
00319 offset = m[1];
00320 }
00321 return ret++;
00322 }
|
|
|
Assignment.
Definition at line 107 of file regexp.h. References copy(). |
|
|
Assignment.
Definition at line 106 of file regexp.h. References compile(). |
|
|
Assignment.
Definition at line 105 of file regexp.h. References compile(). |
|
|
Sets the options.
Definition at line 111 of file regexp.h. References _opts. |
|
|
Returns the options.
Definition at line 110 of file regexp.h. References _opts. |
|
||||||||||||||||
|
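Replaces the first occurrence of the regexp found at or after offset with rep and returns the resulting string; if there is no match, the input string is returned unchanged. A "$" followed by a digit from 1 to 9 in rep is substituted with the corresponding captured substring. Definition at line 327 of file regexp.h. References matvec::check_ptr(), and re.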
00328 {
00329 if ( !re ) throw exception("replace on uninitialized expression");
00330
00331 size_t msize;
00332 pcre_fullinfo(re, 0, PCRE_INFO_CAPTURECOUNT, &msize);
00333 msize = 3*(msize+1);
00334 int *m = new int[msize];
00335 check_ptr(m);
00336 int i,j,k,len,nmat;
00337 len = str.length();
00338 if ((nmat = pcre_exec(re, 0, str.c_str(), len, offset, 0, m, msize)) < 0) return str;
00339
00340 string ret = rep;
00341 string::size_type begidx;
00342 begidx = ret.find_first_of("$",0);
00343 while (begidx != string::npos) {
00344 k = 1;
00345 if (isdigit(ret.at(begidx+1))) {
00346 i = ret[begidx + 1]- 48;
00347 if (i && i < nmat) {
00348 j = i*2;
00349 k = m[j+1] - m[j];
00350 ret.replace(begidx,2,str.substr(m[j],k));
00351 }
00352 }
00353 begidx = ret.find_first_of("$",begidx + k);
00354 }
00355
00356 if (m[1] < len) ret.append(str.substr(m[1],len - m[1]));
00357 if (m[0] > 0) ret = str.substr(0,m[0]) + ret;
00358 return ret;
00359 }
|
|
||||||||||||||||
|
Returns a vector of the substrings obtained by splitting string s at each match of the regexp. Definition at line 242 of file regexp.h. References gmatch(), re, and substr().
00243 {
00244 if ( !re ) throw exception("split on uninitialized expression");
00245 vector<pair<int,int> > m = gmatch(s);
00246 vector<pair<int,int> > marks;
00247
00248 int begin = 0, end;
00249 for ( int i = 0, nsep = m.size() ; i < nsep ; i++ ) {
00250 end = m[i].first;
00251 if ( emptyfields || end > begin )
00252 marks.push_back(make_pair(begin, end));
00253 begin = m[i].second;
00254 }
00255 end = s.length();
00256 if ( emptyfields || end > begin ) marks.push_back(make_pair(begin, end));
00257 unsigned nfields = marks.size();
00258 if ( maxfields && nfields > maxfields ) {
00259 marks[maxfields-1].second = marks[nfields-1].second;
00260 marks.erase(&marks[maxfields], marks.end());
00261 }
00262 return substr(s, marks);
00263 }
|
|
||||||||||||
|
Returns a vector of strings containing all the substrings in the match. Definition at line 230 of file regexp.h. References substr().
00231 {
00232 vector<string> v;
00233 unsigned size = marks.size();
00234
00235 for ( unsigned i = 0 ; i < size ; i++ ) v.push_back(substr(s, marks, i));
00236 return v;
00237 }
|
|
||||||||||||||||
|
Returns the substring of s selected by index, given the matched string s and the vector of pair<int,int> returned by match(). An empty string is returned if the selected subpattern did not participate in the match, and an exception is thrown if index is out of range. Definition at line 217 of file regexp.h. Referenced by split(), and substr().
00218 {
00219 if ( index >= marks.size() ) throw exception("bad substring index");
00220
00221 int begin = marks[index].first;
00222 if ( begin == -1 ) return "";
00223 int end = marks[index].second;
00224 return s.substr(begin, end-begin);
00225 }
|
|
|
|
|
|
Definition at line 121 of file regexp.h. Referenced by compile(), copy(), find(), gmatch(), match(), ngmatch(), RegExp(), replace(), split(), and ~RegExp(). |
1.2.16