00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2007 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 2, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // You should have received a copy of the GNU General Public License along 00017 // with this library; see the file COPYING. If not, write to the Free 00018 // Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 00019 // USA. 00020 00021 // As a special exception, you may use this file as part of a free software 00022 // library without restriction. Specifically, if other files instantiate 00023 // templates or use macros or inline functions from this file, or you compile 00024 // this file and link it with other files to produce an executable, this 00025 // file does not by itself cause the resulting executable to be covered by 00026 // the GNU General Public License. This exception does not however 00027 // invalidate any other reasons why the executable file might be covered by 00028 // the GNU General Public License. 00029 00030 /** 00031 * @file tr1/regex 00032 * @author Stephen M. Webb <stephen.webb@bregmasoft.ca> 00033 * This is a TR1 C++ Library header. 
00034 */ 00035 00036 #ifndef _TR1_REGEX 00037 #define _TR1_REGEX 1 00038 00039 #include <bits/c++config.h> 00040 #include <algorithm> 00041 #include <bitset> 00042 #include <iterator> 00043 #include <locale> 00044 #include <stdexcept> 00045 #include <string> 00046 #include <utility> 00047 #include <vector> 00048 00049 namespace std 00050 { 00051 _GLIBCXX_BEGIN_NAMESPACE(_GLIBCXX_TR1) 00052 00053 /** 00054 * @addtogroup tr1_regex Regular Expressions 00055 * A facility for performing regular expression pattern matching. 00056 * @{ 00057 */ 00058 00059 namespace regex_constants 00060 { 00061 // [7.5.1] Bitmask Type syntax_option_type 00062 enum __syntax_option 00063 { 00064 _S_icase, 00065 _S_nosubs, 00066 _S_optimize, 00067 _S_collate, 00068 _S_ECMAScript, 00069 _S_basic, 00070 _S_extended, 00071 _S_awk, 00072 _S_grep, 00073 _S_egrep, 00074 _S_syntax_last 00075 }; 00076 00077 /** 00078 * @brief This is a bitmask type indicating how to interpret the regex. 00079 * 00080 * The @c syntax_option_type is implementation defined but it is valid to 00081 * perform bitwise operations on these values and expect the right thing to 00082 * happen. 00083 * 00084 * A valid value of type syntax_option_type shall have exactly one of the 00085 * elements @c ECMAScript, @c basic, @c extended, @c awk, @c grep, @c egrep 00086 * set. 00087 */ 00088 typedef unsigned int syntax_option_type; 00089 00090 /// Specifies that the matching of regular expressions against a character 00091 /// sequence shall be performed without regard to case. 00092 static const syntax_option_type icase = 1 << _S_icase; 00093 00094 /// Specifies that when a regular expression is matched against a character 00095 /// container sequence, no sub-expression matches are to be stored in the 00096 /// supplied match_results structure. 
00097 static const syntax_option_type nosubs = 1 << _S_nosubs; 00098 00099 /// Specifies that the regular expression engine should pay more attention to 00100 /// the speed with which regular expressions are matched, and less to the 00101 /// speed with which regular expression objects are constructed. Otherwise 00102 /// it has no detectable effect on the program output. 00103 static const syntax_option_type optimize = 1 << _S_optimize; 00104 00105 /// Specifies that character ranges of the form [a-b] should be locale 00106 /// sensitive. 00107 static const syntax_option_type collate = 1 << _S_collate; 00108 00109 /// Specifies that the grammar recognized by the regular expression engine is 00110 /// that used by ECMAScript in ECMA-262 [Ecma International, ECMAScript 00111 /// Language Specification, Standard Ecma-262, third edition, 1999], as 00112 /// modified in tr1 section [7.13]. This grammar is similar to that defined 00113 /// in the PERL scripting language but extended with elements found in the 00114 /// POSIX regular expression grammar. 00115 static const syntax_option_type ECMAScript = 1 << _S_ECMAScript; 00116 00117 /// Specifies that the grammar recognized by the regular expression engine is 00118 /// that used by POSIX basic regular expressions in IEEE Std 1003.1-2001, 00119 /// Portable Operating System Interface (POSIX), Base Definitions and 00120 /// Headers, Section 9, Regular Expressions [IEEE, Information Technology -- 00121 /// Portable Operating System Interface (POSIX), IEEE Standard 1003.1-2001]. 00122 static const syntax_option_type basic = 1 << _S_basic; 00123 00124 /// Specifies that the grammar recognized by the regular expression engine is 00125 /// that used by POSIX extended regular expressions in IEEE Std 1003.1-2001, 00126 /// Portable Operating System Interface (POSIX), Base Definitions and Headers, 00127 /// Section 9, Regular Expressions. 
00128 static const syntax_option_type extended = 1 << _S_extended; 00129 00130 /// Specifies that the grammar recognized by the regular expression engine is 00131 /// that used by POSIX utility awk in IEEE Std 1003.1-2001. This option is 00132 /// identical to syntax_option_type extended, except that C-style escape 00133 /// sequences are supported. These sequences are, explicitly, '\\', '\a', 00134 /// '\b', '\f', '\n', '\r', '\t' , '\v', '\"', '\\', and '\ddd' (where ddd is 00135 /// one, two, or three octal digits). 00136 static const syntax_option_type awk = 1 << _S_awk; 00137 00138 /// Specifies that the grammar recognized by the regular expression engine is 00139 /// that used by POSIX utility grep in IEEE Std 1003.1-2001. This option is 00140 /// identical to syntax_option_type basic, except that newlines are treated 00141 /// as whitespace. 00142 static const syntax_option_type grep = 1 << _S_grep; 00143 00144 /// Specifies that the grammar recognized by the regular expression engine is 00145 /// that used by POSIX utility grep when given the -E option in 00146 /// IEEE Std 1003.1-2001. This option is identical to syntax_option_type 00147 /// extended, except that newlines are treated as whitespace. 00148 static const syntax_option_type egrep = 1 << _S_egrep; 00149 00150 00151 // [7.5.2] Bitmask Type match_flag_type 00152 enum __match_flag 00153 { 00154 _S_not_bol, 00155 _S_not_eol, 00156 _S_not_bow, 00157 _S_not_eow, 00158 _S_any, 00159 _S_not_null, 00160 _S_continuous, 00161 _S_prev_avail, 00162 _S_sed, 00163 _S_no_copy, 00164 _S_first_only, 00165 _S_match_flag_last 00166 }; 00167 00168 /** 00169 * @brief This is a bitmask type indicating regex matching rules. 00170 * 00171 * Matching a regular expression against a sequence of characters [first, 00172 * last) proceeds according to the rules of the grammar specified for the 00173 * regular expression object, modified according to the effects listed 00174 * below for any bitmask elements set. 
00175 * 00176 * The @c match_flag_type is implementation defined but it is valid to 00177 * perform bitwise operations on these values and expect the right thing to 00178 * happen. 00179 */ 00180 typedef std::bitset<_S_match_flag_last> match_flag_type; 00181 00182 static const match_flag_type match_default = 0; 00183 00184 /// The first character in the sequence [first, last) is treated as though it 00185 /// is not at the beginning of a line, so the character "^" in the regular 00186 /// expression shall not match [first, first). 00187 static const match_flag_type match_not_bol = 1 << _S_not_bol; 00188 00189 /// The last character in the sequence [first, last) is treated as though it 00190 /// is not at the end of a line, so the character "$" in the regular 00191 /// expression shall not match [last, last). 00192 static const match_flag_type match_not_eol = 1 << _S_not_eol; 00193 00194 /// The expression "\b" is not matched against the sub-sequence 00195 /// [first,first). 00196 static const match_flag_type match_not_bow = 1 << _S_not_bow; 00197 00198 /// The expression "\b" should not be matched against the sub-sequence 00199 /// [last,last). 00200 static const match_flag_type match_not_eow = 1 << _S_not_eow; 00201 00202 /// If more than one match is possible then any match is an acceptable 00203 /// result. 00204 static const match_flag_type match_any = 1 << _S_any; 00205 00206 /// The expression does not match an empty sequence. 00207 static const match_flag_type match_not_null = 1 << _S_not_null; 00208 00209 /// The expression only matchs a sub-sequence that begins at first . 00210 static const match_flag_type match_continuous = 1 << _S_continuous; 00211 00212 /// --first is a valid iterator position. When this flag is set then the 00213 /// flags match_not_bol and match_not_bow are ignored by the regular 00214 /// expression algorithms 7.11 and iterators 7.12. 
00215 static const match_flag_type match_prev_avail = 1 << _S_prev_avail; 00216 00217 /// When a regular expression match is to be replaced by a new string, the 00218 /// new string is constructed using the rules used by the ECMAScript replace 00219 /// function in ECMA- 262 [Ecma International, ECMAScript Language 00220 /// Specification, Standard Ecma-262, third edition, 1999], part 15.5.4.11 00221 /// String.prototype.replace. In addition, during search and replace 00222 /// operations all non-overlapping occurrences of the regular expression 00223 /// are located and replaced, and sections of the input that did not match 00224 /// the expression are copied unchanged to the output string. 00225 /// 00226 /// Format strings (from ECMA-262 [15.5.4.11]): 00227 /// $$ $ 00228 /// $& The matched substring. 00229 /// $` The portion of <em>string</em> that preceeds the matched substring. 00230 /// $' The portion of <em>string</em> that follows the matched substring. 00231 /// $n The nth capture, where n is in [1,9] and $n is not followed by a 00232 /// decimal digit. If n <= m and the nth capture is undefined, use the 00233 /// empty string 00234 /// instead. If n > m, the result is implementation-defined. 00235 /// $nn The nnth capture, where nn is a two-digit decimal number on [01, 99]. 00236 /// If nn <= m and the nth capture is undefined, use the empty string 00237 /// instead. If nn > m, the result is implementation-defined. 00238 /// 00239 static const match_flag_type format_default = 0; 00240 00241 /// When a regular expression match is to be replaced by a new string, the 00242 /// new string is constructed using the rules used by the POSIX sed utility 00243 /// in IEEE Std 1003.1- 2001 [IEEE, Information Technology -- Portable 00244 /// Operating System Interface (POSIX), IEEE Standard 1003.1-2001]. 
00245 static const match_flag_type format_sed = 1 << _S_sed; 00246 00247 /// During a search and replace operation, sections of the character 00248 /// container sequence being searched that do not match the regular 00249 /// expression shall not be copied to the output string. 00250 static const match_flag_type format_no_copy = 1 << _S_no_copy; 00251 00252 /// When specified during a search and replace operation, only the first 00253 /// occurrence of the regular expression shall be replaced. 00254 static const match_flag_type format_first_only = 1 << _S_first_only; 00255 00256 00257 // [7.5.3] implementation-defined error type 00258 enum error_type 00259 { 00260 _S_error_collate, 00261 _S_error_ctype, 00262 _S_error_escape, 00263 _S_error_backref, 00264 _S_error_brack, 00265 _S_error_paren, 00266 _S_error_brace, 00267 _S_error_badbrace, 00268 _S_error_range, 00269 _S_error_space, 00270 _S_error_badrepeat, 00271 _S_error_complexity, 00272 _S_error_stack, 00273 _S_error_last 00274 }; 00275 00276 /// The expression contained an invalid collating element name. 00277 static const error_type error_collate(_S_error_collate); 00278 00279 /// The expression contained an invalid character class name. 00280 static const error_type error_ctype(_S_error_ctype); 00281 00282 /// The expression contained an invalid escaped character, or a trailing 00283 /// escape. 00284 static const error_type error_escape(_S_error_escape); 00285 00286 /// The expression contained an invalid back reference. 00287 static const error_type error_backref(_S_error_backref); 00288 00289 /// The expression contained mismatched [ and ]. 00290 static const error_type error_brack(_S_error_brack); 00291 00292 /// The expression contained mismatched ( and ). 
00293 static const error_type error_paren(_S_error_paren); 00294 00295 /// The expression contained mismatched { and } 00296 static const error_type error_brace(_S_error_brace); 00297 00298 /// The expression contained an invalid range in a {} expression. 00299 static const error_type error_badbrace(_S_error_badbrace); 00300 00301 /// The expression contained an invalid character range, 00302 /// such as [b-a] in most encodings. 00303 static const error_type error_range(_S_error_range); 00304 00305 /// There was insufficient memory to convert the expression into a 00306 /// finite state machine. 00307 static const error_type error_space(_S_error_space); 00308 00309 /// One of *?+{ was not preceded by a valid regular expression. 00310 static const error_type error_badrepeat(_S_error_badrepeat); 00311 00312 /// The complexity of an attempted match against a regular expression 00313 /// exceeded a pre-set level. 00314 static const error_type error_complexity(_S_error_complexity); 00315 00316 /// There was insufficient memory to determine whether the 00317 /// regular expression could match the specified character sequence. 00318 static const error_type error_stack(_S_error_stack); 00319 } 00320 00321 00322 // [7.8] Class regex_error 00323 /** 00324 * Defines the type of objects thrown as exceptions to report errors from the 00325 * regular expression library. 00326 */ 00327 class regex_error 00328 : public std::runtime_error 00329 { 00330 public: 00331 /** 00332 * @brief constructs a regex_error object. 00333 * 00334 * @param ecode the regex error code. 00335 */ 00336 explicit 00337 regex_error(regex_constants::error_type __ecode) 00338 : std::runtime_error("regex_error"), _M_code(__ecode) 00339 { } 00340 00341 /** 00342 * @brief gets the regex error code. 00343 * 00344 * @returns the regex error code. 
00345 */ 00346 regex_constants::error_type 00347 code() const 00348 { return _M_code; } 00349 00350 protected: 00351 regex_constants::error_type _M_code; 00352 }; 00353 00354 00355 // [7.7] Class regex_traits 00356 /** 00357 * A regular expression traits class that satisfies the requirements of tr1 00358 * section [7.2]. 00359 * 00360 * The class %regex is parameterized around a set of related types and 00361 * functions used to complete the definition of its semantics. This class 00362 * satisfies the requirements of such a traits class. 00363 */ 00364 template<typename _Ch_type> 00365 struct regex_traits 00366 { 00367 public: 00368 typedef _Ch_type char_type; 00369 typedef std::basic_string<char_type> string_type; 00370 typedef std::locale locale_type; 00371 typedef std::ctype_base::mask char_class_type; 00372 00373 public: 00374 /** 00375 * @brief Constructs a default traits object. 00376 */ 00377 regex_traits() 00378 { } 00379 00380 /** 00381 * @brief Gives the length of a C-style string starting at @p __p. 00382 * 00383 * @param __p a pointer to the start of a character sequence. 00384 * 00385 * @returns the number of characters between @p *__p and the first 00386 * default-initialized value of type @p char_type. In other words, uses 00387 * the C-string algorithm for determiining the length of a sequence of 00388 * characters. 00389 */ 00390 static std::size_t 00391 length(const char_type* __p) 00392 { return string_type::traits_type::length(__p); } 00393 00394 /** 00395 * @brief Performs the identity translation. 00396 * 00397 * @param c A character to the locale-specific character set. 00398 * 00399 * @returns c. 00400 */ 00401 char_type 00402 translate(char_type __c) const 00403 { return __c; } 00404 00405 /** 00406 * @brief Translates a character into a case-insensitive equivalent. 00407 * 00408 * @param c A character to the locale-specific character set. 00409 * 00410 * @returns the locale-specific lower-case equivalent of c. 
00411 * @throws std::bad_cast if the imbued locale does not support the ctype 00412 * facet. 00413 */ 00414 char_type 00415 translate_nocase(char_type __c) const 00416 { 00417 using std::ctype; 00418 using std::use_facet; 00419 return use_facet<ctype<char_type> >(_M_locale).tolower(__c); 00420 } 00421 00422 /** 00423 * @brief Gets a sort key for a character sequence. 00424 * 00425 * @param first beginning of the character sequence. 00426 * @param last one-past-the-end of the character sequence. 00427 * 00428 * Returns a sort key for the character sequence designated by the 00429 * iterator range [F1, F2) such that if the character sequence [G1, G2) 00430 * sorts before the character sequence [H1, H2) then 00431 * v.transform(G1, G2) < v.transform(H1, H2). 00432 * 00433 * What this really does is provide a more efficient way to compare a 00434 * string to multiple other strings in locales with fancy collation 00435 * rules and equivalence classes. 00436 * 00437 * @returns a locale-specific sort key equivalent to the input range. 00438 * 00439 * @throws std::bad_cast if the current locale does not have a collate 00440 * facet. 00441 */ 00442 template<typename _Fwd_iter> 00443 string_type 00444 transform(_Fwd_iter __first, _Fwd_iter __last) const 00445 { 00446 using std::collate; 00447 using std::use_facet; 00448 const collate<_Ch_type>& __c(use_facet< 00449 collate<_Ch_type> >(_M_locale)); 00450 string_type __s(__first, __last); 00451 return __c.transform(__s.data(), __s.data() + __s.size()); 00452 } 00453 00454 /** 00455 * @brief Dunno. 00456 * 00457 * @param first beginning of the character sequence. 00458 * @param last one-past-the-end of the character sequence. 
00459 * 00460 * Effects: if typeid(use_facet<collate<_Ch_type> >) == 00461 * typeid(collate_byname<_Ch_type>) and the form of the sort key 00462 * returned by collate_byname<_Ch_type>::transform(first, last) is known 00463 * and can be converted into a primary sort key then returns that key, 00464 * otherwise returns an empty string. WTF?? 00465 * 00466 * @todo Implement this function. 00467 */ 00468 template<typename _Fwd_iter> 00469 string_type 00470 transform_primary(_Fwd_iter __first, _Fwd_iter __last) const 00471 { return string_type(); } 00472 00473 /** 00474 * @breief Gets a collation element by name. 00475 * 00476 * @param first beginning of the collation element name. 00477 * @param last one-past-the-end of the collation element name. 00478 * 00479 * @returns a sequence of one or more characters that represents the 00480 * collating element consisting of the character sequence designated by 00481 * the iterator range [first, last). Returns an empty string if the 00482 * character sequence is not a valid collating element. 00483 * 00484 * @todo Implement this function. 00485 */ 00486 template<typename _Fwd_iter> 00487 string_type 00488 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const 00489 { return string_type(); } 00490 00491 /** 00492 * @brief Maps one or mire characters to a named character 00493 * classification. 00494 * 00495 * @param first beginning of the character sequence. 00496 * @param last one-past-the-end of the character sequence. 00497 * 00498 * @returns an unspecified value that represents the character 00499 * classification named by the character sequence designated by the 00500 * iterator range [first, last). The value returned shall be independent 00501 * of the case of the characters in the character sequence. If the name 00502 * is not recognized then returns a value that compares equal to 0. 00503 * 00504 * At least the following names (or their wide-character equivalent) are 00505 * supported. 
00506 * - d 00507 * - w 00508 * - s 00509 * - alnum 00510 * - alpha 00511 * - blank 00512 * - cntrl 00513 * - digit 00514 * - graph 00515 * - lower 00516 * - print 00517 * - punct 00518 * - space 00519 * - upper 00520 * - xdigit 00521 * 00522 * @todo Implement this function. 00523 */ 00524 template<typename _Fwd_iter> 00525 char_class_type 00526 lookup_classname(_Fwd_iter __first, _Fwd_iter __last) const 00527 { return 0; } 00528 00529 /** 00530 * @brief Determines if @p c is a member of an identified class. 00531 * 00532 * @param c a character. 00533 * @param f a class type (as returned from lookup_classname). 00534 * 00535 * @returns true if the character @p c is a member of the classification 00536 * represented by @p f, false otherwise. 00537 * 00538 * @throws std::bad_cast if the current locale does not have a ctype 00539 * facet. 00540 */ 00541 bool 00542 isctype(_Ch_type __c, char_class_type __f) const 00543 { 00544 using std::ctype; 00545 using std::use_facet; 00546 const ctype<_Ch_type>& __ctype(use_facet< 00547 ctype<_Ch_type> >(_M_locale)); 00548 00549 if (__ctype.is(__c, __f)) 00550 return true; 00551 00552 // special case of underscore in [[:w:]] 00553 if (__c == __ctype.widen('_')) 00554 { 00555 const char* const __wb[] = "w"; 00556 char_class_type __wt = this->lookup_classname(__wb, 00557 __wb + sizeof(__wb)); 00558 if (__f | __wt) 00559 return true; 00560 } 00561 00562 // special case of [[:space:]] in [[:blank:]] 00563 if (__c == __ctype.isspace(__c)) 00564 { 00565 const char* const __bb[] = "blank"; 00566 char_class_type __bt = this->lookup_classname(__bb, 00567 __bb + sizeof(__bb)); 00568 if (__f | __bt) 00569 return true; 00570 } 00571 00572 return false; 00573 } 00574 00575 /** 00576 * @brief Converts a digit to an int. 00577 * 00578 * @param ch a character representing a digit. 00579 * @param radix the radix if the numeric conversion (limited to 8, 10, 00580 * or 16). 
00581 * 00582 * @returns the value represented by the digit ch in base radix if the 00583 * character ch is a valid digit in base radix; otherwise returns -1. 00584 * 00585 * @todo Implement this function. 00586 */ 00587 int 00588 value(_Ch_type __ch, int __radix) const; 00589 00590 /** 00591 * @brief Imbues the regex_traits object with a copy of a new locale. 00592 * 00593 * @param loc A locale. 00594 * 00595 * @returns a copy of the previous locale in use by the regex_traits 00596 * object. 00597 * 00598 * @note Calling imbue with a different locale than the one currently in 00599 * use invalidates all cached data held by *this. 00600 */ 00601 locale_type 00602 imbue(locale_type __loc) 00603 { 00604 std::swap(_M_locale, __loc); 00605 return __loc; 00606 } 00607 00608 /** 00609 * @brief Gets a copy of the current locale in use by the regex_traits 00610 * object. 00611 */ 00612 locale_type 00613 getloc() const 00614 { return _M_locale; } 00615 00616 protected: 00617 locale_type _M_locale; 00618 }; 00619 00620 00621 // [7.8] Class basic_regex 00622 /** 00623 * Objects of specializations of this class represent regular expressions 00624 * constructed from sequences of character type @p _Ch_type. 00625 * 00626 * Storage for the regular expression is allocated and deallocated as 00627 * necessary by the member functions of this class. 
00628 */ 00629 template<typename _Ch_type, typename _Rx_traits = regex_traits<_Ch_type> > 00630 class basic_regex 00631 { 00632 public: 00633 // types: 00634 typedef _Ch_type value_type; 00635 typedef regex_constants::syntax_option_type flag_type; 00636 typedef typename _Rx_traits::locale_type locale_type; 00637 typedef typename _Rx_traits::string_type string_type; 00638 00639 // [7.8.1] constants 00640 static const regex_constants::syntax_option_type icase 00641 = regex_constants::icase; 00642 static const regex_constants::syntax_option_type nosubs 00643 = regex_constants::nosubs; 00644 static const regex_constants::syntax_option_type optimize 00645 = regex_constants::optimize; 00646 static const regex_constants::syntax_option_type collate 00647 = regex_constants::collate; 00648 static const regex_constants::syntax_option_type ECMAScript 00649 = regex_constants::ECMAScript; 00650 static const regex_constants::syntax_option_type basic 00651 = regex_constants::basic; 00652 static const regex_constants::syntax_option_type extended 00653 = regex_constants::extended; 00654 static const regex_constants::syntax_option_type awk 00655 = regex_constants::awk; 00656 static const regex_constants::syntax_option_type grep 00657 = regex_constants::grep; 00658 static const regex_constants::syntax_option_type egrep 00659 = regex_constants::egrep; 00660 00661 // [7.8.2] construct/copy/destroy 00662 /** 00663 * Constructs a basic regular expression that does not match any 00664 * character sequence. 00665 */ 00666 basic_regex() 00667 : _M_flags(regex_constants::ECMAScript), _M_pattern(), _M_mark_count(0) 00668 { _M_compile(); } 00669 00670 /** 00671 * @brief Constructs a basic regular expression from the sequence 00672 * [p, p + char_traits<_Ch_type>::length(p)) interpreted according to the 00673 * flags in @p f. 00674 * 00675 * @param p A pointer to the start of a C-style null-terminated string 00676 * containing a regular expression. 
00677 * @param f Flags indicating the syntax rules and options. 00678 * 00679 * @throws regex_error if @p p is not a valid regular expression. 00680 */ 00681 explicit 00682 basic_regex(const _Ch_type* __p, 00683 flag_type __f = regex_constants::ECMAScript) 00684 : _M_flags(__f), _M_pattern(__p), _M_mark_count(0) 00685 { _M_compile(); } 00686 00687 /** 00688 * @brief Constructs a basic regular expression from the sequence 00689 * [p, p + len) interpreted according to the flags in @p f. 00690 * 00691 * @param p A pointer to the start of a string containing a regular 00692 * expression. 00693 * @param len The length of the string containing the regular expression. 00694 * @param f Flags indicating the syntax rules and options. 00695 * 00696 * @throws regex_error if @p p is not a valid regular expression. 00697 */ 00698 basic_regex(const _Ch_type* __p, std::size_t __len, flag_type __f) 00699 : _M_flags(__f) , _M_pattern(__p, __len), _M_mark_count(0) 00700 { _M_compile(); } 00701 00702 /** 00703 * @brief Copy-contructs a basic regular expression. 00704 * 00705 * @param rhs A @p regex object. 00706 */ 00707 basic_regex(const basic_regex& __rhs) 00708 : _M_flags(__rhs._M_flags), _M_pattern(__rhs._M_pattern), 00709 _M_mark_count(__rhs._M_mark_count) 00710 { _M_compile(); } 00711 00712 /** 00713 * @brief Constructs a basic regular expression from the string 00714 * @p interpreted according to the flags in @p f. 00715 * 00716 * @param p A string containing a regular expression. 00717 * @param f Flags indicating the syntax rules and options. 00718 * 00719 * @throws regex_error if @p p is not a valid regular expression. 
00720 */ 00721 template<typename _Ch_traits, typename _Ch_alloc> 00722 explicit 00723 basic_regex(const basic_string<_Ch_type, _Ch_traits, _Ch_alloc>& __s, 00724 flag_type __f = regex_constants::ECMAScript) 00725 : _M_flags(__f), _M_pattern(__s), _M_mark_count(0) 00726 { _M_compile(); } 00727 00728 /** 00729 * @brief Constructs a basic regular expression from the range 00730 * [first, last) interpreted according to the flags in @p f. 00731 * 00732 * @param first The start of arange containing a valid regular 00733 * expression. 00734 * @param last The end of a range containing a valid regular 00735 * expression. 00736 * @param f The format flags of the regular expression. 00737 * 00738 * @throws regex_error if @p p is not a valid regular expression. 00739 */ 00740 template<typename _InputIterator> 00741 basic_regex(_InputIterator __first, _InputIterator __last, 00742 flag_type __f = regex_constants::ECMAScript) 00743 : _M_flags(__f), _M_pattern(__first, __last), _M_mark_count(0) 00744 { _M_compile(); } 00745 00746 /** 00747 * @brief Destroys a basic regular expression. 00748 */ 00749 ~basic_regex() 00750 { } 00751 00752 /** 00753 * @brief Assigns one regular expression to another. 00754 */ 00755 basic_regex& 00756 operator=(const basic_regex& __rhs) 00757 { return this->assign(__rhs); } 00758 00759 /** 00760 * @brief Replaces a regular expression with a new one constructed from 00761 * a C-style null-terminated string. 00762 * 00763 * @param A pointer to the start of a null-terminated C-style string 00764 * containing a regular expression. 00765 */ 00766 basic_regex& 00767 operator=(const _Ch_type* __p) 00768 { return this->assign(__p, flags()); } 00769 00770 /** 00771 * @brief Replaces a regular expression with a new one constructed from 00772 * a string. 00773 * 00774 * @param A pointer to a string containing a regular expression. 
00775 */ 00776 template<typename _Ch_typeraits, typename _Allocator> 00777 basic_regex& 00778 operator=(const basic_string<_Ch_type, _Ch_typeraits, _Allocator>& __s) 00779 { return this->assign(__s, flags()); } 00780 00781 // [7.8.3] assign 00782 /** 00783 * @brief the real assignment operator. 00784 * 00785 * @param that Another regular expression object. 00786 */ 00787 basic_regex& 00788 assign(const basic_regex& __that) 00789 { 00790 basic_regex __tmp(__that); 00791 this->swap(__tmp); 00792 return *this; 00793 } 00794 00795 /** 00796 * @brief Assigns a new regular expression to a regex object from a 00797 * C-style null-terminated string containing a regular expression 00798 * pattern. 00799 * 00800 * @param p A pointer to a C-style null-terminated string containing 00801 * a regular expression pattern. 00802 * @param flags Syntax option flags. 00803 * 00804 * @throws regex_error if p does not contain a valid regular expression 00805 * pattern interpreted according to @p flags. If regex_error is thrown, 00806 * *this remains unchanged. 00807 */ 00808 basic_regex& 00809 assign(const _Ch_type* __p, 00810 flag_type __flags = regex_constants::ECMAScript) 00811 { return this->assign(string_type(__p), __flags); } 00812 00813 /** 00814 * @brief Assigns a new regular expression to a regex object from a 00815 * C-style string containing a regular expression pattern. 00816 * 00817 * @param p A pointer to a C-style string containing a 00818 * regular expression pattern. 00819 * @param len The length of the regular expression pattern string. 00820 * @param flags Syntax option flags. 00821 * 00822 * @throws regex_error if p does not contain a valid regular expression 00823 * pattern interpreted according to @p flags. If regex_error is thrown, 00824 * *this remains unchanged. 
00825 */ 00826 basic_regex& 00827 assign(const _Ch_type* __p, std::size_t __len, flag_type __flags) 00828 { return this->assign(string_type(__p, __len), __flags); } 00829 00830 /** 00831 * @brief Assigns a new regular expression to a regex object from a 00832 * string containing a regular expression pattern. 00833 * 00834 * @param s A string containing a regular expression pattern. 00835 * @param flags Syntax option flags. 00836 * 00837 * @throws regex_error if p does not contain a valid regular expression 00838 * pattern interpreted according to @p flags. If regex_error is thrown, 00839 * *this remains unchanged. 00840 */ 00841 template<typename _Ch_typeraits, typename _Allocator> 00842 basic_regex& 00843 assign(const basic_string<_Ch_type, _Ch_typeraits, _Allocator>& __s, 00844 flag_type __f = regex_constants::ECMAScript) 00845 { 00846 basic_regex __tmp(__s, __f); 00847 this->swap(__tmp); 00848 return *this; 00849 } 00850 00851 /** 00852 * @brief Assigns a new regular expression to a regex object. 00853 * 00854 * @param first The start of a range containing a valid regular 00855 * expression. 00856 * @param last The end of a range containing a valid regular 00857 * expression. 00858 * @param flags Syntax option flags. 00859 * 00860 * @throws regex_error if p does not contain a valid regular expression 00861 * pattern interpreted according to @p flags. If regex_error is thrown, 00862 * *this remains unchanged. 00863 */ 00864 template<typename _InputIterator> 00865 basic_regex& 00866 assign(_InputIterator __first, _InputIterator __last, 00867 flag_type __flags = regex_constants::ECMAScript) 00868 { return this->assign(string_type(__first, __last), __flags); } 00869 00870 // [7.8.4] const operations 00871 /** 00872 * @brief Gets the number of marked subexpressions within the regular 00873 * expresison. 
00874 */ 00875 unsigned int 00876 mark_count() const 00877 { return _M_mark_count; } 00878 00879 /** 00880 * @brief Gets the flags used to construct the regular expression 00881 * or in the last call to assign(). 00882 */ 00883 flag_type 00884 flags() const 00885 { return _M_flags; } 00886 00887 // [7.8.5] locale 00888 /** 00889 * @brief Imbues the regular expression object with the given locale. 00890 * 00891 * @param loc A locale. 00891 * @returns the value returned by the traits object's imbue() for @p loc. 00892 */ 00893 locale_type 00894 imbue(locale_type __loc) 00895 { return _M_traits.imbue(__loc); } 00896 00897 /** 00898 * @brief Gets the locale currently imbued in the regular expression 00899 * object, as reported by the traits object's getloc(). 00900 */ 00901 locale_type 00902 getloc() const 00903 { return _M_traits.getloc(); } 00904 00905 // [7.8.6] swap 00906 /** 00907 * @brief Swaps the contents of two regular expression objects (flags, pattern, mark count and traits). 00908 * 00909 * @param rhs Another regular expression object. 00910 */ 00911 void 00912 swap(basic_regex& __rhs) 00913 { 00914 std::swap(_M_flags, __rhs._M_flags); 00915 std::swap(_M_pattern, __rhs._M_pattern); 00916 std::swap(_M_mark_count, __rhs._M_mark_count); 00917 std::swap(_M_traits, __rhs._M_traits); 00918 } 00919 00920 private: 00921 /** 00922 * @brief Compiles a regular expression pattern into an NFA. 00923 * @todo Implement this function. 00924 */ 00925 void _M_compile() 00926 { } 00927 00928 protected: 00929 flag_type _M_flags; 00930 string_type _M_pattern; 00931 unsigned int _M_mark_count; 00932 _Rx_traits _M_traits; 00933 }; 00934 00935 typedef basic_regex<char> regex; 00936 #ifdef _GLIBCXX_USE_WCHAR_T 00937 typedef basic_regex<wchar_t> wregex; 00938 #endif 00939 00940 00941 // [7.8.6] basic_regex swap 00942 /** 00943 * @brief Swaps the contents of two regular expression objects. 00944 * @param lhs First regular expression. 00945 * @param rhs Second regular expression. 
00946 */ 00947 template<typename _Ch_type, typename _Rx_traits> 00948 inline void 00949 swap(basic_regex<_Ch_type, _Rx_traits>& __lhs, 00950 basic_regex<_Ch_type, _Rx_traits>& __rhs) 00951 { return __lhs.swap(__rhs); } 00952 00953 00954 // [7.9] Class template sub_match 00955 /** 00956 * A sequence of characters matched by a particular marked sub-expression. 00957 * 00958 * An object of this class is essentially a pair of iterators marking a 00959 * matched subexpression within a regular expression pattern match. Such 00960 * objects can be converted to and compared with std::basic_string objects 00961 * of a similar base character type as the pattern matched by the regular 00962 * expression. 00963 * 00964 * The iterators that make up the pair are the usual half-open interval 00965 * referencing the actual original pattern matched. 00966 */ 00967 template<typename _BiIter> 00968 class sub_match : public std::pair<_BiIter, _BiIter> 00969 { 00970 public: 00971 typedef typename iterator_traits<_BiIter>::value_type value_type; 00972 typedef typename iterator_traits<_BiIter>::difference_type 00973 difference_type; 00974 typedef _BiIter iterator; 00975 00976 public: 00977 bool matched; 00978 00979 /** 00980 * @brief Gets the length of the matching sequence. 00980 * 00980 * @returns the distance from @c first to @c second if @c matched is true, otherwise 0. 00981 */ 00982 difference_type 00983 length() const 00984 { return this->matched ? std::distance(this->first, this->second) : 0; } 00985 00986 /** 00987 * @brief Gets the matching sequence as a string. 00988 * 00989 * @returns the matching sequence as a string, or an empty string if @c matched is false. 00990 * 00991 * This is the implicit conversion operator. It is identical to the 00992 * str() member function except that it will want to pop up in 00993 * unexpected places and cause a great deal of confusion and cursing 00994 * from the unwary. 00995 */ 00996 operator basic_string<value_type>() const 00997 { 00998 return this->matched 00999 ? 
std::basic_string<value_type>(this->first, this->second) 01000 : std::basic_string<value_type>(); 01001 } 01002 01003 /** 01004 * @brief Gets the matching sequence as a string. 01005 * 01006 * @returns the matching sequence as a string, or an empty string if @c matched is false. 01007 */ 01008 basic_string<value_type> 01009 str() const 01010 { 01011 return this->matched 01012 ? std::basic_string<value_type>(this->first, this->second) 01013 : std::basic_string<value_type>(); 01014 } 01015 01016 /** 01017 * @brief Compares this and another matched sequence. 01018 * 01019 * @param s Another matched sequence to compare to this one. 01020 * 01021 * @retval <0 this matched sequence will collate before @p s. 01022 * @retval =0 this matched sequence is equivalent to @p s. 01023 * @retval >0 this matched sequence will collate after @p s. 01024 */ 01025 int 01026 compare(const sub_match& __s) const 01027 { return this->str().compare(__s.str()); } 01028 01029 /** 01030 * @brief Compares this sub_match to a string. 01031 * 01032 * @param s A string to compare to this sub_match. 01033 * 01034 * @retval <0 this matched sequence will collate before @p s. 01035 * @retval =0 this matched sequence is equivalent to @p s. 01036 * @retval >0 this matched sequence will collate after @p s. 01037 */ 01038 int 01039 compare(const basic_string<value_type>& __s) const 01040 { return this->str().compare(__s); } 01041 01042 /** 01043 * @brief Compares this sub_match to a C-style string. 01044 * 01045 * @param s A C-style string to compare to this sub_match. 01046 * 01047 * @retval <0 this matched sequence will collate before @p s. 01048 * @retval =0 this matched sequence is equivalent to @p s. 01049 * @retval >0 this matched sequence will collate after @p s. 
01050 */ 01051 int 01052 compare(const value_type* __s) const 01053 { return this->str().compare(__s); } 01054 }; 01055 01056 01057 typedef sub_match<const char*> csub_match; 01058 typedef sub_match<string::const_iterator> ssub_match; 01059 #ifdef _GLIBCXX_USE_WCHAR_T 01060 typedef sub_match<const wchar_t*> wcsub_match; 01061 typedef sub_match<wstring::const_iterator> wssub_match; 01062 #endif 01063 01064 // [7.9.2] sub_match non-member operators 01065 01066 /** 01067 * @brief Tests the equivalence of two regular expression submatches. 01068 * @param lhs First regular expression submatch. 01069 * @param rhs Second regular expression submatch. 01070 * @returns true if @a lhs is equivalent to @a rhs (that is, lhs.compare(rhs) == 0), false otherwise. 01071 */ 01072 template<typename _BiIter> 01073 inline bool 01074 operator==(const sub_match<_BiIter>& __lhs, 01075 const sub_match<_BiIter>& __rhs) 01076 { return __lhs.compare(__rhs) == 0; } 01077 01078 /** 01079 * @brief Tests the inequivalence of two regular expression submatches. 01080 * @param lhs First regular expression submatch. 01081 * @param rhs Second regular expression submatch. 01082 * @returns true if @a lhs is not equivalent to @a rhs (that is, lhs.compare(rhs) != 0), false otherwise. 01083 */ 01084 template<typename _BiIter> 01085 inline bool 01086 operator!=(const sub_match<_BiIter>& __lhs, 01087 const sub_match<_BiIter>& __rhs) 01088 { return __lhs.compare(__rhs) != 0; } 01089 01090 /** 01091 * @brief Tests the ordering of two regular expression submatches. 01092 * @param lhs First regular expression submatch. 01093 * @param rhs Second regular expression submatch. 01094 * @returns true if @a lhs precedes @a rhs, false otherwise. 01095 */ 01096 template<typename _BiIter> 01097 inline bool