tr1/regex

Go to the documentation of this file.
00001 // class template regex -*- C++ -*-
00002 
00003 // Copyright (C) 2007 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 2, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // You should have received a copy of the GNU General Public License along
00017 // with this library; see the file COPYING.  If not, write to the Free
00018 // Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
00019 // USA.
00020 
00021 // As a special exception, you may use this file as part of a free software
00022 // library without restriction.  Specifically, if other files instantiate
00023 // templates or use macros or inline functions from this file, or you compile
00024 // this file and link it with other files to produce an executable, this
00025 // file does not by itself cause the resulting executable to be covered by
00026 // the GNU General Public License.  This exception does not however
00027 // invalidate any other reasons why the executable file might be covered by
00028 // the GNU General Public License.
00029 
00030 /**
00031  * @file tr1/regex
00032  * @author Stephen M. Webb  <stephen.webb@bregmasoft.ca>
00033  * This is a TR1 C++ Library header. 
00034  */
00035 
00036 #ifndef _TR1_REGEX
00037 #define _TR1_REGEX 1
00038 
00039 #include <bits/c++config.h>
00040 #include <algorithm>
00041 #include <bitset>
00042 #include <iterator>
00043 #include <locale>
00044 #include <stdexcept>
00045 #include <string>
00046 #include <utility>
00047 #include <vector>
00048 
00049 namespace std
00050 {
00051 _GLIBCXX_BEGIN_NAMESPACE(_GLIBCXX_TR1)
00052 
00053 /**
00054  * @addtogroup tr1_regex Regular Expressions
00055  * A facility for performing regular expression pattern matching.
00056  * @{
00057  */
00058 
00059 namespace regex_constants
00060 {
00061   // [7.5.1] Bitmask Type syntax_option_type
00062   enum __syntax_option  // bit indices: each syntax_option_type constant is 1 << one of these
00063     {
00064       _S_icase,
00065       _S_nosubs,
00066       _S_optimize,
00067       _S_collate,
00068       _S_ECMAScript,
00069       _S_basic,
00070       _S_extended,
00071       _S_awk,
00072       _S_grep,
00073       _S_egrep,
00074       _S_syntax_last  // one past the last index; not referenced in this part of the file
00075     };
00076 
00077   /**
00078    * @brief This is a bitmask type indicating how to interpret the regex.
00079    *
00080    * The @c syntax_option_type is implementation defined but it is valid to
00081    * perform bitwise operations on these values and expect the right thing to
00082    * happen.
00083    *
00084    * A valid value of type syntax_option_type shall have exactly one of the
00085    * elements @c ECMAScript, @c basic, @c extended, @c awk, @c grep, @c egrep
00086    * set.
00087    */
00088   typedef unsigned int syntax_option_type;
00089 
00090   /// Specifies that the matching of regular expressions against a character
00091   /// sequence shall be performed without regard to case.
00092   static const syntax_option_type icase      = 1 << _S_icase;
00093 
00094   /// Specifies that when a regular expression is matched against a character
00095   /// container sequence, no sub-expression matches are to be stored in the
00096   /// supplied match_results structure.
00097   static const syntax_option_type nosubs     = 1 << _S_nosubs;
00098 
00099   /// Specifies that the regular expression engine should pay more attention to
00100   /// the speed with which regular expressions are matched, and less to the
00101   /// speed with which regular expression objects are constructed. Otherwise
00102   /// it has no detectable effect on the program output.
00103   static const syntax_option_type optimize   = 1 << _S_optimize;
00104 
00105   /// Specifies that character ranges of the form [a-b] should be locale
00106   /// sensitive.
00107   static const syntax_option_type collate    = 1 << _S_collate;
00108 
00109   /// Specifies that the grammar recognized by the regular expression engine is
00110   /// that used by ECMAScript in ECMA-262 [Ecma International, ECMAScript
00111   /// Language Specification, Standard Ecma-262, third edition, 1999], as
00112   /// modified in tr1 section [7.13].  This grammar is similar to that defined
00113   /// in the PERL scripting language but extended with elements found in the
00114   /// POSIX regular expression grammar.
00115   static const syntax_option_type ECMAScript = 1 << _S_ECMAScript;
00116 
00117   /// Specifies that the grammar recognized by the regular expression engine is
00118   /// that used by POSIX basic regular expressions in IEEE Std 1003.1-2001,
00119   /// Portable Operating System Interface (POSIX), Base Definitions and
00120   /// Headers, Section 9, Regular Expressions [IEEE, Information Technology --
00121   /// Portable Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
00122   static const syntax_option_type basic      = 1 << _S_basic;
00123 
00124   /// Specifies that the grammar recognized by the regular expression engine is
00125   /// that used by POSIX extended regular expressions in IEEE Std 1003.1-2001,
00126   /// Portable Operating System Interface (POSIX), Base Definitions and Headers,
00127   /// Section 9, Regular Expressions.
00128   static const syntax_option_type extended   = 1 << _S_extended;
00129 
00130   /// Specifies that the grammar recognized by the regular expression engine is
00131   /// that used by POSIX utility awk in IEEE Std 1003.1-2001.  This option is
00132   /// identical to syntax_option_type extended, except that C-style escape
00133   /// sequences are supported.  These sequences are, explicitly, '\\', '\a',
00134   /// '\b', '\f', '\n', '\r', '\t' , '\v', '\"', '\\', and '\ddd' (where ddd is
00135   /// one, two, or three octal digits).  
00136   static const syntax_option_type awk        = 1 << _S_awk;
00137 
00138   /// Specifies that the grammar recognized by the regular expression engine is
00139   /// that used by POSIX utility grep in IEEE Std 1003.1-2001.  This option is
00140   /// identical to syntax_option_type basic, except that newlines are treated
00141   /// as whitespace.
00142   static const syntax_option_type grep       = 1 << _S_grep;
00143 
00144   /// Specifies that the grammar recognized by the regular expression engine is
00145   /// that used by POSIX utility grep when given the -E option in
00146   /// IEEE Std 1003.1-2001.  This option is identical to syntax_option_type 
00147   /// extended, except that newlines are treated as whitespace.
00148   static const syntax_option_type egrep      = 1 << _S_egrep;
00149 
00150 
00151   // [7.5.2] Bitmask Type match_flag_type
00152   enum __match_flag  // bit indices: each match_flag_type constant is 1 << one of these
00153     {
00154       _S_not_bol,
00155       _S_not_eol,
00156       _S_not_bow,
00157       _S_not_eow,
00158       _S_any,
00159       _S_not_null,
00160       _S_continuous,
00161       _S_prev_avail,
00162       _S_sed,
00163       _S_no_copy,
00164       _S_first_only,
00165       _S_match_flag_last  // number of flags; used as the bitset width of match_flag_type
00166     };
00167 
00168   /**
00169    * @brief This is a bitmask type indicating regex matching rules.
00170    *
00171    * Matching a regular expression against a sequence of characters [first,
00172    * last) proceeds according to the rules of the grammar specified for the
00173    * regular expression object, modified according to the effects listed
00174    * below for any bitmask elements set.
00175    *
00176    * The @c match_flag_type is implementation defined but it is valid to
00177    * perform bitwise operations on these values and expect the right thing to
00178    * happen.
00179    */
00180   typedef std::bitset<_S_match_flag_last> match_flag_type;
00181 
00182   static const match_flag_type match_default     = 0;
00183 
00184   /// The first character in the sequence [first, last) is treated as though it
00185   /// is not at the beginning of a line, so the character "^" in the regular
00186   /// expression shall not match [first, first).
00187   static const match_flag_type match_not_bol     = 1 << _S_not_bol;
00188 
00189   /// The last character in the sequence [first, last) is treated as though it
00190   /// is not at the end of a line, so the character "$" in the regular
00191   /// expression shall not match [last, last).
00192   static const match_flag_type match_not_eol     = 1 << _S_not_eol;
00193    
00194   /// The expression "\b" is not matched against the sub-sequence
00195   /// [first,first).
00196   static const match_flag_type match_not_bow     = 1 << _S_not_bow;
00197    
00198   /// The expression "\b" should not be matched against the sub-sequence
00199   /// [last,last).
00200   static const match_flag_type match_not_eow     = 1 << _S_not_eow;
00201    
00202   /// If more than one match is possible then any match is an acceptable
00203   /// result.
00204   static const match_flag_type match_any         = 1 << _S_any;
00205    
00206   /// The expression does not match an empty sequence.
00207   static const match_flag_type match_not_null    = 1 << _S_not_null;
00208    
00209   /// The expression only matches a sub-sequence that begins at first.
00210   static const match_flag_type match_continuous  = 1 << _S_continuous;
00211    
00212   /// --first is a valid iterator position.  When this flag is set then the
00213   /// flags match_not_bol and match_not_bow are ignored by the regular
00214   /// expression algorithms 7.11 and iterators 7.12.
00215   static const match_flag_type match_prev_avail  = 1 << _S_prev_avail;
00216 
00217   /// When a regular expression match is to be replaced by a new string, the
00218   /// new string is constructed using the rules used by the ECMAScript replace
00219   /// function in ECMA-262 [Ecma International, ECMAScript Language
00220   /// Specification, Standard Ecma-262, third edition, 1999], part 15.5.4.11
00221   /// String.prototype.replace. In addition, during search and replace
00222   /// operations all non-overlapping occurrences of the regular expression
00223   /// are located and replaced, and sections of the input that did not match
00224   /// the expression are copied unchanged to the output string.
00225   ///
00226   /// Format strings (from ECMA-262 [15.5.4.11]):
00227   /// $$  $
00228   /// $&  The matched substring.
00229   /// $`  The portion of <em>string</em> that precedes the matched substring.
00230   /// $'  The portion of <em>string</em> that follows the matched substring.
00231   /// $n  The nth capture, where n is in [1,9] and $n is not followed by a
00232   ///     decimal digit.  If n <= m and the nth capture is undefined, use the
00233   ///     empty string
00234   ///     instead. If n > m, the result is implementation-defined.
00235   /// $nn The nnth capture, where nn is a two-digit decimal number in [01, 99].
00236   ///     If nn <= m and the nnth capture is undefined, use the empty string
00237   ///     instead. If nn > m, the result is implementation-defined.
00238   ///
00239   static const match_flag_type format_default    = 0;
00240 
00241   /// When a regular expression match is to be replaced by a new string, the
00242   /// new string is constructed using the rules used by the POSIX sed utility
00243   /// in IEEE Std 1003.1-2001 [IEEE, Information Technology -- Portable
00244   /// Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
00245   static const match_flag_type format_sed        = 1 << _S_sed;
00246 
00247   /// During a search and replace operation, sections of the character
00248   /// container sequence being searched that do not match the regular
00249   /// expression shall not be copied to the output string.
00250   static const match_flag_type format_no_copy    = 1 << _S_no_copy;
00251 
00252   /// When specified during a search and replace operation, only the first
00253   /// occurrence of the regular expression shall be replaced.
00254   static const match_flag_type format_first_only = 1 << _S_first_only;
00255 
00256 
00257   // [7.5.3] implementation-defined error type
00258   enum error_type  // plain error codes: the error_* constants hold these values directly
00259     {
00260       _S_error_collate,
00261       _S_error_ctype,
00262       _S_error_escape,
00263       _S_error_backref,
00264       _S_error_brack,
00265       _S_error_paren,
00266       _S_error_brace,
00267       _S_error_badbrace,
00268       _S_error_range,
00269       _S_error_space,
00270       _S_error_badrepeat,
00271       _S_error_complexity,
00272       _S_error_stack,
00273       _S_error_last  // one past the last error code; not referenced in this part of the file
00274     };
00275 
00276   /// The expression contained an invalid collating element name.
00277   static const error_type error_collate(_S_error_collate);
00278 
00279   /// The expression contained an invalid character class name.
00280   static const error_type error_ctype(_S_error_ctype);
00281 
00282   /// The expression contained an invalid escaped character, or a trailing
00283   /// escape.
00284   static const error_type error_escape(_S_error_escape);
00285 
00286   /// The expression contained an invalid back reference.
00287   static const error_type error_backref(_S_error_backref);
00288 
00289   /// The expression contained mismatched [ and ].
00290   static const error_type error_brack(_S_error_brack);
00291 
00292   /// The expression contained mismatched ( and ).
00293   static const error_type error_paren(_S_error_paren);
00294 
00295   /// The expression contained mismatched { and }
00296   static const error_type error_brace(_S_error_brace);
00297 
00298   /// The expression contained an invalid range in a {} expression.
00299   static const error_type error_badbrace(_S_error_badbrace);
00300 
00301   /// The expression contained an invalid character range,
00302   /// such as [b-a] in most encodings.
00303   static const error_type error_range(_S_error_range);
00304 
00305   /// There was insufficient memory to convert the expression into a
00306   /// finite state machine.
00307   static const error_type error_space(_S_error_space);
00308 
00309   /// One of *?+{ was not preceded by a valid regular expression.
00310   static const error_type error_badrepeat(_S_error_badrepeat);
00311 
00312   /// The complexity of an attempted match against a regular expression
00313   /// exceeded a pre-set level.
00314   static const error_type error_complexity(_S_error_complexity);
00315 
00316   /// There was insufficient memory to determine whether the
00317   /// regular expression could match the specified character sequence.
00318   static const error_type error_stack(_S_error_stack);
00319 }
00320 
00321 
00322   // [7.6] Class regex_error
00323   /**
00324    * Defines the type of objects thrown as exceptions to report errors from the
00325    * regular expression library.
00326    */
00327   class regex_error
00328   : public std::runtime_error
00329   {
00330   public:
00331     /**
00332      * @brief constructs a regex_error object.
00333      *
00334      * @param ecode the regex error code.
00335      */
00336     explicit
00337     regex_error(regex_constants::error_type __ecode)
00338     : std::runtime_error("regex_error"), _M_code(__ecode)
00339     { }
00340 
00341     /**
00342      * @brief gets the regex error code.
00343      *
00344      * @returns the regex error code.
00345      */
00346     regex_constants::error_type
00347     code() const
00348     { return _M_code; }
00349 
00350   protected:
00351     regex_constants::error_type _M_code;
00352   };
00353 
00354 
00355   // [7.7] Class regex_traits
00356   /**
00357    * A regular expression traits class that satisfies the requirements of tr1
00358    * section [7.2].
00359    *
00360    * The class %regex is parameterized around a set of related types and
00361    * functions used to complete the definition of its semantics.  This class
00362    * satisfies the requirements of such a traits class.
00363    */
00364   template<typename _Ch_type>
00365     struct regex_traits
00366     {
00367     public:
00368       typedef _Ch_type                     char_type;
00369       typedef std::basic_string<char_type> string_type;
00370       typedef std::locale                  locale_type;
00371       typedef std::ctype_base::mask        char_class_type;
00372 
00373     public:
00374       /**
00375        * @brief Constructs a default traits object.
00376        */
00377       regex_traits()
00378       { }
00379       
00380       /**
00381        * @brief Gives the length of a C-style string starting at @p __p.
00382        *
00383        * @param __p a pointer to the start of a character sequence.
00384        *
00385        * @returns the number of characters between @p *__p and the first
00386        * default-initialized value of type @p char_type.  In other words, uses
00387        * the C-string algorithm for determiining the length of a sequence of
00388        * characters.
00389        */
00390       static std::size_t
00391       length(const char_type* __p)
00392       { return string_type::traits_type::length(__p); }
00393 
00394       /**
00395        * @brief Performs the identity translation.
00396        *
00397        * @param c A character to the locale-specific character set.
00398        *
00399        * @returns c.
00400        */
00401       char_type
00402       translate(char_type __c) const
00403       { return __c; }
00404       
00405       /**
00406        * @brief Translates a character into a case-insensitive equivalent.
00407        *
00408        * @param c A character to the locale-specific character set.
00409        *
00410        * @returns the locale-specific lower-case equivalent of c.
00411        * @throws std::bad_cast if the imbued locale does not support the ctype
00412        *         facet.
00413        */
00414       char_type
00415       translate_nocase(char_type __c) const
00416       {
00417     using std::ctype;
00418     using std::use_facet;
00419     return use_facet<ctype<char_type> >(_M_locale).tolower(__c);
00420       }
00421       
00422       /**
00423        * @brief Gets a sort key for a character sequence.
00424        *
00425        * @param first beginning of the character sequence.
00426        * @param last  one-past-the-end of the character sequence.
00427        *
00428        * Returns a sort key for the character sequence designated by the
00429        * iterator range [F1, F2) such that if the character sequence [G1, G2)
00430        * sorts before the character sequence [H1, H2) then
00431        * v.transform(G1, G2) < v.transform(H1, H2).
00432        *
00433        * What this really does is provide a more efficient way to compare a
00434        * string to multiple other strings in locales with fancy collation
00435        * rules and equivalence classes.
00436        *
00437        * @returns a locale-specific sort key equivalent to the input range.
00438        *
00439        * @throws std::bad_cast if the current locale does not have a collate
00440        *         facet.
00441        */
00442       template<typename _Fwd_iter>
00443         string_type
00444         transform(_Fwd_iter __first, _Fwd_iter __last) const
00445         {
00446       using std::collate;
00447       using std::use_facet;
00448       const collate<_Ch_type>& __c(use_facet<
00449                        collate<_Ch_type> >(_M_locale));
00450       string_type __s(__first, __last);
00451       return __c.transform(__s.data(), __s.data() + __s.size());
00452     }
00453 
00454       /**
00455        * @brief Dunno.
00456        *
00457        * @param first beginning of the character sequence.
00458        * @param last  one-past-the-end of the character sequence.
00459        *
00460        * Effects: if typeid(use_facet<collate<_Ch_type> >) ==
00461        * typeid(collate_byname<_Ch_type>) and the form of the sort key
00462        * returned by collate_byname<_Ch_type>::transform(first, last) is known
00463        * and can be converted into a primary sort key then returns that key,
00464        * otherwise returns an empty string. WTF??
00465        *
00466        * @todo Implement this function.
00467        */
00468       template<typename _Fwd_iter>
00469         string_type
00470         transform_primary(_Fwd_iter __first, _Fwd_iter __last) const
00471         { return string_type(); }
00472 
00473       /**
00474        * @breief Gets a collation element by name.
00475        *
00476        * @param first beginning of the collation element name.
00477        * @param last  one-past-the-end of the collation element name.
00478        * 
00479        * @returns a sequence of one or more characters that represents the
00480        * collating element consisting of the character sequence designated by
00481        * the iterator range [first, last). Returns an empty string if the
00482        * character sequence is not a valid collating element.
00483        *
00484        * @todo Implement this function.
00485        */
00486       template<typename _Fwd_iter>
00487         string_type
00488         lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
00489         { return string_type(); }
00490 
00491       /**
00492        * @brief Maps one or mire characters to a named character
00493        *        classification.
00494        *
00495        * @param first beginning of the character sequence.
00496        * @param last  one-past-the-end of the character sequence.
00497        *
00498        * @returns an unspecified value that represents the character
00499        * classification named by the character sequence designated by the
00500        * iterator range [first, last). The value returned shall be independent
00501        * of the case of the characters in the character sequence. If the name
00502        * is not recognized then returns a value that compares equal to 0.
00503        *
00504        * At least the following names (or their wide-character equivalent) are
00505        * supported.
00506        * - d
00507        * - w
00508        * - s
00509        * - alnum
00510        * - alpha
00511        * - blank
00512        * - cntrl
00513        * - digit
00514        * - graph
00515        * - lower
00516        * - print
00517        * - punct
00518        * - space
00519        * - upper
00520        * - xdigit
00521        *
00522        * @todo Implement this function.
00523        */
00524       template<typename _Fwd_iter>
00525         char_class_type
00526         lookup_classname(_Fwd_iter __first, _Fwd_iter __last) const
00527         { return 0; }
00528 
00529       /**
00530        * @brief Determines if @p c is a member of an identified class.
00531        *
00532        * @param c a character.
00533        * @param f a class type (as returned from lookup_classname).
00534        *
00535        * @returns true if the character @p c is a member of the classification
00536        * represented by @p f, false otherwise.
00537        *
00538        * @throws std::bad_cast if the current locale does not have a ctype
00539        *         facet.
00540        */
00541       bool
00542       isctype(_Ch_type __c, char_class_type __f) const
00543       {
00544     using std::ctype;
00545     using std::use_facet;
00546     const ctype<_Ch_type>& __ctype(use_facet<
00547                        ctype<_Ch_type> >(_M_locale));
00548     
00549     if (__ctype.is(__c, __f))
00550       return true;
00551     
00552     // special case of underscore in [[:w:]]
00553     if (__c == __ctype.widen('_'))
00554       {
00555         const char* const __wb[] = "w";
00556         char_class_type __wt = this->lookup_classname(__wb,
00557                               __wb + sizeof(__wb));
00558         if (__f | __wt)
00559           return true;
00560       }
00561       
00562     // special case of [[:space:]] in [[:blank:]]
00563     if (__c == __ctype.isspace(__c))
00564       {
00565         const char* const __bb[] = "blank";
00566         char_class_type __bt = this->lookup_classname(__bb,
00567                               __bb + sizeof(__bb));
00568         if (__f | __bt)
00569           return true;
00570       }
00571     
00572     return false;
00573       }
00574 
00575       /**
00576        * @brief Converts a digit to an int.
00577        *
00578        * @param ch    a character representing a digit.
00579        * @param radix the radix if the numeric conversion (limited to 8, 10,
00580        *              or 16).
00581        * 
00582        * @returns the value represented by the digit ch in base radix if the
00583        * character ch is a valid digit in base radix; otherwise returns -1.
00584        *
00585        * @todo Implement this function.
00586        */
00587       int
00588       value(_Ch_type __ch, int __radix) const;
00589       
00590       /**
00591        * @brief Imbues the regex_traits object with a copy of a new locale.
00592        *
00593        * @param loc A locale.
00594        *
00595        * @returns a copy of the previous locale in use by the regex_traits
00596        *          object.
00597        *
00598        * @note Calling imbue with a different locale than the one currently in
00599        *       use invalidates all cached data held by *this.
00600        */
00601       locale_type
00602       imbue(locale_type __loc)
00603       {
00604     std::swap(_M_locale, __loc);
00605     return __loc;
00606       }
00607       
00608       /**
00609        * @brief Gets a copy of the current locale in use by the regex_traits
00610        * object.
00611        */
00612       locale_type
00613       getloc() const
00614       { return _M_locale; }
00615       
00616     protected:
00617       locale_type _M_locale;
00618     };
00619 
00620 
00621   // [7.8] Class basic_regex
00622   /**
00623    * Objects of specializations of this class represent regular expressions
00624    * constructed from sequences of character type @p _Ch_type.
00625    *
00626    * Storage for the regular expression is allocated and deallocated as
00627    * necessary by the member functions of this class.
00628    */
00629   template<typename _Ch_type, typename _Rx_traits = regex_traits<_Ch_type> >
00630     class basic_regex
00631     {
00632     public:
00633       // types:
00634       typedef _Ch_type                              value_type;
00635       typedef regex_constants::syntax_option_type flag_type;
00636       typedef typename _Rx_traits::locale_type  locale_type;
00637       typedef typename _Rx_traits::string_type  string_type;
00638 
00639       // [7.8.1] constants
00640       static const regex_constants::syntax_option_type icase
00641         = regex_constants::icase;
00642       static const regex_constants::syntax_option_type nosubs
00643         = regex_constants::nosubs;
00644       static const regex_constants::syntax_option_type optimize
00645         = regex_constants::optimize;
00646       static const regex_constants::syntax_option_type collate
00647         = regex_constants::collate;
00648       static const regex_constants::syntax_option_type ECMAScript
00649         = regex_constants::ECMAScript;
00650       static const regex_constants::syntax_option_type basic
00651         = regex_constants::basic;
00652       static const regex_constants::syntax_option_type extended
00653         = regex_constants::extended;
00654       static const regex_constants::syntax_option_type awk
00655         = regex_constants::awk;
00656       static const regex_constants::syntax_option_type grep
00657         = regex_constants::grep;
00658       static const regex_constants::syntax_option_type egrep
00659         = regex_constants::egrep;
00660 
00661       // [7.8.2] construct/copy/destroy
00662       /**
00663        * Constructs a basic regular expression that does not match any
00664        * character sequence.
00665        */
00666       basic_regex()
00667       : _M_flags(regex_constants::ECMAScript), _M_pattern(), _M_mark_count(0)
00668       { _M_compile(); }
00669 
00670       /**
00671        * @brief Constructs a basic regular expression from the sequence
00672        * [p, p + char_traits<_Ch_type>::length(p)) interpreted according to the
00673        * flags in @p f.
00674        *
00675        * @param p A pointer to the start of a C-style null-terminated string
00676        *          containing a regular expression.
00677        * @param f Flags indicating the syntax rules and options.
00678        *
00679        * @throws regex_error if @p p is not a valid regular expression.
00680        */
00681       explicit
00682       basic_regex(const _Ch_type* __p,
00683           flag_type __f = regex_constants::ECMAScript)
00684       : _M_flags(__f), _M_pattern(__p), _M_mark_count(0)
00685       { _M_compile(); }
00686 
00687       /**
00688        * @brief Constructs a basic regular expression from the sequence
00689        * [p, p + len) interpreted according to the flags in @p f.
00690        *
00691        * @param p   A pointer to the start of a string containing a regular
00692        *            expression.
00693        * @param len The length of the string containing the regular expression.
00694        * @param f   Flags indicating the syntax rules and options.
00695        *
00696        * @throws regex_error if @p p is not a valid regular expression.
00697        */
00698       basic_regex(const _Ch_type* __p, std::size_t __len, flag_type __f)
00699       : _M_flags(__f) , _M_pattern(__p, __len), _M_mark_count(0)
00700       { _M_compile(); }
00701 
00702       /**
00703        * @brief Copy-constructs a basic regular expression.
00704        *
00705        * @param rhs A @p regex object.
00706      */
00707       basic_regex(const basic_regex& __rhs)
00708       : _M_flags(__rhs._M_flags), _M_pattern(__rhs._M_pattern),
00709     _M_mark_count(__rhs._M_mark_count)
00710       { _M_compile(); }
00711 
00712       /**
00713        * @brief Constructs a basic regular expression from the string
00714        * @p s interpreted according to the flags in @p f.
00715        *
00716        * @param s A string containing a regular expression.
00717        * @param f Flags indicating the syntax rules and options.
00718        *
00719        * @throws regex_error if @p s is not a valid regular expression.
00720        */
00721       template<typename _Ch_traits, typename _Ch_alloc>
00722         explicit
00723         basic_regex(const basic_string<_Ch_type, _Ch_traits, _Ch_alloc>& __s,
00724             flag_type __f = regex_constants::ECMAScript)
00725     : _M_flags(__f), _M_pattern(__s), _M_mark_count(0)
00726         { _M_compile(); }
00727 
00728       /**
00729        * @brief Constructs a basic regular expression from the range
00730        * [first, last) interpreted according to the flags in @p f.
00731        *
00732        * @param first The start of a range containing a valid regular
00733        *              expression.
00734        * @param last  The end of a range containing a valid regular
00735        *              expression.
00736        * @param f     The format flags of the regular expression.
00737        *
00738        * @throws regex_error if [first, last) is not a valid regular expression.
00739        */
00740       template<typename _InputIterator>
00741         basic_regex(_InputIterator __first, _InputIterator __last, 
00742             flag_type __f = regex_constants::ECMAScript)
00743     : _M_flags(__f), _M_pattern(__first, __last), _M_mark_count(0)
00744         { _M_compile(); }
00745 
00746       /**
00747        * @brief Destroys a basic regular expression.
00748        */
00749       ~basic_regex()
00750       { }
00751       
00752       /**
00753        * @brief Assigns one regular expression to another.
00754        */
00755       basic_regex&
00756       operator=(const basic_regex& __rhs)
00757       { return this->assign(__rhs); }
00758 
00759       /**
00760        * @brief Replaces a regular expression with a new one constructed from
00761        * a C-style null-terminated string.
00762        *
00763        * @param A pointer to the start of a null-terminated C-style string
00764        *        containing a regular expression.
00765        */
00766       basic_regex&
00767       operator=(const _Ch_type* __p)
00768       { return this->assign(__p, flags()); }
00769       
00770       /**
00771        * @brief Replaces a regular expression with a new one constructed from
00772        * a string.
00773        *
00774        * @param A pointer to a string containing a regular expression.
00775        */
00776       template<typename _Ch_typeraits, typename _Allocator>
00777         basic_regex&
00778         operator=(const basic_string<_Ch_type, _Ch_typeraits, _Allocator>& __s)
00779         { return this->assign(__s, flags()); }
00780 
00781       // [7.8.3] assign
00782       /**
00783        * @brief the real assignment operator.
00784        *
00785        * @param that Another regular expression object.
00786        */
00787       basic_regex&
00788       assign(const basic_regex& __that)
00789       {
00790     basic_regex __tmp(__that);
00791     this->swap(__tmp);
00792     return *this;
00793       }
00794       
00795       /**
00796        * @brief Assigns a new regular expression to a regex object from a
00797        * C-style null-terminated string containing a regular expression
00798        * pattern.
00799        *
00800        * @param p     A pointer to a C-style null-terminated string containing
00801        *              a regular expression pattern.
00802        * @param flags Syntax option flags.
00803        *
00804        * @throws regex_error if p does not contain a valid regular expression
00805        * pattern interpreted according to @p flags.  If regex_error is thrown,
00806        * *this remains unchanged.
00807        */
00808       basic_regex&
00809       assign(const _Ch_type* __p,
00810          flag_type __flags = regex_constants::ECMAScript)
00811       { return this->assign(string_type(__p), __flags); }
00812 
00813       /**
00814        * @brief Assigns a new regular expression to a regex object from a
00815        * C-style string containing a regular expression pattern.
00816        *
00817        * @param p     A pointer to a C-style string containing a
00818        *              regular expression pattern.
00819        * @param len   The length of the regular expression pattern string.
00820        * @param flags Syntax option flags.
00821        *
00822        * @throws regex_error if p does not contain a valid regular expression
00823        * pattern interpreted according to @p flags.  If regex_error is thrown,
00824        * *this remains unchanged.
00825        */
00826       basic_regex&
00827       assign(const _Ch_type* __p, std::size_t __len, flag_type __flags)
00828       { return this->assign(string_type(__p, __len), __flags); }
00829 
00830       /**
00831        * @brief Assigns a new regular expression to a regex object from a 
00832        * string containing a regular expression pattern.
00833        *
00834        * @param s     A string containing a regular expression pattern.
00835        * @param flags Syntax option flags.
00836        *
00837        * @throws regex_error if p does not contain a valid regular expression
00838        * pattern interpreted according to @p flags.  If regex_error is thrown,
00839        * *this remains unchanged.
00840        */
00841       template<typename _Ch_typeraits, typename _Allocator>
00842         basic_regex&
00843         assign(const basic_string<_Ch_type, _Ch_typeraits, _Allocator>& __s,
00844            flag_type __f = regex_constants::ECMAScript)
00845         { 
00846       basic_regex __tmp(__s, __f);
00847       this->swap(__tmp);
00848       return *this;
00849     }
00850 
00851       /**
00852        * @brief Assigns a new regular expression to a regex object.
00853        *
00854        * @param first The start of a range containing a valid regular
00855        *              expression.
00856        * @param last  The end of a range containing a valid regular
00857        *              expression.
00858        * @param flags Syntax option flags.
00859        *
00860        * @throws regex_error if p does not contain a valid regular expression
00861        * pattern interpreted according to @p flags.  If regex_error is thrown,
00862        * *this remains unchanged.
00863        */
00864       template<typename _InputIterator>
00865         basic_regex&
00866         assign(_InputIterator __first, _InputIterator __last,
00867            flag_type __flags = regex_constants::ECMAScript)
00868         { return this->assign(string_type(__first, __last), __flags); }
00869 
00870       // [7.8.4] const operations
00871       /**
00872        * @brief Gets the number of marked subexpressions within the regular
00873        * expresison.
00874        */
00875       unsigned int
00876       mark_count() const
00877       { return _M_mark_count; }
00878       
00879       /**
00880        * @brief Gets the flags used to construct the regular expression
00881        * or in the last call to assign().
00882        */
00883       flag_type
00884       flags() const
00885       { return _M_flags; }
00886       
00887       // [7.8.5] locale
00888       /**
00889        * @brief Imbues the regular expression object with the given locale.
00890        *
00891        * @param loc A locale.
00892        */
00893       locale_type
00894       imbue(locale_type __loc)
00895       { return _M_traits.imbue(__loc); }
00896       
00897       /**
00898        * @brief Gets the locale currently imbued in the regular expression
00899        *        object.
00900        */
00901       locale_type
00902       getloc() const
00903       { return _M_traits.getloc(); }
00904       
00905       // [7.8.6] swap
00906       /**
00907        * @brief Swaps the contents of two regular expression obects.
00908        *
00909        * @param rhs Another regular expression object.
00910        */
00911       void
00912       swap(basic_regex& __rhs)
00913       {
00914     std::swap(_M_flags,      __rhs._M_flags);
00915     std::swap(_M_pattern,    __rhs._M_pattern);
00916     std::swap(_M_mark_count, __rhs._M_mark_count);
00917     std::swap(_M_traits,     __rhs._M_traits);
00918       }
00919       
00920     private:
00921       /**
00922        * @brief Compiles a regular expression pattern into a NFA.
00923        * @todo Implement this function.
00924        */
00925       void _M_compile()
00926       { }
00927 
00928     protected:
00929       flag_type    _M_flags;
00930       string_type  _M_pattern;
00931       unsigned int _M_mark_count;
00932       _Rx_traits   _M_traits;
00933     };
00934   
00935   typedef basic_regex<char>    regex;
00936 #ifdef _GLIBCXX_USE_WCHAR_T
00937   typedef basic_regex<wchar_t> wregex;
00938 #endif
00939 
00940 
00941   // [7.8.6] basic_regex swap
00942   /**
00943    * @brief Swaps the contents of two regular expression objects.
00944    * @param lhs First regular expression.
00945    * @param rhs Second regular expression.
00946    */
00947   template<typename _Ch_type, typename _Rx_traits>
00948     inline void
00949     swap(basic_regex<_Ch_type, _Rx_traits>& __lhs,
00950      basic_regex<_Ch_type, _Rx_traits>& __rhs)
00951     { return __lhs.swap(__rhs); }
00952 
00953 
00954   // [7.9] Class template sub_match
00955   /**
00956    * A sequence of characters matched by a particular marked sub-expression.
00957    *
00958    * An object of this class is essentially a pair of iterators marking a
00959    * matched subexpression within a regular expression pattern match. Such
00960    * objects can be converted to and compared with std::basic_string objects
00961    * of a similar base character type as the pattern matched by the regular
00962    * expression.
00963    *
00964    * The iterators that make up the pair are the usual half-open interval
00965    * referencing the actual original pattern matched.
00966    */
00967   template<typename _BiIter>
00968     class sub_match : public std::pair<_BiIter, _BiIter>
00969     {
00970     public:
00971       typedef typename iterator_traits<_BiIter>::value_type      value_type;
00972       typedef typename iterator_traits<_BiIter>::difference_type
00973                                                             difference_type;
00974       typedef _BiIter                                              iterator;
00975 
00976     public:
00977       bool matched;
00978       
00979       /**
00980        * Gets the length of the matching sequence.
00981        */
00982       difference_type
00983       length() const
00984       { return this->matched ? std::distance(this->first, this->second) : 0; }
00985 
00986       /**
00987        * @brief Gets the matching sequence as a string.
00988        *
00989        * @returns the matching sequence as a string.
00990        *
00991        * This is the implicit conversion operator.  It is identical to the
00992        * str() member function except that it will want to pop up in
00993        * unexpected places and cause a great deal of confusion and cursing
00994        * from the unwary.
00995        */
00996       operator basic_string<value_type>() const
00997       {
00998     return this->matched
00999       ? std::basic_string<value_type>(this->first, this->second)
01000       : std::basic_string<value_type>();
01001       }
01002       
01003       /**
01004        * @brief Gets the matching sequence as a string.
01005        *
01006        * @returns the matching sequence as a string.
01007        */
01008       basic_string<value_type>
01009       str() const
01010       {
01011     return this->matched
01012       ? std::basic_string<value_type>(this->first, this->second)
01013       : std::basic_string<value_type>();
01014       }
01015       
01016       /**
01017        * @brief Compares this and another matched sequence.
01018        *
01019        * @param s Another matched sequence to compare to this one.
01020        *
01021        * @retval <0 this matched sequence will collate before @p s.
01022        * @retval =0 this matched sequence is equivalent to @p s.
01023        * @retval <0 this matched sequence will collate after @p s.
01024        */
01025       int
01026       compare(const sub_match& __s) const
01027       { return this->str().compare(__s.str()); }
01028 
01029       /**
01030        * @brief Compares this sub_match to a string.
01031        *
01032        * @param s A string to compare to this sub_match.
01033        *
01034        * @retval <0 this matched sequence will collate before @p s.
01035        * @retval =0 this matched sequence is equivalent to @p s.
01036        * @retval <0 this matched sequence will collate after @p s.
01037        */
01038       int
01039       compare(const basic_string<value_type>& __s) const
01040       { return this->str().compare(__s); }
01041       
01042       /**
01043        * @brief Compares this sub_match to a C-style string.
01044        *
01045        * @param s A C-style string to compare to this sub_match.
01046        *
01047        * @retval <0 this matched sequence will collate before @p s.
01048        * @retval =0 this matched sequence is equivalent to @p s.
01049        * @retval <0 this matched sequence will collate after @p s.
01050        */
01051       int
01052       compare(const value_type* __s) const
01053       { return this->str().compare(__s); }
01054     };
01055   
01056   
01057   typedef sub_match<const char*>             csub_match;
01058   typedef sub_match<string::const_iterator>  ssub_match;
01059 #ifdef _GLIBCXX_USE_WCHAR_T
01060   typedef sub_match<const wchar_t*>          wcsub_match;
01061   typedef sub_match<wstring::const_iterator> wssub_match;
01062 #endif
01063 
01064   // [7.9.2] sub_match non-member operators
01065   
01066   /**
01067    * @brief Tests the equivalence of two regular expression submatches.
01068    * @param lhs First regular expression submatch.
01069    * @param rhs Second regular expression submatch.
01070    * @returns true if @a lhs  is equivalent to @a rhs, false otherwise.
01071    */
01072   template<typename _BiIter>
01073     inline bool
01074     operator==(const sub_match<_BiIter>& __lhs,
01075            const sub_match<_BiIter>& __rhs)
01076     { return __lhs.compare(__rhs) == 0; }
01077 
01078   /**
01079    * @brief Tests the inequivalence of two regular expression submatches.
01080    * @param lhs First regular expression submatch.
01081    * @param rhs Second regular expression submatch.
01082    * @returns true if @a lhs  is not equivalent to @a rhs, false otherwise.
01083    */
01084   template<typename _BiIter>
01085     inline bool
01086     operator!=(const sub_match<_BiIter>& __lhs,
01087            const sub_match<_BiIter>& __rhs)
01088     { return __lhs.compare(__rhs) != 0; }
01089 
01090   /**
01091    * @brief Tests the ordering of two regular expression submatches.
01092    * @param lhs First regular expression submatch.
01093    * @param rhs Second regular expression submatch.
01094    * @returns true if @a lhs precedes @a rhs, false otherwise.
01095    */
01096   template<typename _BiIter>
01097     inline bool