require 'oregexp'

module Oniguruma
   # Option flags. Each OPTION_XXX value is a distinct bit, so several options
   # can be combined with a bitwise OR and passed as the <code>:options</code>
   # entry accepted by <code>ORegexp.new</code>.
   # NOTE(review): the values presumably mirror the option bits of the native
   # Oniguruma library -- confirm against the extension before changing them.
   OPTION_NONE                 = 0
   OPTION_IGNORECASE           = 1 << 0
   OPTION_EXTEND               = 1 << 1
   OPTION_MULTILINE            = 1 << 2
   OPTION_SINGLELINE           = 1 << 3
   OPTION_FIND_LONGEST         = 1 << 4
   OPTION_FIND_NOT_EMPTY       = 1 << 5
   OPTION_NEGATE_SINGLELINE    = 1 << 6
   OPTION_DONT_CAPTURE_GROUP   = 1 << 7
   OPTION_CAPTURE_GROUP        = 1 << 8
   OPTION_NOTBOL               = 1 << 9
   OPTION_NOTEOL               = 1 << 10
   OPTION_POSIX_REGION         = 1 << 11
   OPTION_MAXBIT               = OPTION_POSIX_REGION
   OPTION_DEFAULT              = OPTION_NONE

   # One-character shortcuts understood by the string form of
   # <code>ORegexp.new</code> (e.g. <code>'ix'</code>).
   #
   # This table used to list the key 'E' twice: first for
   # OPTION_FIND_NOT_EMPTY and then for OPTION_NOTEOL. In a hash literal the
   # later entry wins, so 'E' has always selected OPTION_NOTEOL and the first
   # entry was dead (and triggers a "duplicated key" warning on newer Rubies).
   # The shadowed entry is removed so the table states what actually happens.
   # OPTION_FIND_NOT_EMPTY therefore has no shortcut letter; pass it through
   # the <code>:options</code> hash form instead.
   OPTIONS_SHORTCUTS = {
    'i' => OPTION_IGNORECASE,
    'x' => OPTION_EXTEND,
    'm' => OPTION_MULTILINE,
    's' => OPTION_SINGLELINE,
    'l' => OPTION_FIND_LONGEST,
    'S' => OPTION_NEGATE_SINGLELINE,
    'G' => OPTION_DONT_CAPTURE_GROUP,
    'g' => OPTION_CAPTURE_GROUP,
    'B' => OPTION_NOTBOL,
    'E' => OPTION_NOTEOL,
   }

   # Syntax identifiers, used as the <code>:syntax</code> entry accepted by
   # <code>ORegexp.new</code>.
   SYNTAX_ASIS                 = 0
   SYNTAX_POSIX_BASIC          = 1
   SYNTAX_POSIX_EXTENDED       = 2
   SYNTAX_EMACS                = 3
   SYNTAX_GREP                 = 4
   SYNTAX_GNU_REGEX            = 5
   SYNTAX_JAVA                 = 6
   SYNTAX_PERL                 = 7
   SYNTAX_PERL_NG              = 8
   SYNTAX_RUBY                 = 9
   SYNTAX_DEFAULT              = 10

   # Encoding identifiers, used as the <code>:encoding</code> entry accepted
   # by <code>ORegexp.new</code>.
   ENCODING_ASCII              = 0
   ENCODING_ISO_8859_1         = 1
   ENCODING_ISO_8859_2         = 2
   ENCODING_ISO_8859_3         = 3
   ENCODING_ISO_8859_4         = 4
   ENCODING_ISO_8859_5         = 5
   ENCODING_ISO_8859_6         = 6
   ENCODING_ISO_8859_7         = 7
   ENCODING_ISO_8859_8         = 8
   ENCODING_ISO_8859_9         = 9
   ENCODING_ISO_8859_10        = 10
   ENCODING_ISO_8859_11        = 11
   ENCODING_ISO_8859_12        = 12
   ENCODING_ISO_8859_13        = 13
   ENCODING_ISO_8859_14        = 14
   ENCODING_ISO_8859_15        = 15
   ENCODING_ISO_8859_16        = 16
   ENCODING_UTF8               = 17
   ENCODING_UTF16_BE           = 18
   ENCODING_UTF16_LE           = 19
   ENCODING_UTF32_BE           = 20
   ENCODING_UTF32_LE           = 21
   ENCODING_EUC_JP             = 22
   ENCODING_EUC_TW             = 23
   ENCODING_EUC_KR             = 24
   ENCODING_EUC_CN             = 25
   ENCODING_SJIS               = 26
   ENCODING_KOI8               = 27
   ENCODING_KOI8_R             = 28
   ENCODING_CP1251             = 29
   ENCODING_BIG5               = 30
   ENCODING_GB18030            = 31
   ENCODING_UNDEF              = 32

   
   class ORegexp

      class << self
         # :stopdoc:
         alias compile new
         # :startdoc:

         # call-seq:
         #    ORegexp.escape(str)   => a_str
         #    ORegexp.quote(str)    => a_str
         #
         # Escapes any characters that would have special meaning in a regular
         # expression. This is a thin wrapper: all arguments are handed to
         # <code>Regexp.escape</code> and its result is returned unchanged.
         #
         #    ORegexp.escape('\\*?{}.')   #=> \\\\\*\?\{\}\.

         def escape( *args )
            Regexp.escape( *args )
         end
         # :stopdoc:
         alias quote escape
         # :startdoc:

         # call-seq:
         #    ORegexp.last_match           => matchdata or nil
         #    ORegexp.last_match(fixnum)   => str or nil
         #
         # The first form returns the <code>MatchData</code> object stored in the
         # <code>@@last_match</code> class variable. The second form returns the
         # nth field of that <code>MatchData</code> object.
         #
         # Returns <code>nil</code> (instead of raising) when
         # <code>@@last_match</code> has not been set yet or holds
         # <code>nil</code>, mirroring <code>Regexp.last_match</code>.
         # NOTE(review): <code>@@last_match</code> is never assigned in this
         # file; presumably the native extension sets it after a match.
         #
         #    ORegexp.new( 'c(.)t' ) =~ 'cat'       #=> 0
         #    ORegexp.last_match                    #=> #<MatchData:0x401b3d30>
         #    ORegexp.last_match(0)                 #=> "cat"
         #    ORegexp.last_match(1)                 #=> "a"
         #    ORegexp.last_match(2)                 #=> nil

         def last_match( index = nil )
            # Reading an unset class variable raises NameError, so probe first.
            match = defined?(@@last_match) ? @@last_match : nil
            if index
               match && match[index]
            else
               match
            end
         end
      end

      # :stopdoc:
      alias old_initialize initialize
      # :startdoc:

      # call-seq:
      #     ORegexp.new( pattern, options_hash )
      #     ORegexp.new( pattern, option_str, encoding_str=nil, syntax_str=nil)
      #
      # Constructs a new regular expression from <i>pattern</i>, which is a
      # <code>String</code>. The second parameter <i>options_hash</i> may be a
      # <code>Hash</code> of the form:
      #
      # <code>{ :options => option_value, :encoding => encoding_value, :syntax => syntax_value }</code>
      #
      # Where <code>option_value</code> is a bitwise <code>OR</code> of
      # <code>Oniguruma::OPTION_XXX</code> constants; <code>encoding_value</code>
      # is one of <code>Oniguruma::ENCODING_XXX</code> constants; and
      # <code>syntax_value</code> is one of <code>Oniguruma::SYNTAX_XXX</code>
      # constants. Missing entries default to <code>OPTION_DEFAULT</code>,
      # <code>ENCODING_ASCII</code> and <code>SYNTAX_DEFAULT</code>.
      #
      #     r1 = ORegexp.new('^a-z+:\\s+\w+')                                            #=> /^a-z+:\s+\w+/
      #     r2 = ORegexp.new('cat', :options => OPTION_IGNORECASE )                      #=> /cat/i
      #     r3 = ORegexp.new('dog', :options => OPTION_EXTEND )                          #=> /dog/x
      #
      #     #Accept java syntax on SJIS encoding:
      #     r4 = ORegexp.new('ape', :syntax  => SYNTAX_JAVA, :encoding => ENCODING_SJIS) #=> /ape/
      #
      # The second form uses string shortcuts. Each character of
      # <i>option_str</i> is looked up in <code>Oniguruma::OPTIONS_SHORTCUTS</code>
      # (unknown characters are ignored). <i>encoding_str</i> and
      # <i>syntax_str</i> are upcased and looked up as
      # <code>Oniguruma::ENCODING_XXX</code> / <code>Oniguruma::SYNTAX_XXX</code>
      # constants; a name with no matching constant leaves the default in place.
      #
      #     r = ORegexp.new('cat', 'i', 'utf8', 'java')

      def initialize( pattern, *args )
         defaults = { :options => OPTION_DEFAULT, :encoding => ENCODING_ASCII, :syntax => SYNTAX_DEFAULT}
         if args[0].is_a?(String)
            options = {}
            option_str, encoding_str, syntax_str = *args
            opt = 0
            option_str.each_byte {|x| opt |= (OPTIONS_SHORTCUTS[x.chr] || 0) }
            options[:options] = opt
            if encoding_str && Oniguruma::const_defined?("ENCODING_#{encoding_str.upcase}")
               options[:encoding] = Oniguruma::const_get("ENCODING_#{encoding_str.upcase}")
            end
            if syntax_str && Oniguruma::const_defined?("SYNTAX_#{syntax_str.upcase}")
               options[:syntax] = Oniguruma::const_get("SYNTAX_#{syntax_str.upcase}")
            end
         else
            options = args[0] || {}
         end
         # The original (aliased) initializer receives the pattern and the
         # frozen, fully populated options hash.
         # NOTE(review): @pattern and @options are read below but never
         # assigned in this file; presumably old_initialize sets them.
         old_initialize( pattern,  defaults.merge( options ).freeze )
      end

      # call-seq:
      #    rxp == other_rxp      => true or false
      #    rxp.eql?(other_rxp)   => true or false
      #
      # Equality---Two regexps are equal if their patterns are identical, they have
      # the same character set code, and their <code>#casefold?</code> values are the
      # same. An operand that does not respond to <code>source</code>,
      # <code>kcode</code> and <code>casefold?</code> is never equal (this used
      # to raise <code>NoMethodError</code>).

      def ==( regexp )
         comparable = [:source, :kcode, :casefold?].all? { |m| regexp.respond_to?( m ) }
         return false unless comparable
         @pattern == regexp.source && kcode == regexp.kcode && casefold? == regexp.casefold?
      end
      alias eql? ==

      # call-seq:
      #    rxp.casefold?   => true or false
      #
      # Returns the value of the case-insensitive flag, i.e. whether
      # <code>OPTION_IGNORECASE</code> is set in the options.

      def casefold?
         (@options[:options] & OPTION_IGNORECASE) > 0
      end

      # call-seq:
      #    rxp.kcode        => int
      #
      # Returns the character set code for the regexp: the
      # <code>:encoding</code> entry of its options.
      def kcode
         @options[:encoding]
      end

      # call-seq:
      #    rxp.options   => fixnum
      #
      # Returns the set of bits corresponding to the options used when creating this
      # ORegexp (see <code>ORegexp::new</code> for details).
      #
      #    Oniguruma::OPTION_IGNORECASE                                          #=> 1
      #    Oniguruma::OPTION_EXTEND                                              #=> 2
      #    Oniguruma::OPTION_MULTILINE                                           #=> 4
      #
      #    ORegexp.new( 'cat', :options => Oniguruma::OPTION_EXTEND ).options    #=> 2

      def options
         @options[:options]
      end

      # call-seq:
      #    rxp.to_s   => str
      #
      # Returns a string made of an option prefix followed by the pattern
      # source. The prefix has the form <code>(?on-off)</code>, where
      # <i>on</i> lists the letters of the set flags among <code>i</code>,
      # <code>m</code>, <code>x</code> and <i>off</i> lists the unset ones
      # (the <code>-off</code> part is omitted when all three are set).
      #
      # Note that, unlike <code>Regexp#to_s</code>, the pattern is appended
      # after the closing parenthesis rather than enclosed in a
      # <code>(?on-off:...)</code> group. <code>#inspect</code> produces a
      # generally more readable version of <i>rxp</i>.
      #
      #    r1 = ORegexp.new( 'ab+c', :options => OPTION_IGNORECASE | OPTION_EXTEND ) #=> /ab+c/ix
      #    s1 = r1.to_s                                                              #=> "(?ix-m)ab+c"
      #    r2 = ORegexp.new(s1)                                                      #=> /(?ix-m)ab+c/
      #    r1 == r2                                                                  #=> false
      #    r1.source                                                                 #=> "ab+c"
      #    r2.source                                                                 #=> "(?ix-m)ab+c"

      def to_s
         opt_str = "(?"
         opt_str += "i" if (@options[:options] & OPTION_IGNORECASE) > 0
         opt_str += "m" if (@options[:options] & OPTION_MULTILINE) > 0
         opt_str += "x" if (@options[:options] & OPTION_EXTEND) > 0
         # Only emit the "-..." part when at least one flag is switched off.
         unless opt_str == "(?imx"
            opt_str += "-"
            opt_str += "i" if (@options[:options] & OPTION_IGNORECASE) == 0
            opt_str += "m" if (@options[:options] & OPTION_MULTILINE) == 0
            opt_str += "x" if (@options[:options] & OPTION_EXTEND) == 0
         end
         opt_str += ")"
         opt_str + @pattern
      end


      # call-seq:
      #    rxp.inspect   => string
      #
      # Returns a readable version of <i>rxp</i>: the pattern between slashes,
      # followed by the letters of the set flags among <code>i</code>,
      # <code>m</code> and <code>x</code>.
      #
      #    ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).inspect  => /cat/im
      #    ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).to_s     => (?im-x)cat

      def inspect
         opt_str = ""
         opt_str += "i" if (@options[:options] & OPTION_IGNORECASE) > 0
         opt_str += "m" if (@options[:options] & OPTION_MULTILINE) > 0
         opt_str += "x" if (@options[:options] & OPTION_EXTEND) > 0
         "/" + @pattern + "/" + opt_str
      end

      # call-seq:
      #    rxp.source   => str
      #
      # Returns the original string of the pattern. The stored pattern string
      # is frozen in place before being returned.
      #
      #    ORegexp.new( 'ab+c', 'ix' ).source   #=> "ab+c"
      def source
         @pattern.freeze
      end

      alias match_all scan

   end
   
end

class ::String
   # Builds an <code>Oniguruma::ORegexp</code> from the first argument and
   # calls <code>Oniguruma::ORegexp#gsub</code> on it with this string
   # followed by the remaining arguments. A block, if given, is forwarded.
   def ogsub(*args, &block)
      Oniguruma::ORegexp.new(args.shift).gsub(self, *args, &block)
   end

   # Builds an <code>Oniguruma::ORegexp</code> from the first argument and
   # calls <code>Oniguruma::ORegexp#gsub!</code> on it with this string
   # followed by the remaining arguments. A block, if given, is forwarded.
   def ogsub!(*args, &block)
      Oniguruma::ORegexp.new(args.shift).gsub!(self, *args, &block)
   end

   # Builds an <code>Oniguruma::ORegexp</code> from <i>re</i> and calls
   # <code>Oniguruma::ORegexp#sub</code> on it with this string followed by
   # the remaining arguments. A block, if given, is forwarded.
   def osub(re, *args, &block)
      Oniguruma::ORegexp.new( re ).sub(self, *args, &block)
   end

   # Builds an <code>Oniguruma::ORegexp</code> from <i>re</i> and calls
   # <code>Oniguruma::ORegexp#sub!</code> on it with this string followed by
   # the remaining arguments. A block, if given, is forwarded.
   #
   # This previously delegated to the non-bang <code>sub</code>, so the
   # receiver was never modified.
   def osub!(re, *args, &block)
      Oniguruma::ORegexp.new( re ).sub!(self, *args, &block)
   end
end

class ::MatchData
   # call-seq:
   #    mtch.to_index(symbol)      => int or nil
   #
   # Looks <i>symbol</i> up in the <code>@named_captures</code> table of this
   # match and returns the stored group index. Returns <code>nil</code> when
   # the table is not set or has no entry for <i>symbol</i>.
   # NOTE(review): <code>@named_captures</code> is not assigned in this file;
   # presumably the native extension fills it in for ORegexp matches.
   #
   #    m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
   #    m.to_index(:begin)    #=> 1
   #    m.to_index(:unknown)  #=> nil
   def to_index symbol
      table = @named_captures
      table && table[symbol]
   end

   alias old_aref :[]

   # call-seq:
   #    mtch[i]               => obj
   #    mtch[start, length]   => array
   #    mtch[range]           => array
   #    mtch[symbol]          => obj
   #
   # Element reference. Any index that is not a <code>Symbol</code> is passed
   # on unchanged to the original <code>MatchData#[]</code>, so the usual
   # array-style access keeps working:
   #
   #    m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
   #    m[0]       #=> "HX1138"
   #    m[1, 2]    #=> ["H", "X"]
   #    m[1..3]    #=> ["H", "X", "113"]
   #    m[-3, 2]   #=> ["X", "113"]
   #
   # A <code>Symbol</code> index is first translated with <code>to_index</code>;
   # the matching group is returned, or <code>nil</code> if no such named
   # group is known.
   # NOTE(review): symbols never reach the original <code>[]</code>, even on
   # Ruby versions where it accepts group names itself -- confirm intended.
   #
   #    m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
   #    m[:begin]   #=> "THX"
   #    m[:middle]  #=> "1"
   #    m[:end]     #=> "138"

   def [](*idx)
      name = idx.first
      return old_aref(*idx) unless name.is_a?(Symbol)
      group = to_index( name )
      group && old_aref(group)
   end

   alias old_begin :begin

   # call-seq:
   #    mtch.begin(n)        => integer
   #    mtch.begin           => integer
   #    mtch.begin(symbol)   => integer or nil
   #
   # Returns the offset of the start of the <em>n</em>th element of the match
   # array in the string.
   #
   #    m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
   #    m.begin(0)   #=> 1
   #    m.begin(2)   #=> 2
   #
   # Without arguments this is the same as <code>begin(0)</code>: the start
   # of the whole match.
   #
   #    m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
   #    m.begin      #=> 1
   #
   # With a symbol, the start of the corresponding named group is returned,
   # or <code>nil</code> if the group does not exist.
   #
   #    m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
   #    m.begin(:middle) #=> 3

   def begin(*idx)
      return old_begin( 0 ) if idx.empty?
      name = idx.first
      return old_begin(*idx) unless name.is_a?(Symbol)
      group = to_index( name )
      group && old_begin(group)
   end

   alias old_end :end

   # call-seq:
   #    mtch.end(n)        => integer
   #    mtch.end           => integer
   #    mtch.end(symbol)   => integer or nil
   #
   # Returns the offset of the character immediately following the end of the
   # <em>n</em>th element of the match array in the string.
   #
   #    m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
   #    m.end(0)   #=> 7
   #    m.end(2)   #=> 3
   #
   # Without arguments this is the same as <code>end(0)</code>: the offset
   # just past the whole match.
   #
   #    m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
   #    m.end      #=> 7
   #
   # With a symbol, the end of the corresponding named group is returned,
   # or <code>nil</code> if the group does not exist.
   #
   #    m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
   #    m.end(:middle) #=> 4

   def end(*idx)
      return old_end( 0 ) if idx.empty?
      name = idx.first
      return old_end(*idx) unless name.is_a?(Symbol)
      group = to_index( name )
      group && old_end(group)
   end

   alias old_offset :offset

   # call-seq:
   #    mtch.offset(n)      => array
   #    mtch.offset         => array
   #    mtch.offset(symbol) => array or nil
   #
   # Returns a two-element array containing the beginning and ending offsets of
   # the <em>n</em>th match.
   #
   #    m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
   #    m.offset(0)   #=> [1, 7]
   #    m.offset(4)   #=> [6, 7]
   #
   # Without arguments this is the same as <code>offset(0)</code>: the
   # offsets of the whole match.
   #
   #    m = ORegexp.new( '(.)(.)(\d+)(\d)' ).match("THX1138.")
   #    m.offset      #=> [1, 7]
   #
   # With a symbol, the offsets of the corresponding named group are
   # returned, or <code>nil</code> if the group does not exist.
   #
   #    m = ORegexp.new( '(?<begin>^.*?)(?<middle>\d)(?<end>.*)' ).match("THX1138")
   #    m.offset(:middle) #=> [3, 4]

   def offset(*idx)
      return old_offset( 0 ) if idx.empty?
      name = idx.first
      return old_offset(*idx) unless name.is_a?(Symbol)
      group = to_index( name )
      group && old_offset(group)
   end
end
