1  package frost.core
  2  
  3  ====================================================================================================
  4  Represents the process of scanning a string for matches to a regular expression. `Matcher` objects
  5  are created by [RegularExpression] to scan particular strings, and return matches one at a time.
  6  ====================================================================================================
  7  class Matcher {
  8      @private
  9      constant REPLACEMENT_GROUPS := /\$(0|[1-9]\d*)/
 10  
 11      @private
 12      def nativeHandle:Int
 13  
 14      @private
 15      def searchText:String
 16  
 17      ================================================================================================
 18      `true` if we have successfully found a match.
 19      ================================================================================================
 20      @private
 21      var matched:Bit
 22      
 23      ================================================================================================
 24      The position that [appendReplacement](appendReplacement(MutableString, String)) or [appendTail]
 25      will start copying from.
 26      ================================================================================================
 27      @private
 28      var replacementIndex:String.Index
 29  
 30      ================================================================================================
 31      Returns the index of the beginning of the last match located by `find()`. It is a [safety
 32      violation](/safetyViolations.html) to read this property unless the last call to `matches()` or
 33      `find()` was successful.
 34      ================================================================================================
 35      property start:String.Index
 36  
 37      ================================================================================================
 38      Returns the index of the end of the last match located by `find()`. It is a [safety
 39      violation](/safetyViolations.html) to read this property unless the last call to `matches()` or
 40      `find()` was successful.
 41      ================================================================================================
 42      property end:String.Index
 43  
 44      ================================================================================================
 45      Returns the number of groups contained in the match. This is always at least 1, as group zero
 46      represents the entire matched text. It is a [safety violation](/safetyViolations.html) to read
 47      this property unless the last call to `matches()` or `find()` was successful.
 48      ================================================================================================
 49      property groupCount:Int
 50  
 51      @private
 52      init() {
 53      }
 54  
 55      ================================================================================================
 56      Checks to see if the regular expression matches the entire string. While `find()` tolerates
 57      additional unmatched text before or after the match, `matches()` does not.
 58      ================================================================================================
 59      -- @self FIXME
 60      @external(frostMatcherMatches)
 61      method matches():Bit
 62  
 63      ================================================================================================
 64      Returns the next occurrence of the matcher's regular expression within the string, starting just
 65      past the last match (or at the beginning of the string, if this is the first call to `find()`).
 66      Returns a `Bit` indicating whether or not a match was found. Use [start], [end], and
 67      [group(Int)] for more information about the match.
 68      
 69      @returns `true` if a match was found
 70      @see matches()
 71      @see find(String.Index)
 72      ================================================================================================
 73      -- @self FIXME
 74      @pre(matched | replacementIndex = searchText.start)
 75      method find():Bit {
 76          var start:String.Index
 77          if matched {
 78              start := self.end
 79              if self.start = start { -- zero-character match, skip ahead one
 80                  start := searchText.next(start)
 81                  if start >= searchText.end {
 82                      matched := false
 83                      return matched
 84                  }
 85              }
 86          }
 87          else {
 88              start := searchText.start
 89          }
 90          matched := nativeFind(start)
 91          return matched
 92      }
 93      
 94      ================================================================================================
 95      Returns the first occurrence of the matcher's regular expression within the string starting at
 96      the character `start`. Returns whether or not a match was found. Use [start], [end], and
 97      [group(Int)] for more information about the match.
 98      
 99      @param start the index to start the search at
100      @returns `true` if a match was found
101      @see matches()
102      @see find()
103      ================================================================================================
104      -- @self FIXME
105      method find(start:String.Index):Bit {
106          replacementIndex := start
107          matched := nativeFind(start)
108          return matched
109      }
110  
111      ================================================================================================
112      After a successful match, appends a replacement for the match to a `MutableString`. The text
113      appended to the `MutableString` will include all unmatched characters between the last match and
114      the current match, and the `replacement` string may include references to match groups using the
115      syntax `$1`, `$2`, etc.
116  
117      `appendReplacement` is intended to be used in a loop with [find()] and completed with
118      [appendTail()](appendTail), such as in this example:
119  
120          -- testcase MatcherAppendReplacement(Simple)
121          def result := MutableString()
122          def regex := /\s+/ -- match all whitespace
123          def matcher := regex.matcher("Hello, can anyone hear me?")
124          while matcher.find() {
125              matcher.appendReplacement(result, "|")
126          }
127          matcher.appendTail(result)
128          Console.printLine(result)
129  
130      This will display the text `"Hello,|can|anyone|hear|me?"`.
131  
132      @param target the `MutableString` to append to
133      @param replacement the replacement string, optionally containing `$1`-style group references
134      ================================================================================================
135      @pre(matched)
136      -- @limited FIXME
137      method appendReplacement(target:MutableString, replacement:String) {
138          appendReplacement(target, replacement, true)
139      }
140  
141      ============================================================================
142      As [appendReplacement(MutableString, String)], but allows the 
143      interpretation of `$1`-style group references to be controlled. With
144      `allowGroupReferences` set to `false`, the replacement string is treated
145      literally, with no special handling for `$1`-style sequences.
146  
147      @param target the `MutableString` to append to
148      @param replacement the replacement string
149      @param allowGroupReferences if false, `$1`-style group references are 
150             ignored
151      ============================================================================
152      @pre(matched)
153      -- @limited FIXME
154      method appendReplacement(target:MutableString, replacement:String, allowGroupReferences:Bit) {
155          target.append(searchText[replacementIndex .. start])
156          if allowGroupReferences {
157              def ref := REPLACEMENT_GROUPS.matcher(replacement)
158              var lastEnd:String.Index := searchText.start
159              while ref.find() {
160                  def rawGroup := ref.group(1).asInt!
161                  var groupIdx := rawGroup
162                  var newEnd := ref.end
163                  while groupIdx >= groupCount { -- no such group, pull digits off
164                      groupIdx //= 10
165                      newEnd := searchText.previous(newEnd)
166                  }
167                  target.append(replacement[lastEnd .. ref.start])
168                  lastEnd := newEnd
169                  if groupIdx > 0 | rawGroup = 0 {
170                      target.append(group(groupIdx))
171                  }
172                  else {
173                      target.append("$")
174                  }
175              }
176              target.append(replacement[lastEnd..])
177          }
178          else {
179              target.append(replacement)
180          }
181          replacementIndex := end
182      }
183      
184      ================================================================================================
185      Appends all remaining unmatched text to the target `MutableString`. See
186      [appendReplacement](appendReplacement(MutableString, String)) for a usage example.
187  
188      @param target the string to append to
189      ================================================================================================
190      -- @limited FIXME
191      method appendTail(target:MutableString) {
192          target.append(searchText[replacementIndex..])
193      }
194  
195      @private
196      @external(frostMatcherNativeFind)
197      method nativeFind(start:String.Index):Bit
198  
199      @pre(matched)
200      @external(frostMatcherStart)
201      function get_start():String.Index
202  
203      @pre(matched)
204      @external(frostMatcherEnd)
205      function get_end():String.Index
206  
207      @pre(matched)
208      @external(frostMatcherGroupCount)
209      function get_groupCount():Int
210  
211      ================================================================================================
212      Returns the contents of the indicated match group. Group zero is the entire matched text, and
213      additional groups are defined by parentheses in the regular expression. For example:
214  
215          -- testcase MatcherGroup(Simple)
216          def m := /(\d+) plus (\d+) equals (\d+)/.matcher("12 plus 8 equals 20")
217          if m.matches() {
218              for i in 0 .. m.groupCount {
219                  Console.printLine(m.group(i))
220              }
221          }
222  
223      This will display:
224  
225          12 plus 8 equals 20
226          12
227          8
228          20
229      ================================================================================================
230      @pre(matched)
231      @external(frostMatcherGroup)
232      function group(index:Int):String?
233  
234      @override
235      method cleanup() {
236          destroy()
237      }
238  
239      @private
240      @external(frostMatcherDestroy)
241      method destroy()
242  }