001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang.text;
018
019 import java.util.Arrays;
020
021 /**
022 * A matcher class that can be queried to determine if a character array
023 * portion matches.
024 * <p>
025 * This class comes complete with various factory methods.
026 * If these do not suffice, you can subclass and implement your own matcher.
027 *
028 * @author Apache Software Foundation
029 * @since 2.2
030 * @version $Id: StrMatcher.java 905636 2010-02-02 14:03:32Z niallp $
031 */
public abstract class StrMatcher {

    /** Shared matcher for the comma character. */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /** Shared matcher for the tab character. */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /** Shared matcher for the space character. */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Shared matcher for the StringTokenizer delimiters:
     * space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /** Shared matcher for the characters that String.trim() treats as whitespace. */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /** Shared matcher for the single quote character. */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /** Shared matcher for the double quote character. */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /** Shared matcher for either the single or the double quote character. */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /** Shared matcher that never matches anything. */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Gets the shared matcher for the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Gets the shared matcher for the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Gets the shared matcher for the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Gets the shared matcher for the StringTokenizer delimiters,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Gets the shared matcher for the String trim() whitespace characters,
     * i.e. every character whose value is 32 (space) or less.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Gets the shared matcher for the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher for the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher for the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher that matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher for one specific character.
     *
     * @param ch  the character to match
     * @return a new matcher for the given char
     */
    public static StrMatcher charMatcher(char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher for a set of characters supplied as an array.
     * <p>
     * The array is copied, so later changes to it do not affect the matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return the shared none matcher for null or empty input,
     *  otherwise a new matcher for the given characters
     */
    public static StrMatcher charSetMatcher(char[] chars) {
        if (chars == null) {
            return NONE_MATCHER;
        }
        switch (chars.length) {
            case 0:
                return NONE_MATCHER;
            case 1:
                // a single character needs no set lookup
                return new CharMatcher(chars[0]);
            default:
                return new CharSetMatcher(chars);
        }
    }

    /**
     * Creates a matcher for a set of characters supplied as a string.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return the shared none matcher for null or empty input,
     *  otherwise a new matcher for the given characters
     */
    public static StrMatcher charSetMatcher(String chars) {
        if (chars == null) {
            return NONE_MATCHER;
        }
        switch (chars.length()) {
            case 0:
                return NONE_MATCHER;
            case 1:
                // a single character needs no set lookup
                return new CharMatcher(chars.charAt(0));
            default:
                return new CharSetMatcher(chars.toCharArray());
        }
    }

    /**
     * Creates a matcher for an exact sequence of characters.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return the shared none matcher for null or empty input,
     *  otherwise a new matcher for the given string
     */
    public static StrMatcher stringMatcher(String str) {
        final boolean nothingToMatch = (str == null) || (str.length() == 0);
        if (nothingToMatch) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor for use by subclasses.
     */
    protected StrMatcher() {
        super();
    }

    /**
     * Reports how many characters match at a position, zero meaning no match.
     * <p>
     * <code>pos</code> is the position currently being examined within
     * <code>buffer</code>, a character array that implementations must not
     * modify. Implementations may assume that <code>pos</code> is a valid
     * index into <code>buffer</code>.
     * <p>
     * The array may be larger than the active region being matched; only
     * elements between the specified indices may be accessed.
     * <p>
     * An implementation may inspect a single character or several, and may
     * look at characters preceding <code>pos</code> as well as those after it,
     * provided it stays inside the specified bounds.
     * <p>
     * The result must be zero when there is no match, or a positive count of
     * the characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Reports how many characters match at a position, zero meaning no match,
     * treating the entire array as the active region.
     * <p>
     * This simply calls {@link #isMatch(char[], int, int, int)} with a start
     * of zero and an end of <code>buffer.length</code>.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(char[] buffer, int pos) {
        final int wholeBufferEnd = buffer.length;
        return isMatch(buffer, pos, 0, wholeBufferEnd);
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts any single character out of a fixed set.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** Private, sorted copy of the accepted characters (sorted for binary search). */
        private final char[] sortedChars;

        /**
         * Creates a matcher from a character array.
         * The array is copied and the copy is sorted; the argument is left untouched.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(char chars[]) {
            super();
            final char[] copy = new char[chars.length];
            System.arraycopy(chars, 0, copy, 0, copy.length);
            Arrays.sort(copy);
            this.sortedChars = copy;
        }

        /**
         * Checks whether the character at <code>pos</code> belongs to the set.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, not used here
         * @param bufferEnd  the end index of the active buffer, not used here
         * @return 1 if the character is in the set, otherwise 0
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            // binarySearch yields a non-negative index only when the key is present
            if (Arrays.binarySearch(sortedChars, buffer[pos]) < 0) {
                return 0;
            }
            return 1;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts exactly one specific character.
     */
    static final class CharMatcher extends StrMatcher {
        /** The single accepted character. */
        private final char target;

        /**
         * Creates a matcher for a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(char ch) {
            super();
            this.target = ch;
        }

        /**
         * Checks whether the character at <code>pos</code> is the stored character.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, not used here
         * @param bufferEnd  the end index of the active buffer, not used here
         * @return 1 if the character is equal to the stored one, otherwise 0
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            if (buffer[pos] == target) {
                return 1;
            }
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts an exact sequence of characters.
     */
    static final class StringMatcher extends StrMatcher {
        /** The sequence to match, held as a character array. */
        private final char[] pattern;

        /**
         * Creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(String str) {
            super();
            this.pattern = str.toCharArray();
        }

        /**
         * Checks whether the stored string occurs in the buffer starting at
         * <code>pos</code> and ending no later than <code>bufferEnd</code>.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, not used here
         * @param bufferEnd  the end index (exclusive) of the active buffer
         * @return the length of the stored string if it matches, otherwise 0
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            final int length = pattern.length;
            // the whole pattern has to fit before the end of the active region
            if (pos + length > bufferEnd) {
                return 0;
            }
            for (int offset = 0; offset < length; offset++) {
                if (pattern[offset] != buffer[pos + offset]) {
                    return 0;
                }
            }
            return length;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that never matches.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>NoMatcher</code>.
         */
        NoMatcher() {
            super();
        }

        /**
         * Always reports no match.
         *
         * @param buffer  the text content to match against, not used here
         * @param pos  the starting position for the match, not used here
         * @param bufferStart  the first active index in the buffer, not used here
         * @param bufferEnd  the end index of the active buffer, not used here
         * @return always 0
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts whitespace as defined by String.trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>TrimMatcher</code>.
         */
        TrimMatcher() {
            super();
        }

        /**
         * Checks whether the character at <code>pos</code> has a value of
         * 32 (space) or less.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, not used here
         * @param bufferEnd  the end index of the active buffer, not used here
         * @return 1 if the character is 32 or less, otherwise 0
         */
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            if (buffer[pos] > 32) {
                return 0;
            }
            return 1;
        }
    }

}