| Author | Dave Jarvis <email> |
|---|---|
| Date | 2022-11-01 00:05:15 GMT-0700 |
| Commit | bcf0faa78a93ef16d884c1064167c9cd602d7fc8 |
| Parent | b22e816 |
| compileJava { | ||
| options.compilerArgs << "-Xlint:unchecked" << "-Xlint:deprecation" | ||
| - options.compilerArgs.addAll(['--release', '17']) | ||
| + options.compilerArgs.addAll(['--release', '19']) | ||
| } | ||
| @SuppressWarnings( "unused" ) | ||
| public class StringIterationBenchmark { | ||
| + private static final int STRLEN = 1024 * 1024; | ||
| private static final String CHARSET = | ||
| "ABCDEFGHIJKLM NOPQRSTUVWXYZ abcdefghijklm nopqrstuvxyz 01234 5 6789"; | ||
| - public static final int STRLEN = 1024 * 1024; | ||
| - private static String generateText() { | ||
| - final var sb = new StringBuilder( STRLEN ); | ||
| + private static final String sText; | ||
| + | ||
| + static { | ||
| final var len = CHARSET.length(); | ||
| + final var buffer = new StringBuilder( STRLEN ); | ||
| for( var i = 0; i < STRLEN; i++ ) { | ||
| - sb.append( CHARSET.charAt( (int) (len * random()) ) ); | ||
| + buffer.append( CHARSET.charAt( (int) (len * random()) ) ); | ||
| } | ||
| - return sb.toString(); | ||
| + sText = buffer.toString(); | ||
| + } | ||
| + | ||
| + private static String getText() { | ||
| + return sText; | ||
| } | ||
| @Benchmark | ||
| + public void test_FastCharacterIterator() { | ||
| + final var s = getText(); | ||
| + final var i = new FastCharacterIterator( s ); | ||
| + var spaces = 0; | ||
| + | ||
| + char ch = ' '; | ||
| + | ||
| + while( (ch = i.advance()) != DONE ) { | ||
| + if( ch == ' ' ) { | ||
| + spaces++; | ||
| + } | ||
| + } | ||
| + | ||
| + fail( i.index(), s.length() ); | ||
| + } | ||
| + | ||
| + //@Benchmark | ||
| public void test_CharAtIterator() { | ||
| - final var s = generateText(); | ||
| + final var s = getText(); | ||
| final var length = s.length(); | ||
| var index = 0; | ||
| + var spaces = 0; | ||
| while( index < length ) { | ||
| final var ch = s.charAt( index ); | ||
| - index++; | ||
| - } | ||
| - | ||
| - assert index == length; | ||
| - } | ||
| - | ||
| - @Benchmark | ||
| - public void test_FastCharacterIterator() { | ||
| - final var s = generateText(); | ||
| - final var i = new FastCharacterIterator( s ); | ||
| - char c = ' '; | ||
| + if( ch == ' ' ) { | ||
| + spaces++; | ||
| + } | ||
| - while( c != DONE ) { | ||
| - i.next(); | ||
| - c = i.current(); | ||
| + index++; | ||
| } | ||
| - assert i.index() == STRLEN; | ||
| + fail( index, length ); | ||
| } | ||
| - @Benchmark | ||
| + //@Benchmark | ||
| public void test_StringCharacterIterator() { | ||
| - final var s = generateText(); | ||
| + final var s = getText(); | ||
| final var i = new StringCharacterIterator( s ); | ||
| var index = 0; | ||
| + var spaces = 0; | ||
| - char c = ' '; | ||
| + char ch = ' '; | ||
| - while( c != DONE ) { | ||
| - c = i.next(); | ||
| + while( ch != DONE ) { | ||
| + ch = i.next(); | ||
| + | ||
| + if( ch == ' ' ) { | ||
| + spaces++; | ||
| + } | ||
| + | ||
| index++; | ||
| } | ||
| - assert index == STRLEN; | ||
| + fail( index, s.length() ); | ||
| } | ||
| - @Benchmark | ||
| + //@Benchmark | ||
| public void test_CharArrayIterator() { | ||
| - final var s = generateText(); | ||
| + final var s = getText(); | ||
| final var i = s.toCharArray(); | ||
| var index = 0; | ||
| + var spaces = 0; | ||
| for( final var ch : i ) { | ||
| + if( ch == ' ' ) { | ||
| + spaces++; | ||
| + } | ||
| + | ||
| index++; | ||
| } | ||
| - assert index == STRLEN; | ||
| + fail( index, s.length() ); | ||
| } | ||
| - @Benchmark | ||
| + //@Benchmark | ||
| public void test_StringTokenizer() { | ||
| - final var s = generateText(); | ||
| + final var s = getText(); | ||
| final var i = new StringTokenizer( s, " ", true ); | ||
| var index = 0; | ||
| + var spaces = 0; | ||
| while( i.hasMoreTokens() ) { | ||
| final var token = i.nextToken(); | ||
| + | ||
| + if( token.isBlank() ) { | ||
| + spaces++; | ||
| + } | ||
| + | ||
| index += token.length(); | ||
| } | ||
| - assert index == STRLEN; | ||
| + fail( index, s.length() ); | ||
| } | ||
| - @Benchmark | ||
| + //@Benchmark | ||
| public void test_StreamIterator() { | ||
| - final var s = generateText(); | ||
| + final var s = getText(); | ||
| final var index = new AtomicInteger(); | ||
| + final var spaces = new AtomicInteger(); | ||
| s.chars().forEach( codepoint -> { | ||
| final var ch = Character.valueOf( (char) codepoint ); | ||
| + | ||
| + if( ch == ' ' ) { | ||
| + spaces.incrementAndGet(); | ||
| + } | ||
| + | ||
| index.incrementAndGet(); | ||
| } ); | ||
| - assert index.get() == STRLEN; | ||
| + fail( index.get(), s.length() ); | ||
| + } | ||
| + | ||
| + private static void fail( final int index, final int length ) { | ||
| + if( index != length ) { | ||
| + throw new RuntimeException( "Fail" ); | ||
| + } | ||
| } | ||
| } |
| } | ||
| else if( curr == '\\' ) { | ||
| - i.next(); | ||
| - final var next = i.current(); | ||
| + final var next = i.advance(); | ||
| if( next == '\'' ) { | ||
| private static boolean isDigit( final char curr ) { | ||
| return Character.isDigit( curr ) || | ||
| - "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞".indexOf( curr ) > -1; | ||
| + "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞".indexOf( curr ) >= 0; | ||
| } | ||
| */ | ||
| private static boolean isNumeric( final char curr ) { | ||
| - return | ||
| - curr == '.' || curr == ',' || curr == '-' || curr == '+' || | ||
| - curr == '^' || curr == '⅟' || curr == '⁄'; | ||
| + return curr == '.' || curr == ',' || curr == '-' || curr == '+' || | ||
| + curr == '^' || curr == '⅟' || curr == '⁄'; | ||
| } | ||
| } | ||
| /** | ||
| * Iterates over a string, much like {@link CharacterIterator}, but faster. | ||
| - * This class gets 53 ops/s vs. 49 ops/s for {@link StringCharacterIterator}. | ||
| - * In comparison, using unconstrained {@link String#charAt(int)} calls yields | ||
| - * 57 ops/s. | ||
| + * Achieves 1128.230 ops/s vs. 49 ops/s for {@link StringCharacterIterator}. | ||
| * <p> | ||
| * <strong>Caution:</strong> This class offers minimal bounds checking to eke | ||
| */ | ||
| public FastCharacterIterator( final String s ) { | ||
| + assert s != null; | ||
| + | ||
| mS = s; | ||
| mLen = s.length(); | ||
| /** | ||
| * Returns the character at the currently iterated position in the string. | ||
| - * This method performs bounds checking. | ||
| + * This method performs bounds checking by catching an exception: parsing is | ||
| + * usually complete once there are no more characters to iterate, so explicit | ||
| + * bounds checking is superfluous 99.99% of the time. | ||
| * | ||
| * @return {@link CharacterIterator#DONE} if there are no more characters. | ||
| */ | ||
| public char current() { | ||
| - final var pos = mPos; | ||
| - return pos < mLen ? mS.charAt( pos ) : DONE; | ||
| + try { | ||
| + return mS.charAt( mPos ); | ||
| + } catch( final Exception ex ) { | ||
| + return DONE; | ||
| + } | ||
| + } | ||
| + | ||
| + /** | ||
| + * Returns the next character in the string and consumes it. | ||
| + * | ||
| + * @return {@link CharacterIterator#DONE} if there are no more characters. | ||
| + */ | ||
| + public char advance() { | ||
| + try { | ||
| + return mS.charAt( ++mPos ); | ||
| + } catch( final Exception ex ) { | ||
| + return DONE; | ||
| + } | ||
| + } | ||
| + | ||
| + /** | ||
| + * Returns the next character in the string without consuming it. Multiple | ||
| + * consecutive calls to this method will return the same value. | ||
| + * | ||
| + * @return {@link CharacterIterator#DONE} if there are no more characters. | ||
| + */ | ||
| + public char peek() { | ||
| + try { | ||
| + return mS.charAt( mPos + 1 ); | ||
| + } catch( final Exception ex ) { | ||
| + return DONE; | ||
| + } | ||
| } | ||
| */ | ||
| public void prev() { | ||
| - --mPos; | ||
| - } | ||
| - | ||
| - /** | ||
| - * Returns the next character in the string without consuming it. Multiple | ||
| - * consecutive calls to this method will return the same value. This method | ||
| - * performs bounds checking. | ||
| - * | ||
| - * @return {@link CharacterIterator#DONE} if there are no more characters. | ||
| - */ | ||
| - public char peek() { | ||
| - final var pos = mPos; | ||
| - return pos + 1 < mLen ? mS.charAt( pos + 1 ) : DONE; | ||
| + mPos--; | ||
| } | ||
| * @param f The function that determines when skipping stops. | ||
| */ | ||
| + @SuppressWarnings( "StatementWithEmptyBody" ) | ||
| public void skip( final Function<Character, Boolean> f ) { | ||
| assert f != null; | ||
| - do { | ||
| - next(); | ||
| - } | ||
| - while( f.apply( current() ) ); | ||
| + while( f.apply( advance() ) ) ; | ||
| // The loop always overshoots by one character. | ||
| Delta | 125 lines added, 66 lines removed, 59-line increase |
|---|---|