Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Boost fast character iterator performance, add improved benchmarks

Author: Dave Jarvis <email>
Date: 2022-11-01 00:05:15 GMT-0700
Commit: bcf0faa78a93ef16d884c1064167c9cd602d7fc8
Parent: b22e816
build.gradle
compileJava {
options.compilerArgs << "-Xlint:unchecked" << "-Xlint:deprecation"
- options.compilerArgs.addAll(['--release', '17'])
+ options.compilerArgs.addAll(['--release', '19'])
}
src/jmh/java/com/whitemagicsoftware/keenquotes/StringIterationBenchmark.java
@SuppressWarnings( "unused" )
public class StringIterationBenchmark {
+ private static final int STRLEN = 1024 * 1024;
private static final String CHARSET =
"ABCDEFGHIJKLM NOPQRSTUVWXYZ abcdefghijklm nopqrstuvxyz 01234 5 6789";
- public static final int STRLEN = 1024 * 1024;
- private static String generateText() {
- final var sb = new StringBuilder( STRLEN );
+ private static final String sText;
+
+ static {
final var len = CHARSET.length();
+ final var buffer = new StringBuilder( STRLEN );
for( var i = 0; i < STRLEN; i++ ) {
- sb.append( CHARSET.charAt( (int) (len * random()) ) );
+ buffer.append( CHARSET.charAt( (int) (len * random()) ) );
}
- return sb.toString();
+ sText = buffer.toString();
+ }
+
+ private static String getText() {
+ return sText;
}
@Benchmark
+ public void test_FastCharacterIterator() {
+ final var s = getText();
+ final var i = new FastCharacterIterator( s );
+ var spaces = 0;
+
+ char ch = ' ';
+
+ while( (ch = i.advance()) != DONE ) {
+ if( ch == ' ' ) {
+ spaces++;
+ }
+ }
+
+ fail( i.index(), s.length() );
+ }
+
+ //@Benchmark
public void test_CharAtIterator() {
- final var s = generateText();
+ final var s = getText();
final var length = s.length();
var index = 0;
+ var spaces = 0;
while( index < length ) {
final var ch = s.charAt( index );
- index++;
- }
-
- assert index == length;
- }
-
- @Benchmark
- public void test_FastCharacterIterator() {
- final var s = generateText();
- final var i = new FastCharacterIterator( s );
- char c = ' ';
+ if( ch == ' ' ) {
+ spaces++;
+ }
- while( c != DONE ) {
- i.next();
- c = i.current();
+ index++;
}
- assert i.index() == STRLEN;
+ fail( index, length );
}
- @Benchmark
+ //@Benchmark
public void test_StringCharacterIterator() {
- final var s = generateText();
+ final var s = getText();
final var i = new StringCharacterIterator( s );
var index = 0;
+ var spaces = 0;
- char c = ' ';
+ char ch = ' ';
- while( c != DONE ) {
- c = i.next();
+ while( ch != DONE ) {
+ ch = i.next();
+
+ if( ch == ' ' ) {
+ spaces++;
+ }
+
index++;
}
- assert index == STRLEN;
+ fail( index, s.length() );
}
- @Benchmark
+ //@Benchmark
public void test_CharArrayIterator() {
- final var s = generateText();
+ final var s = getText();
final var i = s.toCharArray();
var index = 0;
+ var spaces = 0;
for( final var ch : i ) {
+ if( ch == ' ' ) {
+ spaces++;
+ }
+
index++;
}
- assert index == STRLEN;
+ fail( index, s.length() );
}
- @Benchmark
+ //@Benchmark
public void test_StringTokenizer() {
- final var s = generateText();
+ final var s = getText();
final var i = new StringTokenizer( s, " ", true );
var index = 0;
+ var spaces = 0;
while( i.hasMoreTokens() ) {
final var token = i.nextToken();
+
+ if( token.isBlank() ) {
+ spaces++;
+ }
+
index += token.length();
}
- assert index == STRLEN;
+ fail( index, s.length() );
}
- @Benchmark
+ //@Benchmark
public void test_StreamIterator() {
- final var s = generateText();
+ final var s = getText();
final var index = new AtomicInteger();
+ final var spaces = new AtomicInteger();
s.chars().forEach( codepoint -> {
final var ch = Character.valueOf( (char) codepoint );
+
+ if( ch == ' ' ) {
+ spaces.incrementAndGet();
+ }
+
index.incrementAndGet();
} );
- assert index.get() == STRLEN;
+ fail( index.get(), s.length() );
+ }
+
+ private static void fail( final int index, final int length ) {
+ if( index != length ) {
+ throw new RuntimeException( "Fail" );
+ }
}
}
src/main/java/com/whitemagicsoftware/keenquotes/lex/Lexer.java
}
else if( curr == '\\' ) {
- i.next();
- final var next = i.current();
+ final var next = i.advance();
if( next == '\'' ) {
private static boolean isDigit( final char curr ) {
return Character.isDigit( curr ) ||
- "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞".indexOf( curr ) > -1;
+ "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞".indexOf( curr ) >= 0;
}
*/
private static boolean isNumeric( final char curr ) {
- return
- curr == '.' || curr == ',' || curr == '-' || curr == '+' ||
- curr == '^' || curr == '⅟' || curr == '⁄';
+ return curr == '.' || curr == ',' || curr == '-' || curr == '+' ||
+ curr == '^' || curr == '⅟' || curr == '⁄';
}
}
src/main/java/com/whitemagicsoftware/keenquotes/util/FastCharacterIterator.java
/**
* Iterates over a string, much like {@link CharacterIterator}, but faster.
- * This class gets 53 ops/s vs. 49 ops/s for {@link StringCharacterIterator}.
- * In comparison, using unconstrained {@link String#charAt(int)} calls yields
- * 57 ops/s.
+ * Achieves 1128.230 ops/s vs. 49 ops/s for {@link StringCharacterIterator}.
* <p>
* <strong>Caution:</strong> This class offers minimal bounds checking to eke
*/
public FastCharacterIterator( final String s ) {
+ assert s != null;
+
mS = s;
mLen = s.length();
/**
* Returns the character at the currently iterated position in the string.
- * This method performs bounds checking.
+ * This method performs bounds checking by catching an exception because
+ * usually parsing is complete when there are no more characters to iterate,
+ * meaning that 99.99% of the time, explicit bounds checking is superfluous.
*
* @return {@link CharacterIterator#DONE} if there are no more characters.
*/
public char current() {
- final var pos = mPos;
- return pos < mLen ? mS.charAt( pos ) : DONE;
+ try {
+ return mS.charAt( mPos );
+ } catch( final Exception ex ) {
+ return DONE;
+ }
+ }
+
+ /**
+ * Returns the next character in the string and consumes it.
+ *
+ * @return {@link CharacterIterator#DONE} if there are no more characters.
+ */
+ public char advance() {
+ try {
+ return mS.charAt( ++mPos );
+ } catch( final Exception ex ) {
+ return DONE;
+ }
+ }
+
+ /**
+ * Returns the next character in the string without consuming it. Multiple
+ * consecutive calls to this method will return the same value.
+ *
+ * @return {@link CharacterIterator#DONE} if there are no more characters.
+ */
+ public char peek() {
+ try {
+ return mS.charAt( mPos + 1 );
+ } catch( final Exception ex ) {
+ return DONE;
+ }
}
*/
public void prev() {
- --mPos;
- }
-
- /**
- * Returns the next character in the string without consuming it. Multiple
- * consecutive calls to this method will return the same value. This method
- * performs bounds checking.
- *
- * @return {@link CharacterIterator#DONE} if there are no more characters.
- */
- public char peek() {
- final var pos = mPos;
- return pos + 1 < mLen ? mS.charAt( pos + 1 ) : DONE;
+ mPos--;
}
* @param f The function that determines when skipping stops.
*/
+ @SuppressWarnings( "StatementWithEmptyBody" )
public void skip( final Function<Character, Boolean> f ) {
assert f != null;
- do {
- next();
- }
- while( f.apply( current() ) );
+ while( f.apply( advance() ) ) ;
// The loop always overshoots by one character.
Delta: 125 lines added, 66 lines removed, 59-line increase