Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git
M README.md
1
![Total Downloads](https://img.shields.io/github/downloads/DaveJarvis/keenquotes/total?color=blue&label=Total%20Downloads&style=flat) ![Release Downloads](https://img.shields.io/github/downloads/DaveJarvis/keenquotes/latest/total?color=purple&label=Release%20Downloads&style=flat) ![Release Date](https://img.shields.io/github/release-date/DaveJarvis/keenquotes?color=red&style=flat&label=Release%20Date) ![Release Version](https://img.shields.io/github/v/release/DaveJarvis/keenquotes?style=flat&label=Release)
2
1 3
# KeenQuotes
2 4
M build.gradle
30 30
compileJava {
31 31
  options.compilerArgs << "-Xlint:unchecked" << "-Xlint:deprecation"
32
  options.compilerArgs.addAll(['--release', '17'])
32
  options.compilerArgs.addAll(['--release', '19'])
33 33
}
34 34
M src/jmh/java/com/whitemagicsoftware/keenquotes/StringIterationBenchmark.java
12 12
import static java.text.CharacterIterator.DONE;
13 13
14
/**
15
 * Higher scores mean faster code:
16
 *
17
 * <pre>
18
 * Benchmark                     Mode   Cnt    Score    Error  Units
19
 * test_CharArrayIterator        thrpt   25  753.960 ±  0.972  ops/s
20
 * test_CharAtIterator           thrpt   25  878.016 ±  0.884  ops/s
21
 * test_FastCharacterIterator    thrpt   25  803.041 ± 48.422  ops/s
22
 * test_StreamIterator           thrpt   25  101.416 ±  0.053  ops/s
23
 * test_StringCharacterIterator  thrpt   25  580.341 ±  0.432  ops/s
24
 * test_StringTokenizer          thrpt   25  174.121 ±  8.282  ops/s
25
 * </pre>
26
 */
14 27
@SuppressWarnings( "unused" )
15 28
public class StringIterationBenchmark {
29
  private static final int STRLEN = 1024 * 1024;
16 30
  private static final String CHARSET =
17 31
    "ABCDEFGHIJKLM NOPQRSTUVWXYZ abcdefghijklm nopqrstuvxyz 01234 5 6789";
18
  public static final int STRLEN = 1024 * 1024;
19 32
20
  private static String generateText() {
21
    final var sb = new StringBuilder( STRLEN );
33
  private static final String sText;
34
35
  static {
22 36
    final var len = CHARSET.length();
37
    final var buffer = new StringBuilder( STRLEN );
23 38
24 39
    for( var i = 0; i < STRLEN; i++ ) {
25
      sb.append( CHARSET.charAt( (int) (len * random()) ) );
40
      buffer.append( CHARSET.charAt( (int) (len * random()) ) );
26 41
    }
27 42
28
    return sb.toString();
43
    sText = buffer.toString();
44
  }
45
46
  private static String getText() {
47
    return sText;
29 48
  }
30 49
31 50
  @Benchmark
32
  public void test_CharAtIterator() {
33
    final var s = generateText();
34
    final var length = s.length();
35
    var index = 0;
51
  public void test_FastCharacterIterator() {
52
    final var s = getText();
53
    final var i = new FastCharacterIterator( s );
54
    var spaces = 0;
36 55
37
    while( index < length ) {
38
      final var ch = s.charAt( index );
39
      index++;
56
    char ch = ' ';
57
58
    while( (ch = i.advance()) != DONE ) {
59
      if( ch == ' ' ) {
60
        spaces++;
61
      }
40 62
    }
41 63
42
    assert index == length;
64
    fail( i.index(), s.length() );
43 65
  }
44 66
45 67
  @Benchmark
46
  public void test_FastCharacterIterator() {
47
    final var s = generateText();
48
    final var i = new FastCharacterIterator( s );
68
  public void test_CharAtIterator() {
69
    final var s = getText();
70
    final var length = s.length();
71
    var index = 0;
72
    var spaces = 0;
49 73
50
    char c = ' ';
74
    while( index < length ) {
75
      final var ch = s.charAt( index );
51 76
52
    while( c != DONE ) {
53
      i.next();
54
      c = i.current();
77
      if( ch == ' ' ) {
78
        spaces++;
79
      }
80
81
      index++;
55 82
    }
56 83
57
    assert i.index() == STRLEN;
84
    fail( index, length );
58 85
  }
59 86
60 87
  @Benchmark
61 88
  public void test_StringCharacterIterator() {
62
    final var s = generateText();
89
    final var s = getText();
63 90
    final var i = new StringCharacterIterator( s );
64 91
    var index = 0;
92
    var spaces = 0;
65 93
66
    char c = ' ';
94
    char ch = ' ';
67 95
68
    while( c != DONE ) {
69
      c = i.next();
96
    while( ch != DONE ) {
97
      ch = i.next();
98
99
      if( ch == ' ' ) {
100
        spaces++;
101
      }
102
70 103
      index++;
71 104
    }
72 105
73
    assert index == STRLEN;
106
    fail( index, s.length() );
74 107
  }
75 108
76 109
  @Benchmark
77 110
  public void test_CharArrayIterator() {
78
    final var s = generateText();
111
    final var s = getText();
79 112
    final var i = s.toCharArray();
80 113
    var index = 0;
114
    var spaces = 0;
81 115
82 116
    for( final var ch : i ) {
117
      if( ch == ' ' ) {
118
        spaces++;
119
      }
120
83 121
      index++;
84 122
    }
85 123
86
    assert index == STRLEN;
124
    fail( index, s.length() );
87 125
  }
88 126
89 127
  @Benchmark
90 128
  public void test_StringTokenizer() {
91
    final var s = generateText();
129
    final var s = getText();
92 130
    final var i = new StringTokenizer( s, " ", true );
93 131
    var index = 0;
132
    var spaces = 0;
94 133
95 134
    while( i.hasMoreTokens() ) {
96 135
      final var token = i.nextToken();
136
137
      if( token.isBlank() ) {
138
        spaces++;
139
      }
140
97 141
      index += token.length();
98 142
    }
99 143
100
    assert index == STRLEN;
144
    fail( index, s.length() );
101 145
  }
102 146
103 147
  @Benchmark
104 148
  public void test_StreamIterator() {
105
    final var s = generateText();
149
    final var s = getText();
106 150
    final var index = new AtomicInteger();
151
    final var spaces = new AtomicInteger();
107 152
108 153
    s.chars().forEach( codepoint -> {
109 154
      final var ch = Character.valueOf( (char) codepoint );
155
156
      if( ch == ' ' ) {
157
        spaces.incrementAndGet();
158
      }
159
110 160
      index.incrementAndGet();
111 161
    } );
112 162
113
    assert index.get() == STRLEN;
163
    fail( index.get(), s.length() );
164
  }
165
166
  private static void fail( final int index, final int length ) {
167
    if( index != length ) {
168
      throw new RuntimeException( "Fail" );
169
    }
114 170
  }
115 171
}
M src/main/java/com/whitemagicsoftware/keenquotes/lex/Lexer.java
128 128
      }
129 129
      else if( curr == '\\' ) {
130
        i.next();
131
        final var next = i.current();
130
        final var next = i.advance();
132 131
133 132
        if( next == '\'' ) {
...
198 197
  private static boolean isDigit( final char curr ) {
199 198
    return Character.isDigit( curr ) ||
200
      "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞".indexOf( curr ) > -1;
199
      "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞".indexOf( curr ) >= 0;
201 200
  }
202 201
...
223 222
   */
224 223
  private static boolean isNumeric( final char curr ) {
225
    return
226
      curr == '.' || curr == ',' || curr == '-' || curr == '+' ||
227
        curr == '^' || curr == '⅟' || curr == '⁄';
224
    return curr == '.' || curr == ',' || curr == '-' || curr == '+' ||
225
      curr == '^' || curr == '⅟' || curr == '⁄';
228 226
  }
229 227
}
M src/main/java/com/whitemagicsoftware/keenquotes/util/FastCharacterIterator.java
3 3
4 4
import java.text.CharacterIterator;
5
import java.text.StringCharacterIterator;
6 5
import java.util.function.Function;
7 6
8 7
import static java.text.CharacterIterator.DONE;
9 8
10 9
/**
11 10
 * Iterates over a string, much like {@link CharacterIterator}, but faster.
12
 * This class gets 53 ops/s vs. 49 ops/s for {@link StringCharacterIterator}.
13
 * In comparison, using unconstrained {@link String#charAt(int)} calls yields
14
 * 57 ops/s.
15 11
 * <p>
16 12
 * <strong>Caution:</strong> This class offers minimal bounds checking to eke
...
35 31
   */
36 32
  public FastCharacterIterator( final String s ) {
33
    assert s != null;
34
37 35
    mS = s;
38 36
    mLen = s.length();
...
51 49
  /**
52 50
   * Returns the character at the currently iterated position in the string.
53
   * This method performs bounds checking.
51
   * This method performs bounds checking by catching an exception because
52
   * usually parsing is complete when there are no more characters to iterate,
53
   * meaning that 99.99% of the time, explicit bounds checking is superfluous.
54 54
   *
55 55
   * @return {@link CharacterIterator#DONE} if there are no more characters.
56 56
   */
57 57
  public char current() {
58
    final var pos = mPos;
59
    return pos < mLen ? mS.charAt( pos ) : DONE;
58
    try {
59
      return mS.charAt( mPos );
60
    } catch( final Exception ex ) {
61
      return DONE;
62
    }
63
  }
64
65
  /**
66
   * Returns the next character in the string and consumes it.
67
   *
68
   * @return {@link CharacterIterator#DONE} if there are no more characters.
69
   */
70
  public char advance() {
71
    try {
72
      return mS.charAt( ++mPos );
73
    } catch( final Exception ex ) {
74
      return DONE;
75
    }
76
  }
77
78
  /**
79
   * Returns the next character in the string without consuming it. Multiple
80
   * consecutive calls to this method will return the same value.
81
   *
82
   * @return {@link CharacterIterator#DONE} if there are no more characters.
83
   */
84
  public char peek() {
85
    try {
86
      return mS.charAt( mPos + 1 );
87
    } catch( final Exception ex ) {
88
      return DONE;
89
    }
60 90
  }
61 91
...
71 101
   */
72 102
  public void prev() {
73
    --mPos;
74
  }
75
76
  /**
77
   * Returns the next character in the string without consuming it. Multiple
78
   * consecutive calls to this method will return the same value. This method
79
   * performs bounds checking.
80
   *
81
   * @return {@link CharacterIterator#DONE} if there are no more characters.
82
   */
83
  public char peek() {
84
    final var pos = mPos;
85
    return pos + 1 < mLen ? mS.charAt( pos + 1 ) : DONE;
103
    mPos--;
86 104
  }
87 105
...
100 118
   * @param f The function that determines when skipping stops.
101 119
   */
120
  @SuppressWarnings( "StatementWithEmptyBody" )
102 121
  public void skip( final Function<Character, Boolean> f ) {
103 122
    assert f != null;
104 123
105
    do {
106
      next();
107
    }
108
    while( f.apply( current() ) );
124
    while( f.apply( advance() ) ) ;
109 125
110 126
    // The loop always overshoots by one character.