Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git
M .gitignore
5 5
build
6 6
7
tokens
7 8
M BUILD.md
21 21
Find the application at:
22 22
23
    build/libs/keenquotes.jar
23
    build/lib/keenquotes.jar
24 24
25 25
## Library
26 26
27 27
To build a library for use with other software applications, run:
28 28
29 29
    gradle clean lib
30 30
31 31
Find the library at:
32 32
33
    build/libs/keenquotes.jar
33
    build/lib/keenquotes.jar
34 34
35 35
M README.md
17 17
Download the application:
18 18
19
* [keenquotes.jar](https://github.com/DaveJarvis/keenquotes/releases/latest/download/keenquotes.jar)
19
* [keenquotes.jar](https://gitlab.com/DaveJarvis/KeenQuotes/-/releases/permalink/latest/downloads/keenquotes.jar)
20 20
21 21
# Run
A release.sh
1
#!/usr/bin/env bash
2
# Uploads the built JAR file to the GitLab release named after the most
# recent git tag.
#
# The release name is the nearest tag, without the commit-count/hash suffix.
3
RELEASE=$(git describe --abbrev=0 --tags)
4
# Log in to gitlab.com by piping the contents of tokens/keenquotes.pat to
# glab's standard input.
5
cat tokens/keenquotes.pat | glab auth login --hostname gitlab.com --stdin
6
# NOTE(review): ${RELEASE} is unquoted, and BUILD.md in this same change set
# lists the artifact under build/lib/ rather than build/libs/ -- confirm
# which path the build actually produces.
7
glab release upload ${RELEASE} build/libs/keenquotes.jar
8
1 9
M src/main/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitter.java
22 22
 * respect to curling. Each {@link Lexeme} is marked as such.
23 23
 */
24
@SuppressWarnings( "SameParameterValue" )
25
public final class QuoteEmitter implements Consumer<Lexeme> {
26
  private static final LexemeType[] WORD_PERIOD_NUMBER = {
27
    WORD, PERIOD, NUMBER
28
  };
29
30
  private static final LexemeType[] PUNCT_PERIOD_ELLIPSIS_DASH = {
31
    PUNCT, PERIOD, ELLIPSIS, DASH
32
  };
33
34
  private static final LexemeType[] PUNCT_PERIOD = {
35
    PUNCT, PERIOD
36
  };
37
38
  private static final LexemeType[] SPACE_DASH_ENDING = {
39
    SPACE, DASH, ENDING
40
  };
41
42
  private static final LexemeType[] SPACE_ENDING = {
43
    SPACE, ENDING
44
  };
45
46
  private static final LexemeType[] SPACE_HYPHEN = {
47
    SPACE, HYPHEN
48
  };
49
50
  private static final LexemeType[] SPACE_PUNCT = {
51
    SPACE, PUNCT
52
  };
53
54
  private static final LexemeType[] SPACE_SOT = {
55
    SPACE, SOT
56
  };
57
58
  /**
59
   * Single quotes preceded by these {@link LexemeType}s may be opening quotes.
60
   */
61
  private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE =
62
    new LexemeType[]{
63
      LexemeType.SOT, SPACE, DASH, QUOTE_DOUBLE, OPENING_GROUP, EOL, EOP
64
    };
65
66
  /**
67
   * Single quotes succeeded by these {@link LexemeType}s may be opening quotes.
68
   */
69
  private static final LexemeType[] LAGGING_QUOTE_OPENING_SINGLE =
70
    new LexemeType[]{
71
      WORD, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE
72
    };
73
74
  /**
75
   * Single quotes preceded by these {@link LexemeType}s may be closing quotes.
76
   */
77
  private static final LexemeType[] LEADING_QUOTE_CLOSING_SINGLE =
78
    new LexemeType[]{
79
      WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_DOUBLE
80
    };
81
82
  /**
83
   * Single quotes succeeded by these {@link LexemeType}s may be closing quotes.
84
   */
85
  private static final LexemeType[] LAGGING_QUOTE_CLOSING_SINGLE =
86
    new LexemeType[]{
87
      SPACE, HYPHEN, DASH, PUNCT, PERIOD, ELLIPSIS, QUOTE_DOUBLE, CLOSING_GROUP,
88
      ENDING
89
    };
90
91
  /**
92
   * Double quotes preceded by these {@link LexemeType}s may be opening quotes.
93
   */
94
  private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
95
    new LexemeType[]{
96
      LexemeType.SOT, SPACE, DASH, EQUALS, OPENING_GROUP, EOL, EOP
97
    };
98
99
  /**
100
   * Double quotes succeeded by these {@link LexemeType}s may be opening quotes.
101
   */
102
  private static final LexemeType[] LAGGING_QUOTE_OPENING_DOUBLE =
103
    new LexemeType[]{
104
      WORD, PUNCT, NUMBER, DASH, ELLIPSIS, OPENING_GROUP, QUOTE_SINGLE,
105
      QUOTE_SINGLE_OPENING, QUOTE_SINGLE_CLOSING, QUOTE_DOUBLE
106
    };
107
108
  /**
109
   * Double quotes preceded by these {@link LexemeType}s may be closing quotes.
110
   */
111
  private static final LexemeType[] LEADING_QUOTE_CLOSING_DOUBLE =
112
    new LexemeType[]{
113
      WORD, NUMBER, PERIOD, PUNCT, DASH, ELLIPSIS, CLOSING_GROUP, QUOTE_SINGLE,
114
      QUOTE_SINGLE_CLOSING, QUOTE_SINGLE_OPENING
115
    };
116
117
  /**
118
   * Double quotes succeeded by these {@link LexemeType}s may be closing quotes.
119
   */
120
  private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE =
121
    new LexemeType[]{
122
      SPACE, PUNCT, PERIOD, EQUALS, HYPHEN, DASH, QUOTE_SINGLE, CLOSING_GROUP,
123
      ENDING
124
    };
125
126
  private final CircularFifoQueue<Lexeme> mQ = new CircularFifoQueue<>( 4 );
127
  private final String mText;
128
  private final Contractions mContractions;
129
  private final Consumer<Token> mConsumer;
130
131
  /**
   * Creates an emitter that retains the given text, contractions, and
   * consumer for use while lexemes are processed.
   *
   * @param text         The document text; lexemes are converted to strings
   *                     against it when checking for contractions.
   * @param contractions Consulted to decide whether a single quote belongs
   *                     to a contraction.
   * @param consumer     Stored as-is; presumably receives the emitted
   *                     {@link Token}s. NOTE(review): unlike the other
   *                     arguments, this one is not asserted to be non-null;
   *                     confirm whether {@code null} is permitted.
   */
  public QuoteEmitter(
132
    final String text,
133
    final Contractions contractions,
134
    final Consumer<Token> consumer
135
  ) {
136
    assert text != null;
137
    assert contractions != null;
138
139
    mText = text;
140
    mContractions = contractions;
141
    mConsumer = consumer;
142
  }
143
144
  /**
145
   * Scans the given text document for quotation marks and passes them to the
146
   * given {@link Token} {@link Consumer}.
147
   *
148
   * @param text         The prose to lex.
149
   * @param contractions List of ambiguous and unambiguous contractions.
150
   * @param consumer     Receives the quotation mark {@link Token}s found.
   * @param filter       Handed to the lexer unchanged, together with the
   *                     text and a new {@link QuoteEmitter}; presumably
   *                     controls what the lexer skips (verify against
   *                     {@link LexerFilter}).
151
   */
152
  public static void analyze(
153
    final String text,
154
    final Contractions contractions,
155
    final Consumer<Token> consumer,
156
    final LexerFilter filter
157
  ) {
158
    final var emitter = new QuoteEmitter( text, contractions, consumer );
159
    Lexer.lex( text, emitter, filter );
160
  }
161
162
  /**
   * Adds the given {@link Lexeme} to the queue and, whenever the queue then
   * holds exactly four lexemes, parses that group of four.
   *
   * NOTE(review): the queue is created with a capacity of four; presumably
   * adding to a full queue discards the oldest lexeme, giving a sliding
   * window -- confirm against the queue implementation.
   *
163
   * @param lexeme the input argument
164
   */
165
  @Override
166
  public void accept( final Lexeme lexeme ) {
167
    mQ.add( lexeme );
168
169
    if( mQ.size() == 4 ) {
170
      parse();
171
    }
172
  }
173
174
  private void parse() {
175
    final var lex1 = mQ.get( 0 );
176
    final var lex2 = mQ.get( 1 );
177
    final var lex3 = mQ.get( 2 );
178
    final var lex4 = mQ.get( 3 );
179
180
    // <y'all>, <Ph.D.'ll>, <20's>, <she's>
181
    if( match( WORD_PERIOD_NUMBER, QUOTE_SINGLE, WORD, ANY ) ) {
182
      emit( QUOTE_APOSTROPHE, lex2 );
183
    }
184
    // <'n'>, <'N'>, <'owlin'>
185
    else if(
186
      match( ANY, QUOTE_SINGLE, WORD, QUOTE_SINGLE ) &&
187
        mContractions.beganEndedUnambiguously( lex3.toString( mText ) )
188
    ) {
189
      emit( QUOTE_APOSTROPHE, lex2 );
190
      emit( QUOTE_APOSTROPHE, lex4 );
191
      mQ.set( Lexeme.NONE, 3 );
192
    }
193
    // <2''>
194
    else if( match( NUMBER, QUOTE_SINGLE, QUOTE_SINGLE, ANY ) ) {
195
      // Force double primes to conform to the same constructor usage. This
196
      // simplifies the tokens, reduces some memory usage,
197
      final var lex = new Lexeme( PRIME_DOUBLE, lex2.began(), lex3.ended() );
198
199
      emit( QUOTE_PRIME_DOUBLE, lex );
200
      mQ.set( Lexeme.NONE, 2 );
201
    }
202
    // <2'>
203
    else if( match( NUMBER, QUOTE_SINGLE, ANY, ANY ) ) {
204
      emit( QUOTE_PRIME_SINGLE, lex2 );
205
    }
206
    // <2">
207
    else if( match( NUMBER, QUOTE_DOUBLE, ANY, ANY ) ) {
208
      emit( QUOTE_PRIME_DOUBLE, lex2 );
209
    }
210
    // <thinkin'>
211
    else if(
212
      match( WORD, QUOTE_SINGLE, ANY, ANY ) &&
213
        mContractions.endedUnambiguously( lex1.toString( mText ) )
214
    ) {
215
      emit( QUOTE_APOSTROPHE, lex2 );
216
    }
217
    // <'02>
218
    else if( match( ANY, QUOTE_SINGLE, NUMBER, SPACE_PUNCT ) ) {
219
      emit( QUOTE_APOSTROPHE, lex2 );
220
    }
221
    // <'20s>
222
    else if(
223
      match( ANY, QUOTE_SINGLE, NUMBER, WORD ) &&
224
        "s".equalsIgnoreCase( lex4.toString( mText ) )
225
    ) {
226
      emit( QUOTE_APOSTROPHE, lex2 );
227
    }
228
    // <.'\n>
229
    else if( match( PUNCT_PERIOD_ELLIPSIS_DASH, QUOTE_SINGLE, ENDING, ANY ) ) {
230
      emit( QUOTE_CLOSING_SINGLE, lex2 );
231
    }
232
    // <\'>
233
    else if( match( ESC_SINGLE, ANY, ANY, ANY ) ) {
234
      emit( QUOTE_STRAIGHT_SINGLE, lex1 );
235
    }
236
    // <\">
237
    else if( match( ESC_DOUBLE, ANY, ANY, ANY ) ) {
238
      emit( QUOTE_STRAIGHT_DOUBLE, lex1 );
239
240
      // <\"'--->
241
      if( match( ESC_DOUBLE, QUOTE_SINGLE, SPACE_DASH_ENDING, ANY ) ) {
242
        emit( QUOTE_CLOSING_SINGLE, lex2 );
243
      }
244
    }
245
    // <---'" >
246
    else if( match( DASH, QUOTE_SINGLE, QUOTE_DOUBLE, SPACE_ENDING ) ) {
247
      emit( QUOTE_CLOSING_SINGLE, lex2 );
248
    }
249
    // <o’-lantern>, <o' fellow>, <O'-the>
250
    else if(
251
      match( WORD, QUOTE_SINGLE, SPACE_HYPHEN, WORD ) &&
252
        "o".equalsIgnoreCase( lex1.toString( mText ) )
253
    ) {
254
      emit( QUOTE_APOSTROPHE, lex2 );
255
    }
256
    // <"">, <"...>, <"word>, <---"word>
257
    else if(
258
      match(
259
        LEADING_QUOTE_OPENING_DOUBLE, QUOTE_DOUBLE,
260
        LAGGING_QUOTE_OPENING_DOUBLE, ANY
261
      )
262
    ) {
263
      emit( QUOTE_OPENING_DOUBLE, lex2 );
264
    }
265
    // <..."'>, <word"'>, <?"'>, <word"?>
266
    else if(
267
      match(
268
        LEADING_QUOTE_CLOSING_DOUBLE, QUOTE_DOUBLE,
269
        LAGGING_QUOTE_CLOSING_DOUBLE, ANY
270
      )
271
    ) {
272
      emit( QUOTE_CLOSING_DOUBLE, lex2 );
273
    }
274
    // < ''E>
275
    else if( match( SPACE_SOT, QUOTE_SINGLE, QUOTE_SINGLE, WORD ) ) {
276
      // Consume both immediately to avoid the false ambiguity <'e>.
277
      emit( QUOTE_OPENING_SINGLE, lex2 );
278
      emit( QUOTE_APOSTROPHE, lex3 );
279
      mQ.set( Lexeme.NONE, 1 );
280
      mQ.set( Lexeme.NONE, 2 );
281
    }
282
    // <'...>, <'word>, <---'word>, < 'nation>
283
    else if(
284
      match(
285
        LEADING_QUOTE_OPENING_SINGLE, QUOTE_SINGLE,
286
        LAGGING_QUOTE_OPENING_SINGLE, ANY )
287
    ) {
288
      final var word = lex3.toString( mText );
289
290
      if( mContractions.beganAmbiguously( word ) ) {
291
        emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
292
      }
293
      else if( mContractions.beganUnambiguously( word ) ) {
294
        emit( QUOTE_APOSTROPHE, lex2 );
295
      }
296
      // <"'"nested>
297
      else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, QUOTE_DOUBLE, WORD ) ) {
298
        emit( QUOTE_OPENING_SINGLE, lex2 );
299
      }
300
      // <"'" >
301
      else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, QUOTE_DOUBLE, ANY ) ) {
302
        emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
303
      }
304
      // < '" >
305
      else if( match( ANY, QUOTE_SINGLE, LAGGING_QUOTE_OPENING_SINGLE, ANY ) ) {
306
        emit( QUOTE_OPENING_SINGLE, lex2 );
307
      }
308
      // Ambiguous
309
      else {
310
        emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
311
      }
312
    }
313
    // <word'">, <...'--->, <"' >
314
    else if(
315
      match(
316
        LEADING_QUOTE_CLOSING_SINGLE, QUOTE_SINGLE,
317
        LAGGING_QUOTE_CLOSING_SINGLE, ANY
318
      )
319
    ) {
320
      final var word = lex1.toString( mText );
321
322
      if( mContractions.endedAmbiguously( word ) ) {
323
        emit( QUOTE_AMBIGUOUS_LAGGING, lex2 );
324
      }
325
      else {
326
        emit( QUOTE_CLOSING_SINGLE, lex2 );
327
      }
328
    }
329
    // <word';> (contraction inferred by previous matches)
330
    else if( match( WORD, QUOTE_SINGLE, PUNCT_PERIOD, ANY ) ) {
331
      emit( QUOTE_APOSTROPHE, lex2 );
332
    }
333
    // <---'">
334
    else if( match( DASH, QUOTE_SINGLE, QUOTE_DOUBLE, ANY ) ) {
335
      emit( QUOTE_CLOSING_SINGLE, lex2 );
336
    }
337
    // <'42>, <'-3.14>
338
    else if( match( ANY, QUOTE_SINGLE, NUMBER, ANY ) ) {
339
      emit( QUOTE_OPENING_SINGLE, lex2 );
340
    }
341
    // <PRE-PARSED><'---.>
342
    else if( match( LexemeType.NONE, QUOTE_SINGLE, ANY, ANY ) ) {
343
      emit( QUOTE_CLOSING_SINGLE, lex2 );
344
    }
345
    // <''Cause >
346
    else if( match( QUOTE_SINGLE, QUOTE_SINGLE, WORD, ANY ) ) {
347
      final var word = lex3.toString( mText );
348
349
      if( mContractions.beganAmbiguously( word ) ) {
350
        emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
351
      }
352
      else if( mContractions.beganUnambiguously( word ) ) {
353
        emit( QUOTE_APOSTROPHE, lex2 );
354
      }
355
      else {
356
        emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
357
      }
358
    }
359
    // <'"Trouble>
360
    else if( match( QUOTE_SINGLE, QUOTE_DOUBLE, WORD, ANY ) ) {
361
      emit( QUOTE_OPENING_DOUBLE, lex2 );
362
    }
363
    // International quotation marks.
364
    else if( match( ANY, QUOTE_DOUBLE_OPENING, ANY, ANY ) ) {
365
      emit( QUOTE_OPENING_DOUBLE, lex2 );
366
    }
367
    else if( match( ANY, QUOTE_SINGLE_OPENING, ANY, ANY ) ) {
368
      emit( QUOTE_OPENING_SINGLE, lex2 );
369
    }
370
    else if( match( ANY, QUOTE_DOUBLE_CLOSING, ANY, ANY  ) ) {
24
@SuppressWarnings(
25
  {"SameParameterValue", "SpellCheckingInspection", "GrazieInspection"}
26
)
27
public final class QuoteEmitter implements Consumer<Lexeme> {
28
  private static final LexemeType[] WORD_PERIOD_NUMBER = {
29
    WORD, PERIOD, NUMBER
30
  };
31
32
  private static final LexemeType[] PUNCT_PERIOD_ELLIPSIS_DASH = {
33
    PUNCT, PERIOD, ELLIPSIS, DASH
34
  };
35
36
  private static final LexemeType[] PUNCT_PERIOD = {
37
    PUNCT, PERIOD
38
  };
39
40
  private static final LexemeType[] SPACE_DASH_ENDING = {
41
    SPACE, DASH, ENDING
42
  };
43
44
  private static final LexemeType[] SPACE_ENDING = {
45
    SPACE, ENDING
46
  };
47
48
  private static final LexemeType[] SPACE_HYPHEN = {
49
    SPACE, HYPHEN
50
  };
51
52
  private static final LexemeType[] SPACE_PUNCT = {
53
    SPACE, PUNCT
54
  };
55
56
  private static final LexemeType[] SPACE_SOT = {
57
    SPACE, SOT
58
  };
59
60
  /**
61
   * Single quotes preceded by these {@link LexemeType}s may be opening quotes.
62
   */
63
  private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE =
64
    new LexemeType[]{
65
      LexemeType.SOT, SPACE, DASH, QUOTE_DOUBLE, OPENING_GROUP, EOL, EOP
66
    };
67
68
  /**
69
   * Single quotes succeeded by these {@link LexemeType}s may be opening quotes.
70
   */
71
  private static final LexemeType[] LAGGING_QUOTE_OPENING_SINGLE =
72
    new LexemeType[]{
73
      WORD, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE
74
    };
75
76
  /**
77
   * Single quotes preceded by these {@link LexemeType}s may be closing quotes.
78
   */
79
  private static final LexemeType[] LEADING_QUOTE_CLOSING_SINGLE =
80
    new LexemeType[]{
81
      WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_DOUBLE
82
    };
83
84
  /**
85
   * Single quotes succeeded by these {@link LexemeType}s may be closing quotes.
86
   */
87
  private static final LexemeType[] LAGGING_QUOTE_CLOSING_SINGLE =
88
    new LexemeType[]{
89
      SPACE, HYPHEN, DASH, PUNCT, PERIOD, ELLIPSIS, QUOTE_DOUBLE, CLOSING_GROUP,
90
      ENDING
91
    };
92
93
  /**
94
   * Double quotes preceded by these {@link LexemeType}s may be opening quotes.
95
   */
96
  private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
97
    new LexemeType[]{
98
      LexemeType.SOT, SPACE, DASH, EQUALS, OPENING_GROUP, EOL, EOP
99
    };
100
101
  /**
102
   * Double quotes succeeded by these {@link LexemeType}s may be opening quotes.
103
   */
104
  private static final LexemeType[] LAGGING_QUOTE_OPENING_DOUBLE =
105
    new LexemeType[]{
106
      WORD, PUNCT, NUMBER, DASH, ELLIPSIS, OPENING_GROUP, QUOTE_SINGLE,
107
      QUOTE_SINGLE_OPENING, QUOTE_SINGLE_CLOSING, QUOTE_DOUBLE
108
    };
109
110
  /**
111
   * Double quotes preceded by these {@link LexemeType}s may be closing quotes.
112
   */
113
  private static final LexemeType[] LEADING_QUOTE_CLOSING_DOUBLE =
114
    new LexemeType[]{
115
      WORD, NUMBER, PERIOD, PUNCT, DASH, ELLIPSIS, CLOSING_GROUP, QUOTE_SINGLE,
116
      QUOTE_SINGLE_CLOSING, QUOTE_SINGLE_OPENING
117
    };
118
119
  /**
120
   * Double quotes succeeded by these {@link LexemeType}s may be closing quotes.
121
   */
122
  private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE =
123
    new LexemeType[]{
124
      SPACE, PUNCT, PERIOD, EQUALS, HYPHEN, DASH, QUOTE_SINGLE, CLOSING_GROUP,
125
      ENDING
126
    };
127
128
  private final CircularFifoQueue<Lexeme> mQ = new CircularFifoQueue<>( 4 );
129
  private final String mText;
130
  private final Contractions mContractions;
131
  private final Consumer<Token> mConsumer;
132
133
  /**
   * Creates an emitter that retains the given text, contractions, and
   * consumer for use while lexemes are processed.
   *
   * @param text         The document text; lexemes are converted to strings
   *                     against it when checking for contractions.
   * @param contractions Consulted to decide whether a single quote belongs
   *                     to a contraction.
   * @param consumer     Stored as-is; presumably receives the emitted
   *                     {@link Token}s. NOTE(review): unlike the other
   *                     arguments, this one is not asserted to be non-null;
   *                     confirm whether {@code null} is permitted.
   */
  public QuoteEmitter(
134
    final String text,
135
    final Contractions contractions,
136
    final Consumer<Token> consumer
137
  ) {
138
    assert text != null;
139
    assert contractions != null;
140
141
    mText = text;
142
    mContractions = contractions;
143
    mConsumer = consumer;
144
  }
145
146
  /**
147
   * Scans the given text document for quotation marks and passes them to the
148
   * given {@link Token} {@link Consumer}.
149
   *
150
   * @param text         The prose to lex.
151
   * @param contractions List of ambiguous and unambiguous contractions.
152
   * @param consumer     Receives the quotation mark {@link Token}s found.
   * @param filter       Handed to the lexer unchanged, together with the
   *                     text and a new {@link QuoteEmitter}; presumably
   *                     controls what the lexer skips (verify against
   *                     {@link LexerFilter}).
153
   */
154
  public static void analyze(
155
    final String text,
156
    final Contractions contractions,
157
    final Consumer<Token> consumer,
158
    final LexerFilter filter
159
  ) {
160
    final var emitter = new QuoteEmitter( text, contractions, consumer );
161
    Lexer.lex( text, emitter, filter );
162
  }
163
164
  /**
   * Adds the given {@link Lexeme} to the queue and, whenever the queue then
   * holds exactly four lexemes, parses that group of four.
   *
   * NOTE(review): the queue is created with a capacity of four; presumably
   * adding to a full queue discards the oldest lexeme, giving a sliding
   * window -- confirm against the queue implementation.
   *
165
   * @param lexeme the input argument
166
   */
167
  @Override
168
  public void accept( final Lexeme lexeme ) {
169
    mQ.add( lexeme );
170
171
    if( mQ.size() == 4 ) {
172
      parse();
173
    }
174
  }
175
176
  private void parse() {
177
    final var lex1 = mQ.get( 0 );
178
    final var lex2 = mQ.get( 1 );
179
    final var lex3 = mQ.get( 2 );
180
    final var lex4 = mQ.get( 3 );
181
182
    // <y'all>, <Ph.D.'ll>, <20's>, <she's>
183
    if( match( WORD_PERIOD_NUMBER, QUOTE_SINGLE, WORD, ANY ) ) {
184
      emit( QUOTE_APOSTROPHE, lex2 );
185
    }
186
    // <'n'>, <'N'>, <'owlin'>
187
    else if(
188
      match( ANY, QUOTE_SINGLE, WORD, QUOTE_SINGLE ) &&
189
        mContractions.beganEndedUnambiguously( lex3.toString( mText ) )
190
    ) {
191
      emit( QUOTE_APOSTROPHE, lex2 );
192
      emit( QUOTE_APOSTROPHE, lex4 );
193
      mQ.set( Lexeme.NONE, 3 );
194
    }
195
    // <2''>
196
    else if( match( NUMBER, QUOTE_SINGLE, QUOTE_SINGLE, ANY ) ) {
197
      // Force double primes to conform to the same constructor usage. This
198
      // simplifies the tokens, reduces some memory usage,
199
      final var lex = new Lexeme( PRIME_DOUBLE, lex2.began(), lex3.ended() );
200
201
      emit( QUOTE_PRIME_DOUBLE, lex );
202
      mQ.set( Lexeme.NONE, 2 );
203
    }
204
    // <2'>
205
    else if( match( NUMBER, QUOTE_SINGLE, ANY, ANY ) ) {
206
      emit( QUOTE_PRIME_SINGLE, lex2 );
207
    }
208
    // <2">
209
    else if( match( NUMBER, QUOTE_DOUBLE, ANY, ANY ) ) {
210
      emit( QUOTE_PRIME_DOUBLE, lex2 );
211
    }
212
    // <thinkin'>
213
    else if(
214
      match( WORD, QUOTE_SINGLE, ANY, ANY ) &&
215
        mContractions.endedUnambiguously( lex1.toString( mText ) )
216
    ) {
217
      emit( QUOTE_APOSTROPHE, lex2 );
218
    }
219
    // <'02>
220
    else if( match( ANY, QUOTE_SINGLE, NUMBER, SPACE_PUNCT ) ) {
221
      emit( QUOTE_APOSTROPHE, lex2 );
222
    }
223
    // <'20s>
224
    else if(
225
      match( ANY, QUOTE_SINGLE, NUMBER, WORD ) &&
226
        "s".equalsIgnoreCase( lex4.toString( mText ) )
227
    ) {
228
      emit( QUOTE_APOSTROPHE, lex2 );
229
    }
230
    // <.'\n>
231
    else if( match( PUNCT_PERIOD_ELLIPSIS_DASH, QUOTE_SINGLE, ENDING, ANY ) ) {
232
      emit( QUOTE_CLOSING_SINGLE, lex2 );
233
    }
234
    // <\'>
235
    else if( match( ESC_SINGLE, ANY, ANY, ANY ) ) {
236
      emit( QUOTE_STRAIGHT_SINGLE, lex1 );
237
    }
238
    // <\">
239
    else if( match( ESC_DOUBLE, ANY, ANY, ANY ) ) {
240
      emit( QUOTE_STRAIGHT_DOUBLE, lex1 );
241
242
      // <\"'--->
243
      if( match( ESC_DOUBLE, QUOTE_SINGLE, SPACE_DASH_ENDING, ANY ) ) {
244
        emit( QUOTE_CLOSING_SINGLE, lex2 );
245
      }
246
    }
247
    // <---'" >
248
    else if( match( DASH, QUOTE_SINGLE, QUOTE_DOUBLE, SPACE_ENDING ) ) {
249
      emit( QUOTE_CLOSING_SINGLE, lex2 );
250
    }
251
    // <o’-lantern>, <o' fellow>, <O'-the>
252
    else if(
253
      match( WORD, QUOTE_SINGLE, SPACE_HYPHEN, WORD ) &&
254
        "o".equalsIgnoreCase( lex1.toString( mText ) )
255
    ) {
256
      emit( QUOTE_APOSTROPHE, lex2 );
257
    }
258
    // <"">, <"...>, <"word>, <---"word>
259
    else if(
260
      match(
261
        LEADING_QUOTE_OPENING_DOUBLE, QUOTE_DOUBLE,
262
        LAGGING_QUOTE_OPENING_DOUBLE, ANY
263
      )
264
    ) {
265
      emit( QUOTE_OPENING_DOUBLE, lex2 );
266
    }
267
    // <..."'>, <word"'>, <?"'>, <word"?>
268
    else if(
269
      match(
270
        LEADING_QUOTE_CLOSING_DOUBLE, QUOTE_DOUBLE,
271
        LAGGING_QUOTE_CLOSING_DOUBLE, ANY
272
      )
273
    ) {
274
      emit( QUOTE_CLOSING_DOUBLE, lex2 );
275
    }
276
    // < ''E>
277
    else if( match( SPACE_SOT, QUOTE_SINGLE, QUOTE_SINGLE, WORD ) ) {
278
      // Consume both immediately to avoid the false ambiguity <'e>.
279
      emit( QUOTE_OPENING_SINGLE, lex2 );
280
      emit( QUOTE_APOSTROPHE, lex3 );
281
      mQ.set( Lexeme.NONE, 1 );
282
      mQ.set( Lexeme.NONE, 2 );
283
    }
284
    // <'...>, <'word>, <---'word>, < 'nation>
285
    else if(
286
      match(
287
        LEADING_QUOTE_OPENING_SINGLE, QUOTE_SINGLE,
288
        LAGGING_QUOTE_OPENING_SINGLE, ANY )
289
    ) {
290
      final var word = lex3.toString( mText );
291
292
      if( mContractions.beganAmbiguously( word ) ) {
293
        emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
294
      }
295
      else if( mContractions.beganUnambiguously( word ) ) {
296
        emit( QUOTE_APOSTROPHE, lex2 );
297
      }
298
      // <"'"nested>
299
      else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, QUOTE_DOUBLE, WORD ) ) {
300
        emit( QUOTE_OPENING_SINGLE, lex2 );
301
      }
302
      // <"'" >
303
      else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, QUOTE_DOUBLE, ANY ) ) {
304
        emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
305
      }
306
      // < '" >
307
      else if( match( ANY, QUOTE_SINGLE, LAGGING_QUOTE_OPENING_SINGLE, ANY ) ) {
308
        emit( QUOTE_OPENING_SINGLE, lex2 );
309
      }
310
      // Ambiguous
311
      else {
312
        emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
313
      }
314
    }
315
    // <"'--- >
316
    else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, DASH, ANY ) ) {
317
      emit( QUOTE_OPENING_SINGLE, lex2 );
318
    }
319
    // <word'">, <...'--->, <"' >
320
    else if(
321
      match(
322
        LEADING_QUOTE_CLOSING_SINGLE, QUOTE_SINGLE,
323
        LAGGING_QUOTE_CLOSING_SINGLE, ANY
324
      )
325
    ) {
326
      final var word = lex1.toString( mText );
327
328
      if( mContractions.endedAmbiguously( word ) ) {
329
        emit( QUOTE_AMBIGUOUS_LAGGING, lex2 );
330
      }
331
      else {
332
        emit( QUOTE_CLOSING_SINGLE, lex2 );
333
      }
334
    }
335
    // <word';> (contraction inferred by previous matches)
336
    else if( match( WORD, QUOTE_SINGLE, PUNCT_PERIOD, ANY ) ) {
337
      emit( QUOTE_APOSTROPHE, lex2 );
338
    }
339
    // <---'">
340
    else if( match( DASH, QUOTE_SINGLE, QUOTE_DOUBLE, ANY ) ) {
341
      emit( QUOTE_CLOSING_SINGLE, lex2 );
342
    }
343
    // <'42>, <'-3.14>
344
    else if( match( ANY, QUOTE_SINGLE, NUMBER, ANY ) ) {
345
      emit( QUOTE_OPENING_SINGLE, lex2 );
346
    }
347
    // <PRE-PARSED><'---.>
348
    else if( match( LexemeType.NONE, QUOTE_SINGLE, ANY, ANY ) ) {
349
      emit( QUOTE_CLOSING_SINGLE, lex2 );
350
    }
351
    // <''Cause >
352
    else if( match( QUOTE_SINGLE, QUOTE_SINGLE, WORD, ANY ) ) {
353
      final var word = lex3.toString( mText );
354
355
      if( mContractions.beganAmbiguously( word ) ) {
356
        emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
357
      }
358
      else if( mContractions.beganUnambiguously( word ) ) {
359
        emit( QUOTE_APOSTROPHE, lex2 );
360
      }
361
      else {
362
        emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
363
      }
364
    }
365
    // <'"Trouble>
366
    else if( match( QUOTE_SINGLE, QUOTE_DOUBLE, WORD, ANY ) ) {
367
      emit( QUOTE_OPENING_DOUBLE, lex2 );
368
    }
369
    // International quotation marks.
370
    else if( match( ANY, QUOTE_DOUBLE_OPENING, ANY, ANY ) ) {
371
      emit( QUOTE_OPENING_DOUBLE, lex2 );
372
    }
373
    else if( match( ANY, QUOTE_SINGLE_OPENING, ANY, ANY ) ) {
374
      emit( QUOTE_OPENING_SINGLE, lex2 );
375
    }
376
    else if( match( ANY, QUOTE_DOUBLE_CLOSING, ANY, ANY ) ) {
371 377
      emit( QUOTE_CLOSING_DOUBLE, lex2 );
372 378
    }
M src/main/java/com/whitemagicsoftware/keenquotes/parser/Token.java
14 14
 * Represents a high-level token read from a text document.
15 15
 */
16
final class Token implements Comparable<Token>, Stem {
16
public final class Token implements Comparable<Token>, Stem {
17 17
  /**
18 18
   * Provides an entity-based set of {@link Token} replacements.
M src/test/resources/com/whitemagicsoftware/keenquotes/texts/unambiguous-2-pass.txt
143 143
&ldquo;You &apos;cause---&rdquo; all fifteen years&apos; worth.
144 144
145
"'---has a prison.'"
146
&ldquo;&lsquo;---has a prison.&rsquo;&rdquo;
147