Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git
M src/main/java/com/whitemagicsoftware/keenquotes/Contractions.java
61 61
      mEndedAmbiguous.addAll( ENDED_AMBIGUOUS );
62 62
63
      return new Contractions( this );
64
    }
65
66
    /**
67
     * This returns the {@code fallback} {@link Set} if {@code src} is empty;
68
     * otherwise, this returns the empty {@link Set}.
69
     *
70
     * @param src      A set of contractions, possibly empty.
71
     * @param fallback The default values to use if {@code src} is empty.
72
     * @param <T>      The type of data used by both {@link Set}s.
73
     * @return An empty {@link Set} if the {@code src} contains at least one
74
     * element; otherwise, this will return {@code fallback}.
75
     */
76
    private static <T> Set<T> from( final Set<T> src, final Set<T> fallback ) {
77
      assert src != null;
78
      assert fallback != null;
79
      return src.isEmpty() ? fallback : emptySet();
80
    }
81
  }
82
83
  /**
84
   * Answers whether the given word is a contraction that always starts
85
   * with an apostrophe. The comparison is case insensitive. This must
86
   * only be called when a straight quote is followed by a word.
87
   *
88
   * @param word The word to compare against the list of known unambiguous
89
   *             contractions.
90
   * @return {@code true} when the given word is in the set of unambiguous
91
   * contractions.
92
   */
93
  public boolean beganUnambiguously( final String word ) {
94
    assert word != null;
95
    return getBeganUnambiguous().contains( word.toLowerCase() );
96
  }
97
98
  /**
99
   * Answers whether the given word could be a contraction but is also a
100
   * valid word in non-contracted form.
101
   *
102
   * @param word The word to compare against the list of known ambiguous
103
   *             contractions.
104
   * @return {@code true} when the given word is in the set of ambiguous
105
   * contractions.
106
   */
107
  public boolean beganAmbiguously( final String word ) {
108
    assert word != null;
109
    return getBeganAmbiguous().contains( word.toLowerCase() );
110
  }
111
112
  public boolean endedUnambiguously( final String word ) {
113
    assert word != null;
114
    return getEndedUnambiguous().contains( word.toLowerCase() );
115
  }
116
117
  public boolean endedAmbiguously( final String word ) {
118
    assert word != null;
119
    final var check = word.toLowerCase();
120
121
    // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet
122
    // allow o' to match because 'a sentence can end with the letter o'.
123
    return getEndedAmbiguous().contains( check ) ||
124
      check.endsWith( "s" ) || check.endsWith( "z" ) ||
125
      check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" ));
126
  }
127
128
  private Set<String> getBeganUnambiguous() {
129
    return mBuilder.mBeganUnambiguous;
130
  }
131
132
  private Set<String> getEndedUnambiguous() {
133
    return mBuilder.mEndedUnambiguous;
134
  }
135
136
  private Set<String> getBeganAmbiguous() {
137
    return mBuilder.mBeganAmbiguous;
138
  }
139
140
  private Set<String> getEndedAmbiguous() {
141
    return mBuilder.mEndedAmbiguous;
142
  }
143
144
145
  @Override
146
  public String toString() {
147
    return
148
      toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) +
149
        toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" ) +
150
        toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) +
151
        toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" );
152
  }
153
154
  private String toString(
155
    final Set<String> words, final String category, final String fmt ) {
156
    final var sb = new StringBuilder( 16384 );
157
    final var newline = System.lineSeparator();
158
    final var list = new ArrayList<>( words );
159
160
    sort( list );
161
    sb.append( format( "%n%s%n", category ) );
162
    list.forEach( ( s ) -> sb.append( format( fmt, s ) ).append( newline ) );
163
164
    return sb.toString();
165
  }
166
167
  /**
168
   * Words having a straight apostrophe that cannot be mistaken for an
169
   * opening single quote.
   * <p>
   * Each entry is the text that follows the apostrophe. These values are
   * the ones tested by {@code beganUnambiguously}, which lowercases the
   * candidate word before checking membership.
170
   */
171
  private static final Set<String> BEGAN_UNAMBIGUOUS = Set.of(
172
    "aporth",
173
    "boutcha",
174
    "boutchu",
175
    "cept",
176
    "dillo",
177
    "em",
178
    "fraid",
179
    "gainst",
180
    "n",
181
    "neath",
182
    "nother",
183
    "nuff",
184
    "onna",
185
    "onna'",
186
    "pon",
187
    "s",
188
    "sblood",
189
    "scuse",
190
    "sfar",
191
    "sfoot",
192
    "t",
193
    "taint",
194
    "tain",
195
    "til",
196
    "tis",
197
    "tisn",
198
    "tshall",
199
    "twas",
200
    "twasn",
201
    "tween",
202
    "twere",
203
    "tweren",
204
    "twixt",
205
    "twon",
206
    "twou",
207
    "twould",
208
    "twouldn",
209
    "ve"
210
  );
211
212
  /**
213
   * Words having a straight apostrophe that may be either part of a
214
   * contraction or a word that stands alone beside an opening single quote.
   * <p>
   * The comment above each entry gives the contracted reading, a vertical
   * bar, then the stand-alone reading.
215
   */
216
  private static final Set<String> BEGAN_AMBIGUOUS = Set.of(
217
    // about|boxing match
218
    "bout",
219
    // because|causal
220
    "cause",
221
    // what you|choo choo train
222
    "choo",
223
    // he|e pluribus unum
224
    "e",
225
    // here|earlier
226
    "ere",
227
    // afro|to and fro
228
    "fro",
229
    // whore|ho ho!
230
    "ho",
231
    // okay|letter K
232
    "kay",
233
    // lo|lo and behold
234
    "lo",
235
    // are|regarding
236
    "re",
237
    // what's up|to sup
238
    "sup",
239
    // it will|twill fabric
240
    "twill",
241
    // them|utterance
242
    "um",
243
    // is that|Iranian village
244
    "zat"
245
  );
246
247
  /**
   * Words followed by a straight apostrophe that may or may not be
   * contractions. The comment above each entry gives the contracted reading,
   * a vertical bar, then the stand-alone reading.
   * <p>
   * In addition to these entries, {@code endedAmbiguously} treats words
   * ending in s, z, or x, and words longer than one letter ending in n, as
   * ambiguous.
   */
  private static final Set<String> ENDED_AMBIGUOUS = Set.of(
248
    // give|martial arts garment
249
    "gi",
250
    // in|I
251
    "i",
252
    // of|letter o
253
    "o"
254
  );
255
256
  private static final Set<String> ENDED_UNAMBIGUOUS = Set.of(
257
    // and
258
    "an",
259
    // for/before
260
    "fo",
261
    // friend
262
    "frien",
263
    // just
264
    "jus",
265
    // lord
266
    "lor",
267
    // myself
268
    "masel",
269
    // old
270
    "ol",
271
    // San (Francisco)
272
    "Sa",
273
    // shift
274
    "shif",
275
    // the
276
    "th",
277
    // what
278
    "wha",
279
    // world
280
    "worl",
281
    // Top ~500 common -ing words as English contractions.
282
    "acceptin",
283
    "accompanyin",
284
    "accordin",
285
    "accountin",
286
    "achievin",
287
    "acquirin",
288
    "actin",
289
    "addin",
290
    "addressin",
291
    "adjoinin",
292
    "adoptin",
293
    "advancin",
294
    "advertisin",
295
    "affectin",
296
    "agin",
297
    "allowin",
298
    "amazin",
299
    "analyzin",
300
    "answerin",
301
    "anythin",
302
    "appearin",
303
    "applyin",
304
    "approachin",
305
    "arguin",
306
    "arisin",
307
    "arrivin",
308
    "askin",
309
    "assessin",
310
    "assumin",
311
    "attackin",
312
    "attemptin",
313
    "attendin",
314
    "avoidin",
315
    "bankin",
316
    "bargainin",
317
    "bearin",
318
    "beatin",
319
    "becomin",
320
    "beginnin",
321
    "bein",
322
    "believin",
323
    "belongin",
324
    "bendin",
325
    "bindin",
326
    "bleedin",
327
    "blessin",
328
    "blowin",
329
    "boilin",
330
    "borrowin",
331
    "breakin",
332
    "breathin",
333
    "breedin",
334
    "bringin",
335
    "broadcastin",
336
    "buildin",
337
    "burnin",
338
    "buyin",
339
    "calculatin",
340
    "callin",
341
    "carryin",
342
    "castin",
343
    "causin",
344
    "ceilin",
345
    "challengin",
346
    "changin",
347
    "checkin",
348
    "choosin",
349
    "claimin",
350
    "cleanin",
351
    "clearin",
352
    "climbin",
353
    "closin",
354
    "clothin",
355
    "collectin",
356
    "combinin",
357
    "comin",
358
    "commandin",
359
    "comparin",
360
    "compellin",
361
    "competin",
362
    "computin",
363
    "concernin",
364
    "concludin",
365
    "conditionin",
366
    "conductin",
367
    "conflictin",
368
    "connectin",
369
    "considerin",
370
    "consistin",
371
    "constructin",
372
    "consultin",
373
    "consumin",
374
    "containin",
375
    "continuin",
376
    "contractin",
377
    "contributin",
378
    "controllin",
379
    "convincin",
380
    "cookin",
381
    "coolin",
382
    "copin",
383
    "correspondin",
384
    "counselin",
385
    "countin",
386
    "couplin",
387
    "coverin",
388
    "creatin",
389
    "crossin",
390
    "cryin",
391
    "cuttin",
392
    "dancin",
393
    "darlin",
394
    "datin",
395
    "dealin",
396
    "decidin",
397
    "declarin",
398
    "declinin",
399
    "decreasin",
400
    "definin",
401
    "demandin",
402
    "denyin",
403
    "dependin",
404
    "descendin",
405
    "describin",
406
    "designin",
407
    "destroyin",
408
    "determinin",
409
    "developin",
410
    "differin",
411
    "dinin",
412
    "directin",
413
    "discussin",
414
    "distinguishin",
415
    "disturbin",
416
    "dividin",
417
    "doin",
418
    "drawin",
419
    "dressin",
420
    "drinkin",
421
    "drivin",
422
    "droppin",
423
    "dryin",
424
    "durin",
425
    "dwellin",
426
    "dyin",
427
    "eatin",
428
    "editin",
429
    "emergin",
430
    "employin",
431
    "enablin",
432
    "encouragin",
433
    "endin",
434
    "engagin",
435
    "engineerin",
436
    "enjoyin",
437
    "enterin",
438
    "establishin",
439
    "evaluatin",
440
    "evenin",
441
    "everythin",
442
    "examinin",
443
    "exceedin",
444
    "excitin",
445
    "excludin",
446
    "existin",
447
    "expandin",
448
    "expectin",
449
    "experiencin",
450
    "explainin",
451
    "explorin",
452
    "expressin",
453
    "extendin",
454
    "facin",
455
    "failin",
456
    "fallin",
457
    "farmin",
458
    "fascinatin",
459
    "feedin",
460
    "feelin",
461
    "fightin",
462
    "filin",
463
    "fillin",
464
    "financin",
465
    "findin",
466
    "firin",
467
    "fishin",
468
    "fittin",
469
    "fixin",
470
    "floatin",
471
    "flowin",
472
    "flyin",
473
    "focusin",
474
    "followin",
475
    "forcin",
476
    "foregoin",
477
    "formin",
478
    "forthcomin",
479
    "foundin",
480
    "freezin",
481
    "fuckin",
482
    "functionin",
483
    "fundin",
484
    "gainin",
485
    "gatherin",
486
    "generatin",
487
    "gettin",
488
    "givin",
489
    "goin",
490
    "governin",
491
    "grantin",
492
    "growin",
493
    "hackin",
494
    "handlin",
495
    "hangin",
496
    "happenin",
497
    "havin",
498
    "headin",
499
    "healin",
500
    "hearin",
501
    "heatin",
502
    "helpin",
503
    "hidin",
504
    "holdin",
505
    "hopin",
506
    "housin",
507
    "huntin",
508
    "identifyin",
509
    "imagin",
510
    "implementin",
511
    "imposin",
512
    "improvin",
513
    "includin",
514
    "increasin",
515
    "indicatin",
516
    "interestin",
517
    "interpretin",
518
    "introducin",
519
    "involvin",
520
    "joinin",
521
    "judgin",
522
    "keepin",
523
    "killin",
524
    "knowin",
525
    "lackin",
526
    "landin",
527
    "lastin",
528
    "laughin",
529
    "layin",
530
    "leadin",
531
    "leanin",
532
    "learnin",
533
    "leavin",
534
    "lettin",
535
    "liftin",
536
    "lightin",
537
    "lightnin",
538
    "limitin",
539
    "listenin",
540
    "listin",
541
    "livin",
542
    "loadin",
543
    "lookin",
544
    "losin",
545
    "lovin",
546
    "lowerin",
547
    "lyin",
548
    "maintainin",
549
    "makin",
550
    "managin",
551
    "manufacturin",
552
    "mappin",
553
    "marketin",
554
    "markin",
555
    "matchin",
556
    "meanin",
557
    "measurin",
558
    "meetin",
559
    "meltin",
560
    "minin",
561
    "misleadin",
562
    "missin",
563
    "mixin",
564
    "modelin",
565
    "monitorin",
566
    "mornin",
567
    "movin",
568
    "neighborin",
569
    "neighbourin",
570
    "nothin",
571
    "notin",
572
    "notwithstandin",
573
    "nursin",
574
    "observin",
575
    "obtainin",
576
    "occurrin",
577
    "offerin",
578
    "offsprin",
579
    "ongoin",
580
    "openin",
581
    "operatin",
582
    "opposin",
583
    "orderin",
584
    "organizin",
585
    "outstandin",
586
    "overwhelmin",
587
    "packin",
588
    "paintin",
589
    "parkin",
590
    "participatin",
591
    "passin",
592
    "payin",
593
    "pendin",
594
    "performin",
595
    "pickin",
596
    "pissin",
597
    "placin",
598
    "plannin",
599
    "plantin",
600
    "playin",
601
    "pleasin",
602
    "pointin",
603
    "possessin",
604
    "preachin",
605
    "precedin",
606
    "preparin",
607
    "presentin",
608
    "preservin",
609
    "pressin",
610
    "prevailin",
611
    "preventin",
612
    "pricin",
613
    "printin",
614
    "proceedin",
615
    "processin",
616
    "producin",
617
    "programmin",
618
    "promisin",
619
    "promotin",
620
    "protectin",
621
    "providin",
622
    "provin",
623
    "publishin",
624
    "pullin",
625
    "purchasin",
626
    "pursuin",
627
    "pushin",
628
    "puttin",
629
    "questionin",
630
    "rangin",
631
    "ratin",
632
    "reachin",
633
    "readin",
634
    "reasonin",
635
    "receivin",
636
    "recognizin",
637
    "recordin",
638
    "reducin",
639
    "referrin",
640
    "reflectin",
641
    "refusin",
642
    "regardin",
643
    "regulatin",
644
    "relatin",
645
    "remainin",
646
    "rememberin",
647
    "removin",
648
    "renderin",
649
    "repeatin",
650
    "replacin",
651
    "reportin",
652
    "representin",
653
    "requirin",
654
    "respectin",
655
    "respondin",
656
    "restin",
657
    "resultin",
658
    "returnin",
659
    "revealin",
660
    "ridin",
661
    "risin",
662
    "rulin",
663
    "runnin",
664
    "sailin",
665
    "samplin",
666
    "satisfyin",
667
    "savin",
668
    "sayin",
669
    "scatterin",
670
    "schoolin",
671
    "screenin",
672
    "searchin",
673
    "securin",
674
    "seein",
675
    "seekin",
676
    "selectin",
677
    "sellin",
678
    "sendin",
679
    "separatin",
680
    "servin",
681
    "settin",
682
    "settlin",
683
    "sewin",
684
    "shakin",
685
    "shapin",
686
    "sharin",
687
    "shiftin",
688
    "shinin",
689
    "shippin",
690
    "shittin",
691
    "shootin",
692
    "shoppin",
693
    "showin",
694
    "singin",
695
    "sinkin",
696
    "sittin",
697
    "sleepin",
698
    "smilin",
699
    "smokin",
700
    "spankin",
701
    "solvin",
702
    "somethin",
703
    "speakin",
704
    "spellin",
705
    "spendin",
706
    "spinnin",
707
    "spittin",
708
    "spreadin",
709
    "standin",
710
    "starin",
711
    "startin",
712
    "statin",
713
    "stayin",
714
    "stealin",
715
    "sterlin",
716
    "stimulatin",
717
    "stirrin",
718
    "stoppin",
719
    "strengthenin",
720
    "stretchin",
721
    "strikin",
722
    "strugglin",
723
    "studyin",
724
    "succeedin",
725
    "sufferin",
726
    "suggestin",
727
    "supplyin",
728
    "supportin",
729
    "surprisin",
730
    "surroundin",
731
    "survivin",
732
    "sweepin",
733
    "swellin",
734
    "swimmin",
735
    "switchin",
736
    "takin",
737
    "talkin",
738
    "teachin",
739
    "tellin",
740
    "testin",
741
    "thinkin",
742
    "threatenin",
743
    "throwin",
744
    "timin",
745
    "touchin",
746
    "tradin",
747
    "trainin",
748
    "travelin",
749
    "treatin",
750
    "tremblin",
751
    "tryin",
752
    "turnin",
753
    "underlyin",
754
    "understandin",
755
    "undertakin",
756
    "unwillin",
757
    "usin",
758
    "varyin",
759
    "viewin",
760
    "visitin",
761
    "votin",
762
    "waitin",
763
    "walkin",
764
    "wanderin",
765
    "wantin",
766
    "warnin",
767
    "washin",
768
    "watchin",
769
    "wearin",
770
    "weddin",
771
    "whackin",
63
      // Remove ambiguous items if they are already declared.
64
      mBeganAmbiguous.removeAll( mBeganUnambiguous );
65
      mEndedAmbiguous.removeAll( mEndedUnambiguous );
66
67
      return new Contractions( this );
68
    }
69
70
    /**
71
     * This returns the {@code fallback} {@link Set} if {@code src} is empty;
72
     * otherwise, this returns the empty {@link Set}.
73
     *
74
     * @param src      A set of contractions, possibly empty.
75
     * @param fallback The default values to use if {@code src} is empty.
76
     * @param <T>      The type of data used by both {@link Set}s.
77
     * @return An empty {@link Set} if the {@code src} contains at least one
78
     * element; otherwise, this will return {@code fallback}.
79
     */
80
    private static <T> Set<T> from( final Set<T> src, final Set<T> fallback ) {
81
      assert src != null;
82
      assert fallback != null;
83
      return src.isEmpty() ? fallback : emptySet();
84
    }
85
  }
86
87
  /**
88
   * Answers whether the given word is a contraction that always starts
89
   * with an apostrophe. The comparison is case insensitive. This must
90
   * only be called when a straight quote is followed by a word.
91
   *
92
   * @param word The word to compare against the list of known unambiguous
93
   *             contractions.
94
   * @return {@code true} when the given word is in the set of unambiguous
95
   * contractions.
96
   */
97
  public boolean beganUnambiguously( final String word ) {
98
    assert word != null;
99
    return getBeganUnambiguous().contains( word.toLowerCase() );
100
  }
101
102
  /**
103
   * Answers whether the given word could be a contraction but is also a
104
   * valid word in non-contracted form.
105
   *
106
   * @param word The word to compare against the list of known ambiguous
107
   *             contractions.
108
   * @return {@code true} when the given word is in the set of ambiguous
109
   * contractions.
110
   */
111
  public boolean beganAmbiguously( final String word ) {
112
    assert word != null;
113
    return getBeganAmbiguous().contains( word.toLowerCase() );
114
  }
115
116
  public boolean endedUnambiguously( final String word ) {
117
    assert word != null;
118
    return getEndedUnambiguous().contains( word.toLowerCase() );
119
  }
120
121
  public boolean endedAmbiguously( final String word ) {
122
    assert word != null;
123
    final var check = word.toLowerCase();
124
125
    // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet
126
    // allow o' to match because 'a sentence can end with the letter o'.
127
    return getEndedAmbiguous().contains( check ) ||
128
      check.endsWith( "s" ) || check.endsWith( "z" ) ||
129
      check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" ));
130
  }
131
132
  private Set<String> getBeganUnambiguous() {
133
    return mBuilder.mBeganUnambiguous;
134
  }
135
136
  private Set<String> getEndedUnambiguous() {
137
    return mBuilder.mEndedUnambiguous;
138
  }
139
140
  private Set<String> getBeganAmbiguous() {
141
    return mBuilder.mBeganAmbiguous;
142
  }
143
144
  private Set<String> getEndedAmbiguous() {
145
    return mBuilder.mEndedAmbiguous;
146
  }
147
148
149
  @Override
150
  public String toString() {
151
    return
152
      toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) +
153
        toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" ) +
154
        toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) +
155
        toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" );
156
  }
157
158
  private String toString(
159
    final Set<String> words, final String category, final String fmt ) {
160
    final var sb = new StringBuilder( 16384 );
161
    final var newline = System.lineSeparator();
162
    final var list = new ArrayList<>( words );
163
164
    sort( list );
165
    sb.append( format( "%n%s%n", category ) );
166
    list.forEach( ( s ) -> sb.append( format( fmt, s ) ).append( newline ) );
167
168
    return sb.toString();
169
  }
170
171
  /**
172
   * Words having a straight apostrophe that cannot be mistaken for an
173
   * opening single quote.
   * <p>
   * Each entry is the text that follows the apostrophe. These values are
   * the ones tested by {@code beganUnambiguously}, which lowercases the
   * candidate word before checking membership.
174
   */
175
  private static final Set<String> BEGAN_UNAMBIGUOUS = Set.of(
176
    "aporth",
177
    "boutcha",
178
    "boutchu",
179
    "cept",
180
    "dillo",
181
    "em",
182
    "fraid",
183
    "gainst",
184
    "im",
185
    "n",
186
    "neath",
187
    "nother",
188
    "nuff",
189
    "onna",
190
    "onna'",
191
    "pon",
192
    "s",
193
    "sblood",
194
    "scuse",
195
    "sfar",
196
    "sfoot",
197
    "t",
198
    "taint",
199
    "tain",
200
    "til",
201
    "tis",
202
    "tisn",
203
    "tshall",
204
    "twas",
205
    "twasn",
206
    "tween",
207
    "twere",
208
    "tweren",
209
    "twixt",
210
    "twon",
211
    "twou",
212
    "twould",
213
    "twouldn",
214
    "ve"
215
  );
216
217
  /**
218
   * Words having a straight apostrophe that may be either part of a
219
   * contraction or a word that stands alone beside an opening single quote.
   * <p>
   * The comment above each entry gives the contracted reading, a vertical
   * bar, then the stand-alone reading.
220
   */
221
  private static final Set<String> BEGAN_AMBIGUOUS = Set.of(
222
    // about|boxing match
223
    "bout",
224
    // because|causal
225
    "cause",
226
    // what you|choo choo train
227
    "choo",
228
    // he|e pluribus unum
229
    "e",
230
    // here|earlier
231
    "ere",
232
    // afro|to and fro
233
    "fro",
234
    // whore|ho ho!
235
    "ho",
236
    // okay|letter K
237
    "kay",
238
    // lo|lo and behold
239
    "lo",
240
    // are|regarding
241
    "re",
242
    // what's up|to sup
243
    "sup",
244
    // it will|twill fabric
245
    "twill",
246
    // them|utterance
247
    "um",
248
    // is that|Iranian village
249
    "zat"
250
  );
251
252
  /**
   * Words followed by a straight apostrophe that may or may not be
   * contractions. The comment above each entry gives the contracted reading,
   * a vertical bar, then the stand-alone reading.
   * <p>
   * In addition to these entries, {@code endedAmbiguously} treats words
   * ending in s, z, or x, and words longer than one letter ending in n, as
   * ambiguous.
   */
  private static final Set<String> ENDED_AMBIGUOUS = Set.of(
253
    // give|martial arts garment
254
    "gi",
255
    // in|I
256
    "i",
257
    // of|letter o
258
    "o"
259
  );
260
261
  private static final Set<String> ENDED_UNAMBIGUOUS = Set.of(
262
    // and
263
    "an",
264
    // for/before
265
    "fo",
266
    // friend
267
    "frien",
268
    // just
269
    "jus",
270
    // lord
271
    "lor",
272
    // myself
273
    "masel",
274
    // old
275
    "ol",
276
    // San (Francisco)
277
    "Sa",
278
    // shift
279
    "shif",
280
    // the
281
    "th",
282
    // what
283
    "wha",
284
    // world
285
    "worl",
286
    // Top ~500 common -ing words as English contractions.
287
    "acceptin",
288
    "accompanyin",
289
    "accordin",
290
    "accountin",
291
    "achievin",
292
    "acquirin",
293
    "actin",
294
    "addin",
295
    "addressin",
296
    "adjoinin",
297
    "adoptin",
298
    "advancin",
299
    "advertisin",
300
    "affectin",
301
    "agin",
302
    "allowin",
303
    "amazin",
304
    "analyzin",
305
    "answerin",
306
    "anythin",
307
    "appearin",
308
    "applyin",
309
    "approachin",
310
    "arguin",
311
    "arisin",
312
    "arrivin",
313
    "askin",
314
    "assessin",
315
    "assumin",
316
    "attackin",
317
    "attemptin",
318
    "attendin",
319
    "avoidin",
320
    "bankin",
321
    "bargainin",
322
    "bearin",
323
    "beatin",
324
    "becomin",
325
    "beginnin",
326
    "bein",
327
    "believin",
328
    "belongin",
329
    "bendin",
330
    "bindin",
331
    "bleedin",
332
    "blessin",
333
    "blowin",
334
    "boilin",
335
    "borrowin",
336
    "breakin",
337
    "breathin",
338
    "breedin",
339
    "bringin",
340
    "broadcastin",
341
    "buildin",
342
    "burnin",
343
    "buyin",
344
    "calculatin",
345
    "callin",
346
    "carryin",
347
    "castin",
348
    "causin",
349
    "ceilin",
350
    "challengin",
351
    "changin",
352
    "checkin",
353
    "choosin",
354
    "claimin",
355
    "cleanin",
356
    "clearin",
357
    "climbin",
358
    "closin",
359
    "clothin",
360
    "collectin",
361
    "combinin",
362
    "comin",
363
    "commandin",
364
    "comparin",
365
    "compellin",
366
    "competin",
367
    "computin",
368
    "concernin",
369
    "concludin",
370
    "conditionin",
371
    "conductin",
372
    "conflictin",
373
    "connectin",
374
    "considerin",
375
    "consistin",
376
    "constructin",
377
    "consultin",
378
    "consumin",
379
    "containin",
380
    "continuin",
381
    "contractin",
382
    "contributin",
383
    "controllin",
384
    "convincin",
385
    "cookin",
386
    "coolin",
387
    "copin",
388
    "correspondin",
389
    "counselin",
390
    "countin",
391
    "couplin",
392
    "coverin",
393
    "creatin",
394
    "crossin",
395
    "cryin",
396
    "cuttin",
397
    "dancin",
398
    "darlin",
399
    "datin",
400
    "dealin",
401
    "decidin",
402
    "declarin",
403
    "declinin",
404
    "decreasin",
405
    "definin",
406
    "demandin",
407
    "denyin",
408
    "dependin",
409
    "descendin",
410
    "describin",
411
    "designin",
412
    "destroyin",
413
    "determinin",
414
    "developin",
415
    "differin",
416
    "dinin",
417
    "directin",
418
    "discussin",
419
    "distinguishin",
420
    "disturbin",
421
    "dividin",
422
    "doin",
423
    "drawin",
424
    "dressin",
425
    "drinkin",
426
    "drivin",
427
    "droppin",
428
    "dryin",
429
    "durin",
430
    "dwellin",
431
    "dyin",
432
    "eatin",
433
    "editin",
434
    "emergin",
435
    "employin",
436
    "enablin",
437
    "encouragin",
438
    "endin",
439
    "engagin",
440
    "engineerin",
441
    "enjoyin",
442
    "enterin",
443
    "establishin",
444
    "evaluatin",
445
    "evenin",
446
    "everythin",
447
    "examinin",
448
    "exceedin",
449
    "excitin",
450
    "excludin",
451
    "existin",
452
    "expandin",
453
    "expectin",
454
    "experiencin",
455
    "explainin",
456
    "explorin",
457
    "expressin",
458
    "extendin",
459
    "facin",
460
    "failin",
461
    "fallin",
462
    "farmin",
463
    "fascinatin",
464
    "feedin",
465
    "feelin",
466
    "fightin",
467
    "filin",
468
    "fillin",
469
    "financin",
470
    "findin",
471
    "firin",
472
    "fishin",
473
    "fittin",
474
    "fixin",
475
    "floatin",
476
    "flowin",
477
    "flyin",
478
    "focusin",
479
    "followin",
480
    "forcin",
481
    "foregoin",
482
    "formin",
483
    "forthcomin",
484
    "foundin",
485
    "freezin",
486
    "fuckin",
487
    "functionin",
488
    "fundin",
489
    "gainin",
490
    "gatherin",
491
    "generatin",
492
    "gettin",
493
    "givin",
494
    "goin",
495
    "governin",
496
    "grantin",
497
    "growin",
498
    "hackin",
499
    "handlin",
500
    "hangin",
501
    "happenin",
502
    "havin",
503
    "headin",
504
    "healin",
505
    "hearin",
506
    "heatin",
507
    "helpin",
508
    "hidin",
509
    "holdin",
510
    "hopin",
511
    "housin",
512
    "huntin",
513
    "identifyin",
514
    "imagin",
515
    "implementin",
516
    "imposin",
517
    "improvin",
518
    "includin",
519
    "increasin",
520
    "indicatin",
521
    "interestin",
522
    "interpretin",
523
    "introducin",
524
    "involvin",
525
    "joinin",
526
    "judgin",
527
    "keepin",
528
    "killin",
529
    "knowin",
530
    "lackin",
531
    "landin",
532
    "lastin",
533
    "laughin",
534
    "layin",
535
    "leadin",
536
    "leanin",
537
    "learnin",
538
    "leavin",
539
    "lettin",
540
    "liftin",
541
    "lightin",
542
    "lightnin",
543
    "limitin",
544
    "listenin",
545
    "listin",
546
    "livin",
547
    "loadin",
548
    "lookin",
549
    "losin",
550
    "lovin",
551
    "lowerin",
552
    "lyin",
553
    "maintainin",
554
    "makin",
555
    "managin",
556
    "manufacturin",
557
    "mappin",
558
    "marketin",
559
    "markin",
560
    "matchin",
561
    "meanin",
562
    "measurin",
563
    "meetin",
564
    "meltin",
565
    "minin",
566
    "misleadin",
567
    "missin",
568
    "mixin",
569
    "modelin",
570
    "monitorin",
571
    "mornin",
572
    "movin",
573
    "neighborin",
574
    "neighbourin",
575
    "nothin",
576
    "notin",
577
    "notwithstandin",
578
    "nursin",
579
    "observin",
580
    "obtainin",
581
    "occurrin",
582
    "offerin",
583
    "offsprin",
584
    "ongoin",
585
    "openin",
586
    "operatin",
587
    "opposin",
588
    "orderin",
589
    "organizin",
590
    "outstandin",
591
    "overwhelmin",
592
    "packin",
593
    "paintin",
594
    "parkin",
595
    "participatin",
596
    "passin",
597
    "payin",
598
    "pendin",
599
    "performin",
600
    "pickin",
601
    "pissin",
602
    "placin",
603
    "plannin",
604
    "plantin",
605
    "playin",
606
    "pleasin",
607
    "pointin",
608
    "poppin",
609
    "possessin",
610
    "preachin",
611
    "precedin",
612
    "preparin",
613
    "presentin",
614
    "preservin",
615
    "pressin",
616
    "prevailin",
617
    "preventin",
618
    "pricin",
619
    "printin",
620
    "proceedin",
621
    "processin",
622
    "producin",
623
    "programmin",
624
    "promisin",
625
    "promotin",
626
    "protectin",
627
    "providin",
628
    "provin",
629
    "publishin",
630
    "pullin",
631
    "purchasin",
632
    "pursuin",
633
    "pushin",
634
    "puttin",
635
    "questionin",
636
    "rangin",
637
    "ratin",
638
    "reachin",
639
    "readin",
640
    "reasonin",
641
    "receivin",
642
    "recognizin",
643
    "recordin",
644
    "reducin",
645
    "referrin",
646
    "reflectin",
647
    "refusin",
648
    "regardin",
649
    "regulatin",
650
    "relatin",
651
    "remainin",
652
    "rememberin",
653
    "removin",
654
    "renderin",
655
    "repeatin",
656
    "replacin",
657
    "reportin",
658
    "representin",
659
    "requirin",
660
    "respectin",
661
    "respondin",
662
    "restin",
663
    "resultin",
664
    "returnin",
665
    "revealin",
666
    "ridin",
667
    "risin",
668
    "rulin",
669
    "runnin",
670
    "rythin",
671
    "sailin",
672
    "samplin",
673
    "satisfyin",
674
    "savin",
675
    "sayin",
676
    "scatterin",
677
    "schoolin",
678
    "screenin",
679
    "searchin",
680
    "securin",
681
    "seein",
682
    "seekin",
683
    "selectin",
684
    "sellin",
685
    "sendin",
686
    "separatin",
687
    "servin",
688
    "settin",
689
    "settlin",
690
    "sewin",
691
    "shakin",
692
    "shapin",
693
    "sharin",
694
    "shiftin",
695
    "shinin",
696
    "shippin",
697
    "shittin",
698
    "shootin",
699
    "shoppin",
700
    "showin",
701
    "singin",
702
    "sinkin",
703
    "sittin",
704
    "sleepin",
705
    "smilin",
706
    "smokin",
707
    "spankin",
708
    "solvin",
709
    "somethin",
710
    "speakin",
711
    "spellin",
712
    "spendin",
713
    "spinnin",
714
    "spittin",
715
    "spreadin",
716
    "standin",
717
    "starin",
718
    "startin",
719
    "statin",
720
    "stayin",
721
    "stealin",
722
    "sterlin",
723
    "stimulatin",
724
    "stirrin",
725
    "stoppin",
726
    "strengthenin",
727
    "stretchin",
728
    "strikin",
729
    "strugglin",
730
    "studyin",
731
    "succeedin",
732
    "sufferin",
733
    "suggestin",
734
    "supplyin",
735
    "supportin",
736
    "surprisin",
737
    "surroundin",
738
    "survivin",
739
    "sweepin",
740
    "swellin",
741
    "swimmin",
742
    "switchin",
743
    "takin",
744
    "talkin",
745
    "teachin",
746
    "tellin",
747
    "testin",
748
    "thinkin",
749
    "threatenin",
750
    "throwin",
751
    "timin",
752
    "touchin",
753
    "tradin",
754
    "trainin",
755
    "travelin",
756
    "treatin",
757
    "tremblin",
758
    "tryin",
759
    "turnin",
760
    "underlyin",
761
    "understandin",
762
    "undertakin",
763
    "unwillin",
764
    "usin",
765
    "varyin",
766
    "viewin",
767
    "visitin",
768
    "votin",
769
    "waitin",
770
    "walkin",
771
    "wanderin",
772
    "wantin",
773
    "warnin",
774
    "washin",
775
    "watchin",
776
    "wearin",
777
    "weddin",
778
    "whackin",
779
    "wi",
772 780
    "willin",
773 781
    "windin",
M src/main/java/com/whitemagicsoftware/keenquotes/KeenQuotes.java
4 4
import picocli.CommandLine;
5 5
6
import java.io.BufferedReader;
7 6
import java.io.IOException;
8 7
import java.io.InputStream;
9
import java.io.InputStreamReader;
10 8
import java.util.Properties;
11 9
12 10
import static java.lang.String.format;
11
import static java.lang.System.*;
13 12
import static picocli.CommandLine.Help.Ansi.Style.*;
14 13
import static picocli.CommandLine.Help.ColorScheme;
...
38 37
39 38
    if( settings.displayList() ) {
40
      System.out.println( contractions.toString() );
39
      out.println( contractions.toString() );
41 40
    }
42 41
    else {
43
      convert( new Converter( System.err::println, contractions ) );
42
      try {
43
        out.print( convert( new Converter( err::println, contractions ) ) );
44
      } catch( final Exception ex ) {
45
        ex.printStackTrace( err );
46
      }
44 47
    }
45 48
  }
...
55 58
    return builder.build();
56 59
  }
57
58
  private void convert( final Converter converter ) {
59
    final var sb = new StringBuilder();
60
61
    try( final var reader = open( System.in ) ) {
62
      String line;
63
      final var sep = System.lineSeparator();
64
65
      while( (line = reader.readLine()) != null ) {
66
        sb.append( line );
67
        sb.append( sep );
68
      }
69 60
70
      System.out.println( converter.apply( sb.toString() ) );
71
    } catch( final Exception ex ) {
72
      ex.printStackTrace( System.err );
73
    }
61
  private String convert( final Converter converter ) throws IOException {
62
    return converter.apply( new String( in.readAllBytes() ) );
74 63
  }
75 64
...
113 102
  private static InputStream getResourceAsStream( final String resource ) {
114 103
    return KeenQuotes.class.getClassLoader().getResourceAsStream( resource );
115
  }
116
117
  @SuppressWarnings( "SameParameterValue" )
118
  private static BufferedReader open( final InputStream in ) {
119
    return new BufferedReader( new InputStreamReader( in ) );
120 104
  }
121 105
...
134 118
135 119
    if( parseResult.isUsageHelpRequested() ) {
136
      System.exit( exitCode );
120
      exit( exitCode );
137 121
    }
138 122
    else if( parseResult.isVersionHelpRequested() ) {
139
      System.out.println( getVersion() );
140
      System.exit( exitCode );
123
      out.println( getVersion() );
124
      exit( exitCode );
141 125
    }
142 126
  }
M src/main/java/com/whitemagicsoftware/keenquotes/Lexeme.java
50 50
  }
51 51
52
  static Lexeme createLexeme(
53
    final LexemeType lexeme, final int began, final int ended ) {
54
    assert lexeme != null;
55
    return new Lexeme( lexeme, began, ended );
56
  }
57
52 58
  /**
53 59
   * Extracts a sequence of characters from the given text at the offsets
...
133 139
    assert that != null;
134 140
    return this.mBegan - that.mBegan;
135
  }
136
137
  static Lexeme createLexeme(
138
    final LexemeType lexeme, final int began, final int ended ) {
139
    assert lexeme != null;
140
    return new Lexeme( lexeme, began, ended );
141 141
  }
142 142
}
M src/main/java/com/whitemagicsoftware/keenquotes/Lexer.java
91 91
        lexeme = createLexeme(
92 92
          slurp( i, ( next, ci ) ->
93
            next == '.' || (next == ' ' && peek( ci ) == '.') ) == 0
93
            next == '.' || next == ' ' && peek( ci ) == '.' ) == 0
94 94
            ? PERIOD
95 95
            : ELLIPSIS,
...
189 189
   */
190 190
  private boolean isDash( final char curr ) {
191
    return curr == '-' || curr == '–' || curr == '—';
191
    return curr == '-' || curr == '–' || curr == '—' || curr == '―';
192 192
  }
193 193
M src/main/java/com/whitemagicsoftware/keenquotes/Parser.java
51 51
   */
52 52
  private static final LexemeType[] LAGGING_QUOTE_OPENING_DOUBLE =
53
    new LexemeType[]{WORD, NUMBER, ELLIPSIS,
53
    new LexemeType[]{WORD, NUMBER, ELLIPSIS, OPENING_GROUP,
54 54
      QUOTE_SINGLE, QUOTE_SINGLE_OPENING, QUOTE_SINGLE_CLOSING, QUOTE_DOUBLE};
55 55
56 56
  /**
57 57
   * Double quotes preceded by these {@link LexemeType}s may be closing quotes.
58 58
   */
59 59
  private static final LexemeType[] LEADING_QUOTE_CLOSING_DOUBLE =
60
    new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, DASH, ELLIPSIS,
60
    new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, DASH, ELLIPSIS, CLOSING_GROUP,
61 61
      QUOTE_SINGLE, QUOTE_SINGLE_CLOSING, QUOTE_SINGLE_OPENING};
62 62
M src/main/java/com/whitemagicsoftware/keenquotes/Settings.java
28 28
    names = {"-ub", "--unamb-began"},
29 29
    description =
30
      "Contractions to treat as unambiguous (e.g., cause,bout)",
31
    paramLabel = "words"
30
      "Contraction to treat as unambiguous (e.g., cause, bout)",
31
    paramLabel = "word"
32 32
  )
33 33
  private String[] mBeganUnambiguous;
34 34
35 35
  /**
36 36
   * List of unambiguous contractions having lagging apostrophes.
37 37
   */
38 38
  @CommandLine.Option(
39 39
    names = {"-ue", "--unamb-ended"},
40 40
    description =
41
      "Contractions to treat as unambiguous (e.g., frien,thinkin)",
42
    paramLabel = "words"
41
      "Contraction to treat as unambiguous (e.g., frien, thinkin)",
42
    paramLabel = "word"
43 43
  )
44 44
  private String[] mEndedUnambiguous;
45 45
46 46
  /**
47 47
   * List of ambiguous contractions having leading apostrophes.
48 48
   */
49 49
  @CommandLine.Option(
50 50
    names = {"-ab", "--amb-began"},
51 51
    description =
52
      "Contractions to treat as ambiguous (e.g., sup,kay)",
53
    paramLabel = "words"
52
      "Contraction to treat as ambiguous (e.g., sup, kay)",
53
    paramLabel = "word"
54 54
  )
55 55
  private String[] mBeganAmbiguous;
56 56
57 57
  /**
58 58
   * List of ambiguous contractions having lagging apostrophes.
59 59
   */
60 60
  @CommandLine.Option(
61 61
    names = {"-ae", "--amb-ended"},
62 62
    description =
63
      "Contractions to treat as ambiguous (e.g., gi,o)",
64
    paramLabel = "words"
63
      "Contraction to treat as ambiguous (e.g., gi, o)",
64
    paramLabel = "word"
65 65
  )
66 66
  private String[] mEndedAmbiguous;
M src/test/java/com/whitemagicsoftware/keenquotes/KeenQuotesTest.java
58 58
    try( final var reader = open( filename + ".txt" ) ) {
59 59
      String line;
60
      final var sep = System.lineSeparator();
60 61
61 62
      while( (line = reader.readLine()) != null ) {
62
        sb.append( line ).append( '\n' );
63
        sb.append( line ).append( sep );
63 64
      }
64 65
    }
M src/test/java/com/whitemagicsoftware/keenquotes/LexerTest.java
51 51
    testType( "---", DASH );
52 52
    testType( "–", DASH );
53
    testType( "―", DASH );
53 54
    testType( "—", DASH );
54 55
    testType( "—-—", DASH );