Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Add SPDX license identifier

Author: Dave Jarvis <email>
Date: 2023-02-20 12:08:33 GMT-0800
Commit: 31818da4e52bd6f13151af06362df6277fa2d2b3
Parent: ca3c3a3
src/jmh/java/com/whitemagicsoftware/keenquotes/StringIterationBenchmark.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes;
src/main/java/com/whitemagicsoftware/keenquotes/lex/FilterType.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.lex;
src/main/java/com/whitemagicsoftware/keenquotes/lex/LexemeGlyph.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.lex;
src/main/java/com/whitemagicsoftware/keenquotes/lex/LexerFilter.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.lex;
src/main/java/com/whitemagicsoftware/keenquotes/parser/AmbiguityResolver.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.parser;
src/main/java/com/whitemagicsoftware/keenquotes/parser/Contractions.java
* Placeholder for various types of contractions.
*/
-@SuppressWarnings( "SpellCheckingInspection" )
-public class Contractions {
-
- private final Builder mBuilder;
-
- private Contractions( final Builder builder ) {
- assert builder != null;
- mBuilder = builder;
- }
-
- /**
- * Allows constructing a list of custom contractions.
- */
- @SuppressWarnings( "unused" )
- public static class Builder {
- private final Set<String> mBeganEndedUnambiguous = new HashSet<>();
- private final Set<String> mBeganUnambiguous = new HashSet<>();
- private final Set<String> mEndedUnambiguous = new HashSet<>();
- private final Set<String> mBeganAmbiguous = new HashSet<>();
- private final Set<String> mEndedAmbiguous = new HashSet<>();
-
- public Builder withBeganEndedUnambiguous( final List<String> words ) {
- mBeganEndedUnambiguous.addAll( words );
- return this;
- }
-
- public Builder withBeganUnambiguous( final List<String> words ) {
- mBeganUnambiguous.addAll( words );
- return this;
- }
-
- public Builder withEndedUnambiguous( final List<String> words ) {
- mEndedUnambiguous.addAll( words );
- return this;
- }
-
- public Builder withBeganAmbiguous( final List<String> words ) {
- mBeganAmbiguous.addAll( words );
- return this;
- }
-
- public Builder withEndedAmbiguous( final List<String> words ) {
- mEndedAmbiguous.addAll( words );
- return this;
- }
-
- /**
- * Constructs a new set of {@link Contractions} that can be configured
- * using this {@link Builder} instance.
- *
- * @return {@link Contractions} suitable for use with parsing text.
- */
- public Contractions build() {
- mBeganEndedUnambiguous.addAll( BEGAN_ENDED_UNAMBIGUOUS );
- mBeganUnambiguous.addAll( BEGAN_UNAMBIGUOUS );
- mEndedUnambiguous.addAll( ENDED_UNAMBIGUOUS );
- mBeganAmbiguous.addAll( BEGAN_AMBIGUOUS );
- mEndedAmbiguous.addAll( ENDED_AMBIGUOUS );
-
- // Remove ambiguous items if they are already declared.
- mBeganAmbiguous.removeAll( mBeganUnambiguous );
- mEndedAmbiguous.removeAll( mEndedUnambiguous );
-
- return new Contractions( this );
- }
-
- /**
- * This returns the {@code fallback} {@link Set} if {@code src} is empty;
- * otherwise, this returns the empty {@link Set}.
- *
- * @param src A set of contractions, possibly empty.
- * @param fallback The default values to use if {@code src} is empty.
- * @param <T> The type of data used by both {@link Set}s.
- * @return An empty {@link Set} if the {@code src} contains at least one
- * element; otherwise, this will return {@code fallback}.
- */
- private static <T> Set<T> from( final Set<T> src, final Set<T> fallback ) {
- assert src != null;
- assert fallback != null;
- return src.isEmpty() ? fallback : emptySet();
- }
- }
-
- public boolean beganEndedUnambiguously( final String word ) {
- assert word != null;
- return getBeganEndedUnambiguous().contains( word );
- }
-
- /**
- * Answers whether the given word is a contraction that always starts
- * with an apostrophe. The comparison is case-insensitive. This must
- * only be called when a straight quote is followed by a word.
- *
- * @param word The word to compare against the list of known unambiguous
- * contractions.
- * @return {@code true} when the given word is in the set of unambiguous
- * contractions.
- */
- public boolean beganUnambiguously( final String word ) {
- assert word != null;
- return getBeganUnambiguous().contains( word.toLowerCase() );
- }
-
- /**
- * Answers whether the given word could be a contraction but is also a
- * valid word in non-contracted form.
- *
- * @param word The word to compare against the list of known ambiguous
- * contractions.
- * @return {@code true} when the given word is in the set of ambiguous
- * contractions.
- */
- public boolean beganAmbiguously( final String word ) {
- assert word != null;
- return getBeganAmbiguous().contains( word.toLowerCase() );
- }
-
- public boolean endedUnambiguously( final String word ) {
- assert word != null;
- return getEndedUnambiguous().contains( word.toLowerCase() );
- }
-
- public boolean endedAmbiguously( final String word ) {
- assert word != null;
- final var check = word.toLowerCase();
-
- // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet
- // allow o' to match because 'a sentence can end with the letter o'.
- return getEndedAmbiguous().contains( check ) ||
- check.endsWith( "s" ) || check.endsWith( "z" ) ||
- check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" ));
- }
-
- private Set<String> getBeganEndedUnambiguous() {
- return mBuilder.mBeganEndedUnambiguous;
- }
-
- private Set<String> getBeganUnambiguous() {
- return mBuilder.mBeganUnambiguous;
- }
-
- private Set<String> getEndedUnambiguous() {
- return mBuilder.mEndedUnambiguous;
- }
-
- private Set<String> getBeganAmbiguous() {
- return mBuilder.mBeganAmbiguous;
- }
-
- private Set<String> getEndedAmbiguous() {
- return mBuilder.mEndedAmbiguous;
- }
-
- @Override
- public String toString() {
- return
- toString( getBeganEndedUnambiguous(), "Unambiguous Began/Ended", "'%s" ) +
- toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) +
- toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) +
- toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) +
- toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" );
- }
-
- private String toString(
- final Set<String> words,
- final String category,
- final String fmt
- ) {
- final var sb = new StringBuilder( 16384 );
- final var newline = System.lineSeparator();
- final var list = new ArrayList<>( words );
-
- sort( list );
- sb.append( format( "%n%s%n", category ) );
- list.forEach( ( s ) -> sb.append( format( fmt, s ) ).append( newline ) );
-
- return sb.toString();
- }
-
- /**
- * Words having a straight apostrophe at the beginning and end.
- */
- private static final Set<String> BEGAN_ENDED_UNAMBIGUOUS = Set.of(
- // hacking
- "ackin",
- // hammering
- "ammerin",
- // hankering
- "ankerin",
- // having
- "avin",
- // hawking
- "awkin",
- // excepting
- "cepin",
- // excepting
- "ceppin",
- // excepting
- "ceptin",
- // according
- "cordin",
- // heading
- "eadin",
- // leaving
- "eavin",
- // helping
- "elpin",
- // hindering
- "inderin",
- // electioneering
- "lectioneerin",
- // amazing
- "mazin",
- // remembering
- "memberin",
- // fish 'n' chips
- "n",
- // hobbling
- "obblin",
- // holding
- "oldin",
- // hollering
- "ollerin",
- // hopping
- "oppin",
- // housekeeping
- "ousekeepin",
- // howling
- "owlin",
- // excepting
- "sceptin",
- // expecting
- "spectin",
- // explaining
- "splainin",
- // supposing
- "sposin",
- "sputin",
- // astonishing
- "stonishin",
- // destroying
- "stroyin",
- // persuading
- "suadin",
- "titivatin",
- // introducing
- "troducin",
- // hugging
- "uggin",
- // hulking
- "ulkin",
- // humbugging
- "umbuggin",
- // humiliating
- "umiliatin",
- // humming
- "ummin",
- // humping
- "umpin",
- // hurrying
- "urryin",
- // hurting
- "urtin",
- // hustling
- "ustlin",
- // investigating
- "vestigatin",
- // inviting
- "vitin",
- // excepting
- "xceptin",
- // explaining
- "xplainin",
- // exploding
- "xplodin"
- );
-
- /**
- * Words having a straight apostrophe that cannot be mistaken for an
- * opening single quote.
- */
- private static final Set<String> BEGAN_UNAMBIGUOUS = Set.of(
- "aporth",
- // about you
- "boutcha",
- // about you
- "boutchu",
- // about well
- "boutwell",
- // except
- "cept",
- // decided
- "cided",
- // because
- "cos",
- // armadillo
- "dillo",
- // themselves,
- "emselves",
- // affectionate
- "fectionate",
- // before
- "fore",
- // afraid
- "fraid",
- // ???
- "funder",
- // against
- "gainst",
- // him
- "im",
- // little
- "ittle",
- // and
- "n",
- // beneath
- "neath",
- // another
- "nother",
- // another
- "nudder",
- // enough
- "nuff",
- // gonna
- "onna",
- "onna'",
- // horse
- "oss",
- // horses
- "osses",
- // splash
- "plash",
- // upon
- "pon",
- // that's|is
- "s",
- "sblood",
- // excuse
- "scuse",
- "sfar",
- "sfoot",
- // considered
- "sidered",
- // suspect|expect
- "spect",
- // suspects|expects
- "spects",
- // exploit
- "sploit",
- // exploits
- "sploits",
- // suppose
- "spose",
- // instead
- "stead",
- // it
- "t",
- "taint",
- "tain",
- "tay",
- "til",
- "tis",
- "tish",
- // it isn't
- "tisn",
- // stomach
- "tomach",
- // stormed
- "tormed",
- "tshall",
- "twas",
- "twasn",
- "tween",
- "twere",
- "tweren",
- "twixt",
- "twon",
- "twou",
- "twould",
- "twouldn",
- // one
- "un",
- // have
- "ve",
- // exactly
- "xactly"
- );
-
- /**
- * Words having a straight apostrophe that may be either part of a
- * contraction or a word that stands alone beside an opening single quote.
- */
- private static final Set<String> BEGAN_AMBIGUOUS = Set.of(
- // have
- "a",
- // about|boxing match
- "bout",
- // because|causal
- "cause",
- // what you|choo choo train
- "choo",
- // across|angry
- "cross",
- // he|e pluribus unum
- "e",
- // them|letter m|em-dash|Emily
- "em",
- // here|earlier
- "ere",
- // afro|to and fro
- "fro",
- // unfounded|founded
- "founded",
- // whore|ho ho!
- "ho",
- // will|sick
- "ill",
- // okay|letter K
- "kay",
- // molasses|women
- "lasses",
- // lo|lo and behold
- "lo",
- // leaned|enlisted
- "listed",
- // ???
- "nation",
- // are|regarding
- "re",
- // circular|around
- "round",
- // what's up|to eat
- "sup",
- // cannot (tan't)|colour
- "tan",
- // still|turn soil
- "till",
- // it will|twill fabric
- "twill",
- // them|utterance
- "um",
- // away|direction
- "way",
- // is that|Iranian village
- "zat"
- );
-
- private static final Set<String> ENDED_AMBIGUOUS = Set.of(
- // pin|head dunk
- "bobbin",
- // give|martial arts garment
- "gi",
- // in|I
- "i",
- // of|letter o
- "o"
- );
-
- private static final Set<String> ENDED_UNAMBIGUOUS = Set.of(
- // and
- "an",
- // didn't
- "didn",
- // for/before
- "fo",
- // friend
- "frien",
- // just
- "jes",
- // just
- "jus",
- // lord
- "lor",
- // myself
- "masel",
- // old
- "ol",
- // San (Francisco)
- "Sa",
- // shift
- "shif",
- // the
- "th",
- // what
- "wha",
- // with
- "wi",
- // world
- "worl",
- // Top ~3800 common -ing words as English contractions.
- "abandonin",
- "abductin",
- "abettin",
- "abidin",
- "abolishin",
- "aboundin",
- "absorbin",
- "abstainin",
- "abstractin",
- "abusin",
- "abuttin",
- "acceleratin",
- "acceptin",
- "accessin",
- "accommodatin",
- "accompanyin",
- "accomplishin",
- "accordin",
- "accountin",
- "accruin",
- "accumulatin",
- "accusin",
- "achievin",
- "achin",
- "acknowledgin",
- "acquaintin",
- "acquirin",
- "actin",
- "activatin",
- "adaptin",
- "addin",
- "addressin",
- "adherin",
- "adjoinin",
- "adjustin",
- "administerin",
- "admirin",
- "admittin",
- "admonishin",
- "adoin",
- "adoptin",
- "adorin",
- "advancin",
- "advertisin",
- "advisin",
- "advocatin",
- "affectin",
- "affirmin",
- "afflictin",
- "affordin",
- "afishin",
- "aggravatin",
- "agitatin",
- "agoin",
- "agonizin",
- "agreein",
- "ailin",
- "aimin",
- "ain",
- "airin",
- "alarmin",
- "alayin",
- "alertin",
- "alienatin",
- "alightin",
- "alignin",
- "allegin",
- "alleviatin",
- "allocatin",
- "allowin",
- "alludin",
- "allurin",
- "alookin",
- "alterin",
- "alternatin",
- "alyin",
- "amassin",
- "amazin",
- "amblin",
- "amendin",
- "amountin",
- "amplifyin",
- "amusin",
- "analysin",
- "analyzin",
- "anchorin",
- "anglin",
- "animatin",
- "annealin",
- "annihilatin",
- "announcin",
- "annoyin",
- "anointin",
- "answerin",
- "anticipatin",
- "anythin",
- "apologizin",
- "appallin",
- "appealin",
- "appearin",
- "appertainin",
- "appetizin",
- "applaudin",
- "applyin",
- "appointin",
- "appraisin",
- "appreciatin",
- "apprehendin",
- "approachin",
- "appropriatin",
- "approvin",
- "approximatin",
- "archin",
- "archivin",
- "arguin",
- "arisin",
- "arousin",
- "arrangin",
- "arrestin",
- "arrivin",
- "articulatin",
- "ascendin",
- "ascertainin",
- "ascribin",
- "askin",
- "assailin",
- "assaultin",
- "assemblin",
- "assertin",
- "assessin",
- "assignin",
- "assimilatin",
- "assistin",
- "associatin",
- "assumin",
- "assurin",
- "astonishin",
- "astoundin",
- "atonin",
- "attachin",
- "attackin",
- "attainin",
- "attemptin",
- "attendin",
- "attestin",
- "attractin",
- "attributin",
- "auditin",
- "augmentin",
- "authorin",
- "authorisin",
- "authorizin",
- "availin",
- "avengin",
- "averagin",
- "avertin",
- "avoidin",
- "awaitin",
- "awakenin",
- "awakin",
- "awardin",
- "awin",
- "awnin",
- "awonderin",
- "axin",
- "babblin",
- "babysittin",
- "backin",
- "backslidin",
- "backtrackin",
- "badgerin",
- "bafflin",
- "baggin",
- "bailin",
- "baitin",
- "bakin",
- "balancin",
- "baldin",
- "balloonin",
- "ballotin",
- "bandagin",
- "bandin",
- "bangin",
- "banishin",
- "bankin",
- "bannin",
- "banquetin",
- "banterin",
- "baptizin",
- "bargainin",
- "barin",
- "barkin",
- "barrin",
- "barterin",
- "bashin",
- "baskin",
- "bastin",
- "bathin",
- "batterin",
- "battin",
- "battlin",
- "bawlin",
- "bayin",
- "beadin",
- "beamin",
- "bearin",
- "beatin",
- "beautifyin",
- "beckonin",
- "becomin",
- "beddin",
+public class Contractions {
+
+ private final Builder mBuilder;
+
+ private Contractions( final Builder builder ) {
+ assert builder != null;
+ mBuilder = builder;
+ }
+
+ /**
+ * Allows constructing a list of custom contractions.
+ */
+ @SuppressWarnings( "unused" )
+ public static class Builder {
+ private final Set<String> mBeganEndedUnambiguous = new HashSet<>();
+ private final Set<String> mBeganUnambiguous = new HashSet<>();
+ private final Set<String> mEndedUnambiguous = new HashSet<>();
+ private final Set<String> mBeganAmbiguous = new HashSet<>();
+ private final Set<String> mEndedAmbiguous = new HashSet<>();
+
+ public Builder withBeganEndedUnambiguous( final List<String> words ) {
+ mBeganEndedUnambiguous.addAll( words );
+ return this;
+ }
+
+ public Builder withBeganUnambiguous( final List<String> words ) {
+ mBeganUnambiguous.addAll( words );
+ return this;
+ }
+
+ public Builder withEndedUnambiguous( final List<String> words ) {
+ mEndedUnambiguous.addAll( words );
+ return this;
+ }
+
+ public Builder withBeganAmbiguous( final List<String> words ) {
+ mBeganAmbiguous.addAll( words );
+ return this;
+ }
+
+ public Builder withEndedAmbiguous( final List<String> words ) {
+ mEndedAmbiguous.addAll( words );
+ return this;
+ }
+
+ /**
+ * Constructs a new set of {@link Contractions} that can be configured
+ * using this {@link Builder} instance.
+ *
+ * @return {@link Contractions} suitable for use with parsing text.
+ */
+ public Contractions build() {
+ mBeganEndedUnambiguous.addAll( BEGAN_ENDED_UNAMBIGUOUS );
+ mBeganUnambiguous.addAll( BEGAN_UNAMBIGUOUS );
+ mEndedUnambiguous.addAll( ENDED_UNAMBIGUOUS );
+ mBeganAmbiguous.addAll( BEGAN_AMBIGUOUS );
+ mEndedAmbiguous.addAll( ENDED_AMBIGUOUS );
+
+ // Remove ambiguous items if they are already declared.
+ mBeganAmbiguous.removeAll( mBeganUnambiguous );
+ mEndedAmbiguous.removeAll( mEndedUnambiguous );
+
+ return new Contractions( this );
+ }
+
+ /**
+ * This returns the {@code fallback} {@link Set} if {@code src} is empty;
+ * otherwise, this returns the empty {@link Set}.
+ *
+ * @param src A set of contractions, possibly empty.
+ * @param fallback The default values to use if {@code src} is empty.
+ * @param <T> The type of data used by both {@link Set}s.
+ * @return An empty {@link Set} if the {@code src} contains at least one
+ * element; otherwise, this will return {@code fallback}.
+ */
+ private static <T> Set<T> from( final Set<T> src, final Set<T> fallback ) {
+ assert src != null;
+ assert fallback != null;
+ return src.isEmpty() ? fallback : emptySet();
+ }
+ }
+
+ public boolean beganEndedUnambiguously( final String word ) {
+ assert word != null;
+ return getBeganEndedUnambiguous().contains( word );
+ }
+
+ /**
+ * Answers whether the given word is a contraction that always starts
+ * with an apostrophe. The comparison is case-insensitive. This must
+ * only be called when a straight quote is followed by a word.
+ *
+ * @param word The word to compare against the list of known unambiguous
+ * contractions.
+ * @return {@code true} when the given word is in the set of unambiguous
+ * contractions.
+ */
+ public boolean beganUnambiguously( final String word ) {
+ assert word != null;
+ return getBeganUnambiguous().contains( word.toLowerCase() );
+ }
+
+ /**
+ * Answers whether the given word could be a contraction but is also a
+ * valid word in non-contracted form.
+ *
+ * @param word The word to compare against the list of known ambiguous
+ * contractions.
+ * @return {@code true} when the given word is in the set of ambiguous
+ * contractions.
+ */
+ public boolean beganAmbiguously( final String word ) {
+ assert word != null;
+ return getBeganAmbiguous().contains( word.toLowerCase() );
+ }
+
+ public boolean endedUnambiguously( final String word ) {
+ assert word != null;
+ return getEndedUnambiguous().contains( word.toLowerCase() );
+ }
+
+ public boolean endedAmbiguously( final String word ) {
+ assert word != null;
+ final var check = word.toLowerCase();
+
+ // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet
+ // allow o' to match because 'a sentence can end with the letter o'.
+ return getEndedAmbiguous().contains( check ) ||
+ check.endsWith( "s" ) || check.endsWith( "z" ) ||
+ check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" ));
+ }
+
+ private Set<String> getBeganEndedUnambiguous() {
+ return mBuilder.mBeganEndedUnambiguous;
+ }
+
+ private Set<String> getBeganUnambiguous() {
+ return mBuilder.mBeganUnambiguous;
+ }
+
+ private Set<String> getEndedUnambiguous() {
+ return mBuilder.mEndedUnambiguous;
+ }
+
+ private Set<String> getBeganAmbiguous() {
+ return mBuilder.mBeganAmbiguous;
+ }
+
+ private Set<String> getEndedAmbiguous() {
+ return mBuilder.mEndedAmbiguous;
+ }
+
+ @Override
+ public String toString() {
+ return
+ toString( getBeganEndedUnambiguous(), "Unambiguous Began/Ended", "'%s" ) +
+ toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) +
+ toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) +
+ toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) +
+ toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" );
+ }
+
+ private String toString(
+ final Set<String> words,
+ final String category,
+ final String fmt
+ ) {
+ final var sb = new StringBuilder( 16384 );
+ final var newline = System.lineSeparator();
+ final var list = new ArrayList<>( words );
+
+ sort( list );
+ sb.append( format( "%n%s%n", category ) );
+ list.forEach( ( s ) -> sb.append( format( fmt, s ) ).append( newline ) );
+
+ return sb.toString();
+ }
+
+ /**
+ * Words having a straight apostrophe at the beginning and end.
+ */
+ private static final Set<String> BEGAN_ENDED_UNAMBIGUOUS = Set.of(
+ // hacking
+ "ackin",
+ // hammering
+ "ammerin",
+ // hankering
+ "ankerin",
+ // having
+ "avin",
+ // hawking
+ "awkin",
+ // excepting
+ "cepin",
+ // excepting
+ "ceppin",
+ // excepting
+ "ceptin",
+ // according
+ "cordin",
+ // heading
+ "eadin",
+ // leaving
+ "eavin",
+ // helping
+ "elpin",
+ // hindering
+ "inderin",
+ // electioneering
+ "lectioneerin",
+ // amazing
+ "mazin",
+ // remembering
+ "memberin",
+ // fish 'n' chips
+ "n",
+ // hobbling
+ "obblin",
+ // holding
+ "oldin",
+ // hollering
+ "ollerin",
+ // hopping
+ "oppin",
+ // housekeeping
+ "ousekeepin",
+ // howling
+ "owlin",
+ // excepting
+ "sceptin",
+ // expecting
+ "spectin",
+ // explaining
+ "splainin",
+ // supposing
+ "sposin",
+ "sputin",
+ // astonishing
+ "stonishin",
+ // destroying
+ "stroyin",
+ // persuading
+ "suadin",
+ "titivatin",
+ // introducing
+ "troducin",
+ // hugging
+ "uggin",
+ // hulking
+ "ulkin",
+ // humbugging
+ "umbuggin",
+ // humiliating
+ "umiliatin",
+ // humming
+ "ummin",
+ // humping
+ "umpin",
+ // hurrying
+ "urryin",
+ // hurting
+ "urtin",
+ // hustling
+ "ustlin",
+ // investigating
+ "vestigatin",
+ // inviting
+ "vitin",
+ // excepting
+ "xceptin",
+ // explaining
+ "xplainin",
+ // exploding
+ "xplodin"
+ );
+
+ /**
+ * Words having a straight apostrophe that cannot be mistaken for an
+ * opening single quote.
+ */
+ private static final Set<String> BEGAN_UNAMBIGUOUS = Set.of(
+ "aporth",
+ // about you
+ "boutcha",
+ // about you
+ "boutchu",
+ // about well
+ "boutwell",
+ // except
+ "cept",
+ // decided
+ "cided",
+ // because
+ "cos",
+ // armadillo
+ "dillo",
+ // themselves,
+ "emselves",
+ // affectionate
+ "fectionate",
+ // before
+ "fore",
+ // afraid
+ "fraid",
+ // ???
+ "funder",
+ // against
+ "gainst",
+ // him
+ "im",
+ // little
+ "ittle",
+ // and
+ "n",
+ // beneath
+ "neath",
+ // another
+ "nother",
+ // another
+ "nudder",
+ // enough
+ "nuff",
+ // gonna
+ "onna",
+ "onna'",
+ // horse
+ "oss",
+ // horses
+ "osses",
+ // splash
+ "plash",
+ // upon
+ "pon",
+ // that's|is
+ "s",
+ "sblood",
+ // excuse
+ "scuse",
+ "sfar",
+ "sfoot",
+ // considered
+ "sidered",
+ // suspect|expect
+ "spect",
+ // suspects|expects
+ "spects",
+ // exploit
+ "sploit",
+ // exploits
+ "sploits",
+ // suppose
+ "spose",
+ // instead
+ "stead",
+ // it
+ "t",
+ "taint",
+ "tain",
+ "tay",
+ "til",
+ "tis",
+ "tish",
+ // it isn't
+ "tisn",
+ // stomach
+ "tomach",
+ // stormed
+ "tormed",
+ "tshall",
+ "twas",
+ "twasn",
+ "tween",
+ "twere",
+ "tweren",
+ "twixt",
+ "twon",
+ "twou",
+ "twould",
+ "twouldn",
+ // one
+ "un",
+ // have
+ "ve",
+ // exactly
+ "xactly"
+ );
+
+ /**
+ * Words having a straight apostrophe that may be either part of a
+ * contraction or a word that stands alone beside an opening single quote.
+ */
+ private static final Set<String> BEGAN_AMBIGUOUS = Set.of(
+ // have
+ "a",
+ // about|boxing match
+ "bout",
+ // because|causal
+ "cause",
+ // what you|choo-choo train
+ "choo",
+ // across|angry
+ "cross",
+ // he|e pluribus unum
+ "e",
+ // them|letter m|em-dash|Emily
+ "em",
+ // here|earlier
+ "ere",
+ // afro|to and fro
+ "fro",
+ // unfounded|founded
+ "founded",
+ // whore|ho ho!
+ "ho",
+ // will|sick
+ "ill",
+ // okay|letter K
+ "kay",
+ // molasses|women
+ "lasses",
+ // lo|lo and behold
+ "lo",
+ // leaned|enlisted
+ "listed",
+ // ???
+ "nation",
+ // are|regarding
+ "re",
+ // circular|around
+ "round",
+ // what's up|to eat
+ "sup",
+ // cannot (tan't)|colour
+ "tan",
+ // still|turn soil
+ "till",
+ // it will|twill fabric
+ "twill",
+ // them|utterance
+ "um",
+ // away|direction
+ "way",
+ // is that|Iranian village
+ "zat"
+ );
+
+ private static final Set<String> ENDED_AMBIGUOUS = Set.of(
+ // pin|head dunk
+ "bobbin",
+ // give|martial arts garment
+ "gi",
+ // in|I
+ "i",
+ // of|letter o
+ "o"
+ );
+
+ private static final Set<String> ENDED_UNAMBIGUOUS = Set.of(
+ // and
+ "an",
+ // didn't
+ "didn",
+ // for/before
+ "fo",
+ // friend
+ "frien",
+ // just
+ "jes",
+ // just
+ "jus",
+ // lord
+ "lor",
+ // myself
+ "masel",
+ // old
+ "ol",
+ // San (Francisco)
+ "Sa",
+ // shift
+ "shif",
+ // the
+ "th",
+ // what
+ "wha",
+ // with
+ "wi",
+ // world
+ "worl",
+ // Top ~3800 common -ing words as English contractions.
+ "abandonin",
+ "abductin",
+ "abettin",
+ "abidin",
+ "abolishin",
+ "aboundin",
+ "absorbin",
+ "abstainin",
+ "abstractin",
+ "abusin",
+ "abuttin",
+ "acceleratin",
+ "acceptin",
+ "accessin",
+ "accommodatin",
+ "accompanyin",
+ "accomplishin",
+ "accordin",
+ "accountin",
+ "accruin",
+ "accumulatin",
+ "accusin",
+ "achievin",
+ "achin",
+ "acknowledgin",
+ "acquaintin",
+ "acquirin",
+ "actin",
+ "activatin",
+ "adaptin",
+ "addin",
+ "addressin",
+ "adherin",
+ "adjoinin",
+ "adjustin",
+ "administerin",
+ "admirin",
+ "admittin",
+ "admonishin",
+ "adoin",
+ "adoptin",
+ "adorin",
+ "advancin",
+ "advertisin",
+ "advisin",
+ "advocatin",
+ "affectin",
+ "affirmin",
+ "afflictin",
+ "affordin",
+ "afishin",
+ "aggravatin",
+ "agitatin",
+ "agoin",
+ "agonizin",
+ "agreein",
+ "ailin",
+ "aimin",
+ "ain",
+ "airin",
+ "alarmin",
+ "alayin",
+ "alertin",
+ "alienatin",
+ "alightin",
+ "alignin",
+ "allegin",
+ "alleviatin",
+ "allocatin",
+ "allowin",
+ "alludin",
+ "allurin",
+ "alookin",
+ "alterin",
+ "alternatin",
+ "alyin",
+ "amassin",
+ "amazin",
+ "amblin",
+ "amendin",
+ "amountin",
+ "amplifyin",
+ "amusin",
+ "analysin",
+ "analyzin",
+ "anchorin",
+ "anglin",
+ "animatin",
+ "annealin",
+ "annihilatin",
+ "announcin",
+ "annoyin",
+ "anointin",
+ "answerin",
+ "anticipatin",
+ "anythin",
+ "apologizin",
+ "appallin",
+ "appealin",
+ "appearin",
+ "appertainin",
+ "appetizin",
+ "applaudin",
+ "applyin",
+ "appointin",
+ "appraisin",
+ "appreciatin",
+ "apprehendin",
+ "approachin",
+ "appropriatin",
+ "approvin",
+ "approximatin",
+ "archin",
+ "archivin",
+ "arguin",
+ "arisin",
+ "arousin",
+ "arrangin",
+ "arrestin",
+ "arrivin",
+ "articulatin",
+ "ascendin",
+ "ascertainin",
+ "ascribin",
+ "askin",
+ "assailin",
+ "assaultin",
+ "assemblin",
+ "assertin",
+ "assessin",
+ "assignin",
+ "assimilatin",
+ "assistin",
+ "associatin",
+ "assumin",
+ "assurin",
+ "astonishin",
+ "astoundin",
+ "atonin",
+ "attachin",
+ "attackin",
+ "attainin",
+ "attemptin",
+ "attendin",
+ "attestin",
+ "attractin",
+ "attributin",
+ "auditin",
+ "augmentin",
+ "authorin",
+ "authorisin",
+ "authorizin",
+ "availin",
+ "avengin",
+ "averagin",
+ "avertin",
+ "avoidin",
+ "awaitin",
+ "awakenin",
+ "awakin",
+ "awardin",
+ "awin",
+ "awnin",
+ "awonderin",
+ "axin",
+ "babblin",
+ "babysittin",
+ "backin",
+ "backslidin",
+ "backtrackin",
+ "badgerin",
+ "bafflin",
+ "baggin",
+ "bailin",
+ "baitin",
+ "bakin",
+ "balancin",
+ "baldin",
+ "balloonin",
+ "ballotin",
+ "bandagin",
+ "bandin",
+ "bangin",
+ "banishin",
+ "bankin",
+ "bannin",
+ "banquetin",
+ "banterin",
+ "baptizin",
+ "bargainin",
+ "barin",
+ "barkin",
+ "barrin",
+ "barterin",
+ "bashin",
+ "baskin",
+ "bastin",
+ "bathin",
+ "batterin",
+ "battin",
+ "battlin",
+ "bawlin",
+ "bayin",
+ "beadin",
+ "beamin",
+ "bearin",
+ "beatin",
+ "beautifyin",
+ "beckonin",
+ "becomin",
+ "beddin",
+ "bedswervin",
"beefin",
"beepin",
src/main/java/com/whitemagicsoftware/keenquotes/parser/Curler.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.parser;
src/main/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitter.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.parser;
src/main/java/com/whitemagicsoftware/keenquotes/parser/Stem.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.parser;
src/main/java/com/whitemagicsoftware/keenquotes/parser/Tree.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.parser;
src/main/java/com/whitemagicsoftware/keenquotes/util/FastCharacterIterator.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.util;
src/main/java/com/whitemagicsoftware/keenquotes/util/Tuple.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.util;
src/test/java/com/whitemagicsoftware/keenquotes/parser/AmbiguityResolverTest.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.parser;
src/test/java/com/whitemagicsoftware/keenquotes/parser/CurlerTest.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.parser;
src/test/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitterTest.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.parser;
src/test/java/com/whitemagicsoftware/keenquotes/texts/TestResource.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.texts;
Delta: 761 lines added, 716 lines removed, 45-line increase