| | * Placeholder for various types of contractions. |
| | */ |
| | -@SuppressWarnings( "SpellCheckingInspection" ) |
| | -public class Contractions { |
| | - |
| | - private final Builder mBuilder; |
| | - |
| | - private Contractions( final Builder builder ) { |
| | - assert builder != null; |
| | - mBuilder = builder; |
| | - } |
| | - |
| | - /** |
| | - * Allows constructing a list of custom contractions. |
| | - */ |
| | - @SuppressWarnings( "unused" ) |
| | - public static class Builder { |
| | - private final Set<String> mBeganEndedUnambiguous = new HashSet<>(); |
| | - private final Set<String> mBeganUnambiguous = new HashSet<>(); |
| | - private final Set<String> mEndedUnambiguous = new HashSet<>(); |
| | - private final Set<String> mBeganAmbiguous = new HashSet<>(); |
| | - private final Set<String> mEndedAmbiguous = new HashSet<>(); |
| | - |
| | - public Builder withBeganEndedUnambiguous( final List<String> words ) { |
| | - mBeganEndedUnambiguous.addAll( words ); |
| | - return this; |
| | - } |
| | - |
| | - public Builder withBeganUnambiguous( final List<String> words ) { |
| | - mBeganUnambiguous.addAll( words ); |
| | - return this; |
| | - } |
| | - |
| | - public Builder withEndedUnambiguous( final List<String> words ) { |
| | - mEndedUnambiguous.addAll( words ); |
| | - return this; |
| | - } |
| | - |
| | - public Builder withBeganAmbiguous( final List<String> words ) { |
| | - mBeganAmbiguous.addAll( words ); |
| | - return this; |
| | - } |
| | - |
| | - public Builder withEndedAmbiguous( final List<String> words ) { |
| | - mEndedAmbiguous.addAll( words ); |
| | - return this; |
| | - } |
| | - |
| | - /** |
| | - * Constructs a new set of {@link Contractions} that can be configured |
| | - * using this {@link Builder} instance. |
| | - * |
| | - * @return {@link Contractions} suitable for use with parsing text. |
| | - */ |
| | - public Contractions build() { |
| | - mBeganEndedUnambiguous.addAll( BEGAN_ENDED_UNAMBIGUOUS ); |
| | - mBeganUnambiguous.addAll( BEGAN_UNAMBIGUOUS ); |
| | - mEndedUnambiguous.addAll( ENDED_UNAMBIGUOUS ); |
| | - mBeganAmbiguous.addAll( BEGAN_AMBIGUOUS ); |
| | - mEndedAmbiguous.addAll( ENDED_AMBIGUOUS ); |
| | - |
| | - // Remove ambiguous items if they are already declared. |
| | - mBeganAmbiguous.removeAll( mBeganUnambiguous ); |
| | - mEndedAmbiguous.removeAll( mEndedUnambiguous ); |
| | - |
| | - return new Contractions( this ); |
| | - } |
| | - |
| | - /** |
| | - * This returns the {@code fallback} {@link Set} if {@code src} is empty; |
| | - * otherwise, this returns the empty {@link Set}. |
| | - * |
| | - * @param src A set of contractions, possibly empty. |
| | - * @param fallback The default values to use if {@code src} is empty. |
| | - * @param <T> The type of data used by both {@link Set}s. |
| | - * @return An empty {@link Set} if the {@code src} contains at least one |
| | - * element; otherwise, this will return {@code fallback}. |
| | - */ |
| | - private static <T> Set<T> from( final Set<T> src, final Set<T> fallback ) { |
| | - assert src != null; |
| | - assert fallback != null; |
| | - return src.isEmpty() ? fallback : emptySet(); |
| | - } |
| | - } |
| | - |
| | - public boolean beganEndedUnambiguously( final String word ) { |
| | - assert word != null; |
| | - return getBeganEndedUnambiguous().contains( word ); |
| | - } |
| | - |
| | - /** |
| | - * Answers whether the given word is a contraction that always starts |
| | - * with an apostrophe. The comparison is case-insensitive. This must |
| | - * only be called when a straight quote is followed by a word. |
| | - * |
| | - * @param word The word to compare against the list of known unambiguous |
| | - * contractions. |
| | - * @return {@code true} when the given word is in the set of unambiguous |
| | - * contractions. |
| | - */ |
| | - public boolean beganUnambiguously( final String word ) { |
| | - assert word != null; |
| | - return getBeganUnambiguous().contains( word.toLowerCase() ); |
| | - } |
| | - |
| | - /** |
| | - * Answers whether the given word could be a contraction but is also a |
| | - * valid word in non-contracted form. |
| | - * |
| | - * @param word The word to compare against the list of known ambiguous |
| | - * contractions. |
| | - * @return {@code true} when the given word is in the set of ambiguous |
| | - * contractions. |
| | - */ |
| | - public boolean beganAmbiguously( final String word ) { |
| | - assert word != null; |
| | - return getBeganAmbiguous().contains( word.toLowerCase() ); |
| | - } |
| | - |
| | - public boolean endedUnambiguously( final String word ) { |
| | - assert word != null; |
| | - return getEndedUnambiguous().contains( word.toLowerCase() ); |
| | - } |
| | - |
| | - public boolean endedAmbiguously( final String word ) { |
| | - assert word != null; |
| | - final var check = word.toLowerCase(); |
| | - |
| | - // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet |
| | - // allow o' to match because 'a sentence can end with the letter o'. |
| | - return getEndedAmbiguous().contains( check ) || |
| | - check.endsWith( "s" ) || check.endsWith( "z" ) || |
| | - check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" )); |
| | - } |
| | - |
| | - private Set<String> getBeganEndedUnambiguous() { |
| | - return mBuilder.mBeganEndedUnambiguous; |
| | - } |
| | - |
| | - private Set<String> getBeganUnambiguous() { |
| | - return mBuilder.mBeganUnambiguous; |
| | - } |
| | - |
| | - private Set<String> getEndedUnambiguous() { |
| | - return mBuilder.mEndedUnambiguous; |
| | - } |
| | - |
| | - private Set<String> getBeganAmbiguous() { |
| | - return mBuilder.mBeganAmbiguous; |
| | - } |
| | - |
| | - private Set<String> getEndedAmbiguous() { |
| | - return mBuilder.mEndedAmbiguous; |
| | - } |
| | - |
| | - @Override |
| | - public String toString() { |
| | - return |
| | - toString( getBeganEndedUnambiguous(), "Unambiguous Began/Ended", "'%s" ) + |
| | - toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) + |
| | - toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) + |
| | - toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) + |
| | - toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" ); |
| | - } |
| | - |
| | - private String toString( |
| | - final Set<String> words, |
| | - final String category, |
| | - final String fmt |
| | - ) { |
| | - final var sb = new StringBuilder( 16384 ); |
| | - final var newline = System.lineSeparator(); |
| | - final var list = new ArrayList<>( words ); |
| | - |
| | - sort( list ); |
| | - sb.append( format( "%n%s%n", category ) ); |
| | - list.forEach( ( s ) -> sb.append( format( fmt, s ) ).append( newline ) ); |
| | - |
| | - return sb.toString(); |
| | - } |
| | - |
| | - /** |
| | - * Words having a straight apostrophe at the beginning and end. |
| | - */ |
| | - private static final Set<String> BEGAN_ENDED_UNAMBIGUOUS = Set.of( |
| | - // hacking |
| | - "ackin", |
| | - // hammering |
| | - "ammerin", |
| | - // hankering |
| | - "ankerin", |
| | - // having |
| | - "avin", |
| | - // hawking |
| | - "awkin", |
| | - // excepting |
| | - "cepin", |
| | - // excepting |
| | - "ceppin", |
| | - // excepting |
| | - "ceptin", |
| | - // according |
| | - "cordin", |
| | - // heading |
| | - "eadin", |
| | - // leaving |
| | - "eavin", |
| | - // helping |
| | - "elpin", |
| | - // hindering |
| | - "inderin", |
| | - // electioneering |
| | - "lectioneerin", |
| | - // amazing |
| | - "mazin", |
| | - // remembering |
| | - "memberin", |
| | - // fish 'n' chips |
| | - "n", |
| | - // hobbling |
| | - "obblin", |
| | - // holding |
| | - "oldin", |
| | - // hollering |
| | - "ollerin", |
| | - // hopping |
| | - "oppin", |
| | - // housekeeping |
| | - "ousekeepin", |
| | - // howling |
| | - "owlin", |
| | - // excepting |
| | - "sceptin", |
| | - // expecting |
| | - "spectin", |
| | - // explaining |
| | - "splainin", |
| | - // supposing |
| | - "sposin", |
| | - "sputin", |
| | - // astonishing |
| | - "stonishin", |
| | - // destroying |
| | - "stroyin", |
| | - // persuading |
| | - "suadin", |
| | - "titivatin", |
| | - // introducing |
| | - "troducin", |
| | - // hugging |
| | - "uggin", |
| | - // hulking |
| | - "ulkin", |
| | - // humbugging |
| | - "umbuggin", |
| | - // humiliating |
| | - "umiliatin", |
| | - // humming |
| | - "ummin", |
| | - // humping |
| | - "umpin", |
| | - // hurrying |
| | - "urryin", |
| | - // hurting |
| | - "urtin", |
| | - // hustling |
| | - "ustlin", |
| | - // investigating |
| | - "vestigatin", |
| | - // inviting |
| | - "vitin", |
| | - // excepting |
| | - "xceptin", |
| | - // explaining |
| | - "xplainin", |
| | - // exploding |
| | - "xplodin" |
| | - ); |
| | - |
| | - /** |
| | - * Words having a straight apostrophe that cannot be mistaken for an |
| | - * opening single quote. |
| | - */ |
| | - private static final Set<String> BEGAN_UNAMBIGUOUS = Set.of( |
| | - "aporth", |
| | - // about you |
| | - "boutcha", |
| | - // about you |
| | - "boutchu", |
| | - // about well |
| | - "boutwell", |
| | - // except |
| | - "cept", |
| | - // decided |
| | - "cided", |
| | - // because |
| | - "cos", |
| | - // armadillo |
| | - "dillo", |
| | - // themselves, |
| | - "emselves", |
| | - // affectionate |
| | - "fectionate", |
| | - // before |
| | - "fore", |
| | - // afraid |
| | - "fraid", |
| | - // ??? |
| | - "funder", |
| | - // against |
| | - "gainst", |
| | - // him |
| | - "im", |
| | - // little |
| | - "ittle", |
| | - // and |
| | - "n", |
| | - // beneath |
| | - "neath", |
| | - // another |
| | - "nother", |
| | - // another |
| | - "nudder", |
| | - // enough |
| | - "nuff", |
| | - // gonna |
| | - "onna", |
| | - "onna'", |
| | - // horse |
| | - "oss", |
| | - // horses |
| | - "osses", |
| | - // splash |
| | - "plash", |
| | - // upon |
| | - "pon", |
| | - // that's|is |
| | - "s", |
| | - "sblood", |
| | - // excuse |
| | - "scuse", |
| | - "sfar", |
| | - "sfoot", |
| | - // considered |
| | - "sidered", |
| | - // suspect|expect |
| | - "spect", |
| | - // suspects|expects |
| | - "spects", |
| | - // exploit |
| | - "sploit", |
| | - // exploits |
| | - "sploits", |
| | - // suppose |
| | - "spose", |
| | - // instead |
| | - "stead", |
| | - // it |
| | - "t", |
| | - "taint", |
| | - "tain", |
| | - "tay", |
| | - "til", |
| | - "tis", |
| | - "tish", |
| | - // it isn't |
| | - "tisn", |
| | - // stomach |
| | - "tomach", |
| | - // stormed |
| | - "tormed", |
| | - "tshall", |
| | - "twas", |
| | - "twasn", |
| | - "tween", |
| | - "twere", |
| | - "tweren", |
| | - "twixt", |
| | - "twon", |
| | - "twou", |
| | - "twould", |
| | - "twouldn", |
| | - // one |
| | - "un", |
| | - // have |
| | - "ve", |
| | - // exactly |
| | - "xactly" |
| | - ); |
| | - |
| | - /** |
| | - * Words having a straight apostrophe that may be either part of a |
| | - * contraction or a word that stands alone beside an opening single quote. |
| | - */ |
| | - private static final Set<String> BEGAN_AMBIGUOUS = Set.of( |
| | - // have |
| | - "a", |
| | - // about|boxing match |
| | - "bout", |
| | - // because|causal |
| | - "cause", |
| | - // what you|choo choo train |
| | - "choo", |
| | - // across|angry |
| | - "cross", |
| | - // he|e pluribus unum |
| | - "e", |
| | - // them|letter m|em-dash|Emily |
| | - "em", |
| | - // here|earlier |
| | - "ere", |
| | - // afro|to and fro |
| | - "fro", |
| | - // unfounded|founded |
| | - "founded", |
| | - // whore|ho ho! |
| | - "ho", |
| | - // will|sick |
| | - "ill", |
| | - // okay|letter K |
| | - "kay", |
| | - // molasses|women |
| | - "lasses", |
| | - // lo|lo and behold |
| | - "lo", |
| | - // leaned|enlisted |
| | - "listed", |
| | - // ??? |
| | - "nation", |
| | - // are|regarding |
| | - "re", |
| | - // circular|around |
| | - "round", |
| | - // what's up|to eat |
| | - "sup", |
| | - // cannot (tan't)|colour |
| | - "tan", |
| | - // still|turn soil |
| | - "till", |
| | - // it will|twill fabric |
| | - "twill", |
| | - // them|utterance |
| | - "um", |
| | - // away|direction |
| | - "way", |
| | - // is that|Iranian village |
| | - "zat" |
| | - ); |
| | - |
| | - private static final Set<String> ENDED_AMBIGUOUS = Set.of( |
| | - // pin|head dunk |
| | - "bobbin", |
| | - // give|martial arts garment |
| | - "gi", |
| | - // in|I |
| | - "i", |
| | - // of|letter o |
| | - "o" |
| | - ); |
| | - |
| | - private static final Set<String> ENDED_UNAMBIGUOUS = Set.of( |
| | - // and |
| | - "an", |
| | - // didn't |
| | - "didn", |
| | - // for/before |
| | - "fo", |
| | - // friend |
| | - "frien", |
| | - // just |
| | - "jes", |
| | - // just |
| | - "jus", |
| | - // lord |
| | - "lor", |
| | - // myself |
| | - "masel", |
| | - // old |
| | - "ol", |
| | - // San (Francisco) |
| | - "Sa", |
| | - // shift |
| | - "shif", |
| | - // the |
| | - "th", |
| | - // what |
| | - "wha", |
| | - // with |
| | - "wi", |
| | - // world |
| | - "worl", |
| | - // Top ~3800 common -ing words as English contractions. |
| | - "abandonin", |
| | - "abductin", |
| | - "abettin", |
| | - "abidin", |
| | - "abolishin", |
| | - "aboundin", |
| | - "absorbin", |
| | - "abstainin", |
| | - "abstractin", |
| | - "abusin", |
| | - "abuttin", |
| | - "acceleratin", |
| | - "acceptin", |
| | - "accessin", |
| | - "accommodatin", |
| | - "accompanyin", |
| | - "accomplishin", |
| | - "accordin", |
| | - "accountin", |
| | - "accruin", |
| | - "accumulatin", |
| | - "accusin", |
| | - "achievin", |
| | - "achin", |
| | - "acknowledgin", |
| | - "acquaintin", |
| | - "acquirin", |
| | - "actin", |
| | - "activatin", |
| | - "adaptin", |
| | - "addin", |
| | - "addressin", |
| | - "adherin", |
| | - "adjoinin", |
| | - "adjustin", |
| | - "administerin", |
| | - "admirin", |
| | - "admittin", |
| | - "admonishin", |
| | - "adoin", |
| | - "adoptin", |
| | - "adorin", |
| | - "advancin", |
| | - "advertisin", |
| | - "advisin", |
| | - "advocatin", |
| | - "affectin", |
| | - "affirmin", |
| | - "afflictin", |
| | - "affordin", |
| | - "afishin", |
| | - "aggravatin", |
| | - "agitatin", |
| | - "agoin", |
| | - "agonizin", |
| | - "agreein", |
| | - "ailin", |
| | - "aimin", |
| | - "ain", |
| | - "airin", |
| | - "alarmin", |
| | - "alayin", |
| | - "alertin", |
| | - "alienatin", |
| | - "alightin", |
| | - "alignin", |
| | - "allegin", |
| | - "alleviatin", |
| | - "allocatin", |
| | - "allowin", |
| | - "alludin", |
| | - "allurin", |
| | - "alookin", |
| | - "alterin", |
| | - "alternatin", |
| | - "alyin", |
| | - "amassin", |
| | - "amazin", |
| | - "amblin", |
| | - "amendin", |
| | - "amountin", |
| | - "amplifyin", |
| | - "amusin", |
| | - "analysin", |
| | - "analyzin", |
| | - "anchorin", |
| | - "anglin", |
| | - "animatin", |
| | - "annealin", |
| | - "annihilatin", |
| | - "announcin", |
| | - "annoyin", |
| | - "anointin", |
| | - "answerin", |
| | - "anticipatin", |
| | - "anythin", |
| | - "apologizin", |
| | - "appallin", |
| | - "appealin", |
| | - "appearin", |
| | - "appertainin", |
| | - "appetizin", |
| | - "applaudin", |
| | - "applyin", |
| | - "appointin", |
| | - "appraisin", |
| | - "appreciatin", |
| | - "apprehendin", |
| | - "approachin", |
| | - "appropriatin", |
| | - "approvin", |
| | - "approximatin", |
| | - "archin", |
| | - "archivin", |
| | - "arguin", |
| | - "arisin", |
| | - "arousin", |
| | - "arrangin", |
| | - "arrestin", |
| | - "arrivin", |
| | - "articulatin", |
| | - "ascendin", |
| | - "ascertainin", |
| | - "ascribin", |
| | - "askin", |
| | - "assailin", |
| | - "assaultin", |
| | - "assemblin", |
| | - "assertin", |
| | - "assessin", |
| | - "assignin", |
| | - "assimilatin", |
| | - "assistin", |
| | - "associatin", |
| | - "assumin", |
| | - "assurin", |
| | - "astonishin", |
| | - "astoundin", |
| | - "atonin", |
| | - "attachin", |
| | - "attackin", |
| | - "attainin", |
| | - "attemptin", |
| | - "attendin", |
| | - "attestin", |
| | - "attractin", |
| | - "attributin", |
| | - "auditin", |
| | - "augmentin", |
| | - "authorin", |
| | - "authorisin", |
| | - "authorizin", |
| | - "availin", |
| | - "avengin", |
| | - "averagin", |
| | - "avertin", |
| | - "avoidin", |
| | - "awaitin", |
| | - "awakenin", |
| | - "awakin", |
| | - "awardin", |
| | - "awin", |
| | - "awnin", |
| | - "awonderin", |
| | - "axin", |
| | - "babblin", |
| | - "babysittin", |
| | - "backin", |
| | - "backslidin", |
| | - "backtrackin", |
| | - "badgerin", |
| | - "bafflin", |
| | - "baggin", |
| | - "bailin", |
| | - "baitin", |
| | - "bakin", |
| | - "balancin", |
| | - "baldin", |
| | - "balloonin", |
| | - "ballotin", |
| | - "bandagin", |
| | - "bandin", |
| | - "bangin", |
| | - "banishin", |
| | - "bankin", |
| | - "bannin", |
| | - "banquetin", |
| | - "banterin", |
| | - "baptizin", |
| | - "bargainin", |
| | - "barin", |
| | - "barkin", |
| | - "barrin", |
| | - "barterin", |
| | - "bashin", |
| | - "baskin", |
| | - "bastin", |
| | - "bathin", |
| | - "batterin", |
| | - "battin", |
| | - "battlin", |
| | - "bawlin", |
| | - "bayin", |
| | - "beadin", |
| | - "beamin", |
| | - "bearin", |
| | - "beatin", |
| | - "beautifyin", |
| | - "beckonin", |
| | - "becomin", |
| | - "beddin", |
| | +public class Contractions { |
| | + |
| | + private final Builder mBuilder; |
| | + |
| | + /** |
| | + * Creates an instance backed by the given {@link Builder}. The builder |
| | + * reference is retained as-is (no copy is made here). |
| | + * |
| | + * @param builder Source of the contraction word sets; must not be |
| | + * {@code null} (enforced by assertion only). |
| | + */ |
| | + private Contractions( final Builder builder ) { |
| | + assert builder != null; |
| | + mBuilder = builder; |
| | + } |
| | + |
| | + /** |
| | + * Allows constructing a list of custom contractions. Custom words are |
| | + * merged with the built-in word lists when {@link #build()} is called. |
| | + * <p> |
| | + * Except for the began/ended list, words are stored in lowercase because |
| | + * the corresponding lookups lowercase the word being tested; a mixed-case |
| | + * entry could otherwise never match. |
| | + * </p> |
| | + */ |
| | + @SuppressWarnings( "unused" ) |
| | + public static class Builder { |
| | + private final Set<String> mBeganEndedUnambiguous = new HashSet<>(); |
| | + private final Set<String> mBeganUnambiguous = new HashSet<>(); |
| | + private final Set<String> mEndedUnambiguous = new HashSet<>(); |
| | + private final Set<String> mBeganAmbiguous = new HashSet<>(); |
| | + private final Set<String> mEndedAmbiguous = new HashSet<>(); |
| | + |
| | + /** |
| | + * Adds words that have an apostrophe at both the beginning and the end. |
| | + * These words are stored verbatim (no case folding). |
| | + * |
| | + * @param words The words to add, without their apostrophes. |
| | + * @return This instance, to allow call chaining. |
| | + */ |
| | + public Builder withBeganEndedUnambiguous( final List<String> words ) { |
| | + mBeganEndedUnambiguous.addAll( words ); |
| | + return this; |
| | + } |
| | + |
| | + /** |
| | + * Adds words that always begin with an apostrophe. |
| | + * |
| | + * @param words The words to add; stored in lowercase. |
| | + * @return This instance, to allow call chaining. |
| | + */ |
| | + public Builder withBeganUnambiguous( final List<String> words ) { |
| | + addLowerCase( mBeganUnambiguous, words ); |
| | + return this; |
| | + } |
| | + |
| | + /** |
| | + * Adds words that always end with an apostrophe. |
| | + * |
| | + * @param words The words to add; stored in lowercase. |
| | + * @return This instance, to allow call chaining. |
| | + */ |
| | + public Builder withEndedUnambiguous( final List<String> words ) { |
| | + addLowerCase( mEndedUnambiguous, words ); |
| | + return this; |
| | + } |
| | + |
| | + /** |
| | + * Adds words that may begin with an apostrophe but are also valid on |
| | + * their own. |
| | + * |
| | + * @param words The words to add; stored in lowercase. |
| | + * @return This instance, to allow call chaining. |
| | + */ |
| | + public Builder withBeganAmbiguous( final List<String> words ) { |
| | + addLowerCase( mBeganAmbiguous, words ); |
| | + return this; |
| | + } |
| | + |
| | + /** |
| | + * Adds words that may end with an apostrophe but are also valid on |
| | + * their own. |
| | + * |
| | + * @param words The words to add; stored in lowercase. |
| | + * @return This instance, to allow call chaining. |
| | + */ |
| | + public Builder withEndedAmbiguous( final List<String> words ) { |
| | + addLowerCase( mEndedAmbiguous, words ); |
| | + return this; |
| | + } |
| | + |
| | + /** |
| | + * Constructs a new set of {@link Contractions} from the custom words |
| | + * given to this {@link Builder} plus the built-in word lists. The result |
| | + * is built from a private snapshot, so subsequent changes to this |
| | + * builder do not affect instances that were already built. |
| | + * |
| | + * @return {@link Contractions} suitable for use with parsing text. |
| | + */ |
| | + public Contractions build() { |
| | + // Work on a copy so the returned instance shares no mutable state with |
| | + // this builder. |
| | + final var snapshot = new Builder(); |
| | + |
| | + snapshot.mBeganEndedUnambiguous.addAll( mBeganEndedUnambiguous ); |
| | + snapshot.mBeganEndedUnambiguous.addAll( BEGAN_ENDED_UNAMBIGUOUS ); |
| | + |
| | + // Lookups against these sets use lowercase words, so every entry |
| | + // (custom or built-in) is normalized to lowercase. |
| | + addLowerCase( snapshot.mBeganUnambiguous, mBeganUnambiguous ); |
| | + addLowerCase( snapshot.mBeganUnambiguous, BEGAN_UNAMBIGUOUS ); |
| | + addLowerCase( snapshot.mEndedUnambiguous, mEndedUnambiguous ); |
| | + addLowerCase( snapshot.mEndedUnambiguous, ENDED_UNAMBIGUOUS ); |
| | + addLowerCase( snapshot.mBeganAmbiguous, mBeganAmbiguous ); |
| | + addLowerCase( snapshot.mBeganAmbiguous, BEGAN_AMBIGUOUS ); |
| | + addLowerCase( snapshot.mEndedAmbiguous, mEndedAmbiguous ); |
| | + addLowerCase( snapshot.mEndedAmbiguous, ENDED_AMBIGUOUS ); |
| | + |
| | + // Remove ambiguous items if they are already declared. |
| | + snapshot.mBeganAmbiguous.removeAll( snapshot.mBeganUnambiguous ); |
| | + snapshot.mEndedAmbiguous.removeAll( snapshot.mEndedUnambiguous ); |
| | + |
| | + return new Contractions( snapshot ); |
| | + } |
| | + |
| | + /** |
| | + * Adds the lowercase form of each word in {@code src} to {@code dst}. |
| | + * Lowercasing uses the root locale so that the result does not depend on |
| | + * the platform's default locale. |
| | + * |
| | + * @param dst The set that receives the lowercase words. |
| | + * @param src The words to add; {@code null} elements are skipped. |
| | + */ |
| | + private static void addLowerCase( |
| | + final Set<String> dst, final Iterable<String> src ) { |
| | + for( final var word : src ) { |
| | + if( word != null ) { |
| | + dst.add( word.toLowerCase( java.util.Locale.ROOT ) ); |
| | + } |
| | + } |
| | + } |
| | + |
| | + /** |
| | + * This returns the {@code fallback} {@link Set} if {@code src} is empty; |
| | + * otherwise, this returns the empty {@link Set}. |
| | + * |
| | + * @param src A set of contractions, possibly empty. |
| | + * @param fallback The default values to use if {@code src} is empty. |
| | + * @param <T> The type of data used by both {@link Set}s. |
| | + * @return An empty {@link Set} if the {@code src} contains at least one |
| | + * element; otherwise, this will return {@code fallback}. |
| | + */ |
| | + private static <T> Set<T> from( final Set<T> src, final Set<T> fallback ) { |
| | + assert src != null; |
| | + assert fallback != null; |
| | + return src.isEmpty() ? fallback : emptySet(); |
| | + } |
| | + } |
| | + |
| | + /** |
| | + * Answers whether the given word is a contraction that has an apostrophe |
| | + * at both its beginning and its end (e.g., the {@code n} in |
| | + * {@code fish 'n' chips}). The word is matched as given and, failing |
| | + * that, in lowercase, so the comparison is case-insensitive with respect |
| | + * to lowercase entries. |
| | + * |
| | + * @param word The word to compare against the list of known began/ended |
| | + * contractions. |
| | + * @return {@code true} when the given word is in the set of began/ended |
| | + * contractions. |
| | + */ |
| | + public boolean beganEndedUnambiguously( final String word ) { |
| | + assert word != null; |
| | + final var words = getBeganEndedUnambiguous(); |
| | + |
| | + // The exact match keeps mixed-case entries working; the lowercase |
| | + // fallback matches capitalized text against lowercase entries. |
| | + return words.contains( word ) || |
| | + words.contains( word.toLowerCase( java.util.Locale.ROOT ) ); |
| | + } |
| | + |
| | + /** |
| | + * Answers whether the given word is a contraction that always starts |
| | + * with an apostrophe. The comparison is case-insensitive: the word is |
| | + * lowercased using the root locale, independent of the platform's |
| | + * default locale. This must only be called when a straight quote is |
| | + * followed by a word. |
| | + * |
| | + * @param word The word to compare against the list of known unambiguous |
| | + * contractions. |
| | + * @return {@code true} when the given word is in the set of unambiguous |
| | + * contractions. |
| | + */ |
| | + public boolean beganUnambiguously( final String word ) { |
| | + assert word != null; |
| | + final var check = word.toLowerCase( java.util.Locale.ROOT ); |
| | + return getBeganUnambiguous().contains( check ); |
| | + } |
| | + |
| | + /** |
| | + * Answers whether the given word could be a contraction but is also a |
| | + * valid word in non-contracted form. The comparison is case-insensitive: |
| | + * the word is lowercased using the root locale, independent of the |
| | + * platform's default locale. |
| | + * |
| | + * @param word The word to compare against the list of known ambiguous |
| | + * contractions. |
| | + * @return {@code true} when the given word is in the set of ambiguous |
| | + * contractions. |
| | + */ |
| | + public boolean beganAmbiguously( final String word ) { |
| | + assert word != null; |
| | + final var check = word.toLowerCase( java.util.Locale.ROOT ); |
| | + return getBeganAmbiguous().contains( check ); |
| | + } |
| | + |
| | + /** |
| | + * Answers whether the given word is in the set of contractions that |
| | + * unambiguously end with an apostrophe. The comparison is |
| | + * case-insensitive: the word is lowercased using the root locale, |
| | + * independent of the platform's default locale. |
| | + * |
| | + * @param word The word to compare against the list of known unambiguous |
| | + * ending contractions. |
| | + * @return {@code true} when the lowercase form of the given word is in |
| | + * the set of unambiguous ending contractions. |
| | + */ |
| | + public boolean endedUnambiguously( final String word ) { |
| | + assert word != null; |
| | + final var check = word.toLowerCase( java.util.Locale.ROOT ); |
| | + return getEndedUnambiguous().contains( check ); |
| | + } |
| | + |
| | + /** |
| | + * Answers whether the given word could end with an apostrophe as part of |
| | + * a contraction. This is the case when the lowercase word is in the set |
| | + * of ambiguous ending contractions, or when it ends with {@code s}, |
| | + * {@code z}, {@code x}, or, for words longer than one character, |
| | + * {@code n}. The word is lowercased using the root locale, independent of |
| | + * the platform's default locale. |
| | + * |
| | + * @param word The word to test. |
| | + * @return {@code true} when the given word matches any of the rules |
| | + * described above. |
| | + */ |
| | + public boolean endedAmbiguously( final String word ) { |
| | + assert word != null; |
| | + final var check = word.toLowerCase( java.util.Locale.ROOT ); |
| | + |
| | + // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet |
| | + // allow o' to match because 'a sentence can end with the letter o'. |
| | + return getEndedAmbiguous().contains( check ) || |
| | + check.endsWith( "s" ) || check.endsWith( "z" ) || |
| | + check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" )); |
| | + } |
| | + |
| | + /** |
| | + * @return The builder's set of words having an apostrophe at both the |
| | + * beginning and the end; the set itself is returned, not a copy. |
| | + */ |
| | + private Set<String> getBeganEndedUnambiguous() { |
| | + return mBuilder.mBeganEndedUnambiguous; |
| | + } |
| | + |
| | + /** |
| | + * @return The builder's set of words that unambiguously begin with an |
| | + * apostrophe; the set itself is returned, not a copy. |
| | + */ |
| | + private Set<String> getBeganUnambiguous() { |
| | + return mBuilder.mBeganUnambiguous; |
| | + } |
| | + |
| | + /** |
| | + * @return The builder's set of words that unambiguously end with an |
| | + * apostrophe; the set itself is returned, not a copy. |
| | + */ |
| | + private Set<String> getEndedUnambiguous() { |
| | + return mBuilder.mEndedUnambiguous; |
| | + } |
| | + |
| | + /** |
| | + * @return The builder's set of words that ambiguously begin with an |
| | + * apostrophe; the set itself is returned, not a copy. |
| | + */ |
| | + private Set<String> getBeganAmbiguous() { |
| | + return mBuilder.mBeganAmbiguous; |
| | + } |
| | + |
| | + /** |
| | + * @return The builder's set of words that ambiguously end with an |
| | + * apostrophe; the set itself is returned, not a copy. |
| | + */ |
| | + private Set<String> getEndedAmbiguous() { |
| | + return mBuilder.mEndedAmbiguous; |
| | + } |
| | + |
| | + /** |
| | + * Lists every known contraction, grouped by category. Each category |
| | + * starts with a heading followed by its words in sorted order, one per |
| | + * line, with apostrophes placed where the category expects them: leading |
| | + * for "began", trailing for "ended", and both for "began/ended". |
| | + * |
| | + * @return A multi-line, human-readable listing of all contractions. |
| | + */ |
| | + @Override |
| | + public String toString() { |
| | + return |
| | + // Began/ended words carry an apostrophe on both sides. |
| | + toString( getBeganEndedUnambiguous(), "Unambiguous Began/Ended", "'%s'" ) + |
| | + toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) + |
| | + toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) + |
| | + toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) + |
| | + toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" ); |
| | + } |
| | + |
| | + /** |
| | + * Renders one category of contractions: a blank line, the category |
| | + * heading, then each word in natural sorted order, formatted with |
| | + * {@code fmt} and terminated by the platform line separator. |
| | + * |
| | + * @param words The words to list; the given set is not modified. |
| | + * @param category The heading to print above the words. |
| | + * @param fmt A format string having a single {@code %s} placeholder that |
| | + * receives each word. |
| | + * @return The formatted listing for the category. |
| | + */ |
| | + private String toString( |
| | + final Set<String> words, |
| | + final String category, |
| | + final String fmt |
| | + ) { |
| | + // Sort a copy so that the caller's set is left untouched. |
| | + final var sorted = new ArrayList<>( words ); |
| | + sort( sorted ); |
| | + |
| | + final var eol = System.lineSeparator(); |
| | + final var out = new StringBuilder( 16384 ); |
| | + out.append( format( "%n%s%n", category ) ); |
| | + |
| | + for( final var word : sorted ) { |
| | + out.append( format( fmt, word ) ); |
| | + out.append( eol ); |
| | + } |
| | + |
| | + return out.toString(); |
| | + } |
| | + |
| | + /** |
| | + * Words having a straight apostrophe at the beginning and end. The words |
| | + * are listed without their apostrophes; the comment above each entry |
| | + * gives the uncontracted word. These are merged into the builder's |
| | + * began/ended set by {@link Builder#build()}. |
| | + */ |
| | + private static final Set<String> BEGAN_ENDED_UNAMBIGUOUS = Set.of( |
| | + // hacking |
| | + "ackin", |
| | + // hammering |
| | + "ammerin", |
| | + // hankering |
| | + "ankerin", |
| | + // having |
| | + "avin", |
| | + // hawking |
| | + "awkin", |
| | + // excepting |
| | + "cepin", |
| | + // excepting |
| | + "ceppin", |
| | + // excepting |
| | + "ceptin", |
| | + // according |
| | + "cordin", |
| | + // heading |
| | + "eadin", |
| | + // leaving (NOTE(review): possibly "heaving", given the neighbouring |
| | + // dropped-h entries; confirm) |
| | + "eavin", |
| | + // helping |
| | + "elpin", |
| | + // hindering |
| | + "inderin", |
| | + // electioneering |
| | + "lectioneerin", |
| | + // amazing |
| | + "mazin", |
| | + // remembering |
| | + "memberin", |
| | + // fish 'n' chips |
| | + "n", |
| | + // hobbling |
| | + "obblin", |
| | + // holding |
| | + "oldin", |
| | + // hollering |
| | + "ollerin", |
| | + // hopping |
| | + "oppin", |
| | + // housekeeping |
| | + "ousekeepin", |
| | + // howling |
| | + "owlin", |
| | + // excepting |
| | + "sceptin", |
| | + // expecting |
| | + "spectin", |
| | + // explaining |
| | + "splainin", |
| | + // supposing |
| | + "sposin", |
| | + // presumably disputing; TODO confirm |
| | + "sputin", |
| | + // astonishing |
| | + "stonishin", |
| | + // destroying |
| | + "stroyin", |
| | + // persuading |
| | + "suadin", |
| | + // presumably titivating as written, or a clipped longer word; TODO confirm |
| | + "titivatin", |
| | + // introducing |
| | + "troducin", |
| | + // hugging |
| | + "uggin", |
| | + // hulking |
| | + "ulkin", |
| | + // humbugging |
| | + "umbuggin", |
| | + // humiliating |
| | + "umiliatin", |
| | + // humming |
| | + "ummin", |
| | + // humping |
| | + "umpin", |
| | + // hurrying |
| | + "urryin", |
| | + // hurting |
| | + "urtin", |
| | + // hustling |
| | + "ustlin", |
| | + // investigating |
| | + "vestigatin", |
| | + // inviting |
| | + "vitin", |
| | + // excepting |
| | + "xceptin", |
| | + // explaining |
| | + "xplainin", |
| | + // exploding |
| | + "xplodin" |
| | + ); |
| | + |
| | + /** |
| | + * Words having a straight apostrophe that cannot be mistaken for an |
| | + * opening single quote. The words are listed without the leading |
| | + * apostrophe; where present, the comment above an entry gives the |
| | + * uncontracted form. These are merged into the builder's set by |
| | + * {@link Builder#build()}. |
| | + */ |
| | + private static final Set<String> BEGAN_UNAMBIGUOUS = Set.of( |
| | + // presumably ha'p'orth (halfpennyworth); TODO confirm |
| | + "aporth", |
| | + // about you |
| | + "boutcha", |
| | + // about you |
| | + "boutchu", |
| | + // about well |
| | + "boutwell", |
| | + // except |
| | + "cept", |
| | + // decided |
| | + "cided", |
| | + // because |
| | + "cos", |
| | + // armadillo |
| | + "dillo", |
| | + // themselves |
| | + "emselves", |
| | + // affectionate |
| | + "fectionate", |
| | + // before |
| | + "fore", |
| | + // afraid |
| | + "fraid", |
| | + // ??? |
| | + "funder", |
| | + // against |
| | + "gainst", |
| | + // him |
| | + "im", |
| | + // little |
| | + "ittle", |
| | + // and |
| | + "n", |
| | + // beneath |
| | + "neath", |
| | + // another |
| | + "nother", |
| | + // another |
| | + "nudder", |
| | + // enough |
| | + "nuff", |
| | + // gonna |
| | + "onna", |
| | + // NOTE(review): unlike every other entry in this set, this one contains |
| | + // an apostrophe; confirm that it is intentional. |
| | + "onna'", |
| | + // horse |
| | + "oss", |
| | + // horses |
| | + "osses", |
| | + // splash |
| | + "plash", |
| | + // upon |
| | + "pon", |
| | + // that's|is |
| | + "s", |
| | + // God's blood (archaic oath) |
| | + "sblood", |
| | + // excuse |
| | + "scuse", |
| | + "sfar", |
| | + // God's foot (archaic oath) |
| | + "sfoot", |
| | + // considered |
| | + "sidered", |
| | + // suspect|expect |
| | + "spect", |
| | + // suspects|expects |
| | + "spects", |
| | + // exploit |
| | + "sploit", |
| | + // exploits |
| | + "sploits", |
| | + // suppose |
| | + "spose", |
| | + // instead |
| | + "stead", |
| | + // it |
| | + "t", |
| | + // it ain't |
| | + "taint", |
| | + "tain", |
| | + "tay", |
| | + // until |
| | + "til", |
| | + // it is |
| | + "tis", |
| | + "tish", |
| | + // it isn't |
| | + "tisn", |
| | + // stomach |
| | + "tomach", |
| | + // stormed |
| | + "tormed", |
| | + // it shall |
| | + "tshall", |
| | + // it was |
| | + "twas", |
| | + // it wasn't |
| | + "twasn", |
| | + // between |
| | + "tween", |
| | + // it were |
| | + "twere", |
| | + // it weren't |
| | + "tweren", |
| | + // betwixt |
| | + "twixt", |
| | + // it won't |
| | + "twon", |
| | + // it would ('twou'd) |
| | + "twou", |
| | + // it would |
| | + "twould", |
| | + // it wouldn't |
| | + "twouldn", |
| | + // one |
| | + "un", |
| | + // have |
| | + "ve", |
| | + // exactly |
| | + "xactly" |
| | + ); |
| | + |
| | + /** |
| | + * Words having a straight apostrophe that may be either part of a |
| | + * contraction or a word that stands alone beside an opening single quote. |
| | + * The comment above each entry lists possible meanings separated by a |
| | + * vertical bar. These are merged into the builder's set by |
| | + * {@link Builder#build()}, which then removes any word that is also in |
| | + * the unambiguous set. |
| | + */ |
| | + private static final Set<String> BEGAN_AMBIGUOUS = Set.of( |
| | + // have |
| | + "a", |
| | + // about|boxing match |
| | + "bout", |
| | + // because|causal |
| | + "cause", |
| | + // what you|choo-choo train |
| | + "choo", |
| | + // across|angry |
| | + "cross", |
| | + // he|e pluribus unum |
| | + "e", |
| | + // them|letter m|em-dash|Emily |
| | + "em", |
| | + // here|earlier |
| | + "ere", |
| | + // afro|to and fro |
| | + "fro", |
| | + // unfounded|founded |
| | + "founded", |
| | + // whore|ho ho! |
| | + "ho", |
| | + // will|sick |
| | + "ill", |
| | + // okay|letter K |
| | + "kay", |
| | + // molasses|women |
| | + "lasses", |
| | + // lo|lo and behold |
| | + "lo", |
| | + // leaned|enlisted |
| | + "listed", |
| | + // presumably damnation|country; TODO confirm |
| | + "nation", |
| | + // are|regarding |
| | + "re", |
| | + // circular|around |
| | + "round", |
| | + // what's up|to eat |
| | + "sup", |
| | + // cannot (tan't)|colour |
| | + "tan", |
| | + // still|turn soil |
| | + "till", |
| | + // it will|twill fabric |
| | + "twill", |
| | + // them|utterance |
| | + "um", |
| | + // away|direction |
| | + "way", |
| | + // is that|Iranian village |
| | + "zat" |
| | + ); |
| | + |
| | + /** |
| | + * Words that may end with a straight apostrophe as part of a contraction, |
| | + * but that are also valid when standing alone. The comment above each |
| | + * entry lists possible meanings separated by a vertical bar. These are |
| | + * merged into the builder's set by {@link Builder#build()}, which then |
| | + * removes any word that is also in the unambiguous set. |
| | + */ |
| | + private static final Set<String> ENDED_AMBIGUOUS = Set.of( |
| | + // pin|head dunk |
| | + "bobbin", |
| | + // give|martial arts garment |
| | + "gi", |
| | + // in|I |
| | + "i", |
| | + // of|letter o |
| | + "o" |
| | + ); |
| | + |
| | + private static final Set<String> ENDED_UNAMBIGUOUS = Set.of( |
| | + // and |
| | + "an", |
| | + // didn't |
| | + "didn", |
| | + // for/before |
| | + "fo", |
| | + // friend |
| | + "frien", |
| | + // just |
| | + "jes", |
| | + // just |
| | + "jus", |
| | + // lord |
| | + "lor", |
| | + // myself |
| | + "masel", |
| | + // old |
| | + "ol", |
| | + // San (Francisco) |
| | + "Sa", |
| | + // shift |
| | + "shif", |
| | + // the |
| | + "th", |
| | + // what |
| | + "wha", |
| | + // with |
| | + "wi", |
| | + // world |
| | + "worl", |
| | + // Top ~3800 common -ing words as English contractions. |
| | + "abandonin", |
| | + "abductin", |
| | + "abettin", |
| | + "abidin", |
| | + "abolishin", |
| | + "aboundin", |
| | + "absorbin", |
| | + "abstainin", |
| | + "abstractin", |
| | + "abusin", |
| | + "abuttin", |
| | + "acceleratin", |
| | + "acceptin", |
| | + "accessin", |
| | + "accommodatin", |
| | + "accompanyin", |
| | + "accomplishin", |
| | + "accordin", |
| | + "accountin", |
| | + "accruin", |
| | + "accumulatin", |
| | + "accusin", |
| | + "achievin", |
| | + "achin", |
| | + "acknowledgin", |
| | + "acquaintin", |
| | + "acquirin", |
| | + "actin", |
| | + "activatin", |
| | + "adaptin", |
| | + "addin", |
| | + "addressin", |
| | + "adherin", |
| | + "adjoinin", |
| | + "adjustin", |
| | + "administerin", |
| | + "admirin", |
| | + "admittin", |
| | + "admonishin", |
| | + "adoin", |
| | + "adoptin", |
| | + "adorin", |
| | + "advancin", |
| | + "advertisin", |
| | + "advisin", |
| | + "advocatin", |
| | + "affectin", |
| | + "affirmin", |
| | + "afflictin", |
| | + "affordin", |
| | + "afishin", |
| | + "aggravatin", |
| | + "agitatin", |
| | + "agoin", |
| | + "agonizin", |
| | + "agreein", |
| | + "ailin", |
| | + "aimin", |
| | + "ain", |
| | + "airin", |
| | + "alarmin", |
| | + "alayin", |
| | + "alertin", |
| | + "alienatin", |
| | + "alightin", |
| | + "alignin", |
| | + "allegin", |
| | + "alleviatin", |
| | + "allocatin", |
| | + "allowin", |
| | + "alludin", |
| | + "allurin", |
| | + "alookin", |
| | + "alterin", |
| | + "alternatin", |
| | + "alyin", |
| | + "amassin", |
| | + "amazin", |
| | + "amblin", |
| | + "amendin", |
| | + "amountin", |
| | + "amplifyin", |
| | + "amusin", |
| | + "analysin", |
| | + "analyzin", |
| | + "anchorin", |
| | + "anglin", |
| | + "animatin", |
| | + "annealin", |
| | + "annihilatin", |
| | + "announcin", |
| | + "annoyin", |
| | + "anointin", |
| | + "answerin", |
| | + "anticipatin", |
| | + "anythin", |
| | + "apologizin", |
| | + "appallin", |
| | + "appealin", |
| | + "appearin", |
| | + "appertainin", |
| | + "appetizin", |
| | + "applaudin", |
| | + "applyin", |
| | + "appointin", |
| | + "appraisin", |
| | + "appreciatin", |
| | + "apprehendin", |
| | + "approachin", |
| | + "appropriatin", |
| | + "approvin", |
| | + "approximatin", |
| | + "archin", |
| | + "archivin", |
| | + "arguin", |
| | + "arisin", |
| | + "arousin", |
| | + "arrangin", |
| | + "arrestin", |
| | + "arrivin", |
| | + "articulatin", |
| | + "ascendin", |
| | + "ascertainin", |
| | + "ascribin", |
| | + "askin", |
| | + "assailin", |
| | + "assaultin", |
| | + "assemblin", |
| | + "assertin", |
| | + "assessin", |
| | + "assignin", |
| | + "assimilatin", |
| | + "assistin", |
| | + "associatin", |
| | + "assumin", |
| | + "assurin", |
| | + "astonishin", |
| | + "astoundin", |
| | + "atonin", |
| | + "attachin", |
| | + "attackin", |
| | + "attainin", |
| | + "attemptin", |
| | + "attendin", |
| | + "attestin", |
| | + "attractin", |
| | + "attributin", |
| | + "auditin", |
| | + "augmentin", |
| | + "authorin", |
| | + "authorisin", |
| | + "authorizin", |
| | + "availin", |
| | + "avengin", |
| | + "averagin", |
| | + "avertin", |
| | + "avoidin", |
| | + "awaitin", |
| | + "awakenin", |
| | + "awakin", |
| | + "awardin", |
| | + "awin", |
| | + "awnin", |
| | + "awonderin", |
| | + "axin", |
| | + "babblin", |
| | + "babysittin", |
| | + "backin", |
| | + "backslidin", |
| | + "backtrackin", |
| | + "badgerin", |
| | + "bafflin", |
| | + "baggin", |
| | + "bailin", |
| | + "baitin", |
| | + "bakin", |
| | + "balancin", |
| | + "baldin", |
| | + "balloonin", |
| | + "ballotin", |
| | + "bandagin", |
| | + "bandin", |
| | + "bangin", |
| | + "banishin", |
| | + "bankin", |
| | + "bannin", |
| | + "banquetin", |
| | + "banterin", |
| | + "baptizin", |
| | + "bargainin", |
| | + "barin", |
| | + "barkin", |
| | + "barrin", |
| | + "barterin", |
| | + "bashin", |
| | + "baskin", |
| | + "bastin", |
| | + "bathin", |
| | + "batterin", |
| | + "battin", |
| | + "battlin", |
| | + "bawlin", |
| | + "bayin", |
| | + "beadin", |
| | + "beamin", |
| | + "bearin", |
| | + "beatin", |
| | + "beautifyin", |
| | + "beckonin", |
| | + "becomin", |
| | + "beddin", |
| | + "bedswervin", |
| | "beefin", |
| | "beepin", |