| 7 | 7 | Building the software requires the following third-party programs: |
| 8 | 8 | |
| 9 | * [git](https://git-scm.com) |
|
| 10 | * [OpenJDK 19](https://bell-sw.com/pages/downloads) |
|
| 11 | * [Gradle 8.0.1](https://gradle.org) |
|
| 9 | * [git 2.44.0](https://git-scm.com) |
|
| 10 | * [OpenJDK 21](https://bell-sw.com/pages/downloads) |
|
| 11 | * [Gradle 8.7](https://gradle.org) |
|
| 12 | 12 | |
| 13 | 13 | ## Application |
| 14 | 14 | |
| 15 | 15 | Build the application as follows: |
| 16 | 16 | |
| 17 | git clone https://github.com/DaveJarvis/keenquotes.git |
|
| 17 | git clone https://gitlab.com/DaveJarvis/KeenQuotes.git |
|
| 18 | 18 | cd keenquotes |
| 19 | 19 | gradle clean build |
| ... | ||
| 32 | 32 | |
| 33 | 33 | build/lib/keenquotes.jar |
| 34 | ||
| 35 | 34 | |
| 9 | 9 | # Requirements |
| 10 | 10 | |
| 11 | Download and install JRE 20: |
|
| 11 | Download and install JRE: |
|
| 12 | 12 | |
| 13 | * [Java 20](https://bell-sw.com/pages/downloads) or newer. |
|
| 13 | * [Java 21](https://bell-sw.com/pages/downloads). |
|
| 14 | 14 | |
| 15 | 15 | # Download |
| 12 | 12 | |
| 13 | 13 | dependencies { |
| 14 | def v_junit = '5.10.2' |
|
| 15 | def v_picocli = '4.7.5' |
|
| 16 | ||
| 14 | 17 | // Command-line parsing |
| 15 | implementation 'info.picocli:picocli:4.7.5' |
|
| 18 | implementation "info.picocli:picocli:${v_picocli}" |
|
| 16 | 19 | |
| 17 | testImplementation 'org.junit.jupiter:junit-jupiter-api:5.10.1' |
|
| 18 | testImplementation 'org.junit.jupiter:junit-jupiter-params:5.10.1' |
|
| 19 | testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.10.1' |
|
| 20 | testImplementation "org.junit.jupiter:junit-jupiter-api:${v_junit}" |
|
| 21 | testImplementation "org.junit.jupiter:junit-jupiter-params:${v_junit}" |
|
| 22 | testRuntimeOnly "org.junit.jupiter:junit-jupiter-engine:${v_junit}" |
|
| 20 | 23 | } |
| 21 | 24 | |
| ... | ||
| 30 | 33 | compileJava { |
| 31 | 34 | options.compilerArgs << "-Xlint:unchecked" << "-Xlint:deprecation" |
| 32 | options.compilerArgs.addAll(['--release', '21']) |
|
| 35 | options.compilerArgs.addAll( ['--release', '21'] ) |
|
| 33 | 36 | } |
| 34 | 37 | |
| ... | ||
| 41 | 44 | |
| 42 | 45 | def resourceDir = sourceSets.main.resources.srcDirs[0] |
| 43 | final File propertiesFile = file("${resourceDir}/com/whitemagicsoftware/${applicationName}/app/version.properties") |
|
| 44 | propertiesFile.write("application.version=${version}") |
|
| 46 | final File propertiesFile = file( "${resourceDir}/com/whitemagicsoftware/${applicationName}/app/version.properties" ) |
|
| 47 | propertiesFile.write( "application.version=${version}" ) |
|
| 45 | 48 | |
| 46 | 49 | jar { |
| 47 | 50 | duplicatesStrategy = DuplicatesStrategy.EXCLUDE |
| 48 | 51 | |
| 49 | 52 | manifest { |
| 50 | 53 | attributes 'Main-Class': mainClassName |
| 51 | 54 | } |
| 52 | 55 | |
| 53 | 56 | from { |
| 54 | (configurations.runtimeClasspath.findAll { !it.path.endsWith(".pom") }).collect { |
|
| 55 | it.isDirectory() ? it : zipTree(it) |
|
| 57 | (configurations.runtimeClasspath.findAll { !it.path.endsWith( ".pom" ) }).collect { |
|
| 58 | it.isDirectory() ? it : zipTree( it ) |
|
| 56 | 59 | } |
| 57 | 60 | } |
| ... | ||
| 64 | 67 | tasks.register( 'lib', Jar ) { |
| 65 | 68 | archiveFileName = "${applicationName}.jar" |
| 66 | destinationDirectory = file( "$buildDir/lib" ) |
|
| 69 | destinationDirectory = layout.buildDirectory.dir( "lib" ) |
|
| 67 | 70 | |
| 68 | from "$buildDir/classes/java/main" |
|
| 71 | from layout.buildDirectory.dir( "classes/java/main" ) |
|
| 69 | 72 | exclude "**/KeenQuotes.class" |
| 70 | 73 | } |
| 71 | 74 | |
| 72 | 75 | lib.configure { |
| 73 | 76 | dependsOn compileJava |
| 74 | 77 | } |
| 75 | 78 | |
| 76 | tasks.named('test') { |
|
| 79 | tasks.named( 'test' ) { |
|
| 77 | 80 | useJUnitPlatform() |
| 78 | 81 | } |
| 14 | 14 | * Placeholder for various types of contractions. |
| 15 | 15 | */ |
| 16 | public class Contractions { |
|
| 17 | ||
| 18 | private final Builder mBuilder; |
|
| 19 | ||
| 20 | private Contractions( final Builder builder ) { |
|
| 21 | assert builder != null; |
|
| 22 | mBuilder = builder; |
|
| 23 | } |
|
| 24 | ||
| 25 | /** |
|
| 26 | * Allows constructing a list of custom contractions. |
|
| 27 | */ |
|
| 28 | @SuppressWarnings( "unused" ) |
|
| 29 | public static class Builder { |
|
| 30 | private final Set<String> mBeganEndedUnambiguous = new HashSet<>(); |
|
| 31 | private final Set<String> mBeganUnambiguous = new HashSet<>(); |
|
| 32 | private final Set<String> mEndedUnambiguous = new HashSet<>(); |
|
| 33 | private final Set<String> mBeganAmbiguous = new HashSet<>(); |
|
| 34 | private final Set<String> mEndedAmbiguous = new HashSet<>(); |
|
| 35 | ||
| 36 | public Builder withBeganEndedUnambiguous( final List<String> words ) { |
|
| 37 | mBeganEndedUnambiguous.addAll( words ); |
|
| 38 | return this; |
|
| 39 | } |
|
| 40 | ||
| 41 | public Builder withBeganUnambiguous( final List<String> words ) { |
|
| 42 | mBeganUnambiguous.addAll( words ); |
|
| 43 | return this; |
|
| 44 | } |
|
| 45 | ||
| 46 | public Builder withEndedUnambiguous( final List<String> words ) { |
|
| 47 | mEndedUnambiguous.addAll( words ); |
|
| 48 | return this; |
|
| 49 | } |
|
| 50 | ||
| 51 | public Builder withBeganAmbiguous( final List<String> words ) { |
|
| 52 | mBeganAmbiguous.addAll( words ); |
|
| 53 | return this; |
|
| 54 | } |
|
| 55 | ||
| 56 | public Builder withEndedAmbiguous( final List<String> words ) { |
|
| 57 | mEndedAmbiguous.addAll( words ); |
|
| 58 | return this; |
|
| 59 | } |
|
| 60 | ||
| 61 | /** |
|
| 62 | * Constructs a new set of {@link Contractions} that can be configured |
|
| 63 | * using this {@link Builder} instance. |
|
| 64 | * |
|
| 65 | * @return {@link Contractions} suitable for use with parsing text. |
|
| 66 | */ |
|
| 67 | public Contractions build() { |
|
| 68 | mBeganEndedUnambiguous.addAll( BEGAN_ENDED_UNAMBIGUOUS ); |
|
| 69 | mBeganUnambiguous.addAll( BEGAN_UNAMBIGUOUS ); |
|
| 70 | mEndedUnambiguous.addAll( ENDED_UNAMBIGUOUS ); |
|
| 71 | mBeganAmbiguous.addAll( BEGAN_AMBIGUOUS ); |
|
| 72 | mEndedAmbiguous.addAll( ENDED_AMBIGUOUS ); |
|
| 73 | ||
| 74 | // Remove ambiguous items if they are already declared. |
|
| 75 | mBeganAmbiguous.removeAll( mBeganUnambiguous ); |
|
| 76 | mEndedAmbiguous.removeAll( mEndedUnambiguous ); |
|
| 77 | ||
| 78 | return new Contractions( this ); |
|
| 79 | } |
|
| 80 | ||
| 81 | /** |
|
| 82 | * This returns the {@code fallback} {@link Set} if {@code src} is empty; |
|
| 83 | * otherwise, this returns the empty {@link Set}. |
|
| 84 | * |
|
| 85 | * @param src A set of contractions, possibly empty. |
|
| 86 | * @param fallback The default values to use if {@code src} is empty. |
|
| 87 | * @param <T> The type of data used by both {@link Set}s. |
|
| 88 | * @return An empty {@link Set} if the {@code src} contains at least one |
|
| 89 | * element; otherwise, this will return {@code fallback}. |
|
| 90 | */ |
|
| 91 | private static <T> Set<T> from( final Set<T> src, final Set<T> fallback ) { |
|
| 92 | assert src != null; |
|
| 93 | assert fallback != null; |
|
| 94 | return src.isEmpty() ? fallback : emptySet(); |
|
| 95 | } |
|
| 96 | } |
|
| 97 | ||
| 98 | public boolean beganEndedUnambiguously( final String word ) { |
|
| 99 | assert word != null; |
|
| 100 | return getBeganEndedUnambiguous().contains( word ); |
|
| 101 | } |
|
| 102 | ||
| 103 | /** |
|
| 104 | * Answers whether the given word is a contraction that always starts |
|
| 105 | * with an apostrophe. The comparison is case-insensitive. This must |
|
| 106 | * only be called when a straight quote is followed by a word. |
|
| 107 | * |
|
| 108 | * @param word The word to compare against the list of known unambiguous |
|
| 109 | * contractions. |
|
| 110 | * @return {@code true} when the given word is in the set of unambiguous |
|
| 111 | * contractions. |
|
| 112 | */ |
|
| 113 | public boolean beganUnambiguously( final String word ) { |
|
| 114 | assert word != null; |
|
| 115 | return getBeganUnambiguous().contains( word.toLowerCase() ); |
|
| 116 | } |
|
| 117 | ||
| 118 | /** |
|
| 119 | * Answers whether the given word could be a contraction but is also a |
|
| 120 | * valid word in non-contracted form. |
|
| 121 | * |
|
| 122 | * @param word The word to compare against the list of known ambiguous |
|
| 123 | * contractions. |
|
| 124 | * @return {@code true} when the given word is in the set of ambiguous |
|
| 125 | * contractions. |
|
| 126 | */ |
|
| 127 | public boolean beganAmbiguously( final String word ) { |
|
| 128 | assert word != null; |
|
| 129 | return getBeganAmbiguous().contains( word.toLowerCase() ); |
|
| 130 | } |
|
| 131 | ||
| 132 | public boolean endedUnambiguously( final String word ) { |
|
| 133 | assert word != null; |
|
| 134 | return getEndedUnambiguous().contains( word.toLowerCase() ); |
|
| 135 | } |
|
| 136 | ||
| 137 | public boolean endedAmbiguously( final String word ) { |
|
| 138 | assert word != null; |
|
| 139 | final var check = word.toLowerCase(); |
|
| 140 | ||
| 141 | // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet |
|
| 142 | // allow o' to match because 'a sentence can end with the letter o'. |
|
| 143 | return getEndedAmbiguous().contains( check ) || |
|
| 144 | check.endsWith( "s" ) || check.endsWith( "z" ) || |
|
| 145 | check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" )); |
|
| 146 | } |
|
| 147 | ||
| 148 | private Set<String> getBeganEndedUnambiguous() { |
|
| 149 | return mBuilder.mBeganEndedUnambiguous; |
|
| 150 | } |
|
| 151 | ||
| 152 | private Set<String> getBeganUnambiguous() { |
|
| 153 | return mBuilder.mBeganUnambiguous; |
|
| 154 | } |
|
| 155 | ||
| 156 | private Set<String> getEndedUnambiguous() { |
|
| 157 | return mBuilder.mEndedUnambiguous; |
|
| 158 | } |
|
| 159 | ||
| 160 | private Set<String> getBeganAmbiguous() { |
|
| 161 | return mBuilder.mBeganAmbiguous; |
|
| 162 | } |
|
| 163 | ||
| 164 | private Set<String> getEndedAmbiguous() { |
|
| 165 | return mBuilder.mEndedAmbiguous; |
|
| 166 | } |
|
| 167 | ||
| 168 | @Override |
|
| 169 | public String toString() { |
|
| 170 | return |
|
| 171 | toString( getBeganEndedUnambiguous(), "Unambiguous Began/Ended", "'%s" ) + |
|
| 172 | toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) + |
|
| 173 | toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) + |
|
| 174 | toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) + |
|
| 175 | toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" ); |
|
| 176 | } |
|
| 177 | ||
| 178 | private String toString( |
|
| 179 | final Set<String> words, |
|
| 180 | final String category, |
|
| 181 | final String fmt |
|
| 182 | ) { |
|
| 183 | final var sb = new StringBuilder( 16384 ); |
|
| 184 | final var newline = System.lineSeparator(); |
|
| 185 | final var list = new ArrayList<>( words ); |
|
| 186 | ||
| 187 | sort( list ); |
|
| 188 | sb.append( format( "%n%s%n", category ) ); |
|
| 189 | list.forEach( ( s ) -> sb.append( format( fmt, s ) ).append( newline ) ); |
|
| 190 | ||
| 191 | return sb.toString(); |
|
| 192 | } |
|
| 193 | ||
| 194 | /** |
|
| 195 | * Words having a straight apostrophe at the beginning and end. |
|
| 196 | */ |
|
| 197 | private static final Set<String> BEGAN_ENDED_UNAMBIGUOUS = Set.of( |
|
| 198 | // hacking |
|
| 199 | "ackin", |
|
| 200 | // hammering |
|
| 201 | "ammerin", |
|
| 202 | // hankering |
|
| 203 | "ankerin", |
|
| 204 | // having |
|
| 205 | "avin", |
|
| 206 | // hawking |
|
| 207 | "awkin", |
|
| 208 | // excepting |
|
| 209 | "cepin", |
|
| 210 | // excepting |
|
| 211 | "ceppin", |
|
| 212 | // excepting |
|
| 213 | "ceptin", |
|
| 214 | // according |
|
| 215 | "cordin", |
|
| 216 | // heading |
|
| 217 | "eadin", |
|
| 218 | // leaving |
|
| 219 | "eavin", |
|
| 220 | // helping |
|
| 221 | "elpin", |
|
| 222 | // hindering |
|
| 223 | "inderin", |
|
| 224 | // electioneering |
|
| 225 | "lectioneerin", |
|
| 226 | // amazing |
|
| 227 | "mazin", |
|
| 228 | // remembering |
|
| 229 | "memberin", |
|
| 230 | // fish 'n' chips |
|
| 231 | "n", |
|
| 232 | // hobbling |
|
| 233 | "obblin", |
|
| 234 | // holding |
|
| 235 | "oldin", |
|
| 236 | // hollering |
|
| 237 | "ollerin", |
|
| 238 | // hopping |
|
| 239 | "oppin", |
|
| 240 | // housekeeping |
|
| 241 | "ousekeepin", |
|
| 242 | // howling |
|
| 243 | "owlin", |
|
| 244 | // excepting |
|
| 245 | "sceptin", |
|
| 246 | // expecting |
|
| 247 | "spectin", |
|
| 248 | // explaining |
|
| 249 | "splainin", |
|
| 250 | // supposing |
|
| 251 | "sposin", |
|
| 252 | "sputin", |
|
| 253 | // astonishing |
|
| 254 | "stonishin", |
|
| 255 | // destroying |
|
| 256 | "stroyin", |
|
| 257 | // persuading |
|
| 258 | "suadin", |
|
| 259 | "titivatin", |
|
| 260 | // introducing |
|
| 261 | "troducin", |
|
| 262 | // hugging |
|
| 263 | "uggin", |
|
| 264 | // hulking |
|
| 265 | "ulkin", |
|
| 266 | // humbugging |
|
| 267 | "umbuggin", |
|
| 268 | // humiliating |
|
| 269 | "umiliatin", |
|
| 270 | // humming |
|
| 271 | "ummin", |
|
| 272 | // humping |
|
| 273 | "umpin", |
|
| 274 | // hurrying |
|
| 275 | "urryin", |
|
| 276 | // hurting |
|
| 277 | "urtin", |
|
| 278 | // hustling |
|
| 279 | "ustlin", |
|
| 280 | // investigating |
|
| 281 | "vestigatin", |
|
| 282 | // inviting |
|
| 283 | "vitin", |
|
| 284 | // excepting |
|
| 285 | "xceptin", |
|
| 286 | // explaining |
|
| 287 | "xplainin", |
|
| 288 | // exploding |
|
| 289 | "xplodin" |
|
| 290 | ); |
|
| 291 | ||
| 292 | /** |
|
| 293 | * Words having a straight apostrophe that cannot be mistaken for an |
|
| 294 | * opening single quote. |
|
| 295 | */ |
|
| 296 | private static final Set<String> BEGAN_UNAMBIGUOUS = Set.of( |
|
| 297 | "aporth", |
|
| 298 | // about you |
|
| 299 | "boutcha", |
|
| 300 | // about you |
|
| 301 | "boutchu", |
|
| 302 | // about well |
|
| 303 | "boutwell", |
|
| 304 | // except |
|
| 305 | "cept", |
|
| 306 | // decided |
|
| 307 | "cided", |
|
| 308 | // because |
|
| 309 | "cos", |
|
| 310 | // armadillo |
|
| 311 | "dillo", |
|
| 312 | // themselves, |
|
| 313 | "emselves", |
|
| 314 | // affectionate |
|
| 315 | "fectionate", |
|
| 316 | // before |
|
| 317 | "fore", |
|
| 318 | // afraid |
|
| 319 | "fraid", |
|
| 320 | // ??? |
|
| 321 | "funder", |
|
| 322 | // against |
|
| 323 | "gainst", |
|
| 324 | // him |
|
| 325 | "im", |
|
| 326 | // little |
|
| 327 | "ittle", |
|
| 328 | // and |
|
| 329 | "n", |
|
| 330 | // beneath |
|
| 331 | "neath", |
|
| 332 | // another |
|
| 333 | "nother", |
|
| 334 | // another |
|
| 335 | "nudder", |
|
| 336 | // enough |
|
| 337 | "nuff", |
|
| 338 | // gonna |
|
| 339 | "onna", |
|
| 340 | "onna'", |
|
| 341 | // horse |
|
| 342 | "oss", |
|
| 343 | // horses |
|
| 344 | "osses", |
|
| 345 | // splash |
|
| 346 | "plash", |
|
| 347 | // upon |
|
| 348 | "pon", |
|
| 349 | // that's|is |
|
| 350 | "s", |
|
| 351 | "sblood", |
|
| 352 | // excuse |
|
| 353 | "scuse", |
|
| 354 | "sfar", |
|
| 355 | "sfoot", |
|
| 356 | // considered |
|
| 357 | "sidered", |
|
| 358 | // suspect|expect |
|
| 359 | "spect", |
|
| 360 | // suspects|expects |
|
| 361 | "spects", |
|
| 362 | // exploit |
|
| 363 | "sploit", |
|
| 364 | // exploits |
|
| 365 | "sploits", |
|
| 366 | // suppose |
|
| 367 | "spose", |
|
| 368 | // instead |
|
| 369 | "stead", |
|
| 370 | // it |
|
| 371 | "t", |
|
| 372 | "taint", |
|
| 373 | "tain", |
|
| 374 | "tay", |
|
| 375 | "til", |
|
| 376 | "tis", |
|
| 377 | "tish", |
|
| 378 | // it isn't |
|
| 379 | "tisn", |
|
| 380 | // stomach |
|
| 381 | "tomach", |
|
| 382 | // stormed |
|
| 383 | "tormed", |
|
| 384 | "tshall", |
|
| 385 | "twas", |
|
| 386 | "twasn", |
|
| 387 | "tween", |
|
| 388 | "twere", |
|
| 389 | "tweren", |
|
| 390 | "twixt", |
|
| 391 | "twon", |
|
| 392 | "twou", |
|
| 393 | "twould", |
|
| 394 | "twouldn", |
|
| 395 | // one |
|
| 396 | "un", |
|
| 397 | // have |
|
| 398 | "ve", |
|
| 399 | // exactly |
|
| 400 | "xactly" |
|
| 401 | ); |
|
| 402 | ||
| 403 | /** |
|
| 404 | * Words having a straight apostrophe that may be either part of a |
|
| 405 | * contraction or a word that stands alone beside an opening single quote. |
|
| 406 | */ |
|
| 407 | private static final Set<String> BEGAN_AMBIGUOUS = Set.of( |
|
| 408 | // have |
|
| 409 | "a", |
|
| 16 | @SuppressWarnings( { "SpellCheckingInspection", "GrazieInspection" } ) |
|
| 17 | public class Contractions { |
|
| 18 | ||
| 19 | private final Builder mBuilder; |
|
| 20 | ||
| 21 | private Contractions( final Builder builder ) { |
|
| 22 | assert builder != null; |
|
| 23 | mBuilder = builder; |
|
| 24 | } |
|
| 25 | ||
| 26 | /** |
|
| 27 | * Allows constructing a list of custom contractions. |
|
| 28 | */ |
|
| 29 | @SuppressWarnings( "unused" ) |
|
| 30 | public static class Builder { |
|
| 31 | private final Set<String> mBeganEndedUnambiguous = new HashSet<>(); |
|
| 32 | private final Set<String> mBeganUnambiguous = new HashSet<>(); |
|
| 33 | private final Set<String> mEndedUnambiguous = new HashSet<>(); |
|
| 34 | private final Set<String> mBeganAmbiguous = new HashSet<>(); |
|
| 35 | private final Set<String> mEndedAmbiguous = new HashSet<>(); |
|
| 36 | ||
| 37 | public Builder withBeganEndedUnambiguous( final List<String> words ) { |
|
| 38 | mBeganEndedUnambiguous.addAll( words ); |
|
| 39 | return this; |
|
| 40 | } |
|
| 41 | ||
| 42 | public Builder withBeganUnambiguous( final List<String> words ) { |
|
| 43 | mBeganUnambiguous.addAll( words ); |
|
| 44 | return this; |
|
| 45 | } |
|
| 46 | ||
| 47 | public Builder withEndedUnambiguous( final List<String> words ) { |
|
| 48 | mEndedUnambiguous.addAll( words ); |
|
| 49 | return this; |
|
| 50 | } |
|
| 51 | ||
| 52 | public Builder withBeganAmbiguous( final List<String> words ) { |
|
| 53 | mBeganAmbiguous.addAll( words ); |
|
| 54 | return this; |
|
| 55 | } |
|
| 56 | ||
| 57 | public Builder withEndedAmbiguous( final List<String> words ) { |
|
| 58 | mEndedAmbiguous.addAll( words ); |
|
| 59 | return this; |
|
| 60 | } |
|
| 61 | ||
| 62 | /** |
|
| 63 | * Constructs a new set of {@link Contractions} that can be configured |
|
| 64 | * using this {@link Builder} instance. |
|
| 65 | * |
|
| 66 | * @return {@link Contractions} suitable for use with parsing text. |
|
| 67 | */ |
|
| 68 | public Contractions build() { |
|
| 69 | mBeganEndedUnambiguous.addAll( BEGAN_ENDED_UNAMBIGUOUS ); |
|
| 70 | mBeganUnambiguous.addAll( BEGAN_UNAMBIGUOUS ); |
|
| 71 | mEndedUnambiguous.addAll( ENDED_UNAMBIGUOUS ); |
|
| 72 | mBeganAmbiguous.addAll( BEGAN_AMBIGUOUS ); |
|
| 73 | mEndedAmbiguous.addAll( ENDED_AMBIGUOUS ); |
|
| 74 | ||
| 75 | // Remove ambiguous items if they are already declared. |
|
| 76 | mBeganAmbiguous.removeAll( mBeganUnambiguous ); |
|
| 77 | mEndedAmbiguous.removeAll( mEndedUnambiguous ); |
|
| 78 | ||
| 79 | return new Contractions( this ); |
|
| 80 | } |
|
| 81 | ||
| 82 | /** |
|
| 83 | * This returns the {@code fallback} {@link Set} if {@code src} is empty; |
|
| 84 | * otherwise, this returns the empty {@link Set}. |
|
| 85 | * |
|
| 86 | * @param src A set of contractions, possibly empty. |
|
| 87 | * @param fallback The default values to use if {@code src} is empty. |
|
| 88 | * @param <T> The type of data used by both {@link Set}s. |
|
| 89 | * @return An empty {@link Set} if the {@code src} contains at least one |
|
| 90 | * element; otherwise, this will return {@code fallback}. |
|
| 91 | */ |
|
| 92 | private static <T> Set<T> from( final Set<T> src, final Set<T> fallback ) { |
|
| 93 | assert src != null; |
|
| 94 | assert fallback != null; |
|
| 95 | return src.isEmpty() ? fallback : emptySet(); |
|
| 96 | } |
|
| 97 | } |
|
| 98 | ||
| 99 | public boolean beganEndedUnambiguously( final String word ) { |
|
| 100 | assert word != null; |
|
| 101 | return getBeganEndedUnambiguous().contains( word ); |
|
| 102 | } |
|
| 103 | ||
| 104 | /** |
|
| 105 | * Answers whether the given word is a contraction that always starts |
|
| 106 | * with an apostrophe. The comparison is case-insensitive. This must |
|
| 107 | * only be called when a straight quote is followed by a word. |
|
| 108 | * |
|
| 109 | * @param word The word to compare against the list of known unambiguous |
|
| 110 | * contractions. |
|
| 111 | * @return {@code true} when the given word is in the set of unambiguous |
|
| 112 | * contractions. |
|
| 113 | */ |
|
| 114 | public boolean beganUnambiguously( final String word ) { |
|
| 115 | assert word != null; |
|
| 116 | return getBeganUnambiguous().contains( word.toLowerCase() ); |
|
| 117 | } |
|
| 118 | ||
| 119 | /** |
|
| 120 | * Answers whether the given word could be a contraction but is also a |
|
| 121 | * valid word in non-contracted form. |
|
| 122 | * |
|
| 123 | * @param word The word to compare against the list of known ambiguous |
|
| 124 | * contractions. |
|
| 125 | * @return {@code true} when the given word is in the set of ambiguous |
|
| 126 | * contractions. |
|
| 127 | */ |
|
| 128 | public boolean beganAmbiguously( final String word ) { |
|
| 129 | assert word != null; |
|
| 130 | return getBeganAmbiguous().contains( word.toLowerCase() ); |
|
| 131 | } |
|
| 132 | ||
| 133 | public boolean endedUnambiguously( final String word ) { |
|
| 134 | assert word != null; |
|
| 135 | return getEndedUnambiguous().contains( word.toLowerCase() ); |
|
| 136 | } |
|
| 137 | ||
| 138 | public boolean endedAmbiguously( final String word ) { |
|
| 139 | assert word != null; |
|
| 140 | final var check = word.toLowerCase(); |
|
| 141 | ||
| 142 | // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet |
|
| 143 | // allow o' to match because 'a sentence can end with the letter o'. |
|
| 144 | return getEndedAmbiguous().contains( check ) || |
|
| 145 | check.endsWith( "s" ) || check.endsWith( "z" ) || |
|
| 146 | check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" )); |
|
| 147 | } |
|
| 148 | ||
| 149 | private Set<String> getBeganEndedUnambiguous() { |
|
| 150 | return mBuilder.mBeganEndedUnambiguous; |
|
| 151 | } |
|
| 152 | ||
| 153 | private Set<String> getBeganUnambiguous() { |
|
| 154 | return mBuilder.mBeganUnambiguous; |
|
| 155 | } |
|
| 156 | ||
| 157 | private Set<String> getEndedUnambiguous() { |
|
| 158 | return mBuilder.mEndedUnambiguous; |
|
| 159 | } |
|
| 160 | ||
| 161 | private Set<String> getBeganAmbiguous() { |
|
| 162 | return mBuilder.mBeganAmbiguous; |
|
| 163 | } |
|
| 164 | ||
| 165 | private Set<String> getEndedAmbiguous() { |
|
| 166 | return mBuilder.mEndedAmbiguous; |
|
| 167 | } |
|
| 168 | ||
| 169 | @Override |
|
| 170 | public String toString() { |
|
| 171 | return |
|
| 172 | toString( getBeganEndedUnambiguous(), "Unambiguous Began/Ended", "'%s" ) + |
|
| 173 | toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) + |
|
| 174 | toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) + |
|
| 175 | toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) + |
|
| 176 | toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" ); |
|
| 177 | } |
|
| 178 | ||
| 179 | private String toString( |
|
| 180 | final Set<String> words, |
|
| 181 | final String category, |
|
| 182 | final String fmt |
|
| 183 | ) { |
|
| 184 | final var sb = new StringBuilder( 16384 ); |
|
| 185 | final var newline = System.lineSeparator(); |
|
| 186 | final var list = new ArrayList<>( words ); |
|
| 187 | ||
| 188 | sort( list ); |
|
| 189 | sb.append( format( "%n%s%n", category ) ); |
|
| 190 | list.forEach( ( s ) -> sb.append( format( fmt, s ) ).append( newline ) ); |
|
| 191 | ||
| 192 | return sb.toString(); |
|
| 193 | } |
|
| 194 | ||
| 195 | /** |
|
| 196 | * Words having a straight apostrophe at the beginning and end. |
|
| 197 | */ |
|
| 198 | private static final Set<String> BEGAN_ENDED_UNAMBIGUOUS = Set.of( |
|
| 199 | // hacking |
|
| 200 | "ackin", |
|
| 201 | // hammering |
|
| 202 | "ammerin", |
|
| 203 | // hankering |
|
| 204 | "ankerin", |
|
| 205 | // having |
|
| 206 | "avin", |
|
| 207 | // hawking |
|
| 208 | "awkin", |
|
| 209 | // excepting |
|
| 210 | "cepin", |
|
| 211 | // excepting |
|
| 212 | "ceppin", |
|
| 213 | // excepting |
|
| 214 | "ceptin", |
|
| 215 | // according |
|
| 216 | "cordin", |
|
| 217 | // heading |
|
| 218 | "eadin", |
|
| 219 | // leaving |
|
| 220 | "eavin", |
|
| 221 | // helping |
|
| 222 | "elpin", |
|
| 223 | // hindering |
|
| 224 | "inderin", |
|
| 225 | // electioneering |
|
| 226 | "lectioneerin", |
|
| 227 | // amazing |
|
| 228 | "mazin", |
|
| 229 | // remembering |
|
| 230 | "memberin", |
|
| 231 | // fish 'n' chips |
|
| 232 | "n", |
|
| 233 | // hobbling |
|
| 234 | "obblin", |
|
| 235 | // holding |
|
| 236 | "oldin", |
|
| 237 | // hollering |
|
| 238 | "ollerin", |
|
| 239 | // hopping |
|
| 240 | "oppin", |
|
| 241 | // housekeeping |
|
| 242 | "ousekeepin", |
|
| 243 | // howling |
|
| 244 | "owlin", |
|
| 245 | // excepting |
|
| 246 | "sceptin", |
|
| 247 | // expecting |
|
| 248 | "spectin", |
|
| 249 | // explaining |
|
| 250 | "splainin", |
|
| 251 | // supposing |
|
| 252 | "sposin", |
|
| 253 | "sputin", |
|
| 254 | // astonishing |
|
| 255 | "stonishin", |
|
| 256 | // destroying |
|
| 257 | "stroyin", |
|
| 258 | // persuading |
|
| 259 | "suadin", |
|
| 260 | "titivatin", |
|
| 261 | // introducing |
|
| 262 | "troducin", |
|
| 263 | // hugging |
|
| 264 | "uggin", |
|
| 265 | // hulking |
|
| 266 | "ulkin", |
|
| 267 | // humbugging |
|
| 268 | "umbuggin", |
|
| 269 | // humiliating |
|
| 270 | "umiliatin", |
|
| 271 | // humming |
|
| 272 | "ummin", |
|
| 273 | // humping |
|
| 274 | "umpin", |
|
| 275 | // hurrying |
|
| 276 | "urryin", |
|
| 277 | // hurting |
|
| 278 | "urtin", |
|
| 279 | // hustling |
|
| 280 | "ustlin", |
|
| 281 | // investigating |
|
| 282 | "vestigatin", |
|
| 283 | // inviting |
|
| 284 | "vitin", |
|
| 285 | // excepting |
|
| 286 | "xceptin", |
|
| 287 | // explaining |
|
| 288 | "xplainin", |
|
| 289 | // exploding |
|
| 290 | "xplodin" |
|
| 291 | ); |
|
| 292 | ||
| 293 | /** |
|
| 294 | * Words having a straight apostrophe that cannot be mistaken for an |
|
| 295 | * opening single quote. |
|
| 296 | */ |
|
| 297 | private static final Set<String> BEGAN_UNAMBIGUOUS = Set.of( |
|
| 298 | "aporth", |
|
| 299 | // about you |
|
| 300 | "boutcha", |
|
| 301 | // about you |
|
| 302 | "boutchu", |
|
| 303 | // about well |
|
| 304 | "boutwell", |
|
| 305 | // except |
|
| 306 | "cept", |
|
| 307 | // decided |
|
| 308 | "cided", |
|
| 309 | // because |
|
| 310 | "cos", |
|
| 311 | // armadillo |
|
| 312 | "dillo", |
|
| 313 | // themselves, |
|
| 314 | "emselves", |
|
| 315 | // affectionate |
|
| 316 | "fectionate", |
|
| 317 | // before |
|
| 318 | "fore", |
|
| 319 | // afraid |
|
| 320 | "fraid", |
|
| 321 | // ??? |
|
| 322 | "funder", |
|
| 323 | // against |
|
| 324 | "gainst", |
|
| 325 | // him |
|
| 326 | "im", |
|
| 327 | // little |
|
| 328 | "ittle", |
|
| 329 | // and |
|
| 330 | "n", |
|
| 331 | // beneath |
|
| 332 | "neath", |
|
| 333 | // another |
|
| 334 | "nother", |
|
| 335 | // another |
|
| 336 | "nudder", |
|
| 337 | // enough |
|
| 338 | "nuff", |
|
| 339 | // gonna |
|
| 340 | "onna", |
|
| 341 | "onna'", |
|
| 342 | // horse |
|
| 343 | "oss", |
|
| 344 | // horses |
|
| 345 | "osses", |
|
| 346 | // splash |
|
| 347 | "plash", |
|
| 348 | // upon |
|
| 349 | "pon", |
|
| 350 | // that's|is |
|
| 351 | "s", |
|
| 352 | "sblood", |
|
| 353 | // excuse |
|
| 354 | "scuse", |
|
| 355 | "sfar", |
|
| 356 | "sfoot", |
|
| 357 | // considered |
|
| 358 | "sidered", |
|
| 359 | // suspect|expect |
|
| 360 | "spect", |
|
| 361 | // suspects|expects |
|
| 362 | "spects", |
|
| 363 | // exploit |
|
| 364 | "sploit", |
|
| 365 | // exploits |
|
| 366 | "sploits", |
|
| 367 | // suppose |
|
| 368 | "spose", |
|
| 369 | // instead |
|
| 370 | "stead", |
|
| 371 | // it |
|
| 372 | "t", |
|
| 373 | "taint", |
|
| 374 | "tain", |
|
| 375 | "tay", |
|
| 376 | "til", |
|
| 377 | "tis", |
|
| 378 | "tish", |
|
| 379 | // it isn't |
|
| 380 | "tisn", |
|
| 381 | // stomach |
|
| 382 | "tomach", |
|
| 383 | // stormed |
|
| 384 | "tormed", |
|
| 385 | "tshall", |
|
| 386 | "twas", |
|
| 387 | "twasn", |
|
| 388 | "tween", |
|
| 389 | "twere", |
|
| 390 | "tweren", |
|
| 391 | "twixt", |
|
| 392 | "twon", |
|
| 393 | "twou", |
|
| 394 | "twould", |
|
| 395 | "twouldn", |
|
| 396 | // one |
|
| 397 | "un", |
|
| 398 | // have |
|
| 399 | "ve", |
|
| 400 | // exactly |
|
| 401 | "xactly" |
|
| 402 | ); |
|
| 403 | ||
| 404 | /** |
|
| 405 | * Words having a straight apostrophe that may be either part of a |
|
| 406 | * contraction or a word that stands alone beside an opening single quote. |
|
| 407 | */ |
|
| 408 | private static final Set<String> BEGAN_AMBIGUOUS = Set.of( |
|
| 409 | // have |
|
| 410 | "a", |
|
| 411 | // than|an |
|
| 412 | "an", |
|
| 410 | 413 | // about|boxing match |
| 411 | 414 | "bout", |
| 146 | 146 | “‘---has a prison.’” |
| 147 | 147 | |
| 148 | "Teach 'em t'be more 'an we ever been!" |
|
| 149 | “Teach 'em t'be more 'an we ever been!” |
|
| 150 |