Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Add another dash symbol, use newlines from source document

Author: Dave Jarvis <email>
Date: 2021-06-20 18:01:26 GMT-0700
Commit: 6f67234efa93116f27b0112ab6861fd0521f5052
Parent: fef153f
src/main/java/com/whitemagicsoftware/keenquotes/Contractions.java
"fraid",
"gainst",
- "n",
- "neath",
- "nother",
- "nuff",
- "onna",
- "onna'",
- "pon",
- "s",
- "sblood",
- "scuse",
- "sfar",
- "sfoot",
- "t",
- "taint",
- "tain",
- "til",
- "tis",
- "tisn",
- "tshall",
- "twas",
- "twasn",
- "tween",
- "twere",
- "tweren",
- "twixt",
- "twon",
- "twou",
- "twould",
- "twouldn",
- "ve"
- );
-
- /**
- * Words having a straight apostrophe that may be either part of a
- * contraction or a word that stands alone beside an opening single quote.
- */
- private static final Set<String> BEGAN_AMBIGUOUS = Set.of(
- // about|boxing match
- "bout",
- // because|causal
- "cause",
- // what you|choo choo train
- "choo",
- // he|e pluribus unum
- "e",
- // here|earlier
- "ere",
- // afro|to and fro
- "fro",
- // whore|ho ho!
- "ho",
- // okay|letter K
- "kay",
- // lo|lo and behold
- "lo",
- // are|regarding
- "re",
- // what's up|to sup
- "sup",
- // it will|twill fabric
- "twill",
- // them|utterance
- "um",
- // is that|Iranian village
- "zat"
- );
-
- private static final Set<String> ENDED_AMBIGUOUS = Set.of(
- // give|martial arts garment
- "gi",
- // in|I
- "i",
- // of|letter o
- "o"
- );
-
- private static final Set<String> ENDED_UNAMBIGUOUS = Set.of(
- // and
- "an",
- // for/before
- "fo",
- // friend
- "frien",
- // just
- "jus",
- // lord
- "lor",
- // myself
- "masel",
- // old
- "ol",
- // San (Francisco)
- "Sa",
- // shift
- "shif",
- // the
- "th",
- // what
- "wha",
- // world
- "worl",
- // Top ~500 common -ing words as English contractions.
- "acceptin",
- "accompanyin",
- "accordin",
- "accountin",
- "achievin",
- "acquirin",
- "actin",
- "addin",
- "addressin",
- "adjoinin",
- "adoptin",
- "advancin",
- "advertisin",
- "affectin",
- "agin",
- "allowin",
- "amazin",
- "analyzin",
- "answerin",
- "anythin",
- "appearin",
- "applyin",
- "approachin",
- "arguin",
- "arisin",
- "arrivin",
- "askin",
- "assessin",
- "assumin",
- "attackin",
- "attemptin",
- "attendin",
- "avoidin",
- "bankin",
- "bargainin",
- "bearin",
- "beatin",
- "becomin",
- "beginnin",
- "bein",
- "believin",
- "belongin",
- "bendin",
- "bindin",
- "bleedin",
- "blessin",
- "blowin",
- "boilin",
- "borrowin",
- "breakin",
- "breathin",
- "breedin",
- "bringin",
- "broadcastin",
- "buildin",
- "burnin",
- "buyin",
- "calculatin",
- "callin",
- "carryin",
- "castin",
- "causin",
- "ceilin",
- "challengin",
- "changin",
- "checkin",
- "choosin",
- "claimin",
- "cleanin",
- "clearin",
- "climbin",
- "closin",
- "clothin",
- "collectin",
- "combinin",
- "comin",
- "commandin",
- "comparin",
- "compellin",
- "competin",
- "computin",
- "concernin",
- "concludin",
- "conditionin",
- "conductin",
- "conflictin",
- "connectin",
- "considerin",
- "consistin",
- "constructin",
- "consultin",
- "consumin",
- "containin",
- "continuin",
- "contractin",
- "contributin",
- "controllin",
- "convincin",
- "cookin",
- "coolin",
- "copin",
- "correspondin",
- "counselin",
- "countin",
- "couplin",
- "coverin",
- "creatin",
- "crossin",
- "cryin",
- "cuttin",
- "dancin",
- "darlin",
- "datin",
- "dealin",
- "decidin",
- "declarin",
- "declinin",
- "decreasin",
- "definin",
- "demandin",
- "denyin",
- "dependin",
- "descendin",
- "describin",
- "designin",
- "destroyin",
- "determinin",
- "developin",
- "differin",
- "dinin",
- "directin",
- "discussin",
- "distinguishin",
- "disturbin",
- "dividin",
- "doin",
- "drawin",
- "dressin",
- "drinkin",
- "drivin",
- "droppin",
- "dryin",
- "durin",
- "dwellin",
- "dyin",
- "eatin",
- "editin",
- "emergin",
- "employin",
- "enablin",
- "encouragin",
- "endin",
- "engagin",
- "engineerin",
- "enjoyin",
- "enterin",
- "establishin",
- "evaluatin",
- "evenin",
- "everythin",
- "examinin",
- "exceedin",
- "excitin",
- "excludin",
- "existin",
- "expandin",
- "expectin",
- "experiencin",
- "explainin",
- "explorin",
- "expressin",
- "extendin",
- "facin",
- "failin",
- "fallin",
- "farmin",
- "fascinatin",
- "feedin",
- "feelin",
- "fightin",
- "filin",
- "fillin",
- "financin",
- "findin",
- "firin",
- "fishin",
- "fittin",
- "fixin",
- "floatin",
- "flowin",
- "flyin",
- "focusin",
- "followin",
- "forcin",
- "foregoin",
- "formin",
- "forthcomin",
- "foundin",
- "freezin",
- "fuckin",
- "functionin",
- "fundin",
- "gainin",
- "gatherin",
- "generatin",
- "gettin",
- "givin",
- "goin",
- "governin",
- "grantin",
- "growin",
- "hackin",
- "handlin",
- "hangin",
- "happenin",
- "havin",
- "headin",
- "healin",
- "hearin",
- "heatin",
- "helpin",
- "hidin",
- "holdin",
- "hopin",
- "housin",
- "huntin",
- "identifyin",
- "imagin",
- "implementin",
- "imposin",
- "improvin",
- "includin",
- "increasin",
- "indicatin",
- "interestin",
- "interpretin",
- "introducin",
- "involvin",
- "joinin",
- "judgin",
- "keepin",
- "killin",
- "knowin",
- "lackin",
- "landin",
- "lastin",
- "laughin",
- "layin",
- "leadin",
- "leanin",
- "learnin",
- "leavin",
- "lettin",
- "liftin",
- "lightin",
- "lightnin",
- "limitin",
- "listenin",
- "listin",
- "livin",
- "loadin",
- "lookin",
- "losin",
- "lovin",
- "lowerin",
- "lyin",
- "maintainin",
- "makin",
- "managin",
- "manufacturin",
- "mappin",
- "marketin",
- "markin",
- "matchin",
- "meanin",
- "measurin",
- "meetin",
- "meltin",
- "minin",
- "misleadin",
- "missin",
- "mixin",
- "modelin",
- "monitorin",
- "mornin",
- "movin",
- "neighborin",
- "neighbourin",
- "nothin",
- "notin",
- "notwithstandin",
- "nursin",
- "observin",
- "obtainin",
- "occurrin",
- "offerin",
- "offsprin",
- "ongoin",
- "openin",
- "operatin",
- "opposin",
- "orderin",
- "organizin",
- "outstandin",
- "overwhelmin",
- "packin",
- "paintin",
- "parkin",
- "participatin",
- "passin",
- "payin",
- "pendin",
- "performin",
- "pickin",
- "pissin",
- "placin",
- "plannin",
- "plantin",
- "playin",
- "pleasin",
- "pointin",
- "possessin",
- "preachin",
- "precedin",
- "preparin",
- "presentin",
- "preservin",
- "pressin",
- "prevailin",
- "preventin",
- "pricin",
- "printin",
- "proceedin",
- "processin",
- "producin",
- "programmin",
- "promisin",
- "promotin",
- "protectin",
- "providin",
- "provin",
- "publishin",
- "pullin",
- "purchasin",
- "pursuin",
- "pushin",
- "puttin",
- "questionin",
- "rangin",
- "ratin",
- "reachin",
- "readin",
- "reasonin",
- "receivin",
- "recognizin",
- "recordin",
- "reducin",
- "referrin",
- "reflectin",
- "refusin",
- "regardin",
- "regulatin",
- "relatin",
- "remainin",
- "rememberin",
- "removin",
- "renderin",
- "repeatin",
- "replacin",
- "reportin",
- "representin",
- "requirin",
- "respectin",
- "respondin",
- "restin",
- "resultin",
- "returnin",
- "revealin",
- "ridin",
- "risin",
- "rulin",
- "runnin",
+ "im",
+ "n",
+ "neath",
+ "nother",
+ "nuff",
+ "onna",
+ "onna'",
+ "pon",
+ "s",
+ "sblood",
+ "scuse",
+ "sfar",
+ "sfoot",
+ "t",
+ "taint",
+ "tain",
+ "til",
+ "tis",
+ "tisn",
+ "tshall",
+ "twas",
+ "twasn",
+ "tween",
+ "twere",
+ "tweren",
+ "twixt",
+ "twon",
+ "twou",
+ "twould",
+ "twouldn",
+ "ve"
+ );
+
+ /**
+ * Words having a straight apostrophe that may be either part of a
+ * contraction or a word that stands alone beside an opening single quote.
+ */
+ private static final Set<String> BEGAN_AMBIGUOUS = Set.of(
+ // about|boxing match
+ "bout",
+ // because|causal
+ "cause",
+ // what you|choo choo train
+ "choo",
+ // he|e pluribus unum
+ "e",
+ // here|earlier
+ "ere",
+ // afro|to and fro
+ "fro",
+ // whore|ho ho!
+ "ho",
+ // okay|letter K
+ "kay",
+ // lo|lo and behold
+ "lo",
+ // are|regarding
+ "re",
+ // what's up|to sup
+ "sup",
+ // it will|twill fabric
+ "twill",
+ // them|utterance
+ "um",
+ // is that|Iranian village
+ "zat"
+ );
+
+ private static final Set<String> ENDED_AMBIGUOUS = Set.of(
+ // give|martial arts garment
+ "gi",
+ // in|I
+ "i",
+ // of|letter o
+ "o"
+ );
+
+ private static final Set<String> ENDED_UNAMBIGUOUS = Set.of(
+ // and
+ "an",
+ // for/before
+ "fo",
+ // friend
+ "frien",
+ // just
+ "jus",
+ // lord
+ "lor",
+ // myself
+ "masel",
+ // old
+ "ol",
+ // San (Francisco)
+ "Sa",
+ // shift
+ "shif",
+ // the
+ "th",
+ // what
+ "wha",
+ // world
+ "worl",
+ // Top ~500 common -ing words as English contractions.
+ "acceptin",
+ "accompanyin",
+ "accordin",
+ "accountin",
+ "achievin",
+ "acquirin",
+ "actin",
+ "addin",
+ "addressin",
+ "adjoinin",
+ "adoptin",
+ "advancin",
+ "advertisin",
+ "affectin",
+ "agin",
+ "allowin",
+ "amazin",
+ "analyzin",
+ "answerin",
+ "anythin",
+ "appearin",
+ "applyin",
+ "approachin",
+ "arguin",
+ "arisin",
+ "arrivin",
+ "askin",
+ "assessin",
+ "assumin",
+ "attackin",
+ "attemptin",
+ "attendin",
+ "avoidin",
+ "bankin",
+ "bargainin",
+ "bearin",
+ "beatin",
+ "becomin",
+ "beginnin",
+ "bein",
+ "believin",
+ "belongin",
+ "bendin",
+ "bindin",
+ "bleedin",
+ "blessin",
+ "blowin",
+ "boilin",
+ "borrowin",
+ "breakin",
+ "breathin",
+ "breedin",
+ "bringin",
+ "broadcastin",
+ "buildin",
+ "burnin",
+ "buyin",
+ "calculatin",
+ "callin",
+ "carryin",
+ "castin",
+ "causin",
+ "ceilin",
+ "challengin",
+ "changin",
+ "checkin",
+ "choosin",
+ "claimin",
+ "cleanin",
+ "clearin",
+ "climbin",
+ "closin",
+ "clothin",
+ "collectin",
+ "combinin",
+ "comin",
+ "commandin",
+ "comparin",
+ "compellin",
+ "competin",
+ "computin",
+ "concernin",
+ "concludin",
+ "conditionin",
+ "conductin",
+ "conflictin",
+ "connectin",
+ "considerin",
+ "consistin",
+ "constructin",
+ "consultin",
+ "consumin",
+ "containin",
+ "continuin",
+ "contractin",
+ "contributin",
+ "controllin",
+ "convincin",
+ "cookin",
+ "coolin",
+ "copin",
+ "correspondin",
+ "counselin",
+ "countin",
+ "couplin",
+ "coverin",
+ "creatin",
+ "crossin",
+ "cryin",
+ "cuttin",
+ "dancin",
+ "darlin",
+ "datin",
+ "dealin",
+ "decidin",
+ "declarin",
+ "declinin",
+ "decreasin",
+ "definin",
+ "demandin",
+ "denyin",
+ "dependin",
+ "descendin",
+ "describin",
+ "designin",
+ "destroyin",
+ "determinin",
+ "developin",
+ "differin",
+ "dinin",
+ "directin",
+ "discussin",
+ "distinguishin",
+ "disturbin",
+ "dividin",
+ "doin",
+ "drawin",
+ "dressin",
+ "drinkin",
+ "drivin",
+ "droppin",
+ "dryin",
+ "durin",
+ "dwellin",
+ "dyin",
+ "eatin",
+ "editin",
+ "emergin",
+ "employin",
+ "enablin",
+ "encouragin",
+ "endin",
+ "engagin",
+ "engineerin",
+ "enjoyin",
+ "enterin",
+ "establishin",
+ "evaluatin",
+ "evenin",
+ "everythin",
+ "examinin",
+ "exceedin",
+ "excitin",
+ "excludin",
+ "existin",
+ "expandin",
+ "expectin",
+ "experiencin",
+ "explainin",
+ "explorin",
+ "expressin",
+ "extendin",
+ "facin",
+ "failin",
+ "fallin",
+ "farmin",
+ "fascinatin",
+ "feedin",
+ "feelin",
+ "fightin",
+ "filin",
+ "fillin",
+ "financin",
+ "findin",
+ "firin",
+ "fishin",
+ "fittin",
+ "fixin",
+ "floatin",
+ "flowin",
+ "flyin",
+ "focusin",
+ "followin",
+ "forcin",
+ "foregoin",
+ "formin",
+ "forthcomin",
+ "foundin",
+ "freezin",
+ "fuckin",
+ "functionin",
+ "fundin",
+ "gainin",
+ "gatherin",
+ "generatin",
+ "gettin",
+ "givin",
+ "goin",
+ "governin",
+ "grantin",
+ "growin",
+ "hackin",
+ "handlin",
+ "hangin",
+ "happenin",
+ "havin",
+ "headin",
+ "healin",
+ "hearin",
+ "heatin",
+ "helpin",
+ "hidin",
+ "holdin",
+ "hopin",
+ "housin",
+ "huntin",
+ "identifyin",
+ "imagin",
+ "implementin",
+ "imposin",
+ "improvin",
+ "includin",
+ "increasin",
+ "indicatin",
+ "interestin",
+ "interpretin",
+ "introducin",
+ "involvin",
+ "joinin",
+ "judgin",
+ "keepin",
+ "killin",
+ "knowin",
+ "lackin",
+ "landin",
+ "lastin",
+ "laughin",
+ "layin",
+ "leadin",
+ "leanin",
+ "learnin",
+ "leavin",
+ "lettin",
+ "liftin",
+ "lightin",
+ "lightnin",
+ "limitin",
+ "listenin",
+ "listin",
+ "livin",
+ "loadin",
+ "lookin",
+ "losin",
+ "lovin",
+ "lowerin",
+ "lyin",
+ "maintainin",
+ "makin",
+ "managin",
+ "manufacturin",
+ "mappin",
+ "marketin",
+ "markin",
+ "matchin",
+ "meanin",
+ "measurin",
+ "meetin",
+ "meltin",
+ "minin",
+ "misleadin",
+ "missin",
+ "mixin",
+ "modelin",
+ "monitorin",
+ "mornin",
+ "movin",
+ "neighborin",
+ "neighbourin",
+ "nothin",
+ "notin",
+ "notwithstandin",
+ "nursin",
+ "observin",
+ "obtainin",
+ "occurrin",
+ "offerin",
+ "offsprin",
+ "ongoin",
+ "openin",
+ "operatin",
+ "opposin",
+ "orderin",
+ "organizin",
+ "outstandin",
+ "overwhelmin",
+ "packin",
+ "paintin",
+ "parkin",
+ "participatin",
+ "passin",
+ "payin",
+ "pendin",
+ "performin",
+ "pickin",
+ "pissin",
+ "placin",
+ "plannin",
+ "plantin",
+ "playin",
+ "pleasin",
+ "pointin",
+ "possessin",
+ "preachin",
+ "precedin",
+ "preparin",
+ "presentin",
+ "preservin",
+ "pressin",
+ "prevailin",
+ "preventin",
+ "pricin",
+ "printin",
+ "proceedin",
+ "processin",
+ "producin",
+ "programmin",
+ "promisin",
+ "promotin",
+ "protectin",
+ "providin",
+ "provin",
+ "publishin",
+ "pullin",
+ "purchasin",
+ "pursuin",
+ "pushin",
+ "puttin",
+ "questionin",
+ "rangin",
+ "ratin",
+ "reachin",
+ "readin",
+ "reasonin",
+ "receivin",
+ "recognizin",
+ "recordin",
+ "reducin",
+ "referrin",
+ "reflectin",
+ "refusin",
+ "regardin",
+ "regulatin",
+ "relatin",
+ "remainin",
+ "rememberin",
+ "removin",
+ "renderin",
+ "repeatin",
+ "replacin",
+ "reportin",
+ "representin",
+ "requirin",
+ "respectin",
+ "respondin",
+ "restin",
+ "resultin",
+ "returnin",
+ "revealin",
+ "ridin",
+ "risin",
+ "rulin",
+ "runnin",
+ "rythin",
"sailin",
"samplin",
src/main/java/com/whitemagicsoftware/keenquotes/KeenQuotes.java
import picocli.CommandLine;
-import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
-import java.io.InputStreamReader;
import java.util.Properties;
import static java.lang.String.format;
+import static java.lang.System.*;
import static picocli.CommandLine.Help.Ansi.Style.*;
import static picocli.CommandLine.Help.ColorScheme;
if( settings.displayList() ) {
- System.out.println( contractions.toString() );
+ out.println( contractions.toString() );
}
else {
- convert( new Converter( System.err::println, contractions ) );
+ try {
+ out.print( convert( new Converter( err::println, contractions ) ) );
+ } catch( final Exception ex ) {
+ ex.printStackTrace( err );
+ }
}
}
return builder.build();
}
-
- private void convert( final Converter converter ) {
- final var sb = new StringBuilder();
-
- try( final var reader = open( System.in ) ) {
- String line;
- final var sep = System.lineSeparator();
-
- while( (line = reader.readLine()) != null ) {
- sb.append( line );
- sb.append( sep );
- }
- System.out.println( converter.apply( sb.toString() ) );
- } catch( final Exception ex ) {
- ex.printStackTrace( System.err );
- }
+ private String convert( final Converter converter ) throws IOException {
+ return converter.apply( new String( System.in.readAllBytes() ) );
}
private static InputStream getResourceAsStream( final String resource ) {
return KeenQuotes.class.getClassLoader().getResourceAsStream( resource );
- }
-
- @SuppressWarnings( "SameParameterValue" )
- private static BufferedReader open( final InputStream in ) {
- return new BufferedReader( new InputStreamReader( in ) );
}
if( parseResult.isUsageHelpRequested() ) {
- System.exit( exitCode );
+ exit( exitCode );
}
else if( parseResult.isVersionHelpRequested() ) {
- System.out.println( getVersion() );
- System.exit( exitCode );
+ out.println( getVersion() );
+ exit( exitCode );
}
}
src/main/java/com/whitemagicsoftware/keenquotes/Lexer.java
*/
private boolean isDash( final char curr ) {
- return curr == '-' || curr == '–' || curr == '—';
+ return curr == '-' || curr == '–' || curr == '—' || curr == '―';
}
src/test/java/com/whitemagicsoftware/keenquotes/KeenQuotesTest.java
try( final var reader = open( filename + ".txt" ) ) {
String line;
+ final var sep = System.lineSeparator();
while( (line = reader.readLine()) != null ) {
- sb.append( line ).append( '\n' );
+ sb.append( line ).append( sep );
}
}
src/test/java/com/whitemagicsoftware/keenquotes/LexerTest.java
testType( "---", DASH );
testType( "–", DASH );
+ testType( "―", DASH );
testType( "—", DASH );
testType( "—-—", DASH );
Delta: 502 lines added, 514 lines removed, 12-line decrease