| Author | Dave Jarvis <email> |
|---|---|
| Date | 2021-06-16 20:19:21 GMT-0700 |
| Commit | 93631b87e26e26c58ede8667433ef6ad9be16ffa |
| Parent | 3d0f249 |
| // the | ||
| "th", | ||
| - // Top ~500 common -ing words as English contractions. | ||
| - "acceptin", | ||
| - "accompanyin", | ||
| - "accordin", | ||
| - "accountin", | ||
| - "achievin", | ||
| - "acquirin", | ||
| - "actin", | ||
| - "addin", | ||
| - "addressin", | ||
| - "adjoinin", | ||
| - "adoptin", | ||
| - "advancin", | ||
| - "advertisin", | ||
| - "affectin", | ||
| - "agin", | ||
| - "allowin", | ||
| - "amazin", | ||
| - "analyzin", | ||
| - "answerin", | ||
| - "anythin", | ||
| - "appearin", | ||
| - "applyin", | ||
| - "approachin", | ||
| - "arguin", | ||
| - "arisin", | ||
| - "arrivin", | ||
| - "askin", | ||
| - "assessin", | ||
| - "assumin", | ||
| - "attackin", | ||
| - "attemptin", | ||
| - "attendin", | ||
| - "avoidin", | ||
| - "bankin", | ||
| - "bargainin", | ||
| - "bearin", | ||
| - "beatin", | ||
| - "becomin", | ||
| - "beginnin", | ||
| - "bein", | ||
| - "believin", | ||
| - "belongin", | ||
| - "bendin", | ||
| - "bindin", | ||
| - "bleedin", | ||
| - "blessin", | ||
| - "blowin", | ||
| - "boilin", | ||
| - "borrowin", | ||
| - "breakin", | ||
| - "breathin", | ||
| - "breedin", | ||
| - "bringin", | ||
| - "broadcastin", | ||
| - "buildin", | ||
| - "burnin", | ||
| - "buyin", | ||
| - "calculatin", | ||
| - "callin", | ||
| - "carryin", | ||
| - "castin", | ||
| - "causin", | ||
| - "ceilin", | ||
| - "challengin", | ||
| - "changin", | ||
| - "checkin", | ||
| - "choosin", | ||
| - "claimin", | ||
| - "cleanin", | ||
| - "clearin", | ||
| - "climbin", | ||
| - "closin", | ||
| - "clothin", | ||
| - "collectin", | ||
| - "combinin", | ||
| - "comin", | ||
| - "commandin", | ||
| - "comparin", | ||
| - "compellin", | ||
| - "competin", | ||
| - "computin", | ||
| - "concernin", | ||
| - "concludin", | ||
| - "conditionin", | ||
| - "conductin", | ||
| - "conflictin", | ||
| - "connectin", | ||
| - "considerin", | ||
| - "consistin", | ||
| - "constructin", | ||
| - "consultin", | ||
| - "consumin", | ||
| - "containin", | ||
| - "continuin", | ||
| - "contractin", | ||
| - "contributin", | ||
| - "controllin", | ||
| - "convincin", | ||
| - "cookin", | ||
| - "coolin", | ||
| - "copin", | ||
| - "correspondin", | ||
| - "counselin", | ||
| - "countin", | ||
| - "couplin", | ||
| - "coverin", | ||
| - "creatin", | ||
| - "crossin", | ||
| - "cryin", | ||
| - "cuttin", | ||
| - "dancin", | ||
| - "darlin", | ||
| - "datin", | ||
| - "dealin", | ||
| - "decidin", | ||
| - "declarin", | ||
| - "declinin", | ||
| - "decreasin", | ||
| - "definin", | ||
| - "demandin", | ||
| - "denyin", | ||
| - "dependin", | ||
| - "descendin", | ||
| - "describin", | ||
| - "designin", | ||
| - "destroyin", | ||
| - "determinin", | ||
| - "developin", | ||
| - "differin", | ||
| - "dinin", | ||
| - "directin", | ||
| - "discussin", | ||
| - "distinguishin", | ||
| - "disturbin", | ||
| - "dividin", | ||
| - "doin", | ||
| - "drawin", | ||
| - "dressin", | ||
| - "drinkin", | ||
| - "drivin", | ||
| - "droppin", | ||
| - "dryin", | ||
| - "durin", | ||
| - "dwellin", | ||
| - "dyin", | ||
| - "eatin", | ||
| - "editin", | ||
| - "emergin", | ||
| - "employin", | ||
| - "enablin", | ||
| - "encouragin", | ||
| - "endin", | ||
| - "engagin", | ||
| - "engineerin", | ||
| - "enjoyin", | ||
| - "enterin", | ||
| - "establishin", | ||
| - "evaluatin", | ||
| - "evenin", | ||
| - "everythin", | ||
| - "examinin", | ||
| - "exceedin", | ||
| - "excitin", | ||
| - "excludin", | ||
| - "existin", | ||
| - "expandin", | ||
| - "expectin", | ||
| - "experiencin", | ||
| - "explainin", | ||
| - "explorin", | ||
| - "expressin", | ||
| - "extendin", | ||
| - "facin", | ||
| - "failin", | ||
| - "fallin", | ||
| - "farmin", | ||
| - "fascinatin", | ||
| - "feedin", | ||
| - "feelin", | ||
| - "fightin", | ||
| - "filin", | ||
| - "fillin", | ||
| - "financin", | ||
| - "findin", | ||
| - "firin", | ||
| - "fishin", | ||
| - "fittin", | ||
| - "fixin", | ||
| - "floatin", | ||
| - "flowin", | ||
| - "flyin", | ||
| - "focusin", | ||
| - "followin", | ||
| - "forcin", | ||
| - "foregoin", | ||
| - "formin", | ||
| - "forthcomin", | ||
| - "foundin", | ||
| - "freezin", | ||
| - "fuckin", | ||
| - "functionin", | ||
| - "fundin", | ||
| - "gainin", | ||
| - "gatherin", | ||
| - "generatin", | ||
| - "gettin", | ||
| - "givin", | ||
| - "goin", | ||
| - "governin", | ||
| - "grantin", | ||
| - "growin", | ||
| - "hackin", | ||
| - "handlin", | ||
| - "hangin", | ||
| - "happenin", | ||
| - "havin", | ||
| - "headin", | ||
| - "healin", | ||
| - "hearin", | ||
| - "heatin", | ||
| - "helpin", | ||
| - "hidin", | ||
| - "holdin", | ||
| - "hopin", | ||
| - "housin", | ||
| - "huntin", | ||
| - "identifyin", | ||
| - "imagin", | ||
| - "implementin", | ||
| - "imposin", | ||
| - "improvin", | ||
| - "includin", | ||
| - "increasin", | ||
| - "indicatin", | ||
| - "interestin", | ||
| - "interpretin", | ||
| - "introducin", | ||
| - "involvin", | ||
| - "joinin", | ||
| - "judgin", | ||
| - "keepin", | ||
| - "killin", | ||
| - "knowin", | ||
| - "lackin", | ||
| - "landin", | ||
| - "lastin", | ||
| - "laughin", | ||
| - "layin", | ||
| - "leadin", | ||
| - "leanin", | ||
| - "learnin", | ||
| - "leavin", | ||
| - "lettin", | ||
| - "liftin", | ||
| - "lightin", | ||
| - "lightnin", | ||
| - "limitin", | ||
| - "listenin", | ||
| - "listin", | ||
| - "livin", | ||
| - "loadin", | ||
| - "lookin", | ||
| - "losin", | ||
| - "lovin", | ||
| - "lowerin", | ||
| - "lyin", | ||
| - "maintainin", | ||
| - "makin", | ||
| - "managin", | ||
| - "manufacturin", | ||
| - "mappin", | ||
| - "marketin", | ||
| - "markin", | ||
| - "matchin", | ||
| - "meanin", | ||
| - "measurin", | ||
| - "meetin", | ||
| - "meltin", | ||
| - "minin", | ||
| - "misleadin", | ||
| - "missin", | ||
| - "mixin", | ||
| - "modelin", | ||
| - "monitorin", | ||
| - "mornin", | ||
| - "movin", | ||
| - "neighborin", | ||
| - "nothin", | ||
| - "notin", | ||
| - "notwithstandin", | ||
| - "nursin", | ||
| - "observin", | ||
| - "obtainin", | ||
| - "occurrin", | ||
| - "offerin", | ||
| - "offsprin", | ||
| - "ongoin", | ||
| - "openin", | ||
| - "operatin", | ||
| - "opposin", | ||
| - "orderin", | ||
| - "organizin", | ||
| - "outstandin", | ||
| - "overwhelmin", | ||
| - "packin", | ||
| - "paintin", | ||
| - "parkin", | ||
| - "participatin", | ||
| - "passin", | ||
| - "payin", | ||
| - "pendin", | ||
| - "performin", | ||
| - "pickin", | ||
| - "pissin", | ||
| - "placin", | ||
| - "plannin", | ||
| - "plantin", | ||
| - "playin", | ||
| - "pleasin", | ||
| - "pointin", | ||
| - "possessin", | ||
| - "preachin", | ||
| - "precedin", | ||
| - "preparin", | ||
| - "presentin", | ||
| - "preservin", | ||
| - "pressin", | ||
| - "prevailin", | ||
| - "preventin", | ||
| - "pricin", | ||
| - "printin", | ||
| - "proceedin", | ||
| - "processin", | ||
| - "producin", | ||
| - "programmin", | ||
| - "promisin", | ||
| - "promotin", | ||
| - "protectin", | ||
| - "providin", | ||
| - "provin", | ||
| - "publishin", | ||
| - "pullin", | ||
| - "purchasin", | ||
| - "pursuin", | ||
| - "pushin", | ||
| - "puttin", | ||
| - "questionin", | ||
| - "rangin", | ||
| - "ratin", | ||
| - "reachin", | ||
| - "readin", | ||
| - "reasonin", | ||
| - "receivin", | ||
| - "recognizin", | ||
| - "recordin", | ||
| - "reducin", | ||
| - "referrin", | ||
| - "reflectin", | ||
| - "refusin", | ||
| - "regardin", | ||
| - "regulatin", | ||
| - "relatin", | ||
| - "remainin", | ||
| - "rememberin", | ||
| - "removin", | ||
| - "renderin", | ||
| - "repeatin", | ||
| - "replacin", | ||
| - "reportin", | ||
| - "representin", | ||
| - "requirin", | ||
| - "respectin", | ||
| - "respondin", | ||
| - "restin", | ||
| - "resultin", | ||
| - "returnin", | ||
| - "revealin", | ||
| - "ridin", | ||
| - "risin", | ||
| - "rulin", | ||
| - "runnin", | ||
| - "sailin", | ||
| - "samplin", | ||
| - "satisfyin", | ||
| - "savin", | ||
| - "sayin", | ||
| - "scatterin", | ||
| - "schoolin", | ||
| - "screenin", | ||
| - "searchin", | ||
| - "securin", | ||
| - "seein", | ||
| - "seekin", | ||
| - "selectin", | ||
| - "sellin", | ||
| - "sendin", | ||
| - "separatin", | ||
| - "servin", | ||
| - "settin", | ||
| - "settlin", | ||
| - "shakin", | ||
| - "shapin", | ||
| - "sharin", | ||
| - "shiftin", | ||
| - "shinin", | ||
| - "shippin", | ||
| + // what | ||
| + "wha", | ||
| + // for/before | ||
| + "fo", | ||
| + // San (Francisco) | ||
| + "Sa", | ||
| + // Top ~500 common -ing words as English contractions. | ||
| + "acceptin", | ||
| + "accompanyin", | ||
| + "accordin", | ||
| + "accountin", | ||
| + "achievin", | ||
| + "acquirin", | ||
| + "actin", | ||
| + "addin", | ||
| + "addressin", | ||
| + "adjoinin", | ||
| + "adoptin", | ||
| + "advancin", | ||
| + "advertisin", | ||
| + "affectin", | ||
| + "agin", | ||
| + "allowin", | ||
| + "amazin", | ||
| + "analyzin", | ||
| + "answerin", | ||
| + "anythin", | ||
| + "appearin", | ||
| + "applyin", | ||
| + "approachin", | ||
| + "arguin", | ||
| + "arisin", | ||
| + "arrivin", | ||
| + "askin", | ||
| + "assessin", | ||
| + "assumin", | ||
| + "attackin", | ||
| + "attemptin", | ||
| + "attendin", | ||
| + "avoidin", | ||
| + "bankin", | ||
| + "bargainin", | ||
| + "bearin", | ||
| + "beatin", | ||
| + "becomin", | ||
| + "beginnin", | ||
| + "bein", | ||
| + "believin", | ||
| + "belongin", | ||
| + "bendin", | ||
| + "bindin", | ||
| + "bleedin", | ||
| + "blessin", | ||
| + "blowin", | ||
| + "boilin", | ||
| + "borrowin", | ||
| + "breakin", | ||
| + "breathin", | ||
| + "breedin", | ||
| + "bringin", | ||
| + "broadcastin", | ||
| + "buildin", | ||
| + "burnin", | ||
| + "buyin", | ||
| + "calculatin", | ||
| + "callin", | ||
| + "carryin", | ||
| + "castin", | ||
| + "causin", | ||
| + "ceilin", | ||
| + "challengin", | ||
| + "changin", | ||
| + "checkin", | ||
| + "choosin", | ||
| + "claimin", | ||
| + "cleanin", | ||
| + "clearin", | ||
| + "climbin", | ||
| + "closin", | ||
| + "clothin", | ||
| + "collectin", | ||
| + "combinin", | ||
| + "comin", | ||
| + "commandin", | ||
| + "comparin", | ||
| + "compellin", | ||
| + "competin", | ||
| + "computin", | ||
| + "concernin", | ||
| + "concludin", | ||
| + "conditionin", | ||
| + "conductin", | ||
| + "conflictin", | ||
| + "connectin", | ||
| + "considerin", | ||
| + "consistin", | ||
| + "constructin", | ||
| + "consultin", | ||
| + "consumin", | ||
| + "containin", | ||
| + "continuin", | ||
| + "contractin", | ||
| + "contributin", | ||
| + "controllin", | ||
| + "convincin", | ||
| + "cookin", | ||
| + "coolin", | ||
| + "copin", | ||
| + "correspondin", | ||
| + "counselin", | ||
| + "countin", | ||
| + "couplin", | ||
| + "coverin", | ||
| + "creatin", | ||
| + "crossin", | ||
| + "cryin", | ||
| + "cuttin", | ||
| + "dancin", | ||
| + "darlin", | ||
| + "datin", | ||
| + "dealin", | ||
| + "decidin", | ||
| + "declarin", | ||
| + "declinin", | ||
| + "decreasin", | ||
| + "definin", | ||
| + "demandin", | ||
| + "denyin", | ||
| + "dependin", | ||
| + "descendin", | ||
| + "describin", | ||
| + "designin", | ||
| + "destroyin", | ||
| + "determinin", | ||
| + "developin", | ||
| + "differin", | ||
| + "dinin", | ||
| + "directin", | ||
| + "discussin", | ||
| + "distinguishin", | ||
| + "disturbin", | ||
| + "dividin", | ||
| + "doin", | ||
| + "drawin", | ||
| + "dressin", | ||
| + "drinkin", | ||
| + "drivin", | ||
| + "droppin", | ||
| + "dryin", | ||
| + "durin", | ||
| + "dwellin", | ||
| + "dyin", | ||
| + "eatin", | ||
| + "editin", | ||
| + "emergin", | ||
| + "employin", | ||
| + "enablin", | ||
| + "encouragin", | ||
| + "endin", | ||
| + "engagin", | ||
| + "engineerin", | ||
| + "enjoyin", | ||
| + "enterin", | ||
| + "establishin", | ||
| + "evaluatin", | ||
| + "evenin", | ||
| + "everythin", | ||
| + "examinin", | ||
| + "exceedin", | ||
| + "excitin", | ||
| + "excludin", | ||
| + "existin", | ||
| + "expandin", | ||
| + "expectin", | ||
| + "experiencin", | ||
| + "explainin", | ||
| + "explorin", | ||
| + "expressin", | ||
| + "extendin", | ||
| + "facin", | ||
| + "failin", | ||
| + "fallin", | ||
| + "farmin", | ||
| + "fascinatin", | ||
| + "feedin", | ||
| + "feelin", | ||
| + "fightin", | ||
| + "filin", | ||
| + "fillin", | ||
| + "financin", | ||
| + "findin", | ||
| + "firin", | ||
| + "fishin", | ||
| + "fittin", | ||
| + "fixin", | ||
| + "floatin", | ||
| + "flowin", | ||
| + "flyin", | ||
| + "focusin", | ||
| + "followin", | ||
| + "forcin", | ||
| + "foregoin", | ||
| + "formin", | ||
| + "forthcomin", | ||
| + "foundin", | ||
| + "freezin", | ||
| + "fuckin", | ||
| + "functionin", | ||
| + "fundin", | ||
| + "gainin", | ||
| + "gatherin", | ||
| + "generatin", | ||
| + "gettin", | ||
| + "givin", | ||
| + "goin", | ||
| + "governin", | ||
| + "grantin", | ||
| + "growin", | ||
| + "hackin", | ||
| + "handlin", | ||
| + "hangin", | ||
| + "happenin", | ||
| + "havin", | ||
| + "headin", | ||
| + "healin", | ||
| + "hearin", | ||
| + "heatin", | ||
| + "helpin", | ||
| + "hidin", | ||
| + "holdin", | ||
| + "hopin", | ||
| + "housin", | ||
| + "huntin", | ||
| + "identifyin", | ||
| + "imagin", | ||
| + "implementin", | ||
| + "imposin", | ||
| + "improvin", | ||
| + "includin", | ||
| + "increasin", | ||
| + "indicatin", | ||
| + "interestin", | ||
| + "interpretin", | ||
| + "introducin", | ||
| + "involvin", | ||
| + "joinin", | ||
| + "judgin", | ||
| + "keepin", | ||
| + "killin", | ||
| + "knowin", | ||
| + "lackin", | ||
| + "landin", | ||
| + "lastin", | ||
| + "laughin", | ||
| + "layin", | ||
| + "leadin", | ||
| + "leanin", | ||
| + "learnin", | ||
| + "leavin", | ||
| + "lettin", | ||
| + "liftin", | ||
| + "lightin", | ||
| + "lightnin", | ||
| + "limitin", | ||
| + "listenin", | ||
| + "listin", | ||
| + "livin", | ||
| + "loadin", | ||
| + "lookin", | ||
| + "losin", | ||
| + "lovin", | ||
| + "lowerin", | ||
| + "lyin", | ||
| + "maintainin", | ||
| + "makin", | ||
| + "managin", | ||
| + "manufacturin", | ||
| + "mappin", | ||
| + "marketin", | ||
| + "markin", | ||
| + "matchin", | ||
| + "meanin", | ||
| + "measurin", | ||
| + "meetin", | ||
| + "meltin", | ||
| + "minin", | ||
| + "misleadin", | ||
| + "missin", | ||
| + "mixin", | ||
| + "modelin", | ||
| + "monitorin", | ||
| + "mornin", | ||
| + "movin", | ||
| + "neighborin", | ||
| + "nothin", | ||
| + "notin", | ||
| + "notwithstandin", | ||
| + "nursin", | ||
| + "observin", | ||
| + "obtainin", | ||
| + "occurrin", | ||
| + "offerin", | ||
| + "offsprin", | ||
| + "ongoin", | ||
| + "openin", | ||
| + "operatin", | ||
| + "opposin", | ||
| + "orderin", | ||
| + "organizin", | ||
| + "outstandin", | ||
| + "overwhelmin", | ||
| + "packin", | ||
| + "paintin", | ||
| + "parkin", | ||
| + "participatin", | ||
| + "passin", | ||
| + "payin", | ||
| + "pendin", | ||
| + "performin", | ||
| + "pickin", | ||
| + "pissin", | ||
| + "placin", | ||
| + "plannin", | ||
| + "plantin", | ||
| + "playin", | ||
| + "pleasin", | ||
| + "pointin", | ||
| + "possessin", | ||
| + "preachin", | ||
| + "precedin", | ||
| + "preparin", | ||
| + "presentin", | ||
| + "preservin", | ||
| + "pressin", | ||
| + "prevailin", | ||
| + "preventin", | ||
| + "pricin", | ||
| + "printin", | ||
| + "proceedin", | ||
| + "processin", | ||
| + "producin", | ||
| + "programmin", | ||
| + "promisin", | ||
| + "promotin", | ||
| + "protectin", | ||
| + "providin", | ||
| + "provin", | ||
| + "publishin", | ||
| + "pullin", | ||
| + "purchasin", | ||
| + "pursuin", | ||
| + "pushin", | ||
| + "puttin", | ||
| + "questionin", | ||
| + "rangin", | ||
| + "ratin", | ||
| + "reachin", | ||
| + "readin", | ||
| + "reasonin", | ||
| + "receivin", | ||
| + "recognizin", | ||
| + "recordin", | ||
| + "reducin", | ||
| + "referrin", | ||
| + "reflectin", | ||
| + "refusin", | ||
| + "regardin", | ||
| + "regulatin", | ||
| + "relatin", | ||
| + "remainin", | ||
| + "rememberin", | ||
| + "removin", | ||
| + "renderin", | ||
| + "repeatin", | ||
| + "replacin", | ||
| + "reportin", | ||
| + "representin", | ||
| + "requirin", | ||
| + "respectin", | ||
| + "respondin", | ||
| + "restin", | ||
| + "resultin", | ||
| + "returnin", | ||
| + "revealin", | ||
| + "ridin", | ||
| + "risin", | ||
| + "rulin", | ||
| + "runnin", | ||
| + "sailin", | ||
| + "samplin", | ||
| + "satisfyin", | ||
| + "savin", | ||
| + "sayin", | ||
| + "scatterin", | ||
| + "schoolin", | ||
| + "screenin", | ||
| + "searchin", | ||
| + "securin", | ||
| + "seein", | ||
| + "seekin", | ||
| + "selectin", | ||
| + "sellin", | ||
| + "sendin", | ||
| + "separatin", | ||
| + "servin", | ||
| + "settin", | ||
| + "settlin", | ||
| + "shakin", | ||
| + "shapin", | ||
| + "sharin", | ||
| + "shiftin", | ||
| + "shinin", | ||
| + "shippin", | ||
| + "shittin", | ||
| "shootin", | ||
| "shoppin", |
| } | ||
| + @Override | ||
| + public String toString() { | ||
| + return getClass().getSimpleName() + '{' + | ||
| + "mType=" + mType + | ||
| + ", mBegan=" + mBegan + | ||
| + ", mEnded=" + mEnded + | ||
| + '}'; | ||
| + } | ||
| + | ||
| /** | ||
| * Answers whether the given {@link LexemeType} is the same as this |
| import static com.keenwrite.quotes.Lexeme.createLexeme; | ||
| import static com.keenwrite.quotes.LexemeType.*; | ||
| -import static java.lang.Character.*; | ||
| +import static java.lang.Character.isDigit; | ||
| +import static java.lang.Character.isWhitespace; | ||
| import static java.text.CharacterIterator.DONE; | ||
| return lexeme; | ||
| + } | ||
| + | ||
| + /** | ||
| + * Answers whether the given character can be considered part of a word | ||
| + * or not. This will include {@code _} and {@code *} because plain text | ||
| + * formats often use those characters to emphasize a word. | ||
| + * | ||
| + * @param curr The character to check as being part of a word. | ||
| + * @return {@code true} if the given character is a letter or a formatting | ||
| + * indicator. | ||
| + */ | ||
| + private static boolean isLetter( final char curr ) { | ||
| + return Character.isLetter( curr ) || curr == '_' || curr == '*'; | ||
| } | ||
| + /** | ||
| + * Answers whether the given character can be considered part of a number | ||
| + * or not. This does not include digits, which are checked independently | ||
| + * from this method. | ||
| + * | ||
| + * @param curr The character to check as being related to numbers. | ||
| + * @return {@code true} if the given character can be considered part of | ||
| + * a number (e.g., -2,000.2^2 is considered a single number). | ||
| + */ | ||
| private static boolean isNumeric( final char curr ) { | ||
| - return curr == '.' || curr == ',' || curr == '-' || curr == '+'; | ||
| + return | ||
| + curr == '.' || curr == ',' || curr == '-' || curr == '+' || curr == '^'; | ||
| } | ||
| */ | ||
| private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE = | ||
| - new LexemeType[]{SPACE, DASH, QUOTE_DOUBLE, OPENING_GROUP, EOP}; | ||
| + new LexemeType[]{SPACE, DASH, QUOTE_DOUBLE, OPENING_GROUP, EOL, EOP}; | ||
| /** | ||
| */ | ||
| private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE = | ||
| - new LexemeType[]{SPACE, DASH, QUOTE_SINGLE, OPENING_GROUP, EOP}; | ||
| + new LexemeType[]{SPACE, DASH, QUOTE_SINGLE, OPENING_GROUP, EOL, EOP}; | ||
| /** | ||
| */ | ||
| private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE = | ||
| - new LexemeType[]{SPACE, DASH, QUOTE_SINGLE, CLOSING_GROUP, EOL}; | ||
| + new LexemeType[]{SPACE, DASH, QUOTE_SINGLE, CLOSING_GROUP, EOL, EOP}; | ||
| /** | ||
| // Create and convert a list of all unambiguous quote characters. | ||
| while( (lexeme = mLexer.next()) != EOT ) { | ||
| - tokenize( lexeme, lexemes, tokenConsumer, unresolved ); | ||
| + if( tokenize( lexeme, lexemes, tokenConsumer, unresolved ) ) { | ||
| + // Attempt to resolve any remaining ambiguous quotes. | ||
| + resolve( unresolved, tokenConsumer ); | ||
| + | ||
| + // Notify of any ambiguous quotes that could not be resolved. | ||
| + unresolved.forEach( ( lex ) -> lexemeConsumer.accept( lex[ 1 ] ) ); | ||
| + unresolved.clear(); | ||
| + mOpeningSingleQuote = 0; | ||
| + mClosingSingleQuote = 0; | ||
| + } | ||
| } | ||
| } | ||
| - private void tokenize( final Lexeme lexeme, | ||
| - final CircularFifoQueue<Lexeme> lexemes, | ||
| - final Consumer<Token> consumer, | ||
| - final List<Lexeme[]> unresolved ) { | ||
| + /** | ||
| + * Converts {@link Lexeme}s identified as straight quotes into {@link Token}s | ||
| + * that represent the curly equivalent. The {@link Token}s are passed to | ||
| + * the given {@link Consumer} for further processing (e.g., replaced in | ||
| + * the original text being parsed). | ||
| + * | ||
| + * @param lexeme A part of the text being parsed. | ||
| + * @param lexemes A 3-element queue of lexemes that provide sufficient | ||
| + * context to identify curly quotes. | ||
| + * @param consumer Recipient of equivalent quotes. | ||
| + * @param unresolved Rolling list of potentially ambiguous {@link Lexeme}s | ||
| + * that could not be tokenized, yet. | ||
| + * @return {@code true} if an end-of-paragraph is detected. | ||
| + */ | ||
| + private boolean tokenize( final Lexeme lexeme, | ||
| + final CircularFifoQueue<Lexeme> lexemes, | ||
| + final Consumer<Token> consumer, | ||
| + final List<Lexeme[]> unresolved ) { | ||
| // Add the next lexeme to tokenize into the queue for immediate processing. | ||
| lexemes.add( lexeme ); | ||
| unresolved.add( new Lexeme[]{lex1, lex2, lex3} ); | ||
| } | ||
| + | ||
| + // Suggest to the caller that resolution should be performed. This allows | ||
| + // the algorithm to reset the opening/closing quote balance before the | ||
| + // next paragraph is parsed. | ||
| + return lex3.isType( EOP ); | ||
| } | ||
| * The {@link Parser} emits these token types. | ||
| */ | ||
| -public enum TokenType { | ||
| +enum TokenType { | ||
| QUOTE_OPENING_SINGLE, | ||
| QUOTE_OPENING_DOUBLE, |
| package com.keenwrite.quotes; | ||
| -import org.junit.jupiter.api.Disabled; | ||
| import org.junit.jupiter.api.Test; | ||
| */ | ||
| @Test | ||
| - @Disabled | ||
| + //@Disabled | ||
| public void test_parse_SingleLine_Parsed() { | ||
| out.println( KeenQuotes.convert( | ||
| public void test_Parse_StraightQuotes_CurlyQuotes() throws IOException { | ||
| testConverter( text -> KeenQuotes.convert( text, ( lexeme ) -> {} ) ); | ||
| + } | ||
| + | ||
| + /** | ||
| + * Reads the entire {@code foote.txt} resource into memory, converts its | ||
| + * straight quotes, and prints the result. Lexemes reported back by the | ||
| + * converter are written to standard output as well. | ||
| + * | ||
| + * @throws IOException Could not read the story resource. | ||
| + */ | ||
| + @Test | ||
| + void test_Parse_Story_Converted() throws IOException { | ||
| + // Presize the buffer to 1 MiB of characters. Note that "2 ^ 20" is a | ||
| + // bitwise XOR in Java (evaluates to 22), not exponentiation. | ||
| + final var sb = new StringBuilder( 1 << 20 ); | ||
| + | ||
| + try( final var reader = open( "foote.txt" ) ) { | ||
| + String line; | ||
| + | ||
| + // readLine strips line terminators, so restore a newline per line. | ||
| + while( (line = reader.readLine()) != null ) { | ||
| + sb.append( line ).append( '\n' ); | ||
| + } | ||
| + } | ||
| + | ||
| + final var s = KeenQuotes.convert( sb.toString(), out::println ); | ||
| + out.println( s ); | ||
| } | ||
| private void testConverter( final Function<String, String> parser ) | ||
| throws IOException { | ||
| - try( final var reader = openResource( "smartypants.txt" ) ) { | ||
| + try( final var reader = open( "smartypants.txt" ) ) { | ||
| String line; | ||
| String testLine = ""; | ||
| } | ||
| - @SuppressWarnings( "SameParameterValue" ) | ||
| - private BufferedReader openResource( final String filename ) { | ||
| + /** | ||
| + * Opens a text file for reading. Callers are responsible for closing. | ||
| + * | ||
| + * @param filename The file to open. | ||
| + * @return An instance of {@link BufferedReader} that can be used to | ||
| + * read all the lines in the file. | ||
| + */ | ||
| + private BufferedReader open( final String filename ) { | ||
| final var is = getClass().getResourceAsStream( filename ); | ||
| assertNotNull( is ); | ||
| -package com.keenwrite.quotes; | ||
| - | ||
| -public class StoryTest { | ||
| - | ||
| -} | ||
| “And this” and “and this” and “and this” and “and another.” | ||
| +"Will'll invite?" Mrs. Thorne asked;\n"because he's coming--he's at the gate." | ||
| +“Will'll invite?” Mrs. Thorne asked;\n“because he's coming--he's at the gate.” | ||
| + | ||
| # ######################################################################## | ||
| # Single quotes | ||
| "'Twas, t'wasn't thy name, 'twas it?" said Jim "the Barber" Brown. | ||
| “'Twas, t'wasn't thy name, 'twas it?” said Jim “the Barber” Brown. | ||
| + | ||
| +"'I'm in danger. Help? Take me to--' an address on Van Ness Avenue. | ||
| +“‘I'm in danger. Help? Take me to--’ an address on Van Ness Avenue. | ||
| + | ||
| +"Ah," she said, "you knew! He told you--and you said 'my dear'! How could you?!" | ||
| +“Ah,” she said, “you knew! He told you--and you said ‘my dear’! How could you?!” | ||
| # ######################################################################## | ||
| Delta | 521 lines added, 428 lines removed, 93-line increase |
|---|