Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Add 3k contractions, emit more ambiguities

Author: Dave Jarvis <email>
Date: 2021-06-26 10:29:48 GMT-0700
Commit: b1d513f2c14ee205c056ba20898832ce7da61864
Parent: 856fc43
src/main/java/com/whitemagicsoftware/keenquotes/Contractions.java
"boutchu",
"cept",
- "dillo",
- "em",
- "fraid",
- "gainst",
- "im",
- "n",
- "neath",
- "nother",
- "nuff",
- "onna",
- "onna'",
- "pon",
- "s",
- "sblood",
- "scuse",
- "sfar",
- "sfoot",
- "t",
- "taint",
- "tain",
- "til",
- "tis",
- "tisn",
- "tshall",
- "twas",
- "twasn",
- "tween",
- "twere",
- "tweren",
- "twixt",
- "twon",
- "twou",
- "twould",
- "twouldn",
- "ve"
- );
-
- /**
- * Words having a straight apostrophe that may be either part of a
- * contraction or a word that stands alone beside an opening single quote.
- */
- private static final Set<String> BEGAN_AMBIGUOUS = Set.of(
- // about|boxing match
- "bout",
- // because|causal
- "cause",
- // what you|choo choo train
- "choo",
- // he|e pluribus unum
- "e",
- // here|earlier
- "ere",
- // afro|to and fro
- "fro",
- // whore|ho ho!
- "ho",
- // okay|letter K
- "kay",
- // lo|lo and behold
- "lo",
- // are|regarding
- "re",
- // what's up|to sup
- "sup",
- // it will|twill fabric
- "twill",
- // them|utterance
- "um",
- // is that|Iranian village
- "zat"
- );
-
- private static final Set<String> ENDED_AMBIGUOUS = Set.of(
- // give|martial arts garment
- "gi",
- // in|I
- "i",
- // of|letter o
- "o"
- );
-
- private static final Set<String> ENDED_UNAMBIGUOUS = Set.of(
- // and
- "an",
- // for/before
- "fo",
- // friend
- "frien",
- // just
- "jus",
- // lord
- "lor",
- // myself
- "masel",
- // old
- "ol",
- // San (Francisco)
- "Sa",
- // shift
- "shif",
- // the
- "th",
- // what
- "wha",
- // world
- "worl",
- // Top ~500 common -ing words as English contractions.
- "acceptin",
- "accompanyin",
- "accordin",
- "accountin",
- "achievin",
- "acquirin",
- "actin",
- "addin",
- "addressin",
- "adjoinin",
- "adoptin",
- "advancin",
- "advertisin",
- "affectin",
- "agin",
- "allowin",
- "amazin",
- "analyzin",
- "answerin",
- "anythin",
- "appearin",
- "applyin",
- "approachin",
- "arguin",
- "arisin",
- "arrivin",
- "askin",
- "assessin",
- "assumin",
- "attackin",
- "attemptin",
- "attendin",
- "avoidin",
- "bankin",
- "bargainin",
- "bearin",
- "beatin",
- "becomin",
- "beginnin",
- "bein",
- "believin",
- "belongin",
- "bendin",
- "bindin",
- "bleedin",
- "blessin",
- "blowin",
- "boilin",
- "borrowin",
- "breakin",
- "breathin",
- "breedin",
- "bringin",
- "broadcastin",
- "buildin",
- "burnin",
- "buyin",
- "calculatin",
- "callin",
- "carryin",
- "castin",
- "causin",
- "ceilin",
- "challengin",
- "changin",
- "checkin",
- "choosin",
- "claimin",
- "cleanin",
- "clearin",
- "climbin",
- "closin",
- "clothin",
- "collectin",
- "combinin",
- "comin",
- "commandin",
- "comparin",
- "compellin",
- "competin",
- "computin",
- "concernin",
- "concludin",
- "conditionin",
- "conductin",
- "conflictin",
- "connectin",
- "considerin",
- "consistin",
- "constructin",
- "consultin",
- "consumin",
- "containin",
- "continuin",
- "contractin",
- "contributin",
- "controllin",
- "convincin",
- "cookin",
- "coolin",
- "copin",
- "correspondin",
- "counselin",
- "countin",
- "couplin",
- "coverin",
- "creatin",
- "crossin",
- "cryin",
- "cuttin",
- "dancin",
- "darlin",
- "datin",
- "dealin",
- "decidin",
- "declarin",
- "declinin",
- "decreasin",
- "definin",
- "demandin",
- "denyin",
- "dependin",
- "descendin",
- "describin",
- "designin",
- "destroyin",
- "determinin",
- "developin",
- "differin",
- "dinin",
- "directin",
- "discussin",
- "distinguishin",
- "disturbin",
- "dividin",
- "doin",
- "drawin",
- "dressin",
- "drinkin",
- "drivin",
- "droppin",
- "dryin",
- "durin",
- "dwellin",
- "dyin",
- "eatin",
- "editin",
- "emergin",
- "employin",
- "enablin",
- "encouragin",
- "endin",
- "engagin",
- "engineerin",
- "enjoyin",
- "enterin",
- "establishin",
- "evaluatin",
- "evenin",
- "everythin",
- "examinin",
- "exceedin",
- "excitin",
- "excludin",
- "existin",
- "expandin",
- "expectin",
- "experiencin",
- "explainin",
- "explorin",
- "expressin",
- "extendin",
- "facin",
- "failin",
- "fallin",
- "farmin",
- "fascinatin",
- "feedin",
- "feelin",
- "fightin",
- "filin",
- "fillin",
- "financin",
- "findin",
- "firin",
- "fishin",
- "fittin",
- "fixin",
- "floatin",
- "flowin",
- "flyin",
- "focusin",
- "followin",
- "forcin",
- "foregoin",
- "formin",
- "forthcomin",
- "foundin",
- "freezin",
- "fuckin",
- "functionin",
- "fundin",
- "gainin",
- "gatherin",
- "generatin",
- "gettin",
- "givin",
- "goin",
- "governin",
- "grantin",
- "growin",
- "hackin",
- "handlin",
- "hangin",
- "happenin",
- "havin",
- "headin",
- "healin",
- "hearin",
- "heatin",
- "helpin",
- "hidin",
- "holdin",
- "hopin",
- "housin",
- "huntin",
- "identifyin",
- "imagin",
- "implementin",
- "imposin",
- "improvin",
- "includin",
- "increasin",
- "indicatin",
- "interestin",
- "interpretin",
- "introducin",
- "involvin",
- "joinin",
- "judgin",
- "keepin",
- "killin",
- "knowin",
- "lackin",
- "landin",
- "lastin",
- "laughin",
- "layin",
- "leadin",
- "leanin",
- "learnin",
- "leavin",
- "lettin",
- "liftin",
- "lightin",
- "lightnin",
- "limitin",
- "listenin",
- "listin",
- "livin",
- "loadin",
- "lookin",
- "losin",
- "lovin",
- "lowerin",
- "lyin",
- "maintainin",
- "makin",
- "managin",
- "manufacturin",
- "mappin",
- "marketin",
- "markin",
- "matchin",
- "meanin",
- "measurin",
- "meetin",
- "meltin",
- "minin",
- "misleadin",
- "missin",
- "mixin",
- "modelin",
- "monitorin",
- "mornin",
- "movin",
- "neighborin",
- "neighbourin",
- "nothin",
- "notin",
- "notwithstandin",
- "nursin",
- "observin",
- "obtainin",
- "occurrin",
- "offerin",
- "offsprin",
- "ongoin",
- "openin",
- "operatin",
- "opposin",
- "orderin",
- "organizin",
- "outstandin",
- "overwhelmin",
- "packin",
- "paintin",
- "parkin",
- "participatin",
- "passin",
- "payin",
- "pendin",
- "performin",
- "pickin",
- "pissin",
- "placin",
- "plannin",
- "plantin",
- "playin",
- "pleasin",
- "pointin",
- "poppin",
- "possessin",
- "preachin",
- "precedin",
- "preparin",
- "presentin",
- "preservin",
- "pressin",
- "prevailin",
- "preventin",
- "pricin",
- "printin",
- "proceedin",
- "processin",
- "producin",
- "programmin",
- "promisin",
- "promotin",
- "protectin",
- "providin",
- "provin",
- "publishin",
- "pullin",
- "purchasin",
- "pursuin",
- "pushin",
- "puttin",
- "questionin",
- "rangin",
- "ratin",
- "reachin",
- "readin",
- "reasonin",
- "receivin",
- "recognizin",
- "recordin",
- "reducin",
- "referrin",
- "reflectin",
- "refusin",
- "regardin",
- "regulatin",
- "relatin",
- "remainin",
- "rememberin",
- "removin",
- "renderin",
- "repeatin",
- "replacin",
- "reportin",
- "representin",
- "requirin",
- "respectin",
- "respondin",
- "restin",
- "resultin",
- "returnin",
- "revealin",
- "ridin",
- "risin",
- "rulin",
- "runnin",
- "rythin",
- "sailin",
- "samplin",
- "satisfyin",
- "savin",
- "sayin",
- "scatterin",
- "schoolin",
- "screenin",
- "searchin",
- "securin",
- "seein",
- "seekin",
- "selectin",
- "sellin",
- "sendin",
- "separatin",
- "servin",
- "settin",
- "settlin",
- "sewin",
- "shakin",
- "shapin",
- "sharin",
- "shiftin",
- "shinin",
- "shippin",
- "shittin",
- "shootin",
- "shoppin",
- "showin",
- "singin",
- "sinkin",
- "sittin",
- "sleepin",
- "smilin",
- "smokin",
- "spankin",
- "solvin",
- "somethin",
- "speakin",
- "spellin",
- "spendin",
- "spinnin",
- "spittin",
- "spreadin",
- "standin",
- "starin",
- "startin",
- "statin",
- "stayin",
- "stealin",
- "sterlin",
- "stimulatin",
- "stirrin",
- "stoppin",
- "strengthenin",
- "stretchin",
- "strikin",
- "strugglin",
- "studyin",
- "succeedin",
- "sufferin",
- "suggestin",
- "supplyin",
- "supportin",
- "surprisin",
- "surroundin",
- "survivin",
- "sweepin",
- "swellin",
- "swimmin",
- "switchin",
- "takin",
- "talkin",
- "teachin",
- "tellin",
- "testin",
- "thinkin",
- "threatenin",
- "throwin",
- "timin",
- "touchin",
- "tradin",
- "trainin",
- "travelin",
- "treatin",
- "tremblin",
- "tryin",
- "turnin",
- "underlyin",
- "understandin",
- "undertakin",
- "unwillin",
- "usin",
- "varyin",
- "viewin",
- "visitin",
- "votin",
- "waitin",
- "walkin",
- "wanderin",
- "wantin",
- "warnin",
- "washin",
- "watchin",
- "wearin",
- "weddin",
- "whackin",
- "wi",
- "willin",
- "windin",
- "winnin",
- "wishin",
- "wonderin",
- "workin",
- "writin",
- "yieldin"
+ "cos",
+ "dillo",
+ "em",
+ "fraid",
+ "gainst",
+ "im",
+ "n",
+ "neath",
+ "nother",
+ "nuff",
+ "onna",
+ "onna'",
+ // horse
+ "oss",
+ // horses
+ "osses",
+ "pon",
+ "s",
+ "sblood",
+ "scuse",
+ "sfar",
+ "sfoot",
+ "t",
+ "taint",
+ "tain",
+ "til",
+ "tis",
+ "tisn",
+ "tshall",
+ "twas",
+ "twasn",
+ "tween",
+ "twere",
+ "tweren",
+ "twixt",
+ "twon",
+ "twou",
+ "twould",
+ "twouldn",
+ "ve"
+ );
+
+ /**
+ * Words having a straight apostrophe that may be either part of a
+ * contraction or a word that stands alone beside an opening single quote.
+ */
+ private static final Set<String> BEGAN_AMBIGUOUS = Set.of(
+ // have
+ "a",
+ // about|boxing match
+ "bout",
+ // because|causal
+ "cause",
+ // what you|choo choo train
+ "choo",
+ // he|e pluribus unum
+ "e",
+ // here|earlier
+ "ere",
+ // afro|to and fro
+ "fro",
+ // whore|ho ho!
+ "ho",
+ // okay|letter K
+ "kay",
+ // lo|lo and behold
+ "lo",
+ // ???
+ "listed",
+ // ???
+ "nation",
+ // are|regarding
+ "re",
+ // what's up|to sup
+ "sup",
+ // it will|twill fabric
+ "twill",
+ // them|utterance
+ "um",
+ // is that|Iranian village
+ "zat"
+ );
+
+ private static final Set<String> ENDED_AMBIGUOUS = Set.of(
+ // give|martial arts garment
+ "gi",
+ // in|I
+ "i",
+ // of|letter o
+ "o"
+ );
+
+ private static final Set<String> ENDED_UNAMBIGUOUS = Set.of(
+ // and
+ "an",
+ // for/before
+ "fo",
+ // friend
+ "frien",
+ // just
+ "jus",
+ // lord
+ "lor",
+ // myself
+ "masel",
+ // old
+ "ol",
+ // San (Francisco)
+ "Sa",
+ // shift
+ "shif",
+ // the
+ "th",
+ // what
+ "wha",
+ // with
+ "wi",
+ // world
+ "worl",
+ // Top ~3500 common -ing words as English contractions.
+ "abandonin",
+ "abettin",
+ "abidin",
+ "abolishin",
+ "aboundin",
+ "absorbin",
+ "abstainin",
+ "abstractin",
+ "abusin",
+ "abuttin",
+ "acceleratin",
+ "acceptin",
+ "accessin",
+ "accommodatin",
+ "accompanyin",
+ "accomplishin",
+ "accordin",
+ "accountin",
+ "accruin",
+ "accumulatin",
+ "accusin",
+ "achievin",
+ "achin",
+ "acknowledgin",
+ "acquaintin",
+ "acquirin",
+ "actin",
+ "activatin",
+ "adaptin",
+ "addin",
+ "addressin",
+ "adherin",
+ "adjoinin",
+ "adjustin",
+ "administerin",
+ "admirin",
+ "admittin",
+ "admonishin",
+ "adoptin",
+ "adorin",
+ "adornin",
+ "advancin",
+ "advertisin",
+ "advisin",
+ "advocatin",
+ "affectin",
+ "affirmin",
+ "afflictin",
+ "affordin",
+ "agein",
+ "aggravatin",
+ "aggregatin",
+ "agin",
+ "agitatin",
+ "agonizin",
+ "agreein",
+ "ailin",
+ "aimin",
+ "airin",
+ "alarmin",
+ "alertin",
+ "alienatin",
+ "alightin",
+ "alignin",
+ "allegin",
+ "alleviatin",
+ "allocatin",
+ "allowin",
+ "alludin",
+ "allurin",
+ "alterin",
+ "alternatin",
+ "amassin",
+ "amazin",
+ "amendin",
+ "amountin",
+ "amplifyin",
+ "amusin",
+ "analysin",
+ "analyzin",
+ "anchorin",
+ "anglin",
+ "animatin",
+ "annealin",
+ "annihilatin",
+ "announcin",
+ "annoyin",
+ "anointin",
+ "answerin",
+ "anticipatin",
+ "anythin",
+ "apologizin",
+ "appallin",
+ "appealin",
+ "appearin",
+ "appertainin",
+ "appetizin",
+ "applaudin",
+ "applyin",
+ "appointin",
+ "appraisin",
+ "appreciatin",
+ "apprehendin",
+ "approachin",
+ "appropriatin",
+ "approvin",
+ "approximatin",
+ "archin",
+ "archivin",
+ "arguin",
+ "arisin",
+ "arousin",
+ "arrangin",
+ "arrestin",
+ "arrivin",
+ "articulatin",
+ "ascendin",
+ "ascertainin",
+ "ascribin",
+ "askin",
+ "assailin",
+ "assaultin",
+ "assemblin",
+ "assertin",
+ "assessin",
+ "assignin",
+ "assimilatin",
+ "assistin",
+ "associatin",
+ "assumin",
+ "assurin",
+ "astonishin",
+ "astoundin",
+ "atonin",
+ "attachin",
+ "attackin",
+ "attainin",
+ "attemptin",
+ "attendin",
+ "attestin",
+ "attractin",
+ "attributin",
+ "auditin",
+ "augmentin",
+ "authorin",
+ "authorisin",
+ "authorizin",
+ "availin",
+ "avengin",
+ "averagin",
+ "avertin",
+ "avoidin",
+ "awaitin",
+ "awakenin",
+ "awakin",
+ "awardin",
+ "awnin",
+ "babblin",
+ "babysittin",
+ "backin",
+ "backslidin",
+ "backtrackin",
+ "bafflin",
+ "baggin",
+ "bailin",
+ "baitin",
+ "bakin",
+ "balancin",
+ "baldin",
+ "balloonin",
+ "ballotin",
+ "bandagin",
+ "bandin",
+ "bangin",
+ "banishin",
+ "bankin",
+ "bannin",
+ "banquetin",
+ "banterin",
+ "baptizin",
+ "bargainin",
+ "barin",
+ "barkin",
+ "barrin",
+ "barterin",
+ "bashin",
+ "baskin",
+ "bastin",
+ "bathin",
+ "batterin",
+ "battin",
+ "battlin",
+ "bawlin",
+ "bayin",
+ "beadin",
+ "beamin",
+ "bearin",
+ "beatin",
+ "beautifyin",
+ "beckonin",
+ "becomin",
+ "beddin",
+ "beepin",
+ "befittin",
+ "befriendin",
+ "begettin",
+ "beggin",
+ "beginnin",
+ "beguilin",
+ "behavin",
+ "beheadin",
+ "beholdin",
+ "behrin",
+ "beijin",
+ "bein",
+ "belchin",
+ "believin",
+ "belittlin",
+ "bellowin",
+ "belongin",
+ "beltin",
+ "bemoanin",
+ "benchmarkin",
+ "bendin",
+ "benefitin",
+ "bennin",
+ "beratin",
+ "berin",
+ "beseechin",
+ "besettin",
+ "besiegin",
+ "bestowin",
+ "bestsellin",
+ "betrayin",
+ "betterin",
+ "bettin",
+ "bewailin",
+ "bewilderin",
+ "bewitchin",
+ "biasin",
+ "bickerin",
+ "bicyclin",
+ "biddin",
+ "bidin",
+ "bikin",
+ "billin",
+ "billowin",
+ "bindin",
+ "birthin",
+ "bisectin",
+ "bitin",
+ "blackenin",
+ "blackin",
+ "blackmailin",
+ "blamin",
+ "blanchin",
+ "blarin",
+ "blasphemin",
+ "blastin",
+ "blazin",
+ "bleachin",
+ "bleatin",
+ "bleedin",
+ "blendin",
+ "blessin",
+ "blightin",
+ "blindin",
+ "blinkin",
+ "blisterin",
+ "bloatin",
+ "blockadin",
+ "blockin",
+ "bloggin",
+ "bloodlettin",
+ "bloomin",
+ "blossomin",
+ "blottin",
+ "blowin",
+ "bluffin",
+ "blunderin",
+ "blurrin",
+ "blushin",
+ "blusterin",
+ "boardin",
+ "boastin",
+ "boatin",
+ "bodybuildin",
+ "boein",
+ "bogglin",
+ "boilin",
+ "bolsterin",
+ "boltin",
+ "bombardin",
+ "bombin",
+ "bondin",
+ "bookin",
+ "bookkeepin",
+ "boomin",
+ "boostin",
+ "bootin",
+ "bootstrappin",
+ "borderin",
+ "borin",
+ "borrowin",
+ "botherin",
+ "bottlin",
+ "bouncin",
+ "boundin",
+ "bowin",
+ "bowlin",
+ "bowrin",
+ "bowstrin",
+ "boxin",
+ "boycottin",
+ "bracin",
+ "bracketin",
+ "braggin",
+ "braidin",
+ "brainstormin",
+ "brainwashin",
+ "brakin",
+ "branchin",
+ "brandin",
+ "brandishin",
+ "bravin",
+ "brawlin",
+ "brayin",
+ "breachin",
+ "breakfastin",
+ "breakin",
+ "breastfeedin",
+ "breathin",
+ "breathtakin",
+ "breedin",
+ "brewin",
+ "bribin",
+ "bridgin",
+ "briefin",
+ "brightenin",
+ "brimmin",
+ "bringin",
+ "bristlin",
+ "britlin",
+ "broadcastin",
+ "broadenin",
+ "broilin",
+ "brokerin",
+ "broodin",
+ "brownin",
+ "browsin",
+ "bruisin",
+ "brushin",
+ "bubblin",
+ "buckin",
+ "bucklin",
+ "buddin",
+ "budgetin",
+ "bufferin",
+ "buffetin",
+ "buggin",
+ "buildin",
+ "bulgin",
+ "bullyin",
+ "bumblin",
+ "bumpin",
+ "bundlin",
+ "bunglin",
+ "buntin",
+ "burdenin",
+ "burgeonin",
+ "burnin",
+ "burrowin",
+ "burstin",
+ "buryin",
+ "busin",
+ "bustin",
+ "bustlin",
+ "busyin",
+ "butcherin",
+ "buttin",
+ "buttonin",
+ "buyin",
+ "buzzin",
+ "bypassin",
+ "cachin",
+ "calculatin",
+ "callin",
+ "calmin",
+ "campaignin",
+ "campin",
+ "cancelin",
+ "cannin",
+ "canvassin",
+ "cappin",
+ "captivatin",
+ "capturin",
+ "caregivin",
+ "caressin",
+ "carpetin",
+ "carryin",
+ "carvin",
+ "cascadin",
+ "casin",
+ "castin",
+ "catalogin",
+ "cataloguin",
+ "catchin",
+ "categorizin",
+ "caterin",
+ "causin",
+ "ceasin",
+ "ceilin",
+ "celebratin",
+ "cementin",
+ "centerin",
+ "certifyin",
+ "chafin",
+ "challengin",
+ "championin",
+ "changin",
+ "channelin",
+ "channin",
+ "chantin",
+ "characterizin",
+ "chargin",
+ "chartin",
+ "chasin",
+ "chatterin",
+ "chattin",
+ "cheatin",
+ "checkin",
+ "cheerin",
+ "cherishin",
+ "chewin",
+ "childbearin",
+ "chillin",
+ "chippin",
+ "chirpin",
+ "chokin",
+ "choosin",
+ "choppin",
+ "christenin",
+ "chucklin",
+ "churnin",
+ "circlin",
+ "circulatin",
+ "citin",
+ "civilizin",
+ "claddin",
+ "claimin",
+ "clamberin",
+ "clamorin",
+ "clampin",
+ "clangin",
+ "clankin",
+ "clappin",
+ "clarifyin",
+ "clashin",
+ "claspin",
+ "classifyin",
+ "clatterin",
+ "claverin",
+ "clawin",
+ "cleanin",
+ "cleansin",
+ "clearin",
+ "cleavin",
+ "clenchin",
+ "clickin",
+ "climbin",
+ "clin",
+ "clingin",
+ "clippin",
+ "cloggin",
+ "clonin",
+ "closin",
+ "clothin",
+ "clottin",
+ "clusterin",
+ "clutchin",
+ "coachin",
+ "coastin",
+ "coatin",
+ "coaxin",
+ "codin",
+ "coexistin",
+ "coilin",
+ "coincidin",
+ "coinin",
+ "collaboratin",
+ "collapsin",
+ "collectin",
+ "collidin",
+ "colonisin",
+ "colonizin",
+ "colorin",
+ "colourin",
+ "combatin",
+ "combin",
+ "combinin",
+ "comfortin",
+ "comin",
+ "commandin",
+ "commemoratin",
+ "commencin",
+ "commendin",
+ "commentin",
+ "commissionin",
+ "committin",
+ "communicatin",
+ "commutin",
+ "comparin",
+ "compellin",
+ "compensatin",
+ "competin",
+ "compilin",
+ "complainin",
+ "completin",
+ "complicatin",
+ "complyin",
+ "composin",
+ "compostin",
+ "compoundin",
+ "comprehendin",
+ "compressin",
+ "comprisin",
+ "compromisin",
+ "computin",
+ "concealin",
+ "concedin",
+ "conceivin",
+ "concentratin",
+ "conceptualizin",
+ "concernin",
+ "concludin",
+ "concurrin",
+ "condemnin",
+ "condensin",
+ "condescendin",
+ "conditionin",
+ "conductin",
+ "conferencin",
+ "conferrin",
+ "confessin",
+ "confidin",
+ "configurin",
+ "confinin",
+ "confirmin",
+ "conflictin",
+ "conformin",
+ "confoundin",
+ "confrontin",
+ "confusin",
+ "congratulatin",
+ "conjurin",
+ "connectin",
+ "connin",
+ "conquerin",
+ "consentin",
+ "conservin",
+ "considerin",
+ "consistin",
+ "consolidatin",
+ "consolin",
+ "conspirin",
+ "constitutin",
+ "constrainin",
+ "constructin",
+ "construin",
+ "consultin",
+ "consumin",
+ "contactin",
+ "containin",
+ "contaminatin",
+ "contemplatin",
+ "contendin",
+ "contestin",
+ "continuin",
+ "contractin",
+ "contradictin",
+ "contrastin",
+ "contributin",
+ "contrivin",
+ "controllin",
+ "convenin",
+ "convergin",
+ "conversin",
+ "convertin",
+ "conveyin",
+ "convincin",
+ "cooin",
+ "cookin",
+ "coolin",
+ "cooperatin",
+ "coordinatin",
+ "copin",
+ "copyin",
+ "correctin",
+ "correlatin",
+ "correspondin",
+ "corruptin",
+ "costin",
+ "coughin",
+ "counselin",
+ "counsellin",
+ "counteractin",
+ "counterfeitin",
+ "counterin",
+ "countervailin",
+ "countin",
+ "couplin",
+ "coursin",
+ "courtin",
+ "coverin",
+ "cowerin",
+ "crackin",
+ "cracklin",
+ "cradlin",
+ "craftin",
+ "crampin",
+ "crashin",
+ "cravin",
+ "crawlin",
+ "creakin",
+ "creatin",
+ "creditin",
+ "creepin",
+ "cringin",
+ "cripplin",
+ "criticizin",
+ "critiquin",
+ "croakin",
+ "croppin",
+ "crossin",
+ "crouchin",
+ "crowdin",
+ "crowin",
+ "crownin",
+ "cruisin",
+ "crumblin",
+ "crunchin",
+ "crusadin",
+ "crushin",
+ "cryin",
+ "culminatin",
+ "cultivatin",
+ "cummin",
+ "cunnin",
+ "cuppin",
+ "curbin",
+ "curin",
+ "curlin",
+ "cursin",
+ "curtailin",
+ "curvin",
+ "cushin",
+ "customizin",
+ "cuttin",
+ "cyclin",
+ "dabbin",
+ "dabblin",
+ "damagin",
+ "damnin",
+ "dampenin",
+ "dampin",
+ "dancin",
+ "danglin",
+ "darjeelin",
+ "darkenin",
+ "darklin",
+ "darlin",
+ "darnin",
+ "dartin",
+ "dashin",
+ "datin",
+ "dauntin",
+ "dawdlin",
+ "dawnin",
+ "daydreamin",
+ "dazzlin",
+ "deadenin",
+ "deafenin",
+ "dealin",
+ "debasin",
+ "debatin",
+ "debilitatin",
+ "debriefin",
+ "debuggin",
+ "decayin",
+ "deceivin",
+ "decidin",
+ "decipherin",
+ "decisionmakin",
+ "deckin",
+ "declaimin",
+ "declarin",
+ "declinin",
+ "decodin",
+ "decommissionin",
+ "decomposin",
+ "deconstructin",
+ "decoratin",
+ "decouplin",
+ "decreasin",
+ "dedicatin",
+ "deducin",
+ "deductin",
+ "deemin",
+ "deepenin",
+ "deerin",
+ "defaultin",
+ "defeatin",
+ "defendin",
+ "deferrin",
+ "defilin",
+ "definin",
+ "deflectin",
+ "deformin",
+ "defraudin",
+ "defrayin",
+ "defyin",
+ "degeneratin",
+ "degradin",
+ "dehumanizin",
+ "delayin",
+ "delegatin",
+ "deletin",
+ "deliberatin",
+ "delightin",
+ "delineatin",
+ "deliverin",
+ "deludin",
+ "delvin",
+ "demandin",
+ "demeanin",
+ "demin",
+ "democratizin",
+ "demolishin",
+ "demonstratin",
+ "demoralizin",
+ "denigratin",
+ "dennin",
+ "denotin",
+ "denouncin",
+ "denyin",
+ "departin",
+ "dependin",
+ "depictin",
+ "depletin",
+ "deplorin",
+ "deployin",
+ "depositin",
+ "deprecatin",
+ "depreciatin",
+ "depressin",
+ "deprivin",
+ "derivin",
+ "descendin",
+ "describin",
+ "desertin",
+ "deservin",
+ "designatin",
+ "designin",
+ "desirin",
+ "desolatin",
+ "despairin",
+ "despatchin",
+ "despisin",
+ "despondin",
+ "destabilizin",
+ "destroyin",
+ "detachin",
+ "detailin",
+ "detainin",
+ "detectin",
+ "deterioratin",
+ "determinin",
+ "deterrin",
+ "devastatin",
+ "developin",
+ "deviatin",
+ "devisin",
+ "devolvin",
+ "devotin",
+ "devourin",
+ "diagnosin",
+ "dialin",
+ "dictatin",
+ "dietin",
+ "differentiatin",
+ "differin",
+ "diffusin",
+ "digestin",
+ "diggin",
+ "digitizin",
+ "dilatin",
+ "dilutin",
+ "diminishin",
+ "dimmin",
+ "dinin",
+ "dippin",
+ "directin",
+ "disablin",
+ "disagreein",
+ "disappearin",
+ "disappointin",
+ "disapprovin",
+ "disarmin",
+ "disbandin",
+ "disbelievin",
+ "discardin",
+ "discernin",
+ "dischargin",
+ "disciplinin",
+ "disclosin",
+ "disconcertin",
+ "disconnectin",
+ "discontinuin",
+ "discountin",
+ "discouragin",
+ "discoursin",
+ "discoverin",
+ "discreditin",
+ "discriminatin",
+ "discussin",
+ "disdainin",
+ "disembarkin",
+ "disengagin",
+ "disfigurin",
+ "disguisin",
+ "disgustin",
+ "disheartenin",
+ "disinfectin",
+ "disintegratin",
+ "dislikin",
+ "dislodgin",
+ "dismantlin",
+ "dismissin",
+ "dismountin",
+ "disobeyin",
+ "disorientin",
+ "disparagin",
+ "dispatchin",
+ "dispellin",
+ "dispensin",
+ "dispersin",
+ "displacin",
+ "displayin",
+ "displeasin",
+ "disposin",
+ "disputin",
+ "disqualifyin",
+ "disquietin",
+ "disregardin",
+ "disruptin",
+ "dissectin",
+ "dissemblin",
+ "disseminatin",
+ "dissentin",
+ "dissipatin",
+ "dissolvin",
+ "distancin",
+ "distillin",
+ "distinguishin",
+ "distortin",
+ "distractin",
+ "distressin",
+ "distributin",
+ "disturbin",
+ "divergin",
+ "diversifyin",
+ "divertin",
+ "divestin",
+ "dividin",
+ "divin",
+ "divinin",
+ "divorcin",
+ "dizzyin",
+ "dockin",
+ "doctorin",
+ "documentin",
+ "dodgin",
+ "doddlin",
+ "doin",
+ "dominatin",
+ "domineerin",
+ "donatin",
+ "donnin",
+ "doodlin",
+ "dopin",
+ "dorkin",
+ "dosin",
+ "dotin",
+ "dottin",
+ "doublin",
+ "doubtin",
+ "dowlin",
+ "downin",
+ "downloadin",
+ "downplayin",
+ "downsizin",
+ "dozin",
+ "draftin",
+ "draggin",
+ "drainin",
+ "dramatisin",
+ "dramatizin",
+ "drapin",
+ "drawin",
+ "drawlin",
+ "dreadin",
+ "dreamin",
+ "dredgin",
+ "drenchin",
+ "dressin",
+ "dressmakin",
+ "dribblin",
+ "driftin",
+ "drillin",
+ "drinkin",
+ "drippin",
+ "drivin",
+ "drizzlin",
+ "dronin",
+ "droolin",
+ "droopin",
+ "droppin",
+ "drownin",
+ "drummin",
+ "dryin",
+ "duckin",
+ "duelin",
+ "duellin",
+ "dumpin",
+ "dunnin",
+ "duplicatin",
+ "durin",
+ "dwellin",
+ "dwindlin",
+ "dyein",
+ "dyin",
+ "ealin",
+ "earnin",
+ "earrin",
+ "earthin",
+ "easin",
+ "easygoin",
+ "eatin",
+ "eavesdroppin",
+ "ebbin",
+ "echoin",
+ "eclipsin",
+ "economizin",
+ "eddyin",
+ "edgin",
+ "edifyin",
+ "editin",
+ "educatin",
+ "effacin",
+ "effectin",
+ "ejectin",
+ "elaboratin",
+ "elbowin",
+ "electin",
+ "electioneerin",
+ "electrifyin",
+ "elevatin",
+ "elicitin",
+ "eliminatin",
+ "elucidatin",
+ "eludin",
+ "elutin",
+ "emanatin",
+ "emancipatin",
+ "embalmin",
+ "embarkin",
+ "embarrassin",
+ "embeddin",
+ "embellishin",
+ "embodyin",
+ "embracin",
+ "embroiderin",
+ "emergin",
+ "emigratin",
+ "emittin",
+ "emphasisin",
+ "emphasizin",
+ "employin",
+ "empowerin",
+ "emptyin",
+ "emulatin",
+ "enablin",
+ "enactin",
+ "encapsulatin",
+ "enchantin",
+ "encirclin",
+ "enclosin",
+ "encodin",
+ "encompassin",
+ "encounterin",
+ "encouragin",
+ "encroachin",
+ "encryptin",
+ "endangerin",
+ "endearin",
+ "endeavorin",
+ "endeavourin",
+ "endin",
+ "endorsin",
+ "endowin",
+ "endurin",
+ "energizin",
+ "enervatin",
+ "enfoldin",
+ "enforcin",
+ "engagin",
+ "engenderin",
+ "engineerin",
+ "engravin",
+ "engrossin",
+ "engulfin",
+ "enhancin",
+ "enjoinin",
+ "enjoyin",
+ "enlargin",
+ "enlightenin",
+ "enlistin",
+ "enlivenin",
+ "ennoblin",
+ "enquirin",
+ "enrichin",
+ "enrollin",
+ "enslavin",
+ "ensuin",
+ "ensurin",
+ "entailin",
+ "entanglin",
+ "enterin",
+ "enterprisin",
+ "entertainin",
+ "enthrallin",
+ "enticin",
+ "entitlin",
+ "entrancin",
+ "entreatin",
+ "entrenchin",
+ "entrustin",
+ "enumeratin",
+ "enunciatin",
+ "envelopin",
+ "envisionin",
+ "envyin",
+ "eppin",
+ "equalin",
+ "equalizin",
+ "equallin",
+ "equatin",
+ "equippin",
+ "eradicatin",
+ "erasin",
+ "erectin",
+ "erlin",
+ "erodin",
+ "errin",
+ "eruptin",
+ "escalatin",
+ "escapin",
+ "eschewin",
+ "escortin",
+ "espin",
+ "espousin",
+ "establishin",
+ "estain",
+ "estimatin",
+ "etchin",
+ "evacuatin",
+ "evadin",
+ "evaluatin",
+ "evangelizin",
+ "evaporatin",
+ "evenin",
+ "everlastin",
+ "everythin",
+ "evidencin",
+ "evincin",
+ "evokin",
+ "evolvin",
+ "ewin",
+ "exacerbatin",
+ "exactin",
+ "exaggeratin",
+ "exaltin",
+ "examinin",
+ "exasperatin",
+ "excavatin",
+ "exceedin",
+ "excellin",
+ "exceptin",
+ "exchangin",
+ "excitin",
+ "exclaimin",
+ "excludin",
+ "excruciatin",
+ "excusin",
+ "executin",
+ "exemplifyin",
+ "exemptin",
+ "exercisin",
+ "exertin",
+ "exhalin",
+ "exhaustin",
+ "exhibitin",
+ "exhilaratin",
+ "exhortin",
+ "existin",
+ "exitin",
+ "expandin",
+ "expectin",
+ "expeditin",
+ "expellin",
+ "expendin",
+ "experiencin",
+ "experimentin",
+ "expirin",
+ "explainin",
+ "explicatin",
+ "explodin",
+ "exploitin",
+ "explorin",
+ "exportin",
+ "exposin",
+ "expoundin",
+ "expressin",
+ "extendin",
+ "extenuatin",
+ "exterminatin",
+ "externalizin",
+ "extinguishin",
+ "extollin",
+ "extortin",
+ "extractin",
+ "extrapolatin",
+ "extricatin",
+ "exudin",
+ "exultin",
+ "eyein",
+ "eyin",
+ "fabricatin",
+ "facilitatin",
+ "facin",
+ "factorin",
+ "fadin",
+ "failin",
+ "faintin",
+ "fakin",
+ "fallin",
+ "falterin",
+ "fancyin",
+ "fannin",
+ "farmin",
+ "farthin",
+ "fascinatin",
+ "fashionin",
+ "fastenin",
+ "fastin",
+ "fatiguin",
+ "fattenin",
+ "faultin",
+ "favorin",
+ "favourin",
+ "fawnin",
+ "fearin",
+ "feastin",
+ "featurin",
+ "feedin",
+ "feelin",
+ "feignin",
+ "fellin",
+ "fencin",
+ "fermentin",
+ "fertilizin",
+ "festerin",
+ "fetchin",
+ "fiddlin",
+ "fidgetin",
+ "fieldin",
+ "fightin",
+ "figurin",
+ "filin",
+ "fillin",
+ "filmin",
+ "filmmakin",
+ "filterin",
+ "financin",
+ "findin",
+ "fingerin",
+ "fingerprintin",
+ "finishin",
+ "firin",
+ "fishin",
+ "fittin",
+ "fixin",
+ "flaggin",
+ "flailin",
+ "flamin",
+ "flankin",
+ "flappin",
+ "flarin",
+ "flashin",
+ "flattenin",
+ "flatterin",
+ "flauntin",
+ "flavorin",
+ "fledglin",
+ "fleein",
+ "fleetin",
+ "flemin",
+ "flemmin",
+ "flexin",
+ "flickerin",
+ "flickin",
+ "flinchin",
+ "flingin",
+ "flippin",
+ "flirtin",
+ "flittin",
+ "floatin",
+ "flockin",
+ "floggin",
+ "floodin",
+ "floorin",
+ "floppin",
+ "flounderin",
+ "flourishin",
+ "flowerin",
+ "flowin",
+ "fluctuatin",
+ "flushin",
+ "flutterin",
+ "flyin",
+ "foamin",
+ "focusin",
+ "focussin",
+ "foldin",
+ "followin",
+ "fondlin",
+ "foolin",
+ "footin",
+ "foragin",
+ "forbearin",
+ "forbiddin",
+ "forcin",
+ "forebodin",
+ "forecastin",
+ "foregoin",
+ "foregroundin",
+ "foreseein",
+ "foreshadowin",
+ "forfeitin",
+ "forgettin",
+ "forgin",
+ "forgivin",
+ "formattin",
+ "formin",
+ "formulatin",
+ "forsakin",
+ "forthcomin",
+ "fortifyin",
+ "forwardin",
+ "fosterin",
+ "foulin",
+ "foundin",
+ "foundlin",
+ "fowlin",
+ "fracturin",
+ "framin",
+ "franchisin",
+ "freakin",
+ "freein",
+ "freestandin",
+ "freezin",
+ "frequentin",
+ "frettin",
+ "frightenin",
+ "frontin",
+ "frostin",
+ "frownin",
+ "fruitin",
+ "frustratin",
+ "fryin",
+ "fuckin",
+ "fuelin",
+ "fulfillin",
+ "fumblin",
+ "fumin",
+ "functionin",
+ "fundin",
+ "fundraisin",
+ "furnishin",
+ "furtherin",
+ "fusin",
+ "fussin",
+ "gaggin",
+ "gainin",
+ "gallin",
+ "gallopin",
+ "gamblin",
+ "gapin",
+ "gardenin",
+ "garnerin",
+ "gaspin",
+ "gatherin",
+ "gatin",
+ "gaugin",
+ "gazin",
+ "gearin",
+ "geldin",
+ "genderin",
+ "generalizin",
+ "generatin",
+ "genotypin",
+ "germinatin",
+ "gesticulatin",
+ "gesturin",
+ "gettin",
+ "gigglin",
+ "gildin",
+ "gissin",
+ "givin",
+ "glancin",
+ "glarin",
+ "glazin",
+ "gleamin",
+ "gleanin",
+ "glidin",
+ "glimmerin",
+ "glintin",
+ "glistenin",
+ "glitterin",
+ "gloamin",
+ "gloatin",
+ "globalizin",
+ "glorifyin",
+ "gloryin",
+ "glowerin",
+ "glowin",
+ "gluin",
+ "gnashin",
+ "gnawin",
+ "goerin",
+ "goin",
+ "goldin",
+ "golfin",
+ "goodlookin",
+ "gorin",
+ "goslin",
+ "gossipin",
+ "gougin",
+ "governin",
+ "grabbin",
+ "gradin",
+ "graduatin",
+ "graftin",
+ "grantin",
+ "graphin",
+ "grapplin",
+ "graspin",
+ "gratifyin",
+ "grayin",
+ "graylin",
+ "grazin",
+ "greenin",
+ "greetin",
+ "grievin",
+ "grillin",
+ "grimacin",
+ "grindin",
+ "grinnin",
+ "grippin",
+ "groanin",
+ "groomin",
+ "gropin",
+ "groundbreakin",
+ "groundin",
+ "groupin",
+ "grovellin",
+ "growin",
+ "growlin",
+ "grubbin",
+ "grudgin",
+ "gruelin",
+ "grumblin",
+ "gruntin",
+ "guaranteein",
+ "guardin",
+ "guessin",
+ "guidin",
+ "gulpin",
+ "gunnin",
+ "gurglin",
+ "gushin",
+ "hackin",
+ "hagglin",
+ "hailin",
+ "hallucinatin",
+ "haltin",
+ "halvin",
+ "hammerin",
+ "hammin",
+ "hamperin",
+ "hamstrin",
+ "handicappin",
+ "handin",
+ "handlin",
+ "handshakin",
+ "handwritin",
+ "hangin",
+ "hankerin",
+ "happenin",
+ "haranguin",
+ "harassin",
+ "harborin",
+ "harbourin",
+ "hardenin",
+ "hardin",
+ "hardworkin",
+ "harin",
+ "harkin",
+ "harmin",
+ "harmonisin",
+ "harmonizin",
+ "harnessin",
+ "harpin",
+ "harrowin",
+ "harryin",
+ "harvestin",
+ "hashin",
+ "hastenin",
+ "hatchin",
+ "hatin",
+ "hauin",
+ "haulin",
+ "hauntin",
+ "havin",
+ "hawkin",
+ "hayin",
+ "hazardin",
+ "hazin",
+ "headin",
+ "healin",
+ "heapin",
+ "hearin",
+ "hearkenin",
+ "heartbreakin",
+ "heartenin",
+ "heartrendin",
+ "heartwarmin",
+ "heatin",
+ "heavin",
+ "hedgin",
+ "heedin",
+ "heightenin",
+ "hein",
+ "helpin",
+ "helsin",
+ "hemmin",
+ "hemorrhagin",
+ "hennin",
+ "heraldin",
+ "herdin",
+ "herin",
+ "herrin",
+ "hesitatin",
+ "hewin",
+ "hibernatin",
+ "hidin",
+ "highlightin",
+ "hijackin",
+ "hikin",
+ "hinderin",
+ "hintin",
+ "hirelin",
+ "hirin",
+ "hissin",
+ "hitchhikin",
+ "hitchin",
+ "hittin",
+ "hoardin",
+ "hobblin",
+ "hockin",
+ "hoein",
+ "hogglin",
+ "hoistin",
+ "holdin",
+ "hollerin",
+ "hollin",
+ "hollowin",
+ "homecomin",
+ "homemakin",
+ "homeschoolin",
+ "homin",
+ "homogenizin",
+ "honin",
+ "honkin",
+ "honorin",
+ "honourin",
+ "hookin",
+ "hootin",
+ "hopin",
+ "hoppin",
+ "horrifyin",
+ "hostin",
+ "houndin",
+ "housecleanin",
+ "housekeepin",
+ "housin",
+ "hoverin",
+ "howlin",
+ "huddlin",
+ "huffin",
+ "huggin",
+ "hulkin",
+ "humanizin",
+ "humblin",
+ "humiliatin",
+ "hummin",
+ "hungerin",
+ "huntin",
+ "hurlin",
+ "hurryin",
+ "hurtin",
+ "hurtlin",
+ "hushin",
+ "hustlin",
+ "icin",
+ "identifyin",
+ "idlin",
+ "ignitin",
+ "ignorin",
+ "illuminatin",
+ "illustratin",
+ "imagin",
+ "imaginin",
+ "imitatin",
+ "immersin",
+ "impactin",
+ "impairin",
+ "impartin",
+ "impedin",
+ "impellin",
+ "impendin",
+ "impingin",
+ "implementin",
+ "implicatin",
+ "implorin",
+ "implyin",
+ "importin",
+ "imposin",
+ "impressin",
+ "imprintin",
+ "imprisonin",
+ "improvin",
+ "improvisin",
+ "imputin",
+ "inbreedin",
+ "incitin",
+ "inclinin",
+ "inclosin",
+ "includin",
+ "incomin",
+ "incorporatin",
+ "increasin",
+ "incriminatin",
+ "inculcatin",
+ "incurrin",
+ "indexin",
+ "indicatin",
+ "inducin",
+ "indulgin",
+ "indwellin",
+ "infectin",
+ "inferrin",
+ "infightin",
+ "infiltratin",
+ "inflatin",
+ "inflictin",
+ "influencin",
+ "informin",
+ "infringin",
+ "infuriatin",
+ "infusin",
+ "ingestin",
+ "ingratiatin",
+ "inhabitin",
+ "inhalin",
+ "inheritin",
+ "inhibitin",
+ "initiatin",
+ "injectin",
+ "injurin",
+ "inklin",
+ "innin",
+ "innovatin",
+ "inquirin",
+ "insertin",
+ "insinuatin",
+ "insistin",
+ "inspectin",
+ "inspirin",
+ "installin",
+ "instigatin",
+ "instillin",
+ "institutin",
+ "instructin",
+ "insulatin",
+ "insultin",
+ "insurin",
+ "integratin",
+ "intendin",
+ "intensifyin",
+ "interactin",
+ "interceptin",
+ "interestin",
+ "interfacin",
+ "interferin",
+ "interlacin",
+ "interlockin",
+ "interminglin",
+ "internalizin",
+ "interposin",
+ "interpretin",
+ "interrogatin",
+ "interruptin",
+ "intersectin",
+ "intertwinin",
+ "intervenin",
+ "interviewin",
+ "interweavin",
+ "intherestin",
+ "intimatin",
+ "intimidatin",
+ "intoxicatin",
+ "intriguin",
+ "introducin",
+ "intrudin",
+ "invadin",
+ "inventin",
+ "invertin",
+ "investigatin",
+ "investin",
+ "invigoratin",
+ "invitin",
+ "invokin",
+ "involvin",
+ "ionizin",
+ "ironin",
+ "irrigatin",
+ "irritatin",
+ "isolatin",
+ "issuin",
+ "itchin",
+ "jabberin",
+ "jabbin",
+ "jammin",
+ "janglin",
+ "jarrin",
+ "jeerin",
+ "jeopardizin",
+ "jestin",
+ "jinglin",
+ "jobbin",
+ "jockeyin",
+ "joggin",
+ "joinin",
+ "jokin",
+ "joltin",
+ "jostlin",
+ "jottin",
+ "journalin",
+ "journeyin",
+ "joultin",
+ "joustin",
+ "judgin",
+ "jugglin",
+ "jumpin",
+ "justifyin",
+ "juttin",
+ "juxtaposin",
+ "kayakin",
+ "keatin",
+ "keelin",
+ "keenin",
+ "keepin",
+ "keesin",
+ "kellin",
+ "kesselrin",
+ "ketterin",
+ "keyin",
+ "kickin",
+ "kiddin",
+ "kidnappin",
+ "killin",
+ "kilwinnin",
+ "kindlin",
+ "kinkin",
+ "kiplin",
+ "kissin",
+ "kneadin",
+ "kneelin",
+ "knittin",
+ "knockin",
+ "knottin",
+ "knowin",
+ "konin",
+ "krigin",
+ "kunmin",
+ "labelin",
+ "labellin",
+ "laborin",
+ "labourin",
+ "lacin",
+ "lackin",
+ "lactatin",
+ "ladin",
+ "laggin",
+ "lamentin",
+ "landholdin",
+ "landin",
+ "landscapin",
+ "languishin",
+ "lansin",
+ "lappin",
+ "lapsin",
+ "lashin",
+ "lastin",
+ "laughin",
+ "launchin",
+ "launderin",
+ "lawmakin",
+ "layerin",
+ "layin",
+ "leachin",
+ "leadin",
+ "leakin",
+ "leanin",
+ "leapin",
+ "learnin",
+ "leasin",
+ "leavin",
+ "lecturin",
+ "leerin",
+ "legislatin",
+ "legitimatin",
+ "legitimizin",
+ "lendin",
+ "lengthenin",
+ "lessenin",
+ "lessin",
+ "letterin",
+ "lettin",
+ "levelin",
+ "levellin",
+ "leveragin",
+ "levyin",
+ "liberalizin",
+ "liberatin",
+ "licensin",
+ "lickin",
+ "lifesavin",
+ "liftin",
+ "lightenin",
+ "lightin",
+ "lightnin",
+ "likin",
+ "liltin",
+ "limitin",
+ "limpin",
+ "lingerin",
+ "linin",
+ "linkin",
+ "liquidatin",
+ "listenin",
+ "listin",
+ "listnin",
+ "litterin",
+ "livin",
+ "loadin",
+ "loafin",
+ "loathin",
+ "lobbyin",
+ "localizin",
+ "locatin",
+ "lockin",
+ "lodgin",
+ "loggin",
+ "loiterin",
+ "lollin",
+ "longin",
+ "longstandin",
+ "lookin",
+ "loomin",
+ "loopin",
+ "loosenin",
+ "loosin",
+ "lootin",
+ "losin",
+ "loungin",
+ "lovemakin",
+ "lovin",
+ "lowerin",
+ "lowin",
+ "lubricatin",
+ "luggin",
+ "lullin",
+ "lumberin",
+ "lunchin",
+ "lurchin",
+ "lurin",
+ "lurkin",
+ "lyin",
+ "lynchin",
+ "machinin",
+ "maddenin",
+ "magnetizin",
+ "magnifyin",
+ "mailin",
+ "maimin",
+ "mainsprin",
+ "mainstreamin",
+ "maintainin",
+ "mainwarin",
+ "majorin",
+ "makin",
+ "malfunctionin",
+ "malingerin",
+ "managin",
+ "mandatin",
+ "maneuverin",
+ "manifestin",
+ "manipulatin",
+ "mannerin",
+ "mannin",
+ "manoeuvrin",
+ "manufacturin",
+ "manurin",
+ "mappin",
+ "maraudin",
+ "marchin",
+ "marketin",
+ "markin",
+ "marryin",
+ "marshallin",
+ "marvelin",
+ "marvellin",
+ "maskin",
+ "masqueradin",
+ "massagin",
+ "massin",
+ "masterin",
+ "masturbatin",
+ "matchin",
+ "matchmakin",
+ "matin",
+ "mattin",
+ "maturin",
+ "maximisin",
+ "maximizin",
+ "meanderin",
+ "meanin",
+ "measurin",
+ "meddlin",
+ "mediatin",
+ "meditatin",
+ "meetin",
+ "meldin",
+ "meltin",
+ "memorizin",
+ "menacin",
+ "mendin",
+ "mentionin",
+ "mentorin",
+ "merchandisin",
+ "mergin",
+ "meshin",
+ "mesmerizin",
+ "messagin",
+ "messin",
+ "meterin",
+ "middlin",
+ "midmornin",
+ "migratin",
+ "milkin",
+ "millin",
+ "mimickin",
+ "mincin",
+ "mindin",
+ "minglin",
+ "minimisin",
+ "minimizin",
+ "minin",
+ "ministerin",
+ "mirrorin",
+ "misbehavin",
+ "misgivin",
+ "misleadin",
+ "misreadin",
+ "misrepresentin",
+ "missin",
+ "mistakin",
+ "misunderstandin",
+ "mitigatin",
+ "mixin",
+ "moanin",
+ "mobilisin",
+ "mobilizin",
+ "mockin",
+ "modelin",
+ "modellin",
+ "moderatin",
+ "modernizin",
+ "modifyin",
+ "modulatin",
+ "moistenin",
+ "moldin",
+ "molestin",
+ "monitorin",
+ "monopolizin",
+ "moorin",
+ "mopin",
+ "moppin",
+ "moralizin",
+ "mornin",
+ "morphin",
+ "mortifyin",
+ "motherin",
+ "motionin",
+ "motivatin",
+ "motorin",
+ "moulderin",
+ "mouldin",
+ "mountaineerin",
+ "mountin",
+ "mournin",
+ "mouthin",
+ "movin",
+ "mowin",
+ "mullin",
+ "multiplexin",
+ "multiplyin",
+ "multitaskin",
+ "mumblin",
+ "munchin",
+ "murderin",
+ "murmurin",
+ "murtherin",
+ "musin",
+ "musterin",
+ "mutterin",
+ "mystifyin",
+ "naethin",
+ "naggin",
+ "nailin",
+ "namin",
+ "namsayin",
+ "nankin",
+ "nappin",
+ "narratin",
+ "narrowin",
+ "nauseatin",
+ "nuffin",
+ "navigatin",
+ "nearin",
+ "necessitatin",
+ "necrotizin",
+ "needin",
+ "needlin",
+ "negatin",
+ "neglectin",
+ "negotiatin",
+ "neighborin",
+ "neighbourin",
+ "neighin",
+ "nestin",
+ "nestlin",
+ "nettin",
+ "networkin",
+ "neuroimagin",
+ "neutralizin",
+ "nibblin",
+ "nippin",
+ "noddin",
+ "nominatin",
+ "nonconformin",
+ "nonlivin",
+ "nonthreatenin",
+ "normalizin",
+ "nosin",
+ "nothin",
+ "noticin",
+ "notifyin",
+ "notin",
+ "nottin",
+ "notwithstandin",
+ "nourishin",
+ "nudgin",
+ "numberin",
+ "numbin",
+ "nursin",
+ "nurturin",
+ "obeyin",
+ "objectifyin",
+ "objectin",
+ "obligin",
+ "obliteratin",
+ "obscurin",
+ "observin",
+ "obsessin",
+ "obstructin",
+ "obtainin",
+ "obviatin",
+ "occasionin",
+ "occludin",
+ "occupyin",
+ "occurin",
+ "occurrin",
+ "offendin",
+ "offerin",
+ "officiatin",
+ "offin",
+ "offsettin",
+ "offshorin",
+ "offsprin",
+ "oglin",
+ "oilin",
+ "omittin",
+ "oncomin",
+ "ongoin",
+ "oozin",
+ "openin",
+ "operatin",
+ "opposin",
+ "oppressin",
+ "optimisin",
+ "optimizin",
+ "optin",
+ "orbitin",
+ "orchestratin",
+ "ordainin",
+ "orderin",
+ "ordinatin",
+ "organisin",
+ "organizin",
+ "orientin",
+ "originatin",
+ "ornamentin",
+ "oscillatin",
+ "otherin",
+ "oustin",
+ "outcroppin",
+ "outfittin",
+ "outgoin",
+ "outin",
+ "outlawin",
+ "outlinin",
+ "outlyin",
+ "outnumberin",
+ "outpourin",
+ "outsourcin",
+ "outstandin",
+ "outstrippin",
+ "overarchin",
+ "overbearin",
+ "overcomin",
+ "overcrowdin",
+ "overdoin",
+ "overeatin",
+ "overestimatin",
+ "overexpressin",
+ "overfishin",
+ "overflowin",
+ "overgrazin",
+ "overhangin",
+ "overhaulin",
+ "overhearin",
+ "overheatin",
+ "overlappin",
+ "overlayin",
+ "overloadin",
+ "overlookin",
+ "overlyin",
+ "overmasterin",
+ "overpowerin",
+ "overreachin",
+ "overreactin",
+ "overridin",
+ "overrulin",
+ "overrunnin",
+ "overseein",
+ "overshadowin",
+ "overstatin",
+ "oversteppin",
+ "overtakin",
+ "overthrowin",
+ "overtrainin",
+ "overturnin",
+ "overweenin",
+ "overwhelmin",
+ "overwritin",
+ "owin",
+ "ownin",
+ "oxidisin",
+ "oxidizin",
+ "pacifyin",
+ "pacin",
+ "packagin",
+ "packin",
+ "paddin",
+ "paddlin",
+ "pagin",
+ "painstakin",
+ "paintin",
+ "pairin",
+ "palin",
+ "palpitatin",
+ "pamperin",
+ "panderin",
+ "panelin",
+ "panellin",
+ "panickin",
+ "pannin",
+ "pantin",
+ "paradin",
+ "parallelin",
+ "paralysin",
+ "paralyzin",
+ "paraphrasin",
+ "pardonin",
+ "parentin",
+ "parin",
+ "parkin",
+ "parsin",
+ "partakin",
+ "participatin",
+ "partin",
+ "partitionin",
+ "partnerin",
+ "partyin",
+ "passin",
+ "pastin",
+ "patchin",
+ "patentin",
+ "patrollin",
+ "patronisin",
+ "patronizin",
+ "patterin",
+ "patternin",
+ "pausin",
+ "pavin",
+ "pawin",
+ "payin",
+ "peacebuildin",
+ "peacekeepin",
+ "peacemakin",
+ "peakin",
+ "pealin",
+ "peckin",
+ "peddlin",
+ "peekin",
+ "peelin",
+ "peepin",
+ "peerin",
+ "pekin",
+ "peltin",
+ "pendin",
+ "penetratin",
+ "pennin",
+ "peoplin",
+ "perceivin",
+ "perchin",
+ "percolatin",
+ "perfectin",
+ "perforatin",
+ "performin",
+ "perishin",
+ "permeatin",
+ "permittin",
+ "perpetratin",
+ "perpetuatin",
+ "perplexin",
+ "persecutin",
+ "perseverin",
+ "pershin",
+ "persistin",
+ "perspirin",
+ "persuadin",
+ "pertainin",
+ "perusin",
+ "pervadin",
+ "pervertin",
+ "pesterin",
+ "petitionin",
+ "pettin",
+ "phasin",
+ "philosophizin",
+ "phishin",
+ "phonin",
+ "photocopyin",
+ "photographin",
+ "phrasin",
+ "pickerin",
+ "picketin",
+ "pickin",
+ "picklin",
+ "picturin",
+ "piecin",
+ "piercin",
+ "pilferin",
+ "pilin",
+ "pillagin",
+ "pilotin",
+ "pinchin",
+ "pinin",
+ "pinnin",
+ "pioneerin",
+ "pipin",
+ "pissin",
+ "pitchin",
+ "pittin",
+ "pityin",
+ "pivotin",
+ "placin",
+ "plaguin",
+ "planin",
+ "plankin",
+ "plannin",
+ "plantin",
+ "plasterin",
+ "platin",
+ "playin",
+ "playthin",
+ "pleadin",
+ "pleasin",
+ "pledgin",
+ "ploddin",
+ "plottin",
+ "ploughin",
+ "plowin",
+ "pluckin",
+ "pluggin",
+ "plumbin",
+ "plummetin",
+ "plunderin",
+ "plungin",
+ "plyin",
+ "poachin",
+ "pocketin",
+ "podcastin",
+ "pointin",
+ "poisonin",
+ "pokin",
+ "polarizin",
+ "poldin",
+ "policin",
+ "policymakin",
+ "polishin",
+ "pollin",
+ "pollutin",
+ "ponderin",
+ "poolin",
+ "poppin",
+ "populatin",
+ "porin",
+ "portrayin",
+ "posin",
+ "positin",
+ "positionin",
+ "possessin",
+ "postin",
+ "postponin",
+ "postulatin",
+ "posturin",
+ "pottin",
+ "poundin",
+ "pourin",
+ "poutin",
+ "powerin",
+ "practicin",
+ "practisin",
+ "praisin",
+ "prancin",
+ "prayin",
+ "preachin",
+ "precedin",
+ "precipitatin",
+ "precludin",
+ "predictin",
+ "predisposin",
+ "predominatin",
+ "preexistin",
+ "preferrin",
+ "preparin",
+ "preponderatin",
+ "prepossessin",
+ "preprocessin",
+ "prescribin",
+ "presentin",
+ "preservin",
+ "presidin",
+ "pressin",
+ "pressurin",
+ "presumin",
+ "presupposin",
+ "pretendin",
+ "prevailin",
+ "preventin",
+ "preyin",
+ "pricin",
+ "prickin",
+ "primin",
+ "printin",
+ "prioritizin",
+ "privateerin",
+ "privilegin",
+ "probin",
+ "problemsolvin",
+ "proceedin",
+ "processin",
+ "proclaimin",
+ "procurin",
+ "proddin",
+ "producin",
+ "professin",
+ "profilin",
+ "profiteerin",
+ "profitin",
+ "programmin",
+ "progressin",
+ "prohibitin",
+ "projectin",
+ "proliferatin",
+ "prolongin",
+ "promisin",
+ "promotin",
+ "promptin",
+ "promulgatin",
+ "pronouncin",
+ "proofin",
+ "proofreadin",
+ "propagatin",
+ "propellin",
+ "prophesyin",
+ "proposin",
+ "proppin",
+ "prosecutin",
+ "proselytizin",
+ "prospectin",
+ "prosperin",
+ "prostratin",
+ "protectin",
+ "protestin",
+ "prototypin",
+ "protrudin",
+ "providin",
+ "provin",
+ "provisionin",
+ "provokin",
+ "prowlin",
+ "prunin",
+ "pryin",
+ "publicizin",
+ "publishin",
+ "puddin",
+ "pullin",
+ "pulsatin",
+ "pulsin",
+ "pumpin",
+ "punchin",
+ "punishin",
+ "punnin",
+ "purchasin",
+ "purgin",
+ "purifyin",
+ "purportin",
+ "purrin",
+ "pursuin",
+ "pushin",
+ "puttin",
+ "puzzlin",
+ "quakin",
+ "qualifyin",
+ "quantifyin",
+ "quarrelin",
+ "quarrellin",
+ "quarryin",
+ "quarterin",
+ "quaverin",
+ "quellin",
+ "quenchin",
+ "queryin",
+ "questin",
+ "questionin",
+ "queuein",
+ "queuin",
+ "quickenin",
+ "quietin",
+ "quiltin",
+ "quittin",
+ "quiverin",
+ "quizzin",
+ "quotin",
+ "racin",
+ "rackin",
+ "radiatin",
+ "raftin",
+ "ragin",
+ "raidin",
+ "railin",
+ "rainin",
+ "rakin",
+ "rallyin",
+ "ramblin",
+ "rammin",
+ "ranchin",
+ "rangin",
+ "rantin",
+ "rapin",
+ "rappin",
+ "raspin",
+ "ratifyin",
+ "ratin",
+ "rationalizin",
+ "rationin",
+ "rattlin",
+ "ravagin",
+ "ravin",
+ "ravishin",
+ "reachin",
+ "reactin",
+ "readin",
+ "reaffirmin",
+ "realisin",
+ "realizin",
+ "reapin",
+ "reappearin",
+ "rearin",
+ "rearrangin",
+ "reasonin",
+ "reassurin",
+ "rebellin",
+ "rebuildin",
+ "rebukin",
+ "recallin",
+ "recastin",
+ "recedin",
+ "receivin",
+ "reciprocatin",
+ "recitin",
+ "reckonin",
+ "reclaimin",
+ "reclinin",
+ "recognisin",
+ "recognizin",
+ "recollectin",
+ "recommendin",
+ "reconcilin",
+ "reconstructin",
+ "recordin",
+ "recountin",
+ "recoverin",
+ "recreatin",
+ "recruitin",
+ "rectifyin",
+ "recurrin",
+ "recyclin",
+ "reddenin",
+ "reddin",
+ "redeemin",
+ "redefinin",
+ "redirectin",
+ "redistrictin",
+ "reducin",
+ "reekin",
+ "reelin",
+ "reengineerin",
+ "refactorin",
+ "referencin",
+ "referrin",
+ "refinancin",
+ "refinin",
+ "reflectin",
+ "reformin",
+ "refractin",
+ "refrainin",
+ "reframin",
+ "refreshin",
+ "refuelin",
+ "refusin",
+ "refutin",
+ "regainin",
+ "regardin",
+ "regeneratin",
+ "registerin",
+ "regrettin",
+ "regulatin",
+ "rehearin",
+ "rehearsin",
+ "reignin",
+ "reinforcin",
+ "reinventin",
+ "reiteratin",
+ "rejectin",
+ "rejoicin",
+ "rejoinin",
+ "relapsin",
+ "relatin",
+ "relaxin",
+ "relayin",
+ "releasin",
+ "relievin",
+ "relinquishin",
+ "relishin",
+ "relivin",
+ "reloadin",
+ "relocatin",
+ "relyin",
+ "remainin",
+ "remakin",
+ "remarkin",
+ "remedyin",
+ "rememberin",
+ "remindin",
+ "reminiscin",
+ "remittin",
+ "remodelin",
+ "remodellin",
+ "removin",
+ "renamin",
+ "renderin",
+ "rendin",
+ "renewin",
+ "renouncin",
+ "renovatin",
+ "rentin",
+ "reopenin",
+ "reorderin",
+ "reorganizin",
+ "repairin",
+ "repayin",
+ "repealin",
+ "repeatin",
+ "repellin",
+ "repentin",
+ "replacin",
+ "replenishin",
+ "replicatin",
+ "replyin",
+ "reportin",
+ "reposin",
+ "repositionin",
+ "representin",
+ "repressin",
+ "reprintin",
+ "reproachin",
+ "reprocessin",
+ "reproducin",
+ "reprovin",
+ "repudiatin",
+ "requestin",
+ "requirin",
+ "rereadin",
+ "rescuin",
+ "researchin",
+ "resemblin",
+ "resentin",
+ "reservin",
+ "resettin",
+ "reshapin",
+ "residin",
+ "resignin",
+ "resistin",
+ "resolvin",
+ "resonatin",
+ "resortin",
+ "resoundin",
+ "respectin",
+ "respondin",
+ "restin",
+ "restorin",
+ "restrainin",
+ "restrictin",
+ "restructurin",
+ "resultin",
+ "resumin",
+ "resurfacin",
+ "retailin",
+ "retainin",
+ "retardin",
+ "retellin",
+ "rethinkin",
+ "retirin",
+ "retracin",
+ "retrainin",
+ "retreatin",
+ "retrievin",
+ "returnin",
+ "reusin",
+ "revealin",
+ "revellin",
+ "reverberatin",
+ "reversin",
+ "revertin",
+ "reviewin",
+ "revisin",
+ "revisitin",
+ "revivin",
+ "revoltin",
+ "revolvin",
+ "rewardin",
+ "reworkin",
+ "rewritin",
+ "rhymin",
+ "riddin",
+ "ridiculin",
+ "ridin",
+ "riggin",
+ "rightin",
+ "ringin",
+ "rinsin",
+ "riotin",
+ "ripenin",
+ "rippin",
+ "ripplin",
+ "risin",
+ "riskin",
+ "rivetin",
+ "roamin",
+ "roarin",
+ "roastin",
+ "rockin",
+ "rollickin",
+ "rollin",
+ "rompin",
+ "roofin",
+ "roomin",
+ "roostin",
+ "rootin",
+ "rotatin",
+ "rottin",
+ "roughin",
+ "roundin",
+ "rousin",
+ "routin",
+ "rovin",
+ "rowin",
+ "rowlin",
+ "rubbin",
+ "rufflin",
+ "ruinin",
+ "rulemakin",
+ "rulin",
+ "rumblin",
+ "ruminatin",
+ "rummagin",
+ "runnin",
+ "rushin",
+ "rustin",
+ "rustlin",
+ "rythin",
+ "sackin",
+ "sacrificin",
+ "safeguardin",
+ "safekeepin",
+ "saggin",
+ "sailin",
+ "salutin",
+ "samplin",
+ "sanctifyin",
+ "sanctionin",
+ "saplin",
+ "satisfyin",
+ "saunterin",
+ "savin",
+ "savorin",
+ "sawin",
+ "sayin",
+ "scaffoldin",
+ "scaldin",
+ "scalin",
+ "scamperin",
+ "scannin",
+ "scarin",
+ "scarrin",
+ "scathin",
+ "scatterin",
+ "scavengin",
+ "schedulin",
+ "schellin",
+ "schemin",
+ "schillin",
+ "schoolin",
+ "scoldin",
+ "scoopin",
+ "scorchin",
+ "scorin",
+ "scourin",
+ "scoutin",
+ "scowlin",
+ "scramblin",
+ "scrapin",
+ "scratchin",
+ "screamin",
+ "screechin",
+ "screenin",
+ "screwin",
+ "scribblin",
+ "scriptin",
+ "scrollin",
+ "scrubbin",
+ "scrutinizin",
+ "scurryin",
+ "seafarin",
+ "sealin",
+ "searchin",
+ "searin",
+ "seasonin",
+ "seatin",
+ "secretin",
+ "securin",
+ "seducin",
+ "seedin",
+ "seedlin",
+ "seein",
+ "seekin",
+ "seemin",
+ "seepin",
+ "seethin",
+ "seizin",
+ "selectin",
+ "sellin",
+ "sendin",
+ "sensin",
+ "sentencin",
+ "separatin",
+ "sequencin",
+ "servicin",
+ "servin",
+ "settin",
+ "settlin",
+ "sewin",
+ "shadin",
+ "shadowin",
+ "shakin",
+ "shamin",
+ "shapin",
+ "sharin",
+ "sharpenin",
+ "shatterin",
+ "shavin",
+ "shearin",
+ "sheathin",
+ "sheddin",
+ "sheetin",
+ "shellin",
+ "shelterin",
+ "shelvin",
+ "shewin",
+ "shieldin",
+ "shiftin",
+ "shillin",
+ "shimmerin",
+ "shinin",
+ "shipbuildin",
+ "shippin",
+ "shirkin",
+ "shittin",
+ "shiverin",
+ "shockin",
+ "shootin",
+ "shoppin",
+ "shortcomin",
+ "shortenin",
+ "shoulderin",
+ "shoutin",
+ "shovin",
+ "showerin",
+ "showin",
+ "shriekin",
+ "shrinkin",
+ "shruggin",
+ "shudderin",
+ "shufflin",
+ "shuntin",
+ "shuttin",
+ "siblin",
+ "sickenin",
+ "sidin",
+ "siftin",
+ "sighin",
+ "sightin",
+ "sightseein",
+ "signalin",
+ "signallin",
+ "signifyin",
+ "signin",
+ "silencin",
+ "simmerin",
+ "simplifyin",
+ "simulatin",
+ "singin",
+ "sinkin",
+ "sinnin",
+ "sippin",
+ "sitin",
+ "sittin",
+ "situatin",
+ "sizin",
+ "sizzlin",
+ "skatin",
+ "sketchin",
+ "skiin",
+ "skimmin",
+ "skinnin",
+ "skippin",
+ "skirmishin",
+ "skirtin",
+ "skulkin",
+ "slammin",
+ "slantin",
+ "slappin",
+ "slashin",
+ "slaughterin",
+ "slaveholdin",
+ "slayin",
+ "sleepin",
+ "slicin",
+ "slidin",
+ "slippin",
+ "slopin",
+ "slowin",
+ "slumberin",
+ "smackin",
+ "smartin",
+ "smashin",
+ "smatterin",
+ "smellin",
+ "smeltin",
+ "smilin",
+ "smokin",
+ "smolderin",
+ "smoothin",
+ "smotherin",
+ "smoulderin",
+ "smugglin",
+ "snappin",
+ "snarlin",
+ "snatchin",
+ "sneakin",
+ "sneerin",
+ "sneezin",
+ "sniffin",
+ "snoopin",
+ "snorin",
+ "snortin",
+ "snowin",
+ "soakin",
+ "soarin",
+ "sobbin",
+ "soberin",
+ "socializin",
+ "softenin",
+ "solderin",
+ "soldierin",
+ "solicitin",
+ "solvin",
+ "somethin",
+ "soothin",
+ "sorrowin",
+ "sortin",
+ "soundin",
+ "sourcin",
+ "sowin",
+ "spacin",
+ "spaldin",
+ "spankin",
+ "spannin",
+ "sparin",
+ "sparkin",
+ "sparklin",
+ "sparrin",
+ "spauldin",
+ "spawnin",
+ "speakin",
+ "specializin",
+ "specifyin",
+ "speculatin",
+ "speedin",
+ "spellin",
+ "spendin",
+ "spewin",
+ "spillin",
+ "spinnin",
+ "spiralin",
+ "spittin",
+ "splashin",
+ "splicin",
+ "splinterin",
+ "splittin",
+ "spoilin",
+ "sponsorin",
+ "sportin",
+ "spottin",
+ "spoutin",
+ "sprawlin",
+ "sprayin",
+ "spreadin",
+ "springin",
+ "sprinklin",
+ "sproutin",
+ "spurrin",
+ "sputterin",
+ "spyin",
+ "squabblin",
+ "squarin",
+ "squattin",
+ "squeakin",
+ "squealin",
+ "squeezin",
+ "squintin",
+ "squirmin",
+ "stabbin",
+ "stabilizin",
+ "stackin",
+ "staffin",
+ "staggerin",
+ "stagin",
+ "stainin",
+ "stakin",
+ "stalkin",
+ "stallin",
+ "stammerin",
+ "stampin",
+ "standin",
+ "starin",
+ "starrin",
+ "startin",
+ "startlin",
+ "starvin",
+ "statin",
+ "stayin",
+ "steadyin",
+ "stealin",
+ "steamin",
+ "steerin",
+ "stemmin",
+ "stentin",
+ "steppin",
+ "stereotypin",
+ "sterlin",
+ "stickin",
+ "stiffenin",
+ "stiflin",
+ "stimulatin",
+ "stingin",
+ "stinkin",
+ "stipulatin",
+ "stirlin",
+ "stirrin",
+ "stitchin",
+ "stockin",
+ "stompin",
+ "stoopin",
+ "stoppin",
+ "storin",
+ "stormin",
+ "storytellin",
+ "straddlin",
+ "stragglin",
+ "straightenin",
+ "strainin",
+ "stranglin",
+ "strappin",
+ "strayin",
+ "streamin",
+ "streamlinin",
+ "strengthenin",
+ "stressin",
+ "stretchin",
+ "stridin",
+ "strikin",
+ "stringin",
+ "strippin",
+ "strivin",
+ "strokin",
+ "strollin",
+ "structurin",
+ "strugglin",
+ "struttin",
+ "studyin",
+ "stuffin",
+ "stumblin",
+ "stunnin",
+ "stutterin",
+ "stylin",
+ "subduin",
+ "subjectin",
+ "submittin",
+ "subordinatin",
+ "subscribin",
+ "subsidin",
+ "subsistin",
+ "substitutin",
+ "subtractin",
+ "subvertin",
+ "succeedin",
+ "succumbin",
+ "suckin",
+ "sucklin",
+ "sufferin",
+ "suffocatin",
+ "suggestin",
+ "suin",
+ "summarizin",
+ "summin",
+ "summonin",
+ "superconductin",
+ "superintendin",
+ "supersedin",
+ "supervisin",
+ "supplementin",
+ "supplyin",
+ "supportin",
+ "supposin",
+ "suppressin",
+ "surfacin",
+ "surfin",
+ "surgin",
+ "surmountin",
+ "surpassin",
+ "surprisin",
+ "surrenderin",
+ "surroundin",
+ "surveyin",
+ "survivin",
+ "suspectin",
+ "suspendin",
+ "sustainin",
+ "swaggerin",
+ "swallowin",
+ "swappin",
+ "swarmin",
+ "swayin",
+ "swearin",
+ "sweatin",
+ "sweepin",
+ "swellin",
+ "swelterin",
+ "swimmin",
+ "swingin",
+ "swirlin",
+ "switchin",
+ "swoopin",
+ "symbolizin",
+ "sympathizin",
+ "synthesizin",
+ "tackin",
+ "tacklin",
+ "taggin",
+ "tailin",
+ "tailorin",
+ "takin",
+ "talkin",
+ "tamin",
+ "tamperin",
+ "tanglin",
+ "tantalisin",
+ "tantalizin",
+ "taperin",
+ "tapin",
+ "tappin",
+ "targetin",
+ "tarlin",
+ "tarryin",
+ "taskin",
+ "tastin",
+ "tattooin",
+ "tatterin",
+ "tauntin",
+ "taxin",
+ "teachin",
+ "teamin",
+ "tearin",
+ "teasin",
+ "teemin",
+ "teeterin",
+ "teethin",
+ "telecommutin",
+ "telegraphin",
+ "telemarketin",
+ "telephonin",
+ "tellin",
+ "temperin",
+ "temptin",
+ "tenderin",
+ "tendin",
+ "tensin",
+ "terminatin",
+ "terrifyin",
+ "terrorizin",
+ "testifyin",
+ "testin",
+ "tetherin",
+ "textin",
+ "thankin",
+ "thanksgivin",
+ "thawin",
+ "theorisin",
+ "theorizin",
+ "thickenin",
+ "thievin",
+ "thinkin",
+ "thinnin",
+ "thirstin",
+ "thoroughgoin",
+ "thrampin",
+ "thrashin",
+ "threadin",
+ "threatenin",
+ "threshin",
+ "thresholdin",
+ "thrillin",
+ "thrivin",
+ "throbbin",
+ "throngin",
+ "throttlin",
+ "throwin",
+ "thrustin",
+ "thuddin",
+ "thumbin",
+ "thumpin",
+ "thunderin",
+ "thwartin",
+ "tickin",
+ "ticklin",
+ "tidyin",
+ "tightenin",
+ "tilin",
+ "tillin",
+ "tiltin",
+ "timin",
+ "tinglin",
+ "tinkerin",
+ "tinklin",
+ "tintin",
+ "tippin",
+ "tirin",
+ "tithin",
+ "titillatin",
+ "toastin",
+ "toilin",
+ "toleratin",
+ "tollin",
+ "toolin",
+ "toppin",
+ "topplin",
+ "tormentin",
+ "torturin",
+ "tossin",
+ "tostrin",
+ "totalin",
+ "totalizin",
+ "totallin",
+ "totin",
+ "totterin",
+ "touchin",
+ "tourin",
+ "toutin",
+ "towerin",
+ "towin",
+ "toyin",
+ "tracin",
+ "trackin",
+ "tradin",
+ "traffickin",
+ "trailin",
+ "trainin",
+ "trampin",
+ "tramplin",
+ "transactin",
+ "transcendin",
+ "transcribin",
+ "transferrin",
+ "transformin",
+ "transgressin",
+ "transitionin",
+ "translatin",
+ "transmittin",
+ "transpirin",
+ "transplantin",
+ "transportin",
+ "transposin",
+ "trappin",
+ "travelin",
+ "travellin",
+ "traversin",
+ "trawlin",
+ "treadin",
+ "treatin",
+ "trekkin",
+ "tremblin",
+ "trenchin",
+ "trendin",
+ "trespassin",
+ "trickin",
+ "tricklin",
+ "triflin",
+ "triggerin",
+ "trillin",
+ "trimmin",
+ "trippin",
+ "triumphin",
+ "trollin",
+ "troopin",
+ "trottin",
+ "troubleshootin",
+ "troublin",
+ "truckin",
+ "trudgin",
+ "trumpetin",
+ "trustin",
+ "tryin",
+ "tubin",
+ "tuckin",
+ "tuggin",
+ "tulvin",
+ "tumblin",
+ "tunin",
+ "tunnelin",
+ "tunnellin",
+ "turnin",
+ "tutorin",
+ "tweakin",
+ "twinin",
+ "twinklin",
+ "twinnin",
+ "twirlin",
+ "twistin",
+ "twitchin",
+ "twitterin",
+ "tyin",
+ "typesettin",
+ "typewritin",
+ "typin",
+ "unappealin",
+ "unassumin",
+ "unavailin",
+ "unbecomin",
+ "unbelievin",
+ "unbendin",
+ "unblinkin",
+ "uncarin",
+ "unceasin",
+ "unchangin",
+ "uncomplainin",
+ "uncomprehendin",
+ "uncompromisin",
+ "unconvincin",
+ "uncoverin",
+ "undercuttin",
+ "underestimatin",
+ "undergoin",
+ "underlinin",
+ "underlyin",
+ "underminin",
+ "underpinnin",
+ "underscorin",
+ "understandin",
+ "undertakin",
+ "underwritin",
+ "undeservin",
+ "undeviatin",
+ "undoin",
+ "undressin",
+ "undulatin",
+ "undyin",
+ "unearthin",
+ "unendin",
+ "unerrin",
+ "unfailin",
+ "unfalterin",
+ "unfeelin",
+ "unflaggin",
+ "unflatterin",
+ "unflinchin",
+ "unfoldin",
+ "unforgivin",
+ "unheedin",
+ "unhesitatin",
+ "unifyin",
+ "uninterestin",
+ "uninvitin",
+ "unitin",
+ "universalizin",
+ "unknowin",
+ "unleashin",
+ "unloadin",
+ "unlockin",
+ "unlovin",
+ "unmaskin",
+ "unmeanin",
+ "unmovin",
+ "unnervin",
+ "unoffendin",
+ "unpackin",
+ "unpleasin",
+ "unpretendin",
+ "unpromisin",
+ "unquestionin",
+ "unravelin",
+ "unravellin",
+ "unreasonin",
+ "unrelentin",
+ "unremittin",
+ "unresistin",
+ "unrollin",
+ "unsatisfyin",
+ "unseein",
+ "unsettlin",
+ "unsmilin",
+ "unsparin",
+ "unsurprisin",
+ "unsuspectin",
+ "unswervin",
+ "unthinkin",
+ "untirin",
+ "untyin",
+ "unvaryin",
+ "unveilin",
+ "unwaverin",
+ "unwillin",
+ "unwindin",
+ "unwittin",
+ "unyieldin",
+ "upbraidin",
+ "upbringin",
+ "upbuildin",
+ "upcomin",
+ "updatin",
+ "upgradin",
+ "upholdin",
+ "upliftin",
+ "uploadin",
+ "uprisin",
+ "uprootin",
+ "upsettin",
+ "upstandin",
+ "upswin",
+ "upwellin",
+ "urgin",
+ "urinatin",
+ "usherin",
+ "usin",
+ "usurpin",
+ "utilisin",
+ "utilizin",
+ "utterin",
+ "vacatin",
+ "vacationin",
+ "vacillatin",
+ "vacuumin",
+ "validatin",
+ "valuin",
+ "vanishin",
+ "vanquishin",
+ "varyin",
+ "vaultin",
+ "vauntin",
+ "veerin",
+ "veilin",
+ "vendin",
+ "ventilatin",
+ "ventin",
+ "venturin",
+ "vergin",
+ "verifyin",
+ "versionin",
+ "vestin",
+ "vettin",
+ "vexin",
+ "vibratin",
+ "victuallin",
+ "viewin",
+ "vikin",
+ "vindicatin",
+ "vinin",
+ "violatin",
+ "visionin",
+ "visitin",
+ "visualizin",
+ "vitalizin",
+ "vivifyin",
+ "voicin",
+ "voidin",
+ "volunteerin",
+ "vomitin",
+ "votin",
+ "vowin",
+ "voyagin",
+ "vyin",
+ "wadin",
+ "waftin",
+ "waggin",
+ "wagin",
+ "wailin",
+ "waitin",
+ "waivin",
+ "wakin",
+ "walkin",
+ "wallowin",
+ "wanderin",
+ "wanin",
+ "wantin",
+ "warblin",
+ "wardin",
+ "warehousin",
+ "warin",
+ "warmin",
+ "warnin",
+ "warpin",
+ "warrin",
+ "washin",
+ "wastin",
+ "watchin",
+ "waterin",
+ "watermarkin",
+ "waverin",
+ "wavin",
+ "waxin",
+ "weakenin",
+ "weaklin",
+ "weanin",
+ "wearin",
+ "wearyin",
+ "weatherin",
+ "weavin",
+ "webbin",
+ "weddin",
+ "weedin",
+ "weepin",
+ "weighin",
+ "weightin",
+ "welcomin",
+ "weldin",
+ "wellbein",
+ "wellin",
+ "wettin",
+ "whackin",
+ "whalin",
+ "wheelin",
+ "wheezin",
+ "whimperin",
+ "whinin",
+ "whippin",
+ "whirlin",
+ "whirrin",
+ "whisperin",
+ "whistlin",
+ "whitenin",
+ "whitin",
+ "whizzin",
+ "whoopin",
+ "whoppin",
+ "widenin",
+ "wieldin",
+ "wildin",
+ "willin",
+ "wincin",
+ "windin",
+ "winkin",
+ "winnin",
+ "winterin",
+ "wipin",
+ "wirin",
+ "wishin",
+ "withdrawin",
+ "witherin",
+ "withholdin",
+ "withstandin",
+ "witnessin",
+ "wonderin",
+ "wooin",
+ "wordin",
+ "workin",
+ "worryin",
+ "worsenin",
+ "worshippin",
+ "woundin",
+ "wranglin",
+ "wrappin",
+ "wreckin",
+ "wrenchin",
+ "wrestlin",
+ "wrigglin",
+ "wringin",
+ "wrinklin",
+ "writhin",
+ "writin",
+ "wrongdoin",
+ "yachtin",
+ "yankin",
+ "yappin",
+ "yawnin",
+ "yearlin",
+ "yearnin",
+ "yellin",
+ "yellowin",
+ "yelpin",
+ "yieldin",
+ "zaggin",
+ "zeroin",
+ "zigzaggin",
+ "zippin",
+ "zonin",
+ "zoomin"
);
}
src/main/java/com/whitemagicsoftware/keenquotes/Lexeme.java
* of the given {@link LexemeType}s.
*/
- public boolean anyType( final LexemeType... types ) {
+ public boolean isType( final LexemeType... types ) {
assert types != null;
LexemeType getType() {
return mType;
+ }
+
+ public boolean before( final Lexeme that ) {
+ return compareTo( that ) < 0;
+ }
+
+ public boolean after( final Lexeme that ) {
+ return compareTo( that ) > 0;
}
src/main/java/com/whitemagicsoftware/keenquotes/Parser.java
import static com.whitemagicsoftware.keenquotes.LexemeType.*;
import static com.whitemagicsoftware.keenquotes.TokenType.*;
-
-/**
- * Converts straight double/single quotes and apostrophes to curly equivalents.
- */
-public final class Parser {
- /**
- * Single quotes preceded by these {@link LexemeType}s may be opening quotes.
- */
- private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE =
- new LexemeType[]{SPACE, DASH, QUOTE_DOUBLE, OPENING_GROUP, EOL, EOP};
-
- /**
- * Single quotes succeeded by these {@link LexemeType}s may be opening quotes.
- */
- private static final LexemeType[] LAGGING_QUOTE_OPENING_SINGLE =
- new LexemeType[]{WORD, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE};
-
- /**
- * Single quotes preceded by these {@link LexemeType}s may be closing quotes.
- */
- private static final LexemeType[] LEADING_QUOTE_CLOSING_SINGLE =
- new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_DOUBLE};
-
- /**
- * Single quotes succeeded by these {@link LexemeType}s may be closing quotes.
- */
- private static final LexemeType[] LAGGING_QUOTE_CLOSING_SINGLE =
- new LexemeType[]{SPACE, HYPHEN, DASH,
- QUOTE_DOUBLE, CLOSING_GROUP, EOL, EOP};
-
- /**
- * Double quotes preceded by these {@link LexemeType}s may be opening quotes.
- */
- private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
- new LexemeType[]{SPACE, DASH, EQUALS, QUOTE_SINGLE, OPENING_GROUP, EOL,
- EOP};
-
- /**
- * Double quotes succeeded by these {@link LexemeType}s may be opening quotes.
- */
- private static final LexemeType[] LAGGING_QUOTE_OPENING_DOUBLE =
- new LexemeType[]{WORD, NUMBER, ELLIPSIS, OPENING_GROUP,
- QUOTE_SINGLE, QUOTE_SINGLE_OPENING, QUOTE_SINGLE_CLOSING, QUOTE_DOUBLE};
-
- /**
- * Double quotes preceded by these {@link LexemeType}s may be closing quotes.
- */
- private static final LexemeType[] LEADING_QUOTE_CLOSING_DOUBLE =
- new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, DASH, ELLIPSIS, CLOSING_GROUP,
- QUOTE_SINGLE, QUOTE_SINGLE_CLOSING, QUOTE_SINGLE_OPENING};
-
- /**
- * Double quotes succeeded by these {@link LexemeType}s may be closing quotes.
- */
- private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE =
- new LexemeType[]{SPACE, PUNCT, PERIOD, EQUALS, HYPHEN, DASH,
- QUOTE_SINGLE, CLOSING_GROUP, EOL, EOP};
-
- /**
- * The text to parse. A reference is required as a minor optimization in
- * memory and speed: the lexer records integer offsets, rather than new
- * {@link String} instances, to track parsed lexemes.
- */
- private final String mText;
-
- /**
- * Converts a string into an iterable list of {@link Lexeme} instances.
- */
- private final Lexer mLexer;
-
- /**
- * Sets of contractions that help disambiguate single quotes in the text.
- * These are effectively immutable while parsing.
- */
- private final Contractions sContractions;
-
- /**
- * Incremented for each opening single quote emitted. Used to help resolve
- * ambiguities when single quote marks are balanced.
- */
- private int mOpeningSingleQuote;
-
- /**
- * Incremented for each closing single quote emitted. Used to help resolve
- * ambiguities when single quote marks are balanced.
- */
- private int mClosingSingleQuote;
-
- /**
- * Constructs a new {@link Parser} using the default contraction sets
- * to help resolve some ambiguous scenarios.
- *
- * @param text The prose to parse, containing zero or more quotation
- * characters.
- * @param contractions Custom sets of contractions to help resolve
- * ambiguities.
- */
- public Parser( final String text, final Contractions contractions ) {
- mText = text;
- mLexer = new Lexer( mText );
- sContractions = contractions;
- }
-
- /**
- * Iterates over the entire text provided at construction, emitting
- * {@link Token}s that can be used to convert straight quotes to curly
- * quotes.
- *
- * @param tokenConsumer Receives emitted {@link Token}s.
- */
- public void parse(
- final Consumer<Token> tokenConsumer,
- final Consumer<Lexeme> lexemeConsumer ) {
- final var lexemes = new CircularFifoQueue<Lexeme>( 3 );
-
- // Allow consuming the very first token without checking the queue size.
- flush( lexemes );
-
- final var unresolved = new ArrayList<Lexeme[]>();
- Lexeme lexeme;
-
- // Create and convert a list of all unambiguous quote characters.
- while( (lexeme = mLexer.next()) != EOT ) {
- if( tokenize( lexeme, lexemes, tokenConsumer, unresolved ) ) {
- // Attempt to resolve any remaining unambiguous quotes.
- resolve( unresolved, tokenConsumer );
-
- // Notify of any unambiguous quotes that could not be resolved.
- unresolved.forEach( ( lex ) -> lexemeConsumer.accept( lex[ 1 ] ) );
- unresolved.clear();
- mOpeningSingleQuote = 0;
- mClosingSingleQuote = 0;
- }
- }
-
- // By loop's end, the lexemes list contains tokens for all except the
- // final two elements (from tokenizing in triplets). Tokenize the remaining
- // unprocessed lexemes.
- tokenize( EOT, lexemes, tokenConsumer, unresolved );
- tokenize( EOT, lexemes, tokenConsumer, unresolved );
-
- // Attempt to resolve any remaining unambiguous quotes.
- resolve( unresolved, tokenConsumer );
-
- // Notify of any unambiguous quotes that could not be resolved.
- unresolved.forEach( ( lex ) -> lexemeConsumer.accept( lex[ 1 ] ) );
- }
-
- /**
- * Converts {@link Lexeme}s identified as straight quotes into {@link Token}s
- * that represent the curly equivalent. The {@link Token}s are passed to
- * the given {@link Consumer} for further processing (e.g., replaced in
- * the original text being parsed).
- *
- * @param lexeme A part of the text being parsed.
- * @param lexemes A 3-element queue of lexemes that provide sufficient
- * context to identify curly quotes.
- * @param consumer Recipient of equivalent quotes.
- * @param unresolved Rolling list of potentially ambiguous {@link Lexeme}s
- * that could not be tokenized, yet.
- * @return {@code true} if an end-of-paragraph is detected.
- */
- private boolean tokenize( final Lexeme lexeme,
- final CircularFifoQueue<Lexeme> lexemes,
- final Consumer<Token> consumer,
- final List<Lexeme[]> unresolved ) {
- // Add the next lexeme to tokenize into the queue for immediate processing.
- lexemes.add( lexeme );
-
- final var lex1 = lexemes.get( 0 );
- final var lex2 = lexemes.get( 1 );
- final var lex3 = lexemes.get( 2 );
-
- if( lex2.isType( QUOTE_SINGLE ) && lex3.isType( WORD ) &&
- lex1.anyType( WORD, PERIOD, NUMBER ) ) {
- // Examples: y'all, Ph.D.'ll, 20's, she's
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
- }
- else if( lex1.isType( QUOTE_SINGLE ) && lex3.isType( QUOTE_SINGLE ) &&
- "n".equalsIgnoreCase( lex2.toString( mText ) ) ) {
- // I.e., 'n'
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) );
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex3 ) );
- flush( lexemes );
- truncate( unresolved );
- }
- else if( lex2.isType( QUOTE_SINGLE ) && lex1.isType( NUMBER ) ) {
- if( lex3.isType( QUOTE_SINGLE ) ) {
- // E.g., 2''
- consumer.accept(
- new Token( QUOTE_PRIME_DOUBLE, lex2.began(), lex3.ended() ) );
- flush( lexemes );
- }
- else {
- // E.g., 2'
- consumer.accept( new Token( QUOTE_PRIME_SINGLE, lex2 ) );
- }
- }
- else if( lex2.isType( QUOTE_DOUBLE ) && lex1.isType( NUMBER ) ) {
- // E.g., 2"
- consumer.accept( new Token( QUOTE_PRIME_DOUBLE, lex2 ) );
- }
- else if( lex2.isType( WORD ) && lex3.isType( QUOTE_SINGLE ) &&
- sContractions.endedUnambiguously( lex2.toString( mText ) ) ) {
- // E.g., thinkin'
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex3 ) );
- flush( lexemes );
- }
- else if( lex2.isType( NUMBER ) && lex1.isType( QUOTE_SINGLE ) ) {
- if( lex3.anyType( SPACE, PUNCT ) || (lex3.isType( WORD ) &&
- lex3.toString( mText ).equalsIgnoreCase( "s" )) ) {
- // Sentences must re-written to avoid starting with numerals.
- // Examples: '20s, '02
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) );
- }
- else {
- // E.g., '2''
- consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex1 ) );
- mOpeningSingleQuote++;
- }
-
- truncate( unresolved );
- }
- else if( lex2.isType( QUOTE_SINGLE ) &&
- lex1.anyType( PUNCT, PERIOD, ELLIPSIS, DASH ) &&
- (lex3.anyType( EOL, EOP ) || lex3.isEot()) ) {
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
- mClosingSingleQuote++;
- }
- else if( lex1.isType( ESC_SINGLE ) ) {
- // E.g., \'
- consumer.accept( new Token( QUOTE_STRAIGHT_SINGLE, lex1 ) );
- }
- else if( lex1.isType( ESC_DOUBLE ) ) {
- // E.g., \"
- consumer.accept( new Token( QUOTE_STRAIGHT_DOUBLE, lex1 ) );
-
- if( lex2.isType( QUOTE_SINGLE ) &&
- (lex3.isEot() || lex3.anyType( SPACE, DASH, EOL, EOP )) ) {
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
- mClosingSingleQuote++;
- }
- }
- else if( lex2.isType( QUOTE_DOUBLE ) &&
- (lex1.isSot() || lex1.anyType( LEADING_QUOTE_OPENING_DOUBLE )) &&
- lex3.anyType( LAGGING_QUOTE_OPENING_DOUBLE ) ) {
- // Examples: "", "..., "word, ---"word
- consumer.accept( new Token( QUOTE_OPENING_DOUBLE, lex2 ) );
- }
- else if( lex2.isType( QUOTE_DOUBLE ) &&
- lex1.anyType( LEADING_QUOTE_CLOSING_DOUBLE ) &&
- (lex3.isEot() || lex3.anyType( LAGGING_QUOTE_CLOSING_DOUBLE )) ) {
- // Examples: ..."', word"', ?"', word"?
- consumer.accept( new Token( QUOTE_CLOSING_DOUBLE, lex2 ) );
- }
- else if( lex1.isType( QUOTE_SINGLE ) &&
- lex2.anyType( PUNCT, PERIOD, DASH ) && lex3.isType( QUOTE_DOUBLE ) ) {
- // E.g., '," (contraction ruled out from previous conditionals)
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex1 ) );
- truncate( unresolved );
- mClosingSingleQuote++;
- }
- else if( lex2.anyType( QUOTE_SINGLE, QUOTE_DOUBLE ) ) {
- // After tokenizing, the parser will attempt to resolve ambiguities.
- unresolved.add( new Lexeme[]{lex1, lex2, lex3} );
- }
-
- // Suggest to the caller that resolution should be performed. This allows
- // the algorithm to reset the opening/closing quote balance before the
- // next paragraph is parsed.
- return lex3.isType( EOP );
- }
-
- private void resolve(
- final List<Lexeme[]> unresolved, final Consumer<Token> consumer ) {
- // Some non-emitted tokenized lexemes may be ambiguous.
- final var ambiguousLeadingQuotes = new ArrayList<Lexeme[]>( 16 );
- final var ambiguousLaggingQuotes = new ArrayList<Lexeme[]>( 16 );
- var resolvedLeadingQuotes = 0;
- var resolvedLaggingQuotes = 0;
-
- // Count the number of ambiguous and non-ambiguous open single quotes.
- for( var i = unresolved.iterator(); i.hasNext(); ) {
- final var quotes = i.next();
- final var lex1 = quotes[ 0 ];
- final var lex2 = quotes[ 1 ];
- final var lex3 = quotes[ 2 ];
-
- if( lex2.isType( QUOTE_SINGLE ) ) {
- final var word1 = lex1 == SOT ? "" : lex1.toString( mText );
- final var word3 = lex3 == EOT ? "" : lex3.toString( mText );
-
- if( sContractions.beganAmbiguously( word3 ) ) {
- // E.g., 'Cause
- if( lex1.isType( QUOTE_SINGLE ) ) {
- // E.g., ''Cause
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
- i.remove();
- }
- else {
- // The contraction is uncertain until a closing quote is found that
- // may balance this single quote.
- ambiguousLeadingQuotes.add( quotes );
- }
- }
- else if( sContractions.beganUnambiguously( word3 ) ) {
- // The quote mark forms a word that does not stand alone from its
- // contraction. For example, twas is not a word: it's 'twas.
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
- i.remove();
- }
- else if( sContractions.endedAmbiguously( word1 ) ) {
- ambiguousLaggingQuotes.add( quotes );
- }
- else if( (lex1.isSot() || lex1.anyType( LEADING_QUOTE_OPENING_SINGLE ))
- && lex3.anyType( LAGGING_QUOTE_OPENING_SINGLE ) ) {
- consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex2 ) );
- resolvedLeadingQuotes++;
- mOpeningSingleQuote++;
- i.remove();
- }
- else if( lex1.anyType( LEADING_QUOTE_CLOSING_SINGLE ) &&
- (lex3.isEot() || lex3.anyType( LAGGING_QUOTE_CLOSING_SINGLE )) ) {
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
- resolvedLaggingQuotes++;
- mClosingSingleQuote++;
- i.remove();
- }
- else if( lex3.isType( NUMBER ) ) {
- // E.g., '04
- ambiguousLeadingQuotes.add( quotes );
- }
- }
- }
-
- final var ambiguousLeadingCount = ambiguousLeadingQuotes.size();
- final var ambiguousLaggingCount = ambiguousLaggingQuotes.size();
-
- if( resolvedLeadingQuotes == 1 && resolvedLaggingQuotes == 0 ) {
- if( ambiguousLeadingCount == 0 && ambiguousLaggingCount == 1 ) {
- final var balanced = mClosingSingleQuote - mOpeningSingleQuote == 0;
- final var quote = balanced ? QUOTE_APOSTROPHE : QUOTE_CLOSING_SINGLE;
- final var lex = ambiguousLaggingQuotes.get( 0 );
- consumer.accept( new Token( quote, lex[ 1 ] ) );
- unresolved.remove( lex );
- }
- else if( ambiguousLeadingCount == 0 && unresolved.size() == 1 ) {
- // Must be a closing quote.
- final var closing = unresolved.get( 0 );
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
- unresolved.remove( closing );
- }
- }
- else if( ambiguousLeadingCount == 0 && ambiguousLaggingCount > 0 ) {
- // If there are no ambiguous leading quotes then all ambiguous lagging
- // quotes must be contractions.
- ambiguousLaggingQuotes.forEach(
- lex -> {
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex[ 1 ] ) );
- unresolved.remove( lex );
- }
- );
- }
- else if( ambiguousLeadingCount == 0 ) {
- if( resolvedLaggingQuotes < resolvedLeadingQuotes ) {
- for( final var i = unresolved.iterator(); i.hasNext(); ) {
- final var closing = i.next()[ 1 ];
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing ) );
- i.remove();
- }
- }
- else if( mOpeningSingleQuote - mClosingSingleQuote == unresolved.size() ) {
- for( final var i = unresolved.iterator(); i.hasNext(); ) {
- final var closing = i.next();
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
- i.remove();
- }
- }
- else if( unresolved.size() == 2 ) {
- final var closing = unresolved.get( 0 );
- final var opening = unresolved.get( 1 );
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
- consumer.accept( new Token( QUOTE_OPENING_SINGLE, opening[ 1 ] ) );
-
- // Doesn't affect the algorithm.
- unresolved.clear();
+import static java.lang.Math.abs;
+import static java.util.Collections.sort;
+
+/**
+ * Converts straight double/single quotes and apostrophes to curly equivalents.
+ */
+public final class Parser {
+ /**
+ * Single quotes preceded by these {@link LexemeType}s may be opening quotes.
+ */
+ private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE =
+ new LexemeType[]{SPACE, DASH, QUOTE_DOUBLE, OPENING_GROUP, EOL, EOP};
+
+ /**
+ * Single quotes succeeded by these {@link LexemeType}s may be opening quotes.
+ */
+ private static final LexemeType[] LAGGING_QUOTE_OPENING_SINGLE =
+ new LexemeType[]{WORD, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE};
+
+ /**
+ * Single quotes preceded by these {@link LexemeType}s may be closing quotes.
+ */
+ private static final LexemeType[] LEADING_QUOTE_CLOSING_SINGLE =
+ new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_DOUBLE};
+
+ /**
+ * Single quotes succeeded by these {@link LexemeType}s may be closing quotes.
+ */
+ private static final LexemeType[] LAGGING_QUOTE_CLOSING_SINGLE =
+ new LexemeType[]{SPACE, HYPHEN, DASH,
+ QUOTE_DOUBLE, CLOSING_GROUP, EOL, EOP};
+
+ /**
+ * Double quotes preceded by these {@link LexemeType}s may be opening quotes.
+ */
+ private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
+ new LexemeType[]{SPACE, DASH, EQUALS, QUOTE_SINGLE, OPENING_GROUP, EOL,
+ EOP};
+
+ /**
+ * Double quotes succeeded by these {@link LexemeType}s may be opening quotes.
+ */
+ private static final LexemeType[] LAGGING_QUOTE_OPENING_DOUBLE =
+ new LexemeType[]{WORD, NUMBER, ELLIPSIS, OPENING_GROUP,
+ QUOTE_SINGLE, QUOTE_SINGLE_OPENING, QUOTE_SINGLE_CLOSING, QUOTE_DOUBLE};
+
+ /**
+ * Double quotes preceded by these {@link LexemeType}s may be closing quotes.
+ */
+ private static final LexemeType[] LEADING_QUOTE_CLOSING_DOUBLE =
+ new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, DASH, ELLIPSIS, CLOSING_GROUP,
+ QUOTE_SINGLE, QUOTE_SINGLE_CLOSING, QUOTE_SINGLE_OPENING};
+
+ /**
+ * Double quotes succeeded by these {@link LexemeType}s may be closing quotes.
+ */
+ private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE =
+ new LexemeType[]{SPACE, PUNCT, PERIOD, EQUALS, HYPHEN, DASH,
+ QUOTE_SINGLE, CLOSING_GROUP, EOL, EOP};
+
+ /**
+ * The text to parse. A reference is required as a minor optimization in
+ * memory and speed: the lexer records integer offsets, rather than new
+ * {@link String} instances, to track parsed lexemes.
+ */
+ private final String mText;
+
+ /**
+ * Converts a string into an iterable list of {@link Lexeme} instances.
+ */
+ private final Lexer mLexer;
+
+ /**
+ * Sets of contractions that help disambiguate single quotes in the text.
+ * These are effectively immutable while parsing.
+ */
+ private final Contractions sContractions;
+
+ /**
+ * Lexemes of every opening single quote emitted so far for the current
+ * paragraph. Appended to while tokenizing and resolving, sorted before
+ * ambiguities are resolved, and cleared when a paragraph ends.
+ */
+ private final List<Lexeme> mOpeningSingleQuotes = new ArrayList<>();
+
+ /**
+ * Lexemes of every closing single quote emitted so far for the current
+ * paragraph. Appended to while tokenizing and resolving, sorted before
+ * ambiguities are resolved, and cleared when a paragraph ends.
+ */
+ private final List<Lexeme> mClosingSingleQuotes = new ArrayList<>();
+
+ /**
+ * Lexemes of every opening double quote emitted so far for the current
+ * paragraph. Appended to while tokenizing, sorted before ambiguities are
+ * resolved, and cleared when a paragraph ends.
+ */
+ private final List<Lexeme> mOpeningDoubleQuotes = new ArrayList<>();
+
+ /**
+ * Lexemes of every closing double quote emitted so far for the current
+ * paragraph. Appended to while tokenizing, sorted before ambiguities are
+ * resolved, and cleared when a paragraph ends.
+ */
+ private final List<Lexeme> mClosingDoubleQuotes = new ArrayList<>();
+
+ /**
+ * Constructs a new {@link Parser} that uses the given contraction sets
+ * to help resolve some ambiguous scenarios.
+ *
+ * @param text The prose to parse, containing zero or more quotation
+ * characters.
+ * @param contractions Custom sets of contractions to help resolve
+ * ambiguities.
+ */
+ public Parser( final String text, final Contractions contractions ) {
+ mText = text;
+ mLexer = new Lexer( mText );
+ sContractions = contractions;
+ }
+
+ /**
+ * Iterates over the entire text provided at construction, emitting
+ * {@link Token}s that can be used to convert straight quotes to curly
+ * quotes. Ambiguous quotes are resolved once per paragraph and once more
+ * after the final lexeme has been tokenized.
+ *
+ * @param tokenConsumer Receives emitted {@link Token}s.
+ * @param lexemeConsumer Receives the quotation mark {@link Lexeme} of each
+ * ambiguity that could not be resolved.
+ */
+ public void parse(
+ final Consumer<Token> tokenConsumer,
+ final Consumer<Lexeme> lexemeConsumer ) {
+ final var lexemes = new CircularFifoQueue<Lexeme>( 3 );
+
+ // Allow consuming the very first token without needing a queue size check.
+ flush( lexemes );
+
+ final var unresolved = new ArrayList<Lexeme[]>();
+ Lexeme lexeme;
+
+ // Create and convert a list of all unambiguous quote characters.
+ while( (lexeme = mLexer.next()) != EOT ) {
+ // Reset after tokenizing a paragraph.
+ if( tokenize( lexeme, lexemes, tokenConsumer, unresolved ) ) {
+ // Attempt to resolve any remaining ambiguous quotes.
+ resolve( unresolved, tokenConsumer );
+
+ // Notify of any ambiguous quotes that could not be resolved.
+ unresolved.forEach( ( lex ) -> lexemeConsumer.accept( lex[ 1 ] ) );
+ unresolved.clear();
+ mOpeningSingleQuotes.clear();
+ mClosingSingleQuotes.clear();
+ mOpeningDoubleQuotes.clear();
+ mClosingDoubleQuotes.clear();
+ }
+ }
+
+ // By loop's end, the lexemes list contains tokens for all except the
+ // final two elements (from tokenizing in triplets). Tokenize the remaining
+ // unprocessed lexemes.
+ tokenize( EOT, lexemes, tokenConsumer, unresolved );
+ tokenize( EOT, lexemes, tokenConsumer, unresolved );
+
+ // Attempt to resolve any remaining ambiguous quotes.
+ resolve( unresolved, tokenConsumer );
+
+ // Notify of any ambiguous quotes that could not be resolved.
+ unresolved.forEach( ( lex ) -> lexemeConsumer.accept( lex[ 1 ] ) );
+ }
+
+ /**
+ * Converts {@link Lexeme}s identified as straight quotes into {@link Token}s
+ * that represent the curly equivalent. The {@link Token}s are passed to
+ * the given {@link Consumer} for further processing (e.g., replaced in
+ * the original text being parsed). Quotes that match none of the
+ * unambiguous patterns are appended to the unresolved list as a triplet
+ * of (preceding lexeme, quote, succeeding lexeme).
+ *
+ * @param lexeme A part of the text being parsed.
+ * @param lexemes A 3-element queue of lexemes that provide sufficient
+ * context to identify curly quotes.
+ * @param consumer Recipient of equivalent quotes.
+ * @param unresolved Rolling list of potentially ambiguous {@link Lexeme}s
+ * that could not be tokenized, yet.
+ * @return {@code true} if an end-of-paragraph is detected.
+ */
+ private boolean tokenize( final Lexeme lexeme,
+ final CircularFifoQueue<Lexeme> lexemes,
+ final Consumer<Token> consumer,
+ final List<Lexeme[]> unresolved ) {
+ // Add the next lexeme to tokenize into the queue for immediate processing.
+ lexemes.add( lexeme );
+
+ final var lex1 = lexemes.get( 0 );
+ final var lex2 = lexemes.get( 1 );
+ final var lex3 = lexemes.get( 2 );
+
+ if( lex2.isType( QUOTE_SINGLE ) && lex3.isType( WORD ) &&
+ lex1.isType( WORD, PERIOD, NUMBER ) ) {
+ // Examples: y'all, Ph.D.'ll, 20's, she's
+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
+ }
+ else if( lex1.isType( QUOTE_SINGLE ) && lex3.isType( QUOTE_SINGLE ) &&
+ "n".equalsIgnoreCase( lex2.toString( mText ) ) ) {
+ // I.e., 'n'
+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) );
+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex3 ) );
+ flush( lexemes );
+ truncate( unresolved );
+ }
+ else if( lex2.isType( QUOTE_SINGLE ) && lex1.isType( NUMBER ) ) {
+ if( lex3.isType( QUOTE_SINGLE ) ) {
+ // E.g., 2''
+ consumer.accept(
+ new Token( QUOTE_PRIME_DOUBLE, lex2.began(), lex3.ended() ) );
+ flush( lexemes );
+ }
+ else {
+ // E.g., 2'
+ consumer.accept( new Token( QUOTE_PRIME_SINGLE, lex2 ) );
+ }
+ }
+ else if( lex2.isType( QUOTE_DOUBLE ) && lex1.isType( NUMBER ) ) {
+ // E.g., 2"
+ consumer.accept( new Token( QUOTE_PRIME_DOUBLE, lex2 ) );
+ }
+ else if( lex2.isType( WORD ) && lex3.isType( QUOTE_SINGLE ) &&
+ sContractions.endedUnambiguously( lex2.toString( mText ) ) ) {
+ // E.g., thinkin'
+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex3 ) );
+ flush( lexemes );
+ }
+ else if( lex2.isType( NUMBER ) && lex1.isType( QUOTE_SINGLE ) ) {
+ // Sentences must be rewritten to avoid starting with numerals.
+ if( lex3.isType( SPACE, PUNCT ) || (lex3.isType( WORD ) &&
+ lex3.toString( mText ).equalsIgnoreCase( "s" )) ) {
+ // Examples: '20s, '02
+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) );
+ }
+ else {
+ // E.g., '2''
+ consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex1 ) );
+ mOpeningSingleQuotes.add( lex1 );
+ }
+
+ truncate( unresolved );
+ }
+ else if( lex2.isType( QUOTE_SINGLE ) &&
+ lex1.isType( PUNCT, PERIOD, ELLIPSIS, DASH ) &&
+ (lex3.isType( EOL, EOP ) || lex3.isEot()) ) {
+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
+ mClosingSingleQuotes.add( lex2 );
+ }
+ else if( lex1.isType( ESC_SINGLE ) ) {
+ // E.g., \'
+ consumer.accept( new Token( QUOTE_STRAIGHT_SINGLE, lex1 ) );
+ }
+ else if( lex1.isType( ESC_DOUBLE ) ) {
+ // E.g., \"
+ consumer.accept( new Token( QUOTE_STRAIGHT_DOUBLE, lex1 ) );
+
+ if( lex2.isType( QUOTE_SINGLE ) &&
+ (lex3.isEot() || lex3.isType( SPACE, DASH, EOL, EOP )) ) {
+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
+ mClosingSingleQuotes.add( lex2 );
+ }
+ }
+ else if( lex2.isType( QUOTE_DOUBLE ) &&
+ (lex1.isSot() || lex1.isType( LEADING_QUOTE_OPENING_DOUBLE )) &&
+ lex3.isType( LAGGING_QUOTE_OPENING_DOUBLE ) ) {
+ // Examples: "", "..., "word, ---"word
+ consumer.accept( new Token( QUOTE_OPENING_DOUBLE, lex2 ) );
+ mOpeningDoubleQuotes.add( lex2 );
+ }
+ else if( lex2.isType( QUOTE_DOUBLE ) &&
+ lex1.isType( LEADING_QUOTE_CLOSING_DOUBLE ) &&
+ (lex3.isEot() || lex3.isType( LAGGING_QUOTE_CLOSING_DOUBLE )) ) {
+ // Examples: ..."', word"', ?"', word"?
+ consumer.accept( new Token( QUOTE_CLOSING_DOUBLE, lex2 ) );
+ mClosingDoubleQuotes.add( lex2 );
+ }
+ else if( lex1.isType( WORD ) && lex2.isType( QUOTE_SINGLE ) &&
+ lex3.isType( PUNCT, PERIOD ) ) {
+ // E.g., word', (contraction ruled out by previous conditions)
+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
+ mClosingSingleQuotes.add( lex2 );
+ }
+ else if( lex2.isType( QUOTE_SINGLE, QUOTE_DOUBLE ) ) {
+ // After tokenizing, the parser will attempt to resolve ambiguities.
+ unresolved.add( new Lexeme[]{lex1, lex2, lex3} );
+ }
+
+ // Suggest to the caller that resolution should be performed. This allows
+ // the algorithm to reset the opening/closing quote balance before the
+ // next paragraph is parsed.
+ return lex3.isType( EOP );
+ }
+
+ private void resolve(
+ final List<Lexeme[]> unresolved, final Consumer<Token> consumer ) {
+ // Some non-emitted tokenized lexemes may be ambiguous.
+ final var ambiguousLeadingQuotes = new ArrayList<Lexeme[]>( 16 );
+ final var ambiguousLaggingQuotes = new ArrayList<Lexeme[]>( 16 );
+ var resolvedLeadingQuotes = 0;
+ var resolvedLaggingQuotes = 0;
+
+ // Count the number of ambiguous and non-ambiguous open single quotes.
+ for( var i = unresolved.iterator(); i.hasNext(); ) {
+ final var quotes = i.next();
+ final var lex1 = quotes[ 0 ];
+ final var lex2 = quotes[ 1 ];
+ final var lex3 = quotes[ 2 ];
+
+ if( lex2.isType( QUOTE_SINGLE ) ) {
+ final var word1 = lex1 == SOT ? "" : lex1.toString( mText );
+ final var word3 = lex3 == EOT ? "" : lex3.toString( mText );
+
+ if( sContractions.beganAmbiguously( word3 ) ) {
+ // E.g., 'Cause
+ if( lex1.isType( QUOTE_SINGLE ) ) {
+ // E.g., ''Cause
+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
+ i.remove();
+ }
+ else {
+ // The contraction is uncertain until a closing quote is found that
+ // may balance this single quote.
+ ambiguousLeadingQuotes.add( quotes );
+ }
+ }
+ else if( sContractions.beganUnambiguously( word3 ) ) {
+ // The quote mark forms a word that does not stand alone from its
+ // contraction. For example, twas is not a word: it's 'twas.
+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
+ i.remove();
+ }
+ else if( sContractions.endedAmbiguously( word1 ) ) {
+ ambiguousLaggingQuotes.add( quotes );
+ }
+ else if( (lex1.isSot() || lex1.isType( LEADING_QUOTE_OPENING_SINGLE )) &&
+ lex3.isType( LAGGING_QUOTE_OPENING_SINGLE ) ) {
+ consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex2 ) );
+ resolvedLeadingQuotes++;
+ mOpeningSingleQuotes.add( lex2 );
+ i.remove();
+ }
+ else if( lex1.isType( LEADING_QUOTE_CLOSING_SINGLE ) &&
+ (lex3.isEot() || lex3.isType( LAGGING_QUOTE_CLOSING_SINGLE )) ) {
+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
+ resolvedLaggingQuotes++;
+ mClosingSingleQuotes.add( lex2 );
+ i.remove();
+ }
+ else if( lex3.isType( NUMBER ) ) {
+ // E.g., '04
+ ambiguousLeadingQuotes.add( quotes );
+ }
+ }
+ }
+
+ sort( mOpeningSingleQuotes );
+ sort( mClosingSingleQuotes );
+ sort( mOpeningDoubleQuotes );
+ sort( mClosingDoubleQuotes );
+
+ final var singleQuoteEmpty =
+ mOpeningSingleQuotes.isEmpty() || mClosingSingleQuotes.isEmpty();
+ final var doubleQuoteEmpty =
+ mOpeningDoubleQuotes.isEmpty() || mClosingDoubleQuotes.isEmpty();
+
+ final var singleQuoteDelta = abs(
+ mClosingSingleQuotes.size() - mOpeningSingleQuotes.size()
+ );
+
+ final var doubleQuoteDelta = abs(
+ mClosingDoubleQuotes.size() - mOpeningDoubleQuotes.size()
+ );
+
+ final var ambiguousLeadingCount = ambiguousLeadingQuotes.size();
+ final var ambiguousLaggingCount = ambiguousLaggingQuotes.size();
+
+ if( resolvedLeadingQuotes == 1 && resolvedLaggingQuotes == 0 ) {
+ if( ambiguousLeadingCount == 0 && ambiguousLaggingCount == 1 ) {
+ final var balanced = singleQuoteDelta == 0;
+ final var quote = balanced ? QUOTE_APOSTROPHE : QUOTE_CLOSING_SINGLE;
+ final var lex = ambiguousLaggingQuotes.get( 0 );
+ consumer.accept( new Token( quote, lex[ 1 ] ) );
+ unresolved.remove( lex );
+ }
+ else if( ambiguousLeadingCount == 0 && unresolved.size() == 1 ) {
+ // Must be a closing quote.
+ final var closing = unresolved.get( 0 );
+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
+ unresolved.remove( closing );
+ }
+ }
+ else if( ambiguousLeadingCount == 0 && ambiguousLaggingCount > 0 ) {
+ // If there are no ambiguous leading quotes then all ambiguous lagging
+ // quotes must be contractions.
+ ambiguousLaggingQuotes.forEach(
+ lex -> {
+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex[ 1 ] ) );
+ unresolved.remove( lex );
+ }
+ );
+ }
+ else if( mOpeningSingleQuotes.size() == 0 && mClosingSingleQuotes.size() == 1 ) {
+ final var opening = unresolved.get( 0 );
+ consumer.accept( new Token( QUOTE_OPENING_SINGLE, opening[ 1 ] ) );
+ unresolved.remove( opening );
+ }
+ else if( ambiguousLeadingCount == 0 ) {
+ if( resolvedLaggingQuotes < resolvedLeadingQuotes ) {
+ for( final var i = unresolved.iterator(); i.hasNext(); ) {
+ final var closing = i.next()[ 1 ];
+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing ) );
+ i.remove();
+ }
+ }
+ else if( singleQuoteDelta == unresolved.size() ) {
+ for( final var i = unresolved.iterator(); i.hasNext(); ) {
+ final var closing = i.next();
+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
+ i.remove();
+ }
+ }
+ else if( unresolved.size() == 2 ) {
+ final var closing = unresolved.get( 0 );
+ final var opening = unresolved.get( 1 );
+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
+ consumer.accept( new Token( QUOTE_OPENING_SINGLE, opening[ 1 ] ) );
+
+ // Doesn't affect the algorithm.
+ unresolved.clear();
+ }
+ }
+ else if( (singleQuoteDelta == 0 && !singleQuoteEmpty) ||
+ (doubleQuoteDelta == 0 && !doubleQuoteEmpty) ) {
+ // An apostrophe stands betwixt opening/closing single quotes.
+ for( final var lexemes = unresolved.iterator(); lexemes.hasNext(); ) {
+ final var quote = lexemes.next()[ 1 ];
+
+ for( int i = 0; i < mOpeningSingleQuotes.size(); i++ ) {
+ // An apostrophe must fall between an open/close pair.
+ final var openingQuote = mOpeningSingleQuotes.get( i );
+ final var closingQuote = mClosingSingleQuotes.get( i );
+
+ if( openingQuote.before( quote ) && closingQuote.after( quote ) ) {
+ consumer.accept( new Token( QUOTE_APOSTROPHE, quote ) );
+ lexemes.remove();
+ }
+ }
+ }
+
+ // An apostrophe stands betwixt opening/closing double quotes.
+ for( final var lexemes = unresolved.iterator(); lexemes.hasNext(); ) {
+ final var quote = lexemes.next()[ 1 ];
+
+ for( int i = 0; i < mOpeningDoubleQuotes.size(); i++ ) {
+ // An apostrophe must fall between an open/close pair.
+ final var openingQuote = mOpeningDoubleQuotes.get( i );
+ final var closingQuote = mClosingDoubleQuotes.get( i );
+
+ if( openingQuote.before( quote ) && closingQuote.after( quote ) ) {
+ consumer.accept( new Token( QUOTE_APOSTROPHE, quote ) );
+ lexemes.remove();
+ }
+ }
}
}
src/test/java/com/whitemagicsoftware/keenquotes/KeenQuotesTest.java
final var converter = new Converter( out::println );
out.println( converter.apply(
- "\"’Kearney lives on the banks of Killarney—’"
+ "'A', 'B', and 'C' are letters."
) );
}
src/test/resources/com/whitemagicsoftware/keenquotes/smartypants.txt
&ldquo;&apos;Twas, t&apos;wasn&apos;t thy name, &apos;twas it?&rdquo; said Jim &ldquo;the Barber&rdquo; Brown.
-"'I'm in danger. Help? Take me to--' an address on Van Ness Avenue.
-&ldquo;&lsquo;I&apos;m in danger. Help? Take me to--&rsquo; an address on Van Ness Avenue.
+"'I'm in danger. Help? Take me to'--an address on Van Ness Avenue.
+&ldquo;&lsquo;I&apos;m in danger. Help? Take me to&rsquo;--an address on Van Ness Avenue.
"Ah," she said, "you knew! He told you--and you said 'my dear'! How could you?!"
&ldquo;She said, &lsquo;That&apos;s Sam&apos;s&rsquo;,&rdquo; said the Sams&apos; cat.
-"'Jane said, ''E'll be spooky, Sam's son with the jack-o'-lantern!'" said the O'Mally twins'---y'know---ghosts in unison.
-&ldquo;&lsquo;Jane said, &lsquo;&apos;E&apos;ll be spooky, Sam&apos;s son with the jack-o&apos;-lantern!&rsquo;&rdquo; said the O&apos;Mally twins&apos;---y&apos;know---ghosts in unison.
+"Jane said, ''E'll be spooky, Sam's son with the jack-o'-lantern!'" said the O'Mally twins'---y'know---ghosts in unison.
+&ldquo;Jane said, &lsquo;&apos;E&apos;ll be spooky, Sam&apos;s son with the jack-o&apos;-lantern!&rsquo;&rdquo; said the O&apos;Mally twins&apos;---y&apos;know---ghosts in unison.
'He's at Sam's'
''Sup, Doc?'
&lsquo;&apos;Sup, Doc?&rsquo;
+
+'Wouldn't 'a brushed a fly off the top twig,' exclaimed Ashbrook,
+&lsquo;Wouldn&apos;t &apos;a brushed a fly off the top twig,&rsquo; exclaimed Ashbrook,
+
+"But ye've been 'nation quiet all the day though," said the lad...
+&ldquo;But ye&apos;ve been &apos;nation quiet all the day though,&rdquo; said the lad...
# ########################################################################
'Bout that time I says, 'Boys! I been thinkin' 'bout th' Universe.'
'Bout that time I says, &lsquo;Boys! I been thinkin&apos; 'bout th&apos; Universe.&rsquo;
+
+"Jarvis, sir? Why, him as 'listed some years ago, and fought under
+&ldquo;Jarvis, sir? Why, him as 'listed some years ago, and fought under
Delta: 4123 lines added, 1000 lines removed, 3123-line increase