Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Merge pull request #1 from DaveJarvis/1_create_two_pass_parser 1 create two pass parser

AuthorDave Jarvis <email>
Date2022-08-20 12:46:21 GMT-0700
Commita7a00bf07dbbf3960d1b4c54dd24cc4740e0ec89
Parentb9ca8e2
build.gradle
plugins {
id 'application'
- id 'com.palantir.git-version' version '0.12.3'
+ id 'com.palantir.git-version' version '0.15.0'
id "me.champeau.jmh" version "0.6.6"
}
compileJava {
options.compilerArgs << "-Xlint:unchecked" << "-Xlint:deprecation"
- options.compilerArgs.addAll(['--release', '11'])
+ options.compilerArgs.addAll(['--release', '17'])
}
application {
applicationName = 'keenquotes'
- mainClassName = "com.whitemagicsoftware.${applicationName}.KeenQuotes"
+ mainClassName = "com.whitemagicsoftware.${applicationName}.app.KeenQuotes"
}
version = gitVersion()
def resourceDir = sourceSets.main.resources.srcDirs[0]
-final File propertiesFile = file("${resourceDir}/com/whitemagicsoftware/${applicationName}/app.properties")
+final File propertiesFile = file("${resourceDir}/com/whitemagicsoftware/${applicationName}/app/version.properties")
propertiesFile.write("application.version=${version}")
src/jmh/java/com/whitemagicsoftware/keenquotes/StringIterationBenchmark.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
package com.whitemagicsoftware.keenquotes;
+import com.whitemagicsoftware.keenquotes.util.FastCharacterIterator;
import org.openjdk.jmh.annotations.Benchmark;
@SuppressWarnings( "unused" )
public class StringIterationBenchmark {
- private final static String CHARSET =
+ private static final String CHARSET =
"ABCDEFGHIJKLM NOPQRSTUVWXYZ abcdefghijklm nopqrstuvxyz 01234 5 6789";
- public final static int STRLEN = 1024 * 1024;
+ public static final int STRLEN = 1024 * 1024;
private static String generateText() {
src/main/java/com/whitemagicsoftware/keenquotes/CircularFifoQueue.java
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.whitemagicsoftware.keenquotes;
-
-import java.util.*;
-
-/**
- * CircularFifoQueue is a first-in first-out queue with a fixed size that
- * replaces its oldest element if full.
- * <p>
- * The removal order of a {@link CircularFifoQueue} is based on the
- * insertion order; elements are removed in the same order in which they
- * were added. The iteration order is the same as the removal order.
- * </p>
- * <p>
- * The {@link #add(Object)}, {@link #remove()}, {@link #peek()},
- * {@link #poll()},
- * {@link #offer(Object)} operations all perform in constant time.
- * All other operations perform in linear time or worse.
- * </p>
- * <p>
- * This queue prevents null objects from being added.
- * </p>
- *
- * @param <E> the type of elements in this collection
- * @since 4.0
- */
-public final class CircularFifoQueue<E> extends AbstractCollection<E>
- implements Queue<E> {
- /**
- * Underlying storage array.
- */
- private final transient E[] elements;
-
- /**
- * Array index of first (oldest) queue element.
- */
- private transient int start;
-
- /**
- * Index mod maxElements of the array position following the last queue
- * element. Queue elements start at elements[start] and "wrap around"
- * elements[maxElements-1], ending at elements[decrement(end)].
- * For example, elements = {c,a,b}, start=1, end=1 corresponds to
- * the queue [a,b,c].
- */
- private transient int end;
-
- /**
- * Flag to indicate if the queue is currently full.
- */
- private transient boolean full;
-
- /**
- * Capacity of the queue.
- */
- private final int maxElements;
-
- /**
- * Constructor that creates a queue with the specified size.
- *
- * @param size Immutable queue size, must be greater than zero.
- */
- @SuppressWarnings( "unchecked" )
- public CircularFifoQueue( final int size ) {
- assert size > 0;
-
- elements = (E[]) new Object[ size ];
- maxElements = elements.length;
- }
-
- /**
- * Returns the number of elements stored in the queue.
- *
- * @return this queue's size
- */
- @Override
- public int size() {
- int size;
-
- if( end < start ) {
- size = maxElements - start + end;
- }
- else if( end == start ) {
- size = full ? maxElements : 0;
- }
- else {
- size = end - start;
- }
-
- return size;
- }
-
- /**
- * Returns true if this queue is empty; false otherwise.
- *
- * @return true if this queue is empty
- */
- @Override
- public boolean isEmpty() {
- return size() == 0;
- }
-
- /**
- * Returns {@code true} if the capacity limit of this queue has been reached,
- * i.e. the number of elements stored in the queue equals its maximum size.
- *
- * @return {@code true} if the capacity limit has been reached, {@code
- * false} otherwise
- * @since 4.1
- */
- public boolean isAtFullCapacity() {
- return size() == maxElements;
- }
-
- /**
- * Clears this queue.
- */
- @Override
- public void clear() {
- full = false;
- start = 0;
- end = 0;
- Arrays.fill( elements, null );
- }
-
- /**
- * Adds the given element to this queue. If the queue is full, the least
- * recently added
- * element is discarded so that a new element can be inserted.
- *
- * @param element the element to add
- * @return true, always
- * @throws NullPointerException if the given element is null
- */
- @Override
- public boolean add( final E element ) {
- Objects.requireNonNull( element, "element" );
-
- if( isAtFullCapacity() ) {
- remove();
- }
-
- elements[ end++ ] = element;
-
- if( end >= maxElements ) {
- end = 0;
- }
-
- if( end == start ) {
- full = true;
- }
-
- return true;
- }
-
- /**
- * Returns the element at the specified position in this queue.
- *
- * @param index the position of the element in the queue
- * @return the element at position {@code index}
- * @throws NoSuchElementException if the requested position is outside the
- * range [0, size)
- */
- public E get( final int index ) {
- final int sz = size();
-
- if( index < 0 || index >= sz ) {
- throw new NoSuchElementException(
- String.format(
- "Index %1$d is outside the available range [0, %2$d)",
- index, sz ) );
- }
-
- final int idx = (start + index) % maxElements;
- return elements[ idx ];
- }
-
- /**
- * Adds the given element to this queue. If the queue is full, the least
- * recently added
- * element is discarded so that a new element can be inserted.
- *
- * @param element the element to add
- * @return true, always
- * @throws NullPointerException if the given element is null
- */
- @Override
- public boolean offer( final E element ) {
- return add( element );
- }
-
- @Override
- public E poll() {
- return isEmpty() ? null : remove();
- }
-
- @Override
- public E element() {
- if( isEmpty() ) {
- throw new NoSuchElementException( "empty queue" );
- }
-
- return peek();
- }
-
- @Override
- public E peek() {
- return isEmpty() ? null : elements[ start ];
- }
-
- @Override
- public E remove() {
- if( isEmpty() ) {
- throw new NoSuchElementException( "empty queue" );
- }
-
- final E element = elements[ start ];
- if( null != element ) {
- elements[ start++ ] = null;
-
- if( start >= maxElements ) {
- start = 0;
- }
- full = false;
- }
- return element;
- }
-
- /**
- * Increments the internal index.
- *
- * @param index the index to increment
- * @return the updated index
- */
- private int increment( int index ) {
- if( ++index >= maxElements ) {
- index = 0;
- }
- return index;
- }
-
- /**
- * Decrements the internal index.
- *
- * @param index the index to decrement
- * @return the updated index
- */
- private int decrement( int index ) {
- if( --index < 0 ) {
- index = maxElements - 1;
- }
- return index;
- }
-
- /**
- * Returns an iterator over this queue's elements.
- *
- * @return an iterator over this queue's elements
- */
- @Override
- public Iterator<E> iterator() {
- return new Iterator<>() {
-
- private int index = start;
- private int lastReturnedIndex = -1;
- private boolean isFirst = full;
-
- @Override
- public boolean hasNext() {
- return isFirst || index != end;
- }
-
- @Override
- public E next() {
- if( !hasNext() ) {
- throw new NoSuchElementException();
- }
-
- isFirst = false;
- lastReturnedIndex = index;
- index = increment( index );
- return elements[ lastReturnedIndex ];
- }
-
- @Override
- public void remove() {
- if( lastReturnedIndex == -1 ) {
- throw new IllegalStateException();
- }
-
- // First element can be removed quickly
- if( lastReturnedIndex == start ) {
- CircularFifoQueue.this.remove();
- lastReturnedIndex = -1;
- return;
- }
-
- int pos = lastReturnedIndex + 1;
- if( start < lastReturnedIndex && pos < end ) {
- // shift in one part
- System.arraycopy(
- elements, pos, elements, lastReturnedIndex, end - pos );
- }
- else {
- // Other elements require us to shift the subsequent elements
- while( pos != end ) {
- if( pos >= maxElements ) {
- elements[ pos - 1 ] = elements[ 0 ];
- pos = 0;
- }
- else {
- elements[ decrement( pos ) ] = elements[ pos ];
- pos = increment( pos );
- }
- }
- }
-
- lastReturnedIndex = -1;
- end = decrement( end );
- elements[ end ] = null;
- full = false;
- index = decrement( index );
- }
- };
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/Contractions.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import static java.lang.String.format;
-import static java.util.Collections.emptySet;
-import static java.util.Collections.sort;
-
-/**
- * Placeholder for various types of contractions.
- */
-@SuppressWarnings( "SpellCheckingInspection" )
-public class Contractions {
-
- private final Builder mBuilder;
-
- private Contractions( final Builder builder ) {
- assert builder != null;
- mBuilder = builder;
- }
-
- /**
- * Allows constructing a list of custom contractions.
- */
- @SuppressWarnings( "unused" )
- public static class Builder {
- private final Set<String> mBeganUnambiguous = new HashSet<>();
- private final Set<String> mEndedUnambiguous = new HashSet<>();
- private final Set<String> mBeganAmbiguous = new HashSet<>();
- private final Set<String> mEndedAmbiguous = new HashSet<>();
-
- public Builder withBeganUnambiguous( final List<String> words ) {
- mBeganUnambiguous.addAll( words );
- return this;
- }
-
- public Builder withEndedUnambiguous( final List<String> words ) {
- mEndedUnambiguous.addAll( words );
- return this;
- }
-
- public Builder withBeganAmbiguous( final List<String> words ) {
- mBeganAmbiguous.addAll( words );
- return this;
- }
-
- public Builder withEndedAmbiguous( final List<String> words ) {
- mEndedAmbiguous.addAll( words );
- return this;
- }
-
- /**
- * Constructs a new set of {@link Contractions} that can be configured
- * using this {@link Builder} instance.
- *
- * @return {@link Contractions} suitable for use with parsing text.
- */
- public Contractions build() {
- mBeganUnambiguous.addAll( BEGAN_UNAMBIGUOUS );
- mEndedUnambiguous.addAll( ENDED_UNAMBIGUOUS );
- mBeganAmbiguous.addAll( BEGAN_AMBIGUOUS );
- mEndedAmbiguous.addAll( ENDED_AMBIGUOUS );
-
- // Remove ambiguous items if they are already declared.
- mBeganAmbiguous.removeAll( mBeganUnambiguous );
- mEndedAmbiguous.removeAll( mEndedUnambiguous );
-
- return new Contractions( this );
- }
-
- /**
- * This returns the {@code fallback} {@link Set} if {@code src} is empty;
- * otherwise, this returns the empty {@link Set}.
- *
- * @param src A set of contractions, possibly empty.
- * @param fallback The default values to use if {@code src} is empty.
- * @param <T> The type of data used by both {@link Set}s.
- * @return An empty {@link Set} if the {@code src} contains at least one
- * element; otherwise, this will return {@code fallback}.
- */
- private static <T> Set<T> from( final Set<T> src, final Set<T> fallback ) {
- assert src != null;
- assert fallback != null;
- return src.isEmpty() ? fallback : emptySet();
- }
- }
-
- /**
- * Answers whether the given word is a contraction that always starts
- * with an apostrophe. The comparison is case insensitive. This must
- * only be called when a straight quote is followed by a word.
- *
- * @param word The word to compare against the list of known unambiguous
- * contractions.
- * @return {@code true} when the given word is in the set of unambiguous
- * contractions.
- */
- public boolean beganUnambiguously( final String word ) {
- assert word != null;
- return getBeganUnambiguous().contains( word.toLowerCase() );
- }
-
- /**
- * Answers whether the given word could be a contraction but is also a
- * valid word in non-contracted form.
- *
- * @param word The word to compare against the list of known ambiguous
- * contractions.
- * @return {@code true} when the given word is in the set of ambiguous
- * contractions.
- */
- public boolean beganAmbiguously( final String word ) {
- assert word != null;
- return getBeganAmbiguous().contains( word.toLowerCase() );
- }
-
- public boolean endedUnambiguously( final String word ) {
- assert word != null;
- return getEndedUnambiguous().contains( word.toLowerCase() );
- }
-
- public boolean endedAmbiguously( final String word ) {
- assert word != null;
- final var check = word.toLowerCase();
-
- // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet
- // allow o' to match because 'a sentence can end with the letter o'.
- return getEndedAmbiguous().contains( check ) ||
- check.endsWith( "s" ) || check.endsWith( "z" ) ||
- check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" ));
- }
-
- private Set<String> getBeganUnambiguous() {
- return mBuilder.mBeganUnambiguous;
- }
-
- private Set<String> getEndedUnambiguous() {
- return mBuilder.mEndedUnambiguous;
- }
-
- private Set<String> getBeganAmbiguous() {
- return mBuilder.mBeganAmbiguous;
- }
-
- private Set<String> getEndedAmbiguous() {
- return mBuilder.mEndedAmbiguous;
- }
-
-
- @Override
- public String toString() {
- return
- toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) +
- toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" ) +
- toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) +
- toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" );
- }
-
- private String toString(
- final Set<String> words, final String category, final String fmt ) {
- final var sb = new StringBuilder( 16384 );
- final var newline = System.lineSeparator();
- final var list = new ArrayList<>( words );
-
- sort( list );
- sb.append( format( "%n%s%n", category ) );
- list.forEach( ( s ) -> sb.append( format( fmt, s ) ).append( newline ) );
-
- return sb.toString();
- }
-
- /**
- * Words having a straight apostrophe that cannot be mistaken for an
- * opening single quote.
- */
- private static final Set<String> BEGAN_UNAMBIGUOUS = Set.of(
- "aporth",
- // about you
- "boutcha",
- // about you
- "boutchu",
- // about well
- "boutwell",
- // except
- "cept",
- // decided
- "cided",
- // because
- "cos",
- "dillo",
- // them
- "em",
- // afraid
- "fraid",
- // against
- "gainst",
- // him
- "im",
- // and
- "n",
- // beneath
- "neath",
- // another
- "nother",
- // enough
- "nuff",
- // gonna
- "onna",
- "onna'",
- // horse
- "oss",
- // horses
- "osses",
- // upon
- "pon",
- "s",
- "sblood",
- // excuse
- "scuse",
- "sfar",
- "sfoot",
- // considered
- "sidered",
- // exploit
- "sploit",
- // exploits
- "sploits",
- "t",
- "taint",
- "tain",
- "til",
- "tis",
- "tisn",
- "tshall",
- "twas",
- "twasn",
- "tween",
- "twere",
- "tweren",
- "twixt",
- "twon",
- "twou",
- "twould",
- "twouldn",
- // have
- "ve",
- // exactly
- "xactly"
- );
-
- /**
- * Words having a straight apostrophe that may be either part of a
- * contraction or a word that stands alone beside an opening single quote.
- */
- private static final Set<String> BEGAN_AMBIGUOUS = Set.of(
- // have
- "a",
- // about|boxing match
- "bout",
- // because|causal
- "cause",
- // what you|choo choo train
- "choo",
- // he|e pluribus unum
- "e",
- // here|earlier
- "ere",
- // afro|to and fro
- "fro",
- // whore|ho ho!
- "ho",
- // okay|letter K
- "kay",
- // lo|lo and behold
- "lo",
- // ???
- "listed",
- // ???
- "nation",
- // are|regarding
- "re",
- // circular|around
- "round",
- // what's up|to eat
- "sup",
- // it will|twill fabric
- "twill",
- // them|utterance
- "um",
- // is that|Iranian village
- "zat"
- );
-
- private static final Set<String> ENDED_AMBIGUOUS = Set.of(
- // pin|head dunk
- "bobbin",
- // give|martial arts garment
- "gi",
- // in|I
- "i",
- // of|letter o
- "o"
- );
-
- private static final Set<String> ENDED_UNAMBIGUOUS = Set.of(
- // and
- "an",
- // for/before
- "fo",
- // friend
- "frien",
- // just
- "jes",
- // just
- "jus",
- // lord
- "lor",
- // myself
- "masel",
- // old
- "ol",
- // San (Francisco)
- "Sa",
- // shift
- "shif",
- // the
- "th",
- // what
- "wha",
- // with
- "wi",
- // world
- "worl",
- // Top ~3500 common -ing words as English contractions.
- "abandonin",
- "abettin",
- "abidin",
- "abolishin",
- "aboundin",
- "absorbin",
- "abstainin",
- "abstractin",
- "abusin",
- "abuttin",
- "acceleratin",
- "acceptin",
- "accessin",
- "accommodatin",
- "accompanyin",
- "accomplishin",
- "accordin",
- "accountin",
- "accruin",
- "accumulatin",
- "accusin",
- "achievin",
- "achin",
- "acknowledgin",
- "acquaintin",
- "acquirin",
- "actin",
- "activatin",
- "adaptin",
- "addin",
- "addressin",
- "adherin",
- "adjoinin",
- "adjustin",
- "administerin",
- "admirin",
- "admittin",
- "admonishin",
- "adoptin",
- "adorin",
- "adornin",
- "advancin",
- "advertisin",
- "advisin",
- "advocatin",
- "affectin",
- "affirmin",
- "afflictin",
- "affordin",
- "agein",
- "aggravatin",
- "aggregatin",
- "agin",
- "agitatin",
- "agonizin",
- "agreein",
- "ailin",
- "aimin",
- "airin",
- "alarmin",
- "alertin",
- "alienatin",
- "alightin",
- "alignin",
- "allegin",
- "alleviatin",
- "allocatin",
- "allowin",
- "alludin",
- "allurin",
- "alterin",
- "alternatin",
- "amassin",
- "amazin",
- "amendin",
- "amountin",
- "amplifyin",
- "amusin",
- "analysin",
- "analyzin",
- "anchorin",
- "anglin",
- "animatin",
- "annealin",
- "annihilatin",
- "announcin",
- "annoyin",
- "anointin",
- "answerin",
- "anticipatin",
- "anythin",
- "apologizin",
- "appallin",
- "appealin",
- "appearin",
- "appertainin",
- "appetizin",
- "applaudin",
- "applyin",
- "appointin",
- "appraisin",
- "appreciatin",
- "apprehendin",
- "approachin",
- "appropriatin",
- "approvin",
- "approximatin",
- "archin",
- "archivin",
- "arguin",
- "arisin",
- "arousin",
- "arrangin",
- "arrestin",
- "arrivin",
- "articulatin",
- "ascendin",
- "ascertainin",
- "ascribin",
- "askin",
- "assailin",
- "assaultin",
- "assemblin",
- "assertin",
- "assessin",
- "assignin",
- "assimilatin",
- "assistin",
- "associatin",
- "assumin",
- "assurin",
- "astonishin",
- "astoundin",
- "atonin",
- "attachin",
- "attackin",
- "attainin",
- "attemptin",
- "attendin",
- "attestin",
- "attractin",
- "attributin",
- "auditin",
- "augmentin",
- "authorin",
- "authorisin",
- "authorizin",
- "availin",
- "avengin",
- "averagin",
- "avertin",
- "avoidin",
- "awaitin",
- "awakenin",
- "awakin",
- "awardin",
- "awnin",
- "babblin",
- "babysittin",
- "backin",
- "backslidin",
- "backtrackin",
- "bafflin",
- "baggin",
- "bailin",
- "baitin",
- "bakin",
- "balancin",
- "baldin",
- "balloonin",
- "ballotin",
- "bandagin",
- "bandin",
- "bangin",
- "banishin",
- "bankin",
- "bannin",
- "banquetin",
- "banterin",
- "baptizin",
- "bargainin",
- "barin",
- "barkin",
- "barrin",
- "barterin",
- "bashin",
- "baskin",
- "bastin",
- "bathin",
- "batterin",
- "battin",
- "battlin",
- "bawlin",
- "bayin",
- "beadin",
- "beamin",
- "bearin",
- "beatin",
- "beautifyin",
- "beckonin",
- "becomin",
- "beddin",
- "beepin",
- "befittin",
- "befriendin",
- "begettin",
- "beggin",
- "beginnin",
- "beguilin",
- "behavin",
- "beheadin",
- "beholdin",
- "behrin",
- "beijin",
- "bein",
- "belchin",
- "believin",
- "belittlin",
- "bellowin",
- "belongin",
- "beltin",
- "bemoanin",
- "benchmarkin",
- "bendin",
- "benefitin",
- "bennin",
- "beratin",
- "berin",
- "beseechin",
- "besettin",
- "besiegin",
- "bestowin",
- "bestsellin",
- "betrayin",
- "betterin",
- "bettin",
- "bewailin",
- "bewilderin",
- "bewitchin",
- "biasin",
- "bickerin",
- "bicyclin",
- "biddin",
- "bidin",
- "bikin",
- "billin",
- "billowin",
- "bindin",
- "birthin",
- "bisectin",
- "bitin",
- "blackenin",
- "blackin",
- "blackmailin",
- "blamin",
- "blanchin",
- "blarin",
- "blasphemin",
- "blastin",
- "blazin",
- "bleachin",
- "bleatin",
- "bleedin",
- "blendin",
- "blessin",
- "blightin",
- "blindin",
- "blinkin",
- "blisterin",
- "bloatin",
- "blockadin",
- "blockin",
- "bloggin",
- "bloodlettin",
- "bloomin",
- "blossomin",
- "blottin",
- "blowin",
- "bluffin",
- "blunderin",
- "blurrin",
- "blushin",
- "blusterin",
- "boardin",
- "boastin",
- "boatin",
- "bodybuildin",
- "boein",
- "bogglin",
- "boilin",
- "bolsterin",
- "boltin",
- "bombardin",
- "bombin",
- "bondin",
- "bookin",
- "bookkeepin",
- "boomin",
- "boostin",
- "bootin",
- "bootstrappin",
- "borderin",
- "borin",
- "borrowin",
- "botherin",
- "bottlin",
- "bouncin",
- "boundin",
- "bowin",
- "bowlin",
- "bowrin",
- "bowstrin",
- "boxin",
- "boycottin",
- "bracin",
- "bracketin",
- "braggin",
- "braidin",
- "brainstormin",
- "brainwashin",
- "brakin",
- "branchin",
- "brandin",
- "brandishin",
- "bravin",
- "brawlin",
- "brayin",
- "breachin",
- "breakfastin",
- "breakin",
- "breastfeedin",
- "breathin",
- "breathtakin",
- "breedin",
- "brewin",
- "bribin",
- "bridgin",
- "briefin",
- "brightenin",
- "brimmin",
- "brinin",
- "bringin",
- "bristlin",
- "britlin",
- "broadcastin",
- "broadenin",
- "broilin",
- "brokerin",
- "broodin",
- "brownin",
- "browsin",
- "bruisin",
- "brushin",
- "bubblin",
- "buckin",
- "bucklin",
- "buddin",
- "budgetin",
- "bufferin",
- "buffetin",
- "buggin",
- "buildin",
- "bulgin",
- "bullyin",
- "bumblin",
- "bumpin",
- "bundlin",
- "bunglin",
- "buntin",
- "burdenin",
- "burgeonin",
- "burnin",
- "burrowin",
- "burstin",
- "buryin",
- "busin",
- "bustin",
- "bustlin",
- "busyin",
- "butcherin",
- "buttin",
- "buttonin",
- "buyin",
- "buzzin",
- "bypassin",
- "cachin",
- "calculatin",
- "callin",
- "calmin",
- "campaignin",
- "campin",
- "cancelin",
- "cannin",
- "canvassin",
- "cappin",
- "captivatin",
- "capturin",
- "caregivin",
- "caressin",
- "carpetin",
- "carryin",
- "carvin",
- "cascadin",
- "casin",
- "castin",
- "catalogin",
- "cataloguin",
- "catchin",
- "categorizin",
- "caterin",
- "causin",
- "ceasin",
- "ceilin",
- "celebratin",
- "cementin",
- "centerin",
- "certifyin",
- "chafin",
- "challengin",
- "championin",
- "changin",
- "channelin",
- "channin",
- "chantin",
- "characterizin",
- "chargin",
- "chartin",
- "chasin",
- "chatterin",
- "chattin",
- "cheatin",
- "checkin",
- "cheerin",
- "cherishin",
- "chewin",
- "childbearin",
- "chillin",
- "chippin",
- "chirpin",
- "chokin",
- "choosin",
- "choppin",
- "christenin",
- "chucklin",
- "churnin",
- "circlin",
- "circulatin",
- "citin",
- "civilizin",
- "claddin",
- "claimin",
- "clamberin",
- "clamorin",
- "clampin",
- "clangin",
- "clankin",
- "clappin",
- "clarifyin",
- "clashin",
- "claspin",
- "classifyin",
- "clatterin",
- "claverin",
- "clawin",
- "cleanin",
- "cleansin",
- "clearin",
- "cleavin",
- "clenchin",
- "clickin",
- "climbin",
- "clin",
- "clingin",
- "clippin",
- "cloggin",
- "clonin",
- "closin",
- "clothin",
- "clottin",
- "clusterin",
- "clutchin",
- "coachin",
- "coastin",
- "coatin",
- "coaxin",
- "codin",
- "coexistin",
- "coilin",
- "coincidin",
- "coinin",
- "collaboratin",
- "collapsin",
- "collectin",
- "collidin",
- "colonisin",
- "colonizin",
- "colorin",
- "colourin",
- "combatin",
- "combin",
- "combinin",
- "comfortin",
- "comin",
- "commandin",
- "commemoratin",
- "commencin",
- "commendin",
- "commentin",
- "commissionin",
- "committin",
- "communicatin",
- "commutin",
- "comparin",
- "compellin",
- "compensatin",
- "competin",
- "compilin",
- "complainin",
- "completin",
- "complicatin",
- "complyin",
- "composin",
- "compostin",
- "compoundin",
- "comprehendin",
- "compressin",
- "comprisin",
- "compromisin",
- "computin",
- "concealin",
- "concedin",
- "conceivin",
- "concentratin",
- "conceptualizin",
- "concernin",
- "concludin",
- "concurrin",
- "condemnin",
- "condensin",
- "condescendin",
- "conditionin",
- "conductin",
- "conferencin",
- "conferrin",
- "confessin",
- "confidin",
- "configurin",
- "confinin",
- "confirmin",
- "conflictin",
- "conformin",
- "confoundin",
- "confrontin",
- "confusin",
- "congratulatin",
- "conjurin",
- "connectin",
- "connin",
- "conquerin",
- "consentin",
- "conservin",
- "considerin",
- "consistin",
- "consolidatin",
- "consolin",
- "conspirin",
- "constitutin",
- "constrainin",
- "constructin",
- "construin",
- "consultin",
- "consumin",
- "contactin",
- "containin",
- "contaminatin",
- "contemplatin",
- "contendin",
- "contestin",
- "continuin",
- "contractin",
- "contradictin",
- "contrastin",
- "contributin",
- "contrivin",
- "controllin",
- "convenin",
- "convergin",
- "conversin",
- "convertin",
- "conveyin",
- "convincin",
- "cooin",
- "cookin",
- "coolin",
- "cooperatin",
- "coordinatin",
- "copin",
- "copyin",
- "correctin",
- "correlatin",
- "correspondin",
- "corruptin",
- "costin",
- "coughin",
- "counselin",
- "counsellin",
- "counteractin",
- "counterfeitin",
- "counterin",
- "countervailin",
- "countin",
- "couplin",
- "coursin",
- "courtin",
- "coverin",
- "cowerin",
- "crackin",
- "cracklin",
- "cradlin",
- "craftin",
- "crampin",
- "crashin",
- "cravin",
- "crawlin",
- "creakin",
- "creatin",
- "creditin",
- "creepin",
- "cringin",
- "cripplin",
- "criticizin",
- "critiquin",
- "croakin",
- "croppin",
- "crossin",
- "crouchin",
- "crowdin",
- "crowin",
- "crownin",
- "cruisin",
- "crumblin",
- "crunchin",
- "crusadin",
- "crushin",
- "cryin",
- "culminatin",
- "cultivatin",
- "cummin",
- "cunnin",
- "cuppin",
- "curbin",
- "curin",
- "curlin",
- "cursin",
- "curtailin",
- "curvin",
- "cushin",
- "customizin",
- "cuttin",
- "cyclin",
- "dabbin",
- "dabblin",
- "damagin",
- "damnin",
- "dampenin",
- "dampin",
- "dancin",
- "danglin",
- "darjeelin",
- "darkenin",
- "darklin",
- "darlin",
- "darnin",
- "dartin",
- "dashin",
- "datin",
- "dauntin",
- "dawdlin",
- "dawnin",
- "daydreamin",
- "dazzlin",
- "deadenin",
- "deafenin",
- "dealin",
- "debasin",
- "debatin",
- "debilitatin",
- "debriefin",
- "debuggin",
- "decayin",
- "deceivin",
- "decidin",
- "decipherin",
- "decisionmakin",
- "deckin",
- "declaimin",
- "declarin",
- "declinin",
- "decodin",
- "decommissionin",
- "decomposin",
- "deconstructin",
- "decoratin",
- "decouplin",
- "decreasin",
- "dedicatin",
- "deducin",
- "deductin",
- "deemin",
- "deepenin",
- "deerin",
- "defaultin",
- "defeatin",
- "defendin",
- "deferrin",
- "defilin",
- "definin",
- "deflectin",
- "deformin",
- "defraudin",
- "defrayin",
- "defyin",
- "degeneratin",
- "degradin",
- "dehumanizin",
- "delayin",
- "delegatin",
- "deletin",
- "deliberatin",
- "delightin",
- "delineatin",
- "deliverin",
- "deludin",
- "delvin",
- "demandin",
- "demeanin",
- "demin",
- "democratizin",
- "demolishin",
- "demonstratin",
- "demoralizin",
- "denigratin",
- "dennin",
- "denotin",
- "denouncin",
- "denyin",
- "departin",
- "dependin",
- "depictin",
- "depletin",
- "deplorin",
- "deployin",
- "depositin",
- "deprecatin",
- "depreciatin",
- "depressin",
- "deprivin",
- "derivin",
- "descendin",
- "describin",
- "desertin",
- "deservin",
- "designatin",
- "designin",
- "desirin",
- "desolatin",
- "despairin",
- "despatchin",
- "despisin",
- "despondin",
- "destabilizin",
- "destroyin",
- "detachin",
- "detailin",
- "detainin",
- "detectin",
- "deterioratin",
- "determinin",
- "deterrin",
- "devastatin",
- "developin",
- "deviatin",
- "devisin",
- "devolvin",
- "devotin",
- "devourin",
- "diagnosin",
- "dialin",
- "dictatin",
- "dietin",
- "differentiatin",
- "differin",
- "diffusin",
- "digestin",
- "diggin",
- "digitizin",
- "dilatin",
- "dilutin",
- "diminishin",
- "dimmin",
- "dinin",
- "dippin",
- "directin",
- "disablin",
- "disagreein",
- "disappearin",
- "disappointin",
- "disapprovin",
- "disarmin",
- "disbandin",
- "disbelievin",
- "discardin",
- "discernin",
- "dischargin",
- "disciplinin",
- "disclosin",
- "disconcertin",
- "disconnectin",
- "discontinuin",
- "discountin",
- "discouragin",
- "discoursin",
- "discoverin",
- "discreditin",
- "discriminatin",
- "discussin",
- "disdainin",
- "disembarkin",
- "disengagin",
- "disfigurin",
- "disguisin",
- "disgustin",
- "disheartenin",
- "disinfectin",
- "disintegratin",
- "dislikin",
- "dislodgin",
- "dismantlin",
- "dismissin",
- "dismountin",
- "disobeyin",
- "disorientin",
- "disparagin",
- "dispatchin",
- "dispellin",
- "dispensin",
- "dispersin",
- "displacin",
- "displayin",
- "displeasin",
- "disposin",
- "disputin",
- "disqualifyin",
- "disquietin",
- "disregardin",
- "disruptin",
- "dissectin",
- "dissemblin",
- "disseminatin",
- "dissentin",
- "dissipatin",
- "dissolvin",
- "distancin",
- "distillin",
- "distinguishin",
- "distortin",
- "distractin",
- "distressin",
- "distributin",
- "disturbin",
- "divergin",
- "diversifyin",
- "divertin",
- "divestin",
- "dividin",
- "divin",
- "divinin",
- "divorcin",
- "dizzyin",
- "dockin",
- "doctorin",
- "documentin",
- "dodgin",
- "doddlin",
- "doin",
- "dominatin",
- "domineerin",
- "donatin",
- "donnin",
- "doodlin",
- "dopin",
- "dorkin",
- "dosin",
- "dotin",
- "dottin",
- "doublin",
- "doubtin",
- "dowlin",
- "downin",
- "downloadin",
- "downplayin",
- "downsizin",
- "dozin",
- "draftin",
- "draggin",
- "drainin",
- "dramatisin",
- "dramatizin",
- "drapin",
- "drawin",
- "drawlin",
- "dreadin",
- "dreamin",
- "dredgin",
- "drenchin",
- "dressin",
- "dressmakin",
- "dribblin",
- "driftin",
- "drillin",
- "drinkin",
- "drippin",
- "drivin",
- "drizzlin",
- "dronin",
- "droolin",
- "droopin",
- "droppin",
- "drownin",
- "drummin",
- "dryin",
- "duckin",
- "duelin",
- "duellin",
- "dumpin",
- "dunnin",
- "duplicatin",
- "durin",
- "dwellin",
- "dwindlin",
- "dyein",
- "dyin",
- "ealin",
- "earnin",
- "earrin",
- "earthin",
- "easin",
- "easygoin",
- "eatin",
- "eavesdroppin",
- "ebbin",
- "echoin",
- "eclipsin",
- "economizin",
- "eddyin",
- "edgin",
- "edifyin",
- "editin",
- "educatin",
- "effacin",
- "effectin",
- "ejectin",
- "elaboratin",
- "elbowin",
- "electin",
- "electioneerin",
- "electrifyin",
- "elevatin",
- "elicitin",
- "eliminatin",
- "elucidatin",
- "eludin",
- "elutin",
- "emanatin",
- "emancipatin",
- "embalmin",
- "embarkin",
- "embarrassin",
- "embeddin",
- "embellishin",
- "embodyin",
- "embracin",
- "embroiderin",
- "emergin",
- "emigratin",
- "emittin",
- "emphasisin",
- "emphasizin",
- "employin",
- "empowerin",
- "emptyin",
- "emulatin",
- "enablin",
- "enactin",
- "encapsulatin",
- "enchantin",
- "encirclin",
- "enclosin",
- "encodin",
- "encompassin",
- "encounterin",
- "encouragin",
- "encroachin",
- "encryptin",
- "endangerin",
- "endearin",
- "endeavorin",
- "endeavourin",
- "endin",
- "endorsin",
- "endowin",
- "endurin",
- "energizin",
- "enervatin",
- "enfoldin",
- "enforcin",
- "engagin",
- "engenderin",
- "engineerin",
- "engravin",
- "engrossin",
- "engulfin",
- "enhancin",
- "enjoinin",
- "enjoyin",
- "enlargin",
- "enlightenin",
- "enlistin",
- "enlivenin",
- "ennoblin",
- "enquirin",
- "enrichin",
- "enrollin",
- "enslavin",
- "ensuin",
- "ensurin",
- "entailin",
- "entanglin",
- "enterin",
- "enterprisin",
- "entertainin",
- "enthrallin",
- "enticin",
- "entitlin",
- "entrancin",
- "entreatin",
- "entrenchin",
- "entrustin",
- "enumeratin",
- "enunciatin",
- "envelopin",
- "envisionin",
- "envyin",
- "eppin",
- "equalin",
- "equalizin",
- "equallin",
- "equatin",
- "equippin",
- "eradicatin",
- "erasin",
- "erectin",
- "erlin",
- "erodin",
- "errin",
- "eruptin",
- "escalatin",
- "escapin",
- "eschewin",
- "escortin",
- "espin",
- "espousin",
- "establishin",
- "estain",
- "estimatin",
- "etchin",
- "evacuatin",
- "evadin",
- "evaluatin",
- "evangelizin",
- "evaporatin",
- "evenin",
- "everlastin",
- "everythin",
- "evidencin",
- "evincin",
- "evokin",
- "evolvin",
- "ewin",
- "exacerbatin",
- "exactin",
- "exaggeratin",
- "exaltin",
- "examinin",
- "exasperatin",
- "excavatin",
- "exceedin",
- "excellin",
- "exceptin",
- "exchangin",
- "excitin",
- "exclaimin",
- "excludin",
- "excruciatin",
- "excusin",
- "executin",
- "exemplifyin",
- "exemptin",
- "exercisin",
- "exertin",
- "exhalin",
- "exhaustin",
- "exhibitin",
- "exhilaratin",
- "exhortin",
- "existin",
- "exitin",
- "expandin",
- "expectin",
- "expeditin",
- "expellin",
- "expendin",
- "experiencin",
- "experimentin",
- "expirin",
- "explainin",
- "explicatin",
- "explodin",
- "exploitin",
- "explorin",
- "exportin",
- "exposin",
- "expoundin",
- "expressin",
- "extendin",
- "extenuatin",
- "exterminatin",
- "externalizin",
- "extinguishin",
- "extollin",
- "extortin",
- "extractin",
- "extrapolatin",
- "extricatin",
- "exudin",
- "exultin",
- "eyein",
- "eyin",
- "fabricatin",
- "facilitatin",
- "facin",
- "factorin",
- "fadin",
- "failin",
- "faintin",
- "fakin",
- "fallin",
- "falterin",
- "fancyin",
- "fannin",
- "farmin",
- "farthin",
- "fascinatin",
- "fashionin",
- "fastenin",
- "fastin",
- "fatiguin",
- "fattenin",
- "faultin",
- "favorin",
- "favourin",
- "fawnin",
- "fearin",
- "feastin",
- "featurin",
- "feedin",
- "feelin",
- "feignin",
- "fellin",
- "fencin",
- "fermentin",
- "fertilizin",
- "festerin",
- "fetchin",
- "fiddlin",
- "fidgetin",
- "fieldin",
- "fightin",
- "figurin",
- "filin",
- "fillin",
- "filmin",
- "filmmakin",
- "filterin",
- "financin",
- "findin",
- "fingerin",
- "fingerprintin",
- "finishin",
- "firin",
- "fishin",
- "fittin",
- "fixin",
- "flaggin",
- "flailin",
- "flamin",
- "flankin",
- "flappin",
- "flarin",
- "flashin",
- "flattenin",
- "flatterin",
- "flauntin",
- "flavorin",
- "fledglin",
- "fleein",
- "fleetin",
- "flemin",
- "flemmin",
- "flexin",
- "flickerin",
- "flickin",
- "flinchin",
- "flingin",
- "flippin",
- "flirtin",
- "flittin",
- "floatin",
- "flockin",
- "floggin",
- "floodin",
- "floorin",
- "floppin",
- "flounderin",
- "flourishin",
- "flowerin",
- "flowin",
- "fluctuatin",
- "flushin",
- "flutterin",
- "flyin",
- "foamin",
- "focusin",
- "focussin",
- "foldin",
- "followin",
- "fondlin",
- "foolin",
- "footin",
- "foragin",
- "forbearin",
- "forbiddin",
- "forcin",
- "forebodin",
- "forecastin",
- "foregoin",
- "foregroundin",
- "foreseein",
- "foreshadowin",
- "forfeitin",
- "forgettin",
- "forgin",
- "forgivin",
- "formattin",
- "formin",
- "formulatin",
- "forsakin",
- "forthcomin",
- "fortifyin",
- "forwardin",
- "fosterin",
- "foulin",
- "foundin",
- "foundlin",
- "fowlin",
- "fracturin",
- "framin",
- "franchisin",
- "freakin",
- "freein",
- "freestandin",
- "freezin",
- "frequentin",
- "frettin",
- "frightenin",
- "frontin",
- "frostin",
- "frownin",
- "fruitin",
- "frustratin",
- "fryin",
- "fuckin",
- "fuelin",
- "fulfillin",
- "fumblin",
- "fumin",
- "functionin",
- "fundin",
- "fundraisin",
- "furnishin",
- "furtherin",
- "fusin",
- "fussin",
- "gaggin",
- "gainin",
- "gallin",
- "gallopin",
- "gamblin",
- "gapin",
- "gardenin",
- "garnerin",
- "gaspin",
- "gatherin",
- "gatin",
- "gaugin",
- "gazin",
- "gearin",
- "geldin",
- "genderin",
- "generalizin",
- "generatin",
- "genotypin",
- "germinatin",
- "gesticulatin",
- "gesturin",
- "gettin",
- "gigglin",
- "gildin",
- "gissin",
- "givin",
- "glancin",
- "glarin",
- "glazin",
- "gleamin",
- "gleanin",
- "glidin",
- "glimmerin",
- "glintin",
- "glistenin",
- "glitterin",
- "gloamin",
- "gloatin",
- "globalizin",
- "glorifyin",
- "gloryin",
- "glowerin",
- "glowin",
- "gluin",
- "gnashin",
- "gnawin",
- "goerin",
- "goin",
- "goldin",
- "golfin",
- "goodlookin",
- "gorin",
- "goslin",
- "gossipin",
- "gougin",
- "governin",
- "grabbin",
- "gradin",
- "graduatin",
- "graftin",
- "grantin",
- "graphin",
- "grapplin",
- "graspin",
- "gratifyin",
- "grayin",
- "graylin",
- "grazin",
- "greenin",
- "greetin",
- "grievin",
- "grillin",
- "grimacin",
- "grindin",
- "grinnin",
- "grippin",
- "groanin",
- "groomin",
- "gropin",
- "groundbreakin",
- "groundin",
- "groupin",
- "grovellin",
- "growin",
- "growlin",
- "grubbin",
- "grudgin",
- "gruelin",
- "grumblin",
- "gruntin",
- "guaranteein",
- "guardin",
- "guessin",
- "guidin",
- "gulpin",
- "gunnin",
- "gurglin",
- "gushin",
- "hackin",
- "hagglin",
- "hailin",
- "hallucinatin",
- "haltin",
- "halvin",
- "hammerin",
- "hammin",
- "hamperin",
- "hamstrin",
- "handicappin",
- "handin",
- "handlin",
- "handshakin",
- "handwritin",
- "hangin",
- "hankerin",
- "happenin",
- "haranguin",
- "harassin",
- "harborin",
- "harbourin",
- "hardenin",
- "hardin",
- "hardworkin",
- "harin",
- "harkin",
- "harmin",
- "harmonisin",
- "harmonizin",
- "harnessin",
- "harpin",
- "harrowin",
- "harryin",
- "harvestin",
- "hashin",
- "hastenin",
- "hatchin",
- "hatin",
- "hauin",
- "haulin",
- "hauntin",
- "havin",
- "hawkin",
- "hayin",
- "hazardin",
- "hazin",
- "headin",
- "healin",
- "heapin",
- "hearin",
- "hearkenin",
- "heartbreakin",
- "heartenin",
- "heartrendin",
- "heartwarmin",
- "heatin",
- "heavin",
- "hedgin",
- "heedin",
- "heightenin",
- "helpin",
- "hemmin",
- "hemorrhagin",
- "hennin",
- "heraldin",
- "herdin",
- "herin",
- "herrin",
- "hesitatin",
- "hewin",
- "hibernatin",
- "hidin",
- "highlightin",
- "hijackin",
- "hikin",
- "hinderin",
- "hintin",
- "hirelin",
- "hirin",
- "hissin",
- "hitchhikin",
- "hitchin",
- "hittin",
- "hoardin",
- "hobblin",
- "hockin",
- "hoein",
- "hogglin",
- "hoistin",
- "holdin",
- "hollerin",
- "hollin",
- "hollowin",
- "homecomin",
- "homemakin",
- "homeschoolin",
- "homin",
- "homogenizin",
- "honin",
- "honkin",
- "honorin",
- "honourin",
- "hookin",
- "hootin",
- "hopin",
- "hoppin",
- "horrifyin",
- "hostin",
- "houndin",
- "housecleanin",
- "housekeepin",
- "housin",
- "hoverin",
- "howlin",
- "huddlin",
- "huffin",
- "huggin",
- "hulkin",
- "humanizin",
- "humblin",
- "humiliatin",
- "hummin",
- "hungerin",
- "huntin",
- "hurlin",
- "hurryin",
- "hurtin",
- "hurtlin",
- "hushin",
- "hustlin",
- "icin",
- "identifyin",
- "idlin",
- "ignitin",
- "ignorin",
- "illuminatin",
- "illustratin",
- "imagin",
- "imaginin",
- "imitatin",
- "immersin",
- "impactin",
- "impairin",
- "impartin",
- "impedin",
- "impellin",
- "impendin",
- "impingin",
- "implementin",
- "implicatin",
- "implorin",
- "implyin",
- "importin",
- "imposin",
- "impressin",
- "imprintin",
- "imprisonin",
- "improvin",
- "improvisin",
- "imputin",
- "inbreedin",
- "incitin",
- "inclinin",
- "inclosin",
- "includin",
- "incomin",
- "incorporatin",
- "increasin",
- "incriminatin",
- "inculcatin",
- "incurrin",
- "indexin",
- "indicatin",
- "inducin",
- "indulgin",
- "indwellin",
- "infectin",
- "inferrin",
- "infightin",
- "infiltratin",
- "inflatin",
- "inflictin",
- "influencin",
- "informin",
- "infringin",
- "infuriatin",
- "infusin",
- "ingestin",
- "ingratiatin",
- "inhabitin",
- "inhalin",
- "inheritin",
- "inhibitin",
- "initiatin",
- "injectin",
- "injurin",
- "inklin",
- "innin",
- "innovatin",
- "inquirin",
- "insertin",
- "insinuatin",
- "insistin",
- "inspectin",
- "inspirin",
- "installin",
- "instigatin",
- "instillin",
- "institutin",
- "instructin",
- "insulatin",
- "insultin",
- "insurin",
- "integratin",
- "intendin",
- "intensifyin",
- "interactin",
- "interceptin",
- "interestin",
- "interfacin",
- "interferin",
- "interlacin",
- "interlockin",
- "interminglin",
- "internalizin",
- "interposin",
- "interpretin",
- "interrogatin",
- "interruptin",
- "intersectin",
- "intertwinin",
- "intervenin",
- "interviewin",
- "interweavin",
- "intherestin",
- "intimatin",
- "intimidatin",
- "intoxicatin",
- "intriguin",
- "introducin",
- "intrudin",
- "invadin",
- "inventin",
- "invertin",
- "investigatin",
- "investin",
- "invigoratin",
- "invitin",
- "invokin",
- "involvin",
- "ionizin",
- "ironin",
- "irrigatin",
- "irritatin",
- "isolatin",
- "issuin",
- "itchin",
- "jabberin",
- "jabbin",
- "jammin",
- "janglin",
- "jarrin",
- "jeerin",
- "jeopardizin",
- "jestin",
- "jinglin",
- "jobbin",
- "jockeyin",
- "joggin",
- "joinin",
- "jokin",
- "joltin",
- "jostlin",
- "jottin",
- "journalin",
- "journeyin",
- "joultin",
- "joustin",
- "judgin",
- "jugglin",
- "jumpin",
- "justifyin",
- "juttin",
- "juxtaposin",
- "kayakin",
- "keatin",
- "keelin",
- "keenin",
- "keepin",
- "keesin",
- "kellin",
- "kesselrin",
- "ketterin",
- "keyin",
- "kickin",
- "kiddin",
- "kidnappin",
- "killin",
- "kilwinnin",
- "kindlin",
- "kinkin",
- "kiplin",
- "kissin",
- "kneadin",
- "kneelin",
- "knittin",
- "knockin",
- "knottin",
- "knowin",
- "konin",
- "krigin",
- "kunmin",
- "labelin",
- "labellin",
- "laborin",
- "labourin",
- "lacin",
- "lackin",
- "lactatin",
- "ladin",
- "laggin",
- "lamentin",
- "landholdin",
- "landin",
- "landscapin",
- "languishin",
- "lansin",
- "lappin",
- "lapsin",
- "lashin",
- "lastin",
- "laughin",
- "launchin",
- "launderin",
- "lawmakin",
- "layerin",
- "layin",
- "leachin",
- "leadin",
- "leakin",
- "leanin",
- "leapin",
- "learnin",
- "leasin",
- "leavin",
- "lecturin",
- "leerin",
- "legislatin",
- "legitimatin",
- "legitimizin",
- "lendin",
- "lengthenin",
- "lessenin",
- "lessin",
- "letterin",
- "lettin",
- "levelin",
- "levellin",
- "leveragin",
- "levyin",
- "liberalizin",
- "liberatin",
- "licensin",
- "lickin",
- "lifesavin",
- "liftin",
- "lightenin",
- "lightin",
- "lightnin",
- "likin",
- "liltin",
- "limitin",
- "limpin",
- "lingerin",
- "linin",
- "linkin",
- "liquidatin",
- "listenin",
- "listin",
- "listnin",
- "litterin",
- "livin",
- "loadin",
- "loafin",
- "loathin",
- "lobbyin",
- "localizin",
- "locatin",
- "lockin",
- "lodgin",
- "loggin",
- "loiterin",
- "lollin",
- "longin",
- "longstandin",
- "lookin",
- "loomin",
- "loopin",
- "loosenin",
- "loosin",
- "lootin",
- "losin",
- "loungin",
- "lovemakin",
- "lovin",
- "lowerin",
- "lowin",
- "lubricatin",
- "luggin",
- "lullin",
- "lumberin",
- "lunchin",
- "lurchin",
- "lurin",
- "lurkin",
- "lyin",
- "lynchin",
- "machinin",
- "maddenin",
- "magnetizin",
- "magnifyin",
- "mailin",
- "maimin",
- "mainsprin",
- "mainstreamin",
- "maintainin",
- "mainwarin",
- "majorin",
- "makin",
- "malfunctionin",
- "malingerin",
- "managin",
- "mandatin",
- "maneuverin",
- "manifestin",
- "manipulatin",
- "mannerin",
- "mannin",
- "manoeuvrin",
- "manufacturin",
- "manurin",
- "mappin",
- "maraudin",
- "marchin",
- "marketin",
- "markin",
- "marryin",
- "marshallin",
- "marvelin",
- "marvellin",
- "maskin",
- "masqueradin",
- "massagin",
- "massin",
- "masterin",
- "masturbatin",
- "matchin",
- "matchmakin",
- "matin",
- "mattin",
- "maturin",
- "maximisin",
- "maximizin",
- "meanderin",
- "meanin",
- "measurin",
- "meddlin",
- "mediatin",
- "meditatin",
- "meetin",
- "meldin",
- "meltin",
- "memorizin",
- "menacin",
- "mendin",
- "mentionin",
- "mentorin",
- "merchandisin",
- "mergin",
- "meshin",
- "mesmerizin",
- "messagin",
- "messin",
- "meterin",
- "middlin",
- "midmornin",
- "migratin",
- "milkin",
- "millin",
- "mimickin",
- "mincin",
- "mindin",
- "minglin",
- "minimisin",
- "minimizin",
- "minin",
- "ministerin",
- "mirrorin",
- "misbehavin",
- "misgivin",
- "misleadin",
- "misreadin",
- "misrepresentin",
- "missin",
- "mistakin",
- "misunderstandin",
- "mitigatin",
- "mixin",
- "moanin",
- "mobilisin",
- "mobilizin",
- "mockin",
- "modelin",
- "modellin",
- "moderatin",
- "modernizin",
- "modifyin",
- "modulatin",
- "moistenin",
- "moldin",
- "molestin",
- "monitorin",
- "monopolizin",
- "moorin",
- "mopin",
- "moppin",
- "moralizin",
- "mornin",
- "morphin",
- "mortifyin",
- "motherin",
- "motionin",
- "motivatin",
- "motorin",
- "moulderin",
- "mouldin",
- "mountaineerin",
- "mountin",
- "mournin",
- "mouthin",
- "movin",
- "mowin",
- "mullin",
- "multiplexin",
- "multiplyin",
- "multitaskin",
- "mumblin",
- "munchin",
- "murderin",
- "murmurin",
- "murtherin",
- "musin",
- "musterin",
- "mutterin",
- "mystifyin",
- "naethin",
- "naggin",
- "nailin",
- "namin",
- "namsayin",
- "nankin",
- "nappin",
- "narratin",
- "narrowin",
- "nauseatin",
- "nuffin",
- "navigatin",
- "nearin",
- "necessitatin",
- "necrotizin",
- "needin",
- "needlin",
- "negatin",
- "neglectin",
- "negotiatin",
- "neighborin",
- "neighbourin",
- "neighin",
- "nestin",
- "nestlin",
- "nettin",
- "networkin",
- "neuroimagin",
- "neutralizin",
- "nibblin",
- "nippin",
- "noddin",
- "nominatin",
- "nonconformin",
- "nonlivin",
- "nonthreatenin",
- "normalizin",
- "nosin",
- "nothin",
- "noticin",
- "notifyin",
- "notin",
- "nottin",
- "notwithstandin",
- "nourishin",
- "nudgin",
- "numberin",
- "numbin",
- "nursin",
- "nurturin",
- "obeyin",
- "objectifyin",
- "objectin",
- "obligin",
- "obliteratin",
- "obscurin",
- "observin",
- "obsessin",
- "obstructin",
- "obtainin",
- "obviatin",
- "occasionin",
- "occludin",
- "occupyin",
- "occurin",
- "occurrin",
- "offendin",
- "offerin",
- "officiatin",
- "offin",
- "offsettin",
- "offshorin",
- "offsprin",
- "oglin",
- "oilin",
- "omittin",
- "oncomin",
- "ongoin",
- "oozin",
- "openin",
- "operatin",
- "opposin",
- "oppressin",
- "optimisin",
- "optimizin",
- "optin",
- "orbitin",
- "orchestratin",
- "ordainin",
- "orderin",
- "ordinatin",
- "organisin",
- "organizin",
- "orientin",
- "originatin",
- "ornamentin",
- "oscillatin",
- "otherin",
- "oustin",
- "outcroppin",
- "outfittin",
- "outgoin",
- "outin",
- "outlawin",
- "outlinin",
- "outlyin",
- "outnumberin",
- "outpourin",
- "outsourcin",
- "outstandin",
- "outstrippin",
- "overarchin",
- "overbearin",
- "overcomin",
- "overcrowdin",
- "overdoin",
- "overeatin",
- "overestimatin",
- "overexpressin",
- "overfishin",
- "overflowin",
- "overgrazin",
- "overhangin",
- "overhaulin",
- "overhearin",
- "overheatin",
- "overlappin",
- "overlayin",
- "overloadin",
- "overlookin",
- "overlyin",
- "overmasterin",
- "overpowerin",
- "overreachin",
- "overreactin",
- "overridin",
- "overrulin",
- "overrunnin",
- "overseein",
- "overshadowin",
- "overstatin",
- "oversteppin",
- "overtakin",
- "overthrowin",
- "overtrainin",
- "overturnin",
- "overweenin",
- "overwhelmin",
- "overwritin",
- "owin",
- "ownin",
- "oxidisin",
- "oxidizin",
- "pacifyin",
- "pacin",
- "packagin",
- "packin",
- "paddin",
- "paddlin",
- "pagin",
- "painstakin",
- "paintin",
- "pairin",
- "palin",
- "palpitatin",
- "pamperin",
- "panderin",
- "panelin",
- "panellin",
- "panickin",
- "pannin",
- "pantin",
- "paradin",
- "parallelin",
- "paralysin",
- "paralyzin",
- "paraphrasin",
- "pardonin",
- "parentin",
- "parin",
- "parkin",
- "parsin",
- "partakin",
- "participatin",
- "partin",
- "partitionin",
- "partnerin",
- "partyin",
- "passin",
- "pastin",
- "patchin",
- "patentin",
- "patrollin",
- "patronisin",
- "patronizin",
- "patterin",
- "patternin",
- "pausin",
- "pavin",
- "pawin",
- "payin",
- "peacebuildin",
- "peacekeepin",
- "peacemakin",
- "peakin",
- "pealin",
- "peckin",
- "peddlin",
- "peekin",
- "peelin",
- "peepin",
- "peerin",
- "pekin",
- "peltin",
- "pendin",
- "penetratin",
- "pennin",
- "peoplin",
- "perceivin",
- "perchin",
- "percolatin",
- "perfectin",
- "perforatin",
- "performin",
- "perishin",
- "permeatin",
- "permittin",
- "perpetratin",
- "perpetuatin",
- "perplexin",
- "persecutin",
- "perseverin",
- "pershin",
- "persistin",
- "perspirin",
- "persuadin",
- "pertainin",
- "perusin",
- "pervadin",
- "pervertin",
- "pesterin",
- "petitionin",
- "pettin",
- "phasin",
- "philosophizin",
- "phishin",
- "phonin",
- "photocopyin",
- "photographin",
- "phrasin",
- "pickerin",
- "picketin",
- "pickin",
- "picklin",
- "picturin",
- "piecin",
- "piercin",
- "pilferin",
- "pilin",
- "pillagin",
- "pilotin",
- "pinchin",
- "pinin",
- "pinnin",
- "pioneerin",
- "pipin",
- "pissin",
- "pitchin",
- "pittin",
- "pityin",
- "pivotin",
- "placin",
- "plaguin",
- "planin",
- "plankin",
- "plannin",
- "plantin",
- "plasterin",
- "platin",
- "playin",
- "playthin",
- "pleadin",
- "pleasin",
- "pledgin",
- "ploddin",
- "plottin",
- "ploughin",
- "plowin",
- "pluckin",
- "pluggin",
- "plumbin",
- "plummetin",
- "plunderin",
- "plungin",
- "plyin",
- "poachin",
- "pocketin",
- "podcastin",
- "pointin",
- "poisonin",
- "pokin",
- "polarizin",
- "poldin",
- "policin",
- "policymakin",
- "polishin",
- "pollin",
- "pollutin",
- "ponderin",
- "poolin",
- "poppin",
- "populatin",
- "porin",
- "portrayin",
- "posin",
- "positin",
- "positionin",
- "possessin",
- "postin",
- "postponin",
- "postulatin",
- "posturin",
- "pottin",
- "poundin",
- "pourin",
- "poutin",
- "powerin",
- "practicin",
- "practisin",
- "praisin",
- "prancin",
- "prayin",
- "preachin",
- "precedin",
- "precipitatin",
- "precludin",
- "predictin",
- "predisposin",
- "predominatin",
- "preexistin",
- "preferrin",
- "preparin",
- "preponderatin",
- "prepossessin",
- "preprocessin",
- "prescribin",
- "presentin",
- "preservin",
- "presidin",
- "pressin",
- "pressurin",
- "presumin",
- "presupposin",
- "pretendin",
- "prevailin",
- "preventin",
- "preyin",
- "pricin",
- "prickin",
- "primin",
- "printin",
- "prioritizin",
- "privateerin",
- "privilegin",
- "probin",
- "problemsolvin",
- "proceedin",
- "processin",
- "proclaimin",
- "procurin",
- "proddin",
- "producin",
- "professin",
- "profilin",
- "profiteerin",
- "profitin",
- "programmin",
- "progressin",
- "prohibitin",
- "projectin",
- "proliferatin",
- "prolongin",
- "promisin",
- "promotin",
- "promptin",
- "promulgatin",
- "pronouncin",
- "proofin",
- "proofreadin",
- "propagatin",
- "propellin",
- "prophesyin",
- "proposin",
- "proppin",
- "prosecutin",
- "proselytizin",
- "prospectin",
- "prosperin",
- "prostratin",
- "protectin",
- "protestin",
- "prototypin",
- "protrudin",
- "providin",
- "provin",
- "provisionin",
- "provokin",
- "prowlin",
- "prunin",
- "pryin",
- "publicizin",
- "publishin",
- "puddin",
- "pullin",
- "pulsatin",
- "pulsin",
- "pumpin",
- "punchin",
- "punishin",
- "punnin",
- "purchasin",
- "purgin",
- "purifyin",
- "purportin",
- "purrin",
- "pursuin",
- "pushin",
- "puttin",
- "puzzlin",
- "quakin",
- "qualifyin",
- "quantifyin",
- "quarrelin",
- "quarrellin",
- "quarryin",
- "quarterin",
- "quaverin",
- "quellin",
- "quenchin",
- "queryin",
- "questin",
- "questionin",
- "queuein",
- "queuin",
- "quickenin",
- "quietin",
- "quiltin",
- "quittin",
- "quiverin",
- "quizzin",
- "quotin",
- "racin",
- "rackin",
- "radiatin",
- "raftin",
- "ragin",
- "raidin",
- "railin",
- "rainin",
- "rakin",
- "rallyin",
- "ramblin",
- "rammin",
- "ranchin",
- "rangin",
- "rantin",
- "rapin",
- "rappin",
- "raspin",
- "ratifyin",
- "ratin",
- "rationalizin",
- "rationin",
- "rattlin",
- "ravagin",
- "ravin",
- "ravishin",
- "reachin",
- "reactin",
- "readin",
- "reaffirmin",
- "realisin",
- "realizin",
- "reapin",
- "reappearin",
- "rearin",
- "rearrangin",
- "reasonin",
- "reassurin",
- "rebellin",
- "rebuildin",
- "rebukin",
- "recallin",
- "recastin",
- "recedin",
- "receivin",
- "reciprocatin",
- "recitin",
- "reckonin",
- "reclaimin",
- "reclinin",
- "recognisin",
- "recognizin",
- "recollectin",
- "recommendin",
- "reconcilin",
- "reconstructin",
- "recordin",
- "recountin",
- "recoverin",
- "recreatin",
- "recruitin",
- "rectifyin",
- "recurrin",
- "recyclin",
- "reddenin",
- "reddin",
- "redeemin",
- "redefinin",
- "redirectin",
- "redistrictin",
- "reducin",
- "reekin",
- "reelin",
- "reengineerin",
- "refactorin",
- "referencin",
- "referrin",
- "refinancin",
- "refinin",
- "reflectin",
- "reformin",
- "refractin",
- "refrainin",
- "reframin",
- "refreshin",
- "refuelin",
- "refusin",
- "refutin",
- "regainin",
- "regardin",
- "regeneratin",
- "registerin",
- "regrettin",
- "regulatin",
- "rehashin",
- "rehearin",
- "rehearsin",
- "reignin",
- "reinforcin",
- "reinventin",
- "reiteratin",
- "rejectin",
- "rejoicin",
- "rejoinin",
- "relapsin",
- "relatin",
- "relaxin",
- "relayin",
- "releasin",
- "relievin",
- "relinquishin",
- "relishin",
- "relivin",
- "reloadin",
- "relocatin",
- "relyin",
- "remainin",
- "remakin",
- "remarkin",
- "remedyin",
- "rememberin",
- "remindin",
- "reminiscin",
- "remittin",
- "remodelin",
- "remodellin",
- "removin",
- "renamin",
- "renderin",
- "rendin",
- "renewin",
- "renouncin",
- "renovatin",
- "rentin",
- "reopenin",
- "reorderin",
- "reorganizin",
- "repairin",
- "repayin",
- "repealin",
- "repeatin",
- "repellin",
- "repentin",
- "replacin",
- "replenishin",
- "replicatin",
- "replyin",
- "reportin",
- "reposin",
- "repositionin",
- "representin",
- "repressin",
- "reprintin",
- "reproachin",
- "reprocessin",
- "reproducin",
- "reprovin",
- "repudiatin",
- "requestin",
- "requirin",
- "rereadin",
- "rescuin",
- "researchin",
- "resemblin",
- "resentin",
- "reservin",
- "resettin",
- "reshapin",
- "residin",
- "resignin",
- "resistin",
- "resolvin",
- "resonatin",
- "resortin",
- "resoundin",
- "respectin",
- "respondin",
- "restin",
- "restorin",
- "restrainin",
- "restrictin",
- "restructurin",
- "resultin",
- "resumin",
- "resurfacin",
- "retailin",
- "retainin",
- "retardin",
- "retellin",
- "rethinkin",
- "retirin",
- "retracin",
- "retrainin",
- "retreatin",
- "retrievin",
- "returnin",
- "reusin",
- "revealin",
- "revellin",
- "reverberatin",
- "reversin",
- "revertin",
- "reviewin",
- "revisin",
- "revisitin",
- "revivin",
- "revoltin",
- "revolvin",
- "rewardin",
- "reworkin",
- "rewritin",
- "rhymin",
- "riddin",
- "ridiculin",
- "ridin",
- "riggin",
- "rightin",
- "ringin",
- "rinsin",
- "riotin",
- "ripenin",
- "rippin",
- "ripplin",
- "risin",
- "riskin",
- "rivetin",
- "roamin",
- "roarin",
- "roastin",
- "rockin",
- "rollickin",
- "rollin",
- "rompin",
- "roofin",
- "roomin",
- "roostin",
- "rootin",
- "rotatin",
- "rottin",
- "roughin",
- "roundin",
- "rousin",
- "routin",
- "rovin",
- "rowin",
- "rowlin",
- "rubbin",
- "rufflin",
- "ruinin",
- "rulemakin",
- "rulin",
- "rumblin",
- "ruminatin",
- "rummagin",
- "runnin",
- "rushin",
- "rustin",
- "rustlin",
- "rythin",
- "sackin",
- "sacrificin",
- "safeguardin",
- "safekeepin",
- "saggin",
- "sailin",
- "salutin",
- "samplin",
- "sanctifyin",
- "sanctionin",
- "saplin",
- "satisfyin",
- "saunterin",
- "savin",
- "savorin",
- "sawin",
- "sayin",
- "scaffoldin",
- "scaldin",
- "scalin",
- "scamperin",
- "scannin",
- "scarin",
- "scarrin",
- "scathin",
- "scatterin",
- "scavengin",
- "schedulin",
- "schellin",
- "schemin",
- "schillin",
- "schoolin",
- "scoldin",
- "scoopin",
- "scorchin",
- "scorin",
- "scourin",
- "scoutin",
- "scowlin",
- "scramblin",
- "scrapin",
- "scratchin",
- "screamin",
- "screechin",
- "screenin",
- "screwin",
- "scribblin",
- "scriptin",
- "scrollin",
- "scrubbin",
- "scrutinizin",
- "scurryin",
- "seafarin",
- "sealin",
- "searchin",
- "searin",
- "seasonin",
- "seatin",
- "secretin",
- "securin",
- "seducin",
- "seedin",
- "seedlin",
- "seein",
- "seekin",
- "seemin",
- "seepin",
- "seethin",
- "seizin",
- "selectin",
- "sellin",
- "sendin",
- "sensin",
- "sentencin",
- "separatin",
- "sequencin",
- "servicin",
- "servin",
- "settin",
- "settlin",
- "sewin",
- "shadin",
- "shadowin",
- "shakin",
- "shamin",
- "shapin",
- "sharin",
- "sharkin",
- "sharpenin",
- "shatterin",
- "shavin",
- "shearin",
- "sheathin",
- "sheddin",
- "sheetin",
- "shellin",
- "shelterin",
- "shelvin",
- "shewin",
- "shieldin",
- "shiftin",
- "shillin",
- "shimmerin",
- "shinin",
- "shipbuildin",
- "shippin",
- "shirkin",
- "shittin",
- "shiverin",
- "shockin",
- "shootin",
- "shoppin",
- "shortcomin",
- "shortenin",
- "shoulderin",
- "shoutin",
- "shovin",
- "showerin",
- "showin",
- "shriekin",
- "shrinkin",
- "shruggin",
- "shudderin",
- "shufflin",
- "shuntin",
- "shuttin",
- "siblin",
- "sickenin",
- "sidin",
- "siftin",
- "sighin",
- "sightin",
- "sightseein",
- "signalin",
- "signallin",
- "signifyin",
- "signin",
- "silencin",
- "simmerin",
- "simplifyin",
- "simulatin",
- "singin",
- "sinkin",
- "sinnin",
- "sippin",
- "sitin",
- "sittin",
- "situatin",
- "sizin",
- "sizzlin",
- "skatin",
- "sketchin",
- "skiin",
- "skimmin",
- "skinnin",
- "skippin",
- "skirmishin",
- "skirtin",
- "skulkin",
- "slammin",
- "slantin",
- "slappin",
- "slashin",
- "slaughterin",
- "slaveholdin",
- "slayin",
- "sleepin",
- "slicin",
- "slidin",
- "slippin",
- "slopin",
- "slowin",
- "slumberin",
- "smackin",
- "smartin",
- "smashin",
- "smatterin",
- "smellin",
- "smeltin",
- "smilin",
- "smokin",
- "smolderin",
- "smoothin",
- "smotherin",
- "smoulderin",
- "smugglin",
- "snappin",
- "snarlin",
- "snatchin",
- "sneakin",
- "sneerin",
- "sneezin",
- "sniffin",
- "snoopin",
- "snorin",
- "snortin",
- "snowin",
- "soakin",
- "soarin",
- "sobbin",
- "soberin",
- "socializin",
- "softenin",
- "solderin",
- "soldierin",
- "solicitin",
- "solvin",
- "somethin",
- "soothin",
- "sorrowin",
- "sortin",
- "soundin",
- "sourcin",
- "sowin",
- "spacin",
- "spaldin",
- "spankin",
- "spannin",
- "sparin",
- "sparkin",
- "sparklin",
- "sparrin",
- "spauldin",
- "spawnin",
- "speakin",
- "specializin",
- "specifyin",
- "speculatin",
- "speedin",
- "spellin",
- "spendin",
- "spewin",
- "spillin",
- "spinnin",
- "spiralin",
- "spittin",
- "splashin",
- "splicin",
- "splinterin",
- "splittin",
- "spoilin",
- "sponsorin",
- "sportin",
- "spottin",
- "spoutin",
- "sprawlin",
- "sprayin",
- "spreadin",
- "springin",
- "sprinklin",
- "sproutin",
- "spurrin",
- "sputterin",
- "spyin",
- "squabblin",
- "squarin",
- "squattin",
- "squeakin",
- "squealin",
- "squeezin",
- "squintin",
- "squirmin",
- "stabbin",
- "stabilizin",
- "stackin",
- "staffin",
- "staggerin",
- "stagin",
- "stainin",
- "stakin",
- "stalkin",
- "stallin",
- "stammerin",
- "stampin",
- "standin",
- "starin",
- "starrin",
- "startin",
- "startlin",
- "starvin",
- "statin",
- "stayin",
- "steadyin",
- "stealin",
- "steamin",
- "steerin",
- "stemmin",
- "stentin",
- "steppin",
- "stereotypin",
- "sterlin",
- "stickin",
- "stiffenin",
- "stiflin",
- "stimulatin",
- "stingin",
- "stinkin",
- "stipulatin",
- "stirlin",
- "stirrin",
- "stitchin",
- "stockin",
- "stompin",
- "stoopin",
- "stoppin",
- "storin",
- "stormin",
- "storytellin",
- "straddlin",
- "stragglin",
- "straightenin",
- "strainin",
- "stranglin",
- "strappin",
- "strayin",
- "streamin",
- "streamlinin",
- "strengthenin",
- "stressin",
- "stretchin",
- "stridin",
- "strikin",
- "stringin",
- "strippin",
- "strivin",
- "strokin",
- "strollin",
- "structurin",
- "strugglin",
- "struttin",
- "studyin",
- "stuffin",
- "stumblin",
- "stunnin",
- "stutterin",
- "stylin",
- "subduin",
- "subjectin",
- "submittin",
- "subordinatin",
- "subscribin",
- "subsidin",
- "subsistin",
- "substitutin",
- "subtractin",
- "subvertin",
- "succeedin",
- "succumbin",
- "suckin",
- "sucklin",
- "sufferin",
- "suffocatin",
- "suggestin",
- "suin",
- "summarizin",
- "summin",
- "summonin",
- "superconductin",
- "superintendin",
- "supersedin",
- "supervisin",
- "supplementin",
- "supplyin",
- "supportin",
- "supposin",
- "suppressin",
- "surfacin",
- "surfin",
- "surgin",
- "surmountin",
- "surpassin",
- "surprisin",
- "surrenderin",
- "surroundin",
- "surveyin",
- "survivin",
- "suspectin",
- "suspendin",
- "sustainin",
- "swaggerin",
- "swallowin",
- "swappin",
- "swarmin",
- "swayin",
- "swearin",
- "sweatin",
- "sweepin",
- "swellin",
- "swelterin",
- "swimmin",
- "swingin",
- "swirlin",
- "switchin",
- "swoopin",
- "symbolizin",
- "sympathizin",
- "synthesizin",
- "tackin",
- "tacklin",
- "taggin",
- "tailin",
- "tailorin",
- "takin",
- "talkin",
- "tamin",
- "tamperin",
- "tanglin",
- "tantalisin",
- "tantalizin",
- "taperin",
- "tapin",
- "tappin",
- "targetin",
- "tarlin",
- "tarryin",
- "taskin",
- "tastin",
- "tattooin",
- "tatterin",
- "tauntin",
- "taxin",
- "teachin",
- "teamin",
- "tearin",
- "teasin",
- "teemin",
- "teeterin",
- "teethin",
- "telecommutin",
- "telegraphin",
- "telemarketin",
- "telephonin",
- "tellin",
- "temperin",
- "temptin",
- "tenderin",
- "tendin",
- "tensin",
- "terminatin",
- "terrifyin",
- "terrorizin",
- "testifyin",
- "testin",
- "tetherin",
- "textin",
- "thankin",
- "thanksgivin",
- "thawin",
- "theorisin",
- "theorizin",
- "thickenin",
- "thievin",
- "thinkin",
- "thinnin",
- "thirstin",
- "thoroughgoin",
- "thrampin",
- "thrashin",
- "threadin",
- "threatenin",
- "threshin",
- "thresholdin",
- "thrillin",
- "thrivin",
- "throbbin",
- "throngin",
- "throttlin",
- "throwin",
- "thrustin",
- "thuddin",
- "thumbin",
- "thumpin",
- "thunderin",
- "thwartin",
- "tickin",
- "ticklin",
- "tidyin",
- "tightenin",
- "tilin",
- "tillin",
- "tiltin",
- "timin",
- "tinglin",
- "tinkerin",
- "tinklin",
- "tintin",
- "tippin",
- "tirin",
- "tithin",
- "titillatin",
- "toastin",
- "toilin",
- "toleratin",
- "tollin",
- "toolin",
- "toppin",
- "topplin",
- "tormentin",
- "torturin",
- "tossin",
- "tostrin",
- "totalin",
- "totalizin",
- "totallin",
- "totin",
- "totterin",
- "touchin",
- "tourin",
- "toutin",
- "towerin",
- "towin",
- "toyin",
- "tracin",
- "trackin",
- "tradin",
- "traffickin",
- "trailin",
- "trainin",
- "trampin",
- "tramplin",
- "transactin",
- "transcendin",
- "transcribin",
- "transferrin",
- "transformin",
- "transgressin",
- "transitionin",
- "translatin",
- "transmittin",
- "transpirin",
- "transplantin",
- "transportin",
- "transposin",
- "trappin",
- "travelin",
- "travellin",
- "traversin",
- "trawlin",
- "treadin",
- "treatin",
- "trekkin",
- "tremblin",
- "trenchin",
- "trendin",
- "trespassin",
- "trickin",
- "tricklin",
- "triflin",
- "triggerin",
- "trillin",
- "trimmin",
- "trippin",
- "triumphin",
- "trollin",
- "troopin",
- "trottin",
- "troubleshootin",
- "troublin",
- "truckin",
- "trudgin",
- "trumpetin",
- "trustin",
- "tryin",
- "tubin",
- "tuckin",
- "tuggin",
- "tulvin",
- "tumblin",
- "tunin",
- "tunnelin",
- "tunnellin",
- "turnin",
- "tutorin",
- "tweakin",
- "twinin",
- "twinklin",
- "twinnin",
- "twirlin",
- "twistin",
- "twitchin",
- "twitterin",
- "tyin",
- "typesettin",
- "typewritin",
- "typin",
- "unappealin",
- "unassumin",
- "unavailin",
- "unbecomin",
- "unbelievin",
- "unbendin",
- "unblinkin",
- "uncarin",
- "unceasin",
- "unchangin",
- "uncomplainin",
- "uncomprehendin",
- "uncompromisin",
- "unconvincin",
- "uncoverin",
- "undercuttin",
- "underestimatin",
- "undergoin",
- "underlinin",
- "underlyin",
- "underminin",
- "underpinnin",
- "underscorin",
- "understandin",
- "undertakin",
- "underwritin",
- "undeservin",
- "undeviatin",
- "undoin",
- "undressin",
- "undulatin",
- "undyin",
- "unearthin",
- "unendin",
- "unerrin",
- "unfailin",
- "unfalterin",
- "unfeelin",
- "unflaggin",
- "unflatterin",
- "unflinchin",
- "unfoldin",
- "unforgivin",
- "unheedin",
- "unhesitatin",
- "unifyin",
- "uninterestin",
- "uninvitin",
- "unitin",
- "universalizin",
- "unknowin",
- "unleashin",
- "unloadin",
- "unlockin",
- "unlovin",
- "unmaskin",
- "unmeanin",
- "unmovin",
- "unnervin",
- "unoffendin",
- "unpackin",
- "unpleasin",
- "unpretendin",
- "unpromisin",
- "unquestionin",
- "unravelin",
- "unravellin",
- "unreasonin",
- "unrelentin",
- "unremittin",
- "unresistin",
- "unrollin",
- "unsatisfyin",
- "unseein",
- "unsettlin",
- "unsmilin",
- "unsparin",
- "unsurprisin",
- "unsuspectin",
- "unswervin",
- "unthinkin",
- "untirin",
- "untyin",
- "unvaryin",
- "unveilin",
- "unwaverin",
- "unwillin",
- "unwindin",
- "unwittin",
- "unyieldin",
- "upbraidin",
- "upbringin",
- "upbuildin",
- "upcomin",
- "updatin",
- "upgradin",
- "upholdin",
- "upliftin",
- "uploadin",
- "uprisin",
- "uprootin",
- "upsettin",
- "upstandin",
- "upswin",
- "upwellin",
- "urgin",
- "urinatin",
- "usherin",
- "usin",
- "usurpin",
- "utilisin",
- "utilizin",
- "utterin",
- "vacatin",
- "vacationin",
- "vacillatin",
- "vacuumin",
- "validatin",
- "valuin",
- "vanishin",
- "vanquishin",
- "varyin",
- "vaultin",
- "vauntin",
- "veerin",
- "veilin",
- "vendin",
- "ventilatin",
- "ventin",
- "venturin",
- "vergin",
- "verifyin",
- "versionin",
- "vestin",
- "vettin",
- "vexin",
- "vibratin",
- "victuallin",
- "viewin",
- "vikin",
- "vindicatin",
- "vinin",
- "violatin",
- "visionin",
- "visitin",
- "visualizin",
- "vitalizin",
- "vivifyin",
- "voicin",
- "voidin",
- "volunteerin",
- "vomitin",
- "votin",
- "vowin",
- "voyagin",
- "vyin",
- "wadin",
- "waftin",
- "waggin",
- "wagin",
- "wailin",
- "waitin",
- "waivin",
- "wakin",
- "walkin",
- "wallowin",
- "wanderin",
- "wanin",
- "wantin",
- "warblin",
- "wardin",
- "warehousin",
- "warin",
- "warmin",
- "warnin",
- "warpin",
- "warrin",
- "washin",
- "wastin",
- "watchin",
- "waterin",
- "watermarkin",
- "waverin",
- "wavin",
- "waxin",
- "weakenin",
- "weaklin",
- "weanin",
- "wearin",
- "wearyin",
- "weatherin",
- "weavin",
- "webbin",
- "weddin",
- "weedin",
- "weepin",
- "weighin",
- "weightin",
- "welcomin",
- "weldin",
- "wellbein",
- "wellin",
- "wettin",
- "whackin",
- "whalin",
- "wheelin",
- "wheezin",
- "whimperin",
- "whinin",
- "whippin",
- "whirlin",
- "whirrin",
- "whisperin",
- "whistlin",
- "whitenin",
- "whitin",
- "whizzin",
- "whoopin",
- "whoppin",
- "widenin",
- "wieldin",
- "wildin",
- "willin",
- "wincin",
- "windin",
- "winkin",
- "winnin",
- "winterin",
- "wipin",
- "wirin",
- "wishin",
- "withdrawin",
- "witherin",
- "withholdin",
- "withstandin",
- "witnessin",
- "wonderin",
- "wooin",
- "wordin",
- "workin",
- "worryin",
- "worsenin",
- "worshippin",
- "woundin",
- "wranglin",
- "wrappin",
- "wreckin",
- "wrenchin",
- "wrestlin",
- "wrigglin",
- "wringin",
- "wrinklin",
- "writhin",
- "writin",
- "wrongdoin",
- "yachtin",
- "yankin",
- "yappin",
- "yawnin",
- "yearlin",
- "yearnin",
- "yellin",
- "yellowin",
- "yelpin",
- "yieldin",
- "zaggin",
- "zeroin",
- "zigzaggin",
- "zippin",
- "zonin",
- "zoomin"
- );
-}
src/main/java/com/whitemagicsoftware/keenquotes/Converter.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.function.Consumer;
-import java.util.function.Function;
-
-import static com.whitemagicsoftware.keenquotes.TokenType.*;
-import static java.util.Collections.sort;
-import static java.util.Map.*;
-
-/**
- * Responsible for converting curly quotes to HTML entities throughout a
- * text string.
- */
-@SuppressWarnings( "unused" )
-public class Converter implements Function<String, String> {
- public static final Map<TokenType, String> ENTITIES = ofEntries(
- entry( QUOTE_OPENING_SINGLE, "&lsquo;" ),
- entry( QUOTE_CLOSING_SINGLE, "&rsquo;" ),
- entry( QUOTE_OPENING_DOUBLE, "&ldquo;" ),
- entry( QUOTE_CLOSING_DOUBLE, "&rdquo;" ),
- entry( QUOTE_STRAIGHT_SINGLE, "'" ),
- entry( QUOTE_STRAIGHT_DOUBLE, "\"" ),
- entry( QUOTE_APOSTROPHE, "&apos;" ),
- entry( QUOTE_PRIME_SINGLE, "&prime;" ),
- entry( QUOTE_PRIME_DOUBLE, "&Prime;" ),
- entry( QUOTE_PRIME_TRIPLE, "&tprime;" ),
- entry( QUOTE_PRIME_QUADRUPLE, "&qprime;" )
- );
-
- /**
- * Used by external applications to initialize the replacement map.
- */
- public static final Map<TokenType, String> CHARS = ofEntries(
- entry( QUOTE_OPENING_SINGLE, "‘" ),
- entry( QUOTE_CLOSING_SINGLE, "’" ),
- entry( QUOTE_OPENING_DOUBLE, "“" ),
- entry( QUOTE_CLOSING_DOUBLE, "”" ),
- entry( QUOTE_STRAIGHT_SINGLE, "'" ),
- entry( QUOTE_STRAIGHT_DOUBLE, "\"" ),
- entry( QUOTE_APOSTROPHE, "’" ),
- entry( QUOTE_PRIME_SINGLE, "′" ),
- entry( QUOTE_PRIME_DOUBLE, "″" ),
- entry( QUOTE_PRIME_TRIPLE, "‴" ),
- entry( QUOTE_PRIME_QUADRUPLE, "⁗" )
- );
-
- private final Consumer<Lexeme> mUnresolved;
- private final Contractions mContractions;
- private final Map<TokenType, String> mReplacements;
- private final ParserType mParserType;
-
- /**
- * Maps quotes to HTML entities.
- *
- * @param unresolved Consumes {@link Lexeme}s that could not be converted
- * into HTML entities.
- * @param parserType Creates a parser based on document content structure.
- */
- public Converter(
- final Consumer<Lexeme> unresolved, final ParserType parserType ) {
- this( unresolved, new Contractions.Builder().build(), parserType );
- }
-
- /**
- * Maps quotes to HTML entities.
- *
- * @param unresolved Consumes {@link Lexeme}s that could not be converted
- * into HTML entities.
- * @param parserType Creates a parser based on document content structure.
- */
- public Converter(
- final Consumer<Lexeme> unresolved,
- final Map<TokenType, String> replacements,
- final ParserType parserType ) {
- this(
- unresolved, new Contractions.Builder().build(), replacements, parserType
- );
- }
-
- /**
- * Maps quotes to HTML entities.
- *
- * @param unresolved Consumes {@link Lexeme}s that could not be converted
- * into HTML entities.
- * @param c Contractions listings.
- * @param parserType Creates a parser based on document content structure.
- */
- public Converter(
- final Consumer<Lexeme> unresolved,
- final Contractions c,
- final ParserType parserType ) {
- this( unresolved, c, ENTITIES, parserType );
- }
-
- /**
- * Maps quotes to curled equivalents.
- *
- * @param unresolved Consumes {@link Lexeme}s that could not be converted
- * into HTML entities.
- * @param c Contractions listings.
- * @param replacements Map of recognized quotes to output types (entity or
- * Unicode character).
- */
- public Converter(
- final Consumer<Lexeme> unresolved,
- final Contractions c,
- final Map<TokenType, String> replacements,
- final ParserType parserType ) {
- mUnresolved = unresolved;
- mContractions = c;
- mReplacements = replacements;
- mParserType = parserType;
- }
-
- /**
- * Converts straight quotes to curly quotes and primes. Any quotation marks
- * that cannot be converted are passed to the {@link Consumer}. This method
- * is re-entrant, but not tested to be thread-safe.
- *
- * @param text The text to parse.
- * @return The given text string with as many straight quotes converted to
- * curly quotes as is feasible.
- */
- @Override
- public String apply( final String text ) {
- final var parser = new Parser( text, mContractions );
- final var tokens = new ArrayList<Token>();
-
- // Parse the tokens and consume all unresolved lexemes.
- parser.parse( tokens::add, mUnresolved, mParserType.filter() );
-
- // The parser may emit tokens in any order.
- sort( tokens );
-
- final var result = new StringBuilder( text.length() );
- var position = 0;
-
- for( final var token : tokens ) {
- if( position <= token.began() ) {
- result.append( text, position, token.began() );
- result.append( mReplacements.get( token.getType() ) );
- }
-
- position = token.ended();
- }
-
- return result.append( text.substring( position ) ).toString();
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/FastCharacterIterator.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import java.text.CharacterIterator;
-import java.text.StringCharacterIterator;
-import java.util.function.Function;
-
-import static java.text.CharacterIterator.DONE;
-
-/**
- * Iterates over a string, much like {@link CharacterIterator}, but faster.
- * This class gets 53 ops/s vs. 49 ops/s for {@link StringCharacterIterator}.
- * In comparison, using unconstrained {@link String#charAt(int)} calls yields
- * 57 ops/s.
- * <p>
- * <strong>Caution:</strong> This class offers minimal bounds checking to eke
- * out some efficiency.
- * </p>
- */
-final class FastCharacterIterator {
-
- private final String mS;
- private final int mLen;
-
- /**
- * Starts at 0, not guaranteed to be within bounds.
- */
- private int mPos;
-
- /**
- * Constructs a new iterator that can advance through the given string
- * one character at a time.
- *
- * @param s The string to iterate.
- */
- public FastCharacterIterator( final String s ) {
- mS = s;
- mLen = s.length();
- }
-
- /**
- * Returns the iterated index. The return value is not guaranteed to be
- * within the string bounds.
- *
- * @return The iterated index.
- */
- public int index() {
- return mPos;
- }
-
- /**
- * Returns the character at the currently iterated position in the string.
- * This method performs bounds checking.
- *
- * @return {@link CharacterIterator#DONE} if there are no more characters.
- */
- public char current() {
- final var pos = mPos;
- return pos < mLen ? mS.charAt( pos ) : DONE;
- }
-
- /**
- * Advances to the next character in the string, without bounds checking.
- */
- public void next() {
- mPos++;
- }
-
- /**
- * Advances to the previous character in the string, without bounds checking.
- */
- public void prev() {
- --mPos;
- }
-
- /**
- * Returns the next character in the string without consuming it. Multiple
- * consecutive calls to this method will return the same value. This method
- * performs bounds checking.
- *
- * @return {@link CharacterIterator#DONE} if there are no more characters.
- */
- public char peek() {
- final var pos = mPos;
- return pos + 1 < mLen ? mS.charAt( pos + 1 ) : DONE;
- }
-
- /**
- * Answers whether {@link #next()} followed by {@link #current()} is safe.
- *
- * @return {@code true} if there are more characters to be iterated.
- */
- public boolean hasNext() {
- return mPos < mLen;
- }
-
- /**
- * Parse all characters that match a given function.
- *
- * @param f The function that determines when skipping stops.
- */
- public void skip( final Function<Character, Boolean> f ) {
- assert f != null;
-
- do {
- next();
- }
- while( f.apply( current() ) );
-
- // The loop always overshoots by one character.
- prev();
- }
-
- /**
- * Creates a string from a subset of consecutive characters in the string
- * being iterated. The calling class is responsible for bounds-checking.
- *
- * @param began The starting index, must be greater than or equal to zero
- * and less than or equal to {@code ended}.
- * @param ended The ending index, must be less than the string length.
- * @return A substring of the iterated string.
- * @throws IndexOutOfBoundsException Either or both parameters exceed the
- * string's boundaries.
- */
- public String substring( final int began, final int ended ) {
- assert began >= 0;
- assert began <= ended;
- assert ended < mLen;
-
- return mS.substring( began, ended );
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/KeenQuotes.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import picocli.CommandLine;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Properties;
-
-import static com.whitemagicsoftware.keenquotes.ParserType.PARSER_PLAIN;
-import static java.lang.String.format;
-import static java.lang.System.*;
-import static picocli.CommandLine.Help.Ansi.Style.*;
-import static picocli.CommandLine.Help.ColorScheme;
-
-/**
- * Responsible for replacing {@link Token} instances with equivalent smart
- * quotes (or straight quotes). This will inform the caller when ambiguous
- * quotes cannot be reliably resolved.
- */
-public final class KeenQuotes {
- private final Settings mSettings = new Settings( this );
-
- private static ColorScheme createColourScheme() {
- return new ColorScheme.Builder()
- .commands( bold )
- .options( fg_blue, bold )
- .parameters( fg_blue )
- .optionParams( italic )
- .errors( fg_red, bold )
- .stackTraces( italic )
- .build();
- }
-
- public void run() {
- final var settings = getSettings();
- final var contractions = createContractions( settings );
-
- if( settings.displayList() ) {
- out.println( contractions.toString() );
- }
- else {
- try {
- final var c = new Converter( err::println, contractions, PARSER_PLAIN );
- out.print( convert( c ) );
- } catch( final Exception ex ) {
- ex.printStackTrace( err );
- }
- }
- }
-
- private Contractions createContractions( final Settings settings ) {
- return new Contractions.Builder()
- .withBeganUnambiguous( settings.getBeganUnambiguous() )
- .withEndedUnambiguous( settings.getEndedUnambiguous() )
- .withBeganAmbiguous( settings.getBeganAmbiguous() )
- .withEndedAmbiguous( settings.getEndedAmbiguous() )
- .build();
- }
-
- private String convert( final Converter converter ) throws IOException {
- return converter.apply( new String( in.readAllBytes() ) );
- }
-
- private Settings getSettings() {
- return mSettings;
- }
-
- /**
- * Returns the application version number retrieved from the application
- * properties file. The properties file is generated at build time, which
- * keys off the repository.
- *
- * @return The application version number.
- * @throws RuntimeException An {@link IOException} occurred.
- */
- private static String getVersion() {
- try {
- final var properties = loadProperties( "app.properties" );
- return properties.getProperty( "application.version" );
- } catch( final Exception ex ) {
- throw new RuntimeException( ex );
- }
- }
-
- @SuppressWarnings( "SameParameterValue" )
- private static Properties loadProperties( final String resource )
- throws IOException {
- final var properties = new Properties();
- properties.load( getResourceAsStream( getResourceName( resource ) ) );
- return properties;
- }
-
- private static String getResourceName( final String resource ) {
- return format( "%s/%s", getPackagePath(), resource );
- }
-
- private static String getPackagePath() {
- return KeenQuotes.class.getPackageName().replace( '.', '/' );
- }
-
- private static InputStream getResourceAsStream( final String resource ) {
- return KeenQuotes.class.getClassLoader().getResourceAsStream( resource );
- }
-
- /**
- * Main application entry point.
- *
- * @param args Command-line arguments.
- */
- public static void main( final String[] args ) {
- final var app = new KeenQuotes();
- final var parser = new CommandLine( app.getSettings() );
- parser.setColorScheme( createColourScheme() );
-
- final var exitCode = parser.execute( args );
- final var parseResult = parser.getParseResult();
-
- if( parseResult.isUsageHelpRequested() ) {
- exit( exitCode );
- }
- else if( parseResult.isVersionHelpRequested() ) {
- out.println( getVersion() );
- exit( exitCode );
- }
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/Lexeme.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import static com.whitemagicsoftware.keenquotes.LexemeType.FLAG;
-
-/**
- * Responsible for tracking the beginning and ending offsets of a lexeme within
- * a text string. Tracking the beginning and ending indices should use less
- * memory than duplicating the entire text of Unicode characters (i.e., using
- * a similar approach to run-length encoding).
- */
-public final class Lexeme implements Comparable<Lexeme> {
- /**
- * Signifies an invalid index to help distinguish EOT/SOT.
- */
- private static final int E_INDEX = -2;
-
- /**
- * Denotes there are no more lexemes: the end of text (EOT) has been reached.
- * The beginning index differentiates between EOT and SOT.
- */
- public static final Lexeme EOT = new Lexeme( FLAG, -1, E_INDEX );
-
- /**
- * Denotes parsing at the start of text (SOT). This is useful to avoid
- * branching conditions while iterating. The beginning index differentiates
- * between EOT and SOT.
- */
- public static final Lexeme SOT = new Lexeme( FLAG, 0, E_INDEX );
-
- private final LexemeType mType;
- private final int mBegan;
- private final int mEnded;
-
- /**
- * Create a lexeme that represents a section of the text.
- *
- * @param type Type of {@link Lexeme} to create.
- * @param began Offset into the text where this instance starts (0-based).
- * @param ended Offset into the text where this instance stops (0-based).
- */
- public Lexeme( final LexemeType type, final int began, final int ended ) {
- assert type != null;
- assert began >= 0 || ended == E_INDEX;
- assert began <= ended || ended == E_INDEX;
-
- mType = type;
- mBegan = began;
- mEnded = ended;
- }
-
- /**
- * Answers whether the given {@link LexemeType} is the same as this
- * instance's internal {@link LexemeType}.
- *
- * @param type The {@link LexemeType} to compare.
- * @return {@code true} if the given {@link LexemeType} is equal to the
- * internal {@link LexemeType}.
- */
- public boolean isType( final LexemeType type ) {
- assert type != null;
- return mType == type;
- }
-
- /**
- * Answers whether any of the given {@link LexemeType} matches this
- * instance's internal {@link LexemeType}.
- *
- * @param types The {@link LexemeType}s to compare.
- * @return {@code true} if the internal {@link LexemeType} matches any one
- * of the given {@link LexemeType}s.
- */
- public boolean isType( final LexemeType... types ) {
- assert types != null;
-
- for( final var type : types ) {
- if( mType == type ) {
- return true;
- }
- }
-
- return false;
- }
-
- public int began() {
- return mBegan;
- }
-
- public int ended() {
- return mEnded;
- }
-
- boolean isSot() {
- return mBegan == 0;
- }
-
- boolean isEot() {
- return mBegan == -1;
- }
-
- LexemeType getType() {
- return mType;
- }
-
- public boolean before( final Lexeme that ) {
- return compareTo( that ) < 0;
- }
-
- public boolean after( final Lexeme that ) {
- return compareTo( that ) > 0;
- }
-
- /**
- * Compares the starting offset of the given {@link Lexeme} to the starting
- * offset of this {@link Lexeme} instance. This allows a list {@link Lexeme}s
- * to be sorted by order of appearance in the parsed text.
- */
- @Override
- public int compareTo( final Lexeme that ) {
- assert that != null;
- return this.mBegan - that.mBegan;
- }
-
- /**
- * Extracts a sequence of characters from the given text at the offsets
- * captured by this lexeme.
- *
- * @param text The text that was parsed using this class.
- * @return The character string captured by the lexeme.
- */
- public String toString( final String text ) {
- assert text != null;
- return text.substring( mBegan, mEnded );
- }
-
- @Override
- public String toString() {
- return getClass().getSimpleName() + '[' +
- "mType=" + mType +
- ", mBegan=" + mBegan +
- ", mEnded=" + mEnded +
- ']';
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/LexemeType.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-/**
- * Represents the type of {@link Lexeme} parsed by the {@link Lexer}.
- */
-@SuppressWarnings( "SpellCheckingInspection" )
-public enum LexemeType {
- QUOTE_SINGLE,
- QUOTE_SINGLE_OPENING,
- QUOTE_SINGLE_CLOSING,
- QUOTE_DOUBLE,
- QUOTE_DOUBLE_OPENING,
- QUOTE_DOUBLE_CLOSING,
- ESC_SINGLE,
- ESC_DOUBLE,
- EOL,
- EOP,
- SPACE,
- WORD,
- NUMBER,
- PUNCT,
- OPENING_GROUP,
- CLOSING_GROUP,
- HYPHEN,
- DASH,
- EQUALS,
- PERIOD,
- ELLIPSIS,
- FLAG
-}
src/main/java/com/whitemagicsoftware/keenquotes/Lexer.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import java.util.function.Consumer;
-
-import static com.whitemagicsoftware.keenquotes.LexemeType.*;
-import static java.lang.Character.isWhitespace;
-import static java.text.CharacterIterator.DONE;
-
-/**
- * Turns text into words, numbers, punctuation, spaces, and more.
- */
-public final class Lexer {
- /**
- * Tokenizes a sequence of characters. The order of comparisons is optimized
- * towards probability of the occurrence of a character in regular English
- * prose: letters, space, quotation marks, numbers, periods, new lines,
- * then end of text.
- *
- * @param text The sequence of characters to tokenize.
- * @param emitter Recipient of all tokenized character sequences.
- * @param filter Tokenization preprocessor, usually empty, but can be used
- * to skip certain character sequences (such as XML tags).
- */
- public static void lex(
- final String text,
- final Consumer<Lexeme> emitter,
- final Consumer<FastCharacterIterator> filter ) {
- lex( new FastCharacterIterator( text ), emitter, filter );
- }
-
- private static void lex(
- final FastCharacterIterator i,
- final Consumer<Lexeme> emitter,
- final Consumer<FastCharacterIterator> filter ) {
-
- while( i.hasNext() ) {
- // Allow filters to skip character sequences (such as XML tags).
- filter.accept( i );
-
- final var index = i.index();
- final var curr = i.current();
- var token = PUNCT;
-
- if( isLetter( curr ) ) {
- // T1000 is one word, not a word and a number.
- i.skip( next -> isLetter( next ) || isDigit( next ) );
- token = WORD;
- }
- else if( curr == ' ' ) {
- i.skip( next -> next == ' ' );
- token = SPACE;
- }
- else if( curr == '\r' || curr == '\n' ) {
- final var cr = new int[]{curr == '\r' ? 1 : 0};
- final var lf = new int[]{curr == '\n' ? 1 : 0};
-
- // Swallow all consecutive CR (Mac), CRLF (Windows), and/or LF (Unix).
- i.skip( next -> {
- cr[ 0 ] += next == '\r' ? 1 : 0;
- lf[ 0 ] += next == '\n' ? 1 : 0;
-
- return next == '\r' || next == '\n';
- } );
-
- token = cr[ 0 ] + lf[ 0 ] == 1 || cr[ 0 ] == 1 && lf[ 0 ] == 1
- ? EOL
- : EOP;
- }
- else if( isWhitespace( curr ) ) {
- i.skip( Character::isWhitespace );
- token = SPACE;
- }
- else if( isDigit( curr ) || isNumeric( curr ) && isDigit( i.peek() ) ) {
- // Parse all consecutive number characters to prevent the main loop
- // from switching back to word tokens.
- i.skip(
- next -> isDigit( next ) || isNumeric( next ) && isDigit( i.peek() )
- );
- token = NUMBER;
- }
- else if( curr == '.' ) {
- i.skip( next -> next == '.' || next == ' ' && i.peek() == '.' );
-
- token = i.index() - index == 0 ? PERIOD : ELLIPSIS;
- }
- else if( curr == '"' ) {
- token = QUOTE_DOUBLE;
- }
- else if( curr == '\'' ) {
- token = QUOTE_SINGLE;
- }
- else if( curr == '-' && i.peek() != '-' ) {
- token = HYPHEN;
- }
- else if( isDash( curr ) ) {
- i.skip( Lexer::isDash );
- token = DASH;
- }
- else if( curr == '(' || curr == '{' || curr == '[' ) {
- token = OPENING_GROUP;
- }
- else if( curr == ')' || curr == '}' || curr == ']' ) {
- token = CLOSING_GROUP;
- }
- else if( curr == '“' ) {
- token = QUOTE_DOUBLE_OPENING;
- }
- else if( curr == '”' ) {
- token = QUOTE_DOUBLE_CLOSING;
- }
- else if( curr == '‘' ) {
- token = QUOTE_SINGLE_OPENING;
- }
- else if( curr == '’' ) {
- token = QUOTE_SINGLE_CLOSING;
- }
- else if( curr == '\\' ) {
- i.next();
- final var next = i.current();
-
- if( next == '\'' ) {
- token = ESC_SINGLE;
- }
- else if( next == '\"' ) {
- token = ESC_DOUBLE;
- }
- else {
- // Push back the escaped character, which wasn't a straight quote.
- i.prev();
- }
- }
- else if( curr == '=' ) {
- token = EQUALS;
- }
- else if( curr == DONE ) {
- continue;
- }
-
- assert index >= 0;
- assert curr != DONE;
-
- emitter.accept( new Lexeme( token, index, i.index() + 1 ) );
- i.next();
- }
- }
-
- /**
- * Answers whether the given character can be considered part of a word
- * or not. This will include {@code _} and {@code *} because plain text
- * formats often use those characters to emphasize a word.
- *
- * @param curr The character to check as being part of a word.
- * @return {@code true} if the given character is a letter or a formatting
- * indicator.
- */
- private static boolean isLetter( final char curr ) {
- return Character.isLetter( curr ) || curr == '_' || curr == '*';
- }
-
- private static boolean isDigit( final char curr ) {
- return Character.isDigit( curr ) ||
- "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞".indexOf( curr ) > -1;
- }
-
- /**
- * Answers whether the given character may be part of an en- or em-dash.
- * This must be called after it is known that the character isn't a lone
- * hyphen.
- *
- * @param curr The character to check as being a dash.
- * @return {@code true} if the given character is part of a dash.
- */
- private static boolean isDash( final char curr ) {
- return curr == '-' || curr == '–' || curr == '—' || curr == '―';
- }
-
- /**
- * Answers whether the given character can be considered part of a number
- * or not. This does not include digits, which are checked independently
- * of this method.
- *
- * @param curr The character to check as being related to a number.
- * @return {@code true} if the given character can be considered part of
- * a number (e.g., -2,000.2^2 is considered a single number).
- */
- private static boolean isNumeric( final char curr ) {
- return
- curr == '.' || curr == ',' || curr == '-' || curr == '+' ||
- curr == '^' || curr == '⅟' || curr == '⁄';
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/Parser.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.function.Consumer;
-
-import static com.whitemagicsoftware.keenquotes.Lexeme.EOT;
-import static com.whitemagicsoftware.keenquotes.Lexeme.SOT;
-import static com.whitemagicsoftware.keenquotes.LexemeType.*;
-import static com.whitemagicsoftware.keenquotes.TokenType.*;
-import static java.lang.Math.abs;
-import static java.util.Collections.sort;
-
-/**
- * Converts straight double/single quotes and apostrophes to curly equivalents.
- */
-public class Parser {
- /**
- * Single quotes preceded by these {@link LexemeType}s may be opening quotes.
- */
- private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE =
- new LexemeType[]{SPACE, DASH, QUOTE_DOUBLE, OPENING_GROUP, EOL, EOP};
-
- /**
- * Single quotes succeeded by these {@link LexemeType}s may be opening quotes.
- */
- private static final LexemeType[] LAGGING_QUOTE_OPENING_SINGLE =
- new LexemeType[]{WORD, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE};
-
- /**
- * Single quotes preceded by these {@link LexemeType}s may be closing quotes.
- */
- private static final LexemeType[] LEADING_QUOTE_CLOSING_SINGLE =
- new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_DOUBLE};
-
- /**
- * Single quotes succeeded by these {@link LexemeType}s may be closing quotes.
- */
- private static final LexemeType[] LAGGING_QUOTE_CLOSING_SINGLE =
- new LexemeType[]{SPACE, HYPHEN, DASH,
- QUOTE_DOUBLE, CLOSING_GROUP, EOL, EOP};
-
- /**
- * Double quotes preceded by these {@link LexemeType}s may be opening quotes.
- */
- private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
- new LexemeType[]{SPACE, DASH, EQUALS, QUOTE_SINGLE, OPENING_GROUP, EOL,
- EOP};
-
- /**
- * Double quotes succeeded by these {@link LexemeType}s may be opening quotes.
- */
- private static final LexemeType[] LAGGING_QUOTE_OPENING_DOUBLE =
- new LexemeType[]{WORD, NUMBER, DASH, ELLIPSIS, OPENING_GROUP,
- QUOTE_SINGLE, QUOTE_SINGLE_OPENING, QUOTE_SINGLE_CLOSING, QUOTE_DOUBLE};
-
- /**
- * Double quotes preceded by these {@link LexemeType}s may be closing quotes.
- */
- private static final LexemeType[] LEADING_QUOTE_CLOSING_DOUBLE =
- new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, DASH, ELLIPSIS, CLOSING_GROUP,
- QUOTE_SINGLE, QUOTE_SINGLE_CLOSING, QUOTE_SINGLE_OPENING};
-
- /**
- * Double quotes succeeded by these {@link LexemeType}s may be closing quotes.
- */
- private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE =
- new LexemeType[]{SPACE, PUNCT, PERIOD, EQUALS, HYPHEN, DASH,
- QUOTE_SINGLE, CLOSING_GROUP, EOL, EOP};
-
- /**
- * The text to parse. A reference is required as a minor optimization in
- * memory and speed: the lexer records integer offsets, rather than new
- * {@link String} instances, to track parsed lexemes.
- */
- private final String mText;
-
- /**
- * Sets of contractions that help disambiguate single quotes in the text.
- * These are effectively immutable while parsing.
- */
- private final Contractions sContractions;
-
- /**
- * Contains each emitted opening single quote per paragraph.
- */
- private final List<Lexeme> mOpeningSingleQuotes = new ArrayList<>();
-
- /**
- * Contains each emitted closing single quote per paragraph.
- */
- private final List<Lexeme> mClosingSingleQuotes = new ArrayList<>();
-
- /**
- * Contains each emitted opening double quote per paragraph.
- */
- private final List<Lexeme> mOpeningDoubleQuotes = new ArrayList<>();
-
- /**
- * Contains each emitted closing double quote per paragraph.
- */
- private final List<Lexeme> mClosingDoubleQuotes = new ArrayList<>();
-
- /**
- * Constructs a new {@link Parser} using the default contraction sets
- * to help resolve some ambiguous scenarios.
- *
- * @param text The prose to parse, containing zero or more quotation
- * characters.
- * @param contractions Custom sets of contractions to help resolve
- * ambiguities.
- */
- public Parser( final String text, final Contractions contractions ) {
- mText = text;
- sContractions = contractions;
- }
-
- /**
- * Iterates over the entire text provided at construction, emitting
- * {@link Token}s that can be used to convert straight quotes to curly
- * quotes.
- *
- * @param tokenConsumer Receives emitted {@link Token}s.
- */
- public void parse(
- final Consumer<Token> tokenConsumer,
- final Consumer<Lexeme> lexemeConsumer,
- final Consumer<FastCharacterIterator> filter ) {
- final var lexemes = new CircularFifoQueue<Lexeme>( 3 );
-
- // Allow consuming the very first token without needing a queue size check.
- flush( lexemes );
-
- final var unresolved = new ArrayList<Lexeme[]>();
-
- // Create and convert a list of all unambiguous quote characters.
- Lexer.lex(mText, lexeme -> {
- // Reset after tokenizing a paragraph.
- if( tokenize( lexeme, lexemes, tokenConsumer, unresolved ) ) {
- // Attempt to resolve any remaining unambiguous quotes.
- resolve( unresolved, tokenConsumer );
-
- // Notify of any unambiguous quotes that could not be resolved.
- unresolved.forEach( lex -> lexemeConsumer.accept( lex[ 1 ] ) );
- unresolved.clear();
- mOpeningSingleQuotes.clear();
- mClosingSingleQuotes.clear();
- mOpeningDoubleQuotes.clear();
- mClosingDoubleQuotes.clear();
- }
- }, filter);
-
- // By loop's end, the lexemes list contains tokens for all except the
- // final two elements (from tokenizing in triplets). Tokenize the remaining
- // unprocessed lexemes.
- tokenize( EOT, lexemes, tokenConsumer, unresolved );
- tokenize( EOT, lexemes, tokenConsumer, unresolved );
-
- // Attempt to resolve any remaining unambiguous quotes.
- resolve( unresolved, tokenConsumer );
-
- // Notify of any unambiguous quotes that could not be resolved.
- unresolved.forEach( lex -> lexemeConsumer.accept( lex[ 1 ] ) );
- }
-
- /**
- * Converts {@link Lexeme}s identified as straight quotes into {@link Token}s
- * that represent the curly equivalent. The {@link Token}s are passed to
- * the given {@link Consumer} for further processing (e.g., replaced in
- * the original text being parsed).
- *
- * @param lexeme A part of the text being parsed.
- * @param lexemes A 3-element queue of lexemes that provide sufficient
- * context to identify curly quotes.
- * @param consumer Recipient of equivalent quotes.
- * @param unresolved Rolling list of potentially ambiguous {@link Lexeme}s
- * that could not be tokenized, yet.
- * @return {@code true} if an end-of-paragraph is detected.
- */
- private boolean tokenize( final Lexeme lexeme,
- final CircularFifoQueue<Lexeme> lexemes,
- final Consumer<Token> consumer,
- final List<Lexeme[]> unresolved ) {
- // Add the next lexeme to tokenize into the queue for immediate processing.
- lexemes.add( lexeme );
-
- final var lex1 = lexemes.get( 0 );
- final var lex2 = lexemes.get( 1 );
- final var lex3 = lexemes.get( 2 );
-
- if( lex2.isType( QUOTE_SINGLE ) && lex3.isType( WORD ) &&
- lex1.isType( WORD, PERIOD, NUMBER ) ) {
- // Examples: y'all, Ph.D.'ll, 20's, she's
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
- }
- else if( lex1.isType( QUOTE_SINGLE ) && lex3.isType( QUOTE_SINGLE ) &&
- "n".equalsIgnoreCase( lex2.toString( mText ) ) ) {
- // I.e., 'n'
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) );
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex3 ) );
- flush( lexemes );
- truncate( unresolved );
- }
- else if( lex2.isType( QUOTE_SINGLE ) && lex1.isType( NUMBER ) ) {
- if( lex3.isType( QUOTE_SINGLE ) ) {
- // E.g., 2''
- consumer.accept(
- new Token( QUOTE_PRIME_DOUBLE, lex2.began(), lex3.ended() ) );
- flush( lexemes );
- }
- else {
- // E.g., 2'
- consumer.accept( new Token( QUOTE_PRIME_SINGLE, lex2 ) );
- }
- }
- else if( lex2.isType( QUOTE_DOUBLE ) && lex1.isType( NUMBER ) ) {
- // E.g., 2"
- consumer.accept( new Token( QUOTE_PRIME_DOUBLE, lex2 ) );
- }
- else if( lex2.isType( WORD ) && lex3.isType( QUOTE_SINGLE ) &&
- sContractions.endedUnambiguously( lex2.toString( mText ) ) ) {
- // E.g., thinkin'
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex3 ) );
- flush( lexemes );
- }
- else if( lex2.isType( NUMBER ) && lex1.isType( QUOTE_SINGLE ) ) {
- // Authors must re-write their sentences to avoid starting with numerals.
- if( lex3.isType( SPACE, PUNCT ) || lex3.isType( WORD ) &&
- lex3.toString( mText ).equalsIgnoreCase( "s" ) ) {
- // E.g.: '20s, '02
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) );
- }
- else {
- // E.g., '2''
- consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex1 ) );
- mOpeningSingleQuotes.add( lex1 );
- }
-
- truncate( unresolved );
- }
- else if( lex2.isType( QUOTE_SINGLE ) &&
- lex1.isType( PUNCT, PERIOD, ELLIPSIS, DASH ) &&
- (lex3.isType( EOL, EOP ) || lex3.isEot()) ) {
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
- mClosingSingleQuotes.add( lex2 );
- }
- else if( lex1.isType( ESC_SINGLE ) ) {
- // E.g., \'
- consumer.accept( new Token( QUOTE_STRAIGHT_SINGLE, lex1 ) );
- }
- else if( lex1.isType( ESC_DOUBLE ) ) {
- // E.g., \"
- consumer.accept( new Token( QUOTE_STRAIGHT_DOUBLE, lex1 ) );
-
- if( lex2.isType( QUOTE_SINGLE ) &&
- (lex3.isEot() || lex3.isType( SPACE, DASH, EOL, EOP )) ) {
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
- mClosingSingleQuotes.add( lex2 );
- }
- }
- else if( lex2.isType( QUOTE_DOUBLE ) &&
- (lex1.isSot() || lex1.isType( LEADING_QUOTE_OPENING_DOUBLE )) &&
- lex3.isType( LAGGING_QUOTE_OPENING_DOUBLE ) ) {
- // E.g.: "", "..., "word, ---"word
- consumer.accept( new Token( QUOTE_OPENING_DOUBLE, lex2 ) );
- mOpeningDoubleQuotes.add( lex2 );
- }
- else if( lex2.isType( QUOTE_DOUBLE ) &&
- lex1.isType( LEADING_QUOTE_CLOSING_DOUBLE ) &&
- (lex3.isEot() || lex3.isType( LAGGING_QUOTE_CLOSING_DOUBLE )) ) {
- // E.g.: ..."', word"', ?"', word"?
- consumer.accept( new Token( QUOTE_CLOSING_DOUBLE, lex2 ) );
- mClosingDoubleQuotes.add( lex2 );
- }
- else if( lex1.isType( WORD ) && lex2.isType( QUOTE_SINGLE ) &&
- lex3.isType( PUNCT, PERIOD ) ) {
- // E.g., word', (contraction ruled out by previous conditions)
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
- mClosingSingleQuotes.add( lex2 );
- }
- else if( lex1.isType( DASH ) &&
- lex2.isType( QUOTE_SINGLE ) &&
- lex3.isType( QUOTE_DOUBLE ) ) {
- // E.g., ---'"
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
- mClosingSingleQuotes.add( lex2 );
- }
- else if( lex2.isType( QUOTE_SINGLE, QUOTE_DOUBLE ) ) {
- // After tokenizing, the parser will attempt to resolve ambiguities.
- unresolved.add( new Lexeme[]{lex1, lex2, lex3} );
- }
-
- // Suggest to the caller that resolution should be performed. This allows
- // the algorithm to reset the opening/closing quote balance before the
- // next paragraph is parsed.
- return lex3.isType( EOP );
- }
-
- private void resolve(
- final List<Lexeme[]> unresolved, final Consumer<Token> consumer ) {
- // Some non-emitted tokenized lexemes may be ambiguous.
- final var ambiguousLeadingQuotes = new ArrayList<Lexeme[]>( 16 );
- final var ambiguousLaggingQuotes = new ArrayList<Lexeme[]>( 16 );
- var resolvedLeadingQuotes = 0;
- var resolvedLaggingQuotes = 0;
-
- // Count the number of ambiguous and non-ambiguous open single quotes.
- for( var i = unresolved.iterator(); i.hasNext(); ) {
- final var quotes = i.next();
- final var lex1 = quotes[ 0 ];
- final var lex2 = quotes[ 1 ];
- final var lex3 = quotes[ 2 ];
-
- if( lex2.isType( QUOTE_SINGLE ) ) {
- final var word1 = lex1 == SOT ? "" : lex1.toString( mText );
- final var word3 = lex3 == EOT ? "" : lex3.toString( mText );
-
- if( sContractions.beganAmbiguously( word3 ) ) {
- // E.g., 'Cause
- if( lex1.isType( QUOTE_SINGLE ) ) {
- // E.g., ''Cause
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
- i.remove();
- }
- else {
- // The contraction is uncertain until a closing quote is found that
- // may balance this single quote.
- ambiguousLeadingQuotes.add( quotes );
- }
- }
- else if( sContractions.beganUnambiguously( word3 ) ) {
- // The quote mark forms a word that does not stand alone from its
- // contraction. For example, twas is not a word: it's 'twas.
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
- i.remove();
- }
- else if( lex1.isType( WORD ) &&
- (lex3.isType( PUNCT, PERIOD, ELLIPSIS, DASH, SPACE, EOP ) ||
- lex3.isEot()) && unresolved.indexOf( quotes ) == 0 &&
- resolvedLeadingQuotes == 0 ) {
- // E.g., Tyfós'
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
- i.remove();
- }
- else if( sContractions.endedAmbiguously( word1 ) ) {
- ambiguousLaggingQuotes.add( quotes );
- }
- else if( (lex1.isSot() || lex1.isType( LEADING_QUOTE_OPENING_SINGLE )) &&
- lex3.isType( LAGGING_QUOTE_OPENING_SINGLE ) ) {
- consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex2 ) );
- resolvedLeadingQuotes++;
- mOpeningSingleQuotes.add( lex2 );
- i.remove();
- }
- else if( lex1.isType( LEADING_QUOTE_CLOSING_SINGLE ) &&
- (lex3.isEot() || lex3.isType( LAGGING_QUOTE_CLOSING_SINGLE )) ) {
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
- resolvedLaggingQuotes++;
- mClosingSingleQuotes.add( lex2 );
- i.remove();
- }
- else if( lex3.isType( NUMBER ) ) {
- // E.g., '04
- ambiguousLeadingQuotes.add( quotes );
- }
- }
- }
-
- sort( mOpeningSingleQuotes );
- sort( mClosingSingleQuotes );
- sort( mOpeningDoubleQuotes );
- sort( mClosingDoubleQuotes );
-
- final var singleQuoteEmpty =
- mOpeningSingleQuotes.isEmpty() || mClosingSingleQuotes.isEmpty();
- final var doubleQuoteEmpty =
- mOpeningDoubleQuotes.isEmpty() || mClosingDoubleQuotes.isEmpty();
-
- final var singleQuoteDelta = abs(
- mClosingSingleQuotes.size() - mOpeningSingleQuotes.size()
- );
-
- final var doubleQuoteDelta = abs(
- mClosingDoubleQuotes.size() - mOpeningDoubleQuotes.size()
- );
-
- final var ambiguousLeadingCount = ambiguousLeadingQuotes.size();
- final var ambiguousLaggingCount = ambiguousLaggingQuotes.size();
-
- if( resolvedLeadingQuotes == 1 && resolvedLaggingQuotes == 0 ) {
- if( ambiguousLeadingCount == 0 && ambiguousLaggingCount == 1 ) {
- final var balanced = singleQuoteDelta == 0;
- final var quote = balanced ? QUOTE_APOSTROPHE : QUOTE_CLOSING_SINGLE;
- final var lex = ambiguousLaggingQuotes.get( 0 );
- consumer.accept( new Token( quote, lex[ 1 ] ) );
- unresolved.remove( lex );
- }
- else if( ambiguousLeadingCount == 0 && unresolved.size() == 1 ) {
- // Must be a closing quote.
- final var closing = unresolved.get( 0 );
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
- unresolved.remove( closing );
- }
- }
- else if( ambiguousLeadingCount == 0 && ambiguousLaggingCount > 0 ) {
- // If there are no ambiguous leading quotes then all ambiguous lagging
- // quotes must be contractions.
- ambiguousLaggingQuotes.forEach(
- lex -> {
- consumer.accept( new Token( QUOTE_APOSTROPHE, lex[ 1 ] ) );
- unresolved.remove( lex );
- }
- );
- }
- else if( mOpeningSingleQuotes.size() == 0 &&
- mClosingSingleQuotes.size() == 1 && !unresolved.isEmpty() ) {
- final var opening = unresolved.get( 0 );
- consumer.accept( new Token( QUOTE_OPENING_SINGLE, opening[ 1 ] ) );
- unresolved.remove( opening );
- }
- else if( ambiguousLeadingCount == 0 ) {
- if( resolvedLaggingQuotes < resolvedLeadingQuotes ) {
- for( final var i = unresolved.iterator(); i.hasNext(); ) {
- final var closing = i.next()[ 1 ];
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing ) );
- i.remove();
- }
- }
- else if( singleQuoteDelta == unresolved.size() ) {
- for( final var i = unresolved.iterator(); i.hasNext(); ) {
- final var closing = i.next();
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
- i.remove();
- }
- }
- else if( unresolved.size() == 2 ) {
- final var closing = unresolved.get( 0 );
- final var opening = unresolved.get( 1 );
- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
- consumer.accept( new Token( QUOTE_OPENING_SINGLE, opening[ 1 ] ) );
-
- // Doesn't affect the algorithm.
- unresolved.clear();
- }
- }
- else if( singleQuoteDelta == 0 && !singleQuoteEmpty ||
- doubleQuoteDelta == 0 && !doubleQuoteEmpty ) {
- // An apostrophe stands betwixt opening/closing single quotes.
- for( final var lexemes = unresolved.iterator(); lexemes.hasNext(); ) {
- final var quote = lexemes.next()[ 1 ];
-
- for( int i = 0; i < mOpeningSingleQuotes.size(); i++ ) {
- // An apostrophe must fall between an open/close pair.
- final var openingQuote = mOpeningSingleQuotes.get( i );
- final var closingQuote = mClosingSingleQuotes.get( i );
-
- if( openingQuote.before( quote ) && closingQuote.after( quote ) ) {
- consumer.accept( new Token( QUOTE_APOSTROPHE, quote ) );
- lexemes.remove();
- }
- }
- }
-
- // An apostrophe stands betwixt opening/closing double quotes.
- final var lexemes = unresolved.iterator();
-
- while( lexemes.hasNext() ) {
- final var quote = lexemes.next()[ 1 ];
-
- // Prevent an index out of bounds exception.
- final var len = Math.min(
- mOpeningDoubleQuotes.size(),
- mClosingDoubleQuotes.size()
- );
-
- for( int i = 0; i < len; i++ ) {
- // An apostrophe must fall between an open/close pair.
- final var openingQuote = mOpeningDoubleQuotes.get( i );
- final var closingQuote = mClosingDoubleQuotes.get( i );
-
- if( openingQuote.before( quote ) && closingQuote.after( quote ) ) {
- consumer.accept( new Token( QUOTE_APOSTROPHE, quote ) );
-
- try {
- lexemes.remove();
- } catch( final Exception ex ) {
- // Weird edge case that hasn't been tracked down. Doesn't affect
- // the unit tests.
- break;
- }
- }
- }
- }
- }
- else if( ambiguousLeadingCount == 1 && resolvedLaggingQuotes == 1 ) {
- final var opening = ambiguousLeadingQuotes.get( 0 );
- consumer.accept( new Token( QUOTE_OPENING_SINGLE, opening[ 1 ] ) );
- unresolved.remove( opening );
- }
- }
-
- /**
- * Remove the last {@link Lexeme}s from the given list.
- *
- * @param unresolved The list of {@link Lexeme}s to modify.
- */
- private void truncate( final List<Lexeme[]> unresolved ) {
- if( !unresolved.isEmpty() ) {
- unresolved.remove( unresolved.size() - 1 );
- }
- }
-
- /**
- * Overwrites the {@link CircularFifoQueue}'s contents with start-of-text
- * indicators.
- *
- * @param lexemes The queue to overflow.
- */
- private void flush( final CircularFifoQueue<Lexeme> lexemes ) {
- lexemes.add( SOT );
- lexemes.add( SOT );
- lexemes.add( SOT );
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/ParserType.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import java.util.function.Consumer;
-
-public enum ParserType {
- PARSER_PLAIN( filter -> {} ),
- PARSER_XML( filter -> new XmlFilter() );
-
- private final Consumer<FastCharacterIterator> mFilter;
-
- ParserType( final Consumer<FastCharacterIterator> filter ) {
- mFilter = filter;
- }
-
- Consumer<FastCharacterIterator> filter() {
- return mFilter;
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/Settings.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import picocli.CommandLine;
-
-import java.util.List;
-import java.util.concurrent.Callable;
-
-import static java.util.Arrays.asList;
-import static java.util.Collections.emptyList;
-
-@CommandLine.Command(
- name = "KeenQuotes",
- mixinStandardHelpOptions = true,
- description = "Converts straight quotes to curly quotes."
-)
-@SuppressWarnings( {"FieldMayBeFinal", "CanBeFinal"} )
-public final class Settings implements Callable<Integer> {
- /**
- * Main executable class.
- */
- private final KeenQuotes mMain;
-
- /**
- * List of unambiguous contractions having leading apostrophes.
- */
- @CommandLine.Option(
- names = {"-ub", "--unamb-began"},
- description =
- "Contraction to treat as unambiguous (e.g., cause, bout)",
- paramLabel = "word"
- )
- private String[] mBeganUnambiguous;
-
- /**
- * List of unambiguous contractions having lagging apostrophes.
- */
- @CommandLine.Option(
- names = {"-ue", "--unamb-ended"},
- description =
- "Contraction to treat as unambiguous (e.g., frien, thinkin)",
- paramLabel = "word"
- )
- private String[] mEndedUnambiguous;
-
- /**
- * List of ambiguous contractions having leading apostrophes.
- */
- @CommandLine.Option(
- names = {"-ab", "--amb-began"},
- description =
- "Contraction to treat as ambiguous (e.g., sup, kay)",
- paramLabel = "word"
- )
- private String[] mBeganAmbiguous;
-
- /**
- * List of ambiguous contractions having lagging apostrophes.
- */
- @CommandLine.Option(
- names = {"-ae", "--amb-ended"},
- description =
- "Contraction to treat as ambiguous (e.g., gi, o)",
- paramLabel = "word"
- )
- private String[] mEndedAmbiguous;
-
- /**
- * Display default values.
- */
- @CommandLine.Option(
- names = {"-l", "--list"},
- description = "List all ambiguous and unambiguous contractions"
- )
- private boolean mDisplayList;
-
- public Settings( final KeenQuotes main ) {
- assert main != null;
- mMain = main;
- }
-
- /**
- * Answers whether the contractions listings should be displayed.
- *
- * @return {@code true} to list the contractions.
- */
- boolean displayList() {
- return mDisplayList;
- }
-
- List<String> getBeganUnambiguous() {
- return nullSafe( mBeganUnambiguous );
- }
-
- List<String> getEndedUnambiguous() {
- return nullSafe( mEndedUnambiguous );
- }
-
- List<String> getBeganAmbiguous() {
- return nullSafe( mBeganAmbiguous );
- }
-
- List<String> getEndedAmbiguous() {
- return nullSafe( mEndedAmbiguous );
- }
-
- private List<String> nullSafe( final String[] words ) {
- return words == null ? emptyList() : asList( words );
- }
-
- /**
- * Invoked after the command-line arguments are parsed to launch the
- * application.
- *
- * @return Exit level zero.
- */
- @Override
- public Integer call() {
- mMain.run();
- return 0;
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/Token.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-/**
- * Represents a high-level token read from the text.
- */
-final class Token implements Comparable<Token> {
- private final TokenType mType;
- private final int mBegan;
- private final int mEnded;
-
- /**
- * Convenience constructor to create a token that uses the lexeme's
- * beginning and ending offsets to represent a complete token.
- *
- * @param type The type of {@link Token} to create.
- * @param lexeme Container for beginning and ending text offsets.
- */
- Token( final TokenType type, final Lexeme lexeme ) {
- this( type, lexeme.began(), lexeme.ended() );
- }
-
- /**
- * This constructor can be used to create tokens that span more than a
- * single character. Almost all tokens represent a single character, only
- * the double-prime sequence ({@code ''}) is more than one character.
- *
- * @param type The type of {@link Token} to create.
- * @param began Beginning offset into text where token is found.
- * @param ended Ending offset into text where token is found.
- */
- Token( final TokenType type, final int began, final int ended ) {
- assert type != null;
- assert began >= 0;
- assert ended > began;
-
- mType = type;
- mBegan = began;
- mEnded = ended;
- }
-
- TokenType getType() {
- return mType;
- }
-
- int began() {
- return mBegan;
- }
-
- int ended() {
- return mEnded;
- }
-
- @Override
- public int compareTo( final Token that ) {
- return this.mBegan - that.mBegan;
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/TokenType.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-/**
- * The {@link Parser} emits these token types.
- */
-enum TokenType {
- QUOTE_OPENING_SINGLE,
- QUOTE_OPENING_DOUBLE,
- QUOTE_CLOSING_SINGLE,
- QUOTE_CLOSING_DOUBLE,
- QUOTE_APOSTROPHE,
- QUOTE_STRAIGHT_SINGLE,
- QUOTE_STRAIGHT_DOUBLE,
- QUOTE_PRIME_SINGLE,
- QUOTE_PRIME_DOUBLE,
- QUOTE_PRIME_TRIPLE,
- QUOTE_PRIME_QUADRUPLE,
-}
src/main/java/com/whitemagicsoftware/keenquotes/XmlFilter.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import java.text.CharacterIterator;
-import java.util.Set;
-import java.util.function.Consumer;
-
-/**
- * Responsible for lexing text while ignoring XML elements. The document must
- * be both sane and well-formed. This is not intended to lex documents in the
- * wild where the user has injected custom HTML. The lexer will fail if the
- * angle brackets are not balanced. Additionally, any less than or greater than
- * symbols must be encoded as {@code &lt;} or {@code &gt;}, respectively.
- */
-final class XmlFilter implements Consumer<FastCharacterIterator> {
-
- /**
- * Elements that indicate preformatted text, intentional straight quotes.
- */
- private final Set<String> UNTOUCHABLE = Set.of(
- "pre",
- "code",
- "tt",
- "tex",
- "kbd",
- "samp",
- "var",
- "l",
- "blockcode"
- );
-
- public XmlFilter() {}
-
- /**
- * Skip XML tags found within the prose, which hides the elements.
- */
- @Override
- public void accept( final FastCharacterIterator i ) {
- if( i.current() == '<' ) {
- final var openingTag = nextTag( i );
-
- if( UNTOUCHABLE.contains( openingTag.toLowerCase() ) ) {
- String closingTag;
-
- do {
- closingTag = nextTag( i );
- }
- while( !closingTag.endsWith( openingTag ) );
- }
- }
- }
-
- /**
- * Skips to the next greater than or less than symbol.
- *
- * @param i The {@link CharacterIterator} used to scan through the text, one
- * character at a time.
- * @return An opening/closing tag name, or the content within the element.
- */
- private String nextTag( final FastCharacterIterator i ) {
- final var begin = i.index();
-
- i.skip( next -> next != '>' && next != '<' );
-
- // Swallow the trailing greater than symbol.
- i.next();
-
- // Skip to the character following the greater than symbol.
- i.next();
-
- return i.substring( begin + 1, i.index() - 1 );
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/app/KeenQuotes.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.app;
+
+import com.whitemagicsoftware.keenquotes.parser.Contractions;
+import com.whitemagicsoftware.keenquotes.parser.Curler;
+import picocli.CommandLine;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+
+import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_PLAIN;
+import static java.lang.String.format;
+import static java.lang.System.*;
+import static picocli.CommandLine.Help.Ansi.Style.*;
+import static picocli.CommandLine.Help.ColorScheme;
+
+/**
+ * Responsible for replacing straight quotes with equivalent smart quotes (or
+ * straight quotes). This will inform the caller when ambiguous quotes cannot
+ * be reliably resolved.
+ */
+public final class KeenQuotes {
+ private final Settings mSettings = new Settings( this );
+
+ private static ColorScheme createColourScheme() {
+ return new ColorScheme.Builder()
+ .commands( bold )
+ .options( fg_blue, bold )
+ .parameters( fg_blue )
+ .optionParams( italic )
+ .errors( fg_red, bold )
+ .stackTraces( italic )
+ .build();
+ }
+
+ public void run() {
+ final var settings = getSettings();
+ final var contractions = createContractions( settings );
+
+ if( settings.displayList() ) {
+ out.println( contractions.toString() );
+ }
+ else {
+ try {
+ final var c = new Curler( contractions, FILTER_PLAIN );
+ out.print( convert( c ) );
+ } catch( final Exception ex ) {
+ ex.printStackTrace( err );
+ }
+ }
+ }
+
+ private Contractions createContractions( final Settings settings ) {
+ return new Contractions.Builder()
+ .withBeganUnambiguous( settings.getBeganUnambiguous() )
+ .withEndedUnambiguous( settings.getEndedUnambiguous() )
+ .withBeganAmbiguous( settings.getBeganAmbiguous() )
+ .withEndedAmbiguous( settings.getEndedAmbiguous() )
+ .build();
+ }
+
+ private String convert( final Curler curler ) throws IOException {
+ return curler.apply( new String( in.readAllBytes() ) );
+ }
+
+ private Settings getSettings() {
+ return mSettings;
+ }
+
+ /**
+ * Returns the application version number retrieved from the application
+ * properties file. The properties file is generated at build time, which
+ * keys off the repository.
+ *
+ * @return The application version number.
+ * @throws RuntimeException An {@link IOException} occurred.
+ */
+ private static String getVersion() {
+ try {
+ final var properties = loadProperties( "version.properties" );
+ return properties.getProperty( "application.version" );
+ } catch( final Exception ex ) {
+ throw new RuntimeException( ex );
+ }
+ }
+
+ @SuppressWarnings( "SameParameterValue" )
+ private static Properties loadProperties( final String resource )
+ throws IOException {
+ final var properties = new Properties();
+ properties.load( getResourceAsStream( getResourceName( resource ) ) );
+ return properties;
+ }
+
+ private static String getResourceName( final String resource ) {
+ return format( "%s/%s", getPackagePath(), resource );
+ }
+
+ private static String getPackagePath() {
+ return KeenQuotes.class.getPackageName().replace( '.', '/' );
+ }
+
+ private static InputStream getResourceAsStream( final String resource ) {
+ return KeenQuotes.class.getClassLoader().getResourceAsStream( resource );
+ }
+
+ /**
+ * Main application entry point.
+ *
+ * @param args Command-line arguments.
+ */
+ public static void main( final String[] args ) {
+ final var app = new KeenQuotes();
+ final var parser = new CommandLine( app.getSettings() );
+ parser.setColorScheme( createColourScheme() );
+
+ final var exitCode = parser.execute( args );
+ final var parseResult = parser.getParseResult();
+
+ if( parseResult.isUsageHelpRequested() ) {
+ exit( exitCode );
+ }
+ else if( parseResult.isVersionHelpRequested() ) {
+ out.println( getVersion() );
+ exit( exitCode );
+ }
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/app/Settings.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.app;
+
+import picocli.CommandLine;
+
+import java.util.List;
+import java.util.concurrent.Callable;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.emptyList;
+
+@CommandLine.Command(
+ name = "KeenQuotes",
+ mixinStandardHelpOptions = true,
+ description = "Converts straight quotes to curly quotes."
+)
+@SuppressWarnings( {"FieldMayBeFinal", "CanBeFinal"} )
+public final class Settings implements Callable<Integer> {
+ /**
+ * Main executable class.
+ */
+ private final KeenQuotes mMain;
+
+ /**
+ * List of unambiguous contractions having leading apostrophes.
+ */
+ @CommandLine.Option(
+ names = {"-ub", "--unamb-began"},
+ description =
+ "Contraction to treat as unambiguous (e.g., cause, bout)",
+ paramLabel = "word"
+ )
+ private String[] mBeganUnambiguous;
+
+ /**
+ * List of unambiguous contractions having lagging apostrophes.
+ */
+ @CommandLine.Option(
+ names = {"-ue", "--unamb-ended"},
+ description =
+ "Contraction to treat as unambiguous (e.g., frien, thinkin)",
+ paramLabel = "word"
+ )
+ private String[] mEndedUnambiguous;
+
+ /**
+ * List of ambiguous contractions having leading apostrophes.
+ */
+ @CommandLine.Option(
+ names = {"-ab", "--amb-began"},
+ description =
+ "Contraction to treat as ambiguous (e.g., sup, kay)",
+ paramLabel = "word"
+ )
+ private String[] mBeganAmbiguous;
+
+ /**
+ * List of ambiguous contractions having lagging apostrophes.
+ */
+ @CommandLine.Option(
+ names = {"-ae", "--amb-ended"},
+ description =
+ "Contraction to treat as ambiguous (e.g., gi, o)",
+ paramLabel = "word"
+ )
+ private String[] mEndedAmbiguous;
+
+ /**
+ * Display default values.
+ */
+ @CommandLine.Option(
+ names = {"-l", "--list"},
+ description = "List all ambiguous and unambiguous contractions"
+ )
+ private boolean mDisplayList;
+
+ public Settings( final KeenQuotes main ) {
+ assert main != null;
+ mMain = main;
+ }
+
+ /**
+ * Answers whether the contractions listings should be displayed.
+ *
+ * @return {@code true} to list the contractions.
+ */
+ boolean displayList() {
+ return mDisplayList;
+ }
+
+ List<String> getBeganUnambiguous() {
+ return nullSafe( mBeganUnambiguous );
+ }
+
+ List<String> getEndedUnambiguous() {
+ return nullSafe( mEndedUnambiguous );
+ }
+
+ List<String> getBeganAmbiguous() {
+ return nullSafe( mBeganAmbiguous );
+ }
+
+ List<String> getEndedAmbiguous() {
+ return nullSafe( mEndedAmbiguous );
+ }
+
+ private List<String> nullSafe( final String[] words ) {
+ return words == null ? emptyList() : asList( words );
+ }
+
+ /**
+ * Invoked after the command-line arguments are parsed to launch the
+ * application.
+ *
+ * @return Exit level zero.
+ */
+ @Override
+ public Integer call() {
+ mMain.run();
+ return 0;
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/lex/FilterType.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.lex;
+
+/**
+ * Denotes what filtering to apply when scanning for quotation marks.
+ */
+public enum FilterType {
+ /**
+ * Curls all quotation marks.
+ */
+ FILTER_PLAIN( filter -> false ),
+
+ /**
+ * Suppresses curling quotation marks within certain XML elements.
+ */
+ FILTER_XML( new XmlFilter() );
+
+ private final LexerFilter mFilter;
+
+ FilterType( final LexerFilter filter ) {
+ mFilter = filter;
+ }
+
+ public LexerFilter filter() {
+ return mFilter;
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/lex/Lexeme.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.lex;
+
+import static com.whitemagicsoftware.keenquotes.lex.LexemeType.*;
+
+/**
+ * Responsible for tracking the beginning and ending offsets of a lexeme within
+ * a text string. Tracking the beginning and ending indices should use less
+ * memory than duplicating the entire text of Unicode characters (i.e., using
+ * a similar approach to run-length encoding).
+ */
+public final class Lexeme implements Comparable<Lexeme> {
+ /**
+ * Signifies an invalid index to help distinguish EOT/SOT.
+ */
+ private static final int E_INDEX = -2;
+
+ /**
+ * Denotes there are no more lexemes: the end of text (EOT) has been reached.
+ * The beginning index differentiates between EOT and SOT.
+ */
+ public static final Lexeme EOT = new Lexeme( LexemeType.EOT, -1, E_INDEX );
+
+ /**
+ * Denotes parsing at the start of text (SOT). This is useful to avoid
+ * branching conditions while iterating. The beginning index differentiates
+ * between EOT and SOT.
+ */
+ public static final Lexeme SOT = new Lexeme( LexemeType.SOT, 0, E_INDEX );
+
+ /**
+ * Denotes that a lexeme from the lexer has been obliterated and must not
+ * be considered when parsing tokens.
+ */
+ public static final Lexeme NONE = new Lexeme( LexemeType.NONE, 0, 0 );
+
+ private final LexemeType mType;
+ private final int mBegan;
+ private final int mEnded;
+
+ /**
+ * Create a lexeme that represents a section of the text.
+ *
+ * @param type Type of {@link Lexeme} to create.
+ * @param began Offset into the text where this instance starts (0-based).
+ * @param ended Offset into the text where this instance stops (0-based).
+ */
+ public Lexeme( final LexemeType type, final int began, final int ended ) {
+ assert type != null;
+ assert began >= 0 || ended == E_INDEX;
+ assert began <= ended || ended == E_INDEX;
+
+ mType = type;
+ mBegan = began;
+ mEnded = ended;
+ }
+
+ /**
+ * Answers whether the given {@link LexemeType} is the same as this
+ * instance's internal {@link LexemeType}.
+ *
+ * @param type The {@link LexemeType} to compare.
+ * @return {@code true} if the given {@link LexemeType} is equal to the
+ * internal {@link LexemeType}.
+ */
+ public boolean isType( final LexemeType type ) {
+ assert type != null;
+ return (mType != LexemeType.NONE || type == LexemeType.NONE) &&
+ (mType == type || type == ANY ||
+ (type == ENDING &&
+ (mType == EOL || mType == EOP || mType == LexemeType.EOT)
+ )
+ );
+ }
+
+ /**
+ * Answers whether any of the given {@link LexemeType} matches this
+ * instance's internal {@link LexemeType}.
+ *
+ * @param types The {@link LexemeType}s to compare.
+ * @return {@code true} if the internal {@link LexemeType} matches any one
+ * of the given {@link LexemeType}s.
+ */
+ public boolean isType( final LexemeType... types ) {
+ assert types != null;
+
+ for( final var type : types ) {
+ if( isType( type ) ) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ public int began() {
+ return mBegan;
+ }
+
+ public int ended() {
+ return mEnded;
+ }
+
+ public LexemeType getType() {
+ return mType;
+ }
+
+ /**
+ * Compares the starting offset of the given {@link Lexeme} to the starting
+ * offset of this {@link Lexeme} instance. This allows a list {@link Lexeme}s
+ * to be sorted by order of appearance in the parsed text.
+ */
+ @Override
+ public int compareTo( final Lexeme that ) {
+ assert that != null;
+ return this.mBegan - that.mBegan;
+ }
+
+ /**
+ * Extracts a sequence of characters from the given text at the offsets
+ * captured by this lexeme.
+ *
+ * @param text The text that was parsed using this class.
+ * @return The character string captured by the lexeme.
+ */
+ public String toString( final String text ) {
+ assert text != null;
+ return text.substring( mBegan, mEnded ).toLowerCase();
+ }
+
+ @Override
+ public String toString() {
+ return getClass().getSimpleName() + '[' +
+ "mType=" + mType +
+ ", mBegan=" + mBegan +
+ ", mEnded=" + mEnded +
+ ']';
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/lex/LexemeType.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.lex;
+
+/**
+ * Represents the type of {@link Lexeme} parsed by the {@link Lexer}.
+ */
+@SuppressWarnings( "SpellCheckingInspection" )
+public enum LexemeType {
+ QUOTE_SINGLE,
+ QUOTE_SINGLE_OPENING,
+ QUOTE_SINGLE_CLOSING,
+ QUOTE_DOUBLE,
+ QUOTE_DOUBLE_OPENING,
+ QUOTE_DOUBLE_CLOSING,
+ ESC_SINGLE,
+ ESC_DOUBLE,
+ SOT,
+ EOL,
+ EOP,
+ EOT,
+ SPACE,
+ WORD,
+ NUMBER,
+ PUNCT,
+ OPENING_GROUP,
+ CLOSING_GROUP,
+ HYPHEN,
+ DASH,
+ EQUALS,
+ PERIOD,
+ ELLIPSIS,
+ ENDING,
+ ANY,
+ NONE
+}
src/main/java/com/whitemagicsoftware/keenquotes/lex/Lexer.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.lex;
+
+import com.whitemagicsoftware.keenquotes.util.FastCharacterIterator;
+
+import java.util.function.Consumer;
+
+import static com.whitemagicsoftware.keenquotes.lex.LexemeType.*;
+import static java.lang.Character.isWhitespace;
+import static java.text.CharacterIterator.DONE;
+
+/**
+ * Turns text into words, numbers, punctuation, spaces, and more.
+ */
+public final class Lexer {
+ /**
+ * Tokenizes a sequence of characters. The order of comparisons is optimized
+ * towards probability of the occurrence of a character in regular English
+ * prose: letters, space, quotation marks, numbers, periods, new lines,
+ * then end of text.
+ *
+ * @param text The sequence of characters to tokenize.
+ * @param emitter Recipient of all tokenized character sequences.
+ * @param filter Tokenization preprocessor, usually empty, but can be used
+ * to skip certain character sequences (such as XML tags).
+ */
+ public static void lex(
+ final String text,
+ final Consumer<Lexeme> emitter,
+ final LexerFilter filter ) {
+ lex( new FastCharacterIterator( text ), emitter, filter );
+ }
+
+ @SuppressWarnings( "StatementWithEmptyBody" )
+ private static void lex(
+ final FastCharacterIterator i,
+ final Consumer<Lexeme> consumer,
+ final LexerFilter filter ) {
+
+ // Ensure at least one lexeme precedes a quotation mark. This is because
+ // the algorithm that determines how to curl a quotation mark relies on
+ // the character to be the second of four lexemes. Four lexemes provides
+ // sufficient context to curl most straight quotes.
+ consumer.accept( Lexeme.SOT );
+
+ while( i.hasNext() ) {
+ // Allow filters to skip character sequences (such as XML tags). This
+ // must allow back-to-back filtering, hence the loop.
+ while( filter.test( i ) );
+
+ final var index = i.index();
+ final var curr = i.current();
+ var token = PUNCT;
+
+ if( isLetter( curr ) ) {
+ // T1000 is one word, not a word and a number.
+ i.skip( next -> isLetter( next ) || isDigit( next ) );
+ token = WORD;
+ }
+ else if( curr == ' ' ) {
+ i.skip( next -> next == ' ' );
+ token = SPACE;
+ }
+ else if( curr == '\r' || curr == '\n' ) {
+ final var cr = new int[]{curr == '\r' ? 1 : 0};
+ final var lf = new int[]{curr == '\n' ? 1 : 0};
+
+ // Swallow all consecutive CR (Mac), CRLF (Windows), and/or LF (Unix).
+ i.skip( next -> {
+ cr[ 0 ] += next == '\r' ? 1 : 0;
+ lf[ 0 ] += next == '\n' ? 1 : 0;
+
+ return next == '\r' || next == '\n';
+ } );
+
+ token = cr[ 0 ] + lf[ 0 ] == 1 || cr[ 0 ] == 1 && lf[ 0 ] == 1
+ ? EOL
+ : EOP;
+ }
+ else if( isWhitespace( curr ) ) {
+ i.skip( Character::isWhitespace );
+ token = SPACE;
+ }
+ else if( isDigit( curr ) || isNumeric( curr ) && isDigit( i.peek() ) ) {
+ // Parse all consecutive number characters to prevent the main loop
+ // from switching back to word tokens.
+ i.skip(
+ next -> isDigit( next ) || isNumeric( next ) && isDigit( i.peek() )
+ );
+ token = NUMBER;
+ }
+ else if( curr == '.' ) {
+ i.skip( next -> next == '.' || next == ' ' && i.peek() == '.' );
+
+ token = i.index() - index == 0 ? PERIOD : ELLIPSIS;
+ }
+ else if( curr == '"' ) {
+ token = QUOTE_DOUBLE;
+ }
+ else if( curr == '\'' ) {
+ token = QUOTE_SINGLE;
+ }
+ else if( curr == '-' && i.peek() != '-' ) {
+ token = HYPHEN;
+ }
+ else if( isDash( curr ) ) {
+ i.skip( Lexer::isDash );
+ token = DASH;
+ }
+ else if( curr == '(' || curr == '{' || curr == '[' ) {
+ token = OPENING_GROUP;
+ }
+ else if( curr == ')' || curr == '}' || curr == ']' ) {
+ token = CLOSING_GROUP;
+ }
+ else if( curr == '“' ) {
+ token = QUOTE_DOUBLE_OPENING;
+ }
+ else if( curr == '”' ) {
+ token = QUOTE_DOUBLE_CLOSING;
+ }
+ else if( curr == '‘' ) {
+ token = QUOTE_SINGLE_OPENING;
+ }
+ else if( curr == '’' ) {
+ token = QUOTE_SINGLE_CLOSING;
+ }
+ else if( curr == '\\' ) {
+ i.next();
+ final var next = i.current();
+
+ if( next == '\'' ) {
+ token = ESC_SINGLE;
+ }
+ else if( next == '\"' ) {
+ token = ESC_DOUBLE;
+ }
+ else {
+ // Push back the escaped character, which wasn't a straight quote.
+ i.prev();
+ }
+ }
+ else if( curr == '=' ) {
+ token = EQUALS;
+ }
+ else if( curr == DONE ) {
+ continue;
+ }
+
+ assert index >= 0;
+ assert curr != DONE;
+
+ consumer.accept( new Lexeme( token, index, i.index() + 1 ) );
+ i.next();
+ }
+
+ // Simulate an end of line and end of paragraph before the end of text.
+ // This allows the parser to match against lexemes at the end of
+ // the string to curl (without having to introduce more conditions).
+ consumer.accept( new Lexeme( EOL, i.index(), i.index() ) );
+ consumer.accept( new Lexeme( EOP, i.index(), i.index() ) );
+ consumer.accept( Lexeme.EOT );
+ }
+
+ /**
+ * Answers whether the given character can be considered part of a word
+ * or not. This will include {@code _} and {@code *} because plain text
+ * formats often use those characters to emphasize a word.
+ *
+ * @param curr The character to check as being part of a word.
+ * @return {@code true} if the given character is a letter or a formatting
+ * indicator.
+ */
+ private static boolean isLetter( final char curr ) {
+ return Character.isLetter( curr ) || curr == '_' || curr == '*';
+ }
+
+ private static boolean isDigit( final char curr ) {
+ return Character.isDigit( curr ) ||
+ "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞".indexOf( curr ) > -1;
+ }
+
+ /**
+ * Answers whether the given character may be part of an en- or em-dash.
+ * This must be called after it is known that the character isn't a lone
+ * hyphen.
+ *
+ * @param curr The character to check as being a dash.
+ * @return {@code true} if the given character is part of a dash.
+ */
+ private static boolean isDash( final char curr ) {
+ return curr == '-' || curr == '–' || curr == '—' || curr == '―';
+ }
+
+ /**
+ * Answers whether the given character can be considered part of a number
+ * or not. This does not include digits, which are checked independently
+ * of this method.
+ *
+ * @param curr The character to check as being related to a number.
+ * @return {@code true} if the given character can be considered part of
+ * a number (e.g., -2,000.2^2 is considered a single number).
+ */
+ private static boolean isNumeric( final char curr ) {
+ return
+ curr == '.' || curr == ',' || curr == '-' || curr == '+' ||
+ curr == '^' || curr == '⅟' || curr == '⁄';
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/lex/LexerFilter.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.lex;
+
+import com.whitemagicsoftware.keenquotes.util.FastCharacterIterator;
+
+import java.util.function.Predicate;
+
+/**
+ * Convenience interface definition to avoid duplication.
+ */
+public interface LexerFilter extends Predicate<FastCharacterIterator> {
+}
src/main/java/com/whitemagicsoftware/keenquotes/lex/XmlFilter.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.lex;
+
+import com.whitemagicsoftware.keenquotes.util.FastCharacterIterator;
+
+import java.text.CharacterIterator;
+import java.util.Set;
+
+/**
+ * Responsible for lexing text while ignoring XML elements. The document must
+ * be both sane and well-formed. This is not intended to lex documents in the
+ * wild where the user has injected custom HTML. The lexer will fail if the
+ * angle brackets are not balanced. Additionally, any less than or greater than
+ * symbols must be encoded as {@code &lt;} or {@code &gt;}, respectively.
+ */
+public final class XmlFilter implements LexerFilter {
+
+ /**
+ * Elements that indicate preformatted text, intentional straight quotes.
+ */
+ private final Set<String> UNTOUCHABLE = Set.of(
+ "pre",
+ "code",
+ "tt",
+ "tex",
+ "kbd",
+ "samp",
+ "var",
+ "l",
+ "blockcode"
+ );
+
+ public XmlFilter() {}
+
+ /**
+ * Skip XML tags found within the prose, which hides the elements.
+ */
+ @Override
+ public boolean test( final FastCharacterIterator i ) {
+ final var match = i.current() == '<';
+
+ if( match ) {
+ final var openingTag = nextTag( i );
+
+ if( UNTOUCHABLE.contains( openingTag.toLowerCase() ) ) {
+ String closingTag;
+
+ do {
+ closingTag = nextTag( i );
+ }
+ while( !closingTag.endsWith( openingTag ) );
+ }
+ }
+
+ return match;
+ }
+
+ /**
+ * Skips to the next greater than or less than symbol.
+ *
+ * @param i The {@link CharacterIterator} used to scan through the text, one
+ * character at a time.
+ * @return An opening/closing tag name, or the content within the element.
+ */
+ private String nextTag( final FastCharacterIterator i ) {
+ final var begin = i.index();
+
+ i.skip( next -> next != '>' && next != '<' );
+
+ // Swallow the trailing greater than symbol.
+ i.next();
+
+ // Skip to the character following the greater than symbol.
+ i.next();
+
+ return i.substring( begin + 1, i.index() - 1 );
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/parser/AmbiguityResolver.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.parser;
+
+import com.whitemagicsoftware.keenquotes.lex.LexerFilter;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.function.Consumer;
+
+import static com.whitemagicsoftware.keenquotes.parser.TokenType.*;
+
+/**
+ * Responsible for resolving ambiguous quotes that could not be resolved during
+ * a first-pass parse of a document. Creates an ordered sequence of tokens that
+ * can be used to resolve certain ambiguities due to the fact that nested
+ * quotations must alternate between double and single quotes. This means that
+ * a string such as, "Is Iris' name Greek?" can have its single quote resolved
+ * (because the apostrophe is alone within double quotes).
+ */
+public final class AmbiguityResolver implements Consumer<Token> {
+
+ private final Consumer<Token> mConsumer;
+ private Tree<Token> mTree = new Tree<>();
+
+ private AmbiguityResolver( final Consumer<Token> consumer ) {
+ assert consumer != null;
+ mConsumer = consumer;
+ }
+
+ /**
+ * Entry point into the straight quote disambiguation algorithm.
+ *
+ * @param text Document to curl.
+ * @param contractions Set of known contractions (ambiguous and otherwise).
+ * @param consumer Recipient of quotation marks to curl.
+ */
+ public static void resolve(
+ final String text,
+ final Contractions contractions,
+ final Consumer<Token> consumer,
+ final LexerFilter filter ) {
+ final var resolver = new AmbiguityResolver( consumer );
+
+ QuoteEmitter.analyze( text, contractions, resolver, filter );
+ resolver.resolve();
+ }
+
+ /**
+ * Accepts opening, closing, ambiguous, and apostrophe {@link Token} types.
+ *
+ * @param token The {@link Token} to be resolved.
+ */
+ @Override
+ public void accept( final Token token ) {
+ // Create a new subtree when an opening quotation mark is found.
+ if( token.isType( QUOTE_OPENING_SINGLE ) ||
+ token.isType( QUOTE_OPENING_DOUBLE ) ) {
+ mTree = mTree.opening( token );
+ }
+ // Close the subtree if it was open, try to close it.
+ else if( token.isType( QUOTE_CLOSING_SINGLE ) ||
+ token.isType( QUOTE_CLOSING_DOUBLE ) ) {
+ mTree = mTree.closing( token );
+ }
+ // Add ambiguous tokens to be resolved; add apostrophes for later emitting.
+ else {
+ mTree.add( token );
+ }
+ }
+
+ /**
+ * Traverse the tree and resolve as many ambiguous tokens as possible. This
+ * is called after the document's AST is built.
+ */
+ private void resolve() {
+ final var tokens = new ArrayList<Token>();
+
+ // Opening and closing quotes aren't necessarily balanced, meaning the tree
+ // could be dangling anywhere below the root. We need to traverse the whole
+ // structure, every token from the top-down, when visiting breadth-first.
+ mTree = mTree.root();
+
+ // Replace the tree's tokens in situ with their deduced quotation mark.
+ mTree.visit( this::disambiguate );
+
+ // The tokens are not necessarily replaced or constructed in order.
+ mTree.visit( tree -> tree.iterateTokens( tokens::add ) );
+
+ Collections.sort( tokens );
+
+ // Relay the tokens, in order, for updating the parsed document.
+ tokens.forEach( mConsumer );
+ }
+
+ /**
+ * The workhorse logic, which replaces ambiguous quotation marks with
+ * resolvable equivalents. Any unresolved quotation marks are left in the
+ * data structure, marked as an ambiguous form.
+ *
+ * @param tree The {@link Tree} that may contain ambiguous tokens to resolve.
+ */
+ private void disambiguate( final Tree<Token> tree ) {
+ final var countLeading = tree.count( QUOTE_AMBIGUOUS_LEADING );
+ final var countLagging = tree.count( QUOTE_AMBIGUOUS_LAGGING );
+ final var countUnknown = tree.count( AMBIGUOUS );
+
+ if( tree.hasOpeningSingleQuote() && !tree.hasClosingSingleQuote() ) {
+ if( countUnknown == 0 && countLeading == 0 && countLagging == 1 ) {
+ tree.replaceAll( QUOTE_AMBIGUOUS_LAGGING, QUOTE_CLOSING_SINGLE );
+ }
+ else if( countUnknown == 1 && countLagging == 0 ) {
+ tree.replaceAll( AMBIGUOUS, QUOTE_CLOSING_SINGLE );
+ }
+ }
+
+ if( countUnknown == 0 && countLeading == 1 && countLagging == 0 &&
+ !tree.hasOpeningSingleQuote() && tree.hasClosingSingleQuote() ) {
+ tree.replaceAll( QUOTE_AMBIGUOUS_LEADING, QUOTE_OPENING_SINGLE );
+ }
+
+ if( !tree.hasOpeningSingleQuote() && !tree.hasClosingSingleQuote() ||
+ tree.isBalanced() ) {
+ if( countUnknown == 0 && countLeading > 0 && countLagging == 0 ) {
+ tree.replaceAll( QUOTE_AMBIGUOUS_LEADING, QUOTE_APOSTROPHE );
+ }
+
+ if( countUnknown == 0 && countLeading == 0 && countLagging > 0 ) {
+ tree.replaceAll( QUOTE_AMBIGUOUS_LAGGING, QUOTE_APOSTROPHE );
+ }
+ }
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/parser/Contractions.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.parser;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static java.lang.String.format;
+import static java.util.Collections.emptySet;
+import static java.util.Collections.sort;
+
+/**
+ * Placeholder for various types of contractions.
+ */
+@SuppressWarnings( "SpellCheckingInspection" )
+public class Contractions {
+
+ private final Builder mBuilder;
+
+ private Contractions( final Builder builder ) {
+ assert builder != null;
+ mBuilder = builder;
+ }
+
+ /**
+ * Allows constructing a list of custom contractions.
+ */
+ @SuppressWarnings( "unused" )
+ public static class Builder {
+ private final Set<String> mBeganEndedUnambiguous = new HashSet<>();
+ private final Set<String> mBeganUnambiguous = new HashSet<>();
+ private final Set<String> mEndedUnambiguous = new HashSet<>();
+ private final Set<String> mBeganAmbiguous = new HashSet<>();
+ private final Set<String> mEndedAmbiguous = new HashSet<>();
+
+ public Builder withBeganEndedUnambiguous( final List<String> words ) {
+ mBeganEndedUnambiguous.addAll( words );
+ return this;
+ }
+
+ public Builder withBeganUnambiguous( final List<String> words ) {
+ mBeganUnambiguous.addAll( words );
+ return this;
+ }
+
+ public Builder withEndedUnambiguous( final List<String> words ) {
+ mEndedUnambiguous.addAll( words );
+ return this;
+ }
+
+ public Builder withBeganAmbiguous( final List<String> words ) {
+ mBeganAmbiguous.addAll( words );
+ return this;
+ }
+
+ public Builder withEndedAmbiguous( final List<String> words ) {
+ mEndedAmbiguous.addAll( words );
+ return this;
+ }
+
+ /**
+ * Constructs a new set of {@link Contractions} that can be configured
+ * using this {@link Builder} instance.
+ *
+ * @return {@link Contractions} suitable for use with parsing text.
+ */
+ public Contractions build() {
+ mBeganEndedUnambiguous.addAll( BEGAN_ENDED_UNAMBIGUOUS );
+ mBeganUnambiguous.addAll( BEGAN_UNAMBIGUOUS );
+ mEndedUnambiguous.addAll( ENDED_UNAMBIGUOUS );
+ mBeganAmbiguous.addAll( BEGAN_AMBIGUOUS );
+ mEndedAmbiguous.addAll( ENDED_AMBIGUOUS );
+
+ // Remove ambiguous items if they are already declared.
+ mBeganAmbiguous.removeAll( mBeganUnambiguous );
+ mEndedAmbiguous.removeAll( mEndedUnambiguous );
+
+ return new Contractions( this );
+ }
+
+ /**
+ * This returns the {@code fallback} {@link Set} if {@code src} is empty;
+ * otherwise, this returns the empty {@link Set}.
+ *
+ * @param src A set of contractions, possibly empty.
+ * @param fallback The default values to use if {@code src} is empty.
+ * @param <T> The type of data used by both {@link Set}s.
+ * @return An empty {@link Set} if the {@code src} contains at least one
+ * element; otherwise, this will return {@code fallback}.
+ */
+ private static <T> Set<T> from( final Set<T> src, final Set<T> fallback ) {
+ assert src != null;
+ assert fallback != null;
+ return src.isEmpty() ? fallback : emptySet();
+ }
+ }
+
+ public boolean beganEndedUmambiguously( final String word ) {
+ assert word != null;
+ return getBeganEndedUnambiguous().contains( word );
+ }
+
+ /**
+ * Answers whether the given word is a contraction that always starts
+ * with an apostrophe. The comparison is case-insensitive. This must
+ * only be called when a straight quote is followed by a word.
+ *
+ * @param word The word to compare against the list of known unambiguous
+ * contractions.
+ * @return {@code true} when the given word is in the set of unambiguous
+ * contractions.
+ */
+ public boolean beganUnambiguously( final String word ) {
+ assert word != null;
+ return getBeganUnambiguous().contains( word.toLowerCase() );
+ }
+
+ /**
+ * Answers whether the given word could be a contraction but is also a
+ * valid word in non-contracted form.
+ *
+ * @param word The word to compare against the list of known ambiguous
+ * contractions.
+ * @return {@code true} when the given word is in the set of ambiguous
+ * contractions.
+ */
+ public boolean beganAmbiguously( final String word ) {
+ assert word != null;
+ return getBeganAmbiguous().contains( word.toLowerCase() );
+ }
+
+ public boolean endedUnambiguously( final String word ) {
+ assert word != null;
+ return getEndedUnambiguous().contains( word.toLowerCase() );
+ }
+
+ public boolean endedAmbiguously( final String word ) {
+ assert word != null;
+ final var check = word.toLowerCase();
+
+ // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet
+ // allow o' to match because 'a sentence can end with the letter o'.
+ return getEndedAmbiguous().contains( check ) ||
+ check.endsWith( "s" ) || check.endsWith( "z" ) ||
+ check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" ));
+ }
+
+ private Set<String> getBeganEndedUnambiguous() {
+ return mBuilder.mBeganEndedUnambiguous;
+ }
+
+ private Set<String> getBeganUnambiguous() {
+ return mBuilder.mBeganUnambiguous;
+ }
+
+ private Set<String> getEndedUnambiguous() {
+ return mBuilder.mEndedUnambiguous;
+ }
+
+ private Set<String> getBeganAmbiguous() {
+ return mBuilder.mBeganAmbiguous;
+ }
+
+ private Set<String> getEndedAmbiguous() {
+ return mBuilder.mEndedAmbiguous;
+ }
+
+ @Override
+ public String toString() {
+ return
+ toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) +
+ toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" ) +
+ toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) +
+ toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" );
+ }
+
+ private String toString(
+ final Set<String> words, final String category, final String fmt ) {
+ final var sb = new StringBuilder( 16384 );
+ final var newline = System.lineSeparator();
+ final var list = new ArrayList<>( words );
+
+ sort( list );
+ sb.append( format( "%n%s%n", category ) );
+ list.forEach( ( s ) -> sb.append( format( fmt, s ) ).append( newline ) );
+
+ return sb.toString();
+ }
+
+ /**
+ * Words having a straight apostrophe at the beginning and end.
+ */
+ private static final Set<String> BEGAN_ENDED_UNAMBIGUOUS = Set.of(
+ "ackin",
+ "ammerin",
+ // hankering
+ "ankerin",
+ // having
+ "avin",
+ "awkin",
+ "cepin",
+ "ceppin",
+ "ceptin",
+ "cordin",
+ "eadin",
+ "eavin",
+ "elpin",
+ "inderin",
+ "lectioneerin",
+ "mazin",
+ "memberin",
+ // fish 'n' chips
+ "n",
+ "obblin",
+ "oldin",
+ "ollerin",
+ "oppin",
+ "ousekeepin",
+ "owlin",
+ "sceptin",
+ "spectin",
+ "splainin",
+ "sposin",
+ "sputin",
+ "stonishin",
+ "stroyin",
+ "suadin",
+ "titivatin",
+ "troducin",
+ "uggin",
+ "ulkin",
+ "umbuggin",
+ "umiliatin",
+ "ummin",
+ "umpin",
+ "urryin",
+ "urtin",
+ "ustlin",
+ "vestigatin",
+ "vitin",
+ "xceptin",
+ "xplainin",
+ "xplodin"
+ );
+
+ /**
+ * Words having a straight apostrophe that cannot be mistaken for an
+ * opening single quote.
+ */
+ private static final Set<String> BEGAN_UNAMBIGUOUS = Set.of(
+ "aporth",
+ // about you
+ "boutcha",
+ // about you
+ "boutchu",
+ // about well
+ "boutwell",
+ // except
+ "cept",
+ // decided
+ "cided",
+ // because
+ "cos",
+ // armadillo
+ "dillo",
+ // them
+ "em",
+ // afraid
+ "fraid",
+ // against
+ "gainst",
+ // him
+ "im",
+ // and
+ "n",
+ // beneath
+ "neath",
+ // another
+ "nother",
+ // enough
+ "nuff",
+ // gonna
+ "onna",
+ "onna'",
+ // horse
+ "oss",
+ // horses
+ "osses",
+ // upon
+ "pon",
+ // that's|is
+ "s",
+ "sblood",
+ // excuse
+ "scuse",
+ "sfar",
+ "sfoot",
+ // considered
+ "sidered",
+ // exploit
+ "sploit",
+ // exploits
+ "sploits",
+ // it
+ "t",
+ "taint",
+ "tain",
+ "til",
+ "tis",
+ "tisn",
+ "tshall",
+ "twas",
+ "twasn",
+ "tween",
+ "twere",
+ "tweren",
+ "twixt",
+ "twon",
+ "twou",
+ "twould",
+ "twouldn",
+ // have
+ "ve",
+ // exactly
+ "xactly"
+ );
+
+ /**
+ * Words having a straight apostrophe that may be either part of a
+ * contraction or a word that stands alone beside an opening single quote.
+ */
+ private static final Set<String> BEGAN_AMBIGUOUS = Set.of(
+ // have
+ "a",
+ // about|boxing match
+ "bout",
+ // because|causal
+ "cause",
+ // what you|choo choo train
+ "choo",
+ // he|e pluribus unum
+ "e",
+ // here|earlier
+ "ere",
+ // afro|to and fro
+ "fro",
+ // whore|ho ho!
+ "ho",
+ // okay|letter K
+ "kay",
+ // lo|lo and behold
+ "lo",
+ // leaned|enlisted
+ "listed",
+ // ???
+ "nation",
+ // are|regarding
+ "re",
+ // circular|around
+ "round",
+ // what's up|to eat
+ "sup",
+ // it will|twill fabric
+ "twill",
+ // them|utterance
+ "um",
+ // is that|Iranian village
+ "zat"
+ );
+
+ private static final Set<String> ENDED_AMBIGUOUS = Set.of(
+ // pin|head dunk
+ "bobbin",
+ // give|martial arts garment
+ "gi",
+ // in|I
+ "i",
+ // of|letter o
+ "o"
+ );
+
+ private static final Set<String> ENDED_UNAMBIGUOUS = Set.of(
+ // and
+ "an",
+ // didn't
+ "didn",
+ // for/before
+ "fo",
+ // friend
+ "frien",
+ // just
+ "jes",
+ // just
+ "jus",
+ // lord
+ "lor",
+ // myself
+ "masel",
+ // old
+ "ol",
+ // San (Francisco)
+ "Sa",
+ // shift
+ "shif",
+ // the
+ "th",
+ // what
+ "wha",
+ // with
+ "wi",
+ // world
+ "worl",
+ // Top ~3800 common -ing words as English contractions.
+ "abandonin",
+ "abductin",
+ "abettin",
+ "abidin",
+ "abolishin",
+ "aboundin",
+ "absorbin",
+ "abstainin",
+ "abstractin",
+ "abusin",
+ "abuttin",
+ "acceleratin",
+ "acceptin",
+ "accessin",
+ "accommodatin",
+ "accompanyin",
+ "accomplishin",
+ "accordin",
+ "accountin",
+ "accruin",
+ "accumulatin",
+ "accusin",
+ "achievin",
+ "achin",
+ "acknowledgin",
+ "acquaintin",
+ "acquirin",
+ "actin",
+ "activatin",
+ "adaptin",
+ "addin",
+ "addressin",
+ "adherin",
+ "adjoinin",
+ "adjustin",
+ "administerin",
+ "admirin",
+ "admittin",
+ "admonishin",
+ "adoin",
+ "adoptin",
+ "adorin",
+ "advancin",
+ "advertisin",
+ "advisin",
+ "advocatin",
+ "affectin",
+ "affirmin",
+ "afflictin",
+ "affordin",
+ "afishin",
+ "aggravatin",
+ "agitatin",
+ "agoin",
+ "agonizin",
+ "agreein",
+ "ailin",
+ "aimin",
+ "ain",
+ "airin",
+ "alarmin",
+ "alayin",
+ "alertin",
+ "alienatin",
+ "alightin",
+ "alignin",
+ "allegin",
+ "alleviatin",
+ "allocatin",
+ "allowin",
+ "alludin",
+ "allurin",
+ "alookin",
+ "alterin",
+ "alternatin",
+ "alyin",
+ "amassin",
+ "amazin",
+ "amblin",
+ "amendin",
+ "amountin",
+ "amplifyin",
+ "amusin",
+ "analysin",
+ "analyzin",
+ "anchorin",
+ "anglin",
+ "animatin",
+ "annealin",
+ "annihilatin",
+ "announcin",
+ "annoyin",
+ "anointin",
+ "answerin",
+ "anticipatin",
+ "anythin",
+ "apologizin",
+ "appallin",
+ "appealin",
+ "appearin",
+ "appertainin",
+ "appetizin",
+ "applaudin",
+ "applyin",
+ "appointin",
+ "appraisin",
+ "appreciatin",
+ "apprehendin",
+ "approachin",
+ "appropriatin",
+ "approvin",
+ "approximatin",
+ "archin",
+ "archivin",
+ "arguin",
+ "arisin",
+ "arousin",
+ "arrangin",
+ "arrestin",
+ "arrivin",
+ "articulatin",
+ "ascendin",
+ "ascertainin",
+ "ascribin",
+ "askin",
+ "assailin",
+ "assaultin",
+ "assemblin",
+ "assertin",
+ "assessin",
+ "assignin",
+ "assimilatin",
+ "assistin",
+ "associatin",
+ "assumin",
+ "assurin",
+ "astonishin",
+ "astoundin",
+ "atonin",
+ "attachin",
+ "attackin",
+ "attainin",
+ "attemptin",
+ "attendin",
+ "attestin",
+ "attractin",
+ "attributin",
+ "auditin",
+ "augmentin",
+ "authorin",
+ "authorisin",
+ "authorizin",
+ "availin",
+ "avengin",
+ "averagin",
+ "avertin",
+ "avoidin",
+ "awaitin",
+ "awakenin",
+ "awakin",
+ "awardin",
+ "awin",
+ "awnin",
+ "awonderin",
+ "axin",
+ "babblin",
+ "babysittin",
+ "backin",
+ "backslidin",
+ "backtrackin",
+ "badgerin",
+ "bafflin",
+ "baggin",
+ "bailin",
+ "baitin",
+ "bakin",
+ "balancin",
+ "baldin",
+ "balloonin",
+ "ballotin",
+ "bandagin",
+ "bandin",
+ "bangin",
+ "banishin",
+ "bankin",
+ "bannin",
+ "banquetin",
+ "banterin",
+ "baptizin",
+ "bargainin",
+ "barin",
+ "barkin",
+ "barrin",
+ "barterin",
+ "bashin",
+ "baskin",
+ "bastin",
+ "bathin",
+ "batterin",
+ "battin",
+ "battlin",
+ "bawlin",
+ "bayin",
+ "beadin",
+ "beamin",
+ "bearin",
+ "beatin",
+ "beautifyin",
+ "beckonin",
+ "becomin",
+ "beddin",
+ "beefin",
+ "beepin",
+ "befittin",
+ "befriendin",
+ "begettin",
+ "beggin",
+ "beginnin",
+ "beguilin",
+ "behavin",
+ "beheadin",
+ "beholdin",
+ "beijin",
+ "bein",
+ "belayin",
+ "belchin",
+ "believin",
+ "belittlin",
+ "bellerin",
+ "bellowin",
+ "belongin",
+ "beltin",
+ "bemoanin",
+ "benchmarkin",
+ "bendin",
+ "benefitin",
+ "beratin",
+ "beseechin",
+ "besettin",
+ "besiegin",
+ "bestowin",
+ "bestsellin",
+ "betrayin",
+ "betterin",
+ "bettin",
+ "bewailin",
+ "bewilderin",
+ "bewitchin",
+ "biasin",
+ "bickerin",
+ "bicyclin",
+ "biddin",
+ "bidin",
+ "bikin",
+ "billin",
+ "billowin",
+ "bindin",
+ "birlin",
+ "birthin",
+ "bisectin",
+ "bitin",
+ "blabbin",
+ "blackenin",
+ "blackin",
+ "blackmailin",
+ "blamin",
+ "blanchin",
+ "blarin",
+ "blasphemin",
+ "blastin",
+ "blatherin",
+ "blazin",
+ "bleachin",
+ "bleatin",
+ "bleedin",
+ "blendin",
+ "blessin",
+ "bletherin",
+ "blightin",
+ "blin",
+ "blindin",
+ "blinkin",
+ "blisterin",
+ "bloatin",
+ "blockadin",
+ "blockin",
+ "bloggin",
+ "bloodlettin",
+ "bloomin",
+ "blottin",
+ "blowin",
+ "blubberin",
+ "bluffin",
+ "bluin",
+ "blunderin",
+ "blurrin",
+ "blushin",
+ "blusterin",
+ "boardin",
+ "boastin",
+ "boatin",
+ "bodybuildin",
+ "bogglin",
+ "boilin",
+ "bolsterin",
+ "boltin",
+ "bombardin",
+ "bombin",
+ "bondin",
+ "booin",
+ "bookin",
+ "bookkeepin",
+ "boomin",
+ "boostin",
+ "bootin",
+ "bootstrappin",
+ "boozin",
+ "borderin",
+ "borin",
+ "borrowin",
+ "bossin",
+ "botherin",
+ "bottlin",
+ "bouncin",
+ "boundin",
+ "bowin",
+ "bowlin",
+ "bowrin",
+ "bowstrin",
+ "boxin",
+ "boycottin",
+ "bracin",
+ "bracketin",
+ "braggin",
+ "braidin",
+ "brainstormin",
+ "brainwashin",
+ "brakin",
+ "branchin",
+ "brandin",
+ "brandishin",
+ "bravin",
+ "brawlin",
+ "brayin",
+ "breachin",
+ "breakfastin",
+ "breakin",
+ "breastfeedin",
+ "breathin",
+ "breathtakin",
+ "breedin",
+ "breezin",
+ "brewin",
+ "bribin",
+ "bridgin",
+ "briefin",
+ "brightenin",
+ "brimmin",
+ "bringin",
+ "brinin",
+ "bristlin",
+ "broadcastin",
+ "broadenin",
+ "broilin",
+ "brokerin",
+ "broodin",
+ "brownin",
+ "browsin",
+ "bruisin",
+ "brushin",
+ "bubblin",
+ "buckin",
+ "bucklin",
+ "buddin",
+ "budgetin",
+ "bufferin",
+ "buffetin",
+ "buggin",
+ "buildin",
+ "bulgin",
+ "bullyin",
+ "bumblin",
+ "bummin",
+ "bumpin",
+ "bunchin",
+ "bundlin",
+ "bunglin",
+ "buntin",
+ "burdenin",
+ "burgeonin",
+ "burnin",
+ "burrowin",
+ "burstin",
+ "buryin",
+ "bushwhackin",
+ "busin",
+ "bustin",
+ "bustlin",
+ "busyin",
+ "butcherin",
+ "buttin",
+ "buttonin",
+ "buyin",
+ "buzzin",
+ "bypassin",
+ "cachin",
+ "cacklin",
+ "cadgin",
+ "calculatin",
+ "callin",
+ "calmin",
+ "campaignin",
+ "campin",
+ "cancelin",
+ "cannin",
+ "canterin",
+ "cantin",
+ "canvassin",
+ "caperin",
+ "cappin",
+ "captivatin",
+ "capturin",
+ "cardin",
+ "careerin",
+ "caregivin",
+ "caressin",
+ "carin",
+ "carousin",
+ "carpetin",
+ "carryin",
+ "cartin",
+ "carvin",
+ "cascadin",
+ "cashin",
+ "casin",
+ "castin",
+ "catalogin",
+ "catchin",
+ "categorizin",
+ "caterin",
+ "causin",
+ "cavin",
+ "cavortin",
+ "ceasin",
+ "ceilin",
+ "celebratin",
+ "cementin",
+ "centerin",
+ "certifyin",
+ "chaffin",
+ "chafin",
+ "challengin",
+ "champin",
+ "championin",
+ "changin",
+ "channelin",
+ "chantin",
+ "chappin",
+ "characterizin",
+ "chargin",
+ "charmin",
+ "chartin",
+ "chasin",
+ "chatterin",
+ "chattin",
+ "cheatin",
+ "checkin",
+ "cheerin",
+ "cherishin",
+ "chewin",
+ "childbearin",
+ "chillin",
+ "chinkin",
+ "chinnin",
+ "chippin",
+ "chirpin",
+ "chitterin",
+ "chokin",
+ "choosin",
+ "choppin",
+ "christenin",
+ "chuckin",
+ "chucklin",
+ "chunkin",
+ "churnin",
+ "cinchin",
+ "cipherin",
+ "circlin",
+ "circulatin",
+ "circumventin",
+ "citin",
+ "civilizin",
+ "clackin",
+ "claddin",
+ "claimin",
+ "clamberin",
+ "clammin",
+ "clamorin",
+ "clampin",
+ "clangin",
+ "clankin",
+ "clappin",
+ "clarifyin",
+ "clashin",
+ "claspin",
+ "classifyin",
+ "clatterin",
+ "claverin",
+ "clawin",
+ "cleanin",
+ "cleansin",
+ "clearin",
+ "cleavin",
+ "clenchin",
+ "clerkin",
+ "clickin",
+ "climbin",
+ "clin",
+ "clinchin",
+ "clingin",
+ "clinkin",
+ "clippin",
+ "clockin",
+ "cloggin",
+ "clonin",
+ "closin",
+ "clothin",
+ "clottin",
+ "clubbin",
+ "cluckin",
+ "clusterin",
+ "clutchin",
+ "clutterin",
+ "coachin",
+ "coastin",
+ "coatin",
+ "coaxin",
+ "cobblin",
+ "cockin",
+ "coddlin",
+ "codin",
+ "coexistin",
+ "coilin",
+ "coincidin",
+ "coinin",
+ "collaboratin",
+ "collapsin",
+ "collectin",
+ "collidin",
+ "colonisin",
+ "colonizin",
+ "colorin",
+ "colourin",
+ "combatin",
+ "combin",
+ "combinin",
+ "comfortin",
+ "comin",
+ "commandin",
+ "commemoratin",
+ "commencin",
+ "commendin",
+ "commentin",
+ "commissionin",
+ "committin",
+ "communicatin",
+ "commutin",
+ "comparin",
+ "compellin",
+ "compensatin",
+ "competin",
+ "compilin",
+ "complainin",
+ "completin",
+ "complicatin",
+ "complyin",
+ "composin",
+ "compostin",
+ "compoundin",
+ "comprehendin",
+ "compressin",
+ "comprisin",
+ "compromisin",
+ "computin",
+ "concealin",
+ "concedin",
+ "conceivin",
+ "concentratin",
+ "conceptualizin",
+ "concernin",
+ "concludin",
+ "concurrin",
+ "condemnin",
+ "condensin",
+ "condescendin",
+ "conditionin",
+ "conductin",
+ "conferencin",
+ "conferrin",
+ "confessin",
+ "confidin",
+ "configurin",
+ "confinin",
+ "confirmin",
+ "conflictin",
+ "conformin",
+ "confoundin",
+ "confrontin",
+ "confusin",
+ "congratulatin",
+ "conjurin",
+ "connectin",
+ "connin",
+ "conquerin",
+ "consentin",
+ "conservin",
+ "considerin",
+ "consistin",
+ "consolidatin",
+ "consolin",
+ "consortin",
+ "conspirin",
+ "constitutin",
+ "constrainin",
+ "constructin",
+ "construin",
+ "consultin",
+ "consumin",
+ "contactin",
+ "containin",
+ "contaminatin",
+ "contemplatin",
+ "contendin",
+ "contestin",
+ "continuin",
+ "contractin",
+ "contradictin",
+ "contrastin",
+ "contributin",
+ "contrivin",
+ "controllin",
+ "convenin",
+ "convergin",
+ "conversin",
+ "convertin",
+ "conveyin",
+ "convincin",
+ "cooin",
+ "cookin",
+ "coolin",
+ "cooperatin",
+ "coordinatin",
+ "copin",
+ "coppin",
+ "copyin",
+ "corkin",
+ "cornin",
+ "correctin",
+ "correlatin",
+ "correspondin",
+ "corruptin",
+ "costin",
+ "coughin",
+ "counselin",
+ "counsellin",
+ "counteractin",
+ "counterfeitin",
+ "counterin",
+ "countervailin",
+ "countin",
+ "couplin",
+ "coursin",
+ "courtin",
+ "coverin",
+ "cowerin",
+ "crabbin",
+ "crackin",
+ "cracklin",
+ "cradlin",
+ "craftin",
+ "crammin",
+ "crampin",
+ "cranin",
+ "crapin",
+ "crashin",
+ "cravin",
+ "crawlin",
+ "creakin",
+ "creatin",
+ "creditin",
+ "creepin",
+ "cringin",
+ "crinklin",
+ "cripplin",
+ "criticizin",
+ "critiquin",
+ "croakin",
+ "croonin",
+ "croppin",
+ "crossin",
+ "crouchin",
+ "crowdin",
+ "crowin",
+ "crownin",
+ "cruisin",
+ "crumblin",
+ "crunchin",
+ "crusadin",
+ "crushin",
+ "cryin",
+ "cuddlin",
+ "cuffin",
+ "culminatin",
+ "cultivatin",
+ "cummin",
+ "cunnin",
+ "cuppin",
+ "curbin",
+ "curin",
+ "curlin",
+ "cursin",
+ "curtailin",
+ "curvin",
+ "cushin",
+ "cussin",
+ "customizin",
+ "cuttin",
+ "cyclin",
+ "dabbin",
+ "dabblin",
+ "damagin",
+ "damnin",
+ "dampenin",
+ "dampin",
+ "dancin",
+ "danglin",
+ "darin",
+ "darkenin",
+ "darlin",
+ "darnin",
+ "dartin",
+ "dashin",
+ "datin",
+ "dauntin",
+ "dawdlin",
+ "dawnin",
+ "daydreamin",
+ "dazzlin",
+ "deadenin",
+ "deafenin",
+ "dealin",
+ "debasin",
+ "debatin",
+ "debilitatin",
+ "debriefin",
+ "debuggin",
+ "decayin",
+ "deceivin",
+ "decidin",
+ "decipherin",
+ "deckin",
+ "declaimin",
+ "declarin",
+ "declinin",
+ "decodin",
+ "decommissionin",
+ "decomposin",
+ "deconstructin",
+ "decoratin",
+ "decouplin",
+ "decreasin",
+ "dedicatin",
+ "deducin",
+ "deductin",
+ "deemin",
+ "deepenin",
+ "defaultin",
+ "defeatin",
+ "defendin",
+ "deferrin",
+ "defilin",
+ "definin",
+ "deflectin",
+ "deformin",
+ "defraudin",
+ "defrayin",
+ "defyin",
+ "degeneratin",
+ "degradin",
+ "dehumanizin",
+ "delayin",
+ "delegatin",
+ "deletin",
+ "deliberatin",
+ "delightin",
+ "delineatin",
+ "deliverin",
+ "deludin",
+ "delvin",
+ "demandin",
+ "demeanin",
+ "democratizin",
+ "demolishin",
+ "demonstratin",
+ "demoralizin",
+ "denigratin",
+ "denotin",
+ "denouncin",
+ "denyin",
+ "departin",
+ "dependin",
+ "depictin",
+ "depletin",
+ "deplorin",
+ "deployin",
+ "depositin",
+ "deprecatin",
+ "depreciatin",
+ "depressin",
+ "deprivin",
+ "derivin",
+ "descendin",
+ "describin",
+ "desertin",
+ "deservin",
+ "designatin",
+ "designin",
+ "desirin",
+ "desolatin",
+ "despairin",
+ "despisin",
+ "despondin",
+ "destabilizin",
+ "destroyin",
+ "detachin",
+ "detailin",
+ "detainin",
+ "detectin",
+ "deterioratin",
+ "determinin",
+ "deterrin",
+ "devastatin",
+ "developin",
+ "deviatin",
+ "devilin",
+ "devisin",
+ "devolvin",
+ "devotin",
+ "devourin",
+ "diagnosin",
+ "dialin",
+ "dickerin",
+ "dictatin",
+ "dietin",
+ "differentiatin",
+ "differin",
+ "diffusin",
+ "digestin",
+ "diggin",
+ "digitizin",
+ "dilatin",
+ "dilutin",
+ "diminishin",
+ "dimmin",
+ "dingin",
+ "dinin",
+ "dippin",
+ "directin",
+ "disablin",
+ "disagreein",
+ "disappearin",
+ "disappointin",
+ "disapprovin",
+ "disarmin",
+ "disbandin",
+ "disbelievin",
+ "discardin",
+ "discernin",
+ "dischargin",
+ "disciplinin",
+ "disclosin",
+ "disconcertin",
+ "disconnectin",
+ "discontinuin",
+ "discountin",
+ "discouragin",
+ "discoursin",
+ "discoverin",
+ "discreditin",
+ "discriminatin",
+ "discussin",
+ "disdainin",
+ "disembarkin",
+ "disengagin",
+ "disfigurin",
+ "disgracin",
+ "disguisin",
+ "disgustin",
+ "disheartenin",
+ "dishin",
+ "disinfectin",
+ "disintegratin",
+ "dislikin",
+ "dislodgin",
+ "dismantlin",
+ "dismissin",
+ "dismountin",
+ "disobeyin",
+ "disorientin",
+ "disparagin",
+ "dispatchin",
+ "dispellin",
+ "dispensin",
+ "dispersin",
+ "displacin",
+ "displayin",
+ "displeasin",
+ "disposin",
+ "disputin",
+ "disqualifyin",
+ "disquietin",
+ "disregardin",
+ "disruptin",
+ "dissectin",
+ "dissemblin",
+ "disseminatin",
+ "dissentin",
+ "dissipatin",
+ "dissolvin",
+ "distancin",
+ "distillin",
+ "distinguishin",
+ "distortin",
+ "distractin",
+ "distressin",
+ "distributin",
+ "disturbin",
+ "divergin",
+ "diversifyin",
+ "divertin",
+ "divestin",
+ "dividin",
+ "divin",
+ "divinin",
+ "divorcin",
+ "dizzyin",
+ "dockin",
+ "doctorin",
+ "documentin",
+ "dodgin",
+ "doggin",
+ "doin",
+ "dominatin",
+ "domineerin",
+ "donatin",
+ "donnin",
+ "doodlin",
+ "dopin",
+ "dorkin",
+ "dosin",
+ "dotin",
+ "dottin",
+ "doublin",
+ "doubtin",
+ "dowlin",
+ "downin",
+ "downloadin",
+ "downplayin",
+ "downsizin",
+ "dozin",
+ "draftin",
+ "draggin",
+ "drainin",
+ "dramatisin",
+ "dramatizin",
+ "drapin",
+ "drawin",
+ "drawlin",
+ "dreadin",
+ "dreamin",
+ "dredgin",
+ "drenchin",
+ "dressin",
+ "dressmakin",
+ "dribblin",
+ "driftin",
+ "drillin",
+ "drinkin",
+ "drippin",
+ "drivin",
+ "drizzlin",
+ "dronin",
+ "droolin",
+ "droopin",
+ "droppin",
+ "drownin",
+ "drummin",
+ "dryin",
+ "duckin",
+ "duelin",
+ "duellin",
+ "dumpin",
+ "dumplin",
+ "duplicatin",
+ "durin",
+ "dustin",
+ "dwellin",
+ "dwindlin",
+ "dyein",
+ "dyin",
+ "earnin",
+ "earrin",
+ "earthin",
+ "easin",
+ "easygoin",
+ "eatin",
+ "eavesdroppin",
+ "ebbin",
+ "echoin",
+ "eclipsin",
+ "economizin",
+ "eddyin",
+ "edgin",
+ "edifyin",
+ "editin",
+ "educatin",
+ "effacin",
+ "effectin",
+ "eggin",
+ "ejectin",
+ "elaboratin",
+ "elbowin",
+ "electin",
+ "electioneerin",
+ "electrifyin",
+ "elevatin",
+ "elicitin",
+ "eliminatin",
+ "elucidatin",
+ "eludin",
+ "elutin",
+ "emanatin",
+ "emancipatin",
+ "embalmin",
+ "embarkin",
+ "embarrassin",
+ "embeddin",
+ "embellishin",
+ "embodyin",
+ "embracin",
+ "embroiderin",
+ "emergin",
+ "emigratin",
+ "emittin",
+ "emphasisin",
+ "emphasizin",
+ "employin",
+ "empowerin",
+ "emptyin",
+ "emulatin",
+ "enablin",
+ "enactin",
+ "encapsulatin",
+ "enchantin",
+ "encirclin",
+ "enclosin",
+ "encodin",
+ "encompassin",
+ "encounterin",
+ "encouragin",
+ "encroachin",
+ "encryptin",
+ "endangerin",
+ "endearin",
+ "endeavorin",
+ "endeavourin",
+ "endin",
+ "endorsin",
+ "endowin",
+ "endurin",
+ "energizin",
+ "enervatin",
+ "enfoldin",
+ "enforcin",
+ "engagin",
+ "engenderin",
+ "engineerin",
+ "engravin",
+ "engrossin",
+ "engulfin",
+ "enhancin",
+ "enjoinin",
+ "enjoyin",
+ "enlargin",
+ "enlightenin",
+ "enlistin",
+ "enlivenin",
+ "ennoblin",
+ "enquirin",
+ "enrichin",
+ "enrollin",
+ "enslavin",
+ "ensuin",
+ "ensurin",
+ "entailin",
+ "entanglin",
+ "enterin",
+ "enterprisin",
+ "entertainin",
+ "enthrallin",
+ "enticin",
+ "entitlin",
+ "entrancin",
+ "entreatin",
+ "entrenchin",
+ "entrustin",
+ "enumeratin",
+ "enunciatin",
+ "envelopin",
+ "environin",
+ "envisionin",
+ "envyin",
+ "equalin",
+ "equalizin",
+ "equallin",
+ "equatin",
+ "equippin",
+ "eradicatin",
+ "erasin",
+ "erectin",
+ "ergoin",
+ "erodin",
+ "errin",
+ "eruptin",
+ "escalatin",
+ "escapin",
+ "eschewin",
+ "escortin",
+ "espousin",
+ "establishin",
+ "estimatin",
+ "etchin",
+ "evacuatin",
+ "evadin",
+ "evaluatin",
+ "evangelizin",
+ "evaporatin",
+ "evenin",
+ "everlastin",
+ "everythin",
+ "evidencin",
+ "evincin",
+ "evokin",
+ "evolvin",
+ "exacerbatin",
+ "exactin",
+ "exaggeratin",
+ "exaltin",
+ "examinin",
+ "exasperatin",
+ "excavatin",
+ "exceedin",
+ "excellin",
+ "exceptin",
+ "exchangin",
+ "excitin",
+ "exclaimin",
+ "excludin",
+ "excruciatin",
+ "excusin",
+ "executin",
+ "exemplifyin",
+ "exemptin",
+ "exercisin",
+ "exertin",
+ "exhalin",
+ "exhaustin",
+ "exhibitin",
+ "exhilaratin",
+ "exhortin",
+ "existin",
+ "exitin",
+ "expandin",
+ "expectin",
+ "expeditin",
+ "expellin",
+ "expendin",
+ "experiencin",
+ "experimentin",
+ "expirin",
+ "explainin",
+ "explicatin",
+ "explodin",
+ "exploitin",
+ "explorin",
+ "exportin",
+ "exposin",
+ "expoundin",
+ "expressin",
+ "extendin",
+ "extenuatin",
+ "exterminatin",
+ "externalizin",
+ "extinguishin",
+ "extollin",
+ "extortin",
+ "extractin",
+ "extrapolatin",
+ "extricatin",
+ "exudin",
+ "exultin",
+ "eyein",
+ "fabricatin",
+ "facilitatin",
+ "facin",
+ "factorin",
+ "fadin",
+ "failin",
+ "faintin",
+ "fairin",
+ "fakin",
+ "fallin",
+ "falterin",
+ "famishin",
+ "fancyin",
+ "fannin",
+ "farin",
+ "farmin",
+ "farthin",
+ "fascinatin",
+ "fashionin",
+ "fastenin",
+ "fastin",
+ "fatiguin",
+ "fattenin",
+ "faultin",
+ "favorin",
+ "favourin",
+ "fawnin",
+ "fearin",
+ "feastin",
+ "featherin",
+ "featurin",
+ "feedin",
+ "feelin",
+ "feignin",
+ "fellin",
+ "fencin",
+ "fendin",
+ "fermentin",
+ "ferryin",
+ "fertilizin",
+ "festerin",
+ "fetchin",
+ "fiddlin",
+ "fidgetin",
+ "fieldin",
+ "fightin",
+ "figurin",
+ "filin",
+ "fillin",
+ "filmin",
+ "filmmakin",
+ "filterin",
+ "financin",
+ "findin",
+ "fingerin",
+ "fingerprintin",
+ "finishin",
+ "firin",
+ "fishin",
+ "fittin",
+ "fixin",
+ "flaggin",
+ "flailin",
+ "flamin",
+ "flankin",
+ "flappin",
+ "flarin",
+ "flashin",
+ "flattenin",
+ "flatterin",
+ "flauntin",
+ "flavorin",
+ "fledglin",
+ "fleein",
+ "fleetin",
+ "flexin",
+ "flickerin",
+ "flickin",
+ "flinchin",
+ "flingin",
+ "flippin",
+ "flirtin",
+ "flittin",
+ "floatin",
+ "flockin",
+ "floggin",
+ "floodin",
+ "floorin",
+ "floppin",
+ "flounderin",
+ "flourishin",
+ "floutin",
+ "flowerin",
+ "flowin",
+ "fluctuatin",
+ "flushin",
+ "flutterin",
+ "flyin",
+ "foamin",
+ "focusin",
+ "foldin",
+ "followin",
+ "fondlin",
+ "foolin",
+ "footin",
+ "foragin",
+ "forbearin",
+ "forbiddin",
+ "forcin",
+ "forebodin",
+ "forecastin",
+ "foregoin",
+ "foreseein",
+ "foreshadowin",
+ "forfeitin",
+ "forgettin",
+ "forgin",
+ "forgivin",
+ "forkin",
+ "formattin",
+ "formin",
+ "formulatin",
+ "forsakin",
+ "forthcomin",
+ "fortifyin",
+ "forwardin",
+ "fosterin",
+ "foulin",
+ "foundin",
+ "foundlin",
+ "fowlin",
+ "fracturin",
+ "framin",
+ "franchisin",
+ "freakin",
+ "freein",
+ "freestandin",
+ "freezin",
+ "freightin",
+ "frequentin",
+ "frettin",
+ "frightenin",
+ "friskin",
+ "frizzlin",
+ "frolickin",
+ "frontin",
+ "frostin",
+ "frothin",
+ "frownin",
+ "fruitin",
+ "frustratin",
+ "fryin",
+ "fuckin",
+ "fuelin",
+ "fulfillin",
+ "fumblin",
+ "fumin",
+ "functionin",
+ "fundin",
+ "fundraisin",
+ "furnishin",
+ "furrin",
+ "furtherin",
+ "fusin",
+ "fussin",
+ "gabbin",
+ "gaggin",
+ "gainin",
+ "galavantin",
+ "gallin",
+ "gallopin",
+ "gamblin",
+ "gambolin",
+ "gangin",
+ "gapin",
+ "gardenin",
+ "garnerin",
+ "gaspin",
+ "gassin",
+ "gatherin",
+ "gatin",
+ "gaugin",
+ "gawkin",
+ "gazin",
+ "gearin",
+ "geldin",
+ "genderin",
+ "generalizin",
+ "generatin",
+ "genotypin",
+ "germinatin",
+ "gesticulatin",
+ "gesturin",
+ "gettin",
+ "gibin",
+ "gigglin",
+ "gildin",
+ "ginnin",
+ "girdin",
+ "givin",
+ "glancin",
+ "glarin",
+ "glazin",
+ "gleamin",
+ "gleanin",
+ "glidin",
+ "glimmerin",
+ "glintin",
+ "glistenin",
+ "glitterin",
+ "gloatin",
+ "globalizin",
+ "glorifyin",
+ "gloryin",
+ "glowerin",
+ "glowin",
+ "gluin",
+ "gnashin",
+ "gnawin",
+ "gobblin",
+ "goerin",
+ "goin",
+ "goldin",
+ "golfin",
+ "gorin",
+ "goslin",
+ "gossipin",
+ "gougin",
+ "governin",
+ "grabbin",
+ "gradin",
+ "graduatin",
+ "graftin",
+ "grainin",
+ "grantin",
+ "graphin",
+ "grapplin",
+ "graspin",
+ "gratifyin",
+ "gratin",
+ "grayin",
+ "grazin",
+ "greasin",
+ "greenin",
+ "greetin",
+ "grievin",
+ "grillin",
+ "grimacin",
+ "grindin",
+ "grinnin",
+ "grippin",
+ "grittin",
+ "groanin",
+ "groomin",
+ "gropin",
+ "groundbreakin",
+ "groundin",
+ "groupin",
+ "grovellin",
+ "growin",
+ "growlin",
+ "grubbin",
+ "grudgin",
+ "gruelin",
+ "grumblin",
+ "gruntin",
+ "guaranteein",
+ "guardin",
+ "guessin",
+ "guidin",
+ "gulpin",
+ "gunnin",
+ "gurglin",
+ "gushin",
+ "guyin",
+ "hackin",
+ "hagglin",
+ "hailin",
+ "hallucinatin",
+ "haltin",
+ "halvin",
+ "hammerin",
+ "hammin",
+ "hamperin",
+ "hamstrin",
+ "handicappin",
+ "handin",
+ "handlin",
+ "handshakin",
+ "handwritin",
+ "hangin",
+ "hankerin",
+ "happenin",
+ "haranguin",
+ "harassin",
+ "harborin",
+ "harbourin",
+ "hardenin",
+ "hardin",
+ "hardworkin",
+ "harkin",
+ "harmin",
+ "harmonisin",
+ "harmonizin",
+ "harnessin",
+ "harpin",
+ "harrowin",
+ "harryin",
+ "harvestin",
+ "hashin",
+ "hastenin",
+ "hatchin",
+ "hatin",
+ "haulin",
+ "hauntin",
+ "haverin",
+ "havin",
+ "hawkin",
+ "hayin",
+ "hazardin",
+ "hazin",
+ "headin",
+ "healin",
+ "heapin",
+ "hearin",
+ "hearkenin",
+ "heartbreakin",
+ "heartenin",
+ "heartrendin",
+ "heartwarmin",
+ "heatin",
+ "heavin",
+ "hedgin",
+ "heedin",
+ "heightenin",
+ "helpin",
+ "hemmin",
+ "hemorrhagin",
+ "hennin",
+ "heraldin",
+ "herdin",
+ "herrin",
+ "hesitatin",
+ "hewin",
+ "hibernatin",
+ "hidin",
+ "highlightin",
+ "hijackin",
+ "hikin",
+ "hinderin",
+ "hingin",
+ "hintin",
+ "hirelin",
+ "hirin",
+ "hissin",
+ "hitchhikin",
+ "hitchin",
+ "hittin",
+ "hoardin",
+ "hobblin",
+ "hobnobbin",
+ "hockin",
+ "hoggin",
+ "hoistin",
+ "holdin",
+ "holin",
+ "hollerin",
+ "hollowin",
+ "homecomin",
+ "homemakin",
+ "homeschoolin",
+ "homin",
+ "homogenizin",
+ "honeyin",
+ "honin",
+ "honkin",
+ "honorin",
+ "honourin",
+ "hoofin",
+ "hookin",
+ "hootin",
+ "hopin",
+ "hoppin",
+ "horrifyin",
+ "hostin",
+ "houndin",
+ "housecleanin",
+ "housekeepin",
+ "housin",
+ "hoverin",
+ "howlin",
+ "huddlin",
+ "huffin",
+ "huggin",
+ "hulkin",
+ "humanizin",
+ "humblin",
+ "humbuggin",
+ "humiliatin",
+ "hummin",
+ "humorin",
+ "humpin",
+ "hungerin",
+ "huntin",
+ "hurlin",
+ "hurryin",
+ "hurtin",
+ "hurtlin",
+ "hushin",
+ "huskin",
+ "hustlin",
+ "icin",
+ "identifyin",
+ "idlin",
+ "ignitin",
+ "ignorin",
+ "illuminatin",
+ "illustratin",
+ "imagin",
+ "imaginin",
+ "imbibin",
+ "imitatin",
+ "immersin",
+ "impactin",
+ "impairin",
+ "impartin",
+ "impedin",
+ "impellin",
+ "impendin",
+ "impingin",
+ "implementin",
+ "implicatin",
+ "implorin",
+ "implyin",
+ "importin",
+ "imposin",
+ "impressin",
+ "imprintin",
+ "imprisonin",
+ "improvin",
+ "improvisin",
+ "imputin",
+ "inbreedin",
+ "incitin",
+ "inclinin",
+ "inclosin",
+ "includin",
+ "incomin",
+ "incorporatin",
+ "increasin",
+ "incriminatin",
+ "incurrin",
+ "indexin",
+ "indicatin",
+ "inducin",
+ "indulgin",
+ "infectin",
+ "inferrin",
+ "infightin",
+ "infiltratin",
+ "inflatin",
+ "inflictin",
+ "influencin",
+ "informin",
+ "infringin",
+ "infuriatin",
+ "infusin",
+ "ingestin",
+ "ingratiatin",
+ "inhabitin",
+ "inhalin",
+ "inheritin",
+ "inhibitin",
+ "initiatin",
+ "injectin",
+ "injurin",
+ "inklin",
+ "innin",
+ "innovatin",
+ "inquirin",
+ "insertin",
+ "insinuatin",
+ "insistin",
+ "inspectin",
+ "inspirin",
+ "installin",
+ "instigatin",
+ "instillin",
+ "institutin",
+ "instructin",
+ "insulatin",
+ "insultin",
+ "insurin",
+ "integratin",
+ "intendin",
+ "intensifyin",
+ "interactin",
+ "interceptin",
+ "interestin",
+ "interfacin",
+ "interferin",
+ "interlacin",
+ "interlockin",
+ "interminglin",
+ "internalizin",
+ "interposin",
+ "interpretin",
+ "interrogatin",
+ "interruptin",
+ "intersectin",
+ "intertwinin",
+ "intervenin",
+ "interviewin",
+ "interweavin",
+ "intimatin",
+ "intimidatin",
+ "intoxicatin",
+ "intriguin",
+ "introducin",
+ "intrudin",
+ "invadin",
+ "inventin",
+ "invertin",
+ "investigatin",
+ "investin",
+ "invigoratin",
+ "invitin",
+ "invokin",
+ "involvin",
+ "ionizin",
+ "ironin",
+ "irrigatin",
+ "irritatin",
+ "isolatin",
+ "issuin",
+ "itchin",
+ "jabberin",
+ "jabbin",
+ "jammin",
+ "janglin",
+ "jarrin",
+ "jauntin",
+ "jeerin",
+ "jeopardizin",
+ "jerkin",
+ "jestin",
+ "jiggin",
+ "jinglin",
+ "jobbin",
+ "jockeyin",
+ "joggin",
+ "joinin",
+ "jokin",
+ "jollyin",
+ "joltin",
+ "joshin",
+ "jostlin",
+ "jottin",
+ "jouncin",
+ "journalin",
+ "journeyin",
+ "joustin",
+ "judgin",
+ "jugglin",
+ "jumpin",
+ "junketin",
+ "justifyin",
+ "juttin",
+ "juxtaposin",
+ "kayakin",
+ "keelin",
+ "keenin",
+ "keepin",
+ "kennin",
+ "ketchin",
+ "keyin",
+ "kickin",
+ "kiddin",
+ "kidnappin",
+ "killin",
+ "kindlin",
+ "kinkin",
+ "kissin",
+ "kitin",
+ "kneadin",
+ "kneelin",
+ "knifin",
+ "knittin",
+ "knockin",
+ "knottin",
+ "knowin",
+ "labelin",
+ "labellin",
+ "laborin",
+ "labourin",
+ "lacin",
+ "lackin",
+ "lactatin",
+ "ladin",
+ "ladlin",
+ "laggin",
+ "lakin",
+ "lambastin",
+ "lamentin",
+ "lamin",
+ "landholdin",
+ "landin",
+ "landscapin",
+ "languishin",
+ "lappin",
+ "lapsin",
+ "lashin",
+ "lastin",
+ "latherin",
+ "laughin",
+ "launchin",
+ "launderin",
+ "lawmakin",
+ "layerin",
+ "layin",
+ "lazin",
+ "leachin",
+ "leadin",
+ "leakin",
+ "leanin",
+ "leapin",
+ "learnin",
+ "leasin",
+ "leatherin",
+ "leavin",
+ "lecturin",
+ "leerin",
+ "leggin",
+ "legislatin",
+ "legitimatin",
+ "legitimizin",
+ "lendin",
+ "lengthenin",
+ "lessenin",
+ "lessin",
+ "letterin",
+ "lettin",
+ "levelin",
+ "levellin",
+ "leveragin",
+ "levyin",
+ "liberalizin",
+ "liberatin",
+ "licensin",
+ "lickin",
+ "lifesavin",
+ "liftin",
+ "lightenin",
+ "lightin",
+ "lightnin",
+ "likin",
+ "liltin",
+ "limitin",
+ "limpin",
+ "lingerin",
+ "linin",
+ "linkin",
+ "liquidatin",
+ "lispin",
+ "listenin",
+ "listin",
+ "litterin",
+ "livin",
+ "loadin",
+ "loafin",
+ "loanin",
+ "loathin",
+ "lobbyin",
+ "lobsterin",
+ "localizin",
+ "locatin",
+ "lockin",
+ "lodgin",
+ "loggin",
+ "loiterin",
+ "lollin",
+ "longin",
+ "longstandin",
+ "lookin",
+ "loomin",
+ "loopin",
+ "loosenin",
+ "loosin",
+ "lootin",
+ "lopin",
+ "lordin",
+ "losin",
+ "loungin",
+ "lovemakin",
+ "lovin",
+ "lowerin",
+ "lowin",
+ "lubricatin",
+ "luggin",
+ "lullin",
+ "lumberin",
+ "lunchin",
+ "lurchin",
+ "lurin",
+ "lurkin",
+ "lyin",
+ "lynchin",
+ "machinin",
+ "maddenin",
+ "magnetizin",
+ "magnifyin",
+ "mailin",
+ "maimin",
+ "mainsprin",
+ "mainstreamin",
+ "maintainin",
+ "majorin",
+ "makin",
+ "malfunctionin",
+ "malingerin",
+ "managin",
+ "mandatin",
+ "maneuverin",
+ "manifestin",
+ "manipulatin",
+ "mannin",
+ "manoeuverin",
+ "manufacturin",
+ "manurin",
+ "mappin",
+ "maraudin",
+ "marchin",
+ "marketin",
+ "markin",
+ "marryin",
+ "marshallin",
+ "marvelin",
+ "marvellin",
+ "mascottin",
+ "mashin",
+ "maskin",
+ "masqueradin",
+ "massagin",
+ "massin",
+ "masterin",
+ "masturbatin",
+ "matchin",
+ "matchmakin",
+ "materializin",
+ "matin",
+ "mattin",
+ "maturin",
+ "maulin",
+ "maximisin",
+ "maximizin",
+ "meanderin",
+ "meanin",
+ "measurin",
+ "meddlin",
+ "mediatin",
+ "meditatin",
+ "meetin",
+ "meldin",
+ "meltin",
+ "memorizin",
+ "menacin",
+ "mendin",
+ "mentionin",
+ "mentorin",
+ "merchandisin",
+ "mergin",
+ "meshin",
+ "mesmerizin",
+ "messagin",
+ "messin",
+ "meterin",
+ "middlin",
+ "midmornin",
+ "migratin",
+ "milkin",
+ "millin",
+ "mimickin",
+ "min",
+ "mincin",
+ "mindin",
+ "minglin",
+ "minimisin",
+ "minimizin",
+ "minin",
+ "ministerin",
+ "mirrorin",
+ "misbehavin",
+ "misdoubtin",
+ "misgivin",
+ "misjudgin",
+ "mislayin",
+ "misleadin",
+ "misreadin",
+ "misrepresentin",
+ "missin",
+ "mistakin",
+ "mistreatin",
+ "mistrustin",
+ "misunderstandin",
+ "mitigatin",
+ "mixin",
+ "moanin",
+ "mobilisin",
+ "mobilizin",
+ "mockin",
+ "modelin",
+ "modellin",
+ "moderatin",
+ "modernizin",
+ "modifyin",
+ "modulatin",
+ "moistenin",
+ "moldin",
+ "molestin",
+ "monitorin",
+ "monkeyin",
+ "monopolizin",
+ "moochin",
+ "moonin",
+ "moonshinin",
+ "moorin",
+ "mopin",
+ "moppin",
+ "moralizin",
+ "mornin",
+ "morphin",
+ "mortifyin",
+ "moseyin",
+ "motherin",
+ "motionin",
+ "motivatin",
+ "motorin",
+ "moulderin",
+ "mouldin",
+ "mountaineerin",
+ "mountin",
+ "mournin",
+ "mouthin",
+ "movin",
+ "mowin",
+ "muckin",
+ "muddlin",
+ "mullin",
+ "multiplexin",
+ "multiplyin",
+ "multitaskin",
+ "mumblin",
+ "munchin",
+ "murderin",
+ "murmurin",
+ "musin",
+ "mussin",
+ "musterin",
+ "mutterin",
+ "mystifyin",
+ "nabbin",
+ "naggin",
+ "nailin",
+ "namin",
+ "namsayin",
+ "nappin",
+ "narratin",
+ "narrowin",
+ "nauseatin",
+ "navigatin",
+ "nearin",
+ "necessitatin",
+ "necrotizin",
+ "needin",
+ "needlin",
+ "negatin",
+ "neglectin",
+ "negotiatin",
+ "neighborin",
+ "neighbourin",
+ "neighin",
+ "nestin",
+ "nestlin",
+ "nettin",
+ "networkin",
+ "neuroimagin",
+ "neutralizin",
+ "nibblin",
+ "nickin",
+ "nippin",
+ "noddin",
+ "nominatin",
+ "nonconformin",
+ "nonlivin",
+ "nonthreatenin",
+ "normalizin",
+ "northin",
+ "nosin",
+ "nothin",
+ "noticin",
+ "notifyin",
+ "notin",
+ "notwithstandin",
+ "nourishin",
+ "nudgin",
+ "numberin",
+ "numbin",
+ "nursin",
+ "nurturin",
+ "obeyin",
+ "objectifyin",
+ "objectin",
+ "obligin",
+ "obliteratin",
+ "obscurin",
+ "observin",
+ "obsessin",
+ "obstructin",
+ "obtainin",
+ "obviatin",
+ "occasionin",
+ "occludin",
+ "occupyin",
+ "occurrin",
+ "offendin",
+ "offerin",
+ "officiatin",
+ "offin",
+ "offsettin",
+ "offshorin",
+ "offsprin",
+ "oglin",
+ "oilin",
+ "omittin",
+ "oncomin",
+ "ongoin",
+ "oozin",
+ "openin",
+ "operatin",
+ "opinin",
+ "opposin",
+ "oppressin",
+ "optimisin",
+ "optimizin",
+ "optin",
+ "orbitin",
+ "orchestratin",
+ "ordainin",
+ "orderin",
+ "ordinatin",
+ "organisin",
+ "organizin",
+ "orientin",
+ "originatin",
+ "ornamentin",
+ "oscillatin",
+ "oustin",
+ "outbreakin",
+ "outcroppin",
+ "outfittin",
+ "outgoin",
+ "outin",
+ "outlawin",
+ "outlinin",
+ "outlyin",
+ "outnumberin",
+ "outpourin",
+ "outsourcin",
+ "outstandin",
+ "outstrippin",
+ "overarchin",
+ "overbearin",
+ "overcomin",
+ "overcrowdin",
+ "overdoin",
+ "overeatin",
+ "overestimatin",
+ "overexpressin",
+ "overfishin",
+ "overflowin",
+ "overgrazin",
+ "overhangin",
+ "overhaulin",
+ "overhearin",
+ "overheatin",
+ "overlappin",
+ "overlayin",
+ "overloadin",
+ "overlookin",
+ "overlyin",
+ "overmasterin",
+ "overpowerin",
+ "overreachin",
+ "overreactin",
+ "overridin",
+ "overrulin",
+ "overrunnin",
+ "overseein",
+ "overshadowin",
+ "overstatin",
+ "oversteppin",
+ "overtakin",
+ "overthrowin",
+ "overtrainin",
+ "overturnin",
+ "overweenin",
+ "overwhelmin",
+ "overwritin",
+ "owin",
+ "ownin",
+ "oxidisin",
+ "oxidizin",
+ "pacifyin",
+ "pacin",
+ "packagin",
+ "packin",
+ "paddin",
+ "paddlin",
+ "pagin",
+ "painstakin",
+ "paintin",
+ "pairin",
+ "palaverin",
+ "palin",
+ "palpitatin",
+ "pamperin",
+ "panderin",
+ "panelin",
+ "panellin",
+ "panickin",
+ "pannin",
+ "pantin",
+ "paperin",
+ "paradin",
+ "parallelin",
+ "paralysin",
+ "paralyzin",
+ "paraphrasin",
+ "pardonin",
+ "parentin",
+ "parin",
+ "parkin",
+ "parsin",
+ "partakin",
+ "participatin",
+ "partin",
+ "partitionin",
+ "partnerin",
+ "partyin",
+ "passin",
+ "pastin",
+ "patchin",
+ "patentin",
+ "patrollin",
+ "patronisin",
+ "patronizin",
+ "patterin",
+ "patternin",
+ "pattin",
+ "pausin",
+ "pavin",
+ "pawin",
+ "pawnin",
+ "payin",
+ "peacebuildin",
+ "peacekeepin",
+ "peacemakin",
+ "peakin",
+ "pealin",
+ "peckin",
+ "peddlin",
+ "peekin",
+ "peelin",
+ "peepin",
+ "peerin",
+ "peggin",
+ "peltin",
+ "pendin",
+ "penetratin",
+ "pennin",
+ "peoplin",
+ "pepperin",
+ "perceivin",
+ "perchin",
+ "percolatin",
+ "perfectin",
+ "perforatin",
+ "performin",
+ "perishin",
+ "permeatin",
+ "permittin",
+ "perpetratin",
+ "perpetuatin",
+ "perplexin",
+ "persecutin",
+ "perseverin",
+ "persistin",
+ "perspirin",
+ "persuadin",
+ "pertainin",
+ "perusin",
+ "pervadin",
+ "pervertin",
+ "pesterin",
+ "peterin",
+ "petitionin",
+ "pettin",
+ "phasin",
+ "philanderin",
+ "philosophizin",
+ "phishin",
+ "phonin",
+ "photocopyin",
+ "photographin",
+ "phrasin",
+ "pickerin",
+ "picketin",
+ "pickin",
+ "picklin",
+ "picturin",
+ "piddlin",
+ "piecin",
+ "piercin",
+ "pilferin",
+ "pilin",
+ "pillagin",
+ "pilotin",
+ "pinchin",
+ "pinin",
+ "pinnin",
+ "pintin",
+ "pioneerin",
+ "pipin",
+ "pissin",
+ "pitchin",
+ "pittin",
+ "pityin",
+ "pivotin",
+ "placin",
+ "plaguin",
+ "planin",
+ "plankin",
+ "plannin",
+ "plantin",
+ "plasterin",
+ "platin",
+ "playin",
+ "playthin",
+ "pleadin",
+ "pleasin",
+ "pleasurin",
+ "pledgin",
+ "ploddin",
+ "plottin",
+ "ploughin",
+ "plowin",
+ "pluckin",
+ "pluggin",
+ "plumbin",
+ "plummetin",
+ "plumpin",
+ "plunderin",
+ "plungin",
+ "plyin",
+ "poachin",
+ "pocketin",
+ "podcastin",
+ "pointin",
+ "poisin",
+ "poisonin",
+ "pokin",
+ "polarizin",
+ "policin",
+ "policymakin",
+ "polin",
+ "polishin",
+ "pollin",
+ "pollutin",
+ "ponderin",
+ "pooin",
+ "poolin",
+ "poppin",
+ "populatin",
+ "porin",
+ "portrayin",
+ "posin",
+ "positin",
+ "positionin",
+ "possessin",
+ "postin",
+ "postponin",
+ "postulatin",
+ "posturin",
+ "potterin",
+ "pottin",
+ "poundin",
+ "pourin",
+ "poutin",
+ "powerin",
+ "practicin",
+ "practisin",
+ "praisin",
+ "prancin",
+ "prankin",
+ "prayin",
+ "preachin",
+ "precedin",
+ "precipitatin",
+ "precludin",
+ "predictin",
+ "predisposin",
+ "predominatin",
+ "preexistin",
+ "preferrin",
+ "preparin",
+ "preponderatin",
+ "prepossessin",
+ "preprocessin",
+ "prescribin",
+ "presentin",
+ "preservin",
+ "presidin",
+ "pressin",
+ "pressurin",
+ "presumin",
+ "presupposin",
+ "pretendin",
+ "prevailin",
+ "preventin",
+ "preyin",
+ "pricin",
+ "prickin",
+ "primin",
+ "primpin",
+ "printin",
+ "prioritizin",
+ "privateerin",
+ "probin",
+ "proceedin",
+ "processin",
+ "proclaimin",
+ "procurin",
+ "proddin",
+ "producin",
+ "professin",
+ "profilin",
+ "profiteerin",
+ "profitin",
+ "programmin",
+ "progressin",
+ "prohibitin",
+ "projectin",
+ "proliferatin",
+ "prolongin",
+ "promenadin",
+ "promisin",
+ "promotin",
+ "promptin",
+ "promulgatin",
+ "pronouncin",
+ "proofin",
+ "proofreadin",
+ "propagatin",
+ "propellin",
+ "prophesyin",
+ "proposin",
+ "proppin",
+ "prosecutin",
+ "proselytizin",
+ "prospectin",
+ "prosperin",
+ "prostratin",
+ "protectin",
+ "protestin",
+ "prototypin",
+ "protrudin",
+ "providin",
+ "provin",
+ "provisionin",
+ "provokin",
+ "prowlin",
+ "prunin",
+ "pryin",
+ "publicizin",
+ "publishin",
+ "puckerin",
+ "puddin",
+ "pullin",
+ "pulsatin",
+ "pulsin",
+ "pumpin",
+ "punchin",
+ "punishin",
+ "punnin",
+ "purchasin",
+ "purgin",
+ "purifyin",
+ "purportin",
+ "purposin",
+ "purrin",
+ "pursuin",
+ "pushin",
+ "putterin",
+ "puttin",
+ "puzzlin",
+ "quakin",
+ "qualifyin",
+ "quantifyin",
+ "quarrelin",
+ "quarrellin",
+ "quarryin",
+ "quarterin",
+ "quaverin",
+ "queerin",
+ "quellin",
+ "quenchin",
+ "queryin",
+ "questin",
+ "questionin",
+ "queuein",
+ "queuin",
+ "quickenin",
+ "quietin",
+ "quiltin",
+ "quittin",
+ "quiverin",
+ "quizzin",
+ "quotin",
+ "racin",
+ "rackin",
+ "radiatin",
+ "raftin",
+ "ragin",
+ "raidin",
+ "railin",
+ "rainin",
+ "rakin",
+ "rallyin",
+ "ramblin",
+ "rammin",
+ "rampagin",
+ "rampin",
+ "ranchin",
+ "rangin",
+ "rantin",
+ "rapin",
+ "rappin",
+ "rarin",
+ "raspin",
+ "ratifyin",
+ "ratin",
+ "rationalizin",
+ "rationin",
+ "rattlin",
+ "ravagin",
+ "ravin",
+ "ravishin",
+ "reachin",
+ "reactin",
+ "readin",
+ "reaffirmin",
+ "realisin",
+ "realizin",
+ "reapin",
+ "reappearin",
+ "rearin",
+ "rearrangin",
+ "reasonin",
+ "reassurin",
+ "rebellin",
+ "rebuildin",
+ "rebukin",
+ "recallin",
+ "recastin",
+ "recedin",
+ "receivin",
+ "reciprocatin",
+ "recitin",
+ "reckonin",
+ "reclaimin",
+ "reclinin",
+ "recognisin",
+ "recognizin",
+ "recollectin",
+ "recommendin",
+ "reconcilin",
+ "reconstructin",
+ "recordin",
+ "recountin",
+ "recoverin",
+ "recreatin",
+ "recruitin",
+ "rectifyin",
+ "recurrin",
+ "recyclin",
+ "reddenin",
+ "reddin",
+ "redeemin",
+ "redefinin",
+ "redirectin",
+ "redistrictin",
+ "reducin",
+ "reekin",
+ "reelin",
+ "reengineerin",
+ "refactorin",
+ "referencin",
+ "referrin",
+ "refinancin",
+ "refinin",
+ "reflectin",
+ "reformin",
+ "refractin",
+ "refrainin",
+ "reframin",
+ "refreshin",
+ "refuelin",
+ "refusin",
+ "refutin",
+ "regainin",
+ "regardin",
+ "regeneratin",
+ "registerin",
+ "regrettin",
+ "regulatin",
+ "rehashin",
+ "rehearin",
+ "rehearsin",
+ "reignin",
+ "reinforcin",
+ "reinventin",
+ "reiteratin",
+ "rejectin",
+ "rejoicin",
+ "rejoinin",
+ "relapsin",
+ "relatin",
+ "relaxin",
+ "relayin",
+ "releasin",
+ "relievin",
+ "relinquishin",
+ "relishin",
+ "relivin",
+ "reloadin",
+ "relocatin",
+ "relyin",
+ "remainin",
+ "remakin",
+ "remarkin",
+ "remedyin",
+ "rememberin",
+ "remindin",
+ "reminiscin",
+ "remittin",
+ "remodelin",
+ "remodellin",
+ "removin",
+ "renamin",
+ "renderin",
+ "rendin",
+ "renewin",
+ "renouncin",
+ "renovatin",
+ "rentin",
+ "reopenin",
+ "reorderin",
+ "reorganizin",
+ "repairin",
+ "repayin",
+ "repealin",
+ "repeatin",
+ "repellin",
+ "repentin",
+ "replacin",
+ "replenishin",
+ "replicatin",
+ "replyin",
+ "reportin",
+ "reposin",
+ "repositionin",
+ "representin",
+ "repressin",
+ "reprintin",
+ "reproachin",
+ "reprocessin",
+ "reproducin",
+ "reprovin",
+ "repudiatin",
+ "requestin",
+ "requirin",
+ "rereadin",
+ "rescuin",
+ "researchin",
+ "resemblin",
+ "resentin",
+ "reservin",
+ "resettin",
+ "reshapin",
+ "residin",
+ "resignin",
+ "resistin",
+ "resolvin",
+ "resonatin",
+ "resortin",
+ "resoundin",
+ "respectin",
+ "respondin",
+ "restin",
+ "restorin",
+ "restrainin",
+ "restrictin",
+ "restructurin",
+ "resultin",
+ "resumin",
+ "resurfacin",
+ "retailin",
+ "retainin",
+ "retardin",
+ "retellin",
+ "rethinkin",
+ "retirin",
+ "retracin",
+ "retrainin",
+ "retreatin",
+ "retrievin",
+ "returnin",
+ "reusin",
+ "revealin",
+ "revellin",
+ "reverberatin",
+ "reversin",
+ "revertin",
+ "reviewin",
+ "revisin",
+ "revisitin",
+ "revivin",
+ "revoltin",
+ "revolvin",
+ "rewardin",
+ "reworkin",
+ "rewritin",
+ "rhymin",
+ "riddin",
+ "ridiculin",
+ "ridin",
+ "riggin",
+ "rightin",
+ "rilin",
+ "ringin",
+ "rinsin",
+ "riotin",
+ "ripenin",
+ "rippin",
+ "ripplin",
+ "risin",
+ "riskin",
+ "rivetin",
+ "roamin",
+ "roarin",
+ "roastin",
+ "robbin",
+ "rockin",
+ "rollickin",
+ "rollin",
+ "romancin",
+ "rompin",
+ "roofin",
+ "roomin",
+ "roostin",
+ "rootin",
+ "ropin",
+ "rotatin",
+ "rottin",
+ "roughin",
+ "roundin",
+ "rousin",
+ "routin",
+ "rovin",
+ "rowin",
+ "rubberin",
+ "rubbin",
+ "rufflin",
+ "ruinin",
+ "rulemakin",
+ "rulin",
+ "rumblin",
+ "ruminatin",
+ "rummagin",
+ "rumplin",
+ "runnin",
+ "rushin",
+ "rustin",
+ "rustlin",
+ "sackin",
+ "sacrificin",
+ "saddlin",
+ "safeguardin",
+ "safekeepin",
+ "saggin",
+ "sailin",
+ "saltin",
+ "salutin",
+ "samplin",
+ "sanctifyin",
+ "sanctionin",
+ "saplin",
+ "sassin",
+ "satisfyin",
+ "saunterin",
+ "savin",
+ "savorin",
+ "sawin",
+ "sayin",
+ "scaffoldin",
+ "scaldin",
+ "scalin",
+ "scalpin",
+ "scamperin",
+ "scannin",
+ "scarin",
+ "scarrin",
+ "scathin",
+ "scatterin",
+ "scavengin",
+ "scentin",
+ "schedulin",
+ "schemin",
+ "schillin",
+ "schoolin",
+ "scoffin",
+ "scoldin",
+ "scoopin",
+ "scootin",
+ "scorchin",
+ "scorin",
+ "scourin",
+ "scoutin",
+ "scowlin",
+ "scrabblin",
+ "scramblin",
+ "scrapin",
+ "scrappin",
+ "scratchin",
+ "screamin",
+ "screechin",
+ "screenin",
+ "screwin",
+ "scribblin",
+ "scrimpin",
+ "scriptin",
+ "scrollin",
+ "scrubbin",
+ "scrutinizin",
+ "scufflin",
+ "scullin",
+ "scurryin",
+ "scuttlin",
+ "seafarin",
+ "sealin",
+ "searchin",
+ "searin",
+ "seasonin",
+ "seatin",
+ "secedin",
+ "secretin",
+ "securin",
+ "seducin",
+ "seedin",
+ "seedlin",
+ "seein",
+ "seekin",
+ "seemin",
+ "seepin",
+ "seethin",
+ "selectin",
+ "sellin",
+ "sendin",
+ "sensin",
+ "sentencin",
+ "separatin",
+ "sequencin",
+ "serenadin",
+ "servicin",
+ "servin",
+ "settin",
+ "settlin",
+ "sewin",
+ "shadin",
+ "shadowin",
+ "shakin",
+ "shamin",
+ "shapin",
+ "sharin",
+ "sharkin",
+ "sharpenin",
+ "shatterin",
+ "shavin",
+ "shearin",
+ "sheathin",
+ "sheddin",
+ "sheetin",
+ "shellin",
+ "shelterin",
+ "shelvin",
+ "shieldin",
+ "shiftin",
+ "shillin",
+ "shimmerin",
+ "shinglin",
+ "shinin",
+ "shinnin",
+ "shipbuildin",
+ "shippin",
+ "shirkin",
+ "shittin",
+ "shiverin",
+ "shockin",
+ "shoein",
+ "shoemakin",
+ "shootin",
+ "shoppin",
+ "shortcomin",
+ "shortenin",
+ "shoulderin",
+ "shoutin",
+ "shovelin",
+ "shovellin",
+ "shovin",
+ "showerin",
+ "showin",
+ "shriekin",
+ "shrinkin",
+ "shruggin",
+ "shuckin",
+ "shudderin",
+ "shufflin",
+ "shuntin",
+ "shuttin",
+ "shyin",
+ "siblin",
+ "sickenin",
+ "sidin",
+ "sidlin",
+ "siftin",
+ "sighin",
+ "sightin",
+ "sightseein",
+ "signalin",
+ "signallin",
+ "signifyin",
+ "signin",
+ "silencin",
+ "simmerin",
+ "simperin",
+ "simplifyin",
+ "simulatin",
+ "singein",
+ "singin",
+ "sinkin",
+ "sinnin",
+ "sippin",
+ "sitin",
+ "sittin",
+ "situatin",
+ "sizin",
+ "sizzlin",
+ "skatin",
+ "sketchin",
+ "skiin",
+ "skimmin",
+ "skimpin",
+ "skinnin",
+ "skippin",
+ "skirmishin",
+ "skirtin",
+ "skulkin",
+ "skylarkin",
+ "slackenin",
+ "slackin",
+ "slammin",
+ "slanderin",
+ "slantin",
+ "slappin",
+ "slashin",
+ "slaughterin",
+ "slavin",
+ "slayin",
+ "sleddin",
+ "sleepin",
+ "slicin",
+ "slickin",
+ "slidin",
+ "slightin",
+ "slingin",
+ "slinkin",
+ "slippin",
+ "slitherin",
+ "slobberin",
+ "slopin",
+ "sloppin",
+ "sloshin",
+ "slouchin",
+ "slowin",
+ "slumberin",
+ "smackin",
+ "smartin",
+ "smashin",
+ "smatterin",
+ "smellin",
+ "smeltin",
+ "smilin",
+ "smokin",
+ "smolderin",
+ "smoothin",
+ "smotherin",
+ "smoulderin",
+ "smugglin",
+ "snakin",
+ "snappin",
+ "snarlin",
+ "snatchin",
+ "sneakin",
+ "sneerin",
+ "sneezin",
+ "snickerin",
+ "sniffin",
+ "snifflin",
+ "sniggerin",
+ "snipin",
+ "snivelin",
+ "snivellin",
+ "snoopin",
+ "snoozin",
+ "snorin",
+ "snortin",
+ "snowin",
+ "snuffin",
+ "soakin",
+ "soapin",
+ "soarin",
+ "sobbin",
+ "soberin",
+ "socializin",
+ "softenin",
+ "solderin",
+ "soldierin",
+ "solicitin",
+ "solvin",
+ "somethin",
+ "soothin",
+ "soppin",
+ "sorrowin",
+ "sortin",
+ "soundin",
+ "sourcin",
+ "sousin",
+ "sowin",
+ "spacin",
+ "spankin",
+ "spannin",
+ "sparin",
+ "sparkin",
+ "sparklin",
+ "sparrin",
+ "spatterin",
+ "spawnin",
+ "speakin",
+ "specializin",
+ "specifyin",
+ "speculatin",
+ "speedin",
+ "spellin",
+ "spendin",
+ "spewin",
+ "spikin",
+ "spillin",
+ "spindlin",
+ "spinnin",
+ "spiralin",
+ "spitin",
+ "spittin",
+ "splashin",
+ "splicin",
+ "splinterin",
+ "splittin",
+ "spoilin",
+ "spongin",
+ "sponsorin",
+ "sportin",
+ "spottin",
+ "spoutin",
+ "sprawlin",
+ "sprayin",
+ "spreadin",
+ "springin",
+ "sprinklin",
+ "sprintin",
+ "sproutin",
+ "spurrin",
+ "sputterin",
+ "spyin",
+ "squabblin",
+ "squallin",
+ "squanderin",
+ "squarin",
+ "squashin",
+ "squattin",
+ "squawkin",
+ "squeakin",
+ "squealin",
+ "squeezin",
+ "squintin",
+ "squirmin",
+ "squirtin",
+ "stabbin",
+ "stabilizin",
+ "stackin",
+ "staffin",
+ "staggerin",
+ "stagin",
+ "stainin",
+ "stakin",
+ "stalin",
+ "stalkin",
+ "stallin",
+ "stammerin",
+ "stampedin",
+ "stampin",
+ "standin",
+ "starin",
+ "starrin",
+ "startin",
+ "startlin",
+ "starvin",
+ "statin",
+ "stavin",
+ "stayin",
+ "steadin",
+ "steadyin",
+ "stealin",
+ "steamin",
+ "steepin",
+ "steerin",
+ "stemmin",
+ "stentin",
+ "steppin",
+ "stereotypin",
+ "sterlin",
+ "stewin",
+ "stickin",
+ "stiffenin",
+ "stiffin",
+ "stiflin",
+ "stimulatin",
+ "stingin",
+ "stinkin",
+ "stipulatin",
+ "stirlin",
+ "stirrin",
+ "stitchin",
+ "stockin",
+ "stompin",
+ "stoopin",
+ "stoppin",
+ "storin",
+ "stormin",
+ "storytellin",
+ "stowin",
+ "straddlin",
+ "stragglin",
+ "straightenin",
+ "strainin",
+ "stranglin",
+ "strappin",
+ "strayin",
+ "streakin",
+ "streamin",
+ "streamlinin",
+ "strengthenin",
+ "stressin",
+ "stretchin",
+ "stridin",
+ "strikin",
+ "stringin",
+ "strippin",
+ "strivin",
+ "strokin",
+ "strollin",
+ "stroppin",
+ "structurin",
+ "strugglin",
+ "struttin",
+ "studyin",
+ "stuffin",
+ "stumblin",
+ "stumpin",
+ "stunnin",
+ "stutterin",
+ "stylin",
+ "subduin",
+ "subjectin",
+ "submittin",
+ "subordinatin",
+ "subscribin",
+ "subsidin",
+ "subsistin",
+ "substitutin",
+ "subtractin",
+ "subvertin",
+ "succeedin",
+ "succumbin",
+ "suckin",
+ "sucklin",
+ "sufferin",
+ "suffocatin",
+ "sugarin",
+ "suggestin",
+ "suin",
+ "suitin",
+ "sulkin",
+ "summarizin",
+ "summin",
+ "summonin",
+ "sunnin",
+ "superconductin",
+ "superintendin",
+ "supersedin",
+ "supervisin",
+ "suppin",
+ "supplementin",
+ "supplyin",
+ "supportin",
+ "supposin",
+ "suppressin",
+ "surfacin",
+ "surfin",
+ "surgin",
+ "surmountin",
+ "surpassin",
+ "surprisin",
+ "surrenderin",
+ "surroundin",
+ "surveyin",
+ "survivin",
+ "suspectin",
+ "suspendin",
+ "suspicionin",
+ "sustainin",
+ "swabbin",
+ "swaggerin",
+ "swallowin",
+ "swampin",
+ "swappin",
+ "swarmin",
+ "swayin",
+ "swearin",
+ "sweatin",
+ "sweepin",
+ "sweetenin",
+ "sweetheartin",
+ "swellin",
+ "swelterin",
+ "swimmin",
+ "swindlin",
+ "swingin",
+ "swipin",
+ "swirlin",
+ "swishin",
+ "switchin",
+ "swoopin",
+ "symbolizin",
+ "sympathizin",
+ "synthesizin",
+ "tackin",
+ "tacklin",
+ "taggin",
+ "tailin",
+ "tailorin",
+ "takin",
+ "talkin",
+ "tamin",
+ "tamperin",
+ "tanglin",
+ "tangoin",
+ "tankin",
+ "tantalisin",
+ "tantalizin",
+ "taperin",
+ "tapin",
+ "tappin",
+ "targetin",
+ "tarin",
+ "tarryin",
+ "taskin",
+ "tastin",
+ "tatterin",
+ "tattooin",
+ "tauntin",
+ "taxin",
+ "teachin",
+ "teamin",
+ "tearin",
+ "teasin",
+ "teemin",
+ "teeterin",
+ "teethin",
+ "telecommutin",
+ "telegraphin",
+ "telemarketin",
+ "telephonin",
+ "tellin",
+ "temperin",
+ "temptin",
+ "tenderin",
+ "tendin",
+ "tensin",
+ "tentin",
+ "terminatin",
+ "terrifyin",
+ "terrorizin",
+ "testifyin",
+ "testin",
+ "tetherin",
+ "textin",
+ "thankin",
+ "thanksgivin",
+ "thawin",
+ "theorisin",
+ "theorizin",
+ "thickenin",
+ "thievin",
+ "thinkin",
+ "thinnin",
+ "thirstin",
+ "thoroughgoin",
+ "thrashin",
+ "threadin",
+ "threatenin",
+ "threshin",
+ "thresholdin",
+ "thrillin",
+ "thrivin",
+ "throbbin",
+ "throngin",
+ "throttlin",
+ "throwin",
+ "thrustin",
+ "thuddin",
+ "thumbin",
+ "thumpin",
+ "thunderin",
+ "thwartin",
+ "tickin",
+ "ticklin",
+ "tidyin",
+ "tightenin",
+ "tilin",
+ "tillin",
+ "tiltin",
+ "timin",
+ "tinglin",
+ "tinkerin",
+ "tinklin",
+ "tintin",
+ "tippin",
+ "tirin",
+ "tithin",
+ "titillatin",
+ "titterin",
+ "toastin",
+ "toddlin",
+ "toilin",
+ "toleratin",
+ "tollin",
+ "toolin",
+ "tootin",
+ "toppin",
+ "topplin",
+ "tormentin",
+ "torturin",
+ "tossin",
+ "totalin",
+ "totallin",
+ "totin",
+ "totterin",
+ "touchin",
+ "tourin",
+ "toutin",
+ "towerin",
+ "towin",
+ "toyin",
+ "tracin",
+ "trackin",
+ "tradin",
+ "traffickin",
+ "trailin",
+ "trainin",
+ "traipsin",
+ "traitin",
+ "trampin",
+ "tramplin",
+ "transactin",
+ "transcendin",
+ "transcribin",
+ "transferrin",
+ "transformin",
+ "transgressin",
+ "transitionin",
+ "translatin",
+ "transmittin",
+ "transpirin",
+ "transplantin",
+ "transportin",
+ "transposin",
+ "trapesin",
+ "trappin",
+ "travelin",
+ "travellin",
+ "traversin",
+ "trawlin",
+ "treadin",
+ "treatin",
+ "trekkin",
+ "tremblin",
+ "trenchin",
+ "trendin",
+ "trespassin",
+ "trickin",
+ "tricklin",
+ "triflin",
+ "triggerin",
+ "trillin",
+ "trimmin",
+ "trippin",
+ "triumphin",
+ "trollin",
+ "trompin",
+ "troopin",
+ "trottin",
+ "troubleshootin",
+ "troublin",
+ "trouncin",
+ "truckin",
+ "trudgin",
+ "trumpetin",
+ "trustin",
+ "tryin",
+ "trystin",
+ "tubin",
+ "tuckerin",
+ "tuckin",
+ "tuggin",
+ "tumblin",
+ "tunin",
+ "tunnelin",
+ "tunnellin",
+ "turnin",
+ "tutorin",
+ "tweakin",
+ "twiddlin",
+ "twinin",
+ "twinklin",
+ "twinnin",
+ "twirlin",
+ "twistin",
+ "twitchin",
+ "twitterin",
+ "tyin",
+ "typesettin",
+ "typewritin",
+ "typin",
+ "unappealin",
+ "unassumin",
+ "unavailin",
+ "unbecomin",
+ "unbelievin",
+ "unbendin",
+ "unblinkin",
+ "uncarin",
+ "unceasin",
+ "unchangin",
+ "uncomplainin",
+ "uncomprehendin",
+ "uncompromisin",
+ "unconvincin",
+ "uncoverin",
+ "undercuttin",
+ "underestimatin",
+ "undergoin",
+ "underlinin",
+ "underlyin",
+ "underminin",
+ "underpinnin",
+ "underscorin",
+ "understandin",
+ "undertakin",
+ "underwritin",
+ "undeviatin",
+ "undoin",
+ "undressin",
+ "undulatin",
+ "undyin",
+ "unearthin",
+ "unendin",
+ "unerrin",
+ "unfailin",
+ "unfalterin",
+ "unfeelin",
+ "unflaggin",
+ "unflatterin",
+ "unflinchin",
+ "unfoldin",
+ "unforgivin",
+ "unheedin",
+ "unhesitatin",
+ "unifyin",
+ "uninterestin",
+ "uninvitin",
+ "unitin",
+ "universalizin",
+ "unknowin",
+ "unleashin",
+ "unloadin",
+ "unlockin",
+ "unlovin",
+ "unmaskin",
+ "unmeanin",
+ "unmovin",
+ "unnervin",
+ "unoffendin",
+ "unpackin",
+ "unpleasin",
+ "unpretendin",
+ "unpromisin",
+ "unquestionin",
+ "unravelin",
+ "unravellin",
+ "unreasonin",
+ "unrelentin",
+ "unremittin",
+ "unresistin",
+ "unrollin",
+ "unsatisfyin",
+ "unseein",
+ "unsettlin",
+ "unsmilin",
+ "unsparin",
+ "unsurprisin",
+ "unsuspectin",
+ "unswervin",
+ "unthinkin",
+ "untirin",
+ "untyin",
+ "unvaryin",
+ "unveilin",
+ "unwaverin",
+ "unwillin",
+ "unwindin",
+ "unwittin",
+ "unyieldin",
+ "upbraidin",
+ "upbringin",
+ "upbuildin",
+ "upcomin",
+ "updatin",
+ "upgradin",
+ "upholdin",
+ "upliftin",
+ "uploadin",
+ "uprisin",
+ "uprootin",
+ "upsettin",
+ "upstandin",
+ "upswin",
+ "upwellin",
+ "urgin",
+ "urinatin",
+ "usherin",
+ "usin",
+ "usurpin",
+ "utilisin",
+ "utilizin",
+ "utterin",
+ "vacatin",
+ "vacationin",
+ "vacillatin",
+ "vacuumin",
+ "validatin",
+ "valuin",
+ "vanishin",
+ "vanquishin",
+ "varyin",
+ "vaultin",
+ "vauntin",
+ "veerin",
+ "veilin",
+ "vendin",
+ "ventilatin",
+ "ventin",
+ "venturin",
+ "vergin",
+ "verifyin",
+ "versionin",
+ "vestin",
+ "vettin",
+ "vexin",
+ "vibratin",
+ "victuallin",
+ "viewin",
+ "vikin",
+ "vindicatin",
+ "vinin",
+ "violatin",
+ "visionin",
+ "visitin",
+ "visualizin",
+ "vitalizin",
+ "vivifyin",
+ "voicin",
+ "voidin",
+ "volunteerin",
+ "vomitin",
+ "votin",
+ "vowin",
+ "voyagin",
+ "vyin",
+ "waddlin",
+ "wadin",
+ "waftin",
+ "waggin",
+ "wagin",
+ "wailin",
+ "waitin",
+ "waivin",
+ "wakenin",
+ "wakin",
+ "walkin",
+ "wallopin",
+ "wallowin",
+ "waltzin",
+ "wanderin",
+ "wanin",
+ "wantin",
+ "warblin",
+ "wardin",
+ "warehousin",
+ "warin",
+ "warmin",
+ "warnin",
+ "warpin",
+ "warrin",
+ "washin",
+ "wastin",
+ "watchin",
+ "waterin",
+ "watermarkin",
+ "waverin",
+ "wavin",
+ "waxin",
+ "weakenin",
+ "weaklin",
+ "weanin",
+ "wearin",
+ "wearyin",
+ "weatherin",
+ "weavin",
+ "webbin",
+ "weddin",
+ "weedin",
+ "weepin",
+ "weighin",
+ "weightin",
+ "welcomin",
+ "weldin",
+ "wellbein",
+ "wellin",
+ "wettin",
+ "whackin",
+ "whalin",
+ "wheedlin",
+ "wheelin",
+ "wheezin",
+ "whettin",
+ "whifflin",
+ "whimperin",
+ "whinin",
+ "whippin",
+ "whirlin",
+ "whirrin",
+ "whiskin",
+ "whisperin",
+ "whistlin",
+ "whitenin",
+ "whitewashin",
+ "whitin",
+ "whittlin",
+ "whizzin",
+ "whoopin",
+ "whoppin",
+ "whuppin",
+ "widenin",
+ "wieldin",
+ "wiggin",
+ "wigglin",
+ "wildin",
+ "willin",
+ "wincin",
+ "windin",
+ "wingin",
+ "winkin",
+ "winnin",
+ "winterin",
+ "wipin",
+ "wirin",
+ "wishin",
+ "withdrawin",
+ "witherin",
+ "withholdin",
+ "withstandin",
+ "witnessin",
+ "wobblin",
+ "wonderin",
+ "wooin",
+ "wordin",
+ "workin",
+ "worryin",
+ "worsenin",
+ "worshippin",
+ "woundin",
+ "wranglin",
+ "wrappin",
+ "wreathin",
+ "wreckin",
+ "wrenchin",
+ "wrestin",
+ "wrestlin",
+ "wrigglin",
+ "wringin",
+ "wrinklin",
+ "writhin",
+ "writin",
+ "wrongdoin",
+ "wrongin",
+ "yachtin",
+ "yammerin",
+ "yankin",
+ "yappin",
+ "yarnin",
+ "yawnin",
+ "yearlin",
+ "yearnin",
+ "yellin",
+ "yellowin",
+ "yelpin",
+ "yieldin",
+ "yowlin",
+ "zaggin",
+ "zeroin",
+ "zigzaggin",
+ "zippin",
+ "zonin",
+ "zoomin"
+ );
+}
src/main/java/com/whitemagicsoftware/keenquotes/parser/Curler.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.parser;
+
+import com.whitemagicsoftware.keenquotes.lex.FilterType;
+
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Consumer;
+import java.util.function.Function;
+
+import static com.whitemagicsoftware.keenquotes.parser.TokenType.*;
+import static java.util.Map.entry;
+import static java.util.Map.ofEntries;
+
+/**
+ * Resolves straight quotes into curly quotes throughout a document.
+ */
+@SuppressWarnings( "unused" )
+public class Curler implements Function<String, String> {
+ /**
+ * Provides an entity-based set of {@link Token} replacements.
+ */
+ public static final Map<TokenType, String> ENTITIES = ofEntries(
+ entry( QUOTE_OPENING_SINGLE, "&lsquo;" ),
+ entry( QUOTE_CLOSING_SINGLE, "&rsquo;" ),
+ entry( QUOTE_OPENING_DOUBLE, "&ldquo;" ),
+ entry( QUOTE_CLOSING_DOUBLE, "&rdquo;" ),
+ entry( QUOTE_STRAIGHT_SINGLE, "'" ),
+ entry( QUOTE_STRAIGHT_DOUBLE, "\"" ),
+ entry( QUOTE_APOSTROPHE, "&apos;" ),
+ entry( QUOTE_PRIME_SINGLE, "&prime;" ),
+ entry( QUOTE_PRIME_DOUBLE, "&Prime;" ),
+ entry( QUOTE_PRIME_TRIPLE, "&tprime;" ),
+ entry( QUOTE_PRIME_QUADRUPLE, "&qprime;" )
+ );
+
+ /**
+ * Provides a character-based set of {@link Token} replacements.
+ */
+ public static final Map<TokenType, String> CHARS = ofEntries(
+ entry( QUOTE_OPENING_SINGLE, "‘" ),
+ entry( QUOTE_CLOSING_SINGLE, "’" ),
+ entry( QUOTE_OPENING_DOUBLE, "“" ),
+ entry( QUOTE_CLOSING_DOUBLE, "”" ),
+ entry( QUOTE_STRAIGHT_SINGLE, "'" ),
+ entry( QUOTE_STRAIGHT_DOUBLE, "\"" ),
+ entry( QUOTE_APOSTROPHE, "’" ),
+ entry( QUOTE_PRIME_SINGLE, "′" ),
+ entry( QUOTE_PRIME_DOUBLE, "″" ),
+ entry( QUOTE_PRIME_TRIPLE, "‴" ),
+ entry( QUOTE_PRIME_QUADRUPLE, "⁗" )
+ );
+
+ private final Contractions mContractions;
+ private final Map<TokenType, String> mReplacements;
+ private final FilterType mFilterType;
+
+ /**
+ * Maps quotes to HTML entities.
+ *
+ * @param c Contractions listings.
+ * @param parserType Creates a parser based on document content structure.
+ */
+ public Curler(
+ final Contractions c,
+ final FilterType parserType
+ ) {
+ this( c, ENTITIES, parserType );
+ }
+
+ /**
+ * Maps quotes to curled character equivalents.
+ *
+ * @param c Contractions listings.
+ * @param replacements Map of recognized quotes to output types (entity or
+ * Unicode character).
+ */
+ public Curler(
+ final Contractions c,
+ final Map<TokenType, String> replacements,
+ final FilterType parserType
+ ) {
+ mContractions = c;
+ mReplacements = replacements;
+ mFilterType = parserType;
+ }
+
+ /**
+ * Converts straight quotes to curly quotes and primes. Any quotation marks
+ * that cannot be converted are passed to the {@link Consumer}. This method
+ * is re-entrant, but not tested to be thread-safe.
+ *
+ * @param text The text to parse.
+ * @return The given text string with as many straight quotes converted to
+ * curly quotes as is feasible.
+ */
+ @Override
+ public String apply( final String text ) {
+ final var output = new StringBuilder( text );
+ final var offset = new AtomicInteger( 0 );
+
+ AmbiguityResolver.resolve(
+ text,
+ mContractions,
+ replace( output, offset, mReplacements ),
+ mFilterType.filter()
+ );
+
+ return output.toString();
+ }
+
+ /**
+ * Replaces non-ambiguous tokens with their equivalent string representation.
+ *
+ * @param output Continuously updated result document.
+ * @param offset Accumulating index where {@link Token} is replaced.
+ * @param replacements Map of {@link TokenType}s to replacement strings.
+ * @return Instructions to replace a {@link Token} in the result document.
+ */
+ public static Consumer<Token> replace(
+ final StringBuilder output,
+ final AtomicInteger offset,
+ final Map<TokenType, String> replacements
+ ) {
+ return token -> {
+ if( !token.isAmbiguous() ) {
+ final var entity = token.toString( replacements );
+
+ output.replace(
+ token.began() + offset.get(),
+ token.ended() + offset.get(),
+ entity
+ );
+
+ offset.addAndGet( entity.length() - (token.ended() - token.began()) );
+ }
+ };
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitter.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.parser;
+
+import com.whitemagicsoftware.keenquotes.lex.Lexeme;
+import com.whitemagicsoftware.keenquotes.lex.LexemeType;
+import com.whitemagicsoftware.keenquotes.lex.Lexer;
+import com.whitemagicsoftware.keenquotes.lex.LexerFilter;
+import com.whitemagicsoftware.keenquotes.util.CircularFifoQueue;
+
+import java.util.function.Consumer;
+
+import static com.whitemagicsoftware.keenquotes.lex.LexemeType.*;
+import static com.whitemagicsoftware.keenquotes.parser.TokenType.*;
+
+/**
+ * Responsible for emitting quotation marks as logical groups. This is the
+ * first pass of a two-pass parser. Each group contains enough context to
+ * distinguish whether a quotation mark is unambiguous or ambiguous with
+ * respect to curling. Each {@link Lexeme} is marked as such.
+ */
+@SuppressWarnings( "SameParameterValue" )
+public final class QuoteEmitter implements Consumer<Lexeme> {
+ private static final LexemeType[] WORD_PERIOD_NUMBER = {
+ WORD, PERIOD, NUMBER
+ };
+
+ private static final LexemeType[] PUNCT_PERIOD_ELLIPSIS_DASH = {
+ PUNCT, PERIOD, ELLIPSIS, DASH
+ };
+
+ private static final LexemeType[] PUNCT_PERIOD = {
+ PUNCT, PERIOD
+ };
+
+ private static final LexemeType[] SPACE_DASH_ENDING = {
+ SPACE, DASH, ENDING
+ };
+
+ private static final LexemeType[] SPACE_ENDING = {
+ SPACE, ENDING
+ };
+
+ private static final LexemeType[] SPACE_HYPHEN = {
+ SPACE, HYPHEN
+ };
+
+ private static final LexemeType[] SPACE_PUNCT = {
+ SPACE, PUNCT
+ };
+
+ private static final LexemeType[] SPACE_SOT = {
+ SPACE, SOT
+ };
+
+ private static final LexemeType[] QUOTE_SINGLE_QUOTE_DOUBLE = {
+ QUOTE_SINGLE, QUOTE_DOUBLE
+ };
+
+ /**
+ * Single quotes preceded by these {@link LexemeType}s may be opening quotes.
+ */
+ private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE =
+ new LexemeType[]{
+ LexemeType.SOT, SPACE, DASH, QUOTE_DOUBLE, OPENING_GROUP, EOL, EOP
+ };
+
+ /**
+ * Single quotes succeeded by these {@link LexemeType}s may be opening quotes.
+ */
+ private static final LexemeType[] LAGGING_QUOTE_OPENING_SINGLE =
+ new LexemeType[]{
+ WORD, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE
+ };
+
+ /**
+ * Single quotes preceded by these {@link LexemeType}s may be closing quotes.
+ */
+ private static final LexemeType[] LEADING_QUOTE_CLOSING_SINGLE =
+ new LexemeType[]{
+ WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_DOUBLE
+ };
+
+ /**
+ * Single quotes succeeded by these {@link LexemeType}s may be closing quotes.
+ */
+ private static final LexemeType[] LAGGING_QUOTE_CLOSING_SINGLE =
+ new LexemeType[]{
+ SPACE, HYPHEN, DASH, PUNCT, PERIOD, ELLIPSIS, QUOTE_DOUBLE, CLOSING_GROUP,
+ ENDING
+ };
+
+ /**
+ * Double quotes preceded by these {@link LexemeType}s may be opening quotes.
+ */
+ private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
+ new LexemeType[]{
+ LexemeType.SOT, SPACE, DASH, EQUALS, QUOTE_SINGLE, OPENING_GROUP, EOL, EOP
+ };
+
+ /**
+ * Double quotes succeeded by these {@link LexemeType}s may be opening quotes.
+ */
+ private static final LexemeType[] LAGGING_QUOTE_OPENING_DOUBLE =
+ new LexemeType[]{
+ WORD, NUMBER, DASH, ELLIPSIS, OPENING_GROUP, QUOTE_SINGLE,
+ QUOTE_SINGLE_OPENING, QUOTE_SINGLE_CLOSING, QUOTE_DOUBLE
+ };
+
+ /**
+ * Double quotes preceded by these {@link LexemeType}s may be closing quotes.
+ */
+ private static final LexemeType[] LEADING_QUOTE_CLOSING_DOUBLE =
+ new LexemeType[]{
+ WORD, NUMBER, PERIOD, PUNCT, DASH, ELLIPSIS, CLOSING_GROUP, QUOTE_SINGLE,
+ QUOTE_SINGLE_CLOSING, QUOTE_SINGLE_OPENING
+ };
+
+ /**
+ * Double quotes succeeded by these {@link LexemeType}s may be closing quotes.
+ */
+ private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE =
+ new LexemeType[]{
+ SPACE, PUNCT, PERIOD, EQUALS, HYPHEN, DASH, QUOTE_SINGLE, CLOSING_GROUP,
+ ENDING
+ };
+
+ private final CircularFifoQueue<Lexeme> mQ = new CircularFifoQueue<>( 4 );
+ private final String mText;
+ private final Contractions mContractions;
+ private final Consumer<Token> mConsumer;
+
+ public QuoteEmitter(
+ final String text,
+ final Contractions contractions,
+ final Consumer<Token> consumer
+ ) {
+ assert text != null;
+ assert contractions != null;
+
+ mText = text;
+ mContractions = contractions;
+ mConsumer = consumer;
+ }
+
+ /**
+ * Scans the given text document for quotation marks and passes them to the
+ * given {@link Token} {@link Consumer}.
+ *
+ * @param text The prose to lex.
+ * @param contractions List of ambiguous and unambiguous contractions.
+ * @param consumer Receives
+ */
+ public static void analyze(
+ final String text,
+ final Contractions contractions,
+ final Consumer<Token> consumer,
+ final LexerFilter filter
+ ) {
+ final var emitter = new QuoteEmitter( text, contractions, consumer );
+ Lexer.lex( text, emitter, filter );
+ }
+
+ /**
+ * @param lexeme the input argument
+ */
+ @Override
+ public void accept( final Lexeme lexeme ) {
+ mQ.add( lexeme );
+
+ if( mQ.size() == 4 ) {
+ parse();
+ }
+ }
+
+ private void parse() {
+ final var lex1 = mQ.get( 0 );
+ final var lex2 = mQ.get( 1 );
+ final var lex3 = mQ.get( 2 );
+ final var lex4 = mQ.get( 3 );
+
+ // <y'all>, <Ph.D.'ll>, <20's>, <she's>
+ if( match( WORD_PERIOD_NUMBER, QUOTE_SINGLE, WORD, ANY ) ) {
+ emit( QUOTE_APOSTROPHE, lex2 );
+ }
+ // <'n'>, <'N'>, <'owlin'>
+ else if(
+ match( ANY, QUOTE_SINGLE, WORD, QUOTE_SINGLE ) &&
+ mContractions.beganEndedUmambiguously(lex3.toString(mText))
+ ) {
+ emit( QUOTE_APOSTROPHE, lex2 );
+ emit( QUOTE_APOSTROPHE, lex4 );
+ mQ.set( Lexeme.NONE, 3 );
+ }
+ // <2''>
+ else if( match( NUMBER, QUOTE_SINGLE, QUOTE_SINGLE, ANY ) ) {
+ emit( QUOTE_PRIME_DOUBLE, lex2.began(), lex3.ended() );
+ mQ.set( Lexeme.NONE, 2 );
+ }
+ // <2'>
+ else if( match( NUMBER, QUOTE_SINGLE, ANY, ANY ) ) {
+ emit( QUOTE_PRIME_SINGLE, lex2 );
+ }
+ // <2">
+ else if( match( NUMBER, QUOTE_DOUBLE, ANY, ANY ) ) {
+ emit( QUOTE_PRIME_DOUBLE, lex2 );
+ }
+ // <thinkin'>
+ else if(
+ match( WORD, QUOTE_SINGLE, ANY, ANY ) &&
+ mContractions.endedUnambiguously( lex1.toString( mText ) )
+ ) {
+ emit( QUOTE_APOSTROPHE, lex2 );
+ }
+ // <'02>
+ else if( match( ANY, QUOTE_SINGLE, NUMBER, SPACE_PUNCT ) ) {
+ emit( QUOTE_APOSTROPHE, lex2 );
+ }
+ // <'20s>
+ else if(
+ match( ANY, QUOTE_SINGLE, NUMBER, WORD ) &&
+ "s".equalsIgnoreCase( lex4.toString( mText ) )
+ ) {
+ emit( QUOTE_APOSTROPHE, lex2 );
+ }
+ // <.'\n>
+ else if( match( PUNCT_PERIOD_ELLIPSIS_DASH, QUOTE_SINGLE, ENDING, ANY ) ) {
+ emit( QUOTE_CLOSING_SINGLE, lex2 );
+ }
+ // <\'>
+ else if( match( ESC_SINGLE, ANY, ANY, ANY ) ) {
+ emit( QUOTE_STRAIGHT_SINGLE, lex1 );
+ }
+ // <\">
+ else if( match( ESC_DOUBLE, ANY, ANY, ANY ) ) {
+ emit( QUOTE_STRAIGHT_DOUBLE, lex1 );
+
+ // <\"'--->
+ if( match( ESC_DOUBLE, QUOTE_SINGLE, SPACE_DASH_ENDING, ANY ) ) {
+ emit( QUOTE_CLOSING_SINGLE, lex2 );
+ }
+ }
+ // <---'" >
+ else if( match( DASH, QUOTE_SINGLE, QUOTE_DOUBLE, SPACE_ENDING ) ) {
+ emit( QUOTE_CLOSING_SINGLE, lex2 );
+ }
+ // <o’-lantern>, <o' fellow>, <O'-the>
+ else if(
+ match( WORD, QUOTE_SINGLE, SPACE_HYPHEN, WORD ) &&
+ "o".equalsIgnoreCase( lex1.toString( mText ) )
+ ) {
+ emit( QUOTE_APOSTROPHE, lex2 );
+ }
+ // <"">, <"...>, <"word>, <---"word>
+ else if(
+ match(
+ LEADING_QUOTE_OPENING_DOUBLE, QUOTE_DOUBLE,
+ LAGGING_QUOTE_OPENING_DOUBLE, ANY
+ )
+ ) {
+ emit( QUOTE_OPENING_DOUBLE, lex2 );
+ }
+ // <..."'>, <word"'>, <?"'>, <word"?>
+ else if(
+ match(
+ LEADING_QUOTE_CLOSING_DOUBLE, QUOTE_DOUBLE,
+ LAGGING_QUOTE_CLOSING_DOUBLE, ANY
+ )
+ ) {
+ emit( QUOTE_CLOSING_DOUBLE, lex2 );
+ }
+ // < ''E>
+ else if( match( SPACE_SOT, QUOTE_SINGLE, QUOTE_SINGLE, WORD ) ) {
+ // Consume both immediately to avoid the false ambiguity <'e>.
+ emit( QUOTE_OPENING_SINGLE, lex2 );
+ emit( QUOTE_APOSTROPHE, lex3 );
+ mQ.set( Lexeme.NONE, 1 );
+ mQ.set( Lexeme.NONE, 2 );
+ }
+ // <'...>, <'word>, <---'word>, < 'nation>
+ else if(
+ match(
+ LEADING_QUOTE_OPENING_SINGLE, QUOTE_SINGLE,
+ LAGGING_QUOTE_OPENING_SINGLE, ANY )
+ ) {
+ final var word = lex3.toString( mText );
+
+ if( mContractions.beganAmbiguously( word ) ) {
+ emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
+ }
+ else if( mContractions.beganUnambiguously( word ) ) {
+ emit( QUOTE_APOSTROPHE, lex2 );
+ }
+ // <"'"nested>
+ else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, QUOTE_DOUBLE, WORD ) ) {
+ emit( QUOTE_OPENING_SINGLE, lex2 );
+ }
+ // <"'" >
+ else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, QUOTE_DOUBLE, ANY ) ) {
+ emit( lex2 );
+ }
+ // < '" >
+ else if( match( ANY, QUOTE_SINGLE, LAGGING_QUOTE_OPENING_SINGLE, ANY ) ) {
+ emit( QUOTE_OPENING_SINGLE, lex2 );
+ }
+ // Ambiguous
+ else {
+ emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
+ }
+ }
+ // <word'">, <...'--->, <"' >
+ else if(
+ match(
+ LEADING_QUOTE_CLOSING_SINGLE, QUOTE_SINGLE,
+ LAGGING_QUOTE_CLOSING_SINGLE, ANY
+ )
+ ) {
+ final var word = lex1.toString( mText );
+
+ if( mContractions.endedAmbiguously( word ) ) {
+ emit( QUOTE_AMBIGUOUS_LAGGING, lex2 );
+ }
+ else {
+ emit( QUOTE_CLOSING_SINGLE, lex2 );
+ }
+ }
+ // <word';> (contraction inferred by previous matches)
+ else if( match( WORD, QUOTE_SINGLE, PUNCT_PERIOD, ANY ) ) {
+ emit( QUOTE_APOSTROPHE, lex2 );
+ }
+ // <---'">
+ else if( match( DASH, QUOTE_SINGLE, QUOTE_DOUBLE, ANY ) ) {
+ emit( QUOTE_CLOSING_SINGLE, lex2 );
+ }
+ // <'42>, <'-3.14>
+ else if( match( ANY, QUOTE_SINGLE, NUMBER, ANY ) ) {
+ emit( QUOTE_OPENING_SINGLE, lex2 );
+ }
+ // <PRE-PARSED><'---.>
+ else if( match( LexemeType.NONE, QUOTE_SINGLE, ANY, ANY ) ) {
+ emit( QUOTE_CLOSING_SINGLE, lex2 );
+ }
+ // <''Cause >
+ else if( match( QUOTE_SINGLE, QUOTE_SINGLE, WORD, ANY ) ) {
+ final var word = lex3.toString( mText );
+
+ if( mContractions.beganAmbiguously( word ) ) {
+ emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
+ }
+ else if( mContractions.beganUnambiguously( word ) ) {
+ emit( QUOTE_APOSTROPHE, lex2 );
+ }
+ else {
+ emit( lex2 );
+ }
+ }
+ // Ambiguous (no match)
+ else if( match( ANY, QUOTE_SINGLE_QUOTE_DOUBLE, ANY, ANY ) ) {
+ emit( lex2 );
+ }
+ }
+
+ private void emit( final TokenType tokenType, final Lexeme lexeme ) {
+ mConsumer.accept( new Token( tokenType, lexeme ) );
+ }
+
+ private void emit(
+ final TokenType tokenType,
+ final int began,
+ final int ended ) {
+ mConsumer.accept( new Token( tokenType, began, ended ) );
+ }
+
+ /**
+ * Emits a token that represents an ambiguous quotation mark.
+ *
+ * @param lexeme A quotation mark that could not be curled.
+ */
+ private void emit( final Lexeme lexeme ) {
+ mConsumer.accept( new Token( AMBIGUOUS, lexeme ) );
+ }
+
+ private boolean match(
+ final LexemeType l1,
+ final LexemeType l2,
+ final LexemeType l3,
+ final LexemeType l4 ) {
+ return mQ.get( 0 ).isType( l1 ) &&
+ mQ.get( 1 ).isType( l2 ) &&
+ mQ.get( 2 ).isType( l3 ) &&
+ mQ.get( 3 ).isType( l4 );
+ }
+
+ private boolean match(
+ final LexemeType[] l1,
+ final LexemeType l2,
+ final LexemeType l3,
+ final LexemeType l4 ) {
+ return mQ.get( 0 ).isType( l1 ) &&
+ mQ.get( 1 ).isType( l2 ) &&
+ mQ.get( 2 ).isType( l3 ) &&
+ mQ.get( 3 ).isType( l4 );
+ }
+
+ private boolean match(
+ final LexemeType l1,
+ final LexemeType[] l2,
+ final LexemeType l3,
+ final LexemeType l4 ) {
+ return mQ.get( 0 ).isType( l1 ) &&
+ mQ.get( 1 ).isType( l2 ) &&
+ mQ.get( 2 ).isType( l3 ) &&
+ mQ.get( 3 ).isType( l4 );
+ }
+
+ private boolean match(
+ final LexemeType l1,
+ final LexemeType l2,
+ final LexemeType[] l3,
+ final LexemeType l4 ) {
+ return mQ.get( 0 ).isType( l1 ) &&
+ mQ.get( 1 ).isType( l2 ) &&
+ mQ.get( 2 ).isType( l3 ) &&
+ mQ.get( 3 ).isType( l4 );
+ }
+
+ private boolean match(
+ final LexemeType l1,
+ final LexemeType l2,
+ final LexemeType l3,
+ final LexemeType[] l4 ) {
+ return mQ.get( 0 ).isType( l1 ) &&
+ mQ.get( 1 ).isType( l2 ) &&
+ mQ.get( 2 ).isType( l3 ) &&
+ mQ.get( 3 ).isType( l4 );
+ }
+
+ private boolean match(
+ final LexemeType[] l1,
+ final LexemeType l2,
+ final LexemeType[] l3,
+ final LexemeType l4 ) {
+ return mQ.get( 0 ).isType( l1 ) &&
+ mQ.get( 1 ).isType( l2 ) &&
+ mQ.get( 2 ).isType( l3 ) &&
+ mQ.get( 3 ).isType( l4 );
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/parser/Stem.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.parser;
+
+/**
+ * Responsible for identifying a {@link Tree}'s subcomponent. This marker
+ * interface helps unify a {@link Tree}'s child elements, allowing the elements
+ * to be added and maintained in insertion order.
+ */
+public interface Stem {
+ /**
+ * Returns this object as a well-formed XML document fragment.
+ *
+ * @return Part of an XML document.
+ */
+ String toXml();
+}
src/main/java/com/whitemagicsoftware/keenquotes/parser/Token.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.parser;
+
+import com.whitemagicsoftware.keenquotes.lex.Lexeme;
+
+import java.util.Map;
+
+import static com.whitemagicsoftware.keenquotes.parser.TokenType.*;
+
+/**
+ * Represents a high-level token read from a text document.
+ */
+final class Token implements Comparable<Token>, Stem {
+ /**
+ * Denotes that the token does not represent a value in the parsed document.
+ */
+ public static final Token NONE = new Token( TokenType.NONE, Lexeme.NONE );
+
+ private TokenType mTokenType;
+ private final int mBegan;
+ private final int mEnded;
+
+ /**
+ * Convenience constructor to create a token that uses the lexeme's
+ * beginning and ending offsets to represent a complete token.
+ *
+ * @param type The type of {@link Token} to create.
+ * @param lexeme Container for beginning and ending text offsets.
+ */
+ Token( final TokenType type, final Lexeme lexeme ) {
+ this( type, lexeme.began(), lexeme.ended() );
+ }
+
+ /**
+ * This constructor can be used to create tokens that span more than a
+ * single character. Almost all tokens represent a single character, only
+ * the double-prime sequence ({@code ''}) is more than one character.
+ *
+ * @param tokenType The type of {@link Token} to create.
+ * @param began Beginning offset into text where token is found.
+ * @param ended Ending offset into text where token is found.
+ */
+ Token( final TokenType tokenType, final int began, final int ended ) {
+ assert tokenType != null;
+ assert began >= 0;
+ assert ended >= began;
+
+ mTokenType = tokenType;
+ mBegan = began;
+ mEnded = ended;
+ }
+
+ /**
+ * Answers whether this {@link Token} appears before the given {@link Token}
+ * in the document. If they overlap, this will return {@code false}.
+ *
+ * @param token The {@link Token} to compare against.
+ * @return {@code true} iff this {@link Token} sequence ends
+ * <em>before</em> the given {@link Token} sequence begins.
+ */
+ boolean isBefore( final Token token ) {
+ assert token != null;
+ assert token != NONE;
+
+ return mEnded <= token.mBegan;
+ }
+
+ /**
+ * Answers whether this {@link Token} appears after the given {@link Token}
+ * in the document. If they overlap, this will return {@code false}.
+ *
+ * @param token The {@link Token} to compare against.
+ * @return {@code true} iff this {@link Token} sequence starts <em>after</em>
+ * the given {@link Token} sequence ends.
+ */
+ @SuppressWarnings( "unused" )
+ boolean isAfter( final Token token ) {
+ assert token != null;
+ assert token != NONE;
+
+ return mBegan > token.mEnded;
+ }
+
+ TokenType getType() {
+ return mTokenType;
+ }
+
+ boolean isType( final TokenType tokenType ) {
+ assert tokenType != null;
+
+ return mTokenType == tokenType;
+ }
+
+ int began() {
+ return mBegan;
+ }
+
+ int ended() {
+ return mEnded;
+ }
+
+ boolean isAmbiguous() {
+ return mTokenType == AMBIGUOUS ||
+ mTokenType == QUOTE_AMBIGUOUS_LEADING ||
+ mTokenType == QUOTE_AMBIGUOUS_LAGGING;
+ }
+
+ /**
+ * Allows mutating an ambiguous token into a non-ambiguous token while
+ * preserving the indexes into the document. This also prevents having to
+ * create additional tokens when resolving.
+ *
+ * @param tokenType The new state, must not be ambiguous.
+ */
+ void setTokenType( final TokenType tokenType ) {
+ assert isAmbiguous();
+ assert tokenType != null;
+
+ mTokenType = tokenType;
+
+ assert !isAmbiguous();
+ }
+
+ @Override
+ public int compareTo( final Token that ) {
+ return this.mBegan - that.mBegan;
+ }
+
+ @Override
+ public String toXml() {
+ return "<" +
+ mTokenType +
+ " type='" + getType().name() + "'" +
+ " began='" + began() + "'" +
+ " ended='" + ended() + "' />";
+ }
+
+ public String toString( final Map<TokenType, String> entities ) {
+ return entities.get( getType() );
+ }
+
+ @Override
+ public String toString() {
+ return getClass().getSimpleName() + '[' +
+ "mType=" + mTokenType +
+ ", mBegan=" + mBegan +
+ ", mEnded=" + mEnded +
+ ']';
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/parser/TokenType.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.parser;
+
+/**
+ * Identifies the type of quotation mark found while parsing prose.
+ */
+enum TokenType {
+ QUOTE_OPENING_SINGLE( "opening-single" ),
+ QUOTE_OPENING_DOUBLE( "opening-double" ),
+ QUOTE_CLOSING_SINGLE( "closing-single" ),
+ QUOTE_CLOSING_DOUBLE( "closing-double" ),
+ QUOTE_APOSTROPHE( "apostrophe" ),
+ QUOTE_STRAIGHT_SINGLE,
+ QUOTE_STRAIGHT_DOUBLE,
+ QUOTE_PRIME_SINGLE,
+ QUOTE_PRIME_DOUBLE,
+ QUOTE_PRIME_TRIPLE,
+ QUOTE_PRIME_QUADRUPLE,
+ QUOTE_AMBIGUOUS_LEADING( "leading-ambiguous" ),
+ QUOTE_AMBIGUOUS_LAGGING( "lagging-ambiguous" ),
+ AMBIGUOUS( "ambiguous" ),
+ NONE;
+
+ private final String mName;
+
+ TokenType() {
+ this( "token" );
+ }
+
+ TokenType( final String name ) {
+ mName = name;
+ }
+
+ @Override
+ public String toString() {
+ return mName;
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/parser/Tree.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.parser;
+
+import java.util.*;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Consumer;
+
+import static com.whitemagicsoftware.keenquotes.parser.Token.NONE;
+import static com.whitemagicsoftware.keenquotes.parser.TokenType.*;
+
+/**
+ * Responsible for helping determine whether ambiguous quotation marks can be
+ * disambiguated.
+ *
+ * @param <T> The type of items to add to the {@link Tree}.
+ */
+class Tree<T extends Token> implements Stem {
+ /**
+ * Assign to {@code null} if this is the root of the {@link Tree}.
+ */
+ private final Tree<T> mParent;
+
+ /**
+ * Subtrees and {@link Token}s.
+ */
+ private final Collection<Stem> mStems = new LinkedHashSet<>( 128 );
+
+ /**
+ * The opening quotation mark that necessitated a new {@link Tree} instance.
+ */
+ private final T mOpening;
+
+ /**
+ * Mutated by {@link #closing(Token)} when performing disambiguation.
+ */
+ @SuppressWarnings( "unchecked" )
+ private T mClosing = (T) NONE;
+
+ /**
+ * Creates a root {@link Tree} instance (no parent).
+ */
+ @SuppressWarnings( "unchecked" )
+ public Tree() {
+ mParent = null;
+ mOpening = (T) NONE;
+ }
+
+ /**
+ * Constructs a new {@link Tree} that branches off of the given parent. The
+ * only time that a new {@link Tree} is branched is when an opening quotation
+ * mark is encountered.
+ *
+ * @param parent The new subtree.
+ * @param opening The opening quotation mark that requires a subtree.
+ */
+ private Tree( final Tree<T> parent, final T opening ) {
+ assert parent != null;
+ assert opening != null;
+ assert opening != NONE;
+
+ mParent = parent;
+ mOpening = opening;
+ }
+
+ /**
+ * Answers whether an opening single quote has been assigned. This will be
+ * {@code true} for any subtree (because a subtree can only be instantiated
+ * with a non-ambiguous opening quotation mark).
+ *
+ * @return {@code true} if this {@link Tree} has a valid opening single
+ * quote.
+ */
+ public boolean hasOpeningSingleQuote() {
+ return mOpening.isType( QUOTE_OPENING_SINGLE );
+ }
+
+ /**
+ * Answers whether a closing single quote has been assigned.
+ *
+ * @return {@code true} if this {@link Tree} has a valid closing single
+ * quote.
+ */
+ public boolean hasClosingSingleQuote() {
+ return mClosing.isType( QUOTE_CLOSING_SINGLE );
+ }
+
+ /**
+ * Answers whether this level in the {@link Tree} contains both an opening
+ * quotation mark and a closing quotation mark of the same type.
+ *
+ * @return {@code true} iff the opening and closing quotation marks are
+ * the same type (double or single).
+ */
+ public boolean isBalanced() {
+ return
+ mOpening.isType( QUOTE_OPENING_DOUBLE ) &&
+ mClosing.isType( QUOTE_CLOSING_DOUBLE ) ||
+ hasOpeningSingleQuote() && hasClosingSingleQuote();
+ }
+
+ /**
+ * Counts the number of {@link Token}s at this level of the {@link Tree}.
+ * This will not traverse to the parent or child {@link Tree}s.
+ *
+ * @param tokenType The {@link Token} type to tally.
+ * @return The number of {@link Token}s present in this {@link Tree},
+ * not including ancestors or descendants.
+ */
+ public int count( final TokenType tokenType ) {
+ final var count = new AtomicInteger();
+
+ iterateTokens( token -> {
+ if( token.isType( tokenType ) ) {
+ count.incrementAndGet();
+ }
+ } );
+
+ return count.get();
+ }
+
+ /**
+ * Passes all {@link Token} instances in this {@link Tree} to the given
+ * {@link Consumer}. This does not traverse ancestors or descendants.
+ *
+ * @param consumer Receives each {@link Token} instance, including the
+ * opening and closing {@link Token}s, if assigned.
+ */
+ public void iterateTokens( final Consumer<Token> consumer ) {
+ if( !mOpening.isType( TokenType.NONE ) ) {
+ consumer.accept( mOpening );
+ }
+
+ for( final var stem : mStems ) {
+ if( stem instanceof Token token ) {
+ consumer.accept( token );
+ }
+ }
+
+ if( !mClosing.isType( TokenType.NONE ) ) {
+ consumer.accept( mClosing );
+ }
+ }
+
+ /**
+ * Performs an iterative, breadth-first visit of every tree and subtree in
+ * the nested hierarchy of quotations.
+ *
+ * @param consumer Recipient of all {@link Tree} nodes, breadth-first.
+ */
+ public void visit( final Consumer<Tree<T>> consumer ) {
+ final var queue = new LinkedList<Tree<T>>();
+ queue.add( this );
+
+ while( !queue.isEmpty() ) {
+ final var current = queue.poll();
+
+ consumer.accept( current );
+
+ queue.addAll( current.subtrees() );
+ }
+ }
+
+ /**
+ * Adds a new subtree to this node with the subtree's parent as this node.
+ *
+ * @param opening The opening quotation mark that necessitates branching.
+ * @return A new subtree instance.
+ */
+ Tree<T> opening( final T opening ) {
+ assert opening != null;
+
+ return add( new Tree<>( this, opening ) );
+ }
+
+ /**
+ * Closes the current open/close quotation mark pair.
+ *
+ * @param closing The closing quotation mark that pairs the opening mark.
+ * @return Parent instance, or this instance if at the root, never
+ * {@code null}.
+ */
+ Tree<T> closing( final T closing ) {
+ assert closing != NONE;
+ assert mOpening.isBefore( closing );
+
+ mClosing = closing;
+
+ return mParent == null ? this : mParent;
+ }
+
+ /**
+ * Adds any {@link Stem} implementation to the child stems of this structure.
+ *
+ * @param stem The child to add.
+ * @return The item added to the {@link Tree}.
+ */
+ <S extends Stem> S add( final S stem ) {
+ assert stem != null;
+
+ mStems.add( stem );
+
+ return stem;
+ }
+
+ /**
+ * Finds the {@link Tree}'s root.
+ *
+ * @return The earliest ancestor such that its parent is {@code null}.
+ */
+ Tree<T> root() {
+ Tree<T> ancestor = this;
+
+ // Search for the tree's root.
+ while( ancestor.parent() != null ) {
+ ancestor = ancestor.parent();
+ }
+
+ return ancestor;
+ }
+
+ void replaceAll( final TokenType oldToken, final TokenType newToken ) {
+ for( final var stem : mStems ) {
+ if( stem instanceof Token token && token.isType( oldToken ) ) {
+ token.setTokenType( newToken );
+ }
+ }
+ }
+
+ private Tree<T> parent() {
+ return mParent;
+ }
+
+ @SuppressWarnings( {"rawtypes", "unchecked"} )
+ private List<Tree<T>> subtrees() {
+ final var result = new ArrayList<Tree<T>>();
+
+ for( final var stem : mStems ) {
+ if( stem instanceof Tree tree ) {
+ result.add( tree );
+ }
+ }
+
+ return result;
+ }
+
+ /**
+ * Helper method to convert this {@link Tree} into a well-formed XML string.
+ * This is useful for debugging.
+ *
+ * @return The XMl representation of this object.
+ */
+ @Override
+ public String toXml() {
+ final var sb = new StringBuilder( 128 );
+ final var name = parent() == null ? "root" : "tree";
+
+ sb.append( '<' );
+ sb.append( name );
+ sb.append( '>' );
+
+ if( !mOpening.isType( TokenType.NONE ) ) {
+ sb.append( mOpening.toXml() );
+ }
+
+ mStems.forEach( stem -> sb.append( stem.toXml() ) );
+
+ if( !mClosing.isType( TokenType.NONE ) ) {
+ sb.append( mClosing.toXml() );
+ }
+
+ sb.append( "</" );
+ sb.append( name );
+ sb.append( '>' );
+
+ return sb.toString();
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/util/CircularFifoQueue.java
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.whitemagicsoftware.keenquotes.util;
+
+import java.util.*;
+
+import static java.lang.String.*;
+import static java.util.Objects.*;
+
+/**
+ * This is a first-in first-out queue with a fixed size that replaces its
+ * oldest element if full.
+ * <p>
+ * The removal order of a {@link CircularFifoQueue} is based on the
+ * insertion order; elements are removed in the same order in which they
+ * were added. The iteration order is the same as the removal order.
+ * </p>
+ * <p>
+ * The {@link #add(Object)}, {@link #remove()}, {@link #peek()},
+ * {@link #poll()}, {@link #offer(Object)} operations all perform in constant
+ * time. All other operations perform in linear time or worse.
+ * </p>
+ * <p>
+ * This queue prevents {@code null} objects from being added.
+ * </p>
+ *
+ * @param <E> the type of elements in this collection
+ * @since 4.0
+ */
+public final class CircularFifoQueue<E> extends AbstractCollection<E>
+ implements Queue<E> {
+ /**
+ * Underlying storage array.
+ */
+ private final transient E[] elements;
+
+ /**
+ * Array index of first (oldest) queue element.
+ */
+ private transient int start;
+
+ /**
+ * Index mod maxElements of the array position following the last queue
+ * element. Queue elements start at elements[start] and "wrap around"
+ * elements[maxElements-1], ending at elements[decrement(end)].
+ * For example, elements = {c,a,b}, start=1, end=1 corresponds to
+ * the queue [a,b,c].
+ */
+ private transient int end;
+
+ /**
+ * Flag to indicate if the queue is currently full.
+ */
+ private transient boolean full;
+
+ /**
+ * Capacity of the queue.
+ */
+ private final int maxElements;
+
+ /**
+ * Constructor that creates a queue with the specified size.
+ *
+ * @param size Immutable queue size, must be greater than zero.
+ */
+ @SuppressWarnings( "unchecked" )
+ public CircularFifoQueue( final int size ) {
+ assert size > 0;
+
+ elements = (E[]) new Object[ size ];
+ maxElements = elements.length;
+ }
+
+ /**
+ * Returns the number of elements stored in the queue.
+ *
+ * @return this queue's size
+ */
+ @Override
+ public int size() {
+ final int size;
+
+ if( end < start ) {
+ size = maxElements - start + end;
+ }
+ else if( end == start ) {
+ size = full ? maxElements : 0;
+ }
+ else {
+ size = end - start;
+ }
+
+ return size;
+ }
+
+ /**
+ * Returns true if this queue is empty; false otherwise.
+ *
+ * @return true if this queue is empty
+ */
+ @Override
+ public boolean isEmpty() {
+ return size() == 0;
+ }
+
+ /**
+ * Returns {@code true} if the capacity limit of this queue has been reached,
+ * i.e. the number of elements stored in the queue equals its maximum size.
+ *
+ * @return {@code true} if the capacity limit has been reached, {@code
+ * false} otherwise
+ * @since 4.1
+ */
+ public boolean isAtFullCapacity() {
+ return size() == maxElements;
+ }
+
+ /**
+ * Clears this queue.
+ */
+ @Override
+ public void clear() {
+ full = false;
+ start = 0;
+ end = 0;
+ Arrays.fill( elements, null );
+ }
+
+ /**
+ * Adds the given element to this queue. If the queue is full, the least
+ * recently added element is discarded so that a new element can be inserted.
+ *
+ * @param element the element to add
+ * @return true, always
+ * @throws NullPointerException if the given element is null
+ */
+ @Override
+ public boolean add( final E element ) {
+ requireNonNull( element, "element" );
+
+ if( isAtFullCapacity() ) {
+ remove();
+ }
+
+ elements[ end++ ] = element;
+
+ if( end >= maxElements ) {
+ end = 0;
+ }
+
+ if( end == start ) {
+ full = true;
+ }
+
+ return true;
+ }
+
+ /**
+ * Returns the element at the specified position in this queue.
+ *
+ * @param index the position of the element in the queue, zero-based.
+ * @return The element at position {@code index}.
+ * @throws NoSuchElementException if the requested position is outside the
+ * range [0, size).
+ */
+ public E get( final int index ) {
+ invariant( index );
+
+ return elements[ (start + index) % maxElements ];
+ }
+
+ /**
+ * Sets the element at the specified position in this queue.
+ *
+ * @param element The element to replace at the given {@code index}.
+ * @param index The element at position {@code index} to replace,
+ * zero-based.
+ * @throws NoSuchElementException if the requested position is outside the
+ * range [0, size).
+ */
+ public void set( final E element, final int index ) {
+ requireNonNull( element, "element" );
+ invariant( index );
+
+ elements[ (start + index) % maxElements ] = element;
+ }
+
+ private void invariant( final int index ) {
+ final int sz = size();
+
+ if( index < 0 || index >= sz ) {
+ throw new NoSuchElementException( format(
+ "Index %1$d is outside the available range [0, %2$d)", index, sz
+ ) );
+ }
+ }
+
+ /**
+ * Adds the given element to this queue. If the queue is full, the least
+ * recently added element is discarded so that a new element can be inserted.
+ *
+ * @param element The element to add.
+ * @return {@code true}, always
+ * @throws NullPointerException if the given element is {@code null}.
+ */
+ @Override
+ public boolean offer( final E element ) {
+ return add( element );
+ }
+
+ @Override
+ public E poll() {
+ return isEmpty() ? null : remove();
+ }
+
+ @Override
+ public E element() {
+ if( isEmpty() ) {
+ throw new NoSuchElementException( "empty queue" );
+ }
+
+ return peek();
+ }
+
+ @Override
+ public E peek() {
+ return isEmpty() ? null : elements[ start ];
+ }
+
+ @Override
+ public E remove() {
+ if( isEmpty() ) {
+ throw new NoSuchElementException( "empty queue" );
+ }
+
+ final E element = elements[ start ];
+
+ if( null != element ) {
+ elements[ start++ ] = null;
+
+ if( start >= maxElements ) {
+ start = 0;
+ }
+ full = false;
+ }
+
+ return element;
+ }
+
+ /**
+ * Increments the internal index.
+ *
+ * @param index the index to increment
+ * @return the updated index
+ */
+ private int increment( int index ) {
+ if( ++index >= maxElements ) {
+ index = 0;
+ }
+
+ return index;
+ }
+
+ /**
+ * Decrements the internal index.
+ *
+ * @param index the index to decrement
+ * @return the updated index
+ */
+ private int decrement( int index ) {
+ if( --index < 0 ) {
+ index = maxElements - 1;
+ }
+
+ return index;
+ }
+
+ /**
+ * Returns an iterator over this queue's elements.
+ *
+ * @return an iterator over this queue's elements
+ */
+ @Override
+ public Iterator<E> iterator() {
+ return new Iterator<>() {
+ private int index = start;
+ private int lastReturnedIndex = -1;
+ private boolean isFirst = full;
+
+ @Override
+ public boolean hasNext() {
+ return isFirst || index != end;
+ }
+
+ @Override
+ public E next() {
+ if( !hasNext() ) {
+ throw new NoSuchElementException();
+ }
+
+ isFirst = false;
+ lastReturnedIndex = index;
+ index = increment( index );
+ return elements[ lastReturnedIndex ];
+ }
+
+ @Override
+ public void remove() {
+ if( lastReturnedIndex == -1 ) {
+ throw new IllegalStateException();
+ }
+
+ // First element can be removed quickly
+ if( lastReturnedIndex == start ) {
+ CircularFifoQueue.this.remove();
+ lastReturnedIndex = -1;
+ return;
+ }
+
+ int pos = lastReturnedIndex + 1;
+ if( start < lastReturnedIndex && pos < end ) {
+ // shift in one part
+ System.arraycopy(
+ elements, pos, elements, lastReturnedIndex, end - pos );
+ }
+ else {
+ // Other elements require us to shift the subsequent elements
+ while( pos != end ) {
+ if( pos >= maxElements ) {
+ elements[ pos - 1 ] = elements[ 0 ];
+ pos = 0;
+ }
+ else {
+ elements[ decrement( pos ) ] = elements[ pos ];
+ pos = increment( pos );
+ }
+ }
+ }
+
+ lastReturnedIndex = -1;
+ end = decrement( end );
+ elements[ end ] = null;
+ full = false;
+ index = decrement( index );
+ }
+ };
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/util/FastCharacterIterator.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.util;
+
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.function.Function;
+
+import static java.text.CharacterIterator.DONE;
+
+/**
+ * Iterates over a string, much like {@link CharacterIterator}, but faster.
+ * This class gets 53 ops/s vs. 49 ops/s for {@link StringCharacterIterator}.
+ * In comparison, using unconstrained {@link String#charAt(int)} calls yields
+ * 57 ops/s.
+ * <p>
+ * <strong>Caution:</strong> This class offers minimal bounds checking to eke
+ * out some efficiency.
+ * </p>
+ */
+public final class FastCharacterIterator {
+
+ private final String mS;
+ private final int mLen;
+
+ /**
+ * Starts at 0, not guaranteed to be within bounds.
+ */
+ private int mPos;
+
+ /**
+ * Constructs a new iterator that can advance through the given string
+ * one character at a time.
+ *
+ * @param s The string to iterate.
+ */
+ public FastCharacterIterator( final String s ) {
+ mS = s;
+ mLen = s.length();
+ }
+
+ /**
+ * Returns the iterated index. The return value is not guaranteed to be
+ * within the string bounds.
+ *
+ * @return The iterated index.
+ */
+ public int index() {
+ return mPos;
+ }
+
+ /**
+ * Returns the character at the currently iterated position in the string.
+ * This method performs bounds checking.
+ *
+ * @return {@link CharacterIterator#DONE} if there are no more characters.
+ */
+ public char current() {
+ final var pos = mPos;
+ return pos < mLen ? mS.charAt( pos ) : DONE;
+ }
+
+ /**
+ * Advances to the next character in the string, without bounds checking.
+ */
+ public void next() {
+ mPos++;
+ }
+
+ /**
+ * Advances to the previous character in the string, without bounds checking.
+ */
+ public void prev() {
+ --mPos;
+ }
+
+ /**
+ * Returns the next character in the string without consuming it. Multiple
+ * consecutive calls to this method will return the same value. This method
+ * performs bounds checking.
+ *
+ * @return {@link CharacterIterator#DONE} if there are no more characters.
+ */
+ public char peek() {
+ final var pos = mPos;
+ return pos + 1 < mLen ? mS.charAt( pos + 1 ) : DONE;
+ }
+
+ /**
+ * Answers whether {@link #next()} followed by {@link #current()} is safe.
+ *
+ * @return {@code true} if there are more characters to be iterated.
+ */
+ public boolean hasNext() {
+ return mPos < mLen;
+ }
+
+ /**
+ * Parse all characters that match a given function.
+ *
+ * @param f The function that determines when skipping stops.
+ */
+ public void skip( final Function<Character, Boolean> f ) {
+ assert f != null;
+
+ do {
+ next();
+ }
+ while( f.apply( current() ) );
+
+ // The loop always overshoots by one character.
+ prev();
+ }
+
+ /**
+ * Creates a string from a subset of consecutive characters in the string
+ * being iterated. The calling class is responsible for bounds-checking.
+ *
+ * @param began The starting index, must be greater than or equal to zero
+ * and less than or equal to {@code ended}.
+ * @param ended The ending index, must be less than the string length.
+ * @return A substring of the iterated string.
+ * @throws IndexOutOfBoundsException Either or both parameters exceed the
+ * string's boundaries.
+ */
+ public String substring( final int began, final int ended ) {
+ assert began >= 0;
+ assert began <= ended;
+ assert ended < mLen;
+
+ return mS.substring( began, ended );
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/util/Tuple.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.util;
+
+/**
+ * Responsible for associating two values.
+ *
+ * @param item1 The first item to associate with the second item.
+ * @param item2 The second item to associate with the first item.
+ * @param <T1> The type of the first item.
+ * @param <T2> The type of the second item.
+ */
+public record Tuple<T1, T2>( T1 item1, T2 item2 ) {
+}
src/main/resources/com/whitemagicsoftware/keenquotes/.gitignore
-app.properties
src/main/resources/com/whitemagicsoftware/keenquotes/app/.gitignore
+version.properties
src/test/java/com/whitemagicsoftware/keenquotes/KeenQuotesTest.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import org.junit.jupiter.api.Disabled;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.Arguments;
-import org.junit.jupiter.params.provider.MethodSource;
-import org.junit.jupiter.params.provider.ValueSource;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.function.Consumer;
-import java.util.function.Function;
-import java.util.stream.Stream;
-
-import static com.whitemagicsoftware.keenquotes.ParserType.PARSER_PLAIN;
-import static com.whitemagicsoftware.keenquotes.ParserType.PARSER_XML;
-import static java.lang.System.out;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.params.provider.Arguments.arguments;
-
-/**
- * Test that English straight quotes are converted to curly quotes and
- * apostrophes.
- */
-public class KeenQuotesTest {
- /**
- * This is a single-use test that is useful for debugging.
- */
- @Test
- public void test_parse_SingleLine_Parsed() {
- final var converter = createConverter( out::println );
- out.println( converter.apply(
- "\"---retroactively!\""
- ) );
- }
-
- /**
- * Tests that straight quotes are converted to curly quotes.
- *
- * @throws IOException Error opening file full of fun.
- */
- @Test
- public void test_Parse_StraightQuotes_CurlyQuotes() throws IOException {
- testConverter( createConverter( ( lex ) -> {} ) );
- }
-
- @ParameterizedTest
- @MethodSource( "param_XmlParse_StraightQuotes_CurlyQuotes" )
- public void test_XmlParse_StraightQuotes_CurlyQuotes(
- final String input, final String expected ) {
- final var converter = createConverter( out::println, PARSER_XML );
- final var actual = converter.apply( input );
- assertEquals( expected, actual );
- }
-
- /**
- * Re-enable using <a href="https://www.gutenberg.org/">Project Gutenberg</a>
- * texts.
- *
- * @param filename A plain text file to convert.
- * @throws IOException Could not find, open, or read from text file.
- */
- @ParameterizedTest
- @ValueSource( strings = {"habberton"} )
- @Disabled
- void test_Parse_Story_Converted( final String filename ) throws IOException {
- final var sb = new StringBuilder( 2 ^ 20 );
-
- try( final var reader = open( filename + ".txt" ) ) {
- String line;
- final var sep = System.lineSeparator();
-
- while( (line = reader.readLine()) != null ) {
- sb.append( line ).append( sep );
- }
- }
-
- final var converter = createConverter( out::println );
- System.out.println( converter.apply( sb.toString() ) );
- }
-
- static Stream<Arguments> param_XmlParse_StraightQuotes_CurlyQuotes() {
- return Stream.of(
- arguments(
- "<em>'twas</em>",
- "<em>&apos;twas</em>"
- ),
- arguments(
- "<strong>'Twas</strong> <kbd>'</kbd> in <tex>Knuth's TeX</tex>",
- "<strong>&apos;Twas</strong> <kbd>'</kbd> in <tex>Knuth's TeX</tex>"
- ),
- arguments(
- "<bold>'twas</bold> redeemed for the <em>cat</em>'s eye",
- "<bold>&apos;twas</bold> redeemed for the <em>cat</em>&apos;s eye"
- ),
- arguments(
- "<a href=\"https://x.org\" title=\"X's Homepage\">X11's bomb</a>",
- "<a href=\"https://x.org\" title=\"X's Homepage\">X11&apos;s bomb</a>"
- ),
- arguments(
- "''<em>Twas</em> happening!'",
- "&lsquo;&apos;<em>Twas</em> happening!&rsquo;"
- )
- );
- }
-
- /**
- * Reads a file full of couplets. The first of the pair is the input,
- * the second of the pair is the expected result. Couplets may include
- * newline characters to indicate end of lines and end of paragraphs.
- * Lines that start with {@code #} are ignored.
- *
- * @param parser The text processor capable of straight quote conversion.
- * @throws IOException Error opening file full of fun.
- */
- private void testConverter( final Function<String, String> parser )
- throws IOException {
- try( final var reader = open( "smartypants.txt" ) ) {
- String line;
- String testLine = "";
- String expected = "";
-
- while( ((line = reader.readLine()) != null) ) {
- if( line.startsWith( "#" ) || line.isBlank() ) { continue; }
-
- // Read the first line of the couplet.
- if( testLine.isBlank() ) {
- testLine = line;
- continue;
- }
-
- // Read the second line of the couplet.
- if( expected.isBlank() ) {
- expected = line;
- }
-
- testLine = unescapeEol( testLine );
- expected = unescapeEol( expected );
-
- final var actual = parser.apply( testLine );
- assertEquals( expected, actual );
-
- testLine = "";
- expected = "";
- }
- }
- }
-
- private static String unescapeEol( final String s ) {
- return String.join( "\n", s.split( "\\\\n" ) );
- }
-
- /**
- * Opens a text file for reading. Callers are responsible for closing.
- *
- * @param filename The file to open.
- * @return An instance of {@link BufferedReader} that can be used to
- * read all the lines in the file.
- */
- private BufferedReader open( final String filename ) {
- final var is = getClass().getResourceAsStream( filename );
- assertNotNull( is );
-
- return new BufferedReader( new InputStreamReader( is ) );
- }
-
- private Function<String, String> createConverter(
- final Consumer<Lexeme> unresolved ) {
- return createConverter( unresolved, PARSER_PLAIN );
- }
-
- private Function<String, String> createConverter(
- final Consumer<Lexeme> unresolved, final ParserType parserType ) {
- return new Converter( unresolved, parserType );
- }
-}
src/test/java/com/whitemagicsoftware/keenquotes/LexerTest.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import org.junit.jupiter.api.Test;
-
-import java.util.List;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.function.BiFunction;
-import java.util.function.Consumer;
-
-import static com.whitemagicsoftware.keenquotes.LexemeType.*;
-import static java.util.Arrays.asList;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-/**
- * Tests lexing words, numbers, punctuation, spaces, newlines, etc.
- */
-final class LexerTest {
-
- @Test
- void test_Lexing_Words_LexemeValues() {
- testText( "abc 123", "abc", " ", "123" );
- testText( "-123 abc", "-123", " ", "abc" );
- }
-
- @Test
- void test_Lexing_Numbers_EmitNumbers() {
- testType( ".123", NUMBER );
- testType( "-123.", NUMBER, PERIOD );
- testType( " 123.123.123", SPACE, NUMBER );
- testType( "123 123\"", NUMBER, SPACE, NUMBER, QUOTE_DOUBLE );
- testType( "-123,123.123", NUMBER );
- testType( "...1,023...", ELLIPSIS, NUMBER, ELLIPSIS );
- }
-
- @Test
- void test_Lexing_Words_EmitWords() {
- testType( "abc", WORD );
- testType( "abc abc", WORD, SPACE, WORD );
- testType( "abc...", WORD, ELLIPSIS );
- testType( "abc123", WORD );
- testType( "-123abc", NUMBER, WORD );
- testType( "abc-o'-abc", WORD, HYPHEN, WORD, QUOTE_SINGLE, HYPHEN, WORD );
- }
-
- @Test
- void test_Lexing_PunctuationMarks_EmitPunctuationMarks() {
- testType( "!", PUNCT );
- testType( ";", PUNCT );
- testType( "\\", PUNCT );
- testType( ".", PERIOD );
- testType( "-", HYPHEN );
- testType( "--", DASH );
- testType( "---", DASH );
- testType( "–", DASH );
- testType( "―", DASH );
- testType( "—", DASH );
- testType( "—-—", DASH );
- testType( "...", ELLIPSIS );
- testType( ". .", ELLIPSIS );
- testType( ". . .", ELLIPSIS );
- testType( ".. ... ....", ELLIPSIS );
- }
-
- @Test
- void test_Lexing_Quotes_EmitQuotes() {
- testType( "'", QUOTE_SINGLE );
- testType( "\"", QUOTE_DOUBLE );
- testType( "‘", QUOTE_SINGLE_OPENING );
- testType( "’", QUOTE_SINGLE_CLOSING );
- testType( "“", QUOTE_DOUBLE_OPENING );
- testType( "”", QUOTE_DOUBLE_CLOSING );
- testType( "3 o'clock", NUMBER, SPACE, WORD, QUOTE_SINGLE, WORD );
- }
-
- @Test
- void test_Lexing_Escapes_EmitEscapedQuotes() {
- testType( "123\\'456\\\"", NUMBER, ESC_SINGLE, NUMBER, ESC_DOUBLE );
- testText( "123\\'456\\\"", "123", "\\'", "456", "\\\"" );
- }
-
- @Test
- void test_Lexing_Newlines_EmitNewlines() {
- testType( "\r", EOL );
- testType( "\n", EOL );
- testType( "\r\n", EOL );
- testType( "\r\n\r\n", EOP );
- testType( "\r\n\n\r", EOP );
- testType( "abc \r\nabc\n", WORD, SPACE, EOL, WORD, EOL );
- }
-
- @Test
- void test_Lexing_Whitespace_EmitSpace() {
- testType( " ", SPACE );
- testType( "\n \n", EOL, SPACE, EOL );
- }
-
- static void testType(
- final String actual, final LexemeType... expected ) {
- final var list = asList( expected );
- testType( actual, ( lexeme, text ) -> lexeme.getType(), list );
- }
-
- static void testType(
- final String actual,
- final Consumer<FastCharacterIterator> filter,
- final LexemeType... expected ) {
- final var list = asList( expected );
- testType( actual, ( lexeme, text ) -> lexeme.getType(), filter, list );
- }
-
- static void testText(
- final String actual, final String... expected ) {
- testType( actual, Lexeme::toString, asList( expected ) );
- }
-
- private static <A, E> void testType(
- final String text,
- final BiFunction<Lexeme, String, A> f,
- final List<E> elements ) {
- testType( text, f, filter -> {}, elements );
- }
-
- private static <A, E> void testType(
- final String text,
- final BiFunction<Lexeme, String, A> f,
- final Consumer<FastCharacterIterator> filter,
- final List<E> elements ) {
- var counter = new AtomicInteger();
-
- Lexer.lex( text, lexeme -> {
- final var expected = elements.get( counter.getAndIncrement() );
- final var actual = f.apply( lexeme, text );
-
- assertEquals( expected, actual );
- }, filter );
-
- // Ensure all expected values are matched (verify end of text reached).
- assertEquals( elements.size(), counter.get() );
- }
-}
src/test/java/com/whitemagicsoftware/keenquotes/ParserTest.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import org.junit.jupiter.api.Test;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import static com.whitemagicsoftware.keenquotes.TokenType.QUOTE_APOSTROPHE;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-/**
- * Test that all unambiguous apostrophes are emitted once.
- */
-final class ParserTest {
-
- @SuppressWarnings( "TextBlockMigration" )
- private final static Map<String, Map<TokenType, Integer>> TEST_CASES =
- Map.of(
- "That's a 35'×10\" yacht!",
- Map.of( QUOTE_APOSTROPHE, 1 ),
- "It's the 80's...",
- Map.of( QUOTE_APOSTROPHE, 2 ),
- "Fish-'n'-chips!",
- Map.of( QUOTE_APOSTROPHE, 2 ),
- "She's a cat ya'll couldn't've known!",
- Map.of( QUOTE_APOSTROPHE, 4 ),
- "'Twas and 'tis whate'er lay 'twixt dawn and dusk 'n River Styx.",
- Map.of( QUOTE_APOSTROPHE, 5 ),
- "But I must leave the proofs to those who 've seen 'em;\n" +
- "But this I heard her say, and can't be wrong\n" +
- "And all may think which way their judgments lean 'em,\n" +
- "''T is strange---the Hebrew noun which means \"I am,\"\n" +
- "The English always use to govern d--n.'",
- Map.of( QUOTE_APOSTROPHE, 5 )
- );
-
- @Test
- void test_Conversion_StraightQuotes_ExpectedConversionCount() {
- for( final var entry : TEST_CASES.entrySet() ) {
- parse( entry.getKey(), entry.getValue() );
- }
- }
-
- private void parse( final String text, final Map<TokenType, Integer> tally ) {
- final var contractions = new Contractions.Builder().build();
- final var parser = new Parser( text, contractions );
- final var actual = new HashMap<TokenType, Integer>();
-
- parser.parse(
- token -> actual.merge( token.getType(), 1, Integer::sum ),
- lexeme -> {},
- filter -> {}
- );
-
- for( final var expectedEntry : tally.entrySet() ) {
- final var expectedCount = expectedEntry.getValue();
- final var actualCount = actual.get( expectedEntry.getKey() );
-
- assertEquals( expectedCount, actualCount, text );
- }
- }
-}
src/test/java/com/whitemagicsoftware/keenquotes/XmlFilterTest.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-import org.junit.jupiter.api.Test;
-
-import java.util.function.Consumer;
-
-import static com.whitemagicsoftware.keenquotes.LexemeType.*;
-import static com.whitemagicsoftware.keenquotes.LexerTest.testType;
-
-/**
- * Test that parsing XML documents ignores elements.
- */
-final class XmlFilterTest {
-
- @Test
- void test_Lexing_Xml_EmitTags() {
- final var actual =
- "A <em>world's</em> aflame <pre><code>ch = '\\''</code></pre>.";
- testType(
- actual,
- createXmlFilter(),
- // A world ' s aflame .
- WORD, SPACE, WORD, QUOTE_SINGLE, WORD, SPACE, WORD, SPACE, PERIOD
- );
- }
-
- @Test
- void test_Lexing_XmlAttribute_EmitTags() {
- final var actual = "<a href=\"http://x.org\">X11</a>";
- testType( actual, createXmlFilter(), WORD );
- }
-
- static Consumer<FastCharacterIterator> createXmlFilter() {
- return new XmlFilter();
- }
-}
src/test/java/com/whitemagicsoftware/keenquotes/lex/LexerTest.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.lex;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.BiFunction;
+
+import static com.whitemagicsoftware.keenquotes.lex.LexemeType.*;
+import static java.util.Arrays.asList;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Tests lexing words, numbers, punctuation, spaces, newlines, etc.
+ */
+public final class LexerTest {
+
+ @Test
+ void test_Lexing_Words_LexemeValues() {
+ testText( "abc 123", "abc", " ", "123" );
+ testText( "-123 abc", "-123", " ", "abc" );
+ }
+
+ @Test
+ void test_Lexing_Numbers_EmitNumbers() {
+ testType( ".123", NUMBER );
+ testType( "-123.", NUMBER, PERIOD );
+ testType( " 123.123.123", SPACE, NUMBER );
+ testType( "123 123\"", NUMBER, SPACE, NUMBER, QUOTE_DOUBLE );
+ testType( "-123,123.123", NUMBER );
+ testType( "...1,023...", ELLIPSIS, NUMBER, ELLIPSIS );
+ testType( "123-456", NUMBER );
+ testType( "123-", NUMBER, HYPHEN );
+ testType( "123 - 456", NUMBER, SPACE, HYPHEN, SPACE, NUMBER );
+ }
+
+ @Test
+ void test_Lexing_Words_EmitWords() {
+ testType( "abc", WORD );
+ testType( "abc abc", WORD, SPACE, WORD );
+ testType( "abc...", WORD, ELLIPSIS );
+ testType( "abc123", WORD );
+ testType( "-123abc", NUMBER, WORD );
+ testType( "-123abc123", NUMBER, WORD );
+ testType( "abc-o'-abc", WORD, HYPHEN, WORD, QUOTE_SINGLE, HYPHEN, WORD );
+ }
+
+ @Test
+ void test_Lexing_PunctuationMarks_EmitPunctuationMarks() {
+ testType( "!", PUNCT );
+ testType( ";", PUNCT );
+ testType( "\\", PUNCT );
+ testType( ".", PERIOD );
+ testType( "-", HYPHEN );
+ testType( "--", DASH );
+ testType( "---", DASH );
+ testType( "–", DASH );
+ testType( "―", DASH );
+ testType( "—", DASH );
+ testType( "—-—", DASH );
+ testType( "...", ELLIPSIS );
+ testType( ". .", ELLIPSIS );
+ testType( ". . .", ELLIPSIS );
+ testType( ".. ... ....", ELLIPSIS );
+ }
+
+ @Test
+ void test_Lexing_Quotes_EmitQuotes() {
+ testType( "'", QUOTE_SINGLE );
+ testType( "\"", QUOTE_DOUBLE );
+ testType( "‘", QUOTE_SINGLE_OPENING );
+ testType( "’", QUOTE_SINGLE_CLOSING );
+ testType( "“", QUOTE_DOUBLE_OPENING );
+ testType( "”", QUOTE_DOUBLE_CLOSING );
+ testType( "3 o'clock", NUMBER, SPACE, WORD, QUOTE_SINGLE, WORD );
+ }
+
+ @Test
+ void test_Lexing_Escapes_EmitEscapedQuotes() {
+ testType( "123\\'456\\\"", NUMBER, ESC_SINGLE, NUMBER, ESC_DOUBLE );
+ testText( "123\\'456\\\"", "123", "\\'", "456", "\\\"" );
+ }
+
+ @Test
+ void test_Lexing_Newlines_EmitNewlines() {
+ testType( "\r", EOL );
+ testType( "\n", EOL );
+ testType( "\r\n", EOL );
+ testType( "\r\n\r\n", EOP );
+ testType( "\r\n\n\r", EOP );
+ testType( "abc \r\nabc\n", WORD, SPACE, EOL, WORD, EOL );
+ }
+
+ @Test
+ void test_Lexing_Whitespace_EmitSpace() {
+ testType( " ", SPACE );
+ testType( "\n \n", EOL, SPACE, EOL );
+ }
+
+ static void testType(
+ final String actual,
+ final LexerFilter filter,
+ final LexemeType... expected ) {
+ assert actual != null;
+ assert filter != null;
+ assert expected != null;
+
+ final var list = asList( expected );
+
+ testType( actual, ( lexeme, text ) -> lexeme.getType(), filter, list );
+ }
+
+ static void testType( final String actual, final LexemeType... expected ) {
+ testType( actual, filter -> false, expected );
+ }
+
+ static void testText(
+ final String actual, final String... expected ) {
+ testType( actual, Lexeme::toString, asList( expected ) );
+ }
+
+ private static <A, E> void testType(
+ final String text,
+ final BiFunction<Lexeme, String, A> f,
+ final List<E> elements ) {
+ testType( text, f, filter -> false, elements );
+ }
+
+ private static <A, E> void testType(
+ final String text,
+ final BiFunction<Lexeme, String, A> f,
+ final LexerFilter filter,
+ final List<E> elements ) {
+ var counter = new AtomicInteger();
+
+ Lexer.lex( text, lexeme -> {
+ // Ignore the SOT and EOT lexemes (avoids duplication). Each test will
+ // include EOL, EOP, and EOT tokens due to the Lexer's algorithm.
+ if( !lexeme.isType( SOT, EOT ) && counter.get() < elements.size() ) {
+ final var expected = elements.get( counter.getAndIncrement() );
+ final var actual = f.apply( lexeme, text );
+
+ assertEquals( expected, actual );
+ }
+ }, filter );
+
+ // Ensure all expected values are matched (verify end of text reached).
+ assertEquals( elements.size(), counter.get() );
+ }
+}
src/test/java/com/whitemagicsoftware/keenquotes/lex/XmlFilterTest.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.lex;
+
+import org.junit.jupiter.api.Test;
+
+import static com.whitemagicsoftware.keenquotes.lex.LexemeType.*;
+
+/**
+ * Test that parsing XML documents ignores elements.
+ */
+final class XmlFilterTest {
+
+ @Test
+ void test_Lexing_Xml_EmitTags() {
+ final var actual =
+ "A <em>world's</em> aflame <pre><code>ch = '\\''</code></pre>.";
+ LexerTest.testType(
+ actual,
+ createXmlFilter(),
+ // A world ' s aflame .
+ WORD, SPACE, WORD, QUOTE_SINGLE, WORD, SPACE, WORD, SPACE, PERIOD
+ );
+ }
+
+ @Test
+ void test_Lexing_XmlAttribute_EmitTags() {
+ final var actual = "<a href=\"http://x.org\">X11</a>";
+ LexerTest.testType( actual, createXmlFilter(), WORD );
+ }
+
+ static LexerFilter createXmlFilter() {
+ return new XmlFilter();
+ }
+}
src/test/java/com/whitemagicsoftware/keenquotes/parser/AmbiguityResolverTest.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.parser;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static com.whitemagicsoftware.keenquotes.parser.Curler.ENTITIES;
+import static com.whitemagicsoftware.keenquotes.parser.Curler.replace;
+import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+class AmbiguityResolverTest {
+ private final Contractions CONTRACTIONS = new Contractions.Builder().build();
+
+ @Test
+ @Disabled
+ void test_Resolve_1Pass_QuotesReplaced() throws IOException {
+ test( "unambiguous-1-pass.txt" );
+ }
+
+ @Test
+ @Disabled
+ void test_Resolve_2Pass_QuotesReplaced() throws IOException {
+ test( "unambiguous-2-pass.txt" );
+ }
+
+ @Test
+ void test_Resolve_InvalidGrammar_AmbiguousRemain() throws IOException {
+ test( "invalid-grammar.txt" );
+ }
+
+ void test( final String filename ) throws IOException {
+ final var couplets = readPairs( filename );
+
+ couplets.forEach( couplet -> {
+ final var input = couplet.item1();
+ final var output = new StringBuilder( input );
+ final var expected = couplet.item2();
+ final var offset = new AtomicInteger( 0 );
+
+ AmbiguityResolver.resolve(
+ input,
+ CONTRACTIONS,
+ replace( output, offset, ENTITIES ),
+ filter -> false
+ );
+
+ final var actual = output.toString();
+ assertEquals( expected, actual );
+ } );
+ }
+}
src/test/java/com/whitemagicsoftware/keenquotes/parser/CurlerTest.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.parser;
+
+import com.whitemagicsoftware.keenquotes.lex.FilterType;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.io.IOException;
+import java.util.function.Function;
+
+import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_PLAIN;
+import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_XML;
+import static com.whitemagicsoftware.keenquotes.texts.TestResource.open;
+import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Test that English straight quotes are converted to curly quotes and
+ * apostrophes.
+ */
+public class CurlerTest {
+ private static final String SEP = System.lineSeparator();
+
+ /**
+ * Tests that straight quotes are converted to curly quotes.
+ *
+ * @throws IOException Error opening file full of fun.
+ */
+ @Test
+ public void test_Parse_StraightQuotes_CurlyQuotes() throws IOException {
+ testCurler( createCurler( FILTER_PLAIN ), "unambiguous-1-pass.txt" );
+ }
+
+ @Test
+ public void test_XmlParse_StraightQuotes_CurlyQuotes() throws IOException {
+ testCurler( createCurler( FILTER_XML ), "xml.txt" );
+ }
+
+ /**
+ * Re-enable using <a href="https://www.gutenberg.org">Project Gutenberg</a>
+ * texts.
+ *
+ * @param filename A plain text file to convert.
+ * @throws IOException Could not find, open, or read from text file.
+ */
+ @ParameterizedTest
+ @ValueSource( strings = {"habberton"} )
+ //@Disabled
+ void test_Parse_Story_Converted( final String filename ) throws IOException {
+ final var sb = new StringBuilder( 2 ^ 20 );
+
+ try( final var reader = open( filename + ".txt" ) ) {
+ String line;
+
+ while( (line = reader.readLine()) != null ) {
+ sb.append( line ).append( SEP );
+ }
+ }
+
+ final var converter = createCurler( FILTER_PLAIN );
+ System.out.println( converter.apply( sb.toString() ) );
+ }
+
+ /**
+ * Reads a file full of couplets. The first of the pair is the input,
+ * the second of the pair is the expected result. Couplets may include
+ * newline characters to indicate end of lines and end of paragraphs.
+ * Lines that start with {@code #} are ignored.
+ *
+ * @param parser The text processor capable of straight quote conversion.
+ * @throws IOException Error opening file full of fun.
+ */
+ private void testCurler(
+ final Function<String, String> parser,
+ final String filename
+ ) throws IOException {
+ final var couplets = readPairs( filename );
+
+ couplets.forEach( couplet -> {
+ final var actual = parser.apply( couplet.item1() );
+ final var expected = couplet.item2();
+
+ assertEquals( expected, actual );
+ } );
+ }
+
+ private Function<String, String> createCurler( final FilterType parserType ) {
+ return new Curler( new Contractions.Builder().build(), parserType );
+ }
+}
src/test/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitterTest.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.parser;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static com.whitemagicsoftware.keenquotes.parser.Curler.ENTITIES;
+import static com.whitemagicsoftware.keenquotes.parser.Curler.replace;
+import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+class QuoteEmitterTest {
+ private final Contractions CONTRACTIONS = new Contractions.Builder().build();
+
+ @Test
+ void test_Emit_MultipleInputs_QuotesEmitted() throws IOException {
+ final var couplets = readPairs(
+ "unambiguous-1-pass.txt" );
+
+ couplets.forEach( couplet -> {
+ final var input = couplet.item1();
+ final var output = new StringBuilder( input );
+ final var expected = couplet.item2();
+ final var offset = new AtomicInteger( 0 );
+
+ QuoteEmitter.analyze(
+ input,
+ CONTRACTIONS,
+ replace( output, offset, ENTITIES ),
+ filter -> false
+ );
+
+ final var actual = output.toString();
+ assertEquals( expected, actual );
+ } );
+ }
+}
src/test/java/com/whitemagicsoftware/keenquotes/texts/TestResource.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes.texts;
+
+import com.whitemagicsoftware.keenquotes.util.Tuple;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.file.Path;
+import java.util.Collection;
+import java.util.LinkedList;
+
+/**
+ * Responsible for opening a test resource file.
+ */
+public class TestResource {
+
+ private TestResource() {}
+
+ /**
+ * Opens a text file for reading. Callers must close the resource.
+ *
+ * @param filename The file to open.
+ * @return An instance of {@link BufferedReader} that can be used to
+ * read all the lines in the file.
+ */
+ public static BufferedReader open( final String filename ) {
+ return new TestResource().openResource( filename );
+ }
+
+ /**
+ * Reads a set of coupled lines, which represent the input and expected
+ * results after encoding the quotation marks. This loads the entire file
+ * into memory.
+ *
+ * @param filename The name of the file to parse.
+ * @return The set of input and expected results.
+ * @throws IOException Could not open the file for reading.
+ */
+ public static Collection<Tuple<String, String>> readPairs(
+ final String filename
+ ) throws IOException {
+ final var result = new LinkedList<Tuple<String, String>>();
+
+ try( final var reader = open( filename ) ) {
+ String line;
+ String testLine = "";
+ String expected = "";
+
+ while( (line = reader.readLine()) != null ) {
+ if( line.startsWith( "#" ) || line.isBlank() ) {continue;}
+
+ // Read the first line of the couplet.
+ if( testLine.isBlank() ) {
+ testLine = line;
+ continue;
+ }
+
+ // Read the second line of the couplet.
+ if( expected.isBlank() ) {
+ expected = line;
+ }
+
+ testLine = unescapeEol( testLine );
+ expected = unescapeEol( expected );
+
+ result.add( new Tuple<>( testLine, expected ) );
+
+ testLine = "";
+ expected = "";
+ }
+ }
+
+ return result;
+ }
+
+ private BufferedReader openResource( final String filename ) {
+ final var is = getClass().getResourceAsStream( filename );
+
+ // If the resource cannot be found, throw a developer-friendly exception.
+ if( is == null ) {
+ Exception ex;
+
+ try {
+ final var resource = getClass().getResource( "." );
+ final var path = resource == null
+ ? filename
+ : Path.of( resource.toURI().getPath(), filename ).toString();
+ ex = new FileNotFoundException( path );
+ } catch( final Exception _ex ) {
+ ex = _ex;
+ }
+
+ // We don't need no steenkin' checked exceptions.
+ throw new RuntimeException( ex );
+ }
+
+ return new BufferedReader( new InputStreamReader( is ) );
+ }
+
+ private static String unescapeEol( final String s ) {
+ return String.join( "\n", s.split( "\\\\n" ) );
+ }
+}
src/test/resources/com/whitemagicsoftware/keenquotes/smartypants.txt
-# ########################################################################
-# Escaped quotes
-# ########################################################################
-She stood 5\'7\".
-She stood 5'7".
-
-\"What?\"
-"What?"
-
-# ########################################################################
-# Primes (single, double)
-# ########################################################################
-Alice's friend is 6'3" tall.
-Alice&apos;s friend is 6&prime;3&Prime; tall.
-
-Bob's table is 5'' × 4''.
-Bob&apos;s table is 5&Prime; × 4&Prime;.
-
-Bob's table is 5'×4'.
-Bob&apos;s table is 5&prime;×4&prime;.
-
-What's this '-5.5'',' '-10.2'' cm,' and another '-7.25''' thing?
-What&apos;s this &lsquo;-5.5&Prime;,&rsquo; &lsquo;-10.2&Prime; cm,&rsquo; and another &lsquo;-7.25&Prime;&rsquo; thing?
-
-'What's this -5.5'' thing?'
-&lsquo;What&apos;s this -5.5&Prime; thing?&rsquo;
-
-+7.9'' is weird.
-+7.9&Prime; is weird.
-
-12" record, 5'10" height
-12&Prime; record, 5&prime;10&Prime; height
-
-Use 11.5"×14.25" virtual paper!
-Use 11.5&Prime;×14.25&Prime; virtual paper!
-
-Angular measure 3° 5' 30" means 3 degs, 5 arcmins, & 30 arcsecs.
-Angular measure 3° 5&prime; 30&Prime; means 3 degs, 5 arcmins, & 30 arcsecs.
-
-Play 12½" records.
-Play 12½&Prime; records.
-
-# ########################################################################
-# Double quotes
-# ########################################################################
-"I am Sam"
-&ldquo;I am Sam&rdquo;
-
-"...even better!"
-&ldquo;...even better!&rdquo;
-
-"It was so," said he.
-&ldquo;It was so,&rdquo; said he.
-
-"What cries in the streets"?
-&ldquo;What cries in the streets&rdquo;?
-
-With "air quotes" in the middle.
-With &ldquo;air quotes&rdquo; in the middle.
-
-With--"air quotes"--and dashes.
-With--&ldquo;air quotes&rdquo;--and dashes.
-
-"Not all open quotes are closed...
-&ldquo;Not all open quotes are closed...
-
-"---retroactively!"
-&ldquo;---retroactively!&rdquo;
-
-"And this" and "and this" and "and this" and "and another."
-&ldquo;And this&rdquo; and &ldquo;and this&rdquo; and &ldquo;and this&rdquo; and &ldquo;and another.&rdquo;
-
-"Will'll invite?" Mrs. Thorne asked;\n"because he's coming--he's at the gate."
-&ldquo;Will&apos;ll invite?&rdquo; Mrs. Thorne asked;\n&ldquo;because he&apos;s coming--he&apos;s at the gate.&rdquo;
-
-Model "T2000"
-Model &ldquo;T2000&rdquo;
-
-# ########################################################################
-# Single quotes
-# ########################################################################
-'I am Sam'
-&lsquo;I am Sam&rsquo;
-
-'It was so,' said he.
-&lsquo;It was so,&rsquo; said he.
-
-'...even better!'
-&lsquo;...even better!&rsquo;
-
-With 'quotes' in the middle.
-With &lsquo;quotes&rsquo; in the middle.
-
-With--'imaginary'--dashes.
-With--&lsquo;imaginary&rsquo;--dashes.
-
-''Cause I don't like it, 's why,' said Pat.
-&lsquo;&apos;Cause I don&apos;t like it, &apos;s why,&rsquo; said Pat.
-
-'It's a beautiful day!'
-&lsquo;It&apos;s a beautiful day!&rsquo;
-
-# ########################################################################
-# Possessives
-# ########################################################################
-Sam's Sams' and the Ross's roses' thorns were prickly.
-Sam&apos;s Sams&apos; and the Ross&apos;s roses&apos; thorns were prickly.
-
-# ########################################################################
-# Inside contractions (no leading/trailing apostrophes)
-# ########################################################################
-I don't like it: I love's it!
-I don&apos;t like it: I love&apos;s it!
-
-We'd've thought that pancakes'll be sweeter there.
-We&apos;d&apos;ve thought that pancakes&apos;ll be sweeter there.
-
-She'd be coming o'er when the horse'd gone to pasture...
-She&apos;d be coming o&apos;er when the horse&apos;d gone to pasture...
-
-# ########################################################################
-# Beginning contractions (leading apostrophes)
-# ########################################################################
-'Twas and 'tis whate'er lay 'twixt dawn and dusk 'n River Styx.
-&apos;Twas and &apos;tis whate&apos;er lay &apos;twixt dawn and dusk &apos;n River Styx.
-
-# ########################################################################
-# Outside contractions (leading and trailing, no middle)
-# ########################################################################
-Salt 'n' vinegar, fish-'n'-chips, sugar 'n spice!
-Salt &apos;n&apos; vinegar, fish-&apos;n&apos;-chips, sugar &apos;n spice!
-
-# ########################################################################
-# Ending contractions (trailing apostrophes)
-# ########################################################################
-Didn' get th' message.
-Didn&apos; get th&apos; message.
-
-Namsayin', y'know what I'ma sayin'?
-Namsayin&apos;, y&apos;know what I&apos;ma sayin&apos;?
-
-# ########################################################################
-# Decades
-# ########################################################################
-The Roaring '20s had the best music, no?
-The Roaring &apos;20s had the best music, no?
-
-Took place in '04, yes'm!
-Took place in &apos;04, yes&apos;m!
-
-# ########################################################################
-# Consecutive quotes
-# ########################################################################
-"'I'm trouble.'"
-&ldquo;&lsquo;I&apos;m trouble.&rsquo;&rdquo;
-
-'"Trouble's my name."'
-&lsquo;&ldquo;Trouble&apos;s my name.&rdquo;&rsquo;
-
-# ########################################################################
-# Nested quotations
-# ########################################################################
-"'Here I am,' said Sam"
-&ldquo;&lsquo;Here I am,&rsquo; said Sam&rdquo;
-
-'"Here I am," said Sam'
-&lsquo;&ldquo;Here I am,&rdquo; said Sam&rsquo;
-
-'Hello, "Dr. Brown," what's your real name?'
-&lsquo;Hello, &ldquo;Dr. Brown,&rdquo; what&apos;s your real name?&rsquo;
-
-"'Twas, t'wasn't thy name, 'twas it?" said Jim "the Barber" Brown.
-&ldquo;&apos;Twas, t&apos;wasn&apos;t thy name, &apos;twas it?&rdquo; said Jim &ldquo;the Barber&rdquo; Brown.
-
-"'I'm in danger. Help? Take me to'--an address on Van Ness Avenue.
-&ldquo;&lsquo;I&apos;m in danger. Help? Take me to&rsquo;--an address on Van Ness Avenue.
-
-"Ah," she said, "you knew! He told you--and you said 'my dear'! How could you?!"
-&ldquo;Ah,&rdquo; she said, &ldquo;you knew! He told you--and you said &lsquo;my dear&rsquo;! How could you?!&rdquo;
-
-shouldn't drop letters, nor say "ain't" or "hain't."
-shouldn&apos;t drop letters, nor say &ldquo;ain&apos;t&rdquo; or &ldquo;hain&apos;t.&rdquo;
-
-"’Kearney lives on the banks of Killarney—’
-&ldquo;’Kearney lives on the banks of Killarney—’
-
-# ########################################################################
-# Mixed
-# ########################################################################
-"She said, 'That's Sam's'," said the Sams' cat.
-&ldquo;She said, &lsquo;That&apos;s Sam&apos;s&rsquo;,&rdquo; said the Sams&apos; cat.
-
-"Jane said, ''E'll be spooky, Sam's son with the jack-o'-lantern!'" said the O'Mally twins'---y'know---ghosts in unison.
-&ldquo;Jane said, &lsquo;&apos;E&apos;ll be spooky, Sam&apos;s son with the jack-o&apos;-lantern!&rsquo;&rdquo; said the O&apos;Mally twins&apos;---y&apos;know---ghosts in unison.
-
-'He's at Sam's'
-&lsquo;He&apos;s at Sam&apos;s&rsquo;
-
-ma'am
-ma&apos;am
-
-'Twas midnight
-&apos;Twas midnight
-
-"Hello," said the spider. "'Shelob' is my name."
-&ldquo;Hello,&rdquo; said the spider. &ldquo;&lsquo;Shelob&rsquo; is my name.&rdquo;
-
-'He said, \"I want to go.\"' Were you alive in the 70's?
-&lsquo;He said, "I want to go."&rsquo; Were you alive in the 70&apos;s?
-
-"That's a 'magic' sock."
-&ldquo;That&apos;s a &lsquo;magic&rsquo; sock.&rdquo;
-
-'That's a "magic" sock.'
-&lsquo;That&apos;s a &ldquo;magic&rdquo; sock.&rsquo;
-
-'That's a "very 'magic'" sock.'
-&lsquo;That&apos;s a &ldquo;very &lsquo;magic&rsquo;&rdquo; sock.&rsquo;
-
-Website! Company Name, Inc. ("Company Name" or "Company") recommends reading carefully:
-Website! Company Name, Inc. (&ldquo;Company Name&rdquo; or &ldquo;Company&rdquo;) recommends reading carefully:
-
-Website! Company Name, Inc. ('Company Name' or 'Company') recommends reading carefully:
-Website! Company Name, Inc. (&lsquo;Company Name&rsquo; or &lsquo;Company&rsquo;) recommends reading carefully:
-
-Workin' hard
-Workin&apos; hard
-
-"She said, 'Llamas'll languish, they'll--
-&ldquo;She said, &lsquo;Llamas&apos;ll languish, they&apos;ll--
-
-'The 70s are my favorite numbers,' she said.
-&lsquo;The 70s are my favorite numbers,&rsquo; she said.
-
-'70s fashion was weird.
-&apos;70s fashion was weird.
-
-Model \"T2000\"
-Model "T2000"
-
-The 2's complement.\n\n'Yar, binary!'
-The 2&apos;s complement.\n\n&lsquo;Yar, binary!&rsquo;
-
-The Who's complement.\n\n\n"Paragraph boundary!"
-The Who&apos;s complement.\n\n\n&ldquo;Paragraph boundary!&rdquo;
-
-'Oak,' 'elm,' and 'beech' are names of trees. So is 'pine.'
-&lsquo;Oak,&rsquo; &lsquo;elm,&rsquo; and &lsquo;beech&rsquo; are names of trees. So is &lsquo;pine.&rsquo;
-
-'A', 'B', and 'C' are letters.
-&lsquo;A&rsquo;, &lsquo;B&rsquo;, and &lsquo;C&rsquo; are letters.
-
-'He said, "Thinkin'."'
-&lsquo;He said, &ldquo;Thinkin&apos;.&rdquo;&rsquo;
-
-''Sup, Doc?'
-&lsquo;&apos;Sup, Doc?&rsquo;
-
-'Wouldn't 'a brushed a fly off the top twig,' exclaimed Ashbrook,
-&lsquo;Wouldn&apos;t &apos;a brushed a fly off the top twig,&rsquo; exclaimed Ashbrook,
-
-"But ye've been 'nation quiet all the day though," said the lad...
-&ldquo;But ye&apos;ve been &apos;nation quiet all the day though,&rdquo; said the lad...
-
-"Not much o' fellow-creaturs, I think, Miss; all I know—my old master, as war a knowin' man, used to say, says he, 'If e'er I sow my wheat wi'out brinin', I'm a Dutchman,' says he; an' that war as much as to say as a Dutchman war a fool, or next door. Nay, nay, I aren't goin' to bother mysen about Dutchmen. There's fools enoo, an' rogues enoo, wi'out lookin' i' books for 'em."
-&ldquo;Not much o&apos; fellow-creaturs, I think, Miss; all I know—my old master, as war a knowin&apos; man, used to say, says he, &lsquo;If e&apos;er I sow my wheat wi&apos;out brinin&apos;, I&apos;m a Dutchman,&rsquo; says he; an&apos; that war as much as to say as a Dutchman war a fool, or next door. Nay, nay, I aren&apos;t goin&apos; to bother mysen about Dutchmen. There&apos;s fools enoo, an&apos; rogues enoo, wi&apos;out lookin&apos; i&apos; books for &apos;em.&rdquo;
-
-Computer says, "'It is mysteries---'"
-Computer says, &ldquo;&lsquo;It is mysteries---&rsquo;&rdquo;
-
-# ########################################################################
-# Ambiguous (no solution)
-# ########################################################################
-She said, 'Cause of death?\n\n'Old age, there's no doubt.'
-She said, 'Cause of death?\n\n&lsquo;Old age, there&apos;s no doubt.&rsquo;
-
-She said, 'Cause I said so!\n\n'If we're done, I've a secret.'
-She said, 'Cause I said so!\n\n&lsquo;If we&apos;re done, I&apos;ve a secret.&rsquo;
-
-Book 'em, Danno. Rock 'n' roll. 'Cause 'twas the season.
-Book &apos;em, Danno. Rock &apos;n&apos; roll. 'Cause &apos;twas the season.
-
-'Bout that time I says, 'Boys! I been thinkin' 'bout th' Universe.'
-'Bout that time I says, &lsquo;Boys! I been thinkin&apos; 'bout th&apos; Universe.&rsquo;
-
-"Jarvis, sir? Why, him as 'listed some years ago, and fought under
-&ldquo;Jarvis, sir? Why, him as 'listed some years ago, and fought under
-
-"You 'cause---" all fifteen years' worth.
-&ldquo;You &apos;cause---&rdquo; all fifteen years' worth.
src/test/resources/com/whitemagicsoftware/keenquotes/texts/ambiguous-n-pass.txt
+# ########################################################################
+# Ambiguous (no solution)
+# ########################################################################
+Book 'em, Danno. Rock 'n' roll. ''Cause 'twas the season.
+Book &apos;em, Danno. Rock &apos;n&apos; roll. &lsquo;'Cause &apos;twas the season.
+
+"Jarvis, sir? Why, him as 'listed some years ago, and fought under
+&ldquo;Jarvis, sir? Why, him as 'listed some years ago, and fought under
src/test/resources/com/whitemagicsoftware/keenquotes/texts/invalid-grammar.txt
+# """wtf"""
+# &ldquo;&ldquo;&ldquo;wtf&rdquo;&rdquo;&rdquo;
+
+# '''wtf'''
+# &lsquo;&lsquo;&lsquo;wtf&rsquo;&rsquo;&rsquo;
src/test/resources/com/whitemagicsoftware/keenquotes/texts/unambiguous-1-pass.txt
+# ########################################################################
+# Escaped quotes
+# ########################################################################
+She stood 5\'7\".
+She stood 5'7".
+
+\"What?\"
+"What?"
+
+# ########################################################################
+# Primes (single, double)
+# ########################################################################
+Alice's friend is 6'3" tall.
+Alice&apos;s friend is 6&prime;3&Prime; tall.
+
++7.9'' is weird.
++7.9&Prime; is weird.
+
+That's a 35'×10" yacht!
+That&apos;s a 35&prime;×10&Prime; yacht!
+
+Bob's table is 5'×4'.
+Bob&apos;s table is 5&prime;×4&prime;.
+
+Bob's table is 5'' × 4''.
+Bob&apos;s table is 5&Prime; × 4&Prime;.
+
+'What's this -5.5'' thing?'
+&lsquo;What&apos;s this -5.5&Prime; thing?&rsquo;
+
+What's this '-5.5'',' '-10.2'' cm,' and another '-7.25''' thing?
+What&apos;s this &lsquo;-5.5&Prime;,&rsquo; &lsquo;-10.2&Prime; cm,&rsquo; and another &lsquo;-7.25&Prime;&rsquo; thing?
+
+Play 12½" records.
+Play 12½&Prime; records.
+
+12" record, 5'10" height
+12&Prime; record, 5&prime;10&Prime; height
+
+Use 11.5"×14.25" virtual paper!
+Use 11.5&Prime;×14.25&Prime; virtual paper!
+
+Angular measure 3° 5' 30" means 3 degs, 5 arcmins, & 30 arcsecs.
+Angular measure 3° 5&prime; 30&Prime; means 3 degs, 5 arcmins, & 30 arcsecs.
+
+# ########################################################################
+# Double quotes
+# ########################################################################
+"I am Sam"
+&ldquo;I am Sam&rdquo;
+
+"...even better!"
+&ldquo;...even better!&rdquo;
+
+"It was so," said he.
+&ldquo;It was so,&rdquo; said he.
+
+"What cries in the streets"?
+&ldquo;What cries in the streets&rdquo;?
+
+With "air quotes" in the middle.
+With &ldquo;air quotes&rdquo; in the middle.
+
+With--"air quotes"--and dashes.
+With--&ldquo;air quotes&rdquo;--and dashes.
+
+"Not all open quotes are closed...
+&ldquo;Not all open quotes are closed...
+
+"---retroactively!"
+&ldquo;---retroactively!&rdquo;
+
+"And this" and "and this" and "and this" and "and another."
+&ldquo;And this&rdquo; and &ldquo;and this&rdquo; and &ldquo;and this&rdquo; and &ldquo;and another.&rdquo;
+
+"Will'll invite?" Mrs. Thorne asked;\n"because he's coming--he's at the gate."
+&ldquo;Will&apos;ll invite?&rdquo; Mrs. Thorne asked;\n&ldquo;because he&apos;s coming--he&apos;s at the gate.&rdquo;
+
+Model "T2000"
+Model &ldquo;T2000&rdquo;
+
+# ########################################################################
+# Single quotes
+# ########################################################################
+'I am Sam'
+&lsquo;I am Sam&rsquo;
+
+'It was so,' said he.
+&lsquo;It was so,&rsquo; said he.
+
+It's the 80's...
+It&apos;s the 80&apos;s...
+
+'...even better!'
+&lsquo;...even better!&rsquo;
+
+With--'imaginary'--dashes.
+With--&lsquo;imaginary&rsquo;--dashes.
+
+'It's a beautiful day!'
+&lsquo;It&apos;s a beautiful day!&rsquo;
+
+# ########################################################################
+# Inside contractions (no leading/trailing apostrophes)
+# ########################################################################
+I don't like it: I love's it!
+I don&apos;t like it: I love&apos;s it!
+
+We'd've thought that pancakes'll be sweeter there.
+We&apos;d&apos;ve thought that pancakes&apos;ll be sweeter there.
+
+She'd be coming o'er when the horse'd gone to pasture...
+She&apos;d be coming o&apos;er when the horse&apos;d gone to pasture...
+
+# ########################################################################
+# Beginning contractions (leading apostrophes)
+# ########################################################################
+'Twas and 'tis whate'er lay 'twixt dawn and dusk 'n River Styx.
+&apos;Twas and &apos;tis whate&apos;er lay &apos;twixt dawn and dusk &apos;n River Styx.
+
+# ########################################################################
+# Outside contractions (leading and trailing, no middle)
+# ########################################################################
+Fish-'n'-chips!
+Fish-&apos;n&apos;-chips!
+
+Salt 'n' vinegar, fish-'n'-chips, sugar 'n spice!
+Salt &apos;n&apos; vinegar, fish-&apos;n&apos;-chips, sugar &apos;n spice!
+
+# ########################################################################
+# Ending contractions (trailing apostrophes)
+# ########################################################################
+Didn' get th' message.
+Didn&apos; get th&apos; message.
+
+Namsayin', y'know what I'ma sayin'?
+Namsayin&apos;, y&apos;know what I&apos;ma sayin&apos;?
+
+# ########################################################################
+# Decades
+# ########################################################################
+The Roaring '20s had the best music, no?
+The Roaring &apos;20s had the best music, no?
+
+Took place in '04, yes'm!
+Took place in &apos;04, yes&apos;m!
+
+# ########################################################################
+# Consecutive quotes
+# ########################################################################
+"'I'm trouble.'"
+&ldquo;&lsquo;I&apos;m trouble.&rsquo;&rdquo;
+
+'"Trouble's my name."'
+&lsquo;&ldquo;Trouble&apos;s my name.&rdquo;&rsquo;
+
+# ########################################################################
+# Nested quotations
+# ########################################################################
+"'Here I am,' said Sam"
+&ldquo;&lsquo;Here I am,&rsquo; said Sam&rdquo;
+
+'"Here I am," said Sam'
+&lsquo;&ldquo;Here I am,&rdquo; said Sam&rsquo;
+
+'Hello, "Dr. Brown," what's your real name?'
+&lsquo;Hello, &ldquo;Dr. Brown,&rdquo; what&apos;s your real name?&rsquo;
+
+"'Twas, t'wasn't thy name, 'twas it?" said Jim "the Barber" Brown.
+&ldquo;&apos;Twas, t&apos;wasn&apos;t thy name, &apos;twas it?&rdquo; said Jim &ldquo;the Barber&rdquo; Brown.
+
+"'I'm in danger. Help? Take me to'--an address on Van Ness Avenue.
+&ldquo;&lsquo;I&apos;m in danger. Help? Take me to&rsquo;--an address on Van Ness Avenue.
+
+"Ah," she said, "you knew! He told you--and you said 'my dear'! How could you?!"
+&ldquo;Ah,&rdquo; she said, &ldquo;you knew! He told you--and you said &lsquo;my dear&rsquo;! How could you?!&rdquo;
+
+shouldn't drop letters, nor say "ain't" or "hain't."
+shouldn&apos;t drop letters, nor say &ldquo;ain&apos;t&rdquo; or &ldquo;hain&apos;t.&rdquo;
+
+"’Kearney lives on the banks of Killarney—’
+&ldquo;’Kearney lives on the banks of Killarney—’
+
+# ########################################################################
+# Mixed
+# ########################################################################
+ma'am
+ma&apos;am
+
+'Twas midnight
+&apos;Twas midnight
+
+"Hello," said the spider. "'Shelob' is my name."
+&ldquo;Hello,&rdquo; said the spider. &ldquo;&lsquo;Shelob&rsquo; is my name.&rdquo;
+
+'He said, \"I want to go.\"' Were you alive in the 70's?
+&lsquo;He said, "I want to go."&rsquo; Were you alive in the 70&apos;s?
+
+"That's a 'magic' sock."
+&ldquo;That&apos;s a &lsquo;magic&rsquo; sock.&rdquo;
+
+'That's a "magic" sock.'
+&lsquo;That&apos;s a &ldquo;magic&rdquo; sock.&rsquo;
+
+'That's a "very 'magic'" sock.'
+&lsquo;That&apos;s a &ldquo;very &lsquo;magic&rsquo;&rdquo; sock.&rsquo;
+
+Website! Company Name, Inc. ("Company Name" or "Company") recommends reading carefully:
+Website! Company Name, Inc. (&ldquo;Company Name&rdquo; or &ldquo;Company&rdquo;) recommends reading carefully:
+
+Website! Company Name, Inc. ('Company Name' or 'Company') recommends reading carefully:
+Website! Company Name, Inc. (&lsquo;Company Name&rsquo; or &lsquo;Company&rsquo;) recommends reading carefully:
+
+Workin' hard
+Workin&apos; hard
+
+"She said, 'Llamas'll languish, they'll--
+&ldquo;She said, &lsquo;Llamas&apos;ll languish, they&apos;ll--
+
+'The 70s are my favorite numbers,' she said.
+&lsquo;The 70s are my favorite numbers,&rsquo; she said.
+
+'70s fashion was weird.
+&apos;70s fashion was weird.
+
+Model \"T2000\"
+Model "T2000"
+
+The 2's complement.\n\n'Yar, binary!'
+The 2&apos;s complement.\n\n&lsquo;Yar, binary!&rsquo;
+
+The Who's complement.\n\n\n"Paragraph boundary!"
+The Who&apos;s complement.\n\n\n&ldquo;Paragraph boundary!&rdquo;
+
+'Oak,' 'elm,' and 'beech' are names of trees. So is 'pine.'
+&lsquo;Oak,&rsquo; &lsquo;elm,&rsquo; and &lsquo;beech&rsquo; are names of trees. So is &lsquo;pine.&rsquo;
+
+'He said, "Thinkin'."'
+&lsquo;He said, &ldquo;Thinkin&apos;.&rdquo;&rsquo;
+
+Computer says, "'It is mysteries---'"
+Computer says, &ldquo;&lsquo;It is mysteries---&rsquo;&rdquo;
src/test/resources/com/whitemagicsoftware/keenquotes/texts/unambiguous-2-pass.txt
+# ########################################################################
+# Single quotes
+# ########################################################################
+
+With 'quotes' in the middle.
+With &lsquo;quotes&rsquo; in the middle.
+
+''Cause I don't like it, 's why,' said Pat.
+&lsquo;&apos;Cause I don&apos;t like it, &apos;s why,&rsquo; said Pat.
+
+Book 'em, Danno. Rock 'n' roll. 'Cause 'twas the season.
+Book &apos;em, Danno. Rock &apos;n&apos; roll. &apos;Cause &apos;twas the season.
+
+She's a cat y'all couldn't've known!
+She&apos;s a cat y&apos;all couldn&apos;t&apos;ve known!
+
+She said, 'Cause of death?\n\n'Old age, there's no doubt.'
+She said, &apos;Cause of death?\n\n&lsquo;Old age, there&apos;s no doubt.&rsquo;
+
+She said, 'Cause I said so!\n\n'If we're done, I've a secret.'
+She said, &apos;Cause I said so!\n\n&lsquo;If we&apos;re done, I&apos;ve a secret.&rsquo;
+
+'Twas and 'tis whate'er lay 'twixt dawn and dusk 'n River Styx.
+&apos;Twas and &apos;tis whate&apos;er lay &apos;twixt dawn and dusk &apos;n River Styx.
+
+# ########################################################################
+# Possessives
+# ########################################################################
+Sam's Sams' and the Ross's roses' thorns were prickly.
+Sam&apos;s Sams&apos; and the Ross&apos;s roses&apos; thorns were prickly.
+
+# ########################################################################
+# Mixed
+# ########################################################################
+"She said, 'That's Sam's'," said the Sams' cat.
+&ldquo;She said, &lsquo;That&apos;s Sam&apos;s&rsquo;,&rdquo; said the Sams&apos; cat.
+
+"Jane said, ''E'll be spooky, Sam's son with the jack-o'-lantern!'" said the O'Mally twins'---y'know---ghosts in unison.
+&ldquo;Jane said, &lsquo;&apos;E&apos;ll be spooky, Sam&apos;s son with the jack-o&apos;-lantern!&rsquo;&rdquo; said the O&apos;Mally twins&apos;---y&apos;know---ghosts in unison.
+
+''Sup, Doc?'
+&lsquo;&apos;Sup, Doc?&rsquo;
+
+'He's at Sam's'
+&lsquo;He&apos;s at Sam&apos;s&rsquo;
+
+'A', 'B', and 'C' are letters.
+&lsquo;A&rsquo;, &lsquo;B&rsquo;, and &lsquo;C&rsquo; are letters.
+
+'Wouldn't 'a brushed a fly off the top twig,' exclaimed Ashbrook,
+&lsquo;Wouldn&apos;t &apos;a brushed a fly off the top twig,&rsquo; exclaimed Ashbrook,
+
+"But ye've been 'nation quiet all the day though," said the lad...
+&ldquo;But ye&apos;ve been &apos;nation quiet all the day though,&rdquo; said the lad...
+
+"Not much o' fellow-creaturs, I think, Miss; all I know—my old master, as war a knowin' man, used to say, says he, 'If e'er I sow my wheat wi'out brinin', I'm a Dutchman,' says he; an' that war as much as to say as a Dutchman war a fool, or next door. Nay, nay, I aren't goin' to bother mysen about Dutchmen. There's fools enoo, an' rogues enoo, wi'out lookin' i' books for 'em."
+&ldquo;Not much o&apos; fellow-creaturs, I think, Miss; all I know—my old master, as war a knowin&apos; man, used to say, says he, &lsquo;If e&apos;er I sow my wheat wi&apos;out brinin&apos;, I&apos;m a Dutchman,&rsquo; says he; an&apos; that war as much as to say as a Dutchman war a fool, or next door. Nay, nay, I aren&apos;t goin&apos; to bother mysen about Dutchmen. There&apos;s fools enoo, an&apos; rogues enoo, wi&apos;out lookin&apos; i&apos; books for &apos;em.&rdquo;
+
+But I must leave the proofs to those who 've seen 'em;\n\nBut this I heard her say, and can't be wrong\n\nAnd all may think which way their judgments lean 'em,\n\n''T is strange---the Hebrew noun which means "I am,"\n\nThe English always use to govern d--n.'
+But I must leave the proofs to those who &apos;ve seen &apos;em;\n\nBut this I heard her say, and can&apos;t be wrong\n\nAnd all may think which way their judgments lean &apos;em,\n\n&lsquo;&apos;T is strange---the Hebrew noun which means &ldquo;I am,&rdquo;\n\nThe English always use to govern d--n.&rsquo;
+
+# ########################################################################
+# Nested
+# ########################################################################
+"'"nest"'"
+&ldquo;&lsquo;&ldquo;nest&rdquo;&rsquo;&rdquo;
+
+'Bout that time I says, 'Boys! I been thinkin' 'bout th' Universe.'
+&apos;Bout that time I says, &lsquo;Boys! I been thinkin&apos; &apos;bout th&apos; Universe.&rsquo;
+
+"You 'cause---" all fifteen years' worth.
+&ldquo;You &apos;cause---&rdquo; all fifteen years&apos; worth.
+
src/test/resources/com/whitemagicsoftware/keenquotes/texts/xml.txt
+<em>'twas</em>
+<em>&apos;twas</em>
+
+''<em>Twas</em> happening!'
+&lsquo;&apos;<em>Twas</em> happening!&rsquo;
+
+<strong>'Twas</strong> <kbd>'</kbd> in <tex>Knuth's TeX</tex>
+<strong>&apos;Twas</strong> <kbd>'</kbd> in <tex>Knuth's TeX</tex>
+
+<bold>'twas</bold> redeemed for the <em>cat</em>'s eye
+<bold>&apos;twas</bold> redeemed for the <em>cat</em>&apos;s eye
+
+<a href="https://x.org" title="X's Homepage">X11's bomb</a>
+<a href="https://x.org" title="X's Homepage">X11&apos;s bomb</a>
Delta7474 lines added, 6517 lines removed, 957-line increase