Add comments, rename methods, emit ambiguities

Author	Dave Jarvis <email>
Date	2021-06-15 18:55:05 GMT-0700
Commit	3d0f24912f298335bd55f42ca7302f2078034299
Parent	9d7091e

lib/src/main/java/com/keenwrite/quotes/KeenQuotes.java

		+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
		+package com.keenwrite.quotes;
		+
		+import java.util.ArrayList;
		+import java.util.Map;
		+import java.util.function.Consumer;
		+
		+import static com.keenwrite.quotes.TokenType.*;
		+import static java.util.Collections.sort;
		+
		+/**
		+ * Responsible for replacing {@link Token} instances with equivalent smart
		+ * quotes (or straight quotes). This will inform the caller when ambiguous
		+ * quotes cannot be reliably resolved.
		+ */
		+public final class KeenQuotes {
		+ private static final Map<TokenType, String> REPLACEMENTS = Map.of(
		+ QUOTE_OPENING_SINGLE, "‘",
		+ QUOTE_CLOSING_SINGLE, "’",
		+ QUOTE_OPENING_DOUBLE, "“",
		+ QUOTE_CLOSING_DOUBLE, "”",
		+ QUOTE_STRAIGHT_SINGLE, "'",
		+ QUOTE_STRAIGHT_DOUBLE, "\"",
		+ QUOTE_APOSTROPHE, "'",
		+ QUOTE_PRIME_SINGLE, "′",
		+ QUOTE_PRIME_DOUBLE, "″"
		+ );
		+
		+ /**
		+ * Converts straight quotes to curly quotes and primes. Any quotation marks
		+ * that cannot be converted are passed to the {@link Consumer}.
		+ *
		+ * @param text The text to parse.
		+ * @param unresolved Recipient for ambiguous {@link Lexeme}s.
		+ * @return The given text string with as many straight quotes converted to
		+ * curly quotes as is feasible.
		+ */
		+ public static String convert(
		+ final String text, final Consumer<Lexeme> unresolved ) {
		+ final var parser = new Parser( text );
		+ final var tokens = new ArrayList<Token>();
		+
		+ // Parse the tokens and consume all unresolved lexemes.
		+ parser.parse( tokens::add, unresolved );
		+
		+ // The parser may emit tokens in any order.
		+ sort( tokens );
		+
		+ final var result = new StringBuilder( text.length() );
		+ var position = 0;
		+
		+ for( final var token : tokens ) {
		+ if( position <= token.began() ) {
		+ result.append( text, position, token.began() );
		+ result.append( REPLACEMENTS.get( token.getType() ) );
		+ }
		+
		+ position = token.ended();
		+ }
		+
		+ return result.append( text.substring( position ) ).toString();
		+ }
		+}

lib/src/main/java/com/keenwrite/quotes/Lexeme.java

		* a similar approach to run-length encoding).
		*/
		-public class Lexeme implements Comparable<Lexeme> {
		+public final class Lexeme implements Comparable<Lexeme> {
		+ /**
		+ * Signifies an invalid index to help distinguish EOT/SOT.
		+ */
		+ private static final int E_INDEX = -2;
		+
		/**
		* Denotes there are no more lexemes: the end of text (EOT) has been reached.
		* The beginning index differentiates between EOT and SOT.
		*/
		- public static final Lexeme EOT = new Lexeme( FLAG, -1, -2 );
		+ public static final Lexeme EOT = new Lexeme( FLAG, -1, E_INDEX );

		/**
		* Denotes parsing at the start of text (SOT). This is useful to avoid
		* branching conditions while iterating. The beginning index differentiates
		* between EOT and SOT.
		*/
		- public static final Lexeme SOT = new Lexeme( FLAG, 0, -2 );
		+ public static final Lexeme SOT = new Lexeme( FLAG, 0, E_INDEX );

		private final LexemeType mType;
		private final int mBegan;
		private final int mEnded;

		/**
		* Create a lexeme that represents a section of the text.
		+ *
		+ * @param type Type of {@link Lexeme} to create.
		+ * @param began Offset into the text where this instance starts (0-based).
		+ * @param ended Offset into the text where this instance stops (0-based).
		*/
		private Lexeme( final LexemeType type, final int began, final int ended ) {
		assert type != null;
		- assert began >= 0 \|\| ended == -2;
		- assert ended >= began \|\| ended == -2;
		+ assert began >= 0 \|\| ended == E_INDEX;
		+ assert ended >= began \|\| ended == E_INDEX;

		mType = type;

		*/
		public String toString( final String text ) {
		+ assert text != null;
		return text.substring( mBegan, mEnded );
		}

		+ /**
		+ * Answers whether the given {@link LexemeType} is the same as this
		+ * instance's internal {@link LexemeType}.
		+ *
		+ * @param type The {@link LexemeType} to compare.
		+ * @return {@code true} if the given {@link LexemeType} is equal to the
		+ * internal {@link LexemeType}.
		+ */
		public boolean isType( final LexemeType type ) {
		+ assert type != null;
		return mType == type;
		}

		+ /**
		+ * Answers whether any of the given {@link LexemeType}s matches this
		+ * instance's internal {@link LexemeType}.
		+ *
		+ * @param types The {@link LexemeType}s to compare.
		+ * @return {@code true} if the internal {@link LexemeType} matches any one
		+ * of the given {@link LexemeType}s.
		+ */
		public boolean anyType( final LexemeType... types ) {
		+ assert types != null;
		+
		for( final var type : types ) {
		if( mType == type ) {
		return true;
		}
		}

		return false;
		- }
		-
		- LexemeType getType() {
		- return mType;
		}

		- int began() {
		+ public int began() {
		return mBegan;
		}

		- int ended() {
		+ public int ended() {
		return mEnded;
		+ }
		+
		+ boolean isSot() {
		+ return mBegan == 0;
		}

		boolean isEot() {
		return mBegan == -1;
		}

		- boolean isSot() {
		- return mBegan == 0;
		+ LexemeType getType() {
		+ return mType;
		}

		+ /**
		+ * Compares the starting offset of the given {@link Lexeme} to the starting
		+ * offset of this {@link Lexeme} instance. This allows a list of {@link Lexeme}s
		+ * to be sorted by order of appearance in the parsed text.
		+ */
		@Override
		public int compareTo( final Lexeme that ) {
		+ assert that != null;
		return this.mBegan - that.mBegan;
		- }
		-
		- @Override
		- public String toString() {
		- return getClass().getSimpleName() + "{" +
		- "mType=" + mType +
		- ", mBegan=" + mBegan +
		- ", mEnded=" + mEnded +
		- '}';
		}

		static Lexeme createLexeme(
		final LexemeType lexeme, final int began, final int ended ) {
		+ assert lexeme != null;
		return new Lexeme( lexeme, began, ended );
		}

lib/src/main/java/com/keenwrite/quotes/LexemeType.java

		package com.keenwrite.quotes;

		+/**
		+ * Represents the type of a {@link Lexeme} parsed by the {@link Lexer}.
		+ */
		public enum LexemeType {
		QUOTE_SINGLE,

lib/src/main/java/com/keenwrite/quotes/Parser.java

		* </ol>
		*/
		-public class Parser {
		- /**
		- * Single quotes preceded by these {@link LexemeType}s may be opening quotes.
		- */
		- private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE =
		- new LexemeType[]{SPACE, DASH, QUOTE_DOUBLE, OPENING_GROUP, EOP};
		-
		- /**
		- * Single quotes succeeded by these {@link LexemeType}s may be opening quotes.
		- */
		- private static final LexemeType[] LAGGING_QUOTE_OPENING_SINGLE =
		- new LexemeType[]{WORD, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE};
		-
		- /**
		- * Single quotes preceded by these {@link LexemeType}s may be closing quotes.
		- */
		- private static final LexemeType[] LEADING_QUOTE_CLOSING_SINGLE =
		- new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_DOUBLE};
		-
		- /**
		- * Single quotes succeeded by these {@link LexemeType}s may be closing quotes.
		- */
		- private static final LexemeType[] LAGGING_QUOTE_CLOSING_SINGLE =
		- new LexemeType[]{SPACE, DASH, QUOTE_DOUBLE, CLOSING_GROUP, EOL};
		-
		- /**
		- * Double quotes preceded by these {@link LexemeType}s may be opening quotes.
		- */
		- private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
		- new LexemeType[]{SPACE, DASH, QUOTE_SINGLE, OPENING_GROUP, EOP};
		-
		- /**
		- * Double quotes succeeded by these {@link LexemeType}s may be opening quotes.
		- */
		- private static final LexemeType[] LAGGING_QUOTE_OPENING_DOUBLE =
		- new LexemeType[]{WORD, NUMBER, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE};
		-
		- /**
		- * Double quotes preceded by these {@link LexemeType}s may be closing quotes.
		- */
		- private static final LexemeType[] LEADING_QUOTE_CLOSING_DOUBLE =
		- new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_SINGLE};
		-
		- /**
		- * Double quotes succeeded by these {@link LexemeType}s may be closing quotes.
		- */
		- private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE =
		- new LexemeType[]{SPACE, DASH, QUOTE_SINGLE, CLOSING_GROUP, EOL};
		-
		- /**
		- * The text to parse. A reference is required as a minor optimization in
		- * memory and speed: the lexer records integer offsets, rather than new
		- * {@link String} instances, to track parsed lexemes.
		- */
		- private final String mText;
		-
		- /**
		- * Converts a string into an iterable list of {@link Lexeme} instances.
		- */
		- private final Lexer mLexer;
		-
		- /**
		- * Sets of contractions that help disambiguate single quotes in the text.
		- * These are effectively immutable while parsing.
		- */
		- private final Contractions sContractions;
		-
		- /**
		- * Incremented for each opening single quote emitted. Used to help resolve
		- * ambiguities when single quote marks are balanced.
		- */
		- private int mOpeningSingleQuote;
		-
		- /**
		- * Incremented for each closing single quote emitted. Used to help resolve
		- * ambiguities when single quote marks are balanced.
		- */
		- private int mClosingSingleQuote;
		-
		- /**
		- * Constructs a new {@link Parser} using the default contraction sets
		- * to help resolve some ambiguous scenarios.
		- *
		- * @param text The prose to parse, containing zero or more quotation
		- * characters.
		- */
		- public Parser( final String text ) {
		- this( text, new Contractions.Builder().build() );
		- }
		-
		- /**
		- * Constructs a new {@link Parser} using the default contraction sets
		- * to help resolve some ambiguous scenarios.
		- *
		- * @param text The prose to parse, containing zero or more quotation
		- * characters.
		- * @param contractions Custom sets of contractions to help resolve
		- * ambiguities.
		- */
		- public Parser( final String text, final Contractions contractions ) {
		- mText = text;
		- mLexer = new Lexer( mText );
		- sContractions = contractions;
		- }
		-
		- /**
		- * Iterates over the entire text provided at construction, emitting
		- * {@link Token}s that can be used to convert straight quotes to curly
		- * quotes.
		- *
		- * @param consumer Receives emitted {@link Token}s.
		- * @return The list of lexemes that could not be resolved.
		- */
		- public List<Lexeme> parse( final Consumer<Token> consumer ) {
		- final var lexemes = new CircularFifoQueue<Lexeme>( 3 );
		-
		- // Allow consuming the very first token without checking the queue size.
		- flush( lexemes );
		-
		- final var unresolved = new ArrayList<Lexeme[]>();
		- Lexeme lexeme;
		-
		- // Create and convert a list of all unambiguous quote characters.
		- while( (lexeme = mLexer.next()) != EOT ) {
		- tokenize( lexeme, lexemes, consumer, unresolved );
		- }
		-
		- // By loop's end, the lexemes list contains tokens for all except the
		- // final two elements (from tokenizing in triplets). Tokenize the remaining
		- // unprocessed lexemes.
		- tokenize( EOT, lexemes, consumer, unresolved );
		- tokenize( EOT, lexemes, consumer, unresolved );
		-
		- // Attempt to resolve any remaining unambiguous quotes.
		- resolve( unresolved, consumer );
		-
		- final var result = new ArrayList<Lexeme>( unresolved.size() );
		-
		- unresolved.forEach( ( lex ) -> result.add( lex[ 1 ] ) );
		-
		- return result;
		- }
		-
		- private void tokenize( final Lexeme lexeme,
		- final CircularFifoQueue<Lexeme> lexemes,
		- final Consumer<Token> consumer,
		- final List<Lexeme[]> unresolved ) {
		- // Add the next lexeme to tokenize into the queue for immediate processing.
		- lexemes.add( lexeme );
		-
		- final var lex1 = lexemes.get( 0 );
		- final var lex2 = lexemes.get( 1 );
		- final var lex3 = lexemes.get( 2 );
		-
		- if( lex2.isType( QUOTE_SINGLE ) && lex3.isType( WORD ) &&
		- lex1.anyType( WORD, PERIOD, NUMBER ) ) {
		- // Examples: y'all, Ph.D.'ll, 20's, she's
		- consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
		- flush( lexemes );
		- }
		- else if( lex1.isType( QUOTE_SINGLE ) && lex3.isType( QUOTE_SINGLE ) &&
		- "n".equalsIgnoreCase( lex2.toString( mText ) ) ) {
		- // I.e., 'n'
		- consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) );
		- consumer.accept( new Token( QUOTE_APOSTROPHE, lex3 ) );
		- flush( lexemes );
		- resolved( unresolved );
		- }
		- else if( lex2.isType( QUOTE_SINGLE ) && lex1.isType( NUMBER ) ) {
		- if( lex3.isType( QUOTE_SINGLE ) ) {
		- // E.g., 2''
		- consumer.accept(
		- new Token( QUOTE_PRIME_DOUBLE, lex2.began(), lex3.ended() ) );
		- flush( lexemes );
		- }
		- else {
		- // E.g., 2'
		- consumer.accept( new Token( QUOTE_PRIME_SINGLE, lex2 ) );
		- }
		- }
		- else if( lex2.isType( QUOTE_DOUBLE ) && lex1.isType( NUMBER ) ) {
		- // E.g., 2"
		- consumer.accept( new Token( QUOTE_PRIME_DOUBLE, lex2 ) );
		- }
		- else if( lex2.isType( WORD ) && lex3.isType( QUOTE_SINGLE ) &&
		- sContractions.endedUnambiguously( lex2.toString( mText ) ) ) {
		- // E.g., thinkin'
		- consumer.accept( new Token( QUOTE_APOSTROPHE, lex3 ) );
		- flush( lexemes );
		- }
		- else if( lex2.isType( NUMBER ) && lex1.isType( QUOTE_SINGLE ) ) {
		- if( lex3.anyType( SPACE, PUNCT ) \|\| (lex3.isType( WORD ) &&
		- lex3.toString( mText ).equalsIgnoreCase( "s" )) ) {
		- // Sentences must re-written to avoid starting with numerals.
		- // Examples: '20s, '02
		- consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) );
		- }
		- else {
		- // E.g., '2''
		- consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex1 ) );
		- mOpeningSingleQuote++;
		- }
		-
		- resolved( unresolved );
		- }
		- else if( lex2.isType( QUOTE_SINGLE ) &&
		- lex1.anyType( PUNCT, PERIOD, ELLIPSIS, DASH ) &&
		- (lex3.anyType( EOL, EOP ) \|\| lex3.isEot()) ) {
		- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
		- mClosingSingleQuote++;
		- }
		- else if( lex1.isType( ESC_SINGLE ) ) {
		- // E.g., \'
		- consumer.accept( new Token( QUOTE_STRAIGHT_SINGLE, lex1 ) );
		- }
		- else if( lex1.isType( ESC_DOUBLE ) ) {
		- // E.g., \"
		- consumer.accept( new Token( QUOTE_STRAIGHT_DOUBLE, lex1 ) );
		-
		- if( lex2.isType( QUOTE_SINGLE ) &&
		- (lex3.isEot() \|\| lex3.anyType( SPACE, DASH, EOL, EOP )) ) {
		- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
		- mClosingSingleQuote++;
		- }
		- }
		- else if( lex2.isType( QUOTE_DOUBLE ) &&
		- (lex1.isSot() \|\| lex1.anyType( LEADING_QUOTE_OPENING_DOUBLE )) &&
		- lex3.anyType( LAGGING_QUOTE_OPENING_DOUBLE ) ) {
		- // Examples: "", "..., "word, ---"word
		- consumer.accept( new Token( QUOTE_OPENING_DOUBLE, lex2 ) );
		- }
		- else if( lex2.isType( QUOTE_DOUBLE ) &&
		- lex1.anyType( LEADING_QUOTE_CLOSING_DOUBLE ) &&
		- (lex3.isEot() \|\| lex3.anyType( LAGGING_QUOTE_CLOSING_DOUBLE )) ) {
		- // E.g., ..."', word"', ?"'
		- consumer.accept( new Token( QUOTE_CLOSING_DOUBLE, lex2 ) );
		- }
		- else if( lex1.isType( QUOTE_SINGLE ) &&
		- lex2.anyType( PUNCT, PERIOD, DASH ) && lex3.isType( QUOTE_DOUBLE ) ) {
		- // E.g., '," (contraction ruled out from previous conditionals)
		- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex1 ) );
		- resolved( unresolved );
		- mClosingSingleQuote++;
		- }
		- else if( lex2.anyType( QUOTE_SINGLE, QUOTE_DOUBLE ) ) {
		- // After tokenizing, the parser will attempt to resolve ambiguities.
		- unresolved.add( new Lexeme[]{lex1, lex2, lex3} );
		- }
		- }
		-
		- private void resolve(
		- final List<Lexeme[]> unresolved, final Consumer<Token> consumer ) {
		- // Some non-emitted tokenized lexemes may be ambiguous.
		- final var ambiguousLeadingQuotes = new ArrayList<Lexeme[]>( 16 );
		- final var ambiguousLaggingQuotes = new ArrayList<Lexeme[]>( 16 );
		- var resolvedLeadingQuotes = 0;
		- var resolvedLaggingQuotes = 0;
		-
		- // Count the number of ambiguous and non-ambiguous open single quotes.
		- for( var i = unresolved.iterator(); i.hasNext(); ) {
		- final var quotes = i.next();
		- final var lex1 = quotes[ 0 ];
		- final var lex2 = quotes[ 1 ];
		- final var lex3 = quotes[ 2 ];
		-
		- if( lex2.isType( QUOTE_SINGLE ) ) {
		- final var word1 = lex1 == SOT ? "" : lex1.toString( mText );
		- final var word3 = lex3 == EOT ? "" : lex3.toString( mText );
		-
		- if( sContractions.beganAmbiguously( word3 ) ) {
		- // E.g., 'Cause
		- if( lex1.isType( QUOTE_SINGLE ) ) {
		- // E.g., ''Cause
		- consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
		- i.remove();
		- }
		- else {
		- // The contraction is uncertain until a closing quote is found that
		- // may balance this single quote.
		- ambiguousLeadingQuotes.add( quotes );
		- }
		- }
		- else if( sContractions.beganUnambiguously( word3 ) ) {
		- // The quote mark forms a word that does not stand alone from its
		- // contraction. For example, twas is not a word: it's 'twas.
		- consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
		- i.remove();
		- }
		- else if( sContractions.endedAmbiguously( word1 ) ) {
		- ambiguousLaggingQuotes.add( quotes );
		- }
		- else if( (lex1.isSot() \|\| lex1.anyType( LEADING_QUOTE_OPENING_SINGLE ))
		- && lex3.anyType( LAGGING_QUOTE_OPENING_SINGLE ) ) {
		- consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex2 ) );
		- resolvedLeadingQuotes++;
		- mOpeningSingleQuote++;
		- i.remove();
		- }
		- else if( lex1.anyType( LEADING_QUOTE_CLOSING_SINGLE ) &&
		- (lex3.isEot() \|\| lex3.anyType( LAGGING_QUOTE_CLOSING_SINGLE )) ) {
		- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
		- resolvedLaggingQuotes++;
		- mClosingSingleQuote++;
		- i.remove();
		- }
		- else if( lex3.isType( NUMBER ) ) {
		- // E.g., '04
		- ambiguousLeadingQuotes.add( quotes );
		- }
		- }
		- }
		-
		- final var ambiguousLeadingCount = ambiguousLeadingQuotes.size();
		- final var ambiguousLaggingCount = ambiguousLaggingQuotes.size();
		-
		- if( resolvedLeadingQuotes == 1 && resolvedLaggingQuotes == 0 ) {
		- if( ambiguousLeadingCount == 0 && ambiguousLaggingCount == 1 ) {
		- final var balanced = mClosingSingleQuote - mOpeningSingleQuote == 0;
		- final var quote = balanced ? QUOTE_APOSTROPHE : QUOTE_CLOSING_SINGLE;
		- final var lex = ambiguousLaggingQuotes.get( 0 );
		- consumer.accept( new Token( quote, lex[ 1 ] ) );
		- unresolved.remove( lex );
		- }
		- else if( ambiguousLeadingCount == 0 && unresolved.size() == 1 ) {
		- // Must be a closing quote.
		- final var closing = unresolved.get( 0 );
		- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
		- unresolved.remove( closing );
		- }
		- }
		- else if( ambiguousLeadingCount == 0 && ambiguousLaggingCount > 0 ) {
		- // If there are no ambiguous leading quotes then all ambiguous lagging
		- // quotes must be contractions.
		- ambiguousLaggingQuotes.forEach(
		- lex -> {
		- consumer.accept( new Token( QUOTE_APOSTROPHE, lex[ 1 ] ) );
		- unresolved.remove( lex );
		- }
		- );
		- }
		- else if( ambiguousLeadingCount == 0 ) {
		- if( resolvedLaggingQuotes < resolvedLeadingQuotes ) {
		- for( final var i = unresolved.iterator(); i.hasNext(); ) {
		- final var closing = i.next()[ 1 ];
		- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing ) );
		- i.remove();
		- }
		- }
		- else if( mOpeningSingleQuote - mClosingSingleQuote == unresolved.size() ) {
		- for( final var i = unresolved.iterator(); i.hasNext(); ) {
		- final var closing = i.next();
		- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
		- i.remove();
		- }
		- }
		- else if( unresolved.size() == 2 ) {
		- final var closing = unresolved.get( 0 );
		- final var opening = unresolved.get( 1 );
		- consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
		- consumer.accept( new Token( QUOTE_OPENING_SINGLE, opening[ 1 ] ) );
		-
		- unresolved.clear();
		- }
		- }
		- else if( ambiguousLeadingCount == 1 && resolvedLaggingQuotes == 1 ) {
		- final var opening = ambiguousLeadingQuotes.get( 0 );
		- consumer.accept( new Token( QUOTE_OPENING_SINGLE, opening[ 1 ] ) );
		- unresolved.remove( opening );
		- }
		- }
		-
		- /**
		- * Remove the last {@link Lexeme}s from the given list.
		- *
		- * @param unresolved The list of {@link Lexeme}s to modify.
		- */
		- private void resolved( final List<Lexeme[]> unresolved ) {
		+public final class Parser {
		+ /**
		+ * Single quotes preceded by these {@link LexemeType}s may be opening quotes.
		+ */
		+ private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE =
		+ new LexemeType[]{SPACE, DASH, QUOTE_DOUBLE, OPENING_GROUP, EOP};
		+
		+ /**
		+ * Single quotes succeeded by these {@link LexemeType}s may be opening quotes.
		+ */
		+ private static final LexemeType[] LAGGING_QUOTE_OPENING_SINGLE =
		+ new LexemeType[]{WORD, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE};
		+
		+ /**
		+ * Single quotes preceded by these {@link LexemeType}s may be closing quotes.
		+ */
		+ private static final LexemeType[] LEADING_QUOTE_CLOSING_SINGLE =
		+ new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_DOUBLE};
		+
		+ /**
		+ * Single quotes succeeded by these {@link LexemeType}s may be closing quotes.
		+ */
		+ private static final LexemeType[] LAGGING_QUOTE_CLOSING_SINGLE =
		+ new LexemeType[]{SPACE, DASH, QUOTE_DOUBLE, CLOSING_GROUP, EOL};
		+
		+ /**
		+ * Double quotes preceded by these {@link LexemeType}s may be opening quotes.
		+ */
		+ private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
		+ new LexemeType[]{SPACE, DASH, QUOTE_SINGLE, OPENING_GROUP, EOP};
		+
		+ /**
		+ * Double quotes succeeded by these {@link LexemeType}s may be opening quotes.
		+ */
		+ private static final LexemeType[] LAGGING_QUOTE_OPENING_DOUBLE =
		+ new LexemeType[]{WORD, NUMBER, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE};
		+
		+ /**
		+ * Double quotes preceded by these {@link LexemeType}s may be closing quotes.
		+ */
		+ private static final LexemeType[] LEADING_QUOTE_CLOSING_DOUBLE =
		+ new LexemeType[]{WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_SINGLE};
		+
		+ /**
		+ * Double quotes succeeded by these {@link LexemeType}s may be closing quotes.
		+ */
		+ private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE =
		+ new LexemeType[]{SPACE, DASH, QUOTE_SINGLE, CLOSING_GROUP, EOL};
		+
		+ /**
		+ * The text to parse. A reference is required as a minor optimization in
		+ * memory and speed: the lexer records integer offsets, rather than new
		+ * {@link String} instances, to track parsed lexemes.
		+ */
		+ private final String mText;
		+
		+ /**
		+ * Converts a string into an iterable list of {@link Lexeme} instances.
		+ */
		+ private final Lexer mLexer;
		+
		+ /**
		+ * Sets of contractions that help disambiguate single quotes in the text.
		+ * These are effectively immutable while parsing.
		+ */
		+ private final Contractions sContractions;
		+
		+ /**
		+ * Incremented for each opening single quote emitted. Used to help resolve
		+ * ambiguities when single quote marks are balanced.
		+ */
		+ private int mOpeningSingleQuote;
		+
		+ /**
		+ * Incremented for each closing single quote emitted. Used to help resolve
		+ * ambiguities when single quote marks are balanced.
		+ */
		+ private int mClosingSingleQuote;
		+
		+ /**
		+ * Constructs a new {@link Parser} using the default contraction sets
		+ * to help resolve some ambiguous scenarios.
		+ *
		+ * @param text The prose to parse, containing zero or more quotation
		+ * characters.
		+ */
		+ public Parser( final String text ) {
		+ this( text, new Contractions.Builder().build() );
		+ }
		+
		+ /**
		+ * Constructs a new {@link Parser} using the given contraction sets
		+ * to help resolve some ambiguous scenarios.
		+ *
		+ * @param text The prose to parse, containing zero or more quotation
		+ * characters.
		+ * @param contractions Custom sets of contractions to help resolve
		+ * ambiguities.
		+ */
		+ public Parser( final String text, final Contractions contractions ) {
		+ mText = text;
		+ mLexer = new Lexer( mText );
		+ sContractions = contractions;
		+ }
		+
		+ /**
		+ * Iterates over the entire text provided at construction, emitting
		+ * {@link Token}s that can be used to convert straight quotes to curly
		+ * quotes.
		+ *
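		+ * @param tokenConsumer Receives emitted {@link Token}s.
		+ * @param lexemeConsumer Receives the quotation mark {@link Lexeme}s that
		+ * could not be resolved.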
		+ */
		+ public void parse(
		+ final Consumer<Token> tokenConsumer,
		+ final Consumer<Lexeme> lexemeConsumer ) {
		+ final var lexemes = new CircularFifoQueue<Lexeme>( 3 );
		+
		+ // Allow consuming the very first token without checking the queue size.
		+ flush( lexemes );
		+
		+ final var unresolved = new ArrayList<Lexeme[]>();
		+ Lexeme lexeme;
		+
		+ // Create and convert a list of all unambiguous quote characters.
		+ while( (lexeme = mLexer.next()) != EOT ) {
		+ tokenize( lexeme, lexemes, tokenConsumer, unresolved );
		+ }
		+
		+ // By loop's end, the lexemes list contains tokens for all except the
		+ // final two elements (from tokenizing in triplets). Tokenize the remaining
		+ // unprocessed lexemes.
		+ tokenize( EOT, lexemes, tokenConsumer, unresolved );
		+ tokenize( EOT, lexemes, tokenConsumer, unresolved );
		+
		+ // Attempt to resolve any remaining ambiguous quotes.
		+ resolve( unresolved, tokenConsumer );
		+
		+ // Notify of any ambiguous quotes that could not be resolved.
		+ unresolved.forEach( ( lex ) -> lexemeConsumer.accept( lex[ 1 ] ) );
		+ }
		+
		+ private void tokenize( final Lexeme lexeme,
		+ final CircularFifoQueue<Lexeme> lexemes,
		+ final Consumer<Token> consumer,
		+ final List<Lexeme[]> unresolved ) {
		+ // Add the next lexeme to tokenize into the queue for immediate processing.
		+ lexemes.add( lexeme );
		+
		+ final var lex1 = lexemes.get( 0 );
		+ final var lex2 = lexemes.get( 1 );
		+ final var lex3 = lexemes.get( 2 );
		+
		+ if( lex2.isType( QUOTE_SINGLE ) && lex3.isType( WORD ) &&
		+ lex1.anyType( WORD, PERIOD, NUMBER ) ) {
		+ // Examples: y'all, Ph.D.'ll, 20's, she's
		+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
		+ flush( lexemes );
		+ }
		+ else if( lex1.isType( QUOTE_SINGLE ) && lex3.isType( QUOTE_SINGLE ) &&
		+ "n".equalsIgnoreCase( lex2.toString( mText ) ) ) {
		+ // I.e., 'n'
		+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) );
		+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex3 ) );
		+ flush( lexemes );
		+ truncate( unresolved );
		+ }
		+ else if( lex2.isType( QUOTE_SINGLE ) && lex1.isType( NUMBER ) ) {
		+ if( lex3.isType( QUOTE_SINGLE ) ) {
		+ // E.g., 2''
		+ consumer.accept(
		+ new Token( QUOTE_PRIME_DOUBLE, lex2.began(), lex3.ended() ) );
		+ flush( lexemes );
		+ }
		+ else {
		+ // E.g., 2'
		+ consumer.accept( new Token( QUOTE_PRIME_SINGLE, lex2 ) );
		+ }
		+ }
		+ else if( lex2.isType( QUOTE_DOUBLE ) && lex1.isType( NUMBER ) ) {
		+ // E.g., 2"
		+ consumer.accept( new Token( QUOTE_PRIME_DOUBLE, lex2 ) );
		+ }
		+ else if( lex2.isType( WORD ) && lex3.isType( QUOTE_SINGLE ) &&
		+ sContractions.endedUnambiguously( lex2.toString( mText ) ) ) {
		+ // E.g., thinkin'
		+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex3 ) );
		+ flush( lexemes );
		+ }
		+ else if( lex2.isType( NUMBER ) && lex1.isType( QUOTE_SINGLE ) ) {
		+ if( lex3.anyType( SPACE, PUNCT ) \|\| (lex3.isType( WORD ) &&
		+ lex3.toString( mText ).equalsIgnoreCase( "s" )) ) {
		+ // Sentences must be rewritten to avoid starting with numerals.
		+ // Examples: '20s, '02
		+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) );
		+ }
		+ else {
		+ // E.g., '2''
		+ consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex1 ) );
		+ mOpeningSingleQuote++;
		+ }
		+
		+ truncate( unresolved );
		+ }
		+ else if( lex2.isType( QUOTE_SINGLE ) &&
		+ lex1.anyType( PUNCT, PERIOD, ELLIPSIS, DASH ) &&
		+ (lex3.anyType( EOL, EOP ) \|\| lex3.isEot()) ) {
		+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
		+ mClosingSingleQuote++;
		+ }
		+ else if( lex1.isType( ESC_SINGLE ) ) {
		+ // E.g., \'
		+ consumer.accept( new Token( QUOTE_STRAIGHT_SINGLE, lex1 ) );
		+ }
		+ else if( lex1.isType( ESC_DOUBLE ) ) {
		+ // E.g., \"
		+ consumer.accept( new Token( QUOTE_STRAIGHT_DOUBLE, lex1 ) );
		+
		+ if( lex2.isType( QUOTE_SINGLE ) &&
		+ (lex3.isEot() \|\| lex3.anyType( SPACE, DASH, EOL, EOP )) ) {
		+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
		+ mClosingSingleQuote++;
		+ }
		+ }
		+ else if( lex2.isType( QUOTE_DOUBLE ) &&
		+ (lex1.isSot() \|\| lex1.anyType( LEADING_QUOTE_OPENING_DOUBLE )) &&
		+ lex3.anyType( LAGGING_QUOTE_OPENING_DOUBLE ) ) {
		+ // Examples: "", "..., "word, ---"word
		+ consumer.accept( new Token( QUOTE_OPENING_DOUBLE, lex2 ) );
		+ }
		+ else if( lex2.isType( QUOTE_DOUBLE ) &&
		+ lex1.anyType( LEADING_QUOTE_CLOSING_DOUBLE ) &&
		+ (lex3.isEot() \|\| lex3.anyType( LAGGING_QUOTE_CLOSING_DOUBLE )) ) {
		+ // Examples: ..."', word"', ?"'
		+ consumer.accept( new Token( QUOTE_CLOSING_DOUBLE, lex2 ) );
		+ }
		+ else if( lex1.isType( QUOTE_SINGLE ) &&
		+ lex2.anyType( PUNCT, PERIOD, DASH ) && lex3.isType( QUOTE_DOUBLE ) ) {
		+ // E.g., '," (contraction ruled out from previous conditionals)
		+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex1 ) );
		+ truncate( unresolved );
		+ mClosingSingleQuote++;
		+ }
		+ else if( lex2.anyType( QUOTE_SINGLE, QUOTE_DOUBLE ) ) {
		+ // After tokenizing, the parser will attempt to resolve ambiguities.
		+ unresolved.add( new Lexeme[]{lex1, lex2, lex3} );
		+ }
		+ }
		+
		+ private void resolve(
		+ final List<Lexeme[]> unresolved, final Consumer<Token> consumer ) {
		+ // Some non-emitted tokenized lexemes may be ambiguous.
		+ final var ambiguousLeadingQuotes = new ArrayList<Lexeme[]>( 16 );
		+ final var ambiguousLaggingQuotes = new ArrayList<Lexeme[]>( 16 );
		+ var resolvedLeadingQuotes = 0;
		+ var resolvedLaggingQuotes = 0;
		+
		+ // Count the number of ambiguous and non-ambiguous open single quotes.
		+ for( var i = unresolved.iterator(); i.hasNext(); ) {
		+ final var quotes = i.next();
		+ final var lex1 = quotes[ 0 ];
		+ final var lex2 = quotes[ 1 ];
		+ final var lex3 = quotes[ 2 ];
		+
		+ if( lex2.isType( QUOTE_SINGLE ) ) {
		+ final var word1 = lex1 == SOT ? "" : lex1.toString( mText );
		+ final var word3 = lex3 == EOT ? "" : lex3.toString( mText );
		+
		+ if( sContractions.beganAmbiguously( word3 ) ) {
		+ // E.g., 'Cause
		+ if( lex1.isType( QUOTE_SINGLE ) ) {
		+ // E.g., ''Cause
		+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
		+ i.remove();
		+ }
		+ else {
		+ // The contraction is uncertain until a closing quote is found that
		+ // may balance this single quote.
		+ ambiguousLeadingQuotes.add( quotes );
		+ }
		+ }
		+ else if( sContractions.beganUnambiguously( word3 ) ) {
		+ // The quote mark forms a word that does not stand alone from its
		+ // contraction. For example, twas is not a word: it's 'twas.
		+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) );
		+ i.remove();
		+ }
		+ else if( sContractions.endedAmbiguously( word1 ) ) {
		+ ambiguousLaggingQuotes.add( quotes );
		+ }
		+ else if( (lex1.isSot() \|\| lex1.anyType( LEADING_QUOTE_OPENING_SINGLE ))
		+ && lex3.anyType( LAGGING_QUOTE_OPENING_SINGLE ) ) {
		+ consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex2 ) );
		+ resolvedLeadingQuotes++;
		+ mOpeningSingleQuote++;
		+ i.remove();
		+ }
		+ else if( lex1.anyType( LEADING_QUOTE_CLOSING_SINGLE ) &&
		+ (lex3.isEot() \|\| lex3.anyType( LAGGING_QUOTE_CLOSING_SINGLE )) ) {
		+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
		+ resolvedLaggingQuotes++;
		+ mClosingSingleQuote++;
		+ i.remove();
		+ }
		+ else if( lex3.isType( NUMBER ) ) {
		+ // E.g., '04
		+ ambiguousLeadingQuotes.add( quotes );
		+ }
		+ }
		+ }
		+
		+ final var ambiguousLeadingCount = ambiguousLeadingQuotes.size();
		+ final var ambiguousLaggingCount = ambiguousLaggingQuotes.size();
		+
		+ if( resolvedLeadingQuotes == 1 && resolvedLaggingQuotes == 0 ) {
		+ if( ambiguousLeadingCount == 0 && ambiguousLaggingCount == 1 ) {
		+ final var balanced = mClosingSingleQuote - mOpeningSingleQuote == 0;
		+ final var quote = balanced ? QUOTE_APOSTROPHE : QUOTE_CLOSING_SINGLE;
		+ final var lex = ambiguousLaggingQuotes.get( 0 );
		+ consumer.accept( new Token( quote, lex[ 1 ] ) );
		+ unresolved.remove( lex );
		+ }
		+ else if( ambiguousLeadingCount == 0 && unresolved.size() == 1 ) {
		+ // Must be a closing quote.
		+ final var closing = unresolved.get( 0 );
		+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
		+ unresolved.remove( closing );
		+ }
		+ }
		+ else if( ambiguousLeadingCount == 0 && ambiguousLaggingCount > 0 ) {
		+ // If there are no ambiguous leading quotes then all ambiguous lagging
		+ // quotes must be contractions.
		+ ambiguousLaggingQuotes.forEach(
		+ lex -> {
		+ consumer.accept( new Token( QUOTE_APOSTROPHE, lex[ 1 ] ) );
		+ unresolved.remove( lex );
		+ }
		+ );
		+ }
		+ else if( ambiguousLeadingCount == 0 ) {
		+ if( resolvedLaggingQuotes < resolvedLeadingQuotes ) {
		+ for( final var i = unresolved.iterator(); i.hasNext(); ) {
		+ final var closing = i.next()[ 1 ];
		+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing ) );
		+ i.remove();
		+ }
		+ }
		+ else if( mOpeningSingleQuote - mClosingSingleQuote == unresolved.size() ) {
		+ for( final var i = unresolved.iterator(); i.hasNext(); ) {
		+ final var closing = i.next();
		+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
		+ i.remove();
		+ }
		+ }
		+ else if( unresolved.size() == 2 ) {
		+ final var closing = unresolved.get( 0 );
		+ final var opening = unresolved.get( 1 );
		+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, closing[ 1 ] ) );
		+ consumer.accept( new Token( QUOTE_OPENING_SINGLE, opening[ 1 ] ) );
		+
		+ unresolved.clear();
		+ }
		+ }
		+ else if( ambiguousLeadingCount == 1 && resolvedLaggingQuotes == 1 ) {
		+ final var opening = ambiguousLeadingQuotes.get( 0 );
		+ consumer.accept( new Token( QUOTE_OPENING_SINGLE, opening[ 1 ] ) );
		+ unresolved.remove( opening );
		+ }
		+ }
		+
		+ /**
		+ * Remove the last {@link Lexeme}s from the given list.
		+ *
		+ * @param unresolved The list of {@link Lexeme}s to modify.
		+ */
		+ private void truncate( final List<Lexeme[]> unresolved ) {
		if( !unresolved.isEmpty() ) {
		unresolved.remove( unresolved.size() - 1 );

lib/src/main/java/com/keenwrite/quotes/SmartQuotes.java

		-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
		-package com.keenwrite.quotes;
		-
		-import java.util.ArrayList;
		-import java.util.Map;
		-import java.util.function.Consumer;
		-
		-import static com.keenwrite.quotes.TokenType.*;
		-import static java.util.Collections.sort;
		-
		-/**
		- * Responsible for replacing {@link Token} instances with equivalent smart
		- * quotes (or straight quotes). This will inform the caller when ambiguous
		- * quotes cannot be reliably resolved.
		- */
		-public class SmartQuotes {
		- private static final Map<TokenType, String> REPLACEMENTS = Map.of(
		- QUOTE_OPENING_SINGLE, "‘",
		- QUOTE_CLOSING_SINGLE, "’",
		- QUOTE_OPENING_DOUBLE, "“",
		- QUOTE_CLOSING_DOUBLE, "”",
		- QUOTE_STRAIGHT_SINGLE, "'",
		- QUOTE_STRAIGHT_DOUBLE, "\"",
		- QUOTE_APOSTROPHE, "'",
		- QUOTE_PRIME_SINGLE, "′",
		- QUOTE_PRIME_DOUBLE, "″"
		- );
		-
		- /**
		- * Converts
		- * @param text
		- * @return
		- */
		- public static String convert( final String text ) {
		- return convert( text, ( lexeme ) -> {} );
		- }
		-
		- public static String convert(
		- final String text, final Consumer<Lexeme> consumer ) {
		- final var parser = new Parser( text );
		- final var tokens = new ArrayList<Token>();
		-
		- // Store all parsed quotation marks.
		- parser.parse( tokens::add );
		-
		- // The parser may emit tokens in any order.
		- sort( tokens );
		-
		- final var result = new StringBuilder( text.length() );
		- var position = 0;
		-
		- for( final var token : tokens ) {
		- if( position <= token.began() ) {
		- result.append( text, position, token.began() );
		- result.append( REPLACEMENTS.get( token.getType() ) );
		- }
		-
		- position = token.ended();
		- }
		-
		- return result.append( text.substring( position ) ).toString();
		- }
		-}

lib/src/main/java/com/keenwrite/quotes/Token.java

		* Represents a high-level token read from the text.
		*/
		-public class Token implements Comparable<Token> {
		+final class Token implements Comparable<Token> {
		private final TokenType mType;
		final int mBegan;
		final int mEnded;

		/**
		- * Convenience constructor to create a token that uses the lexeme's offsets.
		+ * Convenience constructor to create a token that uses the lexeme's
		+ * beginning and ending offsets to represent a complete token.
		*
		* @param type The type of {@link Token} to create.
		* @param lexeme Container for beginning and ending text offsets.
		*/
		Token( final TokenType type, final Lexeme lexeme ) {
		this( type, lexeme.began(), lexeme.ended() );
		}

		+ /**
		+ * This constructor can be used to create tokens that span more than a
		+ * single character. Almost all tokens represent a single character; only
		+ * the double-prime sequence ({@code ''}) is more than one character.
		+ *
		+ * @param type The type of {@link Token} to create.
		+ * @param began Beginning offset into text where token is found.
		+ * @param ended Ending offset into text where token is found.
		+ */
		Token( final TokenType type, final int began, final int ended ) {
		assert type != null;

		public int compareTo( final Token that ) {
		return this.mBegan - that.mBegan;
		- }
		-
		- @Override
		- public String toString() {
		- return getClass().getSimpleName() + "{" +
		- "mType=" + mType +
		- ", mBegan=" + mBegan +
		- ", mEnded=" + mEnded +
		- '}';
		}
		}

lib/src/main/java/com/keenwrite/quotes/TokenType.java

		QUOTE_PRIME_SINGLE,
		QUOTE_PRIME_DOUBLE,
		- TEXT,
		}

lib/src/test/java/com/keenwrite/quotes/KeenQuotesTest.java

		+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
		+package com.keenwrite.quotes;
		+
		+import org.junit.jupiter.api.Disabled;
		+import org.junit.jupiter.api.Test;
		+
		+import java.io.BufferedReader;
		+import java.io.IOException;
		+import java.io.InputStreamReader;
		+import java.util.function.Function;
		+
		+import static java.lang.System.out;
		+import static org.junit.jupiter.api.Assertions.assertEquals;
		+import static org.junit.jupiter.api.Assertions.assertNotNull;
		+
		+/**
		+ * Test that English straight quotes are converted to curly quotes and
		+ * apostrophes.
		+ */
		+public class KeenQuotesTest {
		+  /**
		+   * This is a single-use test that is useful for debugging. It prints the
		+   * converted text, and every unresolved lexeme, to standard output; it
		+   * makes no assertions.
		+   */
		+  @Test
		+  @Disabled
		+  public void test_parse_SingleLine_Parsed() {
		+    out.println( KeenQuotes.convert(
		+      "What's this '-5.5'',' '-10.2'' cm,' and another '-7.25''' thing?",
		+      out::println
		+    ) );
		+  }
		+
		+  /**
		+   * Tests that straight quotes are converted to curly quotes. Unresolved
		+   * (ambiguous) lexemes reported by the converter are ignored.
		+   *
		+   * @throws IOException Error opening file full of fun.
		+   */
		+  @Test
		+  public void test_Parse_StraightQuotes_CurlyQuotes() throws IOException {
		+    testConverter( text -> KeenQuotes.convert( text, ( lexeme ) -> {} ) );
		+  }
		+
		+  /**
		+   * Reads a file full of couplets. The first of the pair is the input,
		+   * the second of the pair is the expected result. Couplets may include
		+   * newline characters to indicate end of lines and end of paragraphs.
		+   * Lines that start with {@code #} and blank lines are ignored.
		+   *
		+   * @param parser The text processor capable of straight quote conversion.
		+   * @throws IOException Error opening file full of fun.
		+   */
		+  private void testConverter( final Function<String, String> parser )
		+    throws IOException {
		+    try( final var reader = openResource( "smartypants.txt" ) ) {
		+      String line;
		+      String testLine = "";
		+
		+      while( (line = reader.readLine()) != null ) {
		+        if( line.startsWith( "#" ) || line.isBlank() ) { continue; }
		+
		+        // Read the first line of the couplet.
		+        if( testLine.isBlank() ) {
		+          testLine = line;
		+          continue;
		+        }
		+
		+        // The current line is the second line of the couplet: the expected
		+        // result for the input line that was read on a previous iteration.
		+        final var input = unescapeEol( testLine );
		+        final var expected = unescapeEol( line );
		+        final var actual = parser.apply( input );
		+
		+        assertEquals( expected, actual );
		+
		+        // Start the next couplet.
		+        testLine = "";
		+      }
		+    }
		+  }
		+
		+  /**
		+   * Replaces each escaped newline (a backslash followed by {@code n}) with
		+   * a real newline character. Because {@link String#split(String)} discards
		+   * trailing empty strings, escaped newlines at the very end of the given
		+   * string are dropped rather than converted.
		+   *
		+   * @param s The line containing zero or more escaped newlines.
		+   * @return The line with escaped newlines replaced by newline characters.
		+   */
		+  private static String unescapeEol( final String s ) {
		+    return String.join( "\n", s.split( "\\\\n" ) );
		+  }
		+
		+  /**
		+   * Opens a resource, resolved relative to this class, for reading as text.
		+   *
		+   * @param filename The name of the resource to open.
		+   * @return A buffered reader that decodes the resource as UTF-8.
		+   */
		+  @SuppressWarnings( "SameParameterValue" )
		+  private BufferedReader openResource( final String filename ) {
		+    final var is = getClass().getResourceAsStream( filename );
		+    assertNotNull( is, "Missing test resource: " + filename );
		+
		+    // The expected results contain curly quotes and primes, so decode
		+    // explicitly rather than relying on the platform's default charset.
		+    // NOTE(review): assumes the resource file is saved as UTF-8 -- confirm.
		+    return new BufferedReader(
		+      new InputStreamReader( is, java.nio.charset.StandardCharsets.UTF_8 )
		+    );
		+  }
		+}

lib/src/test/java/com/keenwrite/quotes/ParserTest.java


		@Test
		- void test_Conversion_Straight_Curly() {
		+ void test_Conversion_StraightQuotes_ExpectedConversionCount() {
		for( final var entry : TEST_CASES.entrySet() ) {
		parse( entry.getKey(), entry.getValue() );


		parser.parse(
		- ( token ) -> actual.merge( token.getType(), 1, Integer::sum )
		+ ( token ) -> actual.merge( token.getType(), 1, Integer::sum ),
		+ ( lexeme ) -> {}
		);

lib/src/test/java/com/keenwrite/quotes/SmartQuotesTest.java

		-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
		-package com.keenwrite.quotes;
		-
		-import org.junit.jupiter.api.Disabled;
		-import org.junit.jupiter.api.Test;
		-
		-import java.io.BufferedReader;
		-import java.io.IOException;
		-import java.io.InputStreamReader;
		-import java.util.function.Function;
		-
		-import static java.lang.System.out;
		-import static org.junit.jupiter.api.Assertions.assertEquals;
		-import static org.junit.jupiter.api.Assertions.assertNotNull;
		-
		-/**
		- * Test that English straight quotes are converted to curly quotes and
		- * apostrophes.
		- */
		-public class SmartQuotesTest {
		- /**
		- * This is a single-use test that is useful for debugging.
		- */
		- @Test
		- @Disabled
		- public void test_parse_SingleLine_Parsed() {
		- out.println( SmartQuotes.convert(
		- "What's this '-5.5'',' '-10.2'' cm,' and another '-7.25''' thing?",
		- out::println
		- ) );
		- }
		-
		- /**
		- * Tests that straight quotes are converted to curly quotes.
		- *
		- * @throws IOException Error opening file full of fun.
		- */
		- @Test
		- public void test_Parse_StraightQuotes_CurlyQuotes() throws IOException {
		- testConverter( );
		- }
		-
		- /**
		- * Reads a file full of couplets. The first of the pair is the input,
		- * the second of the pair is the expected result. Couplets may include
		- * newline characters to indicate end of lines and end of paragraphs.
		- * Lines that start with {@code #} are ignored.
		- *
		- * @param parser The text processor capable of straight quote conversion.
		- * @throws IOException Error opening file full of fun.
		- */
		- private void testConverter( final Function<String, String> parser )
		- throws IOException {
		- try( final var reader = openResource( "smartypants.txt" ) ) {
		- String line;
		- String testLine = "";
		- String expected = "";
		-
		- while( ((line = reader.readLine()) != null) ) {
		- if( line.startsWith( "#" ) \|\| line.isBlank() ) { continue; }
		-
		- // Read the first line of the couplet.
		- if( testLine.isBlank() ) {
		- testLine = line;
		- continue;
		- }
		-
		- // Read the second line of the couplet.
		- if( expected.isBlank() ) {
		- expected = line;
		- }
		-
		- testLine = unescapeEol( testLine );
		- expected = unescapeEol( expected );
		-
		- final var actual = parser.apply( testLine );
		- assertEquals( expected, actual );
		-
		- testLine = "";
		- expected = "";
		- }
		- }
		- }
		-
		- private static String unescapeEol( final String s ) {
		- return String.join( "\n", s.split( "\\\\n" ) );
		- }
		-
		- @SuppressWarnings( "SameParameterValue" )
		- private BufferedReader openResource( final String filename ) {
		- final var is = getClass().getResourceAsStream( filename );
		- assertNotNull( is );
		-
		- return new BufferedReader( new InputStreamReader( is ) );
		- }
		-}

Delta	601 lines added, 572 lines removed, 29-line increase