Emits em- and en-dashes

Author	DaveJarvis <email>
Date	2025-08-30 11:16:16 GMT-0700
Commit	51c9559ce6a62a51631ee589e6e72af20ac0c47d
Parent	d4a8ea6

src/main/java/com/whitemagicsoftware/keenquotes/lex/LexemeGlyph.java

		LEX_DOUBLE_CHEVRON_RIGHT( '»' ),
		LEX_SINGLE_CHEVRON_LEFT( '‹' ),
		- LEX_SINGLE_CHEVRON_RIGHT( '›' );
		+ LEX_SINGLE_CHEVRON_RIGHT( '›' ),
		+
		+ LEX_SPACE( ' ' ),
		+
		+ LEX_ELLIPSES( '…' ),
		+ LEX_DASH_EN( '–' ),
		+ LEX_DASH_EM( '—' ),
		+ LEX_HYPHEN( '-' );

		private final char mGlyph;

src/main/java/com/whitemagicsoftware/keenquotes/lex/LexemeType.java

		public static final LexemeType EOP = new LexemeType();
		public static final LexemeType EOT = new LexemeType();
		- public static final LexemeType SPACE = new LexemeType();
		+ public static final LexemeType SPACE = new LexemeType( LEX_SPACE );
		public static final LexemeType WORD = new LexemeType();
		public static final LexemeType NUMBER = new LexemeType();
		public static final LexemeType PUNCT = new LexemeType();
		public static final LexemeType OPENING_GROUP = new LexemeType();
		public static final LexemeType CLOSING_GROUP = new LexemeType();
		- public static final LexemeType HYPHEN = new LexemeType();
		- public static final LexemeType DASH = new LexemeType();
		+ public static final LexemeType DASH_EN = new LexemeType( LEX_DASH_EN );
		+ public static final LexemeType DASH_EM = new LexemeType( LEX_DASH_EM );
		+ public static final LexemeType DASH_LINE = new LexemeType();
		+ public static final LexemeType HYPHEN = new LexemeType( LEX_HYPHEN );
		public static final LexemeType EQUALS = new LexemeType();
		public static final LexemeType PERIOD = new LexemeType();
		- public static final LexemeType ELLIPSIS = new LexemeType();
		+ public static final LexemeType ELLIPSIS = new LexemeType( LEX_ELLIPSES );
		public static final LexemeType ENDING = new LexemeType();
		public static final LexemeType ANY = new LexemeType();

src/main/java/com/whitemagicsoftware/keenquotes/lex/Lexer.java

		);

		- private static final Set<Character> DASH_CHARS = new HashSet<>(
		- Arrays.asList( '-', '–', '—', '―' )
		- );
		-
		/**
		* Consider {@code _} and {@code *} as letters (in a word) because plain

		}
		else if( curr == '\r' \|\| curr == '\n' ) {
		- final var cr = new int[]{curr == '\r' ? 1 : 0};
		- final var lf = new int[]{curr == '\n' ? 1 : 0};
		+ final var cr = new int[]{ curr == '\r' ? 1 : 0 };
		+ final var lf = new int[]{ curr == '\n' ? 1 : 0 };

		// Swallow all consecutive CR (Mac), CRLF (Windows), and/or LF (Unix).

		token = QUOTE_SINGLE;
		}
		- else if( curr == '-' && i.peek() != '-' ) {
		- token = HYPHEN;
		+ else if( isHyphen( curr ) ) {
		+ final var began = i.index();
		+ i.skip( Lexer::isHyphen );
		+ final var count = i.index() - began;
		+
		+ token = switch( count ) {
		+ case 0 -> HYPHEN;
		+ case 1 -> DASH_EN;
		+ case 2 -> DASH_EM;
		+ default -> DASH_LINE;
		+ };
		}
		- else if( isDash( curr ) ) {
		- i.skip( Lexer::isDash );
		- token = DASH;
		+ else if( curr == '–' ) {
		+ token = DASH_EN;
		+ }
		+ else if( curr == '—' ) {
		+ token = DASH_EM;
		}
		else if( curr == '(' \|\| curr == '{' \|\| curr == '[' ) {


		/**
		- * Answers whether the given character may be part of an en- or em-dash.
		- * This must be called after it is known that the character isn't a lone
		- * hyphen.
		+ * Answers whether the given character is a hyphen.
		*
		- * @param curr The character to check as being a dash.
		- * @return {@code true} if the given character is part of a dash.
		+ * @param curr The character to check as being a hyphen.
		+ * @return {@code true} if the given character is a hyphen.
		*/
		- private static boolean isDash( final char curr ) {
		- return DASH_CHARS.contains( curr );
		+ private static boolean isHyphen( final char curr ) {
		+ return curr == '-';
		}

src/main/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitter.java

		};

		- private static final LexemeType[] PUNCT_PERIOD_ELLIPSIS_DASH = {
		- PUNCT, PERIOD, ELLIPSIS, DASH
		- };
		-
		- private static final LexemeType[] PUNCT_PERIOD = {
		- PUNCT, PERIOD
		- };
		-
		- private static final LexemeType[] SPACE_DASH_ENDING = {
		- SPACE, DASH, ENDING
		- };
		-
		- private static final LexemeType[] SPACE_ENDING = {
		- SPACE, ENDING
		- };
		-
		- private static final LexemeType[] SPACE_HYPHEN = {
		- SPACE, HYPHEN
		- };
		-
		- private static final LexemeType[] SPACE_PUNCT = {
		- SPACE, PUNCT
		- };
		-
		- private static final LexemeType[] SPACE_SOT = {
		- SPACE, SOT
		- };
		-
		- /**
		- * Single quotes preceded by these {@link LexemeType}s may be opening quotes.
		- */
		- private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE =
		- new LexemeType[]{
		- LexemeType.SOT, SPACE, DASH, QUOTE_DOUBLE, OPENING_GROUP, EOL, EOP
		- };
		-
		- /**
		- * Single quotes succeeded by these {@link LexemeType}s may be opening quotes.
		- */
		- private static final LexemeType[] LAGGING_QUOTE_OPENING_SINGLE =
		- new LexemeType[]{
		- WORD, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE
		- };
		-
		- /**
		- * Single quotes preceded by these {@link LexemeType}s may be closing quotes.
		- */
		- private static final LexemeType[] LEADING_QUOTE_CLOSING_SINGLE =
		- new LexemeType[]{
		- WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_DOUBLE
		- };
		-
		- /**
		- * Single quotes succeeded by these {@link LexemeType}s may be closing quotes.
		- */
		- private static final LexemeType[] LAGGING_QUOTE_CLOSING_SINGLE =
		- new LexemeType[]{
		- SPACE, HYPHEN, DASH, PUNCT, PERIOD, ELLIPSIS, QUOTE_DOUBLE, CLOSING_GROUP,
		- ENDING
		- };
		-
		- /**
		- * Double quotes preceded by these {@link LexemeType}s may be opening quotes.
		- */
		- private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
		- new LexemeType[]{
		- LexemeType.SOT, SPACE, DASH, EQUALS, OPENING_GROUP, EOL, EOP
		- };
		-
		- /**
		- * Double quotes succeeded by these {@link LexemeType}s may be opening quotes.
		- */
		- private static final LexemeType[] LAGGING_QUOTE_OPENING_DOUBLE =
		- new LexemeType[]{
		- WORD, PUNCT, NUMBER, DASH, ELLIPSIS, OPENING_GROUP, QUOTE_SINGLE,
		- QUOTE_SINGLE_OPENING, QUOTE_SINGLE_CLOSING, QUOTE_DOUBLE
		- };
		-
		- /**
		- * Double quotes preceded by these {@link LexemeType}s may be closing quotes.
		- */
		- private static final LexemeType[] LEADING_QUOTE_CLOSING_DOUBLE =
		- new LexemeType[]{
		- WORD, NUMBER, PERIOD, PUNCT, DASH, ELLIPSIS, CLOSING_GROUP, QUOTE_SINGLE,
		- QUOTE_SINGLE_CLOSING, QUOTE_SINGLE_OPENING
		- };
		-
		- /**
		- * Double quotes succeeded by these {@link LexemeType}s may be closing quotes.
		- */
		- private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE =
		- new LexemeType[]{
		- SPACE, PUNCT, PERIOD, EQUALS, HYPHEN, DASH, QUOTE_SINGLE, CLOSING_GROUP,
		- ENDING
		- };
		-
		- private final CircularFifoQueue<Lexeme> mQ = new CircularFifoQueue<>( 4 );
		- private final String mText;
		- private final Contractions mContractions;
		- private final Consumer<Token> mConsumer;
		-
		- public QuoteEmitter(
		- final String text,
		- final Contractions contractions,
		- final Consumer<Token> consumer
		- ) {
		- assert text != null;
		- assert contractions != null;
		-
		- mText = text;
		- mContractions = contractions;
		- mConsumer = consumer;
		- }
		-
		- /**
		- * Scans the given text document for quotation marks and passes them to the
		- * given {@link Token} {@link Consumer}.
		- *
		- * @param text The prose to lex.
		- * @param contractions List of ambiguous and unambiguous contractions.
		- * @param consumer Receives
		- */
		- public static void analyze(
		- final String text,
		- final Contractions contractions,
		- final Consumer<Token> consumer,
		- final LexerFilter filter
		- ) {
		- final var emitter = new QuoteEmitter( text, contractions, consumer );
		- Lexer.lex( text, emitter, filter );
		- }
		-
		- /**
		- * @param lexeme the input argument
		- */
		- @Override
		- public void accept( final Lexeme lexeme ) {
		- mQ.add( lexeme );
		-
		- if( mQ.size() == 4 ) {
		- parse();
		- }
		- }
		-
		- private void parse() {
		- final var lex1 = mQ.get( 0 );
		- final var lex2 = mQ.get( 1 );
		- final var lex3 = mQ.get( 2 );
		- final var lex4 = mQ.get( 3 );
		-
		- // <y'all>, <Ph.D.'ll>, <20's>, <she's>
		- if( match( WORD_PERIOD_NUMBER, QUOTE_SINGLE, WORD, ANY ) ) {
		- emit( QUOTE_APOSTROPHE, lex2 );
		- }
		- // <'n'>, <'N'>, <'owlin'>
		- else if(
		- match( ANY, QUOTE_SINGLE, WORD, QUOTE_SINGLE ) &&
		- mContractions.beganEndedUnambiguously( lex3.toString( mText ) )
		- ) {
		- emit( QUOTE_APOSTROPHE, lex2 );
		- emit( QUOTE_APOSTROPHE, lex4 );
		- mQ.set( Lexeme.NONE, 3 );
		- }
		- // <2''>
		- else if( match( NUMBER, QUOTE_SINGLE, QUOTE_SINGLE, ANY ) ) {
		- // Force double primes to conform to the same constructor usage. This
		- // simplifies the tokens, reduces some memory usage,
		- final var lex = new Lexeme( PRIME_DOUBLE, lex2.began(), lex3.ended() );
		-
		- emit( QUOTE_PRIME_DOUBLE, lex );
		- mQ.set( Lexeme.NONE, 2 );
		- }
		- // <2'>
		- else if( match( NUMBER, QUOTE_SINGLE, ANY, ANY ) ) {
		- emit( QUOTE_PRIME_SINGLE, lex2 );
		- }
		- // <2">
		- else if( match( NUMBER, QUOTE_DOUBLE, ANY, ANY ) ) {
		- emit( QUOTE_PRIME_DOUBLE, lex2 );
		- }
		- // <thinkin'>
		- else if(
		- match( WORD, QUOTE_SINGLE, ANY, ANY ) &&
		- mContractions.endedUnambiguously( lex1.toString( mText ) )
		- ) {
		- emit( QUOTE_APOSTROPHE, lex2 );
		- }
		- // <'02>
		- else if( match( ANY, QUOTE_SINGLE, NUMBER, SPACE_PUNCT ) ) {
		- emit( QUOTE_APOSTROPHE, lex2 );
		- }
		- // <'20s>
		- else if(
		- match( ANY, QUOTE_SINGLE, NUMBER, WORD ) &&
		- "s".equalsIgnoreCase( lex4.toString( mText ) )
		- ) {
		- emit( QUOTE_APOSTROPHE, lex2 );
		- }
		- // <.'\n>
		- else if( match( PUNCT_PERIOD_ELLIPSIS_DASH, QUOTE_SINGLE, ENDING, ANY ) ) {
		- emit( QUOTE_CLOSING_SINGLE, lex2 );
		- }
		- // <\'>
		- else if( match( ESC_SINGLE, ANY, ANY, ANY ) ) {
		- emit( QUOTE_STRAIGHT_SINGLE, lex1 );
		- }
		- // <\">
		- else if( match( ESC_DOUBLE, ANY, ANY, ANY ) ) {
		- emit( QUOTE_STRAIGHT_DOUBLE, lex1 );
		-
		- // <\"'--->
		- if( match( ESC_DOUBLE, QUOTE_SINGLE, SPACE_DASH_ENDING, ANY ) ) {
		- emit( QUOTE_CLOSING_SINGLE, lex2 );
		- }
		- }
		- // <---'" >
		- else if( match( DASH, QUOTE_SINGLE, QUOTE_DOUBLE, SPACE_ENDING ) ) {
		- emit( QUOTE_CLOSING_SINGLE, lex2 );
		- }
		- // <o’-lantern>, <o' fellow>, <O'-the>
		- else if(
		- match( WORD, QUOTE_SINGLE, SPACE_HYPHEN, WORD ) &&
		- "o".equalsIgnoreCase( lex1.toString( mText ) )
		- ) {
		- emit( QUOTE_APOSTROPHE, lex2 );
		- }
		- // <"">, <"...>, <"word>, <---"word>
		- else if(
		- match(
		- LEADING_QUOTE_OPENING_DOUBLE, QUOTE_DOUBLE,
		- LAGGING_QUOTE_OPENING_DOUBLE, ANY
		- )
		- ) {
		- emit( QUOTE_OPENING_DOUBLE, lex2 );
		- }
		- // <..."'>, <word"'>, <?"'>, <word"?>
		- else if(
		- match(
		- LEADING_QUOTE_CLOSING_DOUBLE, QUOTE_DOUBLE,
		- LAGGING_QUOTE_CLOSING_DOUBLE, ANY
		- )
		- ) {
		- emit( QUOTE_CLOSING_DOUBLE, lex2 );
		- }
		- // < ''E>
		- else if( match( SPACE_SOT, QUOTE_SINGLE, QUOTE_SINGLE, WORD ) ) {
		- // Consume both immediately to avoid the false ambiguity <'e>.
		- emit( QUOTE_OPENING_SINGLE, lex2 );
		- emit( QUOTE_APOSTROPHE, lex3 );
		- mQ.set( Lexeme.NONE, 1 );
		- mQ.set( Lexeme.NONE, 2 );
		- }
		- // <'...>, <'word>, <---'word>, < 'nation>
		- else if(
		- match(
		- LEADING_QUOTE_OPENING_SINGLE, QUOTE_SINGLE,
		- LAGGING_QUOTE_OPENING_SINGLE, ANY )
		- ) {
		- final var word = lex3.toString( mText );
		-
		- if( mContractions.beganAmbiguously( word ) ) {
		- emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
		- }
		- else if( mContractions.beganUnambiguously( word ) ) {
		- emit( QUOTE_APOSTROPHE, lex2 );
		- }
		- // <"'"nested>
		- else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, QUOTE_DOUBLE, WORD ) ) {
		- emit( QUOTE_OPENING_SINGLE, lex2 );
		- }
		- // <"'" >
		- else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, QUOTE_DOUBLE, ANY ) ) {
		- emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
		- }
		- // < '" >
		- else if( match( ANY, QUOTE_SINGLE, LAGGING_QUOTE_OPENING_SINGLE, ANY ) ) {
		- emit( QUOTE_OPENING_SINGLE, lex2 );
		- }
		- // Ambiguous
		- else {
		- emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
		- }
		- }
		- // <"'--- >
		- else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, DASH, ANY ) ) {
		- emit( QUOTE_OPENING_SINGLE, lex2 );
		- }
		- // <word'">, <...'--->, <"' >
		- else if(
		- match(
		- LEADING_QUOTE_CLOSING_SINGLE, QUOTE_SINGLE,
		- LAGGING_QUOTE_CLOSING_SINGLE, ANY
		- )
		- ) {
		- final var word = lex1.toString( mText );
		-
		- if( mContractions.endedAmbiguously( word ) ) {
		- emit( QUOTE_AMBIGUOUS_LAGGING, lex2 );
		- }
		- else {
		- emit( QUOTE_CLOSING_SINGLE, lex2 );
		- }
		- }
		- // <word';> (contraction inferred by previous matches)
		- else if( match( WORD, QUOTE_SINGLE, PUNCT_PERIOD, ANY ) ) {
		- emit( QUOTE_APOSTROPHE, lex2 );
		- }
		- // <---'">
		- else if( match( DASH, QUOTE_SINGLE, QUOTE_DOUBLE, ANY ) ) {
		- emit( QUOTE_CLOSING_SINGLE, lex2 );
		- }
		- // <'42>, <'-3.14>
		- else if( match( ANY, QUOTE_SINGLE, NUMBER, ANY ) ) {
		- emit( QUOTE_OPENING_SINGLE, lex2 );
		- }
		- // <PRE-PARSED><'---.>
		- else if( match( LexemeType.NONE, QUOTE_SINGLE, ANY, ANY ) ) {
		- emit( QUOTE_CLOSING_SINGLE, lex2 );
		- }
		- // <''Cause >
		- else if( match( QUOTE_SINGLE, QUOTE_SINGLE, WORD, ANY ) ) {
		- final var word = lex3.toString( mText );
		-
		- if( mContractions.beganAmbiguously( word ) ) {
		- emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
		- }
		- else if( mContractions.beganUnambiguously( word ) ) {
		- emit( QUOTE_APOSTROPHE, lex2 );
		- }
		- else {
		- emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
		- }
		- }
		- // <'"Trouble>
		- else if( match( QUOTE_SINGLE, QUOTE_DOUBLE, WORD, ANY ) ) {
		- emit( QUOTE_OPENING_DOUBLE, lex2 );
		- }
		- // International quotation marks.
		- else if( match( ANY, QUOTE_DOUBLE_OPENING, ANY, ANY ) ) {
		- emit( QUOTE_OPENING_DOUBLE, lex2 );
		- }
		- else if( match( ANY, QUOTE_SINGLE_OPENING, ANY, ANY ) ) {
		- emit( QUOTE_OPENING_SINGLE, lex2 );
		- }
		- else if( match( ANY, QUOTE_DOUBLE_CLOSING, ANY, ANY ) ) {
		- emit( QUOTE_CLOSING_DOUBLE, lex2 );
		- }
		- else if( match( ANY, QUOTE_SINGLE_CLOSING, ANY, ANY ) ) {
		- emit( QUOTE_CLOSING_SINGLE, lex2 );
		- }
		- // Ambiguous (no match)
		- else if( match( ANY, QUOTE_SINGLE, ANY, ANY ) ) {
		- emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
		- }
		- else if( match( ANY, QUOTE_DOUBLE, ANY, ANY ) ) {
		- emit( QUOTE_AMBIGUOUS_DOUBLE, lex2 );
		- }
		- else if( match( ANY, ELLIPSIS, ANY, ANY ) ) {
		- emit( QUOTE_ELLIPSIS, lex2 );
		- }
		- }
		-
		- private void emit( final TokenType tokenType, final Lexeme lexeme ) {
		- mConsumer.accept( new Token( tokenType, lexeme ) );
		- }
		-
		- private boolean match(
		- final LexemeType l1,
		- final LexemeType l2,
		- final LexemeType l3,
		- final LexemeType l4 ) {
		- return mQ.get( 0 ).isType( l1 ) &&
		- mQ.get( 1 ).isType( l2 ) &&
		- mQ.get( 2 ).isType( l3 ) &&
		- mQ.get( 3 ).isType( l4 );
		- }
		-
		- private boolean match(
		- final LexemeType[] l1,
		- final LexemeType l2,
		- final LexemeType l3,
		- final LexemeType l4 ) {
		- return mQ.get( 0 ).isType( l1 ) &&
		- mQ.get( 1 ).isType( l2 ) &&
		- mQ.get( 2 ).isType( l3 ) &&
		- mQ.get( 3 ).isType( l4 );
		- }
		-
		- private boolean match(
		- final LexemeType l1,
		- final LexemeType l2,
		- final LexemeType[] l3,
		- final LexemeType l4 ) {
		- return mQ.get( 0 ).isType( l1 ) &&
		- mQ.get( 1 ).isType( l2 ) &&
		- mQ.get( 2 ).isType( l3 ) &&
		- mQ.get( 3 ).isType( l4 );
		- }
		-
		- private boolean match(
		- final LexemeType l1,
		- final LexemeType l2,
		- final LexemeType l3,
		- final LexemeType[] l4 ) {
		- return mQ.get( 0 ).isType( l1 ) &&
		- mQ.get( 1 ).isType( l2 ) &&
		- mQ.get( 2 ).isType( l3 ) &&
		- mQ.get( 3 ).isType( l4 );
		- }
		-
		- private boolean match(
		- final LexemeType[] l1,
		- final LexemeType l2,
		- final LexemeType[] l3,
		- final LexemeType l4 ) {
		+ private static final LexemeType[] DASHES = {
		+ DASH_EN, DASH_EM, DASH_LINE, HYPHEN
		+ };
		+
		+ private static final LexemeType[] PUNCT_PERIOD_ELLIPSIS_DASH = {
		+ PUNCT, PERIOD, ELLIPSIS, DASH_EN, DASH_EM, DASH_LINE, HYPHEN
		+ };
		+
		+ private static final LexemeType[] PUNCT_PERIOD = {
		+ PUNCT, PERIOD
		+ };
		+
		+ private static final LexemeType[] SPACE_DASH_ENDING = {
		+ SPACE, DASH_EN, DASH_EM, DASH_LINE, HYPHEN, ENDING
		+ };
		+
		+ private static final LexemeType[] SPACE_ENDING = {
		+ SPACE, ENDING
		+ };
		+
		+ private static final LexemeType[] SPACE_HYPHEN = {
		+ SPACE, HYPHEN
		+ };
		+
		+ private static final LexemeType[] SPACE_PUNCT = {
		+ SPACE, PUNCT
		+ };
		+
		+ private static final LexemeType[] SPACE_SOT = {
		+ SPACE, SOT
		+ };
		+
		+ /**
		+ * Single quotes preceded by these {@link LexemeType}s may be opening quotes.
		+ */
		+ private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE =
		+ new LexemeType[]{
		+ LexemeType.SOT, SPACE,
		+ DASH_EN, DASH_EM, DASH_LINE, HYPHEN,
		+ QUOTE_DOUBLE, OPENING_GROUP, EOL, EOP
		+ };
		+
		+ /**
		+ * Single quotes succeeded by these {@link LexemeType}s may be opening quotes.
		+ */
		+ private static final LexemeType[] LAGGING_QUOTE_OPENING_SINGLE =
		+ new LexemeType[]{
		+ WORD, ELLIPSIS, QUOTE_SINGLE, QUOTE_DOUBLE
		+ };
		+
		+ /**
		+ * Single quotes preceded by these {@link LexemeType}s may be closing quotes.
		+ */
		+ private static final LexemeType[] LEADING_QUOTE_CLOSING_SINGLE =
		+ new LexemeType[]{
		+ WORD, NUMBER, PERIOD, PUNCT, ELLIPSIS, QUOTE_DOUBLE
		+ };
		+
		+ /**
		+ * Single quotes succeeded by these {@link LexemeType}s may be closing quotes.
		+ */
		+ private static final LexemeType[] LAGGING_QUOTE_CLOSING_SINGLE =
		+ new LexemeType[]{
		+ SPACE,
		+ DASH_EN, DASH_EM, DASH_LINE, HYPHEN,
		+ PUNCT, PERIOD, ELLIPSIS, QUOTE_DOUBLE, CLOSING_GROUP, ENDING
		+ };
		+
		+ /**
		+ * Double quotes preceded by these {@link LexemeType}s may be opening quotes.
		+ */
		+ private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
		+ new LexemeType[]{
		+ LexemeType.SOT, SPACE,
		+ DASH_EN, DASH_EM, DASH_LINE, HYPHEN,
		+ EQUALS, OPENING_GROUP, EOL, EOP
		+ };
		+
		+ /**
		+ * Double quotes succeeded by these {@link LexemeType}s may be opening quotes.
		+ */
		+ private static final LexemeType[] LAGGING_QUOTE_OPENING_DOUBLE =
		+ new LexemeType[]{
		+ WORD, PUNCT, NUMBER,
		+ DASH_EN, DASH_EM, DASH_LINE, HYPHEN,
		+ ELLIPSIS, OPENING_GROUP,
		+ QUOTE_SINGLE, QUOTE_SINGLE_OPENING, QUOTE_SINGLE_CLOSING,
		+ QUOTE_DOUBLE
		+ };
		+
		+ /**
		+ * Double quotes preceded by these {@link LexemeType}s may be closing quotes.
		+ */
		+ private static final LexemeType[] LEADING_QUOTE_CLOSING_DOUBLE =
		+ new LexemeType[]{
		+ WORD, NUMBER, PERIOD, PUNCT,
		+ DASH_EN, DASH_EM, DASH_LINE, HYPHEN,
		+ ELLIPSIS, CLOSING_GROUP,
		+ QUOTE_SINGLE, QUOTE_SINGLE_CLOSING, QUOTE_SINGLE_OPENING
		+ };
		+
		+ /**
		+ * Double quotes succeeded by these {@link LexemeType}s may be closing quotes.
		+ */
		+ private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE =
		+ new LexemeType[]{
		+ SPACE, PUNCT, PERIOD, EQUALS,
		+ DASH_EN, DASH_EM, DASH_LINE, HYPHEN,
		+ QUOTE_SINGLE, CLOSING_GROUP, ENDING
		+ };
		+
		+ private final CircularFifoQueue<Lexeme> mQ = new CircularFifoQueue<>( 4 );
		+ private final String mText;
		+ private final Contractions mContractions;
		+ private final Consumer<Token> mConsumer;
		+
		+ public QuoteEmitter(
		+ final String text,
		+ final Contractions contractions,
		+ final Consumer<Token> consumer
		+ ) {
		+ assert text != null;
		+ assert contractions != null;
		+
		+ mText = text;
		+ mContractions = contractions;
		+ mConsumer = consumer;
		+ }
		+
		+ /**
		+ * Scans the given text document for quotation marks and passes them to the
		+ * given {@link Token} {@link Consumer}.
		+ *
		+ * @param text The prose to lex.
		+ * @param contractions List of ambiguous and unambiguous contractions.
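		+ * @param consumer Receives each {@link Token} emitted during analysis.
		+ * @param filter Passed through to the {@link Lexer} along with the text.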
		+ */
		+ public static void analyze(
		+ final String text,
		+ final Contractions contractions,
		+ final Consumer<Token> consumer,
		+ final LexerFilter filter
		+ ) {
		+ final var emitter = new QuoteEmitter( text, contractions, consumer );
		+ Lexer.lex( text, emitter, filter );
		+ }
		+
		+ /**
		+ * @param lexeme The lexeme to enqueue; parsing runs whenever four are queued.
		+ */
		+ @Override
		+ public void accept( final Lexeme lexeme ) {
		+ mQ.add( lexeme );
		+
		+ if( mQ.size() == 4 ) {
		+ parse();
		+ }
		+ }
		+
		+ private void parse() {
		+ final var lex1 = mQ.get( 0 );
		+ final var lex2 = mQ.get( 1 );
		+ final var lex3 = mQ.get( 2 );
		+ final var lex4 = mQ.get( 3 );
		+
		+ // <y'all>, <Ph.D.'ll>, <20's>, <she's>
		+ if( match( WORD_PERIOD_NUMBER, QUOTE_SINGLE, WORD, ANY ) ) {
		+ emit( QUOTE_APOSTROPHE, lex2 );
		+ }
		+ // <'n'>, <'N'>, <'owlin'>
		+ else if(
		+ match( ANY, QUOTE_SINGLE, WORD, QUOTE_SINGLE ) &&
		+ mContractions.beganEndedUnambiguously( lex3.toString( mText ) )
		+ ) {
		+ emit( QUOTE_APOSTROPHE, lex2 );
		+ emit( QUOTE_APOSTROPHE, lex4 );
		+ mQ.set( Lexeme.NONE, 3 );
		+ }
		+ // <2''>
		+ else if( match( NUMBER, QUOTE_SINGLE, QUOTE_SINGLE, ANY ) ) {
		+ // Force double primes to conform to the same constructor usage. This
		+ // simplifies the tokens and reduces some memory usage.
		+ final var lex = new Lexeme( PRIME_DOUBLE, lex2.began(), lex3.ended() );
		+
		+ emit( QUOTE_PRIME_DOUBLE, lex );
		+ mQ.set( Lexeme.NONE, 2 );
		+ }
		+ // <2'>
		+ else if( match( NUMBER, QUOTE_SINGLE, ANY, ANY ) ) {
		+ emit( QUOTE_PRIME_SINGLE, lex2 );
		+ }
		+ // <2">
		+ else if( match( NUMBER, QUOTE_DOUBLE, ANY, ANY ) ) {
		+ emit( QUOTE_PRIME_DOUBLE, lex2 );
		+ }
		+ // <thinkin'>
		+ else if(
		+ match( WORD, QUOTE_SINGLE, ANY, ANY ) &&
		+ mContractions.endedUnambiguously( lex1.toString( mText ) )
		+ ) {
		+ emit( QUOTE_APOSTROPHE, lex2 );
		+ }
		+ // <'02>
		+ else if( match( ANY, QUOTE_SINGLE, NUMBER, SPACE_PUNCT ) ) {
		+ emit( QUOTE_APOSTROPHE, lex2 );
		+ }
		+ // <'20s>
		+ else if(
		+ match( ANY, QUOTE_SINGLE, NUMBER, WORD ) &&
		+ "s".equalsIgnoreCase( lex4.toString( mText ) )
		+ ) {
		+ emit( QUOTE_APOSTROPHE, lex2 );
		+ }
		+ // <.'\n>
		+ else if( match( PUNCT_PERIOD_ELLIPSIS_DASH, QUOTE_SINGLE, ENDING, ANY ) ) {
		+ emit( QUOTE_CLOSING_SINGLE, lex2 );
		+ }
		+ // <\'>
		+ else if( match( ESC_SINGLE, ANY, ANY, ANY ) ) {
		+ emit( QUOTE_STRAIGHT_SINGLE, lex1 );
		+ }
		+ // <\">
		+ else if( match( ESC_DOUBLE, ANY, ANY, ANY ) ) {
		+ emit( QUOTE_STRAIGHT_DOUBLE, lex1 );
		+
		+ // <\"'--->
		+ if( match( ESC_DOUBLE, QUOTE_SINGLE, SPACE_DASH_ENDING, ANY ) ) {
		+ emit( QUOTE_CLOSING_SINGLE, lex2 );
		+ }
		+ }
		+ // <---'" >
		+ else if( match( DASHES, QUOTE_SINGLE, QUOTE_DOUBLE, SPACE_ENDING ) ) {
		+ emit( QUOTE_CLOSING_SINGLE, lex2 );
		+ }
		+ // <o'-lantern>, <o' fellow>, <O'-the>
		+ else if(
		+ match( WORD, QUOTE_SINGLE, SPACE_HYPHEN, WORD ) &&
		+ "o".equalsIgnoreCase( lex1.toString( mText ) )
		+ ) {
		+ emit( QUOTE_APOSTROPHE, lex2 );
		+ }
		+ // <"">, <"...>, <"word>, <---"word>
		+ else if(
		+ match(
		+ LEADING_QUOTE_OPENING_DOUBLE, QUOTE_DOUBLE,
		+ LAGGING_QUOTE_OPENING_DOUBLE, ANY
		+ )
		+ ) {
		+ emit( QUOTE_OPENING_DOUBLE, lex2 );
		+ }
		+ // <..."'>, <word"'>, <?"'>, <word"?>
		+ else if(
		+ match(
		+ LEADING_QUOTE_CLOSING_DOUBLE, QUOTE_DOUBLE,
		+ LAGGING_QUOTE_CLOSING_DOUBLE, ANY
		+ )
		+ ) {
		+ emit( QUOTE_CLOSING_DOUBLE, lex2 );
		+ }
		+ // < ''E>
		+ else if( match( SPACE_SOT, QUOTE_SINGLE, QUOTE_SINGLE, WORD ) ) {
		+ // Consume both immediately to avoid the false ambiguity <'e>.
		+ emit( QUOTE_OPENING_SINGLE, lex2 );
		+ emit( QUOTE_APOSTROPHE, lex3 );
		+ mQ.set( Lexeme.NONE, 1 );
		+ mQ.set( Lexeme.NONE, 2 );
		+ }
		+ // <'...>, <'word>, <---'word>, < 'nation>
		+ else if(
		+ match(
		+ LEADING_QUOTE_OPENING_SINGLE, QUOTE_SINGLE,
		+ LAGGING_QUOTE_OPENING_SINGLE, ANY )
		+ ) {
		+ final var word = lex3.toString( mText );
		+
		+ if( mContractions.beganAmbiguously( word ) ) {
		+ emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
		+ }
		+ else if( mContractions.beganUnambiguously( word ) ) {
		+ emit( QUOTE_APOSTROPHE, lex2 );
		+ }
		+ // <"'"nested>
		+ else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, QUOTE_DOUBLE, WORD ) ) {
		+ emit( QUOTE_OPENING_SINGLE, lex2 );
		+ }
		+ // <"'" >
		+ else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, QUOTE_DOUBLE, ANY ) ) {
		+ emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
		+ }
		+ // < '" >
		+ else if( match( ANY, QUOTE_SINGLE, LAGGING_QUOTE_OPENING_SINGLE, ANY ) ) {
		+ emit( QUOTE_OPENING_SINGLE, lex2 );
		+ }
		+ // Ambiguous
		+ else {
		+ emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
		+ }
		+ }
		+ // <"'--- >
		+ else if( match( QUOTE_DOUBLE, QUOTE_SINGLE, DASHES, ANY ) ) {
		+ emit( QUOTE_OPENING_SINGLE, lex2 );
		+ }
		+ // <word'">, <...'--->, <"' >
		+ else if(
		+ match(
		+ LEADING_QUOTE_CLOSING_SINGLE, QUOTE_SINGLE,
		+ LAGGING_QUOTE_CLOSING_SINGLE, ANY
		+ )
		+ ) {
		+ final var word = lex1.toString( mText );
		+
		+ if( mContractions.endedAmbiguously( word ) ) {
		+ emit( QUOTE_AMBIGUOUS_LAGGING, lex2 );
		+ }
		+ else {
		+ emit( QUOTE_CLOSING_SINGLE, lex2 );
		+ }
		+ }
		+ // <word';> (contraction inferred by previous matches)
		+ else if( match( WORD, QUOTE_SINGLE, PUNCT_PERIOD, ANY ) ) {
		+ emit( QUOTE_APOSTROPHE, lex2 );
		+ }
		+ // <---'">
		+ else if( match( DASHES, QUOTE_SINGLE, QUOTE_DOUBLE, ANY ) ) {
		+ emit( QUOTE_CLOSING_SINGLE, lex2 );
		+ }
		+ // <'42>, <'-3.14>
		+ else if( match( ANY, QUOTE_SINGLE, NUMBER, ANY ) ) {
		+ emit( QUOTE_OPENING_SINGLE, lex2 );
		+ }
		+ // <PRE-PARSED><'---.>
		+ else if( match( LexemeType.NONE, QUOTE_SINGLE, ANY, ANY ) ) {
		+ emit( QUOTE_CLOSING_SINGLE, lex2 );
		+ }
		+ // <''Cause >
		+ else if( match( QUOTE_SINGLE, QUOTE_SINGLE, WORD, ANY ) ) {
		+ final var word = lex3.toString( mText );
		+
		+ if( mContractions.beganAmbiguously( word ) ) {
		+ emit( QUOTE_AMBIGUOUS_LEADING, lex2 );
		+ }
		+ else if( mContractions.beganUnambiguously( word ) ) {
		+ emit( QUOTE_APOSTROPHE, lex2 );
		+ }
		+ else {
		+ emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
		+ }
		+ }
		+ // <'"Trouble>
		+ else if( match( QUOTE_SINGLE, QUOTE_DOUBLE, WORD, ANY ) ) {
		+ emit( QUOTE_OPENING_DOUBLE, lex2 );
		+ }
		+ // International quotation marks.
		+ else if( match( ANY, QUOTE_DOUBLE_OPENING, ANY, ANY ) ) {
		+ emit( QUOTE_OPENING_DOUBLE, lex2 );
		+ }
		+ else if( match( ANY, QUOTE_SINGLE_OPENING, ANY, ANY ) ) {
		+ emit( QUOTE_OPENING_SINGLE, lex2 );
		+ }
		+ else if( match( ANY, QUOTE_DOUBLE_CLOSING, ANY, ANY ) ) {
		+ emit( QUOTE_CLOSING_DOUBLE, lex2 );
		+ }
		+ else if( match( ANY, QUOTE_SINGLE_CLOSING, ANY, ANY ) ) {
		+ emit( QUOTE_CLOSING_SINGLE, lex2 );
		+ }
		+ // Ambiguous (no match)
		+ else if( match( ANY, QUOTE_SINGLE, ANY, ANY ) ) {
		+ emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
		+ }
		+ else if( match( ANY, QUOTE_DOUBLE, ANY, ANY ) ) {
		+ emit( QUOTE_AMBIGUOUS_DOUBLE, lex2 );
		+ }
		+ else if( match( ANY, ELLIPSIS, ANY, ANY ) ) {
		+ emit( QUOTE_ELLIPSIS, lex2 );
		+ }
		+ else if( match( ANY, DASH_EN, ANY, ANY ) ) {
		+ emit( QUOTE_DASH_EN, lex2 );
		+ }
		+ else if( match( ANY, DASH_EM, ANY, ANY ) ) {
		+ emit( QUOTE_DASH_EM, lex2 );
		+ }
		+ }
		+
		+ private void emit( final TokenType tokenType, final Lexeme lexeme ) {
		+ mConsumer.accept( new Token( tokenType, lexeme ) );
		+ }
		+
		+ private boolean match(
		+ final LexemeType l1,
		+ final LexemeType l2,
		+ final LexemeType l3,
		+ final LexemeType l4 ) {
		+ return mQ.get( 0 ).isType( l1 ) &&
		+ mQ.get( 1 ).isType( l2 ) &&
		+ mQ.get( 2 ).isType( l3 ) &&
		+ mQ.get( 3 ).isType( l4 );
		+ }
		+
		+ private boolean match(
		+ final LexemeType[] l1,
		+ final LexemeType l2,
		+ final LexemeType l3,
		+ final LexemeType l4 ) {
		+ return mQ.get( 0 ).isType( l1 ) &&
		+ mQ.get( 1 ).isType( l2 ) &&
		+ mQ.get( 2 ).isType( l3 ) &&
		+ mQ.get( 3 ).isType( l4 );
		+ }
		+
		+ private boolean match(
		+ final LexemeType l1,
		+ final LexemeType l2,
		+ final LexemeType[] l3,
		+ final LexemeType l4 ) {
		+ return mQ.get( 0 ).isType( l1 ) &&
		+ mQ.get( 1 ).isType( l2 ) &&
		+ mQ.get( 2 ).isType( l3 ) &&
		+ mQ.get( 3 ).isType( l4 );
		+ }
		+
		+ private boolean match(
		+ final LexemeType l1,
		+ final LexemeType l2,
		+ final LexemeType l3,
		+ final LexemeType[] l4 ) {
		+ return mQ.get( 0 ).isType( l1 ) &&
		+ mQ.get( 1 ).isType( l2 ) &&
		+ mQ.get( 2 ).isType( l3 ) &&
		+ mQ.get( 3 ).isType( l4 );
		+ }
		+
		+ private boolean match(
		+ final LexemeType[] l1,
		+ final LexemeType l2,
		+ final LexemeType[] l3,
		+ final LexemeType l4 ) {
		+ return mQ.get( 0 ).isType( l1 ) &&
		+ mQ.get( 1 ).isType( l2 ) &&
		+ mQ.get( 2 ).isType( l3 ) &&
		+ mQ.get( 3 ).isType( l4 );
		+ }
		+
		+ private boolean match(
		+ final LexemeType[] l1,
		+ final LexemeType l2,
		+ final LexemeType l3,
		+ final LexemeType[] l4 ) {
		return mQ.get( 0 ).isType( l1 ) &&
		mQ.get( 1 ).isType( l2 ) &&

src/main/java/com/whitemagicsoftware/keenquotes/parser/Token.java

		ENTITIES.put( QUOTE_PRIME_QUADRUPLE, "&qprime;" );
		ENTITIES.put( QUOTE_ELLIPSIS, "…" );
		+ ENTITIES.put( QUOTE_DASH_EN, "–" );
		+ ENTITIES.put( QUOTE_DASH_EM, "—" );
		+ ENTITIES.put( QUOTE_DASH_MINUS, "−" );
		}


		CHARS.put( QUOTE_PRIME_QUADRUPLE, "⁗" );
		CHARS.put( QUOTE_ELLIPSIS, "…" );
		+ CHARS.put( QUOTE_DASH_MINUS, "−" );
		+ CHARS.put( QUOTE_DASH_EN, "–" );
		+ CHARS.put( QUOTE_DASH_EM, "—" );
		}

src/main/java/com/whitemagicsoftware/keenquotes/parser/TokenType.java

		QUOTE_AMBIGUOUS_SINGLE( "single-ambiguous" ),
		QUOTE_ELLIPSIS,
		+ QUOTE_DASH_MINUS,
		+ QUOTE_DASH_EN,
		+ QUOTE_DASH_EM,
		NONE;

src/main/java/com/whitemagicsoftware/keenquotes/util/FastCharacterIterator.java

		*/
		public final class FastCharacterIterator {
		-
		private final String mS;
		private final int mLen;

src/test/java/com/whitemagicsoftware/keenquotes/lex/LexerTest.java

		@Test
		void test_Lexing_PunctuationMarks_EmitPunctuationMarks() {
		+ testType( "-", HYPHEN );
		+ testType( "--", DASH_EN );
		+ testType( "–", DASH_EN );
		+ testType( "---", DASH_EM );
		+ testType( "--- -- -", DASH_EM, SPACE, DASH_EN, SPACE, HYPHEN );
		+ testType( "—", DASH_EM );
		+ testType( "----", DASH_LINE );
		+ testType( "-----", DASH_LINE );
		+ testType( "------", DASH_LINE );
		+ testType( "—-—", DASH_EM, HYPHEN, DASH_EM );
		testType( "!", PUNCT );
		testType( ";", PUNCT );
		testType( "\\", PUNCT );
		testType( ".", PERIOD );
		- testType( "-", HYPHEN );
		- testType( "--", DASH );
		- testType( "---", DASH );
		- testType( "–", DASH );
		- testType( "―", DASH );
		- testType( "—", DASH );
		- testType( "—-—", DASH );
		testType( "...", ELLIPSIS );
		testType( ". .", ELLIPSIS );

		var counter = new AtomicInteger();

		- Lexer.lex( text, lexeme -> {
		- // Ignore the SOT and EOT lexemes (avoids duplication). Each test will
		- // include EOL, EOP, and EOT tokens due to the lexer's algorithm.
		- if( !lexeme.isType( SOT, EOT ) && counter.get() < elements.size() ) {
		- final var expected = elements.get( counter.getAndIncrement() );
		- final var actual = f.apply( lexeme, text );
		+ Lexer.lex(
		+ text, lexeme -> {
		+ // Ignore the SOT and EOT lexemes (avoids duplication). Each test will
		+ // include EOL, EOP, and EOT tokens due to the lexer's algorithm.
		+ if( !lexeme.isType( SOT, EOT ) && counter.get() < elements.size() ) {
		+ final var expected = elements.get( counter.getAndIncrement() );
		+ final var actual = f.apply( lexeme, text );

		- assertEquals( expected, actual );
		- }
		- }, filter );
		+ assertEquals( expected, actual );
		+ }
		+ }, filter
		+ );

		// Ensure all expected values are matched (verify end of text reached).

src/test/resources/com/whitemagicsoftware/keenquotes/texts/unambiguous-1-pass.txt


		With--"air quotes"--and dashes.
		-With--“air quotes”--and dashes.
		+With–“air quotes”–and dashes.

		"Not all open quotes are closed...
		“Not all open quotes are closed…

		"---retroactively!"
		-“---retroactively!”
		+“—retroactively!”

		"And this" and "and this" and "and this" and "and another."
		“And this” and “and this” and “and this” and “and another.”

		"Will'll invite?" Mrs. Thorne asked;\n"because he's coming--he's at the gate."
		-“Will'll invite?” Mrs. Thorne asked;\n“because he's coming--he's at the gate.”
		+“Will'll invite?” Mrs. Thorne asked;\n“because he's coming–he's at the gate.”

		Model "T2000"

		'...even better!'
		‘…even better!’
		+
		+With-'imaginary'-dashes.
		+With-‘imaginary’-dashes.

		With--'imaginary'--dashes.
		-With--‘imaginary’--dashes.
		+With–‘imaginary’–dashes.
		+
		+With---'imaginary'---dashes.
		+With—‘imaginary’—dashes.

		'It's a beautiful day!'


		"'I'm in danger. Help? Take me to'--an address on Van Ness Avenue.
		-“‘I'm in danger. Help? Take me to’--an address on Van Ness Avenue.
		+“‘I'm in danger. Help? Take me to’–an address on Van Ness Avenue.

		"Ah," she said, "you knew! He told you--and you said 'my dear'! How could you?!"
		-“Ah,” she said, “you knew! He told you--and you said ‘my dear’! How could you?!”
		+“Ah,” she said, “you knew! He told you–and you said ‘my dear’! How could you?!”

		shouldn't drop letters, nor say "ain't" or "hain't."
		shouldn't drop letters, nor say “ain't” or “hain't.”

		"’Kearney lives on the banks of Killarney—’
		-“’Kearney lives on the banks of Killarney—’
		+“’Kearney lives on the banks of Killarney—’

		# ########################################################################


		"She said, 'Llamas'll languish, they'll--
		-“She said, ‘Llamas'll languish, they'll--
		+“She said, ‘Llamas'll languish, they'll–

		'The 70s are my favorite numbers,' she said.


		Computer says, "'It is mysteries---'"
		-Computer says, “‘It is mysteries---’”
		+Computer says, “‘It is mysteries—’”

src/test/resources/com/whitemagicsoftware/keenquotes/texts/unambiguous-2-pass.txt


		"Jane said, ''E'll be spooky, Sam's son with the jack-o'-lantern!'" said the O'Mally twins'---y'know---ghosts in unison.
		-“Jane said, ‘'E'll be spooky, Sam's son with the jack-o'-lantern!’” said the O'Mally twins'---y'know---ghosts in unison.
		+“Jane said, ‘'E'll be spooky, Sam's son with the jack-o'-lantern!’” said the O'Mally twins'—y'know—ghosts in unison.

		''Sup, Doc?'


		"Not much o' fellow-creaturs, I think, Miss; all I know—my old master, as war a knowin' man, used to say, says he, 'If e'er I sow my wheat wi'out brinin', I'm a Dutchman,' says he; an' that war as much as to say as a Dutchman war a fool, or next door. Nay, nay, I aren't goin' to bother mysen about Dutchmen. There's fools enoo, an' rogues enoo, wi'out lookin' i' books for 'em."
		-“Not much o' fellow-creaturs, I think, Miss; all I know—my old master, as war a knowin' man, used to say, says he, ‘If e'er I sow my wheat wi'out brinin', I'm a Dutchman,’ says he; an' that war as much as to say as a Dutchman war a fool, or next door. Nay, nay, I aren't goin' to bother mysen about Dutchmen. There's fools enoo, an' rogues enoo, wi'out lookin' i' books for 'em.”
		+“Not much o' fellow-creaturs, I think, Miss; all I know—my old master, as war a knowin' man, used to say, says he, ‘If e'er I sow my wheat wi'out brinin', I'm a Dutchman,’ says he; an' that war as much as to say as a Dutchman war a fool, or next door. Nay, nay, I aren't goin' to bother mysen about Dutchmen. There's fools enoo, an' rogues enoo, wi'out lookin' i' books for 'em.”

		I leave proofs to those who 've seen 'em;\n\nBut this I heard her say, and can't be wrong\n\nAnd all may think which way their judgments lean 'em,\n\n''T is strange---the Hebrew noun which means "I am,"\n\nThe English always use to govern d--n.'
		-I leave proofs to those who 've seen 'em;\n\nBut this I heard her say, and can't be wrong\n\nAnd all may think which way their judgments lean 'em,\n\n‘'T is strange---the Hebrew noun which means “I am,”\n\nThe English always use to govern d--n.’
		+I leave proofs to those who 've seen 'em;\n\nBut this I heard her say, and can't be wrong\n\nAnd all may think which way their judgments lean 'em,\n\n‘'T is strange—the Hebrew noun which means “I am,”\n\nThe English always use to govern d–n.’

		''E's got a 'ittle box 'n a big 'un,' she said, 'wit' th' 'ittle 'un 'bout 2'×6". An' no, y'ain't cryin' on th' "soap box" to me no mo, y'hear. 'Cause it 'tweren't ever a spec o' fun!' I says to my frien'.


		"You 'cause---" all fifteen years' worth.
		-“You 'cause---” all fifteen years' worth.
		+“You 'cause—” all fifteen years' worth.

		"'---has a prison.'"
		-“‘---has a prison.’”
		+“‘—has a prison.’”

		"Teach 'em t'be more 'an we ever been!"

src/test/resources/com/whitemagicsoftware/keenquotes/texts/xml.txt

		<bold>'twas</bold> redeemed for the <em>cat</em>'s eye

		-<a href="https://x.org" title="X's Homepage">X11's bomb</a>
		-<a href="https://x.org" title="X's Homepage">X11's bomb</a>
		+<a href="https://dj.ca" title="X's Homepage">X11's bomb</a>
		+<a href="https://dj.ca" title="X's Homepage">X11's bomb</a>
		+
		+<a href="https://d--j.ca">D--J</a>
		+<a href="https://d--j.ca">D–J</a>

		<body><p>Test lungs' capacity; shovel's handle hit his head's electronics.</p><p>Hoped to bring him 'round.</p></body>

Delta	539 lines added, 471 lines removed, 68-line increase