Dave Jarvis' Repositories

Home Files Commits Tags Clone

Repository:

git clone https://repo.autonoma.ca/repo/keenquotes.git

M src/main/java/com/whitemagicsoftware/keenquotes/app/KeenQuotes.java

     else {
       try {
         final var filter = settings.filterXml() ? FILTER_XML : FILTER_PLAIN;
         final var c = new Curler( contractions, filter );
         final var c = new Curler(
           contractions,
           settings.filterXml() ? FILTER_XML : FILTER_PLAIN,
           settings.entities()
         );
         out.print( convert( c ) );

M src/main/java/com/whitemagicsoftware/keenquotes/app/Settings.java

   /**
    * Encode quotation marks using HTML entities.
    */
   @CommandLine.Option(
     names = {"-e", "--entities"},
     description = "Encode quotation marks using HTML entities"
+  )
   private boolean mEntities;
   /**
    * Enable the {@link XmlFilter}.
    */
    * @return {@code true} to list the contractions.
    */
   boolean displayList() {
     return mDisplayList;
+  }
   boolean displayList() { return mDisplayList; }
   /**
    * Answers whether quotation marks within XML elements are ignored.
+   *
    * @return {@code true} to ignore quotation marks inside XML elements.
    */
   boolean filterXml() { return mFilterXml; }
   /**
    * Answers whether quotation marks must be encoded using HTML entities.
+   *
    * @return {@code true} to encode quotation marks using HTML entities.
    */
   boolean entities() { return mEntities; }
   List<String> getBeganUnambiguous() {

A src/main/java/com/whitemagicsoftware/keenquotes/lex/LexemeGlyph.java

 /* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
 package com.whitemagicsoftware.keenquotes.lex;
 /**
  * Common international quotation mark symbols to allow for re-encoding when
  * exporting back to text.
  */
 public enum LexemeGlyph {
   /**
    * Any other character that's not a quotation mark.
    */
   LEX_OTHER( (char) 0 ),
   // Straight single quotation mark, followed by its curled opening and
   // closing forms.
   LEX_SINGLE_QUOTE( '\'' ),
   LEX_SINGLE_QUOTE_OPENING( '‘' ),
   LEX_SINGLE_QUOTE_CLOSING( '’' ),
   // Straight double quotation mark, followed by its curled opening and
   // closing forms.
   LEX_DOUBLE_QUOTE( '"' ),
   LEX_DOUBLE_QUOTE_OPENING( '“' ),
   LEX_DOUBLE_QUOTE_CLOSING( '”' ),
   // Low-positioned double opening quotation mark.
   LEX_DOUBLE_QUOTE_OPENING_LOW( '„' ),
   // Double and single chevrons (angle quotation marks).
   LEX_DOUBLE_CHEVRON_LEFT( '«' ),
   LEX_DOUBLE_CHEVRON_RIGHT( '»' ),
   LEX_SINGLE_CHEVRON_LEFT( '‹' ),
   LEX_SINGLE_CHEVRON_RIGHT( '›' );
   // The single character that this constant stands for.
   private final char mGlyph;
   /**
    * Associates the constant with the character it represents.
    *
    * @param glyph The character stored for later comparison and export.
    */
   LexemeGlyph( final char glyph ) {
     mGlyph = glyph;
+  }
   /**
    * Answers whether the given character matches the internal glyph.
    * Because the parameter is a {@code char}, this overloads (rather than
    * overrides) {@link Object#equals(Object)}.
+   *
    * @param glyph The character to compare against the internal character.
    * @return {@code true} iff the characters are equal.
    */
   public boolean equals( final char glyph ) {
     return mGlyph == glyph;
+  }
   /**
    * Provides the internal glyph as a one-character string.
    *
    * @return The result of {@link Character#toString(char)} applied to the
    * internal glyph; for {@link #LEX_OTHER} this is the NUL character.
    */
   public String text() {
     return Character.toString( mGlyph );
+  }
+}

M src/main/java/com/whitemagicsoftware/keenquotes/lex/LexemeType.java

 package com.whitemagicsoftware.keenquotes.lex;
 import static com.whitemagicsoftware.keenquotes.lex.LexemeGlyph.*;
 /**
  * Represents the type of {@link Lexeme} parsed by the {@link Lexer}.
  */
 @SuppressWarnings( "SpellCheckingInspection" )
 public enum LexemeType {
   QUOTE_SINGLE,
   QUOTE_SINGLE_OPENING,
   QUOTE_SINGLE_CLOSING,
   QUOTE_DOUBLE,
   QUOTE_DOUBLE_OPENING,
   QUOTE_DOUBLE_CLOSING,
   ESC_SINGLE,
   ESC_DOUBLE,
   SOT,
   EOL,
   EOP,
   EOT,
   SPACE,
   WORD,
   NUMBER,
   PUNCT,
   OPENING_GROUP,
   CLOSING_GROUP,
   HYPHEN,
   DASH,
   EQUALS,
   PERIOD,
   ELLIPSIS,
   ENDING,
   ANY,
   NONE
 public final class LexemeType {
   // Quotation mark types are created with a default glyph; every other type
   // uses the no-argument constructor, which assigns LEX_OTHER.
   // @formatter:off
   public static final LexemeType QUOTE_SINGLE = new LexemeType( LEX_SINGLE_QUOTE );
   public static final LexemeType QUOTE_SINGLE_OPENING = new LexemeType( LEX_SINGLE_QUOTE_OPENING );
   public static final LexemeType QUOTE_SINGLE_CLOSING = new LexemeType( LEX_SINGLE_QUOTE_CLOSING );
   public static final LexemeType QUOTE_DOUBLE = new LexemeType( LEX_DOUBLE_QUOTE );
   public static final LexemeType QUOTE_DOUBLE_OPENING = new LexemeType( LEX_DOUBLE_QUOTE_OPENING );
   public static final LexemeType QUOTE_DOUBLE_CLOSING = new LexemeType( LEX_DOUBLE_QUOTE_CLOSING );
   public static final LexemeType ESC_SINGLE = new LexemeType();
   public static final LexemeType ESC_DOUBLE = new LexemeType();
   public static final LexemeType PRIME_DOUBLE = new LexemeType();
   public static final LexemeType SOT = new LexemeType();
   public static final LexemeType EOL = new LexemeType();
   public static final LexemeType EOP = new LexemeType();
   public static final LexemeType EOT = new LexemeType();
   public static final LexemeType SPACE = new LexemeType();
   public static final LexemeType WORD = new LexemeType();
   public static final LexemeType NUMBER = new LexemeType();
   public static final LexemeType PUNCT = new LexemeType();
   public static final LexemeType OPENING_GROUP = new LexemeType();
   public static final LexemeType CLOSING_GROUP = new LexemeType();
   public static final LexemeType HYPHEN = new LexemeType();
   public static final LexemeType DASH = new LexemeType();
   public static final LexemeType EQUALS = new LexemeType();
   public static final LexemeType PERIOD = new LexemeType();
   public static final LexemeType ELLIPSIS = new LexemeType();
   public static final LexemeType ENDING = new LexemeType();
   public static final LexemeType ANY = new LexemeType();
   public static final LexemeType NONE = new LexemeType();
   // @formatter:on
   // Not final: reassigned whenever with(LexemeGlyph) is called.
   private LexemeGlyph mGlyph;
   /**
    * Constructs an instance of {@link LexemeType} using
    * {@link LexemeGlyph#LEX_OTHER} to indicate that this type of lexeme isn't
    * a quotation mark glyph.
    */
   public LexemeType() {
     this( LEX_OTHER );
+  }
   /**
    * Constructs an instance of {@link LexemeType} using a particular glyph.
+   *
    * @param glyph Typically represents an internationalized quotation mark
    *              character.
    */
   public LexemeType( final LexemeGlyph glyph ) {
     setGlyph( glyph );
+  }
   /**
    * Changes the type of glyph associated with this type of lexeme. This
    * is useful for passing along different glyphs represented by the same
    * lexeme (such as different opening quotation marks).
    * <p>
    * NOTE(review): this mutates the receiver and returns it; no copy is made.
    * When invoked on one of the shared {@code public static final} constants
    * declared in this class, the new glyph becomes visible through every
    * reference to that constant -- confirm that callers read the glyph before
    * the next call replaces it.
    * </p>
+   *
    * @param glyph The new {@link LexemeGlyph} to associate, often an
    *              internationalized quotation mark.
    * @return {@code this} to allow chaining.
    */
   public LexemeType with( final LexemeGlyph glyph ) {
     setGlyph( glyph );
     return this;
+  }
   /**
    * Provides the glyph used to identify international quotation marks.
+   *
    * @return The glyph set either at construction time or after calling
    * {@link #with(LexemeGlyph)}.
    */
   public LexemeGlyph glyph() {
     return mGlyph;
+  }
   // Single point of assignment for the glyph, shared by the constructor and
   // with(LexemeGlyph).
   private void setGlyph( final LexemeGlyph glyph ) {
     mGlyph = glyph;
+  }
   /**
    * Provides useful debugging information.
+   *
    * @return The class name and encodable glyph.
    */
   @Override
   public String toString() {
     // The glyph is appended via string concatenation, so its own toString()
     // supplies the text between the brackets.
     return getClass().getSimpleName() +
       '[' + glyph() + ']';
+  }
+}

M src/main/java/com/whitemagicsoftware/keenquotes/lex/Lexer.java

 import java.util.function.Consumer;
 import static com.whitemagicsoftware.keenquotes.lex.LexemeGlyph.*;
 import static com.whitemagicsoftware.keenquotes.lex.LexemeType.*;
 import static java.lang.Character.isWhitespace;
       // Allow filters to skip character sequences (such as XML tags). This
       // must allow back-to-back filtering, hence the loop.
       while( filter.test( i ) );
       while( filter.test( i ) ) ;
       final var index = i.index();
         token = CLOSING_GROUP;
+      }
       else if( curr == '“' ) {
         token = QUOTE_DOUBLE_OPENING;
       else if( LEX_DOUBLE_QUOTE_OPENING.equals( curr ) ) {
         token = QUOTE_DOUBLE_OPENING.with( LEX_DOUBLE_QUOTE_OPENING );
+      }
       else if( curr == '”' ) {
         token = QUOTE_DOUBLE_CLOSING;
       else if( LEX_DOUBLE_QUOTE_CLOSING.equals( curr ) ) {
         token = QUOTE_DOUBLE_CLOSING.with( LEX_DOUBLE_QUOTE_CLOSING );
+      }
       else if( curr == '‘' ) {
         token = QUOTE_SINGLE_OPENING;
       else if( LEX_SINGLE_QUOTE_OPENING.equals( curr ) ) {
         token = QUOTE_SINGLE_OPENING.with( LEX_SINGLE_QUOTE_OPENING );
+      }
       else if( curr == '’' ) {
         token = QUOTE_SINGLE_CLOSING;
       else if( LEX_SINGLE_QUOTE_CLOSING.equals( curr ) ) {
         token = QUOTE_SINGLE_CLOSING.with( LEX_SINGLE_QUOTE_CLOSING );
+      }
       else if( curr == '\\' ) {
       else if( curr == '=' ) {
         token = EQUALS;
+      }
       else if( curr == ',' && i.peek() == ',' ) {
         i.skip( next -> next == ',' );
         token = QUOTE_DOUBLE_OPENING.with( LEX_DOUBLE_QUOTE_OPENING_LOW );
+      }
       else if( LEX_DOUBLE_QUOTE_OPENING_LOW.equals( curr ) ) {
         token = QUOTE_DOUBLE_OPENING.with( LEX_DOUBLE_QUOTE_OPENING_LOW );
+      }
       else if( LEX_SINGLE_CHEVRON_LEFT.equals( curr ) ) {
         token = QUOTE_SINGLE_OPENING.with( LEX_SINGLE_CHEVRON_LEFT );
+      }
       else if( LEX_DOUBLE_CHEVRON_LEFT.equals( curr ) ) {
         token = QUOTE_DOUBLE_OPENING.with( LEX_DOUBLE_CHEVRON_LEFT );
+      }
       else if( LEX_SINGLE_CHEVRON_RIGHT.equals( curr ) ) {
         token = QUOTE_SINGLE_CLOSING.with( LEX_SINGLE_CHEVRON_RIGHT );
+      }
       else if( LEX_DOUBLE_CHEVRON_RIGHT.equals( curr ) ) {
         token = QUOTE_DOUBLE_CLOSING.with( LEX_DOUBLE_CHEVRON_RIGHT );
+      }
       else if( curr == DONE ) {

M src/main/java/com/whitemagicsoftware/keenquotes/parser/Curler.java

 import com.whitemagicsoftware.keenquotes.lex.FilterType;
 import com.whitemagicsoftware.keenquotes.lex.LexerFilter;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Consumer;
 import java.util.function.Function;
 import static com.whitemagicsoftware.keenquotes.parser.TokenType.*;
 import static java.util.Map.entry;
 import static java.util.Map.ofEntries;
 /**
  * Resolves straight quotes into curly quotes throughout a document.
  */
 @SuppressWarnings( "unused" )
 public class Curler implements Function<String, String> {
   /**
    * Provides an entity-based set of {@link Token} replacements.
    */
   public static final Map<TokenType, String> ENTITIES = ofEntries(
     entry( QUOTE_OPENING_SINGLE, "&lsquo;" ),
     entry( QUOTE_CLOSING_SINGLE, "&rsquo;" ),
     entry( QUOTE_OPENING_DOUBLE, "&ldquo;" ),
     entry( QUOTE_CLOSING_DOUBLE, "&rdquo;" ),
     entry( QUOTE_STRAIGHT_SINGLE, "'" ),
     entry( QUOTE_STRAIGHT_DOUBLE, "\"" ),
     entry( QUOTE_APOSTROPHE, "&apos;" ),
     entry( QUOTE_PRIME_SINGLE, "&prime;" ),
     entry( QUOTE_PRIME_DOUBLE, "&Prime;" ),
     entry( QUOTE_PRIME_TRIPLE, "&tprime;" ),
     entry( QUOTE_PRIME_QUADRUPLE, "&qprime;" )
   );
   /**
    * Provides a character-based set of {@link Token} replacements.
    */
   public static final Map<TokenType, String> CHARS = ofEntries(
     entry( QUOTE_OPENING_SINGLE, "‘" ),
     entry( QUOTE_CLOSING_SINGLE, "’" ),
     entry( QUOTE_OPENING_DOUBLE, "“" ),
     entry( QUOTE_CLOSING_DOUBLE, "”" ),
     entry( QUOTE_STRAIGHT_SINGLE, "'" ),
     entry( QUOTE_STRAIGHT_DOUBLE, "\"" ),
     entry( QUOTE_APOSTROPHE, "’" ),
     entry( QUOTE_PRIME_SINGLE, "′" ),
     entry( QUOTE_PRIME_DOUBLE, "″" ),
     entry( QUOTE_PRIME_TRIPLE, "‴" ),
     entry( QUOTE_PRIME_QUADRUPLE, "⁗" )
   );
   private final Contractions mContractions;
   private final Map<TokenType, String> mReplacements;
   private final FilterType mFilterType;
   /**
    * Maps quotes to HTML entities.
+   *
    * @param c          Contractions listings.
    * @param parserType Creates a parser based on document content structure.
    */
   public Curler( final Contractions c, final FilterType parserType ) {
     this( c, ENTITIES, parserType );
+  }
   private final LexerFilter mFilter;
   private final boolean mEntities;
   /**
    * Maps quotes to curled character equivalents.
+   *
    * @param c            Contractions listings.
    * @param replacements Map of recognized quotes to output types (entity or
    *                     Unicode character).
    * @param c        Contractions listings.
    * @param entities {@code true} to convert quotation marks to HTML entities.
    */
   public Curler(
     final Contractions c,
     final Map<TokenType, String> replacements,
     final FilterType parserType
     final FilterType filterType,
     final boolean entities
   ) {
     assert c != null;
     mContractions = c;
     mReplacements = replacements;
     mFilterType = parserType;
     mEntities = entities;
     mFilter = filterType.filter();
+  }
       text,
       mContractions,
       replace( output, offset, mReplacements ),
       mFilterType.filter()
       replace( output, offset, mEntities ),
       mFilter
     );
     return output.toString();
+  }
   /**
    * Replaces non-ambiguous tokens with their equivalent string representation.
+   *
    * @param output       Continuously updated result document.
    * @param offset       Accumulating index where {@link Token} is replaced.
    * @param replacements Map of {@link TokenType}s to replacement strings.
    * @param output   Continuously updated result document.
    * @param offset   Accumulating index where {@link Token} is replaced.
    * @param entities {@code true} to convert quotation marks to HTML entities.
    * @return Instructions to replace a {@link Token} in the result document.
    */
   public static Consumer<Token> replace(
     final StringBuilder output,
     final AtomicInteger offset,
     final Map<TokenType, String> replacements
     final boolean entities
   ) {
     return token -> {
       if( !token.isAmbiguous() ) {
         final var entity = token.toString( replacements );
         final var text = token.toString( entities );
         output.replace(
           token.began() + offset.get(),
           token.ended() + offset.get(),
           entity
           text
         );
         offset.addAndGet( entity.length() - (token.ended() - token.began()) );
         offset.addAndGet( text.length() - (token.ended() - token.began()) );
+      }
     };

M src/main/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitter.java

     // <2''>
     else if( match( NUMBER, QUOTE_SINGLE, QUOTE_SINGLE, ANY ) ) {
       emit( QUOTE_PRIME_DOUBLE, lex2.began(), lex3.ended() );
       // Force double primes to conform to the same constructor usage. This
       // simplifies the tokens and reduces some memory usage.
       final var lex = new Lexeme( PRIME_DOUBLE, lex2.began(), lex3.ended() );
       emit( QUOTE_PRIME_DOUBLE, lex );
       mQ.set( Lexeme.NONE, 2 );
+    }
         emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
+      }
+    }
     // <'"Trouble>
     else if( match( QUOTE_SINGLE, QUOTE_DOUBLE, WORD, ANY ) ) {
       emit( QUOTE_OPENING_DOUBLE, lex2 );
+    }
     else if( match( ANY, QUOTE_DOUBLE, ANY, ANY ) ) {
       emit( QUOTE_AMBIGUOUS_DOUBLE, lex2 );
+    }
     // International opening double quotation mark.
     else if( match( ANY, QUOTE_DOUBLE_OPENING, ANY, ANY ) ) {
       emit( QUOTE_OPENING_DOUBLE, lex2 );
+    }
     // International opening single quotation mark.
     else if( match( ANY, QUOTE_SINGLE_OPENING, ANY, ANY ) ) {
       emit( QUOTE_OPENING_SINGLE, lex2 );
+    }
     // International double closing quotation mark.
     else if( match( ANY, ANY, ANY, QUOTE_DOUBLE_CLOSING ) ) {
       emit( QUOTE_CLOSING_DOUBLE, lex4 );
+    }
     // International single closing quotation mark.
     else if( match( ANY, ANY, ANY, QUOTE_SINGLE_CLOSING ) ) {
       emit( QUOTE_CLOSING_SINGLE, lex4 );
+    }
     // Ambiguous (no match)
     else if( match( ANY, QUOTE_SINGLE, ANY, ANY ) ) {
       emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
+    }
+  }
   private void emit( final TokenType tokenType, final Lexeme lexeme ) {
     mConsumer.accept( new Token( tokenType, lexeme ) );
+  }
   private void emit(
     final TokenType tokenType,
     final int began,
     final int ended ) {
     mConsumer.accept( new Token( tokenType, began, ended ) );
+  }

M src/main/java/com/whitemagicsoftware/keenquotes/parser/Token.java

 import com.whitemagicsoftware.keenquotes.lex.Lexeme;
 import com.whitemagicsoftware.keenquotes.lex.LexemeGlyph;
 import java.util.Map;
 import static com.whitemagicsoftware.keenquotes.lex.LexemeGlyph.*;
 import static com.whitemagicsoftware.keenquotes.parser.TokenType.*;
 import static java.util.Map.entry;
 import static java.util.Map.ofEntries;
 /**
  * Represents a high-level token read from a text document.
  */
 final class Token implements Comparable<Token>, Stem {
   /**
    * Provides an entity-based set of {@link Token} replacements.
    */
   private static final Map<TokenType, String> ENTITIES = ofEntries(
     entry( QUOTE_OPENING_SINGLE, "&lsquo;" ),
     entry( QUOTE_CLOSING_SINGLE, "&rsquo;" ),
     entry( QUOTE_OPENING_DOUBLE, "&ldquo;" ),
     entry( QUOTE_CLOSING_DOUBLE, "&rdquo;" ),
     entry( QUOTE_STRAIGHT_SINGLE, "'" ),
     entry( QUOTE_STRAIGHT_DOUBLE, "\"" ),
     entry( QUOTE_APOSTROPHE, "&apos;" ),
     entry( QUOTE_PRIME_SINGLE, "&prime;" ),
     entry( QUOTE_PRIME_DOUBLE, "&Prime;" ),
     entry( QUOTE_PRIME_TRIPLE, "&tprime;" ),
     entry( QUOTE_PRIME_QUADRUPLE, "&qprime;" )
   );
   /**
    * Provides a character-based set of {@link Token} replacements.
    */
   private static final Map<TokenType, String> CHARS = ofEntries(
     entry( QUOTE_OPENING_SINGLE, "‘" ),
     entry( QUOTE_CLOSING_SINGLE, "’" ),
     entry( QUOTE_OPENING_DOUBLE, "“" ),
     entry( QUOTE_CLOSING_DOUBLE, "”" ),
     entry( QUOTE_STRAIGHT_SINGLE, "'" ),
     entry( QUOTE_STRAIGHT_DOUBLE, "\"" ),
     entry( QUOTE_APOSTROPHE, "’" ),
     entry( QUOTE_PRIME_SINGLE, "′" ),
     entry( QUOTE_PRIME_DOUBLE, "″" ),
     entry( QUOTE_PRIME_TRIPLE, "‴" ),
     entry( QUOTE_PRIME_QUADRUPLE, "⁗" )
   );
   /**
    * Glyphs not found in the table will use the document's glyph.
    */
   private static final Map<LexemeGlyph, String> I18N_ENTITIES = ofEntries(
     entry( LEX_DOUBLE_QUOTE_OPENING_LOW, "&#8222;" ),
     entry( LEX_DOUBLE_CHEVRON_LEFT, "&laquo;" ),
     entry( LEX_DOUBLE_CHEVRON_RIGHT, "&raquo;" ),
     entry( LEX_SINGLE_CHEVRON_LEFT, "&lsaquo;" ),
     entry( LEX_SINGLE_CHEVRON_RIGHT, "&rsaquo;" )
   );
   /**
    * Denotes that the token does not represent a value in the parsed document.
    */
   public static final Token NONE = new Token( TokenType.NONE, Lexeme.NONE );
   private TokenType mTokenType;
   private final int mBegan;
   private final int mEnded;
   private final Lexeme mLexeme;
   /**
    * Convenience constructor to create a token that uses the lexeme's
    * beginning and ending offsets to represent a complete token.
+   *
    * @param type   The type of {@link Token} to create.
    * @param lexeme Container for beginning and ending text offsets.
    */
   Token( final TokenType type, final Lexeme lexeme ) {
     this( type, lexeme.began(), lexeme.ended() );
+  }
   /**
    * This constructor can be used to create tokens that span more than a
    * single character. Almost all tokens represent a single character, only
    * the double-prime sequence ({@code ''}) is more than one character.
+   *
    * @param tokenType The type of {@link Token} to create.
    * @param began     Beginning offset into text where token is found.
    * @param ended     Ending offset into text where token is found.
    * @param lexeme    Container for text offsets and i18n glyphs.
    */
   Token( final TokenType tokenType, final int began, final int ended ) {
   Token( final TokenType tokenType, final Lexeme lexeme ) {
     assert tokenType != null;
     assert began >= 0;
     assert ended >= began;
     assert lexeme.began() >= 0;
     assert lexeme.ended() >= lexeme.began();
     mTokenType = tokenType;
     mBegan = began;
     mEnded = ended;
     mLexeme = lexeme;
+  }
     assert token != NONE;
     return mEnded <= token.mBegan;
     return mLexeme.ended() <= token.began();
+  }
     assert token != NONE;
     return mBegan > token.mEnded;
     return mLexeme.began() > token.ended();
+  }
   int began() {
     return mBegan;
     return mLexeme.began();
+  }
   int ended() {
     return mEnded;
     return mLexeme.ended();
+  }
   @Override
   public int compareTo( final Token that ) {
     return this.mBegan - that.mBegan;
     return this.began() - that.began();
+  }
+  }
   public String toString( final Map<TokenType, String> entities ) {
     return entities.get( getType() );
   /**
    * Converts this token to its string representation, which will either be
    * an HTML entity or a character.
+   *
    * @param entities {@code true} to convert quotation marks to HTML entities.
    * @return A plain quotation mark character or an HTML entity.
    */
   public String toString( final boolean entities ) {
     final var glyph = mLexeme.getType().glyph();
     return entities
       ? I18N_ENTITIES.getOrDefault( glyph, ENTITIES.get( getType() ) )
       : CHARS.getOrDefault( getType(), glyph.text() );
+  }
   @Override
   public String toString() {
     return getClass().getSimpleName() + '[' +
       "mType=" + mTokenType +
       ", mBegan=" + mBegan +
       ", mEnded=" + mEnded +
       "mType=" + getType() +
       ", mBegan=" + began() +
       ", mEnded=" + ended() +
       ']';
+  }

M src/test/java/com/whitemagicsoftware/keenquotes/lex/LexerTest.java

   @Test
   void test_Lexing_Quotes_EmitQuotes() {
     testType( "'", QUOTE_SINGLE );
     testType( "\"", QUOTE_DOUBLE );
     testType( "‘", QUOTE_SINGLE_OPENING );
     testType( "‹", QUOTE_SINGLE_OPENING );
     testType( "’", QUOTE_SINGLE_CLOSING );
     testType( "›", QUOTE_SINGLE_CLOSING );
     testType( "'", QUOTE_SINGLE );
     testType( "“", QUOTE_DOUBLE_OPENING );
     testType( "„", QUOTE_DOUBLE_OPENING );
     testType( "«", QUOTE_DOUBLE_OPENING );
     testType( ",,", QUOTE_DOUBLE_OPENING );
     testType( "\"", QUOTE_DOUBLE );
     testType( "”", QUOTE_DOUBLE_CLOSING );
     testType( "»", QUOTE_DOUBLE_CLOSING );
     testType( "3 o'clock", NUMBER, SPACE, WORD, QUOTE_SINGLE, WORD );
+  }

M src/test/java/com/whitemagicsoftware/keenquotes/parser/AmbiguityResolverTest.java

 import java.util.concurrent.atomic.AtomicInteger;
 import static com.whitemagicsoftware.keenquotes.parser.Curler.ENTITIES;
 import static com.whitemagicsoftware.keenquotes.parser.Curler.replace;
 import static com.whitemagicsoftware.keenquotes.parser.Curler.*;
 import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+  }
   @Test
   @Disabled
   @SuppressWarnings( "unused" )
   void test_Resolve_InvalidGrammar_AmbiguousRemain() throws IOException {
     test( "invalid-grammar.txt" );
         input,
         CONTRACTIONS,
         replace( output, offset, ENTITIES ),
         replace( output, offset, true ),
         filter -> false
       );

M src/test/java/com/whitemagicsoftware/keenquotes/parser/CurlerTest.java

  @Test
  public void test_Parse_UncurledQuotes1_CurlyQuotes() throws IOException {
    testCurler( createCurler( FILTER_PLAIN ), "unambiguous-1-pass.txt" );
    testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-1-pass.txt" );
  }

  @Test
  public void test_Parse_UncurledQuotes2_CurlyQuotes() throws IOException {
    testCurler( createCurler( FILTER_PLAIN ), "unambiguous-2-pass.txt" );
    testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-2-pass.txt" );
  }

  @Test
  @Disabled
  @SuppressWarnings( {"unused", "JUnit3StyleTestMethodInJUnit4Class"} )
  public void test_Parse_AmbiguousQuotes_PartiallyCurled() throws IOException {
    testCurler( createCurler( FILTER_PLAIN ), "ambiguous-n-pass.txt" );
    testCurler( createCurler( FILTER_PLAIN, false ), "ambiguous-n-pass.txt" );
  }

  @Test
  public void test_Parse_UncurledQuotesXml_CurlyQuotes() throws IOException {
    testCurler( createCurler( FILTER_XML ), "xml.txt" );
    testCurler( createCurler( FILTER_XML, true ), "xml.txt" );
  }

  @Test
  public void test_Parse_UncurledQuotesI11l_CurlyQuotes() throws IOException {
    testCurler( createCurler( FILTER_PLAIN, true ), "i18n.txt" );
  }


    }

    final var curler = createCurler( FILTER_XML );
    final var curler = createCurler( FILTER_XML, true );
    System.out.println( curler.apply( sb.toString() ) );
  }

  }

  private Function<String, String> createCurler( final FilterType parserType ) {
    return new Curler( new Contractions.Builder().build(), parserType );
  private Function<String, String> createCurler(
    final FilterType filterType,
    final boolean entities ) {
    return new Curler( createContractions(), filterType, entities );
  }

  private Contractions createContractions() {
    return new Contractions.Builder().build();
  }
}

M src/test/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitterTest.java

 import java.util.concurrent.atomic.AtomicInteger;
 import static com.whitemagicsoftware.keenquotes.parser.Curler.ENTITIES;
 import static com.whitemagicsoftware.keenquotes.parser.Curler.replace;
 import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 class QuoteEmitterTest {
   private final Contractions CONTRACTIONS = new Contractions.Builder().build();
   @Test
   void test_Emit_MultipleInputs_QuotesEmitted() throws IOException {
     final var couplets = readPairs(
       "unambiguous-1-pass.txt" );
     final var couplets = readPairs( "unambiguous-1-pass.txt" );
     couplets.forEach( couplet -> {
         input,
         CONTRACTIONS,
         replace( output, offset, ENTITIES ),
         replace( output, offset, true ),
         filter -> false
       );

A src/test/resources/com/whitemagicsoftware/keenquotes/texts/i18n.txt

 # ########################################################################
 # French
 # ########################################################################
 «Ce n'est pas la mer à boire,» il me dit.
 &laquo;Ce n&apos;est pas la mer à boire,&raquo; il me dit.
 Dit il, ‹Ce n'est pas la mer à boire?›
 Dit il, &lsaquo;Ce n&apos;est pas la mer à boire?&rsaquo;
 # ########################################################################
 # Dutch
 # ########################################################################
 ,,Dit is een citaat," zei hij.
 &#8222;Dit is een citaat,&rdquo; zei hij.
 ,,Ik heb twee opa's en twee oma's," zei het meisje,„hoeveel heb jij er?"
 &#8222;Ik heb twee opa&apos;s en twee oma&apos;s,&rdquo; zei het meisje,&#8222;hoeveel heb jij er?&rdquo;
 "Zeg, wat betekent 'quod non' eigenlijk?", vroeg Nynke.
 &ldquo;Zeg, wat betekent &lsquo;quod non&rsquo; eigenlijk?&rdquo;, vroeg Nynke.
 "Wat zijn 'zebra's'?", vroeg de jongen.
 &ldquo;Wat zijn &lsquo;zebra&apos;s&rsquo;?&rdquo;, vroeg de jongen.
 "'s Morgens gaat het regenen in 's-Gravenhage," zei de weervrouw.
 &ldquo;&apos;s Morgens gaat het regenen in &apos;s-Gravenhage,&rdquo; zei de weervrouw.
 De voorzitter zei: 'Gelukkig nieuwjaar!'
 De voorzitter zei: &lsquo;Gelukkig nieuwjaar!&rsquo;
 'Wat een gedoe, al die baby'tjes hun lolly'tjes geven,' zuchtte de oppas, 'en het is nog ongezond ook!'
 &lsquo;Wat een gedoe, al die baby&apos;tjes hun lolly&apos;tjes geven,&rsquo; zuchtte de oppas, &lsquo;en het is nog ongezond ook!&rsquo;
 "In '84," zei hij terwijl hij een hand door z'n haar haalde, "heb ik Alice's zus een kus gegeven op d'r wangen."
 &ldquo;In &apos;84,&rdquo; zei hij terwijl hij een hand door z&apos;n haar haalde, &ldquo;heb ik Alice&apos;s zus een kus gegeven op d&apos;r wangen.&rdquo;

M src/test/resources/com/whitemagicsoftware/keenquotes/texts/invalid-grammar.txt

 # """wtf"""
 # &ldquo;&ldquo;&ldquo;wtf&rdquo;&rdquo;&rdquo;
 """insane"""
 &ldquo;&rdquo;&ldquo;insane&rdquo;&ldquo;&rdquo;
 # '''wtf'''
 # &lsquo;&lsquo;&lsquo;wtf&rsquo;&rsquo;&rsquo;
 '''wtf'''
 &lsquo;&lsquo;&lsquo;wtf&rsquo;&rsquo;&rsquo;

M src/test/resources/com/whitemagicsoftware/keenquotes/texts/unambiguous-1-pass.txt

 "’Kearney lives on the banks of Killarney—’
 &ldquo;’Kearney lives on the banks of Killarney—’
 &ldquo;’Kearney lives on the banks of Killarney—&rsquo;
 # ########################################################################

29	29	@Test
30	30	public void test_Parse_UncurledQuotes1_CurlyQuotes() throws IOException {
31		testCurler( createCurler( FILTER_PLAIN ), "unambiguous-1-pass.txt" );
	31	testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-1-pass.txt" );
32	32	}
33	33
34	34	@Test
35	35	public void test_Parse_UncurledQuotes2_CurlyQuotes() throws IOException {
36		testCurler( createCurler( FILTER_PLAIN ), "unambiguous-2-pass.txt" );
	36	testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-2-pass.txt" );
37	37	}
38	38
39		@Test
	39	@Disabled
	40	@SuppressWarnings( {"unused", "JUnit3StyleTestMethodInJUnit4Class"} )
40	41	public void test_Parse_AmbiguousQuotes_PartiallyCurled() throws IOException {
41		testCurler( createCurler( FILTER_PLAIN ), "ambiguous-n-pass.txt" );
	42	testCurler( createCurler( FILTER_PLAIN, false ), "ambiguous-n-pass.txt" );
42	43	}
43	44
44	45	@Test
45	46	public void test_Parse_UncurledQuotesXml_CurlyQuotes() throws IOException {
46		testCurler( createCurler( FILTER_XML ), "xml.txt" );
	47	testCurler( createCurler( FILTER_XML, true ), "xml.txt" );
	48	}
	49
	50	@Test
	51	public void test_Parse_UncurledQuotesI11l_CurlyQuotes() throws IOException {
	52	testCurler( createCurler( FILTER_PLAIN, true ), "i18n.txt" );
47	53	}
48	54
...
68	74	}
69	75
70		final var curler = createCurler( FILTER_XML );
	76	final var curler = createCurler( FILTER_XML, true );
71	77	System.out.println( curler.apply( sb.toString() ) );
72	78	}
...
95	101	}
96	102
97		private Function<String, String> createCurler( final FilterType parserType ) {
98		return new Curler( new Contractions.Builder().build(), parserType );
	103	private Function<String, String> createCurler(
	104	final FilterType filterType,
	105	final boolean entities ) {
	106	return new Curler( createContractions(), filterType, entities );
	107	}
	108
	109	private Contractions createContractions() {
	110	return new Contractions.Builder().build();
99	111	}
100	112	}