Dave Jarvis' Repositories

Home Files Commits Tags Clone

Repository:

git clone https://repo.autonoma.ca/repo/keenquotes.git

M src/main/java/com/whitemagicsoftware/keenquotes/Contractions.java

 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
     private final Set<String> mEndedAmbiguous = new HashSet<>();
     public void withBeganUnambiguous( final Set<String> words ) {
     public void withBeganUnambiguous( final List<String> words ) {
       mBeganUnambiguous.addAll( words );
+    }
     public void withEndedUnambiguous( final Set<String> words ) {
     public void withEndedUnambiguous( final List<String> words ) {
       mEndedUnambiguous.addAll( words );
+    }
     public void withBeganAmbiguous( final Set<String> words ) {
     public void withBeganAmbiguous( final List<String> words ) {
       mBeganAmbiguous.addAll( words );
+    }
     public void withEndedAmbiguous( final Set<String> words ) {
     public void withEndedAmbiguous( final List<String> words ) {
       mEndedAmbiguous.addAll( words );
+    }
      */
     public Contractions build() {
       mBeganUnambiguous.addAll( from( mBeganUnambiguous, BEGAN_UNAMBIGUOUS ) );
       mEndedUnambiguous.addAll( from( mEndedUnambiguous, ENDED_UNAMBIGUOUS ) );
       mBeganAmbiguous.addAll( from( mBeganAmbiguous, BEGAN_AMBIGUOUS ) );
       mEndedAmbiguous.addAll( from( mEndedAmbiguous, ENDED_AMBIGUOUS ) );
       mBeganUnambiguous.addAll( BEGAN_UNAMBIGUOUS );
       mEndedUnambiguous.addAll( ENDED_UNAMBIGUOUS );
       mBeganAmbiguous.addAll( BEGAN_AMBIGUOUS );
       mEndedAmbiguous.addAll( ENDED_AMBIGUOUS );
       return new Contractions( this );
     "neath",
     "nother",
     "nuff",
     "onna",
     "onna'",

M src/main/java/com/whitemagicsoftware/keenquotes/Converter.java

 import java.util.Map;
 import java.util.function.Consumer;
 import java.util.function.Function;
 import static com.whitemagicsoftware.keenquotes.TokenType.*;
 import static java.util.Collections.sort;
 /**
  * Responsible for converting curly quotes to HTML entities throughout a
  * text string.
  */
 public class Converter {
 public class Converter implements Function<String, String> {
   private static final Map<TokenType, String> REPLACEMENTS = Map.of(
     QUOTE_OPENING_SINGLE, "&lsquo;",
     QUOTE_PRIME_DOUBLE, "&Prime;"
   );
   private final Consumer<Lexeme> mUnresolved;
   private final Contractions mContractions;
   /**
    * Constructs a converter that uses the contractions built by an
    * unconfigured {@link Contractions.Builder}.
    *
    * @param unresolved Recipient for lexemes that the parser reports as
    *                   unresolved.
    */
   public Converter( final Consumer<Lexeme> unresolved ) {
     this( unresolved, new Contractions.Builder().build() );
+  }
   /**
    * Constructs a converter with a caller-supplied set of contractions.
    *
    * @param unresolved Recipient for lexemes that the parser reports as
    *                   unresolved.
    * @param c          Contractions handed to each {@link Parser} created
    *                   when text is converted.
    */
   public Converter( final Consumer<Lexeme> unresolved, final Contractions c ) {
     mUnresolved = unresolved;
     mContractions = c;
+  }
   /**
    * Converts straight quotes to curly quotes and primes. Any quotation marks
    * that cannot be converted are passed to the {@link Consumer}.
    * that cannot be converted are passed to the {@link Consumer}. This method
    * is re-entrant, but not tested to be thread-safe.
+   *
    * @param text       The text to parse.
    * @param unresolved Recipient for ambiguous {@link Lexeme}s.
    * @param text The text to parse.
    * @return The given text string with as many straight quotes converted to
    * curly quotes as is feasible.
    */
   public static String convert(
     final String text, final Consumer<Lexeme> unresolved ) {
     final var parser = new Parser( text );
   @Override
   public String apply( final String text ) {
     final var parser = new Parser( text, mContractions );
     final var tokens = new ArrayList<Token>();
     // Parse the tokens and consume all unresolved lexemes.
     parser.parse( tokens::add, unresolved );
     parser.parse( tokens::add, mUnresolved );
     // The parser may emit tokens in any order.

M src/main/java/com/whitemagicsoftware/keenquotes/KeenQuotes.java

   public void run() {
     if( getSettings().displayList() ) {
       displayList();
     final var settings = getSettings();
     final var contractions = createContractions( settings );
     if( settings.displayList() ) {
       System.out.println( contractions.toString() );
+    }
     else {
       convert();
       convert( new Converter( System.err::println, contractions ) );
+    }
+  }
   private void displayList() {
     System.out.println( new Contractions.Builder().build().toString() );
   /**
    * Builds the {@link Contractions} from the four word lists exposed by the
    * given settings.
    *
    * @param settings Source of the began/ended, unambiguous/ambiguous word
    *                 lists.
    * @return The instance produced by the builder after all four lists have
    * been added to it.
    */
   private Contractions createContractions( final Settings settings ) {
     final var builder = new Contractions.Builder();
     builder.withBeganUnambiguous( settings.getBeganUnambiguous() );
     builder.withEndedUnambiguous( settings.getEndedUnambiguous() );
     builder.withBeganAmbiguous( settings.getBeganAmbiguous() );
     builder.withEndedAmbiguous( settings.getEndedAmbiguous() );
     return builder.build();
+  }
   private void convert() {
     final StringBuilder sb = new StringBuilder();
   private void convert( final Converter converter ) {
     final var sb = new StringBuilder();
     try( final BufferedReader reader = open( System.in ) ) {
     try( final var reader = open( System.in ) ) {
       String line;
       final var sep = System.lineSeparator();
       while( (line = reader.readLine()) != null ) {
         sb.append( line );
         sb.append( sep );
+      }
       System.out.println(
         Converter.convert( sb.toString(), System.err::println )
       );
       System.out.println( converter.apply( sb.toString() ) );
     } catch( final Exception ex ) {
       ex.printStackTrace( System.err );
+  }
   /**
    * Main application entry point.
+   *
    * @param args Command-line arguments.
    */
   public static void main( final String[] args ) {
     final var app = new KeenQuotes();

M src/main/java/com/whitemagicsoftware/keenquotes/Lexer.java

         lexeme = createLexeme( QUOTE_DOUBLE, began, i.getIndex() );
+      }
       else if( curr == '‘') {
       else if( curr == '‘' ) {
         lexeme = createLexeme( QUOTE_SINGLE_OPENING, began, i.getIndex() );
+      }
       else if( curr == '’') {
       else if( curr == '’' ) {
         lexeme = createLexeme( QUOTE_SINGLE_CLOSING, began, i.getIndex() );
+      }
       else if( curr == '-' && peek( i ) == '-' || curr == '—' ) {
         slurp( i, ( next, ci ) -> next == '-' || next == '—' );
         lexeme = createLexeme( DASH, began, i.getIndex() );
+      }
       else if( isDigit( curr ) || isNumeric( curr ) && isDigit( peek( i ) ) ) {
         lexeme = createLexeme( isWord ? WORD : NUMBER, began, i.getIndex() );
+      }
       else if( curr == '-' ) {
       else if( curr == '-' && peek( i ) != '-' ) {
         lexeme = createLexeme( HYPHEN, began, i.getIndex() );
+      }
       else if( isDash( curr ) ) {
         slurp( i, ( next, ci ) -> isDash( next ) );
         lexeme = createLexeme( DASH, began, i.getIndex() );
+      }
       else if( curr == '.' ) {
         // Parse all consecutive periods into an ellipsis lexeme. This also
         // captures space-separated ellipses (such as ". . .").
         lexeme = createLexeme(
           slurp( i, ( next, ci ) -> next == '.' ) == 0 ? PERIOD : ELLIPSIS,
           slurp( i, ( next, ci ) ->
             next == '.' || (next == ' ' && peek( ci ) == '.') ) == 0
             ? PERIOD
             : ELLIPSIS,
           began, i.getIndex()
         );
     return
       curr == '.' || curr == ',' || curr == '-' || curr == '+' || curr == '^';
+  }
   /**
    * Answers whether the given character may be part of an en- or em-dash.
    * This must be called after it is known that the character isn't a lone
    * hyphen.
+   *
    * @param curr The character to check as being a dash.
    * @return {@code true} if the given character is part of a dash.
    */
   private boolean isDash( final char curr ) {
     return curr == '-' || curr == '–' || curr == '—';
+  }

M src/main/java/com/whitemagicsoftware/keenquotes/Parser.java

    */
   private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
     new LexemeType[]{SPACE, DASH, EQUALS, QUOTE_SINGLE, OPENING_GROUP, EOL, EOP};
     new LexemeType[]{SPACE, DASH, EQUALS, QUOTE_SINGLE, OPENING_GROUP, EOL,
       EOP};
   /**
    */
   private int mClosingSingleQuote;
   /**
    * Constructs a new {@link Parser} using the default contraction sets
    * to help resolve some ambiguous scenarios.
+   *
    * @param text The prose to parse, containing zero or more quotation
    *             characters.
    */
   public Parser( final String text ) {
     this( text, new Contractions.Builder().build() );
+  }
   /**

M src/main/java/com/whitemagicsoftware/keenquotes/Settings.java

 import picocli.CommandLine;
 import java.util.List;
 import java.util.concurrent.Callable;
 import static java.util.Arrays.asList;
 import static java.util.Collections.emptyList;
 @CommandLine.Command(
   private final KeenQuotes mMain;
 //  /**
 //   * List of unambiguous contractions having leading apostrophes.
 //   */
 //  @CommandLine.Option(
 //    names = {"-ub", "--unamb-began"},
 //    description =
 //      "Contractions to treat as unambiguous (e.g., cause,bout)",
 //    paramLabel = "words"
 //  )
 //  private String[] mUnambiguousBegan;
 //
 //  /**
 //   * List of unambiguous contractions having lagging apostrophes.
 //   */
 //  @CommandLine.Option(
 //    names = {"-ue", "--unamb-ended"},
 //    description =
 //      "Contractions to treat as unambiguous (e.g., frien,thinkin)",
 //    paramLabel = "words"
 //  )
 //  private String[] mUnambiguousEnded;
 //
 //  /**
 //   * List of ambiguous contractions having leading apostrophes.
 //   */
 //  @CommandLine.Option(
 //    names = {"-ab", "--amb-began"},
 //    description =
 //      "Contractions to treat as ambiguous (e.g., sup,kay)",
 //    paramLabel = "words"
 //  )
 //  private String[] mAmbiguousBegan;
 //
 //  /**
 //   * List of ambiguous contractions having lagging apostrophes.
 //   */
 //  @CommandLine.Option(
 //    names = {"-ae", "--amb-ended"},
 //    description =
 //      "Contractions to treat as ambiguous (e.g., gi,o)",
 //    paramLabel = "words"
 //  )
 //  private String[] mAmbiguousEnded;
 //
   /**
    * List of unambiguous contractions having leading apostrophes.
    */
   @CommandLine.Option(
     names = {"-ub", "--unamb-began"},
     description =
       "Contractions to treat as unambiguous (e.g., cause,bout)",
     paramLabel = "words"
+  )
   private String[] mBeganUnambiguous;
   /**
    * List of unambiguous contractions having lagging apostrophes.
    */
   @CommandLine.Option(
     names = {"-ue", "--unamb-ended"},
     description =
       "Contractions to treat as unambiguous (e.g., frien,thinkin)",
     paramLabel = "words"
+  )
   private String[] mEndedUnambiguous;
   /**
    * List of ambiguous contractions having leading apostrophes.
    */
   @CommandLine.Option(
     names = {"-ab", "--amb-began"},
     description =
       "Contractions to treat as ambiguous (e.g., sup,kay)",
     paramLabel = "words"
+  )
   private String[] mBeganAmbiguous;
   /**
    * List of ambiguous contractions having lagging apostrophes.
    */
   @CommandLine.Option(
     names = {"-ae", "--amb-ended"},
     description =
       "Contractions to treat as ambiguous (e.g., gi,o)",
     paramLabel = "words"
+  )
   private String[] mEndedAmbiguous;
   /**
    * Display default values.
    * @return {@code true} to list the contractions.
    */
   public boolean displayList() {
   boolean displayList() {
     return mDisplayList;
+  }
   /**
    * Returns the words bound to the {@code -ub}/{@code --unamb-began} option.
    *
    * @return The words as a list; empty when the backing array is
    * {@code null}.
    */
   List<String> getBeganUnambiguous() {
     return nullSafe( mBeganUnambiguous );
+  }
   /**
    * Returns the words bound to the {@code -ue}/{@code --unamb-ended} option.
    *
    * @return The words as a list; empty when the backing array is
    * {@code null}.
    */
   List<String> getEndedUnambiguous() {
     return nullSafe( mEndedUnambiguous );
+  }
   /**
    * Returns the words bound to the {@code -ab}/{@code --amb-began} option.
    *
    * @return The words as a list; empty when the backing array is
    * {@code null}.
    */
   List<String> getBeganAmbiguous() {
     return nullSafe( mBeganAmbiguous );
+  }
   /**
    * Returns the words bound to the {@code -ae}/{@code --amb-ended} option.
    *
    * @return The words as a list; empty when the backing array is
    * {@code null}.
    */
   List<String> getEndedAmbiguous() {
     return nullSafe( mEndedAmbiguous );
+  }
   /**
    * Wraps the given array in a list, substituting an empty list for
    * {@code null} so callers never receive a {@code null} reference.
    *
    * @param words The array to wrap, possibly {@code null}.
    * @return An empty list when {@code words} is {@code null}; otherwise a
    * fixed-size list view backed by the array.
    */
   private List<String> nullSafe( final String[] words ) {
     return words == null ? emptyList() : asList( words );
+  }

M src/test/java/com/whitemagicsoftware/keenquotes/KeenQuotesTest.java

 import java.util.function.Function;
 import static com.whitemagicsoftware.keenquotes.Converter.convert;
 import static java.lang.System.out;
 import static org.junit.jupiter.api.Assertions.assertEquals;
   @Disabled
   public void test_parse_SingleLine_Parsed() {
     out.println( convert(
       "\"’Kearney lives on the banks of Killarney—’",
       out::println
     final var converter = new Converter( out::println );
     out.println( converter.apply(
       "\"’Kearney lives on the banks of Killarney—’"
     ) );
+  }
   @Test
   public void test_Parse_StraightQuotes_CurlyQuotes() throws IOException {
     testConverter( text -> convert( text, ( lexeme ) -> {} ) );
     testConverter( new Converter( ( lex ) -> {} ) );
+  }
+    }
     System.out.println( convert( sb.toString(), out::println ) );
     final var converter = new Converter( out::println );
     System.out.println( converter.apply( sb.toString() ) );
+  }

M src/test/java/com/whitemagicsoftware/keenquotes/LexerTest.java

     testType( "--", DASH );
     testType( "---", DASH );
     testType( "–", DASH );
     testType( "—", DASH );
     testType( "—-—", DASH );
     testType( "...", ELLIPSIS );
     testType( ". .", ELLIPSIS );
     testType( ". . .", ELLIPSIS );
     testType( ".. ... ....", ELLIPSIS );
+  }

M src/test/java/com/whitemagicsoftware/keenquotes/ParserTest.java

  */
 class ParserTest {
   @SuppressWarnings( "TextBlockMigration" )
   private final static Map<String, Map<TokenType, Integer>> TEST_CASES =
     Map.of(
       "'Twas and 'tis whate'er lay 'twixt dawn and dusk 'n River Styx.",
       Map.of( QUOTE_APOSTROPHE, 5 ),
       """
              But I must leave the proofs to those who 've seen 'em;
              But this I heard her say, and can't be wrong
              And all may think which way their judgments lean 'em,
              ''T is strange---the Hebrew noun which means "I am,"
              The English always use to govern d--n.'
         """,
       "But I must leave the proofs to those who 've seen 'em;\n" +
         "But this I heard her say, and can't be wrong\n" +
         "And all may think which way their judgments lean 'em,\n" +
         "''T is strange---the Hebrew noun which means \"I am,\"\n" +
         "The English always use to govern d--n.'",
       Map.of( QUOTE_APOSTROPHE, 5 )
     );
   private void parse( final String text, final Map<TokenType, Integer> tally ) {
     final var parser = new Parser( text );
     final var contractions = new Contractions.Builder().build();
     final var parser = new Parser( text, contractions );
     final var actual = new HashMap<TokenType, Integer>();