Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Fix parser tests, remove ParserFactory

Author: Dave Jarvis <email>
Date: 2022-08-13 15:35:33 GMT-0700
Commit: 3cca8ab60c6fd0811689277778b41a4f15cc4b01
Parent: 7928edf
src/main/java/com/whitemagicsoftware/keenquotes/Converter.java
package com.whitemagicsoftware.keenquotes;
-import com.whitemagicsoftware.keenquotes.ParserFactory.ParserType;
-
import java.util.ArrayList;
import java.util.Map;
import java.util.function.Consumer;
import java.util.function.Function;
import static com.whitemagicsoftware.keenquotes.TokenType.*;
import static java.util.Collections.sort;
+import static java.util.Map.*;
/**
* Responsible for converting curly quotes to HTML entities throughout a
* text string.
*/
@SuppressWarnings( "unused" )
public class Converter implements Function<String, String> {
- public static final Map<TokenType, String> ENTITIES = Map.of(
- QUOTE_OPENING_SINGLE, "&lsquo;",
- QUOTE_CLOSING_SINGLE, "&rsquo;",
- QUOTE_OPENING_DOUBLE, "&ldquo;",
- QUOTE_CLOSING_DOUBLE, "&rdquo;",
- QUOTE_STRAIGHT_SINGLE, "'",
- QUOTE_STRAIGHT_DOUBLE, "\"",
- QUOTE_APOSTROPHE, "&apos;",
- QUOTE_PRIME_SINGLE, "&prime;",
- QUOTE_PRIME_DOUBLE, "&Prime;"
+ public static final Map<TokenType, String> ENTITIES = ofEntries(
+ entry( QUOTE_OPENING_SINGLE, "&lsquo;" ),
+ entry( QUOTE_CLOSING_SINGLE, "&rsquo;" ),
+ entry( QUOTE_OPENING_DOUBLE, "&ldquo;" ),
+ entry( QUOTE_CLOSING_DOUBLE, "&rdquo;" ),
+ entry( QUOTE_STRAIGHT_SINGLE, "'" ),
+ entry( QUOTE_STRAIGHT_DOUBLE, "\"" ),
+ entry( QUOTE_APOSTROPHE, "&apos;" ),
+ entry( QUOTE_PRIME_SINGLE, "&prime;" ),
+ entry( QUOTE_PRIME_DOUBLE, "&Prime;" ),
+ entry( QUOTE_PRIME_TRIPLE, "&tprime;" ),
+ entry( QUOTE_PRIME_QUADRUPLE, "&qprime;" )
);
/**
* Used by external applications to initialize the replacement map.
*/
- public static final Map<TokenType, String> CHARS = Map.of(
- QUOTE_OPENING_SINGLE, "‘",
- QUOTE_CLOSING_SINGLE, "’",
- QUOTE_OPENING_DOUBLE, "“",
- QUOTE_CLOSING_DOUBLE, "”",
- QUOTE_STRAIGHT_SINGLE, "'",
- QUOTE_STRAIGHT_DOUBLE, "\"",
- QUOTE_APOSTROPHE, "’",
- QUOTE_PRIME_SINGLE, "′",
- QUOTE_PRIME_DOUBLE, "″"
+ public static final Map<TokenType, String> CHARS = ofEntries(
+ entry( QUOTE_OPENING_SINGLE, "‘" ),
+ entry( QUOTE_CLOSING_SINGLE, "’" ),
+ entry( QUOTE_OPENING_DOUBLE, "“" ),
+ entry( QUOTE_CLOSING_DOUBLE, "”" ),
+ entry( QUOTE_STRAIGHT_SINGLE, "'" ),
+ entry( QUOTE_STRAIGHT_DOUBLE, "\"" ),
+ entry( QUOTE_APOSTROPHE, "’" ),
+ entry( QUOTE_PRIME_SINGLE, "′" ),
+ entry( QUOTE_PRIME_DOUBLE, "″" ),
+ entry( QUOTE_PRIME_TRIPLE, "‴" ),
+ entry( QUOTE_PRIME_QUADRUPLE, "⁗" )
);
private final Consumer<Lexeme> mUnresolved;
private final Contractions mContractions;
private final Map<TokenType, String> mReplacements;
- private final ParserFactory mFactory;
+ private final ParserType mParserType;
/**
mContractions = c;
mReplacements = replacements;
- mFactory = new ParserFactory( parserType );
+ mParserType = parserType;
}
@Override
public String apply( final String text ) {
- final var parser = mFactory.createParser( text, mContractions );
+ final var parser = new Parser( text, mContractions );
final var tokens = new ArrayList<Token>();
// Parse the tokens and consume all unresolved lexemes.
- parser.parse( tokens::add, mUnresolved );
+ parser.parse( tokens::add, mUnresolved, mParserType.filter() );
// The parser may emit tokens in any order.
src/main/java/com/whitemagicsoftware/keenquotes/FastCharacterIterator.java
* </p>
*/
-public class FastCharacterIterator {
+final class FastCharacterIterator {
private final String mS;
*/
public char current() {
- return hasNext() ? mS.charAt( mPos ) : DONE;
+ final var pos = mPos;
+ return pos < mLen ? mS.charAt( pos ) : DONE;
}
final var pos = mPos;
return pos + 1 < mLen ? mS.charAt( pos + 1 ) : DONE;
- }
-
- /**
- * Answers whether the internal index is less than the string's length,
- * meaning that calling {@link #next()} followed by {@link #current()} will
- * succeed.
- *
- * @return {@code true} if there are more characters to iterate.
- */
- public boolean hasNext() {
- return mPos < mLen;
}
src/main/java/com/whitemagicsoftware/keenquotes/KeenQuotes.java
import java.util.Properties;
-import static com.whitemagicsoftware.keenquotes.ParserFactory.ParserType.PARSER_PLAIN;
+import static com.whitemagicsoftware.keenquotes.ParserType.PARSER_PLAIN;
import static java.lang.String.format;
import static java.lang.System.*;
src/main/java/com/whitemagicsoftware/keenquotes/Lexer.java
* Turns text into words, numbers, punctuation, spaces, and more.
*/
-public class Lexer {
+public final class Lexer {
/**
* Tokenizes a sequence of characters. The order of comparisons is optimized
}
- public static void lex(
+ private static void lex(
final FastCharacterIterator i,
final Consumer<Lexeme> emitter,
src/main/java/com/whitemagicsoftware/keenquotes/Parser.java
public void parse(
final Consumer<Token> tokenConsumer,
- final Consumer<Lexeme> lexemeConsumer ) {
+ final Consumer<Lexeme> lexemeConsumer,
+ final Consumer<FastCharacterIterator> filter ) {
final var lexemes = new CircularFifoQueue<Lexeme>( 3 );
mClosingDoubleQuotes.clear();
}
- });
+ }, filter);
// By loop's end, the lexemes list contains tokens for all except the
src/main/java/com/whitemagicsoftware/keenquotes/ParserFactory.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-/**
- * Responsible for creating new {@link Parser} instances based on the
- * {@link ParserType}. The document content format must be known in advance.
- */
-public class ParserFactory {
- public enum ParserType {
- PARSER_PLAIN,
- PARSER_XML
- }
-
- private final ParserType mParserType;
-
- public ParserFactory( final ParserType parserType ) {
- mParserType = parserType;
- }
-
- public Parser createParser(
- final String text, final Contractions contractions ) {
-
- return mParserType == ParserType.PARSER_PLAIN
- ? new Parser( text, contractions )
- : new XmlParser( text, contractions );
- }
-}
src/main/java/com/whitemagicsoftware/keenquotes/ParserType.java
+/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes;
+
+import java.util.function.Consumer;
+
+/**
+ * Identifies the kind of document being processed and supplies the filter
+ * that is handed to the parser alongside the token and lexeme consumers.
+ */
+public enum ParserType {
+ /** Plain text: the filter deliberately leaves the iterator untouched. */
+ PARSER_PLAIN( iterator -> {} ),
+
+ /**
+ * XML documents: delegates to an {@link XmlFilter}. The filter has to be
+ * invoked with the iterator; merely constructing an {@link XmlFilter} and
+ * discarding it would make this constant behave like {@link #PARSER_PLAIN}.
+ */
+ // NOTE(review): assumes XmlFilter exposes accept(FastCharacterIterator);
+ // confirm against XmlFilter's declaration. A fresh instance is created per
+ // call so that no filter state is shared between documents.
+ PARSER_XML( iterator -> new XmlFilter().accept( iterator ) );
+
+ /** Filter applied to the character iterator for this parser type. */
+ private final Consumer<FastCharacterIterator> mFilter;
+
+ /**
+ * Associates a parser type with its character iterator filter.
+ *
+ * @param filter Receives the character iterator; must not be {@code null}.
+ */
+ ParserType( final Consumer<FastCharacterIterator> filter ) {
+ mFilter = filter;
+ }
+
+ /**
+ * Returns the filter associated with this parser type.
+ *
+ * @return The consumer given to the constructor, never {@code null} for
+ * the constants declared in this enum.
+ */
+ Consumer<FastCharacterIterator> filter() {
+ return mFilter;
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/TokenType.java
QUOTE_PRIME_SINGLE,
QUOTE_PRIME_DOUBLE,
+ QUOTE_PRIME_TRIPLE,
+ QUOTE_PRIME_QUADRUPLE,
}
src/main/java/com/whitemagicsoftware/keenquotes/XmlParser.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
-package com.whitemagicsoftware.keenquotes;
-
-/**
- * Turns text into {@link Lexeme}s, allowing XML elements to be ignored.
- */
-public final class XmlParser extends Parser {
- /**
- * Constructs a new {@link Parser} using the default contraction sets
- * to help resolve some ambiguous scenarios.
- *
- * @param text The prose to parse, containing zero or more quotation
- * characters.
- * @param contractions Custom sets of contractions to help resolve
- */
- public XmlParser(
- final String text, final Contractions contractions ) {
- super( text, contractions );
- }
-}
src/test/java/com/whitemagicsoftware/keenquotes/KeenQuotesTest.java
package com.whitemagicsoftware.keenquotes;
-import com.whitemagicsoftware.keenquotes.ParserFactory.ParserType;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import java.util.stream.Stream;
-import static com.whitemagicsoftware.keenquotes.ParserFactory.ParserType.PARSER_PLAIN;
-import static com.whitemagicsoftware.keenquotes.ParserFactory.ParserType.PARSER_XML;
+import static com.whitemagicsoftware.keenquotes.ParserType.PARSER_PLAIN;
+import static com.whitemagicsoftware.keenquotes.ParserType.PARSER_XML;
import static java.lang.System.out;
import static org.junit.jupiter.api.Assertions.assertEquals;
src/test/java/com/whitemagicsoftware/keenquotes/ParserTest.java
parser.parse(
- ( token ) -> actual.merge( token.getType(), 1, Integer::sum ),
- ( lexeme ) -> {}
+ token -> actual.merge( token.getType(), 1, Integer::sum ),
+ lexeme -> {},
+ filter -> {}
);
Delta: 64 lines added, 96 lines removed, 32-line decrease