| Author | Dave Jarvis <email> |
|---|---|
| Date | 2022-08-13 15:35:33 GMT-0700 |
| Commit | 3cca8ab60c6fd0811689277778b41a4f15cc4b01 |
| Parent | 7928edf |
| package com.whitemagicsoftware.keenquotes; | ||
| -import com.whitemagicsoftware.keenquotes.ParserFactory.ParserType; | ||
| - | ||
| import java.util.ArrayList; | ||
| import java.util.Map; | ||
| import java.util.function.Consumer; | ||
| import java.util.function.Function; | ||
| import static com.whitemagicsoftware.keenquotes.TokenType.*; | ||
| import static java.util.Collections.sort; | ||
| +import static java.util.Map.*; | ||
| /** | ||
| * Responsible for converting curly quotes to HTML entities throughout a | ||
| * text string. | ||
| */ | ||
| @SuppressWarnings( "unused" ) | ||
| public class Converter implements Function<String, String> { | ||
| - public static final Map<TokenType, String> ENTITIES = Map.of( | ||
| - QUOTE_OPENING_SINGLE, "‘", | ||
| - QUOTE_CLOSING_SINGLE, "’", | ||
| - QUOTE_OPENING_DOUBLE, "“", | ||
| - QUOTE_CLOSING_DOUBLE, "”", | ||
| - QUOTE_STRAIGHT_SINGLE, "'", | ||
| - QUOTE_STRAIGHT_DOUBLE, "\"", | ||
| - QUOTE_APOSTROPHE, "'", | ||
| - QUOTE_PRIME_SINGLE, "′", | ||
| - QUOTE_PRIME_DOUBLE, "″" | ||
| + public static final Map<TokenType, String> ENTITIES = ofEntries( | ||
| + entry( QUOTE_OPENING_SINGLE, "‘" ), | ||
| + entry( QUOTE_CLOSING_SINGLE, "’" ), | ||
| + entry( QUOTE_OPENING_DOUBLE, "“" ), | ||
| + entry( QUOTE_CLOSING_DOUBLE, "”" ), | ||
| + entry( QUOTE_STRAIGHT_SINGLE, "'" ), | ||
| + entry( QUOTE_STRAIGHT_DOUBLE, "\"" ), | ||
| + entry( QUOTE_APOSTROPHE, "'" ), | ||
| + entry( QUOTE_PRIME_SINGLE, "′" ), | ||
| + entry( QUOTE_PRIME_DOUBLE, "″" ), | ||
| + entry( QUOTE_PRIME_TRIPLE, "‴" ), | ||
| + entry( QUOTE_PRIME_QUADRUPLE, "⁗" ) | ||
| ); | ||
| /** | ||
| * Used by external applications to initialize the replacement map. | ||
| */ | ||
| - public static final Map<TokenType, String> CHARS = Map.of( | ||
| - QUOTE_OPENING_SINGLE, "‘", | ||
| - QUOTE_CLOSING_SINGLE, "’", | ||
| - QUOTE_OPENING_DOUBLE, "“", | ||
| - QUOTE_CLOSING_DOUBLE, "”", | ||
| - QUOTE_STRAIGHT_SINGLE, "'", | ||
| - QUOTE_STRAIGHT_DOUBLE, "\"", | ||
| - QUOTE_APOSTROPHE, "’", | ||
| - QUOTE_PRIME_SINGLE, "′", | ||
| - QUOTE_PRIME_DOUBLE, "″" | ||
| + public static final Map<TokenType, String> CHARS = ofEntries( | ||
| + entry( QUOTE_OPENING_SINGLE, "‘" ), | ||
| + entry( QUOTE_CLOSING_SINGLE, "’" ), | ||
| + entry( QUOTE_OPENING_DOUBLE, "“" ), | ||
| + entry( QUOTE_CLOSING_DOUBLE, "”" ), | ||
| + entry( QUOTE_STRAIGHT_SINGLE, "'" ), | ||
| + entry( QUOTE_STRAIGHT_DOUBLE, "\"" ), | ||
| + entry( QUOTE_APOSTROPHE, "’" ), | ||
| + entry( QUOTE_PRIME_SINGLE, "′" ), | ||
| + entry( QUOTE_PRIME_DOUBLE, "″" ), | ||
| + entry( QUOTE_PRIME_TRIPLE, "‴" ), | ||
| + entry( QUOTE_PRIME_QUADRUPLE, "⁗" ) | ||
| ); | ||
| private final Consumer<Lexeme> mUnresolved; | ||
| private final Contractions mContractions; | ||
| private final Map<TokenType, String> mReplacements; | ||
| - private final ParserFactory mFactory; | ||
| + private final ParserType mParserType; | ||
| /** | ||
| mContractions = c; | ||
| mReplacements = replacements; | ||
| - mFactory = new ParserFactory( parserType ); | ||
| + mParserType = parserType; | ||
| } | ||
| @Override | ||
| public String apply( final String text ) { | ||
| - final var parser = mFactory.createParser( text, mContractions ); | ||
| + final var parser = new Parser( text, mContractions ); | ||
| final var tokens = new ArrayList<Token>(); | ||
| // Parse the tokens and consume all unresolved lexemes. | ||
| - parser.parse( tokens::add, mUnresolved ); | ||
| + parser.parse( tokens::add, mUnresolved, mParserType.filter() ); | ||
| // The parser may emit tokens in any order. | ||
| * </p> | ||
| */ | ||
| -public class FastCharacterIterator { | ||
| +final class FastCharacterIterator { | ||
| private final String mS; | ||
| */ | ||
| public char current() { | ||
| - return hasNext() ? mS.charAt( mPos ) : DONE; | ||
| + final var pos = mPos; | ||
| + return pos < mLen ? mS.charAt( pos ) : DONE; | ||
| } | ||
| final var pos = mPos; | ||
| return pos + 1 < mLen ? mS.charAt( pos + 1 ) : DONE; | ||
| - } | ||
| - | ||
| - /** | ||
| - * Answers whether the internal index is less than the string's length, | ||
| - * meaning that calling {@link #next()} followed by {@link #current()} will | ||
| - * succeed. | ||
| - * | ||
| - * @return {@code true} if there are more characters to iterate. | ||
| - */ | ||
| - public boolean hasNext() { | ||
| - return mPos < mLen; | ||
| } | ||
| import java.util.Properties; | ||
| -import static com.whitemagicsoftware.keenquotes.ParserFactory.ParserType.PARSER_PLAIN; | ||
| +import static com.whitemagicsoftware.keenquotes.ParserType.PARSER_PLAIN; | ||
| import static java.lang.String.format; | ||
| import static java.lang.System.*; |
| * Turns text into words, numbers, punctuation, spaces, and more. | ||
| */ | ||
| -public class Lexer { | ||
| +public final class Lexer { | ||
| /** | ||
| * Tokenizes a sequence of characters. The order of comparisons is optimized | ||
| } | ||
| - public static void lex( | ||
| + private static void lex( | ||
| final FastCharacterIterator i, | ||
| final Consumer<Lexeme> emitter, | ||
| public void parse( | ||
| final Consumer<Token> tokenConsumer, | ||
| - final Consumer<Lexeme> lexemeConsumer ) { | ||
| + final Consumer<Lexeme> lexemeConsumer, | ||
| + final Consumer<FastCharacterIterator> filter ) { | ||
| final var lexemes = new CircularFifoQueue<Lexeme>( 3 ); | ||
| mClosingDoubleQuotes.clear(); | ||
| } | ||
| - }); | ||
| + }, filter); | ||
| // By loop's end, the lexemes list contains tokens for all except the | ||
| -/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */ | ||
| -package com.whitemagicsoftware.keenquotes; | ||
| - | ||
| -/** | ||
| - * Responsible for creating new {@link Parser} instances based on the | ||
| - * {@link ParserType}. The document content format must be known in advance. | ||
| - */ | ||
| -public class ParserFactory { | ||
| - public enum ParserType { | ||
| - PARSER_PLAIN, | ||
| - PARSER_XML | ||
| - } | ||
| - | ||
| - private final ParserType mParserType; | ||
| - | ||
| - public ParserFactory( final ParserType parserType ) { | ||
| - mParserType = parserType; | ||
| - } | ||
| - | ||
| - public Parser createParser( | ||
| - final String text, final Contractions contractions ) { | ||
| - | ||
| - return mParserType == ParserType.PARSER_PLAIN | ||
| - ? new Parser( text, contractions ) | ||
| - : new XmlParser( text, contractions ); | ||
| - } | ||
| -} | ||
| +/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. */ | ||
| +package com.whitemagicsoftware.keenquotes; | ||
| + | ||
| +import java.util.function.Consumer; | ||
| + | ||
| +/** | ||
| + * Identifies the kind of document being parsed and carries the filter that | ||
| + * {@link Converter} hands to {@link Parser} for that kind of document. | ||
| + */ | ||
| +public enum ParserType { | ||
| + /** Plain text: the filter performs no action on the iterator. */ | ||
| + PARSER_PLAIN( iterator -> {} ), | ||
| + | ||
| + /** | ||
| + * XML: forwards the iterator to an {@link XmlFilter}. A lambda that merely | ||
| + * instantiates the filter would discard it and behave like | ||
| + * {@link #PARSER_PLAIN}, so the iterator must be passed along explicitly. | ||
| + * NOTE(review): assumes XmlFilter exposes accept(FastCharacterIterator), | ||
| + * i.e., implements Consumer of FastCharacterIterator -- confirm. | ||
| + */ | ||
| + PARSER_XML( iterator -> new XmlFilter().accept( iterator ) ); | ||
| + | ||
| + private final Consumer<FastCharacterIterator> mFilter; | ||
| + | ||
| + /** | ||
| + * Associates a filter with the enumerated constant. | ||
| + * | ||
| + * @param filter The consumer that receives the character iterator. | ||
| + */ | ||
| + ParserType( final Consumer<FastCharacterIterator> filter ) { | ||
| + mFilter = filter; | ||
| + } | ||
| + | ||
| + /** | ||
| + * Returns the filter supplied when the constant was constructed. | ||
| + * | ||
| + * @return The consumer associated with this parser type. | ||
| + */ | ||
| + Consumer<FastCharacterIterator> filter() { | ||
| + return mFilter; | ||
| + } | ||
| +} | ||
| QUOTE_PRIME_SINGLE, | ||
| QUOTE_PRIME_DOUBLE, | ||
| + QUOTE_PRIME_TRIPLE, | ||
| + QUOTE_PRIME_QUADRUPLE, | ||
| } | ||
| -/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */ | ||
| -package com.whitemagicsoftware.keenquotes; | ||
| - | ||
| -/** | ||
| - * Turns text into {@link Lexeme}s, allowing XML elements to be ignored. | ||
| - */ | ||
| -public final class XmlParser extends Parser { | ||
| - /** | ||
| - * Constructs a new {@link Parser} using the default contraction sets | ||
| - * to help resolve some ambiguous scenarios. | ||
| - * | ||
| - * @param text The prose to parse, containing zero or more quotation | ||
| - * characters. | ||
| - * @param contractions Custom sets of contractions to help resolve | ||
| - */ | ||
| - public XmlParser( | ||
| - final String text, final Contractions contractions ) { | ||
| - super( text, contractions ); | ||
| - } | ||
| -} | ||
| package com.whitemagicsoftware.keenquotes; | ||
| -import com.whitemagicsoftware.keenquotes.ParserFactory.ParserType; | ||
| import org.junit.jupiter.api.Disabled; | ||
| import org.junit.jupiter.api.Test; | ||
| import java.util.stream.Stream; | ||
| -import static com.whitemagicsoftware.keenquotes.ParserFactory.ParserType.PARSER_PLAIN; | ||
| -import static com.whitemagicsoftware.keenquotes.ParserFactory.ParserType.PARSER_XML; | ||
| +import static com.whitemagicsoftware.keenquotes.ParserType.PARSER_PLAIN; | ||
| +import static com.whitemagicsoftware.keenquotes.ParserType.PARSER_XML; | ||
| import static java.lang.System.out; | ||
| import static org.junit.jupiter.api.Assertions.assertEquals; | ||
| parser.parse( | ||
| - ( token ) -> actual.merge( token.getType(), 1, Integer::sum ), | ||
| - ( lexeme ) -> {} | ||
| + token -> actual.merge( token.getType(), 1, Integer::sum ), | ||
| + lexeme -> {}, | ||
| + filter -> {} | ||
| ); | ||
| Delta | 64 lines added, 96 lines removed, 32-line decrease |
|---|