Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Add ability to swap entities for unicode characters

Author: Dave Jarvis <email>
Date: 2021-06-28 21:16:54 GMT-0700
Commit: 1acf9fb70c79d7170177d370f168c1a8ba9b5512
Parent: 79ed092
src/main/java/com/whitemagicsoftware/keenquotes/Contractions.java
private static final Set<String> BEGAN_UNAMBIGUOUS = Set.of(
"aporth",
+ // about you
"boutcha",
+ // about you
"boutchu",
+ // about well
+ "boutwell",
+ // except
"cept",
+ // decided
+ "cided",
+ // because
"cos",
"dillo",
+ // them
"em",
+ // afraid
"fraid",
+ // against
"gainst",
+ // him
"im",
+ // and
"n",
+ // beneath
"neath",
+ // another
"nother",
+ // enough
"nuff",
+ // gonna
"onna",
"onna'",
// horse
"oss",
// horses
"osses",
+ // upon
"pon",
"s",
"sblood",
+ // excuse
"scuse",
"sfar",
"sfoot",
+ // considered
+ "sidered",
"t",
"taint",
"twould",
"twouldn",
- "ve"
+ // have
+ "ve",
+ // exactly
+ "xactly"
);
// friend
"frien",
+ // just
+ "jes",
// just
"jus",
src/main/java/com/whitemagicsoftware/keenquotes/Converter.java
* text string.
*/
+@SuppressWarnings( "unused" )
public class Converter implements Function<String, String> {
- private static final Map<TokenType, String> REPLACEMENTS = Map.of(
+ public static final Map<TokenType, String> ENTITIES = Map.of(
QUOTE_OPENING_SINGLE, "&lsquo;",
QUOTE_CLOSING_SINGLE, "&rsquo;",
QUOTE_OPENING_DOUBLE, "&ldquo;",
QUOTE_CLOSING_DOUBLE, "&rdquo;",
QUOTE_STRAIGHT_SINGLE, "'",
QUOTE_STRAIGHT_DOUBLE, "\"",
QUOTE_APOSTROPHE, "&apos;",
QUOTE_PRIME_SINGLE, "&prime;",
QUOTE_PRIME_DOUBLE, "&Prime;"
+ );
+
+ /**
+ * Used by external applications to initialize the replacement map.
+ */
+ public static final Map<TokenType, String> CHARS = Map.of(
+ QUOTE_OPENING_SINGLE, "‘",
+ QUOTE_CLOSING_SINGLE, "’",
+ QUOTE_OPENING_DOUBLE, "“",
+ QUOTE_CLOSING_DOUBLE, "”",
+ QUOTE_STRAIGHT_SINGLE, "'",
+ QUOTE_STRAIGHT_DOUBLE, "\"",
+ QUOTE_APOSTROPHE, "’",
+ QUOTE_PRIME_SINGLE, "′",
+ QUOTE_PRIME_DOUBLE, "″"
);
private final Consumer<Lexeme> mUnresolved;
private final Contractions mContractions;
+ private final Map<TokenType, String> mReplacements;
+ /**
+ * Maps quotes to HTML entities.
+ *
+ * @param unresolved Consumes {@link Lexeme}s that could not be converted
+ * into HTML entities.
+ */
public Converter( final Consumer<Lexeme> unresolved ) {
this( unresolved, new Contractions.Builder().build() );
+ }
+
+ /**
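+ * Maps quotes to the given replacement strings, using the default
+ * contractions.
+ *
+ * @param unresolved Consumes {@link Lexeme}s that could not be converted.
+ * @param replacements Map of recognized quotes to output types (entity or
+ * Unicode character).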
+ */
+ public Converter(
+ final Consumer<Lexeme> unresolved,
+ final Map<TokenType, String> replacements ) {
+ this( unresolved, new Contractions.Builder().build(), replacements );
}
+ /**
+ * Maps quotes to HTML entities.
+ *
+ * @param unresolved Consumes {@link Lexeme}s that could not be converted
+ * into HTML entities.
+ * @param c Contractions listings.
+ */
public Converter( final Consumer<Lexeme> unresolved, final Contractions c ) {
+ this( unresolved, c, ENTITIES );
+ }
+
+ /**
+ * Maps quotes to curled equivalents.
+ *
+ * @param unresolved Consumes {@link Lexeme}s that could not be converted
+ * into HTML entities.
+ * @param c Contractions listings.
+ * @param replacements Map of recognized quotes to output types (entity or
+ * Unicode character).
+ */
+ public Converter(
+ final Consumer<Lexeme> unresolved,
+ final Contractions c,
+ final Map<TokenType, String> replacements ) {
mUnresolved = unresolved;
mContractions = c;
+ mReplacements = replacements;
}
if( position <= token.began() ) {
result.append( text, position, token.began() );
- result.append( REPLACEMENTS.get( token.getType() ) );
+ result.append( mReplacements.get( token.getType() ) );
}
src/main/java/com/whitemagicsoftware/keenquotes/Lexeme.java
@Override
public String toString() {
- return getClass().getSimpleName() + '{' +
+ return getClass().getSimpleName() + '[' +
"mType=" + mType +
", mBegan=" + mBegan +
", mEnded=" + mEnded +
- '}';
+ ']';
}
Delta: 90 lines added, 5 lines removed, 85-line increase