Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Build conversion application

AuthorDave Jarvis <email>
Date2021-06-17 21:14:59 GMT-0700
Commit5f3aa535e4618f6a76103f493d2a26d1e5b19124
Parent4dda57c
.gitignore
-# Ignore Gradle project-specific cache directory
.gradle
-
-# Ignore Gradle build output directory
-lib/build
-
.idea
gradle
gradlew*
+build
+
README.md
A Java library to convert straight quotes into curly quotes.
+# Requirements
+
+Download and install OpenJDK 16 or greater.
+
+# Download
+
+Download the `.jar` file from this repository.
+
+# Run
+
+Run the software as follows:
+
+ java -jar keenquotes.jar < src.txt > dst.txt 2> err.txt
+
+Where:
+
+* `src.txt` -- Input document file that contains straight quotes.
+* `dst.txt` -- Output document file that'll contain curled quotes.
+* `err.txt` -- Error file that will note ambiguous conversion errors.
+
+For help, run the software as follows:
+
+ java -jar keenquotes.jar -h
+
# Software Design
build.gradle
plugins {
id 'application'
+ id 'com.palantir.git-version' version '0.12.3'
}
group 'com.whitemagicsoftware'
-version '1.0'
repositories {
main {
java {
- srcDirs = ["src/main"]
+ srcDirs = ["src/main/java"]
}
}
mainClassName = "com.whitemagicsoftware.${applicationName}.KeenQuotes"
}
+
+version = gitVersion()
+
+def resourceDir = sourceSets.main.resources.srcDirs[0]
+final File propertiesFile = file("${resourceDir}/com/whitemagicsoftware/${applicationName}/app.properties")
+propertiesFile.write("application.version=${version}")
jar {
useJUnitPlatform()
}
-
src/main/java/com/whitemagicsoftware/keenquotes/Contractions.java
"movin",
"neighborin",
+ "neighbourin",
"nothin",
"notin",
src/main/java/com/whitemagicsoftware/keenquotes/Converter.java
+package com.whitemagicsoftware.keenquotes;
+
+import java.util.ArrayList;
+import java.util.Map;
+import java.util.function.Consumer;
+
+import static com.whitemagicsoftware.keenquotes.TokenType.*;
+import static java.util.Collections.sort;
+
+/**
+ * Responsible for converting straight quotes into curly quotes, apostrophes,
+ * and primes, expressed as HTML entities, throughout a text string.
+ */
+public class Converter {
+  /**
+   * Maps each quotation mark token type onto its replacement text. Straight
+   * quote token types map onto the straight quote characters themselves.
+   */
+  private static final Map<TokenType, String> REPLACEMENTS = Map.of(
+    QUOTE_OPENING_SINGLE, "&lsquo;",
+    QUOTE_CLOSING_SINGLE, "&rsquo;",
+    QUOTE_OPENING_DOUBLE, "&ldquo;",
+    QUOTE_CLOSING_DOUBLE, "&rdquo;",
+    QUOTE_STRAIGHT_SINGLE, "'",
+    QUOTE_STRAIGHT_DOUBLE, "\"",
+    QUOTE_APOSTROPHE, "&apos;",
+    QUOTE_PRIME_SINGLE, "&prime;",
+    QUOTE_PRIME_DOUBLE, "&Prime;"
+  );
+
+  /**
+   * Converts straight quotes to curly quotes and primes. Any quotation marks
+   * that cannot be converted are passed to the {@link Consumer}.
+   *
+   * @param text       The text to parse.
+   * @param unresolved Recipient for ambiguous {@link Lexeme}s.
+   * @return The given text string with as many straight quotes converted to
+   * curly quotes as is feasible.
+   */
+  public static String convert(
+    final String text, final Consumer<Lexeme> unresolved ) {
+    final var parser = new Parser( text );
+    final var tokens = new ArrayList<Token>();
+
+    // Parse the tokens and consume all unresolved lexemes.
+    parser.parse( tokens::add, unresolved );
+
+    // The parser may emit tokens in any order.
+    sort( tokens );
+
+    final var result = new StringBuilder( text.length() );
+    var position = 0;
+
+    for( final var token : tokens ) {
+      final var began = token.began();
+      final var ended = token.ended();
+
+      // Tokens that start before the cursor overlap text already handled.
+      if( position <= began ) {
+        final var replacement = REPLACEMENTS.get( token.getType() );
+
+        result.append( text, position, began );
+
+        if( replacement == null ) {
+          // No replacement is known for this token type, so retain the
+          // source text instead of appending the literal string "null".
+          result.append( text, began, ended );
+        }
+        else {
+          result.append( replacement );
+        }
+      }
+
+      // Never move the cursor backwards: a skipped token that ends before
+      // the cursor would otherwise cause text to be appended twice.
+      position = Math.max( position, ended );
+    }
+
+    return result.append( text, position, text.length() ).toString();
+  }
+}
src/main/java/com/whitemagicsoftware/keenquotes/KeenQuotes.java
package com.whitemagicsoftware.keenquotes;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.function.Consumer;
+import picocli.CommandLine;
-import static com.whitemagicsoftware.keenquotes.TokenType.*;
-import static java.util.Collections.sort;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Properties;
+
+import static java.lang.String.format;
+import static picocli.CommandLine.Help.Ansi.Style.*;
+import static picocli.CommandLine.Help.ColorScheme;
/**
* Responsible for replacing {@link Token} instances with equivalent smart
* quotes (or straight quotes). This will inform the caller when ambiguous
* quotes cannot be reliably resolved.
*/
public final class KeenQuotes {
- private static final Map<TokenType, String> REPLACEMENTS = Map.of(
- QUOTE_OPENING_SINGLE, "&lsquo;",
- QUOTE_CLOSING_SINGLE, "&rsquo;",
- QUOTE_OPENING_DOUBLE, "&ldquo;",
- QUOTE_CLOSING_DOUBLE, "&rdquo;",
- QUOTE_STRAIGHT_SINGLE, "'",
- QUOTE_STRAIGHT_DOUBLE, "\"",
- QUOTE_APOSTROPHE, "&apos;",
- QUOTE_PRIME_SINGLE, "&prime;",
- QUOTE_PRIME_DOUBLE, "&Prime;"
- );
+ private final Settings mSettings = new Settings( this );
+
+ private static ColorScheme createColourScheme() {
+ return new ColorScheme.Builder()
+ .commands( bold )
+ .options( fg_blue, bold )
+ .parameters( fg_blue )
+ .optionParams( italic )
+ .errors( fg_red, bold )
+ .stackTraces( italic )
+ .build();
+ }
+
+  /**
+   * Reads all text from standard input, converts its quotation marks, then
+   * writes the result to standard output. Unresolved lexemes are written to
+   * standard error, as is the stack trace of any exception.
+   */
+  public void run() {
+    final var sb = new StringBuilder();
+
+    try( final var reader = open( System.in ) ) {
+      final var sep = System.lineSeparator();
+      String line;
+
+      while( (line = reader.readLine()) != null ) {
+        sb.append( line ).append( sep );
+      }
+
+      // Each line read was terminated with a separator above, so print
+      // (not println) avoids writing an extra blank line to the output.
+      System.out.print(
+        Converter.convert( sb.toString(), System.err::println )
+      );
+      System.out.flush();
+    } catch( final Exception ex ) {
+      ex.printStackTrace( System.err );
+    }
+  }
+
+  /**
+   * Returns the command-line settings bound to this application instance.
+   *
+   * @return The settings created when this instance was constructed.
+   */
+  private Settings getSettings() {
+    return this.mSettings;
+  }
/**
- * Converts straight quotes to curly quotes and primes. Any quotation marks
- * that cannot be converted are passed to the {@link Consumer}.
+ * Returns the application version number retrieved from the application
+ * properties file. The build generates the properties file, deriving the
+ * version number from the Git repository.
*
- * @param text The text to parse.
- * @param unresolved Recipient for ambiguous {@link Lexeme}s.
- * @return The given text string with as many straight quotes converted to
- * curly quotes as is feasible.
+ * @return The application version number.
+ * @throws RuntimeException An {@link IOException} occurred.
*/
- public static String convert(
- final String text, final Consumer<Lexeme> unresolved ) {
- final var parser = new Parser( text );
- final var tokens = new ArrayList<Token>();
+  private static String getVersion() {
+    final Properties properties;
+
+    try {
+      properties = loadProperties( "app.properties" );
+    } catch( final Exception ex ) {
+      // Callers cannot recover from a missing or unreadable properties file.
+      throw new RuntimeException( ex );
+    }
+
+    return properties.getProperty( "application.version" );
+  }
- // Parse the tokens and consume all unresolved lexemes.
- parser.parse( tokens::add, unresolved );
+ @SuppressWarnings( "SameParameterValue" )
+ private static Properties loadProperties( final String resource )
+ throws IOException {
+ final var properties = new Properties();
+ properties.load( getResourceAsStream( getResourceName( resource ) ) );
+ return properties;
+ }
- // The parser may emit tokens in any order.
- sort( tokens );
+ private static String getResourceName( final String resource ) {
+ return format( "%s/%s", getPackagePath(), resource );
+ }
- final var result = new StringBuilder( text.length() );
- var position = 0;
+ private static String getPackagePath() {
+ return KeenQuotes.class.getPackageName().replace( '.', '/' );
+ }
- for( final var token : tokens ) {
- if( position <= token.began() ) {
- result.append( text, position, token.began() );
- result.append( REPLACEMENTS.get( token.getType() ) );
- }
+ private static InputStream getResourceAsStream( final String resource ) {
+ return KeenQuotes.class.getClassLoader().getResourceAsStream( resource );
+ }
- position = token.ended();
- }
+ @SuppressWarnings( "SameParameterValue" )
+ private static BufferedReader open( final InputStream in ) {
+ return new BufferedReader( new InputStreamReader( in ) );
+ }
- return result.append( text.substring( position ) ).toString();
+ public static void main( final String[] args ) {
+ final var app = new KeenQuotes();
+ final var parser = new CommandLine( app.getSettings() );
+ parser.setColorScheme( createColourScheme() );
+
+ final var exitCode = parser.execute( args );
+ final var parseResult = parser.getParseResult();
+
+ if( parseResult.isUsageHelpRequested() ) {
+ System.exit( exitCode );
+ }
+ else if( parseResult.isVersionHelpRequested() ) {
+ System.out.println( getVersion() );
+ System.exit( exitCode );
+ }
}
}
src/main/java/com/whitemagicsoftware/keenquotes/Settings.java
+/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+package com.whitemagicsoftware.keenquotes;
+
+import picocli.CommandLine;
+
+import java.util.concurrent.Callable;
+
+/**
+ * Declares the command-line options and launches the application after the
+ * arguments have been parsed.
+ */
+@CommandLine.Command(
+  name = "KeenQuotes",
+  mixinStandardHelpOptions = true,
+  description = "Converts straight quotes to curly quotes."
+)
+@SuppressWarnings( {"FieldMayBeFinal", "CanBeFinal"} )
+public final class Settings implements Callable<Integer> {
+  /**
+   * Main executable class.
+   */
+  private final KeenQuotes mMain;
+
+  /**
+   * List of unambiguous contractions having leading apostrophes. The words
+   * are comma-separated, matching the example in the option description.
+   */
+  @CommandLine.Option(
+    names = {"-ub", "--unamb-began"},
+    description =
+      "Contractions to treat as unambiguous (e.g., cause,bout)",
+    paramLabel = "words",
+    split = ","
+  )
+  private String[] mUnambiguousBegan;
+
+  /**
+   * List of unambiguous contractions having lagging apostrophes. The words
+   * are comma-separated, matching the example in the option description.
+   */
+  @CommandLine.Option(
+    names = {"-ue", "--unamb-ended"},
+    description =
+      "Contractions to treat as unambiguous (e.g., frien,thinkin)",
+    paramLabel = "words",
+    split = ","
+  )
+  private String[] mUnambiguousEnded;
+
+  /**
+   * List of ambiguous contractions having leading apostrophes. The words
+   * are comma-separated, matching the example in the option description.
+   */
+  @CommandLine.Option(
+    names = {"-ab", "--amb-began"},
+    description =
+      "Contractions to treat as ambiguous (e.g., sup,kay)",
+    paramLabel = "words",
+    split = ","
+  )
+  private String[] mAmbiguousBegan;
+
+  /**
+   * List of ambiguous contractions having lagging apostrophes. The words
+   * are comma-separated, matching the example in the option description.
+   */
+  @CommandLine.Option(
+    names = {"-ae", "--amb-ended"},
+    description =
+      "Contractions to treat as ambiguous (e.g., gi,o)",
+    paramLabel = "words",
+    split = ","
+  )
+  private String[] mAmbiguousEnded;
+
+  /**
+   * Whether to list all ambiguous and unambiguous contractions.
+   */
+  @CommandLine.Option(
+    names = {"-l", "--list"},
+    description = "List all ambiguous and unambiguous contractions"
+  )
+  private boolean mDisplayList;
+
+  /**
+   * Creates the settings for the given application instance.
+   *
+   * @param main The application to run once the arguments are parsed.
+   */
+  public Settings( final KeenQuotes main ) {
+    assert main != null;
+    mMain = main;
+  }
+
+  /**
+   * Invoked after the command-line arguments are parsed to launch the
+   * application.
+   *
+   * @return Exit level zero.
+   */
+  @Override
+  public Integer call() {
+    mMain.run();
+    return 0;
+  }
+}
src/test/java/com/whitemagicsoftware/keenquotes/KeenQuotesTest.java
package com.whitemagicsoftware.keenquotes;
+import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.function.Function;
+import static com.whitemagicsoftware.keenquotes.Converter.convert;
import static java.lang.System.out;
import static org.junit.jupiter.api.Assertions.assertEquals;
*/
@Test
- //@Disabled
+ @Disabled
public void test_parse_SingleLine_Parsed() {
- out.println( KeenQuotes.convert(
+ out.println( convert(
"\"’Kearney lives on the banks of Killarney—’",
out::println
@Test
public void test_Parse_StraightQuotes_CurlyQuotes() throws IOException {
- testConverter( text -> KeenQuotes.convert( text, ( lexeme ) -> {} ) );
+ testConverter( text -> convert( text, ( lexeme ) -> {} ) );
}
@ParameterizedTest
- @ValueSource( strings = {"westrup"} )
+ @ValueSource( strings = {"habberton"} )
void test_Parse_Story_Converted( final String filename ) throws IOException {
final var sb = new StringBuilder( 2 ^ 20 );
}
- System.out.println( KeenQuotes.convert( sb.toString(), out::println ) );
+ System.out.println( convert( sb.toString(), out::println ) );
}
Delta286 lines added, 53 lines removed, 233-line increase