| Author | Dave Jarvis <email> |
|---|---|
| Date | 2021-06-17 21:14:59 GMT-0700 |
| Commit | 5f3aa535e4618f6a76103f493d2a26d1e5b19124 |
| Parent | 4dda57c |
| -# Ignore Gradle project-specific cache directory | ||
| .gradle | ||
| - | ||
| -# Ignore Gradle build output directory | ||
| -lib/build | ||
| - | ||
| .idea | ||
| gradle | ||
| gradlew* | ||
| +build | ||
| + | ||
| A Java library to convert straight quotes into curly quotes. | ||
| +# Requirements | ||
| + | ||
| +Download and install OpenJDK 16 or greater. | ||
| + | ||
| +# Download | ||
| + | ||
| +Download the `.jar` file from this repository. | ||
| + | ||
| +# Run | ||
| + | ||
| +Run the software as follows: | ||
| + | ||
| + java -jar keenquotes.jar < src.txt > dst.txt 2> err.txt | ||
| + | ||
| +Where: | ||
| + | ||
| +* `src.txt` -- Input document file that contains straight quotes. | ||
| +* `dst.txt` -- Output document file that will contain curly quotes. | ||
| +* `err.txt` -- Error file that will note ambiguous conversion errors. | ||
| + | ||
| +For help, run the software as follows: | ||
| + | ||
| + java -jar keenquotes.jar -h | ||
| + | ||
| # Software Design | ||
| plugins { | ||
| id 'application' | ||
| + id 'com.palantir.git-version' version '0.12.3' | ||
| } | ||
| group 'com.whitemagicsoftware' | ||
| -version '1.0' | ||
| repositories { | ||
| main { | ||
| java { | ||
| - srcDirs = ["src/main"] | ||
| + srcDirs = ["src/main/java"] | ||
| } | ||
| } | ||
| mainClassName = "com.whitemagicsoftware.${applicationName}.KeenQuotes" | ||
| } | ||
| + | ||
| +// Derive the project version from the Git repository; gitVersion() comes | ||
| +// from the com.palantir.git-version plugin. | ||
| +version = gitVersion() | ||
| + | ||
| +// Record the version in app.properties beneath the first resources directory | ||
| +// so that the application can read "application.version" at run time. | ||
| +// NOTE(review): these statements appear to run whenever the build script is | ||
| +// evaluated and assume the target directory already exists -- confirm. | ||
| +def resourceDir = sourceSets.main.resources.srcDirs[0] | ||
| +final File propertiesFile = file("${resourceDir}/com/whitemagicsoftware/${applicationName}/app.properties") | ||
| +propertiesFile.write("application.version=${version}") | ||
| jar { | ||
| useJUnitPlatform() | ||
| } | ||
| - | ||
| "movin", | ||
| "neighborin", | ||
| + "neighbourin", | ||
| "nothin", | ||
| "notin", |
| +package com.whitemagicsoftware.keenquotes; | ||
| + | ||
| +import java.util.ArrayList; | ||
| +import java.util.Map; | ||
| +import java.util.function.Consumer; | ||
| + | ||
| +import static com.whitemagicsoftware.keenquotes.TokenType.*; | ||
| +import static java.util.Collections.sort; | ||
| + | ||
| +/** | ||
| + * Responsible for converting curly quotes to HTML entities throughout a | ||
| + * text string. | ||
| + */ | ||
| +public class Converter { | ||
| + private static final Map<TokenType, String> REPLACEMENTS = Map.of( | ||
| + QUOTE_OPENING_SINGLE, "‘", | ||
| + QUOTE_CLOSING_SINGLE, "’", | ||
| + QUOTE_OPENING_DOUBLE, "“", | ||
| + QUOTE_CLOSING_DOUBLE, "”", | ||
| + QUOTE_STRAIGHT_SINGLE, "'", | ||
| + QUOTE_STRAIGHT_DOUBLE, "\"", | ||
| + QUOTE_APOSTROPHE, "'", | ||
| + QUOTE_PRIME_SINGLE, "′", | ||
| + QUOTE_PRIME_DOUBLE, "″" | ||
| + ); | ||
| + | ||
| + /** | ||
| + * Converts straight quotes to curly quotes and primes. Any quotation marks | ||
| + * that cannot be converted are passed to the {@link Consumer}. | ||
| + * | ||
| + * @param text The text to parse. | ||
| + * @param unresolved Recipient for ambiguous {@link Lexeme}s. | ||
| + * @return The given text string with as many straight quotes converted to | ||
| + * curly quotes as is feasible. | ||
| + */ | ||
| + public static String convert( | ||
| + final String text, final Consumer<Lexeme> unresolved ) { | ||
| + final var parser = new Parser( text ); | ||
| + final var tokens = new ArrayList<Token>(); | ||
| + | ||
| + // Parse the tokens and consume all unresolved lexemes. | ||
| + parser.parse( tokens::add, unresolved ); | ||
| + | ||
| + // The parser may emit tokens in any order. | ||
| + sort( tokens ); | ||
| + | ||
| + final var result = new StringBuilder( text.length() ); | ||
| + var position = 0; | ||
| + | ||
| + for( final var token : tokens ) { | ||
| + if( position <= token.began() ) { | ||
| + result.append( text, position, token.began() ); | ||
| + result.append( REPLACEMENTS.get( token.getType() ) ); | ||
| + } | ||
| + | ||
| + position = token.ended(); | ||
| + } | ||
| + | ||
| + return result.append( text.substring( position ) ).toString(); | ||
| + } | ||
| +} | ||
| package com.whitemagicsoftware.keenquotes; | ||
| -import java.util.ArrayList; | ||
| -import java.util.Map; | ||
| -import java.util.function.Consumer; | ||
| +import picocli.CommandLine; | ||
| -import static com.whitemagicsoftware.keenquotes.TokenType.*; | ||
| -import static java.util.Collections.sort; | ||
| +import java.io.BufferedReader; | ||
| +import java.io.IOException; | ||
| +import java.io.InputStream; | ||
| +import java.io.InputStreamReader; | ||
| +import java.util.Properties; | ||
| + | ||
| +import static java.lang.String.format; | ||
| +import static picocli.CommandLine.Help.Ansi.Style.*; | ||
| +import static picocli.CommandLine.Help.ColorScheme; | ||
| /** | ||
| * Responsible for running the command-line application: reads text from | ||
| * standard input, converts its quotation marks using {@link Converter}, and | ||
| * writes the result to standard output. Ambiguous quotes that cannot be | ||
| * reliably resolved are reported on standard error. | ||
| */ | ||
| public final class KeenQuotes { | ||
| - private static final Map<TokenType, String> REPLACEMENTS = Map.of( | ||
| - QUOTE_OPENING_SINGLE, "‘", | ||
| - QUOTE_CLOSING_SINGLE, "’", | ||
| - QUOTE_OPENING_DOUBLE, "“", | ||
| - QUOTE_CLOSING_DOUBLE, "”", | ||
| - QUOTE_STRAIGHT_SINGLE, "'", | ||
| - QUOTE_STRAIGHT_DOUBLE, "\"", | ||
| - QUOTE_APOSTROPHE, "'", | ||
| - QUOTE_PRIME_SINGLE, "′", | ||
| - QUOTE_PRIME_DOUBLE, "″" | ||
| - ); | ||
| + private final Settings mSettings = new Settings( this ); | ||
| + | ||
| + private static ColorScheme createColourScheme() { | ||
| + return new ColorScheme.Builder() | ||
| + .commands( bold ) | ||
| + .options( fg_blue, bold ) | ||
| + .parameters( fg_blue ) | ||
| + .optionParams( italic ) | ||
| + .errors( fg_red, bold ) | ||
| + .stackTraces( italic ) | ||
| + .build(); | ||
| + } | ||
| + | ||
| + /** | ||
| + * Reads all text from standard input, converts its straight quotes, and | ||
| + * writes the result to standard output. Lexemes that could not be resolved | ||
| + * are written to standard error, as is the stack trace of any exception | ||
| + * raised while reading or converting. | ||
| + */ | ||
| + public void run() { | ||
| + final StringBuilder sb = new StringBuilder(); | ||
| + | ||
| + try( final BufferedReader reader = open( System.in ) ) { | ||
| + String line; | ||
| + final var sep = System.lineSeparator(); | ||
| + | ||
| + while( (line = reader.readLine()) != null ) { | ||
| + sb.append( line ); | ||
| + sb.append( sep ); | ||
| + } | ||
| + | ||
| + // Every buffered line already ends with a separator, so print() is used | ||
| + // instead of println() to avoid appending a spurious blank line. | ||
| + System.out.print( | ||
| + Converter.convert( sb.toString(), System.err::println ) | ||
| + ); | ||
| + System.out.flush(); | ||
| + } catch( final Exception ex ) { | ||
| + ex.printStackTrace( System.err ); | ||
| + } | ||
| + } | ||
| + | ||
| + /** | ||
| + * Returns the command-line settings instance created for this application. | ||
| + * | ||
| + * @return The {@link Settings} bound to this instance. | ||
| + */ | ||
| + private Settings getSettings() { | ||
| + return mSettings; | ||
| + } | ||
| /** | ||
| - * Converts straight quotes to curly quotes and primes. Any quotation marks | ||
| - * that cannot be converted are passed to the {@link Consumer}. | ||
| + * Returns the application version number retrieved from the application | ||
| + * properties file. The properties file is generated at build time, which | ||
| + * keys off the repository. | ||
| * | ||
| - * @param text The text to parse. | ||
| - * @param unresolved Recipient for ambiguous {@link Lexeme}s. | ||
| - * @return The given text string with as many straight quotes converted to | ||
| - * curly quotes as is feasible. | ||
| + * @return The application version number. | ||
| + * @throws RuntimeException An {@link IOException} occurred. | ||
| */ | ||
| - public static String convert( | ||
| - final String text, final Consumer<Lexeme> unresolved ) { | ||
| - final var parser = new Parser( text ); | ||
| - final var tokens = new ArrayList<Token>(); | ||
| + private static String getVersion() { | ||
| + final Properties properties; | ||
| + | ||
| + try { | ||
| + properties = loadProperties( "app.properties" ); | ||
| + } catch( final Exception ex ) { | ||
| + // Surface any failure to load the properties as an unchecked exception. | ||
| + throw new RuntimeException( ex ); | ||
| + } | ||
| + | ||
| + return properties.getProperty( "application.version" ); | ||
| + } | ||
| - // Parse the tokens and consume all unresolved lexemes. | ||
| - parser.parse( tokens::add, unresolved ); | ||
| + @SuppressWarnings( "SameParameterValue" ) | ||
| + private static Properties loadProperties( final String resource ) | ||
| + throws IOException { | ||
| + final var properties = new Properties(); | ||
| + properties.load( getResourceAsStream( getResourceName( resource ) ) ); | ||
| + return properties; | ||
| + } | ||
| - // The parser may emit tokens in any order. | ||
| - sort( tokens ); | ||
| + /** | ||
| + * Prefixes the given resource name with this class's package path, using | ||
| + * a forward slash as the separator. | ||
| + * | ||
| + * @param resource File name of the resource. | ||
| + * @return The package path, a slash, then the resource name. | ||
| + */ | ||
| + private static String getResourceName( final String resource ) { | ||
| + return format( "%s/%s", getPackagePath(), resource ); | ||
| + } | ||
| - final var result = new StringBuilder( text.length() ); | ||
| - var position = 0; | ||
| + /** | ||
| + * Returns this class's package name with each period replaced by a | ||
| + * forward slash. | ||
| + * | ||
| + * @return The package name expressed as a slash-separated path. | ||
| + */ | ||
| + private static String getPackagePath() { | ||
| + return KeenQuotes.class.getPackageName().replace( '.', '/' ); | ||
| + } | ||
| - for( final var token : tokens ) { | ||
| - if( position <= token.began() ) { | ||
| - result.append( text, position, token.began() ); | ||
| - result.append( REPLACEMENTS.get( token.getType() ) ); | ||
| - } | ||
| + /** | ||
| + * Opens the named resource using this class's class loader. | ||
| + * | ||
| + * @param resource Path to the resource, as understood by the class loader. | ||
| + * @return A stream for reading the resource, or {@code null} when the | ||
| + * class loader cannot find it. | ||
| + */ | ||
| + private static InputStream getResourceAsStream( final String resource ) { | ||
| + return KeenQuotes.class.getClassLoader().getResourceAsStream( resource ); | ||
| + } | ||
| - position = token.ended(); | ||
| - } | ||
| + /** | ||
| + * Wraps the given stream in a buffered character reader. | ||
| + * | ||
| + * @param in The stream to read. | ||
| + * @return A {@link BufferedReader} that decodes the given stream. | ||
| + */ | ||
| + // NOTE(review): no charset is given, so decoding uses the JVM's default | ||
| + // charset, which is platform-dependent before Java 18 -- confirm whether | ||
| + // UTF-8 should be forced for both input and output. | ||
| + @SuppressWarnings( "SameParameterValue" ) | ||
| + private static BufferedReader open( final InputStream in ) { | ||
| + return new BufferedReader( new InputStreamReader( in ) ); | ||
| + } | ||
| - return result.append( text.substring( position ) ).toString(); | ||
| + public static void main( final String[] args ) { | ||
| + final var app = new KeenQuotes(); | ||
| + final var parser = new CommandLine( app.getSettings() ); | ||
| + parser.setColorScheme( createColourScheme() ); | ||
| + | ||
| + final var exitCode = parser.execute( args ); | ||
| + final var parseResult = parser.getParseResult(); | ||
| + | ||
| + if( parseResult.isUsageHelpRequested() ) { | ||
| + System.exit( exitCode ); | ||
| + } | ||
| + else if( parseResult.isVersionHelpRequested() ) { | ||
| + System.out.println( getVersion() ); | ||
| + System.exit( exitCode ); | ||
| + } | ||
| } | ||
| } |
| +/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */ | ||
| +package com.whitemagicsoftware.keenquotes; | ||
| + | ||
| +import picocli.CommandLine; | ||
| + | ||
| +import java.util.concurrent.Callable; | ||
| + | ||
| +/** | ||
| + * Declares the command-line options for the application. Invoking | ||
| + * {@link #call()} runs the {@link KeenQuotes} instance given at construction. | ||
| + * The option fields are declared here but not read within this class. | ||
| + */ | ||
| +@CommandLine.Command( | ||
| + name = "KeenQuotes", | ||
| + mixinStandardHelpOptions = true, | ||
| + description = "Converts straight quotes to curly quotes." | ||
| +) | ||
| +@SuppressWarnings( {"FieldMayBeFinal", "CanBeFinal"} ) | ||
| +public final class Settings implements Callable<Integer> { | ||
| + /** | ||
| + * Main executable class, run when {@link #call()} is invoked. | ||
| + */ | ||
| + private final KeenQuotes mMain; | ||
| + | ||
| + /** | ||
| + * List of unambiguous contractions having leading apostrophes. | ||
| + */ | ||
| + @CommandLine.Option( | ||
| + names = {"-ub", "--unamb-began"}, | ||
| + description = | ||
| + "Contractions to treat as unambiguous (e.g., cause,bout)", | ||
| + paramLabel = "words" | ||
| + ) | ||
| + private String[] mUnambiguousBegan; | ||
| + | ||
| + /** | ||
| + * List of unambiguous contractions having lagging apostrophes. | ||
| + */ | ||
| + @CommandLine.Option( | ||
| + names = {"-ue", "--unamb-ended"}, | ||
| + description = | ||
| + "Contractions to treat as unambiguous (e.g., frien,thinkin)", | ||
| + paramLabel = "words" | ||
| + ) | ||
| + private String[] mUnambiguousEnded; | ||
| + | ||
| + /** | ||
| + * List of ambiguous contractions having leading apostrophes. | ||
| + */ | ||
| + @CommandLine.Option( | ||
| + names = {"-ab", "--amb-began"}, | ||
| + description = | ||
| + "Contractions to treat as ambiguous (e.g., sup,kay)", | ||
| + paramLabel = "words" | ||
| + ) | ||
| + private String[] mAmbiguousBegan; | ||
| + | ||
| + /** | ||
| + * List of ambiguous contractions having lagging apostrophes. | ||
| + */ | ||
| + @CommandLine.Option( | ||
| + names = {"-ae", "--amb-ended"}, | ||
| + description = | ||
| + "Contractions to treat as ambiguous (e.g., gi,o)", | ||
| + paramLabel = "words" | ||
| + ) | ||
| + private String[] mAmbiguousEnded; | ||
| + | ||
| + /** | ||
| + * Flag for the option described as listing all ambiguous and unambiguous | ||
| + * contractions. | ||
| + */ | ||
| + @CommandLine.Option( | ||
| + names = {"-l", "--list"}, | ||
| + description = "List all ambiguous and unambiguous contractions" | ||
| + ) | ||
| + private boolean mDisplayList; | ||
| + | ||
| + /** | ||
| + * Creates settings bound to the application that {@link #call()} will run. | ||
| + * | ||
| + * @param main The application instance to run; expected to be non-null. | ||
| + */ | ||
| + public Settings( final KeenQuotes main ) { | ||
| + // Only checked when assertions are enabled (e.g., java -ea). | ||
| + assert main != null; | ||
| + mMain = main; | ||
| + } | ||
| + | ||
| + /** | ||
| + * Invoked after the command-line arguments are parsed to launch the | ||
| + * application. | ||
| + * | ||
| + * @return Exit level zero. | ||
| + */ | ||
| + @Override | ||
| + public Integer call() { | ||
| + mMain.run(); | ||
| + return 0; | ||
| + } | ||
| +} | ||
| package com.whitemagicsoftware.keenquotes; | ||
| +import org.junit.jupiter.api.Disabled; | ||
| import org.junit.jupiter.api.Test; | ||
| import org.junit.jupiter.params.ParameterizedTest; | ||
| import org.junit.jupiter.params.provider.ValueSource; | ||
| import java.io.BufferedReader; | ||
| import java.io.IOException; | ||
| import java.io.InputStreamReader; | ||
| import java.util.function.Function; | ||
| +import static com.whitemagicsoftware.keenquotes.Converter.convert; | ||
| import static java.lang.System.out; | ||
| import static org.junit.jupiter.api.Assertions.assertEquals; | ||
| */ | ||
| @Test | ||
| - //@Disabled | ||
| + @Disabled | ||
| public void test_parse_SingleLine_Parsed() { | ||
| - out.println( KeenQuotes.convert( | ||
| + out.println( convert( | ||
| "\"’Kearney lives on the banks of Killarney—’", | ||
| out::println | ||
| @Test | ||
| public void test_Parse_StraightQuotes_CurlyQuotes() throws IOException { | ||
| - testConverter( text -> KeenQuotes.convert( text, ( lexeme ) -> {} ) ); | ||
| + testConverter( text -> convert( text, ( lexeme ) -> {} ) ); | ||
| } | ||
| @ParameterizedTest | ||
| - @ValueSource( strings = {"westrup"} ) | ||
| + @ValueSource( strings = {"habberton"} ) | ||
| void test_Parse_Story_Converted( final String filename ) throws IOException { | ||
| final var sb = new StringBuilder( 2 ^ 20 ); | ||
| } | ||
| - System.out.println( KeenQuotes.convert( sb.toString(), out::println ) ); | ||
| + System.out.println( convert( sb.toString(), out::println ) ); | ||
| } | ||
| Delta | 286 lines added, 53 lines removed, 233-line increase |
|---|