| Author | DaveJarvis <email> |
|---|---|
| Date | 2024-10-09 16:58:44 GMT-0700 |
| Commit | 1fae3e3e90038c3b4032e60b0e2d980dcf590854 |
| Parent | a94a338 |
| } | ||
| -tasks.withType( JavaCompile ).configureEach { | ||
| - options.compilerArgs += "--enable-preview" | ||
| -} | ||
| - | ||
| -tasks.withType( JavaExec ).configureEach { | ||
| - jvmArgs += "--enable-preview" | ||
| -} | ||
| - | ||
| -tasks.withType( Test ).configureEach { | ||
| - jvmArgs += "--enable-preview" | ||
| -} | ||
| - |
| -/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */ | ||
| +/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved. | ||
| + * | ||
| + * SPDX-License-Identifier: BSD-2-Clause | ||
| + */ | ||
| package com.whitemagicsoftware.keenquotes.app; | ||
| contractions, | ||
| settings.filterXml() ? FILTER_XML : FILTER_PLAIN, | ||
| - settings.entities() | ||
| + settings.apostrophe() | ||
| ); | ||
| -/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */ | ||
| +/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved. | ||
| + * | ||
| + * SPDX-License-Identifier: BSD-2-Clause | ||
| + */ | ||
| package com.whitemagicsoftware.keenquotes.app; | ||
| import com.whitemagicsoftware.keenquotes.lex.XmlFilter; | ||
| +import com.whitemagicsoftware.keenquotes.parser.Apostrophe; | ||
| import picocli.CommandLine; | ||
| import java.util.List; | ||
| import java.util.concurrent.Callable; | ||
| +import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.fromType; | ||
| import static java.util.Arrays.asList; | ||
| import static java.util.Collections.emptyList; | ||
| /** | ||
| - * Encode quotation marks using HTML entities. | ||
| + * Set the type of entity to use when encoding apostrophes. | ||
| */ | ||
| @CommandLine.Option( | ||
| - names = {"-e", "--entities"}, | ||
| - description = "Encode quotation marks using HTML entities" | ||
| + names = {"-a", "--apostrophe"}, | ||
| + description = "Converted apostrophe entity (regular, modifier, hex, entity)" | ||
| ) | ||
| - private boolean mEntities; | ||
| + private String mApostrophe; | ||
| /** | ||
| * @return The {@link Apostrophe} matching the command-line option, or the regular apostrophe when the option is missing or unrecognized. | ||
| */ | ||
| - boolean entities() { return mEntities; } | ||
| + Apostrophe apostrophe() { return fromType( mApostrophe ); } | ||
| List<String> getBeganUnambiguous() { | ||
| +/* Copyright 2024 White Magic Software, Ltd. -- All rights reserved. | ||
| + * | ||
| + * SPDX-License-Identifier: BSD-2-Clause | ||
| + */ | ||
| +package com.whitemagicsoftware.keenquotes.parser; | ||
| + | ||
| +/** | ||
| + * When converting quotation marks, these values are used to indicate what | ||
| + * type of entity to use in the conversion. | ||
| + */ | ||
| +public enum Apostrophe { | ||
| + /** | ||
| + * No entity encoding is performed; quotation marks are written as characters. | ||
| + */ | ||
| + CONVERT_REGULAR( "'", "regular" ), | ||
| + /** | ||
| + * Apostrophes become MODIFIER LETTER APOSTROPHE ({@code ʼ}). | ||
| + */ | ||
| + CONVERT_MODIFIER( "ʼ", "modifier" ), | ||
| + /** | ||
| + * Apostrophes become APOSTROPHE ({@code &#x27;}). | ||
| + */ | ||
| + CONVERT_APOS_HEX( "&#x27;", "hex" ), | ||
| + /** | ||
| + * Apostrophes become XML APOSTROPHE ({@code &apos;}). | ||
| + */ | ||
| + CONVERT_APOS_ENTITY( "&apos;", "entity" ); | ||
| + | ||
| + private final String mCode; | ||
| + private final String mType; | ||
| + | ||
| + Apostrophe( final String code, final String type ) { | ||
| + mCode = code; | ||
| + mType = type; | ||
| + } | ||
| + | ||
| + public boolean isType( final String type ) { | ||
| + return mType.equalsIgnoreCase( type ); | ||
| + } | ||
| + | ||
| + public String toString() { | ||
| + return mCode; | ||
| + } | ||
| + | ||
| + /** | ||
| + * Returns the instance that matches the given type. | ||
| + * | ||
| + * @param type The type of apostrophe entity conversion to use. | ||
| + * @return The {@link Apostrophe} to use when converting entities. | ||
| + */ | ||
| + public static Apostrophe fromType( final String type ) { | ||
| + for( final var apostrophe : Apostrophe.values() ) { | ||
| + if( apostrophe.isType( type ) ) { | ||
| + return apostrophe; | ||
| + } | ||
| + } | ||
| + | ||
| + return Apostrophe.CONVERT_REGULAR; | ||
| + } | ||
| +} | ||
| -/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */ | ||
| +/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved. | ||
| + * | ||
| + * SPDX-License-Identifier: BSD-2-Clause | ||
| + */ | ||
| package com.whitemagicsoftware.keenquotes.parser; | ||
| * Placeholder for various types of contractions. | ||
| */ | ||
| -@SuppressWarnings( { "SpellCheckingInspection", "GrazieInspection" } ) | ||
| +@SuppressWarnings( "SpellCheckingInspection" ) | ||
| public class Contractions { | ||
| // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet | ||
| // allow o' to match because 'a sentence can end with the letter o'. | ||
| - return getEndedAmbiguous().contains( check ) || | ||
| - check.endsWith( "s" ) || check.endsWith( "z" ) || | ||
| - check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" )); | ||
| + return | ||
| + getEndedAmbiguous().contains( check ) || | ||
| + check.endsWith( "s" ) || | ||
| + check.endsWith( "z" ) || | ||
| + check.endsWith( "x" ) || | ||
| + (check.length() > 1 && check.endsWith( "n" )); | ||
| } | ||
| return | ||
| toString( getBeganEndedUnambiguous(), "Unambiguous Began/Ended", "'%s" ) + | ||
| - toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) + | ||
| - toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) + | ||
| - toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) + | ||
| - toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" ); | ||
| + toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) + | ||
| + toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) + | ||
| + toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) + | ||
| + toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" ); | ||
| } | ||
| private final Contractions mContractions; | ||
| private final LexerFilter mFilter; | ||
| - private final boolean mEntities; | ||
| + private final Apostrophe mApostrophe; | ||
| /** | ||
| * Maps quotes to curled character equivalents. | ||
| * | ||
| - * @param c Contractions listings. | ||
| - * @param entities {@code true} to convert quotation marks to HTML entities. | ||
| + * @param c Contractions listings. | ||
| + * @param apostrophe How to convert quotation marks to HTML entities. | ||
| */ | ||
| public Curler( | ||
| final Contractions c, | ||
| final FilterType filterType, | ||
| - final boolean entities | ||
| + final Apostrophe apostrophe | ||
| ) { | ||
| assert c != null; | ||
| mContractions = c; | ||
| - mEntities = entities; | ||
| mFilter = filterType.filter(); | ||
| + mApostrophe = apostrophe; | ||
| } | ||
| text, | ||
| mContractions, | ||
| - replace( output, offset, mEntities ), | ||
| + replace( output, offset, mApostrophe ), | ||
| mFilter | ||
| ); | ||
| return output.toString(); | ||
| } | ||
| /** | ||
| * Replaces non-ambiguous tokens with their equivalent string representation. | ||
| * | ||
| - * @param output Continuously updated result document. | ||
| - * @param offset Accumulating index where {@link Token} is replaced. | ||
| - * @param entities {@code true} to convert quotation marks to HTML entities. | ||
| + * @param output Continuously updated result document. | ||
| + * @param offset Accumulating index where {@link Token} is replaced. | ||
| + * @param apostrophe How to convert quotation marks to HTML entities. | ||
| * @return Instructions to replace a {@link Token} in the result document. | ||
| */ | ||
| public static Consumer<Token> replace( | ||
| final StringBuilder output, | ||
| final AtomicInteger offset, | ||
| - final boolean entities | ||
| + final Apostrophe apostrophe | ||
| ) { | ||
| return token -> { | ||
| if( !token.isAmbiguous() ) { | ||
| - final var text = token.toString( entities ); | ||
| + final var text = token.toString( apostrophe ); | ||
| output.replace( | ||
| -/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */ | ||
| +/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved. | ||
| + * | ||
| + * SPDX-License-Identifier: BSD-2-Clause | ||
| + */ | ||
| package com.whitemagicsoftware.keenquotes.parser; | ||
| import com.whitemagicsoftware.keenquotes.lex.Lexeme; | ||
| import com.whitemagicsoftware.keenquotes.lex.LexemeGlyph; | ||
| import java.util.EnumMap; | ||
| import java.util.Map; | ||
| import static com.whitemagicsoftware.keenquotes.lex.LexemeGlyph.*; | ||
| +import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.*; | ||
| import static com.whitemagicsoftware.keenquotes.parser.TokenType.*; | ||
| ENTITIES.put( QUOTE_STRAIGHT_SINGLE, "'" ); | ||
| ENTITIES.put( QUOTE_STRAIGHT_DOUBLE, "\"" ); | ||
| - ENTITIES.put( QUOTE_APOSTROPHE, "&apos;" ); | ||
| ENTITIES.put( QUOTE_PRIME_SINGLE, "′" ); | ||
| ENTITIES.put( QUOTE_PRIME_DOUBLE, "″" ); | ||
| boolean isAmbiguous() { | ||
| return mTokenType == QUOTE_AMBIGUOUS_SINGLE || | ||
| - mTokenType == QUOTE_AMBIGUOUS_DOUBLE || | ||
| - mTokenType == QUOTE_AMBIGUOUS_LEADING || | ||
| - mTokenType == QUOTE_AMBIGUOUS_LAGGING; | ||
| + mTokenType == QUOTE_AMBIGUOUS_DOUBLE || | ||
| + mTokenType == QUOTE_AMBIGUOUS_LEADING || | ||
| + mTokenType == QUOTE_AMBIGUOUS_LAGGING; | ||
| } | ||
| public String toXml() { | ||
| return "<" + | ||
| - mTokenType + | ||
| - " type='" + getType().name() + "'" + | ||
| - " began='" + began() + "'" + | ||
| - " ended='" + ended() + "' />"; | ||
| + mTokenType + | ||
| + " type='" + getType().name() + "'" + | ||
| + " began='" + began() + "'" + | ||
| + " ended='" + ended() + "' />"; | ||
| } | ||
| /** | ||
| * Converts this token to its string representation, which will either be | ||
| * an HTML entity or a character. | ||
| * | ||
| - * @param entities {@code true} to convert quotation marks to HTML entities. | ||
| + * @param apostrophe How to convert quotation marks to HTML entities. | ||
| * @return A plain quotation mark character or an HTML entity. | ||
| */ | ||
| - public String toString( final boolean entities ) { | ||
| + public String toString( final Apostrophe apostrophe ) { | ||
| final var glyph = mLexeme.getType().glyph(); | ||
| + final var tokenType = getType(); | ||
| - return entities | ||
| - ? I18N_ENTITIES.getOrDefault( glyph, ENTITIES.get( getType() ) ) | ||
| - : CHARS.getOrDefault( getType(), glyph.text() ); | ||
| + // Emits a plain character when no entity conversion was requested. | ||
| + // Otherwise emits an entity: apostrophes use the user-selected | ||
| + // apostrophe entity, all other tokens use their mapped entity. | ||
| + return apostrophe == CONVERT_REGULAR | ||
| + ? CHARS.getOrDefault( tokenType, glyph.text() ) | ||
| + : I18N_ENTITIES.getOrDefault( glyph, convert( tokenType, apostrophe ) ); | ||
| + } | ||
| + | ||
| + private String convert( | ||
| + final TokenType tokenType, | ||
| + final Apostrophe apostrophe ) { | ||
| + return tokenType == QUOTE_APOSTROPHE | ||
| + ? apostrophe.toString() | ||
| + : ENTITIES.get( tokenType ); | ||
| } | ||
| @Override | ||
| public String toString() { | ||
| return getClass().getSimpleName() + '[' + | ||
| - "mType=" + getType() + | ||
| - ", mBegan=" + began() + | ||
| - ", mEnded=" + ended() + | ||
| - ']'; | ||
| + "mType=" + getType() + | ||
| + ", mBegan=" + began() + | ||
| + ", mEnded=" + ended() + | ||
| + ']'; | ||
| } | ||
| } | ||
| -/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. | ||
| +/* Copyright 2022-2024 White Magic Software, Ltd. -- All rights reserved. | ||
| * | ||
| * SPDX-License-Identifier: BSD-2-Clause | ||
| import java.util.concurrent.atomic.AtomicInteger; | ||
| +import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.*; | ||
| import static com.whitemagicsoftware.keenquotes.parser.Curler.*; | ||
| import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs; | ||
| input, | ||
| CONTRACTIONS, | ||
| - replace( output, offset, true ), | ||
| - filter -> false | ||
| + replace( output, offset, CONVERT_APOS_ENTITY ), | ||
| + _ -> false | ||
| ); | ||
| -/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. | ||
| +/* Copyright 2022-2024 White Magic Software, Ltd. -- All rights reserved. | ||
| * | ||
| * SPDX-License-Identifier: BSD-2-Clause | ||
| import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_PLAIN; | ||
| import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_XML; | ||
| +import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.*; | ||
| import static com.whitemagicsoftware.keenquotes.texts.TestResource.open; | ||
| import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs; | ||
| @Test | ||
| public void test_Parse_UncurledQuotes1_CurlyQuotes() throws IOException { | ||
| - testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-1-pass.txt" ); | ||
| + testCurler( | ||
| + createCurler( FILTER_PLAIN, CONVERT_APOS_ENTITY ), | ||
| + "unambiguous-1-pass.txt" | ||
| + ); | ||
| } | ||
| @Test | ||
| public void test_Parse_UncurledQuotes2_CurlyQuotes() throws IOException { | ||
| - testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-2-pass.txt" ); | ||
| + testCurler( | ||
| + createCurler( FILTER_PLAIN, CONVERT_APOS_ENTITY ), | ||
| + "unambiguous-2-pass.txt" | ||
| + ); | ||
| } | ||
| @Disabled | ||
| @SuppressWarnings( "unused" ) | ||
| public void test_Parse_AmbiguousQuotes_PartiallyCurled() throws IOException { | ||
| - testCurler( createCurler( FILTER_PLAIN, false ), "ambiguous-n-pass.txt" ); | ||
| + testCurler( | ||
| + createCurler( FILTER_PLAIN, CONVERT_REGULAR ), "ambiguous-n-pass.txt" | ||
| + ); | ||
| } | ||
| @Test | ||
| public void test_Parse_UncurledQuotesXml_CurlyQuotes() throws IOException { | ||
| - testCurler( createCurler( FILTER_XML, true ), "xml.txt" ); | ||
| + testCurler( | ||
| + createCurler( FILTER_XML, CONVERT_APOS_ENTITY ), "xml.txt" | ||
| + ); | ||
| } | ||
| @Test | ||
| public void test_Parse_UncurledQuotesI11l_CurlyQuotes() throws IOException { | ||
| - testCurler( createCurler( FILTER_PLAIN, true ), "i18n.txt" ); | ||
| + testCurler( | ||
| + createCurler( FILTER_PLAIN, CONVERT_APOS_ENTITY ), "i18n.txt" | ||
| + ); | ||
| } | ||
| */ | ||
| @ParameterizedTest | ||
| - @ValueSource( strings = {"autonoma"} ) | ||
| + @ValueSource( strings = { "autonoma" } ) | ||
| @Disabled | ||
| void test_Parse_Story_Converted( final String filename ) throws IOException { | ||
| final var sb = new StringBuilder( 2 ^ 20 ); | ||
| + final var name = String.format( "%s%s", filename, ".html" ); | ||
| - try( final var reader = open( STR."\{filename}.html" ) ) { | ||
| + try( final var reader = open( name ) ) { | ||
| String line; | ||
| while( (line = reader.readLine()) != null ) { | ||
| sb.append( line ).append( SEP ); | ||
| } | ||
| } | ||
| - final var curler = createCurler( FILTER_XML, true ); | ||
| + final var curler = createCurler( FILTER_XML, CONVERT_APOS_ENTITY ); | ||
| System.out.println( curler.apply( sb.toString() ) ); | ||
| } | ||
| private Function<String, String> createCurler( | ||
| final FilterType filterType, | ||
| - final boolean entities ) { | ||
| - return new Curler( createContractions(), filterType, entities ); | ||
| + final Apostrophe apostrophe ) { | ||
| + return new Curler( createContractions(), filterType, apostrophe ); | ||
| } | ||
| -/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved. | ||
| +/* Copyright 2022-2024 White Magic Software, Ltd. -- All rights reserved. | ||
| * | ||
| * SPDX-License-Identifier: BSD-2-Clause | ||
| import java.util.concurrent.atomic.AtomicInteger; | ||
| +import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.CONVERT_APOS_ENTITY; | ||
| import static com.whitemagicsoftware.keenquotes.parser.Curler.replace; | ||
| import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs; | ||
| input, | ||
| CONTRACTIONS, | ||
| - replace( output, offset, true ), | ||
| - filter -> false | ||
| + replace( output, offset, CONVERT_APOS_ENTITY ), | ||
| + _ -> false | ||
| ); | ||
| Computer says, “‘It is mysteries---’” | ||
| -He goes 'long with it. | ||
| -He goes 'long with it. | ||
| - | ||
| -The 'long and short' of it. | ||
| -The ‘long and short’ of it. | ||
| - |
| 'Twas and 'tis whate'er lay 'twixt dawn and dusk 'n River Styx. | ||
| +'He goes 'long with it.' | ||
| +‘He goes 'long with it.’ | ||
| + | ||
| # ######################################################################## | ||
| # Possessives |
| Delta | 171 lines added, 81 lines removed, 90-line increase |
|---|