Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Adds apostrophe entity selection

Author: Dave Jarvis <email>
Date: 2024-10-09 16:58:44 GMT-0700
Commit: 1fae3e3e90038c3b4032e60b0e2d980dcf590854
Parent: a94a338
build.gradle
}
-tasks.withType( JavaCompile ).configureEach {
- options.compilerArgs += "--enable-preview"
-}
-
-tasks.withType( JavaExec ).configureEach {
- jvmArgs += "--enable-preview"
-}
-
-tasks.withType( Test ).configureEach {
- jvmArgs += "--enable-preview"
-}
-
src/main/java/com/whitemagicsoftware/keenquotes/app/KeenQuotes.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.app;
contractions,
settings.filterXml() ? FILTER_XML : FILTER_PLAIN,
- settings.entities()
+ settings.apostrophe()
);
src/main/java/com/whitemagicsoftware/keenquotes/app/Settings.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.app;
import com.whitemagicsoftware.keenquotes.lex.XmlFilter;
+import com.whitemagicsoftware.keenquotes.parser.Apostrophe;
import picocli.CommandLine;
import java.util.List;
import java.util.concurrent.Callable;
+import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.fromType;
import static java.util.Arrays.asList;
import static java.util.Collections.emptyList;
/**
- * Encode quotation marks using HTML entities.
+ * Set the type of entity to use when encoding apostrophes.
*/
@CommandLine.Option(
- names = {"-e", "--entities"},
- description = "Encode quotation marks using HTML entities"
+ names = {"-a", "--apostrophe"},
+ description = "Converted apostrophe entity (regular, modifier, hex, entity)"
)
- private boolean mEntities;
+ private String mApostrophe;
/**
 * @return The apostrophe conversion matching the command-line value; the
 * regular (unconverted) apostrophe when the value is absent or unrecognized.
*/
- boolean entities() { return mEntities; }
+ Apostrophe apostrophe() { return fromType( mApostrophe ); }
List<String> getBeganUnambiguous() {
src/main/java/com/whitemagicsoftware/keenquotes/parser/Apostrophe.java
+/* Copyright 2024 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+package com.whitemagicsoftware.keenquotes.parser;
+
+/**
+ * When converting quotation marks, these values are used to indicate what
+ * type of entity to use in the conversion.
+ *
+ * Each constant pairs the replacement text (returned by {@link #toString()})
+ * with a short type name (matched by {@link #isType(String)}). The type
+ * names are: regular, modifier, hex, and entity.
+ */
+public enum Apostrophe {
+ /**
+ * No conversion is performed.
+ * This is also the value {@link #fromType(String)} returns when no type
+ * name matches.
+ */
+ CONVERT_REGULAR( "'", "regular" ),
+ /**
+ * Apostrophes become MODIFIER LETTER APOSTROPHE ({@code &#x2bc;}).
+ */
+ CONVERT_MODIFIER( "&#x2bc;", "modifier" ),
+ /**
+ * Apostrophes become APOSTROPHE ({@code &#x27;}).
+ */
+ CONVERT_APOS_HEX( "&#x27;", "hex" ),
+ /**
+ * Apostrophes become XML APOSTROPHE ({@code &apos;}).
+ */
+ CONVERT_APOS_ENTITY( "&apos;", "entity" );
+
+ private final String mCode; // Replacement text; returned by toString().
+ private final String mType; // Type name; compared, ignoring case, by isType().
+
+ Apostrophe( final String code, final String type ) {
+ mCode = code;
+ mType = type;
+ }
+
+ public boolean isType( final String type ) { // True when the given name equals this constant's type name, ignoring case.
+ return mType.equalsIgnoreCase( type );
+ }
+
+ public String toString() { // Yields the replacement text, not the constant's name.
+ return mCode;
+ }
+
+ /**
+ * Returns the instance that matches the given type.
+ * Matching ignores case. When no instance matches (including when the
+ * given type is {@code null}), {@link #CONVERT_REGULAR} is returned rather
+ * than raising an error.
+ *
+ * @param type The type of apostrophe entity conversion to use.
+ * @return The {@link Apostrophe} to use when converting entities.
+ */
+ public static Apostrophe fromType( final String type ) {
+ for( final var apostrophe : Apostrophe.values() ) {
+ if( apostrophe.isType( type ) ) {
+ return apostrophe;
+ }
+ }
+
+ return Apostrophe.CONVERT_REGULAR; // Fallback: unmatched names mean no conversion.
+ }
+}
src/main/java/com/whitemagicsoftware/keenquotes/parser/Contractions.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.parser;
* Placeholder for various types of contractions.
*/
-@SuppressWarnings( { "SpellCheckingInspection", "GrazieInspection" } )
+@SuppressWarnings( "SpellCheckingInspection" )
public class Contractions {
// Ensure that 'n' isn't matched for ambiguity by enforcing length, yet
// allow o' to match because 'a sentence can end with the letter o'.
- return getEndedAmbiguous().contains( check ) ||
- check.endsWith( "s" ) || check.endsWith( "z" ) ||
- check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" ));
+ return
+ getEndedAmbiguous().contains( check ) ||
+ check.endsWith( "s" ) ||
+ check.endsWith( "z" ) ||
+ check.endsWith( "x" ) ||
+ (check.length() > 1 && check.endsWith( "n" ));
}
return
toString( getBeganEndedUnambiguous(), "Unambiguous Began/Ended", "'%s" ) +
- toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) +
- toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) +
- toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) +
- toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" );
+ toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) +
+ toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) +
+ toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) +
+ toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" );
}
src/main/java/com/whitemagicsoftware/keenquotes/parser/Curler.java
private final Contractions mContractions;
private final LexerFilter mFilter;
- private final boolean mEntities;
+ private final Apostrophe mApostrophe;
/**
* Maps quotes to curled character equivalents.
*
- * @param c Contractions listings.
- * @param entities {@code true} to convert quotation marks to HTML entities.
+ * @param c Contractions listings.
+ * @param apostrophe How to convert quotation marks to HTML entities.
*/
public Curler(
final Contractions c,
final FilterType filterType,
- final boolean entities
+ final Apostrophe apostrophe
) {
assert c != null;
mContractions = c;
- mEntities = entities;
mFilter = filterType.filter();
+ mApostrophe = apostrophe;
}
text,
mContractions,
- replace( output, offset, mEntities ),
+ replace( output, offset, mApostrophe ),
mFilter
);
return output.toString();
}
/**
* Replaces non-ambiguous tokens with their equivalent string representation.
*
- * @param output Continuously updated result document.
- * @param offset Accumulating index where {@link Token} is replaced.
- * @param entities {@code true} to convert quotation marks to HTML entities.
+ * @param output Continuously updated result document.
+ * @param offset Accumulating index where {@link Token} is replaced.
+ * @param apostrophe How to convert quotation marks to HTML entities.
* @return Instructions to replace a {@link Token} in the result document.
*/
public static Consumer<Token> replace(
final StringBuilder output,
final AtomicInteger offset,
- final boolean entities
+ final Apostrophe apostrophe
) {
return token -> {
if( !token.isAmbiguous() ) {
- final var text = token.toString( entities );
+ final var text = token.toString( apostrophe );
output.replace(
src/main/java/com/whitemagicsoftware/keenquotes/parser/Token.java
-/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
+/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
package com.whitemagicsoftware.keenquotes.parser;
import com.whitemagicsoftware.keenquotes.lex.Lexeme;
import com.whitemagicsoftware.keenquotes.lex.LexemeGlyph;
import java.util.EnumMap;
import java.util.Map;
import static com.whitemagicsoftware.keenquotes.lex.LexemeGlyph.*;
+import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.*;
import static com.whitemagicsoftware.keenquotes.parser.TokenType.*;
ENTITIES.put( QUOTE_STRAIGHT_SINGLE, "'" );
ENTITIES.put( QUOTE_STRAIGHT_DOUBLE, "\"" );
- ENTITIES.put( QUOTE_APOSTROPHE, "&apos;" );
ENTITIES.put( QUOTE_PRIME_SINGLE, "&prime;" );
ENTITIES.put( QUOTE_PRIME_DOUBLE, "&Prime;" );
boolean isAmbiguous() {
return mTokenType == QUOTE_AMBIGUOUS_SINGLE ||
- mTokenType == QUOTE_AMBIGUOUS_DOUBLE ||
- mTokenType == QUOTE_AMBIGUOUS_LEADING ||
- mTokenType == QUOTE_AMBIGUOUS_LAGGING;
+ mTokenType == QUOTE_AMBIGUOUS_DOUBLE ||
+ mTokenType == QUOTE_AMBIGUOUS_LEADING ||
+ mTokenType == QUOTE_AMBIGUOUS_LAGGING;
}
public String toXml() {
return "<" +
- mTokenType +
- " type='" + getType().name() + "'" +
- " began='" + began() + "'" +
- " ended='" + ended() + "' />";
+ mTokenType +
+ " type='" + getType().name() + "'" +
+ " began='" + began() + "'" +
+ " ended='" + ended() + "' />";
}
/**
* Converts this token to its string representation, which will either be
* an HTML entity or a character.
*
- * @param entities {@code true} to convert quotation marks to HTML entities.
+ * @param apostrophe How to convert quotation marks to HTML entities.
* @return A plain quotation mark character or an HTML entity.
*/
- public String toString( final boolean entities ) {
+ public String toString( final Apostrophe apostrophe ) {
final var glyph = mLexeme.getType().glyph();
+ final var tokenType = getType();
- return entities
- ? I18N_ENTITIES.getOrDefault( glyph, ENTITIES.get( getType() ) )
- : CHARS.getOrDefault( getType(), glyph.text() );
+ // Retrieves the base glyph, unless curling was requested. When curling
+ // apostrophes, this will determine the user-selected type of apostrophe
+ // entity to use.
+ return apostrophe == CONVERT_REGULAR
+ ? CHARS.getOrDefault( tokenType, glyph.text() )
+ : I18N_ENTITIES.getOrDefault( glyph, convert( tokenType, apostrophe ) );
+ }
+
+ private String convert(
+ final TokenType tokenType,
+ final Apostrophe apostrophe ) {
+ return tokenType == QUOTE_APOSTROPHE
+ ? apostrophe.toString()
+ : ENTITIES.get( tokenType );
}
@Override
public String toString() {
return getClass().getSimpleName() + '[' +
- "mType=" + getType() +
- ", mBegan=" + began() +
- ", mEnded=" + ended() +
- ']';
+ "mType=" + getType() +
+ ", mBegan=" + began() +
+ ", mEnded=" + ended() +
+ ']';
}
}
src/test/java/com/whitemagicsoftware/keenquotes/parser/AmbiguityResolverTest.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+/* Copyright 2022-2024 White Magic Software, Ltd. -- All rights reserved.
*
* SPDX-License-Identifier: BSD-2-Clause
import java.util.concurrent.atomic.AtomicInteger;
+import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.*;
import static com.whitemagicsoftware.keenquotes.parser.Curler.*;
import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
input,
CONTRACTIONS,
- replace( output, offset, true ),
- filter -> false
+ replace( output, offset, CONVERT_APOS_ENTITY ),
+ _ -> false
);
src/test/java/com/whitemagicsoftware/keenquotes/parser/CurlerTest.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+/* Copyright 2022-2024 White Magic Software, Ltd. -- All rights reserved.
*
* SPDX-License-Identifier: BSD-2-Clause
import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_PLAIN;
import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_XML;
+import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.*;
import static com.whitemagicsoftware.keenquotes.texts.TestResource.open;
import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
@Test
public void test_Parse_UncurledQuotes1_CurlyQuotes() throws IOException {
- testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-1-pass.txt" );
+ testCurler(
+ createCurler( FILTER_PLAIN, CONVERT_APOS_ENTITY ),
+ "unambiguous-1-pass.txt"
+ );
}
@Test
public void test_Parse_UncurledQuotes2_CurlyQuotes() throws IOException {
- testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-2-pass.txt" );
+ testCurler(
+ createCurler( FILTER_PLAIN, CONVERT_APOS_ENTITY ),
+ "unambiguous-2-pass.txt"
+ );
}
@Disabled
@SuppressWarnings( "unused" )
public void test_Parse_AmbiguousQuotes_PartiallyCurled() throws IOException {
- testCurler( createCurler( FILTER_PLAIN, false ), "ambiguous-n-pass.txt" );
+ testCurler(
+ createCurler( FILTER_PLAIN, CONVERT_REGULAR ), "ambiguous-n-pass.txt"
+ );
}
@Test
public void test_Parse_UncurledQuotesXml_CurlyQuotes() throws IOException {
- testCurler( createCurler( FILTER_XML, true ), "xml.txt" );
+ testCurler(
+ createCurler( FILTER_XML, CONVERT_APOS_ENTITY ), "xml.txt"
+ );
}
@Test
public void test_Parse_UncurledQuotesI11l_CurlyQuotes() throws IOException {
- testCurler( createCurler( FILTER_PLAIN, true ), "i18n.txt" );
+ testCurler(
+ createCurler( FILTER_PLAIN, CONVERT_APOS_ENTITY ), "i18n.txt"
+ );
}
*/
@ParameterizedTest
- @ValueSource( strings = {"autonoma"} )
+ @ValueSource( strings = { "autonoma" } )
@Disabled
void test_Parse_Story_Converted( final String filename ) throws IOException {
final var sb = new StringBuilder( 2 ^ 20 );
+ final var name = String.format( "%s%s", filename, ".html" );
- try( final var reader = open( STR."\{filename}.html" ) ) {
+ try( final var reader = open( name ) ) {
String line;
while( (line = reader.readLine()) != null ) {
sb.append( line ).append( SEP );
}
}
- final var curler = createCurler( FILTER_XML, true );
+ final var curler = createCurler( FILTER_XML, CONVERT_APOS_ENTITY );
System.out.println( curler.apply( sb.toString() ) );
}
private Function<String, String> createCurler(
final FilterType filterType,
- final boolean entities ) {
- return new Curler( createContractions(), filterType, entities );
+ final Apostrophe apostrophe ) {
+ return new Curler( createContractions(), filterType, apostrophe );
}
src/test/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitterTest.java
-/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
+/* Copyright 2022-2024 White Magic Software, Ltd. -- All rights reserved.
*
* SPDX-License-Identifier: BSD-2-Clause
import java.util.concurrent.atomic.AtomicInteger;
+import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.CONVERT_APOS_ENTITY;
import static com.whitemagicsoftware.keenquotes.parser.Curler.replace;
import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
input,
CONTRACTIONS,
- replace( output, offset, true ),
- filter -> false
+ replace( output, offset, CONVERT_APOS_ENTITY ),
+ _ -> false
);
src/test/resources/com/whitemagicsoftware/keenquotes/texts/unambiguous-1-pass.txt
Computer says, &ldquo;&lsquo;It is mysteries---&rsquo;&rdquo;
-He goes 'long with it.
-He goes &apos;long with it.
-
-The 'long and short' of it.
-The &lsquo;long and short&rsquo; of it.
-
src/test/resources/com/whitemagicsoftware/keenquotes/texts/unambiguous-2-pass.txt
&apos;Twas and &apos;tis whate&apos;er lay &apos;twixt dawn and dusk &apos;n River Styx.
+'He goes 'long with it.'
+&lsquo;He goes &apos;long with it.&rsquo;
+
# ########################################################################
# Possessives
Delta: 171 lines added, 81 lines removed, 90-line increase