Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Differentiate exporting HTML entities versus characters from honouring straight quotes in XML/HTML elements

Author: Dave Jarvis <email>
Date: 2022-10-07 23:40:54 GMT-0700
Commit: cb43e015cc1dbb1bed424339b5de1a644c7a76fa
Parent: 944cce6
src/main/java/com/whitemagicsoftware/keenquotes/app/KeenQuotes.java
import java.util.Properties;
+import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_PLAIN;
+import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_XML;
import static java.lang.String.format;
import static java.lang.System.*;
else {
try {
- final var filter = settings.filterXml();
- final var c = new Curler( contractions, filter );
+ final var c = new Curler(
+ contractions,
+ settings.filterXml() ? FILTER_XML : FILTER_PLAIN,
+ settings.entities()
+ );
out.print( convert( c ) );
src/main/java/com/whitemagicsoftware/keenquotes/app/Settings.java
/**
+ * Encode quotation marks using HTML entities.
+ */
+ @CommandLine.Option(
+ names = {"-e", "--entities"},
+ description = "Encode quotation marks using HTML entities"
+ )
+ private boolean mEntities;
+
+ /**
* Enable the {@link XmlFilter}.
*/
* @return {@code true} to list the contractions.
*/
- boolean displayList() {
- return mDisplayList;
- }
+ boolean displayList() { return mDisplayList; }
+ /**
+ * Answers whether quotation marks within XML elements are ignored.
+ *
+ * @return {@code true} to honour (i.e., leave uncurled) straight quotation marks inside XML elements.
+ */
boolean filterXml() { return mFilterXml; }
+
+ /**
+ * Answers whether quotation marks must be encoded using HTML entities.
+ *
+ * @return {@code true} to encode quotation marks using HTML entities.
+ */
+ boolean entities() { return mEntities; }
List<String> getBeganUnambiguous() {
src/main/java/com/whitemagicsoftware/keenquotes/parser/Curler.java
package com.whitemagicsoftware.keenquotes.parser;
+import com.whitemagicsoftware.keenquotes.lex.FilterType;
+import com.whitemagicsoftware.keenquotes.lex.LexerFilter;
+
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.function.Function;
-
-import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_PLAIN;
-import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_XML;
/**
* Resolves straight quotes into curly quotes throughout a document.
*/
@SuppressWarnings( "unused" )
public class Curler implements Function<String, String> {
private final Contractions mContractions;
+ private final LexerFilter mFilter;
private final boolean mEntities;
public Curler(
final Contractions c,
+ final FilterType filterType,
final boolean entities
) {
assert c != null;
mContractions = c;
mEntities = entities;
+ mFilter = filterType.filter();
}
mContractions,
replace( output, offset, mEntities ),
- (mEntities ? FILTER_XML : FILTER_PLAIN).filter()
+ mFilter
);
src/test/java/com/whitemagicsoftware/keenquotes/parser/CurlerTest.java
package com.whitemagicsoftware.keenquotes.parser;
+import com.whitemagicsoftware.keenquotes.lex.FilterType;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import java.io.IOException;
import java.util.function.Function;
+import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_PLAIN;
+import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_XML;
import static com.whitemagicsoftware.keenquotes.texts.TestResource.open;
import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
@Test
public void test_Parse_UncurledQuotes1_CurlyQuotes() throws IOException {
- testCurler( createCurler( true ), "unambiguous-1-pass.txt" );
+ testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-1-pass.txt" );
}
@Test
public void test_Parse_UncurledQuotes2_CurlyQuotes() throws IOException {
- testCurler( createCurler( true ), "unambiguous-2-pass.txt" );
+ testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-2-pass.txt" );
}
@Disabled
@SuppressWarnings( {"unused", "JUnit3StyleTestMethodInJUnit4Class"} )
public void test_Parse_AmbiguousQuotes_PartiallyCurled() throws IOException {
- testCurler( createCurler( false ), "ambiguous-n-pass.txt" );
+ testCurler( createCurler( FILTER_PLAIN, false ), "ambiguous-n-pass.txt" );
}
@Test
public void test_Parse_UncurledQuotesXml_CurlyQuotes() throws IOException {
- testCurler( createCurler( true ), "xml.txt" );
+ testCurler( createCurler( FILTER_XML, true ), "xml.txt" );
}
@Test
public void test_Parse_UncurledQuotesI11l_CurlyQuotes() throws IOException {
- testCurler( createCurler( true ), "i18n.txt" );
+ testCurler( createCurler( FILTER_PLAIN, true ), "i18n.txt" );
}
}
- final var curler = createCurler( true );
+ final var curler = createCurler( FILTER_XML, true );
System.out.println( curler.apply( sb.toString() ) );
}
}
- private Function<String, String> createCurler( final boolean entities ) {
- return new Curler( createContractions(), entities );
+ private Function<String, String> createCurler(
+ final FilterType filterType,
+ final boolean entities ) {
+ return new Curler( createContractions(), filterType, entities );
}
Delta: 49 lines added, 17 lines removed, 32-line increase