Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Add emitted open/close quote tallies, update test texts

Author: Dave Jarvis <email>
Date: 2021-06-06 22:24:31 GMT-0700
Commit: 72021e74fd62d4add95acfce9f30aad629fef18b
Parent: a74d89e
lib/src/main/java/com/keenwrite/quotes/Contractions.java
/**
 * Answers whether the lowercase form of the given word is contained in
 * {@code BEGAN_UNAMBIGUOUS}.
 *
 * @param word The word to look up; non-null is enforced by assertion only,
 *             so a {@code null} fails on dereference when assertions are off.
 * @return {@code true} if {@code BEGAN_UNAMBIGUOUS} contains the lowercase
 * word.
 */
public static boolean contractionBeganUnambiguously( final String word ) {
assert word != null;
-
// NOTE(review): no-arg toLowerCase() applies the default locale's case
// rules; confirm that is intended for the lookup.
return BEGAN_UNAMBIGUOUS.contains( word.toLowerCase() );
}
/**
 * Answers whether the lowercase form of the given word is contained in
 * {@code BEGAN_AMBIGUOUS}.
 *
 * @param word The word to look up; non-null is enforced by assertion only,
 *             so a {@code null} fails on dereference when assertions are off.
 * @return {@code true} if {@code BEGAN_AMBIGUOUS} contains the lowercase
 * word.
 */
public static boolean contractionBeganAmbiguously( final String word ) {
assert word != null;
-
// NOTE(review): no-arg toLowerCase() applies the default locale's case
// rules; confirm that is intended for the lookup.
return BEGAN_AMBIGUOUS.contains( word.toLowerCase() );
}
/**
 * Answers whether the lowercase form of the given word is contained in
 * {@code ENDED_AMBIGUOUS}, or ends with one of the suffixes "s", "n", "z",
 * "x", or "ch".
 *
 * @param word The word to check; non-null is enforced by assertion only,
 *             so a {@code null} fails on dereference when assertions are off.
 * @return {@code true} if the lowercase word is listed in
 * {@code ENDED_AMBIGUOUS} or has one of the suffixes above.
 */
public static boolean contractionEndedAmbiguously( final String word ) {
assert word != null;
-
// Lowercase once so the lookup and every suffix test see the same text.
// NOTE(review): no-arg toLowerCase() applies the default locale's case
// rules; confirm that is intended.
final var check = word.toLowerCase();
// NOTE(review): the suffix tests presumably cover words that may take a
// trailing apostrophe (e.g., plural possessives such as "Sams'" or a
// dropped "g" as in "thinkin'") -- confirm against the parser's usage.
return ENDED_AMBIGUOUS.contains( check ) || check.endsWith( "s" ) ||
check.endsWith( "n" ) || check.endsWith( "z" ) ||
check.endsWith( "x" ) || check.endsWith( "ch" );
}
/**
 * Answers whether the lowercase form of the given word is contained in
 * {@code ENDED_UNAMBIGUOUS}.
 *
 * @param word The word to look up; non-null is enforced by assertion only,
 *             so a {@code null} fails on dereference when assertions are off.
 * @return {@code true} if {@code ENDED_UNAMBIGUOUS} contains the lowercase
 * word.
 */
public static boolean contractionEndedUnambiguously( final String word ) {
assert word != null;
-
// NOTE(review): no-arg toLowerCase() applies the default locale's case
// rules; confirm that is intended for the lookup.
return ENDED_UNAMBIGUOUS.contains( word.toLowerCase() );
}
lib/src/main/java/com/keenwrite/quotes/LexemeType.java
NUMBER,
PUNCT,
+ OPENING_GROUP,
+ CLOSING_GROUP,
HYPHEN,
PERIOD,
lib/src/main/java/com/keenwrite/quotes/Lexer.java
}
}
+ else if( curr == '(' || curr == '{' || curr == '[' ) {
+ lexeme = createLexeme( OPENING_GROUP, began, i.getIndex() );
+ }
+ else if( curr == ')' || curr == '}' || curr == ']' ) {
+ lexeme = createLexeme( CLOSING_GROUP, began, i.getIndex() );
+ }
else if( curr != DONE ) {
lexeme = createLexeme( PUNCT, began, i.getIndex() );
lib/src/main/java/com/keenwrite/quotes/Parser.java
*/
private static final LexemeType[] LEADING_QUOTE_OPENING_SINGLE =
- new LexemeType[]{SPACE, HYPHEN, QUOTE_DOUBLE};
+ new LexemeType[]{SPACE, HYPHEN, QUOTE_DOUBLE, OPENING_GROUP};
/**
*/
private static final LexemeType[] LAGGING_QUOTE_CLOSING_SINGLE =
- new LexemeType[]{SPACE, HYPHEN, QUOTE_DOUBLE, EOL};
+ new LexemeType[]{SPACE, HYPHEN, QUOTE_DOUBLE, CLOSING_GROUP, EOL};
/**
* Double quotes preceded by these {@link LexemeType}s may be opening quotes.
*/
private static final LexemeType[] LEADING_QUOTE_OPENING_DOUBLE =
- new LexemeType[]{SPACE, HYPHEN, QUOTE_SINGLE};
+ new LexemeType[]{SPACE, HYPHEN, QUOTE_SINGLE, OPENING_GROUP};
/**
*/
private static final LexemeType[] LAGGING_QUOTE_CLOSING_DOUBLE =
- new LexemeType[]{SPACE, HYPHEN, QUOTE_SINGLE, EOL};
+ new LexemeType[]{SPACE, HYPHEN, QUOTE_SINGLE, CLOSING_GROUP, EOL};
/**
private final CircularFifoQueue<Lexeme> mLexemes =
new CircularFifoQueue<>( 3 );
+
+ /**
+ * Incremented for each opening single quote emitted. Used to help resolve
+ * ambiguities when single quote marks are balanced.
+ */
+ private int mOpeningSingleQuote;
+
+ /**
+ * Incremented for each closing single quote emitted. Used to help resolve
+ * ambiguities when single quote marks are balanced.
+ */
+ private int mClosingSingleQuote;
public Parser( final String text ) {
else if( (lex1.isSot() || lex1.anyType( LEADING_QUOTE_OPENING_SINGLE ))
&& lex3.anyType( LAGGING_QUOTE_OPENING_SINGLE ) ) {
- resolvedLeadingQuotes++;
consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex2 ) );
i.remove();
+ resolvedLeadingQuotes++;
+ mOpeningSingleQuote++;
}
else if( lex1.anyType( LEADING_QUOTE_CLOSING_SINGLE ) &&
(lex3.isEot() || lex3.anyType( LAGGING_QUOTE_CLOSING_SINGLE )) ) {
- resolvedLaggingQuotes++;
consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
i.remove();
+ resolvedLaggingQuotes++;
+ mClosingSingleQuote++;
}
else if( lex3.isType( NUMBER ) ) {
System.out.println( "ambig leading: " + ambiguousLeadingQuotes.size() );
System.out.println( "ambig lagging: " + ambiguousLaggingQuotes.size() );
- System.out.println( "unambig leading: " + resolvedLeadingQuotes );
- System.out.println( "unambig lagging: " + resolvedLaggingQuotes );
+ System.out.println( "resolv leading: " + resolvedLeadingQuotes );
+ System.out.println( "resolv lagging: " + resolvedLaggingQuotes );
if( resolvedLeadingQuotes == 1 && resolvedLaggingQuotes == 0 ) {
if( ambiguousLeadingQuotes.size() == 0 && ambiguousLaggingQuotes.size() == 1 ) {
- consumer.accept(
- new Token( QUOTE_CLOSING_SINGLE, ambiguousLaggingQuotes.get( 0 ) )
- );
+ final var balanced = mClosingSingleQuote - mOpeningSingleQuote == 0;
+ final var quote = balanced ? QUOTE_APOSTROPHE : QUOTE_CLOSING_SINGLE;
+ consumer.accept( new Token( quote, ambiguousLaggingQuotes.get( 0 ) ) );
}
}
// E.g., \"
consumer.accept( new Token( QUOTE_STRAIGHT_DOUBLE, lex1 ) );
+
+ if( lex2.isType( QUOTE_SINGLE ) &&
+ (lex3.isEot() || lex3.anyType( SPACE, HYPHEN, EOL )) ) {
+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex2 ) );
+ mClosingSingleQuote++;
+ }
}
else if( lex2.isType( QUOTE_DOUBLE ) &&
(lex1.isSot() || lex1.anyType( LEADING_QUOTE_OPENING_DOUBLE )) &&
lex3.anyType( LAGGING_QUOTE_OPENING_DOUBLE ) ) {
- // Examples: "'Twas, "", "...
+ // Examples: "", "..., "word, ---"word
consumer.accept( new Token( QUOTE_OPENING_DOUBLE, lex2 ) );
}
else if( lex2.isType( QUOTE_DOUBLE ) &&
lex1.anyType( LEADING_QUOTE_CLOSING_DOUBLE ) &&
(lex3.isEot() || lex3.anyType( LAGGING_QUOTE_CLOSING_DOUBLE )) ) {
+ // E.g., ..."', word"', ?"'
consumer.accept( new Token( QUOTE_CLOSING_DOUBLE, lex2 ) );
+ }
+ else if( lex1.isType( QUOTE_SINGLE ) &&
+ lex2.anyType( PUNCT, PERIOD ) && lex3.isType( QUOTE_DOUBLE ) ) {
+ // E.g., '," (contraction ruled out from previous conditionals)
+ consumer.accept( new Token( QUOTE_CLOSING_SINGLE, lex1 ) );
+ mClosingSingleQuote++;
}
else if( lex2.anyType( QUOTE_SINGLE, QUOTE_DOUBLE ) ) {
lib/src/test/java/com/keenwrite/quotes/SmartQuotesTest.java
/**
 * Prints the result of {@code SmartQuotes.replace} for sample text so the
 * conversion can be inspected by eye; the method makes no assertions.
 */
@Test
public void test_parse_SingleLine_Parsed() {
- out.println( SmartQuotes.replace( "Took place in '04, yes'm!"));
-
- out.println( "-------" );
- out.println( SmartQuotes.replace( "'Bout that time I says, 'Boys! I been " +
- "thinkin' 'bout th' Universe.'"));
-
- out.println( "-------" );
-
- out.println( SmartQuotes.replace( "\"John asked, 'What are you, beyond " +
- "\"somethin' shiny?\"'\" said Fred.\n" ) );
+ out.println( SmartQuotes.replace(
+ "'Bout that time I says, 'Boys! I been thinkin' 'bout th' Universe.'" )
+ );
}
lib/src/test/resources/com/keenwrite/quotes/smartypants.txt
&ldquo;I heard she said, &lsquo;That&apos;s Sam&apos;s&rsquo;,&rdquo; said the Sams&apos; cat.
-"'Janes said, ''E'll be spooky, Sam's son with the jack-o'-lantern!'" said the O'Mally twins'---y'know---ghosts in unison.
-&ldquo;&lsquo;Janes said, &lsquo;&apos;E&apos;ll be spooky, Sam&apos;s son with the jack-o&apos;-lantern!&rsquo;&rdquo; said the O&apos;Mally twins&apos;---y&apos;know---ghosts in unison.
+"'Jane said, ''E'll be spooky, Sam's son with the jack-o'-lantern!'" said the O'Mally twins'---y'know---ghosts in unison.
+&ldquo;&lsquo;Jane said, &lsquo;&apos;E&apos;ll be spooky, Sam&apos;s son with the jack-o&apos;-lantern!&rsquo;&rdquo; said the O&apos;Mally twins&apos;---y&apos;know---ghosts in unison.
'He's at Sams'
-&lsquo;He&apos; at Sams&rsquo;
+&lsquo;He&apos;s at Sams&rsquo;
ma'am
ma&apos;am
'Twas midnight
&apos;Twas midnight
"Hello," said the spider. "'Shelob' is my name."
&ldquo;Hello,&rdquo; said the spider. &ldquo;&lsquo;Shelob&rsquo; is my name.&rdquo;
-
-'A', 'B', and 'C' are letters.
-&lsquo;A&rsquo; &lsquo;B&rsquo; and &lsquo;C&rsquo; are letters.
-
-'Oak,' 'elm,'and 'beech' are names of trees. So is 'pine.'
-&lsquo;Oak,&rsquo; &lsquo;elm,&rsquo; and &lsquo;beech&rsquo; are names of trees. So is &lsquo;pine.&rsquo;
'He said, \"I want to go.\"' Were you alive in the 70's?
Workin' hard
Workin&apos; hard
-
-'He said, "Thinkin'."'
-&lsquo;He said, &ldquo;Thinkin&rsquo;.&rdquo;&rsquo;
"She said, 'Llamas'll languish, they'll--
iPad 3's battery life is not great.
iPad 3&apos;s battery life is not great.
+
+'Oak,' 'elm,' and 'beech' are names of trees. So is 'pine.'
+&lsquo;Oak,&rsquo; &lsquo;elm,&rsquo; and &lsquo;beech&rsquo; are names of trees. So is &lsquo;pine.&rsquo;
Book 'em, Danno. Rock 'n' roll. 'Cause 'twas the season.
Book &apos;em, Danno. Rock &apos;n&apos; roll. &apos;Cause &apos;twas the season.
+
+'He said, "Thinkin'."'
+&lsquo;He said, &ldquo;Thinkin&apos;.&rdquo;&rsquo;
+
+'A', 'B', and 'C' are letters.
+&lsquo;A&rsquo;, &lsquo;B&rsquo;, and &lsquo;C&rsquo; are letters.
Delta: 62 lines added, 38 lines removed, 24-line increase