Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Add and fix test for mixed I18N quotation marks

Author: Dave Jarvis <email>
Date: 2022-10-08 10:30:24 GMT-0700
Commit: 1d9ef4b5dbd9669f43e635a0d0d5c9130b38d323
Parent: cb43e01
src/main/java/com/whitemagicsoftware/keenquotes/lex/Lexeme.java
/**
+ * Creates a new instance with a copy of {@link LexemeType} to prevent
+ * subsequent mutations from affecting the resolution of ambiguous quotes.
+ *
+ * @return A semi-deep copy of this instance.
+ */
+ public Lexeme copy() {
+ return new Lexeme( mType.copy(), mBegan, mEnded );
+ }
+
+ /**
* Answers whether the given {@link LexemeType} is the same as this
* instance's internal {@link LexemeType}.
src/main/java/com/whitemagicsoftware/keenquotes/lex/LexemeType.java
/**
+ * Creates a new instance with a copy of {@link LexemeGlyph} to prevent
+ * mutations by calling {@link #with(LexemeGlyph)} from affecting ambiguous
+ * quotes resolution.
+ *
+ * @return A semi-deep copy of this instance.
+ */
+ LexemeType copy() {
+ return new LexemeType( glyph() );
+ }
+
+ /**
* Changes the type of glyph associated with this type of lexeme. This
* is useful for passing along different glyphs represented by the same
src/main/java/com/whitemagicsoftware/keenquotes/parser/AmbiguityResolver.java
if( token.isType( QUOTE_OPENING_SINGLE ) ||
token.isType( QUOTE_OPENING_DOUBLE ) ) {
- mTree = mTree.opening( token );
+ mTree = mTree.opening( token.copy() );
}
// Close the subtree if it was open, try to close it.
else if( token.isType( QUOTE_CLOSING_SINGLE ) ||
token.isType( QUOTE_CLOSING_DOUBLE ) ) {
- mTree = mTree.closing( token );
+ mTree = mTree.closing( token.copy() );
}
else if( token.isType( QUOTE_AMBIGUOUS_DOUBLE ) ) {
// Create subtrees for: <" ... ">, <" ">, <"">, etc.
if( mTree.hasOpeningDoubleQuote() ) {
token.setTokenType( QUOTE_CLOSING_DOUBLE );
- mTree = mTree.closing( token );
+ mTree = mTree.closing( token.copy() );
}
else {
token.setTokenType( QUOTE_OPENING_DOUBLE );
- mTree = mTree.opening( token );
+ mTree = mTree.opening( token.copy() );
}
}
// Add ambiguous tokens to be resolved; add apostrophes for later emitting.
else {
- mTree.add( token );
+ mTree.add( token.copy() );
}
}
src/main/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitter.java
emit( QUOTE_OPENING_DOUBLE, lex2 );
}
- else if( match( ANY, QUOTE_DOUBLE, ANY, ANY ) ) {
- emit( QUOTE_AMBIGUOUS_DOUBLE, lex2 );
- }
- // International opening double quotation mark.
+ // International quotation marks.
else if( match( ANY, QUOTE_DOUBLE_OPENING, ANY, ANY ) ) {
emit( QUOTE_OPENING_DOUBLE, lex2 );
}
- // International opening single quotation mark.
else if( match( ANY, QUOTE_SINGLE_OPENING, ANY, ANY ) ) {
emit( QUOTE_OPENING_SINGLE, lex2 );
}
- // International double closing quotation mark.
- else if( match( ANY, ANY, ANY, QUOTE_DOUBLE_CLOSING ) ) {
- emit( QUOTE_CLOSING_DOUBLE, lex4 );
+ else if( match( ANY, QUOTE_DOUBLE_CLOSING, ANY, ANY ) ) {
+ emit( QUOTE_CLOSING_DOUBLE, lex2 );
}
- // International single closing quotation mark.
- else if( match( ANY, ANY, ANY, QUOTE_SINGLE_CLOSING ) ) {
- emit( QUOTE_CLOSING_SINGLE, lex4 );
+ else if( match( ANY, QUOTE_SINGLE_CLOSING, ANY, ANY ) ) {
+ emit( QUOTE_CLOSING_SINGLE, lex2 );
}
// Ambiguous (no match)
else if( match( ANY, QUOTE_SINGLE, ANY, ANY ) ) {
emit( QUOTE_AMBIGUOUS_SINGLE, lex2 );
+ }
+ else if( match( ANY, QUOTE_DOUBLE, ANY, ANY ) ) {
+ emit( QUOTE_AMBIGUOUS_DOUBLE, lex2 );
}
}
src/main/java/com/whitemagicsoftware/keenquotes/parser/Token.java
/**
+ * Creates a new instance with a copy of {@link Lexeme} to prevent
+ * subsequent mutations from affecting the resolution of ambiguous quotes.
+ *
+ * @return A semi-deep copy of this instance.
+ */
+ Token copy() {
+ return new Token( mTokenType, mLexeme.copy() );
+ }
+
+ /**
* Answers whether this {@link Token} appears before the given {@link Token}
* in the document. If they overlap, this will return {@code false}.
src/test/resources/com/whitemagicsoftware/keenquotes/texts/i18n.txt
# ########################################################################
+# Mixed
+# ########################################################################
+«What's ‹going› on?» ,,Hey there!"
+&laquo;What&apos;s &lsaquo;going&rsaquo; on?&raquo; &#8222;Hey there!&rdquo;
+
+# ########################################################################
# French
# ########################################################################
Delta: 50 lines added, 16 lines removed, 34-line increase