Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git

Fixes French double and single quotes for regular parsing mode

Author: Dave Jarvis <email>
Date: 2025-09-02 00:20:41 GMT-0700
Commit: 06d1a5b3a571105e87c308f91049aa902c8d472a
Parent: f00b99c
src/main/java/com/whitemagicsoftware/keenquotes/lex/LexemeType.java
public static final LexemeType QUOTE_DOUBLE_OPENING = new LexemeType( LEX_DOUBLE_QUOTE_OPENING );
public static final LexemeType QUOTE_DOUBLE_CLOSING = new LexemeType( LEX_DOUBLE_QUOTE_CLOSING );
+ // Chevron (guillemet) quotes. Single chevrons default to the single-quote
+ // glyphs and double chevrons to the double-quote glyphs, so that each
+ // chevron type agrees in arity with its default glyph.
+ public static final LexemeType QUOTE_SINGLE_CHEVRON_OPENING = new LexemeType( LEX_SINGLE_QUOTE_OPENING );
+ public static final LexemeType QUOTE_SINGLE_CHEVRON_CLOSING = new LexemeType( LEX_SINGLE_QUOTE_CLOSING );
+ public static final LexemeType QUOTE_DOUBLE_CHEVRON_OPENING = new LexemeType( LEX_DOUBLE_QUOTE_OPENING );
+ public static final LexemeType QUOTE_DOUBLE_CHEVRON_CLOSING = new LexemeType( LEX_DOUBLE_QUOTE_CLOSING );
public static final LexemeType ESC_SINGLE = new LexemeType();
public static final LexemeType ESC_DOUBLE = new LexemeType();
src/main/java/com/whitemagicsoftware/keenquotes/lex/Lexer.java
token = QUOTE_SINGLE_CLOSING.with( LEX_SINGLE_QUOTE_CLOSING );
}
+ else if( LEX_SINGLE_CHEVRON_LEFT.equals( curr ) ) {
+ token = QUOTE_SINGLE_CHEVRON_OPENING.with( LEX_SINGLE_CHEVRON_LEFT );
+ }
+ else if( LEX_DOUBLE_CHEVRON_LEFT.equals( curr ) ) {
+ token = QUOTE_DOUBLE_CHEVRON_OPENING.with( LEX_DOUBLE_CHEVRON_LEFT );
+ }
+ else if( LEX_SINGLE_CHEVRON_RIGHT.equals( curr ) ) {
+ token = QUOTE_SINGLE_CHEVRON_CLOSING.with( LEX_SINGLE_CHEVRON_RIGHT );
+ }
+ else if( LEX_DOUBLE_CHEVRON_RIGHT.equals( curr ) ) {
+ token = QUOTE_DOUBLE_CHEVRON_CLOSING.with( LEX_DOUBLE_CHEVRON_RIGHT );
+ }
else if( curr == '\\' ) {
final var next = i.advance();
src/main/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitter.java
emit( QUOTE_CLOSING_SINGLE, lex2 );
}
+ else if( match( ANY, QUOTE_SINGLE_CHEVRON_OPENING, ANY, ANY ) ) {
+ emit( QUOTE_CHEVRON_SINGLE_OPENING, lex2 );
+ }
+ else if( match( ANY, QUOTE_SINGLE_CHEVRON_CLOSING, ANY, ANY ) ) {
+ emit( QUOTE_CHEVRON_SINGLE_CLOSING, lex2 );
+ }
+ else if( match( ANY, QUOTE_DOUBLE_CHEVRON_OPENING, ANY, ANY ) ) {
+ emit( QUOTE_CHEVRON_DOUBLE_OPENING, lex2 );
+ }
+ else if( match( ANY, QUOTE_DOUBLE_CHEVRON_CLOSING, ANY, ANY ) ) {
+ emit( QUOTE_CHEVRON_DOUBLE_CLOSING, lex2 );
+ }
// Ambiguous (no match)
else if( match( ANY, QUOTE_SINGLE, ANY, ANY ) ) {
src/main/java/com/whitemagicsoftware/keenquotes/parser/TokenType.java
QUOTE_CLOSING_DOUBLE( "closing-double" ),
QUOTE_APOSTROPHE( "apostrophe" ),
+ QUOTE_CHEVRON_SINGLE_OPENING( "chevron-single-opening" ),
+ QUOTE_CHEVRON_SINGLE_CLOSING( "chevron-single-closing" ),
+ QUOTE_CHEVRON_DOUBLE_OPENING( "chevron-double-opening" ),
+ QUOTE_CHEVRON_DOUBLE_CLOSING( "chevron-double-closing" ),
QUOTE_STRAIGHT_SINGLE,
QUOTE_STRAIGHT_DOUBLE,
src/test/java/com/whitemagicsoftware/keenquotes/lex/LexerTest.java
void test_Lexing_Quotes_EmitQuotes() {
testType( "‘", QUOTE_SINGLE_OPENING );
- testType( "‹", QUOTE_SINGLE_OPENING );
+ testType( "‹", QUOTE_SINGLE_CHEVRON_OPENING );
testType( "’", QUOTE_SINGLE_CLOSING );
- testType( "›", QUOTE_SINGLE_CLOSING );
+ testType( "›", QUOTE_SINGLE_CHEVRON_CLOSING );
testType( "'", QUOTE_SINGLE );
testType( "“", QUOTE_DOUBLE_OPENING );
testType( "„", QUOTE_DOUBLE_OPENING );
- testType( "«", QUOTE_DOUBLE_OPENING );
+ testType( "«", QUOTE_DOUBLE_CHEVRON_OPENING );
testType( ",,", QUOTE_DOUBLE_OPENING );
testType( "\"", QUOTE_DOUBLE );
testType( "”", QUOTE_DOUBLE_CLOSING );
- testType( "»", QUOTE_DOUBLE_CLOSING );
+ testType( "»", QUOTE_DOUBLE_CHEVRON_CLOSING );
testType( "3 o'clock", NUMBER, SPACE, WORD, QUOTE_SINGLE, WORD );
src/test/java/com/whitemagicsoftware/keenquotes/parser/CurlerTest.java
@Test
+ public void test_Parse_UncurledQuotesXml_Regular() throws IOException {
+   // Build a curler using the XML filter and regular conversion, then
+   // check it against the expectations listed in xml-regular.txt.
+   final var curler = createCurler( FILTER_XML, CONVERT_REGULAR );
+   testCurler( curler, "xml-regular.txt" );
+ }
+
+ @Test
public void test_Parse_UncurledQuotesI11l_CurlyQuotes() throws IOException {
testCurler(
src/test/resources/com/whitemagicsoftware/keenquotes/texts/xml-regular.txt
+# ########################################################################
+# Regular conversion
+# ########################################################################
+
+<p>style “double courbure” (« » ou “ ”)</p>
+<p>style “double courbure” (« » ou “ ”)</p>
Delta: 49 lines added, 4 lines removed, 45-line increase