| 16 | 16 | public static final LexemeType QUOTE_DOUBLE_OPENING = new LexemeType( LEX_DOUBLE_QUOTE_OPENING ); |
| 17 | 17 | public static final LexemeType QUOTE_DOUBLE_CLOSING = new LexemeType( LEX_DOUBLE_QUOTE_CLOSING ); |
| 18 | public static final LexemeType QUOTE_SINGLE_CHEVRON_OPENING = new LexemeType( LEX_SINGLE_QUOTE_OPENING ); /* argument weight (single) now matches the constant's name */ |
|
| 19 | public static final LexemeType QUOTE_SINGLE_CHEVRON_CLOSING = new LexemeType( LEX_SINGLE_QUOTE_CLOSING ); /* argument weight (single) now matches the constant's name */ |
|
| 20 | public static final LexemeType QUOTE_DOUBLE_CHEVRON_OPENING = new LexemeType( LEX_DOUBLE_QUOTE_OPENING ); /* argument weight (double) now matches the constant's name */ |
|
| 21 | public static final LexemeType QUOTE_DOUBLE_CHEVRON_CLOSING = new LexemeType( LEX_DOUBLE_QUOTE_CLOSING ); /* argument weight (double) now matches the constant's name */ |
|
| 18 | 22 | public static final LexemeType ESC_SINGLE = new LexemeType(); |
| 19 | 23 | public static final LexemeType ESC_DOUBLE = new LexemeType(); |
| 164 | 164 | token = QUOTE_SINGLE_CLOSING.with( LEX_SINGLE_QUOTE_CLOSING ); |
| 165 | 165 | } |
| 166 | else if( LEX_SINGLE_CHEVRON_LEFT.equals( curr ) ) { |
|
| 167 | token = QUOTE_SINGLE_CHEVRON_OPENING.with( LEX_SINGLE_CHEVRON_LEFT ); |
|
| 168 | } |
|
| 169 | else if( LEX_DOUBLE_CHEVRON_LEFT.equals( curr ) ) { |
|
| 170 | token = QUOTE_DOUBLE_CHEVRON_OPENING.with( LEX_DOUBLE_CHEVRON_LEFT ); |
|
| 171 | } |
|
| 172 | else if( LEX_SINGLE_CHEVRON_RIGHT.equals( curr ) ) { |
|
| 173 | token = QUOTE_SINGLE_CHEVRON_CLOSING.with( LEX_SINGLE_CHEVRON_RIGHT ); |
|
| 174 | } |
|
| 175 | else if( LEX_DOUBLE_CHEVRON_RIGHT.equals( curr ) ) { |
|
| 176 | token = QUOTE_DOUBLE_CHEVRON_CLOSING.with( LEX_DOUBLE_CHEVRON_RIGHT ); |
|
| 177 | } |
|
| 166 | 178 | else if( curr == '\\' ) { |
| 167 | 179 | final var next = i.advance(); |
| 395 | 395 | emit( QUOTE_CLOSING_SINGLE, lex2 ); |
| 396 | 396 | } |
| 397 | else if( match( ANY, QUOTE_SINGLE_CHEVRON_OPENING, ANY, ANY ) ) { |
|
| 398 | emit( QUOTE_CHEVRON_SINGLE_OPENING, lex2 ); |
|
| 399 | } |
|
| 400 | else if( match( ANY, QUOTE_SINGLE_CHEVRON_CLOSING, ANY, ANY ) ) { |
|
| 401 | emit( QUOTE_CHEVRON_SINGLE_CLOSING, lex2 ); |
|
| 402 | } |
|
| 403 | else if( match( ANY, QUOTE_DOUBLE_CHEVRON_OPENING, ANY, ANY ) ) { |
|
| 404 | emit( QUOTE_CHEVRON_DOUBLE_OPENING, lex2 ); |
|
| 405 | } |
|
| 406 | else if( match( ANY, QUOTE_DOUBLE_CHEVRON_CLOSING, ANY, ANY ) ) { |
|
| 407 | emit( QUOTE_CHEVRON_DOUBLE_CLOSING, lex2 ); |
|
| 408 | } |
|
| 397 | 409 | // Ambiguous (no match) |
| 398 | 410 | else if( match( ANY, QUOTE_SINGLE, ANY, ANY ) ) { |
| 11 | 11 | QUOTE_CLOSING_DOUBLE( "closing-double" ), |
| 12 | 12 | QUOTE_APOSTROPHE( "apostrophe" ), |
| 13 | QUOTE_CHEVRON_SINGLE_OPENING( "chevron-single-opening" ), |
|
| 14 | QUOTE_CHEVRON_SINGLE_CLOSING( "chevron-single-closing" ), |
|
| 15 | QUOTE_CHEVRON_DOUBLE_OPENING( "chevron-double-opening" ), |
|
| 16 | QUOTE_CHEVRON_DOUBLE_CLOSING( "chevron-double-closing" ), |
|
| 13 | 17 | QUOTE_STRAIGHT_SINGLE, |
| 14 | 18 | QUOTE_STRAIGHT_DOUBLE, |
| 72 | 72 | void test_Lexing_Quotes_EmitQuotes() { |
| 73 | 73 | testType( "‘", QUOTE_SINGLE_OPENING ); |
| 74 | testType( "‹", QUOTE_SINGLE_OPENING ); |
|
| 74 | testType( "‹", QUOTE_SINGLE_CHEVRON_OPENING ); |
|
| 75 | 75 | testType( "’", QUOTE_SINGLE_CLOSING ); |
| 76 | testType( "›", QUOTE_SINGLE_CLOSING ); |
|
| 76 | testType( "›", QUOTE_SINGLE_CHEVRON_CLOSING ); |
|
| 77 | 77 | testType( "'", QUOTE_SINGLE ); |
| 78 | 78 | |
| 79 | 79 | testType( "“", QUOTE_DOUBLE_OPENING ); |
| 80 | 80 | testType( "„", QUOTE_DOUBLE_OPENING ); |
| 81 | testType( "«", QUOTE_DOUBLE_OPENING ); |
|
| 81 | testType( "«", QUOTE_DOUBLE_CHEVRON_OPENING ); |
|
| 82 | 82 | testType( ",,", QUOTE_DOUBLE_OPENING ); |
| 83 | 83 | testType( "\"", QUOTE_DOUBLE ); |
| 84 | 84 | |
| 85 | 85 | testType( "”", QUOTE_DOUBLE_CLOSING ); |
| 86 | testType( "»", QUOTE_DOUBLE_CLOSING ); |
|
| 86 | testType( "»", QUOTE_DOUBLE_CHEVRON_CLOSING ); |
|
| 87 | 87 | |
| 88 | 88 | testType( "3 o'clock", NUMBER, SPACE, WORD, QUOTE_SINGLE, WORD ); |
| 63 | 63 | |
| 64 | 64 | @Test // Passes a curler built from FILTER_XML and CONVERT_REGULAR, plus the name "xml-regular.txt", to testCurler.
| 65 | public void test_Parse_UncurledQuotesXml_Regular() throws IOException { |
|
| 66 | testCurler( |
|
| 67 | createCurler( FILTER_XML, CONVERT_REGULAR ), "xml-regular.txt" |
|
| 68 | ); |
|
| 69 | } |
|
| 70 | ||
| 71 | @Test |
|
| 65 | 72 | public void test_Parse_UncurledQuotesI11l_CurlyQuotes() throws IOException { |
| 66 | 73 | testCurler( |
| 14 | 14 | Dit il, ‹Ce n'est pas la mer à boire?› |
| 15 | 15 | |
| 16 | style “double courbure” (« » ou “ ”) |
|
| 17 | style “double courbure” (« » ou “ ”) |
|
| 18 | ||
| 16 | 19 | # ######################################################################## |
| 17 | 20 | # Dutch |
| 1 | # ######################################################################## |
|
| 2 | # Regular conversion |
|
| 3 | # ######################################################################## |
|
| 4 | ||
| 5 | <p>style “double courbure” (« » ou “ ”)</p> |
|
| 6 | <p>style “double courbure” (« » ou “ ”)</p> |
|
| 1 | 7 |
| 23 | 23 | <style>@font-face { font-family: 'Literata Book'; src: url('/fonts/LiterataBook.otf') format('opentype'); }</style> |
| 24 | 24 | |
| 25 | <p>style “double courbure” (« » ou “ ”)</p> |
|
| 26 | <p>style “double courbure” (« » ou “ ”)</p> |
|
| 27 |