| 57 | 57 | |
| 58 | 58 | /** |
| 59 | * Creates a new instance from a copy of this instance's {@link LexemeType} |
|
| 60 | * so that subsequent mutations do not affect the resolution of ambiguous quotes. |
|
| 61 | * |
|
| 62 | * @return A semi-deep copy of this instance: the {@link LexemeType} is copied, while {@code mBegan} and {@code mEnded} are passed through as-is. |
|
| 63 | */ |
|
| 64 | public Lexeme copy() { |
|
| 65 | return new Lexeme( mType.copy(), mBegan, mEnded ); |
|
| 66 | } |
|
| 67 | ||
| 68 | /** |
|
| 59 | 69 | * Answers whether the given {@link LexemeType} is the same as this |
| 60 | 70 | * instance's internal {@link LexemeType}. |
| 61 | 61 | |
| 62 | 62 | /** |
| 63 | * Creates a new instance that is given this instance's {@link LexemeGlyph}, |
|
| 64 | * so that later mutations made by calling {@link #with(LexemeGlyph)} do not |
|
| 65 | * affect the resolution of ambiguous quotes. |
|
| 66 | * |
|
| 67 | * @return A new {@link LexemeType} constructed from {@code glyph()}. |
|
| 68 | */ |
|
| 69 | LexemeType copy() { |
|
| 70 | return new LexemeType( glyph() ); |
|
| 71 | } |
|
| 72 | ||
| 73 | /** |
|
| 63 | 74 | * Changes the type of glyph associated with this type of lexeme. This |
| 64 | 75 | * is useful for passing along different glyphs represented by the same |
| 57 | 57 | if( token.isType( QUOTE_OPENING_SINGLE ) || |
| 58 | 58 | token.isType( QUOTE_OPENING_DOUBLE ) ) { |
| 59 | mTree = mTree.opening( token ); |
|
| 59 | mTree = mTree.opening( token.copy() ); |
|
| 60 | 60 | } |
| 61 | 61 | // Close the subtree if it was open, try to close it. |
| 62 | 62 | else if( token.isType( QUOTE_CLOSING_SINGLE ) || |
| 63 | 63 | token.isType( QUOTE_CLOSING_DOUBLE ) ) { |
| 64 | mTree = mTree.closing( token ); |
|
| 64 | mTree = mTree.closing( token.copy() ); |
|
| 65 | 65 | } |
| 66 | 66 | else if( token.isType( QUOTE_AMBIGUOUS_DOUBLE ) ) { |
| 67 | 67 | // Create subtrees for: <" ... ">, <" ">, <"">, etc. |
| 68 | 68 | if( mTree.hasOpeningDoubleQuote() ) { |
| 69 | 69 | token.setTokenType( QUOTE_CLOSING_DOUBLE ); |
| 70 | mTree = mTree.closing( token ); |
|
| 70 | mTree = mTree.closing( token.copy() ); |
|
| 71 | 71 | } |
| 72 | 72 | else { |
| 73 | 73 | token.setTokenType( QUOTE_OPENING_DOUBLE ); |
| 74 | mTree = mTree.opening( token ); |
|
| 74 | mTree = mTree.opening( token.copy() ); |
|
| 75 | 75 | } |
| 76 | 76 | } |
| 77 | 77 | // Add ambiguous tokens to be resolved; add apostrophes for later emitting. |
| 78 | 78 | else { |
| 79 | mTree.add( token ); |
|
| 79 | mTree.add( token.copy() ); |
|
| 80 | 80 | } |
| 81 | 81 | } |
| 358 | 358 | emit( QUOTE_OPENING_DOUBLE, lex2 ); |
| 359 | 359 | } |
| 360 | else if( match( ANY, QUOTE_DOUBLE, ANY, ANY ) ) { |
|
| 361 | emit( QUOTE_AMBIGUOUS_DOUBLE, lex2 ); |
|
| 362 | } |
|
| 363 | // International opening double quotation mark. |
|
| 360 | // International quotation marks. |
|
| 364 | 361 | else if( match( ANY, QUOTE_DOUBLE_OPENING, ANY, ANY ) ) { |
| 365 | 362 | emit( QUOTE_OPENING_DOUBLE, lex2 ); |
| 366 | 363 | } |
| 367 | // International opening single quotation mark. |
|
| 368 | 364 | else if( match( ANY, QUOTE_SINGLE_OPENING, ANY, ANY ) ) { |
| 369 | 365 | emit( QUOTE_OPENING_SINGLE, lex2 ); |
| 370 | 366 | } |
| 371 | // International double closing quotation mark. |
|
| 372 | else if( match( ANY, ANY, ANY, QUOTE_DOUBLE_CLOSING ) ) { |
|
| 373 | emit( QUOTE_CLOSING_DOUBLE, lex4 ); |
|
| 367 | else if( match( ANY, QUOTE_DOUBLE_CLOSING, ANY, ANY ) ) { |
|
| 368 | emit( QUOTE_CLOSING_DOUBLE, lex2 ); |
|
| 374 | 369 | } |
| 375 | // International single closing quotation mark. |
|
| 376 | else if( match( ANY, ANY, ANY, QUOTE_SINGLE_CLOSING ) ) { |
|
| 377 | emit( QUOTE_CLOSING_SINGLE, lex4 ); |
|
| 370 | else if( match( ANY, QUOTE_SINGLE_CLOSING, ANY, ANY ) ) { |
|
| 371 | emit( QUOTE_CLOSING_SINGLE, lex2 ); |
|
| 378 | 372 | } |
| 379 | 373 | // Ambiguous (no match) |
| 380 | 374 | else if( match( ANY, QUOTE_SINGLE, ANY, ANY ) ) { |
| 381 | 375 | emit( QUOTE_AMBIGUOUS_SINGLE, lex2 ); |
| 376 | } |
|
| 377 | else if( match( ANY, QUOTE_DOUBLE, ANY, ANY ) ) { |
|
| 378 | emit( QUOTE_AMBIGUOUS_DOUBLE, lex2 ); |
|
| 382 | 379 | } |
| 383 | 380 | } |
| 86 | 86 | |
| 87 | 87 | /** |
| 88 | * Creates a new instance from a copy of this instance's {@link Lexeme} to prevent |
|
| 89 | * subsequent mutations from affecting the resolution of ambiguous quotes. |
|
| 90 | * |
|
| 91 | * @return A semi-deep copy of this instance: the {@link Lexeme} is copied, while the token type is passed through as-is. |
|
| 92 | */ |
|
| 93 | Token copy() { |
|
| 94 | return new Token( mTokenType, mLexeme.copy() ); |
|
| 95 | } |
|
| 96 | ||
| 97 | /** |
|
| 88 | 98 | * Answers whether this {@link Token} appears before the given {@link Token} |
| 89 | 99 | * in the document. If they overlap, this will return {@code false}. |
| 1 | 1 | # ######################################################################## |
| 2 | # Mixed |
|
| 3 | # ######################################################################## |
|
| 4 | «What's ‹going› on?» ,,Hey there!" |
|
| 5 | «What's ‹going› on?» „Hey there!” |
|
| 6 | ||
| 7 | # ######################################################################## |
|
| 2 | 8 | # French |
| 3 | 9 | # ######################################################################## |