| 6 | 6 | import java.util.ArrayList; |
| 7 | 7 | import java.util.Collections; |
| 8 | import java.util.List; |
|
| 8 | 9 | import java.util.function.Consumer; |
| 9 | 10 | |
| ... | ||
| 88 | 89 | |
| 89 | 90 | Collections.sort( tokens ); |
| 91 | ||
| 92 | // All laggards appearing before the first leader are apostrophes. |
|
| 93 | resolve( tokens ); |
|
| 94 | ||
| 95 | // Replacing laggards may have made leaders resolvable. |
|
| 96 | mTree.visit( this::disambiguate ); |
|
| 90 | 97 | |
| 91 | 98 | // Relay the tokens, in order, for updating the parsed document. |
| 92 | 99 | tokens.forEach( mConsumer ); |
| 100 | } |
|
| 101 | ||
| 102 | /** |

| 103 | * Converts each ambiguous lagging quote that appears before the first ambiguous leading quote into an apostrophe. |

| 104 | * Iteration stops at the first leader, so laggards after it are not changed by this method. |

| 105 | * @param tokens The list of sorted {@link Token}s to convert, modified in place; must not be {@code null}. |

| 106 | */ |

| 107 | private void resolve( final List<Token> tokens ) { |

| 108 | assert tokens != null; |

| 109 | ||
| 110 | for( final var token : tokens ) { |

| 111 | if( token.isType( QUOTE_AMBIGUOUS_LEADING ) ) { |

| 112 | // Stop at the first leader: any laggard after it could be a closing quote, so it is left as is. |

| 113 | break; |

| 114 | } |

| 115 | else if( token.isType( QUOTE_AMBIGUOUS_LAGGING ) ) { |

| 116 | token.setTokenType( QUOTE_APOSTROPHE ); |

| 117 | } |

| 118 | } |

| 119 | } |
| 94 | 120 | |
| 394 | 394 | // what's up|to eat |
| 395 | 395 | "sup", |
| 396 | // cannot (tan't)|melanin |
|
| 396 | // cannot (tan't)|colour |
|
| 397 | 397 | "tan", |
| 398 | 398 | // still|turn soil |
| 62 | 62 | * @param parserType Creates a parser based on document content structure. |
| 63 | 63 | */ |
| 64 | public Curler( |
|
| 65 | final Contractions c, |
|
| 66 | final FilterType parserType |
|
| 67 | ) { |
|
| 64 | public Curler( final Contractions c, final FilterType parserType ) { |
|
| 68 | 65 | this( c, ENTITIES, parserType ); |
| 69 | 66 | } |
| 3 | 3 | |
| 4 | 4 | import com.whitemagicsoftware.keenquotes.lex.FilterType; |
| 5 | import org.junit.jupiter.api.Disabled; |
|
| 5 | 6 | import org.junit.jupiter.api.Test; |
| 6 | 7 | import org.junit.jupiter.params.ParameterizedTest; |
| ... | ||
| 46 | 47 | */ |
| 47 | 48 | @ParameterizedTest |
| 48 | @ValueSource( strings = {"habberton"} ) |
|
| 49 | //@Disabled |
|
| 49 | @ValueSource( strings = {"autonoma"} ) |
|
| 50 | @Disabled |
|
| 50 | 51 | void test_Parse_Story_Converted( final String filename ) throws IOException { |
| 51 | 52 | final var sb = new StringBuilder( 1 << 20 ); // 1 MiB initial capacity; '^' is XOR in Java (2 ^ 20 == 22), not a power |
| 52 | 53 | |
| 53 | try( final var reader = open( filename + ".txt" ) ) { |

| 54 | try( final var reader = open( filename + ".html" ) ) { |

| 54 | 55 | String line; |
| 55 | 56 | |
| 56 | 57 | while( (line = reader.readLine()) != null ) { |
| 57 | 58 | sb.append( line ).append( SEP ); |
| 58 | 59 | } |
| 59 | 60 | } |
| 60 | 61 | |
| 61 | final var converter = createCurler( FILTER_PLAIN ); |

| 62 | System.out.println( converter.apply( sb.toString() ) ); |

| 62 | final var curler = createCurler( FILTER_XML ); |

| 63 | System.out.println( curler.apply( sb.toString() ) ); |

| 63 | 64 | } |
| 64 | 65 | |
| 32 | 32 | # Mixed |
| 33 | 33 | # ######################################################################## |
| 34 | #"Granpa Harry 'spects you, Miss Mayton." |
|
| 35 | #“Granpa Harry 'spects you, Miss Mayton.” |
|
| 34 | "Granpa Harry 'spects you, Miss Mayton." |
|
| 35 | “Granpa Harry 'spects you, Miss Mayton.” |
|
| 36 | 36 | |
| 37 | 37 | "She said, 'That's Sam's'," said the Sams' cat. |
| 14 | 14 | <a href="https://x.org" title="X's Homepage">X11's bomb</a> |
| 15 | 15 | |
| 16 | <body><p>Test lungs' capacity; shovel's handle hit his head's electronics.</p><p>Hoped to bring him 'round.</p></body> |
|
| 17 | <body><p>Test lungs' capacity; shovel's handle hit his head's electronics.</p><p>Hoped to bring him 'round.</p></body> |
|
| 18 |