Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git
M src/main/java/com/whitemagicsoftware/keenquotes/parser/AmbiguityResolver.java
6 6
import java.util.ArrayList;
7 7
import java.util.Collections;
8
import java.util.List;
8 9
import java.util.function.Consumer;
9 10
...
88 89
89 90
    Collections.sort( tokens );
91
92
    // All laggards appearing before the first leader are apostrophes.
93
    resolve( tokens );
94
95
    // Replacing laggards may have made leaders resolvable.
96
    mTree.visit( this::disambiguate );
90 97
91 98
    // Relay the tokens, in order, for updating the parsed document.
92 99
    tokens.forEach( mConsumer );
100
  }
101
102
  /**
103
   * Retypes ambiguous laggards as apostrophes, in place, until the first leader.
104
   *
105
   * @param tokens The sorted, non-null list of {@link Token}s to convert.
106
   */
107
  private void resolve( final List<Token> tokens ) {
108
    assert tokens != null;
109
110
    for( final var token : tokens ) {
111
      if( token.isType( QUOTE_AMBIGUOUS_LEADING ) ) {
112
        // Stop here: after the first leader, a laggard may be a closing quote.
113
        break;
114
      }
115
      else if( token.isType( QUOTE_AMBIGUOUS_LAGGING ) ) {
116
        token.setTokenType( QUOTE_APOSTROPHE );
117
      }
118
    }
93 119
  }
94 120
M src/main/java/com/whitemagicsoftware/keenquotes/parser/Contractions.java
394 394
    // what's up|to eat
395 395
    "sup",
396
    // cannot (tan't)|melanin
396
    // cannot (tan't)|colour
397 397
    "tan",
398 398
    // still|turn soil
M src/main/java/com/whitemagicsoftware/keenquotes/parser/Curler.java
62 62
   * @param parserType Creates a parser based on document content structure.
63 63
   */
64
  public Curler(
65
    final Contractions c,
66
    final FilterType parserType
67
  ) {
64
  public Curler( final Contractions c, final FilterType parserType ) {
68 65
    this( c, ENTITIES, parserType );
69 66
  }
M src/test/java/com/whitemagicsoftware/keenquotes/parser/CurlerTest.java
3 3
4 4
import com.whitemagicsoftware.keenquotes.lex.FilterType;
5
import org.junit.jupiter.api.Disabled;
5 6
import org.junit.jupiter.api.Test;
6 7
import org.junit.jupiter.params.ParameterizedTest;
...
46 47
   */
47 48
  @ParameterizedTest
48
  @ValueSource( strings = {"habberton"} )
49
  //@Disabled
49
  @ValueSource( strings = {"autonoma"} )
50
  @Disabled
50 51
  void test_Parse_Story_Converted( final String filename ) throws IOException {
51 52
    final var sb = new StringBuilder( 1 << 20 ); // 2**20 chars; "2 ^ 20" is XOR (22)
52 53
53
    try( final var reader = open( filename + ".txt" ) ) {
54
    try( final var reader = open( filename + ".html" ) ) {
54 55
      String line;
55 56
56 57
      while( (line = reader.readLine()) != null ) {
57 58
        sb.append( line ).append( SEP );
58 59
      }
59 60
    }
60 61
61
    final var converter = createCurler( FILTER_PLAIN );
62
    System.out.println( converter.apply( sb.toString() ) );
62
    final var curler = createCurler( FILTER_XML );
63
    System.out.println( curler.apply( sb.toString() ) );
63 64
  }
64 65
M src/test/resources/com/whitemagicsoftware/keenquotes/texts/unambiguous-2-pass.txt
32 32
# Mixed
33 33
# ########################################################################
34
#"Granpa Harry 'spects you, Miss Mayton."
35
#&ldquo;Granpa Harry &apos;spects you, Miss Mayton.&rdquo;
34
"Granpa Harry 'spects you, Miss Mayton."
35
&ldquo;Granpa Harry &apos;spects you, Miss Mayton.&rdquo;
36 36
37 37
"She said, 'That's Sam's'," said the Sams' cat.
M src/test/resources/com/whitemagicsoftware/keenquotes/texts/xml.txt
14 14
<a href="https://x.org" title="X's Homepage">X11&apos;s bomb</a>
15 15
16
<body><p>Test lungs' capacity; shovel's handle hit his head's electronics.</p><p>Hoped to bring him 'round.</p></body>
17
<body><p>Test lungs&apos; capacity; shovel&apos;s handle hit his head&apos;s electronics.</p><p>Hoped to bring him &apos;round.</p></body>
18