Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenquotes.git
M build.gradle
81 81
}
82 82
83
tasks.withType( JavaCompile ).configureEach {
84
  options.compilerArgs += "--enable-preview"
85
}
86
87
tasks.withType( JavaExec ).configureEach {
88
  jvmArgs += "--enable-preview"
89
}
90
91
tasks.withType( Test ).configureEach {
92
  jvmArgs += "--enable-preview"
93
}
94
M src/main/java/com/whitemagicsoftware/keenquotes/app/KeenQuotes.java
1
/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
1
/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved.
2
 *
3
 * SPDX-License-Identifier: BSD-2-Clause
4
 */
2 5
package com.whitemagicsoftware.keenquotes.app;
3 6
...
49 52
          contractions,
50 53
          settings.filterXml() ? FILTER_XML : FILTER_PLAIN,
51
          settings.entities()
54
          settings.apostrophe()
52 55
        );
53 56
M src/main/java/com/whitemagicsoftware/keenquotes/app/Settings.java
1
/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
1
/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved.
2
 *
3
 * SPDX-License-Identifier: BSD-2-Clause
4
 */
2 5
package com.whitemagicsoftware.keenquotes.app;
3 6
4 7
import com.whitemagicsoftware.keenquotes.lex.XmlFilter;
8
import com.whitemagicsoftware.keenquotes.parser.Apostrophe;
5 9
import picocli.CommandLine;
6 10
7 11
import java.util.List;
8 12
import java.util.concurrent.Callable;
9 13
14
import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.fromType;
10 15
import static java.util.Arrays.asList;
11 16
import static java.util.Collections.emptyList;
...
77 82
78 83
  /**
79
   * Encode quotation marks using HTML entities.
84
   * Set the type of entity to use when encoding apostrophes.
80 85
   */
81 86
  @CommandLine.Option(
82
    names = {"-e", "--entities"},
83
    description = "Encode quotation marks using HTML entities"
87
    names = {"-a", "--apostrophe"},
88
    description = "Converted apostrophe entity (regular, modifier, hex, entity)"
84 89
  )
85
  private boolean mEntities;
90
  private String mApostrophe;
86 91
87 92
  /**
...
118 123
   * @return The {@link Apostrophe} conversion selected by the apostrophe option.
119 124
   */
120
  boolean entities() { return mEntities; }
125
  Apostrophe apostrophe() { return fromType( mApostrophe ); }
121 126
122 127
  List<String> getBeganUnambiguous() {
A src/main/java/com/whitemagicsoftware/keenquotes/parser/Apostrophe.java
1
/* Copyright 2024 White Magic Software, Ltd. -- All rights reserved.
2
 *
3
 * SPDX-License-Identifier: BSD-2-Clause
4
 */
5
package com.whitemagicsoftware.keenquotes.parser;
6
7
/**
 * When converting quotation marks, these values are used to indicate what
 * type of entity to use in the conversion. Each constant pairs the text that
 * represents an apostrophe with the type name used to look the constant up.
 */
public enum Apostrophe {
  /**
   * Plain straight single quote ({@code '}); also the fallback returned by
   * {@link #fromType(String)} when no type name matches.
   */
  CONVERT_REGULAR( "'", "regular" ),
  /**
   * Apostrophes become MODIFIER LETTER APOSTROPHE ({@code &#x2bc;}).
   */
  CONVERT_MODIFIER( "&#x2bc;", "modifier" ),
  /**
   * Apostrophes become APOSTROPHE ({@code &#x27;}).
   */
  CONVERT_APOS_HEX( "&#x27;", "hex" ),
  /**
   * Apostrophes become XML APOSTROPHE ({@code &apos;}).
   */
  CONVERT_APOS_ENTITY( "&apos;", "entity" );

  /** Text emitted in place of an apostrophe. */
  private final String mCode;

  /** Name used to select this constant, compared without regard to case. */
  private final String mType;

  /**
   * Associates the replacement text with its selecting type name.
   *
   * @param code The text that represents the apostrophe.
   * @param type The name that selects this constant.
   */
  Apostrophe( final String code, final String type ) {
    mCode = code;
    mType = type;
  }

  /**
   * Answers whether the given type name selects this constant.
   *
   * @param type The type name to compare, ignoring case; a {@code null}
   *             value never matches.
   * @return {@code true} when the given name equals this constant's type
   * name, ignoring case.
   */
  public boolean isType( final String type ) {
    return mType.equalsIgnoreCase( type );
  }

  /**
   * Returns the replacement text for the apostrophe, not the constant's name.
   *
   * @return The character or entity that represents the apostrophe.
   */
  @Override
  public String toString() {
    return mCode;
  }

  /**
   * Returns the instance that matches the given type.
   *
   * @param type The type of apostrophe entity conversion to use; matching
   *             ignores case.
   * @return The {@link Apostrophe} to use when converting entities, or
   * {@link #CONVERT_REGULAR} when the type is {@code null} or unrecognized.
   */
  public static Apostrophe fromType( final String type ) {
    for( final var apostrophe : values() ) {
      if( apostrophe.isType( type ) ) {
        return apostrophe;
      }
    }

    // Unknown and missing type names deliberately fall back to the default.
    return CONVERT_REGULAR;
  }
}
1 61
M src/main/java/com/whitemagicsoftware/keenquotes/parser/Contractions.java
1
/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
1
/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved.
2
 *
3
 * SPDX-License-Identifier: BSD-2-Clause
4
 */
2 5
package com.whitemagicsoftware.keenquotes.parser;
3 6
...
14 17
 * Placeholder for various types of contractions.
15 18
 */
16
@SuppressWarnings( { "SpellCheckingInspection", "GrazieInspection" } )
19
@SuppressWarnings( "SpellCheckingInspection" )
17 20
public class Contractions {
18 21
...
142 145
    // Ensure that 'n' isn't matched for ambiguity by enforcing length, yet
143 146
    // allow o' to match because 'a sentence can end with the letter o'.
144
    return getEndedAmbiguous().contains( check ) ||
145
      check.endsWith( "s" ) || check.endsWith( "z" ) ||
146
      check.endsWith( "x" ) || (check.length() > 1 && check.endsWith( "n" ));
147
    return
148
      getEndedAmbiguous().contains( check ) ||
149
      check.endsWith( "s" ) ||
150
      check.endsWith( "z" ) ||
151
      check.endsWith( "x" ) ||
152
      (check.length() > 1 && check.endsWith( "n" ));
147 153
  }
148 154
...
171 177
    return
172 178
      toString( getBeganEndedUnambiguous(), "Unambiguous Began/Ended", "'%s" ) +
173
        toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) +
174
        toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) +
175
        toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) +
176
        toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" );
179
      toString( getBeganUnambiguous(), "Unambiguous Began", "'%s" ) +
180
      toString( getEndedUnambiguous(), "Unambiguous Ended", "%s'" ) +
181
      toString( getBeganAmbiguous(), "Ambiguous Began", "'%s" ) +
182
      toString( getEndedAmbiguous(), "Ambiguous Ended", "%s'" );
177 183
  }
178 184
M src/main/java/com/whitemagicsoftware/keenquotes/parser/Curler.java
19 19
  private final Contractions mContractions;
20 20
  private final LexerFilter mFilter;
21
  private final boolean mEntities;
21
  private final Apostrophe mApostrophe;
22 22
23 23
  /**
24 24
   * Maps quotes to curled character equivalents.
25 25
   *
26
   * @param c        Contractions listings.
27
   * @param entities {@code true} to convert quotation marks to HTML entities.
26
   * @param c      Contractions listings.
27
   * @param apostrophe How to convert quotation marks to HTML entities.
28 28
   */
29 29
  public Curler(
30 30
    final Contractions c,
31 31
    final FilterType filterType,
32
    final boolean entities
32
    final Apostrophe apostrophe
33 33
  ) {
34 34
    assert c != null;
35 35
36 36
    mContractions = c;
37
    mEntities = entities;
38 37
    mFilter = filterType.filter();
38
    mApostrophe = apostrophe;
39 39
  }
40 40
...
56 56
      text,
57 57
      mContractions,
58
      replace( output, offset, mEntities ),
58
      replace( output, offset, mApostrophe ),
59 59
      mFilter
60 60
    );
61 61
62 62
    return output.toString();
63 63
  }
64 64
65 65
  /**
66 66
   * Replaces non-ambiguous tokens with their equivalent string representation.
67 67
   *
68
   * @param output   Continuously updated result document.
69
   * @param offset   Accumulating index where {@link Token} is replaced.
70
   * @param entities {@code true} to convert quotation marks to HTML entities.
68
   * @param output Continuously updated result document.
69
   * @param offset Accumulating index where {@link Token} is replaced.
70
   * @param apostrophe How to convert quotation marks to HTML entities.
71 71
   * @return Instructions to replace a {@link Token} in the result document.
72 72
   */
73 73
  public static Consumer<Token> replace(
74 74
    final StringBuilder output,
75 75
    final AtomicInteger offset,
76
    final boolean entities
76
    final Apostrophe apostrophe
77 77
  ) {
78 78
    return token -> {
79 79
      if( !token.isAmbiguous() ) {
80
        final var text = token.toString( entities );
80
        final var text = token.toString( apostrophe );
81 81
82 82
        output.replace(
M src/main/java/com/whitemagicsoftware/keenquotes/parser/Token.java
1
/* Copyright 2021 White Magic Software, Ltd. -- All rights reserved. */
1
/* Copyright 2021-2024 White Magic Software, Ltd. -- All rights reserved.
2
 *
3
 * SPDX-License-Identifier: BSD-2-Clause
4
 */
2 5
package com.whitemagicsoftware.keenquotes.parser;
3 6
4 7
import com.whitemagicsoftware.keenquotes.lex.Lexeme;
5 8
import com.whitemagicsoftware.keenquotes.lex.LexemeGlyph;
6 9
7 10
import java.util.EnumMap;
8 11
import java.util.Map;
9 12
10 13
import static com.whitemagicsoftware.keenquotes.lex.LexemeGlyph.*;
14
import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.*;
11 15
import static com.whitemagicsoftware.keenquotes.parser.TokenType.*;
12 16
...
28 32
    ENTITIES.put( QUOTE_STRAIGHT_SINGLE, "'" );
29 33
    ENTITIES.put( QUOTE_STRAIGHT_DOUBLE, "\"" );
30
    ENTITIES.put( QUOTE_APOSTROPHE, "&apos;" );
31 34
    ENTITIES.put( QUOTE_PRIME_SINGLE, "&prime;" );
32 35
    ENTITIES.put( QUOTE_PRIME_DOUBLE, "&Prime;" );
...
154 157
  boolean isAmbiguous() {
155 158
    return mTokenType == QUOTE_AMBIGUOUS_SINGLE ||
156
      mTokenType == QUOTE_AMBIGUOUS_DOUBLE ||
157
      mTokenType == QUOTE_AMBIGUOUS_LEADING ||
158
      mTokenType == QUOTE_AMBIGUOUS_LAGGING;
159
           mTokenType == QUOTE_AMBIGUOUS_DOUBLE ||
160
           mTokenType == QUOTE_AMBIGUOUS_LEADING ||
161
           mTokenType == QUOTE_AMBIGUOUS_LAGGING;
159 162
  }
160 163
...
183 186
  public String toXml() {
184 187
    return "<" +
185
      mTokenType +
186
      " type='" + getType().name() + "'" +
187
      " began='" + began() + "'" +
188
      " ended='" + ended() + "' />";
188
           mTokenType +
189
           " type='" + getType().name() + "'" +
190
           " began='" + began() + "'" +
191
           " ended='" + ended() + "' />";
189 192
  }
190 193
191 194
  /**
192 195
   * Converts this token to its string representation, which will either be
193 196
   * an HTML entity or a character.
194 197
   *
195
   * @param entities {@code true} to convert quotation marks to HTML entities.
198
   * @param apostrophe How to convert quotation marks to HTML entities.
196 199
   * @return A plain quotation mark character or an HTML entity.
197 200
   */
198
  public String toString( final boolean entities ) {
201
  public String toString( final Apostrophe apostrophe ) {
199 202
    final var glyph = mLexeme.getType().glyph();
203
    final var tokenType = getType();
200 204
201
    return entities
202
      ? I18N_ENTITIES.getOrDefault( glyph, ENTITIES.get( getType() ) )
203
      : CHARS.getOrDefault( getType(), glyph.text() );
205
    // Retrieves the base glyph, unless curling was requested. When curling
206
    // apostrophes, this will determine the user-selected type of apostrophe
207
    // entity to use.
208
    return apostrophe == CONVERT_REGULAR
209
      ? CHARS.getOrDefault( tokenType, glyph.text() )
210
      : I18N_ENTITIES.getOrDefault( glyph, convert( tokenType, apostrophe ) );
211
  }
212
213
  private String convert(
214
    final TokenType tokenType,
215
    final Apostrophe apostrophe ) {
216
    return tokenType == QUOTE_APOSTROPHE
217
      ? apostrophe.toString()
218
      : ENTITIES.get( tokenType );
204 219
  }
205 220
206 221
  @Override
207 222
  public String toString() {
208 223
    return getClass().getSimpleName() + '[' +
209
      "mType=" + getType() +
210
      ", mBegan=" + began() +
211
      ", mEnded=" + ended() +
212
      ']';
224
           "mType=" + getType() +
225
           ", mBegan=" + began() +
226
           ", mEnded=" + ended() +
227
           ']';
213 228
  }
214 229
}
M src/test/java/com/whitemagicsoftware/keenquotes/parser/AmbiguityResolverTest.java
1
/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
1
/* Copyright 2022-2024 White Magic Software, Ltd. -- All rights reserved.
2 2
 *
3 3
 * SPDX-License-Identifier: BSD-2-Clause
...
11 11
import java.util.concurrent.atomic.AtomicInteger;
12 12
13
import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.*;
13 14
import static com.whitemagicsoftware.keenquotes.parser.Curler.*;
14 15
import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
...
46 47
        input,
47 48
        CONTRACTIONS,
48
        replace( output, offset, true ),
49
        filter -> false
49
        replace( output, offset, CONVERT_APOS_ENTITY ),
50
        _ -> false
50 51
      );
51 52
M src/test/java/com/whitemagicsoftware/keenquotes/parser/CurlerTest.java
1
/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
1
/* Copyright 2022-2024 White Magic Software, Ltd. -- All rights reserved.
2 2
 *
3 3
 * SPDX-License-Identifier: BSD-2-Clause
...
16 16
import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_PLAIN;
17 17
import static com.whitemagicsoftware.keenquotes.lex.FilterType.FILTER_XML;
18
import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.*;
18 19
import static com.whitemagicsoftware.keenquotes.texts.TestResource.open;
19 20
import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
...
32 33
  @Test
33 34
  public void test_Parse_UncurledQuotes1_CurlyQuotes() throws IOException {
34
    testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-1-pass.txt" );
35
    testCurler(
36
      createCurler( FILTER_PLAIN, CONVERT_APOS_ENTITY ),
37
      "unambiguous-1-pass.txt"
38
    );
35 39
  }
36 40
37 41
  @Test
38 42
  public void test_Parse_UncurledQuotes2_CurlyQuotes() throws IOException {
39
    testCurler( createCurler( FILTER_PLAIN, true ), "unambiguous-2-pass.txt" );
43
    testCurler(
44
      createCurler( FILTER_PLAIN, CONVERT_APOS_ENTITY ),
45
      "unambiguous-2-pass.txt"
46
    );
40 47
  }
41 48
42 49
  @Disabled
43 50
  @SuppressWarnings( "unused" )
44 51
  public void test_Parse_AmbiguousQuotes_PartiallyCurled() throws IOException {
45
    testCurler( createCurler( FILTER_PLAIN, false ), "ambiguous-n-pass.txt" );
52
    testCurler(
53
      createCurler( FILTER_PLAIN, CONVERT_REGULAR ), "ambiguous-n-pass.txt"
54
    );
46 55
  }
47 56
48 57
  @Test
49 58
  public void test_Parse_UncurledQuotesXml_CurlyQuotes() throws IOException {
50
    testCurler( createCurler( FILTER_XML, true ), "xml.txt" );
59
    testCurler(
60
      createCurler( FILTER_XML, CONVERT_APOS_ENTITY ), "xml.txt"
61
    );
51 62
  }
52 63
53 64
  @Test
54 65
  public void test_Parse_UncurledQuotesI11l_CurlyQuotes() throws IOException {
55
    testCurler( createCurler( FILTER_PLAIN, true ), "i18n.txt" );
66
    testCurler(
67
      createCurler( FILTER_PLAIN, CONVERT_APOS_ENTITY ), "i18n.txt"
68
    );
56 69
  }
57 70
...
64 77
   */
65 78
  @ParameterizedTest
66
  @ValueSource( strings = {"autonoma"} )
79
  @ValueSource( strings = { "autonoma" } )
67 80
  @Disabled
68 81
  void test_Parse_Story_Converted( final String filename ) throws IOException {
69 82
    final var sb = new StringBuilder( 2 ^ 20 );
83
    final var name = String.format( "%s%s", filename, ".html" );
70 84
71
    try( final var reader = open( STR."\{filename}.html" ) ) {
85
    try( final var reader = open( name ) ) {
72 86
      String line;
73 87
74 88
      while( (line = reader.readLine()) != null ) {
75 89
        sb.append( line ).append( SEP );
76 90
      }
77 91
    }
78 92
79
    final var curler = createCurler( FILTER_XML, true );
93
    final var curler = createCurler( FILTER_XML, CONVERT_APOS_ENTITY );
80 94
    System.out.println( curler.apply( sb.toString() ) );
81 95
  }
...
106 120
  private Function<String, String> createCurler(
107 121
    final FilterType filterType,
108
    final boolean entities ) {
109
    return new Curler( createContractions(), filterType, entities );
122
    final Apostrophe apostrophe ) {
123
    return new Curler( createContractions(), filterType, apostrophe );
110 124
  }
111 125
M src/test/java/com/whitemagicsoftware/keenquotes/parser/QuoteEmitterTest.java
1
/* Copyright 2022 White Magic Software, Ltd. -- All rights reserved.
1
/* Copyright 2022-2024 White Magic Software, Ltd. -- All rights reserved.
2 2
 *
3 3
 * SPDX-License-Identifier: BSD-2-Clause
...
10 10
import java.util.concurrent.atomic.AtomicInteger;
11 11
12
import static com.whitemagicsoftware.keenquotes.parser.Apostrophe.CONVERT_APOS_ENTITY;
12 13
import static com.whitemagicsoftware.keenquotes.parser.Curler.replace;
13 14
import static com.whitemagicsoftware.keenquotes.texts.TestResource.readPairs;
...
30 31
        input,
31 32
        CONTRACTIONS,
32
        replace( output, offset, true ),
33
        filter -> false
33
        replace( output, offset, CONVERT_APOS_ENTITY ),
34
        _ -> false
34 35
      );
35 36
M src/test/resources/com/whitemagicsoftware/keenquotes/texts/unambiguous-1-pass.txt
242 242
Computer says, &ldquo;&lsquo;It is mysteries---&rsquo;&rdquo;
243 243
244
He goes 'long with it.
245
He goes &apos;long with it.
246
247
The 'long and short' of it.
248
The &lsquo;long and short&rsquo; of it.
249
M src/test/resources/com/whitemagicsoftware/keenquotes/texts/unambiguous-2-pass.txt
83 83
&apos;Twas and &apos;tis whate&apos;er lay &apos;twixt dawn and dusk &apos;n River Styx.
84 84
85
'He goes 'long with it.'
86
&lsquo;He goes &apos;long with it.&rsquo;
87
85 88
# ########################################################################
86 89
# Possessives