Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenwrite.git
docs/references.md
```
-![image title](https://placekitten.com/600/350)
+![image title](https://loremflickr.com/600/350)
:: Figure caption text
```
-In [@fig:kitten], a cute kitten is shown.
+In [@fig:animal], a cute animal is shown.
-![image title](https://placekitten.com/600/350)
+![image title](https://loremflickr.com/600/350)
-:: World's cutest kitten {#fig:kitten}
+:: World's cutest animal {#fig:animal}
-There is no cuter kitten than the one in [@fig:kitten].
+There is no cuter animal than the one in [@fig:animal].
```
generated by the typesetting system:
-| Type name | English name
-|---|---|
-| algorithm | Algorithm |
-| alg | Algorithm |
-| equation | Equation |
-| eqn | Equation |
-| eq | Equation |
-| figure | Figure |
-| fig | Figure |
-| formula | Formula |
-| listing | Listing |
-| list | Listing |
-| lst | Listing |
-| lyric | Lyrics |
-| music | Score |
-| score | Score |
-| source | Listing |
-| src | Listing |
-| tab | Table |
-| table | Table |
-| tbl | Table |
+| Type name | English name |
+|-----------|--------------|
+| algorithm | Algorithm |
+| alg | Algorithm |
+| equation | Equation |
+| eqn | Equation |
+| eq | Equation |
+| figure | Figure |
+| fig | Figure |
+| formula | Formula |
+| listing | Listing |
+| list | Listing |
+| lst | Listing |
+| lyric | Lyrics |
+| music | Score |
+| score | Score |
+| source | Listing |
+| src | Listing |
+| tab | Table |
+| table | Table |
+| tbl | Table |
These values are defined in the theme's `xhtml/xml-references.tex` file.
-
src/main/java/com/keenwrite/editors/TextResource.java
import com.keenwrite.io.MediaType;
+import com.keenwrite.util.EncodingDetector;
import javafx.beans.property.ReadOnlyBooleanProperty;
import javafx.scene.Node;
-import org.mozilla.universalchardet.UniversalDetector;
import java.io.File;
import java.nio.charset.Charset;
import java.nio.file.Path;
import static com.keenwrite.constants.Constants.DEFAULT_CHARSET;
import static com.keenwrite.events.StatusEvent.clue;
import static com.keenwrite.io.SysFile.toFile;
-import static java.nio.charset.Charset.forName;
import static java.nio.file.Files.readAllBytes;
import static java.nio.file.Files.write;
import static java.util.Arrays.asList;
-import static java.util.Locale.ENGLISH;
/**
private Charset detectEncoding( final byte[] bytes ) {
- final var detector = new UniversalDetector( null );
- detector.handleData( bytes, 0, bytes.length );
- detector.dataEnd();
-
- final var detectedCharset = detector.getDetectedCharset();
-
- // TODO: Revert when the issue has been fixed.
- // https://github.com/albfernandez/juniversalchardet/issues/35
- return switch( detectedCharset ) {
- case null -> DEFAULT_CHARSET;
- case "US-ASCII", "TIS620" -> DEFAULT_CHARSET;
- default -> forName( detectedCharset.toUpperCase( ENGLISH ) );
- };
+ return new EncodingDetector().detect( bytes );
}
src/main/java/com/keenwrite/util/EncodingDetector.java
+/* Copyright 2024 White Magic Software, Ltd. -- All rights reserved.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+package com.keenwrite.util;
+
+import org.mozilla.universalchardet.UniversalDetector;
+
+import java.nio.charset.Charset;
+
+import static com.keenwrite.constants.Constants.DEFAULT_CHARSET;
+import static java.nio.charset.Charset.forName;
+import static java.util.Locale.ENGLISH;
+
+/**
+ * Wraps the {@link UniversalDetector} to provide enhanced abilities
+ * and bug fixes (if needed).
+ * <p>
+ * Each instance owns a single underlying detector that is reset at the
+ * start of every {@link #detect(byte[])} call, so an instance may be reused
+ * for multiple inputs.
+ * </p>
+ */
+public class EncodingDetector {
+
+  /** Stateful third-party detector; must be reset between inputs. */
+  private final UniversalDetector mDetector;
+
+  /**
+   * Creates a detector that has no listener attached.
+   */
+  public EncodingDetector() {
+    mDetector = new UniversalDetector( null );
+  }
+
+  /**
+   * Returns the character set for the given input. This will coerce
+   * both US-ASCII and TIS620 to the application's default character set
+   * ({@code DEFAULT_CHARSET}); the default is also returned when no
+   * encoding could be detected.
+   *
+   * @param bytes The textual content having an as yet unknown encoding.
+   * @return The character encoding for the given bytes.
+   */
+  public Charset detect( final byte[] bytes ) {
+    // Discard any state left over from a previous call; without this, a
+    // reused instance can report the encoding of an earlier input.
+    mDetector.reset();
+    mDetector.handleData( bytes, 0, bytes.length );
+    mDetector.dataEnd();
+
+    final String detectedCharset = mDetector.getDetectedCharset();
+
+    // TODO: Revert when the issue has been fixed.
+    // https://github.com/albfernandez/juniversalchardet/issues/35
+    return switch( detectedCharset ) {
+      case null -> DEFAULT_CHARSET;
+      case "US-ASCII", "TIS620" -> DEFAULT_CHARSET;
+      default -> forName( detectedCharset.toUpperCase( ENGLISH ) );
+    };
+  }
+}
src/main/resources/lexicons/en.txt
Binary files differ
src/test/java/com/keenwrite/encoding/EncodingTest.java
package com.keenwrite.encoding;
+import com.keenwrite.util.EncodingDetector;
import org.junit.jupiter.api.Test;
-import org.mozilla.universalchardet.UniversalDetector;
-import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
public class EncodingTest {
@Test
public void test_Encoding_UTF8_UTF8() {
final var bytes = testBytes();
-
- final var detector = new UniversalDetector( null );
- detector.handleData( bytes, 0, bytes.length );
- detector.dataEnd();
-
+ final var detector = new EncodingDetector();
final var expectedCharset = StandardCharsets.UTF_8;
- final var detectedCharset = detector.getDetectedCharset();
-
- assertNotNull( detectedCharset );
-
- final var actualCharset = Charset.forName( detectedCharset );
+ final var actualCharset = detector.detect( bytes );
assertEquals( expectedCharset, actualCharset );
src/test/java/com/keenwrite/io/MediaTypeTest.java
"https://kroki.io/robots.txt", TEXT_PLAIN,
"https://place-hold.it/300x500", IMAGE_GIF,
- "https://placekitten.com/g/200/300", IMAGE_JPEG,
+ "https://loremflickr.com/200/300", IMAGE_JPEG,
"https://upload.wikimedia.org/wikipedia/commons/9/9f/Vimlogo.svg", IMAGE_SVG_XML,
"https://kroki.io//graphviz/svg/eNpLyUwvSizIUHBXqPZIzcnJ17ULzy_KSanlAgB1EAjQ", IMAGE_SVG_XML
);
//@formatter:on
map.forEach( ( k, v ) -> {
try( var response = open( k ) ) {
+ System.out.printf( "%s => %s%n", k, v );
assertEquals( v, response.getMediaType() );
} catch( final Exception e ) {
src/test/java/com/keenwrite/processors/html/XhtmlProcessorTest.java
XHTML_TEX,
"""
- <html><head><title/><meta content="2" name="count"/></head><body><p>the 👍 emoji</p>
+ <html><head><title/><meta charset="UTF-8"/><meta content="2" name="count"/></head><body><p>the 👍 emoji</p>
</body></html>"""
)
src/test/java/com/keenwrite/processors/markdown/extensions/images/ImageLinkExtensionTest.java
private static final String PATH_KITTEN_JPG = STR."\{URI_PATH}.jpg";
+ /** Web server that doles out images. */
+ private static final String PLACEHOLDER = "loremflickr.com";
+
private static final Map<String, String> IMAGES = new LinkedHashMap<>();
static {
add( PATH_KITTEN_PNG, URI_PATH );
add( PATH_KITTEN_PNG, URI_FILE );
add( PATH_KITTEN_PNG, PATH_KITTEN_PNG );
add( PATH_KITTEN_JPG, PATH_KITTEN_JPG );
- add( "//placekitten.com/200/200", "//placekitten.com/200/200" );
- add( "ftp://placekitten.com/200/200", "ftp://placekitten.com/200/200" );
- add( "http://placekitten.com/200/200", "http://placekitten.com/200/200" );
- add( "https://placekitten.com/200/200", "https://placekitten.com/200/200" );
+ add( STR."//\{PLACEHOLDER}/200/200", STR."//\{PLACEHOLDER}/200/200" );
+ add( STR."ftp://\{PLACEHOLDER}/200/200", STR."ftp://\{PLACEHOLDER}/200/200" );
+ add( STR."http://\{PLACEHOLDER}/200/200", STR."http://\{PLACEHOLDER}/200/200" );
+ add( STR."https://\{PLACEHOLDER}/200/200", STR."https://\{PLACEHOLDER}/200/200" );
}
src/test/java/com/keenwrite/processors/markdown/extensions/references/CaptionsAndCrossReferencesExtensionTest.java
args(
"""
- ![kitteh](placekitten)
+ ![kitteh](kitten)
:: Caption **bold** {#fig:label} *italics*
""",
"""
<p><span class="caption">Caption <strong>bold</strong> <em>italics</em></span><a class="name" data-type="fig" name="label" /></p>
- <p><img src="placekitten" alt="kitteh" /></p>
+ <p><img src="kitten" alt="kitteh" /></p>
"""
),

Fixes unit tests

Author DaveJarvis <email>
Date 2024-04-29 17:13:59 GMT-0700
Commit 39c7a45a49c810a62b505b96d687ee68f535bfde
Parent 3e8076c
Delta 91 lines added, 64 lines removed, 27-line increase