| ``` | ||
| - | ||
| + | ||
| :: Figure caption text | ||
| ``` | ||
| -In [@fig:kitten], a cute kitten is shown. | ||
| +In [@fig:animal], a cute animal is shown. | ||
| - | ||
| + | ||
| -:: World's cutest kitten {#fig:kitten} | ||
| +:: World's cutest animal {#fig:animal} | ||
| -There is no cuter kitten than the one in [@fig:kitten]. | ||
| +There is no cuter animal than the one in [@fig:animal]. | ||
| ``` | ||
| generated by the typesetting system: | ||
| -| Type name | English name | ||
| -|---|---| | ||
| -| algorithm | Algorithm | | ||
| -| alg | Algorithm | | ||
| -| equation | Equation | | ||
| -| eqn | Equation | | ||
| -| eq | Equation | | ||
| -| figure | Figure | | ||
| -| fig | Figure | | ||
| -| formula | Formula | | ||
| -| listing | Listing | | ||
| -| list | Listing | | ||
| -| lst | Listing | | ||
| -| lyric | Lyrics | | ||
| -| music | Score | | ||
| -| score | Score | | ||
| -| source | Listing | | ||
| -| src | Listing | | ||
| -| tab | Table | | ||
| -| table | Table | | ||
| -| tbl | Table | | ||
| +| Type name | English name | | ||
| +|-----------|--------------| | ||
| +| algorithm | Algorithm | | ||
| +| alg | Algorithm | | ||
| +| equation | Equation | | ||
| +| eqn | Equation | | ||
| +| eq | Equation | | ||
| +| figure | Figure | | ||
| +| fig | Figure | | ||
| +| formula | Formula | | ||
| +| listing | Listing | | ||
| +| list | Listing | | ||
| +| lst | Listing | | ||
| +| lyric | Lyrics | | ||
| +| music | Score | | ||
| +| score | Score | | ||
| +| source | Listing | | ||
| +| src | Listing | | ||
| +| tab | Table | | ||
| +| table | Table | | ||
| +| tbl | Table | | ||
| These values are defined in the theme's `xhtml/xml-references.tex` file. | ||
| - | ||
| import com.keenwrite.io.MediaType; | ||
| +import com.keenwrite.util.EncodingDetector; | ||
| import javafx.beans.property.ReadOnlyBooleanProperty; | ||
| import javafx.scene.Node; | ||
| -import org.mozilla.universalchardet.UniversalDetector; | ||
| import java.io.File; | ||
| import java.nio.charset.Charset; | ||
| import java.nio.file.Path; | ||
| import static com.keenwrite.constants.Constants.DEFAULT_CHARSET; | ||
| import static com.keenwrite.events.StatusEvent.clue; | ||
| import static com.keenwrite.io.SysFile.toFile; | ||
| -import static java.nio.charset.Charset.forName; | ||
| import static java.nio.file.Files.readAllBytes; | ||
| import static java.nio.file.Files.write; | ||
| import static java.util.Arrays.asList; | ||
| -import static java.util.Locale.ENGLISH; | ||
| /** | ||
| private Charset detectEncoding( final byte[] bytes ) { | ||
| - final var detector = new UniversalDetector( null ); | ||
| - detector.handleData( bytes, 0, bytes.length ); | ||
| - detector.dataEnd(); | ||
| - | ||
| - final var detectedCharset = detector.getDetectedCharset(); | ||
| - | ||
| - // TODO: Revert when the issue has been fixed. | ||
| - // https://github.com/albfernandez/juniversalchardet/issues/35 | ||
| - return switch( detectedCharset ) { | ||
| - case null -> DEFAULT_CHARSET; | ||
| - case "US-ASCII", "TIS620" -> DEFAULT_CHARSET; | ||
| - default -> forName( detectedCharset.toUpperCase( ENGLISH ) ); | ||
| - }; | ||
| + return new EncodingDetector().detect( bytes ); | ||
| } | ||
| +/* Copyright 2024 White Magic Software, Ltd. -- All rights reserved. | ||
| + * | ||
| + * SPDX-License-Identifier: MIT | ||
| + */ | ||
| +package com.keenwrite.util; | ||
| + | ||
| +import org.mozilla.universalchardet.UniversalDetector; | ||
| + | ||
| +import java.nio.charset.Charset; | ||
| + | ||
| +import static com.keenwrite.constants.Constants.DEFAULT_CHARSET; | ||
| +import static java.nio.charset.Charset.forName; | ||
| +import static java.util.Locale.ENGLISH; | ||
| + | ||
| +/** | ||
| + * Wraps the {@link UniversalDetector} to provide enhanced abilities | ||
| + * and bug fixes (if needed). | ||
| + */ | ||
| +public class EncodingDetector { | ||
| + | ||
| + private final UniversalDetector mDetector; | ||
| + | ||
| + public EncodingDetector() { | ||
| + mDetector = new UniversalDetector( null ); | ||
| + } | ||
| + | ||
| + /** | ||
| + * Returns the character set detected for the given bytes. This will coerce | ||
| + * both US-ASCII and TIS620 to UTF-8. | ||
| + * | ||
| + * @param bytes The textual content having an as yet unknown encoding. | ||
| + * @return The character encoding for the given bytes. | ||
| + */ | ||
| + public Charset detect( final byte[] bytes ) { | ||
| + mDetector.handleData( bytes, 0, bytes.length ); | ||
| + mDetector.dataEnd(); | ||
| + | ||
| + final String detectedCharset = mDetector.getDetectedCharset(); | ||
| + | ||
| + // TODO: Revert when the issue has been fixed. | ||
| + // https://github.com/albfernandez/juniversalchardet/issues/35 | ||
| + return switch( detectedCharset ) { | ||
| + case null -> DEFAULT_CHARSET; | ||
| + case "US-ASCII", "TIS620" -> DEFAULT_CHARSET; | ||
| + default -> forName( detectedCharset.toUpperCase( ENGLISH ) ); | ||
| + }; | ||
| + } | ||
| +} | ||
| package com.keenwrite.encoding; | ||
| +import com.keenwrite.util.EncodingDetector; | ||
| import org.junit.jupiter.api.Test; | ||
| -import org.mozilla.universalchardet.UniversalDetector; | ||
| -import java.nio.charset.Charset; | ||
| import java.nio.charset.StandardCharsets; | ||
| import static org.junit.jupiter.api.Assertions.assertEquals; | ||
| -import static org.junit.jupiter.api.Assertions.assertNotNull; | ||
| public class EncodingTest { | ||
| @Test | ||
| public void test_Encoding_UTF8_UTF8() { | ||
| final var bytes = testBytes(); | ||
| - | ||
| - final var detector = new UniversalDetector( null ); | ||
| - detector.handleData( bytes, 0, bytes.length ); | ||
| - detector.dataEnd(); | ||
| - | ||
| + final var detector = new EncodingDetector(); | ||
| final var expectedCharset = StandardCharsets.UTF_8; | ||
| - final var detectedCharset = detector.getDetectedCharset(); | ||
| - | ||
| - assertNotNull( detectedCharset ); | ||
| - | ||
| - final var actualCharset = Charset.forName( detectedCharset ); | ||
| + final var actualCharset = detector.detect( bytes ); | ||
| assertEquals( expectedCharset, actualCharset ); |
| "https://kroki.io/robots.txt", TEXT_PLAIN, | ||
| "https://place-hold.it/300x500", IMAGE_GIF, | ||
| - "https://placekitten.com/g/200/300", IMAGE_JPEG, | ||
| + "https://loremflickr.com/200/300", IMAGE_JPEG, | ||
| "https://upload.wikimedia.org/wikipedia/commons/9/9f/Vimlogo.svg", IMAGE_SVG_XML, | ||
| "https://kroki.io//graphviz/svg/eNpLyUwvSizIUHBXqPZIzcnJ17ULzy_KSanlAgB1EAjQ", IMAGE_SVG_XML | ||
| ); | ||
| //@formatter:on | ||
| map.forEach( ( k, v ) -> { | ||
| try( var response = open( k ) ) { | ||
| + System.out.printf( "%s => %s%n", k, v ); | ||
| assertEquals( v, response.getMediaType() ); | ||
| } catch( final Exception e ) { |
| XHTML_TEX, | ||
| """ | ||
| - <html><head><title/><meta content="2" name="count"/></head><body><p>the 👍 emoji</p> | ||
| + <html><head><title/><meta charset="UTF-8"/><meta content="2" name="count"/></head><body><p>the 👍 emoji</p> | ||
| </body></html>""" | ||
| ) |
| private static final String PATH_KITTEN_JPG = STR."\{URI_PATH}.jpg"; | ||
| + /** Web server that doles out images. */ | ||
| + private static final String PLACEHOLDER = "loremflickr.com"; | ||
| + | ||
| private static final Map<String, String> IMAGES = new LinkedHashMap<>(); | ||
| static { | ||
| add( PATH_KITTEN_PNG, URI_PATH ); | ||
| add( PATH_KITTEN_PNG, URI_FILE ); | ||
| add( PATH_KITTEN_PNG, PATH_KITTEN_PNG ); | ||
| add( PATH_KITTEN_JPG, PATH_KITTEN_JPG ); | ||
| - add( "//placekitten.com/200/200", "//placekitten.com/200/200" ); | ||
| - add( "ftp://placekitten.com/200/200", "ftp://placekitten.com/200/200" ); | ||
| - add( "http://placekitten.com/200/200", "http://placekitten.com/200/200" ); | ||
| - add( "https://placekitten.com/200/200", "https://placekitten.com/200/200" ); | ||
| + add( STR."//\{PLACEHOLDER}/200/200", STR."//\{PLACEHOLDER}/200/200" ); | ||
| + add( STR."ftp://\{PLACEHOLDER}/200/200", STR."ftp://\{PLACEHOLDER}/200/200" ); | ||
| + add( STR."http://\{PLACEHOLDER}/200/200", STR."http://\{PLACEHOLDER}/200/200" ); | ||
| + add( STR."https://\{PLACEHOLDER}/200/200", STR."https://\{PLACEHOLDER}/200/200" ); | ||
| } | ||
| args( | ||
| """ | ||
| -  | ||
| +  | ||
| :: Caption **bold** {#fig:label} *italics* | ||
| """, | ||
| """ | ||
| <p><span class="caption">Caption <strong>bold</strong> <em>italics</em></span><a class="name" data-type="fig" name="label" /></p> | ||
| - <p><img src="placekitten" alt="kitteh" /></p> | ||
| + <p><img src="kitten" alt="kitteh" /></p> | ||
| """ | ||
| ), |
| Author | DaveJarvis <email> |
|---|---|
| Date | 2024-04-29 17:13:59 GMT-0700 |
| Commit | 39c7a45a49c810a62b505b96d687ee68f535bfde |
| Parent | 3e8076c |
| Delta | 91 lines added, 64 lines removed, 27-line increase |