Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenwrite.git

Remove HTTP request duplication, sniff media types, prevent widows/orphans

Author DaveJarvis <email>
Date 2021-04-05 17:13:39 GMT-0700
Commit c4d8101e8388090a8f217083ac6f4346503cde14
Parent aee09ca
src/main/java/com/keenwrite/io/HttpFacade.java
+package com.keenwrite.io;
+
+import java.io.BufferedInputStream;
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.HttpURLConnection;
+import java.net.URI;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.zip.GZIPInputStream;
+
+import static java.lang.System.getProperty;
+import static java.lang.System.setProperty;
+import static java.net.HttpURLConnection.HTTP_OK;
+import static java.net.HttpURLConnection.setFollowRedirects;
+
+/**
+ * Responsible for making HTTP requests, a thin wrapper around the
+ * {@link URLConnection} class. This will attempt to use compression.
+ * <p>
+ * This class must be used within a try-with-resources block to ensure all
+ * resources are released, even if only calling {@link Response#getMediaType()}.
+ * </p>
+ */
+public class HttpFacade {
+  static {
+    // These settings apply to every HTTP connection opened by the JVM, not
+    // only to connections created through this class.
+    setProperty( "http.keepAlive", "false" );
+    setFollowRedirects( true );
+  }
+
+  /**
+   * Sends an HTTP GET request to a server.
+   *
+   * @param url The remote resource to fetch.
+   * @return The server response, which the caller must close.
+   * @throws IOException The request failed or the server answered with a
+   *                     status other than 200 (OK).
+   */
+  public static Response httpGet( final URL url ) throws IOException {
+    return new Response( url );
+  }
+
+  /**
+   * Convenience method to send an HTTP GET request to a server.
+   *
+   * @param uri The remote resource to fetch.
+   * @return The server response, which the caller must close.
+   * @throws IOException The {@link URI} could not be converted to a
+   *                     {@link URL} or the request failed.
+   * @see #httpGet(URL)
+   */
+  public static Response httpGet( final URI uri ) throws IOException {
+    return httpGet( uri.toURL() );
+  }
+
+  /**
+   * Convenience method to send an HTTP GET request to a server.
+   *
+   * @param url The remote resource to fetch.
+   * @return The server response, which the caller must close.
+   * @throws IOException The text is not a valid {@link URL} or the request
+   *                     failed.
+   * @see #httpGet(URL)
+   */
+  public static Response httpGet( final String url ) throws IOException {
+    return httpGet( new URL( url ) );
+  }
+
+  /**
+   * Represents the outcome of a successful HTTP GET request. Callers are
+   * responsible for closing the response.
+   */
+  public static final class Response implements Closeable {
+    private final HttpURLConnection mConn;
+    private final BufferedInputStream mStream;
+
+    /**
+     * Connects to the given resource and opens its content for reading.
+     *
+     * @param url The remote resource to fetch.
+     * @throws IOException                   The connection failed or the
+     *                                       server did not answer with 200.
+     * @throws UnsupportedOperationException The {@link URL} does not use an
+     *                                       HTTP-based protocol.
+     */
+    private Response( final URL url ) throws IOException {
+      assert url != null;
+      final var connection = url.openConnection();
+
+      if( !(connection instanceof HttpURLConnection) ) {
+        throw new UnsupportedOperationException( url.toString() );
+      }
+
+      mConn = (HttpURLConnection) connection;
+      mConn.setUseCaches( false );
+      mConn.setInstanceFollowRedirects( true );
+      mConn.setRequestProperty( "Accept-Encoding", "gzip" );
+
+      // Leave the header unset when no agent is configured instead of
+      // passing along a null value.
+      final var agent = getProperty( "http.agent" );
+
+      if( agent != null ) {
+        mConn.setRequestProperty( "User-Agent", agent );
+      }
+
+      mConn.setRequestMethod( "GET" );
+      mConn.setConnectTimeout( 15000 );
+      mConn.setRequestProperty( "connection", "close" );
+
+      try {
+        mConn.connect();
+
+        final var code = mConn.getResponseCode();
+
+        // Other 2xx and 3xx status codes are not failures, but tell the user
+        // whenever a resource has changed in any unexpected way.
+        if( code != HTTP_OK ) {
+          throw new IOException( url.toString() + " [HTTP " + code + "]" );
+        }
+
+        mStream = openBufferedInputStream();
+      } catch( final IOException | RuntimeException ex ) {
+        // No instance is returned when construction fails, so the caller
+        // cannot close it: release the connection here to avoid a leak.
+        mConn.disconnect();
+        throw ex;
+      }
+    }
+
+    /**
+     * Returns the {@link MediaType} based on the resulting HTTP content type
+     * provided by the server. If the content type from the server is not
+     * found, this will probe the first several bytes to determine the type.
+     *
+     * @return The stream's IANA-defined {@link MediaType}.
+     * @throws IOException Could not read from the stream while probing.
+     */
+    public MediaType getMediaType() throws IOException {
+      final var contentType = mConn.getContentType();
+      var mediaType = MediaType.valueFrom( contentType );
+
+      if( mediaType.isUndefined() ) {
+        mediaType = StreamMediaType.getMediaType( mStream );
+      }
+
+      return mediaType;
+    }
+
+    /**
+     * Returns the stream opened using an HTTP connection, decompressing if
+     * the server supports gzip compression. The caller must close the stream
+     * by calling {@link #close()} on this object.
+     *
+     * @return The stream representing the content at the URL used to
+     * construct this {@link Response}.
+     */
+    public InputStream getInputStream() throws IOException {
+      return mStream;
+    }
+
+    /**
+     * This will close the associated stream and disconnect the HTTP request.
+     */
+    @Override
+    public void close() {
+      try {
+        // Releases the buffer and, for compressed content, the inflater.
+        mStream.close();
+      } catch( final IOException ignored ) {
+        // Best effort: the connection is torn down regardless.
+      } finally {
+        mConn.disconnect();
+      }
+    }
+
+    /**
+     * Opens the connection for reading. It is an error to call this more than
+     * once. This may use gzip compression. A {@link BufferedInputStream} is
+     * returned to allow peeking at the stream when checking the content
+     * type.
+     *
+     * @return The {@link InputStream} containing content from an HTTP request.
+     * @throws IOException Could not open the stream.
+     */
+    private BufferedInputStream openBufferedInputStream() throws IOException {
+      final var encoding = mConn.getContentEncoding();
+      final var is = mConn.getInputStream();
+
+      return new BufferedInputStream(
+        "gzip".equalsIgnoreCase( encoding ) ? new GZIPInputStream( is ) : is );
+    }
+  }
+}
src/main/java/com/keenwrite/io/HttpMediaType.java
package com.keenwrite.io;
-import java.net.MalformedURLException;
+import java.io.IOException;
import java.net.URI;
-import java.net.URL;
-import java.net.http.HttpClient;
-import java.net.http.HttpRequest;
import static com.keenwrite.events.StatusEvent.clue;
+import static com.keenwrite.io.HttpFacade.httpGet;
import static com.keenwrite.io.MediaType.UNDEFINED;
-import static java.net.http.HttpClient.Redirect.NORMAL;
-import static java.net.http.HttpRequest.BodyPublishers.noBody;
-import static java.net.http.HttpResponse.BodyHandlers.ofString;
-import static java.time.Duration.ofSeconds;
/**
* Responsible for determining {@link MediaType} based on the content-type from
* an HTTP request.
*/
public final class HttpMediaType {
-
- private static final HttpClient HTTP_CLIENT = HttpClient
- .newBuilder()
- .connectTimeout( ofSeconds( 10 ) )
- .followRedirects( NORMAL )
- .build();
/**
* Performs an HTTP request to determine the media type based on the
* Content-Type header returned from the server.
*
* @param uri Determine the media type for this resource.
* @return The data type for the resource or {@link MediaType#UNDEFINED} if
* unmapped.
- * @throws MalformedURLException The {@link URI} could not be converted to
- * an instance of {@link URL}.
+ * @throws IOException The {@link URI} could not be fetched.
*/
- public static MediaType valueFrom( final URI uri )
- throws MalformedURLException {
- final var mediaType = new MediaType[]{UNDEFINED};
-
- try {
- clue( "Main.status.image.request.init" );
- final var request = HttpRequest
- .newBuilder()
- .setHeader( "User-Agent", System.getProperty( "http.agent" ) )
- .method( "GET", noBody() )
- .uri( uri )
- .build();
- clue( "Main.status.image.request.fetch", uri.getHost() );
- final var response = HTTP_CLIENT.send( request, ofString() );
- final var headers = response.headers();
- final var map = headers.map();
+ public static MediaType valueFrom( final URI uri ) throws IOException {
+ var mediaType = UNDEFINED;
- map.forEach( ( key, values ) -> {
- if( "content-type".equalsIgnoreCase( key ) ) {
- mediaType[ 0 ] = MediaType.valueFrom( values.get( 0 ) );
- clue( "Main.status.image.request.success", mediaType[ 0 ] );
- }
- } );
+ clue( "Main.status.image.request.init" );
- clue();
- } catch( final Exception ex ) {
- clue( ex );
+ try( final var response = httpGet( uri ) ) {
+ clue( "Main.status.image.request.fetch", uri.getHost() );
+ mediaType = response.getMediaType();
+ clue( "Main.status.image.request.success", mediaType );
}
- return mediaType[ 0 ];
+ return mediaType;
}
}
src/main/java/com/keenwrite/io/StreamMediaType.java
package com.keenwrite.io;
+import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
private StreamMediaType() {
+ }
+
+ /**
+  * Returns the {@link MediaType} whose known signature matches the leading
+  * bytes of the given data. At most {@code FORMAT_LENGTH} bytes are
+  * examined; any additional bytes are ignored.
+  *
+  * @param data Binary data to compare against the list of known formats.
+  * @return The IANA-defined {@link MediaType}, or
+  * {@link MediaType#UNDEFINED} if indeterminate.
+  */
+ public static MediaType getMediaType( final byte[] data ) {
+   assert data != null;
+
+   // Unsigned copy of the header, sized to the comparison loop's bound so
+   // that longer input cannot overrun it; missing bytes are padded with 0xFF.
+   final var source = new int[ FORMAT_LENGTH ];
+
+   for( int i = 0; i < source.length; i++ ) {
+     source[ i ] = i < data.length ? data[ i ] & 0xFF : 0xFF;
+   }
+
+   for( final var key : FORMAT.keySet() ) {
+     int i = -1;
+     boolean matches = true;
+
+     // A signature value of -1 matches any byte; END_OF_DATA stops the
+     // comparison for signatures shorter than FORMAT_LENGTH.
+     while( ++i < FORMAT_LENGTH && key[ i ] != END_OF_DATA && matches ) {
+       matches = key[ i ] == source[ i ] || key[ i ] == -1;
+     }
+
+     if( matches ) {
+       return FORMAT.get( key );
+     }
+   }
+
+   return UNDEFINED;
}
+ /**
+ * Convenience method to return the probed media type for the given
+ * {@link Path} instance by delegating to {@link #getMediaType(File)}.
+ *
+ * @param path Path to ascertain the {@link MediaType}.
+ * @return The IANA-defined {@link MediaType}, or
+ * {@link MediaType#UNDEFINED} if indeterminate.
+ * @throws IOException Could not read from the {@link File}.
+ */
public static MediaType getMediaType( final Path path ) throws IOException {
return getMediaType( path.toFile() );
/**
* Convenience method to return the probed media type for the given
- * {@link InputStream} instance. The caller is responsible for closing
+ * {@link BufferedInputStream} instance. <strong>This resets the stream
+ * pointer</strong> making the call idempotent. Users of this class should
+ * prefer to call this method when operating on streams to avoid advancing
* the stream.
*
- * @param is Data source to ascertain the {@link MediaType}.
+ * @param bis Data source to ascertain the {@link MediaType}.
* @return The IANA-defined {@link MediaType}, or
* {@link MediaType#UNDEFINED} if indeterminate.
- * @throws IOException Could not read from the {@link InputStream}.
+ * @throws IOException Could not read from the {@link BufferedInputStream}.
*/
- public static MediaType getMediaType( final InputStream is )
+ public static MediaType getMediaType( final BufferedInputStream bis )
throws IOException {
- final var input = new byte[ FORMAT_LENGTH ];
- final var count = is.read( input, 0, FORMAT_LENGTH );
-
- if( count > 1 ) {
- final var available = new byte[ count ];
- arraycopy( input, 0, available, 0, count );
- return getMediaType( available );
- }
+ bis.mark( FORMAT_LENGTH );
+ final var result = getMediaType( (InputStream) bis );
+ bis.reset();
- return UNDEFINED;
+ return result;
}
/**
- * Returns the {@link MediaType} for a given set of bytes.
+ * Helper method to return the probed media type for the given
+ * {@link InputStream} instance. The caller is responsible for closing
+ * the stream. <strong>This advances the stream pointer.</strong>
*
- * @param source Binary data to compare against the list of known formats.
+ * @param is Data source to ascertain the {@link MediaType}.
* @return The IANA-defined {@link MediaType}, or
* {@link MediaType#UNDEFINED} if indeterminate.
+ * @throws IOException Could not read from the {@link InputStream}.
+ * @see #getMediaType(BufferedInputStream) to perform a non-destructive
+ * read.
*/
- public static MediaType getMediaType( final byte[] source ) {
- assert source != null;
-
- final var header = new int[]{
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
-
- for( int i = 0; i < source.length; i++ ) {
- header[ i ] = source[ i ] & 0xFF;
- }
-
- for( final var key : FORMAT.keySet() ) {
- int i = -1;
- boolean matches = true;
-
- while( ++i < FORMAT_LENGTH && key[ i ] != END_OF_DATA && matches ) {
- matches = key[ i ] == header[ i ];
- }
+ private static MediaType getMediaType( final InputStream is )
+ throws IOException {
+ final var input = new byte[ FORMAT_LENGTH ];
+ final var count = is.read( input, 0, FORMAT_LENGTH );
- if( matches ) {
- return FORMAT.get( key );
- }
+ if( count > 1 ) {
+ final var available = new byte[ count ];
+ arraycopy( input, 0, available, 0, count );
+ return getMediaType( available );
}
src/main/java/com/keenwrite/processors/PdfProcessor.java
import static com.keenwrite.Bootstrap.APP_TITLE_LOWERCASE;
+import static com.keenwrite.Messages.get;
import static com.keenwrite.events.StatusEvent.clue;
import static com.keenwrite.io.MediaType.TEXT_XML;
public String apply( final String xhtml ) {
try {
+ clue( get( "Main.status.typeset.create" ) );
final var sTypesetter = new Typesetter( mContext.getWorkspace() );
final var document = TEXT_XML.createTemporaryFile( APP_TITLE_LOWERCASE );
- final var exportPath = mContext.getExportPath();
- sTypesetter.typeset( writeString( document, xhtml ), exportPath );
+ final var pathOutput = mContext.getExportPath();
+ clue( get( "Main.status.typeset.export" ) );
+ final var pathInput = writeString( document, xhtml );
+ sTypesetter.typeset( pathInput, pathOutput );
} catch( final Exception ex ) {
clue( ex );
src/main/java/com/keenwrite/processors/XhtmlProcessor.java
package com.keenwrite.processors;
-import com.keenwrite.io.StreamMediaType;
+import com.keenwrite.io.HttpFacade;
import com.keenwrite.preferences.Workspace;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.FileNotFoundException;
-import java.net.HttpURLConnection;
-import java.net.URL;
import java.nio.file.Path;
import java.util.regex.Pattern;
import static com.keenwrite.Bootstrap.APP_TITLE_LOWERCASE;
import static com.keenwrite.events.StatusEvent.clue;
-import static com.keenwrite.io.MediaType.valueFrom;
import static com.keenwrite.preferences.WorkspaceKeys.KEY_IMAGES_DIR;
import static com.keenwrite.preferences.WorkspaceKeys.KEY_IMAGES_ORDER;
import static com.keenwrite.util.ProtocolScheme.getProtocol;
import static java.lang.String.format;
import static java.nio.file.Files.copy;
-import static java.nio.file.Files.move;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
import static java.util.regex.Pattern.UNICODE_CHARACTER_CLASS;
// Download remote resources into temporary files.
if( protocol.isRemote() ) {
- final var url = new URL( src );
- final var conn = (HttpURLConnection) url.openConnection();
- conn.setUseCaches( false );
- conn.setInstanceFollowRedirects( true );
-
- final var contentType = conn.getContentType();
- var mediaType = valueFrom( contentType );
+ final var response = HttpFacade.httpGet( src);
+ final var mediaType = response.getMediaType();
imageFile = mediaType.createTemporaryFile( APP_TITLE_LOWERCASE );
- try( final var image = conn.getInputStream() ) {
+ try( final var image = response.getInputStream() ) {
copy( image, imageFile, REPLACE_EXISTING );
- }
-
- conn.disconnect();
-
- if( mediaType.isUndefined() ) {
- mediaType = StreamMediaType.getMediaType( imageFile );
- final var file = mediaType.createTemporaryFile( APP_TITLE_LOWERCASE );
- move( imageFile, file, REPLACE_EXISTING );
- imageFile = file;
}
src/main/java/com/keenwrite/typesetting/Typesetter.java
*/
public class Typesetter {
- private static final File TYPESETTER = new File( "context");
+ private static final File TYPESETTER = new File( "context" );
private static final ExecutorService sService = newFixedThreadPool( 5 );
final var builder = new ProcessBuilder( mArgs );
builder.directory( mDirectory.toFile() );
+
+ // TODO: Create luatex-cache directory in system temporary directory.
+// final var env = builder.environment();
+// env.put( "TEXMFCACHE", System.getProperty( "java.io.tmpdir" ) );
+
final var process = builder.start();
process.waitFor();
src/main/resources/com/keenwrite/messages.properties
Main.status.image.request.error.cert=Could not accept certificate for ''{0}''
+Main.status.typeset.create=Creating typesetter
+Main.status.typeset.export=Export document as XHTML
Main.status.typeset.began=Started typesetting ''{0}''
Main.status.typeset.ended.success=Finished typesetting ''{0}'' ({1} elapsed)
Action.file.export.pdf.description=Typeset the document
+Action.file.export.pdf.accelerator=Shortcut+P
Action.file.export.pdf.text=_PDF
Action.file.export.pdf.icon=FILE_PDF_ALT
themes/plain/document.tex
]
+% Prevent widows and orphans by forcing at least two (2) lines together
+\startsetups[grid][TextPenalties]
+ \setdefaultpenalties
+ \setpenalties\widowpenalties{2}{10000}
+ \setpenalties\clubpenalties {2}{10000}
+\stopsetups
+
+\setuplayout[
+ grid=yes,
+ setups=TextPenalties,
+]
themes/plain/figures.tex
order={svg,pdf,png},
maxwidth=\makeupwidth,
- width=\makeupwidth,
]
themes/plain/tables.tex
topframe=on,
bottomframe=on,
- framecolor=TextColourPrimaryLt,
+ framecolor=TextColourTertiaryLt,
toffset=2pt,
boffset=2pt,
Delta 266 lines added, 104 lines removed, 162-line increase