Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenwrite.git

Calculate correct document text offset for inserting caret position marker in XML text documents using VTD-XML.

Author: djarvis <email>
Date: 2016-12-17 17:59:28 GMT-0800
Commit: ff26965bbffd46da4560c2235b222cdf59936448
Parent: 6fd26e6
Delta: 281 lines added, 102 lines removed, 179-line increase
src/main/java/com/scrivenvar/processors/CaretReplacementProcessor.java
+/*
+ * Copyright 2016 White Magic Software, Ltd.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * o Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * o Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package com.scrivenvar.processors;
+
+import static com.scrivenvar.Constants.CARET_POSITION_HTML;
+import static com.scrivenvar.Constants.CARET_POSITION_MD;
+
+/**
+ * Responsible for replacing the caret position marker with an HTML element
+ * suitable to use as a reference for scrolling a view port.
+ *
+ * @author White Magic Software, Ltd.
+ */
+public class CaretReplacementProcessor extends AbstractProcessor<String> {
+  /** Value returned by {@link String#indexOf(String)} when there is no match. */
+  private static final int INDEX_NOT_FOUND = -1;
+
+  /**
+   * Constructs a processor and hands the given processor to the superclass.
+   *
+   * @param processor Next link in the processing chain.
+   */
+  public CaretReplacementProcessor( final Processor<String> processor ) {
+    super( processor );
+  }
+
+  /**
+   * Replaces the first occurrence of CARET_POSITION_MD with
+   * CARET_POSITION_HTML. Only the first occurrence is replaced; any further
+   * occurrences are left untouched.
+   *
+   * @param t The text that may contain the caret position marker, must not
+   * be null.
+   *
+   * @return The text with the first caret position marker replaced, or t
+   * itself when it contains no marker.
+   */
+  @Override
+  public String processLink( final String t ) {
+    return replace( t, CARET_POSITION_MD, CARET_POSITION_HTML );
+  }
+
+  /**
+   * Replaces the first occurrence of needle with thread in the given
+   * haystack. Based on the Apache Commons 3 StringUtils.replace method;
+   * intended to be cheaper than String.replace, which historically was
+   * implemented using a regular expression.
+   *
+   * @param haystack Search this string for the needle, must not be null.
+   * @param needle The text to find in the haystack, must not be null.
+   * @param thread Replace the needle with this text, if the needle is found;
+   * must not be null.
+   *
+   * @return The haystack with the first instance of needle replaced with
+   * thread, or the same haystack instance when needle does not occur.
+   */
+  private static String replace(
+    final String haystack, final String needle, final String thread ) {
+
+    final int end = haystack.indexOf( needle );
+
+    if( end == INDEX_NOT_FOUND ) {
+      return haystack;
+    }
+
+    final int needleLength = needle.length();
+
+    // Presize for the result; never request less than the haystack length.
+    final int increase = Math.max( 0, thread.length() - needleLength );
+    final StringBuilder buffer
+      = new StringBuilder( haystack.length() + increase );
+
+    // Append ranges of the haystack directly rather than creating temporary
+    // substrings: text before the needle, the replacement, then the rest.
+    return buffer
+      .append( haystack, 0, end )
+      .append( thread )
+      .append( haystack, end + needleLength, haystack.length() )
+      .toString();
+  }
+}
src/main/java/com/scrivenvar/processors/MarkdownCaretReplacementProcessor.java
-/*
- * Copyright 2016 White Magic Software, Ltd.
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * o Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * o Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-package com.scrivenvar.processors;
-
-import static com.scrivenvar.Constants.CARET_POSITION_HTML;
-import static com.scrivenvar.Constants.CARET_POSITION_MD;
-
-/**
- * Responsible for replacing the caret position marker with an HTML element
- * suitable to use as a reference for scrolling a view port.
- *
- * @author White Magic Software, Ltd.
- */
-public class MarkdownCaretReplacementProcessor extends AbstractProcessor<String> {
- private static final int INDEX_NOT_FOUND = -1;
-
- public MarkdownCaretReplacementProcessor( final Processor<String> processor ) {
- super( processor );
- }
-
- /**
- * Replaces each MD_CARET_POSITION with an HTML element that has an id
- * attribute of CARET_POSITION. This should only replace one item.
- *
- * @param t The text that contains
- *
- * @return
- */
- @Override
- public String processLink( final String t ) {
- return replace(t, CARET_POSITION_MD, CARET_POSITION_HTML );
- }
-
- /**
- * Replaces the needle with thread in the given haystack. Based on Apache
- * Commons 3 StringUtils.replace method. Should be faster than
- * String.replace, which performs a little regex under the hood.
- *
- * @param haystack Search this string for the needle, must not be null.
- * @param needle The text to find in the haystack.
- * @param thread Replace the needle with this text, if the needle is found.
- *
- * @return The haystack with the first instance of needle replaced with
- * thread.
- */
- private static String replace(
- final String haystack, final String needle, final String thread ) {
-
- final int end = haystack.indexOf( needle, 0 );
-
- if( end == INDEX_NOT_FOUND ) {
- return haystack;
- }
-
- int start = 0;
- final int needleLength = needle.length();
-
- int increase = thread.length() - needleLength;
- increase = (increase < 0 ? 0 : increase);
- final StringBuilder buffer = new StringBuilder( haystack.length() + increase );
-
- if( end != INDEX_NOT_FOUND ) {
- buffer.append( haystack.substring( start, end ) ).append( thread );
- start = end + needleLength;
- }
-
- return buffer.append( haystack.substring( start ) ).toString();
- }
-}
src/main/java/com/scrivenvar/processors/XMLCaretInsertionProcessor.java
+/*
+ * Copyright 2016 White Magic Software, Ltd.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * o Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * o Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package com.scrivenvar.processors;
+
+import com.scrivenvar.FileEditorTab;
+import com.ximpleware.VTDGen;
+import static com.ximpleware.VTDGen.TOKEN_CHARACTER_DATA;
+import com.ximpleware.VTDNav;
+
+/**
+ * Inserts a caret position indicator into the document.
+ *
+ * NOTE(review): as written, processLink only computes a candidate insertion
+ * offset and prints it to standard output; the text is returned unmodified.
+ *
+ * @author White Magic Software, Ltd.
+ */
+public class XMLCaretInsertionProcessor extends AbstractProcessor<String> {
+
+ // Editor tab that is queried for the current caret position.
+ private FileEditorTab tab;
+
+ /**
+ * Constructs a processor that reads the caret position from the given tab.
+ *
+ * @param processor Next link in the processing chain.
+ * @param tab The editor tab queried for the caret position.
+ */
+ public XMLCaretInsertionProcessor( final Processor<String> processor, final FileEditorTab tab ) {
+ super( processor );
+ setFileEditorTab( tab );
+ }
+
+ /**
+ * Inserts a caret at a valid position within the XML document.
+ *
+ * Parses t with VTD-XML and walks the tokens in order, looking for the first
+ * character data token whose offset is greater than the caret position. The
+ * insertion offset is then either the caret position itself (when it lies
+ * within the previously seen character data token) or the start of that
+ * first later token. Any exception raised while parsing or scanning is
+ * printed and otherwise ignored.
+ *
+ * @param t The string into which caret position marker text is inserted.
+ *
+ * @return t with a caret position marker included, or t if no place to insert
+ * could be found. NOTE(review): the current code always returns t unchanged;
+ * the computed offset is only printed.
+ */
+ @Override
+ public String processLink( final String t ) {
+ // -1 means no insertion point has been determined.
+ int insertOffset = -1;
+
+ if( t.length() > 0 ) {
+
+ try {
+ final VTDGen vg = new VTDGen();
+
+ // getBytes() without an argument encodes with the platform default
+ // charset. NOTE(review): token offsets presumably index into this byte
+ // array while the caret position is presumably a character index; the
+ // two can differ for multi-byte characters -- confirm.
+ vg.setDoc( t.getBytes() );
+ // NOTE(review): the argument presumably enables namespace awareness;
+ // confirm against the VTD-XML documentation.
+ vg.parse( true );
+
+ final VTDNav vn = vg.getNav();
+
+ final int caretOffset = getCaretPosition();
+ final int tokens = vn.getTokenCount();
+
+ int currTextTokenIndex = 0;
+ int prevTextTokenIndex = currTextTokenIndex;
+ int currTokenOffset = 0;
+
+ boolean found = false;
+
+ // To find the insertion spot even faster, the algorithm could
+ // use a binary search or interpolation search algorithm. This
+ // would reduce the worst-case iterations to O(log n) from O(n).
+ while( currTextTokenIndex < tokens && !found ) {
+ // currTokenOffset is only updated for character data tokens, so this
+ // holds the offset of the most recent character data token seen so
+ // far, or 0 if none has been seen yet.
+ final int prevTokenOffset = currTokenOffset;
+ final int currTokenType = vn.getTokenType( currTextTokenIndex );
+
+ if( currTokenType == TOKEN_CHARACTER_DATA ) {
+ currTokenOffset = vn.getTokenOffset( currTextTokenIndex );
+
+ if( currTokenOffset > caretOffset ) {
+ found = true;
+
+ // NOTE(review): prevTextTokenIndex is still 0 when no earlier
+ // character data token was seen, in which case this is the length of
+ // token 0, whatever its type -- confirm that is intended.
+ final int prevTokenLength = vn.getTokenLength( prevTextTokenIndex );
+
+ // If the caret falls within the limits of the previous token, then
+ // insert the caret position marker at the caret offset.
+ if( isBetween( caretOffset, prevTokenOffset, prevTokenOffset + prevTokenLength ) ) {
+ insertOffset = caretOffset;
+ } else {
+ // The caret position is outside the previous token's text
+ // boundaries, but the current text token is far away. The
+ // cursor should be positioned into the closer text token.
+ // For now, the cursor is positioned at the start of the
+ // current text token.
+ insertOffset = currTokenOffset;
+ }
+
+ // found is now true, so re-evaluating the loop condition ends the loop
+ // without advancing the index.
+ continue;
+ }
+
+ // Remember this character data token as the previous one for later
+ // iterations.
+ prevTextTokenIndex = currTextTokenIndex;
+ }
+
+ currTextTokenIndex++;
+ }
+
+ } catch( final Exception ex ) {
+ // Best effort: the failure is printed and insertOffset keeps whatever
+ // value it had when the exception was thrown.
+ ex.printStackTrace();
+ }
+ }
+
+
+ // Disabled debugging output. Several names used below (prevTokenType,
+ // prevToken, currTokenLength, currBetween, currToken) are not declared in
+ // this method, and others are scoped to the try block above.
+ /*
+ System.out.println( "-- CARET --------------------------------" );
+ System.out.println( "offset: " + caretOffset );
+ System.out.println( "-- BETWEEN PREV TOKEN --------------------" );
+ System.out.println( "index : " + prevTextTokenIndex );
+ System.out.println( "type : " + prevTokenType );
+ System.out.println( "offset : " + prevTokenOffset );
+ System.out.println( "length : " + prevTokenLength );
+ System.out.println( "offset + length: " + (prevTokenOffset + prevTokenLength - 1) );
+ System.out.println( "text : '" + prevToken.trim() + "'" );
+ System.out.println( "-- CURR TOKEN ---------------------------" );
+ System.out.println( "index : " + currTextTokenIndex );
+ System.out.println( "type : " + currTokenType );
+ System.out.println( "offset : " + currTokenOffset );
+ System.out.println( "length : " + currTokenLength );
+ System.out.println( "between: " + currBetween );
+ System.out.println( "text : '" + currToken.trim() + "'" );
+ */
+
+
+ // Only strictly positive offsets are reported; both the -1 sentinel and an
+ // offset of 0 are skipped.
+ if( insertOffset > 0 ) {
+ // Insert the caret at the given offset.
+ // TODO: Create and use CaretInsertion superclass.
+ System.out.println( "insert offset: " + insertOffset );
+ System.out.println( "caret offset : " + getCaretPosition() );
+ }
+
+ return t;
+ }
+
+
+
+ /**
+ * Returns the caret position reported by the file editor tab.
+ *
+ * @return The value of the tab's getCaretPosition method.
+ */
+ private int getCaretPosition() {
+ return getFileEditorTab().getCaretPosition();
+ }
+
+ /**
+ * Stores the tab that is queried for the caret position.
+ *
+ * @param tab The tab to store.
+ */
+ private void setFileEditorTab( final FileEditorTab tab ) {
+ this.tab = tab;
+ }
+
+ /**
+ * Returns the tab given to the constructor.
+ *
+ * @return The stored tab.
+ */
+ private FileEditorTab getFileEditorTab() {
+ return this.tab;
+ }
+
+ /**
+ * Answers whether a value lies within a closed range.
+ *
+ * @param i The value to test.
+ * @param min The lower bound, inclusive.
+ * @param max The upper bound, inclusive.
+ *
+ * @return true if min &lt;= i &lt;= max.
+ */
+ private boolean isBetween( int i, int min, int max ) {
+ return i >= min && i <= max;
+ }
+}
src/main/java/com/scrivenvar/processors/XMLProcessor.java
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
-import org.jdom2.ProcessingInstruction;
+import static net.sf.saxon.tree.util.ProcInstParser.getPseudoAttribute;
import org.xml.sax.InputSource;
import org.xml.sax.helpers.DefaultHandler;
public class XMLProcessor extends AbstractProcessor<String> {
- private ProcessingInstructionHandler handler = new ProcessingInstructionHandler();
+ private final ProcessingInstructionHandler handler = new ProcessingInstructionHandler();
private String href;
private Path path;
/**
+ * Constructs an XML processor that can transform an XML document into another
+ * format based on the XSL file specified as a processing instruction. The
+ * path must point to the directory where the XSL file is found, which implies
+ * that they must be in the same directory.
*
* @param processor Next link in the processing chain.
- * @param path
+ * @param path The path to the XML file content to be processed.
*/
public XMLProcessor( final Processor<String> processor, final Path path ) {
final Source xslt = new StreamSource( xslPath.toFile() );
final Transformer transformer = factory.newTransformer( xslt );
-
+
final StreamResult sr = new StreamResult( output );
transformer.transform( source, sr );
-
+
result = output.toString();
@Override
public void processingInstruction( final String target, final String data ) {
- final ProcessingInstruction xmlstylesheet
- = new ProcessingInstruction( target, data );
- setHref( xmlstylesheet.getPseudoAttributeValue( "href" ) );
+ if( "xml-stylesheet".equalsIgnoreCase( target ) ) {
+ setHref( getPseudoAttribute( data, "href" ) );
+ }
}
}