Dave Jarvis' Repositories

git clone https://repo.autonoma.ca/repo/keenwrite.git

Tested StringUtils vs Aho-Corasick string replacement algorithms. Updated code for mapping the variables into string values. Added code to switch variable names for their values during rendering.

Author: djarvis <email>
Date: 2016-11-27 20:52:32 GMT-0800
Commit: 8db959d7a1cf4485a49fd2db94886c5978452039
Parent: 2eff5ec
Delta: 296 lines added, 6 lines removed, 290-line increase
src/main/java/com/scrivenvar/ui/VariableTreeItem.java
import static com.scrivenvar.definition.DefinitionPane.SEPARATOR;
import static com.scrivenvar.editor.VariableNameInjector.DEFAULT_MAX_VAR_LENGTH;
+import java.util.HashMap;
+import java.util.Map;
import java.util.Stack;
import javafx.scene.control.TreeItem;
*/
public class VariableTreeItem<T> extends TreeItem<T> {
+ /**
+  * Initial capacity given to the map that holds the flattened tree.
+  */
+ private static final int DEFAULT_MAP_SIZE = 1000;
+
+ /**
+  * Flattened tree; remains null until getMap() is first called.
+  */
+ private Map<String, String> map;
/**
*/
public VariableTreeItem<T> findLeaf( final String text ) {
- final Stack<TreeItem<T>> stack = new Stack<>();
- final TreeItem<T> root = this;
+ final Stack<VariableTreeItem<T>> stack = new Stack<>();
+ final VariableTreeItem<T> root = this;
// Seed the stack-based, depth-first search with this item.
stack.push( root );
boolean found = false;
- TreeItem<T> node = null;
+ VariableTreeItem<T> node = null;
- while( !stack.isEmpty() && !found ) {
+ while( !found && !stack.isEmpty() ) {
node = stack.pop();
- if( node.isLeaf() && node.getValue().toString().startsWith( text ) ) {
+ if( node.valueStartsWith( text ) ) {
found = true;
} else {
// Not a match: queue the children so that they are examined next.
for( final TreeItem<T> child : node.getChildren() ) {
- stack.push( child );
+ stack.push( (VariableTreeItem<T>)child );
}
// No match found, yet; clearing the node means null is returned when
// the stack empties without finding a matching leaf.
node = null;
}
}
return (VariableTreeItem<T>)node;
+ }
+
+ /**
+ * Returns true if this node is a leaf and its value starts with the given
+ * text.
+ *
+ * @param s The text to compare against the node value.
+ *
+ * @return true Node is a leaf and its value starts with the given value.
+ */
+ private boolean valueStartsWith( final String s ) {
+ return isLeaf() && getValue().toString().startsWith( s );
}
return sb.toString();
+ }
+
+ /**
+ * Returns the hierarchy, flattened to key-value pairs.
+ *
+ * @return A map of this tree's key-value pairs.
+ */
+ public Map<String, String> getMap() {
+ if( this.map == null ) {
+ this.map = new HashMap<>( DEFAULT_MAP_SIZE );
+ populate( this, this.map );
+ }
+
+ return this.map;
+ }
+
+ /**
+  * Recursively walks the children of the given item and adds one entry to
+  * the map for every leaf encountered. The key is the leaf's path converted
+  * by toVariable; the value is the string form of the leaf's value.
+  *
+  * @param parent The item whose descendants are added to the map.
+  * @param map Receives the variable name and value of each leaf.
+  */
+ private void populate( final TreeItem<T> parent, final Map<String, String> map ) {
+   for( final TreeItem<T> child : parent.getChildren() ) {
+     if( child.isLeaf() ) {
+       // Children are expected to be VariableTreeItem instances; findLeaf
+       // relies on the same cast.
+       final VariableTreeItem<T> leaf = (VariableTreeItem<T>)child;
+       final String key = toVariable( leaf.toPath() );
+       final String value = child.getValue().toString();
+
+       map.put( key, value );
+     } else {
+       // Branches contribute no entry themselves, only their leaves do.
+       populate( child, map );
+     }
+   }
+ }
+
+ /**
+ * Converts the name of the key to a simple variable by enclosing it with
+ * dollar symbols.
+ *
+ * @param key The key name to change to a variable.
+ *
+ * @return $key$
+ */
+ private String toVariable( final String key ) {
+ return "$" + key + "$";
}
}
src/main/java/com/scrivenvar/processors/text/AhoCorsickReplacer.java
+/*
+ * Copyright 2016 White Magic Software, Ltd.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * o Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * o Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package com.scrivenvar.processors.text;
+
+import java.util.Map;
+import org.ahocorasick.trie.Emit;
+import org.ahocorasick.trie.Trie;
+
+/**
+ * Replaces text using an Aho-Corasick algorithm.
+ *
+ * @author White Magic Software, Ltd.
+ */
+public class AhoCorsickReplacer extends AbstractTextReplacer {
+
+  /**
+   * Default (empty) constructor.
+   */
+  protected AhoCorsickReplacer() {
+  }
+
+  /**
+   * Replaces each whole-word, non-overlapping occurrence of a map key in
+   * the text with the key's value. A key that is mapped to null is left in
+   * the text unchanged.
+   *
+   * @param text The text that contains zero or more keys; must not be null.
+   * @param map The set of keys mapped to replacement values.
+   *
+   * @return The given text with matched keys replaced by their values.
+   */
+  @Override
+  public String replace( final String text, final Map<String, String> map ) {
+    // Create a buffer sufficiently large that re-allocations are minimized.
+    final StringBuilder sb = new StringBuilder( (int)(text.length() * 1.25) );
+
+    // The TrieBuilder should only match whole words and ignore overlaps
+    // (there shouldn't be any).
+    final Trie.TrieBuilder builder = Trie.builder();
+    builder.onlyWholeWords();
+    builder.removeOverlaps();
+
+    for( final String key : keys( map ) ) {
+      builder.addKeyword( key );
+    }
+
+    // Position of the first character not yet copied into the buffer.
+    int index = 0;
+
+    for( final Emit emit : builder.build().parseText( text ) ) {
+      final String value = map.get( emit.getKeyword() );
+
+      // Skip keys without a replacement so that the key itself is copied
+      // along with the surrounding text instead of the literal "null"; this
+      // mirrors how StringUtils.replaceEach ignores null replacements.
+      if( value != null ) {
+        // Copy the unmatched text directly, without a temporary substring.
+        sb.append( text, index, emit.getStart() );
+        sb.append( value );
+
+        // The emit's end is the index of the last matched character.
+        index = emit.getEnd() + 1;
+      }
+    }
+
+    // Add the remainder of the string (contains no more matches).
+    sb.append( text, index, text.length() );
+
+    return sb.toString();
+  }
+}
src/main/java/com/scrivenvar/processors/text/StringUtilsReplacer.java
+/*
+ * Copyright 2016 White Magic Software, Ltd.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * o Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * o Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package com.scrivenvar.processors.text;
+
+import java.util.Map;
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * Replaces text by delegating to Apache's StringUtils.replaceEach method.
+ *
+ * @author White Magic Software, Ltd.
+ */
+public class StringUtilsReplacer extends AbstractTextReplacer {
+
+  /**
+   * Default (empty) constructor.
+   */
+  protected StringUtilsReplacer() {
+  }
+
+  /**
+   * Hands the text, the map's keys, and the map's values to
+   * StringUtils.replaceEach and returns its result.
+   *
+   * @param text The text that contains zero or more keys.
+   * @param map The set of keys mapped to replacement values.
+   *
+   * @return The result of StringUtils.replaceEach for the given arguments.
+   */
+  @Override
+  public String replace( final String text, final Map<String, String> map ) {
+    final String[] searches = keys( map );
+    final String[] replacements = values( map );
+
+    return StringUtils.replaceEach( text, searches, replacements );
+  }
+}
src/main/java/com/scrivenvar/processors/text/TextReplacementFactory.java
+/*
+ * Copyright 2016 White Magic Software, Ltd.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * o Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * o Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package com.scrivenvar.processors.text;
+
+/**
+ * Used to generate a class capable of efficiently replacing variable
+ * definitions with their values.
+ *
+ * @author White Magic Software, Ltd.
+ */
+public class TextReplacementFactory {
+
+  /**
+   * Text length at which the Aho-Corasick implementation is selected. After
+   * about 1,500 characters, the StringUtils implementation is less
+   * performant than the Aho-Corasick implementation.
+   *
+   * See http://stackoverflow.com/a/40836618/59087
+   */
+  private static final int AHO_CORASICK_MIN_LENGTH = 1500;
+
+  /**
+   * Returns a text search/replacement instance that is reasonably optimal
+   * for the given length of text.
+   *
+   * @param length The length of text that requires some search and replacing.
+   *
+   * @return A StringUtilsReplacer for text shorter than the threshold,
+   * otherwise an AhoCorsickReplacer.
+   */
+  public static TextReplacer getTextReplacer( final int length ) {
+    if( length < AHO_CORASICK_MIN_LENGTH ) {
+      return new StringUtilsReplacer();
+    }
+
+    return new AhoCorsickReplacer();
+  }
+}
src/main/java/com/scrivenvar/processors/text/TextReplacer.java
+/*
+ * Copyright 2016 White Magic Software, Ltd.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * o Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * o Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package com.scrivenvar.processors.text;
+
+import java.util.Map;
+
+/**
+ * Defines the ability to replace text given a set of keys and values.
+ * Instances are handed out by TextReplacementFactory.getTextReplacer; how
+ * keys are located within the text is left to each implementation.
+ *
+ * @author White Magic Software, Ltd.
+ */
+public interface TextReplacer {
+
+ /**
+ * Searches through the given text for any of the keys given in the map and
+ * replaces the keys that appear in the text with the key's corresponding
+ * value.
+ *
+ * @param text The text that contains zero or more keys.
+ * @param map The set of keys mapped to replacement values; each key is the
+ * exact text to search for and each value is the text that takes its place.
+ *
+ * @return The given text with all keys replaced with corresponding values.
+ */
+ public String replace( String text, Map<String, String> map );
+
+}