| Author | Dave Jarvis <email> |
|---|---|
| Date | 2021-05-31 00:04:24 GMT-0700 |
| Commit | 5aaef2914036bc30c89d07651e24f0f860f7bbde |
| Parent | 3c33bc6 |
| plugins { | ||
| - // Apply the java-library plugin for API and implementation separation. | ||
| - id 'java-library' | ||
| + // Apply the java-library plugin for API and implementation separation. | ||
| + id 'java-library' | ||
| } | ||
| repositories { | ||
| - // Use Maven Central for resolving dependencies. | ||
| - mavenCentral() | ||
| + // Use Maven Central for resolving dependencies. | ||
| + mavenCentral() | ||
| } | ||
| dependencies { | ||
| - // Use JUnit Jupiter API for testing. | ||
| - testImplementation 'org.junit.jupiter:junit-jupiter-api:5.7.1' | ||
| - | ||
| - // Use JUnit Jupiter Engine for testing. | ||
| - testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine' | ||
| - | ||
| - // This dependency is exported to consumers, that is to say found on their compile classpath. | ||
| - api 'org.apache.commons:commons-math3:3.6.1' | ||
| + // Use JUnit Jupiter API for testing. | ||
| + testImplementation 'org.junit.jupiter:junit-jupiter-api:5.7.1' | ||
| - // This dependency is used internally, and not exposed to consumers on their own compile classpath. | ||
| - implementation 'com.google.guava:guava:30.0-jre' | ||
| + // Use JUnit Jupiter Engine for testing. | ||
| + testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine' | ||
| } | ||
| tasks.named('test') { | ||
| - // Use junit platform for unit tests. | ||
| - useJUnitPlatform() | ||
| + // Use junit platform for unit tests. | ||
| + useJUnitPlatform() | ||
| } | ||
| +/* | ||
| + * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| + * contributor license agreements. See the NOTICE file distributed with | ||
| + * this work for additional information regarding copyright ownership. | ||
| + * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| + * (the "License"); you may not use this file except in compliance with | ||
| + * the License. You may obtain a copy of the License at | ||
| + * | ||
| + * http://www.apache.org/licenses/LICENSE-2.0 | ||
| + * | ||
| + * Unless required by applicable law or agreed to in writing, software | ||
| + * distributed under the License is distributed on an "AS IS" BASIS, | ||
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| + * See the License for the specific language governing permissions and | ||
| + * limitations under the License. | ||
| + */ | ||
| +package com.keenwrite.quotes; | ||
| + | ||
| +import java.util.*; | ||
| + | ||
/**
 * A first-in first-out queue with a fixed capacity that discards its oldest
 * element to make room whenever a new element is added while full.
 * <p>
 * Elements are removed in the order in which they were added, and the
 * iteration order is the same as the removal order.
 * </p>
 * <p>
 * The {@link #add(Object)}, {@link #offer(Object)}, {@link #remove()},
 * {@link #peek()}, {@link #poll()}, and {@link #get(int)} operations touch
 * a single array slot each. Removing through the iterator may shift every
 * subsequent element.
 * </p>
 * <p>
 * This queue rejects {@code null} elements. It performs no synchronization
 * and its iterator does not detect modifications made behind its back.
 * </p>
 * <p>
 * NOTE(review): the ASF licence header on this file suggests the class was
 * adapted from an Apache project; the upstream {@code @since} tags were
 * dropped because they do not describe versions of this package.
 * </p>
 *
 * @param <E> the type of elements in this collection
 */
public class CircularFifoQueue<E> extends AbstractCollection<E>
  implements Queue<E> {
  /**
   * Underlying storage array; unused slots hold {@code null}.
   */
  private final E[] elements;

  /**
   * Array index of first (oldest) queue element.
   */
  private int start;

  /**
   * Index mod maxElements of the array position following the last queue
   * element. Queue elements start at elements[start] and "wrap around"
   * elements[maxElements-1], ending at elements[decrement(end)].
   * For example, elements = {c,a,b}, start=1, end=1 corresponds to
   * the queue [a,b,c].
   */
  private int end;

  /**
   * Distinguishes a full queue from an empty one, because both states have
   * {@code start == end}.
   */
  private boolean full;

  /**
   * Capacity of the queue.
   */
  private final int maxElements;

  /**
   * Constructor that creates a queue with the specified size.
   *
   * @param size the size of the queue (cannot be changed)
   * @throws IllegalArgumentException if {@code size} is less than 1
   */
  public CircularFifoQueue( final int size ) {
    if( size <= 0 ) {
      throw new IllegalArgumentException( "The size must be greater than 0" );
    }

    // Safe: the array never escapes this class and is only written by add(E).
    @SuppressWarnings( "unchecked" )
    final E[] storage = (E[]) new Object[ size ];

    elements = storage;
    maxElements = size;
  }

  /**
   * Returns the number of elements stored in the queue.
   *
   * @return this queue's size
   */
  @Override
  public int size() {
    if( end < start ) {
      // The occupied region wraps past the end of the array.
      return maxElements - start + end;
    }

    if( end == start ) {
      return full ? maxElements : 0;
    }

    return end - start;
  }

  /**
   * Returns true if this queue is empty; false otherwise.
   *
   * @return true if this queue is empty
   */
  @Override
  public boolean isEmpty() {
    return size() == 0;
  }

  /**
   * Returns {@code true} if the capacity limit of this queue has been reached,
   * i.e. the number of elements stored in the queue equals its maximum size.
   *
   * @return {@code true} if the capacity limit has been reached,
   * {@code false} otherwise
   */
  public boolean isAtFullCapacity() {
    return size() == maxElements;
  }

  /**
   * Removes every element and resets the queue to its initial, empty state.
   */
  @Override
  public void clear() {
    full = false;
    start = 0;
    end = 0;
    Arrays.fill( elements, null );
  }

  /**
   * Adds the given element to this queue. If the queue is full, the least
   * recently added element is discarded so that the new element can be
   * inserted.
   *
   * @param element the element to add
   * @return true, always
   * @throws NullPointerException if the given element is null
   */
  @Override
  public boolean add( final E element ) {
    Objects.requireNonNull( element, "element" );

    if( isAtFullCapacity() ) {
      // Evict the oldest element to free a slot.
      remove();
    }

    elements[ end ] = element;
    end = increment( end );

    if( end == start ) {
      full = true;
    }

    return true;
  }

  /**
   * Returns the element at the specified position in this queue, where
   * position 0 is the oldest element.
   *
   * @param index the position of the element in the queue
   * @return the element at position {@code index}
   * @throws NoSuchElementException if the requested position is outside the
   *                                range [0, size)
   */
  public E get( final int index ) {
    final int sz = size();

    if( index < 0 || index >= sz ) {
      throw new NoSuchElementException(
        String.format(
          "The specified index %1$d is outside the available range [0, %2$d)",
          index,
          sz ) );
    }

    return elements[ (start + index) % maxElements ];
  }

  /**
   * Adds the given element to this queue. If the queue is full, the least
   * recently added element is discarded so that the new element can be
   * inserted. Delegates to {@link #add(Object)}.
   *
   * @param element the element to add
   * @return true, always
   * @throws NullPointerException if the given element is null
   */
  @Override
  public boolean offer( final E element ) {
    return add( element );
  }

  /**
   * Removes and returns the oldest element.
   *
   * @return the oldest element, or {@code null} if this queue is empty
   */
  @Override
  public E poll() {
    return isEmpty() ? null : remove();
  }

  /**
   * Returns the oldest element without removing it.
   *
   * @return the oldest element
   * @throws NoSuchElementException if this queue is empty
   */
  @Override
  public E element() {
    if( isEmpty() ) {
      throw new NoSuchElementException( "queue is empty" );
    }

    return peek();
  }

  /**
   * Returns the oldest element without removing it.
   *
   * @return the oldest element, or {@code null} if this queue is empty
   */
  @Override
  public E peek() {
    return isEmpty() ? null : elements[ start ];
  }

  /**
   * Removes and returns the oldest element.
   *
   * @return the oldest element
   * @throws NoSuchElementException if this queue is empty
   */
  @Override
  public E remove() {
    if( isEmpty() ) {
      throw new NoSuchElementException( "queue is empty" );
    }

    // A non-empty queue always holds a non-null element at the head because
    // add(E) rejects nulls, so the slot can be released unconditionally.
    final E element = elements[ start ];
    elements[ start ] = null;
    start = increment( start );
    full = false;

    return element;
  }

  /**
   * Increments the internal index, wrapping to 0 past the last slot.
   *
   * @param index the index to increment
   * @return the updated index
   */
  private int increment( int index ) {
    index++;
    if( index >= maxElements ) {
      index = 0;
    }
    return index;
  }

  /**
   * Decrements the internal index, wrapping to the last slot below 0.
   *
   * @param index the index to decrement
   * @return the updated index
   */
  private int decrement( int index ) {
    index--;
    if( index < 0 ) {
      index = maxElements - 1;
    }
    return index;
  }

  /**
   * Returns an iterator over this queue's elements, oldest first. The
   * iterator supports {@link Iterator#remove()}.
   *
   * @return an iterator over this queue's elements
   */
  @Override
  public Iterator<E> iterator() {
    return new Iterator<>() {
      /** Array index of the element that next() will return. */
      private int index = start;

      /** Array index last handed out by next(), or -1 if not removable. */
      private int lastReturnedIndex = -1;

      /**
       * True until the first call to next() on a full queue; needed because
       * a full queue has index == end before anything has been visited.
       */
      private boolean isFirst = full;

      @Override
      public boolean hasNext() {
        return isFirst || index != end;
      }

      @Override
      public E next() {
        if( !hasNext() ) {
          throw new NoSuchElementException();
        }

        isFirst = false;
        lastReturnedIndex = index;
        index = increment( index );
        return elements[ lastReturnedIndex ];
      }

      @Override
      public void remove() {
        if( lastReturnedIndex == -1 ) {
          throw new IllegalStateException();
        }

        // The head can be removed without shifting anything.
        if( lastReturnedIndex == start ) {
          CircularFifoQueue.this.remove();
          lastReturnedIndex = -1;
          return;
        }

        int pos = lastReturnedIndex + 1;

        if( start < lastReturnedIndex && pos < end ) {
          // The tail is contiguous: close the gap with a single block copy.
          System.arraycopy(
            elements, pos, elements, lastReturnedIndex, end - pos );
        }
        else {
          // The tail wraps around the array: shift one slot at a time.
          while( pos != end ) {
            if( pos >= maxElements ) {
              elements[ pos - 1 ] = elements[ 0 ];
              pos = 0;
            }
            else {
              elements[ decrement( pos ) ] = elements[ pos ];
              pos = increment( pos );
            }
          }
        }

        // The queue shrank by one; release the vacated last slot and step the
        // cursor back so it still points at the next unvisited element.
        lastReturnedIndex = -1;
        end = decrement( end );
        elements[ end ] = null;
        full = false;
        index = decrement( index );
      }
    };
  }
}
| package com.keenwrite.quotes; | ||
| +import java.util.ArrayDeque; | ||
| +import java.util.Deque; | ||
| import java.util.function.Consumer; | ||
| + | ||
| +import static com.keenwrite.quotes.TokenType.*; | ||
| /** | ||
| * Converts straight double/single quotes and apostrophes to curly equivalents. | ||
| + * First, handle single quotes as apostrophes, which include: | ||
| + * <ol> | ||
| + * <li>Inner contractions (WORD ' WORD) -- you'd've</li> | ||
| + * <li>Inner contractions (PERIOD ' WORD) -- Ph.d.'ll</li> | ||
| + * <li>Numeric contractions (NUMBER ' WORD) -- 70's</li> | ||
| + * <li>Outer contractions (' WORD ') -- 'n'</li> | ||
| + * <li>Unambiguous beginning contractions (' WORD) -- 'Twas</li> | ||
| + * </ol> | ||
| + * Next, handle single and double quotes as primes and double primes: | ||
| + * <ol> | ||
| + * <li>Single prime (NUMBER ') -- 2'</li> | ||
| + * <li>Double prime (NUMBER ") -- 7.5"</li> | ||
| + * </ol> | ||
| + * Next, handle balanced double and single quotes: | ||
| + * <ol> | ||
| + * <li>Double quotes (" (WORD (SPACE+ WORD)? (PUNCT | PERIOD))+ ")</li> | ||
| + * <li>Single quotes (' (WORD (SPACE+ WORD)? (PUNCT | PERIOD))+ ')</li> | ||
| + * </ol> | ||
| */ | ||
| public class Parser implements Consumer<Token> { | ||
| private final String mText; | ||
| + private final CircularFifoQueue<Token> mTokens = new CircularFifoQueue<>( 3 ); | ||
| + | ||
| + private final Deque<Token> mStack = new ArrayDeque<>(); | ||
| public Parser( final String text ) { | ||
| mText = text; | ||
| + mTokens.add( Token.EOT ); | ||
| + mTokens.add( Token.EOT ); | ||
| + mTokens.add( Token.EOT ); | ||
| } | ||
| public void parse() { | ||
| final var tokenizer = new Tokenizer(); | ||
| tokenizer.tokenize( mText, this ); | ||
| } | ||
| @Override | ||
| public void accept( final Token token ) { | ||
| - System.out.print( token.getType() + " " ); | ||
| + mTokens.add( token ); | ||
| + | ||
| + final var token1 = mTokens.get( 0 ); | ||
| + final var token2 = mTokens.get( 1 ); | ||
| + final var token3 = mTokens.get( 2 ); | ||
| + | ||
| + if( token2.isType( QUOTE_SINGLE ) && | ||
| + token3.isType( WORD ) && | ||
| + token1.anyType( WORD, PERIOD, NUMBER ) ) { | ||
| + System.out.println( "APOSTROPHE: " + token2 ); | ||
| + } | ||
| + else if( token1.isType( QUOTE_SINGLE ) && | ||
| + "n".equalsIgnoreCase( token2.toString( mText ) ) && | ||
| + token3.isType( QUOTE_SINGLE ) ) { | ||
| + System.out.printf( "APOSTROPHES: %s %s%n", token1, token3 ); | ||
| + } | ||
| + else if( token1.isType( NUMBER ) && token2.isType( QUOTE_SINGLE ) ) { | ||
| + System.out.println( "PRIME: " + token2 ); | ||
| + } | ||
| + else if( token1.isType( NUMBER ) && token2.isType( QUOTE_DOUBLE ) ) { | ||
| + System.out.println( "DOUBLE PRIME: " + token2 ); | ||
| + } | ||
| + else if( token.anyType( QUOTE_SINGLE, QUOTE_DOUBLE ) ) { | ||
| + mStack.push( token ); | ||
| + | ||
| + if( mStack.isEmpty() ) { | ||
| + System.out.println( "EMPTY STACK?!" ); | ||
| + } | ||
| + } | ||
| } | ||
| } | ||
| + | ||
| +/* | ||
| + private enum TokenType { | ||
| + QUOTE_OPENING_SINGLE, | ||
| + QUOTE_OPENING_DOUBLE, | ||
| + QUOTE_CLOSING_SINGLE, | ||
| + QUOTE_CLOSING_DOUBLE, | ||
| + QUOTE_APOSTROPHE, | ||
| + QUOTE_PRIME_SINGLE, | ||
| + QUOTE_PRIME_DOUBLE, | ||
| + TEXT | ||
| + } | ||
| +*/ | ||
| } | ||
| + public boolean anyType( final TokenType... types ) { | ||
| + for( final var type : types ) { | ||
| + if( mType == type ) { | ||
| + return true; | ||
| + } | ||
| + } | ||
| + | ||
| + return false; | ||
| + } | ||
| + | ||
| TokenType getType() { | ||
| return mType; | ||
| + } | ||
| + | ||
| + @Override | ||
| + public String toString() { | ||
| + return getClass().getSimpleName() + "{" + | ||
| + "mType=" + mType + | ||
| + ", mBegan=" + mBegan + | ||
| + ", mEnded=" + mEnded + | ||
| + '}'; | ||
| } | ||
| public enum TokenType { | ||
| - QSINGLE, | ||
| - QDOUBLE, | ||
| + QUOTE_SINGLE, | ||
| + QUOTE_DOUBLE, | ||
| WORD, | ||
| NUMBER, | ||
| NEWLINE, | ||
| SPACE, | ||
| PUNCT, | ||
| PERIOD, | ||
| INVALID | ||
| } | ||
| - | ||
| -/* | ||
| - private enum TokenType { | ||
| -// QUOTE_OPENING_SINGLE, | ||
| -// QUOTE_OPENING_DOUBLE, | ||
| -// QUOTE_CLOSING_SINGLE, | ||
| -// QUOTE_CLOSING_DOUBLE, | ||
| -// QUOTE_APOSTROPHE, | ||
| -// QUOTE_PRIME_SINGLE, | ||
| -// QUOTE_PRIME_DOUBLE, | ||
| - TEXT | ||
| - } | ||
| - */ |
| /** | ||
| - * Emits a series of tokens that | ||
| + * Emits a series of tokens that represent information about text that is | ||
| + * needed to convert straight quotes to curly quotes. | ||
| * | ||
| * @param text The text to split into tokens. | ||
| } | ||
| + /** | ||
| + * Tokenizes a sequence of characters. The comparisons are ordered by how | ||
| + * likely each kind of character is to occur in regular English prose: | ||
| + * letters, space, quotation marks, numbers, periods, new lines, then end | ||
| + * of text. | ||
| + * | ||
| + * @param i The sequence of characters to tokenize. | ||
| + * @return The next token in the sequence. | ||
| + */ | ||
| private Token tokenize( final CharacterIterator i ) { | ||
| int began = i.getIndex(); | ||
| boolean isWord = false; | ||
| Token token = null; | ||
| do { | ||
| final var curr = i.current(); | ||
| - | ||
| - if( curr == DONE ) { | ||
| - return Token.EOT; | ||
| - } | ||
| if( isLetter( curr ) ) { | ||
| isWord = true; | ||
| if( !isLetterOrDigit( peek( i ) ) ) { | ||
| token = createToken( WORD, began, i.getIndex() ); | ||
| } | ||
| + } | ||
| + else if( curr == ' ' ) { | ||
| + token = createToken( SPACE, began, i.getIndex() ); | ||
| + } | ||
| + else if( curr == '\'' ) { | ||
| + token = createToken( QUOTE_SINGLE, began, i.getIndex() ); | ||
| + } | ||
| + else if( curr == '"' ) { | ||
| + token = createToken( QUOTE_DOUBLE, began, i.getIndex() ); | ||
| } | ||
| else if( isDigit( curr ) || isNumeric( curr ) && isDigit( peek( i ) ) ) { | ||
| token = createToken( isWord ? WORD : NUMBER, began, i.getIndex() ); | ||
| + } | ||
| + else if( curr == '.' ) { | ||
| + token = createToken( PERIOD, began, i.getIndex() ); | ||
| } | ||
| else if( curr == '\r' ) { | ||
| else if( isWhitespace( curr ) ) { | ||
| token = createToken( SPACE, began, i.getIndex() ); | ||
| - } | ||
| - else if( curr == '\'' ) { | ||
| - token = createToken( QSINGLE, began, i.getIndex() ); | ||
| - } | ||
| - else if( curr == '"' ) { | ||
| - token = createToken( QDOUBLE, began, i.getIndex() ); | ||
| } | ||
| - else if( curr == '.' ) { | ||
| - token = createToken( PERIOD, began, i.getIndex() ); | ||
| + else if( curr != DONE ) { | ||
| + token = createToken( PUNCT, began, i.getIndex() ); | ||
| } | ||
| else { | ||
| - token = createToken( PUNCT, began, i.getIndex() ); | ||
| + token = Token.EOT; | ||
| } | ||
| @Test | ||
| void test_Conversion_Straight_Curly() { | ||
| - final var parser = new Parser( "Salut tout le monde!\nÇa va?"); | ||
| - parser.parse(); | ||
| - System.out.println(); | ||
| + new Parser( "\"It's the 70's jack-o'-lantern\"").parse(); | ||
| + new Parser( "Fish-'n'-chips!").parse(); | ||
| + new Parser( "That's a 35' x 10\" yacht!").parse(); | ||
| + //new Parser( "'70s are Sams' faves.'").parse(); | ||
| } | ||
| } |
| testType( "-123.", PUNCT, NUMBER, PERIOD ); | ||
| testType( " 123.123.123", SPACE, NUMBER ); | ||
| - testType( "123 123\"", NUMBER, SPACE, NUMBER, QDOUBLE ); | ||
| + testType( "123 123\"", NUMBER, SPACE, NUMBER, QUOTE_DOUBLE ); | ||
| testType( "-123,123.123", PUNCT, NUMBER ); | ||
| } | ||
| @Test | ||
| void test_Tokenize_Words_EmitWords() { | ||
| testType( "abc", WORD ); | ||
| testType( "abc abc", WORD, SPACE, WORD ); | ||
| testType( "abc123", WORD ); | ||
| testType( "-123abc", PUNCT, NUMBER, WORD ); | ||
| - testType( "abc-o'-abc", WORD, PUNCT, WORD, QSINGLE, PUNCT, WORD ); | ||
| + testType( "abc-o'-abc", WORD, PUNCT, WORD, QUOTE_SINGLE, PUNCT, WORD ); | ||
| } | ||
| @Test | ||
| void test_Tokenize_Quotes_EmitQuotes() { | ||
| - testType( "'", QSINGLE ); | ||
| - testType( "\"", QDOUBLE ); | ||
| - testType( "3 o'clock", NUMBER, SPACE, WORD, QSINGLE, WORD ); | ||
| + testType( "'", QUOTE_SINGLE ); | ||
| + testType( "\"", QUOTE_DOUBLE ); | ||
| + testType( "3 o'clock", NUMBER, SPACE, WORD, QUOTE_SINGLE, WORD ); | ||
| } | ||
| Delta | 487 lines added, 54 lines removed, 433-line increase |
|---|