Fix bug that could occur when parsing XML documents that are not well-formed

Author	Dave Jarvis <email>
Date	2022-11-02 20:16:44 GMT-0700
Commit	ca3c3a32cebb7612243c3dd65ef97872cef76329
Parent	86c8e10

src/main/java/com/whitemagicsoftware/keenquotes/lex/XmlFilter.java

		import com.whitemagicsoftware.keenquotes.util.FastCharacterIterator;

		-import java.text.CharacterIterator;
		import java.util.Set;
		+
		+import static java.text.CharacterIterator.DONE;

		/**

		);

		- public XmlFilter() {}
		+ public XmlFilter() { }

		/**
		* Skip XML tags found within the prose, which hides the elements.
		*/
		@Override
		public boolean test( final FastCharacterIterator i ) {
		final var match = i.current() == '<';

		if( match ) {
		- final var openingTag = nextTag( i );
		+ try {
		+ final var openingTag = nextTag( i );

		- if( UNTOUCHABLE.contains( openingTag.toLowerCase() ) ) {
		- String closingTag;
		+ if( UNTOUCHABLE.contains( openingTag.toLowerCase() ) ) {
		+ String closingTag;

		- do {
		- closingTag = nextTag( i );
		+ do {
		+ closingTag = nextTag( i );
		+ }
		+ while( !closingTag.endsWith( openingTag ) );
		}
		- while( !closingTag.endsWith( openingTag ) );
		+ } catch( final IndexOutOfBoundsException ex ) {
		+ // The document ran out of characters; XML is not well-formed; stop
		+ // parsing additional text.
		+ return false;
		}
		}

		return match;
		}

		/**
		* Skips to the next greater than or less than symbol.
		*
		- * @param i The {@link CharacterIterator} used to scan through the text, one
		- * character at a time.
		+ * @param i Scans through the text, one character at a time.
		* @return An opening/closing tag name, or the content within the element.
		+ * @throws IndexOutOfBoundsException The tag was not closed before the
		+ * document ended.
		*/
		private String nextTag( final FastCharacterIterator i ) {
		final var begin = i.index();

		- i.skip( next -> next != '>' && next != '<' );
		+ i.skip( next -> next != '>' && next != '<' && next != DONE );

		// Swallow the trailing greater than symbol.

src/test/java/com/whitemagicsoftware/keenquotes/lex/LexerTest.java


		@Test
		- void test_Lexing_Words_LexemeValues() {
		+ void test_Lexing_Words_EmitMixedValues() {
		testText( "abc 123", "abc", " ", "123" );
		testText( "-123 abc", "-123", " ", "abc" );

		Lexer.lex( text, lexeme -> {
		// Ignore the SOT and EOT lexemes (avoids duplication). Each test will
		- // include EOL, EOP, and EOT tokens due to the Lexer's algorithm.
		+ // include EOL, EOP, and EOT tokens due to the lexer's algorithm.
		if( !lexeme.isType( SOT, EOT ) && counter.get() < elements.size() ) {
		final var expected = elements.get( counter.getAndIncrement() );

Delta	21 lines added, 13 lines removed, 8-line increase