| Author | Dave Jarvis <email> |
|---|---|
| Date | 2021-06-14 19:31:21 GMT-0700 |
| Commit | a98944d15f49a9bfa8980fd234476cb3ed06093a |
| Parent | 247e05a |
| lexeme = createLexeme( QUOTE_DOUBLE, began, i.getIndex() ); | ||
| } | ||
| - else if( curr == '-' ) { | ||
| - lexeme = createLexeme( | ||
| - slurp( i, ( next, ci ) -> next == '-' ) == 0 ? HYPHEN : DASH, | ||
| - began, i.getIndex() | ||
| - ); | ||
| + else if( curr == '-' && peek( i ) == '-' ) { | ||
| + slurp( i, ( next, ci ) -> next == '-' ); | ||
| + | ||
| + lexeme = createLexeme( DASH, began, i.getIndex() ); | ||
| } | ||
| else if( isDigit( curr ) || isNumeric( curr ) && isDigit( peek( i ) ) ) { | ||
| // Parse all consecutive number characters to prevent the main loop | ||
| // from switching back to word tokens. | ||
| slurp( i, ( next, ci ) -> | ||
| isDigit( next ) || isNumeric( next ) && isDigit( peek( ci ) ) | ||
| ); | ||
| lexeme = createLexeme( isWord ? WORD : NUMBER, began, i.getIndex() ); | ||
| + } | ||
| + else if( curr == '-' ) { | ||
| + lexeme = createLexeme( HYPHEN, began, i.getIndex() ); | ||
| } | ||
| else if( curr == '.' ) { | ||
| private static boolean isNumeric( final char curr ) { | ||
| - return curr == '.' || curr == ','; | ||
| + return curr == '.' || curr == ',' || curr == '-' || curr == '+'; | ||
| } | ||
| flush( lexemes ); | ||
| } | ||
| - else if( lex2.isType( NUMBER ) && lex1.isType( QUOTE_SINGLE ) && | ||
| - lex3.isType( WORD ) && | ||
| - lex3.toString( mText ).equalsIgnoreCase( "s" ) ) { | ||
| - // Sentences must re-written to avoid starting with numerals. | ||
| - // E.g., '70s | ||
| - consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) ); | ||
| - } | ||
| - else if( lex2.isType( QUOTE_SINGLE ) && lex3.isType( NUMBER ) ) { | ||
| - // E.g., '02 | ||
| - consumer.accept( new Token( QUOTE_APOSTROPHE, lex2 ) ); | ||
| + else if( lex2.isType( NUMBER ) && lex1.isType( QUOTE_SINGLE ) ) { | ||
| + if( lex3.anyType( SPACE, PUNCT ) || (lex3.isType( WORD ) && | ||
| + lex3.toString( mText ).equalsIgnoreCase( "s" )) ) { | ||
| + // Sentences must be re-written to avoid starting with numerals. | ||
| + // Examples: '20s, '02 | ||
| + consumer.accept( new Token( QUOTE_APOSTROPHE, lex1 ) ); | ||
| + } | ||
| + else { | ||
| + // E.g., '2'' | ||
| + consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex1 ) ); | ||
| + mOpeningSingleQuote++; | ||
| + } | ||
| + | ||
| + resolved( unresolved ); | ||
| } | ||
| else if( lex2.isType( QUOTE_SINGLE ) && | ||
| resolved( unresolved ); | ||
| mClosingSingleQuote++; | ||
| - } | ||
| - else if( lex2.isType( QUOTE_SINGLE ) && lex1.isType( SPACE ) && | ||
| - lex3.anyType( HYPHEN, NUMBER ) ) { | ||
| - consumer.accept( new Token( QUOTE_OPENING_SINGLE, lex2 ) ); | ||
| - mOpeningSingleQuote++; | ||
| } | ||
| else if( lex2.anyType( QUOTE_SINGLE, QUOTE_DOUBLE ) ) { | ||
| void test_Lexing_Words_TokenValues() { | ||
| testText( "abc 123", "abc", " ", "123" ); | ||
| - testText( "-123 abc", "-", "123", " ", "abc" ); | ||
| + testText( "-123 abc", "-123", " ", "abc" ); | ||
| } | ||
| @Test | ||
| void test_Lexing_Numbers_EmitNumbers() { | ||
| testType( ".123", NUMBER ); | ||
| - testType( "-123.", HYPHEN, NUMBER, PERIOD ); | ||
| + testType( "-123.", NUMBER, PERIOD ); | ||
| testType( " 123.123.123", SPACE, NUMBER ); | ||
| testType( "123 123\"", NUMBER, SPACE, NUMBER, QUOTE_DOUBLE ); | ||
| - testType( "-123,123.123", HYPHEN, NUMBER ); | ||
| + testType( "-123,123.123", NUMBER ); | ||
| testType( "...1,023...", ELLIPSIS, NUMBER, ELLIPSIS ); | ||
| } | ||
| @Test | ||
| void test_Lexing_Words_EmitWords() { | ||
| testType( "abc", WORD ); | ||
| testType( "abc abc", WORD, SPACE, WORD ); | ||
| testType( "abc...", WORD, ELLIPSIS ); | ||
| testType( "abc123", WORD, NUMBER ); | ||
| - testType( "-123abc", HYPHEN, NUMBER, WORD ); | ||
| + testType( "-123abc", NUMBER, WORD ); | ||
| testType( "abc-o'-abc", WORD, HYPHEN, WORD, QUOTE_SINGLE, HYPHEN, WORD ); | ||
| } | ||
| @Test | ||
| void test_Lexing_PunctuationMarks_EmitPunctuationMarks() { | ||
| testType( "!", PUNCT ); | ||
| testType( ";", PUNCT ); | ||
| testType( ".", PERIOD ); | ||
| testType( "-", HYPHEN ); | ||
| + testType( "--", DASH ); | ||
| + testType( "---", DASH ); | ||
| testType( "...", ELLIPSIS ); | ||
| } |
| import java.util.function.Function; | ||
| -import static java.lang.System.*; | ||
| +import static java.lang.System.out; | ||
| import static org.junit.jupiter.api.Assertions.assertEquals; | ||
| import static org.junit.jupiter.api.Assertions.assertNotNull; | ||
| public void test_parse_SingleLine_Parsed() { | ||
| out.println( SmartQuotes.replace( | ||
| - "What's this '-5.5''' and other '-10.2'' cm' and another '-7.25''' thing?" | ||
| + "What's this '-5.5'',' '-10.2'' cm,' and another '-7.25''' thing?" | ||
| ) ); | ||
| } | ||
| } | ||
| - private static String unescapeEol( final String s) { | ||
| + private static String unescapeEol( final String s ) { | ||
| return String.join( "\n", s.split( "\\\\n" ) ); | ||
| } | ||
| Bob's table is 5′×4′. | ||
| -What's this '-5.5''', '-10.2'' cm', and another '-7.25''' thing? | ||
| -What's this ‘-5.5″’, ‘-10.2″ cm’, and another ‘-7.25″’ thing? | ||
| +What's this '-5.5'',' '-10.2'' cm,' and another '-7.25''' thing? | ||
| +What's this ‘-5.5″,’ ‘-10.2″ cm,’ and another ‘-7.25″’ thing? | ||
| 'What's this -5.5'' thing?' | ||
| "Not all open quotes are closed... | ||
| “Not all open quotes are closed... | ||
| + | ||
| +"And this" and "and this" and "and this" and "and another." | ||
| +“And this” and “and this” and “and this” and “and another.” | ||
| # ######################################################################## | ||
| Delta | 36 lines added, 30 lines removed, 6-line increase |
|---|