Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a new SuffixParser to handle UTF-8 characters with more than one byte #82

Merged
merged 5 commits into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
<groupId>software.amazon.event.ruler</groupId>
<artifactId>event-ruler</artifactId>
<name>Event Ruler</name>
<version>1.2.0</version>
<version>1.2.1</version>
<description>Event Ruler is a Java library that allows matching Rules to Events. An event is a list of fields,
which may be given as name/value pairs or as a JSON object. A rule associates event field names with lists of
possible values. There are two reasons to use Ruler: 1/ It's fast; the time it takes to match Events doesn't
Expand Down
11 changes: 9 additions & 2 deletions src/main/software/amazon/event/ruler/input/DefaultParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

import java.nio.charset.StandardCharsets;

import static software.amazon.event.ruler.MatchType.ANYTHING_BUT_SUFFIX;
import static software.amazon.event.ruler.MatchType.EQUALS_IGNORE_CASE;
import static software.amazon.event.ruler.MatchType.ANYTHING_BUT_IGNORE_CASE;
import static software.amazon.event.ruler.MatchType.SUFFIX;
import static software.amazon.event.ruler.MatchType.WILDCARD;

/**
Expand Down Expand Up @@ -36,14 +38,16 @@ public class DefaultParser implements MatchTypeParser, ByteParser {
private static final DefaultParser SINGLETON = new DefaultParser();
private final WildcardParser wildcardParser;
private final EqualsIgnoreCaseParser equalsIgnoreCaseParser;
private final SuffixParser suffixParser;

DefaultParser() {
this(new WildcardParser(), new EqualsIgnoreCaseParser());
this(new WildcardParser(), new EqualsIgnoreCaseParser(), new SuffixParser());
}

DefaultParser(WildcardParser wildcardParser, EqualsIgnoreCaseParser equalsIgnoreCaseParser) {
DefaultParser(WildcardParser wildcardParser, EqualsIgnoreCaseParser equalsIgnoreCaseParser, SuffixParser suffixParser) {
this.wildcardParser = wildcardParser;
this.equalsIgnoreCaseParser = equalsIgnoreCaseParser;
this.suffixParser = suffixParser;
}

public static DefaultParser getParser() {
Expand All @@ -56,7 +60,10 @@ public InputCharacter[] parse(final MatchType type, final String value) {
return wildcardParser.parse(value);
} else if (type == EQUALS_IGNORE_CASE || type == ANYTHING_BUT_IGNORE_CASE) {
return equalsIgnoreCaseParser.parse(value);
} else if (type == SUFFIX || type == ANYTHING_BUT_SUFFIX) {
return suffixParser.parse(value);
}

final byte[] utf8bytes = value.getBytes(StandardCharsets.UTF_8);
final InputCharacter[] result = new InputCharacter[utf8bytes.length];
for (int i = 0; i < utf8bytes.length; i++) {
Expand Down
26 changes: 26 additions & 0 deletions src/main/software/amazon/event/ruler/input/SuffixParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package software.amazon.event.ruler.input;

import java.nio.charset.StandardCharsets;

/**
 * Parser dedicated to suffix rule values.
 *
 * The incoming value has already been through the `reverse()` in
 * {@code software.amazon.event.ruler.Patterns}. This parser deliberately undoes that
 * character-level reversal and then reverses at the byte level instead, so that
 * characters encoded with 2+ UTF-8 bytes (for example '大' and '雨') are reversed correctly.
 */
public class SuffixParser implements StringValueParser {

    SuffixParser() { }

    /**
     * Converts a suffix rule value into its byte-reversed UTF-8 form.
     *
     * @param value the rule value to parse
     * @return one {@code InputByte} per UTF-8 byte of the character-reversed {@code value},
     *         ordered from the last encoded byte to the first
     */
    @Override
    public InputCharacter[] parse(String value) {
        // Step 1: flip the characters back, then encode as UTF-8.
        final String restored = new StringBuilder(value).reverse().toString();
        final byte[] encoded = restored.getBytes(StandardCharsets.UTF_8);

        // Step 2: emit the encoded bytes back-to-front.
        final int length = encoded.length;
        final InputCharacter[] reversed = new InputCharacter[length];
        int writeAt = 0;
        for (int readAt = length - 1; readAt >= 0; readAt--) {
            reversed[writeAt++] = new InputByte(encoded[readAt]);
        }
        return reversed;
    }
}
19 changes: 19 additions & 0 deletions src/test/software/amazon/event/ruler/ACMachineTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,25 @@ public void testPrefixMatching() throws Exception {
assertEquals(2, rules.size());
}

@Test
public void testSuffixChineseMatch() throws Exception {
    // Rule: status.weatherText must end with the multi-byte suffix "统治者".
    final String suffixRule = "{\n" +
            " \"status\": {\n" +
            " \"weatherText\": [{\"suffix\": \"统治者\"}]\n" +
            " }\n" +
            "}";
    // Event whose status.weatherText ends with that suffix.
    final String jsonEvent = "{\n" +
            " \"status\": {\n" +
            " \"weatherText\": \"事件统治者\",\n" +
            " \"pm25\": 23\n" +
            " }\n" +
            "}";

    final Machine machine = new Machine();
    machine.addRule("r1", suffixRule);

    // Exactly the one rule added above is expected to match.
    assertEquals(1, machine.rulesForJSONEvent(jsonEvent).size());
}

@Test
public void testCityLotsProblemLines() throws Exception {

Expand Down
19 changes: 19 additions & 0 deletions src/test/software/amazon/event/ruler/MachineTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1557,6 +1557,25 @@ public void testApproxSizeForSimplestPossibleMachine() throws Exception {
assertEquals(60, machine.approximateObjectCount());
}

@Test
public void testSuffixChineseMatch() throws Exception {
    // Rule: status.weatherText must end with the multi-byte suffix "统治者".
    final String suffixRule = "{\n" +
            " \"status\": {\n" +
            " \"weatherText\": [{\"suffix\": \"统治者\"}]\n" +
            " }\n" +
            "}";
    // Event whose status.weatherText ends with that suffix.
    final String event = "{\n" +
            " \"status\": {\n" +
            " \"weatherText\": \"事件统治者\",\n" +
            " \"pm25\": 23\n" +
            " }\n" +
            "}";

    final Machine machine = new Machine();
    machine.addRule("r1", suffixRule);

    // Exactly the one rule added above is expected to match.
    assertEquals(1, machine.rulesForEvent(event).size());
}

@Test(timeout = 500)
public void testApproximateSizeDoNotTakeForeverForRulesWithNumericMatchers() throws Exception {
Machine machine = new Machine();
Expand Down
30 changes: 21 additions & 9 deletions src/test/software/amazon/event/ruler/input/ParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
import static software.amazon.event.ruler.input.DefaultParser.getParser;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

public class ParserTest {

Expand All @@ -27,30 +25,44 @@ public void testParseString() {

@Test
public void testOtherMatchTypes() {
final boolean[] parserInvoked = { false, false };
final int[] parserInvokedCount = { 0, 0, 0 };
DefaultParser parser = new DefaultParser(
new WildcardParser() {
@Override
public InputCharacter[] parse(String value) {
parserInvoked[0] = true;
parserInvokedCount[0] +=1;
return null;
}
},
new EqualsIgnoreCaseParser() {
@Override
public InputCharacter[] parse(String value) {
parserInvoked[1] = true;
parserInvokedCount[1] += 1;
return null;
}
},
new SuffixParser() {
@Override
public InputCharacter[] parse(String value) {
parserInvokedCount[2] += 1;
return null;
}
}
);

assertNull(parser.parse(MatchType.WILDCARD, "abc"));
assertTrue(parserInvoked[0]);
assertFalse(parserInvoked[1]);
assertEquals(parserInvokedCount[0], 1);
assertEquals(parserInvokedCount[1], 0);
assertEquals(parserInvokedCount[2], 0);

assertNull(parser.parse(MatchType.EQUALS_IGNORE_CASE, "abc"));
assertTrue(parserInvoked[0]);
assertTrue(parserInvoked[1]);
assertEquals(parserInvokedCount[0], 1);
assertEquals(parserInvokedCount[1], 1);
assertEquals(parserInvokedCount[2], 0);

assertNull(parser.parse(MatchType.SUFFIX, "abc"));
assertEquals(parserInvokedCount[0], 1);
assertEquals(parserInvokedCount[1], 1);
assertEquals(parserInvokedCount[2], 1);
}
}
40 changes: 40 additions & 0 deletions src/test/software/amazon/event/ruler/input/SuffixParserTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package software.amazon.event.ruler.input;

import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.assertArrayEquals;

/**
 * Unit tests for {@link SuffixParser#parse(String)}.
 *
 * Each case feeds a string that starts with a double quote (byte 34) and checks the
 * exact sequence of bytes produced by the parser.
 */
public class SuffixParserTest {

    private SuffixParser parser;

    @Before
    public void setup() {
        parser = new SuffixParser();
    }

    /** Single-byte characters come out in the same order as they appear in the input. */
    @Test
    public void testParseSimpleString() {
        // '"' = 34, 'a' = 97, 'b' = 98, 'c' = 99
        assertArrayEquals(inputBytes(34, 97, 98, 99), parser.parse("\"abc"));
    }

    /** Same check with the letters supplied in descending order. */
    @Test
    public void testParseReverseString() {
        // '"' = 34, 'd' = 100, 'c' = 99, 'b' = 98, 'a' = 97
        assertArrayEquals(inputBytes(34, 100, 99, 98, 97), parser.parse("\"dcba"));
    }

    /** A three-byte character has its UTF-8 bytes emitted last-to-first. */
    @Test
    public void testParseChineseString() {
        // '雨' encodes as E9 9B A8 (-23, -101, -88); the parser yields A8 9B E9 after the quote.
        assertArrayEquals(inputBytes(34, -88, -101, -23), parser.parse("\"雨"));
    }

    /** Wraps each value, narrowed to a byte, in an {@code InputByte}. */
    private static InputCharacter[] inputBytes(int... values) {
        final InputCharacter[] wrapped = new InputCharacter[values.length];
        for (int i = 0; i < values.length; i++) {
            wrapped[i] = new InputByte((byte) values[i]);
        }
        return wrapped;
    }
}