Skip to content

Commit

Permalink
Merge pull request #6 from amoe/category_encoder_multiple_values
Browse files Browse the repository at this point in the history
Multiple value support for category encoder
  • Loading branch information
David Banks authored Sep 23, 2022
2 parents 2ca6e21 + 72764f9 commit 826f01a
Show file tree
Hide file tree
Showing 9 changed files with 53 additions and 21 deletions.
2 changes: 1 addition & 1 deletion resources/basic_schema_for_institutional_account.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
{
"name": "LookupList",
"options": {
"delimiter": null,
"delimiter": ",\\s*",
"resourceName": "category"
},
"targetField": {
Expand Down
Binary file modified resources/basic_schema_for_institutional_account_upload.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion resources/basic_schema_for_non_institutional_account.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
{
"name": "LookupList",
"options": {
"delimiter": null,
"delimiter": ",\\s*",
"resourceName": "category"
},
"targetField": {
Expand Down
Binary file modified resources/basic_schema_for_non_institutional_account_upload.xlsx
Binary file not shown.
34 changes: 20 additions & 14 deletions src/converter_registry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,32 +81,38 @@ IntermediateMappingOutput LookupValueConverter::applyConversion(string input, Op
return result;
}

// NB: Note that we don't support the delimiter here, even though we provide it
// as an option on the default field encoders for some reason. This is probably
// a bug.
IntermediateMappingOutput LookupListConverter::applyConversion(
string input, OptionsMap options
) {
string resourceName = options.at("resourceName").value();
optional<string> delimiter = options.at("delimiter");
LookupType type = LOOKUP_TYPE_NAMES.at(resourceName);

vector<string> producedPaths;
QJsonArray producedValue;
using std::cout;
cout << "in here" << endl;

if (input.empty() || isWhitespaceOnly(input)) {
// In our domain this really doesn't make sense. So just silently
// discard the input and move on.
spdlog::debug("LookupList: discarding blank input");
} else {
QJsonValue mappedResult = registry->lookupByString(type, input);

if (mappedResult.isNull()) {
// We don't want to produce nulls in general.
spdlog::warn("LookupList: mapped value result was null, eliding");
vector<string> inputValues{input};
if (delimiter.has_value()) {
string delimiterRegex = delimiter.value();
inputValues = splitByRegexp(input, delimiterRegex);
}

for (string categoryName: inputValues) {
if (categoryName.empty() || isWhitespaceOnly(categoryName)) {
// In our domain this really doesn't make sense. So just silently
// discard the input and move on.
spdlog::debug("LookupList: discarding blank input");
} else {
producedValue.append(mappedResult);
QJsonValue mappedResult = registry->lookupByString(type, categoryName);

if (mappedResult.isNull()) {
// We don't want to produce nulls in general.
spdlog::warn("LookupList: mapped value result was null, eliding");
} else {
producedValue.append(mappedResult);
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/default_field_encoders.hh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ namespace default_field_encoders {
optional<TargetField>(TargetField(TargetFieldType::STANDARD, "categories")),
ConverterName::LOOKUP_LIST,
{},
{{"delimiter", nullopt},
{{"delimiter", optional<string>(",\\s*")},
{"resourceName", optional<string>("category")}}
);

Expand Down
30 changes: 28 additions & 2 deletions src/mapping_engine_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,6 @@ TEST_F(MappingEngineTest, CategoryEncoderCheck) {

MappingOutput result = this->engine->convert(theDocument, theScheme);

// Expect an empty article object because we haven't defined any other
// converters.
QJsonObject expectedArticle;
vector<string> expectedSourcePaths = {};

Expand All @@ -251,6 +249,34 @@ TEST_F(MappingEngineTest, CategoryEncoderCheck) {
ASSERT_THAT(result.getSourcePaths(), Eq(expectedSourcePaths));
}

TEST_F(MappingEngineTest, CategoryEncoderMultipleValuesCheck) {
MappingScheme theScheme = {default_field_encoders::CATEGORY_ENCODER};
vector<string> theDocument = {"North American History, Biochemistry"};

EXPECT_CALL(
lookups, lookupByString(LookupType::CATEGORY, "North American History")
).WillOnce(Return(QJsonValue(1703)));
EXPECT_CALL(
lookups, lookupByString(LookupType::CATEGORY, "Biochemistry")
).WillOnce(Return(QJsonValue(42)));

MappingOutput result = this->engine->convert(theDocument, theScheme);

QJsonObject expectedArticle;
vector<string> expectedSourcePaths = {};

const string expectedResult = R"(
{
"categories": [1703, 42]
}
)";


ASSERT_THAT(result.getArticleObject(), Eq(deserialize(expectedResult)));
ASSERT_THAT(result.getSourcePaths(), Eq(expectedSourcePaths));
}


TEST_F(MappingEngineTest, HandlesBlankCategories) {
MappingScheme theScheme = {default_field_encoders::CATEGORY_ENCODER};
vector<string> theDocument = {""};
Expand Down
2 changes: 1 addition & 1 deletion src/mapping_scheme_deserializer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ const string inputForCategoryEncoder = R"(
"name": "LookupList",
"validationRules": [],
"options": {
"delimiter": null,
"delimiter": ",\\s*",
"resourceName": "category"
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/mapping_scheme_serializer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ const string expectedResultForCategoryEncoder = R"(
"name": "LookupList",
"validationRules": [],
"options": {
"delimiter": null,
"delimiter": ",\\s*",
"resourceName": "category"
}
}
Expand Down

0 comments on commit 826f01a

Please sign in to comment.