Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Presto's format_datetime function with time zone #11283

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 36 additions & 29 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ std::string formatFractionOfSecond(
return toAdd;
}

int32_t appendTimezoneOffset(int64_t offset, char* result) {
int32_t appendTimezoneOffset(int64_t offset, char* result, bool includeColon) {
int pos = 0;
if (offset >= 0) {
result[pos++] = '+';
Expand All @@ -546,7 +546,9 @@ int32_t appendTimezoneOffset(int64_t offset, char* result) {
result[pos++] = char(hours % 10 + '0');
}

result[pos++] = ':';
if (includeColon) {
result[pos++] = ':';
}

const auto minutes = (offset / 60) % 60;
if LIKELY (minutes == 0) {
Expand Down Expand Up @@ -1051,20 +1053,24 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const {
size += std::max((int)token.pattern.minRepresentDigits, 9);
break;
case DateTimeFormatSpecifier::TIMEZONE:
if (timezone == nullptr) {
VELOX_USER_FAIL("Timezone unknown");
}
size += std::max(
token.pattern.minRepresentDigits, timezone->name().length());
VELOX_UNSUPPORTED(
"Date format specifier is not supported: {} ({})",
getSpecifierName(token.pattern.specifier),
token.pattern.minRepresentDigits);

break;
case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID:
if (token.pattern.minRepresentDigits != 2) {
VELOX_UNSUPPORTED(
"Date format specifier is not supported: {} ({})",
getSpecifierName(token.pattern.specifier),
token.pattern.minRepresentDigits);
if (token.pattern.minRepresentDigits == 1) {
size += 8;
} else if (token.pattern.minRepresentDigits == 2) {
size += 9;
} else {
if (timezone == nullptr) {
VELOX_USER_FAIL("Timezone unknown");
}
size += std::max(
token.pattern.minRepresentDigits, timezone->name().length());
}
size += 9;
break;
// Not supported.
case DateTimeFormatSpecifier::WEEK_YEAR:
Expand All @@ -1082,7 +1088,8 @@ int32_t DateTimeFormatter::format(
const tz::TimeZone* timezone,
const uint32_t maxResultSize,
char* result,
bool allowOverflow) const {
bool allowOverflow,
const std::optional<std::string>& zeroOffsetText) const {
int64_t offset = 0;
Timestamp t = timestamp;
if (timezone != nullptr) {
Expand Down Expand Up @@ -1294,28 +1301,28 @@ int32_t DateTimeFormatter::format(
case DateTimeFormatSpecifier::TIMEZONE: {
// TODO: implement short name time zone, need a map from full name to
// short name
if (token.pattern.minRepresentDigits <= 3) {
VELOX_UNSUPPORTED("short name time zone is not yet supported");
}
if (timezone == nullptr) {
VELOX_USER_FAIL("Timezone unknown");
}
const auto& piece = timezone->name();
std::memcpy(result, piece.data(), piece.length());
result += piece.length();
VELOX_UNSUPPORTED("time zone name is not yet supported");
} break;

case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID: {
// Zone: 'Z' outputs offset without a colon, 'ZZ' outputs the offset
// with a colon, 'ZZZ' or more outputs the zone id.
// TODO Add support for 'Z' and 'ZZZ'.
if (token.pattern.minRepresentDigits != 2) {
VELOX_UNSUPPORTED(
"format is not supported for specifier {} ({})",
getSpecifierName(token.pattern.specifier),
token.pattern.minRepresentDigits);
if (offset == 0 && zeroOffsetText.has_value()) {
std::memcpy(result, zeroOffsetText->data(), zeroOffsetText->size());
result += zeroOffsetText->size();
break;
}
result += appendTimezoneOffset(offset, result);

if (token.pattern.minRepresentDigits >= 3) {
const auto& piece = timezone->name();
std::memcpy(result, piece.data(), piece.length());
result += piece.length();
break;
}

result += appendTimezoneOffset(
offset, result, token.pattern.minRepresentDigits == 2);
break;
}
case DateTimeFormatSpecifier::WEEK_OF_WEEK_YEAR: {
Expand Down
3 changes: 2 additions & 1 deletion velox/functions/lib/DateTimeFormatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,8 @@ class DateTimeFormatter {
const tz::TimeZone* timezone,
const uint32_t maxResultSize,
char* result,
bool allowOverflow = false) const;
bool allowOverflow = false,
const std::optional<std::string>& zeroOffsetText = std::nullopt) const;

private:
std::unique_ptr<char[]> literalBuf_;
Expand Down
4 changes: 2 additions & 2 deletions velox/functions/prestosql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -1799,8 +1799,8 @@ struct ToISO8601Function {
out_type<Varchar>& result) const {
const auto maxResultSize = formatter_->maxResultSize(timeZone);
result.reserve(maxResultSize);
const auto resultSize =
formatter_->format(timestamp, timeZone, maxResultSize, result.data());
const auto resultSize = formatter_->format(
timestamp, timeZone, maxResultSize, result.data(), false, "Z");
result.resize(resultSize);
}

Expand Down
21 changes: 13 additions & 8 deletions velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3217,8 +3217,9 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) {
// Time zone test cases - 'z'
setQueryTimeZone("Asia/Kolkata");
EXPECT_EQ(
"Asia/Kolkata", formatDatetime(parseTimestamp("1970-01-01"), "zzzz"));
"Asia/Kolkata", formatDatetime(parseTimestamp("1970-01-01"), "ZZZ"));
EXPECT_EQ("+05:30", formatDatetime(parseTimestamp("1970-01-01"), "ZZ"));
EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-01-01"), "Z"));

// Literal test cases.
EXPECT_EQ("hello", formatDatetime(parseTimestamp("1970-01-01"), "'hello'"));
Expand All @@ -3243,12 +3244,12 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) {
"AD 19 1970 4 Thu 1970 1 1 1 AM 8 8 8 8 3 11 5 Asia/Kolkata",
formatDatetime(
parseTimestamp("1970-01-01 02:33:11.5"),
"G C Y e E y D M d a K h H k m s S zzzz"));
"G C Y e E y D M d a K h H k m s S ZZZ"));
EXPECT_EQ(
"AD 19 1970 4 asdfghjklzxcvbnmqwertyuiop Thu ' 1970 1 1 1 AM 8 8 8 8 3 11 5 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ Asia/Kolkata",
formatDatetime(
parseTimestamp("1970-01-01 02:33:11.5"),
"G C Y e 'asdfghjklzxcvbnmqwertyuiop' E '' y D M d a K h H k m s S 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ zzzz"));
"G C Y e 'asdfghjklzxcvbnmqwertyuiop' E '' y D M d a K h H k m s S 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ ZZZ"));

disableAdjustTimestampToTimezone();
EXPECT_EQ(
Expand Down Expand Up @@ -4512,11 +4513,11 @@ TEST_F(DateTimeFunctionsTest, toISO8601Timestamp) {
"to_iso8601(c0)", std::make_optional(parseTimestamp(timestamp)));
};
disableAdjustTimestampToTimezone();
EXPECT_EQ("2024-11-01T10:00:00.000+00:00", toIso("2024-11-01 10:00"));
EXPECT_EQ("2024-11-04T10:00:00.000+00:00", toIso("2024-11-04 10:00"));
EXPECT_EQ("2024-11-04T15:05:34.100+00:00", toIso("2024-11-04 15:05:34.1"));
EXPECT_EQ("2024-11-04T15:05:34.123+00:00", toIso("2024-11-04 15:05:34.123"));
EXPECT_EQ("0022-11-01T10:00:00.000+00:00", toIso("22-11-01 10:00"));
EXPECT_EQ("2024-11-01T10:00:00.000Z", toIso("2024-11-01 10:00"));
EXPECT_EQ("2024-11-04T10:00:00.000Z", toIso("2024-11-04 10:00"));
EXPECT_EQ("2024-11-04T15:05:34.100Z", toIso("2024-11-04 15:05:34.1"));
EXPECT_EQ("2024-11-04T15:05:34.123Z", toIso("2024-11-04 15:05:34.123"));
EXPECT_EQ("0022-11-01T10:00:00.000Z", toIso("22-11-01 10:00"));

setQueryTimeZone("America/Los_Angeles");
EXPECT_EQ("2024-11-01T03:00:00.000-07:00", toIso("2024-11-01 10:00"));
Expand Down Expand Up @@ -4562,6 +4563,10 @@ TEST_F(DateTimeFunctionsTest, toISO8601TimestampWithTimezone) {
EXPECT_EQ(
"0022-11-01T10:00:00.000+05:41:16",
toIso("22-11-01 10:00", "Asia/Kathmandu"));

EXPECT_EQ("2024-11-01T10:00:00.000Z", toIso("2024-11-01 10:00", "UTC"));
EXPECT_EQ("2024-11-04T10:00:45.120Z", toIso("2024-11-04 10:00:45.12", "UTC"));
EXPECT_EQ("0022-11-01T10:00:00.000Z", toIso("22-11-01 10:00", "UTC"));
}

TEST_F(DateTimeFunctionsTest, atTimezoneTest) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ void castToString(
const auto* timestamps = input.as<SimpleVector<int64_t>>();

auto expectedFormatter =
functions::buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS zzzz");
functions::buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS ZZZ");
VELOX_CHECK(
!expectedFormatter.hasError(),
"Default format should always be valid, error: {}",
Expand Down
Loading