Skip to content

Commit

Permalink
feat: add timestamptz type with precision parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
richtia committed Feb 8, 2024
1 parent 2349b79 commit 0c5dd49
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 17 deletions.
25 changes: 25 additions & 0 deletions extensions/functions_datetime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,17 @@ scalar_functions:
description: Timezone string from IANA tzdb.
value: string
return: i64
# Overload taking a precision_timestamp_tz<P1> value: arguments are the
# component to extract, the value `x`, and a timezone string; returns i64.
# NOTE(review): P1 is presumably the precision parameter of the type - confirm.
- args:
- name: component
options: [ YEAR, ISO_YEAR, US_YEAR, HOUR, MINUTE, SECOND,
MILLISECOND, MICROSECOND, NANOSECOND, SUBSECOND, UNIX_TIME, TIMEZONE_OFFSET ]
description: The part of the value to extract.
- name: x
value: precision_timestamp_tz<P1>
- name: timezone
description: Timezone string from IANA tzdb.
value: string
return: i64
- args:
- name: component
options: [ YEAR, ISO_YEAR, US_YEAR, HOUR, MINUTE, SECOND,
Expand Down Expand Up @@ -121,6 +132,20 @@ scalar_functions:
description: Timezone string from IANA tzdb.
value: string
return: i64
# Overload taking a precision_timestamp_tz<P1> value for calendar components
# (quarter, month, day, week variants). The extra `indexing` option selects
# whether counting starts from 1 or 0. Returns i64.
# NOTE(review): P1 is presumably the precision parameter of the type - confirm.
- args:
- name: component
options: [ QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK,
SUNDAY_DAY_OF_WEEK, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, US_WEEK ]
description: The part of the value to extract.
- name: indexing
options: [ ONE, ZERO ]
description: Start counting from 1 or 0.
- name: x
value: precision_timestamp_tz<P1>
- name: timezone
description: Timezone string from IANA tzdb.
value: string
return: i64
- args:
- name: component
options: [ QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK,
Expand Down
10 changes: 9 additions & 1 deletion proto/substrait/algebra.proto
Original file line number Diff line number Diff line change
Expand Up @@ -811,10 +811,12 @@ message Expression {
// If the precision is 6 or less then this is the microseconds since the UNIX epoch
// If the precision is more than 6 then this is the nanoseconds since the UNIX epoch
uint64 precision_timestamp = 34;
uint64 precision_timestamp_tz = 35;
Struct struct = 25;
Map map = 26;
// Timestamp in units of microseconds since the UNIX epoch.
int64 timestamp_tz = 27;
// Deprecated in favor of `uint64 precision_timestamp_tz`
int64 timestamp_tz = 27 [deprecated = true];
bytes uuid = 28;
Type null = 29; // a typed null literal
List list = 30;
Expand Down Expand Up @@ -855,6 +857,12 @@ message Expression {
int32 precision = 1;
}

// Holds the precision parameter for a timezone-aware precision timestamp.
// NOTE(review): the literal field `precision_timestamp_tz` visible in this
// file is declared as a bare `uint64`, so it is not evident from this view
// where this message is referenced - confirm.
message PrecisionTimestampTZ {
// The maximum number of digits allowed in the value.
// Supported values are 0, 3, 6, and 9. The default is 6.
// NOTE(review): presumably these are fractional-second digits - confirm.
// NOTE(review): this field has no `optional` label, so in proto3 an unset
// precision is indistinguishable from an explicit 0, which is itself a
// supported value. How the default of 6 is applied is not visible here.
int32 precision = 1;
}

message Map {
message KeyValue {
Literal key = 1;
Expand Down
10 changes: 9 additions & 1 deletion proto/substrait/parameterized_types.proto
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@ message ParameterizedType {
Type.Time time = 17;
Type.IntervalYear interval_year = 19;
Type.IntervalDay interval_day = 20;
Type.TimestampTZ timestamp_tz = 29;
// Deprecated in favor of `ParameterizedPrecisionTimestampTZ precision_timestamp_tz`
Type.TimestampTZ timestamp_tz = 29 [deprecated = true];
Type.UUID uuid = 32;

ParameterizedFixedChar fixed_char = 21;
ParameterizedVarChar varchar = 22;
ParameterizedFixedBinary fixed_binary = 23;
ParameterizedDecimal decimal = 24;
ParameterizedPrecisionTimestamp precision_timestamp = 34;
ParameterizedPrecisionTimestampTZ precision_timestamp_tz = 35;

ParameterizedStruct struct = 25;
ParameterizedList list = 27;
Expand Down Expand Up @@ -96,6 +98,12 @@ message ParameterizedType {
Type.Nullability nullability = 3;
}

// Parameterized form of the timezone-aware precision timestamp type, used by
// the `precision_timestamp_tz` member of ParameterizedType.
message ParameterizedPrecisionTimestampTZ {
// Precision of the timestamp, given as an IntegerOption (declared outside
// this view).
IntegerOption precision = 1;
// NOTE(review): presumably identifies a type variation - confirm against the
// sibling Parameterized* messages.
uint32 variation_pointer = 2;
// Nullability of the type.
Type.Nullability nullability = 3;
}

message ParameterizedStruct {
repeated ParameterizedType types = 1;
uint32 variation_pointer = 2;
Expand Down
10 changes: 9 additions & 1 deletion proto/substrait/type.proto
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@ message Type {
Time time = 17;
IntervalYear interval_year = 19;
IntervalDay interval_day = 20;
TimestampTZ timestamp_tz = 29;
// Deprecated in favor of `PrecisionTimestampTZ precision_timestamp_tz`
TimestampTZ timestamp_tz = 29 [deprecated = true];
UUID uuid = 32;

FixedChar fixed_char = 21;
VarChar varchar = 22;
FixedBinary fixed_binary = 23;
Decimal decimal = 24;
PrecisionTimestamp precision_timestamp = 33;
PrecisionTimestampTZ precision_timestamp_tz = 34;

Struct struct = 25;
List list = 27;
Expand Down Expand Up @@ -167,6 +169,12 @@ message Type {
Nullability nullability = 3;
}

// Timezone-aware timestamp type with a precision parameter. The
// `timestamp_tz` member of Type is deprecated in favor of this type.
message PrecisionTimestampTZ {
// Precision of the timestamp. Declared `optional` so that an unset precision
// can be told apart from an explicit 0.
// NOTE(review): valid values and the default are not stated in this message
// - confirm.
optional int32 precision = 1;
// NOTE(review): presumably identifies a type variation - confirm against the
// sibling type messages.
uint32 type_variation_reference = 2;
// Nullability of the type.
Nullability nullability = 3;
}

message Struct {
repeated Type types = 1;
uint32 type_variation_reference = 2;
Expand Down
10 changes: 9 additions & 1 deletion proto/substrait/type_expressions.proto
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@ message DerivationExpression {
Type.Time time = 17;
Type.IntervalYear interval_year = 19;
Type.IntervalDay interval_day = 20;
Type.TimestampTZ timestamp_tz = 29;
// Deprecated in favor of `ExpressionPrecisionTimestampTZ precision_timestamp_tz`
Type.TimestampTZ timestamp_tz = 29 [deprecated = true];
Type.UUID uuid = 32;

ExpressionFixedChar fixed_char = 21;
ExpressionVarChar varchar = 22;
ExpressionFixedBinary fixed_binary = 23;
ExpressionDecimal decimal = 24;
ExpressionPrecisionTimestamp precision_timestamp = 40;
ExpressionPrecisionTimestampTZ precision_timestamp_tz = 41;

ExpressionStruct struct = 25;
ExpressionList list = 27;
Expand Down Expand Up @@ -88,6 +90,12 @@ message DerivationExpression {
Type.Nullability nullability = 3;
}

// Derivation-expression form of the timezone-aware precision timestamp type,
// used by the `precision_timestamp_tz` member of DerivationExpression.
message ExpressionPrecisionTimestampTZ {
// Precision of the timestamp, itself given as a derivation expression.
DerivationExpression precision = 1;
// NOTE(review): presumably identifies a type variation - confirm against the
// sibling Expression* messages.
uint32 variation_pointer = 2;
// Nullability of the type.
Type.Nullability nullability = 3;
}

message ExpressionStruct {
repeated DerivationExpression types = 1;
uint32 variation_pointer = 2;
Expand Down
27 changes: 14 additions & 13 deletions site/docs/types/type_classes.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ Simple type classes are those that don't support any form of configuration. For
| fp64 | An 8-byte double-precision floating point number with the same range and precision as defined for the [IEEE 754 64-bit floating-point format](https://standards.ieee.org/ieee/754/6210/). | `double`
| string | A unicode string of text, [0..2,147,483,647] UTF-8 bytes in length. | `string`
| binary | A binary value, [0..2,147,483,647] bytes in length. | `binary`
| timestamp | A naive timestamp within [1000-01-01 00:00:00.000000..9999-12-31 23:59:59.999999], with microsecond precision. Does not include timezone information and can thus not be unambiguously mapped to a moment on the timeline without context. Similar to naive datetime in Python. | `int64` microseconds since 1970-01-01 00:00:00.000000 (in an unspecified timezone)
| timestamp_tz | A timezone-aware timestamp within [1000-01-01 00:00:00.000000 UTC..9999-12-31 23:59:59.999999 UTC], with microsecond precision. Similar to aware datetime in Python. | `int64` microseconds since 1970-01-01 00:00:00.000000 UTC
| timestamp | A naive timestamp with microsecond precision. Does not include timezone information and can thus not be unambiguously mapped to a moment on the timeline without context. Similar to naive datetime in Python. | `int64` microseconds since 1970-01-01 00:00:00.000000 (in an unspecified timezone)
| timestamp_tz | A timezone-aware timestamp with microsecond precision. Similar to aware datetime in Python. | `int64` microseconds since 1970-01-01 00:00:00.000000 UTC
| date | A date within [1000-01-01..9999-12-31]. | `int32` days since `1970-01-01`
| time | A time since the beginning of any day. Range of [0..86,399,999,999] microseconds; leap seconds need not be supported. | `int64` microseconds past midnight
| interval_year | Interval year to month. Supports a range of [-10,000..10,000] years with month precision (= [-120,000..120,000] months). Usually stored as separate integers for years and months, but only the total number of months is significant, i.e. `1y 0m` is considered equal to `0y 12m` or `1001y -12000m`. | `int32` years and `int32` months, with the added constraint that each component can never independently specify more than 10,000 years, even if the components have opposite signs (e.g. `-10000y 200000m` is **not** allowed)
Expand All @@ -31,17 +31,18 @@ Simple type classes are those that don't support any form of configuration. For

Compound type classes are type classes that need to be configured by means of a parameter pack.

| Type Name | Description | Protobuf representation for literals
|------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ------------------------------------------------
| FIXEDCHAR&lt;L&gt; | A fixed-length unicode string of L characters. L must be within [1..2,147,483,647]. | L-character `string`
| VARCHAR&lt;L&gt; | A unicode string of at most L characters.L must be within [1..2,147,483,647]. | `string` with at most L characters
| FIXEDBINARY&lt;L&gt; | A binary string of L bytes. When casting, values shorter than L are padded with zeros, and values longer than L are right-trimmed. | L-byte `bytes`
| DECIMAL&lt;P, S&gt; | A fixed-precision decimal value having precision (P, number of digits) <= 38 and scale (S, number of fractional digits) 0 <= S <= P. | 16-byte `bytes` representing a little-endian 128-bit integer, to be divided by 10^S to get the decimal value
| STRUCT&lt;T1,...,Tn&gt; | A list of types in a defined order. | `repeated Literal`, types matching T1..Tn
| NSTRUCT&lt;N:T1,...,N:Tn&gt; | **Pseudo-type**: A struct that maps unique names to value types. Each name is a UTF-8-encoded string. Each value can have a distinct type. Note that NSTRUCT is actually a pseudo-type, because Substrait's core type system is based entirely on ordinal positions, not named fields. Nonetheless, when working with systems outside Substrait, names are important. | n/a
| LIST&lt;T&gt; | A list of values of type T. The list can be between [0..2,147,483,647] values in length. | `repeated Literal`, all types matching T
| MAP&lt;K, V&gt; | An unordered list of type K keys with type V values. Keys may be repeated. While the key type could be nullable, keys may not be null. | `repeated KeyValue` (in turn two `Literal`s), all key types matching K and all value types matching V
| PRECISIONTIMESTAMP&lt;P&gt; | A timestamp with fractional second precision (P, number of digits) <= 9. Does not include timezone information and can thus not be unambiguously mapped to a moment on the timeline without context. Similar to naive datetime in Python. | `uint64` nanoseconds since 1970-01-01 00:00:00.000000000 (in an unspecified timezone)
| Type Name | Description | Protobuf representation for literals
|-------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ------------------------------------------------
| FIXEDCHAR&lt;L&gt; | A fixed-length unicode string of L characters. L must be within [1..2,147,483,647]. | L-character `string`
| VARCHAR&lt;L&gt; | A unicode string of at most L characters. L must be within [1..2,147,483,647]. | `string` with at most L characters
| FIXEDBINARY&lt;L&gt; | A binary string of L bytes. When casting, values shorter than L are padded with zeros, and values longer than L are right-trimmed. | L-byte `bytes`
| DECIMAL&lt;P, S&gt; | A fixed-precision decimal value having precision (P, number of digits) <= 38 and scale (S, number of fractional digits) 0 <= S <= P. | 16-byte `bytes` representing a little-endian 128-bit integer, to be divided by 10^S to get the decimal value
| STRUCT&lt;T1,...,Tn&gt; | A list of types in a defined order. | `repeated Literal`, types matching T1..Tn
| NSTRUCT&lt;N:T1,...,N:Tn&gt; | **Pseudo-type**: A struct that maps unique names to value types. Each name is a UTF-8-encoded string. Each value can have a distinct type. Note that NSTRUCT is actually a pseudo-type, because Substrait's core type system is based entirely on ordinal positions, not named fields. Nonetheless, when working with systems outside Substrait, names are important. | n/a
| LIST&lt;T&gt; | A list of values of type T. The list can be between [0..2,147,483,647] values in length. | `repeated Literal`, all types matching T
| MAP&lt;K, V&gt; | An unordered list of type K keys with type V values. Keys may be repeated. While the key type could be nullable, keys may not be null. | `repeated KeyValue` (in turn two `Literal`s), all key types matching K and all value types matching V
| PRECISIONTIMESTAMP&lt;P&gt; | A timestamp with fractional second precision (P, number of digits) <= 9. Does not include timezone information and can thus not be unambiguously mapped to a moment on the timeline without context. Similar to naive datetime in Python. | `uint64` nanoseconds since 1970-01-01 00:00:00.000000000 (in an unspecified timezone)
| PRECISIONTIMESTAMPTZ&lt;P&gt; | A timezone-aware timestamp, with fractional second precision (P, number of digits) <= 9. Similar to aware datetime in Python. | `uint64` microseconds (P <= 6) or nanoseconds (P > 6) since 1970-01-01 00:00:00.000000000 UTC

## User-Defined Types

Expand Down

0 comments on commit 0c5dd49

Please sign in to comment.