Skip to content

Commit

Permalink
Disable fastpath reading for some data types in ORC (#10939)
Browse files Browse the repository at this point in the history
Summary:
As discussed in prestodb/presto#23037 (comment), we need to disable fastpath reads of some ORC data types, so that we can add TPC-DS-related tests in the Presto native module.

CC: Yuhta aditi-pandit

Pull Request resolved: #10939

Reviewed By: Yuhta

Differential Revision: D62373833

Pulled By: mbasmanova

fbshipit-source-id: f38c7959ffb72c1ecbda9c7de4631dfa5ee73e39
  • Loading branch information
wypb authored and facebook-github-bot committed Sep 12, 2024
1 parent 485329e commit d1ac079
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 1 deletion.
7 changes: 7 additions & 0 deletions velox/dwio/dwrf/reader/SelectiveDecimalColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ class SelectiveDecimalColumnReader : public SelectiveColumnReader {
DwrfParams& params,
common::ScanSpec& scanSpec);

bool hasBulkPath() const override {
  // RLEv2 encoding is used only by ORC. The fastpath (bulk) read is not yet
  // supported for ORC decimal data: a query reading RLEv2-encoded decimals
  // that contain nulls fails on that path, so report it as unavailable.
  const bool isRleV2 = (version_ == velox::dwrf::RleVersion_2);
  return !isRleV2;
}

void seekToRowGroup(uint32_t index) override;
uint64_t skip(uint64_t numValues) override;

Expand Down
6 changes: 5 additions & 1 deletion velox/dwio/dwrf/reader/SelectiveIntegerDirectColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class SelectiveIntegerDirectColumnReader
const bool dataVInts = stripe.getUseVInts(si);

format_ = stripe.format();
version_ = convertRleVersion(stripe.getEncoding(encodingKey).kind());
if (format_ == velox::dwrf::DwrfFormat::kDwrf) {
intDecoder_ = createDirectDecoder</*isSigned=*/true>(
stripe.getStream(si, params.streamLabels().label(), true),
Expand All @@ -64,7 +65,10 @@ class SelectiveIntegerDirectColumnReader
}

bool hasBulkPath() const override {
  // Only ORC uses RLEv2 encoding. Currently, ORC integer data does not
  // support fastpath reads: reading RLEv2-encoded integer data with nulls
  // through the bulk path makes the query fail. Report the bulk path as
  // available only for the other RLE versions.
  //
  // Note: no unconditional `return true;` may precede this check, otherwise
  // the RLEv2 guard below becomes unreachable.
  return version_ != velox::dwrf::RleVersion_2;
}

void seekToRowGroup(uint32_t index) override {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ class SelectiveStringDictionaryColumnReader
DwrfParams& params,
common::ScanSpec& scanSpec);

bool hasBulkPath() const override {
  // RLEv2 encoding is used only by ORC. The fastpath (bulk) read is not yet
  // supported for ORC string data: a query reading RLEv2-encoded strings
  // that contain nulls fails on that path, so report it as unavailable.
  const bool isRleV2 = (version_ == velox::dwrf::RleVersion_2);
  return !isRleV2;
}

void seekToRowGroup(uint32_t index) override {
SelectiveColumnReader::seekToRowGroup(index);
auto positionsProvider = formatData_->as<DwrfData>().seekToRowGroup(index);
Expand Down
39 changes: 39 additions & 0 deletions velox/dwio/orc/test/ReaderTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,42 @@ TEST_F(OrcReaderTest, testOrcReadAllType) {
EXPECT_EQ(structCol->toString(0, 2, ",", false), "{1, 2}");
}
}

// Reads the RLEv2-encoded example file "rlev2.orc" through the selective
// reader and checks the first row of every batch for the BIGINT,
// DECIMAL(7, 2) and VARCHAR columns.
// NOTE(review): the file presumably contains nulls so that the disabled
// fastpath is exercised -- confirm against the fixture.
TEST_F(OrcReaderTest, testOrcRlev2) {
  google::InstallFailureSignalHandler();
  const std::string rlev2Path(getExamplesFilePath("rlev2.orc"));
  auto schema =
      ROW({"id", "price", "name"}, {BIGINT(), DECIMAL(7, 2), VARCHAR()});
  auto spec = std::make_shared<common::ScanSpec>("<root>");
  spec->addAllChildFields(*schema);

  dwio::common::ReaderOptions readerOpts{pool()};
  readerOpts.setScanSpec(spec);
  readerOpts.setFileFormat(dwio::common::FileFormat::ORC);

  auto reader = DwrfReader::create(
      createFileBufferedInput(rlev2Path, readerOpts.memoryPool()), readerOpts);

  RowReaderOptions rowReaderOptions;
  rowReaderOptions.setScanSpec(spec);
  auto rowReader = reader->createRowReader(rowReaderOptions);

  // Counts the batches so the test cannot pass without reading any data.
  int32_t numBatches = 0;
  auto batch = BaseVector::create(schema, 0, &readerOpts.memoryPool());
  while (rowReader->next(500, batch)) {
    ++numBatches;

    auto rowVector = batch->as<RowVector>();
    ASSERT_NE(rowVector, nullptr);
    // Fatal check: the valueAt(0) reads below require a non-empty batch.
    ASSERT_EQ(5, rowVector->size());

    auto idCol =
        rowVector->childAt(0)->loadedVector()->as<SimpleVector<int64_t>>();
    auto priceCol =
        rowVector->childAt(1)->loadedVector()->as<SimpleVector<int64_t>>();
    auto nameCol =
        rowVector->childAt(2)->loadedVector()->as<SimpleVector<StringView>>();
    // as<>() yields nullptr on a type mismatch; fail instead of crashing.
    ASSERT_NE(idCol, nullptr);
    ASSERT_NE(priceCol, nullptr);
    ASSERT_NE(nameCol, nullptr);

    EXPECT_EQ(idCol->valueAt(0), 1);

    auto priceColType = rowVector->type()->childAt(1);
    EXPECT_EQ(
        DecimalUtil::toString(priceCol->valueAt(0), priceColType), "111.11");
    EXPECT_EQ(nameCol->valueAt(0), "AAAA");
  }
  EXPECT_GT(numBatches, 0) << "rlev2.orc produced no batches";
}
Binary file added velox/dwio/orc/test/examples/rlev2.orc
Binary file not shown.

0 comments on commit d1ac079

Please sign in to comment.