diff --git a/tensorflow_io/BUILD b/tensorflow_io/BUILD index 6373f11e3..29b1cf042 100644 --- a/tensorflow_io/BUILD +++ b/tensorflow_io/BUILD @@ -15,6 +15,7 @@ cc_binary( "//tensorflow_io/core:bigtable_ops", "//tensorflow_io/core:audio_video_ops", "//tensorflow_io/core:avro_ops", + "//tensorflow_io/core:avro_atds", "//tensorflow_io/core:orc_ops", "//tensorflow_io/core:cpuinfo", "//tensorflow_io/core:file_ops", diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD index e68c817a2..04c8ccac6 100644 --- a/tensorflow_io/core/BUILD +++ b/tensorflow_io/core/BUILD @@ -484,6 +484,61 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "avro_atds", + srcs = [ + "kernels/avro/atds/atds_decoder.cc", + "kernels/avro/atds/errors.cc", + "kernels/avro/atds_dataset_kernels.cc", + ], + hdrs = [ + "kernels/avro/atds/atds_decoder.h", + "kernels/avro/atds/avro_block_reader.h", + "kernels/avro/atds/avro_decoder_template.h", + "kernels/avro/atds/decoder_base.h", + "kernels/avro/atds/decompression_handler.h", + "kernels/avro/atds/dense_feature_decoder.h", + "kernels/avro/atds/errors.h", + "kernels/avro/atds/opaque_contextual_feature_decoder.h", + "kernels/avro/atds/shuffle_handler.h", + "kernels/avro/atds/sparse_feature_decoder.h", + "kernels/avro/atds/sparse_feature_internal_decoder.h", + "kernels/avro/atds/sparse_value_buffer.h", + "kernels/avro/atds/varlen_feature_decoder.h", + "kernels/avro/atds_dataset_kernels.h", + ], + copts = tf_io_copts(), + linkstatic = True, + deps = [ + ":avro_ops", + "@avro", + "@local_config_tf//:libtensorflow_framework", + "@local_config_tf//:tf_header_lib", + ], + alwayslink = 1, +) + +cc_library( + name = "avro_atds_tests", + srcs = [ + "kernels/avro/atds/atds_decoder_test.cc", + "kernels/avro/atds/avro_block_reader_test.cc", + "kernels/avro/atds/decoder_test_util.cc", + "kernels/avro/atds/decoder_test_util.h", + "kernels/avro/atds/dense_feature_decoder_test.cc", + "kernels/avro/atds/shuffle_handler_test.cc", + 
"kernels/avro/atds/sparse_feature_decoder_test.cc", + "kernels/avro/atds/sparse_value_buffer_test.cc", + "kernels/avro/atds/varlen_feature_decoder_test.cc", + ], + copts = tf_io_copts(), + deps = [ + ":avro_atds", + "//tensorflow_io/core:avro_ops", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "orc_ops", srcs = [ diff --git a/tensorflow_io/core/kernels/avro/atds/atds_decoder.cc b/tensorflow_io/core/kernels/avro/atds/atds_decoder.cc new file mode 100644 index 000000000..997c417a3 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/atds_decoder.cc @@ -0,0 +1,82 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow_io/core/kernels/avro/atds/atds_decoder.h" + +#include "api/Generic.hh" +#include "api/Specific.hh" +#include "tensorflow_io/core/kernels/avro/atds/dense_feature_decoder.h" +#include "tensorflow_io/core/kernels/avro/atds/errors.h" +#include "tensorflow_io/core/kernels/avro/atds/opaque_contextual_feature_decoder.h" +#include "tensorflow_io/core/kernels/avro/atds/sparse_feature_decoder.h" +#include "tensorflow_io/core/kernels/avro/atds/varlen_feature_decoder.h" + +namespace tensorflow { +namespace atds { + +Status ATDSDecoder::Initialize(const avro::ValidSchema& schema) { + auto& root_node = schema.root(); + if (root_node->type() != avro::AVRO_RECORD) { + return ATDSNotRecordError(avro::toString(root_node->type()), + schema.toJson()); + } + + size_t num_of_columns = root_node->leaves(); + feature_names_.resize(num_of_columns, ""); + decoder_types_.resize(num_of_columns, FeatureType::opaque_contextual); + decoders_.resize(num_of_columns); + + for (size_t i = 0; i < dense_features_.size(); i++) { + TF_RETURN_IF_ERROR( + InitializeFeatureDecoder(schema, root_node, dense_features_[i])); + } + + for (size_t i = 0; i < sparse_features_.size(); i++) { + TF_RETURN_IF_ERROR( + InitializeFeatureDecoder(schema, root_node, sparse_features_[i])); + } + + for (size_t i = 0; i < varlen_features_.size(); i++) { + TF_RETURN_IF_ERROR( + InitializeFeatureDecoder(schema, root_node, varlen_features_[i])); + } + + size_t opaque_contextual_index = 0; + for (size_t i = 0; i < num_of_columns; i++) { + if (decoder_types_[i] == FeatureType::opaque_contextual) { + decoders_[i] = std::unique_ptr( + new opaque_contextual::FeatureDecoder(opaque_contextual_index++)); + + auto& opaque_contextual_node = root_node->leafAt(i); + skipped_data_.emplace_back(opaque_contextual_node); + if (opaque_contextual_node->hasName()) { + feature_names_[i] = root_node->leafAt(i)->name(); + LOG(WARNING) << "Column 
'" << feature_names_[i] << "' from input data" + << " is not used. Cost of parsing an unused column is " + "prohibitive!! " + << "Consider dropping it to improve I/O performance."; + } + } + } + + // Decoder requires unvaried schema in all input files. + // Copy the schema to validate other input files. + schema_ = schema; + + return OkStatus(); +} + +} // namespace atds +} // namespace tensorflow diff --git a/tensorflow_io/core/kernels/avro/atds/atds_decoder.h b/tensorflow_io/core/kernels/avro/atds/atds_decoder.h new file mode 100644 index 000000000..8c7c907e6 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/atds_decoder.h @@ -0,0 +1,150 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECODER_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECODER_H_ + +#include "api/Decoder.hh" +#include "api/GenericDatum.hh" +#include "api/ValidSchema.hh" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow_io/core/kernels/avro/atds/decoder_base.h" +#include "tensorflow_io/core/kernels/avro/atds/dense_feature_decoder.h" +#include "tensorflow_io/core/kernels/avro/atds/errors.h" +#include "tensorflow_io/core/kernels/avro/atds/sparse_feature_decoder.h" +#include "tensorflow_io/core/kernels/avro/atds/varlen_feature_decoder.h" + +namespace tensorflow { +namespace atds { + +class NullableFeatureDecoder : public DecoderBase { + public: + explicit NullableFeatureDecoder(std::unique_ptr& decoder, + size_t non_null_index) + : decoder_(std::move(decoder)), non_null_index_(non_null_index) {} + + Status operator()(avro::DecoderPtr& decoder, + std::vector& dense_tensors, + sparse::ValueBuffer& buffer, + std::vector& skipped_data, + size_t offset) { + auto index = decoder->decodeUnionIndex(); + if (index != non_null_index_) { + return NullValueError(); + } + return decoder_->operator()(decoder, dense_tensors, buffer, skipped_data, + offset); + } + + private: + std::unique_ptr decoder_; + const size_t non_null_index_; +}; + +class ATDSDecoder { + public: + explicit ATDSDecoder(const std::vector& dense_features, + const std::vector& sparse_features, + const std::vector& varlen_features) + : dense_features_(dense_features), + sparse_features_(sparse_features), + varlen_features_(varlen_features) {} + + Status Initialize(const avro::ValidSchema&); + + Status DecodeATDSDatum(avro::DecoderPtr& decoder, + std::vector& dense_tensors, + sparse::ValueBuffer& buffer, + std::vector& skipped_data, 
+ size_t offset) { + // LOG(INFO) << "Decode atds from offset: " << offset; + for (size_t i = 0; i < decoders_.size(); i++) { + Status status = decoders_[i]->operator()(decoder, dense_tensors, buffer, + skipped_data, offset); + if (TF_PREDICT_FALSE(!status.ok())) { + return FeatureDecodeError(feature_names_[i], status.error_message()); + } + } + // LOG(INFO) << "Decode atds from offset Done: " << offset; + return OkStatus(); + } + + const std::vector& GetSkippedData() { + return skipped_data_; + } + + const avro::ValidSchema& GetSchema() { return schema_; } + + private: + template + Status InitializeFeatureDecoder(const avro::ValidSchema& schema, + const avro::NodePtr& root_node, + const Metadata& metadata) { + size_t pos; + if (!root_node->nameIndex(metadata.name, pos)) { + return FeatureNotFoundError(metadata.name, schema.toJson()); + } + decoder_types_[pos] = metadata.type; + feature_names_[pos] = metadata.name; + + auto& feature_node = root_node->leafAt(pos); + if (feature_node->type() == avro::AVRO_UNION) { + size_t non_null_index = 0; + size_t num_union_types = feature_node->leaves(); + + if (num_union_types == 2 && + feature_node->leafAt(0)->type() == avro::AVRO_NULL) { + non_null_index = 1; + } + + if (num_union_types == 1 || num_union_types == 2) { + auto& non_null_feature_node = feature_node->leafAt(non_null_index); + TF_RETURN_IF_ERROR(ValidateSchema(non_null_feature_node, metadata)); + std::unique_ptr decoder_base = + CreateFeatureDecoder(non_null_feature_node, metadata); + decoders_[pos] = std::unique_ptr( + new NullableFeatureDecoder(decoder_base, non_null_index)); + } else { + std::ostringstream oss; + feature_node->printJson(oss, 0); + return InvalidUnionTypeError(metadata.name, oss.str()); + } + } else { + TF_RETURN_IF_ERROR(ValidateSchema(feature_node, metadata)); + decoders_[pos] = CreateFeatureDecoder(feature_node, metadata); + } + + return OkStatus(); + } + + const std::vector& dense_features_; + const std::vector& sparse_features_; + const 
std::vector& varlen_features_; + + std::vector feature_names_; + std::vector> decoders_; + std::vector decoder_types_; + + std::vector skipped_data_; + avro::ValidSchema schema_; +}; + +} // namespace atds +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECODER_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/atds_decoder_test.cc b/tensorflow_io/core/kernels/avro/atds/atds_decoder_test.cc new file mode 100644 index 000000000..61e735974 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/atds_decoder_test.cc @@ -0,0 +1,147 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow_io/core/kernels/avro/atds/atds_decoder.h" + +#include "api/Decoder.hh" +#include "api/GenericDatum.hh" +#include "api/Stream.hh" +#include "api/ValidSchema.hh" +#include "tensorflow/core/platform/test.h" +#include "tensorflow_io/core/kernels/avro/atds/decoder_test_util.h" +#include "tensorflow_io/core/kernels/avro/atds/dense_feature_decoder.h" + +namespace tensorflow { +namespace atds { + +TEST(ATDSDecoder, TestMixedFeatures) { + std::vector feature_names = { + "dense_float_1d", "dense_long_2d", "unused_dense", + "sparse_int_1d", "unsed_sparse", "sparse_string_2d", + "unused_varlen", "varlen_bool_1d", "varlen_string_2d"}; + std::vector feature_pos = {0, 1, 0, 1, 2, 3}; + std::vector> feature_shapes = { + {3}, {2, 2}, {101}, {6, 10}, {-1}, {-1, -1}}; + std::vector tensor_shapes; + for (auto shape : feature_shapes) { + tensor_shapes.emplace_back(shape); + } + + ATDSSchemaBuilder schema_builder = ATDSSchemaBuilder(); + schema_builder.AddDenseFeature(feature_names[0], DT_FLOAT, 1) + .AddDenseFeature(feature_names[1], DT_INT64, 2) + .AddDenseFeature(feature_names[2], DT_FLOAT, 2) // unused + .AddSparseFeature(feature_names[3], DT_INT32, 1) + .AddSparseFeature(feature_names[4], DT_DOUBLE, 1) // unused + .AddSparseFeature(feature_names[5], DT_STRING, 2) + .AddDenseFeature(feature_names[6], DT_BOOL, 0) // unused + .AddDenseFeature(feature_names[7], DT_BOOL, 1) + .AddDenseFeature(feature_names[8], DT_STRING, 2); + + string schema = schema_builder.Build(); + avro::ValidSchema writer_schema = schema_builder.BuildVaildSchema(); + + avro::GenericDatum atds_datum(writer_schema); + std::vector dense_float_1d = {1.0, 2.0, 3.0}; + std::vector> dense_long_2d = {{1, 3}, {2, 4}}; + std::vector> unused_dense = {{1.0, 2.0}}; + + std::vector> sparse_int_1d_indices = {{100}}; + std::vector sparse_int_1d_values = {100}; + std::vector> sparse_string_2d_indices = {{5, 5}, {4, 8}}; + 
std::vector sparse_string_2d_values = {"TensorFlow", "Linkedin"}; + std::vector> unused_sparse_indices = {{0, 1}}; + std::vector unused_sparse_values = {1.0, -1.0}; + + std::vector varlen_bool_1d = {true, false, true}; + std::vector> varlen_string_2d = {{"ABC"}, {}, {"DEF"}}; + std::vector expected_varlen_string_2d_values = {"ABC", "DEF"}; + bool unused_varlen = true; + + AddDenseValue(atds_datum, feature_names[0], dense_float_1d); + AddDenseValue(atds_datum, feature_names[1], dense_long_2d); + AddDenseValue(atds_datum, feature_names[2], unused_dense); + AddSparseValue(atds_datum, feature_names[3], sparse_int_1d_indices, + sparse_int_1d_values); + AddSparseValue(atds_datum, feature_names[4], unused_sparse_indices, + unused_sparse_values); + AddSparseValue(atds_datum, feature_names[5], sparse_string_2d_indices, + sparse_string_2d_values); + AddDenseValue(atds_datum, feature_names[6], unused_varlen); + AddDenseValue(atds_datum, feature_names[7], varlen_bool_1d); + AddDenseValue(atds_datum, feature_names[8], varlen_string_2d); + + avro::OutputStreamPtr out_stream = EncodeAvroGenericDatum(atds_datum); + avro::InputStreamPtr in_stream = avro::memoryInputStream(*out_stream); + avro::DecoderPtr decoder = avro::binaryDecoder(); + decoder->init(*in_stream); + + std::vector dense_features; + dense_features.emplace_back(FeatureType::dense, feature_names[0], DT_FLOAT, + tensor_shapes[0], feature_pos[0]); + dense_features.emplace_back(FeatureType::dense, feature_names[1], DT_INT64, + tensor_shapes[1], feature_pos[1]); + + size_t values_index = 0; + std::vector sparse_features; + sparse_features.emplace_back(FeatureType::sparse, feature_names[3], DT_INT32, + tensor_shapes[2], feature_pos[2], values_index); + sparse_features.emplace_back(FeatureType::sparse, feature_names[5], DT_STRING, + tensor_shapes[3], feature_pos[3], values_index); + + std::vector varlen_features; + size_t string_value_index = 1; // index 0 is used by sparse_string_2d. 
+ varlen_features.emplace_back(FeatureType::varlen, feature_names[7], DT_BOOL, + tensor_shapes[4], feature_pos[4], values_index); + varlen_features.emplace_back(FeatureType::varlen, feature_names[8], DT_STRING, + tensor_shapes[5], feature_pos[5], + string_value_index); + + ATDSDecoder atds_decoder = + ATDSDecoder(dense_features, sparse_features, varlen_features); + Status init_status = atds_decoder.Initialize(writer_schema); + ASSERT_TRUE(init_status.ok()); + + std::vector dense_tensors; + dense_tensors.emplace_back(DT_FLOAT, TensorShape(feature_shapes[0])); + dense_tensors.emplace_back(DT_INT64, TensorShape(feature_shapes[1])); + + sparse::ValueBuffer buffer; + buffer.indices.resize(4); + buffer.num_of_elements.resize(4); + buffer.string_values.resize(2); + buffer.int_values.resize(1); + buffer.bool_values.resize(1); + + std::vector skipped_data = atds_decoder.GetSkippedData(); + long offset = 0L; + Status decode_status = + atds_decoder.DecodeATDSDatum(decoder, dense_tensors, buffer, skipped_data, + static_cast(offset)); + ASSERT_TRUE(decode_status.ok()); + AssertTensorValues(dense_tensors[0], dense_float_1d); + AssertTensorValues(dense_tensors[1], dense_long_2d); + ValidateBuffer(buffer, sparse_features[0], {offset, 100}, + sparse_int_1d_values, {1}); + ValidateBuffer(buffer, sparse_features[1], {offset, 5, 4, offset, 5, 8}, + sparse_string_2d_values, {2}); + ValidateBuffer(buffer, varlen_features[0], {offset, 0, offset, 1, offset, 2}, + varlen_bool_1d, {3}); + ValidateBuffer(buffer, varlen_features[1], {offset, 0, 0, offset, 2, 0}, + expected_varlen_string_2d_values, {2}); +} + +} // namespace atds +} // namespace tensorflow diff --git a/tensorflow_io/core/kernels/avro/atds/avro_block_reader.h b/tensorflow_io/core/kernels/avro/atds/avro_block_reader.h new file mode 100644 index 000000000..10dad8605 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/avro_block_reader.h @@ -0,0 +1,225 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_AVRO_BLOCK_READER_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_AVRO_BLOCK_READER_H_ + +#include +#include + +#include "api/Compiler.hh" +#include "api/DataFile.hh" +#include "api/Decoder.hh" +#include "api/Specific.hh" +#include "api/Stream.hh" +#include "api/ValidSchema.hh" +#include "tensorflow/core/lib/io/random_inputstream.h" + +namespace tensorflow { +namespace data { + +struct AvroBlock { + int64_t object_count; + int64_t num_to_decode; + int64_t num_decoded; + int64_t byte_count; + int64_t counts; + tstring content; + avro::Codec codec; + size_t read_offset; +}; + +class FileBufferInputStream : public avro::InputStream { + public: + FileBufferInputStream(tensorflow::RandomAccessFile* file, int64 buffer_size) + : reader_(nullptr), + limit_(0), + pos_(0), + count_(0), + skip_(0), + buffer_size_(buffer_size) { + reader_ = absl::make_unique(file); + } + + bool next(const uint8_t** data, size_t* len) override { + while (pos_ == limit_) { + if (skip_ > 0) { + reader_->SkipNBytes(static_cast(skip_)); + skip_ = 0; + } + + buf_.clear(); + Status status = reader_->ReadNBytes(buffer_size_, &buf_); + pos_ = 0; + limit_ = buf_.size(); + if (limit_ == 0 && errors::IsOutOfRange(status)) { + return false; + } + } + + if (*len == 0 || pos_ + *len > limit_) { + *len = limit_ - pos_; + } + + *data = 
reinterpret_cast(buf_.data()) + pos_; + pos_ += *len; + count_ += *len; + + return *len != 0; + } + + void backup(size_t len) override { + pos_ -= len; + count_ -= len; + } + + void skip(size_t len) override { + if (pos_ + len > limit_) { + skip_ = pos_ + len - limit_; + pos_ = limit_; + } else { + pos_ += len; + } + + count_ += len; + } + + size_t byteCount() const override { return count_; } + + private: + std::unique_ptr reader_; + size_t limit_, pos_, count_, skip_; + const int64 buffer_size_; + tstring buf_; +}; + +constexpr const char* const AVRO_SCHEMA_KEY = "avro.schema"; +constexpr const char* const AVRO_CODEC_KEY = "avro.codec"; +constexpr const char* const AVRO_NULL_CODEC = "null"; +constexpr const char* const AVRO_DEFLATE_CODEC = "deflate"; +constexpr const char* const AVRO_SNAPPY_CODEC = "snappy"; + +using Magic = std::array; +static const Magic magic = {{'O', 'b', 'j', '\x01'}}; + +using AvroMetadata = std::map>; + +class AvroBlockReader { + public: + AvroBlockReader(tensorflow::RandomAccessFile* file, int64 buffer_size) + : stream_(nullptr), decoder_(nullptr) { + stream_ = std::make_unique(file, buffer_size); + decoder_ = avro::binaryDecoder(); + ReadHeader(); + } + + const avro::ValidSchema& GetSchema() { return data_schema_; } + + Status ReadBlock(AvroBlock& block) { + decoder_->init(*stream_); + const uint8_t* p = 0; + size_t n = 0; + if (!stream_->next(&p, &n)) { + return errors::OutOfRange("eof"); + } + stream_->backup(n); + + avro::decode(*decoder_, block.object_count); + // LOG(INFO) << "block object counts = " << block.object_count; + avro::decode(*decoder_, block.byte_count); + // LOG(INFO) << "block bytes counts = " << block.byte_count; + block.content.reserve(block.byte_count); + + decoder_->init(*stream_); + int64_t remaining_bytes = block.byte_count; + while (remaining_bytes > 0) { + const uint8_t* data; + size_t len = remaining_bytes; + if (!stream_->next(&data, &len)) { + return errors::OutOfRange("eof"); + } + 
block.content.append(reinterpret_cast(data), len); + remaining_bytes -= len; + } + // LOG(INFO) << "block content = " << block.content; + block.codec = codec_; + block.read_offset = 0; + block.num_decoded = 0; + block.num_to_decode = 0; + decoder_->init(*stream_); + avro::DataFileSync sync_marker; + avro::decode(*decoder_, sync_marker); + if (sync_marker != sync_marker_) { + return errors::DataLoss("Avro sync marker mismatch."); + } + + return OkStatus(); + } + + private: + void ReadHeader() { + decoder_->init(*stream_); + Magic m; + avro::decode(*decoder_, m); + if (magic != m) { + throw avro::Exception("Invalid data file. Magic does not match."); + } + avro::decode(*decoder_, metadata_); + AvroMetadata::const_iterator it = metadata_.find(AVRO_SCHEMA_KEY); + if (it == metadata_.end()) { + throw avro::Exception("No schema in metadata"); + } + + string schema = std::string( + reinterpret_cast(it->second.data()), it->second.size()); + // LOG(INFO) << schema; + std::istringstream iss(schema); + avro::compileJsonSchema(iss, data_schema_); + + it = metadata_.find(AVRO_CODEC_KEY); + if (it != metadata_.end()) { + size_t length = it->second.size(); + const char* codec = reinterpret_cast(it->second.data()); + // LOG(INFO) << "Codec = " << std::string(codec, length); + if (strncmp(codec, AVRO_DEFLATE_CODEC, length) == 0) { + codec_ = avro::DEFLATE_CODEC; + } else if (strncmp(codec, AVRO_SNAPPY_CODEC, length) == 0) { + codec_ = avro::SNAPPY_CODEC; + } else if (strncmp(codec, AVRO_NULL_CODEC, length) == 0) { + codec_ = avro::NULL_CODEC; + } else { + throw avro::Exception("Unknown codec in data file: " + + std::string(codec, it->second.size())); + } + } else { + codec_ = avro::NULL_CODEC; + } + + avro::decode(*decoder_, sync_marker_); + } + + AvroMetadata metadata_; + avro::DataFileSync sync_marker_; + avro::Codec codec_; + + std::unique_ptr stream_; + avro::DecoderPtr decoder_; + avro::ValidSchema data_schema_; +}; + +} // namespace data +} // namespace tensorflow + +#endif 
// TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_AVRO_BLOCK_READER_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/avro_block_reader_test.cc b/tensorflow_io/core/kernels/avro/atds/avro_block_reader_test.cc new file mode 100644 index 000000000..e6d0b5b85 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/avro_block_reader_test.cc @@ -0,0 +1,625 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow_io/core/kernels/avro/atds/avro_block_reader.h" + +#include "absl/memory/memory.h" +#include "tensorflow/core/platform/file_system.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow_io/core/kernels/avro/atds/decoder_test_util.h" +//#include "tensorflow/tsl/platform/default/posix_file_system.h" + +#include +#include + +#include "api/DataFile.hh" +#include "api/Generic.hh" +#include "api/GenericDatum.hh" +#include "api/Stream.hh" + +namespace tensorflow { +namespace data { + +class MockRandomAccessFile : public RandomAccessFile { + public: + explicit MockRandomAccessFile(char* content, size_t len) + : content_(content), len_(len) {} + + Status Read(uint64 offset, size_t n, StringPiece* result, + char* scratch) const override { + size_t bytes_to_copy = std::min(n, len_ - static_cast(offset)); + memcpy(scratch, content_ + offset, bytes_to_copy); + *result = StringPiece(scratch, bytes_to_copy); + if (bytes_to_copy == 
n) { + return OkStatus(); + } + return Status(tensorflow::error::Code::OUT_OF_RANGE, "eof"); + } + + private: + const char* content_; + size_t len_; +}; + +TEST(FileBufferInputStreamTest, SINGLE_BUFFER) { + char content[8]; + for (size_t i = 0; i < 8; i++) { + content[i] = '0' + i; + } + std::unique_ptr raf = + absl::make_unique(content, 8); + int64 buffer_size = 8; + FileBufferInputStream stream(raf.get(), buffer_size); + const uint8_t* data; + size_t len = 4; + ASSERT_TRUE(stream.next(&data, &len)); + ASSERT_EQ(4, len); + ASSERT_EQ(4, stream.byteCount()); + tensorflow::atds::AssertValueEqual("0123", (char*)data, len); + + stream.skip(1); + len = 3; + stream.next(&data, &len); + ASSERT_EQ(3, len); + ASSERT_EQ(8, stream.byteCount()); + tensorflow::atds::AssertValueEqual("567", (char*)data, len); + + stream.backup(5); + len = 3; + stream.next(&data, &len); + ASSERT_EQ(3, len); + ASSERT_EQ(6, stream.byteCount()); + tensorflow::atds::AssertValueEqual("345", (char*)data, len); +} + +TEST(FileBufferInputStreamTest, READ_PAST_BUFFER) { + char content[16]; + for (size_t i = 0; i < 16; i++) { + content[i] = 'a' + i; + } + std::unique_ptr raf = + absl::make_unique(content, 16); + int64 buffer_size = 8; + FileBufferInputStream stream(raf.get(), buffer_size); + const uint8_t* data; + size_t len = 3; + ASSERT_TRUE(stream.next(&data, &len)); + ASSERT_EQ(3, len); + ASSERT_EQ(3, stream.byteCount()); + tensorflow::atds::AssertValueEqual("abc", (char*)data, len); + + len = 7; + stream.next(&data, &len); + ASSERT_EQ(5, len); + ASSERT_EQ(8, stream.byteCount()); + tensorflow::atds::AssertValueEqual("defgh", (char*)data, len); + + len = 4; + stream.next(&data, &len); + ASSERT_EQ(4, len); + ASSERT_EQ(12, stream.byteCount()); + tensorflow::atds::AssertValueEqual("ijkl", (char*)data, len); +} + +TEST(FileBufferInputStreamTest, SKIP_PAST_BUFFER) { + char content[16]; + for (size_t i = 0; i < 16; i++) { + content[i] = 'a' + i; + } + std::unique_ptr raf = + absl::make_unique(content, 16); + 
int64 buffer_size = 8; + FileBufferInputStream stream(raf.get(), buffer_size); + const uint8_t* data; + size_t len = 3; + ASSERT_TRUE(stream.next(&data, &len)); + ASSERT_EQ(3, len); + ASSERT_EQ(3, stream.byteCount()); + tensorflow::atds::AssertValueEqual("abc", (char*)data, len); + + stream.skip(7); + ASSERT_EQ(10, stream.byteCount()); + + len = 4; + stream.next(&data, &len); + ASSERT_EQ(4, len); + ASSERT_EQ(14, stream.byteCount()); + tensorflow::atds::AssertValueEqual("klmn", (char*)data, len); +} + +static constexpr size_t OS_BUFFER_SIZE = 1024; + +class StringOutputStream : public avro::OutputStream { + public: + StringOutputStream(string* buf) : buf_(buf), pos_(0) {} + + bool next(uint8_t** data, size_t* len) { + size_t capacity = buf_->capacity(); + if (pos_ == capacity) { + buf_->resize(capacity + OS_BUFFER_SIZE); + } + *data = + reinterpret_cast(const_cast(&(buf_->c_str())[pos_])); + size_t new_capacity = buf_->capacity(); + *len = new_capacity - pos_; + pos_ = new_capacity; + return true; + } + + void backup(size_t len) { pos_ -= len; } + + uint64_t byteCount() const { return pos_; } + + void flush() {} + + private: + string* buf_; + size_t pos_; +}; + +static constexpr int64 BUFFER_SIZE = 1024; + +Status AvroBlockReaderTest(char* content, size_t byte_count) { + std::unique_ptr raf = + absl::make_unique(content, byte_count); + std::unique_ptr reader = + absl::make_unique(raf.get(), BUFFER_SIZE); + AvroBlock blk; + return reader->ReadBlock(blk); +} + +template +void AvroBlockReaderTest(char* expected_content, int64_t object_count, + size_t expected_byte_count, avro::ValidSchema& schema, + const std::vector& data) { + string buf; + std::unique_ptr os = + absl::make_unique(&buf); + avro::DataFileWriter writer(std::move(os), schema); + for (T datum : data) { + writer.write(datum); + } + writer.flush(); + writer.close(); + + std::unique_ptr raf = + absl::make_unique(const_cast(buf.c_str()), + buf.capacity()); + std::unique_ptr reader = + 
absl::make_unique(raf.get(), BUFFER_SIZE); + tensorflow::atds::AssertValueEqual(schema, reader->GetSchema()); + AvroBlock blk; + Status status = reader->ReadBlock(blk); + ASSERT_TRUE(status.ok()); + tensorflow::atds::AssertValueEqual(avro::NULL_CODEC, blk.codec); + tensorflow::atds::AssertValueEqual(object_count, blk.object_count); + tensorflow::atds::AssertValueEqual(expected_byte_count, blk.byte_count); + tensorflow::atds::AssertValueEqual(expected_content, blk.content.c_str(), + blk.byte_count); +} + +/* + * These bytes assume the Avro file format specified here: + * https://avro.apache.org/docs/1.9.1/spec.html#Object+Container+Files Bytes + * were manually generated via: + * 1. Writing schema to a file (schema.avsc): + * { + * "type" : "record", + * "name" : "row", + * "fields" : [ + * { + * "name": "dense_1d", + * "type": { + * "type": "array", + * "items": "int" + * } + * }, + * { + * "name": "dense_2d", + * "type": { + * "type": "array", + * "items": { + * "type": "array", + * "items": "int" + * } + * } + * } + * ] + * } + * 2. Writing test data to a file (test.json): + * { + * "dense_1d": [1, 2, 3], + * "dense_2d": [[4, 5], [6, 7]] + * } + * 3. Converting json to avro: + * dali avro fromjson test.json --schema-file schema.avsc > test.avro + * + * If avro file format changes, this byte array will need to be regenerated, and + * test cases modified to change different byte locations in the array. 
+ */ +static constexpr size_t BYTEARRAY_SIZE = 268; +static constexpr char WELLFORMED_CONTENT[] = { + 0x4f, + 0x62, + 0x6a, + 0x01, + 0x04, + 0x16, + 0x61, + 0x76, + 0x72, + 0x6f, + 0x2e, + 0x73, + 0x63, + 0x68, + 0x65, + 0x6d, // Obj...avro.schem + 0x61, + static_cast(0xec), + 0x02, + 0x7b, + 0x22, + 0x74, + 0x79, + 0x70, + 0x65, + 0x22, + 0x3a, + 0x22, + 0x72, + 0x65, + 0x63, + 0x6f, // a..{"type":"reco + 0x72, + 0x64, + 0x22, + 0x2c, + 0x22, + 0x6e, + 0x61, + 0x6d, + 0x65, + 0x22, + 0x3a, + 0x22, + 0x72, + 0x6f, + 0x77, + 0x22, // rd","name":"row" + 0x2c, + 0x22, + 0x66, + 0x69, + 0x65, + 0x6c, + 0x64, + 0x73, + 0x22, + 0x3a, + 0x5b, + 0x7b, + 0x22, + 0x6e, + 0x61, + 0x6d, // ,"fields":[{"nam + 0x65, + 0x22, + 0x3a, + 0x22, + 0x64, + 0x65, + 0x6e, + 0x73, + 0x65, + 0x5f, + 0x31, + 0x64, + 0x22, + 0x2c, + 0x22, + 0x74, // e":"dense_1d","t + 0x79, + 0x70, + 0x65, + 0x22, + 0x3a, + 0x7b, + 0x22, + 0x74, + 0x79, + 0x70, + 0x65, + 0x22, + 0x3a, + 0x22, + 0x61, + 0x72, // ype":{"type":"ar + 0x72, + 0x61, + 0x79, + 0x22, + 0x2c, + 0x22, + 0x69, + 0x74, + 0x65, + 0x6d, + 0x73, + 0x22, + 0x3a, + 0x22, + 0x69, + 0x6e, // ray","items":"in + 0x74, + 0x22, + 0x7d, + 0x7d, + 0x2c, + 0x7b, + 0x22, + 0x6e, + 0x61, + 0x6d, + 0x65, + 0x22, + 0x3a, + 0x22, + 0x64, + 0x65, // t"}},{"name":"de + 0x6e, + 0x73, + 0x65, + 0x5f, + 0x32, + 0x64, + 0x22, + 0x2c, + 0x22, + 0x74, + 0x79, + 0x70, + 0x65, + 0x22, + 0x3a, + 0x7b, // nse_2d","type":{ + 0x22, + 0x74, + 0x79, + 0x70, + 0x65, + 0x22, + 0x3a, + 0x22, + 0x61, + 0x72, + 0x72, + 0x61, + 0x79, + 0x22, + 0x2c, + 0x22, // "type":"array"," + 0x69, + 0x74, + 0x65, + 0x6d, + 0x73, + 0x22, + 0x3a, + 0x7b, + 0x22, + 0x74, + 0x79, + 0x70, + 0x65, + 0x22, + 0x3a, + 0x22, // items":{"type":" + 0x61, + 0x72, + 0x72, + 0x61, + 0x79, + 0x22, + 0x2c, + 0x22, + 0x69, + 0x74, + 0x65, + 0x6d, + 0x73, + 0x22, + 0x3a, + 0x22, // array","items":" + 0x69, + 0x6e, + 0x74, + 0x22, + 0x7d, + 0x7d, + 0x7d, + 0x5d, + 0x7d, + 0x14, + 0x61, + 0x76, + 0x72, + 
0x6f, + 0x2e, + 0x63, // int"}}}]}.avro.c + 0x6f, + 0x64, + 0x65, + 0x63, + 0x08, + 0x6e, + 0x75, + 0x6c, + 0x6c, + 0x00, + static_cast(0xe1), + 0x26, + 0x18, + 0x0e, + static_cast(0xc9), + static_cast(0xbe), // odec.null..&.... + 0x5a, + static_cast(0x8c), + 0x5f, + static_cast(0xe0), + static_cast(0xcd), + 0x5c, + 0x62, + static_cast(0xc2), + 0x3f, + 0x05, + 0x02, + 0x1e, + 0x06, + 0x02, + 0x04, + 0x06, // Z._..\b.?....... + 0x00, + 0x04, + 0x04, + 0x08, + 0x0a, + 0x00, + 0x04, + 0x0c, + 0x0e, + 0x00, + 0x00, + static_cast(0xe1), + 0x26, + 0x18, + 0x0e, + static_cast(0xc9), // ............&... + static_cast(0xbe), + 0x5a, + static_cast(0x8c), + 0x5f, + static_cast(0xe0), + static_cast(0xcd), + 0x5c, + 0x62, + static_cast(0xc2), + 0x3f, + 0x05, + 0x0a // .Z._..\b.?.. +}; + +TEST(AvroBlockReaderTest, MALFORMED_MAGIC) { + char malformed_magic[BYTEARRAY_SIZE]; + memcpy(malformed_magic, WELLFORMED_CONTENT, BYTEARRAY_SIZE); + malformed_magic[2] = 0x6b; // Fill third byte with random character + avro::Exception expected_exception("No exception thrown"); + try { + AvroBlockReaderTest(malformed_magic, BYTEARRAY_SIZE); + } catch (avro::Exception e) { + expected_exception = e; + } + ASSERT_STREQ("Invalid data file. 
Magic does not match.", + expected_exception.what()); +} + +TEST(AvroBlockReaderTest, MISSING_SCHEMA) { + char missing_schema[BYTEARRAY_SIZE]; + memcpy(missing_schema, WELLFORMED_CONTENT, BYTEARRAY_SIZE); + missing_schema[6] = 0x62; // Replace "avro.schema" with "bvro.schema" + avro::Exception expected_exception("No exception thrown"); + try { + AvroBlockReaderTest(missing_schema, BYTEARRAY_SIZE); + } catch (avro::Exception e) { + expected_exception = e; + } + ASSERT_STREQ("No schema in metadata", expected_exception.what()); +} + +TEST(AvroBlockReaderTest, UNSUPPORTED_CODEC) { + char unsupported_codec[BYTEARRAY_SIZE]; + memcpy(unsupported_codec, WELLFORMED_CONTENT, BYTEARRAY_SIZE); + unsupported_codec[213] = 0x6f; // Change codec from "null" to "oull" + avro::Exception expected_exception("No exception thrown"); + try { + AvroBlockReaderTest(unsupported_codec, BYTEARRAY_SIZE); + } catch (avro::Exception e) { + expected_exception = e; + } + ASSERT_STREQ("Unknown codec in data file: oull", expected_exception.what()); +} + +TEST(AvroBlockReaderTest, SYNC_MARKER_MISMATCH) { + char sync_marker_mismatch[BYTEARRAY_SIZE]; + memcpy(sync_marker_mismatch, WELLFORMED_CONTENT, BYTEARRAY_SIZE); + sync_marker_mismatch[218] = + 0xe2; // Change second byte of sync marker from 0xe1 to 0xe2 + Status status = AvroBlockReaderTest(sync_marker_mismatch, BYTEARRAY_SIZE); + ASSERT_EQ(error::Code::DATA_LOSS, status.code()); + ASSERT_STREQ("Avro sync marker mismatch.", status.error_message().c_str()); +} + +TEST(AvroBlockReaderTest, BYTE_COUNT_EOF) { + char byte_count_eof[BYTEARRAY_SIZE]; + memcpy(byte_count_eof, WELLFORMED_CONTENT, BYTEARRAY_SIZE); + byte_count_eof[235] = 0x6e; // Change byte count from 0x1e (15) to 0x6e (55) + Status status = AvroBlockReaderTest(byte_count_eof, BYTEARRAY_SIZE); + ASSERT_EQ(error::Code::OUT_OF_RANGE, status.code()); + ASSERT_STREQ("eof", status.error_message().c_str()); +} + +TEST(AvroBlockReaderTest, DENSE_2D) { + string feature_name = "dense_2d"; + 
tensorflow::atds::ATDSSchemaBuilder schema_builder = + tensorflow::atds::ATDSSchemaBuilder(); + schema_builder.AddDenseFeature(feature_name, DT_INT32, 2); + avro::ValidSchema schema = schema_builder.BuildVaildSchema(); + avro::GenericDatum datum(schema); + tensorflow::atds::AddDenseValue(datum, feature_name, {{1, 2}, {3, 4}}); + avro::OutputStreamPtr out_stream = + tensorflow::atds::EncodeAvroGenericDatum(datum); + avro::InputStreamPtr in_stream = avro::memoryInputStream(*out_stream); + const uint8_t* expected_content; + size_t expected_len; + in_stream->next(&expected_content, &expected_len); + AvroBlockReaderTest((char*)expected_content, 1, + expected_len, schema, {datum}); +} + +TEST(AvroBlockReaderTest, SPARSE_2D) { + string feature_name = "sparse_2d"; + tensorflow::atds::ATDSSchemaBuilder schema_builder = + tensorflow::atds::ATDSSchemaBuilder(); + schema_builder.AddSparseFeature(feature_name, DT_INT64, 2); + avro::ValidSchema schema = schema_builder.BuildVaildSchema(); + avro::GenericDatum datum1(schema); + avro::GenericDatum datum2(schema); + tensorflow::atds::AddSparseValue(datum1, feature_name, + {{1, 2}, {3, 4}}, {5, 6}); + tensorflow::atds::AddSparseValue(datum2, feature_name, + {{7, 8}, {9, 10}}, {11, 12}); + std::vector records = {datum1, datum2}; + avro::OutputStreamPtr out_stream = + tensorflow::atds::EncodeAvroGenericData(records); + avro::InputStreamPtr in_stream = avro::memoryInputStream(*out_stream); + const uint8_t* expected_content; + size_t expected_len; + in_stream->next(&expected_content, &expected_len); + AvroBlockReaderTest( + (char*)expected_content, 2, expected_len, schema, {datum1, datum2}); +} + +} // namespace data +} // namespace tensorflow diff --git a/tensorflow_io/core/kernels/avro/atds/avro_decoder_template.h b/tensorflow_io/core/kernels/avro/atds/avro_decoder_template.h new file mode 100644 index 000000000..3588cc28a --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/avro_decoder_template.h @@ -0,0 +1,61 @@ +/* Copyright 
2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_AVRO_DECODER_TEMPLATE_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_AVRO_DECODER_TEMPLATE_H_ + +#include "api/Decoder.hh" + +namespace avro { +namespace decoder_t { + +template < + typename T, + typename = typename std::enable_if< + std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value || + std::is_same::value, + T>::type> +inline T Decode(avro::DecoderPtr& decoder); + +template <> +inline int Decode(avro::DecoderPtr& decoder) { + return decoder->decodeInt(); +} + +template <> +inline long Decode(avro::DecoderPtr& decoder) { + return decoder->decodeLong(); +} + +template <> +inline float Decode(avro::DecoderPtr& decoder) { + return decoder->decodeFloat(); +} + +template <> +inline double Decode(avro::DecoderPtr& decoder) { + return decoder->decodeDouble(); +} + +template <> +inline bool Decode(avro::DecoderPtr& decoder) { + return decoder->decodeBool(); +} + +} // namespace decoder_t +} // namespace avro + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_AVRO_DECODER_TEMPLATE_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/decoder_base.h b/tensorflow_io/core/kernels/avro/atds/decoder_base.h new file mode 100644 index 000000000..456984ae3 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/decoder_base.h @@ -0,0 +1,70 
@@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECODER_BASE_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECODER_BASE_H_ + +#include "api/Decoder.hh" +#include "api/GenericDatum.hh" +#include "api/Node.hh" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow_io/core/kernels/avro/atds/sparse_value_buffer.h" + +namespace tensorflow { +namespace atds { + +enum class FeatureType { dense, sparse, varlen, opaque_contextual }; + +static const std::map avro_to_tf_datatype = { + {avro::AVRO_INT, DT_INT32}, {avro::AVRO_LONG, DT_INT64}, + {avro::AVRO_STRING, DT_STRING}, {avro::AVRO_BYTES, DT_STRING}, + {avro::AVRO_FLOAT, DT_FLOAT}, {avro::AVRO_DOUBLE, DT_DOUBLE}, + {avro::AVRO_BOOL, DT_BOOL}}; + +/* + * Decoders decode avro features into Tensors. + * All decoder implementations must implement the operator overload '()'. + * Decoders are invoked in a multithreaded context(controlled by + * `num_parallel_calls`). Therefore the implementations must be threadsafe. 
+ * TODO: Add static analysis to check thread-safety(BDP-7562) + * */ +class DecoderBase { + public: + virtual ~DecoderBase() {} + + virtual Status operator()(avro::DecoderPtr&, std::vector&, + sparse::ValueBuffer&, + std::vector&, size_t) = 0; +}; + +/* + * Template Metadata class must implement the following public members. + * FeatureType type + * string name + * DataType dtype + * PartialTensorShape shape + * */ +template +std::unique_ptr CreateFeatureDecoder(const avro::NodePtr&, + const Metadata&); + +template +Status ValidateSchema(const avro::NodePtr&, const Metadata&); + +} // namespace atds +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECODER_BASE_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/decoder_test_util.cc b/tensorflow_io/core/kernels/avro/atds/decoder_test_util.cc new file mode 100644 index 000000000..bfb88fa2d --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/decoder_test_util.cc @@ -0,0 +1,208 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow_io/core/kernels/avro/atds/decoder_test_util.h" + +#include "api/Compiler.hh" +#include "api/Generic.hh" +#include "api/Specific.hh" +#include "api/ValidSchema.hh" + +namespace tensorflow { +namespace atds { + +constexpr const char kATDSSchemaPrefix[] = + "{" + "\"type\" : \"record\", " + "\"name\" : \"AvroTensorDataset\", " + "\"namespace\" : \"com.organization.avrotensordataset\", " + "\"fields\" : [ "; + +constexpr const char kATDSSchemaSuffix[] = + " ] " + "}"; + +ATDSSchemaBuilder::ATDSSchemaBuilder() + : schema_(kATDSSchemaPrefix), num_of_features_(0) {} + +ATDSSchemaBuilder& ATDSSchemaBuilder::AddDenseFeature( + const string& name, DataType dtype, size_t rank, + const avro::Type avro_type) { + string type = GenerateArrayType(dtype, rank, avro_type); + string feature_schema = BuildFeatureSchema(name, type); + AddFeature(feature_schema); + return *this; +} + +ATDSSchemaBuilder& ATDSSchemaBuilder::AddSparseFeature( + const string& name, DataType dtype, size_t rank, + const avro::Type avro_type) { + std::vector order(rank + 1, 0); + for (size_t i = 0; i < order.size(); i++) { + order[i] = i; + } + AddSparseFeature(name, dtype, order, avro_type); + return *this; +} + +ATDSSchemaBuilder& ATDSSchemaBuilder::AddSparseFeature( + const string& name, DataType dtype, const std::vector& order, + const avro::Type avro_type) { + string indices_type = GenerateArrayType(DT_INT64, 1); + string values_type = GenerateArrayType(dtype, 1, avro_type); + string fields = ""; + + auto values_index = order.size() - 1; + for (size_t i = 0; i < order.size(); i++) { + if (i > 0) { + fields += ", "; + } + if (order[i] == values_index) { + fields += BuildFeatureSchema("values", values_type); + } else { + auto indices_name = "indices" + std::to_string(order[i]); + fields += BuildFeatureSchema(indices_name, indices_type); + } + } + + string type = + "{" + "\"type\" : \"record\", " + 
"\"name\" : \"" + + name + + "\", " + "\"fields\" : [ " + + fields + + " ] " + "}"; + string feature_schema = BuildFeatureSchema(name, type); + AddFeature(feature_schema); + return *this; +} + +ATDSSchemaBuilder& ATDSSchemaBuilder::AddOpaqueContextualFeature( + const string& name, const string& type) { + string feature_schema = BuildFeatureSchema(name, type); + AddFeature(feature_schema); + return *this; +} + +string ATDSSchemaBuilder::Build() { return schema_ + kATDSSchemaSuffix; } + +avro::ValidSchema ATDSSchemaBuilder::BuildVaildSchema() { + string schema = Build(); + + std::istringstream iss(schema); + avro::ValidSchema valid_schema; + avro::compileJsonSchema(iss, valid_schema); + return valid_schema; +} + +void ATDSSchemaBuilder::AddFeature(const string& feature_schema) { + if (num_of_features_ > 0) { + schema_ += ", "; + } + schema_ += feature_schema; + num_of_features_++; +} + +string ATDSSchemaBuilder::BuildFeatureSchema(const string& name, + const string& type) { + return "{" + "\"name\" : \"" + + name + + "\", " + "\"type\" : " + + type + " }"; +} + +string ATDSSchemaBuilder::BuildNullableFeatureSchema(const string& name, + const string& type) { + return "{" + "\"name\" : \"" + + name + + "\", " + "\"type\" : [ \"null\", " + + type + + " ] " + "}"; +} + +string ATDSSchemaBuilder::GenerateDataType(DataType dtype, + const avro::Type avro_type) { + switch (dtype) { + case DT_INT32: { + return "\"int\""; + } + case DT_INT64: { + return "\"long\""; + } + case DT_FLOAT: { + return "\"float\""; + } + case DT_DOUBLE: { + return "\"double\""; + } + case DT_STRING: { + if (avro_type == avro::AVRO_BYTES) { + return "\"bytes\""; + } + return "\"string\""; + } + case DT_BOOL: { + return "\"boolean\""; + } + default: { + return ""; + } + } +} + +string ATDSSchemaBuilder::GenerateArrayType(DataType dtype, size_t rank, + const avro::Type avro_type) { + if (rank == 0) { + return GenerateDataType(dtype, avro_type); + } + + string type = GenerateArrayType(dtype, rank - 1, 
avro_type); + return "{" + "\"type\" : \"array\", " + "\"items\" : " + + type + " }"; +} + +avro::OutputStreamPtr EncodeAvroGenericDatum(avro::GenericDatum& datum) { + avro::EncoderPtr encoder = avro::binaryEncoder(); + avro::OutputStreamPtr out_stream = avro::memoryOutputStream(); + encoder->init(*out_stream); + avro::encode(*encoder, datum); + encoder->flush(); + return std::move(out_stream); +} + +avro::OutputStreamPtr EncodeAvroGenericData( + std::vector& data) { + avro::EncoderPtr encoder = avro::binaryEncoder(); + avro::OutputStreamPtr out_stream = avro::memoryOutputStream(); + encoder->init(*out_stream); + for (auto& datum : data) { + avro::encode(*encoder, datum); + } + encoder->flush(); + return std::move(out_stream); +} + +} // namespace atds +} // namespace tensorflow diff --git a/tensorflow_io/core/kernels/avro/atds/decoder_test_util.h b/tensorflow_io/core/kernels/avro/atds/decoder_test_util.h new file mode 100644 index 000000000..1407c5b90 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/decoder_test_util.h @@ -0,0 +1,404 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECODER_TEST_UTIL_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECODER_TEST_UTIL_H_ + +#include "api/Encoder.hh" +#include "api/GenericDatum.hh" +#include "api/Node.hh" +#include "api/Specific.hh" +#include "api/Stream.hh" +#include "api/ValidSchema.hh" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow_io/core/kernels/avro/atds/atds_decoder.h" + +namespace tensorflow { +namespace atds { + +using byte_array = std::vector; + +class ATDSSchemaBuilder { + public: + ATDSSchemaBuilder(); + + ATDSSchemaBuilder& AddDenseFeature( + const string& name, DataType dtype, size_t rank, + const avro::Type avro_type = avro::AVRO_NULL); + ATDSSchemaBuilder& AddSparseFeature( + const string& name, DataType dtype, size_t rank, + const avro::Type avro_type = avro::AVRO_NULL); + ATDSSchemaBuilder& AddSparseFeature( + const string& name, DataType dtype, const std::vector& order, + const avro::Type avro_type = avro::AVRO_NULL); + ATDSSchemaBuilder& AddOpaqueContextualFeature(const string& name, + const string& type); + + string Build(); + avro::ValidSchema BuildVaildSchema(); + + private: + void AddFeature(const string&); + string BuildFeatureSchema(const string&, const string&); + string BuildNullableFeatureSchema(const string&, const string&); + string GenerateDataType(DataType, const avro::Type = avro::AVRO_NULL); + string GenerateArrayType(DataType, size_t, + const avro::Type = avro::AVRO_NULL); + + string schema_; + size_t num_of_features_; +}; + +template +DataType GetDataType() { + return DataTypeToEnum().value; +} + +template <> +inline DataType GetDataType() { + return DT_STRING; +} + +inline std::vector StringToByte(const std::string& s) { + std::vector result; + result.reserve(s.size()); + std::copy(s.begin(), s.end(), 
std::back_inserter(result)); + return result; +} + +inline std::string ByteToString(const std::vector& t) { + std::string result; + std::copy(t.begin(), t.end(), std::back_inserter(result)); + return result; +} + +// avro::Type is used to differentiate between byte and string, both of which +// map to datatype +template +void AddDenseValue(avro::GenericDatum& datum, const string& name, + const T& value) { + auto& record = datum.value(); + auto& feature = record.field(name); + feature.value() = value; +} + +template +void AddDenseValue(avro::GenericDatum& datum, const string& name, + const std::vector& values) { + auto& record = datum.value(); + auto& feature = record.field(name).value(); + auto& feature_values = feature.value(); + for (T value : values) { + feature_values.emplace_back(value); + } +} + +template <> +inline void AddDenseValue(avro::GenericDatum& datum, const string& name, + const byte_array& value) { + auto& record = datum.value(); + auto& feature = record.field(name); + feature.value() = value; +} + +template +inline void AddDenseValue(avro::GenericDatum& datum, const string& name, + const std::vector>& values) { + auto& record = datum.value(); + auto& feature = record.field(name).value(); + auto& sub_array_schema = feature.schema()->leafAt(0); + + auto& feature_values = feature.value(); + for (size_t i = 0; i < values.size(); i++) { + feature_values.emplace_back(sub_array_schema); + auto& sub_array = feature_values.back().value().value(); + for (size_t j = 0; j < values[i].size(); j++) { + sub_array.emplace_back(values[i][j]); + } + } +} + +template <> +inline void AddDenseValue(avro::GenericDatum& datum, const string& name, + const std::vector& values) { + auto& record = datum.value(); + auto& feature = record.field(name).value(); + auto& feature_values = feature.value(); + for (byte_array value : values) { + feature_values.emplace_back(value); + } +} + +template +void AddSparseValue(avro::GenericDatum& datum, const string& name, + const 
std::vector>& indices, + const std::vector& values) { + auto& record = datum.value(); + auto& feature = record.field(name).value(); + + for (size_t i = 0; i < indices.size(); i++) { + auto indices_key = "indices" + std::to_string(i); + auto& indices_array = + feature.field(indices_key).value().value(); + for (long index : indices[i]) { + indices_array.emplace_back(static_cast(index)); + } + } + + auto& values_array = + feature.field("values").value().value(); + for (T value : values) { + values_array.emplace_back(value); + } +} + +avro::OutputStreamPtr EncodeAvroGenericDatum(avro::GenericDatum& datum); +avro::OutputStreamPtr EncodeAvroGenericData( + std::vector& data); + +template +void AssertValueEqual(const T& v1, const F& v2) { + ASSERT_EQ(v1, v2); +} + +template <> +inline void AssertValueEqual(const avro::NodePtr& v1, const avro::NodePtr& v2) { + ASSERT_EQ(v1->type(), v2->type()); + ASSERT_EQ(v1->leaves(), v2->leaves()); + for (size_t i = 0; i < v1->leaves(); i++) { + AssertValueEqual(v1->leafAt(i), v2->leafAt(i)); + } +} + +template <> +inline void AssertValueEqual(const avro::ValidSchema& v1, + const avro::ValidSchema& v2) { + AssertValueEqual(v1.root(), v2.root()); +} + +template <> +inline void AssertValueEqual(const tstring& v1, const string& v2) { + ASSERT_STREQ(v1.c_str(), v2.c_str()); +} + +template <> +inline void AssertValueEqual(const string& v1, const tstring& v2) { + ASSERT_STREQ(v1.c_str(), v2.c_str()); +} + +inline void AssertValueEqual(const char* v1, const char* v2, int len) { + for (int i = 0; i < len; i++) { + ASSERT_EQ(v1[i], v2[i]); + } +} + +template <> +inline void AssertValueEqual(const float& v1, const float& v2) { + ASSERT_NEAR(v1, v2, 1e-6); +} + +template <> +inline void AssertValueEqual(const double& v1, const double& v2) { + ASSERT_NEAR(v1, v2, 1e-6); +} + +template +void AssertVectorValues(const std::vector& actual, + const std::vector& expected) { + ASSERT_EQ(actual.size(), expected.size()); + for (size_t i = 0; i < 
expected.size(); i++) { + AssertValueEqual(actual[i], expected[i]); + } +} + +template +inline void AssertVectorValues(const std::vector& actual, + const std::vector& expected) { + ASSERT_EQ(actual.size(), expected.size()); + for (size_t i = 0; i < expected.size(); i++) { + AssertValueEqual(actual[i], ByteToString(expected[i])); + } +} + +template +void AssertTensorValues(const Tensor& tensor, const T& scalar) { + AssertValueEqual(tensor.scalar()(), scalar); +} + +template <> +inline void AssertTensorValues(const Tensor& tensor, const string& scalar) { + AssertValueEqual(tensor.scalar()(), scalar); +} + +template +void AssertTensorValues(const Tensor& tensor, const std::vector& vec) { + for (size_t i = 0; i < vec.size(); i++) { + AssertValueEqual(tensor.vec()(i), vec[i]); + } + ASSERT_EQ(tensor.NumElements(), vec.size()); +} + +template <> +inline void AssertTensorValues(const Tensor& tensor, const byte_array& scalar) { + AssertValueEqual(tensor.scalar()(), ByteToString(scalar)); +} + +template <> +inline void AssertTensorValues(const Tensor& tensor, + const std::vector& vec) { + for (size_t i = 0; i < vec.size(); i++) { + AssertValueEqual(tensor.vec()(i), vec[i]); + } + ASSERT_EQ(tensor.NumElements(), vec.size()); +} + +template +void AssertTensorValues(const Tensor& tensor, + const std::vector>& matrix) { + size_t size = 0; + for (size_t i = 0; i < matrix.size(); i++) { + for (size_t j = 0; j < matrix[i].size(); j++) { + AssertValueEqual(tensor.matrix()(i, j), matrix[i][j]); + } + size += matrix[i].size(); + } + ASSERT_EQ(tensor.NumElements(), size); +} + +template <> +inline void AssertTensorValues(const Tensor& tensor, + const std::vector& vec) { + for (size_t i = 0; i < vec.size(); i++) { + AssertValueEqual(tensor.vec()(i), ByteToString(vec[i])); + } + ASSERT_EQ(tensor.NumElements(), vec.size()); +} + +template <> +inline void AssertTensorValues(const Tensor& tensor, + const std::vector>& matrix) { + size_t size = 0; + for (size_t i = 0; i < matrix.size(); 
i++) { + for (size_t j = 0; j < matrix[i].size(); j++) { + AssertValueEqual(tensor.matrix()(i, j), matrix[i][j]); + } + size += matrix[i].size(); + } + ASSERT_EQ(tensor.NumElements(), size); +} + +template <> +inline void AssertTensorValues( + const Tensor& tensor, const std::vector>& matrix) { + size_t size = 0; + for (size_t i = 0; i < matrix.size(); i++) { + for (size_t j = 0; j < matrix[i].size(); j++) { + AssertValueEqual(tensor.matrix()(i, j), + ByteToString(matrix[i][j])); + } + size += matrix[i].size(); + } + ASSERT_EQ(tensor.NumElements(), size); +} + +template +void AssertTensorRangeEqual(const Tensor& tensor, std::vector values, + size_t offset) { + for (size_t i = 0; i < values.size(); i++) { + T actual = tensor.vec()(offset + i); + AssertValueEqual(actual, values[i]); + } +} + +template <> +inline void AssertTensorRangeEqual(const Tensor& tensor, + std::vector values, size_t offset) { + for (size_t i = 0; i < values.size(); i++) { + tstring actual = tensor.vec()(offset + i); + AssertValueEqual(actual, values[i]); + } +} + +template +void ValidateBuffer(sparse::ValueBuffer& buffer, const Metadata& metadata, + std::vector indices, std::vector values, + std::vector num_of_elements) { + size_t indices_index = metadata.indices_index; + size_t values_index = metadata.values_index; + + AssertVectorValues(buffer.indices[indices_index], indices); + std::vector& actual_values = + sparse::GetValueVector(buffer, values_index); + AssertVectorValues(actual_values, values); + AssertVectorValues(buffer.num_of_elements[indices_index], num_of_elements); +} + +template +void ValidateBuffer(sparse::ValueBuffer& buffer, const Metadata& metadata, + std::vector indices, std::vector values, + std::vector num_of_elements) { + size_t indices_index = metadata.indices_index; + size_t values_index = metadata.values_index; + + AssertVectorValues(buffer.indices[indices_index], indices); + std::vector& actual_values = + sparse::GetValueVector(buffer, values_index); + 
AssertVectorValues(actual_values, values); + AssertVectorValues(buffer.num_of_elements[indices_index], num_of_elements); +} + +namespace sparse { + +template +std::vector>& GetValuesBuffer(ValueBuffer& buffer); + +template <> +inline std::vector>& GetValuesBuffer(ValueBuffer& buffer) { + return buffer.int_values; +} + +template <> +inline std::vector>& GetValuesBuffer(ValueBuffer& buffer) { + return buffer.long_values; +} + +template <> +inline std::vector>& GetValuesBuffer(ValueBuffer& buffer) { + return buffer.float_values; +} + +template <> +inline std::vector>& GetValuesBuffer(ValueBuffer& buffer) { + return buffer.double_values; +} + +template <> +inline std::vector>& GetValuesBuffer(ValueBuffer& buffer) { + return buffer.string_values; +} + +template <> +inline std::vector>& GetValuesBuffer(ValueBuffer& buffer) { + return buffer.bool_values; +} + +} // namespace sparse + +} // namespace atds +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECODER_TEST_UTIL_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/decompression_handler.h b/tensorflow_io/core/kernels/avro/atds/decompression_handler.h new file mode 100644 index 000000000..d3aed08bc --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/decompression_handler.h @@ -0,0 +1,120 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECOMPRESSION_HANDLER_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECOMPRESSION_HANDLER_H_ + +#include // for boost::crc_32_type +#include +#include +#include +#include + +#include "api/Compiler.hh" +#include "api/DataFile.hh" +#include "api/Decoder.hh" +#include "api/Specific.hh" +#include "api/Stream.hh" +#include "api/ValidSchema.hh" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow_io/core/kernels/avro/atds/avro_block_reader.h" + +#ifdef SNAPPY_CODEC_AVAILABLE +#include +#endif +namespace tensorflow { +namespace data { +class DecompressionHandler { + public: + DecompressionHandler() {} + + // Adapted from + // https://github.com/apache/avro/blob/release-1.9.1/lang/c++/impl/DataFile.cc#L58 + boost::iostreams::zlib_params get_zlib_params() { + boost::iostreams::zlib_params ret; + ret.method = boost::iostreams::zlib::deflated; + ret.noheader = true; + return ret; + } + +#ifdef SNAPPY_CODEC_AVAILABLE + avro::InputStreamPtr decompressSnappyCodec(AvroBlock& block) { + boost::crc_32_type crc; + std::string uncompressed; + size_t len = block.content.size(); + const auto& compressed = block.content; + int b1 = compressed[len - 4] & 0xFF; + int b2 = compressed[len - 3] & 0xFF; + int b3 = compressed[len - 2] & 0xFF; + int b4 = compressed[len - 1] & 0xFF; + + uint32_t checksum = (b1 << 24) + (b2 << 16) + (b3 << 8) + (b4); + if (!snappy::Uncompress(compressed.data(), len - 4, &uncompressed)) { + throw avro::Exception( + "Snappy Compression reported an error when decompressing"); + } + crc.process_bytes(uncompressed.data(), uncompressed.size()); + uint32_t c = crc(); + if (checksum != c) { + throw avro::Exception( + boost::format("Checksum did not match for Snappy compression: " + "Expected: %1%, computed: %2%") % + checksum % c); + } + block.content = uncompressed; + 
block.byte_count = uncompressed.size(); + block.codec = avro::NULL_CODEC; + uint8_t* dt = + reinterpret_cast(block.content.data() + block.read_offset); + return avro::memoryInputStream(dt, + block.content.size() - block.read_offset); + } +#endif + + avro::InputStreamPtr decompressDeflateCodec(AvroBlock& block) { + boost::iostreams::filtering_istream stream; + stream.push(boost::iostreams::zlib_decompressor(get_zlib_params())); + stream.push(boost::iostreams::basic_array_source( + block.content.data(), block.content.size())); + auto uncompressed = tstring(); + auto reader = avro::nonSeekableIstreamInputStream(stream); + size_t n_data = 0; + + const uint8_t* data = nullptr; + while (reader->next(&data, &n_data)) { + uncompressed.append((const char*)data, n_data); + } + block.content = uncompressed; + block.codec = avro::NULL_CODEC; + block.byte_count = uncompressed.size(); + uint8_t* dt = + reinterpret_cast(block.content.data() + block.read_offset); + return avro::memoryInputStream(dt, + block.content.size() - block.read_offset); + } + + avro::InputStreamPtr decompressNullCodec(AvroBlock& block) { + size_t offset = block.read_offset; + uint8_t* data = reinterpret_cast(block.content.data() + offset); + size_t size = block.content.size() - offset; + return avro::memoryInputStream(data, size); + } +}; + +} // namespace data +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DECOMPRESSION_HANDLER_H_ \ No newline at end of file diff --git a/tensorflow_io/core/kernels/avro/atds/dense_feature_decoder.h b/tensorflow_io/core/kernels/avro/atds/dense_feature_decoder.h new file mode 100644 index 000000000..b24d3854c --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/dense_feature_decoder.h @@ -0,0 +1,244 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DENSE_FEATURE_DECODER_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DENSE_FEATURE_DECODER_H_ + +#include "api/Decoder.hh" +#include "api/Node.hh" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow_io/core/kernels/avro/atds/avro_decoder_template.h" +#include "tensorflow_io/core/kernels/avro/atds/decoder_base.h" +#include "tensorflow_io/core/kernels/avro/atds/errors.h" + +namespace tensorflow { +namespace atds { + +namespace dense { + +struct Metadata { + Metadata(FeatureType type, const string& name, DataType dtype, + const PartialTensorShape& shape, size_t tensor_position) + : type(type), + name(name), + dtype(dtype), + shape(shape), + tensor_position(tensor_position) {} + + FeatureType type; + string name; + DataType dtype; + PartialTensorShape shape; + + size_t tensor_position; +}; + +template +inline Status DecodeFixedLenArray(avro::DecoderPtr& decoder, T** buf, int rank, + const PartialTensorShape& shape) { + if (rank == 0) { + *((*buf)++) = avro::decoder_t::Decode(decoder); + return OkStatus(); + } + + int dim = shape.dims() - rank; + size_t size = static_cast(shape.dim_size(dim)); + size_t number = 0; + if (rank == 1) { + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + number += m; + if (TF_PREDICT_FALSE(number > size)) { + return ShapeError(number, dim, shape); + } + for (size_t i = 0; i < 
m; i++) { + *((*buf)++) = avro::decoder_t::Decode(decoder); + } + } + if (TF_PREDICT_FALSE(number != size)) { + return ShapeError(number, dim, shape); + } + return OkStatus(); + } + + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + number += m; + if (TF_PREDICT_FALSE(number > size)) { + return ShapeError(number, dim, shape); + } + for (size_t i = 0; i < m; i++) { + TF_RETURN_IF_ERROR(DecodeFixedLenArray(decoder, buf, rank - 1, shape)); + } + } + if (TF_PREDICT_FALSE(number != size)) { + return ShapeError(number, dim, shape); + } + return OkStatus(); +} + +// This template specification handles both byte and string. +// It assumes that avro decodeBytes and decodeString are both reading bytes into +// uint8 arrays see: +// https://github.com/apache/avro/blob/branch-1.9/lang/c%2B%2B/impl/BinaryDecoder.cc#L133 +// As long as that as that assumption holds a separate bytes implementation is +// not required. +template <> +inline Status DecodeFixedLenArray(avro::DecoderPtr& decoder, tstring** buf, + int rank, const PartialTensorShape& shape) { + std::string s; + if (rank == 0) { + decoder->decodeString(s); + *((*buf)++) = s; + return OkStatus(); + } + + int dim = shape.dims() - rank; + size_t size = static_cast(shape.dim_size(dim)); + size_t number = 0; + if (rank == 1) { + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + number += m; + if (TF_PREDICT_FALSE(number > size)) { + return ShapeError(number, dim, shape); + } + for (size_t i = 0; i < m; i++) { + decoder->decodeString(s); + *((*buf)++) = s; + } + } + if (TF_PREDICT_FALSE(number != size)) { + return ShapeError(number, dim, shape); + } + return OkStatus(); + } + + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + number += m; + if (TF_PREDICT_FALSE(number > size)) { + return ShapeError(number, dim, shape); + } + for (size_t i = 0; i < m; i++) { + TF_RETURN_IF_ERROR( + DecodeFixedLenArray(decoder, buf, rank - 1, shape)); + } + } + if 
(TF_PREDICT_FALSE(number != size)) { + return ShapeError(number, dim, shape); + } + return OkStatus(); +} + +template +class FeatureDecoder : public DecoderBase { + public: + explicit FeatureDecoder(const Metadata& metadata) + : metadata_(metadata), rank_(metadata.shape.dims()) {} + + Status operator()(avro::DecoderPtr& decoder, + std::vector& dense_tensors, + sparse::ValueBuffer& buffer, + std::vector& skipped_data, + size_t offset) { + auto size = metadata_.shape.num_elements(); + auto& tensor = dense_tensors[metadata_.tensor_position]; + T* buf = reinterpret_cast(tensor.data()) + offset * size; + return DecodeFixedLenArray(decoder, &buf, rank_, metadata_.shape); + } + + private: + const Metadata& metadata_; + const int rank_; +}; + +} // namespace dense + +template <> +inline std::unique_ptr CreateFeatureDecoder( + const avro::NodePtr& node, const dense::Metadata& metadata) { + switch (metadata.dtype) { + case DT_INT32: { + return std::move(std::make_unique>(metadata)); + } + case DT_INT64: { + return std::move(std::make_unique>(metadata)); + } + case DT_FLOAT: { + return std::move( + std::make_unique>(metadata)); + } + case DT_DOUBLE: { + return std::move( + std::make_unique>(metadata)); + } + case DT_STRING: { + return std::move( + std::make_unique>(metadata)); + } + case DT_BOOL: { + return std::move(std::make_unique>(metadata)); + } + default: { + TypeNotSupportedAbort(metadata.dtype); + } + } + return nullptr; +} + +template <> +inline Status ValidateSchema(const avro::NodePtr& node, + const dense::Metadata& metadata) { + avro::NodePtr n = node; + size_t avro_rank = 0; + // Check schema consists of non-nullable nested arrays. 
+ while (n->leaves() != 0) { + if (n->leaves() != 1 || n->type() != avro::AVRO_ARRAY) { + std::ostringstream oss; + node->printJson(oss, 0); + return InvalidDenseFeatureSchema(metadata.name, oss.str()); + } + n = n->leafAt(0); + avro_rank++; + } + avro::Type avro_type = n->type(); + std::map::const_iterator tf_type = + avro_to_tf_datatype.find(avro_type); + if (tf_type == avro_to_tf_datatype.end()) { + // Check schema data type is supported. + std::ostringstream oss; + node->printJson(oss, 0); + return UnsupportedValueTypeError(metadata.name, oss.str()); + } else if (tf_type->second != metadata.dtype) { + // Check schema data type and metadata type match. + std::ostringstream oss; + node->printJson(oss, 0); + return SchemaValueTypeMismatch(metadata.name, avro_type, metadata.dtype, + oss.str()); + } + // Check schema rank and metadata rank match. + size_t metadata_rank = static_cast(metadata.shape.dims()); + if (avro_rank != metadata_rank) { + std::ostringstream oss; + node->printJson(oss, 0); + return FeatureRankMismatch(metadata.name, avro_rank, metadata_rank, + oss.str()); + } + return OkStatus(); +} + +} // namespace atds +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DENSE_FEATURE_DECODER_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/dense_feature_decoder_test.cc b/tensorflow_io/core/kernels/avro/atds/dense_feature_decoder_test.cc new file mode 100644 index 000000000..b972674af --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/dense_feature_decoder_test.cc @@ -0,0 +1,183 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "api/Decoder.hh" +#include "api/Stream.hh" +#include "tensorflow/core/platform/test.h" +#include "tensorflow_io/core/kernels/avro/atds/atds_decoder.h" +#include "tensorflow_io/core/kernels/avro/atds/decoder_test_util.h" + +namespace tensorflow { +namespace atds { +namespace dense { + +template +void DenseDecoderTest(const T& values, DataType dtype, + std::initializer_list shape, + const avro::Type avro_type = avro::AVRO_NULL) { + string feature_name = "feature"; + ATDSSchemaBuilder schema_builder = ATDSSchemaBuilder(); + schema_builder.AddDenseFeature(feature_name, dtype, shape.size(), avro_type); + + string schema = schema_builder.Build(); + avro::ValidSchema writer_schema = schema_builder.BuildVaildSchema(); + avro::GenericDatum atds_datum(writer_schema); + AddDenseValue(atds_datum, feature_name, values); + avro::OutputStreamPtr out_stream = EncodeAvroGenericDatum(atds_datum); + avro::InputStreamPtr in_stream = avro::memoryInputStream(*out_stream); + avro::DecoderPtr decoder = avro::binaryDecoder(); + decoder->init(*in_stream); + + std::vector dense_features; + std::vector sparse_features; + std::vector varlen_features; + size_t pos = 0; + PartialTensorShape tensor_shape(shape); + dense_features.emplace_back(FeatureType::dense, feature_name, dtype, + tensor_shape, pos); + + ATDSDecoder atds_decoder = + ATDSDecoder(dense_features, sparse_features, varlen_features); + Status init_status = atds_decoder.Initialize(writer_schema); + ASSERT_TRUE(init_status.ok()); + + 
sparse::ValueBuffer buffer; + std::vector skipped_data = atds_decoder.GetSkippedData(); + std::vector dense_tensors; + dense_tensors.emplace_back(dtype, TensorShape(shape)); + size_t offset = 0; + + Status decode_status = atds_decoder.DecodeATDSDatum( + decoder, dense_tensors, buffer, skipped_data, offset); + ASSERT_TRUE(decode_status.ok()); + const Tensor tensor = dense_tensors[pos]; + AssertTensorValues(tensor, values); +} + +TEST(DenseDecoderTest, DT_INT32_scalar) { + int value = -7; + DenseDecoderTest(value, DT_INT32, {}); +} + +TEST(DenseDecoderTest, DT_INT32_1D) { + std::vector values = {1, 2, 3}; + DenseDecoderTest(values, DT_INT32, {3}); +} + +TEST(DenseDecoderTest, DT_INT32_2D) { + std::vector> values = {{-1, -2, -3}, {4, 5, 6}, {-7, 8, 9}}; + DenseDecoderTest(values, DT_INT32, {3, 3}); +} + +TEST(DenseDecoderTest, DT_INT64_scalar) { + int64_t value = 1; + DenseDecoderTest(value, DT_INT64, {}); +} + +TEST(DenseDecoderTest, DT_INT64_1D) { + std::vector values = {1}; + DenseDecoderTest(values, DT_INT64, {1}); +} + +TEST(DenseDecoderTest, DT_INT64_2D) { + std::vector> values = {{1}}; + DenseDecoderTest(values, DT_INT64, {1, 1}); +} + +TEST(DenseDecoderTest, DT_FLOAT_scalar) { + float value = -0.6; + DenseDecoderTest(value, DT_FLOAT, {}); +} + +TEST(DenseDecoderTest, DT_FLOAT_1D) { + std::vector values = {1.5, 0.5, 1.7, 2.6}; + DenseDecoderTest(values, DT_FLOAT, {4}); +} + +TEST(DenseDecoderTest, DT_FLOAT_2D) { + std::vector> values = {{-0.1, -0.2, -0.3}, + {-1.4, 5.4, 6.6}}; + DenseDecoderTest(values, DT_FLOAT, {2, 3}); +} + +TEST(DenseDecoderTest, DT_DOUBLE_scalar) { + double value = -0.99; + DenseDecoderTest(value, DT_DOUBLE, {}); +} + +TEST(DenseDecoderTest, DT_DOUBLE_1D) { + std::vector values = {1.852, 0.79}; + DenseDecoderTest(values, DT_DOUBLE, {2}); +} + +TEST(DenseDecoderTest, DT_DOUBLE_2D) { + std::vector> values = {{-3.14, -2.07}}; + DenseDecoderTest(values, DT_DOUBLE, {1, 2}); +} + +TEST(DenseDecoderTest, DT_STRING_scalar) { + string value = 
"abc"; + DenseDecoderTest(value, DT_STRING, {}); +} + +TEST(DenseDecoderTest, DT_STRING_1D) { + std::vector values = {"", "", ""}; + DenseDecoderTest(values, DT_STRING, {3}); +} + +TEST(DenseDecoderTest, DT_STRING_2D) { + std::vector> values = {{"abc"}, {"ABC"}, {"LINKEDIN"}}; + DenseDecoderTest(values, DT_STRING, {3, 1}); +} + +TEST(DenseDecoderTest, DT_BYTES_scalar) { + byte_array value{0xb4, 0xaf, 0x98, 0x1a}; + DenseDecoderTest(value, DT_STRING, {}, avro::AVRO_BYTES); +} + +TEST(DenseDecoderTest, DT_BYTES_1D) { + byte_array v1{0xb4, 0xaf, 0x98, 0x1a}; + byte_array v2{0xb4, 0xaf, 0x98}; + byte_array v3{0xb4, 0x98, 0x1a}; + std::vector values = {v1, v2, v3}; + DenseDecoderTest(values, DT_STRING, {3}, avro::AVRO_BYTES); +} + +TEST(DenseDecoderTest, DT_BYTES_2D) { + byte_array v1{0xb4, 0xaf, 0x98, 0x1a}; + byte_array v2{0xb4, 0xaf, 0x98}; + byte_array v3{0xb4, 0x98, 0x1a}; + std::vector> values = {{v1}, {v2}, {v2}}; + DenseDecoderTest(values, DT_STRING, {3, 1}, avro::AVRO_BYTES); +} + +TEST(DenseDecoderTest, DT_BOOL_scalar) { + bool value = true; + DenseDecoderTest(value, DT_BOOL, {}); +} + +TEST(DenseDecoderTest, DT_BOOL_1D) { + std::vector values = {true, false, true}; + DenseDecoderTest(values, DT_BOOL, {3}); +} + +TEST(DenseDecoderTest, DT_BOOL_2D) { + std::vector> values = {{false, false}, {true, true}}; + DenseDecoderTest(values, DT_BOOL, {2, 2}); +} + +} // namespace dense +} // namespace atds +} // namespace tensorflow diff --git a/tensorflow_io/core/kernels/avro/atds/errors.cc b/tensorflow_io/core/kernels/avro/atds/errors.cc new file mode 100644 index 000000000..8d8b1e118 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/errors.cc @@ -0,0 +1,198 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow_io/core/kernels/avro/atds/errors.h" + +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/strcat.h" + +namespace tensorflow { +namespace atds { + +namespace { +constexpr char kSupportedDTypeMessage[] = + "Only DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, DT_STRING, and DT_BOOL are " + "supported."; +} // namespace + +void TypeNotSupportedAbort(DataType dtype) { + LOG(ERROR) << "Data type " << DataTypeString(dtype) << " is not supported. " + << kSupportedDTypeMessage; + std::abort(); +} + +void SparseIndicesTypeNotSupportedAbort(avro::Type indices_type) { + LOG(ERROR) << "Sparse indices type " << avro::toString(indices_type) + << " is not supported. 
Only AVRO_INT and AVRO_LONG are supported"; + std::abort(); +} + +Status TypeNotSupportedError(DataType dtype) { + return errors::InvalidArgument( + strings::StrCat("Data type ", DataTypeString(dtype), " is not supported.", + kSupportedDTypeMessage)); +} + +Status SparseArraysNotEqualError(const std::vector& decoded_numbers, + const std::vector& feature_index) { + size_t rank = decoded_numbers.size() - 1; + string array_names = "["; + string decoded_values = "["; + for (size_t i = 0; i <= rank; i++) { + if (i > 0) { + strings::StrAppend(&array_names, ", "); + strings::StrAppend(&decoded_values, ", "); + } + strings::StrAppend(&decoded_values, decoded_numbers[i]); + + size_t index = feature_index[i]; + if (index == rank) { + strings::StrAppend(&array_names, "values"); + } else { + strings::StrAppend(&array_names, "indices", index); + } + } + strings::StrAppend(&array_names, "]"); + strings::StrAppend(&decoded_values, "]"); + + return errors::InvalidArgument(strings::StrCat( + "Numbers of decoded value in indice and values array are different. ", + "Numbers of decoded value in ", array_names, " arrays are ", + decoded_values)); +} + +Status ShapeError(size_t number, int dim, const PartialTensorShape& shape) { + return errors::InvalidArgument(strings::StrCat( + "Number of decoded value ", number, + " does not match the expected dimension size ", shape.dim_size(dim), + " at the ", dim + 1, "th dimension in user defined shape ", + shape.DebugString())); +} + +Status NullValueError() { + return errors::InvalidArgument("Feature value is null."); +} + +Status FeatureDecodeError(const string& feature_name, const string& reason) { + return errors::InvalidArgument(strings::StrCat( + "Failed to decode feature ", feature_name, ". Reason: ", reason)); +} + +Status ATDSNotRecordError(const string& type, const string& schema) { + return errors::InvalidArgument( + strings::StrCat("ATDS schema is expected to be an Avro Record but found ", + type, ". 
Invalid schema found: ", schema)); +} + +Status FeatureNotFoundError(const string& feature_name, const string& schema) { + return errors::InvalidArgument(strings::StrCat( + "User defined feature '", feature_name, + "' cannot be found in the input data.", " Input data schema: ", schema)); +} + +Status InvalidUnionTypeError(const string& feature_name, const string& schema) { + return errors::InvalidArgument( + strings::StrCat("Feature '", feature_name, "' has invalid union schema. ", + "A feature can only be an union of itself or an union of " + "'null' type and itself.", + "Invalid union schema found: ", schema)); +} + +Status MissingValuesColumnError(const string& schema) { + return errors::InvalidArgument(strings::StrCat( + "Sparse schema is missing values column. Input data schema: ", schema)); +} + +Status NonContiguousIndicesError(const string& schema) { + return errors::InvalidArgument(strings::StrCat( + "Sparse schema indices should be contiguous (indices0, indices1, ...). ", + "Input data schema: ", schema)); +} + +Status ExtraFieldError(const string& schema) { + return errors::InvalidArgument( + strings::StrCat("Sparse schema can only contain 'indices' columns and a " + "'values' column. ", + "Input data schema: ", schema)); +} + +Status UnsupportedSparseIndicesTypeError(const string& feature_name, + const string& schema) { + return errors::InvalidArgument(strings::StrCat( + "Unsupported indices type found in feature '", feature_name, "'. ", + "Sparse tensor indices must be a non-nullable array of non-nullable int " + "or long. " + "Invalid schema found: ", + schema)); +} + +Status UnsupportedValueTypeError(const string& feature_name, + const string& schema) { + return errors::InvalidArgument(strings::StrCat( + "Unsupported value type found in feature '", feature_name, "'. ", + "Tensor value must be a non-nullable array of non-nullable int, long, " + "float, double, boolean, bytes, or string. 
" + "Invalid schema found: ", + schema)); +} + +Status SchemaValueTypeMismatch(const string& feature_name, avro::Type avro_type, + DataType metadata_type, const string& schema) { + return errors::InvalidArgument(strings::StrCat( + "Schema value type and metadata type mismatch in feature '", feature_name, + "'. ", "Avro schema data type: ", avro::toString(avro_type), + ", metadata type: ", DataTypeString(metadata_type), + ". Invalid schema found: ", schema)); +} + +Status InvalidDenseFeatureSchema(const string& feature_name, + const string& schema) { + return errors::InvalidArgument( + strings::StrCat("Dense feature '", feature_name, + "' must be non-nullable nested arrays only. ", + "Invalid schema found: ", schema)); +} + +Status InvalidVarlenFeatureSchema(const string& feature_name, + const string& schema) { + return errors::InvalidArgument( + strings::StrCat("Varlen feature '", feature_name, + "' must be non-nullable nested arrays only. ", + "Invalid schema found: ", schema)); +} + +Status FeatureRankMismatch(const string& feature_name, size_t avro_rank, + size_t metadata_rank, const string& schema) { + return errors::InvalidArgument(strings::StrCat( + "Mismatch between avro schema rank and metadata rank in feature '", + feature_name, "'. ", "Avro schema rank: ", std::to_string(avro_rank), + ", metadata rank: ", std::to_string(metadata_rank), ". 
", + "Invalid schema found: ", schema)); +} + +Status VariedSchemaNotSupportedError(const string& expected_schema, + const string& filename, + const string& varied_schema, + const string& next_filename) { + return errors::InvalidArgument(strings::StrCat( + "Avro schema should be consistent for all input files.", + " Schema in file ", filename, " varies from the schema in file ", + next_filename, "\n", expected_schema, "\n != \n", varied_schema)); +} + +} // namespace atds +} // namespace tensorflow diff --git a/tensorflow_io/core/kernels/avro/atds/errors.h b/tensorflow_io/core/kernels/avro/atds/errors.h new file mode 100644 index 000000000..3dfe8d4ee --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/errors.h @@ -0,0 +1,80 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_ERRORS_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_ERRORS_H_ + +#include "api/Types.hh" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/status.h" + +namespace tensorflow { +namespace atds { + +void TypeNotSupportedAbort(DataType dtype); + +void SparseIndicesTypeNotSupportedAbort(avro::Type type); + +Status TypeNotSupportedError(DataType dtype); + +Status SparseArraysNotEqualError(const std::vector& decoded_numbers, + const std::vector& feature_index); + +Status ShapeError(size_t number, int dim, const PartialTensorShape& shape); + +Status NullValueError(); + +Status FeatureDecodeError(const string& feature_name, const string& reason); + +Status ATDSNotRecordError(const string& type, const string& schema); + +Status FeatureNotFoundError(const string& feature_name, const string& schema); + +Status InvalidUnionTypeError(const string& feature_name, const string& schema); + +Status MissingValuesColumnError(const string& schema); + +Status NonContiguousIndicesError(const string& schema); + +Status ExtraFieldError(const string& schema); + +Status UnsupportedSparseIndicesTypeError(const string& feature_name, + const string& schema); + +Status UnsupportedValueTypeError(const string& feature_name, + const string& schema); + +Status SchemaValueTypeMismatch(const string& feature_name, avro::Type avro_type, + DataType metadata_type, const string& schema); + +Status InvalidDenseFeatureSchema(const string& feature_name, + const string& schema); + +Status InvalidVarlenFeatureSchema(const string& feature_name, + const string& schema); + +Status FeatureRankMismatch(const string& feature_name, size_t avro_rank, + size_t metadata_rank, const string& schema); + +Status VariedSchemaNotSupportedError(const string& expected_schema, + const string& filename, + const 
string& varied_schema, + const string& next_filename); + +} // namespace atds +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_ERRORS_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/opaque_contextual_feature_decoder.h b/tensorflow_io/core/kernels/avro/atds/opaque_contextual_feature_decoder.h new file mode 100644 index 000000000..abd1adfca --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/opaque_contextual_feature_decoder.h @@ -0,0 +1,49 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_OPAQUE_CONTEXTUAL_FEATURE_DECODER_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_OPAQUE_CONTEXTUAL_FEATURE_DECODER_H_ + +#include "api/Decoder.hh" +#include "api/Generic.hh" +#include "api/Specific.hh" +#include "tensorflow_io/core/kernels/avro/atds/decoder_base.h" + +namespace tensorflow { +namespace atds { +namespace opaque_contextual { + +class FeatureDecoder : public DecoderBase { + public: + explicit FeatureDecoder(size_t datum_index) : datum_index_(datum_index) {} + + Status operator()(avro::DecoderPtr& decoder, + std::vector& dense_tensors, + sparse::ValueBuffer& buffer, + std::vector& skipped_data, + size_t offset) { + avro::decode(*decoder, skipped_data[datum_index_]); + return OkStatus(); + } + + private: + const size_t datum_index_; +}; + +} // namespace opaque_contextual +} // namespace atds +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_OPAQUE_CONTEXTUAL_FEATURE_DECODER_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/shuffle_handler.h b/tensorflow_io/core/kernels/avro/atds/shuffle_handler.h new file mode 100644 index 000000000..1e396d38e --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/shuffle_handler.h @@ -0,0 +1,120 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SHUFFLE_HANDLER_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SHUFFLE_HANDLER_H_ + +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/lib/random/philox_random.h" +#include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/lib/random/random_distributions.h" +#include "tensorflow_io/core/kernels/avro/atds/avro_block_reader.h" + +namespace tensorflow { +namespace data { + +class ShuffleHandler { + public: + ShuffleHandler(mutex* mu) { + mu_ = mu; + ResetRngs(); + } + void SampleBlocks(size_t batch_size, bool shuffle, + std::vector>& blocks) { + size_t i = 0; + size_t block_size = blocks.size(); + // LOG(INFO) << "shuffle batch size " << batch_size << " shuffle block size: + // " << block_size; + if (!shuffle) { + size_t j = 0; + while (i < batch_size) { + auto& random_block = blocks[j]; + random_block->num_to_decode = + std::min(random_block->object_count - random_block->num_decoded, + static_cast(batch_size - i)); + i += random_block->num_to_decode; + if ((random_block->num_decoded + random_block->num_to_decode) == + random_block->object_count) { + j++; + } + } + } else { + while (i < batch_size) { + size_t block_id = Random() % block_size; + // LOG(INFO) << "shuffle block size " << block_size << " block_id: " << + // block_id << " actual block size: " << blocks.size(); + auto& random_block = blocks[block_id]; + int64 remaining = random_block->object_count - + random_block->num_decoded - + random_block->num_to_decode; + if (remaining > 0) { + // Decode the whole block when it has less than 1/10 of the undecoded + // records. It is to quickly recycle the almost decoded blocks. 
+ int64 decode_all_threshold = random_block->object_count / 10; + size_t decode_num = 1; + if (remaining <= decode_all_threshold) { + decode_num = + std::min(static_cast(remaining), batch_size - i); + } + random_block->num_to_decode += decode_num; + i += decode_num; + } + } + } + // update counts so that the elements don't have huge gaps + for (size_t k = 0; k < block_size; k++) { + blocks[k]->counts = blocks[k]->num_to_decode; + if (k > 0) { + blocks[k]->counts += blocks[k - 1]->counts; + } + // LOG(INFO) << "block " << k << " object count: " << + // blocks[k]->object_count << " counts: " << blocks[k]->counts + // << " num_decoded: " << blocks[k]->num_decoded << " + // num_to_decode: " << blocks[k]->num_to_decode; + } + } + // function to produce random numbers + random::SingleSampleAdapter::ResultType Random() + TF_EXCLUSIVE_LOCKS_REQUIRED(*mu_) { + num_random_samples_++; + return generator_->operator()(); + } + + void ResetRngs() TF_EXCLUSIVE_LOCKS_REQUIRED(*mu_) { + // Reset the generators based on the current iterator seeds. + int64 seed_ = random::New64(); + int64 seed2_ = random::New64(); + parent_generator_ = std::make_unique(seed_, seed2_); + generator_ = + std::make_unique>( + parent_generator_.get()); + generator_->Skip(num_random_samples_); + num_random_samples_ = 0; + } + + private: + // this is not owned by ShuffleHandler. 
This is owned by the calling class + mutex* mu_; + int64 num_random_samples_ TF_GUARDED_BY(*mu_) = 0; + std::unique_ptr parent_generator_ TF_GUARDED_BY(*mu_); + std::unique_ptr> generator_ + TF_GUARDED_BY(*mu_); +}; + +} // namespace data +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SHUFFLE_HANDLER_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/shuffle_handler_test.cc b/tensorflow_io/core/kernels/avro/atds/shuffle_handler_test.cc new file mode 100644 index 000000000..0f7c3d01c --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/shuffle_handler_test.cc @@ -0,0 +1,108 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow_io/core/kernels/avro/atds/shuffle_handler.h" + +#include "tensorflow/core/data/name_utils.h" +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/lib/io/inputbuffer.h" +#include "tensorflow/core/lib/random/philox_random.h" +#include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/lib/random/random_distributions.h" +#include "tensorflow/core/platform/blocking_counter.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow_io/core/kernels/avro/atds/avro_block_reader.h" + +namespace tensorflow { +namespace data { + +class ShuffleTest : public ::testing::Test { + protected: + ShuffleTest() { shuffle_handler_ = std::make_unique(&mu_); } + + void SetUp() override { + for (size_t i = 0; i < 10; i++) { + int64 rand_mult = static_cast(rand() % 5 + 5); + blocks_.emplace_back(std::make_unique(AvroBlock{ + rand_mult * 64, // int64_t object_count; + 0, // int64_t num_to_decode; + 0, // int64_t num_decoded; + 100000, // int64_t byte_count; + 0, // int64_t counts; + tstring("haha"), // tstring content; + avro::NULL_CODEC, // avro::Codec codec; + 4888 // size_t read_offset; + })); + } + } + mutex mu_; + std::unique_ptr shuffle_handler_; + std::vector> blocks_; +}; + +TEST_F(ShuffleTest, NoShuffleTest) { + size_t batch_size = 1024; + size_t shuffle_buffer_size = 0; + shuffle_handler_->SampleBlocks(batch_size, shuffle_buffer_size > 0, blocks_); + // assert that the sum of all num_to_decode == batch_size + size_t sum_of_num_to_decode = 0; + for (size_t i = 0; i < blocks_.size(); i++) { + sum_of_num_to_decode += blocks_[i]->num_to_decode; + } + EXPECT_EQ(sum_of_num_to_decode, 
batch_size); +} + +TEST_F(ShuffleTest, ShuffleBufferTest) { + size_t batch_size = 1024; + size_t shuffle_buffer_size = 2048; + shuffle_handler_->SampleBlocks(batch_size, shuffle_buffer_size > 0, blocks_); + // assert that the sum of all num_to_decode == batch_size + size_t sum_of_num_to_decode = 0; + for (size_t i = 0; i < blocks_.size(); i++) { + sum_of_num_to_decode += blocks_[i]->num_to_decode; + } + EXPECT_EQ(sum_of_num_to_decode, batch_size); +} + +TEST_F(ShuffleTest, UniformDistributionTest) { + const int64 bin_size = 10; + int64 bins[bin_size] = {0}; // observed frequencies + int64 error = 50; // none of the 10 bins will differ from the avg (1000 + // datapoints) by more than this + int64 num = 0; + int64 num_samples = 1000; + int64 avg = num_samples / bin_size; + int64 k = 0; + int64 idx = 0; + while (k < num_samples) { + num = shuffle_handler_->Random() % num_samples; + idx = num / + avg; // 0-99 goes to bucket 0, 100-199 goes to bucket 1 and so on. + bins[idx]++; + k++; + } + // check uniformity by ensuring that every bin is near the avg num of points + for (int i = 0; i < bin_size; i++) { + EXPECT_NEAR(bins[i], avg, error); + } +} + +} // namespace data +} // namespace tensorflow diff --git a/tensorflow_io/core/kernels/avro/atds/sparse_feature_decoder.h b/tensorflow_io/core/kernels/avro/atds/sparse_feature_decoder.h new file mode 100644 index 000000000..067bf5c3f --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/sparse_feature_decoder.h @@ -0,0 +1,258 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SPARSE_FEATURE_DECODER_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SPARSE_FEATURE_DECODER_H_ + +#include "api/Decoder.hh" +#include "api/Node.hh" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow_io/core/kernels/avro/atds/avro_decoder_template.h" +#include "tensorflow_io/core/kernels/avro/atds/decoder_base.h" +#include "tensorflow_io/core/kernels/avro/atds/errors.h" +#include "tensorflow_io/core/kernels/avro/atds/sparse_feature_internal_decoder.h" + +namespace tensorflow { +namespace atds { + +namespace sparse { + +struct Metadata { + Metadata(FeatureType type, const string& name, DataType dtype, + const PartialTensorShape& shape, size_t indices_index, + size_t values_index) + : type(type), + name(name), + dtype(dtype), + shape(shape), + indices_index(indices_index), + values_index(values_index) {} + + FeatureType type; + string name; + DataType dtype; + PartialTensorShape shape; + + size_t indices_index; + size_t values_index; +}; + +template +class FeatureDecoder : public DecoderBase { + public: + explicit FeatureDecoder(const Metadata& metadata, + const std::vector& decoding_order, + const std::vector& indices_type) + : metadata_(metadata), + rank_(metadata.shape.dims()), + decoding_order_(decoding_order), + long_indices_decoder_(metadata.indices_index, rank_), + int_indices_decoder_(metadata.indices_index, rank_), + values_decoder_(metadata.values_index) { + auto num_decoders = decoding_order.size(); + decoders_.reserve(num_decoders); + for (size_t i = 0; i < num_decoders; i++) { + auto index = decoding_order[i]; + if (index == rank_) { + 
decoders_.emplace_back(&values_decoder_); + } else if (indices_type[index] == avro::AVRO_LONG) { + decoders_.emplace_back(&long_indices_decoder_); + } else if (indices_type[index] == avro::AVRO_INT) { + decoders_.emplace_back(&int_indices_decoder_); + } else { + SparseIndicesTypeNotSupportedAbort(indices_type[index]); + } + } + } + + Status operator()(avro::DecoderPtr& decoder, + std::vector& dense_tensors, + sparse::ValueBuffer& buffer, + std::vector& skipped_data, + size_t offset) { + size_t num_decoders = decoders_.size(); + std::vector decoded_numbers(num_decoders, 0); + size_t indices_index = metadata_.indices_index; + size_t indices_start = buffer.indices[indices_index].size(); + for (size_t i = 0; i < num_decoders; i++) { + decoded_numbers[i] = decoders_[i]->Decode( + decoder, buffer, decoding_order_[i], indices_start); + } + + if (TF_PREDICT_FALSE(!std::all_of( + decoded_numbers.cbegin(), decoded_numbers.cend(), + [d = decoded_numbers[0]](size_t n) { return n == d; }))) { + return SparseArraysNotEqualError(decoded_numbers, decoding_order_); + } + + // Rank after batching equals to the number of decoders. + FillBatchIndices(buffer.indices[indices_index], indices_start, + static_cast(offset), num_decoders); + + auto& num_of_elements = buffer.num_of_elements[indices_index]; + size_t total_num_elements = decoded_numbers[0]; + if (!num_of_elements.empty()) { + total_num_elements += num_of_elements.back(); + } + num_of_elements.push_back(total_num_elements); + return OkStatus(); + } + + private: + void FillBatchIndices(std::vector& v, size_t indices_start, + long batch_offset, size_t rank_after_batch) { + size_t end = v.size(); + for (size_t i = indices_start; i < end; i += rank_after_batch) { + v[i] = batch_offset; + } + } + + const Metadata& metadata_; + const size_t rank_; + const std::vector decoding_order_; + IndicesDecoder long_indices_decoder_; + IndicesDecoder int_indices_decoder_; + ValuesDecoder values_decoder_; + std::vector decoders_; // not owned. 
+}; + +} // namespace sparse + +template <> +inline std::unique_ptr CreateFeatureDecoder( + const avro::NodePtr& node, const sparse::Metadata& metadata) { + size_t rank = static_cast(metadata.shape.dims()); + std::vector decoding_order(rank + 1); + std::vector indices_types(rank); + + for (size_t d = 0; d < rank; d++) { + auto indice_key = "indices" + std::to_string(d); + size_t indice_pos; + node->nameIndex(indice_key, indice_pos); + decoding_order[indice_pos] = d; + indices_types[d] = node->leafAt(indice_pos)->leafAt(0)->type(); + } + + size_t values_pos; + node->nameIndex("values", values_pos); + decoding_order[values_pos] = rank; + + switch (metadata.dtype) { + case DT_INT32: { + return std::move(std::make_unique>( + metadata, decoding_order, indices_types)); + } + case DT_INT64: { + return std::move(std::make_unique>( + metadata, decoding_order, indices_types)); + } + case DT_FLOAT: { + return std::move(std::make_unique>( + metadata, decoding_order, indices_types)); + } + case DT_DOUBLE: { + return std::move(std::make_unique>( + metadata, decoding_order, indices_types)); + } + case DT_STRING: { + return std::move(std::make_unique>( + metadata, decoding_order, indices_types)); + } + case DT_BOOL: { + return std::move(std::make_unique>( + metadata, decoding_order, indices_types)); + } + default: { + TypeNotSupportedAbort(metadata.dtype); + } + } + return nullptr; +} + +template <> +inline Status ValidateSchema(const avro::NodePtr& node, + const sparse::Metadata& metadata) { + size_t values_pos; + // Check values column exists. + if (!node->nameIndex("values", values_pos)) { + std::ostringstream oss; + node->printJson(oss, 0); + return MissingValuesColumnError(oss.str()); + } + // Check values column is a non-nullable array. 
+ auto value_leaf = node->leafAt(values_pos); + avro::Type value_type = value_leaf->type(); + if (value_type != avro::AVRO_ARRAY) { + std::ostringstream oss; + node->printJson(oss, 0); + return UnsupportedValueTypeError(metadata.name, oss.str()); + } + avro::Type value_item_type = value_leaf->leafAt(0)->type(); + std::map::const_iterator tf_type = + avro_to_tf_datatype.find(value_item_type); + if (tf_type == avro_to_tf_datatype.end()) { + // Check schema data type is supported. + std::ostringstream oss; + node->printJson(oss, 0); + return UnsupportedValueTypeError(metadata.name, oss.str()); + } else if (tf_type->second != metadata.dtype) { + // Check schema data type and metadata type match. + std::ostringstream oss; + node->printJson(oss, 0); + return SchemaValueTypeMismatch(metadata.name, value_item_type, + metadata.dtype, oss.str()); + } + size_t rank = static_cast(metadata.shape.dims()); + for (size_t i = 0; i < rank; i++) { + auto indice_key = "indices" + std::to_string(i); + size_t indice_pos; + // Check for contiguous "indices0", "indices1", ... "indicesN" columns + if (!node->nameIndex(indice_key, indice_pos)) { + std::ostringstream oss; + node->printJson(oss, 0); + return NonContiguousIndicesError(oss.str()); + } + // Check each "indices" column is a non-nullable array. + auto indice_leaf = node->leafAt(indice_pos); + avro::Type indices_type = indice_leaf->type(); + if (indices_type != avro::AVRO_ARRAY) { + std::ostringstream oss; + node->printJson(oss, 0); + return UnsupportedSparseIndicesTypeError(metadata.name, oss.str()); + } + // Check each "indices" array consists of int or long. + avro::Type item_type = indice_leaf->leafAt(0)->type(); + if (item_type != avro::AVRO_INT && item_type != avro::AVRO_LONG) { + std::ostringstream oss; + node->printJson(oss, 0); + return UnsupportedSparseIndicesTypeError(metadata.name, oss.str()); + } + } + // Check schema rank and metadata rank match. 
+ if (node->leaves() != rank + 1) { + std::ostringstream oss; + node->printJson(oss, 0); + return ExtraFieldError(oss.str()); + } + return OkStatus(); +} + +} // namespace atds +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SPARSE_FEATURE_DECODER_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/sparse_feature_decoder_test.cc b/tensorflow_io/core/kernels/avro/atds/sparse_feature_decoder_test.cc new file mode 100644 index 000000000..f62dd1ae1 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/sparse_feature_decoder_test.cc @@ -0,0 +1,258 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow_io/core/kernels/avro/atds/sparse_feature_decoder.h" + +#include "api/Decoder.hh" +#include "api/Stream.hh" +#include "api/ValidSchema.hh" +#include "tensorflow/core/platform/test.h" +#include "tensorflow_io/core/kernels/avro/atds/decoder_test_util.h" + +namespace tensorflow { +namespace atds { +namespace sparse { + +using Indices = std::vector>; + +template +void SparseDecoderTest(const Indices& indices, const std::vector& values, + const std::vector& order, + std::initializer_list shape, long offset, + const avro::Type avro_type = avro::AVRO_NULL) { + DataType dtype = GetDataType(); + string feature_name = "feature"; + ATDSSchemaBuilder schema_builder = ATDSSchemaBuilder(); + schema_builder.AddSparseFeature(feature_name, dtype, order, avro_type); + + string schema = schema_builder.Build(); + avro::ValidSchema writer_schema = schema_builder.BuildVaildSchema(); + avro::GenericDatum atds_datum(writer_schema); + AddSparseValue(atds_datum, feature_name, indices, values); + + avro::OutputStreamPtr out_stream = EncodeAvroGenericDatum(atds_datum); + avro::InputStreamPtr in_stream = avro::memoryInputStream(*out_stream); + avro::DecoderPtr decoder = avro::binaryDecoder(); + decoder->init(*in_stream); + + std::vector dense_features; + std::vector sparse_features; + std::vector varlen_features; + size_t indices_index = 0, values_index = 0; + PartialTensorShape tensor_shape(shape); + sparse_features.emplace_back(FeatureType::sparse, feature_name, dtype, + tensor_shape, indices_index, values_index); + + ATDSDecoder atds_decoder = + ATDSDecoder(dense_features, sparse_features, varlen_features); + Status init_status = atds_decoder.Initialize(writer_schema); + ASSERT_TRUE(init_status.ok()); + + std::vector skipped_data = atds_decoder.GetSkippedData(); + std::vector dense_tensors; + ValueBuffer buffer; + GetValuesBuffer(buffer).resize(1); + buffer.indices.resize(1); + 
buffer.num_of_elements.resize(1); + Status decode_status = atds_decoder.DecodeATDSDatum( + decoder, dense_tensors, buffer, skipped_data, offset); + ASSERT_TRUE(decode_status.ok()); + + auto rank = indices.size(); + auto num_elem = values.size(); + std::vector expected_indices((rank + 1) * num_elem, offset); + for (size_t i = 0; i < indices.size(); i++) { + auto dim = i + 1; + for (size_t j = 0; j < indices[i].size(); j++) { + expected_indices[dim + j * (rank + 1)] = indices[i][j]; + } + } + std::vector expected_num_elements = {num_elem}; + + ValidateBuffer(buffer, sparse_features[0], expected_indices, values, + expected_num_elements); +} + +template <> +inline void SparseDecoderTest(const Indices& indices, + const std::vector& values, + const std::vector& order, + std::initializer_list shape, long offset, + const avro::Type avro_type) { + DataType dtype = DT_STRING; + string feature_name = "feature"; + ATDSSchemaBuilder schema_builder = ATDSSchemaBuilder(); + schema_builder.AddSparseFeature(feature_name, dtype, order, avro_type); + + string schema = schema_builder.Build(); + avro::ValidSchema writer_schema = schema_builder.BuildVaildSchema(); + avro::GenericDatum atds_datum(writer_schema); + AddSparseValue(atds_datum, feature_name, indices, values); + + avro::OutputStreamPtr out_stream = EncodeAvroGenericDatum(atds_datum); + avro::InputStreamPtr in_stream = avro::memoryInputStream(*out_stream); + avro::DecoderPtr decoder = avro::binaryDecoder(); + decoder->init(*in_stream); + + std::vector dense_features; + std::vector sparse_features; + std::vector varlen_features; + size_t indices_index = 0, values_index = 0; + PartialTensorShape tensor_shape(shape); + sparse_features.emplace_back(FeatureType::sparse, feature_name, dtype, + tensor_shape, indices_index, values_index); + + ATDSDecoder atds_decoder = + ATDSDecoder(dense_features, sparse_features, varlen_features); + Status init_status = atds_decoder.Initialize(writer_schema); + ASSERT_TRUE(init_status.ok()); + + 
std::vector skipped_data = atds_decoder.GetSkippedData(); + std::vector dense_tensors; + ValueBuffer buffer; + GetValuesBuffer(buffer).resize(1); + buffer.indices.resize(1); + buffer.num_of_elements.resize(1); + Status decode_status = atds_decoder.DecodeATDSDatum( + decoder, dense_tensors, buffer, skipped_data, offset); + ASSERT_TRUE(decode_status.ok()); + + auto rank = indices.size(); + auto num_elem = values.size(); + std::vector expected_indices((rank + 1) * num_elem, offset); + for (size_t i = 0; i < indices.size(); i++) { + auto dim = i + 1; + for (size_t j = 0; j < indices[i].size(); j++) { + expected_indices[dim + j * (rank + 1)] = indices[i][j]; + } + } + std::vector expected_num_elements = {num_elem}; + + ValidateBuffer(buffer, sparse_features[0], expected_indices, values, + expected_num_elements); +} + +TEST(SparseDecoderTest, DT_INT32_1D) { + std::vector values = {1, 2, 3}; + SparseDecoderTest({{1, 3, 5}}, values, {0, 1}, {10}, 0); +} + +TEST(SparseDecoderTest, DT_INT32_2D) { + std::vector values = {-1, 2}; + SparseDecoderTest({{3, 5}, {2, 4}}, values, {0, 1, 2}, {10, 5}, 0); +} + +TEST(SparseDecoderTest, DT_INT64_1D) { + std::vector values = {4}; + SparseDecoderTest({{1}}, values, {0, 1}, {100}, 0); +} + +TEST(SparseDecoderTest, DT_INT64_2D) { + std::vector values = {77, 99, 131, 121}; + SparseDecoderTest({{3, 3, 3, 3}, {2, 4, 6, 8}}, values, {0, 1, 2}, {10, 9}, + 0); +} + +TEST(SparseDecoderTest, DT_FLOAT_1D) { + std::vector values = {0.0}; + SparseDecoderTest({{0}}, values, {0, 1}, {10}, 0); +} + +TEST(SparseDecoderTest, DT_FLOAT_2D) { + std::vector values = {1.0, 0.0}; + SparseDecoderTest({{3, 5}, {2, 4}}, values, {0, 1, 2}, {10, 5}, 0); +} + +TEST(SparseDecoderTest, DT_DOUBLE_1D) { + std::vector values = {1.0, 2.0, 3.0}; + SparseDecoderTest({{1, 3, 5}}, values, {0, 1}, {256}, 0); +} + +TEST(SparseDecoderTest, DT_DOUBLE_2D) { + std::vector values = {0.77, 0.3145}; + SparseDecoderTest({{0, 1}, {0, 1}}, values, {0, 1, 2}, {2, 2}, 0); +} + 
+TEST(SparseDecoderTest, DT_STRING_1D) { + std::vector values = {"abc"}; + SparseDecoderTest({{1}}, values, {0, 1}, {100}, 0); +} + +TEST(SparseDecoderTest, DT_STRING_2D) { + std::vector values = {"abc", "cdf", "pdf", "rdf"}; + SparseDecoderTest({{1000, 1200, 98742, 919101}, {10101, 9291, 0, 191}}, + values, {0, 1, 2}, {1000000, 12000}, 0); +} + +TEST(SparseDecoderTest, DT_BYTES_1D) { + byte_array value = {0xb4, 0xaf, 0x98, 0x1a}; + std::vector values = {value}; + SparseDecoderTest({{1}}, values, {0, 1}, {100}, 0, avro::AVRO_BYTES); +} + +TEST(SparseDecoderTest, DT_BYTES_2D) { + byte_array v1{0xb4, 0xaf, 0x98, 0x1a}; + byte_array v2{0xb4, 0xaf, 0x98}; + byte_array v3{0xb4, 0x98, 0x1a}; + byte_array v4{0xb4, 0x98}; + std::vector values = {v1, v2, v3, v4}; + SparseDecoderTest({{1000, 1200, 98742, 919101}, {10101, 9291, 0, 191}}, + values, {0, 1, 2}, {1000000, 12000}, 0, avro::AVRO_BYTES); +} + +TEST(SparseDecoderTest, DT_BOOL_1D) { + std::vector values = {true, false, true}; + SparseDecoderTest({{0, 1, 2}}, values, {0, 1}, {10}, 0); +} + +TEST(SparseDecoderTest, DT_BOOL_2D) { + std::vector values = {false, false, true}; + SparseDecoderTest({{3, 5, 5}, {2, 4, 8}}, values, {0, 1, 2}, {10, 10}, 0); +} + +TEST(SparseDecoderTest, 2D_Order_0_2_1) { + std::vector values = {-1, 2}; + SparseDecoderTest({{3, 5}, {2, 4}}, values, {0, 2, 1}, {10, 5}, 0); +} + +TEST(SparseDecoderTest, 2D_Order_2_0_1) { + std::vector values = {-1, 2}; + SparseDecoderTest({{3, 5}, {2, 4}}, values, {2, 0, 1}, {10, 5}, 0); +} + +TEST(SparseDecoderTest, 2D_Order_2_1_0) { + std::vector values = {-1, 2}; + SparseDecoderTest({{3, 5}, {2, 4}}, values, {2, 1, 0}, {10, 5}, 0); +} + +TEST(SparseDecoderTest, 2D_Order_1_2_0) { + std::vector values = {-1, 2}; + SparseDecoderTest({{3, 5}, {2, 4}}, values, {1, 2, 0}, {10, 5}, 0); +} + +TEST(SparseDecoderTest, 2D_Order_1_0_2) { + std::vector values = {-1, 2}; + SparseDecoderTest({{3, 5}, {2, 4}}, values, {1, 0, 2}, {10, 5}, 0); +} + +TEST(SparseDecoderTest, 
NonZeroOffset) { + std::vector values = {77, 99, 131, 121}; + SparseDecoderTest({{3, 3, 3, 3}, {2, 4, 6, 8}}, values, {0, 1, 2}, {10, 9}, + 99); +} + +} // namespace sparse +} // namespace atds +} // namespace tensorflow diff --git a/tensorflow_io/core/kernels/avro/atds/sparse_feature_internal_decoder.h b/tensorflow_io/core/kernels/avro/atds/sparse_feature_internal_decoder.h new file mode 100644 index 000000000..3f717c920 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/sparse_feature_internal_decoder.h @@ -0,0 +1,150 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SPARSE_FEATURE_INTERNAL_DECODER_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SPARSE_FEATURE_INTERNAL_DECODER_H_ + +#include "api/Decoder.hh" +#include "tensorflow_io/core/kernels/avro/atds/avro_decoder_template.h" +#include "tensorflow_io/core/kernels/avro/atds/decoder_base.h" + +namespace tensorflow { +namespace atds { +namespace sparse { + +template +inline size_t DecodeVarLenValues(avro::DecoderPtr& decoder, std::vector& v) { + size_t count = 0; + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + count += m; + for (size_t i = 0; i < m; i++) { + v.emplace_back(avro::decoder_t::Decode(decoder)); + } + } + return count; +} + +// This template specification handles both byte and string. 
+// It assumes that avro decodeBytes and decodeString are both reading bytes into +// uint8 arrays see: +// https://github.com/apache/avro/blob/branch-1.9/lang/c%2B%2B/impl/BinaryDecoder.cc#L133 +// As long as that as that assumption holds a separate bytes implementation is +// not required. +template <> +inline size_t DecodeVarLenValues(avro::DecoderPtr& decoder, + std::vector& v) { + size_t count = 0; + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + count += m; + for (size_t i = 0; i < m; i++) { + v.push_back(""); + decoder->decodeString(v.back()); + } + } + return count; +} + +class InternalDecoder { + public: + virtual ~InternalDecoder() {} + + virtual size_t Decode(avro::DecoderPtr& decoder, ValueBuffer& buffer, + size_t dim, size_t indices_start) = 0; +}; + +template +class ValuesDecoder : public InternalDecoder { + public: + explicit ValuesDecoder(size_t values_index) : values_index_(values_index) {} + + // Two size_t parameters are only used in IndicesDecoder. 
+ size_t Decode(avro::DecoderPtr& decoder, ValueBuffer& buffer, + size_t not_used_1, size_t not_used_2) { + return DecodeVarLenValues(decoder, + GetValueVector(buffer, values_index_)); + } + + private: + const size_t values_index_; +}; + +template < + typename T, + typename = typename std::enable_if< + std::is_same::value || std::is_same::value, T>::type> +class IndicesDecoder : public InternalDecoder { + public: + explicit IndicesDecoder(size_t indices_index, size_t rank) + : indices_index_(indices_index), rank_after_batch_(rank + 1) {} + + size_t Decode(avro::DecoderPtr& decoder, ValueBuffer& buffer, size_t dim, + size_t indices_start) { + auto& v = buffer.indices[indices_index_]; + size_t count = 0; + size_t start = indices_start; + auto dim_after_batch = dim + 1; + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + count += m; + size_t end = start + m * rank_after_batch_; + if (end > v.capacity()) { + v.reserve(2 * v.capacity()); + } + if (end > v.size()) { + v.resize(end); + } + for (size_t i = start + dim_after_batch; i < end; + i += rank_after_batch_) { + v[i] = static_cast(avro::decoder_t::Decode(decoder)); + } + start = end; + } + return count; + } + + private: + const size_t indices_index_; + const size_t rank_after_batch_; +}; + +template <> +inline size_t IndicesDecoder::Decode(avro::DecoderPtr& decoder, + ValueBuffer& buffer, size_t dim, + size_t indices_start) { + auto& v = buffer.indices[indices_index_]; + size_t count = 0; + size_t start = indices_start; + auto dim_after_batch = dim + 1; + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + count += m; + size_t end = start + m * rank_after_batch_; + if (end > v.capacity()) { + v.reserve(2 * v.capacity()); + } + if (end > v.size()) { + v.resize(end); + } + for (size_t i = start + dim_after_batch; i < end; i += rank_after_batch_) { + v[i] = decoder->decodeLong(); + } + start = end; + } + return count; +} + +} // namespace sparse +} // namespace atds 
+} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SPARSE_FEATURE_INTERNAL_DECODER_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/sparse_value_buffer.h b/tensorflow_io/core/kernels/avro/atds/sparse_value_buffer.h new file mode 100644 index 000000000..30440d388 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/sparse_value_buffer.h @@ -0,0 +1,195 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SPARSE_VALUE_BUFFER_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SPARSE_VALUE_BUFFER_H_ + +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow_io/core/kernels/avro/atds/errors.h" + +namespace tensorflow { +namespace atds { +namespace sparse { + +template +using vecvec = std::vector>; + +struct ValueBuffer { + vecvec int_values; + vecvec long_values; + vecvec float_values; + vecvec double_values; + vecvec bool_values; + vecvec string_values; + + vecvec indices; + vecvec num_of_elements; +}; + +template +std::vector& GetValueVector(ValueBuffer& buffer, size_t index); + +template <> +inline std::vector& GetValueVector(ValueBuffer& buffer, size_t index) { + return buffer.int_values[index]; +} + +template <> +inline std::vector& GetValueVector(ValueBuffer& buffer, size_t index) { + return buffer.long_values[index]; +} + +template <> +inline std::vector& 
GetValueVector(ValueBuffer& buffer, size_t index) { + return buffer.float_values[index]; +} + +template <> +inline std::vector& GetValueVector(ValueBuffer& buffer, size_t index) { + return buffer.double_values[index]; +} + +template <> +inline std::vector& GetValueVector(ValueBuffer& buffer, size_t index) { + return buffer.string_values[index]; +} + +template <> +inline std::vector& GetValueVector(ValueBuffer& buffer, size_t index) { + return buffer.bool_values[index]; +} + +template +const std::vector& GetValueVector(const ValueBuffer& buffer, size_t index); + +template <> +inline const std::vector& GetValueVector(const ValueBuffer& buffer, + size_t index) { + return buffer.int_values[index]; +} + +template <> +inline const std::vector& GetValueVector(const ValueBuffer& buffer, + size_t index) { + return buffer.long_values[index]; +} + +template <> +inline const std::vector& GetValueVector(const ValueBuffer& buffer, + size_t index) { + return buffer.float_values[index]; +} + +template <> +inline const std::vector& GetValueVector(const ValueBuffer& buffer, + size_t index) { + return buffer.double_values[index]; +} + +template <> +inline const std::vector& GetValueVector(const ValueBuffer& buffer, + size_t index) { + return buffer.string_values[index]; +} + +template <> +inline const std::vector& GetValueVector(const ValueBuffer& buffer, + size_t index) { + return buffer.bool_values[index]; +} + +inline Status FillIndicesTensor(const std::vector& buffer, Tensor& tensor, + size_t offset) { + void* dest = + reinterpret_cast(reinterpret_cast(tensor.data()) + offset); + const void* src = reinterpret_cast(buffer.data()); + size_t len = buffer.size() * sizeof(long); + std::memcpy(dest, src, len); + return OkStatus(); +} + +template +inline Status FillValuesTensor(const sparse::ValueBuffer& buffer, + Tensor& tensor, size_t values_index, + size_t offset) { + auto& values = GetValueVector(buffer, values_index); + void* dest = + 
reinterpret_cast(reinterpret_cast(tensor.data()) + offset); + const void* src = reinterpret_cast(values.data()); + size_t len = values.size() * sizeof(T); + std::memcpy(dest, src, len); + return OkStatus(); +} + +template <> +inline Status FillValuesTensor(const sparse::ValueBuffer& buffer, + Tensor& tensor, size_t values_index, + size_t offset) { + auto& values = buffer.string_values[values_index]; + for (size_t i = 0; i < values.size(); i++) { + tensor.flat()(offset++) = std::move(values[i]); + } + return OkStatus(); +} + +template <> +inline Status FillValuesTensor(const sparse::ValueBuffer& buffer, + Tensor& tensor, size_t values_index, + size_t offset) { + auto& values = buffer.bool_values[values_index]; + for (size_t i = 0; i < values.size(); i++) { + tensor.flat()(offset++) = values[i]; + } + return OkStatus(); +} + +inline Status FillValuesTensor(const sparse::ValueBuffer& buffer, + Tensor& values_tensor, DataType dtype, + size_t values_index, size_t offset) { + switch (dtype) { + case DT_INT32: { + return FillValuesTensor(buffer, values_tensor, values_index, offset); + } + case DT_INT64: { + return FillValuesTensor(buffer, values_tensor, values_index, + offset); + } + case DT_FLOAT: { + return FillValuesTensor(buffer, values_tensor, values_index, + offset); + } + case DT_DOUBLE: { + return FillValuesTensor(buffer, values_tensor, values_index, + offset); + } + case DT_STRING: { + return FillValuesTensor(buffer, values_tensor, values_index, + offset); + } + case DT_BOOL: { + return FillValuesTensor(buffer, values_tensor, values_index, + offset); + } + default: { + return TypeNotSupportedError(dtype); + } + } +} + +} // namespace sparse +} // namespace atds +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_SPARSE_VALUE_BUFFER_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/sparse_value_buffer_test.cc b/tensorflow_io/core/kernels/avro/atds/sparse_value_buffer_test.cc new file mode 100644 index 000000000..e002ad32c --- /dev/null 
+++ b/tensorflow_io/core/kernels/avro/atds/sparse_value_buffer_test.cc @@ -0,0 +1,90 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow_io/core/kernels/avro/atds/sparse_value_buffer.h" + +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow_io/core/kernels/avro/atds/decoder_test_util.h" + +namespace tensorflow { +namespace atds { +namespace sparse { + +class FillIndicesTensorTest : public ::testing::TestWithParam {}; + +TEST_P(FillIndicesTensorTest, Offset) { + std::vector buffer = {1, 3, 5, 7}; + size_t offset = GetParam(); + int64 limit = static_cast(buffer.size() + offset); + Tensor tensor(DT_INT64, {limit}); + Status status = FillIndicesTensor(buffer, tensor, offset); + ASSERT_TRUE(status.ok()); + AssertTensorRangeEqual(tensor, buffer, offset); +} + +INSTANTIATE_TEST_SUITE_P(offset_0_1_2, FillIndicesTensorTest, + ::testing::Values(0, 1, 2)); + +template +void FillValuesTensorTest(const std::vector& values, size_t values_index, + size_t offset) { + DataType dtype = GetDataType(); + + sparse::ValueBuffer buffer; + auto& values_buffer = GetValuesBuffer(buffer); + values_buffer.resize(values_index + 1); + values_buffer.back() = values; + int64 size = static_cast(offset + values.size()); + Tensor tensor(dtype, {size}); + + Status status = FillValuesTensor(buffer, 
tensor, dtype, values_index, offset); + ASSERT_TRUE(status.ok()); + AssertTensorRangeEqual(tensor, values, offset); +} + +TEST(FillValuesTensorTest, DT_INT32) { + std::vector values = {3, 2, 1, -1}; + FillValuesTensorTest(values, 0, 0); +} + +TEST(FillValuesTensorTest, DT_INT64) { + std::vector values = {-1, -2}; + FillValuesTensorTest(values, 1, 0); +} + +TEST(FillValuesTensorTest, DT_FLOAT) { + std::vector values = {0.0, 1.0, -1.0}; + FillValuesTensorTest(values, 0, 2); +} + +TEST(FillValuesTensorTest, DT_DOUBLE) { + std::vector values = {3.17, 4.02, 5.13}; + FillValuesTensorTest(values, 11, 11); +} + +TEST(FillValuesTensorTest, DT_STRING) { + std::vector values = {"ABC"}; + FillValuesTensorTest(values, 7, 0); +} + +TEST(FillValuesTensorTest, DT_BOOL) { + std::vector values = {false, true, true}; + FillValuesTensorTest(values, 0, 5); +} + +} // namespace sparse +} // namespace atds +} // namespace tensorflow diff --git a/tensorflow_io/core/kernels/avro/atds/varlen_feature_decoder.h b/tensorflow_io/core/kernels/avro/atds/varlen_feature_decoder.h new file mode 100644 index 000000000..6ad179a55 --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/varlen_feature_decoder.h @@ -0,0 +1,337 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_RAGGED_FEATURE_DECODER_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_RAGGED_FEATURE_DECODER_H_ + +#include "api/Decoder.hh" +#include "api/Node.hh" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow_io/core/kernels/avro/atds/avro_decoder_template.h" +#include "tensorflow_io/core/kernels/avro/atds/decoder_base.h" +#include "tensorflow_io/core/kernels/avro/atds/errors.h" +#include "tensorflow_io/core/kernels/avro/atds/sparse_value_buffer.h" + +namespace tensorflow { +namespace atds { + +namespace varlen { + +struct Metadata { + Metadata(FeatureType type, const string& name, DataType dtype, + const PartialTensorShape& shape, size_t indices_index, + size_t values_index) + : type(type), + name(name), + dtype(dtype), + shape(shape), + indices_index(indices_index), + values_index(values_index) {} + + FeatureType type; + string name; + DataType dtype; + PartialTensorShape shape; + + size_t indices_index; + size_t values_index; +}; + +inline void FillIndicesBuffer(std::vector& indices_buf, + std::vector& current_indice) { + for (const auto& indice_dim : current_indice) { + indices_buf.emplace_back(indice_dim); + } +} + +template +inline Status DecodeVarlenArray(avro::DecoderPtr& decoder, + std::vector& indices_buf, + std::vector& values_buf, + std::vector& current_indice, int rank, + const PartialTensorShape& shape) { + if (rank == 0) { + FillIndicesBuffer(indices_buf, current_indice); + values_buf.emplace_back(avro::decoder_t::Decode(decoder)); + return OkStatus(); + } + + current_indice.emplace_back(0); + int dim = shape.dims() - rank; + int64 size = shape.dim_size(dim); + int64 number = 0; + if (size > 0) { + // slow path with dimension check. 
+ if (rank == 1) { + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + number += static_cast(m); + if (TF_PREDICT_FALSE(number > size)) { + return ShapeError(number, dim, shape); + } + for (size_t i = 0; i < m; i++) { + FillIndicesBuffer(indices_buf, current_indice); + values_buf.emplace_back(avro::decoder_t::Decode(decoder)); + current_indice.back()++; + } + } + } else { + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + number += static_cast(m); + if (TF_PREDICT_FALSE(number > size)) { + return ShapeError(number, dim, shape); + } + for (size_t i = 0; i < m; i++) { + TF_RETURN_IF_ERROR(DecodeVarlenArray(decoder, indices_buf, + values_buf, current_indice, + rank - 1, shape)); + current_indice.back()++; + } + } + } + if (TF_PREDICT_FALSE(number != size)) { + return ShapeError(number, dim, shape); + } + } else { + // fast path without dimension check as the dimension can have unlimited + // values. + if (rank == 1) { + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + for (size_t i = 0; i < m; i++) { + FillIndicesBuffer(indices_buf, current_indice); + values_buf.emplace_back(avro::decoder_t::Decode(decoder)); + current_indice.back()++; + } + } + } else { + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + for (size_t i = 0; i < m; i++) { + TF_RETURN_IF_ERROR(DecodeVarlenArray(decoder, indices_buf, + values_buf, current_indice, + rank - 1, shape)); + current_indice.back()++; + } + } + } + } + + current_indice.pop_back(); + return OkStatus(); +} + +// This template specification handles both byte and string. +// It assumes that avro decodeBytes and decodeString are both reading bytes into +// uint8 arrays see: +// https://github.com/apache/avro/blob/branch-1.9/lang/c%2B%2B/impl/BinaryDecoder.cc#L133 +// As long as that as that assumption holds a separate bytes implementation is +// not required. 
+template <> +inline Status DecodeVarlenArray(avro::DecoderPtr& decoder, + std::vector& indices_buf, + std::vector& values_buf, + std::vector& current_indice, int rank, + const PartialTensorShape& shape) { + if (rank == 0) { + FillIndicesBuffer(indices_buf, current_indice); + values_buf.push_back(""); + decoder->decodeString(values_buf.back()); + return OkStatus(); + } + + current_indice.emplace_back(0); + int dim = shape.dims() - rank; + int64 size = shape.dim_size(dim); + int64 number = 0; + if (size > 0) { + // slow path with dimension check. + if (rank == 1) { + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + number += static_cast(m); + if (TF_PREDICT_FALSE(number > size)) { + return ShapeError(number, dim, shape); + } + for (size_t i = 0; i < m; i++) { + FillIndicesBuffer(indices_buf, current_indice); + values_buf.push_back(""); + decoder->decodeString(values_buf.back()); + current_indice.back()++; + } + } + } else { + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + number += static_cast(m); + if (TF_PREDICT_FALSE(number > size)) { + return ShapeError(number, dim, shape); + } + for (size_t i = 0; i < m; i++) { + TF_RETURN_IF_ERROR(DecodeVarlenArray(decoder, indices_buf, values_buf, + current_indice, rank - 1, + shape)); + current_indice.back()++; + } + } + } + if (TF_PREDICT_FALSE(number != size)) { + return ShapeError(number, dim, shape); + } + } else { + // fast path without dimension check as the dimension can have unlimited + // values. 
+ if (rank == 1) { + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + for (size_t i = 0; i < m; i++) { + FillIndicesBuffer(indices_buf, current_indice); + values_buf.push_back(""); + decoder->decodeString(values_buf.back()); + current_indice.back()++; + } + } + } else { + for (size_t m = decoder->arrayStart(); m != 0; m = decoder->arrayNext()) { + for (size_t i = 0; i < m; i++) { + TF_RETURN_IF_ERROR(DecodeVarlenArray(decoder, indices_buf, values_buf, + current_indice, rank - 1, + shape)); + current_indice.back()++; + } + } + } + } + + current_indice.pop_back(); + return OkStatus(); +} + +template +class FeatureDecoder : public DecoderBase { + public: + explicit FeatureDecoder(const Metadata& metadata) + : metadata_(metadata), rank_(metadata.shape.dims()) {} + + Status operator()(avro::DecoderPtr& decoder, + std::vector& dense_tensors, + sparse::ValueBuffer& buffer, + std::vector& skipped_data, + size_t offset) { + // declaring std::vector locally to make it thread safe + std::vector current_indices; + current_indices.reserve(rank_ + 1); // additional batch dim. 
+ current_indices.resize(1); + current_indices[0] = offset; + size_t indices_index = metadata_.indices_index; + + auto& indices_buf = buffer.indices[indices_index]; + auto& values_buf = + sparse::GetValueVector(buffer, metadata_.values_index); + size_t values_buf_size = values_buf.size(); + TF_RETURN_IF_ERROR(DecodeVarlenArray(decoder, indices_buf, values_buf, + current_indices, rank_, + metadata_.shape)); + size_t total_num_elements = values_buf.size() - values_buf_size; + auto& num_of_elements = buffer.num_of_elements[indices_index]; + if (!num_of_elements.empty()) { + total_num_elements += num_of_elements.back(); + } + num_of_elements.push_back(total_num_elements); + return OkStatus(); + } + + private: + const Metadata& metadata_; + const int rank_; +}; + +} // namespace varlen + +template <> +inline std::unique_ptr CreateFeatureDecoder( + const avro::NodePtr& node, const varlen::Metadata& metadata) { + switch (metadata.dtype) { + case DT_INT32: { + return std::move(std::make_unique>(metadata)); + } + case DT_INT64: { + return std::move( + std::make_unique>(metadata)); + } + case DT_FLOAT: { + return std::move( + std::make_unique>(metadata)); + } + case DT_DOUBLE: { + return std::move( + std::make_unique>(metadata)); + } + case DT_STRING: { + return std::move( + std::make_unique>(metadata)); + } + case DT_BOOL: { + return std::move( + std::make_unique>(metadata)); + } + default: { + TypeNotSupportedAbort(metadata.dtype); + } + } + return nullptr; +} + +template <> +inline Status ValidateSchema(const avro::NodePtr& node, + const varlen::Metadata& metadata) { + avro::NodePtr n = node; + size_t avro_rank = 0; + // Check schema consists of non-nullable nested arrays. 
+ while (n->leaves() != 0) { + if (n->leaves() != 1 || n->type() != avro::AVRO_ARRAY) { + std::ostringstream oss; + n->printJson(oss, 0); + return InvalidVarlenFeatureSchema(metadata.name, oss.str()); + } + n = n->leafAt(0); + avro_rank++; + } + avro::Type avro_type = n->type(); + std::map::const_iterator tf_type = + avro_to_tf_datatype.find(avro_type); + if (tf_type == avro_to_tf_datatype.end()) { + // Check schema data type is supported. + std::ostringstream oss; + node->printJson(oss, 0); + return UnsupportedValueTypeError(metadata.name, oss.str()); + } else if (tf_type->second != metadata.dtype) { + // Check schema data type and metadata type match. + std::ostringstream oss; + node->printJson(oss, 0); + return SchemaValueTypeMismatch(metadata.name, avro_type, metadata.dtype, + oss.str()); + } + // Check schema rank and metadata rank match. + size_t metadata_rank = static_cast(metadata.shape.dims()); + if (avro_rank != metadata_rank) { + std::ostringstream oss; + node->printJson(oss, 0); + return FeatureRankMismatch(metadata.name, avro_rank, metadata_rank, + oss.str()); + } + return OkStatus(); +} + +} // namespace atds +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_RAGGED_FEATURE_DECODER_H_ diff --git a/tensorflow_io/core/kernels/avro/atds/varlen_feature_decoder_test.cc b/tensorflow_io/core/kernels/avro/atds/varlen_feature_decoder_test.cc new file mode 100644 index 000000000..f1bf9464e --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds/varlen_feature_decoder_test.cc @@ -0,0 +1,333 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow_io/core/kernels/avro/atds/varlen_feature_decoder.h" + +#include "api/Decoder.hh" +#include "api/Stream.hh" +#include "api/ValidSchema.hh" +#include "tensorflow/core/platform/test.h" +#include "tensorflow_io/core/kernels/avro/atds/decoder_test_util.h" + +namespace tensorflow { +namespace atds { +namespace varlen { + +template +void VarlenDecoderTest(const T& values, DataType dtype, + std::initializer_list shape, + const std::vector& expected_indices, + const std::vector& expected_values, long offset, + const avro::Type avro_type = avro::AVRO_NULL) { + string feature_name = "feature"; + ATDSSchemaBuilder schema_builder = ATDSSchemaBuilder(); + schema_builder.AddDenseFeature(feature_name, dtype, shape.size(), avro_type); + + string schema = schema_builder.Build(); + avro::ValidSchema writer_schema = schema_builder.BuildVaildSchema(); + avro::GenericDatum atds_datum(writer_schema); + AddDenseValue(atds_datum, feature_name, values); + + avro::OutputStreamPtr out_stream = EncodeAvroGenericDatum(atds_datum); + avro::InputStreamPtr in_stream = avro::memoryInputStream(*out_stream); + avro::DecoderPtr decoder = avro::binaryDecoder(); + decoder->init(*in_stream); + + std::vector dense_features; + std::vector sparse_features; + std::vector varlen_features; + size_t indices_index = 0, values_index = 0; + PartialTensorShape tensor_shape(shape); + varlen_features.emplace_back(FeatureType::varlen, feature_name, dtype, + tensor_shape, indices_index, values_index); + + ATDSDecoder 
atds_decoder = + ATDSDecoder(dense_features, sparse_features, varlen_features); + Status init_status = atds_decoder.Initialize(writer_schema); + ASSERT_TRUE(init_status.ok()); + + std::vector skipped_data = atds_decoder.GetSkippedData(); + std::vector dense_tensors; + sparse::ValueBuffer buffer; + sparse::GetValuesBuffer(buffer).resize(1); + buffer.indices.resize(1); + buffer.num_of_elements.resize(1); + Status decode_status = + atds_decoder.DecodeATDSDatum(decoder, dense_tensors, buffer, skipped_data, + static_cast(offset)); + ASSERT_TRUE(decode_status.ok()); + + std::vector expected_num_elements = {expected_values.size()}; + + ValidateBuffer(buffer, varlen_features[0], expected_indices, expected_values, + expected_num_elements); +} + +template +inline void VarlenDecoderTest(const T& values, DataType dtype, + std::initializer_list shape, + const std::vector& expected_indices, + const std::vector& expected_values, + long offset, const avro::Type avro_type) { + string feature_name = "feature"; + ATDSSchemaBuilder schema_builder = ATDSSchemaBuilder(); + schema_builder.AddDenseFeature(feature_name, dtype, shape.size(), avro_type); + + string schema = schema_builder.Build(); + avro::ValidSchema writer_schema = schema_builder.BuildVaildSchema(); + avro::GenericDatum atds_datum(writer_schema); + AddDenseValue(atds_datum, feature_name, values); + + avro::OutputStreamPtr out_stream = EncodeAvroGenericDatum(atds_datum); + avro::InputStreamPtr in_stream = avro::memoryInputStream(*out_stream); + avro::DecoderPtr decoder = avro::binaryDecoder(); + decoder->init(*in_stream); + + std::vector dense_features; + std::vector sparse_features; + std::vector varlen_features; + size_t indices_index = 0, values_index = 0; + PartialTensorShape tensor_shape(shape); + varlen_features.emplace_back(FeatureType::varlen, feature_name, dtype, + tensor_shape, indices_index, values_index); + + ATDSDecoder atds_decoder = + ATDSDecoder(dense_features, sparse_features, varlen_features); + Status 
init_status = atds_decoder.Initialize(writer_schema); + ASSERT_TRUE(init_status.ok()); + + std::vector skipped_data = atds_decoder.GetSkippedData(); + std::vector dense_tensors; + sparse::ValueBuffer buffer; + sparse::GetValuesBuffer(buffer).resize(1); + buffer.indices.resize(1); + buffer.num_of_elements.resize(1); + Status decode_status = + atds_decoder.DecodeATDSDatum(decoder, dense_tensors, buffer, skipped_data, + static_cast(offset)); + ASSERT_TRUE(decode_status.ok()); + + std::vector expected_num_elements = {expected_values.size()}; + + ValidateBuffer(buffer, varlen_features[0], expected_indices, expected_values, + expected_num_elements); +} + +TEST(VarlenDecoderTest, DT_INT32_scalar) { + int value = -7; + long offset = 1; + std::vector expected_indices = {offset}; + std::vector expected_values = {value}; + + VarlenDecoderTest(value, DT_INT32, {}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_INT32_1D) { + std::vector values = {1, 2, 3}; + long offset = 9; + std::vector expected_indices = {offset, 0, offset, 1, offset, 2}; + std::vector expected_values = values; + + VarlenDecoderTest(values, DT_INT32, {-1}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_INT32_2D) { + std::vector> values = {{-1}, {4, 5, 6}, {-7, 8}}; + long offset = 16; + std::vector expected_indices = {offset, 0, 0, offset, 1, 0, + offset, 1, 1, offset, 1, 2, + offset, 2, 0, offset, 2, 1}; + std::vector expected_values = {-1, 4, 5, 6, -7, 8}; + + VarlenDecoderTest(values, DT_INT32, {3, -1}, expected_indices, + expected_values, offset); +} + +TEST(VarlenDecoderTest, DT_INT64_scalar) { + long value = 1; + long offset = 0; + std::vector expected_indices = {offset}; + std::vector expected_values = {value}; + VarlenDecoderTest(value, DT_INT64, {}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_INT64_1D) { + std::vector values = {1}; + long offset = 3; + std::vector expected_indices = {offset, 0}; + 
std::vector expected_values = values; + VarlenDecoderTest(values, DT_INT64, {-1}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_INT64_2D) { + std::vector> values = {{1}}; + long offset = 3; + std::vector expected_indices = {offset, 0, 0}; + std::vector expected_values = {1}; + VarlenDecoderTest(values, DT_INT64, {-1, -1}, expected_indices, + expected_values, offset); +} + +TEST(VarlenDecoderTest, DT_FLOAT_scalar) { + float value = -0.6; + long offset = 5; + std::vector expected_indices = {offset}; + std::vector expected_values = {value}; + VarlenDecoderTest(value, DT_FLOAT, {}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_FLOAT_1D) { + std::vector values = {}; + long offset = 111; + std::vector expected_indices = {}; + std::vector expected_values = values; + VarlenDecoderTest(values, DT_FLOAT, {-1}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_FLOAT_2D) { + std::vector> values = {{-0.1, -0.2, -0.3}, {-1.4, 5.4}}; + long offset = 111; + std::vector expected_indices = { + offset, 0, 0, offset, 0, 1, offset, 0, 2, offset, 1, 0, offset, 1, 1}; + std::vector expected_values = {-0.1, -0.2, -0.3, -1.4, 5.4}; + VarlenDecoderTest(values, DT_FLOAT, {-1, -1}, expected_indices, + expected_values, offset); +} + +TEST(VarlenDecoderTest, DT_DOUBLE_scalar) { + double value = -0.99; + long offset = 1; + std::vector expected_indices = {offset}; + std::vector expected_values = {value}; + VarlenDecoderTest(value, DT_DOUBLE, {}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_DOUBLE_1D) { + std::vector values = {1.852, 0.79}; + long offset = 3; + std::vector expected_indices = {offset, 0, offset, 1}; + std::vector expected_values = values; + VarlenDecoderTest(values, DT_DOUBLE, {-1}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_DOUBLE_2D) { + std::vector> values = {}; + long offset = 5; + std::vector expected_indices = {}; 
+ std::vector expected_values = {}; + VarlenDecoderTest(values, DT_DOUBLE, {-1, 2}, expected_indices, + expected_values, offset); +} + +TEST(VarlenDecoderTest, DT_STRING_scalar) { + string value = "abc"; + long offset = 7; + std::vector expected_indices = {offset}; + std::vector expected_values = {"abc"}; + VarlenDecoderTest(value, DT_STRING, {}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_BYTES_scalar) { + byte_array value{0xb4, 0x98, 0x1a}; + long offset = 7; + std::vector expected_indices = {offset}; + std::vector expected_values = {value}; + VarlenDecoderTest(value, DT_STRING, {}, expected_indices, expected_values, + offset, avro::AVRO_BYTES); +} + +TEST(VarlenDecoderTest, DT_STRING_1D) { + std::vector values = {"", "", ""}; + long offset = 0; + std::vector expected_indices = {offset, 0, offset, 1, offset, 2}; + std::vector expected_values = values; + VarlenDecoderTest(values, DT_STRING, {-1}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_BYTES_1D) { + byte_array v1{0xb4, 0xaf, 0x98, 0x1a}; + byte_array v2{0xb4, 0xaf, 0x98}; + byte_array v3{0xb4, 0x98, 0x1a}; + std::vector values = {v1, v2, v3}; + long offset = 0; + std::vector expected_indices = {offset, 0, offset, 1, offset, 2}; + std::vector expected_values = values; + VarlenDecoderTest(values, DT_STRING, {-1}, expected_indices, expected_values, + offset, avro::AVRO_BYTES); +} + +TEST(VarlenDecoderTest, DT_STRING_2D) { + std::vector> values = {{"abc"}, {"ABC"}, {"LINKEDIN"}}; + long offset = 0; + std::vector expected_indices = {offset, 0, 0, offset, 1, + 0, offset, 2, 0}; + std::vector expected_values = {"abc", "ABC", "LINKEDIN"}; + VarlenDecoderTest(values, DT_STRING, {-1, 1}, expected_indices, + expected_values, offset); +} + +TEST(VarlenDecoderTest, DT_BYTES_2D) { + byte_array v1{0xb4, 0xaf, 0x98, 0x1a}; + byte_array v2{0xb4, 0xaf, 0x98}; + byte_array v3{0xb4, 0x98, 0x1a}; + std::vector> values = {{v1}, {v2}, {v3}}; + long offset = 0; + 
std::vector expected_indices = {offset, 0, 0, offset, 1, + 0, offset, 2, 0}; + std::vector expected_values = {v1, v2, v3}; + VarlenDecoderTest(values, DT_STRING, {-1, 1}, expected_indices, + expected_values, offset, avro::AVRO_BYTES); +} + +TEST(VarlenDecoderTest, DT_BOOL_scalar) { + bool value = true; + long offset = 0; + std::vector expected_indices = {offset}; + std::vector expected_values = {value}; + VarlenDecoderTest(value, DT_BOOL, {}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_BOOL_1D) { + std::vector values = {true, false, true}; + long offset = 3; + std::vector expected_indices = {offset, 0, offset, 1, offset, 2}; + std::vector expected_values = values; + VarlenDecoderTest(values, DT_BOOL, {-1}, expected_indices, expected_values, + offset); +} + +TEST(VarlenDecoderTest, DT_BOOL_2D) { + std::vector> values = {{}, {true, true}}; + long offset = 4; + std::vector expected_indices = {offset, 1, 0, offset, 1, 1}; + std::vector expected_values = {true, true}; + VarlenDecoderTest(values, DT_BOOL, {2, -1}, expected_indices, expected_values, + offset); +} + +} // namespace varlen +} // namespace atds +} // namespace tensorflow diff --git a/tensorflow_io/core/kernels/avro/atds_dataset_kernels.cc b/tensorflow_io/core/kernels/avro/atds_dataset_kernels.cc new file mode 100644 index 000000000..906d7bf9b --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds_dataset_kernels.cc @@ -0,0 +1,1210 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow_io/core/kernels/avro/atds_dataset_kernels.h" + +#include +#include +#include +#include + +#include "api/Compiler.hh" +#include "api/DataFile.hh" +#include "api/Decoder.hh" +#include "api/Specific.hh" +#include "api/Stream.hh" +#include "api/ValidSchema.hh" +#include "tensorflow/core/data/name_utils.h" +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_requires.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/lib/io/inputbuffer.h" +#include "tensorflow/core/platform/blocking_counter.h" +#include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/strcat.h" +#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow_io/core/kernels/avro/atds/atds_decoder.h" +#include "tensorflow_io/core/kernels/avro/atds/avro_block_reader.h" +#include "tensorflow_io/core/kernels/avro/atds/decompression_handler.h" +#include "tensorflow_io/core/kernels/avro/atds/errors.h" +#include "tensorflow_io/core/kernels/avro/atds/shuffle_handler.h" + +namespace tensorflow { +namespace data { + +void ParallelFor(const std::function& f, size_t n, + thread::ThreadPool* thread_pool) { + if (n == 0) return; + if (thread_pool == nullptr) { + for (size_t i = 0; i < n; ++i) { + f(i); + } + } else { + BlockingCounter counter(n - 1); + for (size_t i = 1; i < n; ++i) { + thread_pool->Schedule([i, &f, &counter] { + f(i); + counter.DecrementCount(); + }); + } + f(0); + counter.Wait(); + } +} + +/* static */ constexpr const char* const ATDSDatasetOp::kDatasetType; +/* static */ constexpr const char* const 
ATDSDatasetOp::kFileNames; +/* static */ constexpr const char* const ATDSDatasetOp::kBatchSize; +/* static */ constexpr const char* const ATDSDatasetOp::kDropRemainder; +/* static */ constexpr const char* const ATDSDatasetOp::kReaderBufferSize; +/* static */ constexpr const char* const ATDSDatasetOp::kShuffleBufferSize; +/* static */ constexpr const char* const ATDSDatasetOp::kNumParallelCalls; +/* static */ constexpr const char* const ATDSDatasetOp::kFeatureKeys; +/* static */ constexpr const char* const ATDSDatasetOp::kFeatureTypes; +/* static */ constexpr const char* const ATDSDatasetOp::kSparseDtypes; +/* static */ constexpr const char* const ATDSDatasetOp::kSparseShapes; +/* static */ constexpr const char* const ATDSDatasetOp::kOutputDtypes; +/* static */ constexpr const char* const ATDSDatasetOp::kOutputShapes; +/* static */ constexpr const char* const ATDSDatasetOp::kDenseType; +/* static */ constexpr const char* const ATDSDatasetOp::kSparseType; +/* static */ constexpr const char* const ATDSDatasetOp::kVarlenType; + +class ATDSDatasetOp::Dataset : public DatasetBase { + public: + explicit Dataset(OpKernelContext* ctx, std::vector filenames, + size_t batch_size, bool drop_remainder, + int64 reader_buffer_size, int64 shuffle_buffer_size, + int64 num_parallel_calls, + const std::vector& feature_keys, + const std::vector& feature_types, + const std::vector& sparse_dtypes, + const std::vector& sparse_shapes, + const std::vector& output_dtypes, + const std::vector& output_shapes) + : DatasetBase(DatasetContext(ctx)), + filenames_(std::move(filenames)), + batch_size_(batch_size), + reader_buffer_size_(reader_buffer_size), + shuffle_buffer_size_(shuffle_buffer_size), + num_parallel_calls_(num_parallel_calls), + drop_remainder_(drop_remainder), + feature_keys_(feature_keys), + feature_types_(feature_types), + sparse_dtypes_(sparse_dtypes), + sparse_shapes_(sparse_shapes), + output_dtypes_(output_dtypes), + output_shapes_(output_shapes) { + size_t num_of_features = 
feature_keys_.size(); + output_tensor_types_.reserve(num_of_features); + sparse_value_index_.reserve(sparse_dtypes.size()); + for (size_t i = 0; i < num_of_features; i++) { + if (feature_types[i] == kDenseType) { + output_tensor_types_.emplace_back(TensorType::dense); + auto dim_v = output_shapes[i].dim_sizes(); + size_t rank = dim_v.size(); + + TensorShapeProto proto; + PartialTensorShape shape; + for (size_t d = 1; d < rank; d++) { + proto.add_dim()->set_size(dim_v[d]); + } + if (!PartialTensorShape::BuildPartialTensorShape(proto, &shape).ok()) { + LOG(ERROR) << "Error encountered in creating PartialTensorShape for " + "dense features."; + } + dense_features_.emplace_back(atds::FeatureType::dense, feature_keys_[i], + output_dtypes[i], shape, num_of_dense_); + num_of_dense_++; + } else if (feature_types[i] == kSparseType || + feature_types[i] == kVarlenType) { + output_tensor_types_.emplace_back(TensorType::sparse); + + auto& shape = sparse_shapes[num_of_sparse_]; + // The estimated number of elements in this sparse tensor. + // The estimated number is used to preallocate sparse value buffer. + size_t estimated_elements = 1; + if (feature_types[i] == kVarlenType) { + for (auto dim : shape) { + // Assume unknown dim will only have 1 element. For example, + // varlen tensor with shape [-1, 2, -1] is expected to have 2 + // elements in total. 
+ if (dim.size > 0) { + estimated_elements *= dim.size; + } + } + } + size_t rank_after_batch = static_cast(shape.dims() + 1); + sparse_expected_elements_.indices.push_back(rank_after_batch * + estimated_elements); + + size_t values_index = 0; + auto dtype = sparse_dtypes[num_of_sparse_]; + if (dtype == DT_INT32) { + values_index = sparse_dtype_counts_.int_counts++; + sparse_expected_elements_.int_values.push_back(estimated_elements); + } else if (dtype == DT_INT64) { + values_index = sparse_dtype_counts_.long_counts++; + sparse_expected_elements_.long_values.push_back(estimated_elements); + } else if (dtype == DT_FLOAT) { + values_index = sparse_dtype_counts_.float_counts++; + sparse_expected_elements_.float_values.push_back(estimated_elements); + } else if (dtype == DT_DOUBLE) { + values_index = sparse_dtype_counts_.double_counts++; + sparse_expected_elements_.double_values.push_back(estimated_elements); + } else if (dtype == DT_STRING) { + values_index = sparse_dtype_counts_.string_counts++; + sparse_expected_elements_.string_values.push_back(estimated_elements); + } else if (dtype == DT_BOOL) { + values_index = sparse_dtype_counts_.bool_counts++; + sparse_expected_elements_.bool_values.push_back(estimated_elements); + } + sparse_value_index_.emplace_back(values_index); + + if (feature_types[i] == kSparseType) { + sparse_features_.emplace_back( + atds::FeatureType::sparse, feature_keys_[i], + sparse_dtypes[num_of_sparse_], sparse_shapes[num_of_sparse_], + num_of_sparse_, values_index); + } else if (feature_types[i] == kVarlenType) { + varlen_features_.emplace_back( + atds::FeatureType::varlen, feature_keys_[i], + sparse_dtypes[num_of_sparse_], sparse_shapes[num_of_sparse_], + num_of_sparse_, values_index); + } + num_of_sparse_++; + } else { + LOG(ERROR) << "Unknown feature type " << feature_types[i]; + } + } + + for (auto& dtype : output_dtypes) { + output_dtype_vector_.push_back(dtype); + } + } + + std::unique_ptr MakeIteratorInternal( + const string& prefix) 
const override { + return absl::make_unique(Iterator::Params{ + this, name_utils::IteratorPrefix(kDatasetType, prefix)}); + } + + const DataTypeVector& output_dtypes() const override { + return output_dtype_vector_; + } + + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() const override { + return name_utils::DatasetDebugString(kDatasetType); + } + + Status InputDatasets(std::vector* inputs) const override { + return OkStatus(); + } + + Status CheckExternalState() const override { return OkStatus(); } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override { + Node* filenames = nullptr; + TF_RETURN_IF_ERROR(b->AddVector(filenames_, &filenames)); + Node* batch_size = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(batch_size_, &batch_size)); + Node* drop_remainder = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(drop_remainder_, &drop_remainder)); + Node* reader_buffer_size = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(reader_buffer_size_, &reader_buffer_size)); + Node* shuffle_buffer_size = nullptr; + TF_RETURN_IF_ERROR( + b->AddScalar(shuffle_buffer_size_, &shuffle_buffer_size)); + Node* num_parallel_calls = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(num_parallel_calls_, &num_parallel_calls)); + + AttrValue feature_keys; + b->BuildAttrValue(feature_keys_, &feature_keys); + AttrValue feature_types; + b->BuildAttrValue(feature_types_, &feature_types); + AttrValue sparse_dtypes; + b->BuildAttrValue(sparse_dtypes_, &sparse_dtypes); + AttrValue sparse_shapes; + b->BuildAttrValue(sparse_shapes_, &sparse_shapes); + AttrValue output_dtypes; + b->BuildAttrValue(output_dtypes_, &output_dtypes); + AttrValue output_shapes; + b->BuildAttrValue(output_shapes_, &output_shapes); + + TF_RETURN_IF_ERROR(b->AddDataset( + this, + {filenames, batch_size, drop_remainder, reader_buffer_size, + shuffle_buffer_size, num_parallel_calls}, + {{kFeatureKeys, feature_keys}, 
+ {kFeatureTypes, feature_types}, + {kSparseDtypes, sparse_dtypes}, + {kSparseShapes, sparse_shapes}, + {kOutputDtypes, output_dtypes}, + {kOutputShapes, output_shapes}}, + output)); + return OkStatus(); + } + + private: + enum class TensorType { dense, sparse }; + + /** + * Utility struct to collect the number of sparse tensors for each DType. + */ + struct SparseDtypeCounts { + size_t int_counts = 0; + size_t long_counts = 0; + size_t float_counts = 0; + size_t double_counts = 0; + size_t string_counts = 0; + size_t bool_counts = 0; + }; + + /** + * Utility struct to store the estimated number of elements for each sparse + * tensor. The estimated number in values tensor and indices tensor are + * ordered based on the layout in atds::sparse::ValueBuffer. + * The information is used for better buffer pre-allocation. + */ + struct SparseExpectedElements { + std::vector int_values; + std::vector long_values; + std::vector float_values; + std::vector double_values; + std::vector string_values; + std::vector bool_values; + std::vector indices; + }; + + class Iterator : public DatasetIterator { + public: + static constexpr const char* const kWaitingForData = "WaitingForData"; + static constexpr const char* const kBlockReading = "BlockReading"; + static constexpr const char* const kParsingThread = "ParsingThread_"; + static constexpr const char* const kDeflateDecompression = + "DeflateDecompression"; + static constexpr const char* const kSnappyDecompression = + "SnappyDecompression"; + static constexpr const char* const kFillingSparseValues = + "FillingSparseValues"; + + explicit Iterator(const Params& params) + : DatasetIterator(params), + shuffle_handler_(nullptr), + cond_var_(std::make_shared()), + write_var_(std::make_shared()), + mu_(std::make_shared()), + count_(0) { + batch_size_ = static_cast(dataset()->batch_size_); + shuffle_buffer_size_ = + static_cast(dataset()->shuffle_buffer_size_); + shuffle_handler_ = std::make_unique(mu_.get()); + decompression_handler_ 
= std::make_unique(); + auto& sparse_dtype_counts = dataset()->sparse_dtype_counts_; + value_buffer_.int_values.resize(sparse_dtype_counts.int_counts); + value_buffer_.long_values.resize(sparse_dtype_counts.long_counts); + value_buffer_.float_values.resize(sparse_dtype_counts.float_counts); + value_buffer_.double_values.resize(sparse_dtype_counts.double_counts); + value_buffer_.string_values.resize(sparse_dtype_counts.string_counts); + value_buffer_.bool_values.resize(sparse_dtype_counts.bool_counts); + value_buffer_.num_of_elements.resize(dataset()->num_of_sparse_); + value_buffer_.indices.resize(dataset()->num_of_sparse_); + } + + ~Iterator() override { + // must ensure that the thread is cancelled. + CancelThreads(); + // LOG(INFO) << "Decompression time per record (us): " << + // (static_cast(GetTotalStats(total_decompress_micros_)) / + // GetTotalStats(num_decompressed_objects_)); LOG(INFO) << "Decode time + // per record (us): " << + // (static_cast(GetTotalStats(total_decode_micros_)) / + // GetTotalStats(total_records_parsed_)); + } + + void CancelThreads() TF_LOCKS_EXCLUDED(mu_) { + mutex_lock l(*mu_); + mutex_lock i(input_mu_); + cancelled_ = true; + cond_var_->notify_all(); + write_var_->notify_all(); + // wait for thread to finish + if (prefetch_thread_) { + while (!prefetch_thread_finished_) { + write_var_->wait(i); + } + } + } + + Status Initialize(IteratorContext* ctx) { + int64 num_threads = dataset()->num_parallel_calls_; + const int64 max_parallelism = port::MaxParallelism(); + if (num_threads <= 0 || num_threads > max_parallelism) { + if (num_threads == tensorflow::data::model::kAutotune) { + LOG(INFO) << "Thread autotuning enabled for " + "ATDSDatasetOp::Dataset::Iterator."; + } + LOG(INFO) << "Create ATDSDatasetOp::Dataset::Iterator thread pool with " + << "the maximum parallelism number " << max_parallelism + << " for this process."; + num_threads = max_parallelism; + } + thread_delays.resize(max_parallelism, 0); + 
thread_itrs.resize(max_parallelism, 0); + thread_pool_ = + ctx->CreateThreadPool(std::string(kDatasetType), num_threads); + return OkStatus(); + } + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(*mu_); + EnsurePrefetchThreadStarted(ctx); + size_t total_buffer = total_buffer_size(); + while (true) { + // LOG(INFO) << "b " << blocks_.size() << " c_: " << count_; + // while count_ is smaller than batch_size, wait on cond_var_ if not + // last file this will get woken up by the prefetch thread + size_t count = 0; + bool prefetch_thread_finished = false; + { + tensorflow::profiler::TraceMe trace(kWaitingForData); + + mutex_lock i(input_mu_); + while (!cancelled_ && !prefetch_thread_finished_ && + count_ < total_buffer) { + // LOG(INFO) << "waiting on block refill " << blocks_.size() << " + // count: " << count_; + write_var_->notify_all(); + cond_var_->wait(i); + } + // LOG(INFO) << "done waiting on block refill " << blocks_.size() << " + // count: " << count_; + if (cancelled_) { + return OkStatus(); + } + + count_ = 0; + // merge write_blocks_ into blocks_ + blocks_.reserve(blocks_.size() + write_blocks_.size()); + blocks_.insert(blocks_.end(), + std::make_move_iterator(write_blocks_.begin()), + std::make_move_iterator(write_blocks_.end())); + write_blocks_.clear(); // size down the write_blocks + + size_t non_empty_idx = 0; + for (size_t i = 0; i < blocks_.size(); i++) { + count_ += blocks_[i]->object_count - blocks_[i]->num_decoded; + if (blocks_[i]->num_decoded < blocks_[i]->object_count) { + std::swap(blocks_[non_empty_idx], blocks_[i]); + non_empty_idx++; + } + } + blocks_.resize(non_empty_idx); + + count = count_; + prefetch_thread_finished = prefetch_thread_finished_; + + // let it continue to read batch_size_ or count_ records. 
+ count_ -= std::min(count_, batch_size_); + write_var_->notify_all(); + + if (prefetch_thread_finished_) { + // Finished epoch, reset shuffle for new epoch + shuffle_handler_->ResetRngs(); + } + } + + bool drop_remainder = dataset()->drop_remainder_; + if (count >= batch_size_ || + (!drop_remainder && prefetch_thread_finished && count > 0)) { + // LOG(INFO) << "Process " << blocks_.size() << " blocks with " << + // count << " objects. " << non_empty_idx << " batch: " << + // batch_size_; + size_t batch_size = std::min(count, batch_size_); + PartialTensorShape batch_dim({static_cast(batch_size)}); + auto num_of_dense = dataset()->num_of_dense_; + auto num_of_sparse = dataset()->num_of_sparse_; + auto& dense_features = dataset()->dense_features_; + std::vector dense_tensors; + for (size_t i = 0; i < num_of_dense; i++) { + auto& dense_feature = dense_features[i]; + TensorShape shape; + batch_dim.Concatenate(dense_feature.shape).AsTensorShape(&shape); + dense_tensors.emplace_back(ctx->allocator({}), dense_feature.dtype, + shape); + } + + size_t thread_pool_size = + static_cast(thread_pool_->NumThreads()); + size_t num_blocks = blocks_.size(); + size_t num_threads = std::min(num_blocks, thread_pool_size); + num_threads = std::min(num_threads, + static_cast(port::MaxParallelism())); + + int64 user_defined_thread_num = dataset()->num_parallel_calls_; + if (user_defined_thread_num > 0) { + num_threads = std::min( + num_threads, static_cast(user_defined_thread_num)); + } else if (user_defined_thread_num == + tensorflow::data::model::kAutotune) { + num_threads = ComputeNumAutotuneThreads(num_threads); + } + total_records_parsed_.resize(num_threads, 0); + total_decode_micros_.resize(num_threads, 0); + num_decompressed_objects_.resize(num_threads, 0); + total_decompress_micros_.resize(num_threads, 0); + shuffle_handler_->SampleBlocks(batch_size, shuffle_buffer_size_ > 0, + blocks_); + std::vector sparse_buffer(num_threads, + value_buffer_); + + std::vector 
status_of_threads(num_threads); + auto process_block = [&](size_t i, size_t thread_idx, + avro::DecoderPtr& decoder, + atds::sparse::ValueBuffer& buffer, + std::vector& skipped) { + // start is the offset in the each example, and therefore just need + // to be different from every other block. + size_t start = 0; + if (i > 0) { + start += blocks_[i - 1]->counts; + } + size_t end = blocks_[i]->counts; + // LOG(INFO) << "Block: " << i << " start: " << start << " end: " << + // end << " read_so_far " << blocks_[i]->num_decoded + // << " num_to_decode: " << blocks_[i]->num_to_decode << " + // remaining: " << (blocks_[i]->object_count - + // blocks_[i]->num_decoded); + avro::Codec codec = blocks_[i]->codec; + avro::InputStreamPtr input_stream = nullptr; + uint64 decompress_start_time = ctx->env()->NowMicros(); + if (codec == avro::NULL_CODEC) { + input_stream = + decompression_handler_->decompressNullCodec(*(blocks_[i])); + } else if (codec == avro::DEFLATE_CODEC) { + tensorflow::profiler::TraceMe traceme(kDeflateDecompression); + input_stream = + decompression_handler_->decompressDeflateCodec(*(blocks_[i])); + } +#ifdef SNAPPY_CODEC_AVAILABLE + else if (codec == avro::SNAPPY_CODEC) { + tensorflow::profiler::TraceMe traceme(kSnappyDecompression); + input_stream = + decompression_handler_->decompressSnappyCodec(*(blocks_[i])); + } +#endif + else { + throw avro::Exception( + "Unsupported Avro codec. Only null or deflate is supported. 
" + "Got " + + codec); + } + uint64 decompress_end_time = ctx->env()->NowMicros(); + if (codec != avro::NULL_CODEC) { + total_decompress_micros_[thread_idx] += + (decompress_end_time - decompress_start_time); + num_decompressed_objects_[thread_idx] += blocks_[i]->object_count; + // LOG(INFO) << "Block " << i << " decompress time (us): " << + // (decompress_end_time - decompress_start_time) + // << ", num records: " << blocks_[i]->object_count; + } + decoder->init(*input_stream); + + while (start < end) { + // LOG(INFO) << "Block: " << i << " start: " << start; + uint64 datum_parse_start = ctx->env()->NowMicros(); + auto decoding_status = atds_decoder_->DecodeATDSDatum( + decoder, dense_tensors, buffer, skipped, start); + if (!decoding_status.ok()) { + // The decoding of this block has failed, + // setting the number of decoded objects to the total number of + // objects in the block so the decoder will skip decoding this + // block. + blocks_[i]->num_decoded = blocks_[i]->object_count; + return decoding_status; + } + uint64 datum_parse_end = ctx->env()->NowMicros(); + total_decode_micros_[thread_idx] += + (datum_parse_end - datum_parse_start); + total_records_parsed_[thread_idx] += 1; + start++; + blocks_[i]->num_decoded++; + blocks_[i]->num_to_decode--; + } + + if (blocks_[i]->object_count > blocks_[i]->num_decoded) { + decoder->init(*input_stream); + blocks_[i]->read_offset += input_stream->byteCount(); + // LOG(INFO) << "Block: " << i << " Reset offset to " << + // blocks_[i]->read_offset << ". " << (end - start) + // << " datum left for block " << i; + } + // LOG(INFO) << "process block " << i << " . 
Read: " << + // blocks_[i]->num_decoded; + return OkStatus(); + }; + + std::vector block_nums; + GetBlockRanges(num_threads, block_nums); + std::vector thread_start_times; + thread_start_times.resize(num_threads, 0); + auto process = [&](size_t index) { + auto parsing_thread_name = [index]() { + return strings::StrCat(kParsingThread, index); + }; + tensorflow::profiler::TraceMe trace(parsing_thread_name); + + thread_start_times[index] = ctx->env()->NowMicros(); + size_t block_start = 0; + if (index > 0) { + block_start = block_nums[index - 1]; + } + size_t block_end = block_nums[index]; + auto decoder = avro::binaryDecoder(); + auto skipped = atds_decoder_->GetSkippedData(); + auto& buffer = sparse_buffer[index]; + size_t count_start = 0; + if (block_start > 0) { + count_start = blocks_[block_start - 1]->counts; + } + size_t num_of_datum = blocks_[block_end - 1]->counts - count_start; + InitSparseValueBuffer(buffer, num_of_datum); + // LOG(INFO) << "Thread " << index << " process blocks from " << + // block_start << " to " + // << block_end << " with " << num_of_datum << " + // examples."; + + status_of_threads[index] = OkStatus(); + auto& status = status_of_threads[index]; + + for (size_t i = block_start; i < block_end && status.ok(); i++) { + if (blocks_[i]->codec != avro::NULL_CODEC || + blocks_[i]->num_to_decode > 0) { + status = process_block(i, index, decoder, buffer, skipped); + } + } + // LOG(INFO) << "Thread " << index << " process blocks from " << + // block_start << " to " << block_end << ". 
Done."; + }; + ParallelFor(process, num_threads, thread_pool_.get()); + uint64 earliest_start_time = *std::min_element( + thread_start_times.begin(), thread_start_times.end()); + for (size_t i = 0; i < num_threads; i++) { + thread_delays[i] += (thread_start_times[i] - earliest_start_time); + thread_itrs[i] += 1; + } + for (Status& status : status_of_threads) { + TF_RETURN_IF_ERROR(status); + } + + std::vector num_of_elements(num_of_sparse, 0); + std::vector indices_tensors; + std::vector values_tensors; + std::vector shape_tensors; + indices_tensors.reserve(num_of_sparse); + values_tensors.reserve(num_of_sparse); + shape_tensors.reserve(num_of_sparse); + auto& sparse_dtypes = dataset()->sparse_dtypes_; + auto& sparse_shapes = dataset()->sparse_shapes_; + for (size_t i = 0; i < num_of_sparse; i++) { + for (size_t t = 0; t < num_threads; t++) { + // Check if vector is empty and move on to the next vector. + // If shuffle buffer and number of threads is large compared + // to the batch, this vector maybe empty for certain threads. + num_of_elements[i] += static_cast( + GetLastElement(sparse_buffer[t].num_of_elements[i])); + } + auto& sparse_shape = sparse_shapes[i]; + + int64 rank = sparse_shape.dims() + 1; + TensorShape indices_shape({num_of_elements[i], rank}); + TensorShape values_shape({num_of_elements[i]}); + TensorShape shape_shape({rank}); + indices_tensors.emplace_back(DT_INT64, indices_shape); + values_tensors.emplace_back(sparse_dtypes[i], values_shape); + shape_tensors.emplace_back(DT_INT64, shape_shape); + + auto& shape_tensor = shape_tensors.back(); + size_t d = 0; + shape_tensor.vec()(d++) = batch_size; + for (auto dim : sparse_shape) { + if (dim.size > 0) { + shape_tensor.vec()(d++) = dim.size; + } else { + // When dim size is unknown i.e. -1, scan indices array to find + // the largest dim value. 
+ long max_dim = -1; + for (size_t t = 0; t < num_threads; t++) { + auto& indices = sparse_buffer[t].indices[i]; + for (size_t pos = d; pos < indices.size(); pos += rank) { + max_dim = std::max(max_dim, indices[pos]); + } + } + shape_tensor.vec()(d++) = max_dim + 1; + } + } + } + + auto& sparse_value_index = dataset()->sparse_value_index_; + auto fill_sparse_value = [&](int64 thread_index) { + // LOG(INFO) << "Thread " << thread_index << " starts filling sparse + // value"; + auto& buffer = sparse_buffer[thread_index]; + for (size_t i = 0; i < num_of_sparse; i++) { + size_t offset = 0; + int64 index = thread_index; + while (index > 0) { + index--; + offset += + GetLastElement(sparse_buffer[index].num_of_elements[i]); + } + + size_t rank_after_batch = + static_cast(sparse_shapes[i].dims() + 1); + atds::sparse::FillIndicesTensor(buffer.indices[i], + indices_tensors[i], + rank_after_batch * offset); + atds::sparse::FillValuesTensor(buffer, values_tensors[i], + sparse_dtypes[i], + sparse_value_index[i], offset); + // LOG(INFO) << "Thread " << thread_index << " filled sparse + // values."; + } + }; + + { + tensorflow::profiler::TraceMe trace(kFillingSparseValues); + ParallelFor(fill_sparse_value, num_threads, thread_pool_.get()); + } + + size_t feature_num = num_of_dense + num_of_sparse; + size_t dense_index = 0, sparse_index = 0; + auto& feature_types = dataset()->output_tensor_types_; + for (size_t i = 0; i < feature_num; i++) { + if (feature_types[i] == TensorType::dense) { + out_tensors->emplace_back( + std::move(dense_tensors[dense_index++])); + } else if (feature_types[i] == TensorType::sparse) { + out_tensors->emplace_back(DT_VARIANT, TensorShape({3})); + auto& serialized_sparse_t = out_tensors->back(); + serialized_sparse_t.vec()(0) = + std::move(indices_tensors[sparse_index]); + serialized_sparse_t.vec()(1) = + std::move(values_tensors[sparse_index]); + serialized_sparse_t.vec()(2) = + std::move(shape_tensors[sparse_index]); + sparse_index++; + } + } + // 
LOG(INFO) << "Done with batch " ; + *end_of_sequence = false; + return OkStatus(); + } else { + *end_of_sequence = true; + return prefetch_thread_status_; + } + } + } + + protected: + std::shared_ptr CreateNode( + IteratorContext* ctx, model::Node::Args args) const override { + return model::MakeSourceNode(std::move(args)); + } + + Status SaveInternal(SerializationContext* ctx, + IteratorStateWriter* writer) override { + return errors::Unimplemented("SaveInternal"); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + return errors::Unimplemented( + "Iterator does not support 'RestoreInternal')"); + } + + private: + // Returns the last element of the provided integer vector is a null-safe + // fashion + size_t GetLastElement(const std::vector& num_of_elements_at_i) { + if (num_of_elements_at_i.empty()) { + return 0; + } + return num_of_elements_at_i.back(); + } + + void PrefetchThread(const std::shared_ptr& ctx) { + size_t total_buffer = total_buffer_size(); + std::unique_ptr reader; + std::unique_ptr file; + size_t current_file_index = 0; + while (true) { + // 1. wait for a slot in the buffer + { + mutex_lock l(input_mu_); + while (!cancelled_ && count_ >= total_buffer) { + // LOG(INFO) << "prefetch waiting on block size " << blocks_.size() + // << " count: " << count_; + cond_var_->notify_one(); + write_var_->wait(l); + } + // LOG(INFO) << "prefetch done waiting on block size " << + // blocks_.size() << " count: " << count_; + if (cancelled_) { + prefetch_thread_finished_ = true; + prefetch_thread_status_ = OkStatus(); + cond_var_->notify_all(); + write_var_->notify_all(); + return; + } + } // done with mutex_lock l + // 2. 
read the next elements unil count hits max + Status status = OkStatus(); + if (!reader) { + status = + SetupStreamsLocked(ctx->env(), file, reader, current_file_index); + if (!status.ok()) { + mutex_lock l(input_mu_); + LOG(ERROR) << "Error loading file: " + << dataset()->filenames_[current_file_index]; + prefetch_thread_finished_ = true; + prefetch_thread_status_ = status; + cond_var_->notify_all(); + write_var_->notify_all(); + return; + } + } + + // LOG(INFO) << "Before processing " << count_ << " datum left in + // block."; + tensorflow::profiler::TraceMe trace(kBlockReading); + + auto block = std::make_unique(); + status = reader->ReadBlock(*block); + // LOG(INFO) << "Read block status: " << status.ToString(); + // done with mutex_lock input_l + if (!status.ok()) { + if (!errors::IsOutOfRange(status)) { + LOG(ERROR) << "Error in reading avro block. Cause: " + << status.ToString(); + } + // LOG(INFO) << "Resetting stream: " << status.ToString() << "b " << + // blocks_.size() << " c_: " << count_; + ResetStreamsLocked(file, reader); + ++current_file_index; + if (current_file_index >= dataset()->filenames_.size()) { + mutex_lock l(input_mu_); + prefetch_thread_finished_ = true; + // Note: this is overwriting any previous errors + prefetch_thread_status_ = OkStatus(); + cond_var_->notify_all(); + write_var_->notify_all(); + return; + } // done with mutex_lock l + } else { + mutex_lock n(input_mu_); + count_ += block->object_count; + write_blocks_.emplace_back(std::move(block)); + ++num_blocks_read_; + } + } // end while + } + + Status EnsurePrefetchThreadStarted(IteratorContext* ctx) + TF_EXCLUSIVE_LOCKS_REQUIRED(*mu_) { + if (!prefetch_thread_) { + std::shared_ptr new_ctx = + std::make_shared(*ctx); + prefetch_thread_ = + ctx->StartThread("atds_data_prefetch", + [this, new_ctx]() { PrefetchThread(new_ctx); }); + } + return OkStatus(); + } + + size_t total_buffer_size() { return batch_size_ + shuffle_buffer_size_; } + + // Sets up reader streams to read from the 
file at `current_file_index_`. + Status SetupStreamsLocked( + Env* env, std::unique_ptr& file, + std::unique_ptr& reader, size_t current_file_index) { + if (current_file_index >= dataset()->filenames_.size()) { + return errors::InvalidArgument( + "current_file_index_:", current_file_index, + " >= filenames_.size():", dataset()->filenames_.size()); + } + + // Actually move on to next file. + const string& next_filename = dataset()->filenames_[current_file_index]; + TF_RETURN_IF_ERROR(env->NewRandomAccessFile(next_filename, &file)); + reader = absl::make_unique( + file.get(), dataset()->reader_buffer_size_); + if (atds_decoder_ == nullptr) { + atds_decoder_ = std::make_unique( + dataset()->dense_features_, dataset()->sparse_features_, + dataset()->varlen_features_); + TF_RETURN_IF_ERROR(atds_decoder_->Initialize(reader->GetSchema())); + expected_schema_ = atds_decoder_->GetSchema().toJson(false); + } else if (expected_schema_ != reader->GetSchema().toJson(false)) { + string expected_schema = atds_decoder_->GetSchema().toJson(true); + string varied_schema = reader->GetSchema().toJson(true); + string filename = dataset()->filenames_[0]; + return atds::VariedSchemaNotSupportedError( + expected_schema, filename, varied_schema, next_filename); + } + return OkStatus(); + } + + // Resets all reader streams. 
+ void ResetStreamsLocked(std::unique_ptr& file, + std::unique_ptr& reader) { + reader.reset(); + file.reset(); + } + + void InitSparseValueBuffer(atds::sparse::ValueBuffer& buffer, + size_t num_of_datum) { + auto& sparse_dtype_counts = dataset()->sparse_dtype_counts_; + auto& sparse_expected_elements = dataset()->sparse_expected_elements_; + for (size_t i = 0; i < sparse_dtype_counts.int_counts; i++) { + buffer.int_values[i].reserve(num_of_datum * + sparse_expected_elements.int_values[i]); + } + for (size_t i = 0; i < sparse_dtype_counts.long_counts; i++) { + buffer.long_values[i].reserve(num_of_datum * + sparse_expected_elements.long_values[i]); + } + for (size_t i = 0; i < sparse_dtype_counts.float_counts; i++) { + buffer.float_values[i].reserve( + num_of_datum * sparse_expected_elements.float_values[i]); + } + for (size_t i = 0; i < sparse_dtype_counts.double_counts; i++) { + buffer.double_values[i].reserve( + num_of_datum * sparse_expected_elements.double_values[i]); + } + for (size_t i = 0; i < sparse_dtype_counts.string_counts; i++) { + buffer.string_values[i].reserve( + num_of_datum * sparse_expected_elements.string_values[i]); + } + for (size_t i = 0; i < sparse_dtype_counts.bool_counts; i++) { + buffer.bool_values[i].reserve(num_of_datum * + sparse_expected_elements.bool_values[i]); + } + + size_t num_of_sparse = dataset()->num_of_sparse_; + for (size_t i = 0; i < num_of_sparse; i++) { + buffer.num_of_elements[i].reserve(num_of_datum); + buffer.indices[i].reserve(num_of_datum * + sparse_expected_elements.indices[i]); + } + } + + void GetUniformBlockRanges(size_t num_threads, + std::vector& block_nums) { + size_t num_blocks = blocks_.size(); + size_t blocks_per_thread = num_blocks / num_threads; + size_t remainder = num_blocks % num_threads; + size_t block_idx = 0; + for (size_t i = 0; i < num_threads; i++) { + block_idx += blocks_per_thread; + if (i < remainder) { + block_idx += 1; + } + block_nums.emplace_back(block_idx); + } + } + + double 
GetTotalCost(double& decode_cost_per_record, + double& decompress_cost_per_record) { + decode_cost_per_record = + static_cast(GetTotalStats(total_decode_micros_)) / + GetTotalStats(total_records_parsed_); + decompress_cost_per_record = 0; + double total_cost = decode_cost_per_record * batch_size_; + if (GetTotalStats(num_decompressed_objects_) > 0) { + decompress_cost_per_record = + static_cast(GetTotalStats(total_decompress_micros_)) / + GetTotalStats(num_decompressed_objects_); + // Newly read blocks are appended to the end of blocks_ array, and all + // non-newly read blocks were already decompressed in previous + // GetNextInternal iterations. So we loop through blocks in reverse + // order, and terminate when we encounter an already decompressed block + // (null codec). + for (size_t i = blocks_.size(); + i > 0 && blocks_[i - 1]->codec != avro::NULL_CODEC; i--) { + total_cost += + (decompress_cost_per_record * blocks_[i - 1]->object_count); + } + } + return total_cost; + } + + void GetCostBasedBlockRanges(size_t num_threads, + std::vector& block_nums) { + size_t num_blocks = blocks_.size(); + double decode_cost_per_record; + double decompress_cost_per_record; + double total_cost = + GetTotalCost(decode_cost_per_record, decompress_cost_per_record); + double cost_per_thread = total_cost / num_threads; + size_t block_idx = 0; + size_t thread_idx = 0; + double running_cost = 0; + while (thread_idx < num_threads) { + while (running_cost < cost_per_thread * (thread_idx + 1) && + block_idx < num_blocks) { + if (blocks_[block_idx]->codec != avro::NULL_CODEC) { + running_cost += + decompress_cost_per_record * blocks_[block_idx]->object_count; + } + running_cost += + decode_cost_per_record * blocks_[block_idx]->num_to_decode; + block_idx++; + } + block_nums.emplace_back(block_idx); + thread_idx++; + } + block_nums[num_threads - 1] = num_blocks; + } + + void GetBlockRanges(size_t num_threads, std::vector& block_nums) { + block_nums.reserve(num_threads); + if 
(GetTotalStats(total_decode_micros_) == 0) { + // No decode time statistics yet. Divide blocks evenly between threads + GetUniformBlockRanges(num_threads, block_nums); + } else { + // Get block ranges per thread based on runtime data + GetCostBasedBlockRanges(num_threads, block_nums); + } + } + + size_t ComputeNumAutotuneThreads(size_t curr_threads) { + size_t ideal_num_threads = curr_threads; + if (thread_itrs[0] > 0) { + double decode_cost_per_record; + double decompress_cost_per_record; + double total_cost = + GetTotalCost(decode_cost_per_record, decompress_cost_per_record); + double min_cost = std::numeric_limits::max(); + for (size_t i = 1; i < curr_threads; i++) { + // Compute cost when using `i` threads + double cost_per_thread = total_cost / i; + double max_thread_delay = 0; + for (size_t j = 0; j < i; j++) { + double thread_delay = 0; + if (thread_itrs[j] > 0) { + thread_delay = thread_delays[j] / thread_itrs[j]; + } + max_thread_delay = std::max(thread_delay, max_thread_delay); + } + if (cost_per_thread + max_thread_delay < min_cost) { + min_cost = cost_per_thread + max_thread_delay; + ideal_num_threads = i; + } + } + } + return ideal_num_threads; + } + + uint64 GetTotalStats(std::vector& vec) { + return std::accumulate(vec.begin(), vec.end(), 0); + } + + std::unique_ptr shuffle_handler_ = nullptr; + std::unique_ptr decompression_handler_ = nullptr; + const std::shared_ptr cond_var_ = nullptr; + const std::shared_ptr write_var_ = nullptr; + size_t batch_size_; + size_t shuffle_buffer_size_; + + atds::sparse::ValueBuffer value_buffer_; + std::unique_ptr thread_pool_ = nullptr; + + const std::shared_ptr mu_; + std::unique_ptr prefetch_thread_ TF_GUARDED_BY(*mu_); + std::vector > blocks_ TF_GUARDED_BY(*mu_); + + mutex input_mu_ TF_ACQUIRED_BEFORE(*mu_); + size_t count_ TF_GUARDED_BY(input_mu_) = 0; + bool cancelled_ TF_GUARDED_BY(input_mu_) = false; + bool prefetch_thread_finished_ TF_GUARDED_BY(input_mu_) = false; + Status prefetch_thread_status_ 
TF_GUARDED_BY(input_mu_); + uint64 num_blocks_read_ TF_GUARDED_BY(input_mu_) = 0; + std::vector > write_blocks_ + TF_GUARDED_BY(input_mu_); + + std::unique_ptr atds_decoder_ = nullptr; + string expected_schema_ = ""; + std::vector total_records_parsed_ TF_GUARDED_BY(*mu_); + std::vector total_decode_micros_ TF_GUARDED_BY(*mu_); + std::vector num_decompressed_objects_ TF_GUARDED_BY(*mu_); + std::vector total_decompress_micros_ TF_GUARDED_BY(*mu_); + std::vector thread_delays TF_GUARDED_BY(*mu_); + std::vector thread_itrs TF_GUARDED_BY(*mu_); + }; + + const std::vector filenames_; + const int64 batch_size_, reader_buffer_size_, shuffle_buffer_size_, + num_parallel_calls_; + const bool drop_remainder_; + const std::vector feature_keys_, feature_types_; + const std::vector sparse_dtypes_; + const std::vector sparse_shapes_; + const std::vector output_dtypes_; + const std::vector output_shapes_; + std::vector sparse_value_index_; + DataTypeVector output_dtype_vector_; + + std::vector output_tensor_types_; + + std::vector dense_features_; + std::vector sparse_features_; + std::vector varlen_features_; + SparseDtypeCounts sparse_dtype_counts_; + SparseExpectedElements sparse_expected_elements_; + size_t num_of_dense_ = 0, num_of_sparse_ = 0; +}; + +ATDSDatasetOp::ATDSDatasetOp(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr(kFeatureKeys, &feature_keys_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr(kFeatureTypes, &feature_types_)); + + OP_REQUIRES_OK(ctx, ctx->GetAttr(kSparseDtypes, &sparse_dtypes_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr(kSparseShapes, &sparse_shapes_)); + + OP_REQUIRES_OK(ctx, ctx->GetAttr(kOutputDtypes, &output_dtypes_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr(kOutputShapes, &output_shapes_)); + + auto feature_num = feature_keys_.size(); + OP_REQUIRES(ctx, feature_num == feature_types_.size(), + errors::InvalidArgument(strings::StrCat( + "The length of feature_keys must equal to the ", + "length of feature_types. 
[", feature_num, + " != ", feature_types_.size(), "]"))); + + OP_REQUIRES(ctx, feature_num == output_dtypes_.size(), + errors::InvalidArgument(strings::StrCat( + "The length of feature_keys must equal to the ", + "length of output_dtypes. [", feature_num, + " != ", output_dtypes_.size(), "]"))); + + OP_REQUIRES(ctx, feature_num == output_shapes_.size(), + errors::InvalidArgument(strings::StrCat( + "The length of feature_keys must equal to the ", + "length of output_shapes. [", feature_num, + " != ", output_shapes_.size(), "]"))); + + size_t num_sparse = 0; + for (auto& type : feature_types_) { + OP_REQUIRES( + ctx, type == kDenseType || type == kSparseType || type == kVarlenType, + errors::InvalidArgument(strings::StrCat( + "Invalid feature_type, '", type, "'. Only ", kDenseType, ", ", + kSparseType, ", and ", kVarlenType, " are supported."))); + if (type == kSparseType || type == kVarlenType) { + num_sparse++; + } + } + + OP_REQUIRES(ctx, sparse_dtypes_.size() == num_sparse, + errors::InvalidArgument(strings::StrCat( + "The length of sparse_dtypes must equal to the number of ", + "sparse features configured in feature_types. [", + sparse_dtypes_.size(), " != ", num_sparse, "]"))); + + OP_REQUIRES(ctx, sparse_shapes_.size() == num_sparse, + errors::InvalidArgument(strings::StrCat( + "The length of sparse_shapes must equal to the number of ", + "sparse features configured in feature_types. 
[", + sparse_shapes_.size(), " != ", num_sparse, "]"))); +} + +void ATDSDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase** output) { + const Tensor* filenames_tensor; + OP_REQUIRES_OK(ctx, ctx->input(kFileNames, &filenames_tensor)); + OP_REQUIRES( + ctx, filenames_tensor->dims() <= 1, + errors::InvalidArgument("`filenames` must be a scalar or a vector.")); + + std::vector filenames; + filenames.reserve(filenames_tensor->NumElements()); + for (int i = 0; i < filenames_tensor->NumElements(); ++i) { + VLOG(2) << "Reading file: " << filenames_tensor->flat()(i); + filenames.push_back(filenames_tensor->flat()(i)); + } + + int64 batch_size = 0; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, kBatchSize, &batch_size)); + OP_REQUIRES( + ctx, batch_size > 0, + errors::InvalidArgument(strings::StrCat( + "`batch_size` must be greater than 0 but found ", batch_size))); + + bool drop_remainder = false; + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, kDropRemainder, &drop_remainder)); + + int64 reader_buffer_size = 0; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, kReaderBufferSize, + &reader_buffer_size)); + OP_REQUIRES(ctx, reader_buffer_size > 0, + errors::InvalidArgument(strings::StrCat( + "`reader_buffer_size` must be greater than 0 but found ", + reader_buffer_size))); + + int64 shuffle_buffer_size = 0; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, kShuffleBufferSize, + &shuffle_buffer_size)); + OP_REQUIRES( + ctx, shuffle_buffer_size >= 0, + errors::InvalidArgument(strings::StrCat( + "`shuffle_buffer_size` must be greater than or equal to 0 but found ", + shuffle_buffer_size))); + + int64 num_parallel_calls = 0; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, kNumParallelCalls, + &num_parallel_calls)); + OP_REQUIRES(ctx, + num_parallel_calls > 0 || + num_parallel_calls == tensorflow::data::model::kAutotune, + errors::InvalidArgument( + strings::StrCat("`num_parallel_calls` must be a positive " + "integer or tf.data.AUTOTUNE, got ", + num_parallel_calls))); + 
*output = new Dataset( + ctx, std::move(filenames), batch_size, drop_remainder, reader_buffer_size, + shuffle_buffer_size, num_parallel_calls, feature_keys_, feature_types_, + sparse_dtypes_, sparse_shapes_, output_dtypes_, output_shapes_); +} + +namespace { +REGISTER_KERNEL_BUILDER(Name("IO>ATDSDataset").Device(DEVICE_CPU), + ATDSDatasetOp); +} // namespace + +} // namespace data +} // namespace tensorflow diff --git a/tensorflow_io/core/kernels/avro/atds_dataset_kernels.h b/tensorflow_io/core/kernels/avro/atds_dataset_kernels.h new file mode 100644 index 000000000..22bec7d6c --- /dev/null +++ b/tensorflow_io/core/kernels/avro/atds_dataset_kernels.h @@ -0,0 +1,60 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DATASET_OP_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DATASET_OP_H_ + +#include "tensorflow/core/framework/dataset.h" + +namespace tensorflow { +namespace data { + +class ATDSDatasetOp : public DatasetOpKernel { + public: + static constexpr const char* const kDatasetType = "ATDSDatum"; + static constexpr const char* const kFileNames = "filenames"; + static constexpr const char* const kBatchSize = "batch_size"; + static constexpr const char* const kDropRemainder = "drop_remainder"; + static constexpr const char* const kReaderBufferSize = "reader_buffer_size"; + static constexpr const char* const kShuffleBufferSize = "shuffle_buffer_size"; + static constexpr const char* const kNumParallelCalls = "num_parallel_calls"; + static constexpr const char* const kFeatureKeys = "feature_keys"; + static constexpr const char* const kFeatureTypes = "feature_types"; + static constexpr const char* const kSparseDtypes = "sparse_dtypes"; + static constexpr const char* const kSparseShapes = "sparse_shapes"; + static constexpr const char* const kOutputDtypes = "output_dtypes"; + static constexpr const char* const kOutputShapes = "output_shapes"; + + static constexpr const char* const kDenseType = "dense"; + static constexpr const char* const kSparseType = "sparse"; + static constexpr const char* const kVarlenType = "varlen"; + + explicit ATDSDatasetOp(OpKernelConstruction* ctx); + + protected: + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override; + + private: + class Dataset; + + std::vector feature_keys_, feature_types_; + std::vector sparse_dtypes_, output_dtypes_; + std::vector sparse_shapes_, output_shapes_; +}; + +} // namespace data +} // namespace tensorflow + +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_ATDS_DATASET_OP_H_ diff --git a/tensorflow_io/core/kernels/avro/avro_record_dataset_kernels.h 
b/tensorflow_io/core/kernels/avro/avro_record_dataset_kernels.h index 3d1a8668a..0c94930c0 100644 --- a/tensorflow_io/core/kernels/avro/avro_record_dataset_kernels.h +++ b/tensorflow_io/core/kernels/avro/avro_record_dataset_kernels.h @@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_DATA_CORE_KERNELS_AVRO_RECORD_DATASET_OP_H_ -#define TENSORFLOW_DATA_CORE_KERNELS_AVRO_RECORD_DATASET_OP_H_ +#ifndef TENSORFLOW_IO_CORE_KERNELS_AVRO_RECORD_DATASET_OP_H_ +#define TENSORFLOW_IO_CORE_KERNELS_AVRO_RECORD_DATASET_OP_H_ #include "tensorflow/core/framework/dataset.h" #include "tensorflow/core/framework/dataset_metadata.pb.h" @@ -38,4 +38,4 @@ class AvroRecordDatasetOp : public DatasetOpKernel { } // namespace data } // namespace tensorflow -#endif // TENSORFLOW_DATA_CORE_KERNELS_AVRO_RECORD_DATASET_OP_H_ +#endif // TENSORFLOW_IO_CORE_KERNELS_AVRO_RECORD_DATASET_OP_H_ diff --git a/tensorflow_io/core/ops/avro_ops.cc b/tensorflow_io/core/ops/avro_ops.cc index 658bdeb3d..8fc26733e 100644 --- a/tensorflow_io/core/ops/avro_ops.cc +++ b/tensorflow_io/core/ops/avro_ops.cc @@ -322,4 +322,38 @@ REGISTER_OP("IO>AvroReadablePartitions") return OkStatus(); }); +REGISTER_OP("IO>ATDSDataset") + .Input("filenames: string") + .Input("batch_size: int64") + .Input("drop_remainder: bool") + .Input("reader_buffer_size: int64") + .Input("shuffle_buffer_size: int64") + .Input("num_parallel_calls: int64") + .Output("handle: variant") + .Attr("feature_keys: list(string) >= 0") + .Attr("feature_types: list(string) >= 0") + .Attr("sparse_dtypes: list({float,double,int64,int32,string,bool}) >= 0") + .Attr("sparse_shapes: list(shape) >= 0") + .Attr( + "output_dtypes: list({float,double,int64,int32,string,bool,variant}) " + ">= 0") + .Attr("output_shapes: list(shape) >= 
0") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle unused; + // `filenames` must be a scalar or a vector + TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(0), 1, &unused)); + // `batch_size` must be a scalar + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + // `drop_remainder` must be a scalar + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + // `reader_buffer_size` must be a scalar + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + // `shuffle_buffer_size` must be a scalar + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); + // `num_parallel_calls` must be a scalar + TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); + return shape_inference::ScalarShape(c); + }); + } // namespace tensorflow diff --git a/tensorflow_io/python/experimental/atds/__init__.py b/tensorflow_io/python/experimental/atds/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tensorflow_io/python/experimental/atds/dataset.py b/tensorflow_io/python/experimental/atds/dataset.py new file mode 100644 index 000000000..f627ef9c4 --- /dev/null +++ b/tensorflow_io/python/experimental/atds/dataset.py @@ -0,0 +1,300 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""ATDSDataset""" + +from typing import Optional + +import tensorflow as tf +from tensorflow.python.data.util import convert +from tensorflow.python.data.util import structure +from tensorflow.python.data.util import nest +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import errors +from tensorflow.python.framework import tensor_util + +from tensorflow_io.python.ops import core_ops +from tensorflow_io.python.experimental.atds.features import ( + DenseFeature, + SparseFeature, + VarlenFeature, +) + +# Argument default values used in ATDS Dataset. +_DEFAULT_DROP_REMAINDER = False  # Do not drop last batch. +_DEFAULT_READER_BUFFER_SIZE_BYTES = 128 * 1024  # 128 KB +_DEFAULT_SHUFFLE_BUFFER_SIZE_EXAMPLES = 0  # shuffle is disabled. +_DEFAULT_NUM_PARALLEL_CALLS = 1  # process sequentially. + +# Feature type name used in ATDS Dataset Op. +_DENSE_FEATURE_TYPE = "dense" +_SPARSE_FEATURE_TYPE = "sparse" +_VARLEN_FEATURE_TYPE = "varlen" + +# Supported feature configs +_SUPPORTED_FEATURE_CONFIG = (DenseFeature, SparseFeature, VarlenFeature) + + +class ATDSDataset(dataset_ops.DatasetSource): + """A `Dataset` comprising records from one or more Avro files. + + This dataset loads Avro records from the files into a dict of tensors. + The output dict has feature name as key and tf.Tensor or tf.SparseTensor + as value. The output tensor values are batched with the user defined + batch size. + + Shuffle can be enabled before batch by configuring shuffle buffer size. + The shuffle buffer size dictates the elements *in addition* to the batch size + that would be read and sampled. + This dataset keeps collecting Avro blocks (a sequence of Avro records), + until the total number of unread records is greater than the shuffle + buffer size + batch_size, then randomly samples a block from the collected blocks.
+ An Avro Record from the sampled block will be parsed and batched into + the output tensors. + + For instance, assume your dataset contains 5 blocks with 100 records in + each block. When the batch size is set to 32 and shuffle buffer size is set + to 128, this dataset will collect two blocks as the two blocks contain more + than 128 + 32 = 160 unread records, and randomly samples a block from the two + blocks 32 times. + When a block is sampled, a record in the sampled block is read and batched + into the output tensor dict until all records in the sampled block are read. + If only one block fits into the batch + shuffle_buffer_size, records in that + block will be read sequentially without shuffle. Users can increase the + shuffle buffer size or apply dataset unbatch, shuffle, and batch for better shuffling. + + The memory footprint of this shuffle buffer is significantly different from tf.data.Dataset.shuffle. + In TensorFlow's shuffle, the shuffle buffer specifies a separate buffer of elements to pick + random elements from. In this implementation, the shuffle buffer + batch forms the total number of + elements that would be read for sampling.
+ + Here's an example comparing shuffle between AvroReader and Tensorflow: + + Data size is 1000 + Batch size is 64 + + Case 1: perfect shuffle + Shuffle buffer size 1000 + TF: shuffle elements(1, 1000) to create a batch of size 64 + AvroReader: shuffle elements(1, 1000) to create a batch of size 64 + + Case 2: not perfect but shuffle > batch + Shuffle buffer size is 256 + TF: + Shuffle elements(1, 256) to pick 1 element + Shuffle elements(1, 257) to pick 1 element + … + shuffle(1, 320) to create a batch of size 64 + + AvroReader: + Shuffle elements(1, 320) to create a batch of size 64 + + Case 3: shuffle buffer < batch + Shuffle buffer size is 32 + TF: + Shuffle elements(1, 32) to pick 1 element + Shuffle elements(1, 33) to pick 1 element + … + shuffle(1, 96) to create a batch of size 64 + + ATDS: Shuffle elements(1, 96) to create a batch of size 64 + + Case 4: no shuffle + Shuffle buffer size is 0 + Tensorflow and ATDS both will just directly read to create a batch of size 64 + + + A minimal example is given below: + + >>> import tempfile + >>> import avro.schema + >>> from avro.datafile import DataFileWriter + >>> from avro.io import DatumWriter + >>> from tensorflow_io.python.experimental.atds.dataset import ATDSDataset + >>> from tensorflow_io.python.experimental.atds.features import DenseFeature + >>> example_path = os.path.join(tempfile.gettempdir(), "example.avro") + >>> np.random.seed(0) + + >>> # Define Avro schema in ATDS format. + >>> json_schema = '''{ + ... "type": "record", + ... "name": "example", + ... "fields": [ + ... { "name": "x", "type": "float" }, + ... { "name": "y", "type": "float" } + ... ] + ... }''' + >>> schema = avro.schema.Parse(json_schema) + + >>> # Write the Avro records to a file. + >>> with open(example_path, "wb") as f: + ... writer = DataFileWriter(f, DatumWriter(), schema) + ... for _ in range(3): + ... x, y = np.random.random(), np.random.random() + ... writer.append({"x": x, "y": y}) + ... 
writer.close() + + >>> # Read the data back out. + >>> feature_config = { + ... "x": DenseFeature([], dtype=tf.float32), + ... "y": DenseFeature([], dtype=tf.float32) + ... } + >>> for batch in ATDSDataset([example_path], batch_size=2, + ... features=feature_config): + ... print("x = {x}, y = {y}".format(**batch)) + x = [0.5488135 0.60276335], y = [0.71518934 0.5448832 ] + x = [0.4236548], y = [0.6458941] + """ + + def __init__( + self, + filenames, + batch_size, + features, + drop_remainder=False, + reader_buffer_size=None, + shuffle_buffer_size=None, + num_parallel_calls=None, + ): + """Creates a `ATDSDataset` to read one or more Avro files encoded with + ATDS Schema. + + Each element of the dataset contains an Avro Record that will be + parsed into a dict of tensors. + + Args: + filenames: A `tf.string` tensor containing one or more filenames. + batch_size: A `tf.int64` scalar representing the number of records to + read and parse per iteration. + features: A feature configuration dict with feature name as key and + ATDS feature as value. ATDS features can be one of the DenseFeature, + SparseFeature, or VarlenFeature. See + tensorflow_io.python.experimental.atds.features for more details. + drop_remainder: (Optional.) A `tf.bool` scalar tf.Tensor, representing + whether the last batch should be dropped in the case it has fewer + than batch_size elements. The default behavior is not to drop the + smaller batch. + reader_buffer_size: (Optional.) A `tf.int64` scalar representing the + number of bytes used in the file content buffering. + shuffle_buffer_size: (Optional.) A `tf.int64` scalar representing the + number of records to shuffle together before batching. If not + specified, data is batched without shuffle. + num_parallel_calls: (Optional.) A `tf.int64` scalar representing the + maximum thread number used in the dataset. If greater than one, + records in files are processed in parallel with deterministic order. 
+ The number will be truncated when it is greater than the maximum + available parallelism number on the host. If set to `tf.data.AUTOTUNE`, + number of threads will be adjusted dynamically based on workload and + available resources. If not specified, records will be processed sequentially. + + Raises: + TypeError: If any argument does not have the expected type. + ValueError: If any argument does not have the expected shape + or features have invalid config. + """ + self._filenames = filenames + self._batch_size = batch_size + self._drop_remainder = convert.optional_param_to_tensor( + "drop_remainder", + drop_remainder, + argument_default=_DEFAULT_DROP_REMAINDER, + argument_dtype=tf.bool, + ) + self._reader_buffer_size = convert.optional_param_to_tensor( + "reader_buffer_size", + reader_buffer_size, + argument_default=_DEFAULT_READER_BUFFER_SIZE_BYTES, + ) + self._shuffle_buffer_size = convert.optional_param_to_tensor( + "shuffle_buffer_size", + shuffle_buffer_size, + argument_default=_DEFAULT_SHUFFLE_BUFFER_SIZE_EXAMPLES, + ) + self._num_parallel_calls = convert.optional_param_to_tensor( + "num_parallel_calls", + num_parallel_calls, + argument_default=_DEFAULT_NUM_PARALLEL_CALLS, + ) + + if features is None or not isinstance(features, dict): + raise ValueError( + f"Features can only be a dict with feature name as key" + f" and ATDS feature configuration as value but found {features}." + f" Available feature configuration are {_SUPPORTED_FEATURE_CONFIG}." + ) + if not features: + raise ValueError( + "Features dict cannot be empty and should have at " "least one feature." + ) + + feature_keys = [] + feature_types = [] + sparse_dtypes = [] + sparse_shapes = [] + + element_spec = {} + for key in sorted(features): + feature = features[key] + if not isinstance(feature, _SUPPORTED_FEATURE_CONFIG): + raise ValueError( + f"Unknown ATDS feature configuration {feature}. " + f"Only {_SUPPORTED_FEATURE_CONFIG} are supported." 
+ ) + + feature_keys.append(key) + shape = [dim if dim != -1 else None for dim in feature.shape] + if isinstance(feature, DenseFeature): + feature_types.append(_DENSE_FEATURE_TYPE) + element_spec[key] = tf.TensorSpec(shape, feature.dtype) + elif isinstance(feature, SparseFeature): + feature_types.append(_SPARSE_FEATURE_TYPE) + sparse_dtypes.append(feature.dtype) + sparse_shapes.append(shape) + element_spec[key] = tf.SparseTensorSpec(shape, feature.dtype) + elif isinstance(feature, VarlenFeature): + feature_types.append(_VARLEN_FEATURE_TYPE) + sparse_dtypes.append(feature.dtype) + sparse_shapes.append(shape) + element_spec[key] = tf.SparseTensorSpec(shape, feature.dtype) + + constant_drop_remainder = tensor_util.constant_value(self._drop_remainder) + if constant_drop_remainder: + constant_batch_size = tensor_util.constant_value(self._batch_size) + self._element_spec = nest.map_structure( + lambda spec: spec._batch(constant_batch_size), element_spec + ) + else: + self._element_spec = nest.map_structure( + lambda spec: spec._batch(None), element_spec + ) + + variant_tensor = core_ops.io_atds_dataset( + filenames=self._filenames, + batch_size=self._batch_size, + drop_remainder=self._drop_remainder, + reader_buffer_size=self._reader_buffer_size, + shuffle_buffer_size=self._shuffle_buffer_size, + num_parallel_calls=self._num_parallel_calls, + feature_keys=feature_keys, + feature_types=feature_types, + sparse_dtypes=sparse_dtypes, + sparse_shapes=sparse_shapes, + output_dtypes=structure.get_flat_tensor_types(self._element_spec), + output_shapes=structure.get_flat_tensor_shapes(self._element_spec), + ) + super().__init__(variant_tensor) + + @property + def element_spec(self): + return self._element_spec diff --git a/tensorflow_io/python/experimental/atds/features.py b/tensorflow_io/python/experimental/atds/features.py new file mode 100644 index 000000000..a599d6ac8 --- /dev/null +++ b/tensorflow_io/python/experimental/atds/features.py @@ -0,0 +1,115 @@ +# Copyright 2023 
The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +import collections +from typing import List + +import tensorflow as tf + +ATDS_SUPPORTED_DTYPES = [tf.int32, tf.int64, tf.float32, tf.float64, tf.string, tf.bool] + + +def _raise_error_if_dtype_not_supported(dtype: tf.dtypes.DType): + if dtype not in ATDS_SUPPORTED_DTYPES: + raise ValueError( + f"{dtype} is not supported in ATDS. " + f"Available dtypes are {ATDS_SUPPORTED_DTYPES}." + ) + + +def _raise_error_if_shape_is_none(shape: List[int]): + if shape is None: + raise ValueError(f"Shape cannot be None.") + + +def _raise_error_if_shape_has_none_or_zero(shape: List[int]): + for dim in shape: + if dim is None or dim == 0: + raise ValueError( + f"Dimension in shape cannot be None or 0 but found {shape}." + ) + + +def _raise_error_if_dtype_is_none(dtype: tf.dtypes.DType): + if dtype is None: + raise ValueError(f"dtype cannot be None.") + + +def _validate_shape_and_dtype(shape: List[int], dtype: tf.dtypes.DType): + _raise_error_if_shape_is_none(shape) + _raise_error_if_shape_has_none_or_zero(shape) + _raise_error_if_dtype_is_none(dtype) + _raise_error_if_dtype_not_supported(dtype) + + +class DenseFeature(collections.namedtuple("DenseFeature", ["shape", "dtype"])): + """ + Configuration for reading and parsing a tf.Tensor encoded with + ATDS dense feature schema. 
+ + Fields: + shape: Shape of input data. Each dimension should be positive. + dtype: Data type of input. + """ + + def __new__(cls, shape: List[int], dtype: tf.dtypes.DType): + _validate_shape_and_dtype(shape, dtype) + for dim in shape: + if dim <= 0: + raise ValueError( + f"Each dimension should be greater than 0" + f" in DenseFeature but found {shape}." + ) + + return super().__new__(cls, shape, dtype) + + +class SparseFeature(collections.namedtuple("SparseFeature", ["shape", "dtype"])): + """ + Configuration for reading and parsing a tf.SparseTensor encoded with + ATDS sparse feature schema. + + Fields: + shape: Shape of input data. shape cannot be empty. + dtype: Data type of input. + """ + + def __new__(cls, shape: List[int], dtype: tf.dtypes.DType): + _validate_shape_and_dtype(shape, dtype) + if len(shape) == 0: + raise ValueError("SparseFeature cannot be scalar.") + + return super().__new__(cls, shape, dtype) + + +class VarlenFeature(collections.namedtuple("VarlenFeature", ["shape", "dtype"])): + """ + Configuration for reading and parsing a tf.SparseTensor encoded with + ATDS ragged feature schema. + + Fields: + shape: Shape of input data. Use -1 as unknown dimension. + dtype: Data type of input. + """ + + def __new__(cls, shape: List[int], dtype: tf.dtypes.DType): + _validate_shape_and_dtype(shape, dtype) + for dim in shape: + if dim <= 0 and dim != -1: + raise ValueError( + f"Each dimension should be greater than 0 or " + f"-1 in VarlenFeature but found {shape}." 
+ ) + + return super().__new__(cls, shape, dtype) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_atds_avro/__init__.py b/tests/test_atds_avro/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_atds_avro/benchmark/test_dense_tensor_atds_benchmark.py b/tests/test_atds_avro/benchmark/test_dense_tensor_atds_benchmark.py new file mode 100644 index 000000000..62864ba6f --- /dev/null +++ b/tests/test_atds_avro/benchmark/test_dense_tensor_atds_benchmark.py @@ -0,0 +1,313 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================== +"""ATDSDataset benchmark with dense tensors.""" + +import pytest +import tensorflow as tf + +from tests.test_atds_avro.utils.data_source_registry import ( + TensorType, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, +) +from tests.test_atds_avro.utils.atds_benchmark_utils import run_atds_benchmark + + +@pytest.mark.benchmark( + group="dense_int32_0d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_int32_0d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 0, + tf.int32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_int32_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_int32_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 1, + tf.int32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_int32_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_int32_2d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 2, + tf.int32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_int64_0d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_int64_0d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 0, + tf.int64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_int64_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_int64_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 1, + tf.int64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_int64_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_int64_2d(batch_size, benchmark): + run_atds_benchmark( + 
TensorType.DENSE, + 2, + tf.int64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_float32_0d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_float32_0d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 0, + tf.float32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_float32_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_float32_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 1, + tf.float32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_float32_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_float32_2d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 2, + tf.float32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_float64_0d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_float64_0d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 0, + tf.float64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_float64_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_float64_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 1, + tf.float64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_float64_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_float64_2d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 2, + tf.float64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_string_0d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def 
test_dense_string_0d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 0, + tf.string, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_string_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_string_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 1, + tf.string, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_string_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_string_2d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 2, + tf.string, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_bool_0d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_bool_0d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 0, + tf.bool, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_bool_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_bool_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 1, + tf.bool, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="dense_bool_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_dense_bool_2d(batch_size, benchmark): + run_atds_benchmark( + TensorType.DENSE, + 2, + tf.bool, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) diff --git a/tests/test_atds_avro/benchmark/test_sparse_tensor_atds_benchmark.py b/tests/test_atds_avro/benchmark/test_sparse_tensor_atds_benchmark.py new file mode 100644 index 000000000..1a3dcfe65 --- /dev/null +++ b/tests/test_atds_avro/benchmark/test_sparse_tensor_atds_benchmark.py @@ -0,0 +1,217 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# ============================================================================== +"""ATDSDataset benchmark with sparse tensors.""" + +import pytest +import tensorflow as tf + +from tests.test_atds_avro.utils.data_source_registry import ( + TensorType, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, +) +from tests.test_atds_avro.utils.atds_benchmark_utils import run_atds_benchmark + + +@pytest.mark.benchmark( + group="sparse_int32_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_sparse_int32_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.SPARSE, + 1, + tf.int32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="sparse_int32_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_sparse_int32_2d(batch_size, benchmark): + run_atds_benchmark( + TensorType.SPARSE, + 2, + tf.int32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="sparse_int64_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_sparse_int64_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.SPARSE, + 1, + tf.int64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="sparse_int64_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_sparse_int64_2d(batch_size, benchmark): + run_atds_benchmark( + 
TensorType.SPARSE, + 2, + tf.int64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="sparse_float32_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_sparse_float32_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.SPARSE, + 1, + tf.float32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="sparse_float32_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_sparse_float32_2d(batch_size, benchmark): + run_atds_benchmark( + TensorType.SPARSE, + 2, + tf.float32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="sparse_float64_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_sparse_float64_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.SPARSE, + 1, + tf.float64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="sparse_float64_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_sparse_float64_2d(batch_size, benchmark): + run_atds_benchmark( + TensorType.SPARSE, + 2, + tf.float64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="sparse_string_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_sparse_string_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.SPARSE, + 1, + tf.string, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="sparse_string_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_sparse_string_2d(batch_size, benchmark): + run_atds_benchmark( + TensorType.SPARSE, + 2, + tf.string, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="sparse_bool_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def 
test_sparse_bool_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.SPARSE, + 1, + tf.bool, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="sparse_bool_2d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_sparse_bool_2d(batch_size, benchmark): + run_atds_benchmark( + TensorType.SPARSE, + 2, + tf.bool, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) diff --git a/tests/test_atds_avro/benchmark/test_varlen_tensor_atds_benchmark.py b/tests/test_atds_avro/benchmark/test_varlen_tensor_atds_benchmark.py new file mode 100644 index 000000000..097b46a88 --- /dev/null +++ b/tests/test_atds_avro/benchmark/test_varlen_tensor_atds_benchmark.py @@ -0,0 +1,121 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================== +"""ATDSDataset benchmark with varlen tensors.""" + +import pytest +import tensorflow as tf + +from tests.test_atds_avro.utils.data_source_registry import ( + TensorType, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, +) +from tests.test_atds_avro.utils.atds_benchmark_utils import run_atds_benchmark + + +@pytest.mark.benchmark( + group="varlen_int32_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_varlen_int32_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.VARLEN, + 1, + tf.int32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="varlen_int64_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_varlen_int64_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.VARLEN, + 1, + tf.int64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="varlen_float32_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_varlen_float32_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.VARLEN, + 1, + tf.float32, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="varlen_float64_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_varlen_float64_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.VARLEN, + 1, + tf.float64, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="varlen_string_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_varlen_string_1d(batch_size, benchmark): + run_atds_benchmark( + TensorType.VARLEN, + 1, + tf.string, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) + + +@pytest.mark.benchmark( + group="varlen_bool_1d", +) +@pytest.mark.parametrize("batch_size", [(128)]) +def test_varlen_bool_1d(batch_size, benchmark): + 
run_atds_benchmark( + TensorType.VARLEN, + 1, + tf.bool, + SMALL_NUM_RECORDS, + SINGLE_PARTITION, + batch_size, + benchmark, + ) diff --git a/tests/test_atds_avro/utils/__init__.py b/tests/test_atds_avro/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_atds_avro/utils/atds_benchmark_utils.py b/tests/test_atds_avro/utils/atds_benchmark_utils.py new file mode 100644 index 000000000..11306b053 --- /dev/null +++ b/tests/test_atds_avro/utils/atds_benchmark_utils.py @@ -0,0 +1,196 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================== +"""Utility functions for ATDS benchmarks.""" + +import avro.schema +import glob +import json +import os +import tensorflow as tf + +from tests.test_atds_avro.utils.data_source import DataSource +from tests.test_atds_avro.utils.data_source_registry import ( + SMALL_NUM_RECORDS, + get_canonical_name, + get_data_source_from_registry, +) +from tests.test_atds_avro.utils.generator.tensor_generator import ( + IntTensorGenerator, + FloatTensorGenerator, + WordTensorGenerator, + BoolTensorGenerator, +) +from tests.test_atds_avro.utils.generator.sparse_tensor_generator import ( + IntSparseTensorGenerator, + FloatSparseTensorGenerator, + WordSparseTensorGenerator, + BoolSparseTensorGenerator, + get_common_value_dist, +) +from tests.test_atds_avro.utils.generator.varlen_tensor_generator import ( + IntVarLenTensorGenerator, + FloatVarLenTensorGenerator, + WordVarLenTensorGenerator, + BoolVarLenTensorGenerator, +) + + +from tensorflow_io.python.experimental.atds.dataset import ATDSDataset +from tests.test_atds_avro.utils.atds_writer import ATDSWriter +from tests.test_atds_avro.utils.benchmark_utils import benchmark_func + + +_AVRO_TO_DTYPE = { + "int": tf.int32, + "long": tf.int64, + "float": tf.float32, + "double": tf.float64, + "boolean": tf.bool, + "string": tf.string, +} + +_AVRO_TO_DENSE_TENSOR_GENERATOR = { + "int": IntTensorGenerator, + "long": IntTensorGenerator, + "float": FloatTensorGenerator, + "double": FloatTensorGenerator, + "boolean": BoolTensorGenerator, + "string": WordTensorGenerator, +} + +_AVRO_TO_SPARSE_TENSOR_GENERATOR = { + "int": IntSparseTensorGenerator, + "long": IntSparseTensorGenerator, + "float": FloatSparseTensorGenerator, + "double": FloatSparseTensorGenerator, + "boolean": BoolSparseTensorGenerator, + "string": WordSparseTensorGenerator, +} + +_AVRO_TO_VARLEN_TENSOR_GENERATOR = { + "int": IntVarLenTensorGenerator, + "long": IntVarLenTensorGenerator, + "float": 
FloatVarLenTensorGenerator, + "double": FloatVarLenTensorGenerator, + "boolean": BoolVarLenTensorGenerator, + "string": WordVarLenTensorGenerator, +} + + +def get_features_from_data_source(writer, data_source): + """Generates a dict of features from data source object + + Args: + writer: ATDSWriter object + data_source: DataSource object + """ + scenario = data_source.scenario + features = { + feature_name: writer._get_atds_feature(scenario[feature_name]) + for feature_name in scenario + } + return features + + +def get_dataset( + files, + features, + batch_size=1, + shuffle_buffer_size=0, + parallelism=os.cpu_count(), + interleave_parallelism=0, +): + """Generates a tf.data.Dataset from a datasource + + Args: + files: A list of files + features: Dict of features + batch_size: (Optional.) Batch size for ATDS dataset + shuffle_buffer_size: (Optional.) Size of the buffer used for shuffling. See + tensorflow_io/python/experimental/atds/dataset.py for details. + If unspecified, data is not shuffled. + parallelism: (Optional.) Number of threads to use while decoding. Defaults + to all available cores. 
+ """ + if interleave_parallelism == 0: + dataset = ATDSDataset( + filenames=files, + batch_size=batch_size, + features=features, + shuffle_buffer_size=shuffle_buffer_size, + num_parallel_calls=parallelism, + ) + else: + dataset = tf.data.Dataset.list_files(files) + dataset = dataset.interleave( + lambda filename: ATDSDataset( + filenames=filename, + batch_size=batch_size, + features=features, + shuffle_buffer_size=shuffle_buffer_size, + num_parallel_calls=parallelism, + ), + cycle_length=interleave_parallelism, + num_parallel_calls=interleave_parallelism, + ) + return dataset.prefetch(1) + + +def _is_fully_defined_shape(shape): + return -1 not in shape + + +def run_atds_benchmark( + tensor_type, rank, dtype, num_records, partitions, batch_size, benchmark +): + data_source_name = get_canonical_name( + tensor_type, rank, dtype, num_records, partitions + ) + data_source = get_data_source_from_registry(data_source_name) + run_atds_benchmark_from_data_source(data_source, batch_size, benchmark) + + +def run_atds_benchmark_from_data_source( + data_source, + batch_size, + benchmark, + parallelism=tf.data.AUTOTUNE, + interleave_parallelism=0, + codec="null", + shuffle_buffer_size=0, + rounds=30, +): + with ATDSWriter(codec=codec) as writer: + dir_path = writer.write(data_source) + pattern = os.path.join(dir_path, f"*.{writer.extension}") + + dataset = get_dataset( + glob.glob(pattern), + get_features_from_data_source(writer, data_source), + batch_size=batch_size, + shuffle_buffer_size=shuffle_buffer_size, + parallelism=parallelism, + interleave_parallelism=interleave_parallelism, + ) + count = benchmark.pedantic( + target=benchmark_func, + args=[dataset], + iterations=2, + # pytest-benchmark calculates statistic across rounds. Set it with + # larger number (N > 30) for test statistic. 
+ rounds=rounds, + kwargs={}, + ) + assert count > 0, f"ATDS record count: {count} must be greater than 0" diff --git a/tests/test_atds_avro/utils/atds_writer.py b/tests/test_atds_avro/utils/atds_writer.py new file mode 100644 index 000000000..564ec1e44 --- /dev/null +++ b/tests/test_atds_avro/utils/atds_writer.py @@ -0,0 +1,304 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# ============================================================================== +import os +import hashlib +import json +import numpy as np +import tensorflow as tf + +from avro.schema import Parse as parse +from avro.datafile import DataFileWriter +from avro.io import DatumWriter + +from tests.test_atds_avro.utils.file_writer import FileWriter +from tests.test_atds_avro.utils.generator.varlen_tensor_generator import ( + VarLenTensorGeneratorBase, +) +from tensorflow_io.python.experimental.atds.features import ( + DenseFeature, + SparseFeature, + VarlenFeature, +) + + +class ATDSWriter(FileWriter): + # ATDSWriter generates Avro data from input DataSource. 
+ _BOOL_LIST_DTYPE = [tf.bool] + _INT64_LIST_DTYPE = [tf.int32, tf.int64, tf.uint32, tf.uint64] + _FLOAT_LIST_DTYPE = [tf.float32, tf.float64] + _BYTES_LIST_DTYPE = [tf.string] + _AVRORECORD_EXTENSION = "avro" + _SPARSE_INDICES_KEY = "indices" + _SPARSE_VALUES_KEY = "values" + + _DTYPE_TO_AVRO = { + tf.float32: "float", + tf.float64: "double", + tf.int32: "int", + tf.int64: "long", + tf.bool: "boolean", + tf.string: "bytes", + } + + _AVRO_TO_SPARSE_TENSOR = { + "int": "IntSparseTensor", + "long": "LongSparseTensor", + "double": "DoubleSparseTensor", + "float": "FloatSparseTensor", + "string": "StringSparseTensor", + "bytes": "BytesSparseTensor", + "boolean": "BoolSparseTensor", + } + + def __init__(self, codec="null"): + """Create a new FileWriter. + + This must be called by the constructors of subclasses. + """ + super().__init__() + self._codec = codec + + @property + def extension(self): + """Return the file extension of the written files.""" + return ATDSWriter._AVRORECORD_EXTENSION + + def hash_code(self): + """Return the hashed code of this file writer""" + hash_code = super().hash_code() + + m = hashlib.sha256() + m.update(hash_code.encode()) + m.update(self._codec.encode()) + return m.hexdigest() + + def _write_to_path_from_cached_data(self, dir_path, data_source, dataset): + if os.path.exists(dir_path): + return dir_path + os.makedirs(dir_path) + + scenario = data_source.scenario + schema = parse(self.scenario_to_avro_schema(scenario)) + + filenames_to_num_records = self._get_filenames_to_num_records(data_source) + iterator = iter(dataset) + for filename in sorted(filenames_to_num_records): + file_path = os.path.join(dir_path, filename) + num_records = filenames_to_num_records[filename] + + with open(file_path, "wb") as out: + file_writer = DataFileWriter( + out, DatumWriter(), schema, codec=self._codec + ) + for _ in range(num_records): + features = {} + record = iterator.get_next() + for feature_name in record: + generator = 
data_source.scenario[feature_name] + # tf.Example only supports tf.float32, tf.int64, and tf.string + # For other dtypes, cast feature into its original dtype. + actual = tf.cast(record[feature_name], generator.spec.dtype) + self._add_tensor_to_features( + generator, feature_name, actual, features + ) + file_writer.append(features) + file_writer.close() + + def _write_to_path(self, dir_path, data_source): + """Generate benchmark data and write the data under the given path. + + Args: + dir_path: A str path to write files to. + data_source: A DataSource objects. + + Raises: + NotImplementedError: If subclass does not overload the function. + """ + scenario = data_source.scenario + schema = parse(self.scenario_to_avro_schema(scenario)) + + filenames_to_num_records = self._get_filenames_to_num_records(data_source) + for filename in sorted(filenames_to_num_records): + file_path = os.path.join(dir_path, filename) + num_records = filenames_to_num_records[filename] + + with open(file_path, "wb") as out: + file_writer = DataFileWriter( + out, DatumWriter(), schema, codec=self._codec + ) + for _ in range(num_records): + features = {} + for feature_name in scenario: + generator = scenario[feature_name] + tensor = generator.generate() + self._add_tensor_to_features( + generator, feature_name, tensor, features + ) + file_writer.append(features) + file_writer.close() + + def _add_tensor_to_features(self, generator, feature_name, tensor, features): + spec = generator.spec + if isinstance(spec, tf.TensorSpec): + self._add_dense_tensor_to_features(feature_name, tensor, features) + elif isinstance(spec, tf.SparseTensorSpec): + if issubclass(generator.get_generator_cls(), VarLenTensorGeneratorBase): + self._add_dense_tensor_to_features( + feature_name, tf.sparse.to_dense(tensor), features + ) + else: + self._add_sparse_tensor_to_features(feature_name, tensor, features) + else: + raise TypeError(f"Spec {spec} is not supported in ATDSWriter") + + def scenario_to_avro_schema(self, 
scenario): + """ + Goes through a scenario to convert it to an avro schema + + """ + schema = {"type": "record", "name": "row", "fields": []} + for feature_name in scenario: + generator = scenario[feature_name] + spec = generator.spec + if isinstance(spec, tf.TensorSpec): + self._add_dense_feature_schema(feature_name, spec, schema) + elif isinstance(spec, tf.SparseTensorSpec): + if issubclass(generator.get_generator_cls(), VarLenTensorGeneratorBase): + self._add_dense_feature_schema(feature_name, spec, schema) + else: + self._add_sparse_feature_schema(feature_name, spec, "long", schema) + schema_str = json.dumps(schema) + return schema_str + + def _map_tensor_dtype_to_avro_dtype(self, dtype): + """This function maps tensor dtype into the python type supported by avro.""" + if dtype in ATDSWriter._DTYPE_TO_AVRO: + return ATDSWriter._DTYPE_TO_AVRO[dtype] + else: + raise TypeError(f"Type {dtype} is not supported in Avro.") + + def _add_dense_feature_schema(self, name, spec, schema): + schema["fields"].append( + { + "name": name, + "type": self._add_dense_feature_type( + spec.dtype, len(spec.shape.as_list()) + ), + } + ) + + def _add_dense_feature_type(self, dtype, rank): + # if scalar then convert tf type to python type name + avro_type = self._map_tensor_dtype_to_avro_dtype(dtype) + if rank == 0: + return avro_type + else: + return { + "type": "array", + "items": self._add_dense_feature_type(dtype, rank - 1), + } + + def _infer_sparse_tensor_type(self, dtype): + value_avro_type = self._map_tensor_dtype_to_avro_dtype(dtype) + return ATDSWriter._AVRO_TO_SPARSE_TENSOR[value_avro_type] + + def _add_sparse_feature_schema(self, name, spec, indices_avro_type, schema): + schema["fields"].append( + { + "name": name, + "type": { + "type": "record", + "name": name + "_" + self._infer_sparse_tensor_type(spec.dtype), + "fields": self._add_sparse_feature_fields( + spec.dtype, indices_avro_type, len(spec.shape.as_list()) + ), + }, + } + ) + + def _add_sparse_feature_fields(self, 
dtype, indices_avro_type, rank): + value_avro_type = self._map_tensor_dtype_to_avro_dtype(dtype) + fields = [] + for dim in range(rank): + indices_name_at_dim = ATDSWriter._SPARSE_INDICES_KEY + str(dim) + fields.append( + { + "name": indices_name_at_dim, + "type": {"type": "array", "items": indices_avro_type}, + } + ) + value_field = { + "name": ATDSWriter._SPARSE_VALUES_KEY, + "type": {"type": "array", "items": value_avro_type}, + } + fields.append(value_field) + return fields + + def _get_flat_value(self, tensor): + rank = len(tensor.shape.as_list()) + value = tensor.numpy() + if ( + tensor.dtype == tf.string and rank == 0 + ): # There is no tolist() method for bytes or string + return value + return value.tolist() + + def _add_dense_tensor_to_features(self, name, tensor, features): + features[name] = self._get_flat_value(tensor) + + def _add_sparse_tensor_to_features(self, name, tensor, features): + rank = len(tensor.shape.as_list()) + indices = tensor.indices.numpy() # indices tensor must be a 2D array + # Split indices array along the second dimension so that the split arrays + # contain the indices for separate dimension. For example, + # indices = [[0, 1], [2, 3], [4, 5]] => + # indices_at_dim0 = [[0], [2], [4]] and + # indices_at_dim1 = [[1], [3], [5]]. + split_indices = np.split(indices, rank, axis=1) + features[name] = {} + for dim in range(rank): + indices_name_at_dim = ATDSWriter._SPARSE_INDICES_KEY + str(dim) + # convert indices to 1d array + features[name][indices_name_at_dim] = split_indices[dim].flatten().tolist() + + features[name][ATDSWriter._SPARSE_VALUES_KEY] = self._get_flat_value( + tensor.values + ) + + def _get_atds_feature(self, generator): + """Build ATDS feature parsing config + + Args: + generator: A TensorGenerator, SparseTensorGenerator, or VarLenTensorGenerator for generating data. + + Returns: + DenseFeature, SparseFeature, or VarlenFeature + + Raises: + TypeError: if spec is not tf.TensorSpec or tf.SparseTensorSpec. 
+ """ + spec = generator.spec + if isinstance(spec, tf.TensorSpec): + return DenseFeature(shape=spec.shape.as_list(), dtype=spec.dtype) + elif isinstance(spec, tf.SparseTensorSpec): + if issubclass(generator.get_generator_cls(), VarLenTensorGeneratorBase): + atds_shape = [ + -1 if dim == None else dim for dim in spec.shape.as_list() + ] + return VarlenFeature(shape=atds_shape, dtype=spec.dtype) + else: + return SparseFeature(shape=spec.shape.as_list(), dtype=spec.dtype) + else: + raise TypeError(f"Spec {spec} is not supported in ATDSWriter.") diff --git a/tests/test_atds_avro/utils/benchmark_utils.py b/tests/test_atds_avro/utils/benchmark_utils.py new file mode 100644 index 000000000..6893f717b --- /dev/null +++ b/tests/test_atds_avro/utils/benchmark_utils.py @@ -0,0 +1,125 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================== +"""Utility functions for benchmarks.""" + +import os +import tensorflow as tf + +from tests.test_atds_avro.utils.data_source_registry import ( + get_canonical_name, + get_data_source_from_registry, +) +from tests.test_atds_avro.utils.generator.tensor_generator import ( + IntTensorGenerator, + FloatTensorGenerator, + BoolTensorGenerator, +) +from tests.test_atds_avro.utils.generator.sparse_tensor_generator import ( + IntSparseTensorGenerator, + ValueDistribution, +) +from tests.test_atds_avro.utils.generator.varlen_tensor_generator import ( + WordVarLenTensorGenerator, + DimensionDistribution, +) +from tests.test_atds_avro.utils.tf_record_writer import TFRecordWriter + +MIXED_TYPES_SCENARIO = { + # simulate scalar int as label. + "int32_0d_dense": IntTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.int32)), + # simulate large sparse categorical ids. + "int64_1d_sparse": IntSparseTensorGenerator( + tf.SparseTensorSpec(shape=[50000], dtype=tf.int32), + ValueDistribution.SINGLE_VALUE, + ), + # simulate 1d float embedding input. + "float32_1d_varlen": FloatTensorGenerator( + tf.TensorSpec(shape=[128], dtype=tf.float32) + ), + # simulate 2d images + "float64_2d_dense": FloatTensorGenerator( + tf.TensorSpec(shape=[32, 32], dtype=tf.float64) + ), + # simulate a sentence with varlen words. + "string_1d_sparse": WordVarLenTensorGenerator( + tf.SparseTensorSpec(shape=[None], dtype=tf.string), + DimensionDistribution.LARGE_DIM, + ), + # simulate concatenated bool wide features. 
+ "bool_1d_dense": BoolTensorGenerator(tf.TensorSpec(shape=[5], dtype=tf.bool)), +} + + +def benchmark_func(dataset): + count = 0 + for _ in dataset: + count += 1 + return count + + +def create_tf_record_dataset( + filenames, parse_function, batch_size, file_parallelism=None, shuffle_buffer_size=0 +): + dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=file_parallelism) + if shuffle_buffer_size > 0: + dataset = dataset.shuffle(shuffle_buffer_size) + dataset = dataset.batch(batch_size) + dataset = dataset.map(parse_function) + dataset = dataset.prefetch(1) + return dataset + + +def run_tf_record_benchmark( + tensor_type, rank, dtype, num_records, partitions, batch_size, benchmark +): + data_source_name = get_canonical_name( + tensor_type, rank, dtype, num_records, partitions + ) + data_source = get_data_source_from_registry(data_source_name) + run_tf_record_benchmark_from_data_source(data_source, batch_size, benchmark) + + +def run_tf_record_benchmark_from_data_source( + data_source, + batch_size, + benchmark, + file_parallelism=None, + shuffle_buffer_size=0, + rounds=100, +): + with TFRecordWriter() as writer: + dir_path = writer.write(data_source) + pattern = os.path.join(dir_path, f"*.{writer.extension}") + filenames = tf.data.Dataset.list_files(pattern) + parse_function = writer.create_tf_example_parser_fn( + data_source, with_batch=True + ) + dataset = create_tf_record_dataset( + filenames, + parse_function, + batch_size, + file_parallelism=file_parallelism, + shuffle_buffer_size=shuffle_buffer_size, + ) + count = benchmark.pedantic( + target=benchmark_func, + args=[dataset], + iterations=2, + # pytest-benchmark calculates statistic across rounds. Set it with + # larger number (N > 30) for test statistic. 
+ rounds=rounds, + kwargs={}, + ) + assert count > 0, f"TF record count: {count} must be greater than 0" diff --git a/tests/test_atds_avro/utils/data_source.py b/tests/test_atds_avro/utils/data_source.py new file mode 100644 index 000000000..ac4b23904 --- /dev/null +++ b/tests/test_atds_avro/utils/data_source.py @@ -0,0 +1,106 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""DataSource""" + +import hashlib + +from tests.test_atds_avro.utils.hash_util import int_to_bytes + + +class DataSource: + """DataSource describes properties in a benchmark data. + + DataSource contains the metadata of a benchmark data including the total + number of records, the number of partitioned files and a scenario. A scenario + defines the features used in benchmark with the feature name, tensor spec, + and Generator used to generate the value. DataSource can be consumed by + FileWriter to generate the benchmark data described by itself. + """ + + def __init__(self, scenario, num_records, partitions=1): + """Create a new DataSource. + + Args: + scenario: A dict with feature name as key and Generator as value. + Scenario defines the features used in benchmark. Generator contains + tensor spec and the distribution to generate the tensor value. + num_records: An int defines total number of records in this data. 
+ partitions: An int defines the number of partitioned files in this data. + Each partition can have different number of records. However, the total + number of records must be num_records. + + Raises: + ValueError: If num_records or partitions is negative or partitions is + zero but num_records is greater than zero. + """ + if num_records < 0: + raise ValueError( + "Number of records in DataSource must not be negative" + f" but got {num_records}." + ) + if partitions < 0: + raise ValueError( + "Partition number in DataSource must not be negative" + f" but got {partitions}." + ) + if partitions == 0 and num_records > 0: + raise ValueError( + "Cannot have zero partitions in DataSource with" + f"non-zero num_records ({num_records})." + ) + + self._scenario = scenario + self._num_records = num_records + self._partitions = partitions + + @property + def scenario(self): + """Return the scenario of the benchmark data. + + The scenario is a dict with feature name as key and Generator as value. + """ + return self._scenario + + @property + def num_records(self): + """Return the total number of records in this data as int.""" + return self._num_records + + @property + def partitions(self): + """Return the number of partitioned files in this data as int.""" + return self._partitions + + def hash_code(self): + """Return the consistent hashed code of the DataSource in hex str. + + The hashed code can be used as the path for data source cache. + + Returns: + A hex str generated by hashing algorithm. + """ + m = hashlib.sha256() + # Step 1: hash sorted scenario dict + for name in sorted(self.scenario): + generator = self.scenario[name] + m.update(name.encode()) + m.update(generator.hash_code().encode()) + + # Step 2: hash num_records and partitions + m.update(int_to_bytes(self.num_records)) + m.update(int_to_bytes(self.partitions)) + + # Step 3: return hashed str in hex. 
+ return m.hexdigest() diff --git a/tests/test_atds_avro/utils/data_source_registry.py b/tests/test_atds_avro/utils/data_source_registry.py new file mode 100644 index 000000000..a9296c9d5 --- /dev/null +++ b/tests/test_atds_avro/utils/data_source_registry.py @@ -0,0 +1,220 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Global DataSource registry with predefined DataSource used in benchmark""" + +from enum import Enum + +import numpy as np +import tensorflow as tf +from tensorflow.python.framework import registry + +from tests.test_atds_avro.utils.data_source import DataSource +from tests.test_atds_avro.utils.generator.tensor_generator import ( + IntTensorGenerator, + FloatTensorGenerator, + WordTensorGenerator, + BoolTensorGenerator, +) +from tests.test_atds_avro.utils.generator.sparse_tensor_generator import ( + IntSparseTensorGenerator, + FloatSparseTensorGenerator, + WordSparseTensorGenerator, + BoolSparseTensorGenerator, + ValueDistribution, +) +from tests.test_atds_avro.utils.generator.varlen_tensor_generator import ( + DimensionDistribution, + IntVarLenTensorGenerator, + FloatVarLenTensorGenerator, + WordVarLenTensorGenerator, + BoolVarLenTensorGenerator, + DIM_DISTRIBUTION_TO_RANGE, +) + +SMALL_NUM_RECORDS = 1024 +LARGE_NUM_RECORDS = 12 * 1024 + +SINGLE_PARTITION = 1 +MULTIPLE_PARTITION = 6 + +FILE_PARALLELISM = 
2 + +_data_source_registry = registry.Registry("data source") + +# Data source name with all types of tensors. +ALL_TYPES_DATA_SOURCE_NAME = "all_types_data_source_name" + + +class TensorType(Enum): + """Type of tensors used in benchmark""" + + DENSE = 1 + SPARSE = 2 + VARLEN = 3 + + +def get_canonical_name(tensor_type, rank, dtype, num_records, partitions): + """Get canonical name which is used as key in global data source registry. + + Args: + tensor_type: A TensorType enum. + rank: An int to represent the rank of tensor. + dtype: tf.dtypes.DType. + num_records: Number of records. + partitions: Number of file partitions. + + Returns: + The canonical name to represent such data source in registry. + + Raises: + TypeError: If tensor_type is not TensorType. + ValueError: if rank is unknown or negative. + """ + if not isinstance(tensor_type, TensorType): + raise TypeError( + "Input tensor_type must be a TensorType enum" f" but found {tensor_type}" + ) + + if rank is None or rank < 0: + raise ValueError("Input rank must not be None or negative. Found {rank}.") + + return f"{tensor_type.name}_{rank}D_{dtype.name}_{num_records}_{partitions}" + + +def get_data_source_registry(): + """Get the global data source registry. If the registry is empty, + initialize the registry with predefined data sources.""" + global _data_source_registry + if not _data_source_registry.list(): + _init_data_source_registry(_data_source_registry) + return _data_source_registry + + +def _init_data_source_registry(registry): + shapes = [[], [128], [64, 64]] + dtypes = [tf.int32, tf.int64, tf.float32, tf.float64, tf.string, tf.bool] + + # Register data source with dense tensors. 
+ dense_generators = [ + IntTensorGenerator, + IntTensorGenerator, + FloatTensorGenerator, + FloatTensorGenerator, + WordTensorGenerator, + BoolTensorGenerator, + ] + for cls, dtype in zip(dense_generators, dtypes): + for shape in shapes: + name = get_canonical_name( + TensorType.DENSE, + rank=len(shape), + dtype=dtype, + num_records=SMALL_NUM_RECORDS, + partitions=SINGLE_PARTITION, + ) + generator = cls(tf.TensorSpec(shape=shape, dtype=dtype)) + registry.register( + candidate=DataSource( + scenario={name: generator}, + num_records=SMALL_NUM_RECORDS, + partitions=SINGLE_PARTITION, + ), + name=name, + ) + + # Register data source with sparse tensors. + sparse_generators = [ + IntSparseTensorGenerator, + IntSparseTensorGenerator, + FloatSparseTensorGenerator, + FloatSparseTensorGenerator, + WordSparseTensorGenerator, + BoolSparseTensorGenerator, + ] + for cls, dtype in zip(sparse_generators, dtypes): + for shape in shapes: + if len(shape) == 0: + # Skip scalars for sparse tensors + continue + name = get_canonical_name( + TensorType.SPARSE, + rank=len(shape), + dtype=dtype, + num_records=SMALL_NUM_RECORDS, + partitions=SINGLE_PARTITION, + ) + value_dist = ValueDistribution.SMALL_NUM_VALUE # 5 to 10 elements + generator = cls(tf.SparseTensorSpec(shape=shape, dtype=dtype), value_dist) + registry.register( + candidate=DataSource( + scenario={name: generator}, + num_records=SMALL_NUM_RECORDS, + partitions=SINGLE_PARTITION, + ), + name=name, + ) + + # Register data source with varlen tensors. 
+ varlen_generators = [ + IntVarLenTensorGenerator, + IntVarLenTensorGenerator, + FloatVarLenTensorGenerator, + FloatVarLenTensorGenerator, + WordVarLenTensorGenerator, + BoolVarLenTensorGenerator, + ] + varlen_shapes = [[None]] + for cls, dtype in zip(varlen_generators, dtypes): + for shape in varlen_shapes: + rank = len(shape) + name = get_canonical_name( + TensorType.VARLEN, + rank=rank, + dtype=dtype, + num_records=SMALL_NUM_RECORDS, + partitions=SINGLE_PARTITION, + ) + dim_dist = DimensionDistribution.LARGE_DIM # dim is between 5 to 10 + + generator = cls(tf.SparseTensorSpec(shape=shape, dtype=dtype), dim_dist) + registry.register( + candidate=DataSource( + scenario={name: generator}, + num_records=SMALL_NUM_RECORDS, + partitions=SINGLE_PARTITION, + ), + name=name, + ) + + # Registry data source with all types of tensors + scenario = {} + for key in registry.list(): + data_source = registry.lookup(key) + scenario = {**scenario, **data_source.scenario} + + # small num records for memory leak check + registry.register( + candidate=DataSource( + scenario=scenario, + num_records=SMALL_NUM_RECORDS, + partitions=MULTIPLE_PARTITION, + ), + name=ALL_TYPES_DATA_SOURCE_NAME, + ) + + +def get_data_source_from_registry(name): + registry = get_data_source_registry() + return registry.lookup(name) diff --git a/tests/test_atds_avro/utils/file_writer.py b/tests/test_atds_avro/utils/file_writer.py new file mode 100644 index 000000000..72cabdf56 --- /dev/null +++ b/tests/test_atds_avro/utils/file_writer.py @@ -0,0 +1,183 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""FileWriter""" + +import abc +import os +import shutil +import hashlib +import tempfile +import tensorflow as tf + +TF_IO_BENCHMARK_DATA_CACHE = "TF_IO_BENCHMARK_DATA_CACHE" + + +class FileWriter(metaclass=abc.ABCMeta): + """Base class for file writer. + + FileWriter consumes a DataSource and generates benchmark data as described + in the DataSource. DataSource contains benchmark data metadata such as value + generators, total number of records, and number of partitioned files. + + SubClass must implement `_write_to_path` function to write data under the + given path and follow the spec described in DataSource. + """ + + def __init__(self): + """Create a new FileWriter. + + This must be called by the constructors of subclasses. + """ + self._data_cache_path = os.getenv(TF_IO_BENCHMARK_DATA_CACHE, None) + self._dir_path = self._data_cache_path + + def __enter__(self): + """Enter a context to create dir_path for file generation.""" + if not self._data_cache_path: + self._dir_path = tempfile.mkdtemp() + return self + + def __exit__(self, exc_type, exc_value, traceback): + """Clean up dir_path after exiting the context.""" + if not self._data_cache_path: + shutil.rmtree(self._dir_path, ignore_errors=True) + self._dir_path = None + + def write(self, data_source): + """Generate benchmark data and return the path to the generated files. + + Args: + data_source: A DataSource object that describes the properties of + the benchmark data. 
+ + Returns: + A str path to the generated files. + """ + # Use data source hash code as data source folder name. + data_source_path = data_source.hash_code() + # Use writer hash code as the writer folder path. + writer_dir = self.hash_code() + + if self._data_cache_path is not None: + cached_dataset = None + writer_path = os.path.join( + self._data_cache_path, data_source_path, writer_dir + ) + if os.path.exists(writer_path): + return writer_path + from tests.test_atds_avro.utils.tf_record_writer import ( + TFRecordWriter, + ) + + with TFRecordWriter() as tf_writer: + tf_record_cache_dir = os.path.join( + self._data_cache_path, data_source_path, tf_writer.hash_code() + ) + if not os.path.exists(tf_record_cache_dir): + os.makedirs(tf_record_cache_dir, exist_ok=True) + tf_writer._write_to_path(tf_record_cache_dir, data_source) + parser_fn = tf_writer.create_tf_example_parser_fn(data_source) + pattern = os.path.join(tf_record_cache_dir, f"*.{tf_writer.extension}") + cached_dataset = tf.data.Dataset.list_files(pattern, shuffle=False) + cached_dataset = tf.data.TFRecordDataset(cached_dataset) + cached_dataset = cached_dataset.map(parser_fn) + self._write_to_path_from_cached_data( + writer_path, data_source, cached_dataset + ) + return writer_path + else: + writer_path = os.path.join(self._dir_path, data_source_path, writer_dir) + if not os.path.exists(writer_path): + os.makedirs(writer_path, exist_ok=True) + self._write_to_path(writer_path, data_source) + return writer_path + + @abc.abstractmethod + def _write_to_path(self, dir_path, data_source): + """Generate benchmark data and write the data under the given path. + + Args: + dir_path: A str path to write files to. + data_source: A DataSource object. + + Raises: + NotImplementedError: If subclass does not overload the function. + """ + raise NotImplementedError + + @abc.abstractmethod + def _write_to_path_from_cached_data(self, dir_path, data_source, dataset): + """Write the given dataset to the given path. 
+ + Args: + dir_path: A str path to write dataset to. + data_source: A DataSource object. + dataset: Cached dataset containing data to write. + + Raises: + NotImplementedError: If subclass does not overload the function. + """ + raise NotImplementedError + + @property + @abc.abstractmethod + def extension(self): + """Returns the extension of the filename e.g. tfrecords, avro, etc. + + Raises: + NotImplementedError: If subclass does not overload the function. + """ + raise NotImplementedError + + def hash_code(self): + """Return the hashed code of this file writer. + + The hashed code is used to create the folder that the writer can write + data to. It is useful for benchmark data cache. + + Returns: + The hashed code of the writer in hex str. + """ + m = hashlib.sha256() + # Hash the instance class name by default. Subclass can overload the + # function to support customized hashing logic for its own state. + m.update(self.__class__.__name__.encode()) + return m.hexdigest() + + def _get_filenames_to_num_records(self, data_source): + """Returns a dict mapping filenames to the number of records in that file. + + Args: + data_source: A DataSource describing the data to be written. + + Returns: + A dict mapping filename to number of records in that file. + """ + filenames_to_num_records = {} + partitions = data_source.partitions + record_per_partition = data_source.num_records // partitions + remaining = data_source.num_records % partitions + + partition_length = len(str(partitions)) + for file_index in range(partitions): + # Add leading zero to index_name e.g. 
0001 + index_name = str(file_index).zfill(partition_length) + filename = f"part-{index_name}.{self.extension}" + + num_records = record_per_partition + if remaining and file_index < remaining: + num_records = num_records + 1 + filenames_to_num_records[filename] = num_records + return filenames_to_num_records diff --git a/tests/test_atds_avro/utils/generator/__init__.py b/tests/test_atds_avro/utils/generator/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_atds_avro/utils/generator/generator_base.py b/tests/test_atds_avro/utils/generator/generator_base.py new file mode 100644 index 000000000..cdd038ec1 --- /dev/null +++ b/tests/test_atds_avro/utils/generator/generator_base.py @@ -0,0 +1,151 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Generator""" + +import abc +import hashlib +import tensorflow as tf + +from tests.test_atds_avro.utils.hash_util import int_to_bytes + + +class Generator(metaclass=abc.ABCMeta): + """Base class for tensor generator. + + Generator generates tf.Tensor, tf.sparse.SparseTensor, or + tf.RaggedTensor based on the input spec. The spec is one of the + tf.TensorSpec, tf.SparseTensorSpec or tf.RaggedTensorSpec. + """ + + _SUPPORTED_SPEC = (tf.TensorSpec, tf.SparseTensorSpec, tf.RaggedTensorSpec) + + # The value used in hashing None dimension. 
+ _NONE_HASH_VALUE = "None" + _SHAPE_HASH_VALUE = "shape" + _DTYPE_HASH_VALUE = "dtype" + + def __init__(self, spec): + """Create a new Generator. + + This must be called by the constructors of subclasses. + + Args: + spec: A tf.TensorSpec, tf.SparseTensorSpec or tf.RaggedTensorSpec that + describes the tensor generated by this generator. + + Raises: + TypeError: If spec is not one of the tf.TensorSpec, tf.SparseTensorSpec + or tf.RaggedTensorSpec. + """ + if not isinstance(spec, Generator._SUPPORTED_SPEC): + raise TypeError( + "Generator can only accept spec in one of the " + f"{Generator._SUPPORTED_SPEC} types but found {spec}." + ) + self._spec = spec + + @property + def spec(self): + """Return the spec that the generator will follow. + + The spec is one of the tf.TensorSpec, tf.SparseTensorSpec, and + tf.RaggedTensorSpec. + """ + return self._spec + + @abc.abstractmethod + def generate(self): + """Generate tensor value based on the spec of the generator. + + The output tensor must be compatible with the spec. + + When the spec is tf.TensorSpec, tf.SparseTensorSpec, or + tf.RaggedTensorSpec, this function will return tf.Tensor, + tf.sparse.SparseTensor, or tf.RaggedTensor respectively. + + Returns: + tf.Tensor, tf.sparse.SparseTensor, or tf.RaggedTensor + + Raises: + NotImplementedError: If subclass does not overload the function. + """ + raise NotImplementedError + + def get_generator_cls(self): + """Get the generator class which this object represents.""" + return type(self) + + def hash_code(self): + """Return the hasehd code of this Generator in hex str. + + The hashed code helps create the data source cache path on disk. + + Returns: + A hashed hex str of this Generator. 
+ """ + m = hashlib.sha256() + + # Step 1: hash class name + classname = self.get_generator_cls().__name__ + m.update(classname.encode()) + + # Step 2: hash input spec using default hash function in tf.TypeSpec + if isinstance(self.spec, tf.TensorSpec): + m.update(tf.TensorSpec.__name__.encode()) + elif isinstance(self.spec, tf.SparseTensorSpec): + m.update(tf.SparseTensorSpec.__name__.encode()) + elif isinstance(self.spec, tf.RaggedTensorSpec): + m.update(tf.RaggedTensorSpec.__name__.encode()) + m.update(b"ragged_rank") + m.update(int_to_bytes(self.spec.ragged_rank)) + m.update(b"row_splits_dtype") + m.update(int_to_bytes(hash(self.spec.row_splits_dtype))) + m.update(b"flat_values_spec") + if self.spec.flat_values_spec: + m.update(int_to_bytes(hash(self.spec.flat_values_spec))) + else: + raise TypeError( + f"Generator.spec is not one of {Generator._SUPPORTED_SPEC}." + ) + + self._hash_shape(m, self.spec.shape) + self._hash_dtype(m, self.spec.dtype) + + # Step 3" return hashed hex str. + return m.hexdigest() + + def _hash_shape(self, m, shape): + """Hash shape info with a hash algorithm in hashlib. + + Args: + m: A hashlib algorithm e.g. hashlib.sha256(). + shape: A list of python int. + """ + m.update(Generator._SHAPE_HASH_VALUE.encode()) + for dim in shape: + if dim is None: + m.update(Generator._NONE_HASH_VALUE.encode()) + else: + m.update(int_to_bytes(dim)) + + def _hash_dtype(self, m, dtype): + """Hash TensorFlow dtype with a hash algorithm in hashlib. + + Args: + m: A hashlib algorithm e.g. hashlib.sha256(). + dtype: TensorFlow dtype e.g. tf.float32. 
+ """ + m.update(Generator._DTYPE_HASH_VALUE.encode()) + m.update(int_to_bytes(hash(dtype))) diff --git a/tests/test_atds_avro/utils/generator/sparse_tensor_generator.py b/tests/test_atds_avro/utils/generator/sparse_tensor_generator.py new file mode 100644 index 000000000..d693f476c --- /dev/null +++ b/tests/test_atds_avro/utils/generator/sparse_tensor_generator.py @@ -0,0 +1,292 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""SparseTensorGenerator""" + +import enum +import random +import hashlib +import numpy as np +import tensorflow as tf + +from tests.test_atds_avro.utils.generator.generator_base import ( + Generator, +) +from tests.test_atds_avro.utils.hash_util import int_to_bytes + + +class ValueDistribution(enum.Enum): + SINGLE_VALUE = 1 + SMALL_NUM_VALUE = 2 # 5 to 9 elements + LARGE_NUM_VALUE = 3 # 100 to 999 elements + + +_VALUE_DISTRIBUTION_TO_RANGE = { + ValueDistribution.SINGLE_VALUE: (1, 2), + ValueDistribution.SMALL_NUM_VALUE: (5, 10), + ValueDistribution.LARGE_NUM_VALUE: (100, 1000), +} + + +def get_common_value_dist(): + # Assume tensor is one-hot since this is a common use case + return ValueDistribution.SINGLE_VALUE + + +class SparseTensorGeneratorBase(Generator): + """Base of SparseTensorGenerator that generates tf.sparse.SparseTensor.""" + + def __init__(self, spec, num_values): + """Create a new SparseTensorGeneratorBase. + + This must be called by the constructors of subclasses e.g. + IntSparseTensorGenerator, FloatSparseTensorGenerator, etc. + + Args: + spec: A tf.SparseTensorSpec that describes the output tensor. + num_values: A value distribution or an int specifying number of non-zero values in the sparse tensor. + + Raises: + TypeError: If spec is not tf.SparseTensorSpec. 
+ """ + super().__init__(spec) + + if not isinstance(spec, tf.SparseTensorSpec): + raise TypeError( + "Input spec must be a tf.SparseTensorSpec in SparseTensorGenerator " + f"but found {spec}" + ) + + if not isinstance(num_values, (int, ValueDistribution)): + raise TypeError( + f"num_values must be an int or ValueDistribution but found {num_values}" + ) + + self._value_dist = None + if isinstance(num_values, int): + self._num_values = num_values + elif num_values == ValueDistribution.SINGLE_VALUE: + self._num_values = 1 + else: + self._value_dist = num_values + + def _get_num_values(self, shape): + ret = 0 + if self._value_dist == None: + ret = self._num_values + else: + ret = np.random.randint(*_VALUE_DISTRIBUTION_TO_RANGE[self._value_dist]) + return min(ret, np.prod(shape)) + + def _get_shape(self): + # If spec shape is None, generate shape with random rank between 1 and 5 + shape = ( + [None] * np.random.randint(1, 5) + if self.spec.shape == None + else self.spec.shape + ) + # Populate unknown dimensions with random int between 1 and 10 + return [dim if dim != None else np.random.randint(1, 10) for dim in shape] + + def _generate_random_coords(self, num_values, shape): + if num_values == 0: + return np.empty((0, len(shape)), dtype=np.int64) + indices = sorted(random.sample(range(np.prod(shape)), num_values)) + return [self._int_to_coord(idx, shape) for idx in indices] + + def _int_to_coord(self, idx, shape): + """Convert an integer to its corresponding location in a tensor, in row-major order. + For example, in the 2d tensor + [[0, 1, 2] + [3, 4, 5]] + The index 3 will return [1, 0] (i.e. 
the entry in the second row, first column) + """ + rank = len(shape) + ret = [0] * rank + for dim in range(rank): + val = idx % (shape[rank - dim - 1]) + ret[rank - dim - 1] = val + idx = (idx - val) // shape[rank - dim - 1] + return ret + + def hash_code(self): + hash_code = super().hash_code() + + m = hashlib.sha256() + m.update(hash_code.encode()) + + # Hash input num_values + if self._value_dist: + m.update(ValueDistribution.__name__.encode()) + m.update(self._value_dist.name.encode()) # num_values is enum + else: + m.update(int_to_bytes(self._num_values)) # num_values is constant int. + return m.hexdigest() + + +class IntSparseTensorGenerator(SparseTensorGeneratorBase): + """IntSparseTensorGenerator generates tf.sparse.SparseTensor with dtype in tf.int32 or tf.int64""" + + def __init__(self, spec, num_values): + """Create a new IntSparseTensorGenerator + + With tf.int32 dtype, the generated int range is between -2^31 to 2^31 - 1. + With tf.int64 dtype, the generated int range is between -2^63 to 2^63 - 1. + + Args: + spec: A tf.SparseTensorSpec that describes the output tensor. + num_values: A value distribution or an int specifying number of non-zero values in the sparse tensor. + + Raises: + TypeError: If dtype in spec is not tf.int32 or tf.int64. + """ + super().__init__(spec, num_values) + + if spec.dtype not in [tf.int32, tf.int64]: + raise TypeError( + "IntSparseTensorGenerator can only generate tf.sparse.SparseTensor with " + f"dtype in tf.int32 or tf.int64 but found {spec.dtype}." 
+ ) + + def generate(self): + dtype = self.spec.dtype + info = np.iinfo(dtype.as_numpy_dtype) + shape = self._get_shape() + num_values = self._get_num_values(shape) + vals = np.random.randint( + low=info.min, high=info.max, size=[num_values], dtype=dtype.as_numpy_dtype + ) + coords = self._generate_random_coords(num_values, shape) + return tf.SparseTensor(indices=coords, values=vals, dense_shape=shape) + + +class FloatSparseTensorGenerator(SparseTensorGeneratorBase): + """FloatSparseTensorGenerator generates tf.sparse.SparseTensor with dtype in tf.float32 + or tf.float64.""" + + def __init__(self, spec, num_values): + """Create a new FloatSparseTensorGenerator + + The generated float range is between 0.0 to 1.0. + + Args: + spec: A tf.SparseTensorSpec that describes the output tensor. + num_values: A value distribution or an int specifying number of non-zero values in the sparse tensor. + + Raises: + TypeError: If dtype in spec is not tf.float32 or tf.float64. + """ + super().__init__(spec, num_values) + + if spec.dtype not in [tf.float32, tf.float64]: + raise TypeError( + "FloatSparseTensorGenerator can only generate tf.sparse.SparseTensor with " + f"dtype in tf.float32 or tf.float64 but found {spec.dtype}." + ) + + def generate(self): + shape = self._get_shape() + num_values = self._get_num_values(shape) + vals = np.random.rand(num_values) + if self.spec.dtype == tf.float32: + vals = vals.astype(np.float32) + coords = self._generate_random_coords(num_values, shape) + return tf.SparseTensor(indices=coords, values=vals, dense_shape=shape) + + +class WordSparseTensorGenerator(SparseTensorGeneratorBase): + """WordSparseTensorGenerator generates string tf.SparseTensor with string + length similar to a word.""" + + def __init__(self, spec, num_values, avg_length=5): + """Create a new WordSparseTensorGenerator + + WordSparseTensorGenerator samples word length using Poisson distribution + with lambda equals to avg_length and generates random bytes for each word. 
+ + Args: + spec: A tf.SparseTensorSpec that describes the output tensor. + num_values: A value distribution or an int specifying number of non-zero values in the sparse tensor. + avg_length: An int that represents the average word length. + + Raises: + TypeError: If dtype in spec is not tf.string. + ValueError: If avg_length is not positive. + """ + super().__init__(spec, num_values) + + if spec.dtype is not tf.string: + raise TypeError( + "WordSparseTensorGenerator can only generate tf.sparse.SparseTensor with " + f"dtype in tf.string but found {spec.dtype}." + ) + + if avg_length < 1: + raise ValueError( + "WordSparseTensorGenerator must have positive avg_length" + f" but found {avg_length}." + ) + self._avg_length = avg_length + + def generate(self): + # Use Poisson distribution to sample the length of byte strings. + # The avg_length equals to the lambda in Poisson distribution. + shape = self._get_shape() + num_values = self._get_num_values(shape) + lengths = np.random.poisson(self._avg_length, size=num_values) + + to_string = lambda length: np.random.bytes(length) + vfunc = np.vectorize(to_string) + vals = vfunc(lengths) + coords = self._generate_random_coords(num_values, shape) + return tf.SparseTensor(indices=coords, values=vals, dense_shape=shape) + + def hash_code(self): + hash_code = super().hash_code() + m = hashlib.sha256() + m.update(hash_code.encode()) + m.update(int_to_bytes(self._avg_length)) + return m.hexdigest() + + +class BoolSparseTensorGenerator(SparseTensorGeneratorBase): + """BoolSparseTensorGenerator generates tf.sparse.SparseTensor with dtype in tf.bool.""" + + def __init__(self, spec, num_values): + """Create a new BoolSparseTensorGenerator. + + The generated bool value has equal true and false possibility. + + Args: + spec: A tf.SparseTensorSpec that describes the output tensor. + num_values: A value distribution or an int specifying number of non-zero values in the sparse tensor. 
+ + Raises: + TypeError: If dtype in spec is not tf.bool. + """ + super().__init__(spec, num_values) + + if spec.dtype is not tf.bool: + raise TypeError( + "BoolSparseTensorGenerator can only generate tf.sparse.SparseTensor with " + f"dtype in tf.bool but found {spec.dtype}." + ) + + def generate(self): + shape = self._get_shape() + # np.random.rand generates values from 0 to 1 using Uniform distribution + num_values = self._get_num_values(shape) + vals = np.random.rand(num_values) > 0.5 + coords = self._generate_random_coords(num_values, shape) + return tf.SparseTensor(indices=coords, values=vals, dense_shape=shape) diff --git a/tests/test_atds_avro/utils/generator/sparse_util.py b/tests/test_atds_avro/utils/generator/sparse_util.py new file mode 100644 index 000000000..dc41947cb --- /dev/null +++ b/tests/test_atds_avro/utils/generator/sparse_util.py @@ -0,0 +1,28 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + + +def coord_to_int(coord, shape): + """Convert a location in a tensor to its unique index, in row-major order. + For example, in the 2d tensor + [[0, 1, 2] + [3, 4, 5]] + The location [1, 0] (i.e. the entry in the second row, first column) will return 3. 
+ """ + ret = 0 + rank = len(shape) + for dim in range(rank): + ret = ret * shape[dim] + coord[dim] + return ret diff --git a/tests/test_atds_avro/utils/generator/tensor_generator.py b/tests/test_atds_avro/utils/generator/tensor_generator.py new file mode 100644 index 000000000..833f75efe --- /dev/null +++ b/tests/test_atds_avro/utils/generator/tensor_generator.py @@ -0,0 +1,197 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TensorGenerator""" + +import hashlib +import numpy as np +import tensorflow as tf + +from tests.test_atds_avro.utils.generator.generator_base import ( + Generator, +) +from tests.test_atds_avro.utils.hash_util import int_to_bytes + + +class TensorGeneratorBase(Generator): + """Base of TensorGenerator that generates tf.Tensor.""" + + def __init__(self, spec): + """Create a new TensorGeneratorBase. + + This must be called by the constructors of subclasses e.g. + IntTensorGenerator, FloatTensorGenerator, etc. + + Args: + spec: A tf.TensorSpec that describes the output tensor. + + Raises: + TypeError: If spec is not tf.TensorSpec. + ValueError: If shape in spec is not fully defined. 
+ """ + super().__init__(spec) + + if not isinstance(spec, tf.TensorSpec): + raise TypeError( + "Input spec must be a tf.TensorSpec in TensorGenerator " + f"but found {spec}" + ) + + spec.shape.assert_is_fully_defined() + + +class IntTensorGenerator(TensorGeneratorBase): + """IntTensorGenerator generates tf.Tensor with dtype in tf.int32 or tf.int64""" + + def __init__(self, spec): + """Create a new IntTensorGenerator + + With tf.int32 dtype, the generated int range is between -2^31 to 2^31 - 1. + With tf.int64 dtype, the generated int range is between -2^63 to 2^63 - 1. + + Args: + spec: A tf.TensorSpec that describes the output tensor. + + Raises: + TypeError: If dtype in spec is not tf.int32 or tf.int64. + """ + super().__init__(spec) + + if spec.dtype not in [tf.int32, tf.int64]: + raise TypeError( + "IntTensorGenerator can only generate tf.Tensor with " + f"dtype in tf.int32 or tf.int64 but found {spec.dtype}." + ) + + def generate(self): + dtype = self.spec.dtype + info = np.iinfo(dtype.as_numpy_dtype) + shape = self.spec.shape.as_list() + values = np.random.randint( + low=info.min, high=info.max, size=shape, dtype=dtype.as_numpy_dtype + ) + return tf.convert_to_tensor(values, dtype=dtype, name=self.spec.name) + + +class FloatTensorGenerator(TensorGeneratorBase): + """FloatTensorGenerator generates tf.Tensor with dtype in tf.float32 + or tf.float64.""" + + def __init__(self, spec): + """Create a new FloatTensorGenerator + + The generated float range is between 0.0 to 1.0. + + Args: + spec: A tf.TensorSpec that describes the output tensor. + + Raises: + TypeError: If dtype in spec is not tf.float32 or tf.float64. + """ + super().__init__(spec) + + if spec.dtype not in [tf.float32, tf.float64]: + raise TypeError( + "FloatTensorGenerator can only generate tf.Tensor with " + f"dtype in tf.float32 or tf.float64 but found {spec.dtype}." 
+ ) + + def generate(self): + shape = self.spec.shape.as_list() + values = np.random.rand(*shape) + return tf.convert_to_tensor(values, dtype=self.spec.dtype, name=self.spec.name) + + +class WordTensorGenerator(TensorGeneratorBase): + """WordTensorGenerator generates string tf.Tensor with string + length similar to a word.""" + + def __init__(self, spec, avg_length=5): + """Create a new WordTensorGenerator + + WordTensorGenerator samples word length using Poisson distribution + with lambda equals to avg_length and generates random bytes for each word. + + Args: + spec: A tf.TensorSpec that describes the output tensor. + avg_length: An int that represents the average word length. + + Raises: + TypeError: If dtype in spec is not tf.string. + ValueError: If avg_length is not positive. + """ + super().__init__(spec) + + if spec.dtype is not tf.string: + raise TypeError( + "WordTensorGenerator can only generate tf.Tensor with " + f"dtype in tf.string but found {spec.dtype}." + ) + + if avg_length < 1: + raise ValueError( + "WordTensorGenerator must have positive avg_length" + f" but found {avg_length}." + ) + self._avg_length = avg_length + + def generate(self): + # Use Poisson distribution to sample the length of byte strings. + # The avg_length equals to the lambda in Poisson distribution. 
+ shape = self.spec.shape.as_list() + lengths = np.random.poisson(self._avg_length, size=shape) + + to_string = lambda length: np.random.bytes(length) + vfunc = np.vectorize(to_string) + values = vfunc(lengths) + + return tf.convert_to_tensor(values, dtype=tf.string, name=self.spec.name) + + def hash_code(self): + """Return the hashed code of this Generator in hex str.""" + hash_code = super().hash_code() + + m = hashlib.sha256() + m.update(hash_code.encode()) + m.update(int_to_bytes(self._avg_length)) + return m.hexdigest() + + +class BoolTensorGenerator(TensorGeneratorBase): + """BoolTensorGenerator generates tf.Tensor with dtype in tf.bool.""" + + def __init__(self, spec): + """Create a new BoolTensorGenerator. + + The generated bool value has equal true and false possibility. + + Args: + spec: A tf.TensorSpec that describes the output tensor. + + Raises: + TypeError: If dtype in spec is not tf.bool. + """ + super().__init__(spec) + + if spec.dtype is not tf.bool: + raise TypeError( + "BoolTensorGenerator can only generate tf.Tensor with " + f"dtype in tf.bool but found {spec.dtype}." + ) + + def generate(self): + shape = self.spec.shape.as_list() + # np.random.rand generates values from 0 to 1 using Uniform distribution + values = np.random.rand(*shape) > 0.5 + return tf.convert_to_tensor(values, dtype=tf.bool, name=self.spec.name) diff --git a/tests/test_atds_avro/utils/generator/varlen_tensor_generator.py b/tests/test_atds_avro/utils/generator/varlen_tensor_generator.py new file mode 100644 index 000000000..4dbe6293b --- /dev/null +++ b/tests/test_atds_avro/utils/generator/varlen_tensor_generator.py @@ -0,0 +1,263 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""VarLenTensorGenerator""" + +import enum +import hashlib +import numpy as np +import tensorflow as tf + +from tests.test_atds_avro.utils.generator.generator_base import ( + Generator, +) +from tests.test_atds_avro.utils.hash_util import int_to_bytes + + +class DimensionDistribution(enum.Enum): + ONE_DIM = 1 # dimension size 1 + TWO_DIM = 2 # dimension size 2 + LARGE_DIM = 3 # dimension size from 5 to 10 + + +DIM_DISTRIBUTION_TO_RANGE = {DimensionDistribution.LARGE_DIM: (5, 10)} + + +class VarLenTensorGeneratorBase(Generator): + """Base of VarLenTensorGeneratorBase that generates tf.sparse.SparseTensor.""" + + def __init__(self, spec, dim_dist): + """Create a new VarLenTensorGeneratorBase. + + This must be called by the constructors of subclasses e.g. + IntVarLenTensorGeneratorBase, FloatVarLenTensorGeneratorBase, etc. + + Args: + spec: A tf.SparseTensorSpec that describes the output tensor. + dim_dist: Distribution of dimension sizes. + + Raises: + TypeError: If spec is not tf.SparseTensorSpec. 
+ """ + super().__init__(spec) + if not isinstance(spec, tf.SparseTensorSpec): + raise TypeError( + "Input spec must be a tf.SparseTensorSpec in VarLenTensorGenerator " + f"but found {spec}" + ) + + if not isinstance(dim_dist, DimensionDistribution): + raise TypeError( + f"dim_dist must be a DimensionDistribution but found {dim_dist}" + ) + + if self.spec.shape.rank is None: + raise ValueError(f"Input spec must have known rank") + + self._dim_dist = dim_dist + + def _get_dim(self): + if self._dim_dist == DimensionDistribution.ONE_DIM: + return 1 + elif self._dim_dist == DimensionDistribution.TWO_DIM: + return 2 + elif self._dim_dist == DimensionDistribution.LARGE_DIM: + return np.random.randint(*DIM_DISTRIBUTION_TO_RANGE[self._dim_dist]) + else: + raise ValueError( + f"Found unsupported dimension distribution {self._dim_dist}" + ) + + def _get_shape(self): + return [ + dim if dim is not None else self._get_dim() + for dim in self.spec.shape.as_list() + ] + + def _get_idx(self, depth, shape, current_idx, ret): + cur_dim = shape[depth] + # Generate full list of idx, e.g. for a [2, 3] tensor: + # [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2]] + for i in range(cur_dim): + current_idx[depth] = i + if depth == len(shape) - 1: + ret.append(current_idx.copy()) + else: + self._get_idx(depth + 1, shape, current_idx, ret) + + def hash_code(self): + hash_code = super().hash_code() + + m = hashlib.sha256() + m.update(hash_code.encode()) + m.update(DimensionDistribution.__name__.encode()) + m.update(self._dim_dist.name.encode()) + return m.hexdigest() + + +class IntVarLenTensorGenerator(VarLenTensorGeneratorBase): + """IntVarLenTensorGenerator generates tf.sparse.SparseTensor with dtype in tf.int32 or tf.int64""" + + def __init__(self, spec, dim_dist=DimensionDistribution.ONE_DIM): + """Create a new IntVarLenTensorGenerator + + With tf.int32 dtype, the generated int range is between -2^31 to 2^31 - 1. + With tf.int64 dtype, the generated int range is between -2^63 to 2^63 - 1. 
+ + Args: + spec: A tf.SparseTensorSpec that describes the output tensor. + dim_dist: Distribution of dimension sizes. + + Raises: + TypeError: If dtype in spec is not tf.int32 or tf.int64. + """ + super().__init__(spec, dim_dist) + + if spec.dtype not in [tf.int32, tf.int64]: + raise TypeError( + "IntVarLenTensorGenerator can only generate tf.sparse.SparseTensor with " + f"dtype in tf.int32 or tf.int64 but found {spec.dtype}." + ) + + def generate(self): + dtype = self.spec.dtype + info = np.iinfo(dtype.as_numpy_dtype) + shape = self._get_shape() + idxs = [] + self._get_idx(0, shape, [0] * len(shape), idxs) + vals = np.random.randint( + low=info.min, high=info.max, size=len(idxs), dtype=dtype.as_numpy_dtype + ) + return tf.SparseTensor(indices=idxs, values=vals, dense_shape=shape) + + +class FloatVarLenTensorGenerator(VarLenTensorGeneratorBase): + """FloatVarLenTensorGenerator generates tf.sparse.SparseTensor with dtype in tf.float32 + or tf.float64.""" + + def __init__(self, spec, dim_dist=DimensionDistribution.ONE_DIM): + """Create a new FloatVarLenTensorGenerator + + The generated float range is between 0.0 to 1.0. + + Args: + spec: A tf.SparseTensorSpec that describes the output tensor. + dim_dist: Distribution of dimension sizes. + + Raises: + TypeError: If dtype in spec is not tf.float32 or tf.float64. + """ + super().__init__(spec, dim_dist) + + if spec.dtype not in [tf.float32, tf.float64]: + raise TypeError( + "FloatVarLenTensorGenerator can only generate tf.sparse.SparseTensor with " + f"dtype in tf.float32 or tf.float64 but found {spec.dtype}." 
+ ) + + def generate(self): + shape = self._get_shape() + idxs = [] + self._get_idx(0, shape, [0] * len(shape), idxs) + vals = np.random.rand(len(idxs)) + if self.spec.dtype == tf.float32: + vals = vals.astype(np.float32) + return tf.SparseTensor(indices=idxs, values=vals, dense_shape=shape) + + +class WordVarLenTensorGenerator(VarLenTensorGeneratorBase): + """WordVarLenTensorGenerator generates string tf.SparseTensor with string + length similar to a word.""" + + def __init__(self, spec, dim_dist=DimensionDistribution.ONE_DIM, avg_length=5): + """Create a new WordVarLenTensorGenerator + + WordVarLenTensorGenerator samples word length using Poisson distribution + with lambda equals to avg_length and generates random bytes for each word. + + Args: + spec: A tf.SparseTensorSpec that describes the output tensor. + dim_dist: Distribution of dimension sizes. + avg_length: An int that represents the average word length. + + Raises: + TypeError: If dtype in spec is not tf.string. + ValueError: If avg_length is not positive. + """ + super().__init__(spec, dim_dist) + + if spec.dtype is not tf.string: + raise TypeError( + "WordVarLenTensorGenerator can only generate tf.sparse.SparseTensor with " + f"dtype in tf.string but found {spec.dtype}." + ) + + if avg_length < 1: + raise ValueError( + "WordVarLenTensorGenerator must have positive avg_length" + f" but found {avg_length}." + ) + self._avg_length = avg_length + + def generate(self): + # Use Poisson distribution to sample the length of byte strings. + # The avg_length equals to the lambda in Poisson distribution. 
+ shape = self._get_shape() + idxs = [] + self._get_idx(0, shape, [0] * len(shape), idxs) + lengths = np.random.poisson(self._avg_length, size=len(idxs)) + + to_string = lambda length: np.random.bytes(length) + vfunc = np.vectorize(to_string) + vals = vfunc(lengths) + return tf.SparseTensor(indices=idxs, values=vals, dense_shape=shape) + + def hash_code(self): + hash_code = super().hash_code() + m = hashlib.sha256() + m.update(hash_code.encode()) + m.update(int_to_bytes(self._avg_length)) + return m.hexdigest() + + +class BoolVarLenTensorGenerator(VarLenTensorGeneratorBase): + """BoolVarLenTensorGenerator generates tf.sparse.SparseTensor with dtype in tf.bool.""" + + def __init__(self, spec, dim_dist=DimensionDistribution.ONE_DIM): + """Create a new BoolVarLenTensorGenerator. + + The generated bool value has equal true and false possibility. + + Args: + spec: A tf.SparseTensorSpec that describes the output tensor.\ + dim_dist: Distribution of dimension sizes. + + Raises: + TypeError: If dtype in spec is not tf.bool. + """ + super().__init__(spec, dim_dist) + + if spec.dtype is not tf.bool: + raise TypeError( + "BoolVarLenTensorGenerator can only generate tf.sparse.SparseTensor with " + f"dtype in tf.bool but found {spec.dtype}." + ) + + def generate(self): + shape = self._get_shape() + # np.random.rand generates values from 0 to 1 using Uniform distribution + idxs = [] + self._get_idx(0, shape, [0] * len(shape), idxs) + vals = np.random.rand(len(idxs)) > 0.5 + return tf.SparseTensor(indices=idxs, values=vals, dense_shape=shape) diff --git a/tests/test_atds_avro/utils/hash_util.py b/tests/test_atds_avro/utils/hash_util.py new file mode 100644 index 000000000..21ec4742f --- /dev/null +++ b/tests/test_atds_avro/utils/hash_util.py @@ -0,0 +1,22 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utility functions for hashing""" + + +def int_to_bytes(x): + # Add one bit because a signed N-bit int can only represent up to 2^(N-1) - 1 + # (instead of an unsigned N-bit int which can represent up to 2^N - 1). + # For example, 128 requires 9 bits (therefore two bytes) in twos complement. + return x.to_bytes(x.bit_length() // 8 + 1, byteorder="little", signed=True) diff --git a/tests/test_atds_avro/utils/stat_t_test_cli.py b/tests/test_atds_avro/utils/stat_t_test_cli.py new file mode 100644 index 000000000..be5c1f976 --- /dev/null +++ b/tests/test_atds_avro/utils/stat_t_test_cli.py @@ -0,0 +1,352 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"Command line tool to analyze benchmark result with Welch's t-test." 
+ +import argparse +import collections +import logging +import json +import os +from operator import attrgetter + +import numpy as np +from scipy.stats import t + +# Common field names used in pytest-benchmark JSON report. +BENCHMARKS = "benchmarks" +GROUP = "group" +NAME = "name" +STATS = "stats" +MEAN = "mean" +STDDEV = "stddev" +ROUNDS = "rounds" + + +class BenchmarkName(collections.namedtuple("BenchmarkName", ["group", "name"])): + """The name and group of a pytest-benchmark test. + + Fields: + group: The name of the pytest-benchmark group that this test belongs to. + A benchmark group contains one or more benchmark tests. + name: The name of the benchmark test. + """ + + pass + + +class BenchmarkResult(collections.namedtuple("BenchmarkResult", ["label", "result"])): + """Benchmark result loaded from pytest-benchmark JSON report. + + Fields: + label: The label of the benchmark run. + result: A benchmark dict generated by the pytest-benchmark JSON report. + Key is a BenchmarkName. Value is a stat dict. Stat dict contains + statistics such as min, max, mean, std, etc + """ + + pass + + +class TTestResult( + collections.namedtuple( + "TTestResult", ["p_value", "t_stat", "lower_bound", "upper_bound", "mean_delta"] + ) +): + """Welch's t-test result and one sided confidence interval. + + Fields: + p_value: The p value of the Welch's t-test. + t_stat: The T statistic of the Welch's t-test. + lower_bound: The lower bound of the one sided confidence interval. + upper_bound: The upper bound of the one sided confidence interval. + mean_delta: The difference in sample mean. 
+ """ + + pass + + +class AlphaAction(argparse.Action): + """Parser action for confidence level alpha to validate its value.""" + + def __call__(self, parser, namespace, values, option_string=None): + if values <= 0.0 or values >= 1.0: + parser.error(f"The alpha range of '{option_string}' should be (0.0, 1.0).") + + setattr(namespace, self.dest, values) + + +def load_benchmark_result(path): + filename = os.path.basename(path) + label = os.path.splitext(filename)[0] # Remove file extension + + with open(path) as f: + report = json.load(f) + + result = {} + for benchmark_test in report[BENCHMARKS]: + group_name = benchmark_test[GROUP] + test_name = benchmark_test[NAME] + + benchmark_name = BenchmarkName(group=group_name, name=test_name) + result[benchmark_name] = benchmark_test[STATS] + + return BenchmarkResult(label=label, result=result) + + +def run_welchs_ttest(stat1, stat2, alpha, faster): + """Run one tailed Welch's t-test to verify if stat1 is faster/slower than stat2 + + Please refer wiki for more details about Welch's t-test. + https://en.wikipedia.org/wiki/Welch%27s_t-test + + Please check scipy for the t-test implementation details. + https://github.com/scipy/scipy/blob/v1.7.1/scipy/stats/stats.py#L5712-L5833 + + The confidence interval is computed with one sided approach. For more details, see + https://stats.stackexchange.com/questions/257526/can-one-sided-confidence-intervals-have-95-coverage + + Args: + stat1: The first statistic dict collected by pytest-benchmark. + stat2: The second statistic dict collected by pytest-benchmark. + alpha: The confidence level. + faster: True to test if stat1 is faster than stat2. False to test + if stat1 is slower than stat2. 
+ + Returns: + A TTestResult + """ + m1 = stat1[MEAN] + m2 = stat2[MEAN] + + s1 = stat1[STDDEV] + s2 = stat2[STDDEV] + + n1 = stat1[ROUNDS] + n2 = stat2[ROUNDS] + + df1 = n1 - 1 # degree of freedom of stat1 + df2 = n2 - 1 # degree of freedom of stat2 + + sample_v1 = s1 ** 2 / n1 # biased estimated sample variance of stat1 + sample_v2 = s2 ** 2 / n2 # biased estimated sample variance of stat2 + + biased_variance = np.sqrt(sample_v1 + sample_v2) + # degree of freedom + df = (sample_v1 + sample_v2) ** 2 / ( + sample_v1 ** 2 / (df1) + sample_v2 ** 2 / (df2) + ) + + mean_delta = m1 - m2 + t_stat = mean_delta / biased_variance + + if faster: + # Null hypothesis is stat1 >= stat2. + # Alternative hypothesis is stat1 < stat2. + p_value = t.cdf(t_stat, df) + + # Compute one sided confidence interval (-inf, x) + upper_bound = mean_delta + t.ppf(1.0 - alpha, df) * biased_variance + upper_bound = format(upper_bound, ".5f") + lower_bound = "-inf" + else: + # Null hypothesis is stat1 <= stat2. + # Alternative hypothesis is stat1 > stat2. 
+ p_value = 1.0 - t.cdf(t_stat, df) + + # Compute one sided confidence interval (x, inf) + upper_bound = "inf" + lower_bound = mean_delta + t.ppf(alpha, df) * biased_variance + lower_bound = format(lower_bound, ".5f") + + return TTestResult( + p_value=p_value, + t_stat=t_stat, + lower_bound=lower_bound, + upper_bound=upper_bound, + mean_delta=format(mean_delta, ".5f"), + ) + + +def create_result_messages(benchmark_results, alpha): + sorted_benchmarks = sorted( + list(benchmark_results.keys()), key=attrgetter("group", "name") + ) + + template = "\t{benchmark:55}{mean_delta:25}{confidence_interval:40}" + + confidence = (1.0 - alpha) * 100 + results = [ + template.format( + benchmark="Benchmark test (group::name)", + mean_delta="Mean delta in second", + confidence_interval=f"{confidence}% confidence interval of mean delta", + ) + ] + for benchmark in sorted_benchmarks: + ttest_result = benchmark_results[benchmark] + results.append( + template.format( + benchmark=f"{benchmark.group}::{benchmark.name}", + mean_delta=f"{ttest_result.mean_delta}", + confidence_interval=f"({ttest_result.lower_bound}, " + f"{ttest_result.upper_bound})", + ) + ) + + return "\n".join(results) + + +def log_benchmark_not_in_both_report(diff, in_result, not_in_result): + sorted_diff = sorted(list(diff), key=attrgetter("group", "name")) + sorted_benchmarks = "\n".join( + [f"\t{benchmark.group}::{benchmark.name}" for benchmark in sorted_diff] + ) + + message = ( + f"Found following benchmarks in {in_result.label} " + f"but not in {not_in_result.label}.\n {sorted_benchmarks}" + ) + logging.warning(message) + + +def log_no_overlapped_benchmark_result(first_result, second_result): + message = ( + f"Benchmark results in {first_result.label} " + f"and {second_result.label} have no intersection." 
+ ) + logging.warning(message) + + +def log_not_significant_benchmark_result(benchmarks, label1, label2, order, alpha): + result_messages = create_result_messages(benchmarks, alpha) + message = ( + f"The following benchmark results does NOT show that " + f"{label1} is statistically significant {order} than " + f"{label2}.\n {result_messages}" + ) + logging.info(message) + + +def log_significant_benchmark_result(benchmarks, label1, label2, order, alpha): + result_messages = create_result_messages(benchmarks, alpha) + message = ( + f"The following benchmark results show that {label1} is " + f"statistically significant {order} than {label2}." + f"\n{result_messages}" + ) + logging.info(message) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Run Welch's t-test on benchmark result." + ) + parser.add_argument( + "first_report", + metavar="r1", + type=str, + help="Path to the first pytest-benchmark JSON report.", + ) + parser.add_argument( + "second_report", + metavar="r2", + type=str, + help="Path to the second pytest-benchmark JSON report.", + ) + parser.add_argument( + "-a", + "--alpha", + metavar="alpha", + type=float, + default=0.05, + action=AlphaAction, + help="The confidence level in t-test.", + ) + + slower_message = ( + "Set this flag to test if the first result is slower " + "than the second. Otherwise, the tool will test if the " + "first result is faster than the second." 
+ ) + parser.add_argument("--slower", action="store_true", help=slower_message) + + return parser.parse_args() + + +def main(): + args = parse_args() + + first_result = load_benchmark_result(args.first_report) + second_result = load_benchmark_result(args.second_report) + + benchmark_in_first = set(first_result.result.keys()) + benchmark_in_second = set(second_result.result.keys()) + + first_but_not_second = benchmark_in_first.difference(benchmark_in_second) + second_but_not_first = benchmark_in_second.difference(benchmark_in_first) + in_both = benchmark_in_first.intersection(benchmark_in_second) + + if first_but_not_second: + log_benchmark_not_in_both_report( + diff=first_but_not_second, + in_result=first_result, + not_in_result=second_result, + ) + + if second_but_not_first: + log_benchmark_not_in_both_report( + diff=second_but_not_first, + in_result=second_result, + not_in_result=first_result, + ) + + if not in_both: + log_no_overlapped_benchmark_result(first_result, second_result) + return + + is_faster = False if args.slower else True + alpha = args.alpha + + significant = {} + not_significant = {} + for benchmark_test in in_both: + ttest_result = run_welchs_ttest( + stat1=first_result.result[benchmark_test], + stat2=second_result.result[benchmark_test], + alpha=alpha, + faster=is_faster, + ) + + if ttest_result.p_value < alpha: + significant[benchmark_test] = ttest_result + else: + not_significant[benchmark_test] = ttest_result + + order = "faster" if is_faster else "slower" + + if not_significant: + log_not_significant_benchmark_result( + not_significant, first_result.label, second_result.label, order, alpha + ) + + if significant: + log_significant_benchmark_result( + significant, first_result.label, second_result.label, order, alpha + ) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + main() diff --git a/tests/test_atds_avro/utils/tf_record_writer.py b/tests/test_atds_avro/utils/tf_record_writer.py new file mode 100644 index 
000000000..f830b0555 --- /dev/null +++ b/tests/test_atds_avro/utils/tf_record_writer.py @@ -0,0 +1,305 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TFRecordWriter""" + +import os +from tests.test_atds_avro.utils.generator.tensor_generator import ( + TensorGeneratorBase, +) +from tests.test_atds_avro.utils.generator.sparse_tensor_generator import ( + SparseTensorGeneratorBase, +) +from tests.test_atds_avro.utils.generator.varlen_tensor_generator import ( + VarLenTensorGeneratorBase, +) +import numpy as np +import tensorflow as tf + +from tests.test_atds_avro.utils.file_writer import FileWriter + + +class TFRecordWriter(FileWriter): + """File writer for TFRecord dataset. + + TFRecordWriter serializes tensors in tf.Example schema and write them + into files in TFRecord format. The written file can be loaded with + tf.data.TFRecordDataset. + """ + + # The TFRecord file extension. + _TFRECORD_EXTENSION = "tfrecords" + + # TFRecord Dataset reads TFRecord data serialized in tf.Example schema. + # tf.Example only supported three dtypes i.e. int64, float32, and bytes. + # See https://www.tensorflow.org/tutorials/load_data/tfrecord#data_types_for_tftrainexample + # The lists below are used to map tensor dtype into supported dtype in + # tf.Example. For example, tf.bool will be mapped to int64. 
+ _INT64_LIST_DTYPE = [tf.bool, tf.int32, tf.int64, tf.uint32, tf.uint64] + _FLOAT_LIST_DTYPE = [tf.float32, tf.float64] + _BYTES_LIST_DTYPE = [tf.string] + + # Sparse tensor is composed of many 1D dense tensors in tf.Example. + # The suffix is used to name these dense tensors given the sparse + # tensor name. For example, a 2D sparse tensor 'feature' was represented + # with three dense tensors with name 'feature/indices0', 'feature/indices1', + # and 'feature/values'. + _SPARSE_INDICES_SUFFIX = "/indices" + _SPARSE_VALUES_SUFFIX = "/values" + + def __init__(self): + """Create a new TFRecordWriter""" + super().__init__() + + @property + def extension(self): + """Return the file extension of the written files.""" + return TFRecordWriter._TFRECORD_EXTENSION + + def _write_to_path(self, dir_path, data_source): + """Generate data based on the data_source and write + files under the given path.""" + scenario = data_source.scenario + + filenames_to_num_records = self._get_filenames_to_num_records(data_source) + for filename in sorted(filenames_to_num_records): + file_path = os.path.join(dir_path, filename) + num_records = filenames_to_num_records[filename] + with tf.io.TFRecordWriter(file_path) as file_writer: + for _ in range(num_records): + features = {} + + for feature_name in scenario: + generator = scenario[feature_name] + tensor = generator.generate() + self._add_tensor_to_features( + generator, feature_name, tensor, features + ) + record_bytes = tf.train.Example( + features=tf.train.Features(feature=features) + ).SerializeToString() + file_writer.write(record_bytes) + + def _write_to_path_from_cached_data(self, dir_path, data_source, dataset): + if os.path.exists(dir_path): + return dir_path + os.makedirs(dir_path) + + scenario = data_source.scenario + filenames_to_num_records = self._get_filenames_to_num_records(data_source) + iterator = iter(dataset) + for filename in sorted(filenames_to_num_records): + file_path = os.path.join(dir_path, filename) + num_records 
= filenames_to_num_records[filename] + + with tf.io.TFRecordWriter(file_path) as file_writer: + for _ in range(num_records): + features = {} + record = iterator.get_next() + + for feature_name in scenario: + generator = scenario[feature_name] + feature = record[feature_name] + self._add_tensor_to_features( + generator, feature_name, feature, features + ) + record_bytes = tf.train.Example( + features=tf.train.Features(feature=features) + ).SerializeToString() + file_writer.write(record_bytes) + + def _add_dense_tensor_to_features(self, name, value, dtype, features): + """Wrap value np.array into tf.train.Feature and add it into features.""" + if np.isscalar(value): + value = [value] # Convert scalar into a list. + else: + value = value.flatten() + + example_dtype = self._map_tensor_dtype_to_example_dtype(dtype) + if example_dtype is tf.int64: + features[name] = tf.train.Feature( + int64_list=tf.train.Int64List(value=value) + ) + elif example_dtype is tf.float32: + features[name] = tf.train.Feature( + float_list=tf.train.FloatList(value=value) + ) + elif example_dtype is tf.string: + features[name] = tf.train.Feature( + bytes_list=tf.train.BytesList(value=value) + ) + else: + raise TypeError(f"Dtype {dtype} is not supported in tf.Example.") + + def _map_tensor_dtype_to_example_dtype(self, dtype): + """As tf.Example only supports tf.float32, tf.int64, and tf.string dtype. 
+ This function maps tensor dtype into the dtype supported by tf.Example.""" + if dtype in TFRecordWriter._INT64_LIST_DTYPE: + return tf.int64 + elif dtype in TFRecordWriter._FLOAT_LIST_DTYPE: + return tf.float32 + elif dtype in TFRecordWriter._BYTES_LIST_DTYPE: + return tf.string + else: + raise TypeError(f"Dtype {dtype} is not supported in tf.Example.") + + def _add_sparse_tensor_to_features(self, name, tensor, features): + indices_name = name + TFRecordWriter._SPARSE_INDICES_SUFFIX + values_name = name + TFRecordWriter._SPARSE_VALUES_SUFFIX + + rank = len(tensor.shape.as_list()) + indices = tensor.indices.numpy() # indices tensor must be a 2D array + # Split indices array along the second dimension so that the split arrays + # contain the indices for separate dimension. For example, + # indices = [[0, 1], [2, 3], [4, 5]] => + # indices_at_dim0 = [[0], [2], [4]] and + # indices_at_dim1 = [[1], [3], [5]]. + split_indices = np.split(indices, rank, axis=1) + for dim in range(rank): + indices_name_at_dim = indices_name + str(dim) + self._add_dense_tensor_to_features( + name=indices_name_at_dim, + value=split_indices[dim], + dtype=tensor.indices.dtype, + features=features, + ) + + self._add_dense_tensor_to_features( + name=values_name, + value=tensor.values.numpy(), + dtype=tensor.values.dtype, + features=features, + ) + + def _add_tensor_to_features(self, generator, feature_name, tensor, features): + spec = generator.spec + if isinstance(spec, tf.TensorSpec): + self._add_dense_tensor_to_features( + feature_name, tensor.numpy(), tensor.dtype, features + ) + elif isinstance(spec, tf.SparseTensorSpec): + if ( + issubclass(generator.get_generator_cls(), VarLenTensorGeneratorBase) + and spec.shape.rank == 1 + ): + self._add_dense_tensor_to_features( + feature_name, + tf.sparse.to_dense(tensor).numpy(), + tensor.dtype, + features, + ) + elif ( + issubclass(generator.get_generator_cls(), SparseTensorGeneratorBase) + and spec.shape.is_fully_defined() + ): + 
self._add_sparse_tensor_to_features(feature_name, tensor, features) + else: + raise ValueError( + "SparseTensorSpec shape must be either a 1D varlen tensor from VarLenTensorGenerator " + f"or fully defined sparse tensor from SparseTensorGenerator. Found {spec}" + ) + else: + raise TypeError(f"Spec {spec} is not supported in TFRecordWriter") + + def create_tf_example_parser_fn(self, data_source, with_batch=False): + """Create tf.Example parser function based on the data_source. + + The parser function can be used for parsing tf.Example. + Example usage: + + ``` python + data_source = DataSource(...) + with TFRecordWriter() as writer: + dir_path = writer.write(data_source) + parser_fn = writer.create_tf_example_parser_fn(data_source) + + pattern = os.path.join(dir_path, f"*.{writer.extension}") + dataset = tf.data.Dataset.list_files(pattern) + dataset = tf.data.TFRecordDataset(dataset) + dataset = dataset.map(parser_fn) + ``` + + Args: + data_source: A DataSource object describe the format of the data. + with_batch: True if the parser function should take a number of + serialized tf.Example proto. Default is false. + + Returns: + A callable function that takes serialized tf.Example proto as input, + and returns the parsed tensor dict. + """ + scenario = data_source.scenario + feature_description = { + name: self._build_tf_example_parsing_config(name, scenario[name]) + for name in scenario + } + + if with_batch: + + def _batch_examples_parser_fn(example_proto): + return tf.io.parse_example(example_proto, feature_description) + + return _batch_examples_parser_fn + + def _single_example_parser_fn(example_proto): + return tf.io.parse_single_example(example_proto, feature_description) + + return _single_example_parser_fn + + def _build_tf_example_parsing_config(self, name, generator): + """Build tf.Example parsing config + + Args: + name: A str feature name. + generator: Generator for this tensor. + + Returns: + tf.io.FixedLenFeature if generator is TensorGenerator. 
+ tf.io.SparseFeature if generator is SparseTensorGenerator. + tf.io.VarLenFeature if generator is VarlenTensorGenerator. + + Raises: + TypeError: if generator is not TensorGenerator, SparseTensorGenerator, or VarlenTensorGenerator. + """ + spec = generator.spec + example_dtype = self._map_tensor_dtype_to_example_dtype(spec.dtype) + if isinstance(spec, tf.TensorSpec): + return tf.io.FixedLenFeature(shape=spec.shape, dtype=example_dtype) + elif isinstance(spec, tf.SparseTensorSpec): + if ( + issubclass(generator.get_generator_cls(), VarLenTensorGeneratorBase) + and spec.shape.rank == 1 + ): + return tf.io.VarLenFeature(dtype=example_dtype) + elif ( + issubclass(generator.get_generator_cls(), SparseTensorGeneratorBase) + and spec.shape.is_fully_defined() + ): + index_name = name + TFRecordWriter._SPARSE_INDICES_SUFFIX + rank = len(spec.shape) + index_key = [f"{index_name}{dim}" for dim in range(rank)] + value_key = name + TFRecordWriter._SPARSE_VALUES_SUFFIX + return tf.io.SparseFeature( + index_key=index_key, + value_key=value_key, + dtype=example_dtype, + size=spec.shape, + ) + else: + raise ValueError( + "SparseTensorSpec shape must be either a 1D varlen tensor from VarLenTensorGenerator " + f"or fully defined sparse tensor from SparseTensorGenerator. Found {spec}" + ) + else: + raise TypeError(f"Spec {spec} is not supported in TFRecordWriter.") diff --git a/tools/docker/devel.Dockerfile b/tools/docker/devel.Dockerfile index ab6e8812c..6cb5d39c5 100644 --- a/tools/docker/devel.Dockerfile +++ b/tools/docker/devel.Dockerfile @@ -40,6 +40,8 @@ ARG PIP_ADD_PACKAGES="" RUN /bin/bash -c "source activate tfio-dev && python -m pip install \ avro-python3 \ + python-snappy \ + parameterized \ pytest \ pytest-benchmark \ pylint \ @@ -48,6 +50,7 @@ RUN /bin/bash -c "source activate tfio-dev && python -m pip install \ google-cloud-bigquery-storage==1.1.0 \ pyarrow==${ARROW_VERSION} \ pandas \ + scipy \ fastavro \ gast==0.2.2 \ ${PIP_ADD_PACKAGES} \