From dec68b30e09a2c933ef918e5b8ad2fdd2418887e Mon Sep 17 00:00:00 2001 From: oliverhu Date: Fri, 16 Apr 2021 22:52:54 -0700 Subject: [PATCH 01/18] add dummy orc ops --- WORKSPACE | 20 ++++++++ tensorflow_io/core/BUILD | 16 +++++++ tensorflow_io/core/kernels/orc/hello-time.cc | 49 ++++++++++++++++++++ third_party/liborc.BUILD | 29 ++++++++++++ 4 files changed, 114 insertions(+) create mode 100644 tensorflow_io/core/kernels/orc/hello-time.cc create mode 100644 third_party/liborc.BUILD diff --git a/WORKSPACE b/WORKSPACE index 45de019a8..f1f4bd6b2 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1121,3 +1121,23 @@ http_archive( "https://github.com/tinyobjloader/tinyobjloader/archive/v2.0.0rc8.tar.gz", ], ) + +http_archive( + name = "rules_foreign_cc", + sha256 = "e60cfd0a8426fa4f5fd2156e768493ca62b87d125cb35e94c44e79a3f0d8635f", + strip_prefix = "rules_foreign_cc-0.2.0", + url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.2.0.zip", +) +load("@rules_foreign_cc//:workspace_definitions.bzl", "rules_foreign_cc_dependencies") + +rules_foreign_cc_dependencies() + +http_archive( + name = "liborc", + build_file = "//third_party:liborc.BUILD", + sha256 = "df5885db8fa2e4435db8d486c6c7fc4e2c565d6197eee27729cf9cbdf36353c0", + strip_prefix = "orc-rel-release-1.6.5", + urls = [ + "https://github.com/apache/orc/archive/refs/tags/rel/release-1.6.5.tar.gz", + ], +) \ No newline at end of file diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD index 41141c2b1..9810a925a 100644 --- a/tensorflow_io/core/BUILD +++ b/tensorflow_io/core/BUILD @@ -368,6 +368,21 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "orc_ops", + srcs = [ + "kernels/orc/hello-time.cc", + ], + hdrs = [ + ], + copts = tf_io_copts(), + linkstatic = True, + deps = [ + "@liborc", + ], + alwayslink = 1, +) + cc_library( name = "text_ops", srcs = [ @@ -720,6 +735,7 @@ cc_binary( "//tensorflow_io/bigquery:bigquery_ops", "//tensorflow_io/core:audio_video_ops", "//tensorflow_io/core:avro_ops", + "//tensorflow_io/core:orc_ops", "//tensorflow_io/core:cpuinfo", "//tensorflow_io/core:file_ops", "//tensorflow_io/core:grpc_ops", diff --git a/tensorflow_io/core/kernels/orc/hello-time.cc b/tensorflow_io/core/kernels/orc/hello-time.cc new file mode 100644 index 000000000..d65bad3f4 --- /dev/null +++ b/tensorflow_io/core/kernels/orc/hello-time.cc @@ -0,0 +1,49 @@ +#include +#include +#include "orc/orc-config.hh" +#include +#include +#include + +void print_localtime() +{ + std::time_t result = std::time(nullptr); + std::cout << std::asctime(std::localtime(&result)); +} + +int main(int argc, char const *argv[]) +{ + std::cout << "test\n"; + std::list read_cols = {0, 1, 2, 3, 4}; + std::string file_path = "./iris.orc"; + + orc::RowReaderOptions row_reader_opts; + row_reader_opts.include(read_cols); + + orc::ReaderOptions reader_opts; + std::unique_ptr reader = orc::createReader(orc::readFile(file_path), reader_opts); + std::unique_ptr row_reader = reader->createRowReader(row_reader_opts); + + std::unique_ptr batch = row_reader->createRowBatch(24); + + //double field + auto *fields = dynamic_cast(batch.get()); + auto *col0 = dynamic_cast(fields->fields[0]); + double *buffer1 = col0->data.data(); + + //string field + auto *col4 = dynamic_cast(fields->fields[4]); + char **buffer2 = col4->data.data(); + long *lengths = col4->length.data(); + + while (row_reader->next(*batch)) + { + for (uint32_t r = 0; r < batch->numElements; ++r) + { + std::cout << "line " << buffer1[r] << "," << std::string(buffer2[r], lengths[r]) << "\n"; + } + //std::cout << "this batch nums" << " " << batch->numElements << " " << "lines\n"; + } + + return 0; +} diff --git a/third_party/liborc.BUILD b/third_party/liborc.BUILD new file mode 100644 index 000000000..9df9cb6d6 --- /dev/null +++ b/third_party/liborc.BUILD @@ -0,0 +1,29 @@ +load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") + +filegroup( + name = "all_srcs", + srcs = glob(["**"]), + visibility = ["//visibility:public"], +) + +cmake( + name = "liborc", + lib_source = "@liborc//:all_srcs", + cmake_options = [ + "-DBUILD_JAVA=OFF", + "-DCMAKE_BUILD_TYPE=DEBUG" + ], + visibility = ["//visibility:public"], + out_include_dir = "include", + out_static_libs = [ + "liborc.a", + "libprotoc.a", + "libz.a", + "liblz4.a", + "libprotobuf.a", + "libsnappy.a", + "libzstd.a", + "libhdfspp_static.a", + ], + tags = ["requires-network"], +) \ No newline at end of file From 96e612cbc288881c31097eabbe320aa4e777013e Mon Sep 17 00:00:00 2001 From: oliverhu Date: Mon, 19 Apr 2021 23:49:46 -0700 Subject: [PATCH 02/18] Linker kept throwing error in tf-io, but works fine in separate project.. desperating /usr/bin/gcc -U_FORTIFY_SOURCE -fstack-protector -Wall -Wunused-but-set-parameter -Wno-free-nonheap-object -fno-omit-frame-pointer -fno-canonical-system-headers -Wno-builtin-macro-redefined -D__DATE__="redacted" -D__TIMESTAMP__="redacted" -D__TIME__="redacted" -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14 -std=c++11 -Wall -Wno-unknown-pragmas -Wconversion -g -std=c++11 -Wall -Wno-unknown-pragmas -Wconversion -O3 -DNDEBUG -fuse-ld=gold -Wl,-no-as-needed -Wl,-z,relro,-z,now -B/usr/bin -pass-exit-codes -lstdc++ -lm -rdynamic tools/src/CMakeFiles/orc-statistics.dir/FileStatistics.cc.o -o orc-statistics -Wl,-rpath,/usr/local/lib: c++/src/liborc.a c++/libs/thirdparty/libhdfspp_ep-install/lib/libhdfspp_static.a -lprotobuf -pthread c++/libs/thirdparty/zlib_ep-install/lib/libz.a c++/libs/thirdparty/snappy_ep-install/lib/libsnappy.a c++/libs/thirdparty/lz4_ep-install/lib/liblz4.a c++/libs/thirdparty/zstd_ep-install/lib/libzstd.a /usr/local/lib/libsasl2.so --- WORKSPACE | 8 +++---- demo/BUILD | 16 +++++++++++++ demo/hello-time.cc | 49 ++++++++++++++++++++++++++++++++++++++++ tensorflow_io/core/BUILD | 7 +++++- third_party/liborc.BUILD | 6 ++++- tmp/test.sh | 13 +++++++++++ 6 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 demo/BUILD create mode 100644 demo/hello-time.cc create mode 100644 tmp/test.sh diff --git a/WORKSPACE b/WORKSPACE index f1f4bd6b2..96eaac380 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1135,9 +1135,9 @@ rules_foreign_cc_dependencies() http_archive( name = "liborc", build_file = "//third_party:liborc.BUILD", - sha256 = "df5885db8fa2e4435db8d486c6c7fc4e2c565d6197eee27729cf9cbdf36353c0", - strip_prefix = "orc-rel-release-1.6.5", + sha256 = "abdffe48b8d2e7776c3b541ee2241401e49774941ca4a8c759e5d795daec8a45", + strip_prefix = "orc-rel-release-1.6.7", urls = [ - "https://github.com/apache/orc/archive/refs/tags/rel/release-1.6.5.tar.gz", + "https://github.com/apache/orc/archive/refs/tags/rel/release-1.6.7.tar.gz", ], -) \ No newline at end of file +) diff --git a/demo/BUILD b/demo/BUILD new file mode 100644 index 000000000..6c3ea6bfa --- /dev/null +++ b/demo/BUILD @@ -0,0 +1,16 @@ +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") + +cc_binary( + name = "hello-time", + srcs = ["hello-time.cc"], + visibility = ["//visibility:public"], + linkopts = [ + "-lcrypto", + "-pthread", + "-L/usr/local/lib/", + "-lsasl2", + ], + deps = [ + "@liborc", + ] +) \ No newline at end of file diff --git a/demo/hello-time.cc b/demo/hello-time.cc new file mode 100644 index 000000000..d65bad3f4 --- /dev/null +++ b/demo/hello-time.cc @@ -0,0 +1,49 @@ +#include +#include +#include "orc/orc-config.hh" +#include +#include +#include + +void print_localtime() +{ + std::time_t result = std::time(nullptr); + std::cout << std::asctime(std::localtime(&result)); +} + +int main(int argc, char const *argv[]) +{ + std::cout << "test\n"; + std::list read_cols = {0, 1, 2, 3, 4}; + std::string file_path = "./iris.orc"; + + orc::RowReaderOptions row_reader_opts; + row_reader_opts.include(read_cols); + + orc::ReaderOptions reader_opts; + std::unique_ptr reader = orc::createReader(orc::readFile(file_path), reader_opts); + std::unique_ptr row_reader = reader->createRowReader(row_reader_opts); + + std::unique_ptr batch = row_reader->createRowBatch(24); + + //double field + auto *fields = dynamic_cast(batch.get()); + auto *col0 = dynamic_cast(fields->fields[0]); + double *buffer1 = col0->data.data(); + + //string field + auto *col4 = dynamic_cast(fields->fields[4]); + char **buffer2 = col4->data.data(); + long *lengths = col4->length.data(); + + while (row_reader->next(*batch)) + { + for (uint32_t r = 0; r < batch->numElements; ++r) + { + std::cout << "line " << buffer1[r] << "," << std::string(buffer2[r], lengths[r]) << "\n"; + } + //std::cout << "this batch nums" << " " << batch->numElements << " " << "lines\n"; + } + + return 0; +} diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD index 9810a925a..c4d7f7e0d 100644 --- a/tensorflow_io/core/BUILD +++ b/tensorflow_io/core/BUILD @@ -376,7 +376,12 @@ cc_library( hdrs = [ ], copts = tf_io_copts(), - linkstatic = True, + linkopts = [ + "-lcrypto", + "-pthread", + "-L/usr/local/lib/", + "-lsasl2", + ], deps = [ "@liborc", ], diff --git a/third_party/liborc.BUILD b/third_party/liborc.BUILD index 9df9cb6d6..1b676c5ac 100644 --- a/third_party/liborc.BUILD +++ b/third_party/liborc.BUILD @@ -11,7 +11,11 @@ cmake( lib_source = "@liborc//:all_srcs", cmake_options = [ "-DBUILD_JAVA=OFF", - "-DCMAKE_BUILD_TYPE=DEBUG" + "-DBUILD_TESTING=OFF", + "-DBUILD_CPP_TESTS=OFF", + "-DSTOP_BUILD_ON_WARNING=OFF", + # "-Werror", + # "-DCMAKE_BUILD_TYPE=DEBUG" ], visibility = ["//visibility:public"], out_include_dir = "include", diff --git a/tmp/test.sh b/tmp/test.sh new file mode 100644 index 000000000..beafcbdb6 --- /dev/null +++ b/tmp/test.sh @@ -0,0 +1,13 @@ +BUILD_SCRIPT=bazel-out/k8-fastbuild/bin/external/liborc/liborc_foreign_cc/build_script.sh +EXT_BUILD_ROOT=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io +BUILD_LOG=bazel-out/k8-fastbuild/bin/external/liborc/liborc_foreign_cc/CMake.log +PWD=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io +TF_HEADER_DIR=/home/pi/p3/lib/python3.8/site-packages/tensorflow/include +BUILD_WRAPPER_SCRIPT=bazel-out/k8-fastbuild/bin/external/liborc/liborc_foreign_cc/wrapper_build_script.sh +TF_SHARED_LIBRARY_NAME=libtensorflow_framework.so.2 +TMPDIR=/tmp +EXT_BUILD_DEPS=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/bazel-out/k8-fastbuild/bin/external/liborc/liborc.ext_build_deps +BUILD_TMPDIR=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/bazel-out/k8-fastbuild/bin/external/liborc/liborc.build_tmpdir +SHLVL=2 +CMAKE_MAKE_PROGRAM=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/bazel-out/host/bin/external/rules_foreign_cc/toolchains/make/bin/make LD_LIBRARY_PATH=:/usr/local/cuda-11.2/lib64 INSTALLDIR=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/bazel-out/k8-fastbuild/bin/external/liborc/liborc TF_SHARED_LIBRARY_DIR=/home/pi/p3/lib/python3.8/site-packages/tensorflow PATH=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io:/home/pi/.cache/bazelisk/downloads/bazelbuild/bazel-3.7.2-linux-x86_64/bin:/home/pi/.local/bin:/usr/local/cuda-11.2/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin /home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/external/cmake-3.19.6-Linux-x86_64/bin/cmake -DCMAKE_TOOLCHAIN_FILE=crosstool_bazel.cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=liborc -DCMAKE_PREFIX_PATH=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb7715 09e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/bazel-out/k8-fastbuild/bin/external/liborc/liborc.ext_build_deps -DCMAKE_RANLIB= -DBUILD_JAVA=OFF -DBUILD_TESTING=OFF -DBUILD_CPP_TESTS=OFF -DSTOP_BUILD_ON_WARNING=OFF -G 'Unix Makefiles' /home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/external/liborc + From d7505910ef5dbab465d4ac5632996b53df6cc8e8 Mon Sep 17 00:00:00 2001 From: oliverhu Date: Wed, 21 Apr 2021 11:38:24 -0700 Subject: [PATCH 03/18] fix compiler flag --- third_party/liborc.BUILD | 5 ----- tools/build/configure.py | 3 ++- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/third_party/liborc.BUILD b/third_party/liborc.BUILD index 1b676c5ac..6b2b051b1 100644 --- a/third_party/liborc.BUILD +++ b/third_party/liborc.BUILD @@ -21,11 +21,6 @@ cmake( out_include_dir = "include", out_static_libs = [ "liborc.a", - "libprotoc.a", - "libz.a", - "liblz4.a", - "libprotobuf.a", - "libsnappy.a", "libzstd.a", "libhdfspp_static.a", ], diff --git a/tools/build/configure.py b/tools/build/configure.py index cd2dd7dbd..69d19a4db 100644 --- a/tools/build/configure.py +++ b/tools/build/configure.py @@ -27,6 +27,7 @@ def write_config(): include_list = [] opt_list = [] + opt_list.append("-fPIC") for arg in cflags: if inc_regex.match(arg): @@ -35,7 +36,7 @@ def write_config(): opt_list.append(arg) else: print("WARNING: Unexpected cflag item {}".format(arg)) - + opt_list.remove("-D_GLIBCXX_USE_CXX11_ABI=0") if len(include_list) != 1: print( "ERROR: Expected a single include directory in " From 00359fa336376f7bd48d92fc8a8f5fb587802e35 Mon Sep 17 00:00:00 2001 From: oliverhu Date: Wed, 21 Apr 2021 11:39:14 -0700 Subject: [PATCH 04/18] remove demo proj --- demo/BUILD | 16 --------------- demo/hello-time.cc | 49 ---------------------------------------------- 2 files changed, 65 deletions(-) delete mode 100644 demo/BUILD delete mode 100644 demo/hello-time.cc diff --git a/demo/BUILD b/demo/BUILD deleted file mode 100644 index 6c3ea6bfa..000000000 --- a/demo/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") - -cc_binary( - name = "hello-time", - srcs = ["hello-time.cc"], - visibility = ["//visibility:public"], - linkopts = [ - "-lcrypto", - "-pthread", - "-L/usr/local/lib/", - "-lsasl2", - ], - deps = [ - "@liborc", - ] -) \ No newline at end of file diff --git a/demo/hello-time.cc b/demo/hello-time.cc deleted file mode 100644 index d65bad3f4..000000000 --- a/demo/hello-time.cc +++ /dev/null @@ -1,49 +0,0 @@ -#include -#include -#include "orc/orc-config.hh" -#include -#include -#include - -void print_localtime() -{ - std::time_t result = std::time(nullptr); - std::cout << std::asctime(std::localtime(&result)); -} - -int main(int argc, char const *argv[]) -{ - std::cout << "test\n"; - std::list read_cols = {0, 1, 2, 3, 4}; - std::string file_path = "./iris.orc"; - - orc::RowReaderOptions row_reader_opts; - row_reader_opts.include(read_cols); - - orc::ReaderOptions reader_opts; - std::unique_ptr reader = orc::createReader(orc::readFile(file_path), reader_opts); - std::unique_ptr row_reader = reader->createRowReader(row_reader_opts); - - std::unique_ptr batch = row_reader->createRowBatch(24); - - //double field - auto *fields = dynamic_cast(batch.get()); - auto *col0 = dynamic_cast(fields->fields[0]); - double *buffer1 = col0->data.data(); - - //string field - auto *col4 = dynamic_cast(fields->fields[4]); - char **buffer2 = col4->data.data(); - long *lengths = col4->length.data(); - - while (row_reader->next(*batch)) - { - for (uint32_t r = 0; r < batch->numElements; ++r) - { - std::cout << "line " << buffer1[r] << "," << std::string(buffer2[r], lengths[r]) << "\n"; - } - //std::cout << "this batch nums" << " " << batch->numElements << " " << "lines\n"; - } - - return 0; -} From 22e52b38831944c08f569069bc5eb55b54f59896 Mon Sep 17 00:00:00 2001 From: oliverhu Date: Wed, 21 Apr 2021 11:41:10 -0700 Subject: [PATCH 05/18] clean up --- tmp/test.sh | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 tmp/test.sh diff --git a/tmp/test.sh b/tmp/test.sh deleted file mode 100644 index beafcbdb6..000000000 --- a/tmp/test.sh +++ /dev/null @@ -1,13 +0,0 @@ -BUILD_SCRIPT=bazel-out/k8-fastbuild/bin/external/liborc/liborc_foreign_cc/build_script.sh -EXT_BUILD_ROOT=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io -BUILD_LOG=bazel-out/k8-fastbuild/bin/external/liborc/liborc_foreign_cc/CMake.log -PWD=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io -TF_HEADER_DIR=/home/pi/p3/lib/python3.8/site-packages/tensorflow/include -BUILD_WRAPPER_SCRIPT=bazel-out/k8-fastbuild/bin/external/liborc/liborc_foreign_cc/wrapper_build_script.sh -TF_SHARED_LIBRARY_NAME=libtensorflow_framework.so.2 -TMPDIR=/tmp -EXT_BUILD_DEPS=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/bazel-out/k8-fastbuild/bin/external/liborc/liborc.ext_build_deps -BUILD_TMPDIR=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/bazel-out/k8-fastbuild/bin/external/liborc/liborc.build_tmpdir -SHLVL=2 -CMAKE_MAKE_PROGRAM=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/bazel-out/host/bin/external/rules_foreign_cc/toolchains/make/bin/make LD_LIBRARY_PATH=:/usr/local/cuda-11.2/lib64 INSTALLDIR=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/bazel-out/k8-fastbuild/bin/external/liborc/liborc TF_SHARED_LIBRARY_DIR=/home/pi/p3/lib/python3.8/site-packages/tensorflow PATH=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io:/home/pi/.cache/bazelisk/downloads/bazelbuild/bazel-3.7.2-linux-x86_64/bin:/home/pi/.local/bin:/usr/local/cuda-11.2/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin /home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/external/cmake-3.19.6-Linux-x86_64/bin/cmake -DCMAKE_TOOLCHAIN_FILE=crosstool_bazel.cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=liborc -DCMAKE_PREFIX_PATH=/home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb7715 09e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/bazel-out/k8-fastbuild/bin/external/liborc/liborc.ext_build_deps -DCMAKE_RANLIB= -DBUILD_JAVA=OFF -DBUILD_TESTING=OFF -DBUILD_CPP_TESTS=OFF -DSTOP_BUILD_ON_WARNING=OFF -G 'Unix Makefiles' /home/pi/.cache/bazel/_bazel_pi/be6ac8eba0db45fb771509e53438aa5b/sandbox/linux-sandbox/1/execroot/org_tensorflow_io/external/liborc - From 7b0c83575fd4d3740694573085a425c7908e80f8 Mon Sep 17 00:00:00 2001 From: oliverhu Date: Wed, 21 Apr 2021 12:11:33 -0700 Subject: [PATCH 06/18] more clean ups --- tensorflow_io/core/BUILD | 8 -------- tensorflow_io/core/kernels/orc/hello-time.cc | 14 +++++--------- third_party/liborc.BUILD | 4 ---- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD index c4d7f7e0d..251f8651f 100644 --- a/tensorflow_io/core/BUILD +++ b/tensorflow_io/core/BUILD @@ -373,15 +373,7 @@ cc_library( srcs = [ "kernels/orc/hello-time.cc", ], - hdrs = [ - ], copts = tf_io_copts(), - linkopts = [ - "-lcrypto", - "-pthread", - "-L/usr/local/lib/", - "-lsasl2", - ], deps = [ "@liborc", ], diff --git a/tensorflow_io/core/kernels/orc/hello-time.cc b/tensorflow_io/core/kernels/orc/hello-time.cc index d65bad3f4..16fd8b354 100644 --- a/tensorflow_io/core/kernels/orc/hello-time.cc +++ b/tensorflow_io/core/kernels/orc/hello-time.cc @@ -5,15 +5,13 @@ #include #include -void print_localtime() -{ +void print_localtime() { std::time_t result = std::time(nullptr); std::cout << std::asctime(std::localtime(&result)); } -int main(int argc, char const *argv[]) -{ - std::cout << "test\n"; +// Sample ORC file: https://github.com/harbby/cmake_ExternalProject_demo/blob/main/iris.orc +int main(int argc, char const *argv[]) { std::list read_cols = {0, 1, 2, 3, 4}; std::string file_path = "./iris.orc"; @@ -36,10 +34,8 @@ int main(int argc, char const *argv[]) char **buffer2 = col4->data.data(); long *lengths = col4->length.data(); - while (row_reader->next(*batch)) - { - for (uint32_t r = 0; r < batch->numElements; ++r) - { + while (row_reader->next(*batch)) { + for (uint32_t r = 0; r < batch->numElements; ++r) { std::cout << "line " << buffer1[r] << "," << std::string(buffer2[r], lengths[r]) << "\n"; } //std::cout << "this batch nums" << " " << batch->numElements << " " << "lines\n"; diff --git a/third_party/liborc.BUILD b/third_party/liborc.BUILD index 6b2b051b1..6e9cebeda 100644 --- a/third_party/liborc.BUILD +++ b/third_party/liborc.BUILD @@ -11,11 +11,7 @@ cmake( lib_source = "@liborc//:all_srcs", cmake_options = [ "-DBUILD_JAVA=OFF", - "-DBUILD_TESTING=OFF", "-DBUILD_CPP_TESTS=OFF", - "-DSTOP_BUILD_ON_WARNING=OFF", - # "-Werror", - # "-DCMAKE_BUILD_TYPE=DEBUG" ], visibility = ["//visibility:public"], out_include_dir = "include", From 0e5381859ab2d6036ad6cf50404d3269e97dbafb Mon Sep 17 00:00:00 2001 From: oliverhu Date: Wed, 21 Apr 2021 13:09:32 -0700 Subject: [PATCH 07/18] fix lint --- WORKSPACE | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 96eaac380..0bd2f1a8e 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1123,11 +1123,12 @@ http_archive( ) http_archive( - name = "rules_foreign_cc", - sha256 = "e60cfd0a8426fa4f5fd2156e768493ca62b87d125cb35e94c44e79a3f0d8635f", - strip_prefix = "rules_foreign_cc-0.2.0", - url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.2.0.zip", + name = "rules_foreign_cc", + sha256 = "e60cfd0a8426fa4f5fd2156e768493ca62b87d125cb35e94c44e79a3f0d8635f", + strip_prefix = "rules_foreign_cc-0.2.0", + url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.2.0.zip", ) + load("@rules_foreign_cc//:workspace_definitions.bzl", "rules_foreign_cc_dependencies") rules_foreign_cc_dependencies() From 744d17b9850576f088cbf5aaa6f120587b46cf8f Mon Sep 17 00:00:00 2001 From: oliverhu Date: Wed, 21 Apr 2021 13:29:23 -0700 Subject: [PATCH 08/18] fix lint --- third_party/liborc.BUILD | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/third_party/liborc.BUILD b/third_party/liborc.BUILD index 6e9cebeda..31ce68f21 100644 --- a/third_party/liborc.BUILD +++ b/third_party/liborc.BUILD @@ -8,17 +8,17 @@ filegroup( cmake( name = "liborc", - lib_source = "@liborc//:all_srcs", cmake_options = [ - "-DBUILD_JAVA=OFF", - "-DBUILD_CPP_TESTS=OFF", + "-DBUILD_JAVA=OFF", + "-DBUILD_CPP_TESTS=OFF", ], - visibility = ["//visibility:public"], + lib_source = "@liborc//:all_srcs", out_include_dir = "include", out_static_libs = [ - "liborc.a", - "libzstd.a", - "libhdfspp_static.a", + "liborc.a", + "libzstd.a", + "libhdfspp_static.a", ], tags = ["requires-network"], -) \ No newline at end of file + visibility = ["//visibility:public"], +) From d9c24a35a40db2698edaab645ddd25dfb4345c45 Mon Sep 17 00:00:00 2001 From: oliverhu Date: Wed, 21 Apr 2021 13:41:51 -0700 Subject: [PATCH 09/18] fix linting for real --- tensorflow_io/core/kernels/orc/hello-time.cc | 27 ++++++++++++-------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tensorflow_io/core/kernels/orc/hello-time.cc b/tensorflow_io/core/kernels/orc/hello-time.cc index 16fd8b354..66b4cbacf 100644 --- a/tensorflow_io/core/kernels/orc/hello-time.cc +++ b/tensorflow_io/core/kernels/orc/hello-time.cc @@ -1,16 +1,18 @@ #include #include -#include "orc/orc-config.hh" -#include #include #include +#include + +#include "orc/orc-config.hh" void print_localtime() { std::time_t result = std::time(nullptr); std::cout << std::asctime(std::localtime(&result)); } -// Sample ORC file: https://github.com/harbby/cmake_ExternalProject_demo/blob/main/iris.orc +// Sample ORC file: +// https://github.com/harbby/cmake_ExternalProject_demo/blob/main/iris.orc int main(int argc, char const *argv[]) { std::list read_cols = {0, 1, 2, 3, 4}; std::string file_path = "./iris.orc"; @@ -19,26 +21,31 @@ int main(int argc, char const *argv[]) { row_reader_opts.include(read_cols); orc::ReaderOptions reader_opts; - std::unique_ptr reader = orc::createReader(orc::readFile(file_path), reader_opts); - std::unique_ptr row_reader = reader->createRowReader(row_reader_opts); + std::unique_ptr reader = + orc::createReader(orc::readFile(file_path), reader_opts); + std::unique_ptr row_reader = + reader->createRowReader(row_reader_opts); - std::unique_ptr batch = row_reader->createRowBatch(24); + std::unique_ptr batch = + row_reader->createRowBatch(24); - //double field + // double field auto *fields = dynamic_cast(batch.get()); auto *col0 = dynamic_cast(fields->fields[0]); double *buffer1 = col0->data.data(); - //string field + // string field auto *col4 = dynamic_cast(fields->fields[4]); char **buffer2 = col4->data.data(); long *lengths = col4->length.data(); while (row_reader->next(*batch)) { for (uint32_t r = 0; r < batch->numElements; ++r) { - std::cout << "line " << buffer1[r] << "," << std::string(buffer2[r], lengths[r]) << "\n"; + std::cout << "line " << buffer1[r] << "," + << std::string(buffer2[r], lengths[r]) << "\n"; } - //std::cout << "this batch nums" << " " << batch->numElements << " " << "lines\n"; + // std::cout << "this batch nums" << " " << batch->numElements << " " << + // "lines\n"; } return 0; From 9bce24e4e56b90af9b9984003e94fc35b84911f9 Mon Sep 17 00:00:00 2001 From: oliverhu Date: Sat, 24 Apr 2021 15:58:55 -0700 Subject: [PATCH 10/18] Replace Bazel CMake with raw bazel rules --- WORKSPACE | 14 +- tensorflow_io/core/BUILD | 13 ++ tensorflow_io/core/kernels/orc/hello-time.cc | 2 +- third_party/liborc.BUILD | 140 ++++++++++++++++--- tools/build/configure.py | 2 - 5 files changed, 140 insertions(+), 31 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 0bd2f1a8e..3babfad1f 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1122,20 +1122,12 @@ http_archive( ], ) -http_archive( - name = "rules_foreign_cc", - sha256 = "e60cfd0a8426fa4f5fd2156e768493ca62b87d125cb35e94c44e79a3f0d8635f", - strip_prefix = "rules_foreign_cc-0.2.0", - url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.2.0.zip", -) - -load("@rules_foreign_cc//:workspace_definitions.bzl", "rules_foreign_cc_dependencies") - -rules_foreign_cc_dependencies() - http_archive( name = "liborc", build_file = "//third_party:liborc.BUILD", + patch_cmds = [ + "tar -xzf c++/libs/libhdfspp/libhdfspp.tar.gz -C c++/libs/libhdfspp", + ], sha256 = "abdffe48b8d2e7776c3b541ee2241401e49774941ca4a8c759e5d795daec8a45", strip_prefix = "orc-rel-release-1.6.7", urls = [ diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD index 251f8651f..666bb9449 100644 --- a/tensorflow_io/core/BUILD +++ b/tensorflow_io/core/BUILD @@ -376,10 +376,23 @@ cc_library( copts = tf_io_copts(), deps = [ "@liborc", + ":orc_binary", ], alwayslink = 1, ) +cc_binary( + name = "orc_binary", + srcs = [ + "kernels/orc/hello-time.cc", + ], + copts = tf_io_copts(), + # linkshared = 1, + deps = [ + "@liborc", + ] +) + cc_library( name = "text_ops", srcs = [ diff --git a/tensorflow_io/core/kernels/orc/hello-time.cc b/tensorflow_io/core/kernels/orc/hello-time.cc index 66b4cbacf..d50daae58 100644 --- a/tensorflow_io/core/kernels/orc/hello-time.cc +++ b/tensorflow_io/core/kernels/orc/hello-time.cc @@ -37,7 +37,7 @@ int main(int argc, char const *argv[]) { // string field auto *col4 = dynamic_cast(fields->fields[4]); char **buffer2 = col4->data.data(); - long *lengths = col4->length.data(); + int64_t *lengths = col4->length.data(); while (row_reader->next(*batch)) { for (uint32_t r = 0; r < batch->numElements; ++r) { diff --git a/third_party/liborc.BUILD b/third_party/liborc.BUILD index 31ce68f21..ff0dd198f 100644 --- a/third_party/liborc.BUILD +++ b/third_party/liborc.BUILD @@ -1,24 +1,130 @@ -load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") +package(default_visibility = ["//visibility:public"]) -filegroup( - name = "all_srcs", - srcs = glob(["**"]), - visibility = ["//visibility:public"], -) +licenses(["notice"]) # Apache 2.0 -cmake( +exports_files(["LICENSE"]) + +# Note: orc-proto-wrapper.cc includes orc_proto.pb.cc +# and prefix with Adaptor.hh. The Adaptor.hh +# was supposed to capture platform discrepancies. +# However, since orc_proto.pb.cc can be compiled +# with cc_proto_library successfully, there is no need +# for orc-proto-wrapper.cc. +cc_library( name = "liborc", - cmake_options = [ - "-DBUILD_JAVA=OFF", - "-DBUILD_CPP_TESTS=OFF", + srcs = glob( + [ + "c++/src/*.cc", + "c++/src/*.hh", + "c++/src/io/*.cc", + "c++/src/io/*.hh", + "c++/src/wrap/*.cc", + "c++/src/wrap/*.hh", + "c++/src/wrap/*.h", + "c++/include/orc/*.hh", + ], + exclude = [ + "c++/src/wrap/orc-proto-wrapper.cc", + "c++/src/OrcHdfsFile.cc", + ], + ) + select({ + "@bazel_tools//src/conditions:windows": [], + "//conditions:default": [ + "c++/src/OrcHdfsFile.cc", + ], + }), + hdrs = [ + "c++/include/orc/orc-config.hh", + "c++/src/Adaptor.hh", ], - lib_source = "@liborc//:all_srcs", - out_include_dir = "include", - out_static_libs = [ - "liborc.a", - "libzstd.a", - "libhdfspp_static.a", + copts = [], + defines = [], + includes = [ + "c++/include", + "c++/src", + "c++/src/io", + "c++/src/wrap", + "proto", ], - tags = ["requires-network"], + linkopts = [], visibility = ["//visibility:public"], + deps = [ + ":libhdfspp", + ":orc_cc_proto", + "@lz4", + "@snappy", + "@zlib", + "@zstd", + ], +) + +cc_library( + name = "libhdfspp", + srcs = glob( + [ + "c++/libs/libhdfspp/include/hdfspp/*.h", + ], + exclude = [ + ], + ), + hdrs = [ + ], + copts = [], + defines = [], + includes = [ + "c++/libs/libhdfspp/include", + ], + deps = [], ) + +proto_library( + name = "orc_proto", + srcs = ["proto/orc_proto.proto"], +) + +cc_proto_library( + name = "orc_cc_proto", + deps = [":orc_proto"], +) + +genrule( + name = "orc-config_hh", + srcs = ["c++/include/orc/orc-config.hh.in"], + outs = ["c++/include/orc/orc-config.hh"], + cmd = ("sed " + + "-e 's/@ORC_VERSION@/1.6.7/g' " + + "-e 's/cmakedefine/define/g' " + + "$< >$@"), +) + +genrule( + name = "Adaptor_hh", + srcs = ["c++/src/Adaptor.hh.in"], + outs = ["c++/src/Adaptor.hh"], + cmd = select({ + "@bazel_tools//src/conditions:windows": ( + "sed " + + "-e 's/cmakedefine HAS_PREAD/undef HAS_PREAD/g' " + + "-e 's/cmakedefine NEEDS_REDUNDANT_MOVE/undef NEEDS_REDUNDANT_MOVE/g' " + + "-e 's/cmakedefine NEEDS_Z_PREFIX/undef NEEDS_Z_PREFIX/g' " + + "-e 's/cmakedefine/define/g' " + + "$< >$@" + ), + "@bazel_tools//src/conditions:darwin": ( + "sed " + + "-e 's/cmakedefine NEEDS_REDUNDANT_MOVE/undef NEEDS_REDUNDANT_MOVE/g' " + + "-e 's/cmakedefine NEEDS_Z_PREFIX/undef NEEDS_Z_PREFIX/g' " + + "-e 's/cmakedefine/define/g' " + + "$< >$@" + ), + "//conditions:default": ( + "sed " + + "-e 's/cmakedefine INT64_IS_LL/undef INT64_IS_LL/g' " + + "-e 's/cmakedefine HAS_POST_2038/undef HAS_POST_2038/g' " + + "-e 's/cmakedefine NEEDS_REDUNDANT_MOVE/undef NEEDS_REDUNDANT_MOVE/g' " + + "-e 's/cmakedefine NEEDS_Z_PREFIX/undef NEEDS_Z_PREFIX/g' " + + "-e 's/cmakedefine/define/g' " + + "$< >$@" + ), + }), +) \ No newline at end of file diff --git a/tools/build/configure.py b/tools/build/configure.py index 69d19a4db..3b9aa92ba 100644 --- a/tools/build/configure.py +++ b/tools/build/configure.py @@ -27,7 +27,6 @@ def write_config(): include_list = [] opt_list = [] - opt_list.append("-fPIC") for arg in cflags: if inc_regex.match(arg): @@ -36,7 +35,6 @@ def write_config(): opt_list.append(arg) else: print("WARNING: Unexpected cflag item {}".format(arg)) - opt_list.remove("-D_GLIBCXX_USE_CXX11_ABI=0") if len(include_list) != 1: print( "ERROR: Expected a single include directory in " From 543dd53acf34cf650fd162c5f55b1288fd8fe180 Mon Sep 17 00:00:00 2001 From: oliverhu Date: Sat, 24 Apr 2021 16:00:53 -0700 Subject: [PATCH 11/18] reset configure.py --- tools/build/configure.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/build/configure.py b/tools/build/configure.py index 3b9aa92ba..cd2dd7dbd 100644 --- a/tools/build/configure.py +++ b/tools/build/configure.py @@ -35,6 +35,7 @@ def write_config(): opt_list.append(arg) else: print("WARNING: Unexpected cflag item {}".format(arg)) + if len(include_list) != 1: print( "ERROR: Expected a single include directory in " From 115acfca6125f8f3471d4a06a1f07d1cefd2faa0 Mon Sep 17 00:00:00 2001 From: oliverhu Date: Sat, 24 Apr 2021 16:07:15 -0700 Subject: [PATCH 12/18] update sample file --- tensorflow_io/core/BUILD | 13 ------------- tensorflow_io/core/kernels/orc/hello-time.cc | 6 +++++- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD index 666bb9449..c7e1e7886 100644 --- a/tensorflow_io/core/BUILD +++ b/tensorflow_io/core/BUILD @@ -368,19 +368,6 @@ cc_library( alwayslink = 1, ) -cc_library( - name = "orc_ops", - srcs = [ - "kernels/orc/hello-time.cc", - ], - copts = tf_io_copts(), - deps = [ - "@liborc", - ":orc_binary", - ], - alwayslink = 1, -) - cc_binary( name = "orc_binary", srcs = [ diff --git a/tensorflow_io/core/kernels/orc/hello-time.cc b/tensorflow_io/core/kernels/orc/hello-time.cc index d50daae58..78c6b8255 100644 --- a/tensorflow_io/core/kernels/orc/hello-time.cc +++ b/tensorflow_io/core/kernels/orc/hello-time.cc @@ -15,7 +15,11 @@ void print_localtime() { // https://github.com/harbby/cmake_ExternalProject_demo/blob/main/iris.orc int main(int argc, char const *argv[]) { std::list read_cols = {0, 1, 2, 3, 4}; - std::string file_path = "./iris.orc"; + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " some_orc_file.orc" << std::endl; + return 1; + } + std::string file_path = argv[1]; orc::RowReaderOptions row_reader_opts; row_reader_opts.include(read_cols); From e12f8129a5b1e2d0d72c95794154634444503f8a Mon Sep 17 00:00:00 2001 From: oliverhu Date: Sat, 24 Apr 2021 16:09:05 -0700 Subject: [PATCH 13/18] apply lint --- tensorflow_io/core/BUILD | 2 +- third_party/liborc.BUILD | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD index c7e1e7886..87523bc40 100644 --- a/tensorflow_io/core/BUILD +++ b/tensorflow_io/core/BUILD @@ -377,7 +377,7 @@ cc_binary( # linkshared = 1, deps = [ "@liborc", - ] + ], ) cc_library( diff --git a/third_party/liborc.BUILD b/third_party/liborc.BUILD index ff0dd198f..ae724c35a 100644 --- a/third_party/liborc.BUILD +++ b/third_party/liborc.BUILD @@ -74,7 +74,7 @@ cc_library( includes = [ "c++/libs/libhdfspp/include", ], - deps = [], + deps = [], ) proto_library( @@ -127,4 +127,4 @@ genrule( "$< >$@" ), }), -) \ No newline at end of file +) From 295c452dddd8ff5e6abf3722738ca9f9969c7aed Mon Sep 17 00:00:00 2001 From: oliverhu Date: Sat, 24 Apr 2021 16:17:20 -0700 Subject: [PATCH 14/18] remove orc_ops --- tensorflow_io/core/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD index 87523bc40..fb0c762c7 100644 --- a/tensorflow_io/core/BUILD +++ b/tensorflow_io/core/BUILD @@ -732,7 +732,6 @@ cc_binary( "//tensorflow_io/bigquery:bigquery_ops", "//tensorflow_io/core:audio_video_ops", "//tensorflow_io/core:avro_ops", - "//tensorflow_io/core:orc_ops", "//tensorflow_io/core:cpuinfo", "//tensorflow_io/core:file_ops", "//tensorflow_io/core:grpc_ops", From 442131b5deed20701328b1b73c5eac4a63f1dd3e Mon Sep 17 00:00:00 2001 From: oliverhu Date: Sat, 24 Apr 2021 16:19:18 -0700 Subject: [PATCH 15/18] remove commented out code --- tensorflow_io/core/BUILD | 1 - tensorflow_io/core/kernels/orc/hello-time.cc | 2 -- 2 files changed, 3 deletions(-) diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD index fb0c762c7..982c42773 100644 --- a/tensorflow_io/core/BUILD +++ b/tensorflow_io/core/BUILD @@ -374,7 +374,6 @@ cc_binary( "kernels/orc/hello-time.cc", ], copts = tf_io_copts(), - # linkshared = 1, deps = [ "@liborc", ], diff --git a/tensorflow_io/core/kernels/orc/hello-time.cc b/tensorflow_io/core/kernels/orc/hello-time.cc index 78c6b8255..41f5fec89 100644 --- a/tensorflow_io/core/kernels/orc/hello-time.cc +++ b/tensorflow_io/core/kernels/orc/hello-time.cc @@ -48,8 +48,6 @@ int main(int argc, char const *argv[]) { std::cout << "line " << buffer1[r] << "," << std::string(buffer2[r], lengths[r]) << "\n"; } - // std::cout << "this batch nums" << " " << batch->numElements << " " << - // "lines\n"; } return 0; From 0123f6fb2cbf006d41316ecd4cbb381fdca9b435 Mon Sep 17 00:00:00 2001 From: oliverhu Date: Mon, 26 Apr 2021 10:55:22 -0700 Subject: [PATCH 16/18] address comments --- tensorflow_io/core/BUILD | 12 +---- tensorflow_io/core/kernels/orc/hello-time.cc | 54 -------------------- 2 files changed, 1 insertion(+), 65 deletions(-) delete mode 100644 tensorflow_io/core/kernels/orc/hello-time.cc diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD index 982c42773..1667305e2 100644 --- a/tensorflow_io/core/BUILD +++ b/tensorflow_io/core/BUILD @@ -368,17 +368,6 @@ cc_library( alwayslink = 1, ) -cc_binary( - name = "orc_binary", - srcs = [ - "kernels/orc/hello-time.cc", - ], - copts = tf_io_copts(), - deps = [ - "@liborc", - ], -) - cc_library( name = "text_ops", srcs = [ @@ -754,6 +743,7 @@ cc_binary( "//tensorflow_io/core:mongodb_ops", "@local_config_tf//:libtensorflow_framework", "@local_config_tf//:tf_header_lib", + "@liborc", ] + select({ "@bazel_tools//src/conditions:windows": [], "//conditions:default": [ diff --git a/tensorflow_io/core/kernels/orc/hello-time.cc b/tensorflow_io/core/kernels/orc/hello-time.cc deleted file mode 100644 index 41f5fec89..000000000 --- a/tensorflow_io/core/kernels/orc/hello-time.cc +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include -#include -#include -#include - -#include "orc/orc-config.hh" - -void print_localtime() { - std::time_t result = std::time(nullptr); - std::cout << std::asctime(std::localtime(&result)); -} - -// Sample ORC file: -// https://github.com/harbby/cmake_ExternalProject_demo/blob/main/iris.orc -int main(int argc, char const *argv[]) { - std::list read_cols = {0, 1, 2, 3, 4}; - if (argc < 2) { - std::cerr << "Usage: " << argv[0] << " some_orc_file.orc" << std::endl; - return 1; - } - std::string file_path = argv[1]; - - orc::RowReaderOptions row_reader_opts; - row_reader_opts.include(read_cols); - - orc::ReaderOptions reader_opts; - std::unique_ptr reader = - orc::createReader(orc::readFile(file_path), reader_opts); - std::unique_ptr row_reader = - reader->createRowReader(row_reader_opts); - - std::unique_ptr batch = - row_reader->createRowBatch(24); - - // double field - auto *fields = dynamic_cast(batch.get()); - auto *col0 = dynamic_cast(fields->fields[0]); - double *buffer1 = col0->data.data(); - - // string field - auto *col4 = dynamic_cast(fields->fields[4]); - char **buffer2 = col4->data.data(); - int64_t *lengths = col4->length.data(); - - while (row_reader->next(*batch)) { - for (uint32_t r = 0; r < batch->numElements; ++r) { - std::cout << "line " << buffer1[r] << "," - << std::string(buffer2[r], lengths[r]) << "\n"; - } - } - - return 0; -} From bde307812f9077c6c38cdd8d554df810ae641c07 Mon Sep 17 00:00:00 2001 From: Keqiu Hu Date: Mon, 26 Apr 2021 21:09:42 -0700 Subject: [PATCH 17/18] Trigger build --- third_party/liborc.BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/liborc.BUILD b/third_party/liborc.BUILD index ae724c35a..9cc2e6a01 100644 --- a/third_party/liborc.BUILD +++ b/third_party/liborc.BUILD @@ -9,7 +9,7 @@ exports_files(["LICENSE"]) # was supposed to capture platform discrepancies. # However, since orc_proto.pb.cc can be compiled # with cc_proto_library successfully, there is no need -# for orc-proto-wrapper.cc. +# for orc-proto-wrapper.cc cc_library( name = "liborc", srcs = glob( From f67d9be9741e1731e04b47a14d14306f891bbd33 Mon Sep 17 00:00:00 2001 From: oliverhu Date: Tue, 27 Apr 2021 13:22:18 -0700 Subject: [PATCH 18/18] use dummy orc_ops --- tensorflow_io/core/BUILD | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tensorflow_io/core/BUILD b/tensorflow_io/core/BUILD index 1667305e2..f2902c8df 100644 --- a/tensorflow_io/core/BUILD +++ b/tensorflow_io/core/BUILD @@ -531,6 +531,19 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "orc_ops", + srcs = [ + ], + copts = tf_io_copts(), + linkstatic = True, + deps = [ + "//tensorflow_io/core:dataset_ops", + "@liborc", + ], + alwayslink = 1, +) + cc_library( name = "numpy_ops", srcs = [ @@ -720,6 +733,7 @@ cc_binary( "//tensorflow_io/bigquery:bigquery_ops", "//tensorflow_io/core:audio_video_ops", "//tensorflow_io/core:avro_ops", + "//tensorflow_io/core:orc_ops", "//tensorflow_io/core:cpuinfo", "//tensorflow_io/core:file_ops", "//tensorflow_io/core:grpc_ops", @@ -743,7 +757,6 @@ cc_binary( "//tensorflow_io/core:mongodb_ops", "@local_config_tf//:libtensorflow_framework", "@local_config_tf//:tf_header_lib", - "@liborc", ] + select({ "@bazel_tools//src/conditions:windows": [], "//conditions:default": [