diff --git a/docs/tutorials/daos.ipynb b/docs/tutorials/daos.ipynb index cf6c1d9a2d..e7e4e4bbaf 100644 --- a/docs/tutorials/daos.ipynb +++ b/docs/tutorials/daos.ipynb @@ -1,5 +1,31 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Copyright 2022 The TensorFlow IO Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, { "cell_type": "markdown", "id": "1f9e30da", @@ -10,6 +36,26 @@ "# DAOS Filesystem with Tensorflow (Using MNIST)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, { "cell_type": "markdown", "id": "22b37505", @@ -21,7 +67,7 @@ "\n", "This tutorial shows how to use read and write files on [DAOS Filesystem](https://docs.daos.io/) with TensorFlow, through TensorFlow IO's DAOS file system integration.\n", "\n", - "A machine running DAOS natively or through a [docker emulator](https://github.com/daos-stack/daos/tree/master/utils/docker) is needed to run this tutorial and/or use the Tensorflow IO DAOS integration. The DAOS Pool and Container used for this tutorial will be created and deleted within this tutorial, where we will be training and testing a simple Neural Network on the MNIST Dataset loaded from the DAOS File System Plugin.\n", + "A machine running DAOS natively or through a [docker emulator](https://github.com/daos-stack/daos/tree/master/utils/docker) is needed to run this tutorial and/or use the Tensorflow IO DAOS integration. The DAOS Pool and Container used for this tutorial will be created and deleted within this tutorial, where you will be training and testing a simple Neural Network on the MNIST Dataset loaded from the DAOS File System Plugin.\n", "\n", "The pool and container id or label are part of the filename uri:\n", "```\n", @@ -52,12 +98,59 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "5e35916b", "metadata": { "id": "5de1951509cb" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: tensorflow-io in /home/omar/.local/lib/python3.8/site-packages (0.20.0)\n", + "Requirement already satisfied: tensorflow<2.7.0,>=2.6.0 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow-io) (2.6.0)\n", + "Requirement already satisfied: tensorflow-io-gcs-filesystem==0.20.0 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow-io) (0.20.0)\n", + "Requirement already satisfied: gast==0.4.0 in 
/home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (0.4.0)\n", + "Requirement already satisfied: grpcio<2.0,>=1.37.0 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (1.39.0)\n", + "Requirement already satisfied: protobuf>=3.9.2 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (3.17.3)\n", + "Requirement already satisfied: tensorboard~=2.6 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (2.6.0)\n", + "Requirement already satisfied: tensorflow-estimator~=2.6 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (2.6.0)\n", + "Requirement already satisfied: typing-extensions~=3.7.4 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (3.7.4.3)\n", + "Requirement already satisfied: termcolor~=1.1.0 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (1.1.0)\n", + "Requirement already satisfied: wrapt~=1.12.1 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (1.12.1)\n", + "Requirement already satisfied: google-pasta~=0.2 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (0.2.0)\n", + "Requirement already satisfied: keras~=2.6 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (2.6.0)\n", + "Requirement already satisfied: six~=1.15.0 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (1.15.0)\n", + "Requirement already satisfied: numpy~=1.19.2 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (1.19.5)\n", + "Requirement already satisfied: opt-einsum~=3.3.0 in /home/omar/.local/lib/python3.8/site-packages (from 
tensorflow<2.7.0,>=2.6.0->tensorflow-io) (3.3.0)\n", + "Requirement already satisfied: wheel~=0.35 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (0.37.0)\n", + "Requirement already satisfied: astunparse~=1.6.3 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (1.6.3)\n", + "Requirement already satisfied: absl-py~=0.10 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (0.13.0)\n", + "Requirement already satisfied: keras-preprocessing~=1.1.2 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (1.1.2)\n", + "Requirement already satisfied: h5py~=3.1.0 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (3.1.0)\n", + "Requirement already satisfied: clang~=5.0 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (5.0)\n", + "Requirement already satisfied: flatbuffers~=1.12.0 in /home/omar/.local/lib/python3.8/site-packages (from tensorflow<2.7.0,>=2.6.0->tensorflow-io) (1.12)\n", + "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /home/omar/.local/lib/python3.8/site-packages (from tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (0.4.6)\n", + "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /home/omar/.local/lib/python3.8/site-packages (from tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (1.8.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /home/omar/.local/lib/python3.8/site-packages (from tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (3.3.4)\n", + "Requirement already satisfied: google-auth<2,>=1.6.3 in /home/omar/.local/lib/python3.8/site-packages (from tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (1.35.0)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/lib/python3/dist-packages (from 
tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (45.2.0)\n", + "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /home/omar/.local/lib/python3.8/site-packages (from tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (0.6.1)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/lib/python3/dist-packages (from tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (2.22.0)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /home/omar/.local/lib/python3.8/site-packages (from tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (2.0.1)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /home/omar/.local/lib/python3.8/site-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (0.2.8)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /home/omar/.local/lib/python3.8/site-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (4.7.2)\n", + "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /home/omar/.local/lib/python3.8/site-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (4.2.2)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /home/omar/.local/lib/python3.8/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (1.3.0)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /home/omar/.local/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (0.4.8)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/lib/python3/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.6->tensorflow<2.7.0,>=2.6.0->tensorflow-io) (3.1.0)\n", + "\u001b[33mWARNING: You are using pip version 21.2.4; however, version 22.0.4 is available.\n", + "You 
should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], "source": [ "try:\n", " %tensorflow_version 2.x \n", @@ -79,12 +172,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "79528fed", "metadata": { "id": "fb83b02da201" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/bin/sh: 1: dmg: not found\n", + "/usr/bin/sh: 1: daos: not found\n" + ] + } + ], "source": [ "!dmg -i pool create -s 500M TEST_POOL\n", "!daos cont create --pool=TEST_POOL --type=POSIX TEST_CONT" @@ -128,7 +230,7 @@ }, "outputs": [], "source": [ - "dfs_url = \"dfs://TEST_POOL/TEST_CONT/\" # This the path we'll be using to load and access the dataset\n", + "dfs_url = \"dfs://TEST_POOL/TEST_CONT/\" # This is the path you'll be using to load and access the dataset\n", "pwd = !pwd\n", "posix_url = pwd[0] + \"/tests/test_dfs/\"" ] @@ -162,7 +264,7 @@ "id": "b40e9a30808c" }, "source": [ - "Copying the Data from the POSIX Filesystem to the DAOS Filesystem under the pool and container we just created" + "Copying the Data from the POSIX Filesystem to the DAOS Filesystem under the pool and container you just created" ] }, { @@ -270,7 +372,7 @@ "id": "4129e8e2c1b4" }, "source": [ - "Compiling the model we just built" + "Compiling the model you just built" ] }, { @@ -467,6 +569,18 @@ "kernelspec": { "display_name": "Python 3", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" } }, "nbformat": 4, diff --git a/tensorflow_io/core/filesystems/dfs/dfs_filesystem.cc b/tensorflow_io/core/filesystems/dfs/dfs_filesystem.cc index 858b3d3165..5db796dc86 100644 --- a/tensorflow_io/core/filesystems/dfs/dfs_filesystem.cc +++ 
b/tensorflow_io/core/filesystems/dfs/dfs_filesystem.cc @@ -1,6 +1,6 @@ -#include "tensorflow_io/core/filesystems/dfs/dfs_utils.h" - #include + +#include "tensorflow_io/core/filesystems/dfs/dfs_utils.h" #undef NDEBUG #include @@ -8,14 +8,13 @@ namespace tensorflow { namespace io { namespace dfs { - // SECTION 1. Implementation for `TF_RandomAccessFile` // ---------------------------------------------------------------------------- namespace tf_random_access_file { typedef struct DFSRandomAccessFile { std::string dfs_path; dfs_t* daos_fs; - dfs_obj_t *daos_file; + dfs_obj_t* daos_file; std::vector buffers; daos_size_t file_size; daos_handle_t mEventQueueHandle{}; @@ -77,7 +76,7 @@ int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, if (read_buf.CacheHit(curr_offset)) { read_bytes = read_buf.CopyFromCache(ret, ret_offset, curr_offset, n, dfs_file->file_size, status); - break; + break; } } @@ -96,8 +95,8 @@ int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, size_t async_offset = curr_offset; for (size_t i = 0; i < dfs_file->buffers.size(); i++) { if (async_offset > dfs_file->file_size) break; - dfs_file->buffers[i].ReadAsync(dfs_file->daos_fs, - dfs_file->daos_file, async_offset); + dfs_file->buffers[i].ReadAsync(dfs_file->daos_fs, dfs_file->daos_file, + async_offset); async_offset += BUFF_SIZE; } } @@ -113,7 +112,7 @@ namespace tf_writable_file { typedef struct DFSWritableFile { std::string dfs_path; dfs_t* daos_fs; - dfs_obj_t *daos_file; + dfs_obj_t* daos_file; daos_size_t file_size; bool size_known; @@ -121,10 +120,10 @@ typedef struct DFSWritableFile { : dfs_path(std::move(dfs_path)) { daos_fs = file_system; daos_file = obj; - size_known=false; + size_known = false; } - int get_file_size(daos_size_t &size) { + int get_file_size(daos_size_t& size) { if (!size_known) { int rc = dfs_get_size(daos_fs, daos_file, &file_size); if (rc != 0) { @@ -141,9 +140,7 @@ typedef struct DFSWritableFile { size_known = true; } - void 
unset_file_size(void) { - size_known = false; - } + void unset_file_size(void) { size_known = false; } } DFSWritableFile; void Cleanup(TF_WritableFile* file) { @@ -171,8 +168,8 @@ void Append(const TF_WritableFile* file, const char* buffer, size_t n, return; } - rc = dfs_write(dfs_file->daos_fs, dfs_file->daos_file, &wsgl, - cur_file_size, NULL); + rc = dfs_write(dfs_file->daos_fs, dfs_file->daos_file, &wsgl, cur_file_size, + NULL); if (rc) { TF_SetStatus(status, TF_RESOURCE_EXHAUSTED, ""); dfs_file->unset_file_size(); @@ -274,8 +271,8 @@ void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path, } auto random_access_file = new tf_random_access_file::DFSRandomAccessFile(path, daos->daos_fs, obj); - random_access_file->buffers[0].ReadAsync( - daos->daos_fs, random_access_file->daos_file, 0); + random_access_file->buffers[0].ReadAsync(daos->daos_fs, + random_access_file->daos_file, 0); file->plugin_file = random_access_file; TF_SetStatus(status, TF_OK, ""); } diff --git a/tensorflow_io/core/filesystems/dfs/dfs_utils.cc b/tensorflow_io/core/filesystems/dfs/dfs_utils.cc index 9378cb9729..56a0c23c61 100644 --- a/tensorflow_io/core/filesystems/dfs/dfs_utils.cc +++ b/tensorflow_io/core/filesystems/dfs/dfs_utils.cc @@ -556,7 +556,8 @@ int ReadBuffer::ReadAsync(dfs_t* daos_fs, dfs_obj_t* file, const size_t off) { assert(rc == 0); rc = daos_event_init(event, eqh, nullptr); assert(rc == 0); - event->ev_error = dfs_read(daos_fs, file, &rsgl, buffer_offset, &read_size, event); + event->ev_error = + dfs_read(daos_fs, file, &rsgl, buffer_offset, &read_size, event); return 0; } @@ -571,8 +572,9 @@ int ReadBuffer::CopyData(char* ret, const size_t ret_offset, const size_t off, } int64_t ReadBuffer::CopyFromCache(char* ret, const size_t ret_offset, - const size_t off, const size_t n, - const daos_size_t file_size, TF_Status* status) { + const size_t off, const size_t n, + const daos_size_t file_size, + TF_Status* status) { size_t read_size; read_size = off + n > 
file_size ? file_size - off : n; read_size = off + read_size > buffer_offset + buffer_size diff --git a/tensorflow_io/core/filesystems/dfs/dfs_utils.h b/tensorflow_io/core/filesystems/dfs/dfs_utils.h index 953d0b2f63..dc18c747b4 100644 --- a/tensorflow_io/core/filesystems/dfs/dfs_utils.h +++ b/tensorflow_io/core/filesystems/dfs/dfs_utils.h @@ -208,8 +208,8 @@ class ReadBuffer { const size_t n); int64_t CopyFromCache(char* ret, const size_t ret_offset, const size_t off, - const size_t n, const daos_size_t file_size, - TF_Status* status); + const size_t n, const daos_size_t file_size, + TF_Status* status); private: size_t id;