run-llama · Acousticdesk · Sep 30, 2024 · Oct 4, 2024 · Oct 4, 2024 · Oct 4, 2024
diff --git a/llama-index-integrations/readers/llama-index-readers-document360/.gitignore b/llama-index-integrations/readers/llama-index-readers-document360/.gitignore
@@ -0,0 +1,153 @@
+llama_index/_static
+.DS_Store
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+bin/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+etc/
+include/
+lib/
+lib64/
+parts/
+sdist/
+share/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+.ruff_cache
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+notebooks/
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+pyvenv.cfg
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Jetbrains
+.idea
+modules/
+*.swp
+
+# VsCode
+.vscode
+
+# pipenv
+Pipfile
+Pipfile.lock
+
+# pyright
+pyrightconfig.json
diff --git a/llama-index-integrations/readers/llama-index-readers-document360/BUILD b/llama-index-integrations/readers/llama-index-readers-document360/BUILD
@@ -0,0 +1 @@
+python_sources()
diff --git a/llama-index-integrations/readers/llama-index-readers-document360/Makefile b/llama-index-integrations/readers/llama-index-readers-document360/Makefile
@@ -0,0 +1,17 @@
+GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
+
+help:	## Show all Makefile targets.
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
+
+format:	## Run code autoformatters (black).
+	pre-commit install
+	git ls-files | xargs pre-commit run black --files
+
+lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
+	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
+
+test:	## Run tests via pytest.
+	pytest tests
+
+watch-docs:	## Build and watch documentation.
+	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-integrations/readers/llama-index-readers-document360/README.md b/llama-index-integrations/readers/llama-index-readers-document360/README.md
@@ -0,0 +1,98 @@
+# LlamaIndex Readers Integration: Document360
+
+The `Document360Reader` class is a custom reader that interacts with the Document360 API to fetch articles. It processes these articles recursively and allows further handling via custom callback functions, while also handling rate limiting and errors.
+
+## Installation
+
+```bash
+pip install llama-index-readers-document360
+```
+
+## Usage
+
+```py
+from llama_index.readers.document360 import Document360Reader
+
+reader = Document360Reader(api_key="your_api_key")
+
+# Load data
+documents = reader.load_data()
+
+# Use the documents as needed
+for doc in documents:
+    print(doc.text)
+```
+
+## Class Initialization
+
+```py
+def __init__(
+    self,
+    api_key: str,
+    should_process_project_version=None,
+    should_process_category=None,
+    should_process_article=None,
+    handle_batch_finished=None,
+    handle_rate_limit_error=None,
+    handle_request_http_error=None,
+    handle_category_processing_started=None,
+    handle_article_processing_started=None,
+    handle_article_processing_error=None,
+    handle_load_data_error=None,
+    article_to_custom_document=None,
+    rate_limit_num_retries=10,
+    rate_limit_retry_wait_time=30,
+):
+    pass
+```
+
+- `api_key`: Your Document360 API key (required).
+- `should_process_project_version`: Callback to determine whether to process a project version.
+- `should_process_category`: Callback to determine whether to process a category.
+- `should_process_article`: Callback to determine whether to process an article.
+- `handle_batch_finished`: Callback executed after all articles are processed.
+- `handle_rate_limit_error`: Callback for handling rate limit errors.
+- `handle_request_http_error`: Callback for handling HTTP errors.
+- `handle_category_processing_started`: Callback triggered when category processing starts.
+- `handle_article_processing_started`: Callback triggered when article processing starts.
+- `handle_article_processing_error`: Callback for handling errors during article processing.
+- `handle_load_data_error`: Callback for handling errors during data loading.
+- `article_to_custom_document`: Custom transformation function to map an article to a document.
+- `rate_limit_num_retries`: Number of retry attempts when hitting rate limits.
+- `rate_limit_retry_wait_time`: Time to wait (in seconds) between retries after a rate limit error.
+
+## Referencing entities
+
+```py
+from llama_index.readers.document360.entities import (
+    Article,
+    ArticleSlim,
+    Category,
+    ProjectVersion,
+)
+
+
+def handle_category_processing_started(category: Category):
+    logging.info(f"Started processing category: {category}")
+
+
+def handle_article_processing_started(article: Article):
+    logging.info(f"Processing article: {article}")
+```
+
+## Referencing errors
+
+```py
+from llama_index.readers.document360.errors import (
+    RetryError,
+    HTTPError,
+    RateLimitException,
+)
+
+reader = Document360Reader(api_key="your_api_key")
+
+try:
+    reader.load_data()
+except RetryError as e:
+    logging.info(f"Retry Error: {e}")
+```