Skip to content

Commit

Permalink
text splitter conf
Browse files: browse the repository at this point in the history
  • Loading branch information
alex-treebeard committed Oct 24, 2023
1 parent 19123c7 commit 8e88ae0
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 3 deletions.
2 changes: 2 additions & 0 deletions src/nbwrite/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
DEFAULT_LLM_KWARGS,
DEFAULT_RETRIEVER_KWARGS,
DEFAULT_SYSTEM_PROMPT,
DEFAULT_TEXT_SPLITTER_KWARGS,
)


Expand All @@ -14,6 +15,7 @@ class GenerationConfig(BaseModel):
system_prompt: str = DEFAULT_SYSTEM_PROMPT
llm_kwargs: Dict[str, Any] = DEFAULT_LLM_KWARGS
retriever_kwargs: Dict[str, Any] = DEFAULT_RETRIEVER_KWARGS
text_splitter_kwargs: Dict[str, Any] = DEFAULT_TEXT_SPLITTER_KWARGS


class Config(BaseModel):
Expand Down
5 changes: 5 additions & 0 deletions src/nbwrite/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
"max_tokens": 512,
}

DEFAULT_TEXT_SPLITTER_KWARGS = {
"chunk_size": 2000,
"chunk_overlap": 200,
}

DEFAULT_RETRIEVER_KWARGS = {
"k": 5,
"search_type": "mmr",
Expand Down
8 changes: 6 additions & 2 deletions src/nbwrite/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,14 @@
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")


def create_index(pkgs: List[str], retriever_kwargs: Dict[str, Any]):
def create_index(
pkgs: List[str],
retriever_kwargs: Dict[str, Any],
text_splitter_kwargs: Dict[str, Any],
):

python_splitter = RecursiveCharacterTextSplitter.from_language(
language=Language.PYTHON, chunk_size=2000, chunk_overlap=200
language=Language.PYTHON, **text_splitter_kwargs
)

texts = []
Expand Down
6 changes: 5 additions & 1 deletion src/nbwrite/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,11 @@ def gen(
]
)

retriever = create_index(config.packages, config.generation.retriever_kwargs)
retriever = create_index(
config.packages,
config.generation.retriever_kwargs,
config.generation.text_splitter_kwargs,
)

def _combine_documents(
docs, document_prompt=PromptTemplate.from_template(template="{page_content}"), document_separator="\n\n" # type: ignore
Expand Down

0 comments on commit 8e88ae0

Please sign in to comment.