diff --git a/.vscode/launch.json b/.vscode/launch.json
index 826e7a4dae..ad6354eacc 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -20,8 +20,8 @@
"console":"integratedTerminal",
"justMyCode":true,
"args": [
- "https://github.com/sweepai/ui/issues/2"
- // "https://github.com/sweepai/sweep/issues/2669"
+ // "https://github.com/sweepai/sweep/issues/2738",
+ "https://github.com/sweepai/sweep/issues/2758"
]
},
{
diff --git a/sweepai/core/context_pruning.py b/sweepai/core/context_pruning.py
index 3f8052875f..55c4d06cfd 100644
--- a/sweepai/core/context_pruning.py
+++ b/sweepai/core/context_pruning.py
@@ -1,7 +1,6 @@
import json
import re
import time
-from copy import deepcopy
from attr import dataclass
from loguru import logger
@@ -10,26 +9,25 @@
from sweepai.agents.assistant_wrapper import client, openai_retry_with_timeout
from sweepai.core.entities import Snippet
-from sweepai.logn.cache import file_cache
-from sweepai.utils.chat_logger import ChatLogger
+from sweepai.utils.chat_logger import ChatLogger, discord_log_error
+from sweepai.utils.code_tree import CodeTree
from sweepai.utils.github_utils import ClonedRepo
from sweepai.utils.progress import AssistantConversation, TicketProgress
from sweepai.utils.tree_utils import DirectoryTree
ASSISTANT_MAX_CHARS = 4096 * 4 * 0.95 # ~95% of 4k tokens
-sys_prompt = """You are a brilliant engineer assigned to the following Github issue. You are currently gathering the minimum set of information that allows you to plan the solution to the issue. It is very important that you get this right.
+sys_prompt = """You are a brilliant engineer assigned to the following Github issue. You must gather the information from the codebase that allows you to completely solve the issue. It is very important that you get this right.
Reply in the following format:
-
-Use the snippets, issue metadata and other information to determine the information that is critical to solve the issue. For each snippet, identify whether it was a true positive or a false positive.
-Propose the most important paths as well as any new required paths, along with a justification.
-
+## Solution Planning
+Use the snippets, user request, and repo_tree to determine the snippets that are critical to solve the issue.
-Then use the store_file_path and expand_directory tools to optimize the snippets_in_repo, repo_tree, and paths_in_repo until they allow you to perfectly solve the user request.
-If you expand a directory, you automatically expand all of its subdirectories, so do not list its subdirectories. Store all files or directories that are referenced in the issue title or descriptions.
-Store as few file paths as necessary to solve the user request."""
+1. First use the preview_file tool to preview any files that seem relevant. Then, use the view_file_snippet tool to view specific line numbers of a file. We want to find the exact line numbers to store to solve the user request. You may use this tool multiple times to view multiple snippets, either from the same file or different files.
+2. Finally, use the store_file_snippet and expand_directory tools to optimize the context (snippets_in_repo and repo_tree) until they allow you to completely solve the user request. If you don't know the correct line numbers, complete step one until you find the exact line numbers.
+
+Repeat this process until you have the perfect context to solve the user request."""
unformatted_user_prompt = """\
@@ -41,25 +39,56 @@
{repo_tree}
-# Instructions
## User Request
-{query}
-The above and have unnecessary information. Modify paths_in_repo, snippets_in_repo, and repo_tree to store only the absolutely necessary information.
-
-Reply in the following format:
-
-
-Use the snippets, issue metadata and other information to determine the information that is critical to solve the issue. For each snippet, identify whether it was a true positive or a false positive.
-Propose the most important paths as well as any new required paths, along with a justification.
-
-
-Then use the store_file_path and expand_directory tools to optimize the snippets_in_repo, repo_tree, and paths_in_repo until they allow you to perfectly solve the user request.
-If you expand a directory, you automatically expand all of its subdirectories, so do not list its subdirectories. Store all files or directories that are referenced in the issue title or descriptions.
-Store as few file paths as necessary to solve the user request."""
+{query}"""
functions = [
{
- "name": "store_file_path",
+ "name": "preview_file",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "File path to preview.",
+ },
+ "justification": {
+ "type": "string",
+ "description": "Justification for previewing the file.",
+ },
+ },
+ "required": ["file_path", "justification"],
+ },
+ "description": "Use this to read the summary of the file. Use this tool before viewing a snippet. This is used for exploration only and does not affect the snippets. After using this tool, use the view_file_snippet tool to view specific line numbers of a file to find the exact line numbers to store to solve the user request.",
+ },
+ {
+ "name": "view_file_snippet",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "File path to view.",
+ },
+ "start_line": {
+ "type": "integer",
+ "description": "Start line of the snippet.",
+ },
+ "end_line": {
+ "type": "integer",
+ "description": "End line of the snippet.",
+ },
+ "justification": {
+ "type": "string",
+ "description": "Justification for viewing the file_path.",
+ },
+ },
+ "required": ["file_path", "start_line", "end_line", "justification"],
+ },
+ "description": "Use this to view a section of a snippet. You may use this tool multiple times to view multiple snippets. After you are finished using this tool, you may use the store_file_snippet tool to store the snippet to solve the user request.",
+ },
+ {
+ "name": "store_file_snippet",
"parameters": {
"type": "object",
"properties": {
@@ -67,14 +96,22 @@
"type": "string",
"description": "File or directory to store.",
},
+ "start_line": {
+ "type": "integer",
+ "description": "Start line of the snippet.",
+ },
+ "end_line": {
+ "type": "integer",
+ "description": "End line of the snippet. Pick the minimal required lines and prefer storing multiple small, precise snippets over one large snippet.",
+ },
"justification": {
"type": "string",
- "description": "Justification for store the file_path.",
+ "description": "Justification for storing the file_path.",
},
},
- "required": ["file_path", "justification"],
+ "required": ["file_path", "start_line", "end_line", "justification"],
},
- "description": "Use this to either store an existing file_path or add a new path to paths_in_repo. Only store paths you are certain are relevant to solving the user request. All of the files not listed will be removed from the paths_in_repo. Make sure to store ALL of the files that are referenced in the issue title or description.",
+ "description": "Use this to store a snippet. Only store paths you are certain are relevant to solving the user request and be precise with the line numbers. Make sure to store ALL of the files that are referenced in the issue title or description.",
},
{
"name": "expand_directory",
@@ -92,14 +129,11 @@
},
"required": ["directory_path", "justification"],
},
- "description": "Expand an existing directory that is closed. This is used for exploration only and does not affect the snippets.",
+ "description": "Expand an existing directory that is closed. This is used for exploration only and does not affect the snippets. If you expand a directory, you automatically expand all of its subdirectories, so do not list its subdirectories. Store all files or directories that are referenced in the issue title or descriptions.",
},
]
-tools = [
- {"type": "function", "function": functions[0]},
- {"type": "function", "function": functions[1]},
-]
+tools = [{"type": "function", "function": function} for function in functions]
@staticmethod
@@ -116,6 +150,7 @@ class RepoContextManager:
current_top_tree: str
snippets: list[Snippet]
snippet_scores: dict[str, float]
+ cloned_repo: ClonedRepo
current_top_snippets: list[Snippet] = []
@property
@@ -151,7 +186,10 @@ def format_context(
if can_add_snippet(snippet, new_top_snippets):
new_top_snippets.append(snippet)
self.current_top_snippets = new_top_snippets
- top_snippets_str = [snippet.xml for snippet in self.current_top_snippets]
+ top_snippets_str = [
+ f"{snippet.get_snippet()}"
+ for snippet in self.current_top_snippets
+ ]
paths_in_repo = [snippet.file_path for snippet in self.current_top_snippets]
snippets_in_repo_str = "\n".join(top_snippets_str)
paths_in_repo_str = "\n".join(paths_in_repo)
@@ -184,24 +222,13 @@ def get_highest_scoring_snippet(self, file_path: str) -> Snippet:
)
return highest_scoring_snippet
- def add_file_paths(self, paths_to_add: list[str]):
- self.dir_obj.add_file_paths(paths_to_add)
- for file_path in paths_to_add:
- highest_scoring_snippet = self.get_highest_scoring_snippet(file_path)
- if highest_scoring_snippet is None:
- continue
- if can_add_snippet(highest_scoring_snippet, self.current_top_snippets):
- self.current_top_snippets.append(highest_scoring_snippet)
- continue
- # otherwise try adding it by removing others
- prev_top_snippets = deepcopy(self.current_top_snippets)
- self.current_top_snippets = [highest_scoring_snippet]
- for snippet in prev_top_snippets:
- if can_add_snippet(snippet, self.current_top_snippets):
- self.current_top_snippets.append(snippet)
+ def add_snippets(self, snippets_to_add: list[Snippet]):
+ self.dir_obj.add_file_paths([snippet.file_path for snippet in snippets_to_add])
+ for snippet in snippets_to_add:
+ self.current_top_snippets.append(snippet)
-@file_cache(ignore_params=["repo_context_manager", "ticket_progress", "chat_logger"])
+# @file_cache(ignore_params=["repo_context_manager", "ticket_progress", "chat_logger"])
def get_relevant_context(
query: str,
repo_context_manager: RepoContextManager,
@@ -238,50 +265,50 @@ def get_relevant_context(
thread_id=thread.id,
assistant_id=assistant.id,
)
- done = modify_context(thread, run, repo_context_manager, ticket_progress)
- ticket_progress.search_progress.pruning_conversation_counter = 1
- if done:
- return repo_context_manager
- for i in range(modify_iterations):
- ticket_progress.search_progress.pruning_conversation_counter = i + 1
- thread = openai_retry_with_timeout(client.beta.threads.create)
- user_prompt = repo_context_manager.format_context(
- unformatted_user_prompt=unformatted_user_prompt, query=query
- )
- _ = openai_retry_with_timeout(
- client.beta.threads.messages.create,
- thread.id,
- role="user",
- content=f"{user_prompt}\nIf the current snippets_in_repo, repo_tree, and paths_in_repo allow you to solve the issue, store all of the existing file paths.",
- )
- run = openai_retry_with_timeout(
- client.beta.threads.runs.create,
- thread_id=thread.id,
- assistant_id=assistant.id,
- )
- done = modify_context(thread, run, repo_context_manager, ticket_progress)
- if done:
- break
+ modify_context(thread, run, repo_context_manager, ticket_progress)
+ if len(repo_context_manager.current_top_snippets) == 0:
+ repo_context_manager.current_top_snippets = repo_context_manager.snippets
+ discord_log_error(f"Context manager empty ({ticket_progress.tracking_id})")
return repo_context_manager
except Exception as e:
logger.exception(e)
return repo_context_manager
+def update_assistant_conversation(
+ run: Run,
+ thread: Thread,
+ ticket_progress: TicketProgress,
+ repo_context_manager: RepoContextManager,
+):
+ assistant_conversation = AssistantConversation.from_ids(
+ assistant_id=run.assistant_id,
+ run_id=run.id,
+ thread_id=thread.id,
+ )
+ if ticket_progress:
+ if assistant_conversation:
+ ticket_progress.search_progress.pruning_conversation = (
+ assistant_conversation
+ )
+ ticket_progress.search_progress.repo_tree = str(repo_context_manager.dir_obj)
+ ticket_progress.search_progress.final_snippets = (
+ repo_context_manager.current_top_snippets
+ )
+ ticket_progress.save()
+
+
def modify_context(
thread: Thread,
run: Run,
repo_context_manager: RepoContextManager,
ticket_progress: TicketProgress,
) -> bool | None:
- max_iterations = 30
- paths_to_keep = [] # consider persisting these across runs
- paths_to_add = []
+ max_iterations = 60
directories_to_expand = []
- logger.info(
- f"Context Management Start:\ncurrent snippet paths: {repo_context_manager.top_snippet_paths}"
- )
+ repo_context_manager.current_top_snippets = []
initial_file_paths = repo_context_manager.top_snippet_paths
+ paths_to_add = []
for iter in range(max_iterations):
run = openai_retry_with_timeout(
client.beta.threads.runs.retrieve,
@@ -289,25 +316,11 @@ def modify_context(
run_id=run.id,
)
if iter % 5 == 0:
- assistant_conversation = AssistantConversation.from_ids(
- assistant_id=run.assistant_id,
- run_id=run.id,
- thread_id=thread.id,
+ update_assistant_conversation(
+ run, thread, ticket_progress, repo_context_manager
)
- if ticket_progress:
- if assistant_conversation:
- ticket_progress.search_progress.pruning_conversation = (
- assistant_conversation
- )
- ticket_progress.search_progress.repo_tree = str(
- repo_context_manager.dir_obj
- )
- ticket_progress.search_progress.final_snippets = (
- repo_context_manager.current_top_snippets
- )
- logger.info("iteration: " + str(iter))
- ticket_progress.save()
- if run.status == "completed":
+ logger.info("iteration: " + str(iter))
+ if run.status == "completed" or run.status == "failed":
break
if (
run.status != "requires_action"
@@ -334,6 +347,13 @@ def modify_context(
}
)
continue
+ current_top_snippets_string = "\n".join(
+ [
+ "- " + snippet.xml
+ for snippet in repo_context_manager.current_top_snippets
+ ]
+ )
+ logger.info(f"Tool Call: {tool_call.function.name} {function_input}")
function_path_or_dir = (
function_input["file_path"]
if "file_path" in function_input
@@ -341,31 +361,90 @@ def modify_context(
)
valid_path = False
output = ""
- if tool_call.function.name == "store_file_path":
- if function_path_or_dir in repo_context_manager.top_snippet_paths:
- valid_path = (
- function_path_or_dir in repo_context_manager.top_snippet_paths
- )
- output = f"SUCCESS. {function_path_or_dir} was stored."
- paths_to_keep.append(function_path_or_dir)
- else: # we should add the file path
+ if tool_call.function.name == "view_file_snippet":
+ error_message = ""
+ for key in ["start_line", "end_line"]:
+ if key not in function_input:
+ logger.warning(
+ f"Key {key} not in function input {function_input}"
+ )
+ error_message = "FAILURE: Please provide a start and end line."
+ start_line = int(function_input["start_line"])
+ end_line = int(function_input["end_line"])
+ logger.info(f"start_line: {start_line}, end_line: {end_line}")
+ if error_message:
+ output = error_message
+ else:
valid_path = repo_context_manager.is_path_valid(
function_path_or_dir, directory=False
)
- highest_scoring_snippet = (
- repo_context_manager.get_highest_scoring_snippet(
- function_path_or_dir
+ file_contents = repo_context_manager.cloned_repo.get_file_contents(
+ function_path_or_dir
+ )
+ selected_file_contents = ""
+ lines = file_contents.splitlines()
+ expansion_width = 50
+ for i, line in enumerate(
+ lines[start_line - expansion_width : start_line]
+ ):
+ selected_file_contents += (
+ f"{i + start_line - expansion_width} | {line}\n"
+ )
+ selected_file_contents += "\n===START OF SNIPPET===\n"
+ for i, line in enumerate(lines[start_line:end_line]):
+ selected_file_contents += f"{i + start_line} | {line}\n"
+ selected_file_contents += "\n===END OF SNIPPET===\n"
+ for i, line in enumerate(
+ lines[end_line : end_line + expansion_width]
+ ):
+ selected_file_contents += f"{i + end_line} | {line}\n"
+ output = (
+ f"Here are the contents of `{function_path_or_dir}:{start_line}:{end_line}`\n```\n{selected_file_contents}\n```\nIf the above snippet contains all of the necessary contents to solve the user request BETWEEN the START and END tags, call store_file_snippet to store this snippet. Otherwise, call view_file_snippet again with a larger span."
+ if valid_path
+ else "FAILURE: This file path does not exist. Please try a new path."
+ )
+ elif tool_call.function.name == "store_file_snippet":
+ valid_path = (
+ function_path_or_dir in repo_context_manager.top_snippet_paths
+ )
+ error_message = ""
+ for key in ["start_line", "end_line"]:
+ if key not in function_input:
+ logger.warning(
+ f"Key {key} not in function input {function_input}"
)
+ error_message = "FAILURE: Please provide a start and end line."
+ start_line = int(function_input["start_line"])
+ end_line = int(function_input["end_line"])
+ logger.info(f"start_line: {start_line}, end_line: {end_line}")
+ if end_line - start_line > 1000:
+ error_message = (
+ "FAILURE: Please provide a snippet of 1000 lines or less."
)
- new_file_contents = (
- highest_scoring_snippet.xml
- if highest_scoring_snippet is not None
- else ""
+
+ try:
+ file_contents = repo_context_manager.cloned_repo.get_file_contents(
+ function_path_or_dir
+ )
+ valid_path = True
+ except:
+ error_message = (
+ "FAILURE: This file path does not exist. Please try a new path."
+ )
+ valid_path = False
+ if error_message:
+ output = error_message
+ else:
+ snippet = Snippet(
+ file_path=function_path_or_dir,
+ start=start_line,
+ end=end_line,
+ content=file_contents,
)
- repo_context_manager.add_file_paths([function_path_or_dir])
+ repo_context_manager.add_snippets([snippet])
paths_to_add.append(function_path_or_dir)
output = (
- f"SUCCESS: {function_path_or_dir} was added with contents {new_file_contents}."
+ f"SUCCESS: {function_path_or_dir} was added with contents\n```\n{snippet.xml}\n```. Here are the current selected snippets:\n{current_top_snippets_string}"
if valid_path
else "FAILURE: This file path does not exist. Please try a new path."
)
@@ -382,6 +461,26 @@ def modify_context(
)
if valid_path:
directories_to_expand.append(function_path_or_dir)
+ elif tool_call.function.name == "preview_file":
+ valid_path = repo_context_manager.is_path_valid(
+ function_path_or_dir, directory=False
+ )
+ code = repo_context_manager.cloned_repo.get_file_contents(
+ function_path_or_dir
+ )
+ file_preview = CodeTree.from_code(code).get_preview()
+ output = (
+ f"SUCCESS: Previewing file {function_path_or_dir}:\n\n{file_preview}"
+ if valid_path
+ else "FAILURE: Invalid file path. Please try a new path."
+ )
+ logger.info(output)
+ logger.info("Current top snippets:")
+ for snippet in repo_context_manager.current_top_snippets:
+ logger.info(snippet.denotation)
+ logger.info("Paths to add:")
+ for snippet in paths_to_add:
+ logger.info(snippet)
tool_outputs.append(
{
"tool_call_id": tool_call.id,
@@ -418,10 +517,8 @@ def modify_context(
)
ticket_progress.save()
logger.info(
- f"Context Management End:\npaths_to_keep: {paths_to_keep}\npaths_to_add: {paths_to_add}\ndirectories_to_expand: {directories_to_expand}"
+ f"Context Management End:\npaths_to_add: {paths_to_add}\ndirectories_to_expand: {directories_to_expand}"
)
- if paths_to_keep or paths_to_add:
- repo_context_manager.remove_all_non_kept_paths(paths_to_keep + paths_to_add)
if directories_to_expand:
repo_context_manager.expand_all_directories(directories_to_expand)
logger.info(
@@ -431,9 +528,7 @@ def modify_context(
repo_context_manager.top_snippet_paths
)
# if the paths have not changed or all tools were empty, we are done
- return not (
- paths_changed and (paths_to_keep or directories_to_expand or paths_to_add)
- )
+ return not (paths_changed and (paths_to_add or directories_to_expand))
if __name__ == "__main__":
@@ -442,24 +537,16 @@ def modify_context(
from sweepai.utils.ticket_utils import prep_snippets
installation_id = os.environ["INSTALLATION_ID"]
- cloned_repo = ClonedRepo("sweepai/sweep", installation_id, "main", repo="None")
- query = "create a new search query filtering agent that will be used in ticket_utils.py. The agent should filter unnecessary terms out of the search query to be sent into lexical search. Use a prompt to do this, using name_agent.py as a reference."
+ cloned_repo = ClonedRepo("sweepai/sweep", installation_id, "main")
+ query = (
+ "allow sweep.yaml to be read from the user/organization's .github repository"
+ )
+ # golden response is
+ # sweepai/handlers/create_pr.py:401-428
+ # sweepai/config/client.py:178-282
ticket_progress = TicketProgress(
tracking_id="test",
)
- import linecache
- import sys
-
- def trace_lines(frame, event, arg):
- if event == "line":
- filename = frame.f_code.co_filename
- if "context_pruning" in filename:
- lineno = frame.f_lineno
- line = linecache.getline(filename, lineno)
- print(f"Executing {filename}:line {lineno}:{line.rstrip()}")
- return trace_lines
-
- sys.settrace(trace_lines)
repo_context_manager = prep_snippets(cloned_repo, query, ticket_progress)
rcm = get_relevant_context(
query,
@@ -467,4 +554,5 @@ def trace_lines(frame, event, arg):
ticket_progress,
chat_logger=ChatLogger({"username": "wwzeng1"}),
)
- sys.settrace(None)
+ for snippet in rcm.current_top_snippets:
+ print(snippet.denotation)
diff --git a/sweepai/core/prompts.py b/sweepai/core/prompts.py
index 454db5c630..7a93a0b7e0 100644
--- a/sweepai/core/prompts.py
+++ b/sweepai/core/prompts.py
@@ -201,14 +201,15 @@
# Plan:
-* Instructions for creating the new file needed to solve the issue
-* Include references to all files, imports and entity names
+* Natural language instructions for creating the new file needed to solve the issue.
+* Reference necessary files, imports and entity names.
...
...
-
-* Instructions for the modifications needed to solve the issue. Be concise and mention references to all files, imports and entity names.
+
+* Natural language instructions for the modifications needed to solve the issue.
+* Be concise and reference necessary files, imports and entity names.
...
...
diff --git a/sweepai/utils/code_tree.py b/sweepai/utils/code_tree.py
index 34073a70a4..3c3bf6bb5a 100644
--- a/sweepai/utils/code_tree.py
+++ b/sweepai/utils/code_tree.py
@@ -1,4 +1,5 @@
import ast
+import re
import tree_sitter_languages
from pydantic import BaseModel
@@ -7,17 +8,18 @@
class CodeTree(BaseModel):
code: str
+ language: str
tree: Tree
class Config:
arbitrary_types_allowed = True
@classmethod
- def from_code(cls, code: str):
+ def from_code(cls, code: str, language: str = "python"):
parser = Parser()
- parser.set_language(tree_sitter_languages.get_language("python"))
+ parser.set_language(tree_sitter_languages.get_language(language))
tree = parser.parse(bytes(code, "utf8"))
- return cls(code=code, tree=tree)
+ return cls(code=code, language=language, tree=tree)
def get_path_to_line(self, min_line: int, max_line: int = -1) -> list[Node]:
if max_line == -1:
@@ -66,6 +68,68 @@ def get_lines_surrounding(
else:
return (min_line, max_line)
+ def get_preview(self, min_line: int = 5, max_line: int = 1200):
+ last_end_line = -1
+ lines = self.code.splitlines()
+ def get_children(node: Node = self.tree.root_node):
+ nonlocal last_end_line
+ children = []
+ for child in node.children:
+ start_line, _ = child.start_point
+ end_line, _ = child.end_point
+ if start_line <= last_end_line:
+ continue
+ text = "\n".join(lines[start_line : end_line + 1])
+ indentation = " " * (len(text) - len(text.lstrip()))
+ for i in range(last_end_line + 1, start_line):
+ line = lines[i]
+ children.append(f"{i} | {line}")
+ last_end_line = i
+ if end_line - start_line > max_line:
+ children.extend(get_children(child))
+ elif end_line - start_line < min_line:
+ text = "\n".join(
+ [
+ f"{start_line + i} | {line}"
+ for i, line in enumerate(text.split("\n"))
+ ]
+ )
+ children.append(text)
+ else:
+ node_lines = text.split("\n")
+ first_line = node_lines[0]
+ first_line = f"{start_line} | {first_line}"
+ second_line = node_lines[1]
+ second_line = f"{start_line + 1} | {second_line}"
+ hidden_lines_content = "\n".join(lines[start_line + 2 : end_line - 1])
+ number_of_terms = 5
+ first_n_terms = ", ".join(extract_words(hidden_lines_content)[:number_of_terms])
+ spacing = " " * (len(str(start_line)) + 2)
+ middle_lines = spacing.join(
+ [
+ spacing + indentation + f" ...\n",
+ indentation + f" (lines {start_line + 2}-{end_line - 2} contain terms: {first_n_terms})\n",
+ indentation + f" ...\n",
+ ]
+ )
+ second_last_line = node_lines[-2]
+ second_last_line = f"{end_line - 1} | {second_last_line}"
+ last_line = node_lines[-1]
+ last_line = f"{end_line} | {last_line}"
+ children.append(first_line)
+ children.append(second_line)
+ children.append(middle_lines)
+ children.append(second_last_line)
+ children.append(last_line)
+ last_end_line = end_line
+ return children
+ return "\n".join(get_children())
+
+
+def extract_words(string):
+ # extract the unique words from a code snippet, in order of first appearance
+ words = re.findall(r"\w+", string)
+ return list(dict.fromkeys(words))
def get_global_function_names_and_spans(node):
return [
@@ -87,11 +151,33 @@ def test_check_comments_presence_with_unsupported_file_extension(self, mock_spli
from unittest.mock import patch
from sweepai.utils.comment_utils import check_comments_presence
+def helper():
+ x = 1
+ y = 2
+ z = 3
+ return x + y + z
+
class TestCheckCommentsPresence(unittest.TestCase):
@patch('os.path.splitext')
def test_check_comments_presence_with_comment(self, mock_splitext):
mock_splitext.return_value = ('file', '.py')
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
+ x = 1
self.assertEqual(check_comments_presence('file.py', '# This is a comment'), True)
@patch('os.path.splitext')
@@ -112,9 +198,13 @@ def test_check_comments_presence_with_empty_new_code(self, mock_splitext):
if __name__ == '__main__':
unittest.main()
"""
- split_code = full_code.split("\n")
- match_start = 16
- match_end = 20
- code_tree = CodeTree.from_code(full_code)
- print(code_tree.get_lines_surrounding(match_start)[0])
- print(code_tree.get_lines_surrounding(match_end)[1])
+ # split_code = full_code.split("\n")
+ file_contents = open("sweepai/handlers/on_ticket.py").read()
+ # file_contents = full_code
+ # match_start = 16
+ # match_end = 20
+ code_tree = CodeTree.from_code(file_contents)
+ print(code_tree.get_preview())
+ print(len(code_tree.get_preview().split("\n")))
+ # print(code_tree.get_lines_surrounding(match_start)[0])
+ # print(code_tree.get_lines_surrounding(match_end)[1])
diff --git a/sweepai/utils/str_utils.py b/sweepai/utils/str_utils.py
index c6d4a9784f..b40d466bfe 100644
--- a/sweepai/utils/str_utils.py
+++ b/sweepai/utils/str_utils.py
@@ -88,3 +88,8 @@ def clean_logs(logs: str):
def extract_lines(text: str, start: int, end: int):
lines = text.splitlines(keepends=True)
return "\n".join(lines[max(0, start) : min(len(lines), end)])
+
+
+def add_line_numbers(text: str, start: int = 0):
+ lines = text.splitlines(keepends=True)
+ return "".join(f"{start + i} | {line}" for i, line in enumerate(lines))
diff --git a/sweepai/utils/ticket_utils.py b/sweepai/utils/ticket_utils.py
index 4cff8cc2ed..ddf82c3181 100644
--- a/sweepai/utils/ticket_utils.py
+++ b/sweepai/utils/ticket_utils.py
@@ -1,5 +1,5 @@
-from threading import Thread
import traceback
+from threading import Thread
from time import time
from loguru import logger
@@ -8,7 +8,10 @@
from sweepai.core.context_pruning import RepoContextManager, get_relevant_context
from sweepai.core.entities import Snippet
from sweepai.core.lexical_search import search_index
-from sweepai.core.vector_db import compute_vector_search_scores, prepare_lexical_search_index
+from sweepai.core.vector_db import (
+ compute_vector_search_scores,
+ prepare_lexical_search_index,
+)
from sweepai.logn.cache import file_cache
from sweepai.utils.chat_logger import discord_log_error
from sweepai.utils.event_logger import posthog
@@ -47,9 +50,13 @@ def prep_snippets(
codebase_score = files_to_scores.get(snippet.file_path, 0.08)
snippet_score = 0.1
if snippet_to_key(snippet) in content_to_lexical_score:
- snippet_score = content_to_lexical_score[snippet_to_key(snippet)] * codebase_score
+ snippet_score = (
+ content_to_lexical_score[snippet_to_key(snippet)] * codebase_score
+ )
else:
- content_to_lexical_score[snippet_to_key(snippet)] = snippet_score * codebase_score
+ content_to_lexical_score[snippet_to_key(snippet)] = (
+ snippet_score * codebase_score
+ )
ranked_snippets = sorted(
snippets,
@@ -79,6 +86,7 @@ def prep_snippets(
current_top_snippets=ranked_snippets,
snippets=snippets,
snippet_scores=content_to_lexical_score,
+ cloned_repo=cloned_repo,
)
return repo_context_manager
@@ -239,19 +247,22 @@ def log_error(
def center(text: str) -> str:
return f"{text}
"
+
def fire_and_forget_wrapper(call):
"""
This decorator is used to run a function in a separate thread.
It does not return anything and does not wait for the function to finish.
It fails silently.
"""
+
def wrapper(*args, **kwargs):
def run_in_thread(call, *a, **kw):
try:
call(*a, **kw)
except:
pass
+
thread = Thread(target=run_in_thread, args=(call,) + args, kwargs=kwargs)
thread.start()
- return wrapper
\ No newline at end of file
+ return wrapper
diff --git a/sweepai/utils/tree_utils.py b/sweepai/utils/tree_utils.py
index ebe49ff1f7..68551855a3 100644
--- a/sweepai/utils/tree_utils.py
+++ b/sweepai/utils/tree_utils.py
@@ -35,11 +35,11 @@ def __repr__(self):
class DirectoryTree:
def __init__(self):
- self.original_lines = []
- self.lines = []
+ self.original_lines: list[Line] = []
+ self.lines: list[Line] = []
- def parse(self, input_str):
- stack = [] # To keep track of parent directories
+ def parse(self, input_str: str):
+ stack: list[Line] = [] # To keep track of parent directories
for line in input_str.strip().split("\n"):
indent_count = (len(line) - len(line.lstrip())) // 2
line = line.strip()
@@ -152,6 +152,8 @@ def remove_multiple(self, targets):
self.remove(target)
def __str__(self):
- return "\n".join(
- (" " * line.indent_count) + line.full_path() for line in self.lines
- )
+ results = []
+ for line in self.lines:
+ line_text = line.text.split("/")[-2] + "/" if line.is_dir else line.text
+ results.append((" " * line.indent_count) + line_text)
+ return "\n".join(results)