Skip to content

Commit

Permalink
Context pruning uses line numbers (sweepai#2757)
Browse files Browse the repository at this point in the history
# Purpose

Please provide a high-level overview of what this pull request aims to
achieve.

# Changes Made

Please provide a detailed list of the changes made in this pull request.

1.
2.
3.

# Additional Notes

Please provide any additional notes or screenshots here.
When you make a PR, please ping us on Discord at
http://discord.gg/sweep.

---------

Co-authored-by: wwzeng1 <william@sweep.dev>
  • Loading branch information
kevinlu1248 and wwzeng1 authored Dec 13, 2023
1 parent 5d2923a commit bb084a7
Show file tree
Hide file tree
Showing 7 changed files with 362 additions and 165 deletions.
4 changes: 2 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
"console":"integratedTerminal",
"justMyCode":true,
"args": [
"https://github.com/sweepai/ui/issues/2"
// "https://github.com/sweepai/sweep/issues/2669"
// "https://github.com/sweepai/sweep/issues/2738",
"https://github.com/sweepai/sweep/issues/2758"
]
},
{
Expand Down
364 changes: 226 additions & 138 deletions sweepai/core/context_pruning.py

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions sweepai/core/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,15 @@
# Plan:
<plan>
<create file="file_path_1" relevant_files="space-separated list of ALL files relevant for creating file_path_1">
* Instructions for creating the new file needed to solve the issue
* Include references to all files, imports and entity names
* Natural language instructions for creating the new file needed to solve the issue.
* Reference necessary files, imports and entity names.
...
</create>
...
<modify file="file_path_2" relevant_files="space-separated list of ALL files relevant for modifying file_path_2">
* Instructions for the modifications needed to solve the issue. Be concise and mention references to all files, imports and entity names.
<modify file="file_path_2" start_line="i" end_line="j" relevant_files="space-separated list of ALL files relevant for modifying file_path_2">
* Natural language instructions for the modifications needed to solve the issue.
* Be concise and reference necessary files, imports and entity names.
...
</modify>
...
Expand Down
108 changes: 99 additions & 9 deletions sweepai/utils/code_tree.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import ast
import re

import tree_sitter_languages
from pydantic import BaseModel
Expand All @@ -7,17 +8,18 @@

class CodeTree(BaseModel):
code: str
language: str
tree: Tree

class Config:
arbitrary_types_allowed = True

@classmethod
def from_code(cls, code: str):
def from_code(cls, code: str, language: str = "python"):
parser = Parser()
parser.set_language(tree_sitter_languages.get_language("python"))
parser.set_language(tree_sitter_languages.get_language(language))
tree = parser.parse(bytes(code, "utf8"))
return cls(code=code, tree=tree)
return cls(code=code, language=language, tree=tree)

def get_path_to_line(self, min_line: int, max_line: int = -1) -> list[Node]:
if max_line == -1:
Expand Down Expand Up @@ -66,6 +68,68 @@ def get_lines_surrounding(
else:
return (min_line, max_line)

# Build a condensed, line-numbered text preview of self.code by walking the
# children of the parsed syntax tree.
#
# min_line: nodes spanning fewer than this many lines are emitted in full.
# max_line: nodes spanning more than this many lines are not emitted directly;
#           their children are walked instead.
# Returns a single string: the collected pieces joined with "\n". Every emitted
# source line is prefixed with "<index> | ", where <index> is the same 0-based
# index used to subscript `lines`.
#
# NOTE(review): this copy of the code has lost its indentation, so statement
# nesting is not visible here; the comments below describe the apparent
# structure and should be confirmed against the real file.
def get_preview(self, min_line: int = 5, max_line: int = 1200):
# Index of the last source line already emitted; -1 means nothing emitted yet.
last_end_line = -1
lines = self.code.splitlines()
# Recursive helper: returns the list of preview pieces for `node`'s children.
def get_children(node: Node = self.tree.root_node):
# The cursor is shared across all recursion levels.
nonlocal last_end_line
children = []
for child in node.children:
# Only the first component of each point is used, as a line index.
start_line, _ = child.start_point
end_line, _ = child.end_point
# Skip a child that starts on or before a line that was already emitted.
if start_line <= last_end_line:
continue
text = "\n".join(lines[start_line : end_line + 1])
# One space per leading whitespace character of the node's text.
indentation = " " * (len(text) - len(text.lstrip()))
# Emit, verbatim and numbered, any lines lying between the previously emitted
# line and the start of this child.
for i in range(last_end_line + 1, start_line):
line = lines[i]
children.append(f"{i} | {line}")
last_end_line = i
# Too long to show or summarise as a unit: descend into the child's children.
if end_line - start_line > max_line:
children.extend(get_children(child))
# Short enough to show completely: number every line of the node.
elif end_line - start_line < min_line:
text = "\n".join(
[
f"{start_line + i} | {line}"
for i, line in enumerate(text.split("\n"))
]
)
children.append(text)
# Medium-sized node: show its first two and last two lines and replace the
# middle with a placeholder listing a few words found in the hidden lines.
else:
node_lines = text.split("\n")
first_line = node_lines[0]
first_line = f"{start_line} | {first_line}"
second_line = node_lines[1]
second_line = f"{start_line + 1} | {second_line}"
# Lines start_line + 2 .. end_line - 2 are the ones not shown verbatim.
hidden_lines_content = "\n".join(lines[start_line + 2 : end_line - 1])
number_of_terms = 5
first_n_terms = ", ".join(extract_words(hidden_lines_content)[:number_of_terms])
# Padding as wide as the start line number plus two more characters.
spacing = " " * (len(str(start_line)) + 2)
middle_lines = spacing.join(
[
spacing + indentation + f" ...\n",
# NOTE(review): the "(" opened in this message is never closed, and the range
# printed here (start_line + 1 .. end_line - 1) is wider than the lines joined
# into hidden_lines_content above - confirm which is intended.
indentation + f" (lines {start_line + 1}-{end_line - 1} contains terms: {first_n_terms}\n",
# NOTE(review): this piece ends with "\n" and the pieces are later joined with
# "\n" as well, which presumably leaves an empty line after the placeholder.
indentation + f" ...\n",
]
)
second_last_line = node_lines[-2]
second_last_line = f"{end_line - 1} | {second_last_line}"
last_line = node_lines[-1]
last_line = f"{end_line} | {last_line}"
children.append(first_line)
children.append(second_line)
children.append(middle_lines)
children.append(second_last_line)
children.append(last_line)
# Advance the cursor past this child.
# NOTE(review): presumably runs for every child that was not skipped, not only
# for the else-branch - nesting cannot be confirmed from this view.
last_end_line = end_line
return children
return "\n".join(get_children())


def extract_words(string):
    """Return the distinct word tokens of ``string`` in first-seen order.

    A word is any maximal run of ``\\w`` characters. Duplicates are dropped,
    keeping only the first occurrence; no frequency ranking is applied.
    """
    first_seen = {}
    for match in re.finditer(r"\w+", string):
        # dict keys remember insertion order, so this de-duplicates stably.
        first_seen.setdefault(match.group(0), None)
    return list(first_seen)

def get_global_function_names_and_spans(node):
return [
Expand All @@ -87,11 +151,33 @@ def test_check_comments_presence_with_unsupported_file_extension(self, mock_spli
from unittest.mock import patch
from sweepai.utils.comment_utils import check_comments_presence
def helper():
x = 1
y = 2
z = 3
return x + y + z
class TestCheckCommentsPresence(unittest.TestCase):
@patch('os.path.splitext')
def test_check_comments_presence_with_comment(self, mock_splitext):
mock_splitext.return_value = ('file', '.py')
x = 1
x = 1
x = 1
x = 1
x = 1
x = 1
x = 1
x = 1
x = 1
x = 1
x = 1
x = 1
x = 1
x = 1
x = 1
x = 1
self.assertEqual(check_comments_presence('file.py', '# This is a comment'), True)
@patch('os.path.splitext')
Expand All @@ -112,9 +198,13 @@ def test_check_comments_presence_with_empty_new_code(self, mock_splitext):
if __name__ == '__main__':
unittest.main()
"""
split_code = full_code.split("\n")
match_start = 16
match_end = 20
code_tree = CodeTree.from_code(full_code)
print(code_tree.get_lines_surrounding(match_start)[0])
print(code_tree.get_lines_surrounding(match_end)[1])
# split_code = full_code.split("\n")
file_contents = open("sweepai/handlers/on_ticket.py").read()
# file_contents = full_code
# match_start = 16
# match_end = 20
code_tree = CodeTree.from_code(file_contents)
print(code_tree.get_preview())
print(len(code_tree.get_preview().split("\n")))
# print(code_tree.get_lines_surrounding(match_start)[0])
# print(code_tree.get_lines_surrounding(match_end)[1])
5 changes: 5 additions & 0 deletions sweepai/utils/str_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,8 @@ def clean_logs(logs: str):
def extract_lines(text: str, start: int, end: int):
    """Return the lines of ``text`` in the half-open range ``[start, end)``.

    Line indices are 0-based. ``start`` is clamped to 0 and ``end`` is capped
    at the number of lines, so out-of-range bounds never raise.

    The lines are split with their terminators kept, so they are concatenated
    directly. Joining them with a newline separator, as was done before,
    inserted an extra blank line after every extracted line.
    """
    lines = text.splitlines(keepends=True)
    return "".join(lines[max(0, start) : min(len(lines), end)])


def add_line_numbers(text: str, start: int = 0):
    """Prefix every line of ``text`` with ``"<number> | "``.

    Numbering begins at ``start`` and increases by one per line. Line
    terminators are kept, so the result has the same line structure as
    ``text``.
    """
    numbered = []
    for offset, content in enumerate(text.splitlines(keepends=True)):
        numbered.append(f"{start + offset} | {content}")
    return "".join(numbered)
21 changes: 16 additions & 5 deletions sweepai/utils/ticket_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from threading import Thread
import traceback
from threading import Thread
from time import time

from loguru import logger
Expand All @@ -8,7 +8,10 @@
from sweepai.core.context_pruning import RepoContextManager, get_relevant_context
from sweepai.core.entities import Snippet
from sweepai.core.lexical_search import search_index
from sweepai.core.vector_db import compute_vector_search_scores, prepare_lexical_search_index
from sweepai.core.vector_db import (
compute_vector_search_scores,
prepare_lexical_search_index,
)
from sweepai.logn.cache import file_cache
from sweepai.utils.chat_logger import discord_log_error
from sweepai.utils.event_logger import posthog
Expand Down Expand Up @@ -47,9 +50,13 @@ def prep_snippets(
codebase_score = files_to_scores.get(snippet.file_path, 0.08)
snippet_score = 0.1
if snippet_to_key(snippet) in content_to_lexical_score:
snippet_score = content_to_lexical_score[snippet_to_key(snippet)] * codebase_score
snippet_score = (
content_to_lexical_score[snippet_to_key(snippet)] * codebase_score
)
else:
content_to_lexical_score[snippet_to_key(snippet)] = snippet_score * codebase_score
content_to_lexical_score[snippet_to_key(snippet)] = (
snippet_score * codebase_score
)

ranked_snippets = sorted(
snippets,
Expand Down Expand Up @@ -79,6 +86,7 @@ def prep_snippets(
current_top_snippets=ranked_snippets,
snippets=snippets,
snippet_scores=content_to_lexical_score,
cloned_repo=cloned_repo,
)
return repo_context_manager

Expand Down Expand Up @@ -239,19 +247,22 @@ def log_error(
def center(text: str) -> str:
    """Wrap ``text`` in a ``<div>`` element whose ``align`` is ``center``."""
    opening_tag = "<div align='center'>"
    closing_tag = "</div>"
    return f"{opening_tag}{text}{closing_tag}"


def fire_and_forget_wrapper(call):
    """
    This decorator is used to run a function in a separate thread.
    It does not return anything and does not wait for the function to finish.
    It fails silently.

    Args:
        call: The callable to run in the background.

    Returns:
        A wrapper that accepts the same arguments as ``call``, starts a new
        thread running ``call`` with them, and returns ``None`` immediately.
    """
    import functools

    def run_in_thread(*a, **kw):
        try:
            call(*a, **kw)
        except Exception:
            # Best-effort by design: errors in the background call are dropped.
            # Only Exception is caught so interpreter-exit signals
            # (SystemExit, KeyboardInterrupt) are not swallowed as well.
            pass

    @functools.wraps(call)
    def wrapper(*args, **kwargs):
        thread = Thread(target=run_in_thread, args=args, kwargs=kwargs)
        thread.start()

    return wrapper
16 changes: 9 additions & 7 deletions sweepai/utils/tree_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ def __repr__(self):

class DirectoryTree:
def __init__(self):
self.original_lines = []
self.lines = []
self.original_lines: list[Line] = []
self.lines: list[Line] = []

def parse(self, input_str):
stack = [] # To keep track of parent directories
def parse(self, input_str: str):
stack: list[Line] = [] # To keep track of parent directories
for line in input_str.strip().split("\n"):
indent_count = (len(line) - len(line.lstrip())) // 2
line = line.strip()
Expand Down Expand Up @@ -152,6 +152,8 @@ def remove_multiple(self, targets):
self.remove(target)

def __str__(self):
return "\n".join(
(" " * line.indent_count) + line.full_path() for line in self.lines
)
results = []
for line in self.lines:
line_text = line.text.split("/")[-2] + "/" if line.is_dir else line.text
results.append((" " * line.indent_count) + line_text)
return "\n".join(results)

0 comments on commit bb084a7

Please sign in to comment.