Skip to content

Commit

Permalink
simplify repo cache
Browse files Browse the repository at this point in the history
  • Loading branch information
RexWzh committed Jul 29, 2024
1 parent 3ff8ccd commit f2f666f
Showing 1 changed file with 19 additions and 31 deletions.
50 changes: 19 additions & 31 deletions src/lean_dojo/data_extraction/lean.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,25 +506,26 @@ def __post_init__(self) -> None:
# set repo and commit
if repo_type == "github":
repo = url_to_repo(self.url, repo_type=repo_type)
# Convert tags or branches to commit hashes
if not is_commit_hash(self.commit):
if (self.url, self.commit) in info_cache.tag2commit:
commit = info_cache.tag2commit[(self.url, self.commit)]
else:
commit = _to_commit_hash(repo, self.commit)
assert is_commit_hash(commit), f"Invalid commit hash: {commit}"
info_cache.tag2commit[(self.url, commit)] = commit
object.__setattr__(self, "commit", commit)
else:
# get repo from cache
cache_repo_path = repo_cache.get(
REPO_CACHE_PREFIX / self.format_dirname / self.name
)
# clone and store the repo if not in cache
if cache_repo_path is None:
cache_repo_path = self.add_to_cache()
repo = Repo(cache_repo_path)
object.__setattr__(self, "commit", _to_commit_hash(repo, self.commit))
rel_cache_dir = REPO_CACHE_PREFIX / self.format_dirname / self.name
cache_repo_dir = repo_cache.get(rel_cache_dir)
if cache_repo_dir is None:
with working_directory() as tmp_dir:
repo = url_to_repo(self.url, repo_type=repo_type, tmp_dir=tmp_dir)
commit = _to_commit_hash(repo, self.commit)
rel_cache_dir = REPO_CACHE_PREFIX / self.format_dirname / self.name
cache_repo_dir = repo_cache.store(repo.working_dir, rel_cache_dir)
repo = Repo(cache_repo_dir)
# Convert tags or branches to commit hashes
if not is_commit_hash(self.commit):
if (self.url, self.commit) in info_cache.tag2commit:
commit = info_cache.tag2commit[(self.url, self.commit)]
else:
commit = _to_commit_hash(repo, self.commit)
assert is_commit_hash(commit), f"Invalid commit hash: {commit}"
info_cache.tag2commit[(self.url, commit)] = commit
object.__setattr__(self, "commit", commit)
object.__setattr__(self, "repo", repo)

# Determine the required Lean version.
Expand Down Expand Up @@ -561,27 +562,14 @@ def commit_url(self) -> str:
return f"{self.url}/tree/{self.commit}"

@property
def format_dirname(self) -> Path:
    """Return the formatted cache directory name.

    The name has the form ``{user_name}-{repo_name}-{commit}``:

    * for ``repo_type == "github"`` the user and repo names come from
      ``_split_git_url(self.url)``;
    * for every other repo type the literal prefix ``gitpython`` is used
      together with ``self.name``.

    Returns:
        The formatted name wrapped in a ``Path`` (not a ``str``), so it can
        be joined directly with other path components.
    """
    if self.repo_type == "github":
        user_name, repo_name = _split_git_url(self.url)
    else:  # f"gitpython-{repo_name}-{commit}"
        user_name, repo_name = "gitpython", self.name
    return Path(f"{user_name}-{repo_name}-{self.commit}")

def add_to_cache(self) -> Path:
    """Store the repo in the cache directory.

    The repo is obtained through ``url_to_repo`` inside a temporary working
    directory, checked out at the commit hash resolved from ``self.commit``,
    and then handed to ``repo_cache.store`` under
    ``REPO_CACHE_PREFIX / self.format_dirname / self.name``.

    Returns:
        The value returned by ``repo_cache.store`` for the stored repo.

    Raises:
        AssertionError: If ``self.repo_type`` is neither ``"local"`` nor
            ``"remote"``.
    """
    supported_types = ["local", "remote"]
    assert (
        self.repo_type in supported_types
    ), f"Unsupported cache repo type: {self.repo_type}"

    with working_directory() as scratch_dir:
        fetched = url_to_repo(self.url, repo_type=self.repo_type, tmp_dir=scratch_dir)
        # Resolve tags/branches to a hash first so the checkout is pinned.
        resolved_hash = _to_commit_hash(fetched, self.commit)
        fetched.git.checkout(resolved_hash)
        cache_key = REPO_CACHE_PREFIX / self.format_dirname / self.name
        stored_at = repo_cache.store(fetched.working_dir, cache_key)
        return stored_at

def show(self) -> None:
    """Open this repo's ``commit_url`` in the default web browser."""
    target_url = self.commit_url
    webbrowser.open(target_url)
Expand Down

0 comments on commit f2f666f

Please sign in to comment.