Skip to content

Commit

Permalink
Added hash testing in small-test
Browse files Browse the repository at this point in the history
  • Loading branch information
benedikt-schesch committed Jan 21, 2025
1 parent 6e878d4 commit 1ee924f
Show file tree
Hide file tree
Showing 296 changed files with 95,665 additions and 42 deletions.
2 changes: 0 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ repos:
- id: check-builtin-literals
- id: check-docstring-first
- id: check-executables-have-shebangs
- id: check-json
- id: check-shebang-scripts-are-executable
- id: pretty-format-json
- id: check-merge-conflict
- id: check-symlinks
- id: check-toml
Expand Down
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ copy-paper:
small-test:
${MAKE} clean-test-cache clean
AST_REPOS_PATH=repos-small-test DELETE_WORKDIRS=False WORKDIR_DIRECTORY=.workdir-small-test ./run_small.sh --include_trivial_merges --no_timing
python3 test/check_hashes.py
${MAKE} compress-small-cache
${MAKE} small-test-diff
rm -rf results/small
Expand All @@ -116,6 +117,7 @@ small-test:
small-test-without-cleaning:
${MAKE} clean-test-cache
AST_REPOS_PATH=repos-small-test DELETE_WORKDIRS=False WORKDIR_DIRECTORY=.workdir-small-test ./run_small.sh --include_trivial_merges --no_timing
python3 test/check_hashes.py
${MAKE} small-test-diff

update-figures:
Expand Down
40 changes: 0 additions & 40 deletions src/python/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from pathlib import Path
from typing import Union, Tuple, List, Dict
import errno
import json
import signal
import functools
from enum import Enum
Expand Down Expand Up @@ -660,45 +659,6 @@ def compute_tree_fingerprint(self) -> str:
)
return result

def compute_tree_filehash_map(repo_path: Path) -> str:
    """
    Computes a SHA256 for every file in the repository (excluding .git) and
    returns a JSON string mapping each file path to its hash.

    Hashing is delegated to the external `find` and `sha256sum` programs.
    They are run without a shell and with `repo_path` as working directory,
    so repository paths containing spaces or shell metacharacters work.
    The exclusion pattern matches every path containing "/.git", which
    covers the .git directory but also names such as .gitignore.

    Args:
        repo_path (Path): Path object pointing to the local repository folder.
    Returns:
        str: JSON string (indent=2) that maps from file path (relative to
            `repo_path`, without the leading "./") to its SHA256 hash.
            Entries follow the byte-wise sorted `sha256sum` output lines;
            each line starts with the hash.
    Raises:
        AssertionError: If `repo_path` does not exist.
        subprocess.CalledProcessError: If `find` exits with non-zero status.
    """
    # NOTE(review): this function takes no `self`; if it lives inside a
    # class it can only be called through the class object -- confirm.
    assert repo_path.exists(), f"Repository path {repo_path} does not exist"

    output = subprocess.check_output(
        [
            "find",
            ".",
            "-type",
            "f",
            "-not",
            "-path",
            "*/\\.git*",
            "-exec",
            "sha256sum",
            "{}",
            ";",
        ],
        cwd=str(repo_path),
    )

    # Sorting the raw bytes reproduces what `sort` does under LC_ALL=C.
    filehash_map = {}
    for raw_line in sorted(output.split(b"\n")):
        if not raw_line.strip():
            continue
        # sha256sum prints "<sha>", a space, a one-character mode indicator
        # (" " or "*"), then the file name.
        sha, _, rest = raw_line.decode("utf-8").partition(" ")
        path = rest[1:]
        # Remove only the exact "./" prefix. str.lstrip("./") would strip
        # every leading "." and "/" and corrupt names such as ".hidden".
        if path.startswith("./"):
            path = path[2:]
        filehash_map[path] = sha

    # Return as a pretty-printed JSON string
    return json.dumps(filehash_map, indent=2)

def get_sha_cache_entry(
self, commit: str, start_merge: bool = False
) -> Union[None, dict]:
Expand Down
155 changes: 155 additions & 0 deletions test/check_hashes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# -*- coding: utf-8 -*-
"""Script to compute and verify SHA256 hashes for all files in a directory."""

import json
import subprocess
from pathlib import Path
import shutil
from rich.progress import Progress, TaskID
import argparse


def compute_tree_filehash_map(repo_path: Path) -> str:
    """
    Computes a SHA256 for every file in the repository (excluding .git) and
    returns a JSON string mapping each file path to its hash.

    The external `find` and `sha256sum` programs do the hashing. They are
    started without a shell, with `repo_path` as the working directory, so a
    path containing spaces or shell metacharacters is handled correctly.
    The exclusion pattern matches any path containing "/.git" (the .git
    directory, and also names like .gitignore).

    Args:
        repo_path (Path): Path object pointing to the local repository folder.
    Returns:
        str: JSON string (indent=2) that maps from file path (relative to
            `repo_path`, leading "./" removed) to its SHA256 hash. Entries
            are ordered like the byte-wise sorted `sha256sum` output lines,
            each of which starts with the hash.
    Raises:
        AssertionError: If `repo_path` does not exist.
        subprocess.CalledProcessError: If `find` exits with non-zero status.
    """
    assert repo_path.exists(), f"Repository path {repo_path} does not exist"

    find_command = [
        "find",
        ".",
        "-type",
        "f",
        "-not",
        "-path",
        "*/\\.git*",
        "-exec",
        "sha256sum",
        "{}",
        ";",
    ]
    output = subprocess.check_output(find_command, cwd=str(repo_path))

    filehash_map = {}
    # A byte-wise sort gives the same order as `sort` under LC_ALL=C.
    for raw_line in sorted(output.split(b"\n")):
        if not raw_line.strip():
            continue
        # Line layout: "<sha>", a space, a mode character (" " or "*"),
        # then the file name as printed by find ("./...").
        sha, _, rest = raw_line.decode("utf-8").partition(" ")
        path = rest[1:]
        # Strip the exact "./" prefix only; lstrip("./") removes a character
        # set and would turn "./.hidden" into "hidden".
        if path.startswith("./"):
            path = path[2:]
        filehash_map[path] = sha

    return json.dumps(filehash_map, indent=2)


def process_directory(
    progress: Progress, task_id: TaskID, dir3: Path, hash_dir: Path, mode: str
) -> None:
    """
    Processes a single directory by computing or verifying hashes.

    In "create" mode the hash map of `dir3` is computed and written to
    `hash_dir`, creating parent directories as needed. In "verify" mode
    every entry of the stored hash map is compared with a freshly computed
    one; the first missing file or hash mismatch is reported on stdout and
    the process exits with status 1. Any other mode only advances the
    progress bar.

    Args:
        progress (Progress): Rich progress bar instance.
        task_id (TaskID): Task ID for progress tracking.
        dir3 (Path): Directory being processed.
        hash_dir (Path): Path to the hash file.
        mode (str): Either "create" or "verify".
    Raises:
        SystemExit: With code 1 when verification fails or the hash file
            does not exist.
    """
    if mode == "create":
        hash_dir.parent.mkdir(parents=True, exist_ok=True)
        hash_map = compute_tree_filehash_map(dir3)
        with open(hash_dir, "w", encoding="utf-8") as hash_file:
            hash_file.write(hash_map)
    elif mode == "verify":
        if not hash_dir.exists():
            print(f"Hash file for {dir3} not found")
            # SystemExit is what the `exit` builtin raises, but it does not
            # depend on the `site` module being loaded.
            raise SystemExit(1)
        with open(hash_dir, "r", encoding="utf-8") as hash_file:
            stored_hash_map = json.load(hash_file)
        current_hash_map = json.loads(compute_tree_filehash_map(dir3))

        # NOTE(review): only stored -> current is checked; files that exist
        # on disk but not in the stored map are not reported. Confirm that
        # this is intended.
        for file, stored_hash in stored_hash_map.items():
            if file not in current_hash_map:
                print(f"File {file} missing in current hashes for {dir3}")
                raise SystemExit(1)
            current_hash = current_hash_map[file]
            if current_hash != stored_hash:
                print(f"Hash mismatch for file {file} in {dir3}")
                print(f"File path: {dir3 / file}")
                print(f"Stored hash: {stored_hash}")
                print(f"Current hash: {current_hash}")
                print("File content:")
                # errors="replace" keeps the diagnostic from crashing with
                # UnicodeDecodeError when the mismatching file is binary.
                with open(
                    dir3 / file, "r", encoding="utf-8", errors="replace"
                ) as file_content:
                    print(file_content.read())
                raise SystemExit(1)
    progress.update(task_id, advance=1)


def _sorted_subdirs(path: Path) -> list:
    """Return the immediate subdirectories of `path` in sorted order."""
    return sorted(child for child in path.iterdir() if child.is_dir())


def main():
    """
    Main function to process and verify directory hashes.

    Every directory three levels below --workdir is processed. Hash files
    are (re)created when --goal_path does not exist or --override is given;
    otherwise the stored hash files are verified against the working
    directory. Exits with status 1 if --workdir is not a directory.
    """
    parser = argparse.ArgumentParser(description="Process and verify directory hashes.")
    parser.add_argument(
        "--override",
        action="store_true",
        help="Override existing hashes with newly computed ones.",
    )
    parser.add_argument(
        "--goal_path",
        type=Path,
        help="Path to the directory where hash files will be stored.",
        default=Path("test/small-goal-files/hashes"),
    )
    parser.add_argument(
        "--workdir",
        type=Path,
        help="Path to the directory containing the directories to be processed.",
        default=Path(".workdir-small-test"),
    )
    args = parser.parse_args()

    base_path = args.goal_path
    workdir_path = args.workdir

    # Fail with a readable message instead of a traceback from iterdir().
    if not workdir_path.is_dir():
        print(f"Working directory {workdir_path} not found")
        raise SystemExit(1)

    # Collect all level-3 directories. Sorting makes the processing order,
    # and therefore the first reported failure, deterministic.
    dir3_list = [
        (dir1.name, dir2.name, dir3.name, dir3)
        for dir1 in _sorted_subdirs(workdir_path)
        for dir2 in _sorted_subdirs(dir1)
        for dir3 in _sorted_subdirs(dir2)
    ]

    with Progress() as progress:
        task_id = progress.add_task("Processing directories...", total=len(dir3_list))

        if not base_path.exists() or args.override:
            # Case 1: Start from an empty goal directory and generate hashes
            mode = "create"
            shutil.rmtree(base_path, ignore_errors=True)
            base_path.mkdir(parents=True, exist_ok=True)
        else:
            # Case 2: Verify hashes
            mode = "verify"

        for dir1_name, dir2_name, dir3_name, dir3 in dir3_list:
            hash_dir = base_path / dir1_name / dir2_name / dir3_name
            # NOTE(review): with_suffix replaces an existing suffix, so a
            # directory named "a.b" maps to "a.json". Kept unchanged so that
            # previously stored hash files keep their names.
            process_directory(
                progress,
                task_id,
                dir3,
                hash_dir.with_suffix(".json"),
                mode=mode,
            )

    print("All hashes processed successfully")


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/report.css": "0c464ff9ce9154fdf1a041b0efed7bc31da4a4c9744249d8ffa6b80f1c36ed9e",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/sort.gif": "1875d39ff1be841991aa768486f1bcfc0ca02969323e2d2ec52ec156bb5cf240",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/session.gif": "1ec062e347bc7db837b9471b6c6128e1d0a5ae8d4c6a2eaa8fee4d5cb6441dab",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/prettify.css": "2247d0cf27cb7fd9cd850eb33444c059f261eb63a718c26b8f357a938401c5f9",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/bundle.gif": "22656fa2e683f8e5f7adae31f091ce045fc91f042a8fed82ee7a05fc4029e0c2",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/com.example/index.html": "263f49e073b89aa3756425d41a77ffa37fe4ee2e4887b8cad7679e76997cff79",
"git-hires-merge-example/git-hires-merge-example/target/jacoco.exec": "351591e8d31c2125f7ad1d1150122d86891990f9a2af77fea55f787da4f09d48",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco.xml": "36cec1943bd4d387b22d6e74ece27efb3a58e2d1d632b5cd421c9b0ac8a81403",
"git-hires-merge-example/git-hires-merge-example/src/main/java/com/example/App.java": "37ac2e815b06a3079658809c1ae83a486cc21168a2a4054645b61d0aec0d55c9",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/source.gif": "50257f70adf3ebe4d18bb2432a61370dac06dad7a45f235c8e5f376081251ac3",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/method.gif": "6308ea5fba7f5e989f67e3e9bb3366f0cb4c90ab72e838af7ff813aca5aac487",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/com.example/App.java.html": "630d61820bfbdd067e81cf0c13e6a2bbd080518aac63456bc422a0c462253567",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/down.gif": "69233accdde9fbbeef01a6bb5c117a4af85f24f7312054bccbeb0f64b7bb437c",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/prettify.js": "743c6c4cab9499cd0bfe18a5a62281eccce843f47ec75eedb32eeb29c755aa68",
"git-hires-merge-example/git-hires-merge-example/pom.xml": "74505359b872eaa8e652e52d467296f28a4d5c73801f70acead1abf119e32e9b",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/class.gif": "7ad9ac2c3f87d220f46d077e1b03d743ba45084cbbbccc2577e18c167f0fb2b1",
"git-hires-merge-example/git-hires-merge-example/target/test-classes/com/example/AppTest.class": "7cea94156687fae90bd08224fba648cd52e9cb3f8d08fbb46f2558ddba6eefde",
"git-hires-merge-example/git-hires-merge-example/bin/test/com/example/AppTest.class": "7d310cd99468be63266706bbec066a688a8f9055a1da6a05dfce82e487c36ff5",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/sort.js": "7ff293dabc89d68e33d5611f2de0dbbbcfed7e0177726fab5f9dcc0b91f593af",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/package.gif": "82577b6b416bbf65cc99bc1c249e6462ae4ac708687fe2e1428b610c049300c2",
"git-hires-merge-example/git-hires-merge-example/bin/main/com/example/App.class": "86ce84ba15a826ee401e4b5ccf84844f754b3d39865f5c80b0e97720afd365fc",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-sessions.html": "875405f756e82896cbb56de8fc905d00aa11bb3d738d4d7edd3ced3c7a2bd98f",
"git-hires-merge-example/git-hires-merge-example/target/surefire-reports/com.example.AppTest.txt": "89784b5fb8a4de26d2a972a687c27a65f17200a95024f2421933553c102f9841",
"git-hires-merge-example/git-hires-merge-example/README.md": "8cc0de964fdcafc9f2a5bccaa3bab79fc9165aef48b178766fc2f97f10ca06a8",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/redbar.gif": "906ce7fcec0fbac1a26c16521e375f020a513f0cf87888766b69c4bf63f4d9bd",
"git-hires-merge-example/git-hires-merge-example/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst": "9e2a78b71e5d50d70c7821ac3c3391415189f886a3faaffad2fa9572bfdb8425",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/index.html": "9e9c7171cb6470410694a00202fc16083d29d4fa59d1ea77d78165af5190389b",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/greenbar.gif": "a29475aae8c0a107bc41459eb2ca079bc34fc5092f107a78a287c4864b325791",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/branchpc.gif": "a419dc4732e7c225b409bfd5457263de4f20b6aa9737f4636f6dfad36b194b1d",
"git-hires-merge-example/git-hires-merge-example/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst": "a8b354d8fc90bf35ca0326d0ee6d339033a9e9dcef4667d0d5b833f978a0251d",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/branchfc.gif": "a90e2acee1486fcc06a9dc77dc3a2c7d1103f9f65a9c833834c22079ca446d76",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco.csv": "b8d53bcbb2b49a1dab456e750dad5d55b439d90d9738ad018f28674671975aa1",
"git-hires-merge-example/git-hires-merge-example/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst": "bea6b355d623d7b8ff559a587d7a87a9dc8e1b85074471658ba2282395cb8706",
"git-hires-merge-example/git-hires-merge-example/target/surefire-reports/TEST-com.example.AppTest.xml": "bf7faecbb3936cfc4f449bae8fd910515495d957abbabbecf647a1dbf39b7a10",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/report.gif": "c765cafa65ec9a3c221b46062c6ef691f2dd5ff25628ee2c65cc9fe0b74aeea6",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/branchnc.gif": "cfd1d5ad1e939c895fc6c3a4b621f5b4bec61a4b71f8386d3520f0920930ba1f",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/com.example/App.html": "d396d032587a8d4d08cfbeeac4d9cb865bc238848143cb7f0b9ae9da7cadd0d3",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/up.gif": "e1ce2c1d5128b701f2dc5bb5f63c5c402d5d8125ad9c3ffc31c6abc7767899a0",
"git-hires-merge-example/git-hires-merge-example/src/test/java/com/example/AppTest.java": "e48c855289521a9a9842a45607ff2f9f548dcd15ed63a31446de1d3bd317fb6d",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/jacoco-resources/group.gif": "eb39f2e57cabfd740ab8d0c5fb4b26fc5ca71b334e2cfc8d6481c34842b4c360",
"git-hires-merge-example/git-hires-merge-example/target/classes/com/example/App.class": "f83bda6a392f0d331f121007f926c00777da55c22b958c650a422238306c54df",
"git-hires-merge-example/git-hires-merge-example/target/site/jacoco/com.example/index.source.html": "fe1dd797ef17a53460aca8251b5c9d4256df55ebd3a97dcb9d3f45959c0022a0",
"git-hires-merge-example/git-hires-merge-example/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst": "fe99b0d168fb4136b6e66a3446962d0ee917ce2614e77f005c9c8c090095b0f8"
}
Loading

0 comments on commit 1ee924f

Please sign in to comment.