Skip to content

Commit

Permalink
Speed corpus_pruning task (#4659)
Browse files Browse the repository at this point in the history
1. Don't do unnecessary operations.
2. Impose stricter limits.
  • Loading branch information
jonathanmetzman committed Feb 4, 2025
1 parent 80c81e7 commit 984f90e
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ async def delete_gcs_blobs_batch(session, bucket, blobs_to_delete, token):
num_deleted = 0
blobs_to_delete = []
delete_tasks = []
num_batches
num_batches = 0
for blob in storage.get_blobs_no_retry(corpus_url, recursive=True):
idx += 1
if not deleting:
Expand Down
14 changes: 4 additions & 10 deletions src/clusterfuzz/_internal/fuzzing/corpus_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,13 +666,7 @@ def get_proto_corpus(bucket_name,
corpus_urls = storage.sign_urls_for_existing_files(urls, include_delete_urls)
upload_urls = storage.get_arbitrary_signed_upload_urls(
gcs_url, num_uploads=max_upload_urls)
corpus = uworker_msg_pb2.Corpus( # pylint: disable=no-member
gcs_url=gcs_url,)
last_updated = _last_updated(_get_gcs_url(bucket_name, bucket_path))
if last_updated:
timestamp = timestamp_pb2.Timestamp() # pylint: disable=no-member
timestamp.FromDatetime(last_updated)
corpus.last_updated_time.CopyFrom(timestamp)
corpus = uworker_msg_pb2.Corpus(gcs_url=gcs_url) # pylint: disable=no-member
# Iterate over imap_unordered results.
for upload_url in upload_urls:
corpus.upload_urls.append(upload_url)
Expand Down Expand Up @@ -756,8 +750,8 @@ def get_corpuses_for_pruning(engine, project_qualified_name):
project_qualified_name,
include_regressions=True,
include_delete_urls=True,
max_upload_urls=5000,
max_download_urls=40000)
max_upload_urls=3_000,
max_download_urls=30_000)
# We will never need to upload more than the number of testcases in the
# corpus to the quarantine. But add a max of 500 to avoid spending
# too much time on crazy edge cases.
Expand All @@ -768,5 +762,5 @@ def get_corpuses_for_pruning(engine, project_qualified_name):
quarantine=True,
include_delete_urls=True,
max_upload_urls=max_upload_urls,
max_download_urls=5000)
max_download_urls=1_000)
return corpus, quarantine_corpus

0 comments on commit 984f90e

Please sign in to comment.