Skip to content

Commit

Permalink
feat(tools): allow deleting bugs from file (#2508)
Browse files Browse the repository at this point in the history
- Allow saving queried bug IDs to a file.
- Allow deleting bugs from a specified file. 

This is useful when we don't want to delete all bugs from a `source` at
once and need to filter the bugs before deleting them.
  • Loading branch information
hogo6002 authored Aug 23, 2024
1 parent 57c03cb commit 2593e81
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""Utility to delete all bugs for a given source."""
"""Utility to delete all bugs for a given source or from a given file."""

import sys
from google.cloud import datastore
from google.cloud.datastore.query import PropertyFilter

Expand Down Expand Up @@ -30,6 +31,18 @@ def main() -> None:
dest="source",
default="cve-osv",
help="The prefix of source_id records to delete")
parser.add_argument(
"--delete_from_file",
action=argparse.BooleanOptionalAction,
dest="delete_from_file",
default=False,
help="Delete individual bugs from a file")
parser.add_argument(
"--file",
action="store",
dest="file",
default="",
help="The file path of bug ids to delete")
parser.add_argument(
"--project",
action="store",
Expand All @@ -40,6 +53,11 @@ def main() -> None:

client = datastore.Client(project=args.project)

result = []
if args.delete_from_file:
delete_from_file(client, args.file, args.verbose, args.dryrun)
return

query = client.query(kind="Bug")
query.add_filter(filter=PropertyFilter("source", "=", args.source))

Expand Down Expand Up @@ -78,5 +96,38 @@ def main() -> None:
print("Deleted!")


def delete_from_file(client: datastore.Client, filepath, verbose, dry_run):
    """Delete the Bug entities whose IDs are listed in a file.

    The file is expected to contain one bug ID per line. Surrounding
    whitespace is stripped and blank lines are ignored. IDs are processed in
    batches of at most MAX_BATCH_SIZE, each batch in its own transaction.

    Args:
      client: Datastore client used to build keys and open transactions.
      filepath: Path of the text file containing the bug IDs.
      verbose: When true, print every key as it is processed.
      dry_run: When true, no key is queued for deletion.

    Exits the process with status 1 if the file cannot be read.
    """
    kind = "Bug"
    try:
        with open(filepath, 'r', encoding='utf-8') as file:
            # Drop blank lines so an empty string is never used as a key name.
            ids = [line.strip() for line in file if line.strip()]
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable path. ValueError: undecodable content
        # (UnicodeDecodeError) or a malformed path string.
        print('ERROR: Please provide a valid bug file, '
              f'separating each id with a new line. Error: {e}')
        sys.exit(1)
    print(f"There are {len(ids)} bugs to delete...")

    failed_batches = 0
    for batch_start in range(0, len(ids), MAX_BATCH_SIZE):
        batch_ids = ids[batch_start:batch_start + MAX_BATCH_SIZE]

        try:
            with client.transaction() as xact:
                for db_id in batch_ids:
                    key = client.key(kind, db_id)
                    if verbose:
                        print(f"Deleting {key}")
                    if not dry_run:
                        xact.delete(key)
                    else:
                        print("Dry run mode. "
                              "Preventing transaction from committing")
        except Exception as e:
            # Best effort: report the failed batch and carry on with the
            # remaining ones, but remember the failure for the final summary.
            failed_batches += 1
            print(f"Error during batch delete: {e}")

    if not ids or dry_run:
        return
    if failed_batches:
        # Do not claim success when at least one batch raised.
        print(f"Finished with {failed_batches} failed batch(es); "
              "not all bugs were deleted.")
    else:
        print("Deleted!")


if __name__ == "__main__":
main()
13 changes: 13 additions & 0 deletions tools/datafix/list_ids_for_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ def main() -> None:
dest="source_id",
default="cve-osv",
help="the source_id to filter on")
parser.add_argument(
"--txt",
action=argparse.BooleanOptionalAction,
dest="txt",
default=False,
help="output result to a txt file")
args = parser.parse_args()

ds_client = datastore.Client(project=args.project)
Expand All @@ -47,10 +53,17 @@ def main() -> None:
result = list(query.fetch())
print(f"Retrieved {len(result)} bugs")

bugs = []
# Chunk the results to modify in acceptably sized batches for the API.
for batch in range(0, len(result), MAX_BATCH_SIZE):
for bug in result[batch:batch + MAX_BATCH_SIZE]:
print(f"{bug['db_id']}")
bugs.append(bug['db_id'])

if args.txt:
with open('bug_ids.txt', 'w') as f:
for bug in bugs:
f.write(f'{bug}\n')


if __name__ == "__main__":
Expand Down

0 comments on commit 2593e81

Please sign in to comment.