Skip to content

Commit

Permalink
fix(importer): tolerate blobs disappearing
Browse files Browse the repository at this point in the history
I noticed some errors getting logged by the GCS deletion code because
the blob disappeared in between the bucket getting listed and the blobs
being retrieved (this can happen, but led me to also find the fix in a
related change).

Also give validation failure the same exception-raising treatment as the
refactor in google#2644 did for import-time validation failure.
  • Loading branch information
andrewpollock committed Sep 30, 2024
1 parent 9ba5993 commit 2b42e71
Showing 1 changed file with 18 additions and 14 deletions.
32 changes: 18 additions & 14 deletions docker/importer/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from google.cloud import pubsub_v1
from google.cloud import storage
from google.cloud.storage import retry
from google.cloud.exceptions import NotFound
import pygit2.enums

import osv
Expand Down Expand Up @@ -292,31 +293,34 @@ def _vuln_ids_from_gcs_blob(self, client: storage.Client,
source_repo: the osv.SourceRepository the blob relates to
blob: the storage.Blob object to operate on
Raises:
jsonschema.exceptions.ValidationError when self._strict_validation is True
and input fails OSV JSON Schema validation
Returns:
a list of one or more vulnerability IDs (from the Vulnerability proto) or
None when the blob has an unexpected name or fails to parse
None when the blob has an unexpected name or fails to retrieve
"""
if not _is_vulnerability_file(source_repo, blob.name):
return None

# Download in a blob generation agnostic way to cope with the blob
# changing between when it was listed and now (if the generation doesn't
# match, retrieval fails otherwise).
blob_bytes = storage.Blob(
blob.name, blob.bucket, generation=None).download_as_bytes(client)

vuln_ids = []
try:
vulns = osv.parse_vulnerabilities_from_data(
blob_bytes,
os.path.splitext(blob.name)[1],
strict=self._strict_validation)
except Exception as e:
logging.error('Failed to parse vulnerability %s: %s', blob.name, e)
# TODO(andrewpollock): I think this needs to be reraised here...
# a jsonschema.exceptions.ValidationError only gets raised in strict
# validation mode.
blob_bytes = storage.Blob(
blob.name, blob.bucket, generation=None).download_as_bytes(client)
except NotFound:
# The file can disappear between bucket listing and blob retrieval.
return None

vuln_ids = []
# When self._strict_validation is True,
# this *may* raise a jsonschema.exceptions.ValidationError
vulns = osv.parse_vulnerabilities_from_data(
blob_bytes,
os.path.splitext(blob.name)[1],
strict=self._strict_validation)
for vuln in vulns:
vuln_ids.append(vuln.id)
return vuln_ids
Expand Down

0 comments on commit 2b42e71

Please sign in to comment.