From fefec373ea0303fd1143cac3ecd3e7f82fb1f676 Mon Sep 17 00:00:00 2001 From: Andrew Pollock Date: Thu, 16 Nov 2023 10:49:18 +1000 Subject: [PATCH] Improve fuzzy version matching (#1813) When doing prefix matching, do not do it versions with a "pre-release" textual suffix, to avoid undesired matches. Addresses multiple converted CVEs in Apache InLong that were showing as unfixed (their repo has some strange release tagging practices). --- vulnfeeds/git/versions.go | 32 +++++++++++++++++++++++++------- vulnfeeds/git/versions_test.go | 30 ++++++++++++++++++++++++++---- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/vulnfeeds/git/versions.go b/vulnfeeds/git/versions.go index 24ad3f0de3f..9df3564ffc6 100644 --- a/vulnfeeds/git/versions.go +++ b/vulnfeeds/git/versions.go @@ -16,6 +16,7 @@ package git import ( "fmt" + "regexp" "strings" "golang.org/x/exp/maps" @@ -23,18 +24,31 @@ import ( "github.com/google/osv/vulnfeeds/cves" ) -// Take an already normalized version, repo and the pre-normalized mapping of tags to commits and do fuzzy matching on the version, returning a GitCommit and a bool if successful. +// Take an already normalized version, repo and the mapping of repo tags +// normalized tags and commits and do fuzzy matching version, returning a +// GitCommit and a bool if successful. func fuzzyVersionToCommit(normalizedVersion string, repo string, commitType cves.CommitType, normalizedTags map[string]NormalizedTag) (ac cves.AffectedCommit, b bool) { - candidateTags := []string{} + candidateTags := []string{} // the subset of normalizedTags tags that might be appropriate to use as a fuzzy match for normalizedVersion. + // Keep in sync with the regex in cves.NormalizeVersion() + var validVersionText = regexp.MustCompile(`(?i)(?:rc|alpha|beta|preview)\d*`) + for _, k := range maps.Keys(normalizedTags) { - if strings.HasPrefix(k, normalizedVersion) { + // "1-8-0-RC0" (normalized from "1.8.0-RC0") shouldn't be considered a fuzzy match for "1-8-0" (normalized from "1.8.0") + if (validVersionText.MatchString(k) && validVersionText.MatchString(normalizedVersion)) && strings.HasPrefix(k, normalizedVersion) { + candidateTags = append(candidateTags, k) + } + if (!validVersionText.MatchString(k) && !validVersionText.MatchString(normalizedVersion)) && strings.HasPrefix(k, normalizedVersion) { candidateTags = append(candidateTags, k) } } - // We may now have one or more tags to further examine for a best choice. + + // There are zero, one or more tags to further examine for a best choice. + + // Nothing even looked like it started with normalizedVersion, fail. if len(candidateTags) == 0 { return ac, false } + if len(candidateTags) == 1 { ac.SetRepo(repo) switch commitType { @@ -51,7 +65,9 @@ func fuzzyVersionToCommit(normalizedVersion string, repo string, commitType cves } for i, t := range candidateTags { - // Handle the case where we were given say "12.0", but what we have is "12.0.0" + // Handle the case where where the + // normalizedVersion is "12-0" (i.e. was "12.0") but the normalizedTags + // has "12-0-0" (i.e. the repo had "12.0.0") if strings.TrimPrefix(t, normalizedVersion) == "-0" { ac.SetRepo(repo) switch commitType { @@ -67,6 +83,8 @@ func fuzzyVersionToCommit(normalizedVersion string, repo string, commitType cves return ac, true } } + + // All fuzzy matching attempts have failed. return ac, false } @@ -76,8 +94,8 @@ func VersionToCommit(version string, repo string, commitType cves.CommitType, no if err != nil { return ac, err } - // Try a straight out match first. - normalizedTag, ok := normalizedTags[normalizedVersion] + // Try a straight out (case-insensitive) match first. + normalizedTag, ok := normalizedTags[strings.ToLower(normalizedVersion)] if !ok { // Then try to fuzzy-match. ac, ok = fuzzyVersionToCommit(normalizedVersion, repo, commitType, normalizedTags) diff --git a/vulnfeeds/git/versions_test.go b/vulnfeeds/git/versions_test.go index 71e6ea427b4..0e3767d4661 100644 --- a/vulnfeeds/git/versions_test.go +++ b/vulnfeeds/git/versions_test.go @@ -7,8 +7,7 @@ import ( ) func TestVersionToCommit(t *testing.T) { - var cache RepoTagsCache - cache = make(RepoTagsCache) + cache := make(RepoTagsCache) tests := []struct { description string @@ -18,8 +17,7 @@ func TestVersionToCommit(t *testing.T) { expectedResult string expectedOk bool }{ - { - description: "An exact match", + { description: "An exact match", inputRepoURL: "https://github.com/ARMmbed/mbedtls", cache: cache, inputVersion: "3.0.0", @@ -50,6 +48,30 @@ func TestVersionToCommit(t *testing.T) { expectedResult: "", expectedOk: false, }, + { + description: "A version that should not fuzzy match to a release candidate", + inputRepoURL: "https://github.com/apache/inlong", + cache: cache, + inputVersion: "1.4.0", + expectedResult: "", + expectedOk: false, + }, + { + description: "An RC version that should match to a (single) release candidate", + inputRepoURL: "https://github.com/apache/inlong", + cache: cache, + inputVersion: "1.4.0-RC0", + expectedResult: "8c8145974548568a68bb81720cabdafbefe545be", + expectedOk: false, + }, + { + description: "An RC version that should match to one of many prefixed release candidates", + inputRepoURL: "https://github.com/apache/inlong", + cache: cache, + inputVersion: "1.8.0-RC0", + expectedResult: "15d9fd922ef314977c7ce47c1fdf5e1655efd945", + expectedOk: false, + }, } for _, tc := range tests {