From 1e8e63f5e32944340fa49bfbe3eae968d2d692ed Mon Sep 17 00:00:00 2001 From: Vivek Kumar Sahu Date: Mon, 2 Dec 2024 20:11:56 +0530 Subject: [PATCH 1/2] remove duplicate packages from final package list Signed-off-by: Vivek Kumar Sahu --- pkg/assemble/spdx/merge.go | 19 ++++++--- pkg/assemble/spdx/utils.go | 84 ++++++++++++++++++++++++++++++-------- 2 files changed, 81 insertions(+), 22 deletions(-) diff --git a/pkg/assemble/spdx/merge.go b/pkg/assemble/spdx/merge.go index 32773b9..825caae 100644 --- a/pkg/assemble/spdx/merge.go +++ b/pkg/assemble/spdx/merge.go @@ -17,6 +17,8 @@ package spdx import ( + "fmt" + "github.com/google/uuid" "github.com/interlynk-io/sbomasm/pkg/logger" "github.com/spdx/tools-golang/spdx" @@ -98,14 +100,21 @@ func (m *merge) combinedMerge() error { describedPkgs := getDescribedPkgs(m) - //Add Packages to document + // Add Packages to document doc.Packages = append(doc.Packages, primaryPkg) doc.Packages = append(doc.Packages, pkgs...) - //Add Files to document + doc.Packages = removeDuplicates(doc.Packages) + + for _, p := range doc.Packages { + fmt.Println("DOC PACKAGE NAME: ", p.PackageName) + fmt.Println("DOC VERSION NAME: ", p.PackageVersion) + } + + // Add Files to document doc.Files = append(doc.Files, files...) - //Add OtherLicenses to document + // Add OtherLicenses to document doc.OtherLicenses = append(doc.OtherLicenses, otherLicenses...) topLevelRels := []*spdx.Relationship{} @@ -140,13 +149,13 @@ func (m *merge) combinedMerge() error { } } - //Add Relationships to document + // Add Relationships to document doc.Relationships = append(doc.Relationships, topLevelRels...) if len(rels) > 0 { doc.Relationships = append(doc.Relationships, rels...) 
} - //Write the SBOM + // Write the SBOM err = writeSBOM(doc, m) return err diff --git a/pkg/assemble/spdx/utils.go b/pkg/assemble/spdx/utils.go index 4d1c386..7da90ca 100644 --- a/pkg/assemble/spdx/utils.go +++ b/pkg/assemble/spdx/utils.go @@ -39,6 +39,7 @@ import ( spdx_rdf "github.com/spdx/tools-golang/rdf" "github.com/spdx/tools-golang/spdx" "github.com/spdx/tools-golang/spdx/v2/common" + spdx_common "github.com/spdx/tools-golang/spdx/v2/common" "github.com/spdx/tools-golang/spdx/v2/v2_3" spdx_tv "github.com/spdx/tools-golang/tagvalue" spdx_yaml "github.com/spdx/tools-golang/yaml" @@ -160,7 +161,7 @@ func externalDocumentRefs(docs []*v2_3.Document) []v2_3.ExternalDocumentRef { func getAllCreators(docs []*v2_3.Document, authors []Author) []common.Creator { var creators []common.Creator - var uniqCreator = make(map[string]common.Creator) + uniqCreator := make(map[string]common.Creator) for _, doc := range docs { if doc.CreationInfo != nil { @@ -291,7 +292,7 @@ func genSpdxDocument(ms *merge) (*v2_3.Document, error) { func genCreationInfo(ms *merge) (*v2_3.CreationInfo, error) { ci := v2_3.CreationInfo{} - //set UTC time + // set UTC time ci.Created = utcNowTime() ci.CreatorComment = getCreatorComments(ms.in) lVersions := getLicenseListVersion(ms.in) @@ -311,10 +312,10 @@ func genPrimaryPackage(ms *merge) (*v2_3.Package, error) { pkg.PackageDescription = ms.settings.App.Description pkg.PackageSPDXIdentifier = common.ElementID(fmt.Sprintf("RootPackage-%s", ms.rootPackageID)) pkg.PackageDownloadLocation = NOA - //This is set to true since we are analyzing the merged sboms files + // This is set to true since we are analyzing the merged sboms files pkg.FilesAnalyzed = true - //Add Supplier + // Add Supplier if ms.settings.App.Supplier.Name != "" { pkg.PackageSupplier = &common.Supplier{} pkg.PackageSupplier.SupplierType = "Organization" @@ -326,7 +327,7 @@ func genPrimaryPackage(ms *merge) (*v2_3.Package, error) { } } - //Add checksums if provided. 
+ // Add checksums if provided. if len(ms.settings.App.Checksums) > 0 { pkg.PackageChecksums = []common.Checksum{} for _, c := range ms.settings.App.Checksums { @@ -389,7 +390,7 @@ func genPackageList(ms *merge) ([]*v2_3.Package, map[string]string, error) { for _, doc := range ms.in { for _, pkg := range doc.Packages { - //Clone the package + // Clone the package clone, err := clonePkg(pkg) if err != nil { return nil, nil, err @@ -402,7 +403,7 @@ func genPackageList(ms *merge) ([]*v2_3.Package, map[string]string, error) { clone.PackageSPDXIdentifier = newSpdxId - //Fixes + // Fixes // if filesanalyzed is false, nil our verification code if !clone.FilesAnalyzed { clone.PackageVerificationCode = nil @@ -415,7 +416,7 @@ func genPackageList(ms *merge) ([]*v2_3.Package, map[string]string, error) { clone.Files = nil - //Add the package to the list + // Add the package to the list pkgs = append(pkgs, clone) } } @@ -423,14 +424,63 @@ func genPackageList(ms *merge) ([]*v2_3.Package, map[string]string, error) { return pkgs, mapper, nil } +// remove duplicates from doc.Packages +func removeDuplicates(packages []*spdx.Package) []*spdx.Package { + uniquePackages := []*spdx.Package{} + seen := make(map[string]bool) + + for _, pkg := range packages { + + key := createPackageKey(pkg) + fmt.Println("KEY: ", key) + if !seen[key] { + uniquePackages = append(uniquePackages, pkg) + seen[key] = true + } + } + + return uniquePackages +} + +// unique package key, which will help to determine the duplicacy of packages +func createPackageKey(pkg *spdx.Package) string { + if len(pkg.PackageExternalReferences) > 0 { + for _, ref := range pkg.PackageExternalReferences { + if strings.ToLower(ref.RefType) == spdx_common.TypePackageManagerPURL { + return "purl:" + ref.Locator + } + } + } + + if len(pkg.PackageExternalReferences) > 0 { + for _, ref := range pkg.PackageExternalReferences { + if ref.RefType == spdx_common.TypeSecurityCPE23Type || ref.RefType == spdx_common.TypeSecurityCPE22Type { + 
return "cpe:" + ref.Locator + } + } + } + + if pkg.PackageName != "" && pkg.PackageVersion != "" { + return "name-version:" + pkg.PackageName + ":" + pkg.PackageVersion + } + + if len(pkg.PackageChecksums) > 0 { + for _, checksum := range pkg.PackageChecksums { + return "checksum:" + checksum.Value + } + } + + return "spdx-id:" + string(pkg.PackageSPDXIdentifier) +} + func genFileList(ms *merge) ([]*v2_3.File, map[string]string, error) { var files []*v2_3.File mapper := make(map[string]string) for _, doc := range ms.in { - //Add the files from the document + // Add the files from the document for _, file := range doc.Files { - //Clone the file + // Clone the file clone, err := cloneFile(file) if err != nil { return nil, nil, err @@ -442,14 +492,14 @@ func genFileList(ms *merge) ([]*v2_3.File, map[string]string, error) { mapper[oldSpdxId] = string(newSpdxId) clone.FileSPDXIdentifier = newSpdxId - //Add the file to the list + // Add the file to the list files = append(files, clone) } - //Add the files from the packages + // Add the files from the packages for _, pkg := range doc.Packages { for _, file := range pkg.Files { - //Clone the file + // Clone the file clone, err := cloneFile(file) if err != nil { return nil, nil, err @@ -461,7 +511,7 @@ func genFileList(ms *merge) ([]*v2_3.File, map[string]string, error) { mapper[oldSpdxId] = string(newSpdxId) clone.FileSPDXIdentifier = newSpdxId - //Add the file to the list + // Add the file to the list files = append(files, clone) } } @@ -483,7 +533,7 @@ func genRelationships(ms *merge, pkgMapper map[string]string, fileMapper map[str continue } - //Clone the relationship + // Clone the relationship clone, err := cloneRelationship(rel) if err != nil { return nil, err @@ -507,7 +557,7 @@ func genRelationships(ms *merge, pkgMapper map[string]string, fileMapper map[str } } - //Update ElementId RefA and RefB + // Update ElementId RefA and RefB if rel.RefA.ElementRefID != "" { namespace := "" if rel.RefA.DocumentRefID != "" { @@ 
-545,7 +595,7 @@ func genRelationships(ms *merge, pkgMapper map[string]string, fileMapper map[str } } - //Add the relationship to the list + // Add the relationship to the list relationships = append(relationships, clone) } } From 0683be7ac057107b9df321973daa9d9e2389fec5 Mon Sep 17 00:00:00 2001 From: Vivek Kumar Sahu Date: Wed, 4 Dec 2024 13:18:00 +0530 Subject: [PATCH 2/2] fix duplicate package and its deps Signed-off-by: Vivek Kumar Sahu --- pkg/assemble/cdx/uniq_comp_service.go | 4 +- pkg/assemble/spdx/merge.go | 9 --- pkg/assemble/spdx/utils.go | 95 ++++++--------------------- 3 files changed, 23 insertions(+), 85 deletions(-) diff --git a/pkg/assemble/cdx/uniq_comp_service.go b/pkg/assemble/cdx/uniq_comp_service.go index f1c0e6e..79e9a0d 100644 --- a/pkg/assemble/cdx/uniq_comp_service.go +++ b/pkg/assemble/cdx/uniq_comp_service.go @@ -27,10 +27,10 @@ import ( type uniqueComponentService struct { ctx context.Context - //unique list of new components + // unique list of new components compMap map[string]*cydx.Component - //mapping from old component id to new component id + // mapping from old component id to new component id idMap map[string]string } diff --git a/pkg/assemble/spdx/merge.go b/pkg/assemble/spdx/merge.go index 825caae..2d8c0f4 100644 --- a/pkg/assemble/spdx/merge.go +++ b/pkg/assemble/spdx/merge.go @@ -17,8 +17,6 @@ package spdx import ( - "fmt" - "github.com/google/uuid" "github.com/interlynk-io/sbomasm/pkg/logger" "github.com/spdx/tools-golang/spdx" @@ -104,13 +102,6 @@ func (m *merge) combinedMerge() error { doc.Packages = append(doc.Packages, primaryPkg) doc.Packages = append(doc.Packages, pkgs...) - doc.Packages = removeDuplicates(doc.Packages) - - for _, p := range doc.Packages { - fmt.Println("DOC PACKAGE NAME: ", p.PackageName) - fmt.Println("DOC VERSION NAME: ", p.PackageVersion) - } - // Add Files to document doc.Files = append(doc.Files, files...) 
diff --git a/pkg/assemble/spdx/utils.go b/pkg/assemble/spdx/utils.go index 7da90ca..8bfb9d2 100644 --- a/pkg/assemble/spdx/utils.go +++ b/pkg/assemble/spdx/utils.go @@ -39,7 +39,6 @@ import ( spdx_rdf "github.com/spdx/tools-golang/rdf" "github.com/spdx/tools-golang/spdx" "github.com/spdx/tools-golang/spdx/v2/common" - spdx_common "github.com/spdx/tools-golang/spdx/v2/common" "github.com/spdx/tools-golang/spdx/v2/v2_3" spdx_tv "github.com/spdx/tools-golang/tagvalue" spdx_yaml "github.com/spdx/tools-golang/yaml" @@ -387,36 +386,39 @@ func createLookupKey(docName, spdxId string) string { func genPackageList(ms *merge) ([]*v2_3.Package, map[string]string, error) { var pkgs []*v2_3.Package mapper := make(map[string]string) + seen := make(map[string]string) for _, doc := range ms.in { for _, pkg := range doc.Packages { - // Clone the package + key := fmt.Sprintf("%s-%s", strings.ToLower(pkg.PackageName), strings.ToLower(pkg.PackageVersion)) + + // if already seen, map the old SPDXID to the new SPDXID + if newID, exists := seen[key]; exists { + oldSpdxId := createLookupKey(doc.DocumentNamespace, string(pkg.PackageSPDXIdentifier)) + mapper[oldSpdxId] = newID + continue + } + clone, err := clonePkg(pkg) if err != nil { return nil, nil, err } - newSpdxId := common.ElementID(fmt.Sprintf("Package-%s", uuid.New().String())) oldSpdxId := createLookupKey(doc.DocumentNamespace, string(pkg.PackageSPDXIdentifier)) mapper[oldSpdxId] = string(newSpdxId) - + seen[key] = string(newSpdxId) clone.PackageSPDXIdentifier = newSpdxId - // Fixes - // if filesanalyzed is false, nil our verification code if !clone.FilesAnalyzed { clone.PackageVerificationCode = nil } - if clone.PackageVerificationCode != nil && clone.PackageVerificationCode.Value == "" { clone.PackageVerificationCode = nil clone.FilesAnalyzed = false } - clone.Files = nil - // Add the package to the list pkgs = append(pkgs, clone) } } @@ -424,55 +426,6 @@ func genPackageList(ms *merge) ([]*v2_3.Package, map[string]string, error) 
{ return pkgs, mapper, nil } -// remove duplicates from doc.Packages -func removeDuplicates(packages []*spdx.Package) []*spdx.Package { - uniquePackages := []*spdx.Package{} - seen := make(map[string]bool) - - for _, pkg := range packages { - - key := createPackageKey(pkg) - fmt.Println("KEY: ", key) - if !seen[key] { - uniquePackages = append(uniquePackages, pkg) - seen[key] = true - } - } - - return uniquePackages -} - -// unique package key, which will help to determine the duplicacy of packages -func createPackageKey(pkg *spdx.Package) string { - if len(pkg.PackageExternalReferences) > 0 { - for _, ref := range pkg.PackageExternalReferences { - if strings.ToLower(ref.RefType) == spdx_common.TypePackageManagerPURL { - return "purl:" + ref.Locator - } - } - } - - if len(pkg.PackageExternalReferences) > 0 { - for _, ref := range pkg.PackageExternalReferences { - if ref.RefType == spdx_common.TypeSecurityCPE23Type || ref.RefType == spdx_common.TypeSecurityCPE22Type { - return "cpe:" + ref.Locator - } - } - } - - if pkg.PackageName != "" && pkg.PackageVersion != "" { - return "name-version:" + pkg.PackageName + ":" + pkg.PackageVersion - } - - if len(pkg.PackageChecksums) > 0 { - for _, checksum := range pkg.PackageChecksums { - return "checksum:" + checksum.Value - } - } - - return "spdx-id:" + string(pkg.PackageSPDXIdentifier) -} - func genFileList(ms *merge) ([]*v2_3.File, map[string]string, error) { var files []*v2_3.File mapper := make(map[string]string) @@ -557,39 +510,33 @@ func genRelationships(ms *merge, pkgMapper map[string]string, fileMapper map[str } } - // Update ElementId RefA and RefB if rel.RefA.ElementRefID != "" { - namespace := "" + namespace := doc.DocumentNamespace if rel.RefA.DocumentRefID != "" { namespace = getDocumentNamespace(rel.RefA.DocumentRefID, ms) - } else { - namespace = doc.DocumentNamespace } key := createLookupKey(namespace, string(rel.RefA.ElementRefID)) - - if _, ok := pkgMapper[key]; ok { - clone.RefA.ElementRefID = 
common.ElementID(pkgMapper[key]) - } else if _, ok := fileMapper[key]; ok { - clone.RefA.ElementRefID = common.ElementID(fileMapper[key]) + if newID, ok := pkgMapper[key]; ok { + clone.RefA.ElementRefID = common.ElementID(newID) + } else if newID, ok := fileMapper[key]; ok { + clone.RefA.ElementRefID = common.ElementID(newID) } else { log.Warn(fmt.Sprintf("RefA: Could not find element %s in the merge set", key)) } } if rel.RefB.ElementRefID != "" { - namespace := "" + namespace := doc.DocumentNamespace if rel.RefB.DocumentRefID != "" { namespace = getDocumentNamespace(rel.RefB.DocumentRefID, ms) - } else { - namespace = doc.DocumentNamespace } key := createLookupKey(namespace, string(rel.RefB.ElementRefID)) - if _, ok := pkgMapper[key]; ok { - clone.RefB.ElementRefID = common.ElementID(pkgMapper[key]) - } else if _, ok := fileMapper[key]; ok { - clone.RefB.ElementRefID = common.ElementID(fileMapper[key]) + if newID, ok := pkgMapper[key]; ok { + clone.RefB.ElementRefID = common.ElementID(newID) + } else if newID, ok := fileMapper[key]; ok { + clone.RefB.ElementRefID = common.ElementID(newID) } else { log.Warn(fmt.Sprintf("RefB: Could not find element %s in the merge set", key)) }