From 3815d0528c7d766049149e3ca0a09a11da1f4f5a Mon Sep 17 00:00:00 2001 From: Khemraj Rathore Date: Thu, 7 Nov 2024 16:51:26 +0530 Subject: [PATCH 1/2] consider exclusion rule in ast gen --- .../joern/jssrc2cpg/utils/AstGenRunner.scala | 37 ++++++++++++++---- .../jssrc2cpg/preprocessing/AstGenTests.scala | 38 +++++++++++++++++++ 2 files changed, 67 insertions(+), 8 deletions(-) create mode 100644 joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/preprocessing/AstGenTests.scala diff --git a/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/utils/AstGenRunner.scala b/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/utils/AstGenRunner.scala index b68ba34431a7..ae67890dab1c 100644 --- a/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/utils/AstGenRunner.scala +++ b/joern-cli/frontends/jssrc2cpg/src/main/scala/io/joern/jssrc2cpg/utils/AstGenRunner.scala @@ -27,6 +27,8 @@ object AstGenRunner { private val MinifiedPathRegex: Regex = ".*([.-]min\\..*js|bundle\\.js)".r + private val Extensions = Set(".js", ".ts", ".vue", ".ejs", ".jsx", ".cjs", ".mjs", ".tsx") + private val AstGenDefaultIgnoreRegex: Seq[Regex] = List( "(conf|test|spec|[.-]min|\\.d)\\.(js|ts|jsx|tsx)$".r, @@ -346,21 +348,18 @@ class AstGenRunner(config: Config) { logger.info(s"Parsed $numOfParsedFiles files.") if (numOfParsedFiles == 0) { logger.warn("You may want to check the DEBUG logs for a list of files that are ignored by default.") - SourceFiles.determine( - in.pathAsString, - Set(".js", ".ts", ".vue", ".ejs", ".jsx", ".cjs", ".mjs", ".tsx"), - ignoredDefaultRegex = Option(AstGenDefaultIgnoreRegex) - ) + SourceFiles.determine(in.pathAsString, Extensions, ignoredDefaultRegex = Option(AstGenDefaultIgnoreRegex)) } files } def execute(out: File): AstGenRunnerResult = { - val in = File(config.inputPath) - logger.info(s"Running astgen in '$in' ...") + val tmpInput = filterAndCopyFiles() + val in = File(config.inputPath) + logger.info(s"Running astgen 
in '$tmpInput' ...") runAstGenNative(in, out) match { case Success(result) => - val parsed = checkParsedFiles(filterFiles(SourceFiles.determine(out.toString(), Set(".json")), out), in) + val parsed = checkParsedFiles(filterFiles(SourceFiles.determine(out.toString(), Set(".json")), out), tmpInput) val skipped = skippedFiles(result.toList) AstGenRunnerResult(parsed.map((in.toString(), _)), skipped.map((in.toString(), _))) case Failure(f) => @@ -369,4 +368,26 @@ class AstGenRunner(config: Config) { } } + def filterAndCopyFiles(): File = { + + /** Before running AstGen, filter the source files and copy the remaining ones into a temporary folder (returned), + * which can be given as input to AstGen. Earlier the filtering happened on the AstGen result; now it happens first, + * intending that AstGen parses only needed files. NOTE(review): execute still passes `in` to runAstGenNative. + */ + val filteredFiles = SourceFiles.determine( + config.inputPath, + Extensions, + ignoredDefaultRegex = Option(AstGenDefaultIgnoreRegex), + ignoredFilesRegex = Option(config.ignoredFilesRegex) + ) + val tmpInput = File.newTemporaryDirectory("privadoGeneratedInput") + filteredFiles.foreach { filePath => + val file = File(filePath) + val destinationFile = tmpInput / Paths.get(config.inputPath).relativize(file.path).toString + destinationFile.parent.createDirectoryIfNotExists(createParents = true) + file.copyTo(destinationFile, overwrite = true) + } + tmpInput + } + } diff --git a/joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/preprocessing/AstGenTests.scala b/joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/preprocessing/AstGenTests.scala new file mode 100644 index 000000000000..0b3965ab64be --- /dev/null +++ b/joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/preprocessing/AstGenTests.scala @@ -0,0 +1,38 @@ +package io.joern.jssrc2cpg.preprocessing + +import better.files.File +import io.joern.jssrc2cpg.Config +import io.joern.jssrc2cpg.testfixtures.AstJsSrc2CpgSuite +import io.joern.jssrc2cpg.utils.AstGenRunner + +class 
AstGenTests extends AstJsSrc2CpgSuite { + + "Ast gen" should { + val tmpDir = File.newTemporaryDirectory("src") + ((tmpDir / "folder1").createDirectoryIfNotExists() / "1.js").write("console.log('folder1');") + ((tmpDir / "folder2").createDirectoryIfNotExists() / "1.js").write("console.log('folder2');") + ((tmpDir / "folder3").createDirectoryIfNotExists() / "1.js").write("console.log('folder3');") + "ignore files mentioned in exclusion regex" in { + val newInputDir = new AstGenRunner(Config().withInputPath(tmpDir.toString).withIgnoredFilesRegex(".*folder3.*")) + .filterAndCopyFiles() + + val fileSet = newInputDir.listRecursively.filter(_.isRegularFile).map(_.pathAsString).toSet + fileSet.size shouldBe 2 + fileSet.count(_.matches(".*folder1/1.js")) shouldBe 1 + fileSet.count(_.matches(".*folder2/1.js")) shouldBe 1 + fileSet.count(_.matches(".*folder3/1.js")) shouldBe 0 + } + + "don't ignore files, if exclusion regex not passed" in { + val newInputDir = new AstGenRunner(Config().withInputPath(tmpDir.toString)).filterAndCopyFiles() + + val fileSet = newInputDir.listRecursively.filter(_.isRegularFile).map(_.pathAsString).toSet + fileSet.size shouldBe 3 + fileSet.count(_.matches(".*folder1/1.js")) shouldBe 1 + fileSet.count(_.matches(".*folder2/1.js")) shouldBe 1 + fileSet.count(_.matches(".*folder3/1.js")) shouldBe 1 + + } + } + +} From f26b523a1bb37bae66f218214c4fb9021f475448 Mon Sep 17 00:00:00 2001 From: Khemraj Rathore Date: Thu, 7 Nov 2024 17:22:10 +0530 Subject: [PATCH 2/2] update test case --- .../joern/jssrc2cpg/preprocessing/AstGenTests.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/preprocessing/AstGenTests.scala b/joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/preprocessing/AstGenTests.scala index 0b3965ab64be..43e4afd7560e 100644 --- a/joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/preprocessing/AstGenTests.scala +++ 
b/joern-cli/frontends/jssrc2cpg/src/test/scala/io/joern/jssrc2cpg/preprocessing/AstGenTests.scala @@ -18,9 +18,9 @@ class AstGenTests extends AstJsSrc2CpgSuite { val fileSet = newInputDir.listRecursively.filter(_.isRegularFile).map(_.pathAsString).toSet fileSet.size shouldBe 2 - fileSet.count(_.matches(".*folder1/1.js")) shouldBe 1 - fileSet.count(_.matches(".*folder2/1.js")) shouldBe 1 - fileSet.count(_.matches(".*folder3/1.js")) shouldBe 0 + fileSet.count(_.matches(".*folder1.*")) shouldBe 1 + fileSet.count(_.matches(".*folder2.*")) shouldBe 1 + fileSet.count(_.matches(".*folder3.*")) shouldBe 0 } "don't ignore files, if exclusion regex not passed" in { @@ -28,9 +28,9 @@ class AstGenTests extends AstJsSrc2CpgSuite { val fileSet = newInputDir.listRecursively.filter(_.isRegularFile).map(_.pathAsString).toSet fileSet.size shouldBe 3 - fileSet.count(_.matches(".*folder1/1.js")) shouldBe 1 - fileSet.count(_.matches(".*folder2/1.js")) shouldBe 1 - fileSet.count(_.matches(".*folder3/1.js")) shouldBe 1 + fileSet.count(_.matches(".*folder1.*")) shouldBe 1 + fileSet.count(_.matches(".*folder2.*")) shouldBe 1 + fileSet.count(_.matches(".*folder3.*")) shouldBe 1 } }