Skip to content

Commit

Permalink
[pysrc2cpg] Rework Import Resolution (joernio#3812)
Browse files Browse the repository at this point in the history
* [pysrc2cpg] Rework Import Resolution

The issue with the import handling is that paths that resolve to internal entities, and those to external dependencies underwent the same heuristic-driven path resolution.

The problem here is that internal entities have simple guarantees that allow easier and faster traversals with little to no heuristics required other than handling Python 2/3 differences. The rest can then be bundled accurately as unresolved.

### Main changes
* Moved changes for Python's import resolver `codeRoot` to the base class
* Separated handling of internal entities with external entities:
  - Internal entities that are importable, are given associated Pythonic import paths in the `moduleCache` map.
  - Any import paths that don't get a hit in this map then undergo some heuristic-based path building to make sensible looking types
* Split the `ResolvedImport` classes into `UnresolvedImport` and `ResolvedImport` with `EvaluatedImport` as the high-level trait. This separates imports found and those that have not been, but have undergone some heuristic processing.

### Misc
Renamed frontends' `ImportPass` to be prepended with the language name for easier navigation.

### Follow-up
Python models entities imported as `import x.y` as a field access, `FieldAccess(x).fieldIdentifier(y)`. However, `x` may not have an associated type declaration for the module since it is simply a directory holding various modules. This means that `y` may not always be resolved as it's interpreted as a member instead of a standalone module.

* Removed redundant code

* Escape backslash on windows

* Fixed compilation issue in Ruby

* Ignore function type refs
  • Loading branch information
DavidBakerEffendi authored Nov 11, 2023
1 parent 8e1481f commit 951b0f3
Show file tree
Hide file tree
Showing 15 changed files with 340 additions and 225 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ case class PythonSrcCpgGenerator(config: FrontendConfig, rootPath: Path) extends

override def applyPostProcessingPasses(cpg: Cpg): Cpg = {
new ImportsPass(cpg).createAndApply()
new ImportResolverPass(cpg).createAndApply()
new PythonImportResolverPass(cpg).createAndApply()
new DynamicTypeHintFullNamePass(cpg).createAndApply()
new PythonInheritanceNamePass(cpg).createAndApply()
val typeRecoveryConfig = pyConfig match
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ object JsSrc2Cpg {
List(
new JavaScriptInheritanceNamePass(cpg),
new ConstClosurePass(cpg),
new ImportResolverPass(cpg),
new JavaScriptImportResolverPass(cpg),
new JavaScriptTypeRecoveryPass(cpg, typeRecoveryConfig),
new JavaScriptTypeHintCallLinker(cpg),
new NaiveCallLinker(cpg)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import java.io.File as JFile
import java.util.regex.{Matcher, Pattern}
import scala.util.{Failure, Success, Try}

class ImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) {
class JavaScriptImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) {

private val pathPattern = Pattern.compile("[\"']([\\w/.]+)[\"']")

Expand All @@ -27,7 +27,7 @@ class ImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) {
val alias = importedAs
val matcher = pathPattern.matcher(rawEntity)
val sep = Matcher.quoteReplacement(JFile.separator)
val root = s"$codeRoot${JFile.separator}"
val root = s"$codeRootDir${JFile.separator}"
val currentFile = s"$root$fileName"
// We want to know if the import is local since if an external name is used to match internal methods we may have
// false paths.
Expand Down Expand Up @@ -114,12 +114,12 @@ class ImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) {
// Exported closure with a method ref within the AST of the RHS
y.ast.isMethodRef.map(mRef => ResolvedMethod(mRef.methodFullName, alias, Option("this"))).toSet
case _ =>
Set.empty[ResolvedImport]
Set.empty[EvaluatedImport]
}
}.toSet
} else {
Set(UnknownMethod(entity, alias, Option("this")), UnknownTypeDecl(entity))
}).foreach(x => resolvedImportToTag(x, importCall, diffGraph))
}).foreach(x => evaluatedImportToTag(x, importCall, diffGraph))
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {

"resolve correct imports via tag nodes" in {
val List(a: UnknownMethod, b: UnknownTypeDecl, x: UnknownMethod, y: UnknownTypeDecl) =
cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
a.fullName shouldBe "slack_sdk:WebClient"
b.fullName shouldBe "slack_sdk:WebClient"
x.fullName shouldBe "sendgrid:SendGridAPIClient"
Expand Down Expand Up @@ -141,7 +141,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {

"resolve correct imports via tag nodes" in {
val List(a: ResolvedMember, b: ResolvedMember, c: ResolvedMember, d: UnknownMethod, e: UnknownTypeDecl) =
cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
a.basePath shouldBe "Foo.ts::program"
a.memberName shouldBe "x"
b.basePath shouldBe "Foo.ts::program"
Expand Down Expand Up @@ -229,7 +229,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {
)

"resolve correct imports via tag nodes" in {
val List(x: ResolvedMethod) = cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
val List(x: ResolvedMethod) = cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
x.fullName shouldBe "util.js::program:getIncrementalInteger"
}

Expand Down Expand Up @@ -258,7 +258,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {

"resolve correct imports via tag nodes" in {
val List(x: UnknownMethod, y: UnknownTypeDecl) =
cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
x.fullName shouldBe "googleapis"
y.fullName shouldBe "googleapis"
}
Expand All @@ -280,7 +280,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {

"resolve correct imports via tag nodes" in {
val List(x: UnknownMethod, y: UnknownTypeDecl, z: UnknownMethod) =
cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
x.fullName shouldBe "googleapis"
y.fullName shouldBe "googleapis"
z.fullName shouldBe "googleapis"
Expand Down Expand Up @@ -381,7 +381,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {

"resolve correct imports via tag nodes" in {
val List(a: ResolvedTypeDecl, b: ResolvedMethod, c: ResolvedMethod, d: UnknownMethod, e: UnknownTypeDecl) =
cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
a.fullName shouldBe "foo.js::program"
b.fullName shouldBe "foo.js::program:literalFunction"
c.fullName shouldBe "foo.js::program:get"
Expand Down

This file was deleted.

Loading

0 comments on commit 951b0f3

Please sign in to comment.