From 280a9b4408b2c9c675fa6e9c67396e0cfda25700 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 20 Jun 2024 21:32:46 +0200 Subject: [PATCH 1/6] Python: Support Model Editor --- python/ql/lib/modeling/ModelEditor.qll | 248 ++++++++++++++++++ python/ql/lib/modeling/Util.qll | 93 +++++++ .../modeleditor/FrameworkModeEndpoints.ql | 14 + .../modelling/FrameworkModeEndpoints.expected | 11 + .../modelling/FrameworkModeEndpoints.ext.yml | 20 ++ .../modelling/FrameworkModeEndpoints.qlref | 1 + python/ql/test/modelling/MyPackage/Foo.py | 25 ++ .../ql/test/modelling/MyPackage/__init__.py | 0 python/ql/test/modelling/TopLevel.py | 8 + 9 files changed, 420 insertions(+) create mode 100644 python/ql/lib/modeling/ModelEditor.qll create mode 100644 python/ql/lib/modeling/Util.qll create mode 100644 python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql create mode 100644 python/ql/test/modelling/FrameworkModeEndpoints.expected create mode 100644 python/ql/test/modelling/FrameworkModeEndpoints.ext.yml create mode 100644 python/ql/test/modelling/FrameworkModeEndpoints.qlref create mode 100644 python/ql/test/modelling/MyPackage/Foo.py create mode 100644 python/ql/test/modelling/MyPackage/__init__.py create mode 100644 python/ql/test/modelling/TopLevel.py diff --git a/python/ql/lib/modeling/ModelEditor.qll b/python/ql/lib/modeling/ModelEditor.qll new file mode 100644 index 000000000000..d6c57b6494b5 --- /dev/null +++ b/python/ql/lib/modeling/ModelEditor.qll @@ -0,0 +1,248 @@ +/** Provides classes and predicates related to handling APIs for the VS Code extension. */ + +private import python +private import semmle.python.frameworks.data.ModelsAsData +private import semmle.python.frameworks.data.internal.ApiGraphModelsExtensions +private import semmle.python.dataflow.new.internal.DataFlowDispatch as DP +private import Util as Util + +/** + * An string describing the kind of source code element being modeled. + * + * See `EndPoint`. + */ +class EndpointKind extends string { + EndpointKind() { + this in ["Function", "InstanceMethod", "ClassMethod", "StaticMethod", "InitMethod", "Class"] + } +} + +/** + * An element of the source code to be modeled. + * + * See `EndPointKind` for the possible kinds of elements. + */ +abstract class Endpoint instanceof Scope { + string namespace; + string type; + string name; + + Endpoint() { + this.isPublic() and + this.getLocation().getFile() instanceof Util::RelevantFile and + exists(string scopePath, string path, int pathIndex | + scopePath = Util::computeScopePath(this) and + pathIndex = scopePath.indexOf(".", 0, 0) + | + namespace = scopePath.prefix(pathIndex) and + path = scopePath.suffix(pathIndex + 1) and + ( + exists(int nameIndex | nameIndex = max(path.indexOf(".")) | + type = path.prefix(nameIndex) and + name = path.suffix(nameIndex + 1) + ) + or + not exists(path.indexOf(".")) and + type = "" and + name = path + ) + ) + } + + /** Gets the namespace for this endpoint. This will typically be the package in which it is found. */ + string getNamespace() { result = namespace } + + /** Gets hte basename of the file where this endpoint is found. */ + string getFileName() { result = super.getLocation().getFile().getBaseName() } + + /** Gets a string representation of this endpoint. */ + string toString() { result = super.toString() } + + /** Gets the location of this endpoint. */ + Location getLocation() { result = super.getLocation() } + + /** Gets the name of the class in which this endpoint is found, or the empty string if it is not found inside a class. */ + string getType() { result = type } + + /** + * Gets the name of the endpoint if it is not a class, or the empty string if it is a class + * + * If this endpoint is a class, the class name can be obtained via `getType`. + */ + string getName() { result = name } + + /** + * Gets a string representation of the parameters of this endpoint. + * + * The string follows a specific format: + * - Positional parameters are listed in order, separated by commas. + * - Keyword parameters are listed in order, separated by commas, each followed by a colon. + * - In the future, positional-only parameters will be listed in order, separated by commas, each followed by a slash. + */ + abstract string getParameters(); + + /** + * Gets a boolean that is true iff this endpoint is supported by existing modeling. + * + * The check only takes Models ss Data extension models into account. + */ + abstract boolean getSupportedStatus(); + + /** + * Gets a string that describes the type of support detected this endpoint. + * + * The string can be one of the following: + * - "source" if this endpoint is a known source. + * - "sink" if this endpoint is a known sink. + * - "summary" if this endpoint has a flow summary. + * - "neutral" if this endpoint is a known neutral. + * - "" if this endpoint is not detected as supported. + */ + abstract string getSupportedType(); + + /** Gets the kind of this endpoint. See `EndPointKind`. */ + abstract EndpointKind getKind(); +} + +private predicate sourceModelPath(string type, string path) { sourceModel(type, path, _, _) } + +module FindSourceModel = Util::FindModel; + +private predicate sinkModelPath(string type, string path) { sinkModel(type, path, _, _) } + +module FindSinkModel = Util::FindModel; + +private predicate summaryModelPath(string type, string path) { + summaryModel(type, path, _, _, _, _) +} + +module FindSummaryModel = Util::FindModel; + +private predicate neutralModelPath(string type, string path) { neutralModel(type, path, _) } + +module FindNeutralModel = Util::FindModel; + +/** + * A callable function or method from source code. + */ +class FunctionEndpoint extends Endpoint instanceof Function { + /** + * Gets the parameter types of this endpoint. + */ + override string getParameters() { + // For now, return the names of positional and keyword parameters. We don't always have type information, so we can't return type names. + // We don't yet handle splat params or dict splat params. + // + // In Python, there are three types of parameters: + // 1. Positional-only parameters: These are parameters that can only be passed by position and not by keyword. + // 2. Positional-or-keyword parameters: These are parameters that can be passed by position or by keyword. + // 3. Keyword-only parameters: These are parameters that can only be passed by keyword. + // + // The syntax for defining these parameters is as follows: + // ```python + // def f(a, /, b, *, c): + // pass + // ``` + // In this example, `a` is a positional-only parameter, `b` is a positional-or-keyword parameter, and `c` is a keyword-only parameter. + // + // We handle positional-only parameters by adding a "/" to the parameter name, reminiscient of the syntax above. + // Note that we don't yet have information about positional-only parameters. + // We handle keyword-only parameters by adding a ":" to the parameter name, to be consistent with the MaD syntax and the other languages. + exists(int nrPosOnly, Function f | + f = this and + nrPosOnly = f.getPositionalParameterCount() + | + result = + "(" + + concat(string key, string value | + // TODO: Once we have information about positional-only parameters: + // Handle positional-only parameters by adding a "/" + value = any(int i | i.toString() = key | f.getArgName(i)) + or + exists(Name param | param = f.getAKeywordOnlyArg() | + param.getId() = key and + value = key + ":" + ) + | + value, "," order by key + ) + ")" + ) + } + + /** Holds if this API has a supported summary. */ + pragma[nomagic] + predicate hasSummary() { FindSummaryModel::hasModel(this) } + + /** Holds if this API is a known source. */ + pragma[nomagic] + predicate isSource() { FindSourceModel::hasModel(this) } + + /** Holds if this API is a known sink. */ + pragma[nomagic] + predicate isSink() { FindSinkModel::hasModel(this) } + + /** Holds if this API is a known neutral. */ + pragma[nomagic] + predicate isNeutral() { FindNeutralModel::hasModel(this) } + + /** + * Holds if this API is supported by existing CodeQL libraries, that is, it is either a + * recognized source, sink or neutral or it has a flow summary. + */ + predicate isSupported() { + this.hasSummary() or this.isSource() or this.isSink() or this.isNeutral() + } + + override boolean getSupportedStatus() { + if this.isSupported() then result = true else result = false + } + + override string getSupportedType() { + this.isSink() and result = "sink" + or + this.isSource() and result = "source" + or + this.hasSummary() and result = "summary" + or + this.isNeutral() and result = "neutral" + or + not this.isSupported() and result = "" + } + + override EndpointKind getKind() { + if this.(Function).isMethod() + then + result = this.methodKind() + or + not exists(this.methodKind()) and result = "InstanceMethod" + else result = "Function" + } + + private EndpointKind methodKind() { + this.(Function).isMethod() and + ( + DP::isClassmethod(this) and result = "ClassMethod" + or + DP::isStaticmethod(this) and result = "StaticMethod" + or + this.(Function).isInitMethod() and result = "InitMethod" + ) + } +} + +/** + * A class from source code. + */ +class ClassEndpoint extends Endpoint instanceof Class { + override string getType() { result = type + "." + name } + + override string getName() { result = "" } + + override string getParameters() { result = "" } + + override boolean getSupportedStatus() { result = false } + + override string getSupportedType() { result = "" } + + override EndpointKind getKind() { result = "Class" } +} diff --git a/python/ql/lib/modeling/Util.qll b/python/ql/lib/modeling/Util.qll new file mode 100644 index 000000000000..c6657289ae60 --- /dev/null +++ b/python/ql/lib/modeling/Util.qll @@ -0,0 +1,93 @@ +/** + * Contains utility methods and classes to assist with generating data extensions models. + */ + +private import python +private import semmle.python.ApiGraphs + +/** + * A file that probably contains tests. + */ +class TestFile extends File { + TestFile() { + this.getRelativePath().regexpMatch(".*(test|spec|examples).+") and + not this.getAbsolutePath().matches("%/ql/test/%") // allows our test cases to work + } +} + +/** + * A file that is relevant in the context of library modeling. + * + * In practice, this means a file that is not part of test code. + */ +class RelevantFile extends File { + RelevantFile() { not this instanceof TestFile and not this.inStdlib() } +} + +/** + * Gets the dotted path of a scope. + */ +string computeScopePath(Scope scope) { + // base case + if scope instanceof Module + then + scope.(Module).isPackageInit() and + result = scope.(Module).getPackageName() + or + not scope.(Module).isPackageInit() and + result = scope.(Module).getName() + else + //recursive cases + if scope instanceof Class + then + result = computeScopePath(scope.(Class).getEnclosingScope()) + "." + scope.(Class).getName() + else + if scope instanceof Function + then + result = + computeScopePath(scope.(Function).getEnclosingScope()) + "." + scope.(Function).getName() + else result = "unknown: " + scope.toString() +} + +signature predicate modelSig(string type, string path); + +/** + * A utility module for finding models of endpoints. + * + * Chiefly the `hasModel` predicate is used to determine if a scope has a model. + */ +module FindModel { + /** + * Holds if the given scope has a model as identified by the provided predicate `model`. + */ + predicate hasModel(Scope scope) { + exists(string type, string path, string searchPath | model(type, path) | + searchPath = possibleMemberPathPrefix(path, scope.getName()) and + pathToScope(scope, type, searchPath) + ) + } + + /** + * returns the prefix of `path` that might be a path to `member` + */ + bindingset[path, member] + string possibleMemberPathPrefix(string path, string member) { + // functionName must be a substring of path + exists(int index | index = path.indexOf(["Member", "Method"] + "[" + member + "]") | + result = path.prefix(index) + ) + } + + /** + * Holds if `(type,path)` evaluates to the given entity, when evalauted from a client of the current library. + */ + bindingset[type, path] + predicate pathToScope(Scope scope, string type, string path) { + scope.getLocation().getFile() instanceof RelevantFile and + scope.isPublic() and // only public methods are modeled + computeScopePath(scope) = + type.replaceAll("!", "") + "." + + path.replaceAll("Member[", "").replaceAll("]", "").replaceAll("Instance.", "") + + scope.getName() + } +} diff --git a/python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql b/python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql new file mode 100644 index 000000000000..337221c092d2 --- /dev/null +++ b/python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql @@ -0,0 +1,14 @@ +/** + * @name Fetch endpoints for use in the model editor (framework mode) + * @description A list of endpoints accessible (methods and attributes) for consumers of the library. Excludes test and generated code. + * @kind table + * @id py/utils/modeleditor/framework-mode-endpoints + * @tags modeleditor endpoints framework-mode + */ + +import modeling.ModelEditor + +from Endpoint endpoint +select endpoint, endpoint.getNamespace(), endpoint.getType(), endpoint.getName(), + endpoint.getParameters(), endpoint.getSupportedStatus(), endpoint.getFileName(), + endpoint.getSupportedType(), endpoint.getKind() diff --git a/python/ql/test/modelling/FrameworkModeEndpoints.expected b/python/ql/test/modelling/FrameworkModeEndpoints.expected new file mode 100644 index 000000000000..dbb924f3e253 --- /dev/null +++ b/python/ql/test/modelling/FrameworkModeEndpoints.expected @@ -0,0 +1,11 @@ +| MyPackage/Foo.py:1:1:1:9 | Class C1 | MyPackage | Foo.C1 | | | false | Foo.py | | Class | +| MyPackage/Foo.py:2:5:2:17 | Function m1 | MyPackage | Foo.C1 | m1 | (self) | true | Foo.py | source | InstanceMethod | +| MyPackage/Foo.py:5:5:5:20 | Function m2 | MyPackage | Foo.C1 | m2 | (self,x) | true | Foo.py | source | InstanceMethod | +| MyPackage/Foo.py:9:5:9:14 | Function m3 | MyPackage | Foo.C1 | m3 | (x) | true | Foo.py | summary | StaticMethod | +| MyPackage/Foo.py:13:5:13:19 | Function m4 | MyPackage | Foo.C1 | m4 | (cls,x) | true | Foo.py | summary | ClassMethod | +| MyPackage/Foo.py:16:1:16:13 | Class C2 | MyPackage | Foo.C2 | | | false | Foo.py | | Class | +| MyPackage/Foo.py:17:5:17:17 | Function m1 | MyPackage | Foo.C2 | m1 | (self) | false | Foo.py | | InstanceMethod | +| MyPackage/Foo.py:20:5:20:27 | Function c2only_m1 | MyPackage | Foo.C2 | c2only_m1 | (self,x) | false | Foo.py | | InstanceMethod | +| MyPackage/Foo.py:23:1:23:9 | Class C3 | MyPackage | Foo.C3 | | | false | Foo.py | | Class | +| MyPackage/Foo.py:24:5:24:26 | Function get_C2_instance | MyPackage | Foo.C3 | get_C2_instance | () | false | Foo.py | | InstanceMethod | +| TopLevel.py:3:1:3:38 | Function top_level_funciton | TopLevel | | top_level_funciton | (x,y,z:) | false | TopLevel.py | | Function | diff --git a/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml b/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml new file mode 100644 index 000000000000..ca7b39516565 --- /dev/null +++ b/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml @@ -0,0 +1,20 @@ +extensions: + - addsTo: + pack: codeql/python-all + extensible: sourceModel + data: + - ["MyPackage.Foo.C1","Member[m1].ReturnValue","remote"] + - ["MyPackage","Member[Foo].Member[C1].Instance.Member[m2].ReturnValue","remote"] + + - addsTo: + pack: codeql/python-all + extensible: summaryModel + data: + - ["MyPackage.Foo.C1!","Member[m3]","Argument[0]","ReturnValue","value"] + - ["MyPackage","Member[Foo].Member[C1].Member[m4]","Argument[0]","ReturnValue","value"] + + - addsTo: + pack: codeql/python-all + extensible: typeModel + data: + - ["MyPackage.Foo.C2!","MyPackage","Member[Foo].Member[C3].Member[get_C2_instance].ReturnValue"] diff --git a/python/ql/test/modelling/FrameworkModeEndpoints.qlref b/python/ql/test/modelling/FrameworkModeEndpoints.qlref new file mode 100644 index 000000000000..5ae87455edd6 --- /dev/null +++ b/python/ql/test/modelling/FrameworkModeEndpoints.qlref @@ -0,0 +1 @@ +utils/modeleditor/FrameworkModeEndpoints.ql \ No newline at end of file diff --git a/python/ql/test/modelling/MyPackage/Foo.py b/python/ql/test/modelling/MyPackage/Foo.py new file mode 100644 index 000000000000..0f4f43a34c50 --- /dev/null +++ b/python/ql/test/modelling/MyPackage/Foo.py @@ -0,0 +1,25 @@ +class C1: + def m1(self): + print("C1.m1()") + + def m2(self, x): + return x + + @staticmethod + def m3(x): + return x + + @classmethod + def m4(cls, x): + return x + +class C2(C1): + def m1(self): + print("C2.m1()") + + def c2only_m1(self, x): + return x + +class C3: + def get_C2_instance(): + return C2() \ No newline at end of file diff --git a/python/ql/test/modelling/MyPackage/__init__.py b/python/ql/test/modelling/MyPackage/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/modelling/TopLevel.py b/python/ql/test/modelling/TopLevel.py new file mode 100644 index 000000000000..0af186980e5a --- /dev/null +++ b/python/ql/test/modelling/TopLevel.py @@ -0,0 +1,8 @@ +from MyPackage import Foo + +def top_level_funciton(x, /, y, *, z): + return [x, y, z] + +top_level_value = Foo.C1() + +iC2 = Foo.C3.get_C2_instance() \ No newline at end of file From c2141b62e0ebb6a1e8dc6489669f49c30550d969 Mon Sep 17 00:00:00 2001 From: yoff Date: Thu, 27 Jun 2024 14:53:03 +0200 Subject: [PATCH 2/6] Apply suggestions from code review Co-authored-by: Rasmus Wriedt Larsen --- python/ql/lib/modeling/ModelEditor.qll | 6 +++--- python/ql/lib/modeling/Util.qll | 15 ++++----------- .../test/modelling/FrameworkModeEndpoints.ext.yml | 2 +- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/python/ql/lib/modeling/ModelEditor.qll b/python/ql/lib/modeling/ModelEditor.qll index d6c57b6494b5..272f5833a096 100644 --- a/python/ql/lib/modeling/ModelEditor.qll +++ b/python/ql/lib/modeling/ModelEditor.qll @@ -75,8 +75,8 @@ abstract class Endpoint instanceof Scope { * Gets a string representation of the parameters of this endpoint. * * The string follows a specific format: - * - Positional parameters are listed in order, separated by commas. - * - Keyword parameters are listed in order, separated by commas, each followed by a colon. + * - Normal parameters(where arguments can be passed as either positional or keyword) are listed in order, separated by commas. + * - Keyword-only parameters are listed in order, separated by commas, each followed by a colon. * - In the future, positional-only parameters will be listed in order, separated by commas, each followed by a slash. */ abstract string getParameters(); @@ -84,7 +84,7 @@ abstract class Endpoint instanceof Scope { /** * Gets a boolean that is true iff this endpoint is supported by existing modeling. * - * The check only takes Models ss Data extension models into account. + * The check only takes Models as Data extension models into account. */ abstract boolean getSupportedStatus(); diff --git a/python/ql/lib/modeling/Util.qll b/python/ql/lib/modeling/Util.qll index c6657289ae60..c7871c290194 100644 --- a/python/ql/lib/modeling/Util.qll +++ b/python/ql/lib/modeling/Util.qll @@ -38,14 +38,9 @@ string computeScopePath(Scope scope) { result = scope.(Module).getName() else //recursive cases - if scope instanceof Class + if scope instanceof Class or scope instanceof Function then - result = computeScopePath(scope.(Class).getEnclosingScope()) + "." + scope.(Class).getName() - else - if scope instanceof Function - then - result = - computeScopePath(scope.(Function).getEnclosingScope()) + "." + scope.(Function).getName() + result = computeScopePath(scope.getEnclosingScope()) + "." + scope.getName() else result = "unknown: " + scope.toString() } @@ -72,19 +67,17 @@ module FindModel { */ bindingset[path, member] string possibleMemberPathPrefix(string path, string member) { - // functionName must be a substring of path exists(int index | index = path.indexOf(["Member", "Method"] + "[" + member + "]") | result = path.prefix(index) ) } /** - * Holds if `(type,path)` evaluates to the given entity, when evalauted from a client of the current library. + * Holds if `(type,path)` identifies `scope`. */ bindingset[type, path] predicate pathToScope(Scope scope, string type, string path) { - scope.getLocation().getFile() instanceof RelevantFile and - scope.isPublic() and // only public methods are modeled + scope instanceof Endpoint computeScopePath(scope) = type.replaceAll("!", "") + "." + path.replaceAll("Member[", "").replaceAll("]", "").replaceAll("Instance.", "") + diff --git a/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml b/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml index ca7b39516565..421f7d6820c5 100644 --- a/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml +++ b/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml @@ -17,4 +17,4 @@ extensions: pack: codeql/python-all extensible: typeModel data: - - ["MyPackage.Foo.C2!","MyPackage","Member[Foo].Member[C3].Member[get_C2_instance].ReturnValue"] + - ["MyPackage.Foo.C2","MyPackage","Member[Foo].Member[C3].Member[get_C2_instance].ReturnValue"] From 27301edc287bd49f3b0cad4ec94d054fa18fed91 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 27 Jun 2024 16:05:21 +0200 Subject: [PATCH 3/6] Python: address more review comments --- python/ql/lib/modeling/ModelEditor.qll | 12 +++---- python/ql/lib/modeling/Util.qll | 34 ++++++------------- .../modeleditor/FrameworkModeEndpoints.ql | 2 +- .../modelling/FrameworkModeEndpoints.ext.yml | 4 +++ 4 files changed, 21 insertions(+), 31 deletions(-) diff --git a/python/ql/lib/modeling/ModelEditor.qll b/python/ql/lib/modeling/ModelEditor.qll index 272f5833a096..653e247d2bc8 100644 --- a/python/ql/lib/modeling/ModelEditor.qll +++ b/python/ql/lib/modeling/ModelEditor.qll @@ -22,14 +22,12 @@ class EndpointKind extends string { * * See `EndPointKind` for the possible kinds of elements. */ -abstract class Endpoint instanceof Scope { +abstract class Endpoint instanceof Util::RelevantScope { string namespace; string type; string name; Endpoint() { - this.isPublic() and - this.getLocation().getFile() instanceof Util::RelevantFile and exists(string scopePath, string path, int pathIndex | scopePath = Util::computeScopePath(this) and pathIndex = scopePath.indexOf(".", 0, 0) @@ -62,14 +60,14 @@ abstract class Endpoint instanceof Scope { Location getLocation() { result = super.getLocation() } /** Gets the name of the class in which this endpoint is found, or the empty string if it is not found inside a class. */ - string getType() { result = type } + string getClass() { result = type } /** * Gets the name of the endpoint if it is not a class, or the empty string if it is a class * * If this endpoint is a class, the class name can be obtained via `getType`. */ - string getName() { result = name } + string getFunctionName() { result = name } /** * Gets a string representation of the parameters of this endpoint. @@ -234,9 +232,9 @@ class FunctionEndpoint extends Endpoint instanceof Function { * A class from source code. */ class ClassEndpoint extends Endpoint instanceof Class { - override string getType() { result = type + "." + name } + override string getClass() { result = type + "." + name } - override string getName() { result = "" } + override string getFunctionName() { result = "" } override string getParameters() { result = "" } diff --git a/python/ql/lib/modeling/Util.qll b/python/ql/lib/modeling/Util.qll index c7871c290194..dc3cf025f9ee 100644 --- a/python/ql/lib/modeling/Util.qll +++ b/python/ql/lib/modeling/Util.qll @@ -4,30 +4,20 @@ private import python private import semmle.python.ApiGraphs +private import semmle.python.filters.Tests -/** - * A file that probably contains tests. - */ -class TestFile extends File { - TestFile() { - this.getRelativePath().regexpMatch(".*(test|spec|examples).+") and - not this.getAbsolutePath().matches("%/ql/test/%") // allows our test cases to work +class RelevantScope extends Scope { + RelevantScope() { + this.isPublic() and + not this instanceof TestScope and + exists(this.getLocation().getFile().getRelativePath()) } } -/** - * A file that is relevant in the context of library modeling. - * - * In practice, this means a file that is not part of test code. - */ -class RelevantFile extends File { - RelevantFile() { not this instanceof TestFile and not this.inStdlib() } -} - /** * Gets the dotted path of a scope. */ -string computeScopePath(Scope scope) { +string computeScopePath(RelevantScope scope) { // base case if scope instanceof Module then @@ -39,9 +29,8 @@ string computeScopePath(Scope scope) { else //recursive cases if scope instanceof Class or scope instanceof Function - then - result = computeScopePath(scope.getEnclosingScope()) + "." + scope.getName() - else result = "unknown: " + scope.toString() + then result = computeScopePath(scope.getEnclosingScope()) + "." + scope.getName() + else result = "unknown: " + scope.toString() } signature predicate modelSig(string type, string path); @@ -55,7 +44,7 @@ module FindModel { /** * Holds if the given scope has a model as identified by the provided predicate `model`. */ - predicate hasModel(Scope scope) { + predicate hasModel(RelevantScope scope) { exists(string type, string path, string searchPath | model(type, path) | searchPath = possibleMemberPathPrefix(path, scope.getName()) and pathToScope(scope, type, searchPath) @@ -76,8 +65,7 @@ module FindModel { * Holds if `(type,path)` identifies `scope`. */ bindingset[type, path] - predicate pathToScope(Scope scope, string type, string path) { - scope instanceof Endpoint + predicate pathToScope(RelevantScope scope, string type, string path) { computeScopePath(scope) = type.replaceAll("!", "") + "." + path.replaceAll("Member[", "").replaceAll("]", "").replaceAll("Instance.", "") + diff --git a/python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql b/python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql index 337221c092d2..b0af86421f4f 100644 --- a/python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql +++ b/python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql @@ -9,6 +9,6 @@ import modeling.ModelEditor from Endpoint endpoint -select endpoint, endpoint.getNamespace(), endpoint.getType(), endpoint.getName(), +select endpoint, endpoint.getNamespace(), endpoint.getClass(), endpoint.getFunctionName(), endpoint.getParameters(), endpoint.getSupportedStatus(), endpoint.getFileName(), endpoint.getSupportedType(), endpoint.getKind() diff --git a/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml b/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml index 421f7d6820c5..6942e501b9fa 100644 --- a/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml +++ b/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml @@ -3,14 +3,18 @@ extensions: pack: codeql/python-all extensible: sourceModel data: + # Test short form of type column - ["MyPackage.Foo.C1","Member[m1].ReturnValue","remote"] + # Test long form of type column - ["MyPackage","Member[Foo].Member[C1].Instance.Member[m2].ReturnValue","remote"] - addsTo: pack: codeql/python-all extensible: summaryModel data: + # Test short form of type column - ["MyPackage.Foo.C1!","Member[m3]","Argument[0]","ReturnValue","value"] + # Test long form of type column - ["MyPackage","Member[Foo].Member[C1].Member[m4]","Argument[0]","ReturnValue","value"] - addsTo: From 9cca1b294c2bdf6db904b2fe12ad831dcd15aa79 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 27 Jun 2024 16:33:23 +0200 Subject: [PATCH 4/6] Python: Add test cases --- python/ql/lib/semmle/python/Scope.qll | 3 ++- .../test/modelling/FrameworkModeEndpoints.expected | 6 +++++- python/ql/test/modelling/MyPackage/Foo.py | 12 +++++++++++- python/ql/test/modelling/MyPackage/ModuleWithAll.py | 3 +++ python/ql/test/modelling/TopLevel.py | 9 ++++++--- 5 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 python/ql/test/modelling/MyPackage/ModuleWithAll.py diff --git a/python/ql/lib/semmle/python/Scope.qll b/python/ql/lib/semmle/python/Scope.qll index 891e249faf57..4131455299cb 100644 --- a/python/ql/lib/semmle/python/Scope.qll +++ b/python/ql/lib/semmle/python/Scope.qll @@ -85,9 +85,10 @@ class Scope extends Scope_ { this instanceof Module or exists(Module m | m = this.getEnclosingScope() and m.isPublic() | - /* If the module has an __all__, is this in it */ + // The module is implicitly exported not exists(getAModuleExport(m)) or + // The module is explicitly exported getAModuleExport(m) = this.getName() ) or diff --git a/python/ql/test/modelling/FrameworkModeEndpoints.expected b/python/ql/test/modelling/FrameworkModeEndpoints.expected index dbb924f3e253..5a0e82c2086d 100644 --- a/python/ql/test/modelling/FrameworkModeEndpoints.expected +++ b/python/ql/test/modelling/FrameworkModeEndpoints.expected @@ -8,4 +8,8 @@ | MyPackage/Foo.py:20:5:20:27 | Function c2only_m1 | MyPackage | Foo.C2 | c2only_m1 | (self,x) | false | Foo.py | | InstanceMethod | | MyPackage/Foo.py:23:1:23:9 | Class C3 | MyPackage | Foo.C3 | | | false | Foo.py | | Class | | MyPackage/Foo.py:24:5:24:26 | Function get_C2_instance | MyPackage | Foo.C3 | get_C2_instance | () | false | Foo.py | | InstanceMethod | -| TopLevel.py:3:1:3:38 | Function top_level_funciton | TopLevel | | top_level_funciton | (x,y,z:) | false | TopLevel.py | | Function | +| MyPackage/Foo.py:31:1:31:38 | Function top_level_function | MyPackage | Foo | top_level_function | (x,y,z:) | false | Foo.py | | Function | +| MyPackage/Foo.py:34:1:34:42 | Function func_with_fancy_args | MyPackage | Foo | func_with_fancy_args | () | false | Foo.py | | Function | +| MyPackage/ModuleWithAll.py:2:1:2:10 | Class Foo | MyPackage | ModuleWithAll.Foo | | | false | ModuleWithAll.py | | Class | +| MyPackage/ModuleWithAll.py:3:1:3:10 | Class Bar | MyPackage | ModuleWithAll.Bar | | | false | ModuleWithAll.py | | Class | +| TopLevel.py:3:1:3:38 | Function top_level_function | TopLevel | | top_level_function | (x,y,z:) | false | TopLevel.py | | Function | diff --git a/python/ql/test/modelling/MyPackage/Foo.py b/python/ql/test/modelling/MyPackage/Foo.py index 0f4f43a34c50..0c21d15861f7 100644 --- a/python/ql/test/modelling/MyPackage/Foo.py +++ b/python/ql/test/modelling/MyPackage/Foo.py @@ -22,4 +22,14 @@ def c2only_m1(self, x): class C3: def get_C2_instance(): - return C2() \ No newline at end of file + return C2() + + class C3nested: + def m5(self, x): + return x + +def top_level_function(x, /, y, *, z): + return [x, y, z] + +def func_with_fancy_args(*args, **kwargs): + return args, kwargs \ No newline at end of file diff --git a/python/ql/test/modelling/MyPackage/ModuleWithAll.py b/python/ql/test/modelling/MyPackage/ModuleWithAll.py new file mode 100644 index 000000000000..0543cd308eec --- /dev/null +++ b/python/ql/test/modelling/MyPackage/ModuleWithAll.py @@ -0,0 +1,3 @@ +__all__ = ['Foo'] +class Foo: pass +class Bar: pass \ No newline at end of file diff --git a/python/ql/test/modelling/TopLevel.py b/python/ql/test/modelling/TopLevel.py index 0af186980e5a..605fcab65acb 100644 --- a/python/ql/test/modelling/TopLevel.py +++ b/python/ql/test/modelling/TopLevel.py @@ -1,8 +1,11 @@ -from MyPackage import Foo +from MyPackage import Foo, ModuleWithAll -def top_level_funciton(x, /, y, *, z): +def top_level_function(x, /, y, *, z): return [x, y, z] top_level_value = Foo.C1() -iC2 = Foo.C3.get_C2_instance() \ No newline at end of file +iC2 = Foo.C3.get_C2_instance() + +f = ModuleWithAll.Foo() +b = ModuleWithAll.Bar() \ No newline at end of file From 6bc830dca4daeff651f51a9daffa62b14d0746e6 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 27 Jun 2024 16:55:29 +0200 Subject: [PATCH 5/6] Python: add qldoc --- python/ql/lib/modeling/Util.qll | 1 + 1 file changed, 1 insertion(+) diff --git a/python/ql/lib/modeling/Util.qll b/python/ql/lib/modeling/Util.qll index dc3cf025f9ee..01f4d265f0aa 100644 --- a/python/ql/lib/modeling/Util.qll +++ b/python/ql/lib/modeling/Util.qll @@ -6,6 +6,7 @@ private import python private import semmle.python.ApiGraphs private import semmle.python.filters.Tests +/** A class to represent scopes that the user might want to model. */ class RelevantScope extends Scope { RelevantScope() { this.isPublic() and From dc33f0de1d517bc909b6d81cfcc081f240277c98 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Jul 2024 14:28:46 +0200 Subject: [PATCH 6/6] Python: Additional tests for model-editor We currently have some problems with these files, that we should fix later down the line. See PR comment for more details. --- .../ql/test/modelling/FrameworkModeEndpoints.expected | 7 +++++++ .../ql/test/modelling/NotPackage/not_in_pacakge_lib.py | 2 ++ .../test/modelling/NotPackage/not_in_package_script.py | 10 ++++++++++ python/ql/test/modelling/NotPackage/possibly_lib.py | 5 +++++ .../not-valid-package/not_in_pacakge_lib_copy.py | 2 ++ .../not-valid-package/not_in_package_script_copy.py | 10 ++++++++++ .../modelling/not-valid-package/possibly_lib_copy.py | 5 +++++ python/ql/test/modelling/options | 1 + 8 files changed, 42 insertions(+) create mode 100644 python/ql/test/modelling/NotPackage/not_in_pacakge_lib.py create mode 100644 python/ql/test/modelling/NotPackage/not_in_package_script.py create mode 100644 python/ql/test/modelling/NotPackage/possibly_lib.py create mode 100644 python/ql/test/modelling/not-valid-package/not_in_pacakge_lib_copy.py create mode 100644 python/ql/test/modelling/not-valid-package/not_in_package_script_copy.py create mode 100644 python/ql/test/modelling/not-valid-package/possibly_lib_copy.py create mode 100644 python/ql/test/modelling/options diff --git a/python/ql/test/modelling/FrameworkModeEndpoints.expected b/python/ql/test/modelling/FrameworkModeEndpoints.expected index 5a0e82c2086d..7665b06d4ea4 100644 --- a/python/ql/test/modelling/FrameworkModeEndpoints.expected +++ b/python/ql/test/modelling/FrameworkModeEndpoints.expected @@ -12,4 +12,11 @@ | MyPackage/Foo.py:34:1:34:42 | Function func_with_fancy_args | MyPackage | Foo | func_with_fancy_args | () | false | Foo.py | | Function | | MyPackage/ModuleWithAll.py:2:1:2:10 | Class Foo | MyPackage | ModuleWithAll.Foo | | | false | ModuleWithAll.py | | Class | | MyPackage/ModuleWithAll.py:3:1:3:10 | Class Bar | MyPackage | ModuleWithAll.Bar | | | false | ModuleWithAll.py | | Class | +| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | NotPackage | | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function | +| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | NotPackage | not_in_pacakge_lib | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function | +| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | not_in_pacakge_lib | | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function | +| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | not_in_pacakge_lib | not_in_pacakge_lib | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function | +| NotPackage/not_in_package_script.py:5:1:5:37 | Function not_in_package_script_func | NotPackage | not_in_package_script | not_in_package_script_func | (x,y) | false | not_in_package_script.py | | Function | +| NotPackage/possibly_lib.py:4:1:4:28 | Function possibly_lib_func | NotPackage | possibly_lib | possibly_lib_func | (x,y) | false | possibly_lib.py | | Function | | TopLevel.py:3:1:3:38 | Function top_level_function | TopLevel | | top_level_function | (x,y,z:) | false | TopLevel.py | | Function | +| not-valid-package/not_in_pacakge_lib_copy.py:1:1:1:34 | Function not_in_pacakge_lib_func | not_in_pacakge_lib_copy | | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib_copy.py | | Function | diff --git a/python/ql/test/modelling/NotPackage/not_in_pacakge_lib.py b/python/ql/test/modelling/NotPackage/not_in_pacakge_lib.py new file mode 100644 index 000000000000..d45f86475f40 --- /dev/null +++ b/python/ql/test/modelling/NotPackage/not_in_pacakge_lib.py @@ -0,0 +1,2 @@ +def not_in_pacakge_lib_func(x, y): + return x + y diff --git a/python/ql/test/modelling/NotPackage/not_in_package_script.py b/python/ql/test/modelling/NotPackage/not_in_package_script.py new file mode 100644 index 000000000000..0eaa723a2948 --- /dev/null +++ b/python/ql/test/modelling/NotPackage/not_in_package_script.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python + +import not_in_pacakge_lib + +def not_in_package_script_func(x, y): + return x + y + +if __name__ == "__main__": + print(not_in_pacakge_lib.not_in_pacakge_lib_func(1, 2)) + print(not_in_package_script_func(3, 4)) diff --git a/python/ql/test/modelling/NotPackage/possibly_lib.py b/python/ql/test/modelling/NotPackage/possibly_lib.py new file mode 100644 index 000000000000..4f3bc882ce6b --- /dev/null +++ b/python/ql/test/modelling/NotPackage/possibly_lib.py @@ -0,0 +1,5 @@ +# model editor should allow modeling the functions defined in this file, even when the +# file is not imported explicitly. + +def possibly_lib_func(x, y): + return x + y diff --git a/python/ql/test/modelling/not-valid-package/not_in_pacakge_lib_copy.py b/python/ql/test/modelling/not-valid-package/not_in_pacakge_lib_copy.py new file mode 100644 index 000000000000..d45f86475f40 --- /dev/null +++ b/python/ql/test/modelling/not-valid-package/not_in_pacakge_lib_copy.py @@ -0,0 +1,2 @@ +def not_in_pacakge_lib_func(x, y): + return x + y diff --git a/python/ql/test/modelling/not-valid-package/not_in_package_script_copy.py b/python/ql/test/modelling/not-valid-package/not_in_package_script_copy.py new file mode 100644 index 000000000000..22b6bcfd523d --- /dev/null +++ b/python/ql/test/modelling/not-valid-package/not_in_package_script_copy.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python + +import not_in_pacakge_lib_copy + +def not_in_package_script_func(x, y): + return x + y + +if __name__ == "__main__": + print(not_in_pacakge_lib_copy.not_in_pacakge_lib_func(1, 2)) + print(not_in_package_script_func(3, 4)) diff --git a/python/ql/test/modelling/not-valid-package/possibly_lib_copy.py b/python/ql/test/modelling/not-valid-package/possibly_lib_copy.py new file mode 100644 index 000000000000..4f3bc882ce6b --- /dev/null +++ b/python/ql/test/modelling/not-valid-package/possibly_lib_copy.py @@ -0,0 +1,5 @@ +# model editor should allow modeling the functions defined in this file, even when the +# file is not imported explicitly. + +def possibly_lib_func(x, y): + return x + y diff --git a/python/ql/test/modelling/options b/python/ql/test/modelling/options new file mode 100644 index 000000000000..3819071b01cc --- /dev/null +++ b/python/ql/test/modelling/options @@ -0,0 +1 @@ +semmle-extractor-options: -R .