diff --git a/python/ql/lib/modeling/ModelEditor.qll b/python/ql/lib/modeling/ModelEditor.qll new file mode 100644 index 000000000000..653e247d2bc8 --- /dev/null +++ b/python/ql/lib/modeling/ModelEditor.qll @@ -0,0 +1,246 @@ +/** Provides classes and predicates related to handling APIs for the VS Code extension. */ + +private import python +private import semmle.python.frameworks.data.ModelsAsData +private import semmle.python.frameworks.data.internal.ApiGraphModelsExtensions +private import semmle.python.dataflow.new.internal.DataFlowDispatch as DP +private import Util as Util + +/** + * A string describing the kind of source code element being modeled. + * + * See `Endpoint`. + */ +class EndpointKind extends string { + EndpointKind() { + this in ["Function", "InstanceMethod", "ClassMethod", "StaticMethod", "InitMethod", "Class"] + } +} + +/** + * An element of the source code to be modeled. + * + * See `EndpointKind` for the possible kinds of elements. + */ +abstract class Endpoint instanceof Util::RelevantScope { + string namespace; + string type; + string name; + + Endpoint() { + exists(string scopePath, string path, int pathIndex | + scopePath = Util::computeScopePath(this) and + pathIndex = scopePath.indexOf(".", 0, 0) + | + namespace = scopePath.prefix(pathIndex) and + path = scopePath.suffix(pathIndex + 1) and + ( + exists(int nameIndex | nameIndex = max(path.indexOf(".")) | + type = path.prefix(nameIndex) and + name = path.suffix(nameIndex + 1) + ) + or + not exists(path.indexOf(".")) and + type = "" and + name = path + ) + ) + } + + /** Gets the namespace for this endpoint. This will typically be the package in which it is found. */ + string getNamespace() { result = namespace } + + /** Gets the basename of the file where this endpoint is found. */ + string getFileName() { result = super.getLocation().getFile().getBaseName() } + + /** Gets a string representation of this endpoint. 
*/ + string toString() { result = super.toString() } + + /** Gets the location of this endpoint. */ + Location getLocation() { result = super.getLocation() } + + /** Gets the dotted path of the enclosing scopes between the namespace and this endpoint (typically modules and classes), or the empty string if there are none. */ + string getClass() { result = type } + + /** + * Gets the name of the endpoint if it is not a class, or the empty string if it is a class. + * + * If this endpoint is a class, the class name can be obtained via `getClass`. + */ + string getFunctionName() { result = name } + + /** + * Gets a string representation of the parameters of this endpoint. + * + * The string follows a specific format: + * - Normal parameters (where arguments can be passed as either positional or keyword) are listed in order, separated by commas. + * - Keyword-only parameters are listed in order, separated by commas, each followed by a colon. + * - In the future, positional-only parameters will be listed in order, separated by commas, each followed by a slash. + */ + abstract string getParameters(); + + /** + * Gets a boolean that is true iff this endpoint is supported by existing modeling. + * + * The check only takes Models as Data extension models into account. + */ + abstract boolean getSupportedStatus(); + + /** + * Gets a string that describes the type of support detected for this endpoint. + * + * The string can be one of the following: + * - "source" if this endpoint is a known source. + * - "sink" if this endpoint is a known sink. + * - "summary" if this endpoint has a flow summary. + * - "neutral" if this endpoint is a known neutral. + * - "" if this endpoint is not detected as supported. + */ + abstract string getSupportedType(); + + /** Gets the kind of this endpoint. See `EndpointKind`. 
*/ + abstract EndpointKind getKind(); +} + +private predicate sourceModelPath(string type, string path) { sourceModel(type, path, _, _) } + +module FindSourceModel = Util::FindModel<sourceModelPath/2>; + +private predicate sinkModelPath(string type, string path) { sinkModel(type, path, _, _) } + +module FindSinkModel = Util::FindModel<sinkModelPath/2>; + +private predicate summaryModelPath(string type, string path) { + summaryModel(type, path, _, _, _, _) +} + +module FindSummaryModel = Util::FindModel<summaryModelPath/2>; + +private predicate neutralModelPath(string type, string path) { neutralModel(type, path, _) } + +module FindNeutralModel = Util::FindModel<neutralModelPath/2>; + +/** + * A callable function or method from source code. + */ +class FunctionEndpoint extends Endpoint instanceof Function { + /** + * Gets a string listing the parameter names of this endpoint. + */ + override string getParameters() { + // For now, return the names of positional and keyword parameters. We don't always have type information, so we can't return type names. + // We don't yet handle splat params or dict splat params. + // + // In Python, there are three types of parameters: + // 1. Positional-only parameters: These are parameters that can only be passed by position and not by keyword. + // 2. Positional-or-keyword parameters: These are parameters that can be passed by position or by keyword. + // 3. Keyword-only parameters: These are parameters that can only be passed by keyword. + // + // The syntax for defining these parameters is as follows: + // ```python + // def f(a, /, b, *, c): + // pass + // ``` + // In this example, `a` is a positional-only parameter, `b` is a positional-or-keyword parameter, and `c` is a keyword-only parameter. + // + // We handle positional-only parameters by adding a "/" to the parameter name, reminiscent of the syntax above. + // Note that we don't yet have information about positional-only parameters. 
+ // We handle keyword-only parameters by adding a ":" to the parameter name, to be consistent with the MaD syntax and the other languages. Entries are ordered by numeric position (not by the index rendered as a string, which would put "10" before "2"); keyword-only parameters come after all positional ones, sorted by name. + exists(int nrPositional, Function f | + f = this and + nrPositional = f.getPositionalParameterCount() + | + result = + "(" + + concat(int sortIndex, string key, string value | + // TODO: Once we have information about positional-only parameters: + // Handle positional-only parameters by adding a "/" + value = f.getArgName(sortIndex) and key = sortIndex.toString() + or + exists(Name param | param = f.getAKeywordOnlyArg() | + param.getId() = key and + value = key + ":" and sortIndex = nrPositional + ) + | + value, "," order by sortIndex, key + ) + ")" + ) + } + + /** Holds if this API has a supported summary. */ + pragma[nomagic] + predicate hasSummary() { FindSummaryModel::hasModel(this) } + + /** Holds if this API is a known source. */ + pragma[nomagic] + predicate isSource() { FindSourceModel::hasModel(this) } + + /** Holds if this API is a known sink. */ + pragma[nomagic] + predicate isSink() { FindSinkModel::hasModel(this) } + + /** Holds if this API is a known neutral. */ + pragma[nomagic] + predicate isNeutral() { FindNeutralModel::hasModel(this) } + + /** + * Holds if this API is supported by existing CodeQL libraries, that is, it is either a + * recognized source, sink or neutral or it has a flow summary. 
+ */ + predicate isSupported() { + this.hasSummary() or this.isSource() or this.isSink() or this.isNeutral() + } + + override boolean getSupportedStatus() { + if this.isSupported() then result = true else result = false + } + + override string getSupportedType() { + this.isSink() and result = "sink" + or + this.isSource() and result = "source" + or + this.hasSummary() and result = "summary" + or + this.isNeutral() and result = "neutral" + or + not this.isSupported() and result = "" + } + + override EndpointKind getKind() { + if this.(Function).isMethod() + then + result = this.methodKind() + or + not exists(this.methodKind()) and result = "InstanceMethod" + else result = "Function" + } + + private EndpointKind methodKind() { + this.(Function).isMethod() and + ( + DP::isClassmethod(this) and result = "ClassMethod" + or + DP::isStaticmethod(this) and result = "StaticMethod" + or + this.(Function).isInitMethod() and result = "InitMethod" + ) + } +} + +/** + * A class from source code. + */ +class ClassEndpoint extends Endpoint instanceof Class { + override string getClass() { /* avoid a leading "." when the class sits directly in the namespace */ if type = "" then result = name else result = type + "." + name } + + override string getFunctionName() { result = "" } + + override string getParameters() { result = "" } + + override boolean getSupportedStatus() { result = false } + + override string getSupportedType() { result = "" } + + override EndpointKind getKind() { result = "Class" } +} diff --git a/python/ql/lib/modeling/Util.qll b/python/ql/lib/modeling/Util.qll new file mode 100644 index 000000000000..01f4d265f0aa --- /dev/null +++ b/python/ql/lib/modeling/Util.qll @@ -0,0 +1,75 @@ +/** + * Contains utility methods and classes to assist with generating data extensions models. + */ + +private import python +private import semmle.python.ApiGraphs +private import semmle.python.filters.Tests + +/** A class to represent scopes that the user might want to model. 
*/ +class RelevantScope extends Scope { + RelevantScope() { + this.isPublic() and + not this instanceof TestScope and + exists(this.getLocation().getFile().getRelativePath()) + } +} + +/** + * Gets the dotted path of a scope. + */ +string computeScopePath(RelevantScope scope) { + // base case + if scope instanceof Module + then + scope.(Module).isPackageInit() and + result = scope.(Module).getPackageName() + or + not scope.(Module).isPackageInit() and + result = scope.(Module).getName() + else + // recursive cases + if scope instanceof Class or scope instanceof Function + then result = computeScopePath(scope.getEnclosingScope()) + "." + scope.getName() + else result = "unknown: " + scope.toString() +} + +signature predicate modelSig(string type, string path); + +/** + * A utility module for finding models of endpoints. + * + * Chiefly the `hasModel` predicate is used to determine if a scope has a model. + */ +module FindModel<modelSig/2 model> { + /** + * Holds if the given scope has a model as identified by the provided predicate `model`. + */ + predicate hasModel(RelevantScope scope) { + exists(string type, string path, string searchPath | model(type, path) | + searchPath = possibleMemberPathPrefix(path, scope.getName()) and + pathToScope(scope, type, searchPath) + ) + } + + /** + * Gets the prefix of `path` that might be a path to `member`. + */ + bindingset[path, member] + string possibleMemberPathPrefix(string path, string member) { + exists(int index | index = path.indexOf(["Member", "Method"] + "[" + member + "]") | + result = path.prefix(index) + ) + } + + /** + * Holds if `(type,path)` identifies `scope`. + */ + bindingset[type, path] + predicate pathToScope(RelevantScope scope, string type, string path) { + computeScopePath(scope) = + type.replaceAll("!", "") + "." 
+ + path.replaceAll("Member[", "").replaceAll("]", "").replaceAll("Instance.", "") + + scope.getName() + } +} diff --git a/python/ql/lib/semmle/python/Scope.qll b/python/ql/lib/semmle/python/Scope.qll index 891e249faf57..4131455299cb 100644 --- a/python/ql/lib/semmle/python/Scope.qll +++ b/python/ql/lib/semmle/python/Scope.qll @@ -85,9 +85,10 @@ class Scope extends Scope_ { this instanceof Module or exists(Module m | m = this.getEnclosingScope() and m.isPublic() | - /* If the module has an __all__, is this in it */ + // No explicit exports are declared for the module, so this scope is implicitly exported not exists(getAModuleExport(m)) or + // This scope is explicitly exported by the module getAModuleExport(m) = this.getName() ) or diff --git a/python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql b/python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql new file mode 100644 index 000000000000..b0af86421f4f --- /dev/null +++ b/python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql @@ -0,0 +1,14 @@ +/** + * @name Fetch endpoints for use in the model editor (framework mode) + * @description A list of endpoints (functions, methods and classes) accessible to consumers of the library. Excludes test code. 
+ * @kind table + * @id py/utils/modeleditor/framework-mode-endpoints + * @tags modeleditor endpoints framework-mode + */ + +import modeling.ModelEditor + +from Endpoint endpoint +select endpoint, endpoint.getNamespace(), endpoint.getClass(), endpoint.getFunctionName(), + endpoint.getParameters(), endpoint.getSupportedStatus(), endpoint.getFileName(), + endpoint.getSupportedType(), endpoint.getKind() diff --git a/python/ql/test/modelling/FrameworkModeEndpoints.expected b/python/ql/test/modelling/FrameworkModeEndpoints.expected new file mode 100644 index 000000000000..7665b06d4ea4 --- /dev/null +++ b/python/ql/test/modelling/FrameworkModeEndpoints.expected @@ -0,0 +1,22 @@ +| MyPackage/Foo.py:1:1:1:9 | Class C1 | MyPackage | Foo.C1 | | | false | Foo.py | | Class | +| MyPackage/Foo.py:2:5:2:17 | Function m1 | MyPackage | Foo.C1 | m1 | (self) | true | Foo.py | source | InstanceMethod | +| MyPackage/Foo.py:5:5:5:20 | Function m2 | MyPackage | Foo.C1 | m2 | (self,x) | true | Foo.py | source | InstanceMethod | +| MyPackage/Foo.py:9:5:9:14 | Function m3 | MyPackage | Foo.C1 | m3 | (x) | true | Foo.py | summary | StaticMethod | +| MyPackage/Foo.py:13:5:13:19 | Function m4 | MyPackage | Foo.C1 | m4 | (cls,x) | true | Foo.py | summary | ClassMethod | +| MyPackage/Foo.py:16:1:16:13 | Class C2 | MyPackage | Foo.C2 | | | false | Foo.py | | Class | +| MyPackage/Foo.py:17:5:17:17 | Function m1 | MyPackage | Foo.C2 | m1 | (self) | false | Foo.py | | InstanceMethod | +| MyPackage/Foo.py:20:5:20:27 | Function c2only_m1 | MyPackage | Foo.C2 | c2only_m1 | (self,x) | false | Foo.py | | InstanceMethod | +| MyPackage/Foo.py:23:1:23:9 | Class C3 | MyPackage | Foo.C3 | | | false | Foo.py | | Class | +| MyPackage/Foo.py:24:5:24:26 | Function get_C2_instance | MyPackage | Foo.C3 | get_C2_instance | () | false | Foo.py | | InstanceMethod | +| MyPackage/Foo.py:31:1:31:38 | Function top_level_function | MyPackage | Foo | top_level_function | (x,y,z:) | false | Foo.py | | Function | +| 
MyPackage/Foo.py:34:1:34:42 | Function func_with_fancy_args | MyPackage | Foo | func_with_fancy_args | () | false | Foo.py | | Function | +| MyPackage/ModuleWithAll.py:2:1:2:10 | Class Foo | MyPackage | ModuleWithAll.Foo | | | false | ModuleWithAll.py | | Class | +| MyPackage/ModuleWithAll.py:3:1:3:10 | Class Bar | MyPackage | ModuleWithAll.Bar | | | false | ModuleWithAll.py | | Class | +| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | NotPackage | | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function | +| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | NotPackage | not_in_pacakge_lib | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function | +| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | not_in_pacakge_lib | | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function | +| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | not_in_pacakge_lib | not_in_pacakge_lib | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function | +| NotPackage/not_in_package_script.py:5:1:5:37 | Function not_in_package_script_func | NotPackage | not_in_package_script | not_in_package_script_func | (x,y) | false | not_in_package_script.py | | Function | +| NotPackage/possibly_lib.py:4:1:4:28 | Function possibly_lib_func | NotPackage | possibly_lib | possibly_lib_func | (x,y) | false | possibly_lib.py | | Function | +| TopLevel.py:3:1:3:38 | Function top_level_function | TopLevel | | top_level_function | (x,y,z:) | false | TopLevel.py | | Function | +| not-valid-package/not_in_pacakge_lib_copy.py:1:1:1:34 | Function not_in_pacakge_lib_func | not_in_pacakge_lib_copy | | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib_copy.py | | Function | diff --git a/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml b/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml new 
file mode 100644 index 000000000000..6942e501b9fa --- /dev/null +++ b/python/ql/test/modelling/FrameworkModeEndpoints.ext.yml @@ -0,0 +1,24 @@ +extensions: + - addsTo: + pack: codeql/python-all + extensible: sourceModel + data: + # Test short form of type column + - ["MyPackage.Foo.C1","Member[m1].ReturnValue","remote"] + # Test long form of type column + - ["MyPackage","Member[Foo].Member[C1].Instance.Member[m2].ReturnValue","remote"] + + - addsTo: + pack: codeql/python-all + extensible: summaryModel + data: + # Test short form of type column + - ["MyPackage.Foo.C1!","Member[m3]","Argument[0]","ReturnValue","value"] + # Test long form of type column + - ["MyPackage","Member[Foo].Member[C1].Member[m4]","Argument[0]","ReturnValue","value"] + + - addsTo: + pack: codeql/python-all + extensible: typeModel + data: + - ["MyPackage.Foo.C2","MyPackage","Member[Foo].Member[C3].Member[get_C2_instance].ReturnValue"] diff --git a/python/ql/test/modelling/FrameworkModeEndpoints.qlref b/python/ql/test/modelling/FrameworkModeEndpoints.qlref new file mode 100644 index 000000000000..5ae87455edd6 --- /dev/null +++ b/python/ql/test/modelling/FrameworkModeEndpoints.qlref @@ -0,0 +1 @@ +utils/modeleditor/FrameworkModeEndpoints.ql \ No newline at end of file diff --git a/python/ql/test/modelling/MyPackage/Foo.py b/python/ql/test/modelling/MyPackage/Foo.py new file mode 100644 index 000000000000..0c21d15861f7 --- /dev/null +++ b/python/ql/test/modelling/MyPackage/Foo.py @@ -0,0 +1,35 @@ +class C1: + def m1(self): + print("C1.m1()") + + def m2(self, x): + return x + + @staticmethod + def m3(x): + return x + + @classmethod + def m4(cls, x): + return x + +class C2(C1): + def m1(self): + print("C2.m1()") + + def c2only_m1(self, x): + return x + +class C3: + def get_C2_instance(): + return C2() + + class C3nested: + def m5(self, x): + return x + +def top_level_function(x, /, y, *, z): + return [x, y, z] + +def func_with_fancy_args(*args, **kwargs): + return args, kwargs \ No newline at end of 
file diff --git a/python/ql/test/modelling/MyPackage/ModuleWithAll.py b/python/ql/test/modelling/MyPackage/ModuleWithAll.py new file mode 100644 index 000000000000..0543cd308eec --- /dev/null +++ b/python/ql/test/modelling/MyPackage/ModuleWithAll.py @@ -0,0 +1,3 @@ +__all__ = ['Foo'] +class Foo: pass +class Bar: pass \ No newline at end of file diff --git a/python/ql/test/modelling/MyPackage/__init__.py b/python/ql/test/modelling/MyPackage/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/modelling/NotPackage/not_in_pacakge_lib.py b/python/ql/test/modelling/NotPackage/not_in_pacakge_lib.py new file mode 100644 index 000000000000..d45f86475f40 --- /dev/null +++ b/python/ql/test/modelling/NotPackage/not_in_pacakge_lib.py @@ -0,0 +1,2 @@ +def not_in_pacakge_lib_func(x, y): + return x + y diff --git a/python/ql/test/modelling/NotPackage/not_in_package_script.py b/python/ql/test/modelling/NotPackage/not_in_package_script.py new file mode 100644 index 000000000000..0eaa723a2948 --- /dev/null +++ b/python/ql/test/modelling/NotPackage/not_in_package_script.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python + +import not_in_pacakge_lib + +def not_in_package_script_func(x, y): + return x + y + +if __name__ == "__main__": + print(not_in_pacakge_lib.not_in_pacakge_lib_func(1, 2)) + print(not_in_package_script_func(3, 4)) diff --git a/python/ql/test/modelling/NotPackage/possibly_lib.py b/python/ql/test/modelling/NotPackage/possibly_lib.py new file mode 100644 index 000000000000..4f3bc882ce6b --- /dev/null +++ b/python/ql/test/modelling/NotPackage/possibly_lib.py @@ -0,0 +1,5 @@ +# model editor should allow modeling the functions defined in this file, even when the +# file is not imported explicitly. 
+ +def possibly_lib_func(x, y): + return x + y diff --git a/python/ql/test/modelling/TopLevel.py b/python/ql/test/modelling/TopLevel.py new file mode 100644 index 000000000000..605fcab65acb --- /dev/null +++ b/python/ql/test/modelling/TopLevel.py @@ -0,0 +1,11 @@ +from MyPackage import Foo, ModuleWithAll + +def top_level_function(x, /, y, *, z): + return [x, y, z] + +top_level_value = Foo.C1() + +iC2 = Foo.C3.get_C2_instance() + +f = ModuleWithAll.Foo() +b = ModuleWithAll.Bar() \ No newline at end of file diff --git a/python/ql/test/modelling/not-valid-package/not_in_pacakge_lib_copy.py b/python/ql/test/modelling/not-valid-package/not_in_pacakge_lib_copy.py new file mode 100644 index 000000000000..d45f86475f40 --- /dev/null +++ b/python/ql/test/modelling/not-valid-package/not_in_pacakge_lib_copy.py @@ -0,0 +1,2 @@ +def not_in_pacakge_lib_func(x, y): + return x + y diff --git a/python/ql/test/modelling/not-valid-package/not_in_package_script_copy.py b/python/ql/test/modelling/not-valid-package/not_in_package_script_copy.py new file mode 100644 index 000000000000..22b6bcfd523d --- /dev/null +++ b/python/ql/test/modelling/not-valid-package/not_in_package_script_copy.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python + +import not_in_pacakge_lib_copy + +def not_in_package_script_func(x, y): + return x + y + +if __name__ == "__main__": + print(not_in_pacakge_lib_copy.not_in_pacakge_lib_func(1, 2)) + print(not_in_package_script_func(3, 4)) diff --git a/python/ql/test/modelling/not-valid-package/possibly_lib_copy.py b/python/ql/test/modelling/not-valid-package/possibly_lib_copy.py new file mode 100644 index 000000000000..4f3bc882ce6b --- /dev/null +++ b/python/ql/test/modelling/not-valid-package/possibly_lib_copy.py @@ -0,0 +1,5 @@ +# model editor should allow modeling the functions defined in this file, even when the +# file is not imported explicitly. 
+ +def possibly_lib_func(x, y): + return x + y diff --git a/python/ql/test/modelling/options b/python/ql/test/modelling/options new file mode 100644 index 000000000000..3819071b01cc --- /dev/null +++ b/python/ql/test/modelling/options @@ -0,0 +1 @@ +semmle-extractor-options: -R .