Skip to content

Commit

Permalink
Merge pull request #15655 from yoff/python/support-model-editor
Browse files Browse the repository at this point in the history
Python: Support model editor
  • Loading branch information
RasmusWL authored Jul 2, 2024
2 parents 8e8100f + dc33f0d commit ce177c3
Show file tree
Hide file tree
Showing 18 changed files with 468 additions and 1 deletion.
246 changes: 246 additions & 0 deletions python/ql/lib/modeling/ModelEditor.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
/** Provides classes and predicates related to handling APIs for the VS Code extension. */

private import python
private import semmle.python.frameworks.data.ModelsAsData
private import semmle.python.frameworks.data.internal.ApiGraphModelsExtensions
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DP
private import Util as Util

/**
 * A string naming the kind of source code element that an endpoint represents.
 *
 * See `Endpoint`.
 */
class EndpointKind extends string {
  EndpointKind() {
    // The closed set of kinds that an `Endpoint` can report.
    this = ["Function", "InstanceMethod", "ClassMethod", "StaticMethod", "InitMethod", "Class"]
  }
}

/**
 * A source code element that can be modeled.
 *
 * The possible kinds of elements are listed in `EndpointKind`.
 */
abstract class Endpoint instanceof Util::RelevantScope {
  // The part of the dotted scope path before its first ".".
  string namespace;
  // The part of the scope path between `namespace` and `name`, or "" if there is none.
  string type;
  // The last component of the scope path.
  string name;

  Endpoint() {
    exists(string scopePath, int firstDot, string path |
      scopePath = Util::computeScopePath(this) and
      // Split at the first ".": everything before it is the namespace.
      firstDot = scopePath.indexOf(".", 0, 0) and
      namespace = scopePath.prefix(firstDot) and
      path = scopePath.suffix(firstDot + 1)
    |
      // Split the remainder at its last ".", if it has one.
      if exists(path.indexOf("."))
      then
        exists(int lastDot | lastDot = max(path.indexOf(".")) |
          type = path.prefix(lastDot) and
          name = path.suffix(lastDot + 1)
        )
      else (
        type = "" and
        name = path
      )
    )
  }

  /** Gets a string representation of this endpoint. */
  string toString() { result = super.toString() }

  /** Gets the location of this endpoint. */
  Location getLocation() { result = super.getLocation() }

  /** Gets the basename of the file where this endpoint is found. */
  string getFileName() { result = super.getLocation().getFile().getBaseName() }

  /**
   * Gets the namespace for this endpoint, that is, the first component of its dotted scope path.
   * This will typically be the package in which it is found.
   */
  string getNamespace() { result = namespace }

  /**
   * Gets the dotted path between the namespace and the name of this endpoint
   * (typically the enclosing modules and classes), or the empty string if
   * the name directly follows the namespace.
   */
  string getClass() { result = type }

  /**
   * Gets the name of the endpoint if it is not a class, or the empty string if it is a class.
   *
   * If this endpoint is a class, the class name can be obtained via `getClass`.
   */
  string getFunctionName() { result = name }

  /**
   * Gets a string representation of the parameters of this endpoint.
   *
   * The string follows a specific format:
   * - Normal parameters (where arguments can be passed as either positional or keyword) are listed in order, separated by commas.
   * - Keyword-only parameters are listed, separated by commas, each followed by a colon.
   * - In the future, positional-only parameters will be listed in order, separated by commas, each followed by a slash.
   */
  abstract string getParameters();

  /**
   * Gets a boolean that is true iff this endpoint is supported by existing modeling.
   *
   * The check only takes Models as Data extension models into account.
   */
  abstract boolean getSupportedStatus();

  /**
   * Gets a string that describes the type of support detected for this endpoint.
   *
   * The string can be one of the following:
   * - "source" if this endpoint is a known source.
   * - "sink" if this endpoint is a known sink.
   * - "summary" if this endpoint has a flow summary.
   * - "neutral" if this endpoint is a known neutral.
   * - "" if this endpoint is not detected as supported.
   */
  abstract string getSupportedType();

  /** Gets the kind of this endpoint. See `EndpointKind`. */
  abstract EndpointKind getKind();
}

/** Holds if some `sourceModel` row has the given `type` and `path`; the remaining columns are ignored. */
private predicate sourceModelPath(string type, string path) { sourceModel(type, path, _, _) }

/** Model lookup (see `Util::FindModel`) instantiated with the `(type, path)` pairs of source models. */
module FindSourceModel = Util::FindModel<sourceModelPath/2>;

/** Holds if some `sinkModel` row has the given `type` and `path`; the remaining columns are ignored. */
private predicate sinkModelPath(string type, string path) { sinkModel(type, path, _, _) }

/** Model lookup (see `Util::FindModel`) instantiated with the `(type, path)` pairs of sink models. */
module FindSinkModel = Util::FindModel<sinkModelPath/2>;

/** Holds if some `summaryModel` row has the given `type` and `path`; the remaining columns are ignored. */
private predicate summaryModelPath(string type, string path) {
summaryModel(type, path, _, _, _, _)
}

/** Model lookup (see `Util::FindModel`) instantiated with the `(type, path)` pairs of summary models. */
module FindSummaryModel = Util::FindModel<summaryModelPath/2>;

/** Holds if some `neutralModel` row has the given `type` and `path`; the remaining column is ignored. */
private predicate neutralModelPath(string type, string path) { neutralModel(type, path, _) }

/** Model lookup (see `Util::FindModel`) instantiated with the `(type, path)` pairs of neutral models. */
module FindNeutralModel = Util::FindModel<neutralModelPath/2>;

/**
 * A callable function or method from source code.
 */
class FunctionEndpoint extends Endpoint instanceof Function {
  /**
   * Gets a string representation of the parameter names of this endpoint.
   *
   * The result is a parenthesized, comma-separated list: first the named
   * positional parameters in declaration order, then the keyword-only
   * parameters (sorted by name), each followed by a colon.
   */
  override string getParameters() {
    // For now, return the names of positional and keyword parameters. We don't always have type information, so we can't return type names.
    // We don't yet handle splat params or dict splat params.
    //
    // In Python, there are three types of parameters:
    // 1. Positional-only parameters: These are parameters that can only be passed by position and not by keyword.
    // 2. Positional-or-keyword parameters: These are parameters that can be passed by position or by keyword.
    // 3. Keyword-only parameters: These are parameters that can only be passed by keyword.
    //
    // The syntax for defining these parameters is as follows:
    // ```python
    // def f(a, /, b, *, c):
    //     pass
    // ```
    // In this example, `a` is a positional-only parameter, `b` is a positional-or-keyword parameter, and `c` is a keyword-only parameter.
    //
    // We handle positional-only parameters by adding a "/" to the parameter name, reminiscent of the syntax above.
    // Note that we don't yet have information about positional-only parameters.
    // We handle keyword-only parameters by adding a ":" to the parameter name, to be consistent with the MaD syntax and the other languages.
    exists(Function f, int nrPositional |
      f = this and
      nrPositional = f.getPositionalParameterCount()
    |
      result =
        "(" +
          concat(int position, string key, string value |
            // TODO: Once we have information about positional-only parameters:
            // Handle positional-only parameters by adding a "/"
            //
            // Positional parameters are ordered by their integer index. Ordering by the
            // string form of the index would place "10" before "2".
            value = f.getArgName(position) and
            key = ""
            or
            // Keyword-only parameters all share a position that comes after every
            // positional index, and are ordered among themselves by name.
            exists(Name param | param = f.getAKeywordOnlyArg() |
              key = param.getId() and
              value = key + ":" and
              position = nrPositional
            )
          |
            value, "," order by position, key
          ) + ")"
    )
  }

  /** Holds if this API has a supported summary. */
  pragma[nomagic]
  predicate hasSummary() { FindSummaryModel::hasModel(this) }

  /** Holds if this API is a known source. */
  pragma[nomagic]
  predicate isSource() { FindSourceModel::hasModel(this) }

  /** Holds if this API is a known sink. */
  pragma[nomagic]
  predicate isSink() { FindSinkModel::hasModel(this) }

  /** Holds if this API is a known neutral. */
  pragma[nomagic]
  predicate isNeutral() { FindNeutralModel::hasModel(this) }

  /**
   * Holds if this API is supported by existing CodeQL libraries, that is, it is either a
   * recognized source, sink or neutral or it has a flow summary.
   */
  predicate isSupported() {
    this.hasSummary() or this.isSource() or this.isSink() or this.isNeutral()
  }

  /** Gets `true` if this endpoint is supported (see `isSupported`), and `false` otherwise. */
  override boolean getSupportedStatus() {
    if this.isSupported() then result = true else result = false
  }

  /**
   * Gets the type of support detected for this endpoint: "sink", "source",
   * "summary" or "neutral", or "" if it is not supported.
   *
   * An endpoint matched by models of several categories has one result per category.
   */
  override string getSupportedType() {
    this.isSink() and result = "sink"
    or
    this.isSource() and result = "source"
    or
    this.hasSummary() and result = "summary"
    or
    this.isNeutral() and result = "neutral"
    or
    not this.isSupported() and result = ""
  }

  /**
   * Gets the kind of this endpoint: "Function" for non-methods, otherwise the
   * specific method kind, defaulting to "InstanceMethod".
   */
  override EndpointKind getKind() {
    if this.(Function).isMethod()
    then (
      result = this.methodKind()
      or
      // A method that is neither a class method, a static method nor an init method.
      not exists(this.methodKind()) and result = "InstanceMethod"
    ) else result = "Function"
  }

  /** Gets the special method kind of this endpoint, if it is a class, static or init method. */
  private EndpointKind methodKind() {
    this.(Function).isMethod() and
    (
      DP::isClassmethod(this) and result = "ClassMethod"
      or
      DP::isStaticmethod(this) and result = "StaticMethod"
      or
      this.(Function).isInitMethod() and result = "InitMethod"
    )
  }
}

/**
 * A class from source code.
 */
class ClassEndpoint extends Endpoint instanceof Class {
  /**
   * Gets the dotted path of this class relative to its namespace, including
   * the name of the class itself.
   */
  override string getClass() {
    // When nothing sits between the namespace and the class name (`type` is empty),
    // return just the name rather than a path with a leading ".".
    if type = "" then result = name else result = type + "." + name
  }

  /** Gets the empty string, since a class has no function name. */
  override string getFunctionName() { result = "" }

  /** Gets the empty string, since no parameters are reported for classes. */
  override string getParameters() { result = "" }

  /** Gets `false`; classes are never reported as supported. */
  override boolean getSupportedStatus() { result = false }

  /** Gets the empty string; no support type is reported for classes. */
  override string getSupportedType() { result = "" }

  /** Gets the kind of this endpoint, which is always "Class". */
  override EndpointKind getKind() { result = "Class" }
}
75 changes: 75 additions & 0 deletions python/ql/lib/modeling/Util.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/**
* Contains utility predicates and classes to assist with generating data extension models.
*/

private import python
private import semmle.python.ApiGraphs
private import semmle.python.filters.Tests

/** A scope that the user might want to model: public, not a test scope, and in a file with a relative path. */
class RelevantScope extends Scope {
  RelevantScope() {
    // The file must have a relative path (presumably this excludes files outside the source root).
    exists(this.getLocation().getFile().getRelativePath()) and
    not this instanceof TestScope and
    this.isPublic()
  }
}

/**
 * Gets the dotted path of `scope`: the package or module name for a module,
 * and the path of the enclosing scope followed by "." and the scope's own
 * name for a class or function.
 */
string computeScopePath(RelevantScope scope) {
  // Base case: a module is identified by its package name (for a package
  // init module) or by its own name.
  exists(Module m | m = scope |
    m.isPackageInit() and result = m.getPackageName()
    or
    not m.isPackageInit() and result = m.getName()
  )
  or
  not scope instanceof Module and
  (
    if scope instanceof Class or scope instanceof Function
    then
      // Recursive case: extend the path of the enclosing scope.
      result = computeScopePath(scope.getEnclosingScope()) + "." + scope.getName()
    else
      // Fallback for any other kind of scope.
      result = "unknown: " + scope.toString()
  )
}

/** The signature of a predicate enumerating the `(type, path)` pairs of a set of models; used to parameterize `FindModel`. */
signature predicate modelSig(string type, string path);

/**
 * A utility module for finding models of endpoints.
 *
 * The main entry point is `hasModel`, which determines whether a scope has a
 * model among those enumerated by the `model` parameter.
 */
module FindModel<modelSig/2 model> {
  /**
   * Holds if the given scope has a model as identified by the provided predicate `model`.
   */
  predicate hasModel(RelevantScope scope) {
    exists(string type, string path |
      model(type, path) and
      // Some prefix of `path` that ends just before an access to the scope's
      // name must, together with `type`, lead to the scope.
      pathToScope(scope, type, possibleMemberPathPrefix(path, scope.getName()))
    )
  }

  /**
   * Gets a prefix of `path` that might be a path to `member`, that is, the
   * part of `path` before an occurrence of `Member[member]` or `Method[member]`.
   */
  bindingset[path, member]
  string possibleMemberPathPrefix(string path, string member) {
    exists(string access | access = ["Member", "Method"] + "[" + member + "]" |
      result = path.prefix(path.indexOf(access))
    )
  }

  /**
   * Holds if `(type,path)` identifies `scope`.
   */
  bindingset[type, path]
  predicate pathToScope(RelevantScope scope, string type, string path) {
    exists(string typePart, string pathPart |
      // Drop the "!" marker from the type column.
      typePart = type.replaceAll("!", "") and
      // Reduce `Member[x].` components to `x.` and drop `Instance.` components.
      pathPart = path.replaceAll("Member[", "").replaceAll("]", "").replaceAll("Instance.", "") and
      computeScopePath(scope) = typePart + "." + pathPart + scope.getName()
    )
  }
}
3 changes: 2 additions & 1 deletion python/ql/lib/semmle/python/Scope.qll
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,10 @@ class Scope extends Scope_ {
this instanceof Module
or
exists(Module m | m = this.getEnclosingScope() and m.isPublic() |
/* If the module has an __all__, is this in it */
// The module is implicitly exported
not exists(getAModuleExport(m))
or
// The module is explicitly exported
getAModuleExport(m) = this.getName()
)
or
Expand Down
14 changes: 14 additions & 0 deletions python/ql/src/utils/modeleditor/FrameworkModeEndpoints.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
* @name Fetch endpoints for use in the model editor (framework mode)
* @description A list of endpoints accessible (methods and attributes) for consumers of the library. Excludes test and generated code.
* @kind table
* @id py/utils/modeleditor/framework-mode-endpoints
* @tags modeleditor endpoints framework-mode
*/

import modeling.ModelEditor

// Result columns, in order: the endpoint itself, its namespace, class, function name,
// parameters, supported status, file name, supported type, and kind.
from Endpoint endpoint
select endpoint, endpoint.getNamespace(), endpoint.getClass(), endpoint.getFunctionName(),
endpoint.getParameters(), endpoint.getSupportedStatus(), endpoint.getFileName(),
endpoint.getSupportedType(), endpoint.getKind()
22 changes: 22 additions & 0 deletions python/ql/test/modelling/FrameworkModeEndpoints.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
| MyPackage/Foo.py:1:1:1:9 | Class C1 | MyPackage | Foo.C1 | | | false | Foo.py | | Class |
| MyPackage/Foo.py:2:5:2:17 | Function m1 | MyPackage | Foo.C1 | m1 | (self) | true | Foo.py | source | InstanceMethod |
| MyPackage/Foo.py:5:5:5:20 | Function m2 | MyPackage | Foo.C1 | m2 | (self,x) | true | Foo.py | source | InstanceMethod |
| MyPackage/Foo.py:9:5:9:14 | Function m3 | MyPackage | Foo.C1 | m3 | (x) | true | Foo.py | summary | StaticMethod |
| MyPackage/Foo.py:13:5:13:19 | Function m4 | MyPackage | Foo.C1 | m4 | (cls,x) | true | Foo.py | summary | ClassMethod |
| MyPackage/Foo.py:16:1:16:13 | Class C2 | MyPackage | Foo.C2 | | | false | Foo.py | | Class |
| MyPackage/Foo.py:17:5:17:17 | Function m1 | MyPackage | Foo.C2 | m1 | (self) | false | Foo.py | | InstanceMethod |
| MyPackage/Foo.py:20:5:20:27 | Function c2only_m1 | MyPackage | Foo.C2 | c2only_m1 | (self,x) | false | Foo.py | | InstanceMethod |
| MyPackage/Foo.py:23:1:23:9 | Class C3 | MyPackage | Foo.C3 | | | false | Foo.py | | Class |
| MyPackage/Foo.py:24:5:24:26 | Function get_C2_instance | MyPackage | Foo.C3 | get_C2_instance | () | false | Foo.py | | InstanceMethod |
| MyPackage/Foo.py:31:1:31:38 | Function top_level_function | MyPackage | Foo | top_level_function | (x,y,z:) | false | Foo.py | | Function |
| MyPackage/Foo.py:34:1:34:42 | Function func_with_fancy_args | MyPackage | Foo | func_with_fancy_args | () | false | Foo.py | | Function |
| MyPackage/ModuleWithAll.py:2:1:2:10 | Class Foo | MyPackage | ModuleWithAll.Foo | | | false | ModuleWithAll.py | | Class |
| MyPackage/ModuleWithAll.py:3:1:3:10 | Class Bar | MyPackage | ModuleWithAll.Bar | | | false | ModuleWithAll.py | | Class |
| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | NotPackage | | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function |
| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | NotPackage | not_in_pacakge_lib | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function |
| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | not_in_pacakge_lib | | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function |
| NotPackage/not_in_pacakge_lib.py:1:1:1:34 | Function not_in_pacakge_lib_func | not_in_pacakge_lib | not_in_pacakge_lib | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib.py | | Function |
| NotPackage/not_in_package_script.py:5:1:5:37 | Function not_in_package_script_func | NotPackage | not_in_package_script | not_in_package_script_func | (x,y) | false | not_in_package_script.py | | Function |
| NotPackage/possibly_lib.py:4:1:4:28 | Function possibly_lib_func | NotPackage | possibly_lib | possibly_lib_func | (x,y) | false | possibly_lib.py | | Function |
| TopLevel.py:3:1:3:38 | Function top_level_function | TopLevel | | top_level_function | (x,y,z:) | false | TopLevel.py | | Function |
| not-valid-package/not_in_pacakge_lib_copy.py:1:1:1:34 | Function not_in_pacakge_lib_func | not_in_pacakge_lib_copy | | not_in_pacakge_lib_func | (x,y) | false | not_in_pacakge_lib_copy.py | | Function |
24 changes: 24 additions & 0 deletions python/ql/test/modelling/FrameworkModeEndpoints.ext.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
extensions:
- addsTo:
pack: codeql/python-all
extensible: sourceModel
data:
# Test short form of type column
- ["MyPackage.Foo.C1","Member[m1].ReturnValue","remote"]
# Test long form of type column
- ["MyPackage","Member[Foo].Member[C1].Instance.Member[m2].ReturnValue","remote"]

- addsTo:
pack: codeql/python-all
extensible: summaryModel
data:
# Test short form of type column
- ["MyPackage.Foo.C1!","Member[m3]","Argument[0]","ReturnValue","value"]
# Test long form of type column
- ["MyPackage","Member[Foo].Member[C1].Member[m4]","Argument[0]","ReturnValue","value"]

- addsTo:
pack: codeql/python-all
extensible: typeModel
data:
- ["MyPackage.Foo.C2","MyPackage","Member[Foo].Member[C3].Member[get_C2_instance].ReturnValue"]
Loading

0 comments on commit ce177c3

Please sign in to comment.