Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JS: Add class harness to recover localFieldStep edges #18302

Draft
wants to merge 4 commits into
base: js/shared-dataflow-branch
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ DataFlow::Node getSynthesizedNode(AstNode node, string tag) {
result = TGenericSynthesizedNode(node, tag, _)
}

/**
 * Gets the synthesized callable that was created for `node` under the given `tag`, if any.
 */
DataFlowCallable getSynthesizedCallable(AstNode node, string tag) {
  result.isGenericSynthesizedCallable(node, tag)
}

/**
 * Gets a synthesized call that was created for `node` under the given `tag`,
 * regardless of which callable encloses it.
 */
DataFlowCall getSynthesizedCall(AstNode node, string tag) {
  result.isGenericSynthesizedCall(node, tag, _)
}

/**
* An extension to `AdditionalFlowStep` with additional internal-only predicates.
*/
Expand All @@ -22,6 +30,10 @@ class AdditionalFlowInternal extends DataFlow::AdditionalFlowStep {
*/
predicate needsSynthesizedNode(AstNode node, string tag, DataFlowCallable container) { none() }

/**
 * Holds if a synthesized callable should be created for `node` under the given `tag`.
 *
 * The resulting callable can be obtained using `getSynthesizedCallable(node, tag)`.
 * Does not hold for anything by default.
 */
predicate needsSynthesizedCallable(AstNode node, string tag) { none() }

/**
 * Holds if a synthesized call should be created for `node` under the given `tag`,
 * with `container` as its enclosing callable.
 *
 * The resulting call can be obtained using `getSynthesizedCall(node, tag)`.
 * Does not hold for anything by default.
 */
predicate needsSynthesizedCall(AstNode node, string tag, DataFlowCallable container) { none() }

/**
* Holds if `node` should only permit flow of values stored in `contents`.
*/
Expand All @@ -31,4 +43,10 @@ class AdditionalFlowInternal extends DataFlow::AdditionalFlowStep {
* Holds if `node` should not permit flow of values stored in `contents`.
*/
predicate clearsContent(DataFlow::Node node, DataFlow::ContentSet contents) { none() }

/**
 * Holds if `value` should be treated as the argument at position `pos` of `call`,
 * in addition to the arguments recognized by default.
 */
predicate argument(DataFlowCall call, ArgumentPosition pos, DataFlow::Node value) { none() }

/**
 * Holds if `post` should be treated as the post-update node of `pre`,
 * in addition to the post-update pairs recognized by default.
 */
predicate postUpdate(DataFlow::Node pre, DataFlow::Node post) { none() }

/**
 * Holds if `target` should be treated as a viable callee of `call`,
 * in addition to the call targets resolved by default.
 */
predicate viableCallable(DataFlowCall call, DataFlowCallable target) { none() }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ private import sharedlib.FlowSummaryImpl as FlowSummaryImpl
private import semmle.javascript.dataflow.internal.FlowSummaryPrivate as FlowSummaryPrivate
private import semmle.javascript.dataflow.FlowSummary as FlowSummary
private import semmle.javascript.dataflow.internal.BarrierGuards
private import codeql.util.Boolean

class DataFlowSecondLevelScope = Unit;

Expand Down Expand Up @@ -381,6 +382,8 @@ predicate postUpdatePair(Node pre, Node post) {
pre.(FlowSummaryNode).getSummaryNode())
or
VariableCaptureOutput::capturePostUpdateNode(getClosureNode(post), getClosureNode(pre))
or
any(AdditionalFlowInternal f).postUpdate(pre, post)
}

class CastNode extends DataFlow::Node {
Expand All @@ -390,25 +393,42 @@ class CastNode extends DataFlow::Node {
cached
newtype TDataFlowCallable =
MkSourceCallable(StmtContainer container) or
MkLibraryCallable(LibraryCallable callable)
MkLibraryCallable(LibraryCallable callable) or
MkGenericSynthesizedCallable(AstNode node, string tag) {
any(AdditionalFlowInternal f).needsSynthesizedCallable(node, tag)
}

/**
* A callable entity. This is a wrapper around either a `StmtContainer` or a `LibraryCallable`.
* A callable entity.
*/
class DataFlowCallable extends TDataFlowCallable {
/** Gets a string representation of this callable. */
string toString() {
result = this.asSourceCallable().toString()
or
result = this.asLibraryCallable()
or
this.isGenericSynthesizedCallable(_, result)
}

/** Gets the location of this callable, if it is present in the source code. */
Location getLocation() { result = this.asSourceCallable().getLocation() }
Location getLocation() {
result = this.asSourceCallable().getLocation()
or
exists(AstNode node |
this.isGenericSynthesizedCallable(node, _) and
result = node.getLocation()
)
}

/** Gets the corresponding `StmtContainer` if this is a source callable. */
StmtContainer asSourceCallable() { this = MkSourceCallable(result) }

/** Holds if this is the synthesized callable that was created for `node` under the given `tag`. */
predicate isGenericSynthesizedCallable(AstNode node, string tag) {
this = MkGenericSynthesizedCallable(node, tag)
}

/** Gets the corresponding `StmtContainer` if this is a source callable. */
pragma[nomagic]
StmtContainer asSourceCallableNotExterns() {
Expand Down Expand Up @@ -537,6 +557,8 @@ private predicate isArgumentNodeImpl(Node n, DataFlowCall call, ArgumentPosition
n = TDynamicArgumentArrayNode(invoke) and
pos.isDynamicArgumentArray()
)
or
any(AdditionalFlowInternal f).argument(call, pos, n)
}

predicate isArgumentNode(ArgumentNode n, DataFlowCall call, ArgumentPosition pos) {
Expand Down Expand Up @@ -734,7 +756,7 @@ ContentApprox getContentApprox(Content c) {
}

cached
private newtype TDataFlowCall =
newtype TDataFlowCall =
MkOrdinaryCall(DataFlow::InvokeNode node) or
MkPartialCall(DataFlow::PartialInvokeNode node, DataFlow::Node callback) {
callback = node.getACallbackNode()
Expand All @@ -755,6 +777,9 @@ private newtype TDataFlowCall =
FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver
) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
} or
MkGenericSynthesizedCall(AstNode node, string tag, DataFlowCallable container) {
any(AdditionalFlowInternal f).needsSynthesizedCall(node, tag, container)
}

private module TotalOrdering {
Expand Down Expand Up @@ -820,6 +845,10 @@ class DataFlowCall extends TDataFlowCall {
this = MkSummaryCall(enclosingCallable, receiver)
}

/**
 * Holds if this is the synthesized call that was created for `node` under the given `tag`,
 * with `container` as its enclosing callable.
 */
predicate isGenericSynthesizedCall(AstNode node, string tag, DataFlowCallable container) {
this = MkGenericSynthesizedCall(node, tag, container)
}

Location getLocation() { none() } // Overridden in subclass

int totalorder() {
Expand Down Expand Up @@ -938,6 +967,20 @@ private class ImpliedLambdaCall extends DataFlowCall, MkImpliedLambdaCall {
}
}

/**
 * A call that was synthesized for an AST node and tag, as requested through
 * `AdditionalFlowInternal.needsSynthesizedCall`.
 */
class GenericSynthesizedCall extends DataFlowCall, MkGenericSynthesizedCall {
  /** Gets the tag of this call, which also serves as its string representation. */
  override string toString() { this.isGenericSynthesizedCall(_, result, _) }

  /** Gets the location of the AST node this call was synthesized for. */
  override Location getLocation() {
    exists(AstNode anchor |
      this.isGenericSynthesizedCall(anchor, _, _) and
      result = anchor.getLocation()
    )
  }

  /** Gets the callable that was given as the container when this call was synthesized. */
  override DataFlowCallable getEnclosingCallable() { this.isGenericSynthesizedCall(_, _, result) }
}

private int getMaxArity() {
// TODO: account for flow summaries
result =
Expand Down Expand Up @@ -1035,6 +1078,8 @@ DataFlowCallable viableCallable(DataFlowCall node) {
)
or
result.asSourceCallableNotExterns() = node.asImpliedLambdaCall()
or
any(AdditionalFlowInternal f).viableCallable(node, result)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ private import Promises
private import Sets
private import Strings
private import DynamicImportStep
private import ClassHarness
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
* Contains flow for the "class harness", which facilitates flow from constructor to methods in a class.
*/

private import javascript
private import semmle.javascript.dataflow.internal.DataFlowNode
private import semmle.javascript.dataflow.internal.AdditionalFlowInternal
private import semmle.javascript.dataflow.internal.DataFlowPrivate

/**
* Synthesizes a callable for each class, which invokes the class constructor and every
* instance method with the same value of `this`.
*
* This ensures flow between methods in a class when the source originated "within the class",
* but not when the flow into the field came from an argument.
*
* For example:
* ```js
* class C {
* constructor(arg) {
* this.x = sourceOfTaint();
* this.y = arg;
* }
* method() {
* sink(this.x); // sourceOfTaint() flows here
* sink(this.y); // but 'arg' does not flow here (only through real call sites)
* }
* }
* ```
*
* The class harness for a class `C` can roughly be thought of as the following code:
* ```js
* function classHarness() {
* var c = new C();
* while (true) {
* // call an arbitrary instance method in the loop
* c.arbitraryInstanceMethod();
* }
* }
* ```
*
* This is realized with the following data flow graph:
* ```
* [Call to constructor]
* |
* | post-update for 'this' argument
* V
* [Data flow node] <----------------------+
* | |
* | 'this' argument | post-update for 'this' argument
* V |
* [Call to an instance method] -----------+
* ```
*/
class ClassHarnessModel extends AdditionalFlowInternal {
  /** Requests one `class-harness` callable for each non-arrow function outside of externs. */
  override predicate needsSynthesizedCallable(AstNode node, string tag) {
    tag = "class-harness" and
    node =
      any(Function fun |
        // arrow functions can't be called with 'new'
        not fun instanceof ArrowFunctionExpr and
        // we don't need harnesses in externs
        not fun.getTopLevel().isExterns()
      )
  }

  /** Requests a constructor call and a method call inside each harness callable. */
  override predicate needsSynthesizedCall(AstNode node, string tag, DataFlowCallable container) {
    (tag = "class-harness-constructor-call" or tag = "class-harness-method-call") and
    getSynthesizedCallable(node, "class-harness") = container
  }

  /** Requests the nodes used as the `this` argument of the two harness calls. */
  override predicate needsSynthesizedNode(AstNode node, string tag, DataFlowCallable container) {
    // Two nodes are synthesized. The `class-harness-constructor-this-arg` node never has any
    // incoming flow; it only exists so we can say which post-update node belongs to that argument.
    (tag = "class-harness-constructor-this-arg" or tag = "class-harness-method-this-arg") and
    getSynthesizedCallable(node, "class-harness") = container
  }

  /** Wires each synthesized `this-arg` node to the `this` position of its harness call. */
  override predicate argument(DataFlowCall call, ArgumentPosition pos, DataFlow::Node value) {
    exists(Function fun, string callTag, string argTag |
      callTag = "class-harness-constructor-call" and
      argTag = "class-harness-constructor-this-arg"
      or
      callTag = "class-harness-method-call" and
      argTag = "class-harness-method-this-arg"
    |
      call = getSynthesizedCall(fun, callTag) and
      value = getSynthesizedNode(fun, argTag)
    ) and
    pos.isThis()
  }

  /**
   * Makes the method `this` argument the post-update node of both `this` arguments,
   * so the receiver leaving the constructor or any method re-enters the method call.
   */
  override predicate postUpdate(DataFlow::Node pre, DataFlow::Node post) {
    exists(Function fun, string preTag |
      preTag = "class-harness-constructor-this-arg" or
      preTag = "class-harness-method-this-arg"
    |
      post = getSynthesizedNode(fun, "class-harness-method-this-arg") and
      pre = getSynthesizedNode(fun, preTag)
    )
  }

  /**
   * Resolves the harness constructor call to the class constructor, and the harness
   * method call to every instance member of the same class.
   */
  override predicate viableCallable(DataFlowCall call, DataFlowCallable target) {
    exists(DataFlow::ClassNode cls, Function ctor, Function callee |
      ctor = cls.getConstructor().getFunction() and
      target.asSourceCallable() = callee
    |
      call = getSynthesizedCall(ctor, "class-harness-constructor-call") and
      callee = ctor
      or
      call = getSynthesizedCall(ctor, "class-harness-method-call") and
      callee = cls.getAnInstanceMember().getFunction()
    )
  }
}
40 changes: 40 additions & 0 deletions javascript/ql/test/library-tests/TripleDot/class-harness.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import 'dummy';

// Case 1: a value stored on `this` in the constructor is expected to reach a read of the
// same field in an instance method, even though the class is never instantiated here.
function h1() {
class C {
constructor(arg) {
this.x = source("h1.1")
}
method() {
sink(this.x); // $ hasValueFlow=h1.1
}
}
}

// Case 2: a value stored on `this` in one instance method is expected to reach a read of the
// same field in another instance method of the class, with no explicit calls in the source.
function h2() {
class C {
method1() {
this.x = source("h2.1")
}
method2() {
sink(this.x); // $ hasValueFlow=h2.1
}
}
}

// Case 3: a value passed in as a constructor argument is expected to flow only along real
// call sites. Each sink should see just the source whose instance actually invokes that method;
// "h3.1" is never followed by a method call and is expected to reach neither sink.
function h3() {
class C {
constructor(arg) {
this.x = arg;
}
method1() {
sink(this.x); // $ hasValueFlow=h3.2
}
method2() {
sink(this.x); // $ hasValueFlow=h3.3
}
}
new C(source("h3.1"));
new C(source("h3.2")).method1();
new C(source("h3.3")).method2();
}
Loading