diff --git a/javascript/ql/lib/semmle/javascript/dataflow/internal/AdditionalFlowInternal.qll b/javascript/ql/lib/semmle/javascript/dataflow/internal/AdditionalFlowInternal.qll index d7f92ce8dd30..c162491590d9 100644 --- a/javascript/ql/lib/semmle/javascript/dataflow/internal/AdditionalFlowInternal.qll +++ b/javascript/ql/lib/semmle/javascript/dataflow/internal/AdditionalFlowInternal.qll @@ -9,6 +9,14 @@ DataFlow::Node getSynthesizedNode(AstNode node, string tag) { result = TGenericSynthesizedNode(node, tag, _) } +DataFlowCallable getSynthesizedCallable(AstNode node, string tag) { + result = MkGenericSynthesizedCallable(node, tag) +} + +DataFlowCall getSynthesizedCall(AstNode node, string tag) { + result = MkGenericSynthesizedCall(node, tag, _) +} + /** * An extension to `AdditionalFlowStep` with additional internal-only predicates. */ @@ -22,6 +30,10 @@ class AdditionalFlowInternal extends DataFlow::AdditionalFlowStep { */ predicate needsSynthesizedNode(AstNode node, string tag, DataFlowCallable container) { none() } + predicate needsSynthesizedCallable(AstNode node, string tag) { none() } + + predicate needsSynthesizedCall(AstNode node, string tag, DataFlowCallable container) { none() } + /** * Holds if `node` should only permit flow of values stored in `contents`. */ @@ -31,4 +43,10 @@ class AdditionalFlowInternal extends DataFlow::AdditionalFlowStep { * Holds if `node` should not permit flow of values stored in `contents`. 
*/ predicate clearsContent(DataFlow::Node node, DataFlow::ContentSet contents) { none() } + + predicate argument(DataFlowCall call, ArgumentPosition pos, DataFlow::Node value) { none() } + + predicate postUpdate(DataFlow::Node pre, DataFlow::Node post) { none() } + + predicate viableCallable(DataFlowCall call, DataFlowCallable target) { none() } } diff --git a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll index 72de0f5c0458..f898d1c0bd12 100644 --- a/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll +++ b/javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll @@ -13,6 +13,7 @@ private import sharedlib.FlowSummaryImpl as FlowSummaryImpl private import semmle.javascript.dataflow.internal.FlowSummaryPrivate as FlowSummaryPrivate private import semmle.javascript.dataflow.FlowSummary as FlowSummary private import semmle.javascript.dataflow.internal.BarrierGuards +private import codeql.util.Boolean class DataFlowSecondLevelScope = Unit; @@ -381,6 +382,8 @@ predicate postUpdatePair(Node pre, Node post) { pre.(FlowSummaryNode).getSummaryNode()) or VariableCaptureOutput::capturePostUpdateNode(getClosureNode(post), getClosureNode(pre)) + or + any(AdditionalFlowInternal f).postUpdate(pre, post) } class CastNode extends DataFlow::Node { @@ -390,10 +393,13 @@ class CastNode extends DataFlow::Node { cached newtype TDataFlowCallable = MkSourceCallable(StmtContainer container) or - MkLibraryCallable(LibraryCallable callable) + MkLibraryCallable(LibraryCallable callable) or + MkGenericSynthesizedCallable(AstNode node, string tag) { + any(AdditionalFlowInternal f).needsSynthesizedCallable(node, tag) + } /** - * A callable entity. This is a wrapper around either a `StmtContainer` or a `LibraryCallable`. + * A callable entity. */ class DataFlowCallable extends TDataFlowCallable { /** Gets a string representation of this callable. 
*/ @@ -401,14 +407,28 @@ class DataFlowCallable extends TDataFlowCallable { result = this.asSourceCallable().toString() or result = this.asLibraryCallable() + or + this.isGenericSynthesizedCallable(_, result) } /** Gets the location of this callable, if it is present in the source code. */ - Location getLocation() { result = this.asSourceCallable().getLocation() } + Location getLocation() { + result = this.asSourceCallable().getLocation() + or + exists(AstNode node | + this.isGenericSynthesizedCallable(node, _) and + result = node.getLocation() + ) + } /** Gets the corresponding `StmtContainer` if this is a source callable. */ StmtContainer asSourceCallable() { this = MkSourceCallable(result) } + /** Holds if this is the generic synthesized callable for `node` with the given `tag`. */ + predicate isGenericSynthesizedCallable(AstNode node, string tag) { + this = MkGenericSynthesizedCallable(node, tag) + } + /** Gets the corresponding `StmtContainer` if this is a source callable. */ pragma[nomagic] StmtContainer asSourceCallableNotExterns() { @@ -537,6 +557,8 @@ private predicate isArgumentNodeImpl(Node n, DataFlowCall call, ArgumentPosition n = TDynamicArgumentArrayNode(invoke) and pos.isDynamicArgumentArray() ) + or + any(AdditionalFlowInternal f).argument(call, pos, n) } predicate isArgumentNode(ArgumentNode n, DataFlowCall call, ArgumentPosition pos) { @@ -734,7 +756,7 @@ ContentApprox getContentApprox(Content c) { } cached -private newtype TDataFlowCall = +newtype TDataFlowCall = MkOrdinaryCall(DataFlow::InvokeNode node) or MkPartialCall(DataFlow::PartialInvokeNode node, DataFlow::Node callback) { callback = node.getACallbackNode() @@ -755,6 +777,9 @@ private newtype TDataFlowCall = FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver ) { FlowSummaryImpl::Private::summaryCallbackRange(c, receiver) + } or + MkGenericSynthesizedCall(AstNode node, string tag, DataFlowCallable container) { + any(AdditionalFlowInternal
f).needsSynthesizedCall(node, tag, container) } private module TotalOrdering { @@ -820,6 +845,10 @@ class DataFlowCall extends TDataFlowCall { this = MkSummaryCall(enclosingCallable, receiver) } + predicate isGenericSynthesizedCall(AstNode node, string tag, DataFlowCallable container) { + this = MkGenericSynthesizedCall(node, tag, container) + } + Location getLocation() { none() } // Overridden in subclass int totalorder() { @@ -938,6 +967,20 @@ private class ImpliedLambdaCall extends DataFlowCall, MkImpliedLambdaCall { } } +class GenericSynthesizedCall extends DataFlowCall, MkGenericSynthesizedCall { + private AstNode node; + private string tag; + private DataFlowCallable container; + + GenericSynthesizedCall() { this = MkGenericSynthesizedCall(node, tag, container) } + + override string toString() { result = tag } + + override Location getLocation() { result = node.getLocation() } + + override DataFlowCallable getEnclosingCallable() { result = container } +} + private int getMaxArity() { // TODO: account for flow summaries result = @@ -1035,6 +1078,8 @@ DataFlowCallable viableCallable(DataFlowCall node) { ) or result.asSourceCallableNotExterns() = node.asImpliedLambdaCall() + or + any(AdditionalFlowInternal f).viableCallable(node, result) } /** diff --git a/javascript/ql/lib/semmle/javascript/internal/flow_summaries/AllFlowSummaries.qll b/javascript/ql/lib/semmle/javascript/internal/flow_summaries/AllFlowSummaries.qll index 5935fa8bfd60..21538ef58a22 100644 --- a/javascript/ql/lib/semmle/javascript/internal/flow_summaries/AllFlowSummaries.qll +++ b/javascript/ql/lib/semmle/javascript/internal/flow_summaries/AllFlowSummaries.qll @@ -11,3 +11,4 @@ private import Promises private import Sets private import Strings private import DynamicImportStep +private import ClassHarness diff --git a/javascript/ql/lib/semmle/javascript/internal/flow_summaries/ClassHarness.qll b/javascript/ql/lib/semmle/javascript/internal/flow_summaries/ClassHarness.qll new file mode 100644 
index 000000000000..2a8bb2580f78 --- /dev/null +++ b/javascript/ql/lib/semmle/javascript/internal/flow_summaries/ClassHarness.qll @@ -0,0 +1,104 @@ +/** + * Contains flow for the "class harness", which facilitates flow from constructor to methods in a class. + */ + +private import javascript +private import semmle.javascript.dataflow.internal.DataFlowNode +private import semmle.javascript.dataflow.internal.AdditionalFlowInternal +private import semmle.javascript.dataflow.internal.DataFlowPrivate + +/** + * Synthesizes a callable for each class, which invokes the class constructor and every + * instance method with the same value of `this`. + * + * This ensures flow between methods in a class when the source originated "within the class", + * but not when the flow into the field came from an argument. + * + * For example: + * ```js + * class C { + * constructor(arg) { + * this.x = sourceOfTaint(); + * this.y = arg; + * } + * method() { + * sink(this.x); // sourceOfTaint() flows here + * sink(this.y); // but 'arg' does not flow here (only through real call sites) + * } + * } + * ``` + * + * The class harness for a class `C` can roughly be thought of as the following code: + * ```js + * function classHarness() { + * var c = new C(); + * while (true) { + * // call an arbitrary instance method in the loop + * c.arbitraryInstanceMethod(); + * } + * } + * ``` + * + * This is realized with the following data flow graph: + * ``` + * [Call to constructor] + * | + * | post-update for 'this' argument + * V + * [Data flow node] <----------------------+ + * | | + * | 'this' argument | post-update for 'this' argument + * V | + * [Call to an instance method] -----------+ + * ``` + */ +class ClassHarnessModel extends AdditionalFlowInternal { + override predicate needsSynthesizedCallable(AstNode node, string tag) { + node instanceof Function and + not node instanceof ArrowFunctionExpr and // can't be called with 'new' + not node.getTopLevel().isExterns() and // we don't need
harnesses in externs + tag = "class-harness" + } + + override predicate needsSynthesizedCall(AstNode node, string tag, DataFlowCallable container) { + container = getSynthesizedCallable(node, "class-harness") and + tag = ["class-harness-constructor-call", "class-harness-method-call"] + } + + override predicate needsSynthesizedNode(AstNode node, string tag, DataFlowCallable container) { + // We synthesize two nodes, but note that `class-harness-constructor-this-arg` never actually has any + // ingoing flow, we just need it to specify which post-update node to use for that argument. + container = getSynthesizedCallable(node, "class-harness") and + tag = ["class-harness-constructor-this-arg", "class-harness-method-this-arg"] + } + + override predicate argument(DataFlowCall call, ArgumentPosition pos, DataFlow::Node value) { + pos.isThis() and + exists(Function f | + call = getSynthesizedCall(f, "class-harness-constructor-call") and + value = getSynthesizedNode(f, "class-harness-constructor-this-arg") + or + call = getSynthesizedCall(f, "class-harness-method-call") and + value = getSynthesizedNode(f, "class-harness-method-this-arg") + ) + } + + override predicate postUpdate(DataFlow::Node pre, DataFlow::Node post) { + exists(Function f | + pre = + getSynthesizedNode(f, + ["class-harness-constructor-this-arg", "class-harness-method-this-arg"]) and + post = getSynthesizedNode(f, "class-harness-method-this-arg") + ) + } + + override predicate viableCallable(DataFlowCall call, DataFlowCallable target) { + exists(DataFlow::ClassNode cls, Function f | f = cls.getConstructor().getFunction() | + call = getSynthesizedCall(f, "class-harness-constructor-call") and + target.asSourceCallable() = f + or + call = getSynthesizedCall(f, "class-harness-method-call") and + target.asSourceCallable() = cls.getAnInstanceMember().getFunction() + ) + } +} diff --git a/javascript/ql/test/library-tests/TripleDot/class-harness.js b/javascript/ql/test/library-tests/TripleDot/class-harness.js new 
file mode 100644 index 000000000000..87a232fd9eee --- /dev/null +++ b/javascript/ql/test/library-tests/TripleDot/class-harness.js @@ -0,0 +1,40 @@ +import 'dummy'; + +function h1() { + class C { + constructor(arg) { + this.x = source("h1.1") + } + method() { + sink(this.x); // $ hasValueFlow=h1.1 + } + } +} + +function h2() { + class C { + method1() { + this.x = source("h2.1") + } + method2() { + sink(this.x); // $ hasValueFlow=h2.1 + } + } +} + +function h3() { + class C { + constructor(arg) { + this.x = arg; + } + method1() { + sink(this.x); // $ hasValueFlow=h3.2 + } + method2() { + sink(this.x); // $ hasValueFlow=h3.3 + } + } + new C(source("h3.1")); + new C(source("h3.2")).method1(); + new C(source("h3.3")).method2(); +}