Skip to content

Commit

Permalink
Merge pull request #15356 from github/max-schaefer/automodel-void-sou…
Browse files Browse the repository at this point in the history
…rce-candidates

Automodel: Switch tests to inline expectations
  • Loading branch information
max-schaefer authored Jan 22, 2024
2 parents 0a8869c + 99c9914 commit 5c43a0b
Show file tree
Hide file tree
Showing 34 changed files with 440 additions and 260 deletions.
123 changes: 92 additions & 31 deletions java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,12 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
// Sanitizers are currently not modeled in MaD. TODO: check if this has large negative impact.
predicate isSanitizer(Endpoint e, EndpointType t) {
exists(t) and
(
e.asNode().getType() instanceof BoxedType
or
e.asNode().getType() instanceof PrimitiveType
or
e.asNode().getType() instanceof NumberType
)
AutomodelJavaUtil::isUnexploitableType([
// for most endpoints, we can get the type from the node
e.asNode().getType(),
// but not for calls to void methods, where we need to go via the AST
e.asTop().(Expr).getType()
])
or
t instanceof AutomodelEndpointTypes::PathInjectionSinkType and
e.asNode() instanceof PathSanitizer::PathInjectionSanitizer
Expand Down Expand Up @@ -372,62 +371,124 @@ class ApplicationModeMetadataExtractor extends string {
}
}

/**
* Holds if the given `endpoint` should be considered a candidate for the `extensibleType`.
*
* The other parameters record various other properties of interest.
*/
predicate isCandidate(
Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
) {
CharacteristicsImpl::isCandidate(endpoint, _) and
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(endpoint)
) and
any(ApplicationModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargs,
alreadyAiModeled, extensibleType) and
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
// types, and we don't need to reexamine it.
alreadyAiModeled.matches(["", "%ai-%"]) and
AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
}

/**
* Holds if the given `endpoint` is a negative example for the `extensibleType`
* because of the `characteristic`.
*
* The other parameters record various other properties of interest.
*/
predicate isNegativeExample(
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
string type, string subtypes, string name, string signature, string input, string output,
string isVarargsArray, string extensibleType
) {
characteristic.appliesToEndpoint(endpoint) and
// the node is known not to be an endpoint of any appropriate type
forall(AutomodelEndpointTypes::EndpointType tp |
tp = CharacteristicsImpl::getAPotentialType(endpoint)
|
characteristic.hasImplications(tp, false, _)
) and
// the lowest confidence across all endpoint types should be at least highConfidence
confidence =
min(float c |
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
) and
confidence >= SharedCharacteristics::highConfidence() and
any(ApplicationModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, _, extensibleType) and
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
not exists(EndpointCharacteristic characteristic2, float confidence2 |
characteristic2 != characteristic
|
characteristic2.appliesToEndpoint(endpoint) and
confidence2 >= SharedCharacteristics::maximalConfidence() and
characteristic2
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
)
}

/**
* Holds if the given `endpoint` is a positive example for the `endpointType`.
*
* The other parameters record various other properties of interest.
*/
predicate isPositiveExample(
Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
string signature, string input, string output, string isVarargsArray, string extensibleType
) {
any(ApplicationModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, _, extensibleType) and
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
}

/*
* EndpointCharacteristic classes that are specific to Automodel for Java.
*/

/**
* A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks,
* and its return value should not be considered a source.
* A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks.
*
* A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return
* type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does
* the dangerous/interesting thing, so we want the latter to be modeled as the sink.
*
* TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
*/
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }

override predicate appliesToEndpoint(Endpoint e) {
e.getCallable().getName().matches("is%") and
e.getCallable().getReturnType() instanceof BooleanType and
(
e.getExtensibleType() = "sinkModel" and
not ApplicationCandidatesImpl::isSink(e, _, _)
or
e.getExtensibleType() = "sourceModel" and
not ApplicationCandidatesImpl::isSource(e, _, _) and
e.getMaDOutput() = "ReturnValue"
)
not ApplicationCandidatesImpl::isSink(e, _, _)
}
}

/**
* A negative characteristic that indicates that parameters of an existence-checking boolean method should not be
* considered sinks, and its return value should not be considered a source.
* considered sinks.
*
* A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` and the callable has a
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
*/
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }

override predicate appliesToEndpoint(Endpoint e) {
exists(Callable callable |
callable = e.getCallable() and
exists(Callable callable | callable = e.getCallable() |
callable.getName().toLowerCase() = ["exists", "notexists"] and
callable.getReturnType() instanceof BooleanType
|
e.getExtensibleType() = "sinkModel" and
not ApplicationCandidatesImpl::isSink(e, _, _)
or
e.getExtensibleType() = "sourceModel" and
not ApplicationCandidatesImpl::isSource(e, _, _) and
e.getMaDOutput() = "ReturnValue"
)
}
}
Expand Down
24 changes: 6 additions & 18 deletions java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql
Original file line number Diff line number Diff line change
Expand Up @@ -55,27 +55,15 @@ private Endpoint getSampleForSignature(
}

from
Endpoint endpoint, ApplicationModeMetadataExtractor meta, DollarAtString package,
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
DollarAtString input, DollarAtString output, DollarAtString isVarargsArray,
DollarAtString alreadyAiModeled, DollarAtString extensibleType
Endpoint endpoint, DollarAtString package, DollarAtString type, DollarAtString subtypes,
DollarAtString name, DollarAtString signature, DollarAtString input, DollarAtString output,
DollarAtString isVarargsArray, DollarAtString alreadyAiModeled, DollarAtString extensibleType
where
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(endpoint)
) and
CharacteristicsImpl::isCandidate(endpoint, _) and
isCandidate(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray,
extensibleType, alreadyAiModeled) and
endpoint =
getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
isVarargsArray, extensibleType, alreadyAiModeled) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, alreadyAiModeled, extensibleType) and
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
// types, and we don't need to reexamine it.
alreadyAiModeled.matches(["", "%ai-%"]) and
includeAutomodelCandidate(package, type, name, signature)
isVarargsArray, extensibleType, alreadyAiModeled)
select endpoint.asNode(),
"Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,45 +40,15 @@ Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
)
}

predicate candidate(
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
string type, string subtypes, string name, string signature, string input, string output,
string isVarargsArray, string extensibleType
) {
// the node is known not to be an endpoint of any appropriate type
forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
characteristic.hasImplications(tp, false, _)
) and
// the lowest confidence across all endpoint types should be at least highConfidence
confidence =
min(float c |
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
) and
confidence >= SharedCharacteristics::highConfidence() and
any(ApplicationModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, _, extensibleType) and
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
not exists(EndpointCharacteristic characteristic2, float confidence2 |
characteristic2 != characteristic
|
characteristic2.appliesToEndpoint(endpoint) and
confidence2 >= SharedCharacteristics::maximalConfidence() and
characteristic2
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
)
}

from
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
DollarAtString signature, DollarAtString input, DollarAtString output,
DollarAtString isVarargsArray, DollarAtString extensibleType
where
endpoint = getSampleForCharacteristic(characteristic, 100) and
candidate(endpoint, characteristic, confidence, package, type, subtypes, name, signature, input,
output, isVarargsArray, extensibleType) and
isNegativeExample(endpoint, characteristic, confidence, package, type, subtypes, name, signature,
input, output, isVarargsArray, extensibleType) and
message = characteristic
select endpoint.asNode(),
message + "\nrelated locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@ from
DollarAtString signature, DollarAtString input, DollarAtString output,
DollarAtString isVarargsArray, DollarAtString extensibleType
where
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, _, extensibleType) and
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
isPositiveExample(endpoint, endpointType, package, type, subtypes, name, signature, input, output,
isVarargsArray, extensibleType)
select endpoint.asNode(),
endpointType + "\nrelated locations: $@, $@, $@." +
"\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
Expand Down
97 changes: 84 additions & 13 deletions java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ newtype JavaRelatedLocationType =
newtype TFrameworkModeEndpoint =
TExplicitParameter(Parameter p) {
AutomodelJavaUtil::isFromSource(p) and
not p.getType() instanceof PrimitiveType and
not p.getType() instanceof BoxedType and
not p.getType() instanceof NumberType
not AutomodelJavaUtil::isUnexploitableType(p.getType())
} or
TQualifier(Callable c) { AutomodelJavaUtil::isFromSource(c) and not c instanceof Constructor } or
TReturnValue(Callable c) {
Expand All @@ -36,25 +34,19 @@ newtype TFrameworkModeEndpoint =
or
AutomodelJavaUtil::isFromSource(c) and
c instanceof Method and
(
not c.getReturnType() instanceof VoidType and
not c.getReturnType() instanceof PrimitiveType
)
not AutomodelJavaUtil::isUnexploitableType(c.getReturnType())
} or
TOverridableParameter(Method m, Parameter p) {
AutomodelJavaUtil::isFromSource(p) and
not AutomodelJavaUtil::isUnexploitableType(p.getType()) and
p.getCallable() = m and
m instanceof ModelExclusions::ModelApi and
not m.getDeclaringType().isFinal() and
not m.isFinal() and
not m.isStatic()
AutomodelJavaUtil::isOverridable(m)
} or
TOverridableQualifier(Method m) {
AutomodelJavaUtil::isFromSource(m) and
m instanceof ModelExclusions::ModelApi and
not m.getDeclaringType().isFinal() and
not m.isFinal() and
not m.isStatic()
AutomodelJavaUtil::isOverridable(m)
}

/**
Expand Down Expand Up @@ -317,6 +309,85 @@ class FrameworkModeMetadataExtractor extends string {
}
}

/**
* Holds if the given `endpoint` should be considered a candidate for the `extensibleType`.
*
* The other parameters record various other properties of interest.
*/
predicate isCandidate(
Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
string input, string output, string parameterName, string extensibleType, string alreadyAiModeled
) {
CharacteristicsImpl::isCandidate(endpoint, _) and
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(endpoint)
) and
any(FrameworkModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
alreadyAiModeled, extensibleType) and
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
// types, and we don't need to reexamine it.
alreadyAiModeled.matches(["", "%ai-%"]) and
AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
}

/**
* Holds if the given `endpoint` is a negative example for the `extensibleType`
* because of the `characteristic`.
*
* The other parameters record various other properties of interest.
*/
predicate isNegativeExample(
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
string type, string subtypes, string name, string signature, string input, string output,
string parameterName, string extensibleType
) {
characteristic.appliesToEndpoint(endpoint) and
// the node is known not to be an endpoint of any appropriate type
forall(AutomodelEndpointTypes::EndpointType tp |
tp = CharacteristicsImpl::getAPotentialType(endpoint)
|
characteristic.hasImplications(tp, false, _)
) and
// the lowest confidence across all endpoint types should be at least highConfidence
confidence =
min(float c |
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
) and
confidence >= SharedCharacteristics::highConfidence() and
any(FrameworkModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
_, extensibleType) and
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
not exists(EndpointCharacteristic characteristic2, float confidence2 |
characteristic2 != characteristic
|
characteristic2.appliesToEndpoint(endpoint) and
confidence2 >= SharedCharacteristics::maximalConfidence() and
characteristic2
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
)
}

/**
* Holds if the given `endpoint` is a positive example for the `endpointType`.
*
* The other parameters record various other properties of interest.
*/
predicate isPositiveExample(
Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
string signature, string input, string output, string parameterName, string extensibleType
) {
any(FrameworkModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
_, extensibleType) and
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
}

/*
* EndpointCharacteristic classes that are specific to Automodel for Java.
*/
Expand Down
Loading

0 comments on commit 5c43a0b

Please sign in to comment.