From ebca2f3287c95f90d898fc0ff0169bce8281f1e9 Mon Sep 17 00:00:00 2001 From: Sergey Morozov Date: Fri, 22 Dec 2023 18:19:54 +0300 Subject: [PATCH] [feat] Adds support for reporting errors in SARIF format: * Introduces `CodeEvent` as a base unit for storing information about event in code. * `CodeEvent`s are managed with `EventRecorder`, capable of serializing recorded trace. * `terminateStateOnProgramError` receives an `ErrorEvent` object containing all required information about error. [feat] Enhances `gepExprBases`: * Bases for addresses stored for constant expressions. * Precalculates bases for `llvm::ConstantExpr` (i.e. for `getElementPtr` in arguments of instructions). [perf] Removes checks on `baseInBounds` during memory operations. [fix] Adds hacks for managing objects with neighboring addresses (in some cases `gepExprBases` could assume that the beginning of one object is the end of another). [fix] Fixes ODR violation with `llvm::APFloat::RoundingMode`. --- include/klee/ADT/ImmutableList.h | 8 +- include/klee/Core/Interpreter.h | 7 + include/klee/Expr/SourceBuilder.h | 4 +- include/klee/Module/KModule.h | 2 + include/klee/Module/LocationInfo.h | 40 +- include/klee/Module/SarifReport.h | 38 +- lib/Core/CMakeLists.txt | 2 + lib/Core/CodeEvent.h | 203 ++++++++++ lib/Core/CodeLocation.cpp | 47 +++ lib/Core/CodeLocation.h | 82 ++++ lib/Core/EventRecorder.cpp | 93 +++++ lib/Core/EventRecorder.h | 72 ++++ lib/Core/ExecutionState.cpp | 9 +- lib/Core/ExecutionState.h | 33 ++ lib/Core/Executor.cpp | 382 +++++++++++++----- lib/Core/Executor.h | 27 +- lib/Core/ExecutorUtil.cpp | 15 + lib/Core/ExternalDispatcher.h | 2 +- lib/Core/Memory.cpp | 20 +- lib/Core/Memory.h | 11 +- lib/Core/MemoryManager.cpp | 4 +- lib/Core/MemoryManager.h | 5 +- lib/Core/SpecialFunctionHandler.cpp | 40 +- lib/Expr/APFloatEval.cpp | 1 + lib/Expr/Expr.cpp | 1 + lib/Module/CMakeLists.txt | 2 + lib/Module/KInstruction.cpp | 2 +- lib/Module/KModule.cpp | 8 + 
lib/Module/LocalVarDeclarationFinderPass.cpp | 48 +++ lib/Module/LocationInfo.cpp | 45 ++- lib/Module/Passes.h | 40 ++ lib/Module/ReturnLocationFinderPass.cpp | 91 +++++ lib/Module/SarifReport.cpp | 2 +- lib/Solver/BitwuzlaBuilder.cpp | 1 + lib/Solver/BitwuzlaBuilder.h | 6 + lib/Solver/BitwuzlaSolver.cpp | 4 +- lib/Solver/Z3BitvectorBuilder.cpp | 1 + lib/Solver/Z3BitvectorBuilder.h | 6 + lib/Support/RoundingModeUtil.cpp | 1 + .../SymbolicSizes/FirstAndLastElements.c | 2 +- .../GSAC/ContextSensitive/ContextSensitive.c | 36 ++ .../SARIF/GSAC/ContextSensitive/pattern.sarif | 57 +++ test/SARIF/GSAC/EASY02/EASY02.c | 23 ++ test/SARIF/GSAC/EASY02/pattern.sarif | 57 +++ test/SARIF/GSAC/EASY02_fix/EASY02_fix.c | 22 + test/SARIF/GSAC/EASY03/EASY03.c | 26 ++ test/SARIF/GSAC/EASY03/pattern.sarif | 57 +++ test/SARIF/GSAC/EASY03_fix/EASY03_fix.c | 25 ++ .../GSAC/FieldSensitive/FieldSensitive.c | 41 ++ test/SARIF/GSAC/FieldSensitive/pattern.sarif | 57 +++ test/SARIF/GSAC/FlowSensitive/FlowSensitive.c | 54 +++ test/SARIF/GSAC/FlowSensitive/pattern.sarif | 57 +++ test/SARIF/GSAC/HARD01/HARD01.c | 49 +++ test/SARIF/GSAC/HARD01/pattern.sarif | 57 +++ test/SARIF/GSAC/HARD01_fix/HARD01_fix.c | 48 +++ test/SARIF/GSAC/HARD02/HARD02.c | 47 +++ test/SARIF/GSAC/HARD02/HARD02.h | 35 ++ test/SARIF/GSAC/HARD02/pattern.sarif | 57 +++ test/SARIF/GSAC/HARD02_fix/HARD02_fix.c | 46 +++ test/SARIF/GSAC/HARD02_fix/HARD02_fix.h | 35 ++ test/SARIF/GSAC/MEDIUM01/MEDIUM01.c | 48 +++ test/SARIF/GSAC/MEDIUM01/pattern.sarif | 57 +++ test/SARIF/GSAC/MEDIUM01_fix/MEDIUM01_fix.c | 46 +++ test/SARIF/GSAC/MEDIUM02/MEDIUM02.c | 38 ++ test/SARIF/GSAC/MEDIUM02/pattern.sarif | 57 +++ test/SARIF/GSAC/MEDIUM02_fix/MEDIUM02_fix.c | 40 ++ test/SARIF/GSAC/MEDIUM03/MEDIUM03.c | 79 ++++ test/SARIF/GSAC/MEDIUM03/pattern.sarif | 57 +++ test/SARIF/GSAC/MEDIUM03_fix/MEDIUM03_fix.c | 81 ++++ test/SARIF/GSAC/MEDIUM04/MEDIUM04.c | 38 ++ test/SARIF/GSAC/MEDIUM04/pattern.sarif | 57 +++ test/SARIF/GSAC/MEDIUM04_fix/MEDIUM04_fix.c | 
37 ++ test/SARIF/GSAC/MEDIUM05/MEDIUM05.c | 49 +++ test/SARIF/GSAC/MEDIUM05/pattern.sarif | 57 +++ test/SARIF/GSAC/MEDIUM05_fix/MEDIUM05_fix.c | 48 +++ test/SARIF/GSAC/PathSensitive/PathSensitive.c | 36 ++ test/SARIF/GSAC/PathSensitive/pattern.sarif | 57 +++ .../LazyInitialization/LazyInitialization.c | 16 + .../Generic/LazyInitialization/pattern.sarif | 69 ++++ .../SymbolicSizeArray/SymbolicSizeArray.c | 18 + .../Generic/SymbolicSizeArray/pattern.sarif | 106 +++++ test/SARIF/checker.py | 124 ++++++ test/SARIF/lit.local.cfg | 13 + tools/klee/main.cpp | 52 ++- 84 files changed, 3404 insertions(+), 169 deletions(-) create mode 100644 lib/Core/CodeEvent.h create mode 100644 lib/Core/CodeLocation.cpp create mode 100644 lib/Core/CodeLocation.h create mode 100644 lib/Core/EventRecorder.cpp create mode 100644 lib/Core/EventRecorder.h create mode 100644 lib/Module/LocalVarDeclarationFinderPass.cpp create mode 100644 lib/Module/ReturnLocationFinderPass.cpp create mode 100644 test/SARIF/GSAC/ContextSensitive/ContextSensitive.c create mode 100644 test/SARIF/GSAC/ContextSensitive/pattern.sarif create mode 100644 test/SARIF/GSAC/EASY02/EASY02.c create mode 100644 test/SARIF/GSAC/EASY02/pattern.sarif create mode 100644 test/SARIF/GSAC/EASY02_fix/EASY02_fix.c create mode 100644 test/SARIF/GSAC/EASY03/EASY03.c create mode 100644 test/SARIF/GSAC/EASY03/pattern.sarif create mode 100644 test/SARIF/GSAC/EASY03_fix/EASY03_fix.c create mode 100644 test/SARIF/GSAC/FieldSensitive/FieldSensitive.c create mode 100644 test/SARIF/GSAC/FieldSensitive/pattern.sarif create mode 100644 test/SARIF/GSAC/FlowSensitive/FlowSensitive.c create mode 100644 test/SARIF/GSAC/FlowSensitive/pattern.sarif create mode 100644 test/SARIF/GSAC/HARD01/HARD01.c create mode 100644 test/SARIF/GSAC/HARD01/pattern.sarif create mode 100644 test/SARIF/GSAC/HARD01_fix/HARD01_fix.c create mode 100644 test/SARIF/GSAC/HARD02/HARD02.c create mode 100644 test/SARIF/GSAC/HARD02/HARD02.h create mode 100644 
test/SARIF/GSAC/HARD02/pattern.sarif create mode 100644 test/SARIF/GSAC/HARD02_fix/HARD02_fix.c create mode 100644 test/SARIF/GSAC/HARD02_fix/HARD02_fix.h create mode 100644 test/SARIF/GSAC/MEDIUM01/MEDIUM01.c create mode 100644 test/SARIF/GSAC/MEDIUM01/pattern.sarif create mode 100644 test/SARIF/GSAC/MEDIUM01_fix/MEDIUM01_fix.c create mode 100644 test/SARIF/GSAC/MEDIUM02/MEDIUM02.c create mode 100644 test/SARIF/GSAC/MEDIUM02/pattern.sarif create mode 100644 test/SARIF/GSAC/MEDIUM02_fix/MEDIUM02_fix.c create mode 100644 test/SARIF/GSAC/MEDIUM03/MEDIUM03.c create mode 100644 test/SARIF/GSAC/MEDIUM03/pattern.sarif create mode 100644 test/SARIF/GSAC/MEDIUM03_fix/MEDIUM03_fix.c create mode 100644 test/SARIF/GSAC/MEDIUM04/MEDIUM04.c create mode 100644 test/SARIF/GSAC/MEDIUM04/pattern.sarif create mode 100644 test/SARIF/GSAC/MEDIUM04_fix/MEDIUM04_fix.c create mode 100644 test/SARIF/GSAC/MEDIUM05/MEDIUM05.c create mode 100644 test/SARIF/GSAC/MEDIUM05/pattern.sarif create mode 100644 test/SARIF/GSAC/MEDIUM05_fix/MEDIUM05_fix.c create mode 100644 test/SARIF/GSAC/PathSensitive/PathSensitive.c create mode 100644 test/SARIF/GSAC/PathSensitive/pattern.sarif create mode 100644 test/SARIF/Generic/LazyInitialization/LazyInitialization.c create mode 100644 test/SARIF/Generic/LazyInitialization/pattern.sarif create mode 100644 test/SARIF/Generic/SymbolicSizeArray/SymbolicSizeArray.c create mode 100644 test/SARIF/Generic/SymbolicSizeArray/pattern.sarif create mode 100644 test/SARIF/checker.py create mode 100644 test/SARIF/lit.local.cfg diff --git a/include/klee/ADT/ImmutableList.h b/include/klee/ADT/ImmutableList.h index 73ce89f448..39efedc2ab 100644 --- a/include/klee/ADT/ImmutableList.h +++ b/include/klee/ADT/ImmutableList.h @@ -97,9 +97,9 @@ template class ImmutableList { node->values.push_back(value); } - bool empty() { return size() == 0; } + bool empty() const { return size() == 0; } - const T &back() { + const T &back() const { assert(node && "requiers not empty list"); auto 
it = iterator(node.get()); it.get = size() - 1; @@ -109,6 +109,10 @@ template class ImmutableList { ImmutableList() : node(){}; ImmutableList(const ImmutableList &il) : node(std::make_shared(il)) {} + ImmutableList &operator=(const ImmutableList &il) { + node = std::make_shared(il); + return *this; + } }; } // namespace klee diff --git a/include/klee/Core/Interpreter.h b/include/klee/Core/Interpreter.h index 04eec908de..1d9cb96dab 100644 --- a/include/klee/Core/Interpreter.h +++ b/include/klee/Core/Interpreter.h @@ -38,6 +38,7 @@ class raw_fd_ostream; namespace klee { class ExecutionState; struct SarifReport; +struct ToolJson; class Interpreter; class TreeStreamWriter; @@ -57,6 +58,8 @@ class InterpreterHandler { virtual void processTestCase(const ExecutionState &state, const char *message, const char *suffix, bool isError = false) = 0; + + virtual ToolJson info() const = 0; }; /// [File][Line][Column] -> Opcode @@ -228,6 +231,10 @@ class Interpreter { virtual bool getSymbolicSolution(const ExecutionState &state, KTest &res) = 0; + virtual void addSARIFReport(const ExecutionState &state) = 0; + + virtual SarifReportJson getSARIFReport() const = 0; + virtual void logState(const ExecutionState &state, int id, std::unique_ptr &f) = 0; diff --git a/include/klee/Expr/SourceBuilder.h b/include/klee/Expr/SourceBuilder.h index 1a353901e0..43cf6dd19b 100644 --- a/include/klee/Expr/SourceBuilder.h +++ b/include/klee/Expr/SourceBuilder.h @@ -5,8 +5,8 @@ namespace klee { -class KInstruction; -class KGlobalVariable; +struct KInstruction; +struct KGlobalVariable; template class SparseStorage; template class ref; diff --git a/include/klee/Module/KModule.h b/include/klee/Module/KModule.h index 5b8871b2e2..de50a03d6b 100644 --- a/include/klee/Module/KModule.h +++ b/include/klee/Module/KModule.h @@ -381,6 +381,8 @@ class KModule { KBlock *getKBlock(const llvm::BasicBlock *bb); + bool inMainModule(const llvm::Instruction &i); + bool inMainModule(const llvm::Function &f); bool 
inMainModule(const llvm::GlobalVariable &v); diff --git a/include/klee/Module/LocationInfo.h b/include/klee/Module/LocationInfo.h index 6aaa84b8a2..4c90713bb9 100644 --- a/include/klee/Module/LocationInfo.h +++ b/include/klee/Module/LocationInfo.h @@ -1,16 +1,18 @@ -////===-- LocationInfo.h ----------------------------------*- C++ -*-===// -//// -//// The KLEE Symbolic Virtual Machine -//// -//// This file is distributed under the University of Illinois Open Source -//// License. See LICENSE.TXT for details. -//// -////===----------------------------------------------------------------------===// +////===-- LocationInfo.h ----------------------------------------*- C++ -*-===// +// +// The KLEEF Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// #ifndef KLEE_LOCATIONINFO_H #define KLEE_LOCATIONINFO_H +#include #include +#include #include namespace llvm { @@ -21,11 +23,27 @@ class Module; } // namespace llvm namespace klee { +struct PhysicalLocationJson; +} +namespace klee { + +/// @brief Immutable struct representing location in source code. struct LocationInfo { - std::string file; - size_t line; - size_t column; + /// @brief Path to source file for that location. + const std::string file; + + /// @brief Code line in source file. + const uint64_t line; + + /// @brief Column number in source file. + const std::optional column; + + /// @brief Converts location info to SARIFs representation + /// of location. + /// @param location location info in source code. + /// @return SARIFs representation of location. 
+ PhysicalLocationJson serialize() const; }; LocationInfo getLocationInfo(const llvm::Function *func); diff --git a/include/klee/Module/SarifReport.h b/include/klee/Module/SarifReport.h index 09652206dd..7f80b7ed82 100644 --- a/include/klee/Module/SarifReport.h +++ b/include/klee/Module/SarifReport.h @@ -58,6 +58,11 @@ std::string getErrorsString(const std::vector &errors); struct FunctionInfo; struct KBlock; +struct LocationInfo; + +struct Message { + std::string text; +}; struct ArtifactLocationJson { std::optional uri; @@ -76,6 +81,7 @@ struct PhysicalLocationJson { }; struct LocationJson { + std::optional message; std::optional physicalLocation; }; @@ -92,10 +98,6 @@ struct CodeFlowJson { std::vector threadFlows; }; -struct Message { - std::string text; -}; - struct Fingerprints { std::string cooddy_uid; }; @@ -110,6 +112,7 @@ static void from_json(const json &j, Fingerprints &p) { struct ResultJson { std::optional ruleId; + std::optional level; std::optional message; std::optional id; std::optional fingerprints; @@ -117,8 +120,17 @@ struct ResultJson { std::vector codeFlows; }; +struct RuleJson { + std::string id; + std::optional name; + std::optional shortDescription; + std::optional helpUri; +}; + struct DriverJson { std::string name; + std::optional informationUri; + std::vector rules; }; struct ToolJson { @@ -131,9 +143,13 @@ struct RunJson { }; struct SarifReportJson { + std::string version; std::vector runs; }; +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(RuleJson, id, name, + shortDescription, helpUri) + NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(ArtifactLocationJson, uri) NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(RegionJson, startLine, endLine, @@ -142,7 +158,8 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(RegionJson, startLine, endLine, NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(PhysicalLocationJson, artifactLocation, region) -NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(LocationJson, physicalLocation) 
+NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(LocationJson, message, + physicalLocation) NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(ThreadFlowLocationJson, location, metadata) @@ -153,17 +170,18 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(CodeFlowJson, threadFlows) NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Message, text) -NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(ResultJson, ruleId, message, id, - fingerprints, codeFlows, - locations) +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(ResultJson, ruleId, level, + message, id, fingerprints, + codeFlows, locations) -NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(DriverJson, name) +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(DriverJson, name, + informationUri, rules) NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(ToolJson, driver) NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(RunJson, results, tool) -NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(SarifReportJson, runs) +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(SarifReportJson, version, runs) struct Location { struct LocationHash { diff --git a/lib/Core/CMakeLists.txt b/lib/Core/CMakeLists.txt index 9e0dbb715c..1ae568fc43 100644 --- a/lib/Core/CMakeLists.txt +++ b/lib/Core/CMakeLists.txt @@ -10,10 +10,12 @@ add_library(kleeCore AddressManager.cpp AddressSpace.cpp CallPathManager.cpp + CodeLocation.cpp Context.cpp CoreStats.cpp CXXTypeSystem/CXXTypeManager.cpp DistanceCalculator.cpp + EventRecorder.cpp ExecutionState.cpp Executor.cpp ExecutorUtil.cpp diff --git a/lib/Core/CodeEvent.h b/lib/Core/CodeEvent.h new file mode 100644 index 0000000000..f266208629 --- /dev/null +++ b/lib/Core/CodeEvent.h @@ -0,0 +1,203 @@ +//===-- CodeEvent.h ---------------------------------------------*- C++ -*-===// +// +// The KLEEF Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef KLEE_CODE_EVENT_H +#define KLEE_CODE_EVENT_H + +#include "CodeLocation.h" +#include "klee/ADT/Ref.h" + +#include "klee/Module/KModule.h" +#include "klee/Module/KValue.h" +#include "klee/Module/SarifReport.h" + +#include "klee/Support/CompilerWarning.h" +DISABLE_WARNING_PUSH +DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Casting.h" +DISABLE_WARNING_POP + +#include +#include + +namespace klee { + +/// @brief Base unit for storing information about event from source code. +/// Chain of such units may form a history for particular execution state. +class CodeEvent { +public: + /// @brief Serves for LLVM RTTI purposes. + const enum EventKind { ALLOC, BR, CALL, ERR, RET } kind; + +public: + /// @brief Required by klee::ref-managed objects + class ReferenceCounter _refCount; + + /// @brief Location information for the event + const ref location; + + explicit CodeEvent(EventKind kind, const ref &location) + : kind(kind), location(location) {} + + /// @brief Additional info to current event. + /// @return String describing event. + virtual std::string description() const = 0; + + /// @brief Serialize event to the JSON format. + /// @return JSON object describing event. + LocationJson serialize() const { + LocationJson result; + + result.message = {{description()}}; + result.physicalLocation = {{location->serialize()}}; + + return result; + } + + /// @brief Kind of event used for LLVM RTTI. + /// @return Kind of event. + EventKind getKind() const { return kind; } + + virtual ~CodeEvent() = default; +}; + +/// @brief Event describing any allocating event. 
+struct AllocEvent : public CodeEvent { + AllocEvent(ref location) + : CodeEvent(EventKind::ALLOC, location) {} + + std::string description() const override { + const KValue *source = location->source; + if (llvm::isa(source)) { + return std::string("Global memory allocation"); + } + + if (llvm::isa(source->unwrap())) { + return std::string("Local memory allocation"); + } + + return std::string("Heap memory allocation"); + } + + static bool classof(const CodeEvent *rhs) { + return rhs->getKind() == EventKind::ALLOC; + } +}; + +/// @brief Event describing conditional `br` event. +class BrEvent : public CodeEvent { +private: + /// @brief Described chosen branch event: `true` if + /// `then`-branch was chosen and `false` otherwise. + bool chosenBranch = true; + +public: + explicit BrEvent(const ref &location) + : CodeEvent(EventKind::BR, location) {} + + /// @brief Modifies chosen branch for `this` event. + /// @param branch true if condition in chosen branch is true and false + /// otherwise. + /// @return Reference to this object. + /// @note This function does not return modified copy of this object; it + /// returns *this* object. + BrEvent &withBranch(bool branch) { + chosenBranch = branch; + return *this; + } + + std::string description() const override { + return std::string("Choosing ") + + (chosenBranch ? std::string("true") : std::string("false")) + + std::string(" branch"); + } + + static bool classof(const CodeEvent *rhs) { + return rhs->getKind() == EventKind::BR; + } +}; + +/// @brief Event describing conditional `call` to function event. +class CallEvent : public CodeEvent { +private: + /// @brief Called function. Provides additional info + /// for event description. 
+ const KFunction *const called; + +public: + explicit CallEvent(const ref &location, + const KFunction *const called) + : CodeEvent(EventKind::CALL, location), called(called) {} + + std::string description() const override { + return std::string("Calling '") + called->getName().str() + + std::string("()'"); + } + + static bool classof(const CodeEvent *rhs) { + return rhs->getKind() == EventKind::CALL; + } +}; + +/// @brief Event describing any error event. +/// Kind of error is described in ruleID. +struct ErrorEvent : public CodeEvent { +public: + /// @brief ID for this error. + const StateTerminationType ruleID; + + /// @brief Message describing this error. + const std::string message; + + /// @brief Event associated with this error + /// which may be treated as a "source" of error + /// (e.g. memory allocation for Out-Of-Bounds error). + const std::optional> source; + + ErrorEvent(const ref &source, const ref &sink, + StateTerminationType ruleID, const std::string &message) + : CodeEvent(EventKind::ERR, sink), ruleID(ruleID), message(message), + source(source) {} + + ErrorEvent(const ref &sink, StateTerminationType ruleID, + const std::string &message) + : CodeEvent(EventKind::ERR, sink), ruleID(ruleID), message(message), + source(std::nullopt) {} + + std::string description() const override { return message; } + + static bool classof(const CodeEvent *rhs) { + return rhs->getKind() == EventKind::ERR; + } +}; + +/// @brief Event describing conditional `return` from function event. +class ReturnEvent : public CodeEvent { +private: + /// @brief Function to which control flow returns. 
+ const KFunction *const caller; + +public: + explicit ReturnEvent(const ref &location, + const KFunction *const caller) + : CodeEvent(EventKind::RET, location), caller(caller) {} + + std::string description() const override { + return std::string("Returning to '") + caller->getName().str() + + std::string("()'"); + } + + static bool classof(const CodeEvent *rhs) { + return rhs->getKind() == EventKind::RET; + } +}; + +} // namespace klee + +#endif // KLEE_CODE_EVENT_H diff --git a/lib/Core/CodeLocation.cpp b/lib/Core/CodeLocation.cpp new file mode 100644 index 0000000000..ad67cc0622 --- /dev/null +++ b/lib/Core/CodeLocation.cpp @@ -0,0 +1,47 @@ +//===-- CodeLocation.cpp ----------------------------------------*- C++ -*-===// +// +// The KLEEF Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "CodeLocation.h" + +#include "klee/ADT/Ref.h" +#include "klee/Module/LocationInfo.h" +#include "klee/Module/SarifReport.h" + +#include +#include + +using namespace klee; + +CodeLocation::CodeLocation(const Path::PathIndex &pathIndex, + const KValue *source, + const std::string &sourceFilename, + uint64_t sourceCodeLine, + std::optional sourceCodeColumn) + : pathIndex(pathIndex), source(source), + location(LocationInfo{sourceFilename, sourceCodeLine, sourceCodeColumn}) { +} + +ref +CodeLocation::create(const Path::PathIndex &pathIndex, const KValue *source, + const std::string &sourceFilename, uint64_t sourceCodeLine, + std::optional sourceCodeColumn = std::nullopt) { + return new CodeLocation(pathIndex, source, sourceFilename, sourceCodeLine, + sourceCodeColumn); +} + +ref +CodeLocation::create(const KValue *source, const std::string &sourceFilename, + uint64_t sourceCodeLine, + std::optional sourceCodeColumn = std::nullopt) { + return new CodeLocation(Path::PathIndex{0, 0}, source, 
sourceFilename, + sourceCodeLine, sourceCodeColumn); +} + +PhysicalLocationJson CodeLocation::serialize() const { + return location.serialize(); +} diff --git a/lib/Core/CodeLocation.h b/lib/Core/CodeLocation.h new file mode 100644 index 0000000000..77676abfa6 --- /dev/null +++ b/lib/Core/CodeLocation.h @@ -0,0 +1,82 @@ +//===-- CodeLocation.h ------------------------------------------*- C++ -*-===// +// +// The KLEEF Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef KLEE_CODE_LOCATION_H +#define KLEE_CODE_LOCATION_H + +#include "klee/ADT/Ref.h" +#include "klee/Expr/Path.h" +#include "klee/Module/LocationInfo.h" + +#include +#include +#include + +namespace klee { + +struct PhysicalLocationJson; +struct KValue; + +/// @brief Represents the location in source code with additional +/// information about place in the state's path. +struct CodeLocation { + /// @brief Required by klee::ref-managed objects + class ReferenceCounter _refCount; + + /// @brief Path index in `Path`. + const Path::PathIndex pathIndex; + + /// @brief Corresponding llvm entity. + const KValue *source; + + /// @brief Location in source code. + const LocationInfo location; + +private: + CodeLocation(const Path::PathIndex &pathIndex, const KValue *source, + const std::string &sourceFilename, uint64_t sourceCodeLine, + std::optional sourceCodeColumn); + + CodeLocation(const CodeLocation &) = delete; + CodeLocation &operator=(const CodeLocation &) = delete; + +public: + /// @brief Factory method for `CodeLocation` enhanced with `PathIndex` + /// in history. Wraps constructed objects in the ref to provide + /// zero-cost copying of code locations. + /// @param sourceFilename Name of source file to which location refers. + /// @param sourceCodeLine Line in source to which location refers. 
+ /// @param sourceCodeColumn Column in source code to which location refers. + /// @return `CodeLocation` representing the location in source code. + static ref create(const Path::PathIndex &pathIndex, + const KValue *source, + const std::string &sourceFilename, + uint64_t sourceCodeLine, + std::optional sourceCodeColumn); + + /// @brief Factory method for `CodeLocation`. Wraps constructed + /// objects in the ref to provide zero-cost copying of code locations. + /// @param sourceFilename Name of source file to which location refers. + /// @param sourceCodeLine Line in source to which location refers. + /// @param sourceCodeColumn Column in source code to which location refers. + /// @return `CodeLocation` representing the location in source code. + static ref create(const KValue *source, + const std::string &sourceFilename, + uint64_t sourceCodeLine, + std::optional sourceCodeColumn); + + /// @brief Converts code location info to SARIFs representation + /// of location. + /// @param location location info in source code. + /// @return SARIFs representation of location. + PhysicalLocationJson serialize() const; +}; + +}; // namespace klee + +#endif // KLEE_CODE_LOCATION_H diff --git a/lib/Core/EventRecorder.cpp b/lib/Core/EventRecorder.cpp new file mode 100644 index 0000000000..e828e41d8c --- /dev/null +++ b/lib/Core/EventRecorder.cpp @@ -0,0 +1,93 @@ +//===-- EventRecorder.cpp ---------------------------------------*- C++ -*-===// +// +// The KLEEF Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#include "EventRecorder.h" +#include "CodeEvent.h" + +#include "klee/ADT/ImmutableList.h" +#include "klee/ADT/Ref.h" +#include "klee/Expr/Path.h" +#include "klee/Module/SarifReport.h" + +#include +#include +#include + +using namespace klee; + +EventRecorder::EventRecorder(const EventRecorder &rhs) : events(rhs.events) {} + +EventRecorder &EventRecorder::operator=(const EventRecorder &rhs) { + events = rhs.events; + return *this; +} + +void EventRecorder::record(const ref &event) { + assert(event->location); + assert((empty() || + Path::PathIndexCompare{}(last()->location->pathIndex, + event->location->pathIndex) || + !Path::PathIndexCompare{}(event->location->pathIndex, + last()->location->pathIndex)) && + "Event must have later pathIndex than last recorded"); + events.push_back(event); +} + +void EventRecorder::append(const EventRecorder &rhs) { + for (const auto &event : rhs.events) { + record(event); + } +} + +CodeFlowJson EventRecorder::serialize() const { + CodeFlowJson codeFlow{}; + ThreadFlowJson threadFlow{}; + + for (const auto &event : events) { + LocationJson location = event->serialize(); + ThreadFlowLocationJson threadFlowLocation{{std::move(location)}, + std::nullopt}; + threadFlow.locations.push_back(std::move(threadFlowLocation)); + } + + codeFlow.threadFlows.push_back(std::move(threadFlow)); + + return codeFlow; +} + +EventRecorder EventRecorder::inRange(const Path::PathIndex &begin, + const Path::PathIndex &end) const { + EventRecorder result; + + if (Path::PathIndexCompare{}(end, begin)) { + return result; + } + + for (const auto &event : events) { + const Path::PathIndex &eventPathIndex = event->location->pathIndex; + if (!Path::PathIndexCompare{}(eventPathIndex, begin)) { + if (Path::PathIndexCompare{}(end, eventPathIndex)) { + break; + } + result.record(event); + } + } + + return result; +} + +bool EventRecorder::empty() const { return events.empty(); } + 
+ref EventRecorder::last() const { return events.back(); } + +EventRecorder EventRecorder::tail(const Path::PathIndex &begin) const { + if (events.empty()) { + return EventRecorder(); + } + return inRange(begin, events.back()->location->pathIndex); +} diff --git a/lib/Core/EventRecorder.h b/lib/Core/EventRecorder.h new file mode 100644 index 0000000000..4ffa796b54 --- /dev/null +++ b/lib/Core/EventRecorder.h @@ -0,0 +1,72 @@ +//===-- EventRecorder.h -----------------------------------------*- C++ -*-===// +// +// The KLEEF Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef KLEE_EVENT_RECORDER_H +#define KLEE_EVENT_RECORDER_H + +#include "klee/Expr/Path.h" + +namespace klee { + +template class ref; +template class ImmutableList; +struct CodeFlowJson; +class CodeEvent; + +/// @brief Class capable of storing code events +/// and serializing them into SARIF format. +class EventRecorder { +private: + /// @brief Inner storage of code events. + ImmutableList> events; + +public: + EventRecorder() = default; + + EventRecorder(const EventRecorder &); + EventRecorder &operator=(const EventRecorder &); + ~EventRecorder() = default; + + /// @brief Remembers event. + /// @param event Event to record. + void record(const ref &event); + + /// @brief Appends all events from the given `EventRecorder` + /// to this recorder. + /// @param rhs `EventRecorder` to get events from. + void append(const EventRecorder &rhs); + + /// @brief Returns events from the history in specified range. + /// @param begin range begin. + /// @param end range end. + /// @return `EventRecorder` containing recorded events from inner storage. + EventRecorder inRange(const Path::PathIndex &begin, + const Path::PathIndex &end) const; + + /// @brief Returns events from the history from the given event. 
+ /// @param begin range begin. + /// @return `EventRecorder` containing recorded events from inner storage. + EventRecorder tail(const Path::PathIndex &begin) const; + + /// @brief Returns last recorded event. + /// @return Last recorded `CodeEvent`. + ref last() const; + + /// @brief Tests whether this event recorder is empty. + /// @return `true` if no events have been recorded. + bool empty() const; + + /// @brief Serializes events in this recorder to SARIF format. + /// @return Structure ready for wrapping into json + /// (i.e. `json(serialize())`) and serialization. + CodeFlowJson serialize() const; +}; + +} // namespace klee + +#endif // KLEE_EVENT_RECORDER_H diff --git a/lib/Core/ExecutionState.cpp b/lib/Core/ExecutionState.cpp index 3fd5a29996..fa14493607 100644 --- a/lib/Core/ExecutionState.cpp +++ b/lib/Core/ExecutionState.cpp @@ -22,6 +22,7 @@ #include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -161,10 +162,10 @@ ExecutionState::ExecutionState(const ExecutionState &state) stack(state.stack), stackBalance(state.stackBalance), incomingBBIndex(state.incomingBBIndex), depth(state.depth), level(state.level), addressSpace(state.addressSpace), - constraints(state.constraints), targetForest(state.targetForest), - pathOS(state.pathOS), symPathOS(state.symPathOS), - coveredLines(state.coveredLines), symbolics(state.symbolics), - resolvedPointers(state.resolvedPointers), + constraints(state.constraints), eventsRecorder(state.eventsRecorder), + targetForest(state.targetForest), pathOS(state.pathOS), + symPathOS(state.symPathOS), coveredLines(state.coveredLines), + symbolics(state.symbolics), resolvedPointers(state.resolvedPointers), cexPreferences(state.cexPreferences), arrayNames(state.arrayNames), steppedInstructions(state.steppedInstructions), 
steppedMemoryInstructions(state.steppedMemoryInstructions), diff --git a/lib/Core/ExecutionState.h b/lib/Core/ExecutionState.h index dd0fd13427..3dc0b3bff7 100644 --- a/lib/Core/ExecutionState.h +++ b/lib/Core/ExecutionState.h @@ -31,9 +31,13 @@ #include "klee/System/Time.h" #include "klee/Utilities/Math.h" +#include "CodeLocation.h" +#include "EventRecorder.h" + #include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/IR/Function.h" DISABLE_WARNING_POP @@ -41,6 +45,7 @@ DISABLE_WARNING_POP #include #include #include +#include #include #include #include @@ -67,6 +72,12 @@ struct CallStackFrame { KInstIterator caller; KFunction *kf; + /// @brief Location of a return statement in current stack frame. + /// @details Serves for a very special case when actual location + /// of `return` statement in source code can not be deduced from + /// LLVM IR `dbg!` metadata. + std::optional> returnLocation; + CallStackFrame(KInstIterator caller_, KFunction *kf_) : caller(caller_), kf(kf_) {} ~CallStackFrame() = default; @@ -134,6 +145,24 @@ struct ExecutionStack { inline info_stack_ty &infoStack() { return infoStack_; } inline const call_stack_ty &uniqueFrames() const { return uniqueFrames_; } + void forceReturnLocation(const ref &location) { + assert(!callStack_.empty() && "Call stack should contain at least one " + "stack frame to force return location"); + std::optional> &callStackReturnLocation = + callStack_.back().returnLocation; + assert(!callStackReturnLocation.has_value() && + "Forced return location twice for a single call stack"); + + callStackReturnLocation.emplace(location); + } + + std::optional> forcedReturnLocation() const { + if (callStack_.empty()) { + return std::nullopt; + } + return callStack_.back().returnLocation; + } + inline unsigned size() const { return callStack_.size(); } inline size_t stackRegisterSize() const { return stackSize; } inline bool empty() const { 
return callStack_.empty(); } @@ -301,6 +330,10 @@ class ExecutionState { /// @brief Constraints collected so far PathConstraints constraints; + /// @brief Storage for the source code events (e.g. changing control flow or + /// errors) + EventRecorder eventsRecorder; + /// @brief Key points which should be visited through execution TargetForest targetForest; diff --git a/lib/Core/Executor.cpp b/lib/Core/Executor.cpp index 51e2d6f849..eca2e57d1e 100644 --- a/lib/Core/Executor.cpp +++ b/lib/Core/Executor.cpp @@ -57,6 +57,7 @@ #include "klee/Module/KInstruction.h" #include "klee/Module/KModule.h" #include "klee/Module/KType.h" +#include "klee/Module/SarifReport.h" #include "klee/Solver/Common.h" #include "klee/Solver/Solver.h" #include "klee/Solver/SolverCmdLine.h" @@ -70,9 +71,14 @@ #include "klee/System/MemoryUsage.h" #include "klee/System/Time.h" +#include "CodeEvent.h" +#include "CodeLocation.h" +#include "EventRecorder.h" + #include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Attributes.h" @@ -494,7 +500,12 @@ Executor::Executor(LLVMContext &ctx, const InterpreterOptions &opts, replayKTest(0), replayPath(0), usingSeeds(0), atMemoryLimit(false), inhibitForking(false), coverOnTheFly(false), haltExecution(HaltExecution::NotHalt), ivcEnabled(false), - debugLogBuffer(debugBufferString) { + debugLogBuffer(debugBufferString), sarifReport({}) { + + // Add first entry for single run + sarifReport.version = "2.1.0"; + sarifReport.runs.push_back(RunJson{{}, ih->info()}); + if (interpreterOpts.MockStrategy == MockStrategyKind::Deterministic && CoreSolverToUse != Z3_SOLVER) { klee_error("Deterministic mocks can be generated with Z3 solver only.\n"); @@ -834,7 +845,13 @@ void Executor::allocateGlobalObjects(ExecutionState &state) { // We allocate an object to represent each function, // its address can be used 
for function pointers. // TODO: Check whether the object is accessed? - auto mo = allocate(state, Expr::createPointer(8), false, true, &f, 8); + + const KFunction *kf = kmodule->functionMap.at(&f); + ref fCodeLocation = CodeLocation::create( + kf, kf->getSourceFilepath(), kf->getLine(), std::nullopt); + auto mo = allocate(state, Expr::createPointer(8), false, true, + fCodeLocation, 8); + addr = Expr::createPointer(mo->address); legalFunctions.emplace(mo->address, &f); } @@ -955,8 +972,19 @@ void Executor::allocateGlobalObjects(ExecutionState &state) { } } + const KGlobalVariable *kv = kmodule->globalMap.at(&v).get(); + ref vCodeLocation = CodeLocation::create( + kv, kv->getSourceFilepath(), kv->getLine(), std::nullopt); + + if (!isa(size)) { + addConstraint( + state, UleExpr::create( + ZExtExpr::create(size, Context::get().getPointerWidth()), + Expr::createPointer(MaxSymbolicAllocationSize))); + } + MemoryObject *mo = allocate(state, size, /*isLocal=*/false, - /*isGlobal=*/true, /*allocSite=*/&v, + /*isGlobal=*/true, /*allocSite=*/vCodeLocation, /*alignment=*/globalObjectAlignment); if (!mo) klee_error("out of memory"); @@ -2333,6 +2361,10 @@ void Executor::executeCall(ExecutionState &state, KInstruction *ki, Function *f, // KInstIterator from just an instruction (unlike LLVM). KFunction *kf = kmodule->functionMap[f]; + if (kmodule->inMainModule(*f) && kmodule->inMainModule(*i)) { + state.eventsRecorder.record(new CallEvent(locationOf(state), kf)); + } + state.pushFrame(state.prevPC, kf); transferToBasicBlock(&*kf->function()->begin(), state.getPrevPCBlock(), state); @@ -2419,7 +2451,7 @@ void Executor::executeCall(ExecutionState &state, KInstruction *ki, Function *f, StackFrame &sf = state.stack.valueStack().back(); MemoryObject *mo = sf.varargs = - memory->allocate(size, true, false, false, state.prevPC->inst(), + memory->allocate(size, true, false, false, locationOf(state), (requires16ByteAlignment ? 
16 : 8)); if (!mo && size) { terminateStateOnExecError(state, "out of memory (varargs)"); @@ -2577,6 +2609,18 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) { state.increaseLevel(); terminateStateOnExit(state); } else { + if (kmodule->inMainModule(*i)) { + KFunction *callerFunction = kcaller->parent->parent; + + if (kmodule->WithPOSIXRuntime() && + callerFunction->getName() == "__klee_posix_wrapped_main") { + callerFunction = state.stack.callStack().front().kf; + } + + state.eventsRecorder.record( + new ReturnEvent(locationOf(state), callerFunction)); + } + state.popFrame(); if (statsTracker) @@ -2671,8 +2715,16 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) { break; } case Instruction::Br: { + ref brLocation = locationOf(state); + BranchInst *bi = cast(i); if (bi->isUnconditional()) { + // `getMetadata` is used instead of `hasMetadata` + // because of backward compatibility with LLVM 9. + if (bi->getMetadata("md.ret")) { + state.stack.forceReturnLocation(locationOf(state)); + } + transferToBasicBlock(bi->getSuccessor(0), bi->getParent(), state); } else { // FIXME: Find a way that we don't have this hidden dependency. 
@@ -2700,13 +2752,22 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) { if (statsTracker) statsTracker->markBranchVisited(branches.first, branches.second); - if (branches.first) + if (branches.first) { transferToBasicBlock(bi->getSuccessor(0), bi->getParent(), *branches.first); - if (branches.second) + if (kmodule->inMainModule(*i)) { + BrEvent *brEvent = new BrEvent(brLocation); + branches.first->eventsRecorder.record(&brEvent->withBranch(true)); + } + } + if (branches.second) { transferToBasicBlock(bi->getSuccessor(1), bi->getParent(), *branches.second); - + if (kmodule->inMainModule(*i)) { + BrEvent *brEvent = new BrEvent(brLocation); + branches.second->eventsRecorder.record(&brEvent->withBranch(false)); + } + } if (guidanceKind == GuidanceKind::ErrorGuidance) { checkNullCheckAfterDeref(cond, state, branches.first, branches.second); } @@ -2823,6 +2884,7 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) { // Iterate through all non-default cases and order them by expressions for (auto i : si->cases()) { ref value = evalConstant(i.getCaseValue(), state.roundingMode); + assert(constantGepExprBases.empty()); BasicBlock *caseSuccessor = i.getCaseSuccessor(); expressionOrder.insert(std::make_pair(value, caseSuccessor)); @@ -3334,13 +3396,21 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) { offset = AddExpr::create(offset, Expr::createPointer(kgepi->offset)); ref address = AddExpr::create(base, offset); - if (!isa(address) || base->isZero() || - state.isGEPExpr(base)) { - if (state.isGEPExpr(base)) { - state.gepExprBases[address] = state.gepExprBases[base]; + if (state.isGEPExpr(base)) { + if (ref addressConstant = + llvm::dyn_cast(address)) { + IDType addressConstantResolution; + if (!state.addressSpace.resolveOne(addressConstant, + typeSystemManager->getWrappedType( + state.gepExprBases[base].second), + addressConstantResolution)) { + state.gepExprBases[address] = 
/// @brief Maps a state termination type to the spelling of its enumerator.
/// @param type Termination type to name.
/// @return The enumerator name as a string (e.g. the `N` of a `TTYPE(N, I, S)`
/// entry), or an empty string if no `case` generated below matches `type`.
static std::string terminationTypeName(StateTerminationType type) {
  // Stays empty unless one of the generated cases assigns to it.
  std::string ret;
  // Drop any earlier definitions so that the X-macro list below expands with
  // the definitions local to this function. Note that TTYPE/TTMARK stay
  // defined after this function; nothing here undefines them again.
#undef TTYPE
#undef TTMARK
  // Each TTYPE entry becomes a `case` that stores the stringized enumerator
  // name (`#N`); the `I` and `S` arguments are unused here.
#define TTYPE(N, I, S)                                                         \
  case StateTerminationType::N:                                                \
    ret = #N;                                                                  \
    break;
  // TTMARK entries expand to nothing, so they contribute no cases.
#define TTMARK(N, I)

  // TERMINATION_TYPES is defined elsewhere; it is expected to expand to a
  // sequence of TTYPE(...)/TTMARK(...) entries.
  switch (type) { TERMINATION_TYPES }
  return ret;
};
Executor::executeStep(ExecutionState &state) { KFunction *initKF = state.initPC->parent->parent; @@ -4674,8 +4760,7 @@ std::string Executor::getAddressInfo(ExecutionState &state, ref address, info << "none\n"; } else { const MemoryObject *mo = lower->first; - std::string alloc_info; - mo->getAllocInfo(alloc_info); + std::string alloc_info = mo->getAllocInfo(); info << "object at " << mo->address << " of size " << mo->size << "\n" << "\t\t" << alloc_info << "\n"; } @@ -4686,14 +4771,14 @@ std::string Executor::getAddressInfo(ExecutionState &state, ref address, info << "none\n"; } else { const MemoryObject *mo = lower->first; - std::string alloc_info; - mo->getAllocInfo(alloc_info); + std::string alloc_info = mo->getAllocInfo(); info << "object at " << mo->address << " of size " << mo->size << "\n" << "\t\t" << alloc_info << "\n"; } } - return info.str(); + info.flush(); + return Str; } HaltExecution::Reason fromStateTerminationType(StateTerminationType t) { @@ -4791,7 +4876,7 @@ void Executor::terminateStateEarlyUser(ExecutionState &state, } const KInstruction * -Executor::getLastNonKleeInternalInstruction(const ExecutionState &state) { +Executor::getLastNonKleeInternalInstruction(const ExecutionState &state) const { // unroll the stack of the applications state and find // the last instruction which is not inside a KLEE internal function auto it = state.stack.callStack().rbegin(); @@ -4877,7 +4962,8 @@ void Executor::terminateStateOnTargetError(ExecutionState &state, messaget = "unspecified error"; terminationType = StateTerminationType::User; } - terminateStateOnProgramError(state, messaget, terminationType); + terminateStateOnProgramError( + state, new ErrorEvent(locationOf(state), terminationType, messaget)); } void Executor::terminateStateOnError(ExecutionState &state, @@ -4948,14 +5034,28 @@ void Executor::terminateStateOnExecError(ExecutionState &state, } void Executor::terminateStateOnProgramError(ExecutionState &state, - const llvm::Twine &message, - 
StateTerminationType reason, + const ref &reason, const llvm::Twine &info, const char *suffix) { - assert(reason > StateTerminationType::SOLVERERR && - reason <= StateTerminationType::PROGERR); + assert(reason->ruleID > StateTerminationType::SOLVERERR && + reason->ruleID <= StateTerminationType::PROGERR); ++stats::terminationProgramError; - terminateStateOnError(state, message, reason, info, suffix); + + if (reason->source.has_value()) { + const ref &sourceEvent = reason->source.value(); + if (sourceEvent->location) { + EventRecorder traceRecorder; + + traceRecorder.record(sourceEvent); + traceRecorder.append( + state.eventsRecorder.tail(sourceEvent->location->pathIndex)); + + state.eventsRecorder = traceRecorder; + } + } + state.eventsRecorder.record(reason); + + terminateStateOnError(state, reason->message, reason->ruleID, info, suffix); } void Executor::terminateStateOnSolverError(ExecutionState &state, @@ -5214,9 +5314,8 @@ void Executor::executeAlloc(ExecutionState &state, ref size, bool isLocal, const ObjectState *reallocFrom, size_t allocationAlignment, bool checkOutOfMemory) { static unsigned allocations = 0; - const llvm::Value *allocSite = state.prevPC->inst(); if (allocationAlignment == 0) { - allocationAlignment = getAllocationAlignment(allocSite); + allocationAlignment = getAllocationAlignment(state.prevPC->inst()); } if (!isa(size) && !UseSymbolicSizeAllocation) { @@ -5251,7 +5350,7 @@ void Executor::executeAlloc(ExecutionState &state, ref size, bool isLocal, } MemoryObject *mo = allocate(state, size, isLocal, /*isGlobal=*/false, - allocSite, allocationAlignment); + locationOf(state), allocationAlignment); if (!mo) { bindLocal(target, state, Expr::createPointer(0)); } else { @@ -5312,13 +5411,19 @@ void Executor::executeFree(ExecutionState &state, ref address, zeroPointer.second->addressSpace.findObject(it->first).first; if (mo->isLocal) { - terminateStateOnProgramError(*it->second, "free of alloca", - StateTerminationType::Free, - 
getAddressInfo(*it->second, address)); + terminateStateOnProgramError( + *it->second, + new ErrorEvent(new AllocEvent(mo->allocSite), + locationOf(*it->second), StateTerminationType::Free, + "free of alloca"), + getAddressInfo(*it->second, address)); } else if (mo->isGlobal) { - terminateStateOnProgramError(*it->second, "free of global", - StateTerminationType::Free, - getAddressInfo(*it->second, address)); + terminateStateOnProgramError( + *it->second, + new ErrorEvent(new AllocEvent(mo->allocSite), + locationOf(*it->second), StateTerminationType::Free, + "free of global"), + getAddressInfo(*it->second, address)); } else { it->second->removePointerResolutions(mo); it->second->addressSpace.unbindObject(mo); @@ -5410,8 +5515,10 @@ bool Executor::resolveExact(ExecutionState &estate, ref address, terminateStateOnSolverError(*unbound, "Query timed out (resolve)."); } else { terminateStateOnProgramError( - *unbound, "memory error: invalid pointer: " + name, - StateTerminationType::Ptr, getAddressInfo(*unbound, address)); + *unbound, + new ErrorEvent(locationOf(*unbound), StateTerminationType::Ptr, + "memory error: invalid pointer: " + name), + getAddressInfo(*unbound, address)); } } return true; @@ -5495,9 +5602,12 @@ void Executor::concretizeSize(ExecutionState &state, ref size, ExprPPrinter::printOne(info, " size expr", size); info << " concretization : " << example << "\n"; info << " unbound example: " << tmp << "\n"; - terminateStateOnProgramError(*hugeSize.second, - "concretized symbolic size", - StateTerminationType::Model, info.str()); + terminateStateOnProgramError( + *hugeSize.second, + new ErrorEvent(locationOf(*hugeSize.second), + StateTerminationType::Model, + "concretized symbolic size"), + info.str()); } } } @@ -5554,7 +5664,7 @@ bool Executor::computeSizes(ExecutionState &state, ref size, MemoryObject *Executor::allocate(ExecutionState &state, ref size, bool isLocal, bool isGlobal, - const llvm::Value *allocSite, + ref allocSite, size_t 
allocationAlignment, ref lazyInitializationSource, unsigned timestamp) { @@ -5566,23 +5676,30 @@ MemoryObject *Executor::allocate(ExecutionState &state, ref size, /* Constant solution exists. Just return it. */ if (arrayConstantSize && lazyInitializationSource.isNull()) { - return memory->allocate(arrayConstantSize->getZExtValue(), isLocal, - isGlobal, false, allocSite, allocationAlignment); + MemoryObject *mo = + memory->allocate(arrayConstantSize->getZExtValue(), isLocal, isGlobal, + false, allocSite, allocationAlignment); + if (mo && state.isGEPExpr(mo->getBaseExpr())) { + state.gepExprBases.erase(mo->getBaseExpr()); + } + return mo; } Expr::Width pointerWidthInBits = Context::get().getPointerWidth(); /// Determine source for address array: - /// * LI source if allocate occures on lazi initialization + /// * LI source if allocate occures on lazy initialization /// * Otherwise choose source depending on the allocation site ref sourceAddressArray; if (!lazyInitializationSource) { - if (auto inst = dyn_cast(allocSite)) { + if (const auto inst = + dyn_cast(allocSite->source->unwrap())) { KInstruction *ki = kmodule->getKBlock(inst->getParent())->parent->instructionMap[inst]; sourceAddressArray = SourceBuilder::symbolicSizeConstantAddress( updateNameVersion(state, "const_arr"), ki, size); - } else if (auto global = dyn_cast(allocSite)) { + } else if (const auto global = dyn_cast( + allocSite->source->unwrap())) { KGlobalVariable *kgb = kmodule->globalMap[global].get(); sourceAddressArray = SourceBuilder::symbolicSizeConstantAddress( updateNameVersion(state, "const_arr"), kgb, size); @@ -5823,11 +5940,6 @@ bool Executor::checkResolvedMemoryObjects( ref baseInBounds = Expr::createTrue(); ref notInBounds = Expr::createIsZero(inBounds); - if (base != address || size != bytes) { - baseInBounds = - AndExpr::create(baseInBounds, mo->getBoundsCheckPointer(base, size)); - } - if (hasLazyInitialized) { baseInBounds = AndExpr::create( baseInBounds, 
Expr::createIsZero(mo->getOffsetExpr(base))); @@ -5899,13 +6011,6 @@ bool Executor::checkResolvedMemoryObjects( ref baseInBounds = Expr::createTrue(); ref notInBounds = Expr::createIsZero(inBounds); - if (base != address || size != bytes) { - baseInBounds = AndExpr::create(baseInBounds, - mo->getBoundsCheckPointer(base, size)); - baseInBounds = AndExpr::create( - baseInBounds, Expr::createIsZero(mo->getOffsetExpr(base))); - } - if (hasLazyInitialized && i == mayBeResolvedMemoryObjects.size() - 1) { baseInBounds = AndExpr::create( baseInBounds, Expr::createIsZero(mo->getOffsetExpr(base))); @@ -6166,16 +6271,6 @@ void Executor::executeMemoryOperation( } ref inBounds = mo->getBoundsCheckPointer(address, bytes); - ref baseInBounds = Expr::createTrue(); - - if (base != address || size != bytes) { - baseInBounds = - AndExpr::create(baseInBounds, mo->getBoundsCheckPointer(base, size)); - baseInBounds = AndExpr::create( - baseInBounds, Expr::createIsZero(mo->getOffsetExpr(base))); - } - - inBounds = AndExpr::create(inBounds, baseInBounds); inBounds = optimizer.optimizeExpr(inBounds, true); inBounds = Simplificator::simplifyExpr(state->constraints.cs(), inBounds) @@ -6210,8 +6305,11 @@ void Executor::executeMemoryOperation( targetType, mo->getOffsetExpr(address), ConstantExpr::alloc(size, Context::get().getPointerWidth()), true); if (wos->readOnly) { - terminateStateOnProgramError(*state, "memory error: object read only", - StateTerminationType::ReadOnly); + terminateStateOnProgramError( + *state, + new ErrorEvent(new AllocEvent(mo->allocSite), locationOf(*state), + StateTerminationType::ReadOnly, + "memory error: object read only")); } else { wos->write(mo->getOffsetExpr(address), value); } @@ -6325,9 +6423,12 @@ void Executor::executeMemoryOperation( forkInternal(*state, Expr::createIsZero(unboundConditions[i]), BranchType::MemOp); assert(branches.first); - terminateStateOnProgramError(*branches.first, - "memory error: object read only", - StateTerminationType::ReadOnly); 
+ terminateStateOnProgramError( + *branches.first, + new ErrorEvent(new AllocEvent(mo->allocSite), + locationOf(*branches.first), + StateTerminationType::ReadOnly, + "memory error: object read only")); state = branches.second; } else { ref result = SelectExpr::create( @@ -6395,8 +6496,11 @@ void Executor::executeMemoryOperation( targetType, mo->getOffsetExpr(address), ConstantExpr::alloc(size, Context::get().getPointerWidth()), true); if (wos->readOnly) { - terminateStateOnProgramError(*bound, "memory error: object read only", - StateTerminationType::ReadOnly); + terminateStateOnProgramError( + *bound, + new ErrorEvent(new AllocEvent(mo->allocSite), locationOf(*bound), + StateTerminationType::ReadOnly, + "memory error: object read only")); } else { wos->write(mo->getOffsetExpr(address), value); } @@ -6422,11 +6526,42 @@ void Executor::executeMemoryOperation( terminateStateOnSolverError(*unbound, "Query timed out (resolve)."); return; } - assert(mayBeOutOfBound && "must be true since unbound is not null"); - terminateStateOnProgramError(*unbound, "memory error: out of bound pointer", - StateTerminationType::Ptr, - getAddressInfo(*unbound, address)); + + /* If base may point to some object then we may provide additional + information about object allocations site.*/ + bool uniqueBaseResolved = false; + IDType baseID; + + if (!unbound->addressSpace.resolveOneIfUnique(*unbound, solver.get(), + uniqueBase, baseTargetType, + baseID, uniqueBaseResolved)) { + terminateStateOnSolverError(*unbound, "Query timed out (resolve)"); + return; + } + + if (uniqueBaseResolved) { + // Obtain memory object + const ObjectPair baseObjectPair = + unbound->addressSpace.findObject(baseID); + + if (!baseObjectPair.first->isLazyInitialized) { + // Termiante with source event + terminateStateOnProgramError( + *unbound, + new ErrorEvent(new AllocEvent(baseObjectPair.first->allocSite), + locationOf(*unbound), StateTerminationType::Ptr, + "memory error: out of bound pointer"), + 
getAddressInfo(*unbound, address)); + return; + } + } + + terminateStateOnProgramError( + *unbound, + new ErrorEvent(locationOf(*unbound), StateTerminationType::Ptr, + "memory error: out of bound pointer"), + getAddressInfo(*unbound, address)); } } @@ -6435,7 +6570,6 @@ bool Executor::lazyInitializeObject(ExecutionState &state, ref address, KType *targetType, uint64_t size, bool isLocal, IDType &id, bool isSymbolic) { assert(!isa(address)); - const llvm::Value *allocSite = target ? target->inst() : nullptr; std::pair, ref> moBasePair; unsigned timestamp = 0; if (state.getBase(address, moBasePair)) { @@ -6472,7 +6606,7 @@ bool Executor::lazyInitializeObject(ExecutionState &state, ref address, ref addressExpr = isSymbolic ? address : nullptr; MemoryObject *mo = allocate(state, sizeExpr, isLocal, - /*isGlobal=*/false, allocSite, + /*isGlobal=*/false, CodeLocation::create(target, "", 0, {}), /*allocationAlignment=*/8, addressExpr, timestamp); if (!mo) { return false; @@ -6731,11 +6865,15 @@ ExecutionState *Executor::formState(Function *f, int argc, char **argv, if (ai != ae) { arguments.push_back(ConstantExpr::alloc(argc, Expr::Int32)); if (++ai != ae) { - Instruction *first = &*(f->begin()->begin()); + // For the entry state assume that arguments allocated + // on line with function signature. + ref parameterLocation = + CodeLocation::create(kf, kf->getSourceFilepath(), kf->getLine(), {}); + argvMO = allocate( *state, Expr::createPointer((argc + 1 + envc + 1 + 1) * NumPtrBytes), /*isLocal=*/false, /*isGlobal=*/true, - /*allocSite=*/first, /*alignment=*/8); + /*allocSite=*/parameterLocation, /*alignment=*/8); if (!argvMO) klee_error("Could not allocate memory for function arguments"); @@ -6774,10 +6912,13 @@ ExecutionState *Executor::formState(Function *f, int argc, char **argv, char *s = i < argc ? 
argv[i] : envp[i - (argc + 1)]; int j, len = strlen(s); + ref parameterLocation = CodeLocation::create( + kf, kf->getSourceFilepath(), kf->getLine(), {}); + MemoryObject *arg = allocate(*state, Expr::createPointer(len + 1), /*isLocal=*/false, /*isGlobal=*/true, - /*allocSite=*/state->pc->inst(), /*alignment=*/8); + /*allocSite=*/parameterLocation, /*alignment=*/8); if (!arg) klee_error("Could not allocate memory for function arguments"); @@ -6819,6 +6960,9 @@ void Executor::runFunctionAsMain(Function *f, int argc, char **argv, ExecutionState *state = formState(f, argc, argv, envp); bindModuleConstants(llvm::APFloat::rmNearestTiesToEven); + state->gepExprBases = std::move(constantGepExprBases); + constantGepExprBases.clear(); + KFunction *kEntryFunction = kmodule->functionMap.at(f); if (guidanceKind == GuidanceKind::ErrorGuidance) { ref forest; @@ -7067,6 +7211,26 @@ void Executor::getConstraintLog(const ExecutionState &state, std::string &res, } } +void Executor::addSARIFReport(const ExecutionState &state) { + ResultJson result{}; + + CodeFlowJson codeFlow = state.eventsRecorder.serialize(); + + if (ref lastEvent = + llvm::dyn_cast(state.eventsRecorder.last())) { + result.locations.push_back(lastEvent->serialize()); + result.message = {Message{lastEvent->message}}; + result.ruleId = {terminationTypeName(lastEvent->ruleID)}; + result.level = {"error"}; + } + + result.codeFlows.push_back(std::move(codeFlow)); + + sarifReport.runs.back().results.push_back(std::move(result)); +} + +SarifReportJson Executor::getSARIFReport() const { return sarifReport; } + void Executor::logState(const ExecutionState &state, int id, std::unique_ptr &f) { *f << "State number " << state.id << ". 
Test number: " << id << "\n\n"; @@ -7100,7 +7264,8 @@ bool resolveOnSymbolics(const std::vector &symbolics, for (const auto &res : symbolics) { const auto &mo = res.memoryObject; // Check if the provided address is between start and end of the object - // [mo->address, mo->address + mo->size) or the object is a 0-sized object. + // [mo->address, mo->address + mo->size) or the object is a 0-sized + // object. ref size = cast(assn.evaluate(mo->getSizeExpr())); if ((size->getZExtValue() == 0 && address == mo->address) || @@ -7582,6 +7747,43 @@ void Executor::dumpStates() { /// +/// @brief Determines current code location for given state. +/// @param state given state. +/// @return constructed code location for given state. +/// +/// Note: location tied with state as source code location +/// depends on current call stack. +ref Executor::locationOf(const ExecutionState &state) const { + // Iterate over call stack frames until find function from + // main module. + + // Either caller from call stack or executed instruction + // in case of last stack frame. 
+ const KInstruction *kinst = &*state.prevPC; + + for (auto stackFrameIt = state.stack.callStack().rbegin(), + stackBottom = state.stack.callStack().rend(); + stackFrameIt != stackBottom && !kmodule->inMainModule(*kinst->inst()); + kinst = stackFrameIt++->caller) { + } + + if (!kinst) { + return nullptr; + } + + if (llvm::isa(kinst->inst())) { + std::optional> returnLocation = + state.stack.forcedReturnLocation(); + if (returnLocation.has_value()) { + return returnLocation.value(); + } + } + + Path::PathIndex callPathIndex = state.constraints.path().getCurrentIndex(); + return CodeLocation::create(callPathIndex, kinst, kinst->getSourceFilepath(), + kinst->getLine(), kinst->getColumn()); +} + Interpreter *Interpreter::create(LLVMContext &ctx, const InterpreterOptions &opts, InterpreterHandler *ih) { diff --git a/lib/Core/Executor.h b/lib/Core/Executor.h index d6cbb17352..a0a645eba3 100644 --- a/lib/Core/Executor.h +++ b/lib/Core/Executor.h @@ -36,6 +36,7 @@ #include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Intrinsics.h" @@ -73,12 +74,14 @@ class AddressManager; class Array; struct Cell; class CodeGraphInfo; +struct CodeLocation; class DistanceCalculator; +struct ErrorEvent; class ExecutionState; class ExternalDispatcher; class Expr; template class ExprHashMap; -class KCallable; +struct KCallable; struct KFunction; struct KInstruction; class KInstIterator; @@ -155,6 +158,8 @@ class Executor : public Interpreter { std::unique_ptr targetCalculator; std::unique_ptr targetManager; + ExprHashMap, llvm::Type *>> constantGepExprBases; + /// Used to track states that have been added during the current /// instructions step. /// \invariant \ref addedStates is a subset of \ref states. 
@@ -251,6 +256,9 @@ class Executor : public Interpreter { bool hasStateWhichCanReachSomeTarget = false; + /// @brief SARIF report for all exploration paths. + SarifReportJson sarifReport; + /// Return the typeid corresponding to a certain `type_info` ref getEhTypeidFor(ref type_info); @@ -318,7 +326,7 @@ class Executor : public Interpreter { std::vector> &values); MemoryObject *allocate(ExecutionState &state, ref size, bool isLocal, - bool isGlobal, const llvm::Value *allocSite, + bool isGlobal, ref allocSite, size_t allocationAlignment, ref lazyInitializationSource = ref(), unsigned timestamp = 0); @@ -528,6 +536,10 @@ class Executor : public Interpreter { void bindArgument(KFunction *kf, unsigned index, ExecutionState &state, ref value); + // Returns location of ExecutionState::prevPC in given + // source code (i.e. main module). + ref locationOf(const ExecutionState &) const; + /// Evaluates an LLVM constant expression. The optional argument ki /// is the instruction where this constant was encountered, or NULL /// if not applicable/unavailable. @@ -574,7 +586,7 @@ class Executor : public Interpreter { // Determines the \param lastInstruction of the \param state which is not KLEE // internal and returns its KInstruction const KInstruction * - getLastNonKleeInternalInstruction(const ExecutionState &state); + getLastNonKleeInternalInstruction(const ExecutionState &state) const; /// Remove state from queue and delete state void terminateState(ExecutionState &state, @@ -618,8 +630,7 @@ class Executor : public Interpreter { /// Call error handler and terminate state in case of program errors /// (e.g. 
free()ing globals, out-of-bound accesses) void terminateStateOnProgramError(ExecutionState &state, - const llvm::Twine &message, - StateTerminationType reason, + const ref &reason, const llvm::Twine &longMessage = "", const char *suffix = nullptr); @@ -647,7 +658,7 @@ class Executor : public Interpreter { void reportProgressTowardsTargets() const; /// bindModuleConstants - Initialize the module constant table. - void bindModuleConstants(const llvm::APFloat::roundingMode &rm); + void bindModuleConstants(llvm::APFloat::roundingMode rm); uint64_t updateNameVersion(ExecutionState &state, const std::string &name); @@ -798,6 +809,10 @@ class Executor : public Interpreter { getConstraintLog(const ExecutionState &state, std::string &res, Interpreter::LogType logFormat = Interpreter::STP) override; + void addSARIFReport(const ExecutionState &state) override; + + SarifReportJson getSARIFReport() const override; + void setInitializationGraph(const ExecutionState &state, const std::vector &symbolics, const Assignment &model, KTest &tc); diff --git a/lib/Core/ExecutorUtil.cpp b/lib/Core/ExecutorUtil.cpp index 534f264fee..68603341c0 100644 --- a/lib/Core/ExecutorUtil.cpp +++ b/lib/Core/ExecutorUtil.cpp @@ -20,6 +20,7 @@ #include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -374,6 +375,12 @@ ref Executor::evalConstantExpr(const llvm::ConstantExpr *ce, Context::get().getPointerWidth() == 32) { result = cast(FPToX87FP80Ext(result)); } + + if (constantGepExprBases.count(result)) { + constantGepExprBases[result] = {constantGepExprBases[result].first, + ce->getType()}; + } + return result; } @@ -416,6 +423,14 @@ ref Executor::evalConstantExpr(const llvm::ConstantExpr *ce, kmodule->targetData->getTypeAllocSize( ii.getIndexedType()))))); } + + if (constantGepExprBases.count(op1)) { + constantGepExprBases[base] = 
constantGepExprBases[op1]; + } else { + constantGepExprBases[base] = { + op1, llvm::cast(ce)->getSourceElementType()}; + } + return base; } diff --git a/lib/Core/ExternalDispatcher.h b/lib/Core/ExternalDispatcher.h index 3b6fc95928..e9ed7682d8 100644 --- a/lib/Core/ExternalDispatcher.h +++ b/lib/Core/ExternalDispatcher.h @@ -24,7 +24,7 @@ class LLVMContext; namespace klee { class ExternalDispatcherImpl; -class KCallable; +struct KCallable; class ExternalDispatcher { private: ExternalDispatcherImpl *impl; diff --git a/lib/Core/Memory.cpp b/lib/Core/Memory.cpp index 16a49c6938..33b0fca37f 100644 --- a/lib/Core/Memory.cpp +++ b/lib/Core/Memory.cpp @@ -13,6 +13,7 @@ #include "MemoryManager.h" #include "klee/Core/Context.h" +#include "CodeLocation.h" #include "klee/ADT/BitArray.h" #include "klee/Expr/ArrayCache.h" #include "klee/Expr/Assignment.h" @@ -49,26 +50,29 @@ MemoryObject::~MemoryObject() { parent->markFreed(this); } -void MemoryObject::getAllocInfo(std::string &result) const { +std::string MemoryObject::getAllocInfo() const { + std::string result; llvm::raw_string_ostream info(result); info << "MO" << id << "[" << size << "]"; - if (allocSite) { + if (allocSite && allocSite->source) { + const llvm::Value *allocSiteSource = allocSite->source->unwrap(); info << " allocated at "; - if (const Instruction *i = dyn_cast(allocSite)) { + if (const Instruction *i = dyn_cast(allocSiteSource)) { info << i->getParent()->getParent()->getName() << "():"; info << *i; - } else if (const GlobalValue *gv = dyn_cast(allocSite)) { + } else if (const GlobalValue *gv = dyn_cast(allocSiteSource)) { info << "global:" << gv->getName(); } else { - info << "value:" << *allocSite; + info << "value:" << *allocSiteSource; } } else { info << " (no allocation info)"; } info.flush(); + return result; } /***/ @@ -169,8 +173,7 @@ ref ObjectState::read8(ref offset) const { flushForRead(); if (object && object->size > 4096) { - std::string allocInfo; - object->getAllocInfo(allocInfo); + 
std::string allocInfo = object->getAllocInfo(); klee_warning_once( nullptr, "Symbolic memory access will send the following array of %d bytes to " @@ -203,8 +206,7 @@ void ObjectState::write8(ref offset, ref value) { flushForWrite(); if (object && object->size > 4096) { - std::string allocInfo; - object->getAllocInfo(allocInfo); + std::string allocInfo = object->getAllocInfo(); klee_warning_once( nullptr, "Symbolic memory access will send the following array of %d bytes to " diff --git a/lib/Core/Memory.h b/lib/Core/Memory.h index 6d3bb3b987..9986f22a7c 100644 --- a/lib/Core/Memory.h +++ b/lib/Core/Memory.h @@ -10,6 +10,7 @@ #ifndef KLEE_MEMORY_H #define KLEE_MEMORY_H +#include "CodeLocation.h" #include "MemoryManager.h" #include "TimingSolver.h" #include "klee/ADT/Ref.h" @@ -90,7 +91,7 @@ class MemoryObject { /// "Location" for which this memory object was allocated. This /// should be either the allocating instruction or the global object /// it was allocated for (or whatever else makes sense). - const llvm::Value *allocSite; + ref allocSite; // DO NOT IMPLEMENT MemoryObject(const MemoryObject &b); @@ -106,7 +107,7 @@ class MemoryObject { MemoryObject( uint64_t _address, unsigned _size, uint64_t alignment, bool _isLocal, bool _isGlobal, bool _isFixed, bool _isLazyInitialized, - const llvm::Value *_allocSite, MemoryManager *_parent, + ref _allocSite, MemoryManager *_parent, ref _addressExpr = nullptr, ref _sizeExpr = nullptr, unsigned _timestamp = 0 /* unused if _isLazyInitialized is false*/) : id(counter++), timestamp(_timestamp), address(_address), @@ -125,7 +126,7 @@ class MemoryObject { ~MemoryObject(); /// Get an identifying string for this allocation. - void getAllocInfo(std::string &result) const; + std::string getAllocInfo() const; void setName(const std::string &_name) const { this->name = _name; } @@ -188,8 +189,8 @@ class MemoryObject { if (size != b.size) return (size < b.size ? 
-1 : 1); - if (allocSite != b.allocSite) - return (allocSite < b.allocSite ? -1 : 1); + if (allocSite->source != b.allocSite->source) + return (allocSite->source < b.allocSite->source ? -1 : 1); assert(isLazyInitialized == b.isLazyInitialized); return 0; diff --git a/lib/Core/MemoryManager.cpp b/lib/Core/MemoryManager.cpp index 114c134300..4fb40e33ea 100644 --- a/lib/Core/MemoryManager.cpp +++ b/lib/Core/MemoryManager.cpp @@ -130,7 +130,7 @@ MemoryManager::~MemoryManager() { MemoryObject *MemoryManager::allocate(uint64_t size, bool isLocal, bool isGlobal, bool isLazyInitialiazed, - const llvm::Value *allocSite, + ref allocSite, size_t alignment, ref addressExpr, ref sizeExpr, unsigned timestamp, IDType id) { @@ -195,7 +195,7 @@ MemoryObject *MemoryManager::allocate(uint64_t size, bool isLocal, } MemoryObject *MemoryManager::allocateFixed(uint64_t address, uint64_t size, - const llvm::Value *allocSite) { + ref allocSite) { #ifndef NDEBUG for (objects_ty::iterator it = objects.begin(), ie = objects.end(); it != ie; ++it) { diff --git a/lib/Core/MemoryManager.h b/lib/Core/MemoryManager.h index bc31477e2f..98b5045fd4 100644 --- a/lib/Core/MemoryManager.h +++ b/lib/Core/MemoryManager.h @@ -26,6 +26,7 @@ namespace klee { class MemoryObject; class ArrayCache; class AddressManager; +struct CodeLocation; typedef uint64_t IDType; @@ -53,12 +54,12 @@ class MemoryManager { * memory. 
*/ MemoryObject *allocate(uint64_t size, bool isLocal, bool isGlobal, - bool isLazyInitialiazed, const llvm::Value *allocSite, + bool isLazyInitialiazed, ref allocSite, size_t alignment, ref addressExpr = ref(), ref sizeExpr = ref(), unsigned timestamp = 0, IDType id = 0); MemoryObject *allocateFixed(uint64_t address, uint64_t size, - const llvm::Value *allocSite); + ref allocSite); void deallocate(const MemoryObject *mo); void markFreed(MemoryObject *mo); ArrayCache *getArrayCache() const { return arrayCache; } diff --git a/lib/Core/SpecialFunctionHandler.cpp b/lib/Core/SpecialFunctionHandler.cpp index ae4a7b358c..ba762edd0b 100644 --- a/lib/Core/SpecialFunctionHandler.cpp +++ b/lib/Core/SpecialFunctionHandler.cpp @@ -9,6 +9,7 @@ #include "SpecialFunctionHandler.h" +#include "CodeEvent.h" #include "ExecutionState.h" #include "Executor.h" #include "Memory.h" @@ -31,6 +32,7 @@ #include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" @@ -340,8 +342,9 @@ void SpecialFunctionHandler::handleAbort(ExecutionState &state, KInstruction *target, std::vector> &arguments) { assert(arguments.size() == 0 && "invalid number of arguments to abort"); - executor.terminateStateOnProgramError(state, "abort failure", - StateTerminationType::Abort); + executor.terminateStateOnProgramError( + state, new ErrorEvent(executor.locationOf(state), + StateTerminationType::Abort, "abort failure")); } void SpecialFunctionHandler::handleExit(ExecutionState &state, @@ -363,8 +366,10 @@ void SpecialFunctionHandler::handleAssert(ExecutionState &state, std::vector> &arguments) { assert(arguments.size() == 3 && "invalid number of arguments to _assert"); executor.terminateStateOnProgramError( - state, "ASSERTION FAIL: " + readStringAtAddress(state, arguments[0]), - StateTerminationType::Assert); + state, + new 
ErrorEvent(executor.locationOf(state), StateTerminationType::Assert, + "ASSERTION FAIL: " + + readStringAtAddress(state, arguments[0]))); } void SpecialFunctionHandler::handleAssertFail( @@ -373,8 +378,10 @@ void SpecialFunctionHandler::handleAssertFail( assert(arguments.size() == 4 && "invalid number of arguments to __assert_fail"); executor.terminateStateOnProgramError( - state, "ASSERTION FAIL: " + readStringAtAddress(state, arguments[0]), - StateTerminationType::Assert); + state, + new ErrorEvent(executor.locationOf(state), StateTerminationType::Assert, + "ASSERTION FAIL: " + + readStringAtAddress(state, arguments[0]))); } void SpecialFunctionHandler::handleReportError( @@ -385,9 +392,11 @@ void SpecialFunctionHandler::handleReportError( // arguments[0,1,2,3] are file, line, message, suffix executor.terminateStateOnProgramError( - state, readStringAtAddress(state, arguments[2]), - StateTerminationType::ReportError, "", - readStringAtAddress(state, arguments[3]).c_str()); + state, + new ErrorEvent(executor.locationOf(state), + StateTerminationType::ReportError, + readStringAtAddress(state, arguments[2])), + "", readStringAtAddress(state, arguments[3]).c_str()); } void SpecialFunctionHandler::handleNew(ExecutionState &state, @@ -828,7 +837,9 @@ void SpecialFunctionHandler::handleCheckMemoryAccess( cast(address), executor.typeSystemManager->getUnknownType(), idObject)) { executor.terminateStateOnProgramError( - state, "check_memory_access: memory error", StateTerminationType::Ptr, + state, + new ErrorEvent(executor.locationOf(state), StateTerminationType::Ptr, + "check_memory_access: memory error"), executor.getAddressInfo(state, address)); } else { const MemoryObject *mo = state.addressSpace.findObject(idObject).first; @@ -836,8 +847,11 @@ void SpecialFunctionHandler::handleCheckMemoryAccess( address, cast(size)->getZExtValue()); if (!chk->isTrue()) { executor.terminateStateOnProgramError( - state, "check_memory_access: memory error", - StateTerminationType::Ptr, 
executor.getAddressInfo(state, address)); + state, + new ErrorEvent( + new AllocEvent(mo->allocSite), executor.locationOf(state), + StateTerminationType::Ptr, "check_memory_access: memory error"), + executor.getAddressInfo(state, address)); } } } @@ -865,7 +879,7 @@ void SpecialFunctionHandler::handleDefineFixedObject( uint64_t address = cast(arguments[0])->getZExtValue(); uint64_t size = cast(arguments[1])->getZExtValue(); MemoryObject *mo = - executor.memory->allocateFixed(address, size, state.prevPC->inst()); + executor.memory->allocateFixed(address, size, executor.locationOf(state)); executor.bindObjectInState( state, mo, executor.typeSystemManager->getUnknownType(), false); mo->isUserSpecified = true; // XXX hack; diff --git a/lib/Expr/APFloatEval.cpp b/lib/Expr/APFloatEval.cpp index 49ae758d1c..b3b7329432 100644 --- a/lib/Expr/APFloatEval.cpp +++ b/lib/Expr/APFloatEval.cpp @@ -12,6 +12,7 @@ #include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/Support/raw_ostream.h" DISABLE_WARNING_POP diff --git a/lib/Expr/Expr.cpp b/lib/Expr/Expr.cpp index c6fa1b844d..27cc2bcfff 100644 --- a/lib/Expr/Expr.cpp +++ b/lib/Expr/Expr.cpp @@ -21,6 +21,7 @@ #include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Hashing.h" #if LLVM_VERSION_CODE >= LLVM_VERSION(13, 0) diff --git a/lib/Module/CMakeLists.txt b/lib/Module/CMakeLists.txt index d185f53a72..65cccdcc68 100644 --- a/lib/Module/CMakeLists.txt +++ b/lib/Module/CMakeLists.txt @@ -20,12 +20,14 @@ set(KLEE_MODULE_COMPONENT_SRCS KModule.cpp KType.cpp KValue.cpp + LocalVarDeclarationFinderPass.cpp LowerSwitch.cpp ModuleUtil.cpp Optimize.cpp OptNone.cpp PhiCleaner.cpp RaiseAsm.cpp + ReturnLocationFinderPass.cpp ReturnSplitter.cpp SarifReport.cpp Target.cpp diff --git a/lib/Module/KInstruction.cpp 
b/lib/Module/KInstruction.cpp index 97f9002385..2c480feef2 100644 --- a/lib/Module/KInstruction.cpp +++ b/lib/Module/KInstruction.cpp @@ -81,7 +81,7 @@ size_t KInstruction::getLine() const { size_t KInstruction::getColumn() const { auto locationInfo = getLocationInfo(inst()); - return locationInfo.column; + return locationInfo.column.value_or(0); } std::string KInstruction::getSourceFilepath() const { diff --git a/lib/Module/KModule.cpp b/lib/Module/KModule.cpp index f7ce474bdb..db6ae7a96e 100644 --- a/lib/Module/KModule.cpp +++ b/lib/Module/KModule.cpp @@ -304,6 +304,10 @@ void KModule::optimiseAndPrepare( // going to be unresolved. We really need to handle the intrinsics // directly I think? legacy::PassManager pm3; + + pm3.add(new ReturnLocationFinderPass()); + pm3.add(new LocalVarDeclarationFinderPass()); + if (opts.Simplify) pm3.add(createCFGSimplificationPass()); switch (SwitchType) { @@ -488,6 +492,10 @@ bool KModule::inMainModule(const llvm::Function &f) { return mainModuleFunctions.count(f.getName().str()) != 0; } +bool KModule::inMainModule(const llvm::Instruction &i) { + return inMainModule(*i.getParent()->getParent()); +} + bool KModule::inMainModule(const GlobalVariable &v) { return mainModuleGlobals.count(v.getName().str()) != 0; } diff --git a/lib/Module/LocalVarDeclarationFinderPass.cpp b/lib/Module/LocalVarDeclarationFinderPass.cpp new file mode 100644 index 0000000000..8c84f7537b --- /dev/null +++ b/lib/Module/LocalVarDeclarationFinderPass.cpp @@ -0,0 +1,48 @@ +//===-- LocalVarDeclarationFinderPass.cpp -----------------------*- C++ -*-===// +// +// The KLEEF Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#include "Passes.h" + +#include "klee/Support/CompilerWarning.h" + +DISABLE_WARNING_PUSH +DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +DISABLE_WARNING_POP + +using namespace klee; + +//////////////////////////////////////////////// + +char LocalVarDeclarationFinderPass::ID = 0; + +bool LocalVarDeclarationFinderPass::runOnFunction(llvm::Function &function) { + bool anyChanged = false; + + for (const llvm::BasicBlock &block : function) { + for (const llvm::Instruction &instruction : block) { + if (const llvm::DbgDeclareInst *debugDeclareInstruction = + llvm::dyn_cast(&instruction)) { + llvm::Value *source = debugDeclareInstruction->getAddress(); + if (llvm::Instruction *sourceInstruction = + llvm::dyn_cast(source)) { + sourceInstruction->setDebugLoc( + debugDeclareInstruction->getDebugLoc()); + anyChanged = true; + } + } + } + } + + return anyChanged; +} diff --git a/lib/Module/LocationInfo.cpp b/lib/Module/LocationInfo.cpp index 6395970b08..3f7a3ee845 100644 --- a/lib/Module/LocationInfo.cpp +++ b/lib/Module/LocationInfo.cpp @@ -8,8 +8,9 @@ //===----------------------------------------------------------------------===// #include "klee/Module/LocationInfo.h" -#include "klee/Support/CompilerWarning.h" +#include "klee/Module/SarifReport.h" +#include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS #include "llvm/ADT/SmallVector.h" @@ -23,17 +24,42 @@ DISABLE_WARNING_DEPRECATED_DECLARATIONS #include "llvm/Support/FormattedStream.h" DISABLE_WARNING_POP +#include +#include + namespace klee { +PhysicalLocationJson LocationInfo::serialize() const { + // clang-format off + return PhysicalLocationJson{ + { + ArtifactLocationJson { + {file} + } + }, + { + 
RegionJson { + {line}, + std::nullopt, + column, + std::nullopt + } + } + }; + // clang-format on +} + +//////////////////////////////////////////////////////////////// + LocationInfo getLocationInfo(const llvm::Function *func) { const auto dsub = func->getSubprogram(); if (dsub != nullptr) { auto path = dsub->getFilename(); - return {path.str(), dsub->getLine(), 0}; // TODO why not use column here? + return {path.str(), dsub->getLine(), {}}; } - return {"", 0, 0}; + return {"", 0, {}}; } LocationInfo getLocationInfo(const llvm::Instruction *inst) { @@ -55,7 +81,7 @@ LocationInfo getLocationInfo(const llvm::Instruction *inst) { column = LexicalBlock->getColumn(); } } - return {full_path.str(), line, column}; + return {full_path.str(), line, {column}}; } return getLocationInfo(inst->getParent()->getParent()); @@ -72,15 +98,20 @@ LocationInfo getLocationInfo(const llvm::GlobalVariable *globalVar) { // Return location from any debug info for global variable. if (const llvm::DIGlobalVariable *debugInfoGlobalVar = debugInfoEntry->getVariable()) { - // Assume that global variable declared at line 0. return {debugInfoGlobalVar->getFilename().str(), - debugInfoGlobalVar->getLine(), 0}; + debugInfoGlobalVar->getLine(), + {}}; } } + // For `extern` variables return `external` file. + if (globalVar->hasExternalLinkage()) { + return {"external", 0, {}}; + } + // Fallback to empty location if there is no appropriate debug // info. - return {"", 0, 0}; + return {"", 0, {}}; } } // namespace klee diff --git a/lib/Module/Passes.h b/lib/Module/Passes.h index 01983e0b85..432f030a51 100644 --- a/lib/Module/Passes.h +++ b/lib/Module/Passes.h @@ -220,6 +220,46 @@ class ReturnSplitter : public llvm::FunctionPass { ReturnSplitter() : llvm::FunctionPass(ID) {} bool runOnFunction(llvm::Function &F) override; }; + +/// @brief Pass able to find the actual location of +/// `return` statements in source code. 
+/// +/// @details For function with multiple `return` statements +/// clang compiler generates LLVM IR with exactly one `ret` +/// instruction. `return` statements transform to the: +/// ``` +/// ret_reg = val +/// br ret_block +/// ... +/// ret_block: +/// ret_val = load ret_reg +/// ret ret_val +/// ``` +/// This pass finds such constructions and marks +/// `br ret_block` with `md_ret` metadata. +class ReturnLocationFinderPass : public llvm::FunctionPass { +public: + static char ID; + ReturnLocationFinderPass() : llvm::FunctionPass(ID) {} + bool runOnFunction(llvm::Function &) override; +}; + +/// @brief Pass able to find line in source code with +/// declaration of local variable. +/// +/// @details "Construction" of local variable in LLVM IR +/// is represented as allocation of memory in the beginning +/// of each function and subsequent call of `llvm.dbg.declare` +/// function on allocated memory. This pass moves `!dbg` infos +/// from calls to mentioned functions to the corresponding `alloca` +/// instructions. +class LocalVarDeclarationFinderPass : public llvm::FunctionPass { +public: + static char ID; + LocalVarDeclarationFinderPass() : llvm::FunctionPass(ID) {} + bool runOnFunction(llvm::Function &) override; +}; + } // namespace klee #endif /* KLEE_PASSES_H */ diff --git a/lib/Module/ReturnLocationFinderPass.cpp b/lib/Module/ReturnLocationFinderPass.cpp new file mode 100644 index 0000000000..77c79ec4ae --- /dev/null +++ b/lib/Module/ReturnLocationFinderPass.cpp @@ -0,0 +1,91 @@ +//===-- ReturnLocationFinderPass.cpp ----------------------------*- C++ -*-===// +// +// The KLEEF Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#include "Passes.h" + +#include "klee/Support/CompilerWarning.h" + +DISABLE_WARNING_PUSH +DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/Casting.h" +DISABLE_WARNING_POP + +#include + +using namespace klee; + +/// @brief Determines whether given block is a "return" block. +/// @param block block to check. +/// @return true iff block is a "return" block. +/// +/// @details Check is based on form of return block: either +/// just a single `ret` instruction without return value or +/// exactly two instructions `load` and `ret` where `ret` +/// return value loaded by `load`. +static bool isReturnBlock(const llvm::BasicBlock *block) { + bool shouldReturnConstant = false; + + switch (block->size()) { + case 2: { + if (!llvm::isa(&block->front())) { + return false; + } + shouldReturnConstant = true; + [[fallthrough]]; + } + case 1: { + const llvm::ReturnInst *returnInstruction = + llvm::dyn_cast(&block->back()); + if (!returnInstruction) { + return false; + } + // `isa_and_nonnull` required as return value may not exist + return shouldReturnConstant == llvm::isa_and_nonnull( + returnInstruction->getReturnValue()); + } + default: { + return false; + } + } +} + +//////////////////////////////////////////////// + +char ReturnLocationFinderPass::ID = 0; + +bool ReturnLocationFinderPass::runOnFunction(llvm::Function &function) { + llvm::BasicBlock &terminatorBlock = function.back(); + + if (!isReturnBlock(&terminatorBlock)) { + return false; + } + + for (auto predecessorBlock : predecessors(&terminatorBlock)) { + llvm::Instruction *predecessorTerminator = + predecessorBlock->getTerminator(); + // Predecessor instruction should be a `br` instruction. 
+ // Sometimes optimizer may generate `switch` instruction + // with branch at return block. But it is not mapped + // to a `return` statement in source code. + if (llvm::isa(predecessorTerminator)) { + // Attach metadata to the instruction. + llvm::MDTuple *predecessorMetadataTuple = + llvm::MDNode::get(predecessorTerminator->getContext(), {}); + predecessorTerminator->setMetadata("md.ret", predecessorMetadataTuple); + } + } + + return true; +} diff --git a/lib/Module/SarifReport.cpp b/lib/Module/SarifReport.cpp index 377baf888d..728e775c24 100644 --- a/lib/Module/SarifReport.cpp +++ b/lib/Module/SarifReport.cpp @@ -373,7 +373,7 @@ bool Location::isInside(const llvm::Function *f, locInfo.line >= startLine && locInfo.column <= *endColumn && locInfo.column >= *startColumn && origInsts.at(locInfo.line) - .at(locInfo.column) + .at(locInfo.column.value_or(0)) .count(inst.getOpcode()) != 0) { return true; } diff --git a/lib/Solver/BitwuzlaBuilder.cpp b/lib/Solver/BitwuzlaBuilder.cpp index 2c47a1440a..1849d29605 100644 --- a/lib/Solver/BitwuzlaBuilder.cpp +++ b/lib/Solver/BitwuzlaBuilder.cpp @@ -23,6 +23,7 @@ #include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" DISABLE_WARNING_POP diff --git a/lib/Solver/BitwuzlaBuilder.h b/lib/Solver/BitwuzlaBuilder.h index 9119832f3f..4e260f832a 100644 --- a/lib/Solver/BitwuzlaBuilder.h +++ b/lib/Solver/BitwuzlaBuilder.h @@ -14,6 +14,12 @@ #include "klee/Expr/ArrayExprHash.h" #include "klee/Expr/ExprHashMap.h" +#include "klee/Support/CompilerWarning.h" +DISABLE_WARNING_PUSH +DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" +DISABLE_WARNING_POP + #include #include diff --git a/lib/Solver/BitwuzlaSolver.cpp b/lib/Solver/BitwuzlaSolver.cpp index 8c7263f0d5..b3fd6ebddb 100644 --- a/lib/Solver/BitwuzlaSolver.cpp +++ b/lib/Solver/BitwuzlaSolver.cpp @@ -530,7 +530,9 @@ 
bool BitwuzlaSolverImpl::internalRunSolver( timeoutInMicroSeconds = UINT_MAX; BitwuzlaTerminator terminator(timeoutInMicroSeconds); - struct sigaction action, old_action; + struct sigaction action {}; + struct sigaction old_action {}; + action.sa_handler = signal_handler; action.sa_flags = 0; sigaction(SIGINT, &action, &old_action); diff --git a/lib/Solver/Z3BitvectorBuilder.cpp b/lib/Solver/Z3BitvectorBuilder.cpp index 82e2ba206f..881f41a701 100644 --- a/lib/Solver/Z3BitvectorBuilder.cpp +++ b/lib/Solver/Z3BitvectorBuilder.cpp @@ -22,6 +22,7 @@ #include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" DISABLE_WARNING_POP diff --git a/lib/Solver/Z3BitvectorBuilder.h b/lib/Solver/Z3BitvectorBuilder.h index bfc6c6412d..d6f9a3d3bd 100644 --- a/lib/Solver/Z3BitvectorBuilder.h +++ b/lib/Solver/Z3BitvectorBuilder.h @@ -19,6 +19,12 @@ #include #include +#include "klee/Support/CompilerWarning.h" +DISABLE_WARNING_PUSH +DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" +DISABLE_WARNING_POP + namespace klee { class Z3BitvectorBuilder : public Z3Builder { private: diff --git a/lib/Support/RoundingModeUtil.cpp b/lib/Support/RoundingModeUtil.cpp index d8b8d0d81a..70428cf56c 100644 --- a/lib/Support/RoundingModeUtil.cpp +++ b/lib/Support/RoundingModeUtil.cpp @@ -11,6 +11,7 @@ #include "klee/Support/CompilerWarning.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/Support/ErrorHandling.h" DISABLE_WARNING_POP diff --git a/test/Feature/SymbolicSizes/FirstAndLastElements.c b/test/Feature/SymbolicSizes/FirstAndLastElements.c index 10ba4c5784..ff9f612e77 100644 --- a/test/Feature/SymbolicSizes/FirstAndLastElements.c +++ b/test/Feature/SymbolicSizes/FirstAndLastElements.c @@ -1,6 +1,6 @@ // RUN: %clang %s -g -emit-llvm %O0opt -c -o %t1.bc // RUN: rm -rf 
%t.klee-out -// RUN: %klee --output-dir=%t.klee-out --check-out-of-memory --use-sym-size-alloc --use-merged-pointer-dereference=true %t1.bc 2>&1 | FileCheck %s +// RUN: %klee --output-dir=%t.klee-out --check-out-of-memory --use-sym-size-alloc --use-merged-pointer-dereference=true --max-sym-alloc=128 %t1.bc 2>&1 | FileCheck %s #include "klee/klee.h" #include diff --git a/test/SARIF/GSAC/ContextSensitive/ContextSensitive.c b/test/SARIF/GSAC/ContextSensitive/ContextSensitive.c new file mode 100644 index 0000000000..3dcf6fffdf --- /dev/null +++ b/test/SARIF/GSAC/ContextSensitive/ContextSensitive.c @@ -0,0 +1,36 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ +#include + +int *foo(int size) { + int *data = (int *)malloc(size * sizeof(int)); + return data; +} + +int main() { + int size = 5; + int *first_data = foo(size++); // Call of 'foo(5)' + int *second_data = foo(size--); // Call of 'foo(6)' + + for (int i = 0; i <= size; i++) { + // Length of 'first_data' is 5 and 'size' also is 5 + first_data[i] = i; // buffer-overflow + } + for (int i = 0; i <= size; i++) { + // Length of 'second_data' is 6 + second_data[i] = i; + } + free(first_data); + free(second_data); +} diff --git a/test/SARIF/GSAC/ContextSensitive/pattern.sarif b/test/SARIF/GSAC/ContextSensitive/pattern.sarif new file mode 100644 index 0000000000..74331b99a3 --- /dev/null +++ 
b/test/SARIF/GSAC/ContextSensitive/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Heap memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "ContextSensitive.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 22, + "startLine": 17 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "ContextSensitive.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 19, + "startLine": 28 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/GSAC/EASY02/EASY02.c b/test/SARIF/GSAC/EASY02/EASY02.c new file mode 100644 index 0000000000..e911767c7c --- /dev/null +++ b/test/SARIF/GSAC/EASY02/EASY02.c @@ -0,0 +1,23 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +#include + +int main() { + int size = 10; + int *arr = malloc(size * sizeof(int)); // Memory allocation + // Access is out of bounds + arr[size] = 4; // buffer-overflow + free(arr); +} diff --git a/test/SARIF/GSAC/EASY02/pattern.sarif b/test/SARIF/GSAC/EASY02/pattern.sarif new file mode 100644 index 0000000000..6afb225f75 --- /dev/null +++ 
b/test/SARIF/GSAC/EASY02/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Heap memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "EASY02.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 14, + "startLine": 19 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "EASY02.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 13, + "startLine": 21 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/GSAC/EASY02_fix/EASY02_fix.c b/test/SARIF/GSAC/EASY02_fix/EASY02_fix.c new file mode 100644 index 0000000000..6a46dcb703 --- /dev/null +++ b/test/SARIF/GSAC/EASY02_fix/EASY02_fix.c @@ -0,0 +1,22 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: test ! 
-f %t.klee-out/report.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +#include + +int main() { + int size = 10; + int *arr = malloc(size * sizeof(int)); + arr[size - 1] = 4; + free(arr); +} diff --git a/test/SARIF/GSAC/EASY03/EASY03.c b/test/SARIF/GSAC/EASY03/EASY03.c new file mode 100644 index 0000000000..fa12dbc9e1 --- /dev/null +++ b/test/SARIF/GSAC/EASY03/EASY03.c @@ -0,0 +1,26 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +#include + +int main() { + int *data; + int size = 5; + data = malloc(size * sizeof(int)); // Memory allocation + for (int i = 0; i <= size; i++) { // Last iteration of this cycle is (size + 1)_th + // In the last iteration access is out of bounds + data[i] = i; // buffer-overflow + } + free(data); +} diff --git a/test/SARIF/GSAC/EASY03/pattern.sarif b/test/SARIF/GSAC/EASY03/pattern.sarif new file mode 100644 index 0000000000..64373d487b --- /dev/null +++ b/test/SARIF/GSAC/EASY03/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + 
"text": "Heap memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "EASY03.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 10, + "startLine": 20 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "EASY03.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 13, + "startLine": 23 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/GSAC/EASY03_fix/EASY03_fix.c b/test/SARIF/GSAC/EASY03_fix/EASY03_fix.c new file mode 100644 index 0000000000..9aefc7c8ae --- /dev/null +++ b/test/SARIF/GSAC/EASY03_fix/EASY03_fix.c @@ -0,0 +1,25 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: test ! 
-f %t.klee-out/report.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +#include + +int main() { + int *data; + int size = 5; + data = malloc(size * sizeof(int)); + for (int i = 0; i < size; i++) { + data[i] = i; + } + free(data); +} diff --git a/test/SARIF/GSAC/FieldSensitive/FieldSensitive.c b/test/SARIF/GSAC/FieldSensitive/FieldSensitive.c new file mode 100644 index 0000000000..64bb519fa3 --- /dev/null +++ b/test/SARIF/GSAC/FieldSensitive/FieldSensitive.c @@ -0,0 +1,41 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +#include + +struct Data { + int *first_data; + int *second_data; +}; + +void foo(struct Data *data, int size) { + data->second_data = (int *)malloc(size * sizeof(int)); // Allocated 20 bytes of memory + size++; + data->first_data = (int *)malloc(size * sizeof(int)); // Allocated 24 bytes of memory +} + +int main() { + int size = 5; + struct Data data; + foo(&data, size); + for (int i = 0; i <= size; i++) { + data.first_data[i] = i; + } + // Length of 'second_data' is 5 and 'size' also is 5 + for (int i = 0; i <= size; i++) { + data.second_data[i] = i; // 
buffer-overflow + } + free(data.first_data); + free(data.second_data); +} diff --git a/test/SARIF/GSAC/FieldSensitive/pattern.sarif b/test/SARIF/GSAC/FieldSensitive/pattern.sarif new file mode 100644 index 0000000000..d4e1f7a811 --- /dev/null +++ b/test/SARIF/GSAC/FieldSensitive/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Heap memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "FieldSensitive.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 30, + "startLine": 23 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "FieldSensitive.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 25, + "startLine": 37 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/GSAC/FlowSensitive/FlowSensitive.c b/test/SARIF/GSAC/FlowSensitive/FlowSensitive.c new file mode 100644 index 0000000000..b3a1247dee --- /dev/null +++ b/test/SARIF/GSAC/FlowSensitive/FlowSensitive.c @@ -0,0 +1,54 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * CVE-2022-23462 + */ + +#include +#include + +#define IWNUMBUF_SIZE 70 
+#define NEWBUF_SIZE 32 + +void iwjson_ftoa(long double val, char *buf, size_t *out_len) { + // If size of 'buf' is 32 which is less than 64, then will be buffer-overflow + int len = snprintf(buf, 64, "%.8Lf", val); + if (len <= 0) { + buf[0] = '\0'; + *out_len = 0; + return; + } + while (len > 0 && buf[len - 1] == '0') { + buf[len - 1] = '\0'; + len--; + } + if ((len > 0) && (buf[len - 1] == '.')) { + buf[len - 1] = '\0'; + len--; + } + *out_len = (size_t)len; +} + +int main() { + char buf[IWNUMBUF_SIZE]; + char new_buf[NEWBUF_SIZE]; + char *alias_to_buf = buf; + size_t *out_len = malloc(sizeof(size_t)); + *out_len = 50; + iwjson_ftoa(12345678912345678912345.1234, alias_to_buf, out_len); + alias_to_buf = new_buf; + iwjson_ftoa(12345678912345678912345.1234, alias_to_buf, out_len); + free(out_len); +} diff --git a/test/SARIF/GSAC/FlowSensitive/pattern.sarif b/test/SARIF/GSAC/FlowSensitive/pattern.sarif new file mode 100644 index 0000000000..690c186335 --- /dev/null +++ b/test/SARIF/GSAC/FlowSensitive/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Local memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "FlowSensitive.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 8, + "startLine": 46 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "FlowSensitive.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 13, + "startLine": 27 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/GSAC/HARD01/HARD01.c b/test/SARIF/GSAC/HARD01/HARD01.c new file mode 100644 index 0000000000..c6d9403cd0 --- /dev/null +++ b/test/SARIF/GSAC/HARD01/HARD01.c @@ -0,0 +1,49 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: 
rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2022-23462 + */ + +#include +#include + +#define IWNUMBUF_SIZE 32 +char buf[IWNUMBUF_SIZE]; + +void iwjson_ftoa(long double val, size_t *out_len, size_t buf_size) { + // Length of 'buf' is 32. But size of 'maxlen' in 'snprintf' is 64 + int len = snprintf(buf, buf_size, "%.8Lf", val); // buffer overflow + if (len <= 0) { + buf[0] = '\0'; + *out_len = 0; + return; + } + while (len > 0 && buf[len - 1] == '0') { + buf[len - 1] = '\0'; + len--; + } + if ((len > 0) && (buf[len - 1] == '.')) { + buf[len - 1] = '\0'; + len--; + } + *out_len = (size_t)len; +} + +int main() { + size_t *out_len = malloc(sizeof(size_t)); + *out_len = 50; + iwjson_ftoa(12345678912345678912345.1234, out_len, 64); + free(out_len); +} diff --git a/test/SARIF/GSAC/HARD01/pattern.sarif b/test/SARIF/GSAC/HARD01/pattern.sarif new file mode 100644 index 0000000000..f4a221800a --- /dev/null +++ b/test/SARIF/GSAC/HARD01/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Global memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "HARD01.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": null, + "startLine": 23 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + 
}, + "physicalLocation": { + "artifactLocation": { + "uri": "HARD01.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 13, + "startLine": 27 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/GSAC/HARD01_fix/HARD01_fix.c b/test/SARIF/GSAC/HARD01_fix/HARD01_fix.c new file mode 100644 index 0000000000..a3163a6515 --- /dev/null +++ b/test/SARIF/GSAC/HARD01_fix/HARD01_fix.c @@ -0,0 +1,48 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: test ! -f %t.klee-out/report.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2022-23462 (fixed) + */ + +#include +#include + +#define IWNUMBUF_SIZE 32 +char buf[IWNUMBUF_SIZE]; + +void iwjson_ftoa(long double val, size_t *out_len, size_t buf_size) { + int len = snprintf(buf, buf_size, "%.8Lf", val); + if (len <= 0) { + buf[0] = '\0'; + *out_len = 0; + return; + } + while (len > 0 && buf[len - 1] == '0') { + buf[len - 1] = '\0'; + len--; + } + if ((len > 0) && (buf[len - 1] == '.')) { + buf[len - 1] = '\0'; + len--; + } + *out_len = (size_t)len; +} + +int main() { + size_t *out_len = malloc(sizeof(size_t)); + *out_len = 50; + iwjson_ftoa(12345678912345678912345.1234, out_len, IWNUMBUF_SIZE); + free(out_len); +} diff --git a/test/SARIF/GSAC/HARD02/HARD02.c b/test/SARIF/GSAC/HARD02/HARD02.c new file mode 100644 index 0000000000..e0373bca5b --- /dev/null +++ b/test/SARIF/GSAC/HARD02/HARD02.c @@ -0,0 +1,47 @@ +// RUN: %clang 
-emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2022-26768 + */ +#include "HARD02.h" +#include +#include +#include + +static char initialLogFileName[FILENAMESIZE] = ""; + +void EXPORT_CALL lou_logFile(const char *fileName) { + if (fileName == NULL || fileName[0] == 0) + return; + if (initialLogFileName[0] == 0) + // Size of 'fileName' is greater than 'FILENAMESIZE' + strcpy(initialLogFileName, fileName); // buffer-overflow + logFile = fopen(fileName, "a"); + if (logFile == NULL && initialLogFileName[0] != 0) + logFile = fopen(initialLogFileName, "a"); + if (logFile == NULL) { + fprintf(stderr, "Cannot open log file %s\n", fileName); + logFile = stderr; + } +} + +int main() { + const int sz = 260; + char *fileName = malloc(sz + 1); // Allocate 261 bytes of memory + memset(fileName, 's', sz); + fileName[sz] = '\0'; + lou_logFile(fileName); + free(fileName); +} diff --git a/test/SARIF/GSAC/HARD02/HARD02.h b/test/SARIF/GSAC/HARD02/HARD02.h new file mode 100644 index 0000000000..55f44e58f9 --- /dev/null +++ b/test/SARIF/GSAC/HARD02/HARD02.h @@ -0,0 +1,35 @@ +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + 
***************************************************************************************/ + +#ifndef HARD02_H +#define HARD02_H + +#include + +#define EXPORT_CALL + +#ifndef __FILE_defined +#define __FILE_defined 1 + +struct _IO_FILE; + +typedef struct _IO_FILE FILE; + +#endif + +extern FILE *stderr; +#define stderr stderr + +#define FILENAMESIZE 256 + +static FILE *logFile = NULL; + +void EXPORT_CALL lou_logFile(const char *fileName); + +#endif // HARD02_H diff --git a/test/SARIF/GSAC/HARD02/pattern.sarif b/test/SARIF/GSAC/HARD02/pattern.sarif new file mode 100644 index 0000000000..907c2b8eb6 --- /dev/null +++ b/test/SARIF/GSAC/HARD02/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Global memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "HARD02.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": null, + "startLine": 23 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "HARD02.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 5, + "startLine": 30 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/GSAC/HARD02_fix/HARD02_fix.c b/test/SARIF/GSAC/HARD02_fix/HARD02_fix.c new file mode 100644 index 0000000000..f23361c49e --- /dev/null +++ b/test/SARIF/GSAC/HARD02_fix/HARD02_fix.c @@ -0,0 +1,46 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: test ! 
-f %t.klee-out/report.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2022-26768 (fixed) + */ +#include "HARD02_fix.h" +#include +#include +#include + +static char initialLogFileName[FILENAMESIZE] = ""; + +void EXPORT_CALL lou_logFile(const char *fileName) { + if (fileName == NULL || fileName[0] == 0 || strlen(fileName) >= FILENAMESIZE) + return; + if (initialLogFileName[0] == 0) + strcpy(initialLogFileName, fileName); + logFile = fopen(fileName, "a"); + if (logFile == NULL && initialLogFileName[0] != 0) + logFile = fopen(initialLogFileName, "a"); + if (logFile == NULL) { + fprintf(stderr, "Cannot open log file %s\n", fileName); + logFile = stderr; + } +} + +int main() { + const int sz = 260; + char *fileName = malloc(sz + 1); + memset(fileName, 's', sz); + fileName[sz] = '\0'; + lou_logFile(fileName); + free(fileName); +} diff --git a/test/SARIF/GSAC/HARD02_fix/HARD02_fix.h b/test/SARIF/GSAC/HARD02_fix/HARD02_fix.h new file mode 100644 index 0000000000..3a495e6d8b --- /dev/null +++ b/test/SARIF/GSAC/HARD02_fix/HARD02_fix.h @@ -0,0 +1,35 @@ +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +#ifndef HARD02_FIX_H +#define HARD02_FIX_H + +#include + +#define EXPORT_CALL + +#ifndef __FILE_defined +#define __FILE_defined 1 + +struct _IO_FILE; + +typedef struct _IO_FILE FILE; + +#endif + +extern FILE *stderr; +#define stderr stderr + +#define FILENAMESIZE 256 + +static FILE *logFile 
= NULL; + +void EXPORT_CALL lou_logFile(const char *fileName); + +#endif // HARD02_FIX_H diff --git a/test/SARIF/GSAC/MEDIUM01/MEDIUM01.c b/test/SARIF/GSAC/MEDIUM01/MEDIUM01.c new file mode 100644 index 0000000000..3593408a94 --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM01/MEDIUM01.c @@ -0,0 +1,48 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2022-0185 + */ + +#include +#include +#include + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1ULL << PAGE_SHIFT) + +int foo(unsigned int size, const char *key) { + size_t len = 3; + + if (len > PAGE_SIZE - 2 - size) { + printf("Too large\n"); + return -1; + } + + // 'strlen' can't find '\0' + len = strlen(key); // buffer-overflow + return len; +} + +int main() { + unsigned int size = 4294967295; + char *key = malloc(10); + // After 'memcpy()' 'key' doesn't have a '\0' symbol + memcpy(key, "asddds4323", 10); + + foo(size, key); + + free(key); +} diff --git a/test/SARIF/GSAC/MEDIUM01/pattern.sarif b/test/SARIF/GSAC/MEDIUM01/pattern.sarif new file mode 100644 index 0000000000..d12a5daab4 --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM01/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Heap memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "MEDIUM01.c" + }, 
+ "region": { + "endColumn": null, + "endLine": null, + "startColumn": 15, + "startLine": 41 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "MEDIUM01.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 9, + "startLine": 35 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/GSAC/MEDIUM01_fix/MEDIUM01_fix.c b/test/SARIF/GSAC/MEDIUM01_fix/MEDIUM01_fix.c new file mode 100644 index 0000000000..6b7a288e96 --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM01_fix/MEDIUM01_fix.c @@ -0,0 +1,46 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: test ! -f %t.klee-out/report.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2022-0185 (fixed) + */ + +#include +#include +#include + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1ULL << PAGE_SHIFT) + +int foo(unsigned int size, const char *key) { + size_t len = 3; + + if (size + len + 2 > PAGE_SIZE) { + printf("Too large\n"); + return -1; + } + + len = strlen(key); + return len; +} + +int main() { + unsigned int size = 4294967295; + char *key = malloc(10); + memcpy(key, "asddds432", 10); + + foo(size, key); + + free(key); +} diff --git a/test/SARIF/GSAC/MEDIUM02/MEDIUM02.c b/test/SARIF/GSAC/MEDIUM02/MEDIUM02.c new file mode 100644 index 0000000000..9eb6eab10a --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM02/MEDIUM02.c @@ -0,0 
+1,38 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2022-3077 + */ + +#include +#include + +void foo(int *data) { + unsigned char buffer[32 + 16]; + // Access to 'data' + 4 + 'data[0]' is out of bounds + memcpy(&buffer[1], &data[1], data[0]); // buffer-overflow +} + +int main() { + int *data = (int *)malloc(4 * sizeof(int)); + data[0] = 13; + data[1] = 2; + data[2] = 3; + data[3] = 5; + + foo(data); + + free(data); +} diff --git a/test/SARIF/GSAC/MEDIUM02/pattern.sarif b/test/SARIF/GSAC/MEDIUM02/pattern.sarif new file mode 100644 index 0000000000..0b42928d8b --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM02/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Heap memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "MEDIUM02.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 22, + "startLine": 29 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "MEDIUM02.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 3, + "startLine": 25 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git 
a/test/SARIF/GSAC/MEDIUM02_fix/MEDIUM02_fix.c b/test/SARIF/GSAC/MEDIUM02_fix/MEDIUM02_fix.c new file mode 100644 index 0000000000..b75146c467 --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM02_fix/MEDIUM02_fix.c @@ -0,0 +1,40 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: test ! -f %t.klee-out/report.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2022-3077 (fixed) + */ + +#include +#include + +void foo(int *data) { + unsigned char buffer[32 + 16]; + if (data[0] > 12) + return; + + memcpy(&buffer[1], &data[1], data[0]); +} + +int main() { + int *data = (int *)malloc(4 * sizeof(int)); + data[0] = 13; + data[1] = 2; + data[2] = 3; + data[3] = 5; + + foo(data); + + free(data); +} diff --git a/test/SARIF/GSAC/MEDIUM03/MEDIUM03.c b/test/SARIF/GSAC/MEDIUM03/MEDIUM03.c new file mode 100644 index 0000000000..ae785a323a --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM03/MEDIUM03.c @@ -0,0 +1,79 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest 
+ * + ***************************************************************************************/ + +/* + * Based on CVE-2023-0819 + */ + +#include +#include +#include + +struct Info { + unsigned char *data; + int size; +}; + +typedef struct { + unsigned short year; + unsigned char month; + unsigned char day; + unsigned char hour; + unsigned char minute; + unsigned char second; +} TIME; + +void foo(struct Info info) { + unsigned char *data = + info.data; // 'data' is an array with unsigned char elements. With 'info.size' length. + int data_size = info.size; + TIME time_table; + + unsigned int date, yp, mp, k; + + if (data_size != 5) { + printf("Corrupted size\n"); + } + + date = data[0] * 256 + data[1]; + yp = (unsigned int)((date - 15078.2) / 365.25); + mp = (unsigned int)((date - 14956.1 - (unsigned int)(yp * 365.25)) / 30.6001); + time_table.day = + (unsigned int)(date - 14956 - (unsigned int)(yp * 365.25) - (unsigned int)(mp * 30.6001)); + if (mp == 14 || mp == 15) + k = 1; + else + k = 0; + time_table.year = yp + k + 1900; + time_table.month = mp - 1 - k * 12; + + time_table.hour = 10 * ((data[2] & 0xf0) >> 4) + (data[2] & 0x0f); + time_table.minute = 10 * ((data[3] & 0xf0) >> 4) + (data[3] & 0x0f); + // Access to 'data' is out of bounds + time_table.second = 10 * ((data[4] & 0xf0) >> 4) + (data[4] & 0x0f); // buffer-overflow + + printf("year: %d, month: %d, day: %d, hour: %d, minute: %d, second %d", time_table.year, + time_table.month, time_table.day, time_table.hour, time_table.minute, time_table.second); +} + +int main() { + struct Info info; + info.data = malloc(4); // Memory allocation + memcpy(info.data, "asdf", 4); + info.size = 4; + + foo(info); + + free(info.data); +} diff --git a/test/SARIF/GSAC/MEDIUM03/pattern.sarif b/test/SARIF/GSAC/MEDIUM03/pattern.sarif new file mode 100644 index 0000000000..e7898ff874 --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM03/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + 
"threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Heap memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "MEDIUM03.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 15, + "startLine": 72 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "MEDIUM03.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 30, + "startLine": 64 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/GSAC/MEDIUM03_fix/MEDIUM03_fix.c b/test/SARIF/GSAC/MEDIUM03_fix/MEDIUM03_fix.c new file mode 100644 index 0000000000..dae0b506fb --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM03_fix/MEDIUM03_fix.c @@ -0,0 +1,81 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: test ! 
-f %t.klee-out/report.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2023-0819 (fixed) + */ + +#include +#include +#include + +struct Info { + unsigned char *data; + int size; +}; + +typedef struct { + unsigned short year; + unsigned char month; + unsigned char day; + unsigned char hour; + unsigned char minute; + unsigned char second; +} TIME; + +void foo(struct Info info) { + unsigned char *data = info.data; + int data_size = info.size; + TIME time_table; + + unsigned int date, yp, mp, k; + + if (data_size != 5) { + printf("Corrupted size\n"); + } + + if (data_size < 5) { + return; + } + + date = data[0] * 256 + data[1]; + yp = (unsigned int)((date - 15078.2) / 365.25); + mp = (unsigned int)((date - 14956.1 - (unsigned int)(yp * 365.25)) / 30.6001); + time_table.day = + (unsigned int)(date - 14956 - (unsigned int)(yp * 365.25) - (unsigned int)(mp * 30.6001)); + if (mp == 14 || mp == 15) + k = 1; + else + k = 0; + time_table.year = yp + k + 1900; + time_table.month = mp - 1 - k * 12; + + time_table.hour = 10 * ((data[2] & 0xf0) >> 4) + (data[2] & 0x0f); + time_table.minute = 10 * ((data[3] & 0xf0) >> 4) + (data[3] & 0x0f); + time_table.second = 10 * ((data[4] & 0xf0) >> 4) + (data[4] & 0x0f); + + printf("year: %d, month: %d, day: %d, hour: %d, minute: %d, second %d", time_table.year, + time_table.month, time_table.day, time_table.hour, time_table.minute, time_table.second); +} + +int main() { + struct Info info; + info.data = malloc(4); + memcpy(info.data, "asdf", 4); + info.size = 4; + + foo(info); + + free(info.data); +} diff --git a/test/SARIF/GSAC/MEDIUM04/MEDIUM04.c b/test/SARIF/GSAC/MEDIUM04/MEDIUM04.c new file mode 100644 index 
0000000000..a7ca35d6bf --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM04/MEDIUM04.c @@ -0,0 +1,38 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2023-38559 + */ +#include + +void devn_pcx_write_rle(const char *from, const char *end, int step) { + while (from < end) { + char data = *from; + + from += step; + // Access to 'from' is out of range + if (data != *from || from == end) { // buffer-overflow + return; + } + from += step; + } +} + +int main() { + char *a = malloc(sizeof(char)); + a[0] = 'a'; + devn_pcx_write_rle(a, a + 4, 4); + free(a); +} diff --git a/test/SARIF/GSAC/MEDIUM04/pattern.sarif b/test/SARIF/GSAC/MEDIUM04/pattern.sarif new file mode 100644 index 0000000000..b84b02d64d --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM04/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Heap memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "MEDIUM04.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 13, + "startLine": 34 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "MEDIUM04.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 17, 
+ "startLine": 26 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/GSAC/MEDIUM04_fix/MEDIUM04_fix.c b/test/SARIF/GSAC/MEDIUM04_fix/MEDIUM04_fix.c new file mode 100644 index 0000000000..7c164e7f28 --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM04_fix/MEDIUM04_fix.c @@ -0,0 +1,37 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: test ! -f %t.klee-out/report.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2023-38559 + */ +#include + +void devn_pcx_write_rle(const char *from, const char *end, int step) { + while (from < end) { + char data = *from; + + from += step; + if (from >= end || data != *from) { + return; + } + from += step; + } +} + +int main() { + char *a = malloc(sizeof(char)); + a[0] = 'a'; + devn_pcx_write_rle(a, a + 4, 4); + free(a); +} diff --git a/test/SARIF/GSAC/MEDIUM05/MEDIUM05.c b/test/SARIF/GSAC/MEDIUM05/MEDIUM05.c new file mode 100644 index 0000000000..b5e3717411 --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM05/MEDIUM05.c @@ -0,0 +1,49 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: 
https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2022-23462 + */ + +#include +#include + +#define IWNUMBUF_SIZE 32 + +void iwjson_ftoa(long double val, char buf[static IWNUMBUF_SIZE], size_t *out_len) { + // Size of 'buf' is 32 which is less than 64 + int len = snprintf(buf, 64, "%.8Lf", val); // buffer-overflow + if (len <= 0) { + buf[0] = '\0'; + *out_len = 0; + return; + } + while (len > 0 && buf[len - 1] == '0') { + buf[len - 1] = '\0'; + len--; + } + if ((len > 0) && (buf[len - 1] == '.')) { + buf[len - 1] = '\0'; + len--; + } + *out_len = (size_t)len; +} + +int main() { + char buf[IWNUMBUF_SIZE]; + size_t *out_len = malloc(sizeof(size_t)); + *out_len = 50; + iwjson_ftoa(12345678912345678912345.1234, buf, out_len); + free(out_len); +} diff --git a/test/SARIF/GSAC/MEDIUM05/pattern.sarif b/test/SARIF/GSAC/MEDIUM05/pattern.sarif new file mode 100644 index 0000000000..a8561c284c --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM05/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Local memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "MEDIUM05.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 8, + "startLine": 44 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "MEDIUM05.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 13, + "startLine": 26 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/GSAC/MEDIUM05_fix/MEDIUM05_fix.c b/test/SARIF/GSAC/MEDIUM05_fix/MEDIUM05_fix.c new file mode 100644 index 
0000000000..123c4902ef --- /dev/null +++ b/test/SARIF/GSAC/MEDIUM05_fix/MEDIUM05_fix.c @@ -0,0 +1,48 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: test ! -f %t.klee-out/report.sarif + +/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * Based on CVE-2022-23462 (fixed) + */ + +#include +#include + +#define IWNUMBUF_SIZE 32 + +void iwjson_ftoa(long double val, char buf[static IWNUMBUF_SIZE], size_t *out_len) { + int len = snprintf(buf, IWNUMBUF_SIZE, "%.8Lf", val); + if (len <= 0) { + buf[0] = '\0'; + *out_len = 0; + return; + } + while (len > 0 && buf[len - 1] == '0') { + buf[len - 1] = '\0'; + len--; + } + if ((len > 0) && (buf[len - 1] == '.')) { + buf[len - 1] = '\0'; + len--; + } + *out_len = (size_t)len; +} + +int main() { + char buf[IWNUMBUF_SIZE]; + size_t *out_len = malloc(sizeof(size_t)); + *out_len = 50; + iwjson_ftoa(12345678912345678912345.1234, buf, out_len); + free(out_len); +} diff --git a/test/SARIF/GSAC/PathSensitive/PathSensitive.c b/test/SARIF/GSAC/PathSensitive/PathSensitive.c new file mode 100644 index 0000000000..051ba1488f --- /dev/null +++ b/test/SARIF/GSAC/PathSensitive/PathSensitive.c @@ -0,0 +1,36 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + 
+/*************************************************************************************** + * Title: GSAC + * Author: https://github.com/GSACTech + * Date: 2023 + * Code version: 1.0 + * Availability: https://github.com/GSACTech/contest + * + ***************************************************************************************/ + +/* + * CVE-2023-0819 + */ + +#include + +int main() { + int *data; + int size = 5; + data = malloc(size * sizeof(int)); // Allocated 20 bytes of memory + // Length of 'data' is 5 + for (int i = 0; i < size; i++) { + data[i] = i; + } + if (data[0]) { // Condition is always false + data[5] = data[4]; + } + if (data[4]) { + data[5] = data[4]; // buffer-overflow + } + free(data); +} diff --git a/test/SARIF/GSAC/PathSensitive/pattern.sarif b/test/SARIF/GSAC/PathSensitive/pattern.sarif new file mode 100644 index 0000000000..a5ad7c0279 --- /dev/null +++ b/test/SARIF/GSAC/PathSensitive/pattern.sarif @@ -0,0 +1,57 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Heap memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "PathSensitive.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 10, + "startLine": 24 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "PathSensitive.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 13, + "startLine": 33 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/Generic/LazyInitialization/LazyInitialization.c b/test/SARIF/Generic/LazyInitialization/LazyInitialization.c new file mode 100644 index 0000000000..56008f2436 --- /dev/null +++ b/test/SARIF/Generic/LazyInitialization/LazyInitialization.c @@ -0,0 +1,16 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf 
%t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out %t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +#include "klee/klee.h" + +#include + +int main() { + char *s; + klee_make_symbolic(&s, sizeof(s), "s"); + + *s = 100; + s[200] = 200; +} diff --git a/test/SARIF/Generic/LazyInitialization/pattern.sarif b/test/SARIF/Generic/LazyInitialization/pattern.sarif new file mode 100644 index 0000000000..7144bfb4a2 --- /dev/null +++ b/test/SARIF/Generic/LazyInitialization/pattern.sarif @@ -0,0 +1,69 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "memory error: null pointer exception" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "LazyInitialization.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 6, + "startLine": 14 + } + } + } + } + ] + } + ] + } + ] + }, + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "LazyInitialization.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 10, + "startLine": 15 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/Generic/SymbolicSizeArray/SymbolicSizeArray.c b/test/SARIF/Generic/SymbolicSizeArray/SymbolicSizeArray.c new file mode 100644 index 0000000000..c39107dfd3 --- /dev/null +++ b/test/SARIF/Generic/SymbolicSizeArray/SymbolicSizeArray.c @@ -0,0 +1,18 @@ +// RUN: %clang -emit-llvm -g -c %s -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee -write-sarifs --use-sym-size-alloc --use-sym-size-li --skip-not-symbolic-objects --posix-runtime --libc=uclibc -cex-cache-validity-cores --output-dir=%t.klee-out 
%t.bc > %t.log +// RUN: %checker %t.klee-out/report.sarif %S/pattern.sarif + +#include "klee/klee.h" + +#include + +int main() { + int n; + klee_make_symbolic(&n, sizeof(n), "n"); + + char *s = (char *)malloc(n); + s[1] = 10; + s[2] = 20; + s[0] = 0; +} diff --git a/test/SARIF/Generic/SymbolicSizeArray/pattern.sarif b/test/SARIF/Generic/SymbolicSizeArray/pattern.sarif new file mode 100644 index 0000000000..29cd7288af --- /dev/null +++ b/test/SARIF/Generic/SymbolicSizeArray/pattern.sarif @@ -0,0 +1,106 @@ +{ + "runs": [ + { + "results": [ + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Heap memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "SymbolicSizeArray.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 21, + "startLine": 14 + } + } + }, + "metadata": null + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "SymbolicSizeArray.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 8, + "startLine": 15 + } + } + } + } + ] + } + ] + } + ] + }, + { + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "message": { + "text": "Heap memory allocation" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "SymbolicSizeArray.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 21, + "startLine": 14 + } + } + } + }, + { + "location": { + "message": { + "text": "memory error: out of bound pointer" + }, + "physicalLocation": { + "artifactLocation": { + "uri": "SymbolicSizeArray.c" + }, + "region": { + "endColumn": null, + "endLine": null, + "startColumn": 8, + "startLine": 16 + } + } + } + } + ] + } + ] + } + ] + } + ] + } + ], + "version": "2.1.0" +} diff --git a/test/SARIF/checker.py b/test/SARIF/checker.py new file mode 100644 index 0000000000..ea45b871d4 --- /dev/null 
+++ b/test/SARIF/checker.py
@@ -0,0 +1,138 @@
+#!/usr/bin/python3
+"""Check a SARIF report produced by KLEE against an expected pattern.
+
+For every result only the first (source) and the last (sink) location of the
+first thread flow are compared: message, region and URI suffix.
+"""
+
+import sys
+import json
+import os
+
+from typing import Tuple
+
+# Raised when a document lacks the structure the checker indexes into.
+_MALFORMED = (KeyError, IndexError, TypeError)
+
+
+def compare_locations(source, pattern) -> Tuple[bool, str]:
+    """Compare two SARIF location objects.
+
+    Returns (True, "") when message and region are equal and the source URI
+    ends with the pattern URI; otherwise (False, reason).
+    """
+    try:
+        if source["message"] != pattern["message"]:
+            return (
+                False,
+                f"different messages: {source['message']['text']} vs {pattern['message']['text']}",
+            )
+        if (
+            source["physicalLocation"]["region"]
+            != pattern["physicalLocation"]["region"]
+        ):
+            return (False, "different locations")
+
+        source_path = source["physicalLocation"]["artifactLocation"]["uri"]
+        pattern_path = pattern["physicalLocation"]["artifactLocation"]["uri"]
+
+        # Only a suffix match is required, so a pattern may name just the tail.
+        if str(source_path).endswith(str(pattern_path)):
+            return (True, "")
+
+        return (False, f"different locations: {source_path} vs {pattern_path}")
+    except _MALFORMED:
+        return (False, "Not a SARIF format given")
+
+
+def _endpoint_location(document, result_id, index):
+    """Return location `index` from the first thread flow of one result."""
+    thread_flow = document["runs"][0]["results"][result_id]["codeFlows"][0][
+        "threadFlows"
+    ][0]
+    return thread_flow["locations"][index]["location"]
+
+
+def _validate_endpoint(source, pattern, index, role) -> Tuple[bool, str]:
+    """Compare location `index` of every result; `role` labels a mismatch."""
+    try:
+        for result_id in range(len(source["runs"][0]["results"])):
+            (ok, msg) = compare_locations(
+                _endpoint_location(source, result_id, index),
+                _endpoint_location(pattern, result_id, index),
+            )
+            if not ok:
+                return (
+                    False,
+                    f"{role} differs from {role.lower()} in pattern in result #{result_id + 1} : {msg}",
+                )
+
+        return (True, "")
+    except _MALFORMED:
+        return (False, "Not a SARIF format given")
+
+
+def validate_source(source, pattern) -> Tuple[bool, str]:
+    """Check the first location of every result against the pattern."""
+    return _validate_endpoint(source, pattern, 0, "Source")
+
+
+def validate_sink(source, pattern) -> Tuple[bool, str]:
+    """Check the last location of every result against the pattern."""
+    return _validate_endpoint(source, pattern, -1, "Sink")
+
+
+def validate(source_file, pattern_file) -> Tuple[bool, str]:
+    """Validate the report in source_file against pattern_file.
+
+    Both arguments are open text files holding JSON. Returns (True, "") on a
+    match and (False, reason) otherwise, including when a file is not valid
+    JSON or is not shaped like a SARIF log.
+    """
+    try:
+        source = json.load(source_file)
+        pattern = json.load(pattern_file)
+    except json.JSONDecodeError as e:
+        # Stop here: source/pattern are unbound when parsing fails.
+        return (False, f"Invalid JSON syntax: {e}")
+
+    try:
+        if len(source["runs"]) != 1:
+            return (False, "Expected exactly 1 run in source file")
+        if len(pattern["runs"]) != 1:
+            return (False, "Expected exactly 1 run in pattern file")
+
+        if len(source["runs"][0]["results"]) != len(pattern["runs"][0]["results"]):
+            return (False, "Number of results does not match expected")
+    except _MALFORMED:
+        return (False, "Not a SARIF format given")
+
+    (ok, msg) = validate_source(source, pattern)
+    if not ok:
+        return (ok, msg)
+
+    return validate_sink(source, pattern)
+
+
+def main():
+    """Command-line entry point: exit 0 on a match, 1 otherwise."""
+    if len(sys.argv) != 3:
+        print(f"USAGE: {sys.argv[0]} SOURCE PATTERN", file=sys.stderr)
+        sys.exit(1)
+
+    source_path = sys.argv[1]
+    pattern_path = sys.argv[2]
+
+    with open(source_path, "r") as source_file, open(pattern_path, "r") as pattern_file:
+        (ok, msg) = validate(source_file, pattern_file)
+
+    if ok:
+        print("Validation passed!", file=sys.stderr)
+        sys.exit(0)
+
+    print(f"Validation failed: {msg}!", file=sys.stderr)
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/SARIF/lit.local.cfg b/test/SARIF/lit.local.cfg
new file mode 100644
index 0000000000..33c46ccb22
--- /dev/null
+++ b/test/SARIF/lit.local.cfg
@@ -0,0 +1,13 @@
+import os
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+if not getRoot(config).enable_posix_runtime or not getRoot(config).enable_uclibc:
+    config.unsupported = True
+
+config.substitutions.append(
+    ('%checker', f"python3 {os.path.join(os.path.dirname(__file__), 'checker.py')}".strip())
+)
diff --git a/tools/klee/main.cpp b/tools/klee/main.cpp
index f6a6b8147f..35fbd4de0d 100644
--- a/tools/klee/main.cpp
+++
b/tools/klee/main.cpp @@ -32,6 +32,7 @@ #include "llvm/Analysis/CallGraph.h" DISABLE_WARNING_PUSH DISABLE_WARNING_DEPRECATED_DECLARATIONS +#include "llvm/ADT/APFloat.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/Constants.h" #include "llvm/IR/IRBuilder.h" @@ -89,6 +90,11 @@ cl::opt<bool> cl::desc("Do not generate any test files (default=false)"), cl::cat(TestCaseCat)); +cl::opt<bool> WriteSARIFs( + "write-sarifs", cl::init(false), + cl::desc("Write .sarif files for each erroneous test case (default=false)"), + cl::cat(TestCaseCat)); + cl::opt<bool> WriteKTests( "write-ktests", cl::init(true), cl::desc("Write .ktest files for each test case (default=true)"), @@ -471,6 +477,8 @@ class KleeHandler : public InterpreterHandler { void setOutputDirectory(const std::string &directory); SmallString<128> getOutputDirectory() const; + + ToolJson info() const override; }; KleeHandler::KleeHandler(int argc, char **argv) @@ -614,6 +622,36 @@ SmallString<128> KleeHandler::getOutputDirectory() const { return m_outputDirectory; } +static std::vector<RuleJson> rules() { + std::vector<RuleJson> ret; + // Push back rules + // -Wtype-limits is silenced because (I) may be 0 in the range check below.
+ DISABLE_WARNING_PUSH + // clang-format off + DISABLE_WARNING(-Wtype-limits) + // clang-format on +#undef TTYPE +#undef TTMARK +#define TTYPE(N, I, S) \ + if ((I) > (unsigned int)StateTerminationType::SOLVERERR && \ + (I) < (unsigned int)StateTerminationType::PROGERR) { \ + ret.push_back(RuleJson{#N, {"Program error"}, {}, {}}); \ + } +#define TTMARK(N, I) + + TERMINATION_TYPES; + DISABLE_WARNING_POP + return ret; +} + +ToolJson KleeHandler::info() const { + DriverJson driver{"KLEEF", "https://toolchain-labs.com/projects/kleef.html", + rules()}; + ToolJson tool = {std::move(driver)}; + + return tool; +} + std::unique_ptr<llvm::raw_fd_ostream> KleeHandler::openTestFile(const std::string &suffix, unsigned id, unsigned version) { @@ -769,6 +807,17 @@ void KleeHandler::processTestCase(const ExecutionState &state, } } + if (isError && WriteSARIFs) { + auto f = openOutputFile("report.sarif"); + + // Rewrite .sarif each time it is updated to + // receive results as they appear. + if (f) { + m_interpreter->addSARIFReport(state); + *f << json(m_interpreter->getSARIFReport()).dump(2); + } + } + if (isError && OptExitOnError) { m_interpreter->prepareForEarlyExit(); klee_error("EXITING ON ERROR:\n%s\n", message); @@ -1964,7 +2013,7 @@ int main(int argc, char **argv, char **envp) { for (const auto &instr : llvm::instructions(Func)) { auto locationInfo = getLocationInfo(&instr); origInstructions[locationInfo.file][locationInfo.line] - [locationInfo.column] + [locationInfo.column.value_or(0)] .insert(instr.getOpcode()); } } @@ -2110,6 +2159,7 @@ int main(int argc, char **argv, char **envp) { errorMsg.c_str()); } + mainModuleFunctions.insert("__klee_posix_wrapped_main"); preparePOSIX(loadedUserModules); }