diff options
author | Anna Zaks <ganna@apple.com> | 2012-01-12 02:22:34 +0000 |
---|---|---|
committer | Anna Zaks <ganna@apple.com> | 2012-01-12 02:22:34 +0000 |
commit | 1fb826a6fd893234f32b0b91bb92ea4d127788ad (patch) | |
tree | a146fb7bf4be12981dc3e6ce6938a7c250e053b0 | |
parent | e9c876044b7fe9560128a41d511426c014bf5d3f (diff) |
[analyzer] Add taint transfer by strcpy & others (part 1).
To simplify the process:
Refactor taint generation checker to simplify passing the
information on which arguments need to be tainted from pre to post
visit.
Todo: We need to factor out the code that sema is using to identify the
string and memcpy functions and use it here and in the CString checker.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@148010 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp | 189 | ||||
-rw-r--r-- | test/Analysis/taint-generic.c | 41 |
2 files changed, 134 insertions, 96 deletions
diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index 12f5be416a..81f16bdfa8 100644 --- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -20,6 +20,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include <climits> using namespace clang; using namespace ento; @@ -28,48 +29,42 @@ namespace { class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, check::PreStmt<CallExpr> > { public: - enum TaintOnPreVisitKind { - /// No taint propagates from pre-visit to post-visit. - PrevisitNone = 0, - /// Based on the pre-visit, the return argument of the call - /// should be tainted. - PrevisitTaintRet = 1, - /// Based on the pre-visit, the call can taint values through it's - /// pointer/reference arguments. - PrevisitTaintArgs = 2 - }; + static const unsigned ReturnValueIndex = UINT_MAX; private: mutable llvm::OwningPtr<BugType> BT; void initBugType() const; - /// Add/propagate taint on a post visit. - void taintPost(const CallExpr *CE, CheckerContext &C) const; - /// Add/propagate taint on a pre visit. - void taintPre(const CallExpr *CE, CheckerContext &C) const; - - /// Catch taint related bugs. Check if tainted data is passed to a system - /// call etc. + /// \brief Catch taint related bugs. Check if tainted data is passed to a + /// system call etc. bool checkPre(const CallExpr *CE, CheckerContext &C) const; - /// Given a pointer argument, get the symbol of the value it contains + /// \brief Add taint sources on a pre-visit. + void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; + + /// \brief Propagate taint generated at pre-visit. + bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; + + /// \brief Add taint sources on a post visit. + void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; + + /// \brief Given a pointer argument, get the symbol of the value it contains /// (points to). SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg, - bool IssueWarning = true) const; + bool IssueWarning = false) const; /// Functions defining the attack surface. typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *, CheckerContext &C) const; const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const; - const ProgramState *postFscanf(const CallExpr *CE, CheckerContext &C) const; const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const; - const ProgramState *postDefault(const CallExpr *CE, CheckerContext &C) const; /// Taint the scanned input if the file is tainted. const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const; /// Taint if any of the arguments are tainted. const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const; + const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const; /// Check if the region the expression evaluates to is the standard input, /// and thus, is tainted. @@ -90,20 +85,17 @@ public: }; } -/// Definitions for the checker specific state. -namespace { struct TaintOnPreVisit {};} -namespace clang { -namespace ento { - /// A flag which is used to pass information from call pre-visit instruction - /// to the call post-visit. The value is an unsigned, which takes on values - /// of the TaintOnPreVisitKind enumeration. - template<> - struct ProgramStateTrait<TaintOnPreVisit> : - public ProgramStatePartialTrait<unsigned> { - static void *GDMIndex() { return GenericTaintChecker::getTag(); } - }; -} -} +/// A set which is used to pass information from call pre-visit instruction +/// to the call post-visit. The values are unsigned integers, which are either +/// ReturnValueIndex, or indexes of the pointer/reference argument, which +/// points to data, which should be tainted on return. +namespace { struct TaintArgsOnPostVisit{}; } +namespace clang { namespace ento { +template<> struct ProgramStateTrait<TaintArgsOnPostVisit> + : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > { + static void *GDMIndex() { return GenericTaintChecker::getTag(); } +}; +}} inline void GenericTaintChecker::initBugType() const { if (!BT) @@ -117,23 +109,31 @@ void GenericTaintChecker::checkPreStmt(const CallExpr *CE, return; // Add taint second. - taintPre(CE, C); + addSourcesPre(CE, C); } void GenericTaintChecker::checkPostStmt(const CallExpr *CE, CheckerContext &C) const { - taintPost(CE, C); + if (propagateFromPre(CE, C)) + return; + addSourcesPost(CE, C); } -void GenericTaintChecker::taintPre(const CallExpr *CE, - CheckerContext &C) const { +void GenericTaintChecker::addSourcesPre(const CallExpr *CE, + CheckerContext &C) const { // Set the evaluation function by switching on the callee name. StringRef Name = C.getCalleeName(CE); + if (Name.empty()) + return; FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) - .Case("fscanf", &GenericTaintChecker::preFscanf) .Case("atoi", &GenericTaintChecker::preAnyArgs) .Case("atol", &GenericTaintChecker::preAnyArgs) .Case("atoll", &GenericTaintChecker::preAnyArgs) + .Case("fscanf", &GenericTaintChecker::preFscanf) + .Cases("strcpy", "__builtin___strcpy_chk", + "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy) + .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy) + .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy) .Default(0); // Check and evaluate the call. @@ -146,22 +146,58 @@ void GenericTaintChecker::taintPre(const CallExpr *CE, C.addTransition(State); } -void GenericTaintChecker::taintPost(const CallExpr *CE, - CheckerContext &C) const { +bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, + CheckerContext &C) const { + const ProgramState *State = C.getState(); + + // Depending on what was tainted at pre-visit, we determined a set of + // arguments which should be tainted after the function returns. These are + // stored in the state as TaintArgsOnPostVisit set. + llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>(); + for (llvm::ImmutableSet<unsigned>::iterator + I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { + unsigned ArgNum = *I; + + // Special handling for the tainted return value. + if (ArgNum == ReturnValueIndex) { + State = State->addTaint(CE, C.getLocationContext()); + continue; + } + + // The arguments are pointer arguments. The data they are pointing at is + // tainted after the call. + const Expr* Arg = CE->getArg(ArgNum); + SymbolRef Sym = getPointedToSymbol(C, Arg, true); + if (Sym) + State = State->addTaint(Sym); + } + + // Clear up the taint info from the state. + State = State->remove<TaintArgsOnPostVisit>(); + + if (State != C.getState()) { + C.addTransition(State); + return true; + } + return false; +} + +void GenericTaintChecker::addSourcesPost(const CallExpr *CE, + CheckerContext &C) const { // Define the attack surface. // Set the evaluation function by switching on the callee name. StringRef Name = C.getCalleeName(CE); + if (Name.empty()) + return; FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) .Case("scanf", &GenericTaintChecker::postScanf) - .Case("fscanf", &GenericTaintChecker::postFscanf) - .Case("sscanf", &GenericTaintChecker::postFscanf) // TODO: Add support for vfscanf & family. .Case("getchar", &GenericTaintChecker::postRetTaint) .Case("getenv", &GenericTaintChecker::postRetTaint) .Case("fopen", &GenericTaintChecker::postRetTaint) .Case("fdopen", &GenericTaintChecker::postRetTaint) .Case("freopen", &GenericTaintChecker::postRetTaint) - .Default(&GenericTaintChecker::postDefault); + .Default(0); // If the callee isn't defined, it is not of security concern. // Check and evaluate the call. @@ -171,8 +207,6 @@ void GenericTaintChecker::taintPost(const CallExpr *CE, if (!State) return; - assert(State->get<TaintOnPreVisit>() == PrevisitNone && - "State has to be cleared."); C.addTransition(State); } @@ -213,6 +247,8 @@ SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, return Val.getAsSymbol(); } +// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 +// and arg 1 should get taint. const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE, CheckerContext &C) const { assert(CE->getNumArgs() >= 2); @@ -220,8 +256,13 @@ const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE, // Check is the file descriptor is tainted. if (State->isTainted(CE->getArg(0), C.getLocationContext()) || - isStdin(CE->getArg(0), C)) - return State->set<TaintOnPreVisit>(PrevisitTaintArgs); + isStdin(CE->getArg(0), C)) { + // All arguments except for the first two should get taint. + for (unsigned int i = 2; i < CE->getNumArgs(); ++i) + State = State->add<TaintArgsOnPostVisit>(i); + return State; + } + return 0; } @@ -233,22 +274,19 @@ const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE, const Expr *Arg = CE->getArg(i); if (State->isTainted(Arg, C.getLocationContext()) || State->isTainted(getPointedToSymbol(C, Arg))) - return State = State->set<TaintOnPreVisit>(PrevisitTaintRet); + return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); } return 0; } -const ProgramState *GenericTaintChecker::postDefault(const CallExpr *CE, - CheckerContext &C) const { +const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE, + CheckerContext &C) const { + assert(CE->getNumArgs() >= 2); + const Expr *FromArg = CE->getArg(1); const ProgramState *State = C.getState(); - - // Check if we know that the result needs to be tainted based on the - // pre-visit analysis. - if (State->get<TaintOnPreVisit>() == PrevisitTaintRet) { - State = State->addTaint(CE, C.getLocationContext()); - return State->set<TaintOnPreVisit>(PrevisitNone); - } - + if (State->isTainted(FromArg, C.getLocationContext()) || + State->isTainted(getPointedToSymbol(C, FromArg))) + return State = State->add<TaintArgsOnPostVisit>(0); return 0; } @@ -262,34 +300,7 @@ const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE, // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. const Expr* Arg = CE->getArg(i); - SymbolRef Sym = getPointedToSymbol(C, Arg); - if (Sym) - State = State->addTaint(Sym); - } - return State; -} - -/// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 -/// and arg 1 should get taint. -const ProgramState *GenericTaintChecker::postFscanf(const CallExpr *CE, - CheckerContext &C) const { - const ProgramState *State = C.getState(); - assert(CE->getNumArgs() >= 2); - - // Fscanf is only tainted if the input file is tainted at pre visit, so - // check for that first. - if (State->get<TaintOnPreVisit>() == PrevisitNone) - return 0; - - // Reset the taint state. - State = State->set<TaintOnPreVisit>(PrevisitNone); - - // All arguments except for the first two should get taint. - for (unsigned int i = 2; i < CE->getNumArgs(); ++i) { - // The arguments are pointer arguments. The data they are pointing at is - // tainted after the call. - const Expr* Arg = CE->getArg(i); - SymbolRef Sym = getPointedToSymbol(C, Arg); + SymbolRef Sym = getPointedToSymbol(C, Arg, true); if (Sym) State = State->addTaint(Sym); } @@ -373,7 +384,7 @@ bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, // If either the format string content or the pointer itself are tainted, warn. const ProgramState *State = C.getState(); const Expr *Arg = CE->getArg(ArgNum); - if (State->isTainted(getPointedToSymbol(C, Arg, false)) || + if (State->isTainted(getPointedToSymbol(C, Arg)) || State->isTainted(Arg, C.getLocationContext())) if (ExplodedNode *N = C.addTransition()) { initBugType(); diff --git a/test/Analysis/taint-generic.c b/test/Analysis/taint-generic.c index a23d20f79f..fd9884d3fa 100644 --- a/test/Analysis/taint-generic.c +++ b/test/Analysis/taint-generic.c @@ -3,6 +3,26 @@ int scanf(const char *restrict format, ...); int getchar(void); +typedef struct _FILE FILE; +extern FILE *stdin; +int fscanf(FILE *restrict stream, const char *restrict format, ...); +int sprintf(char *str, const char *format, ...); +void setproctitle(const char *fmt, ...); +typedef __typeof(sizeof(int)) size_t; + +// Define string functions. Use builtin for some of them. They all default to +// the processing in the taint checker. +#define strcpy(dest, src) \ + ((__builtin_object_size(dest, 0) != -1ULL) \ + ? __builtin___strcpy_chk (dest, src, __builtin_object_size(dest, 1)) \ + : __inline_strcpy_chk(dest, src)) + +static char *__inline_strcpy_chk (char *dest, const char *src) { + return __builtin___strcpy_chk(dest, src, __builtin_object_size(dest, 1)); +} +char *stpcpy(char *restrict s1, const char *restrict s2); +char *strncpy( char * destination, const char * source, size_t num ); + #define BUFSIZE 10 int Buffer[BUFSIZE]; @@ -47,16 +67,23 @@ void bufferGetchar(int x) { Buffer[m] = 1; //expected-warning {{Out of bound memory access }} } -typedef struct _FILE FILE; -extern FILE *stdin; -int fscanf(FILE *restrict stream, const char *restrict format, ...); -int sprintf(char *str, const char *format, ...); -void setproctitle(const char *fmt, ...); - -void testUncontrolledFormatString() { +void testUncontrolledFormatString(char **p) { char s[80]; fscanf(stdin, "%s", s); char buf[128]; sprintf(buf,s); // expected-warning {{Uncontrolled Format String}} setproctitle(s, 3); // expected-warning {{Uncontrolled Format String}} + + // Test taint propagation through strcpy and family. + char scpy[80]; + strcpy(scpy, s); + sprintf(buf,scpy); // expected-warning {{Uncontrolled Format String}} + + char spcpy[80]; + stpcpy(spcpy, s); + setproctitle(spcpy, 3); // expected-warning {{Uncontrolled Format String}} + + char sncpy[80]; + strncpy(sncpy, s, 20); + setproctitle(sncpy, 3); // expected-warning {{Uncontrolled Format String}} } |