Move parsing of identifiers in MS-style inline assembly into

the actual parser and support arbitrary id-expressions. We're actually basically set up to do arbitrary expressions here if we wanted to. Assembly operands permit things like A::x to be written regardless of language mode, which forces us to embellish the evaluation context logic somewhat. The logic here under template instantiation is incorrect; we need to preserve the fact that an expression was unevaluated. Of course, template instantiation in general is fishy here because we have no way of delaying semantic analysis in the MC parser. It's all just fishy. I've also fixed the serialization of MS asm statements. This commit depends on an LLVM commit. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@180976 91177308-0d34-0410-b5e6-96231b3b80d8
author: John McCall <rjmccall@apple.com> 2013-05-03 00:10:13 +0000
committer: John McCall <rjmccall@apple.com> 2013-05-03 00:10:13 +0000
commit: aeeacf725c9e0ddd64ea9764bd008e5b6873ce51 (patch)
tree: 370063ad5a0cf0312992d978ed703abc92c53403 /lib/Parse
parent: c70fac3c52092013b08163187f034b73c94bf3d0 (diff)
1 files changed, 395 insertions, 2 deletions
diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp
index 5fa4f17026..43b6965d31 100644
--- a/lib/Parse/ParseStmt.cpp
+++ b/lib/Parse/ParseStmt.cpp
@@ -14,13 +14,26 @@
 
 #include "clang/Parse/Parser.h"
 #include "RAIIObjectsForParser.h"
+#include "clang/AST/ASTContext.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/PrettyStackTrace.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/PrettyDeclStackTrace.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/TypoCorrection.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/ADT/SmallString.h"
 using namespace clang;
 
@@ -1663,6 +1676,281 @@ StmtResult Parser::ParseReturnStatement() {
   return Actions.ActOnReturnStmt(ReturnLoc, R.take());
 }
 
+namespace {
+  class ClangAsmParserCallback : public llvm::MCAsmParserSemaCallback {
+    Parser &TheParser;
+    SourceLocation AsmLoc;
+    StringRef AsmString;
+
+    /// The tokens we streamed into AsmString and handed off to MC.
+    ArrayRef<Token> AsmToks;
+
+    /// The offset of each token in AsmToks within AsmString.
+    ArrayRef<unsigned> AsmTokOffsets;
+
+  public:
+    ClangAsmParserCallback(Parser &P, SourceLocation Loc,
+                           StringRef AsmString,
+                           ArrayRef<Token> Toks,
+                           ArrayRef<unsigned> Offsets)
+      : TheParser(P), AsmLoc(Loc), AsmString(AsmString),
+        AsmToks(Toks), AsmTokOffsets(Offsets) {
+      assert(AsmToks.size() == AsmTokOffsets.size());
+    }
+
+    void *LookupInlineAsmIdentifier(StringRef &LineBuf,
+                                    InlineAsmIdentifierInfo &Info,
+                                    bool IsUnevaluatedContext) {
+      // Collect the desired tokens.
+      SmallVector<Token, 16> LineToks;
+      const Token *FirstOrigToken = 0;
+      findTokensForString(LineBuf, LineToks, FirstOrigToken);
+
+      unsigned NumConsumedToks;
+      ExprResult Result =
+        TheParser.ParseMSAsmIdentifier(LineToks, NumConsumedToks, &Info,
+                                       IsUnevaluatedContext);
+
+      // If we consumed the entire line, tell MC that.
+      // Also do this if we consumed nothing as a way of reporting failure.
+      if (NumConsumedToks == 0 || NumConsumedToks == LineToks.size()) {
+        // By not modifying LineBuf, we're implicitly consuming it all.
+
+      // Otherwise, consume up to the original tokens.
+      } else {
+        assert(FirstOrigToken && "not using original tokens?");
+
+        // Since we're using original tokens, apply that offset.
+        assert(FirstOrigToken[NumConsumedToks].getLocation()
+                  == LineToks[NumConsumedToks].getLocation());
+        unsigned FirstIndex = FirstOrigToken - AsmToks.begin();
+        unsigned LastIndex = FirstIndex + NumConsumedToks - 1;
+
+        // The total length we've consumed is the relative offset
+        // of the last token we consumed plus its length.
+        unsigned TotalOffset = (AsmTokOffsets[LastIndex]
+                                + AsmToks[LastIndex].getLength()
+                                - AsmTokOffsets[FirstIndex]);
+        LineBuf = LineBuf.substr(0, TotalOffset);
+      }
+
+      // Initialize the "decl" with the lookup result.
+      Info.OpDecl = static_cast<void*>(Result.take());
+      return Info.OpDecl;
+    }
+
+    bool LookupInlineAsmField(StringRef Base, StringRef Member,
+                              unsigned &Offset) {
+      return TheParser.getActions().LookupInlineAsmField(Base, Member,
+                                                         Offset, AsmLoc);
+    }
+
+    static void DiagHandlerCallback(const llvm::SMDiagnostic &D,
+                                    void *Context) {
+      ((ClangAsmParserCallback*) Context)->handleDiagnostic(D);
+    }
+
+  private:
+    /// Collect the appropriate tokens for the given string.
+    void findTokensForString(StringRef Str, SmallVectorImpl<Token> &TempToks,
+                             const Token *&FirstOrigToken) const {
+      // For now, assert that the string we're working with is a substring
+      // of what we gave to MC.  This lets us use the original tokens.
+      assert(!std::less<const char*>()(Str.begin(), AsmString.begin()) &&
+             !std::less<const char*>()(AsmString.end(), Str.end()));
+
+      // Try to find a token whose offset matches the first token.
+      unsigned FirstCharOffset = Str.begin() - AsmString.begin();
+      const unsigned *FirstTokOffset
+        = std::lower_bound(AsmTokOffsets.begin(), AsmTokOffsets.end(),
+                           FirstCharOffset);
+
+      // For now, assert that the start of the string exactly
+      // corresponds to the start of a token.
+      assert(*FirstTokOffset == FirstCharOffset);
+
+      // Use all the original tokens for this line.  (We assume the
+      // end of the line corresponds cleanly to a token break.)
+      unsigned FirstTokIndex = FirstTokOffset - AsmTokOffsets.begin();
+      FirstOrigToken = &AsmToks[FirstTokIndex];
+      unsigned LastCharOffset = Str.end() - AsmString.begin();
+      for (unsigned i = FirstTokIndex, e = AsmTokOffsets.size(); i != e; ++i) {
+        if (AsmTokOffsets[i] >= LastCharOffset) break;
+        TempToks.push_back(AsmToks[i]);
+      }
+    }
+
+    void handleDiagnostic(const llvm::SMDiagnostic &D) {
+      // Compute an offset into the inline asm buffer.
+      // FIXME: This isn't right if .macro is involved (but hopefully, no
+      // real-world code does that).
+      const llvm::SourceMgr &LSM = *D.getSourceMgr();
+      const llvm::MemoryBuffer *LBuf =
+        LSM.getMemoryBuffer(LSM.FindBufferContainingLoc(D.getLoc()));
+      unsigned Offset = D.getLoc().getPointer() - LBuf->getBufferStart();
+
+      // Figure out which token that offset points into.
+      const unsigned *TokOffsetPtr =
+        std::lower_bound(AsmTokOffsets.begin(), AsmTokOffsets.end(), Offset);
+      unsigned TokIndex = TokOffsetPtr - AsmTokOffsets.begin();
+      unsigned TokOffset = *TokOffsetPtr;
+
+      // If we come up with an answer which seems sane, use it; otherwise,
+      // just point at the __asm keyword.
+      // FIXME: Assert the answer is sane once we handle .macro correctly.
+      SourceLocation Loc = AsmLoc;
+      if (TokIndex < AsmToks.size()) {
+        const Token &Tok = AsmToks[TokIndex];
+        Loc = Tok.getLocation();
+        Loc = Loc.getLocWithOffset(Offset - TokOffset);
+      }
+      TheParser.Diag(Loc, diag::err_inline_ms_asm_parsing)
+        << D.getMessage();
+    }
+  };
+}
+
+/// Parse an identifier in an MS-style inline assembly block.
+///
+/// \param CastInfo - a void* so that we don't have to teach Parser.h
+///   about the actual type.
+ExprResult Parser::ParseMSAsmIdentifier(llvm::SmallVectorImpl<Token> &LineToks,
+                                        unsigned &NumLineToksConsumed,
+                                        void *CastInfo,
+                                        bool IsUnevaluatedContext) {
+  llvm::InlineAsmIdentifierInfo &Info =
+    *(llvm::InlineAsmIdentifierInfo *) CastInfo;
+
+  // Push a fake token on the end so that we don't overrun the token
+  // stream.  We use ';' because it expression-parsing should never
+  // overrun it.
+  const tok::TokenKind EndOfStream = tok::semi;
+  Token EndOfStreamTok;
+  EndOfStreamTok.startToken();
+  EndOfStreamTok.setKind(EndOfStream);
+  LineToks.push_back(EndOfStreamTok);
+
+  // Also copy the current token over.
+  LineToks.push_back(Tok);
+
+  PP.EnterTokenStream(LineToks.begin(),
+                      LineToks.size(),
+                      /*disable macros*/ true,
+                      /*owns tokens*/ false);
+
+  // Clear the current token and advance to the first token in LineToks.
+  ConsumeAnyToken();
+
+  // Parse an optional scope-specifier if we're in C++.
+  CXXScopeSpec SS;
+  if (getLangOpts().CPlusPlus) {
+    ParseOptionalCXXScopeSpecifier(SS, ParsedType(), /*EnteringContext=*/false);
+  }
+
+  // Require an identifier here.
+  SourceLocation TemplateKWLoc;
+  UnqualifiedId Id;
+  bool Invalid = ParseUnqualifiedId(SS,
+                                    /*EnteringContext=*/false,
+                                    /*AllowDestructorName=*/false,
+                                    /*AllowConstructorName=*/false,
+                                    /*ObjectType=*/ ParsedType(),
+                                    TemplateKWLoc,
+                                    Id);
+
+  // If we've run into the poison token we inserted before, or there
+  // was a parsing error, then claim the entire line.
+  if (Invalid || Tok.is(EndOfStream)) {
+    NumLineToksConsumed = LineToks.size() - 2;
+
+    // Otherwise, claim up to the start of the next token.
+  } else {
+    // Figure out how many tokens we are into LineToks.
+    unsigned LineIndex = 0;
+    while (LineToks[LineIndex].getLocation() != Tok.getLocation()) {
+      LineIndex++;
+      assert(LineIndex < LineToks.size() - 2); // we added two extra tokens
+    }
+
+    NumLineToksConsumed = LineIndex;
+  }
+      
+  // Finally, restore the old parsing state by consuming all the
+  // tokens we staged before, implicitly killing off the
+  // token-lexer we pushed.
+  for (unsigned n = LineToks.size() - 2 - NumLineToksConsumed; n != 0; --n) {
+    ConsumeAnyToken();
+  }
+  ConsumeToken(EndOfStream);
+
+  // Leave LineToks in its original state.
+  LineToks.pop_back();
+  LineToks.pop_back();
+
+  // Perform the lookup.
+  return Actions.LookupInlineAsmIdentifier(SS, TemplateKWLoc, Id, Info,
+                                           IsUnevaluatedContext);
+}
+
+/// Turn a sequence of our tokens back into a string that we can hand
+/// to the MC asm parser.
+static bool buildMSAsmString(Preprocessor &PP,
+                             SourceLocation AsmLoc,
+                             ArrayRef<Token> AsmToks,
+                             SmallVectorImpl<unsigned> &TokOffsets,
+                             SmallString<512> &Asm) {
+  assert (!AsmToks.empty() && "Didn't expect an empty AsmToks!");
+
+  // Is this the start of a new assembly statement?
+  bool isNewStatement = true;
+
+  for (unsigned i = 0, e = AsmToks.size(); i < e; ++i) {
+    const Token &Tok = AsmToks[i];
+
+    // Start each new statement with a newline and a tab.
+    if (!isNewStatement &&
+        (Tok.is(tok::kw_asm) || Tok.isAtStartOfLine())) {
+      Asm += "\n\t";
+      isNewStatement = true;
+    }
+
+    // Preserve the existence of leading whitespace except at the
+    // start of a statement.
+    if (!isNewStatement && Tok.hasLeadingSpace())
+      Asm += ' ';
+
+    // Remember the offset of this token.
+    TokOffsets.push_back(Asm.size());
+
+    // Don't actually write '__asm' into the assembly stream.
+    if (Tok.is(tok::kw_asm)) {
+      // Complain about __asm at the end of the stream.
+      if (i + 1 == e) {
+        PP.Diag(AsmLoc, diag::err_asm_empty);
+        return true;
+      }
+
+      continue;
+    }
+
+    // Append the spelling of the token.
+    SmallString<32> SpellingBuffer;
+    bool SpellingInvalid = false;
+    Asm += PP.getSpelling(Tok, SpellingBuffer, &SpellingInvalid);
+    assert(!SpellingInvalid && "spelling was invalid after correct parse?");
+
+    // We are no longer at the start of a statement.
+    isNewStatement = false;
+  }
+
+  // Ensure that the buffer is null-terminated.
+  Asm.push_back('\0');
+  Asm.pop_back();
+
+  assert(TokOffsets.size() == AsmToks.size());
+  return false;
+}
+
 /// ParseMicrosoftAsmStatement. When -fms-extensions/-fasm-blocks is enabled,
 /// this routine is called to collect the tokens for an MS asm statement.
 ///
@@ -1771,9 +2059,114 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) {
     return StmtError();
   }
 
+  // Okay, prepare to use MC to parse the assembly.
+  SmallVector<StringRef, 4> ConstraintRefs;
+  SmallVector<Expr*, 4> Exprs;
+  SmallVector<StringRef, 4> ClobberRefs;
+
+  // We need an actual supported target.
+  llvm::Triple TheTriple = Actions.Context.getTargetInfo().getTriple();
+  llvm::Triple::ArchType ArchTy = TheTriple.getArch();
+  bool UnsupportedArch = (ArchTy != llvm::Triple::x86 &&
+                          ArchTy != llvm::Triple::x86_64);
+  if (UnsupportedArch)
+    Diag(AsmLoc, diag::err_msasm_unsupported_arch) << TheTriple.getArchName();
+    
+  // If we don't support assembly, or the assembly is empty, we don't
+  // need to instantiate the AsmParser, etc.
+  if (UnsupportedArch || AsmToks.empty()) {
+    return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLoc, AsmToks, StringRef(),
+                                  /*NumOutputs*/ 0, /*NumInputs*/ 0,
+                                  ConstraintRefs, ClobberRefs, Exprs, EndLoc);
+  }
+
+  // Expand the tokens into a string buffer.
+  SmallString<512> AsmString;
+  SmallVector<unsigned, 8> TokOffsets;
+  if (buildMSAsmString(PP, AsmLoc, AsmToks, TokOffsets, AsmString))
+    return StmtError();
+
+  // Find the target and create the target specific parser.
+  std::string Error;
+  const std::string &TT = TheTriple.getTriple();
+  const llvm::Target *TheTarget = llvm::TargetRegistry::lookupTarget(TT, Error);
+
+  OwningPtr<llvm::MCAsmInfo> MAI(TheTarget->createMCAsmInfo(TT));
+  OwningPtr<llvm::MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT));
+  OwningPtr<llvm::MCObjectFileInfo> MOFI(new llvm::MCObjectFileInfo());
+  OwningPtr<llvm::MCSubtargetInfo>
+    STI(TheTarget->createMCSubtargetInfo(TT, "", ""));
+
+  llvm::SourceMgr TempSrcMgr;
+  llvm::MCContext Ctx(*MAI, *MRI, MOFI.get(), &TempSrcMgr);
+  llvm::MemoryBuffer *Buffer =
+    llvm::MemoryBuffer::getMemBuffer(AsmString, "<MS inline asm>");
+
+  // Tell SrcMgr about this buffer, which is what the parser will pick up.
+  TempSrcMgr.AddNewSourceBuffer(Buffer, llvm::SMLoc());
+
+  OwningPtr<llvm::MCStreamer> Str(createNullStreamer(Ctx));
+  OwningPtr<llvm::MCAsmParser>
+    Parser(createMCAsmParser(TempSrcMgr, Ctx, *Str.get(), *MAI));
+  OwningPtr<llvm::MCTargetAsmParser>
+    TargetParser(TheTarget->createMCAsmParser(*STI, *Parser));
+
+  // Get the instruction descriptor.
+  const llvm::MCInstrInfo *MII = TheTarget->createMCInstrInfo(); 
+  llvm::MCInstPrinter *IP =
+    TheTarget->createMCInstPrinter(1, *MAI, *MII, *MRI, *STI);
+
+  // Change to the Intel dialect.
+  Parser->setAssemblerDialect(1);
+  Parser->setTargetParser(*TargetParser.get());
+  Parser->setParsingInlineAsm(true);
+  TargetParser->setParsingInlineAsm(true);
+
+  ClangAsmParserCallback Callback(*this, AsmLoc, AsmString,
+                                  AsmToks, TokOffsets);
+  TargetParser->setSemaCallback(&Callback);
+  TempSrcMgr.setDiagHandler(ClangAsmParserCallback::DiagHandlerCallback,
+                            &Callback);
+
+  unsigned NumOutputs;
+  unsigned NumInputs;
+  std::string AsmStringIR;
+  SmallVector<std::pair<void *, bool>, 4> OpExprs;
+  SmallVector<std::string, 4> Constraints;
+  SmallVector<std::string, 4> Clobbers;
+  if (Parser->parseMSInlineAsm(AsmLoc.getPtrEncoding(), AsmStringIR,
+                               NumOutputs, NumInputs, OpExprs, Constraints,
+                               Clobbers, MII, IP, Callback))
+    return StmtError();
+
+  // Build the vector of clobber StringRefs.
+  unsigned NumClobbers = Clobbers.size();
+  ClobberRefs.resize(NumClobbers);
+  for (unsigned i = 0; i != NumClobbers; ++i)
+    ClobberRefs[i] = StringRef(Clobbers[i]);
+
+  // Recast the void pointers and build the vector of constraint StringRefs.
+  unsigned NumExprs = NumOutputs + NumInputs;
+  ConstraintRefs.resize(NumExprs);
+  Exprs.resize(NumExprs);
+  for (unsigned i = 0, e = NumExprs; i != e; ++i) {
+    Expr *OpExpr = static_cast<Expr *>(OpExprs[i].first);
+    if (!OpExpr)
+      return StmtError();
+
+    // Need address of variable.
+    if (OpExprs[i].second)
+      OpExpr = Actions.BuildUnaryOp(getCurScope(), AsmLoc, UO_AddrOf, OpExpr)
+        .take();
+
+    ConstraintRefs[i] = StringRef(Constraints[i]);
+    Exprs[i] = OpExpr;
+  }
+
   // FIXME: We should be passing source locations for better diagnostics.
-  return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLoc,
-                                llvm::makeArrayRef(AsmToks), EndLoc);
+  return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLoc, AsmToks, AsmStringIR,
+                                NumOutputs, NumInputs,
+                                ConstraintRefs, ClobberRefs, Exprs, EndLoc);
 }
 
 /// ParseAsmStatement - Parse a GNU extended asm statement.
author	John McCall <rjmccall@apple.com>	2013-05-03 00:10:13 +0000
committer	John McCall <rjmccall@apple.com>	2013-05-03 00:10:13 +0000
commit	aeeacf725c9e0ddd64ea9764bd008e5b6873ce51 (patch)
tree	370063ad5a0cf0312992d978ed703abc92c53403 /lib/Parse
parent	c70fac3c52092013b08163187f034b73c94bf3d0 (diff)