author     Evan Cheng <evan.cheng@apple.com>        2009-04-17 01:29:40 +0000
committer  Evan Cheng <evan.cheng@apple.com>        2009-04-17 01:29:40 +0000
commit     276b77e66c538264d79b78c00bbad9f890f58011 (patch)
tree       f668c05c94b70225a5bc83d12f1e1912cb485226
parent     d10a4ce5825d0981107c0106c49089b9e5792e40 (diff)
Teach the spiller to unfold instructions that mod/ref a spill slot when a
scratch register is available and the transformation is profitable.
e.g.
xorq %r12<kill>, %r13
addq %rax, -184(%rbp)
addq %r13, -184(%rbp)
==>
xorq %r12<kill>, %r13
movq -184(%rbp), %r12
addq %rax, %r12
addq %r13, %r12
movq %r12, -184(%rbp)
The result is two more instructions but fewer memory accesses; it can also
open up opportunities for further optimizations.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@69341 91177308-0d34-0410-b5e6-96231b3b80d8
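For readers who want to sanity-check the trade-off stated above, here is a
standalone C++ sketch of the memory-traffic accounting. It is illustrative
only and not part of the patch; it assumes every folded instruction is a
read-modify-write of the spill slot, and the helper names are made up.

// Illustrative sketch only -- not code from this patch.
#include <iostream>

// A chain of N instructions folding the same spill slot touches memory
// 2*N times (one load and one store each).
static unsigned foldedMemAccesses(unsigned N) { return 2 * N; }

// After unfolding into a scratch register, the whole chain shares one
// reload and one spill, regardless of N.
static unsigned unfoldedMemAccesses() { return 2; }

// The reload and the spill are the only instructions the rewrite adds.
static unsigned extraInstructions() { return 2; }

int main() {
  for (unsigned N = 1; N <= 4; ++N)
    std::cout << "chain length " << N
              << ": folded accesses = " << foldedMemAccesses(N)
              << ", unfolded accesses = " << unfoldedMemAccesses()
              << " (+" << extraInstructions() << " instructions)\n";
  // For the commit's example (N = 2): four slot accesses become two,
  // at the cost of the extra reload and spill.
  return 0;
}

For the two-instruction addq chain shown above, that is four memory accesses
reduced to two, which is why the unfolding only triggers once a second folded
instruction on the same slot is found.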
-rw-r--r--  lib/CodeGen/Spiller.cpp                        219
-rw-r--r--  lib/CodeGen/Spiller.h                           19
-rw-r--r--  test/CodeGen/X86/2009-04-16-SpillerUnfold.ll   139
3 files changed, 366 insertions, 11 deletions
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 5edde38170..92bb785de6 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -29,6 +29,7 @@
 STATISTIC(NumLoads , "Number of loads added");
 STATISTIC(NumReused , "Number of values reused");
 STATISTIC(NumDCE , "Number of copies elided");
 STATISTIC(NumSUnfold , "Number of stores unfolded");
+STATISTIC(NumModRefUnfold, "Number of modref unfolded");
 
 namespace {
   enum SpillerName { simple, local };
@@ -524,6 +525,7 @@ bool LocalSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
   RegInfo = &MF.getRegInfo();
   TRI = MF.getTarget().getRegisterInfo();
   TII = MF.getTarget().getInstrInfo();
+  AllocatableRegs = TRI->getAllocatableSet(MF);
   DOUT << "\n**** Local spiller rewriting function '"
        << MF.getFunction()->getName() << "':\n";
   DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)"
@@ -595,7 +597,201 @@ bool LocalSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
 }
 
-/// PrepForUnfoldOpti - Turn a store folding instruction into a load folding
+/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
+/// stack slot mod/ref. It also checks if it's possible to unfold the
+/// instruction by having it define a specified physical register instead.
+static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
+                                 const TargetInstrInfo *TII,
+                                 const TargetRegisterInfo *TRI,
+                                 VirtRegMap &VRM) {
+  if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
+    return false;
+
+  bool Found = false;
+  VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+  for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+    unsigned VirtReg = I->second.first;
+    VirtRegMap::ModRef MR = I->second.second;
+    if (MR & VirtRegMap::isModRef)
+      if (VRM.getStackSlot(VirtReg) == SS) {
+        Found = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
+        break;
+      }
+  }
+  if (!Found)
+    return false;
+
+  // Does the instruction use a register that overlaps the scratch register?
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0)
+      continue;
+    unsigned Reg = MO.getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      if (!VRM.hasPhys(Reg))
+        continue;
+      Reg = VRM.getPhys(Reg);
+    }
+    if (TRI->regsOverlap(PhysReg, Reg))
+      return false;
+  }
+  return true;
+}
+
+/// FindFreeRegister - Find a free register of a given register class by looking
+/// at (at most) the last two machine instructions.
+static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
+                                 MachineBasicBlock &MBB,
+                                 const TargetRegisterClass *RC,
+                                 const TargetRegisterInfo *TRI,
+                                 BitVector &AllocatableRegs) {
+  BitVector Defs(TRI->getNumRegs());
+  BitVector Uses(TRI->getNumRegs());
+  SmallVector<unsigned, 4> LocalUses;
+  SmallVector<unsigned, 4> Kills;
+
+  // Take a look at 2 instructions at most.
+  for (unsigned Count = 0; Count < 2; ++Count) {
+    if (MII == MBB.begin())
+      break;
+    MachineInstr *PrevMI = prior(MII);
+    for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = PrevMI->getOperand(i);
+      if (!MO.isReg() || MO.getReg() == 0)
+        continue;
+      unsigned Reg = MO.getReg();
+      if (MO.isDef()) {
+        Defs.set(Reg);
+        for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+          Defs.set(*AS);
+      } else {
+        LocalUses.push_back(Reg);
+        if (MO.isKill() && AllocatableRegs[Reg])
+          Kills.push_back(Reg);
+      }
+    }
+
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+      unsigned Kill = Kills[i];
+      if (!Defs[Kill] && !Uses[Kill] &&
+          TRI->getPhysicalRegisterRegClass(Kill) == RC)
+        return Kill;
+    }
+    for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+      unsigned Reg = LocalUses[i];
+      Uses.set(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        Uses.set(*AS);
+    }
+
+    MII = PrevMI;
+  }
+
+  return 0;
+}
+
+static
+void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == VirtReg)
+      MO.setReg(PhysReg);
+  }
+}
+
+/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
+/// a scratch register is available.
+///   xorq %r12<kill>, %r13
+///   addq %rax, -184(%rbp)
+///   addq %r13, -184(%rbp)
+/// ==>
+///   xorq %r12<kill>, %r13
+///   movq -184(%rbp), %r12
+///   addq %rax, %r12
+///   addq %r13, %r12
+///   movq %r12, -184(%rbp)
+bool LocalSpiller::OptimizeByUnfold2(unsigned VirtReg, int SS,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator &MII,
+                                    std::vector<MachineInstr*> &MaybeDeadStores,
+                                     AvailableSpills &Spills,
+                                     BitVector &RegKills,
+                                     std::vector<MachineOperand*> &KillOps,
+                                     VirtRegMap &VRM) {
+  MachineBasicBlock::iterator NextMII = next(MII);
+  if (NextMII == MBB.end())
+    return false;
+
+  if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
+    return false;
+
+  // Now let's see if the last couple of instructions happen to have freed up
+  // a register.
+  const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
+  unsigned PhysReg = FindFreeRegister(MII, MBB, RC, TRI, AllocatableRegs);
+  if (!PhysReg)
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  TRI = MF.getTarget().getRegisterInfo();
+  MachineInstr &MI = *MII;
+  if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, VRM))
+    return false;
+
+  // If the next instruction also folds the same SS modref and can be unfolded,
+  // then it's worthwhile to issue a load from SS into the free register and
+  // then unfold these instructions.
+  if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM))
+    return false;
+
+  // Load from SS to the spare physical register.
+  TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC);
+  // This invalidates Phys.
+  Spills.ClobberPhysReg(PhysReg);
+  // Remember it's available.
+  Spills.addAvailable(SS, PhysReg);
+  MaybeDeadStores[SS] = NULL;
+
+  // Unfold current MI.
+  SmallVector<MachineInstr*, 4> NewMIs;
+  if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+    assert(0 && "Unable to unfold the load / store folding instruction!");
+  assert(NewMIs.size() == 1);
+  AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+  VRM.transferRestorePts(&MI, NewMIs[0]);
+  MII = MBB.insert(MII, NewMIs[0]);
+  InvalidateKills(MI, RegKills, KillOps);
+  VRM.RemoveMachineInstrFromMaps(&MI);
+  MBB.erase(&MI);
+  ++NumModRefUnfold;
+
+  // Unfold next instructions that fold the same SS.
+  do {
+    MachineInstr &NextMI = *NextMII;
+    NextMII = next(NextMII);
+    NewMIs.clear();
+    if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
+      assert(0 && "Unable to unfold the load / store folding instruction!");
+    assert(NewMIs.size() == 1);
+    AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+    VRM.transferRestorePts(&NextMI, NewMIs[0]);
+    MBB.insert(NextMII, NewMIs[0]);
+    InvalidateKills(NextMI, RegKills, KillOps);
+    VRM.RemoveMachineInstrFromMaps(&NextMI);
+    MBB.erase(&NextMI);
+    ++NumModRefUnfold;
+  } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM));
+
+  // Store the value back into SS.
+  TII->storeRegToStackSlot(MBB, NextMII, PhysReg, true, SS, RC);
+  MachineInstr *StoreMI = prior(NextMII);
+  VRM.addSpillSlotUse(SS, StoreMI);
+  VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+
+  return true;
+}
+
+/// OptimizeByUnfold - Turn a store folding instruction into a load folding
 /// instruction. e.g.
 ///   xorl %edi, %eax
 ///   movl %eax, -32(%ebp)
@@ -607,7 +803,7 @@ bool LocalSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
 ///   mov %eax, -32(%ebp)
 /// This enables unfolding optimization for a subsequent instruction which will
 /// also eliminate the newly introduced store instruction.
-bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
+bool LocalSpiller::OptimizeByUnfold(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator &MII,
                                     std::vector<MachineInstr*> &MaybeDeadStores,
                                      AvailableSpills &Spills,
@@ -646,8 +842,14 @@ bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
     }
   }
 
-  if (!UnfoldedOpc)
-    return false;
+  if (!UnfoldedOpc) {
+    if (!UnfoldVR)
+      return false;
+
+    // Look for other unfolding opportunities.
+    return OptimizeByUnfold2(UnfoldVR, FoldedSS, MBB, MII,
+                             MaybeDeadStores, Spills, RegKills, KillOps, VRM);
+  }
 
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI.getOperand(i);
@@ -705,6 +907,7 @@ bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
       MF.DeleteMachineInstr(NewMI);
     }
   }
+
   return false;
 }
 
@@ -770,7 +973,7 @@ bool LocalSpiller::CommuteToFoldReload(MachineBasicBlock &MBB,
   VRM.addSpillSlotUse(SS, FoldedMI);
   VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
   // Insert new def MI and spill MI.
-  const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
+  const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
   TII->storeRegToStackSlot(MBB, &MI, NewReg, true, SS, RC);
   MII = prior(MII);
   MachineInstr *StoreMI = MII;
@@ -935,13 +1138,13 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
   DistanceMap.clear();
   for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
        MII != E; ) {
-    MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+    MachineBasicBlock::iterator NextMII = next(MII);
 
     VirtRegMap::MI2VirtMapTy::const_iterator I, End;
     bool Erased = false;
     bool BackTracked = false;
-    if (PrepForUnfoldOpti(MBB, MII,
-                          MaybeDeadStores, Spills, RegKills, KillOps, VRM))
+    if (OptimizeByUnfold(MBB, MII,
+                         MaybeDeadStores, Spills, RegKills, KillOps, VRM))
       NextMII = next(MII);
 
     MachineInstr &MI = *MII;
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 5a42a8279d..c0d0837960 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -97,7 +97,7 @@ namespace llvm {
     const TargetRegisterInfo *getRegInfo() const { return TRI; }
 
     /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
-    /// available in a physical register, return that PhysReg, otherwise 
+    /// available in a physical register, return that PhysReg, otherwise
    /// return 0.
     unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
       std::map<int, unsigned>::const_iterator I =
@@ -284,6 +284,7 @@ namespace llvm {
     MachineRegisterInfo *RegInfo;
     const TargetRegisterInfo *TRI;
     const TargetInstrInfo *TII;
+    BitVector AllocatableRegs;
     DenseMap<MachineInstr*, unsigned> DistanceMap;
   public:
     bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM);
@@ -291,12 +292,22 @@ namespace llvm {
     void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
                           unsigned Reg, BitVector &RegKills,
                           std::vector<MachineOperand*> &KillOps);
-    bool PrepForUnfoldOpti(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator &MII,
+
+    bool OptimizeByUnfold(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator &MII,
+                          std::vector<MachineInstr*> &MaybeDeadStores,
+                          AvailableSpills &Spills, BitVector &RegKills,
+                          std::vector<MachineOperand*> &KillOps,
+                          VirtRegMap &VRM);
+
+    bool OptimizeByUnfold2(unsigned VirtReg, int SS,
+                           MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator &MII,
                            std::vector<MachineInstr*> &MaybeDeadStores,
                            AvailableSpills &Spills, BitVector &RegKills,
                            std::vector<MachineOperand*> &KillOps,
                            VirtRegMap &VRM);
+
     bool CommuteToFoldReload(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MII,
                              unsigned VirtReg, unsigned SrcReg, int SS,
@@ -305,6 +316,7 @@ namespace llvm {
                              std::vector<MachineOperand*> &KillOps,
                              const TargetRegisterInfo *TRI,
                              VirtRegMap &VRM);
+
     void SpillRegToStackSlot(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MII,
                              int Idx, unsigned PhysReg, int StackSlot,
@@ -315,6 +327,7 @@ namespace llvm {
                              BitVector &RegKills,
                              std::vector<MachineOperand*> &KillOps,
                              VirtRegMap &VRM);
+
     void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
                     AvailableSpills &Spills, BitVector &RegKills,
                     std::vector<MachineOperand*> &KillOps);
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
new file mode 100644
index 0000000000..e9e2e4aaa1
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -0,0 +1,139 @@
+; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded}
+ %struct.SHA512_CTX = type { [8 x i64], i64, i64, %struct.anon, i32, i32 }
+ %struct.anon = type { [16 x i64] }
+@K512 = external constant [80 x i64], align 32 ; <[80 x i64]*> [#uses=2]
+
+define fastcc void @sha512_block_data_order(%struct.SHA512_CTX* nocapture %ctx, i8* nocapture %in, i64 %num) nounwind ssp {
+entry:
+ br label %bb349
+
+bb349: ; preds = %bb349, %entry
+ %e.0489 = phi i64 [ 0, %entry ], [ %e.0, %bb349 ] ; <i64> [#uses=3]
+ %b.0472 = phi i64 [ 0, %entry ], [ %87, %bb349 ] ; <i64> [#uses=2]
+ %asmtmp356 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %e.0489) nounwind ; <i64> [#uses=1]
+ %0 = xor i64 0, %asmtmp356 ; <i64> [#uses=1]
+ %1 = add i64 0, %0 ; <i64> [#uses=1]
+ %2 = add i64 %1, 0 ; <i64> [#uses=1]
+ %3 = add i64 %2, 0 ; <i64> [#uses=1]
+ %4 = add i64 %3, 0 ; <i64> [#uses=5]
+ %asmtmp372 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %4) nounwind ; <i64> [#uses=1]
+ %asmtmp373 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %4) nounwind ; <i64> [#uses=0]
+ %5 = xor i64 %asmtmp372, 0 ; <i64> [#uses=0]
+ %6 = xor i64 0, %b.0472 ; <i64> [#uses=1]
+ %7 = and i64 %4, %6 ; <i64> [#uses=1]
+ %8 = xor i64 %7, 0 ; <i64> [#uses=1]
+ %9 = add i64 0, %8 ; <i64> [#uses=1]
+ %10 = add i64 %9, 0 ; <i64> [#uses=2]
+ %asmtmp377 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 0) nounwind ; <i64> [#uses=1]
+ %11 = xor i64 0, %asmtmp377 ; <i64> [#uses=1]
+ %12 = add i64 0, %11 ; <i64> [#uses=1]
+ %13 = add i64 %12, 0 ; <i64> [#uses=1]
+ %not381 = xor i64 0, -1 ; <i64> [#uses=1]
+ %14 = and i64 %e.0489, %not381 ; <i64> [#uses=1]
+ %15 = xor i64 0, %14 ; <i64> [#uses=1]
+ %16 = add i64 %15, 0 ; <i64> [#uses=1]
+ %17 = add i64 %16, %13 ; <i64> [#uses=1]
+ %18 = add i64 %17, 0 ; <i64> [#uses=1]
+ %19 = add i64 %18, 0 ; <i64> [#uses=2]
+ %20 = add i64 %19, %b.0472 ; <i64> [#uses=3]
+ %21 = add i64 %19, 0 ; <i64> [#uses=1]
+ %22 = add i64 %21, 0 ; <i64> [#uses=1]
+ %23 = add i32 0, 12 ; <i32> [#uses=1]
+ %24 = and i32 %23, 12 ; <i32> [#uses=1]
+ %25 = zext i32 %24 to i64 ; <i64> [#uses=1]
+ %26 = getelementptr [16 x i64]* null, i64 0, i64 %25 ; <i64*> [#uses=0]
+ %27 = add i64 0, %e.0489 ; <i64> [#uses=1]
+ %28 = add i64 %27, 0 ; <i64> [#uses=1]
+ %29 = add i64 %28, 0 ; <i64> [#uses=1]
+ %30 = add i64 %29, 0 ; <i64> [#uses=2]
+ %31 = and i64 %10, %4 ; <i64> [#uses=1]
+ %32 = xor i64 0, %31 ; <i64> [#uses=1]
+ %33 = add i64 %30, 0 ; <i64> [#uses=3]
+ %34 = add i64 %30, %32 ; <i64> [#uses=1]
+ %35 = add i64 %34, 0 ; <i64> [#uses=1]
+ %36 = and i64 %33, %20 ; <i64> [#uses=1]
+ %37 = xor i64 %36, 0 ; <i64> [#uses=1]
+ %38 = add i64 %37, 0 ; <i64> [#uses=1]
+ %39 = add i64 %38, 0 ; <i64> [#uses=1]
+ %40 = add i64 %39, 0 ; <i64> [#uses=1]
+ %41 = add i64 %40, 0 ; <i64> [#uses=1]
+ %42 = add i64 %41, %4 ; <i64> [#uses=3]
+ %43 = or i32 0, 6 ; <i32> [#uses=1]
+ %44 = and i32 %43, 14 ; <i32> [#uses=1]
+ %45 = zext i32 %44 to i64 ; <i64> [#uses=1]
+ %46 = getelementptr [16 x i64]* null, i64 0, i64 %45 ; <i64*> [#uses=1]
+ %not417 = xor i64 %42, -1 ; <i64> [#uses=1]
+ %47 = and i64 %20, %not417 ; <i64> [#uses=1]
+ %48 = xor i64 0, %47 ; <i64> [#uses=1]
+ %49 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1]
+ %50 = load i64* %49, align 8 ; <i64> [#uses=1]
+ %51 = add i64 %48, 0 ; <i64> [#uses=1]
+ %52 = add i64 %51, 0 ; <i64> [#uses=1]
+ %53 = add i64 %52, 0 ; <i64> [#uses=1]
+ %54 = add i64 %53, %50 ; <i64> [#uses=2]
+ %asmtmp420 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 0) nounwind ; <i64> [#uses=1]
+ %asmtmp421 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 0) nounwind ; <i64> [#uses=1]
+ %55 = xor i64 %asmtmp420, 0 ; <i64> [#uses=1]
+ %56 = xor i64 %55, %asmtmp421 ; <i64> [#uses=1]
+ %57 = add i64 %54, %10 ; <i64> [#uses=5]
+ %58 = add i64 %54, 0 ; <i64> [#uses=1]
+ %59 = add i64 %58, %56 ; <i64> [#uses=2]
+ %60 = or i32 0, 7 ; <i32> [#uses=1]
+ %61 = and i32 %60, 15 ; <i32> [#uses=1]
+ %62 = zext i32 %61 to i64 ; <i64> [#uses=1]
+ %63 = getelementptr [16 x i64]* null, i64 0, i64 %62 ; <i64*> [#uses=2]
+ %64 = load i64* null, align 8 ; <i64> [#uses=1]
+ %65 = lshr i64 %64, 6 ; <i64> [#uses=1]
+ %66 = xor i64 0, %65 ; <i64> [#uses=1]
+ %67 = xor i64 %66, 0 ; <i64> [#uses=1]
+ %68 = load i64* %46, align 8 ; <i64> [#uses=1]
+ %69 = load i64* null, align 8 ; <i64> [#uses=1]
+ %70 = add i64 %68, 0 ; <i64> [#uses=1]
+ %71 = add i64 %70, %67 ; <i64> [#uses=1]
+ %72 = add i64 %71, %69 ; <i64> [#uses=1]
+ %asmtmp427 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 18, i64 %57) nounwind ; <i64> [#uses=1]
+ %asmtmp428 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %57) nounwind ; <i64> [#uses=1]
+ %73 = xor i64 %asmtmp427, 0 ; <i64> [#uses=1]
+ %74 = xor i64 %73, %asmtmp428 ; <i64> [#uses=1]
+ %75 = and i64 %57, %42 ; <i64> [#uses=1]
+ %not429 = xor i64 %57, -1 ; <i64> [#uses=1]
+ %76 = and i64 %33, %not429 ; <i64> [#uses=1]
+ %77 = xor i64 %75, %76 ; <i64> [#uses=1]
+ %78 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1]
+ %79 = load i64* %78, align 16 ; <i64> [#uses=1]
+ %80 = add i64 %77, %20 ; <i64> [#uses=1]
+ %81 = add i64 %80, %72 ; <i64> [#uses=1]
+ %82 = add i64 %81, %74 ; <i64> [#uses=1]
+ %83 = add i64 %82, %79 ; <i64> [#uses=1]
+ %asmtmp432 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %59) nounwind ; <i64> [#uses=1]
+ %asmtmp433 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %59) nounwind ; <i64> [#uses=1]
+ %84 = xor i64 %asmtmp432, 0 ; <i64> [#uses=1]
+ %85 = xor i64 %84, %asmtmp433 ; <i64> [#uses=1]
+ %86 = add i64 %83, %22 ; <i64> [#uses=2]
+ %87 = add i64 0, %85 ; <i64> [#uses=1]
+ %asmtmp435 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 8, i64 0) nounwind ; <i64> [#uses=1]
+ %88 = xor i64 0, %asmtmp435 ; <i64> [#uses=1]
+ %89 = load i64* null, align 8 ; <i64> [#uses=3]
+ %asmtmp436 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 19, i64 %89) nounwind ; <i64> [#uses=1]
+ %asmtmp437 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 %89) nounwind ; <i64> [#uses=1]
+ %90 = lshr i64 %89, 6 ; <i64> [#uses=1]
+ %91 = xor i64 %asmtmp436, %90 ; <i64> [#uses=1]
+ %92 = xor i64 %91, %asmtmp437 ; <i64> [#uses=1]
+ %93 = load i64* %63, align 8 ; <i64> [#uses=1]
+ %94 = load i64* null, align 8 ; <i64> [#uses=1]
+ %95 = add i64 %93, %88 ; <i64> [#uses=1]
+ %96 = add i64 %95, %92 ; <i64> [#uses=1]
+ %97 = add i64 %96, %94 ; <i64> [#uses=2]
+ store i64 %97, i64* %63, align 8
+ %98 = and i64 %86, %57 ; <i64> [#uses=1]
+ %not441 = xor i64 %86, -1 ; <i64> [#uses=1]
+ %99 = and i64 %42, %not441 ; <i64> [#uses=1]
+ %100 = xor i64 %98, %99 ; <i64> [#uses=1]
+ %101 = add i64 %100, %33 ; <i64> [#uses=1]
+ %102 = add i64 %101, %97 ; <i64> [#uses=1]
+ %103 = add i64 %102, 0 ; <i64> [#uses=1]
+ %104 = add i64 %103, 0 ; <i64> [#uses=1]
+ %e.0 = add i64 %104, %35 ; <i64> [#uses=1]
+ br label %bb349
+}