Commit 345b9eee2adc78aff8c455eb4b2baa67421d3faf

Authored by Josh Klontz
1 parent 235a9c3b

Fixed LLVM for-loop generation to test the end condition before executing the loop body

Showing 1 changed file with 47 additions and 45 deletions
sdk/plugins/llvm.cpp
... ... @@ -246,26 +246,29 @@ struct MatrixBuilder : public Matrix
246 246 Value *compareLT(Value *i, Value *j) const { return isFloating() ? b->CreateFCmpOLT(i, j) : (isSigned() ? b->CreateICmpSLT(i, j) : b->CreateICmpULT(i, j)); }
247 247 Value *compareGT(Value *i, Value *j) const { return isFloating() ? b->CreateFCmpOGT(i, j) : (isSigned() ? b->CreateICmpSGT(i, j) : b->CreateICmpUGT(i, j)); }
248 248  
249   - static PHINode *beginLoop(IRBuilder<> &builder, Function *function, BasicBlock *parent, BasicBlock **current, const Twine &name = "") {
250   - *current = BasicBlock::Create(getGlobalContext(), "loop_"+name, function);
251   - builder.CreateBr(*current);
252   - builder.SetInsertPoint(*current);
253   - PHINode *j = builder.CreatePHI(Type::getInt32Ty(getGlobalContext()), 2, name);
254   - j->addIncoming(MatrixBuilder::zero(), parent);
255   - return j;
256   - }
257   - PHINode *beginLoop(BasicBlock *parent, BasicBlock **current, const Twine &name = "") const { return beginLoop(*b, f, parent, current, name); }
258   - static void endLoop(IRBuilder<> &builder, Function *function, BasicBlock *current, PHINode *j, Value *end, const Twine &name = "") {
259   - BasicBlock *loop = BasicBlock::Create(getGlobalContext(), "loop_"+name+"_end", function);
  249 + static PHINode *beginLoop(IRBuilder<> &builder, Function *function, BasicBlock *entry, BasicBlock *&loop, BasicBlock *&exit, Value *stop, const Twine &name = "") {
  250 + loop = BasicBlock::Create(getGlobalContext(), "loop_"+name, function);
260 251 builder.CreateBr(loop);
261 252 builder.SetInsertPoint(loop);
262   - Value *increment = builder.CreateAdd(j, MatrixBuilder::one(), "increment_"+name);
263   - j->addIncoming(increment, loop);
264   - BasicBlock *exit = BasicBlock::Create(getGlobalContext(), "loop_"+name+"_exit", function);
265   - builder.CreateCondBr(builder.CreateICmpNE(increment, end, "loop_"+name+"_test"), current, exit);
  253 +
  254 + PHINode *i = builder.CreatePHI(Type::getInt32Ty(getGlobalContext()), 2, name);
  255 + i->addIncoming(MatrixBuilder::zero(), entry);
  256 + Value *increment = builder.CreateAdd(i, MatrixBuilder::one(), "increment_"+name);
  257 + BasicBlock *body = BasicBlock::Create(getGlobalContext(), "loop_"+name+"_body", function);
  258 + i->addIncoming(increment, body);
  259 +
  260 + exit = BasicBlock::Create(getGlobalContext(), "loop_"+name+"_exit", function);
  261 + builder.CreateCondBr(builder.CreateICmpEQ(i, stop, "loop_"+name+"_test"), exit, body);
  262 + builder.SetInsertPoint(body);
  263 + return i;
  264 + }
  265 + PHINode *beginLoop(BasicBlock *entry, BasicBlock *&loop, BasicBlock *&exit, Value *stop, const Twine &name = "") const { return beginLoop(*b, f, entry, loop, exit, stop, name); }
  266 +
  267 + static void endLoop(IRBuilder<> &builder, BasicBlock *loop, BasicBlock *exit) {
  268 + builder.CreateBr(loop);
266 269 builder.SetInsertPoint(exit);
267 270 }
268   - void endLoop(BasicBlock *current, PHINode *j, Value *end, const Twine &name = "") const { endLoop(*b, f, current, j, end, name); }
  271 + void endLoop(BasicBlock *loop, BasicBlock *exit) const { endLoop(*b, loop, exit); }
269 272  
270 273 template <typename T>
271 274 inline static std::vector<T> toVector(T value) { std::vector<T> vector; vector.push_back(value); return vector; }
... ... @@ -456,14 +459,14 @@ private:
456 459 BasicBlock *entry = BasicBlock::Create(getGlobalContext(), "entry", function);
457 460 IRBuilder<> builder(entry);
458 461  
459   - BasicBlock *kernel;
460   - PHINode *i = MatrixBuilder::beginLoop(builder, function, entry, &kernel, "i");
  462 + BasicBlock *loop, *exit;
  463 + PHINode *i = MatrixBuilder::beginLoop(builder, function, entry, loop, exit, len, "i");
461 464  
462 465 Matrix n;
463 466 preallocate(m, n);
464 467 build(MatrixBuilder(m, src, &builder, function, "src"), MatrixBuilder(n, dst, &builder, function, "dst"), i);
465 468  
466   - MatrixBuilder::endLoop(builder, function, kernel, i, len, "i");
  469 + MatrixBuilder::endLoop(builder, loop, exit);
467 470  
468 471 builder.CreateRetVoid();
469 472 return function;
... ... @@ -549,14 +552,14 @@ private:
549 552 BasicBlock *entry = BasicBlock::Create(getGlobalContext(), "entry", function);
550 553 IRBuilder<> builder(entry);
551 554  
552   - BasicBlock *kernel;
553   - PHINode *i = MatrixBuilder::beginLoop(builder, function, entry, &kernel, "i");
  555 + BasicBlock *loop, *exit;
  556 + PHINode *i = MatrixBuilder::beginLoop(builder, function, entry, loop, exit, len, "i");
554 557  
555 558 Matrix o;
556 559 preallocate(m, n, o);
557 560 build(MatrixBuilder(m, srcA, &builder, function, "srcA"), MatrixBuilder(n, srcB, &builder, function, "srcB"), MatrixBuilder(o, dst, &builder, function, "dst"), i);
558 561  
559   - MatrixBuilder::endLoop(builder, function, kernel, i, len, "i");
  562 + MatrixBuilder::endLoop(builder, loop, exit);
560 563  
561 564 builder.CreateRetVoid();
562 565 return function;
... ... @@ -760,53 +763,52 @@ class sumTransform : public UnaryKernel
760 763 dst.deindex(i, &c, &x, &y, &t);
761 764 AllocaInst *sum = dst.autoAlloca(0, "sum");
762 765  
763   - QList<PHINode*> loops;
764   - QList<BasicBlock*> blocks;
765   - blocks.push_back(i->getParent());
  766 + QList<BasicBlock*> loops, exits;
  767 + loops.push_back(i->getParent());
766 768 Value *src_c, *src_x, *src_y, *src_t;
767 769  
768 770 if (frames && !src.singleFrame()) {
769   - BasicBlock *block;
770   - loops.append(dst.beginLoop(blocks.last(), &block, "src_t"));
771   - blocks.append(block);
772   - src_t = loops.last();
  771 + BasicBlock *loop, *exit;
  772 + src_t = dst.beginLoop(loops.last(), loop, exit, src.getFrames(), "src_t");
  773 + loops.append(loop);
  774 + exits.append(exit);
773 775 } else {
774 776 src_t = t;
775 777 }
776 778  
777 779 if (rows && !src.singleRow()) {
778   - BasicBlock *block;
779   - loops.append(dst.beginLoop(blocks.last(), &block, "src_y"));
780   - blocks.append(block);
781   - src_y = loops.last();
  780 + BasicBlock *loop, *exit;
  781 + src_y = dst.beginLoop(loops.last(), loop, exit, src.getRows(), "src_y");
  782 + loops.append(loop);
  783 + exits.append(exit);
782 784 } else {
783 785 src_y = y;
784 786 }
785 787  
786 788 if (columns && !src.singleColumn()) {
787   - BasicBlock *block;
788   - loops.append(dst.beginLoop(blocks.last(), &block, "src_x"));
789   - blocks.append(block);
790   - src_x = loops.last();
  789 + BasicBlock *loop, *exit;
  790 + src_x = dst.beginLoop(loops.last(), loop, exit, src.getColumns(), "src_x");
  791 + loops.append(loop);
  792 + exits.append(exit);
791 793 } else {
792 794 src_x = x;
793 795 }
794 796  
795 797 if (channels && !src.singleChannel()) {
796   - BasicBlock *block;
797   - loops.append(dst.beginLoop(blocks.last(), &block, "src_c"));
798   - blocks.append(block);
799   - src_c = loops.last();
  798 + BasicBlock *loop, *exit;
  799 + src_c = dst.beginLoop(loops.last(), loop, exit, src.getChannels(), "src_c");
  800 + loops.append(loop);
  801 + exits.append(exit);
800 802 } else {
801 803 src_c = c;
802 804 }
803 805  
804 806 dst.b->CreateStore(dst.add(dst.b->CreateLoad(sum), src.cast(src.load(src.aliasIndex(dst, src_c, src_x, src_y, src_t)), dst), "accumulate"), sum);
805 807  
806   - if (channels && !src.singleChannel()) dst.endLoop(blocks.takeLast(), loops.takeLast(), src.getChannels(), "src_c");
807   - if (columns && !src.singleColumn()) dst.endLoop(blocks.takeLast(), loops.takeLast(), src.getColumns(), "src_x");
808   - if (rows && !src.singleRow()) dst.endLoop(blocks.takeLast(), loops.takeLast(), src.getRows(), "src_y");
809   - if (frames && !src.singleFrame()) dst.endLoop(blocks.takeLast(), loops.takeLast(), src.getFrames(), "src_t");
  808 + if (channels && !src.singleChannel()) dst.endLoop(loops.takeLast(), exits.takeLast());
  809 + if (columns && !src.singleColumn()) dst.endLoop(loops.takeLast(), exits.takeLast());
  810 + if (rows && !src.singleRow()) dst.endLoop(loops.takeLast(), exits.takeLast());
  811 + if (frames && !src.singleFrame()) dst.endLoop(loops.takeLast(), exits.takeLast());
810 812  
811 813 dst.store(i, dst.b->CreateLoad(sum));
812 814 }
... ...