1 /*=============================================================================
2 Copyright (c) 2001-2011 Joel de Guzman
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 =============================================================================*/
8 #include "compiler.hpp"
9 #include "annotation.hpp"
12 #include <boost/foreach.hpp>
13 #include <boost/variant/apply_visitor.hpp>
14 #include <boost/range/adaptor/transformed.hpp>
15 #include <boost/assert.hpp>
16 #include <boost/lexical_cast.hpp>
19 namespace client
{ namespace code_gen
30 llvm::IRBuilder
<>* builder
)
32 is_lvalue_(is_lvalue_
),
36 value::value(value
const& rhs
)
38 is_lvalue_(rhs
.is_lvalue_
),
42 bool value::is_lvalue() const
47 bool value::is_valid() const
52 value::operator bool() const
57 void value::name(char const* id
)
62 void value::name(std::string
const& id
)
67 value::operator llvm::Value
*() const
71 BOOST_ASSERT(builder
!= 0);
72 return builder
->CreateLoad(v
, v
->getName());
77 value
& value::operator=(value
const& rhs
)
80 is_lvalue_
= rhs
.is_lvalue_
;
81 builder
= rhs
.builder
;
85 value
& value::assign(value
const& rhs
)
87 BOOST_ASSERT(is_lvalue());
88 BOOST_ASSERT(builder
!= 0);
89 builder
->CreateStore(rhs
, v
);
93 value
operator-(value a
)
95 BOOST_ASSERT(a
.builder
!= 0);
97 a
.builder
->CreateNeg(a
, "neg_tmp"),
102 value
operator!(value a
)
104 BOOST_ASSERT(a
.builder
!= 0);
106 a
.builder
->CreateNot(a
, "not_tmp"),
111 value
operator+(value a
, value b
)
113 BOOST_ASSERT(a
.builder
!= 0);
115 a
.builder
->CreateAdd(a
, b
, "add_tmp"),
120 value
operator-(value a
, value b
)
122 BOOST_ASSERT(a
.builder
!= 0);
124 a
.builder
->CreateSub(a
, b
, "sub_tmp"),
129 value
operator*(value a
, value b
)
131 BOOST_ASSERT(a
.builder
!= 0);
133 a
.builder
->CreateMul(a
, b
, "mul_tmp"),
138 value
operator/(value a
, value b
)
140 BOOST_ASSERT(a
.builder
!= 0);
142 a
.builder
->CreateSDiv(a
, b
, "div_tmp"),
147 value
operator%(value a
, value b
)
149 BOOST_ASSERT(a
.builder
!= 0);
151 a
.builder
->CreateSRem(a
, b
, "mod_tmp"),
156 value
operator&(value a
, value b
)
158 BOOST_ASSERT(a
.builder
!= 0);
160 a
.builder
->CreateAnd(a
, b
, "and_tmp"),
165 value
operator|(value a
, value b
)
167 BOOST_ASSERT(a
.builder
!= 0);
169 a
.builder
->CreateOr(a
, b
, "or_tmp"),
174 value
operator^(value a
, value b
)
176 BOOST_ASSERT(a
.builder
!= 0);
178 a
.builder
->CreateXor(a
, b
, "xor_tmp"),
183 value
operator<<(value a
, value b
)
185 BOOST_ASSERT(a
.builder
!= 0);
187 a
.builder
->CreateShl(a
, b
, "shl_tmp"),
192 value
operator>>(value a
, value b
)
194 BOOST_ASSERT(a
.builder
!= 0);
196 a
.builder
->CreateLShr(a
, b
, "shr_tmp"),
201 value
operator==(value a
, value b
)
203 BOOST_ASSERT(a
.builder
!= 0);
205 a
.builder
->CreateICmpEQ(a
, b
, "eq_tmp"),
210 value
operator!=(value a
, value b
)
212 BOOST_ASSERT(a
.builder
!= 0);
214 a
.builder
->CreateICmpNE(a
, b
, "ne_tmp"),
219 value
operator<(value a
, value b
)
221 BOOST_ASSERT(a
.builder
!= 0);
223 a
.builder
->CreateICmpSLT(a
, b
, "slt_tmp"),
228 value
operator<=(value a
, value b
)
230 BOOST_ASSERT(a
.builder
!= 0);
232 a
.builder
->CreateICmpSLE(a
, b
, "sle_tmp"),
237 value
operator>(value a
, value b
)
239 BOOST_ASSERT(a
.builder
!= 0);
241 a
.builder
->CreateICmpSGT(a
, b
, "sgt_tmp"),
246 value
operator>=(value a
, value b
)
248 BOOST_ASSERT(a
.builder
!= 0);
250 a
.builder
->CreateICmpSGE(a
, b
, "sge_tmp"),
255 struct function::to_value
257 typedef value result_type
;
260 to_value(llvm_compiler
* c
= 0) : c(c
) {}
262 value
operator()(llvm::Value
& v
) const
268 bool basic_block::has_terminator() const
270 return b
->getTerminator() != 0;
273 bool basic_block::is_valid() const
278 function::operator llvm::Function
*() const
283 std::size_t function::arg_size() const
285 return f
->arg_size();
288 void function::add(basic_block
const& b
)
290 f
->getBasicBlockList().push_back(b
);
293 void function::erase_from_parent()
295 f
->eraseFromParent();
298 basic_block
function::last_block()
300 return &f
->getBasicBlockList().back();
303 bool function::empty() const
308 std::string
function::name() const
313 function::arg_range
function::args() const
315 BOOST_ASSERT(c
!= 0);
316 arg_val_iterator
first(f
->arg_begin(), to_value());
317 arg_val_iterator
last(f
->arg_end(), to_value());
318 return arg_range(first
, last
);
321 bool function::is_valid() const
326 void function::verify() const
328 llvm::verifyFunction(*f
);
331 value
llvm_compiler::val(unsigned int x
)
334 llvm::ConstantInt::get(context(), llvm::APInt(int_size
, x
)),
335 false, &llvm_builder
);
338 value
llvm_compiler::val(int x
)
341 llvm::ConstantInt::get(context(), llvm::APInt(int_size
, x
)),
342 false, &llvm_builder
);
345 value
llvm_compiler::val(bool x
)
348 llvm::ConstantInt::get(context(), llvm::APInt(1, x
)),
349 false, &llvm_builder
);
352 value
llvm_compiler::val(llvm::Value
* v
)
354 return value(v
, false, &llvm_builder
);
359 // Create an alloca instruction in the entry block of
360 // the function. This is used for mutable variables etc.
362 make_entry_block_alloca(
365 llvm::LLVMContext
& context
)
367 llvm::IRBuilder
<> builder(
369 f
->getEntryBlock().begin());
371 return builder
.CreateAlloca(
372 llvm::Type::getIntNTy(context
, int_size
), 0, name
);
376 value
llvm_compiler::var(char const* name
)
378 llvm::Function
* f
= llvm_builder
.GetInsertBlock()->getParent();
379 llvm::IRBuilder
<> builder(
381 f
->getEntryBlock().begin());
383 llvm::AllocaInst
* alloca
= builder
.CreateAlloca(
384 llvm::Type::getIntNTy(context(), int_size
), 0, name
);
386 return value(alloca
, true, &llvm_builder
);
389 struct value::to_llvm_value
391 typedef llvm::Value
* result_type
;
392 llvm::Value
* operator()(value
const& x
) const
398 template <typename C
>
399 llvm::Value
* llvm_compiler::call_impl(
403 // Sigh. LLVM requires CreateCall arguments to be random access.
404 // It would have been better if it can accept forward iterators.
405 // I guess it needs the arguments to be in contiguous memory.
406 // So, we have to put the args into a temporary std::vector.
407 std::vector
<llvm::Value
*> args(
408 args_
.begin(), args_
.end());
410 // Check the args for null values. We can't have null values.
411 // Return 0 if we find one to flag error.
412 BOOST_FOREACH(llvm::Value
* arg
, args
)
418 return llvm_builder
.CreateCall(
419 callee
, args
.begin(), args
.end(), "call_tmp");
422 template <typename Container
>
423 value
llvm_compiler::call(
425 Container
const& args
)
427 llvm::Value
* call
= call_impl(
429 args
| boost::adaptors::transformed(value::to_llvm_value()));
433 return value(call
, false, &llvm_builder
);
436 function
llvm_compiler::get_function(char const* name
)
438 return function(vm
.module()->getFunction(name
), this);
441 function
llvm_compiler::get_current_function()
443 // get the current function
444 return function(llvm_builder
.GetInsertBlock()->getParent(), this);
447 function
llvm_compiler::declare_function(
449 , std::string
const& name
452 llvm::Type
const* int_type
=
453 llvm::Type::getIntNTy(context(), int_size
);
454 llvm::Type
const* void_type
= llvm::Type::getVoidTy(context());
456 std::vector
<llvm::Type
const*> ints(nargs
, int_type
);
457 llvm::Type
const* return_type
= void_return
? void_type
: int_type
;
459 llvm::FunctionType
* function_type
=
460 llvm::FunctionType::get(void_return
? void_type
: int_type
, ints
, false);
462 return function(llvm::Function::Create(
463 function_type
, llvm::Function::ExternalLinkage
,
464 name
, vm
.module()), this);
467 basic_block
llvm_compiler::make_basic_block(
470 , basic_block before
)
472 return llvm::BasicBlock::Create(context(), name
, parent
, before
);
475 value
llvm_compiler::var(std::string
const& name
)
477 return var(name
.c_str());
480 function
llvm_compiler::get_function(std::string
const& name
)
482 return get_function(name
.c_str());
485 basic_block
llvm_compiler::get_insert_block()
487 return llvm_builder
.GetInsertBlock();
490 void llvm_compiler::set_insert_point(basic_block b
)
492 llvm_builder
.SetInsertPoint(b
);
495 void llvm_compiler::conditional_branch(
496 value cond
, basic_block true_br
, basic_block false_br
)
498 llvm_builder
.CreateCondBr(cond
, true_br
, false_br
);
501 void llvm_compiler::branch(basic_block b
)
503 llvm_builder
.CreateBr(b
);
506 void llvm_compiler::return_()
508 llvm_builder
.CreateRetVoid();
511 void llvm_compiler::return_(value v
)
513 llvm_builder
.CreateRet(v
);
516 void llvm_compiler::optimize_function(function f
)
518 // Optimize the function.
522 void llvm_compiler::init_fpm()
524 // Set up the optimizer pipeline. Start with registering info about how the
525 // target lays out data structures.
526 fpm
.add(new llvm::TargetData(*vm
.execution_engine()->getTargetData()));
527 // Provide basic AliasAnalysis support for GVN.
528 fpm
.add(llvm::createBasicAliasAnalysisPass());
529 // Promote allocas to registers.
530 fpm
.add(llvm::createPromoteMemoryToRegisterPass());
531 // Do simple "peephole" optimizations and bit-twiddling optzns.
532 fpm
.add(llvm::createInstructionCombiningPass());
533 // Reassociate expressions.
534 fpm
.add(llvm::createReassociatePass());
535 // Eliminate Common SubExpressions.
536 fpm
.add(llvm::createGVNPass());
537 // Simplify the control flow graph (deleting unreachable blocks, etc).
538 fpm
.add(llvm::createCFGSimplificationPass());
540 fpm
.doInitialization();
543 value
compiler::operator()(unsigned int x
)
548 value
compiler::operator()(bool x
)
553 value
compiler::operator()(ast::primary_expr
const& x
)
555 return boost::apply_visitor(*this, x
.get());
558 value
compiler::operator()(ast::identifier
const& x
)
560 // Look this variable up in the function.
561 if (locals
.find(x
.name
) == locals
.end())
563 error_handler(x
.id
, "Undeclared variable: " + x
.name
);
566 return locals
[x
.name
];
569 value
compiler::operator()(ast::unary_expr
const& x
)
571 value operand
= boost::apply_visitor(*this, x
.operand_
);
572 if (!operand
.is_valid())
577 case token_ids::compl_
: return operand
^ val(-1);
578 case token_ids::minus
: return -operand
;
579 case token_ids::not_
: return !operand
;
580 case token_ids::plus
: return operand
;
581 case token_ids::plus_plus
:
583 if (!operand
.is_lvalue())
585 error_handler(x
.id
, "++ needs an var");
589 value r
= operand
+ val(1);
593 case token_ids::minus_minus
:
595 if (!operand
.is_lvalue())
597 error_handler(x
.id
, "-- needs an var");
601 value r
= operand
- val(1);
616 compile_args(compiler
& c
) : c(c
) {}
618 typedef value result_type
;
619 value
operator()(ast::expression
const& expr
) const
626 value
compiler::operator()(ast::function_call
const& x
)
628 function callee
= get_function(x
.function_name
.name
);
629 if (!callee
.is_valid())
631 error_handler(x
.function_name
.id
,
632 "Function not found: " + x
.function_name
.name
);
636 if (callee
.arg_size() != x
.args
.size())
638 error_handler(x
.function_name
.id
,
639 "Wrong number of arguments: " + x
.function_name
.name
);
644 x
.args
| boost::adaptors::transformed(compile_args(*this)));
656 2, // op_minus_assign
657 2, // op_times_assign
658 2, // op_divide_assign
660 2, // op_bit_and_assign
661 2, // op_bit_xor_assign
662 2, // op_bitor_assign
663 2, // op_shift_left_assign
664 2, // op_shift_right_assign
689 9, // op_greater_equal
693 10, // op_shift_right
706 inline int precedence_of(token_ids::type op
)
708 return precedence
[op
& 0xFF];
711 value
compiler::compile_binary_expression(
712 value lhs
, value rhs
, token_ids::type op
)
716 case token_ids::plus
: return lhs
+ rhs
;
717 case token_ids::minus
: return lhs
- rhs
;
718 case token_ids::times
: return lhs
* rhs
;
719 case token_ids::divide
: return lhs
/ rhs
;
720 case token_ids::mod
: return lhs
% rhs
;
722 case token_ids::logical_or
:
723 case token_ids::bit_or
: return lhs
| rhs
;
725 case token_ids::logical_and
:
726 case token_ids::bit_and
: return lhs
& rhs
;
728 case token_ids::bit_xor
: return lhs
^ rhs
;
729 case token_ids::shift_left
: return lhs
<< rhs
;
730 case token_ids::shift_right
: return lhs
>> rhs
;
732 case token_ids::equal
: return lhs
== rhs
;
733 case token_ids::not_equal
: return lhs
!= rhs
;
734 case token_ids::less
: return lhs
< rhs
;
735 case token_ids::less_equal
: return lhs
<= rhs
;
736 case token_ids::greater
: return lhs
> rhs
;
737 case token_ids::greater_equal
: return lhs
>= rhs
;
739 default: BOOST_ASSERT(0); return val();
743 // The Shunting-yard algorithm
744 value
compiler::compile_expression(
747 std::list
<ast::operation
>::const_iterator
& rest_begin
,
748 std::list
<ast::operation
>::const_iterator rest_end
)
750 while ((rest_begin
!= rest_end
) &&
751 (precedence_of(rest_begin
->operator_
) >= min_precedence
))
753 token_ids::type op
= rest_begin
->operator_
;
754 value rhs
= boost::apply_visitor(*this, rest_begin
->operand_
);
759 while ((rest_begin
!= rest_end
) &&
760 (precedence_of(rest_begin
->operator_
) > precedence_of(op
)))
762 token_ids::type next_op
= rest_begin
->operator_
;
763 rhs
= compile_expression(
764 precedence_of(next_op
), rhs
, rest_begin
, rest_end
);
767 lhs
= compile_binary_expression(lhs
, rhs
, op
);
772 value
compiler::operator()(ast::expression
const& x
)
774 value lhs
= boost::apply_visitor(*this, x
.first
);
777 std::list
<ast::operation
>::const_iterator rest_begin
= x
.rest
.begin();
778 return compile_expression(0, lhs
, rest_begin
, x
.rest
.end());
781 value
compiler::operator()(ast::assignment
const& x
)
783 if (locals
.find(x
.lhs
.name
) == locals
.end())
785 error_handler(x
.lhs
.id
, "Undeclared variable: " + x
.lhs
.name
);
789 value lhs
= locals
[x
.lhs
.name
];
790 value rhs
= (*this)(x
.rhs
);
794 if (x
.operator_
== token_ids::assign
)
803 case token_ids::plus_assign
: result
= lhs
+ rhs
; break;
804 case token_ids::minus_assign
: result
= lhs
- rhs
; break;
805 case token_ids::times_assign
: result
= lhs
* rhs
; break;
806 case token_ids::divide_assign
: result
= lhs
/ rhs
; break;
807 case token_ids::mod_assign
: result
= lhs
% rhs
; break;
808 case token_ids::bit_and_assign
: result
= lhs
& rhs
; break;
809 case token_ids::bit_xor_assign
: result
= lhs
^ rhs
; break;
810 case token_ids::bit_or_assign
: result
= lhs
| rhs
; break;
811 case token_ids::shift_left_assign
: result
= lhs
<< rhs
; break;
812 case token_ids::shift_right_assign
: result
= lhs
>> rhs
; break;
813 default: BOOST_ASSERT(0); return val();
820 bool compiler::operator()(ast::variable_declaration
const& x
)
822 if (locals
.find(x
.lhs
.name
) != locals
.end())
824 error_handler(x
.lhs
.id
, "Duplicate variable: " + x
.lhs
.name
);
829 std::string
const& name
= x
.lhs
.name
;
831 if (x
.rhs
) // if there's an RHS initializer
833 init
= (*this)(*x
.rhs
);
834 if (!init
.is_valid()) // don't add the variable if the RHS fails
838 value var_
= var(name
.c_str());
842 // Remember this binding.
847 struct compiler::statement_compiler
: compiler
849 typedef bool result_type
;
852 compiler::statement_compiler
& compiler::as_statement()
854 return *static_cast<statement_compiler
*>(this);
857 bool compiler::operator()(ast::statement
const& x
)
859 if (boost::get
<ast::nil
>(&x
) != 0) // empty statement
861 return boost::apply_visitor(as_statement(), x
);
864 bool compiler::operator()(ast::statement_list
const& x
)
866 BOOST_FOREACH(ast::statement
const& s
, x
)
874 bool compiler::operator()(ast::if_statement
const& x
)
876 value condition
= (*this)(x
.condition
);
877 if (!condition
.is_valid())
880 function f
= get_current_function();
882 // Create blocks for the then and else cases. Insert the 'then' block at the
883 // end of the function.
884 basic_block then_block
= make_basic_block("if.then", f
);
885 basic_block else_block
;
886 basic_block exit_block
;
890 else_block
= make_basic_block("if.else");
891 conditional_branch(condition
, then_block
, else_block
);
895 exit_block
= make_basic_block("if.end");
896 conditional_branch(condition
, then_block
, exit_block
);
900 set_insert_point(then_block
);
901 if (!(*this)(x
.then
))
903 if (!then_block
.has_terminator())
905 if (!exit_block
.is_valid())
906 exit_block
= make_basic_block("if.end");
909 // Codegen of 'then' can change the current block, update then_block
910 then_block
= get_insert_block();
916 set_insert_point(else_block
);
917 if (!(*this)(*x
.else_
))
919 if (!else_block
.has_terminator())
921 if (!exit_block
.is_valid())
922 exit_block
= make_basic_block("if.end");
925 // Codegen of 'else' can change the current block, update else_block
926 else_block
= get_insert_block();
929 if (exit_block
.is_valid())
933 set_insert_point(exit_block
);
938 bool compiler::operator()(ast::while_statement
const& x
)
940 function f
= get_current_function();
942 basic_block cond_block
= make_basic_block("while.cond", f
);
943 basic_block body_block
= make_basic_block("while.body");
944 basic_block exit_block
= make_basic_block("while.end");
947 set_insert_point(cond_block
);
948 value condition
= (*this)(x
.condition
);
949 if (!condition
.is_valid())
951 conditional_branch(condition
, body_block
, exit_block
);
953 set_insert_point(body_block
);
955 if (!(*this)(x
.body
))
958 if (!body_block
.has_terminator())
959 branch(cond_block
); // loop back
963 set_insert_point(exit_block
);
968 bool compiler::operator()(ast::return_statement
const& x
)
975 x
.id
, "'void' function returning a value: ");
984 x
.id
, current_function_name
985 + " function must return a value: ");
992 value return_val
= (*this)(*x
.expr
);
993 if (!return_val
.is_valid())
995 return_var
.assign(return_val
);
998 branch(return_block
);
1002 function
compiler::function_decl(ast::function
const& x
)
1004 void_return
= x
.return_type
== "void";
1005 current_function_name
= x
.function_name
.name
;
1010 , current_function_name
1013 // If function conflicted, the function already exixts. If it has a
1014 // body, don't allow redefinition or reextern.
1015 if (f
.name() != current_function_name
)
1017 // Delete the one we just made and get the existing one.
1018 f
.erase_from_parent();
1019 f
= get_function(current_function_name
);
1021 // If function already has a body, reject this.
1026 "Duplicate function: " + x
.function_name
.name
);
1030 // If function took a different number of args, reject.
1031 if (f
.arg_size() != x
.args
.size())
1035 "Redefinition of function with different # args: "
1036 + x
.function_name
.name
);
1040 // Set names for all arguments.
1041 function::arg_range rng
= f
.args();
1042 function::arg_range::const_iterator iter
= rng
.begin();
1043 BOOST_FOREACH(ast::identifier
const& arg
, x
.args
)
1045 iter
->name(arg
.name
);
1052 void compiler::function_allocas(ast::function
const& x
, function f
)
1054 // Create variables for each argument and register the
1055 // argument in the symbol table so that references to it will succeed.
1056 function::arg_range rng
= f
.args();
1057 function::arg_range::const_iterator iter
= rng
.begin();
1058 BOOST_FOREACH(ast::identifier
const& arg
, x
.args
)
1060 // Create an arg_ for this variable.
1061 value arg_
= var(arg
.name
);
1063 // Store the initial value into the arg_.
1066 // Add arguments to variable symbol table.
1067 locals
[arg
.name
] = arg_
;
1073 // Create an alloca for the return value
1074 return_var
= var("return.val");
1078 bool compiler::operator()(ast::function
const& x
)
1080 ///////////////////////////////////////////////////////////////////////
1082 function f
= function_decl(x
);
1086 ///////////////////////////////////////////////////////////////////////
1088 if (x
.body
) // compile the body if this is not a prototype
1090 // Create a new basic block to start insertion into.
1091 basic_block block
= make_basic_block("entry", f
);
1092 set_insert_point(block
);
1094 function_allocas(x
, f
);
1095 return_block
= make_basic_block("return");
1097 if (!(*this)(*x
.body
))
1099 // Error reading body, remove function.
1100 f
.erase_from_parent();
1104 basic_block last_block
= f
.last_block();
1106 // If the last block is unterminated, connect it to return_block
1107 if (!last_block
.has_terminator())
1109 set_insert_point(last_block
);
1110 branch(return_block
);
1113 f
.add(return_block
);
1114 set_insert_point(return_block
);
1119 return_(return_var
);
1121 // Validate the generated code, checking for consistency.
1124 // Optimize the function.
1125 optimize_function(f
);
1131 bool compiler::operator()(ast::function_list
const& x
)
1133 BOOST_FOREACH(ast::function
const& f
, x
)
1135 locals
.clear(); // clear the variables