Files
cs420/src/irgen/mod.rs
Janggun Lee fcb6ca2538 Update
2025-01-06 18:45:28 +09:00

778 lines
26 KiB
Rust

//! # Homework: IR Generation
//!
//! The goal of this homework is to translate the components of a C file into KECC IR. While doing
//! so, you will familiarize yourself with the structure of KECC IR, and understand the semantics of
//! C in terms of KECC.
//!
//! We highly recommend checking out the [slides][slides] and [github repo][github-qna-irgen] for
//! useful information.
//!
//! ## Guide
//!
//! ### High Level Guide
//!
//! Please watch the following video from 2020 along the lecture slides.
//! - [Intermediate Representation][ir]
//! - [IRgen (Overview)][irgen-overview]
//!
//! ### Coding Guide
//!
//! We highly recommend you copy-and-paste the code given in the following lecture videos from 2020:
//! - [IRgen (Code, Variable Declaration)][irgen-var-decl]
//! - [IRgen (Code, Function Definition)][irgen-func-def]
//! - [IRgen (Code, Statement 1)][irgen-stmt-1]
//! - [IRgen (Code, Statement 2)][irgen-stmt-2]
//!
//! The skeleton code roughly consists of the code for the first two videos, but you should still
//! watch them to have an idea of what the code is like.
//!
//! [slides]: https://docs.google.com/presentation/d/1SqtU-Cn60Sd1jkbO0OSsRYKPMIkul0eZoYG9KpMugFE/edit?usp=sharing
//! [ir]: https://youtu.be/7CY_lX5ZroI
//! [irgen-overview]: https://youtu.be/YPtnXlKDSYo
//! [irgen-var-decl]: https://youtu.be/HjARCUoK08s
//! [irgen-func-def]: https://youtu.be/Rszt9x0Xu_0
//! [irgen-stmt-1]: https://youtu.be/jFahkyxm994
//! [irgen-stmt-2]: https://youtu.be/UkaXaNw462U
//! [github-qna-irgen]: https://github.com/kaist-cp/cs420/labels/homework%20-%20irgen
#![allow(dead_code)]
use core::convert::TryFrom;
use core::fmt;
use core::mem;
use std::collections::{BTreeMap, HashMap};
use std::ops::Deref;
use lang_c::ast::*;
use lang_c::driver::Parse;
use lang_c::span::Node;
use thiserror::Error;
use crate::ir::{DtypeError, HasDtype, Named};
use crate::*;
use itertools::izip;
/// An error produced while generating IR: what went wrong, plus text identifying where.
#[derive(Debug)]
pub struct IrgenError {
/// Text identifying the offending source construct. Within this file it is filled with either
/// a debug dump of the AST node or the name of the declaration.
pub code: String,
/// The kind of failure.
pub message: IrgenErrorMessage,
}
impl IrgenError {
pub fn new(code: String, message: IrgenErrorMessage) -> Self {
Self { code, message }
}
}
impl fmt::Display for IrgenError {
    /// Prints the message first and the offending code second, separated by a blank line.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { code, message } = self;
        write!(f, "error: {}\r\n\r\ncode: {}", message, code)
    }
}
/// The kinds of failure IR generation can report. The `#[error]` attribute on each variant is
/// its user-facing text.
#[derive(Debug, PartialEq, Eq, Error)]
pub enum IrgenErrorMessage {
/// For uncommon error
#[error("{message}")]
Misc { message: String },
/// The callee of a call is neither a function nor a function pointer.
#[error("called object `{callee:?}` is not a function or function pointer")]
NeedFunctionOrFunctionPointer { callee: ir::Operand },
/// `name` is defined more than once.
#[error("redefinition, `{name}`")]
Redefinition { name: String },
/// A declaration's type (`dtype`) disagrees with the type recorded by an earlier declaration
/// of the same name (`protorype_dtype`).
#[error("`{dtype}` conflicts prototype's dtype, `{protorype_dtype}`")]
ConflictingDtype {
dtype: ir::Dtype,
protorype_dtype: ir::Dtype,
},
/// Wraps an error reported while building or resolving a dtype.
#[error("{dtype_error}")]
InvalidDtype { dtype_error: DtypeError },
/// An l-value was required in the position described by `message`.
#[error("l-value required as {message}")]
RequireLvalue { message: String },
}
/// State accumulated while translating one C translation unit into IR.
#[derive(Default, Debug)]
pub struct Irgen {
/// Global declarations (variables and functions) keyed by name. Moved into the resulting
/// `ir::TranslationUnit` at the end of translation.
decls: BTreeMap<String, ir::Declaration>,
/// Typedef name to the dtype it stands for, registered by `add_declaration`.
typedefs: HashMap<String, ir::Dtype>,
/// Struct name to its definition. A `None` entry is inserted when a struct name is first seen
/// and is treated as an incomplete type by `is_invalid_structure`.
structs: HashMap<String, Option<ir::Dtype>>,
/// Counter handed to `Dtype::resolve_structs`.
// NOTE(review): presumably used to name anonymous structs - confirm against `ir::Dtype`.
struct_tempid_counter: usize,
}
impl Translate<Parse> for Irgen {
type Target = ir::TranslationUnit;
type Error = IrgenError;
fn translate(&mut self, source: &Parse) -> Result<Self::Target, Self::Error> {
self.translate(&source.unit)
}
}
impl Translate<TranslationUnit> for Irgen {
    type Target = ir::TranslationUnit;
    type Error = IrgenError;

    /// Feeds every external declaration of `source` into the generator, in source order, then
    /// hands the collected declarations and structs over as the resulting translation unit.
    ///
    /// # Panics
    ///
    /// Panics on a top-level `_Static_assert`, which is unsupported.
    fn translate(&mut self, source: &TranslationUnit) -> Result<Self::Target, Self::Error> {
        for external in source.0.iter() {
            match &external.node {
                ExternalDeclaration::FunctionDefinition(func) => {
                    self.add_function_definition(&func.node)?;
                }
                ExternalDeclaration::Declaration(var) => {
                    self.add_declaration(&var.node)?;
                }
                ExternalDeclaration::StaticAssert(_) => {
                    panic!("ExternalDeclaration::StaticAssert is unsupported")
                }
            }
        }

        // Taking the maps leaves `self` with empty ones.
        Ok(ir::TranslationUnit {
            decls: mem::take(&mut self.decls),
            structs: mem::take(&mut self.structs),
        })
    }
}
impl Irgen {
/// Block id given to the initial block of every translated function.
const BID_INIT: ir::BlockId = ir::BlockId(0);
/// Starting value of the per-function block id counter.
// `0` is used to create `BID_INIT`
const BID_COUNTER_INIT: usize = 1;
/// Starting value of the per-function temporary id counter.
const TEMPID_COUNTER_INIT: usize = 0;
/// Add a declaration. It can be either a struct, typedef, or a variable.
fn add_declaration(&mut self, source: &Declaration) -> Result<(), IrgenError> {
let (base_dtype, is_typedef) =
ir::Dtype::try_from_ast_declaration_specifiers(&source.specifiers).map_err(|e| {
IrgenError::new(
format!("{source:#?}"),
IrgenErrorMessage::InvalidDtype { dtype_error: e },
)
})?;
let base_dtype = base_dtype.resolve_typedefs(&self.typedefs).map_err(|e| {
IrgenError::new(
format!("{source:#?}"),
IrgenErrorMessage::InvalidDtype { dtype_error: e },
)
})?;
let base_dtype = if let ir::Dtype::Struct { name, fields, .. } = &base_dtype {
if let Some(name) = name {
let _ = self.structs.entry(name.to_string()).or_insert(None);
}
if fields.is_some() {
base_dtype
.resolve_structs(&mut self.structs, &mut self.struct_tempid_counter)
.map_err(|e| {
IrgenError::new(
format!("{source:#?}"),
IrgenErrorMessage::InvalidDtype { dtype_error: e },
)
})?
} else {
base_dtype
}
} else {
base_dtype
};
for init_decl in &source.declarators {
let declarator = &init_decl.node.declarator.node;
let name = name_of_declarator(declarator);
let dtype = base_dtype
.clone()
.with_ast_declarator(declarator)
.map_err(|e| {
IrgenError::new(
format!("{source:#?}"),
IrgenErrorMessage::InvalidDtype { dtype_error: e },
)
})?
.deref()
.clone();
let dtype = dtype.resolve_typedefs(&self.typedefs).map_err(|e| {
IrgenError::new(
format!("{source:#?}"),
IrgenErrorMessage::InvalidDtype { dtype_error: e },
)
})?;
if !is_typedef && is_invalid_structure(&dtype, &self.structs) {
return Err(IrgenError::new(
format!("{source:#?}"),
IrgenErrorMessage::Misc {
message: "incomplete struct type".to_string(),
},
));
}
if is_typedef {
// Add new typedef if nothing has been declared before
let prev_dtype = self
.typedefs
.entry(name.clone())
.or_insert_with(|| dtype.clone());
if prev_dtype != &dtype {
return Err(IrgenError::new(
format!("{source:#?}"),
IrgenErrorMessage::ConflictingDtype {
dtype,
protorype_dtype: prev_dtype.clone(),
},
));
}
continue;
}
// Creates a new declaration based on the dtype.
let mut decl = ir::Declaration::try_from(dtype.clone()).map_err(|e| {
IrgenError::new(
format!("{source:#?}"),
IrgenErrorMessage::InvalidDtype { dtype_error: e },
)
})?;
// If `initializer` exists, convert initializer to a constant value
if let Some(initializer) = init_decl.node.initializer.as_ref() {
if !is_valid_initializer(&initializer.node, &dtype, &self.structs) {
return Err(IrgenError::new(
format!("{source:#?}"),
IrgenErrorMessage::Misc {
message: "initializer is not valid".to_string(),
},
));
}
match &mut decl {
ir::Declaration::Variable {
initializer: var_initializer,
..
} => {
if var_initializer.is_some() {
return Err(IrgenError::new(
format!("{source:#?}"),
IrgenErrorMessage::Redefinition { name },
));
}
*var_initializer = Some(initializer.node.clone());
}
ir::Declaration::Function { .. } => {
return Err(IrgenError::new(
format!("{source:#?}"),
IrgenErrorMessage::Misc {
message: "illegal initializer (only variables can be initialized)"
.to_string(),
},
));
}
}
}
self.add_decl(&name, decl)?;
}
Ok(())
}
/// Add a function definition.
fn add_function_definition(&mut self, source: &FunctionDefinition) -> Result<(), IrgenError> {
// Creates name and signature.
let specifiers = &source.specifiers;
let declarator = &source.declarator.node;
let name = name_of_declarator(declarator);
let name_of_params = name_of_params_from_function_declarator(declarator)
.expect("declarator is not from function definition");
let (base_dtype, is_typedef) = ir::Dtype::try_from_ast_declaration_specifiers(specifiers)
.map_err(|e| {
IrgenError::new(
format!("specs: {specifiers:#?}\ndecl: {declarator:#?}"),
IrgenErrorMessage::InvalidDtype { dtype_error: e },
)
})?;
if is_typedef {
return Err(IrgenError::new(
format!("specs: {specifiers:#?}\ndecl: {declarator:#?}"),
IrgenErrorMessage::Misc {
message: "function definition declared typedef".into(),
},
));
}
let dtype = base_dtype
.with_ast_declarator(declarator)
.map_err(|e| {
IrgenError::new(
format!("specs: {specifiers:#?}\ndecl: {declarator:#?}"),
IrgenErrorMessage::InvalidDtype { dtype_error: e },
)
})?
.deref()
.clone();
let dtype = dtype.resolve_typedefs(&self.typedefs).map_err(|e| {
IrgenError::new(
format!("specs: {specifiers:#?}\ndecl: {declarator:#?}"),
IrgenErrorMessage::InvalidDtype { dtype_error: e },
)
})?;
let signature = ir::FunctionSignature::new(dtype.clone());
// Adds new declaration if nothing has been declared before
let decl = ir::Declaration::try_from(dtype).unwrap();
self.add_decl(&name, decl)?;
// Prepare scope for global variable
let global_scope: HashMap<_, _> = self
.decls
.iter()
.map(|(name, decl)| {
let dtype = decl.dtype();
let pointer = ir::Constant::global_variable(name.clone(), dtype);
let operand = ir::Operand::constant(pointer);
(name.clone(), operand)
})
.collect();
// Prepares for irgen pass.
let mut irgen = IrgenFunc {
return_type: signature.ret.clone(),
bid_init: Irgen::BID_INIT,
phinodes_init: Vec::new(),
allocations: Vec::new(),
blocks: BTreeMap::new(),
bid_counter: Irgen::BID_COUNTER_INIT,
tempid_counter: Irgen::TEMPID_COUNTER_INIT,
typedefs: &self.typedefs,
structs: &self.structs,
// Initial symbol table has scope for global variable already
symbol_table: vec![global_scope],
};
let mut context = Context::new(irgen.bid_init);
// Enter variable scope for alloc registers matched with function parameters
irgen.enter_scope();
// Creates the init block that stores arguments.
irgen
.translate_parameter_decl(&signature, irgen.bid_init, &name_of_params, &mut context)
.map_err(|e| {
IrgenError::new(format!("specs: {specifiers:#?}\ndecl: {declarator:#?}"), e)
})?;
// Translates statement.
irgen.translate_stmt(&source.statement.node, &mut context, None, None)?;
// Creates the end block
let ret = signature.ret.set_const(false);
let value = if ret == ir::Dtype::unit() {
ir::Operand::constant(ir::Constant::unit())
} else if ret == ir::Dtype::INT {
// If "main" function, default return value is `0` when return type is `int`
if name == "main" {
ir::Operand::constant(ir::Constant::int(0, ret))
} else {
ir::Operand::constant(ir::Constant::undef(ret))
}
} else {
ir::Operand::constant(ir::Constant::undef(ret))
};
// Last Block of the function
irgen.insert_block(context, ir::BlockExit::Return { value });
// Exit variable scope created above
irgen.exit_scope();
let func_def = ir::FunctionDefinition {
allocations: irgen.allocations,
blocks: irgen.blocks,
bid_init: irgen.bid_init,
};
let decl = self
.decls
.get_mut(&name)
.unwrap_or_else(|| panic!("The declaration of `{name}` must exist"));
if let ir::Declaration::Function { definition, .. } = decl {
if definition.is_some() {
return Err(IrgenError::new(
format!("specs: {specifiers:#?}\ndecl: {declarator:#?}"),
IrgenErrorMessage::Misc {
message: format!("the name `{name}` is defined multiple time"),
},
));
}
// Update function definition
*definition = Some(func_def);
} else {
panic!("`{name}` must be function declaration")
}
Ok(())
}
/// Adds a possibly existing declaration.
///
/// - If `name` is new, `decl` is stored.
/// - If `name` already exists, `decl` must be compatible with the stored declaration. The
///   stored one is then replaced by `decl` unless it already carries a function body or a
///   variable initializer; in that case it is kept, so a later prototype or plain declaration
///   does not discard an earlier definition.
///
/// # Errors
///
/// - `ConflictingDtype` if the previous declaration is incompatible with `decl`. `dtype` is the
///   new declaration's type and `protorype_dtype` the previously declared one.
/// - `Redefinition` if both the previous declaration and `decl` carry a definition.
fn add_decl(&mut self, name: &str, decl: ir::Declaration) -> Result<(), IrgenError> {
    let old_decl = match self.decls.get(name) {
        Some(old_decl) => old_decl,
        None => {
            let _ = self.decls.insert(name.to_string(), decl);
            return Ok(());
        }
    };

    // Check if type is conflicting for pre-declared one
    if !old_decl.is_compatible(&decl) {
        return Err(IrgenError::new(
            name.to_string(),
            IrgenErrorMessage::ConflictingDtype {
                dtype: decl.dtype(),
                protorype_dtype: old_decl.dtype(),
            },
        ));
    }

    // A declaration "defines" its name when it has a function body or an initializer.
    let is_defined = |d: &ir::Declaration| {
        matches!(
            d,
            ir::Declaration::Variable {
                initializer: Some(_),
                ..
            } | ir::Declaration::Function {
                definition: Some(_),
                ..
            }
        )
    };

    match (is_defined(old_decl), is_defined(&decl)) {
        (true, true) => Err(IrgenError::new(
            name.to_string(),
            IrgenErrorMessage::Redefinition {
                name: name.to_string(),
            },
        )),
        // Keep the existing definition; `decl` is only a compatible re-declaration.
        (true, false) => Ok(()),
        (false, _) => {
            let _ = self.decls.insert(name.to_string(), decl);
            Ok(())
        }
    }
}
}
/// Storage for instructions up to the insertion of a block
///
/// A `Context` is consumed by `IrgenFunc::insert_block`, which turns the collected
/// instructions into the block with id `bid`.
#[derive(Debug)]
struct Context {
/// The block id of the current context.
bid: ir::BlockId,
/// Current instructions of the block.
instrs: Vec<Named<ir::Instruction>>,
}
impl Context {
    /// Create an empty context for the block with id `bid`.
    fn new(bid: ir::BlockId) -> Self {
        let instrs = Vec::new();
        Self { bid, instrs }
    }

    /// Appends `instr` (unnamed) to the current context.
    ///
    /// Returns the register operand that refers to the instruction's result: a temporary
    /// register identified by this block's id and the instruction's position in the block.
    fn insert_instruction(
        &mut self,
        instr: ir::Instruction,
    ) -> Result<ir::Operand, IrgenErrorMessage> {
        // The new instruction lands at the current end of the list.
        let index = self.instrs.len();
        let dtype = instr.dtype();
        self.instrs.push(Named::new(None, instr));

        let rid = ir::RegisterId::temp(self.bid, index);
        Ok(ir::Operand::register(rid, dtype))
    }
}
/// A C function being translated.
struct IrgenFunc<'i> {
/// return type of the function.
return_type: ir::Dtype,
/// initial block id for the function, typically 0.
bid_init: ir::BlockId,
/// arguments represented as initial phinodes. Order must be the same as that given in the C
/// function.
phinodes_init: Vec<Named<ir::Dtype>>,
/// local allocations.
allocations: Vec<Named<ir::Dtype>>,
/// Map from block id to basic blocks
blocks: BTreeMap<ir::BlockId, ir::Block>,
/// next block id to hand out (see `alloc_bid`). `blocks` must have an entry for all ids less
/// than this
bid_counter: usize,
/// next temporary id to hand out. `alloc_tempid` turns it into a temporary name of the form
/// `t<id>`.
tempid_counter: usize,
/// Usable definitions
typedefs: &'i HashMap<String, ir::Dtype>,
/// Usable structs
// TODO: Add examples on how to properly use this field.
structs: &'i HashMap<String, Option<ir::Dtype>>,
/// Current symbol table, one map per open scope with the innermost scope last. The initial
/// symbol table has the global variables.
symbol_table: Vec<HashMap<String, ir::Operand>>,
}
impl IrgenFunc<'_> {
/// Hands out the next unused block id and advances the counter.
fn alloc_bid(&mut self) -> ir::BlockId {
    let fresh = ir::BlockId(self.bid_counter);
    self.bid_counter += 1;
    fresh
}
/// Hands out the next unused temporary name (`t0`, `t1`, ...) and advances the counter.
fn alloc_tempid(&mut self) -> String {
    let tempid = self.tempid_counter;
    self.tempid_counter = tempid + 1;
    format!("t{tempid}")
}
/// Records a new local allocation whose name and type are given by `alloc`.
///
/// Returns the index of the allocation in the function's allocation list.
fn insert_alloc(&mut self, alloc: Named<ir::Dtype>) -> usize {
    // The new entry goes to the end, so its index is the length before the push.
    let index = self.allocations.len();
    self.allocations.push(alloc);
    index
}
/// Insert a new block `context` with exit instruction `exit`.
///
/// # Panic
///
/// Panics if another block with the same bid as `context` already existed.
fn insert_block(&mut self, context: Context, exit: ir::BlockExit) {
let block = ir::Block {
phinodes: if context.bid == self.bid_init {
self.phinodes_init.clone()
} else {
Vec::new()
},
instructions: context.instrs,
exit,
};
if self.blocks.insert(context.bid, block).is_some() {
panic!("the bid `{}` is defined multiple time", context.bid)
}
}
/// Opens a new, empty innermost scope in the symbol table, i.e, we are at a `{` in the
/// function.
fn enter_scope(&mut self) {
    let fresh_scope = HashMap::new();
    self.symbol_table.push(fresh_scope);
}
/// Closes the innermost (most recently entered) scope and discards its symbol table entries,
/// i.e, we are at a `}` in the function.
///
/// # Panic
///
/// Panics if there are no scopes to exit, i.e, the function has an unmatched `}`.
fn exit_scope(&mut self) {
    let _closed_scope = self.symbol_table.pop().unwrap();
}
/// Binds `var` to `value` in the innermost scope of the symbol table.
///
/// Returns `Ok(())` if the innermost scope had no entry for `var` yet, and a `Redefinition`
/// error otherwise. Note that in the error case the old entry has already been replaced by
/// `value`.
///
/// # Panic
///
/// Panics if the symbol table has no scope at all.
fn insert_symbol_table_entry(
    &mut self,
    var: String,
    value: ir::Operand,
) -> Result<(), IrgenErrorMessage> {
    let innermost = self
        .symbol_table
        .last_mut()
        .expect("symbol table has no valid scope");

    match innermost.insert(var.clone(), value) {
        None => Ok(()),
        Some(_) => Err(IrgenErrorMessage::Redefinition { name: var }),
    }
}
/// Translate a C statement `stmt` under the current block `context`, with `continue` block
/// `bid_continue` and break block `bid_break`.
///
/// `add_function_definition` calls this on the function body with both block ids set to
/// `None`.
///
/// This is a homework stub: it currently panics via `todo!`.
fn translate_stmt(
&mut self,
_stmt: &Statement,
_context: &mut Context,
_bid_continue: Option<ir::BlockId>,
_bid_break: Option<ir::BlockId>,
) -> Result<(), IrgenError> {
todo!("Homework: IR Generation")
}
/// Translate the parameter declarations of a function to IR.
///
/// This is a homework stub: it currently panics via `todo!`.
///
/// For example, given the following C function from [`foo.c`][foo]:
///
/// ```C
/// int foo(int x, int y, int z) {
/// if (x == y) {
/// return y;
/// } else {
/// return z;
/// }
/// }
/// ```
///
/// The IR before this function looks roughly as follows:
///
/// ```text
/// fun i32 @foo (i32, i32, i32) {
/// init:
/// bid: b0
/// allocations:
///
/// block b0:
/// %b0:p0:i32:x
/// %b0:p1:i32:y
/// %b0:p2:i32:z
/// ```
///
/// With the following arguments:
/// ```ignore
/// signature = FunctionSignature { ret: ir::INT, params: vec![ir::INT, ir::INT, ir::INT] }
/// bid_init = 0
/// name_of_params = ["x", "y", "z"]
/// context = // omitted
/// ```
///
/// The resulting IR after this function should look roughly as follows:
///
/// ```text
/// fun i32 @foo (i32, i32, i32) {
/// init:
/// bid: b0
/// allocations:
/// %l0:i32:x
/// %l1:i32:y
/// %l2:i32:z
///
/// block b0:
/// %b0:p0:i32:x
/// %b0:p1:i32:y
/// %b0:p2:i32:z
/// %b0:i0:unit = store %b0:p0:i32 %l0:i32*
/// %b0:i1:unit = store %b0:p1:i32 %l1:i32*
/// %b0:i2:unit = store %b0:p2:i32 %l2:i32*
/// ```
///
/// In particular, note that each parameter is added to the local allocation list, and the
/// value of its initial phinode is stored into that allocation.
///
/// Note that the resulting IR is **a** solution. If you can think of a better way to
/// translate parameters, feel free to do so.
///
/// [foo]: https://github.com/kaist-cp/kecc-public/blob/main/examples/c/foo.c
fn translate_parameter_decl(
&mut self,
_signature: &ir::FunctionSignature,
_bid_init: ir::BlockId,
_name_of_params: &[String],
_context: &mut Context,
) -> Result<(), IrgenErrorMessage> {
todo!("Homework: IR Generation")
}
}
/// Returns the identifier a declarator declares, looking through nested (parenthesized)
/// declarators.
///
/// # Panics
///
/// Panics on an abstract declarator, which has no name.
#[inline]
fn name_of_declarator(declarator: &Declarator) -> String {
    let mut current = declarator;
    loop {
        match &current.kind.node {
            DeclaratorKind::Identifier(identifier) => return identifier.node.name.clone(),
            DeclaratorKind::Declarator(inner) => current = &inner.node,
            DeclaratorKind::Abstract => panic!("DeclaratorKind::Abstract is unsupported"),
        }
    }
}
/// Returns the parameter names of the function declared by `declarator`, or `None` if no
/// function declarator is found.
///
/// For a nested declarator the innermost function declarator wins; the derived declarators of
/// the outer level are only consulted when the inner levels have none.
///
/// # Panics
///
/// Panics on an abstract declarator.
#[inline]
fn name_of_params_from_function_declarator(declarator: &Declarator) -> Option<Vec<String>> {
    match &declarator.kind.node {
        DeclaratorKind::Abstract => panic!("DeclaratorKind::Abstract is unsupported"),
        DeclaratorKind::Identifier(_) => {
            name_of_params_from_derived_declarators(&declarator.derived)
        }
        DeclaratorKind::Declarator(inner) => {
            match name_of_params_from_function_declarator(&inner.node) {
                found @ Some(_) => found,
                None => name_of_params_from_derived_declarators(&declarator.derived),
            }
        }
    }
}
/// Returns the parameter names of the first function declarator among `derived_decls`, or
/// `None` if there is none.
///
/// If any parameter of that function declarator is unnamed, the result is an empty list. A
/// K&R-style function declarator always yields an empty list.
#[inline]
fn name_of_params_from_derived_declarators(
    derived_decls: &[Node<DerivedDeclarator>],
) -> Option<Vec<String>> {
    derived_decls
        .iter()
        .find_map(|derived| match &derived.node {
            DerivedDeclarator::Function(func_decl) => {
                let names: Option<Vec<String>> = func_decl
                    .node
                    .parameters
                    .iter()
                    .map(|param| name_of_parameter_declaration(&param.node))
                    .collect();
                Some(names.unwrap_or_default())
            }
            // K&R function is allowed only when it has no parameter
            DerivedDeclarator::KRFunction(_) => Some(Vec::new()),
            _ => None,
        })
}
/// Returns the name of a parameter, or `None` if the parameter has no declarator (e.g. it is
/// unnamed).
#[inline]
fn name_of_parameter_declaration(parameter_declaration: &ParameterDeclaration) -> Option<String> {
    parameter_declaration
        .declarator
        .as_ref()
        .map(|declarator| name_of_declarator(&declarator.node))
}
/// Checks whether `initializer` has a shape that is accepted for a global of type `dtype`.
///
/// - An expression initializer is accepted only for int, float and pointer types, and only if
///   it is a constant or a unary `+`/`-` expression.
/// - A list initializer is accepted for arrays when every item is valid for the element type,
///   and for structs when every item is valid for the field at the same position. Items and
///   fields are paired up to the shorter of the two lists.
/// - Every other combination is rejected.
///
/// # Panics
///
/// Panics if `dtype` is a struct that is unnamed, unknown to `structs`, or has no definition
/// there.
#[inline]
fn is_valid_initializer(
    initializer: &Initializer,
    dtype: &ir::Dtype,
    structs: &HashMap<String, Option<ir::Dtype>>,
) -> bool {
    match (initializer, dtype) {
        (
            Initializer::Expression(expr),
            ir::Dtype::Int { .. } | ir::Dtype::Float { .. } | ir::Dtype::Pointer { .. },
        ) => match &expr.node {
            Expression::Constant(_) => true,
            Expression::UnaryOperator(unary) => matches!(
                &unary.node.operator.node,
                UnaryOperator::Minus | UnaryOperator::Plus
            ),
            _ => false,
        },
        (Initializer::List(items), ir::Dtype::Array { inner, .. }) => items
            .iter()
            .all(|item| is_valid_initializer(&item.node.initializer.node, inner, structs)),
        (Initializer::List(items), ir::Dtype::Struct { name, .. }) => {
            // `dtype` only names the struct; its fields are looked up in `structs`.
            let name = name.as_ref().expect("struct should have its name");
            let struct_type = structs
                .get(name)
                .expect("struct type matched with `name` must exist")
                .as_ref()
                .expect("`struct_type` must have its definition");
            let fields = struct_type
                .get_struct_fields()
                .expect("`struct_type` must be struct type")
                .as_ref()
                .expect("`fields` must be `Some`");

            izip!(fields, items).all(|(field, item)| {
                is_valid_initializer(&item.node.initializer.node, field.deref(), structs)
            })
        }
        _ => false,
    }
}
/// Returns `true` if `dtype` is a struct type that is unknown to `structs` or is registered
/// there without a definition. Non-struct types are never invalid.
///
/// # Panics
///
/// Panics if `dtype` is a struct that has no name or still carries its own field list.
#[inline]
fn is_invalid_structure(dtype: &ir::Dtype, structs: &HashMap<String, Option<ir::Dtype>>) -> bool {
    match dtype {
        // When `dtype` is `Dtype::Struct`, `structs` has real definition of `dtype`
        ir::Dtype::Struct { name, fields, .. } => {
            assert!(name.is_some() && fields.is_none());
            let name = name.as_ref().unwrap();
            // Valid only when the name is registered *and* has a definition.
            !matches!(structs.get(name), Some(Some(_)))
        }
        _ => false,
    }
}