From 2a5a5e71eded12bc3b6192f787f3c766d0b6c5dc Mon Sep 17 00:00:00 2001 From: Jeehoon Kang Date: Sat, 28 Mar 2020 18:29:04 +0900 Subject: [PATCH] Fix hw1 fuzzer again --- Cargo.lock | 12 +-- Cargo.toml | 6 +- README.md | 15 +++ bin/fuzz.rs | 16 +++ bin/fuzz_cli.yml | 12 +++ bin/kecc.rs | 24 ++++- bin/kecc_cli.yml | 15 +++ src/ir/dtype.rs | 82 +++++++++----- src/ir/interp.rs | 58 +++++++--- src/ir/mod.rs | 59 +++++++++-- src/ir/write_ir.rs | 1 + src/lib.rs | 2 +- src/opt/deadcode.rs | 14 +++ src/opt/gvn.rs | 14 +-- src/opt/mem2reg.rs | 15 +-- src/opt/mod.rs | 49 +++++++-- src/opt/opt_utils.rs | 171 ++++++++++++++++++++++++++++++ src/opt/simplify_cfg.rs | 18 +--- src/utils.rs | 7 ++ tests/fuzz.py | 18 ++-- tests/reduce-criteria-template.sh | 2 +- 21 files changed, 500 insertions(+), 110 deletions(-) create mode 100644 src/opt/deadcode.rs create mode 100644 src/opt/opt_utils.rs diff --git a/Cargo.lock b/Cargo.lock index a376713..47701a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -113,7 +113,7 @@ dependencies = [ [[package]] name = "itertools" -version = "0.8.2" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -125,14 +125,14 @@ version = "0.1.0" dependencies = [ "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "failure 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "itertools 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lang-c 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lang-c 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "lang-c" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -321,8 +321,8 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" "checksum failure_derive 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "030a733c8287d6213886dd487564ff5c8f6aae10278b3588ed177f9d18f8d231" "checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" "checksum hermit-abi 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "1010591b26bbfe835e9faeabeb11866061cc7dcebffd56ad7d0942d0e61aefd8" -"checksum itertools 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" -"checksum lang-c 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "43d8e04e01e7e22312294e6aaa1e121192b103abf9408800fc20ee85c67ccc0f" +"checksum itertools 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +"checksum lang-c 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "86efc420d5d7407655eb2ff1a77d7c81463307f1b204c886f7608cc2e6506d55" "checksum libc 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)" = "eb147597cdf94ed43ab7a9038716637d2d1bf2bc571da995d0028dec06bd3018" "checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" "checksum proc-macro2 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)" = "6c09721c6781493a2a492a96b5a5bf19b65917fe6728884e7c44dd0c60ca3435" diff --git a/Cargo.toml b/Cargo.toml index eb926c6..ac4d17e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ path = "bin/fuzz.rs" [dependencies] clap = { version = "2.33.0", features = ["yaml"] } -lang-c = "0.7.0" -itertools = "0.8" -failure = "0.1.6" +lang-c = "0.8.0" +itertools = "0.9.0" +failure = "0.1.7" tempfile = "3.1.0" diff --git a/README.md b/README.md index 09c11bb..88e09e3 100644 --- a/README.md 
+++ b/README.md @@ -39,6 +39,21 @@ cargo test # run a particular test ## Fuzzing +We encourage you to do homework using the test-driven development approach (TDD). You randomly +generate test input, and if it fails, then reduce it as much as possible and manually inspect the +reduced test input. For example, for homework 1, do: + +```sh +# randomly generates test inputs and tests them +python3 tests/fuzz.py --print + +# reduces the failing test input as much as possible +python3 tests/fuzz.py --print --reduce + +# fix your code for the reduced test input +cat tests/test_reduced.c +``` + ### Install ```sh diff --git a/bin/fuzz.rs b/bin/fuzz.rs index ed8d08e..a05234a 100644 --- a/bin/fuzz.rs +++ b/bin/fuzz.rs @@ -27,4 +27,20 @@ fn main() { if matches.is_present("irgen") { kecc::test_irgen(&unit, Path::new(&input)); } + + if matches.is_present("simplify-cfg") { + todo!("test simplify-cfg"); + } + + if matches.is_present("mem2reg") { + todo!("test mem2reg"); + } + + if matches.is_present("deadcode") { + todo!("test deadcode"); + } + + if matches.is_present("gvn") { + todo!("test gvn"); + } } diff --git a/bin/fuzz_cli.yml b/bin/fuzz_cli.yml index 3d1f324..93ed801 100644 --- a/bin/fuzz_cli.yml +++ b/bin/fuzz_cli.yml @@ -8,6 +8,18 @@ args: short: i long: irgen help: Fuzzes irgen + - simplify-cfg: + long: simplify-cfg + help: Performs simplify-cfg + - mem2reg: + long: mem2reg + help: Performs mem2reg + - deadcode: + long: deadcode + help: Performs deadcode elimination + - gvn: + long: gvn + help: Performs gvn - INPUT: help: Sets the input file to use required: true diff --git a/bin/kecc.rs b/bin/kecc.rs index bda3ae0..663c80b 100644 --- a/bin/kecc.rs +++ b/bin/kecc.rs @@ -5,7 +5,9 @@ use clap::{crate_authors, crate_description, crate_version, App}; #[macro_use] extern crate kecc; -use kecc::{write, Asmgen, Irgen, Optimize, Parse, Translate, O1}; +use kecc::{ + write, Asmgen, Deadcode, Gvn, Irgen, Mem2reg, Optimize, Parse, SimplifyCfg, Translate, O1, +}; fn main() { let 
yaml = load_yaml!("kecc_cli.yml"); @@ -26,6 +28,10 @@ fn main() { Box::new(ok_or_exit!(::std::fs::File::open(output), 1)) }; + if matches.is_present("parse") { + return; + } + if matches.is_present("print") { write(&unit, &mut output).unwrap(); return; @@ -45,6 +51,22 @@ fn main() { if matches.is_present("optimize") { O1::default().optimize(&mut ir); + } else { + if matches.is_present("simplify-cfg") { + SimplifyCfg::default().optimize(&mut ir); + } + + if matches.is_present("mem2reg") { + Mem2reg::default().optimize(&mut ir); + } + + if matches.is_present("deadcode") { + Deadcode::default().optimize(&mut ir); + } + + if matches.is_present("gvn") { + Gvn::default().optimize(&mut ir); + } } let asm = ok_or_exit!(Asmgen::default().translate(&ir), 1); diff --git a/bin/kecc_cli.yml b/bin/kecc_cli.yml index 5537f9d..2e3d4fd 100644 --- a/bin/kecc_cli.yml +++ b/bin/kecc_cli.yml @@ -1,5 +1,8 @@ name: kecc args: + - parse: + long: parse + help: Parses the input file - print: short: p long: print @@ -12,6 +15,18 @@ args: short: O long: optimize help: Optimizes IR + - simplify-cfg: + long: simplify-cfg + help: Performs simplify-cfg + - mem2reg: + long: mem2reg + help: Performs mem2reg + - deadcode: + long: deadcode + help: Performs deadcode elimination + - gvn: + long: gvn + help: Performs gvn - output: short: o long: output diff --git a/src/ir/dtype.rs b/src/ir/dtype.rs index 71804ea..6c90756 100644 --- a/src/ir/dtype.rs +++ b/src/ir/dtype.rs @@ -44,6 +44,10 @@ pub enum Dtype { inner: Box, is_const: bool, }, + Array { + inner: Box, + size: usize, + }, Function { ret: Box, params: Vec, @@ -342,6 +346,30 @@ impl Dtype { } } + // Suppose the C declaration is `int *a[2][3]`. Then `a`'s `ir::Dtype` should be `[2 x [3 x int*]]`. + // But in the AST, it is parsed as `Array(3, Array(2, Pointer(int)))`, reversing the order of `2` and `3`. + // In the recursive translation of declaration into Dtype, we need to insert `3` inside `[2 x int*]`. 
+ pub fn array(base_dtype: Dtype, size: usize) -> Self { + match base_dtype { + Self::Array { + inner, + size: old_size, + } => { + let inner = inner.deref().clone(); + let inner = Self::array(inner, size); + Self::Array { + inner: Box::new(inner), + size: old_size, + } + } + Self::Function { .. } => panic!("array size cannot be applied to function type"), + inner => Self::Array { + inner: Box::new(inner), + size, + }, + } + } + #[inline] pub fn function(ret: Dtype, params: Vec) -> Self { Self::Function { @@ -411,9 +439,8 @@ impl Dtype { Self::Int { is_const, .. } => *is_const, Self::Float { is_const, .. } => *is_const, Self::Pointer { is_const, .. } => *is_const, - Self::Function { .. } => { - panic!("there should be no case that check whether `Function` is `const`") - } + Self::Array { .. } => true, + Self::Function { .. } => true, } } @@ -429,36 +456,36 @@ impl Dtype { }, Self::Float { width, .. } => Self::Float { width, is_const }, Self::Pointer { inner, .. } => Self::Pointer { inner, is_const }, - Self::Function { .. } => panic!("`const` cannot be applied to `Dtype::Function`"), + Self::Array { .. } => self, + Self::Function { .. } => self, } } - /// Return byte size of `Dtype` - pub fn size_of(&self) -> Result { - // TODO: consider complex type like array, structure in the future + pub fn size_align_of(&self) -> Result<(usize, usize), DtypeError> { match self { - Self::Unit { .. } => Ok(0), - Self::Int { width, .. } => Ok(*width / Self::WIDTH_OF_BYTE), - Self::Float { width, .. } => Ok(*width / Self::WIDTH_OF_BYTE), - Self::Pointer { .. } => Ok(Self::WIDTH_OF_POINTER / Self::WIDTH_OF_BYTE), - Self::Function { .. } => Err(DtypeError::Misc { - message: "`sizeof` cannot be used with function types".to_string(), - }), - } - } + Self::Unit { .. } => Ok((0, 1)), + Self::Int { width, .. } | Self::Float { width, .. 
} => { + let align_of = *width / Self::WIDTH_OF_BYTE; + let size_of = align_of; - /// Return alignment requirements of `Dtype` - pub fn align_of(&self) -> Result { - // TODO: consider complex type like array, structure in the future - // TODO: when considering complex type like a structure, - // the calculation method should be different from `Dtype::size_of`. - match self { - Self::Unit { .. } => Ok(0), - Self::Int { width, .. } => Ok(*width / Self::WIDTH_OF_BYTE), - Self::Float { width, .. } => Ok(*width / Self::WIDTH_OF_BYTE), - Self::Pointer { .. } => Ok(Self::WIDTH_OF_POINTER / Self::WIDTH_OF_BYTE), + Ok((size_of, align_of)) + } + Self::Pointer { .. } => { + let align_of = Self::WIDTH_OF_POINTER / Self::WIDTH_OF_BYTE; + let size_of = align_of; + + Ok((size_of, align_of)) + } + Self::Array { inner, size, .. } => { + let (size_of_inner, align_of_inner) = inner.size_align_of()?; + + Ok(( + size * std::cmp::max(size_of_inner, align_of_inner), + align_of_inner, + )) + } Self::Function { .. } => Err(DtypeError::Misc { - message: "`alignof` cannot be used with function types".to_string(), + message: "`size_align_of` cannot be used with function types".to_string(), }), } } @@ -590,6 +617,7 @@ impl fmt::Display for Dtype { Self::Pointer { inner, is_const } => { write!(f, "{}* {}", inner, if *is_const { "const" } else { "" }) } + Self::Array { inner, size, .. } => write!(f, "[{} x {}]", size, inner,), Self::Function { ret, params } => write!( f, "{} ({})", diff --git a/src/ir/interp.rs b/src/ir/interp.rs index e992224..2dc76b0 100644 --- a/src/ir/interp.rs +++ b/src/ir/interp.rs @@ -11,6 +11,9 @@ use crate::*; // TODO: the variants of Value will be added in the future #[derive(Debug, PartialEq, Clone)] pub enum Value { + Undef { + dtype: Dtype, + }, Unit, Int { value: u128, @@ -97,7 +100,8 @@ impl Value { } => Self::int(u128::default(), *width, *is_signed), ir::Dtype::Float { width, .. } => Self::float(f64::default(), *width), ir::Dtype::Pointer { .. 
} => Self::nullptr(), - ir::Dtype::Function { .. } => panic!("function types do not have a default value"), + ir::Dtype::Array { .. } => panic!("array type does not have a default value"), + ir::Dtype::Function { .. } => panic!("function type does not have a default value"), } } } @@ -225,6 +229,8 @@ mod calculator { rhs: Value, ) -> Result { match (op, lhs, rhs) { + (_, Value::Undef { .. }, _) => Err(()), + (_, _, Value::Undef { .. }) => Err(()), ( op, Value::Int { @@ -273,6 +279,7 @@ mod calculator { operand: Value, ) -> Result { match (op, operand) { + (_, Value::Undef { .. }) => Err(()), ( ast::UnaryOperator::Plus, Value::Int { @@ -312,6 +319,7 @@ mod calculator { pub fn calculate_typecast(value: Value, dtype: crate::ir::Dtype) -> Result { match (value, dtype) { + (Value::Undef { .. }, _) => Err(()), // TODO: distinguish zero/signed extension in the future // TODO: consider truncate in the future ( @@ -336,14 +344,7 @@ struct Memory { impl Memory { fn alloc(&mut self, dtype: &Dtype) -> Result { - let memory_block = match dtype { - ir::Dtype::Unit { .. } - | ir::Dtype::Int { .. } - | ir::Dtype::Float { .. } - | ir::Dtype::Pointer { .. } => vec![Value::default_from_dtype(dtype)], - ir::Dtype::Function { .. } => vec![], - }; - + let memory_block = Self::block_from_dtype(dtype); self.inner.push(memory_block); Ok(self.inner.len() - 1) @@ -356,6 +357,25 @@ impl Memory { fn store(&mut self, bid: usize, offset: usize, value: Value) { self.inner[bid][offset] = value; } + + fn block_from_dtype(dtype: &Dtype) -> Vec { + match dtype { + ir::Dtype::Unit { .. } => vec![], + ir::Dtype::Int { .. } | ir::Dtype::Float { .. } | ir::Dtype::Pointer { .. } => { + vec![Value::Undef { + dtype: dtype.clone(), + }] + } + ir::Dtype::Array { inner, size, .. } => { + let sub_vec = Self::block_from_dtype(inner.deref()); + (0..*size).fold(vec![], |mut result, _| { + result.append(&mut sub_vec.clone()); + result + }) + } + ir::Dtype::Function { .. 
} => vec![], + } + } } // TODO: allocation fields will be added in the future @@ -414,16 +434,20 @@ impl<'i> State<'i> { // Initialize allocated memory space match decl { - Declaration::Variable { dtype, initializer } => { - if dtype.get_function_inner().is_some() { - panic!("function variable does not exist") - } + Declaration::Variable { dtype, initializer } => match &dtype { + ir::Dtype::Unit { .. } => (), + ir::Dtype::Int { .. } | ir::Dtype::Float { .. } | ir::Dtype::Pointer { .. } => { + let value = if let Some(constant) = initializer { + self.interp_constant(constant.clone()) + } else { + Value::default_from_dtype(&dtype) + }; - if let Some(constant) = initializer { - let value = self.interp_constant(constant.clone()); self.memory.store(bid, 0, value); } - } + ir::Dtype::Array { .. } => todo!("Initializer::List is needed"), + ir::Dtype::Function { .. } => panic!("function variable does not exist"), + }, // If functin declaration, skip initialization Declaration::Function { .. } => (), } @@ -578,6 +602,7 @@ impl<'i> State<'i> { fn interp_instruction(&mut self, instruction: &Instruction) -> Result<(), InterpreterError> { let result = match instruction { + Instruction::Nop => Value::unit(), Instruction::BinOp { op, lhs, rhs, .. } => { let lhs = self.interp_operand(lhs.clone())?; let rhs = self.interp_operand(rhs.clone())?; @@ -685,6 +710,7 @@ impl<'i> State<'i> { fn interp_constant(&self, value: Constant) -> Value { match value { + Constant::Undef { dtype } => Value::Undef { dtype }, Constant::Unit => Value::Unit, Constant::Int { value, diff --git a/src/ir/mod.rs b/src/ir/mod.rs index 9229e42..7fd0a77 100644 --- a/src/ir/mod.rs +++ b/src/ir/mod.rs @@ -49,12 +49,13 @@ impl TryFrom for Declaration { Dtype::Unit { .. } => Err(DtypeError::Misc { message: "A variable of type `void` cannot be declared".to_string(), }), - Dtype::Int { .. } | Dtype::Float { .. } | Dtype::Pointer { .. 
} => { - Ok(Declaration::Variable { - dtype, - initializer: None, - }) - } + Dtype::Int { .. } + | Dtype::Float { .. } + | Dtype::Pointer { .. } + | Dtype::Array { .. } => Ok(Declaration::Variable { + dtype, + initializer: None, + }), Dtype::Function { .. } => Ok(Declaration::Function { signature: FunctionSignature::new(dtype), definition: None, @@ -257,6 +258,9 @@ impl Hash for RegisterId { #[derive(Debug, PartialEq, Clone)] pub enum Constant { + Undef { + dtype: Dtype, + }, Unit, Int { value: u128, @@ -365,8 +369,14 @@ impl Constant { } } + #[inline] + pub fn undef(dtype: Dtype) -> Self { + Self::Undef { dtype } + } + + #[inline] pub fn unit() -> Self { - Constant::Unit + Self::Unit } #[inline] @@ -374,7 +384,7 @@ impl Constant { let width = dtype.get_int_width().expect("`dtype` must be `Dtype::Int`"); let is_signed = dtype.is_int_signed(); - Constant::Int { + Self::Int { value, width, is_signed, @@ -387,18 +397,27 @@ impl Constant { .get_float_width() .expect("`dtype` must be `Dtype::Float`"); - Constant::Float { value, width } + Self::Float { value, width } } #[inline] pub fn global_variable(name: String, dtype: Dtype) -> Self { Self::GlobalVariable { name, dtype } } + + pub fn is_undef(&self) -> bool { + if let Self::Undef { .. } = self { + true + } else { + false + } + } } impl fmt::Display for Constant { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { + Self::Undef { .. } => write!(f, "undef"), Self::Unit => write!(f, "unit"), Self::Int { value, .. } => write!(f, "{}", value), Self::Float { value, .. } => write!(f, "{}", value), @@ -410,6 +429,7 @@ impl fmt::Display for Constant { impl HasDtype for Constant { fn dtype(&self) -> Dtype { match self { + Self::Undef { dtype } => dtype.clone(), Self::Unit => Dtype::unit(), Self::Int { width, is_signed, .. 
@@ -450,6 +470,14 @@ impl Operand { None } } + + pub fn get_register_mut(&mut self) -> Option<(&mut RegisterId, &mut Dtype)> { + if let Self::Register { rid, dtype } = self { + Some((rid, dtype)) + } else { + None + } + } } impl fmt::Display for Operand { @@ -471,8 +499,10 @@ impl HasDtype for Operand { } #[derive(Debug, PartialEq, Clone)] +#[allow(clippy::large_enum_variant)] pub enum Instruction { // TODO: the variants of Instruction will be added in the future + Nop, BinOp { op: ast::BinaryOperator, lhs: Operand, @@ -505,6 +535,7 @@ pub enum Instruction { impl HasDtype for Instruction { fn dtype(&self) -> Dtype { match self { + Self::Nop => Dtype::unit(), Self::BinOp { dtype, .. } => dtype.clone(), Self::UnaryOp { dtype, .. } => dtype.clone(), Self::Store { .. } => Dtype::unit(), @@ -521,6 +552,16 @@ impl HasDtype for Instruction { } } +impl Instruction { + pub fn is_pure(&self) -> bool { + match self { + Self::Store { .. } => false, + Self::Call { .. } => false, + _ => true, + } + } +} + #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] pub struct BlockId(pub usize); diff --git a/src/ir/write_ir.rs b/src/ir/write_ir.rs index 95a4ddf..2bb3d08 100644 --- a/src/ir/write_ir.rs +++ b/src/ir/write_ir.rs @@ -120,6 +120,7 @@ impl WriteLine for (&BlockId, &Block) { impl WriteString for Instruction { fn write_string(&self) -> String { match self { + Instruction::Nop => "nop".into(), Instruction::BinOp { op, lhs, rhs, .. 
} => format!( "{} {} {}", op.write_operation(), diff --git a/src/lib.rs b/src/lib.rs index 10c35b3..889d4dc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,4 +21,4 @@ pub use c::Parse; pub use asmgen::Asmgen; pub use irgen::Irgen; -pub use opt::{Gvn, Mem2reg, Optimize, Repeat, SimplifyCfg, Translate, O0, O1}; +pub use opt::{Deadcode, Gvn, Mem2reg, Optimize, Repeat, SimplifyCfg, O0, O1}; diff --git a/src/opt/deadcode.rs b/src/opt/deadcode.rs new file mode 100644 index 0000000..85be8e7 --- /dev/null +++ b/src/opt/deadcode.rs @@ -0,0 +1,14 @@ +use crate::ir::*; +use crate::opt::FunctionPass; +use crate::*; + +pub type Deadcode = FunctionPass; + +#[derive(Default)] +pub struct DeadcodeInner {} + +impl Optimize for DeadcodeInner { + fn optimize(&mut self, _code: &mut FunctionDefinition) -> bool { + todo!("homework 6") + } +} diff --git a/src/opt/gvn.rs b/src/opt/gvn.rs index eaab947..ac50254 100644 --- a/src/opt/gvn.rs +++ b/src/opt/gvn.rs @@ -1,11 +1,13 @@ -use crate::ir; +use crate::opt::FunctionPass; use crate::*; -#[derive(Default)] -pub struct Gvn {} +pub type Gvn = FunctionPass>; -impl Optimize for Gvn { - fn optimize(&mut self, _code: &mut ir::TranslationUnit) -> bool { - todo!() +#[derive(Default)] +pub struct GvnInner {} + +impl Optimize for GvnInner { + fn optimize(&mut self, _code: &mut ir::FunctionDefinition) -> bool { + todo!("homework 5") } } diff --git a/src/opt/mem2reg.rs b/src/opt/mem2reg.rs index 583b01d..d82b309 100644 --- a/src/opt/mem2reg.rs +++ b/src/opt/mem2reg.rs @@ -1,11 +1,14 @@ -use crate::ir; +use crate::ir::*; +use crate::opt::FunctionPass; use crate::*; -#[derive(Default)] -pub struct Mem2reg {} +pub type Mem2reg = FunctionPass; -impl Optimize for Mem2reg { - fn optimize(&mut self, _code: &mut ir::TranslationUnit) -> bool { - todo!() +#[derive(Default)] +pub struct Mem2regInner {} + +impl Optimize for Mem2regInner { + fn optimize(&mut self, _code: &mut FunctionDefinition) -> bool { + todo!("homework 4") } } diff --git a/src/opt/mod.rs 
b/src/opt/mod.rs index 9862d14..a098819 100644 --- a/src/opt/mod.rs +++ b/src/opt/mod.rs @@ -1,35 +1,42 @@ +use crate::*; + +mod deadcode; mod gvn; mod mem2reg; +mod opt_utils; mod simplify_cfg; +pub use deadcode::Deadcode; pub use gvn::Gvn; pub use mem2reg::Mem2reg; +pub use opt_utils::{ + make_cfg, make_domtree, replace_operand, replace_operands, reverse_cfg, Domtree, Walk, +}; pub use simplify_cfg::SimplifyCfg; use crate::ir; -pub trait Translate { - type Target; - type Error; - - fn translate(&mut self, source: &S) -> Result; -} - pub trait Optimize { fn optimize(&mut self, code: &mut T) -> bool; } +pub type O0 = Null; +pub type O1 = Repeat<(SimplifyCfg, (Mem2reg, (Deadcode, Gvn)))>; + +#[derive(Default)] +pub struct Null {} + #[derive(Default)] pub struct Repeat { inner: O, } #[derive(Default)] -pub struct O0 {} +pub struct FunctionPass> { + inner: T, +} -pub type O1 = Repeat<(SimplifyCfg, (Mem2reg, Gvn))>; - -impl Optimize for O0 { +impl Optimize for Null { fn optimize(&mut self, _code: &mut ir::TranslationUnit) -> bool { false } @@ -53,3 +60,23 @@ impl> Optimize for Repeat { true } } + +impl Optimize for FunctionPass +where + T: Optimize, +{ + fn optimize(&mut self, code: &mut ir::TranslationUnit) -> bool { + code.decls.iter_mut().fold(false, |changed, (_, decl)| self.optimize(decl) | changed) + } +} + +impl Optimize for FunctionPass +where + T: Optimize, +{ + fn optimize(&mut self, code: &mut ir::Declaration) -> bool { + let (_fsig, fdef) = some_or!(code.get_function_mut(), return false); + let fdef = some_or!(fdef, return false); + self.inner.optimize(fdef) + } +} diff --git a/src/opt/opt_utils.rs b/src/opt/opt_utils.rs new file mode 100644 index 0000000..358d1a8 --- /dev/null +++ b/src/opt/opt_utils.rs @@ -0,0 +1,171 @@ +#![allow(dead_code)] + +use std::collections::HashMap; + +use crate::ir::*; + +/// "Replace-all-uses-with". 
+pub trait Walk { + fn walk(&mut self, f: F) -> bool + where + F: FnMut(&mut Operand) -> bool; +} + +impl Walk for FunctionDefinition { + fn walk(&mut self, mut f: F) -> bool + where + F: FnMut(&mut Operand) -> bool, + { + self.blocks.iter_mut().fold(false, |hit, (_, block)| block.walk(&mut f) | hit) + } +} + +impl Walk for Block { + fn walk(&mut self, mut f: F) -> bool + where + F: FnMut(&mut Operand) -> bool, + { + self.instructions.iter_mut().fold(false, |hit, i| i.walk(&mut f) | hit) | self.exit.walk(&mut f) + } +} + +impl Walk for Instruction { + fn walk(&mut self, mut f: F) -> bool + where + F: FnMut(&mut Operand) -> bool, + { + match self { + Self::Nop => false, + Self::BinOp { lhs, rhs, .. } => lhs.walk(&mut f) | rhs.walk(&mut f), + Self::UnaryOp { operand, .. } => operand.walk(&mut f), + Self::Store { ptr, value } => ptr.walk(&mut f) | value.walk(&mut f), + Self::Load { ptr } => ptr.walk(&mut f), + Self::Call { callee, args, .. } => { + callee.walk(&mut f) | args.iter_mut().fold(false, |hit, a| a.walk(&mut f) | hit) + } + Self::TypeCast { value, .. 
} => value.walk(&mut f), + } + } +} + +impl Walk for BlockExit { + fn walk(&mut self, mut f: F) -> bool + where + F: FnMut(&mut Operand) -> bool, + { + match self { + Self::Jump { arg } => arg.walk(&mut f), + Self::ConditionalJump { + condition, + arg_then, + arg_else, + } => condition.walk(&mut f) | arg_then.walk(&mut f) | arg_else.walk(&mut f), + Self::Switch { + value, + default, + cases, + } => { + value.walk(&mut f) + | default.walk(&mut f) + | cases.iter_mut().fold(false, |hit, (_, a)| a.walk(&mut f) | hit) + } + Self::Return { value } => value.walk(&mut f), + Self::Unreachable => false, + } + } +} + +impl Walk for JumpArg { + fn walk(&mut self, mut f: F) -> bool + where + F: FnMut(&mut Operand) -> bool, + { + self.args.iter_mut().fold(false, |hit, a| a.walk(&mut f) | hit) + } +} + +impl Walk for Operand { + fn walk(&mut self, mut f: F) -> bool + where + F: FnMut(&mut Operand) -> bool, + { + f(self) + } +} + +pub fn replace_operand(operand: &mut Operand, from: &Operand, to: &Operand) -> bool { + if operand == from { + *operand = to.clone(); + true + } else { + false + } +} + +pub fn replace_operands(operand: &mut Operand, dict: &HashMap) -> bool { + if let Some((rid, dtype)) = operand.get_register_mut() { + if let Some(val) = dict.get(rid) { + assert_eq!(*dtype, val.dtype()); + *operand = val.clone(); + return true; + } + } + false +} + +pub fn make_cfg(fdef: &FunctionDefinition) -> HashMap> { + let mut result = HashMap::new(); + + for (bid, block) in &fdef.blocks { + let mut args = Vec::new(); + match &block.exit { + BlockExit::Jump { arg } => args.push(arg.clone()), + BlockExit::ConditionalJump { + arg_then, arg_else, .. + } => { + args.push(arg_then.clone()); + args.push(arg_else.clone()); + } + BlockExit::Switch { default, cases, .. 
} => { + args.push(default.clone()); + for (_, arg) in cases { + args.push(arg.clone()); + } + } + _ => {} + } + result.insert(*bid, args); + } + result +} + +pub fn reverse_cfg( + cfg: &HashMap>, +) -> HashMap> { + let mut result = HashMap::new(); + + for (bid, jumps) in cfg { + for jump in jumps { + result + .entry(jump.bid) + .or_insert_with(Vec::new) + .push((*bid, jump.clone())); + } + } + result +} + +pub struct Domtree {} + +impl Domtree { + pub fn walk(&self, _f: F) + where + F: FnMut(BlockId, BlockId), + { + todo!() + } +} + +pub fn make_domtree(_cfg: &HashMap>) -> Domtree { + todo!() +} diff --git a/src/opt/simplify_cfg.rs b/src/opt/simplify_cfg.rs index 7638d00..006660a 100644 --- a/src/opt/simplify_cfg.rs +++ b/src/opt/simplify_cfg.rs @@ -1,21 +1,9 @@ use crate::ir::*; +use crate::opt::FunctionPass; use crate::*; -pub type SimplifyCfg = Repeat<(SimplifyCfgConstProp, (SimplifyCfgReach, SimplifyCfgMerge))>; - -impl Optimize for SimplifyCfg { - fn optimize(&mut self, code: &mut TranslationUnit) -> bool { - code.decls.iter_mut().any(|(_, decl)| self.optimize(decl)) - } -} - -impl Optimize for SimplifyCfg { - fn optimize(&mut self, code: &mut Declaration) -> bool { - let (_fsig, fdef) = some_or!(code.get_function_mut(), return false); - let fdef = some_or!(fdef, return false); - self.optimize(fdef) - } -} +pub type SimplifyCfg = + FunctionPass>; /// Simplifies block exits by propagating constants. #[derive(Default)] diff --git a/src/utils.rs b/src/utils.rs index 4e550f9..bf78970 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -44,3 +44,10 @@ macro_rules! 
some_or_exit { } }}; } + +pub trait Translate { + type Target; + type Error; + + fn translate(&mut self, source: &S) -> Result; +} diff --git a/tests/fuzz.py b/tests/fuzz.py index 9d41562..be8c422 100644 --- a/tests/fuzz.py +++ b/tests/fuzz.py @@ -26,6 +26,7 @@ REPLACE_DICT = { "_Float128": "long double", "union": "struct", r"enum[\w\s]*\{[^\}]*\};": "", + r"typedef enum[\w\s]*\{[^;]*;[\s_A-Z]*;": "", "const char \*const sys_errlist\[\];": "", r"[^\n]*printf[^;]*;": "", r"[^\n]*scanf[^;]*;": "", @@ -191,14 +192,6 @@ def creduce(tests_dir, fuzz_arg): def fuzz(tests_dir, fuzz_arg, num_iter): csmith_bin, csmith_inc = install_csmith(tests_dir) try: - print("Building KECC..") - try: - proc = subprocess.Popen(["cargo", "build", "--release"], cwd=tests_dir) - proc.communicate() - except subprocess.TimeoutExpired as e: - proc.kill() - raise e - if num_iter is None: print("Fuzzing with infinitely many test cases. Please press [ctrl+C] to break.") iterator = itertools.count(0) @@ -247,6 +240,15 @@ if __name__ == "__main__": raise Exception("Specify fuzzing argument") tests_dir = os.path.abspath(os.path.dirname(__file__)) + + print("Building KECC..") + try: + proc = subprocess.Popen(["cargo", "build", "--release"], cwd=tests_dir) + proc.communicate() + except subprocess.TimeoutExpired as e: + proc.kill() + raise e + if args.reduce: creduce(tests_dir, fuzz_arg) else: diff --git a/tests/reduce-criteria-template.sh b/tests/reduce-criteria-template.sh index d0320ef..4c79709 100644 --- a/tests/reduce-criteria-template.sh +++ b/tests/reduce-criteria-template.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash -cargo run --manifest-path $PROJECT_DIR/Cargo.toml --release -- -p test_reduced.c >/dev/null 2>&1 &&\ +cargo run --manifest-path $PROJECT_DIR/Cargo.toml --release -- --parse test_reduced.c >/dev/null 2>&1 &&\ ! cargo run --manifest-path $PROJECT_DIR/Cargo.toml --release --bin fuzz -- $FUZZ_ARG test_reduced.c