From 974590bc9b5d7f75c94beff748ced0d1aac0855e Mon Sep 17 00:00:00 2001
From: Aki
Date: Tue, 5 Sep 2023 23:56:17 +0200
Subject: [PATCH] Implement interpreter

---
 Cargo.toml                      |   2 +-
 crates/interpreter/Cargo.toml   |  11 ++
 crates/interpreter/src/frame.rs | 187 +++++++++++++++++++++++
 crates/interpreter/src/lib.rs   |  11 ++
 crates/interpreter/src/pc.rs    |  47 ++++++
 crates/interpreter/src/state.rs | 256 ++++++++++++++++++++++++++++++++
 crates/interpreter/src/value.rs |  79 ++++++++++
 7 files changed, 592 insertions(+), 1 deletion(-)
 create mode 100644 crates/interpreter/Cargo.toml
 create mode 100644 crates/interpreter/src/frame.rs
 create mode 100644 crates/interpreter/src/lib.rs
 create mode 100644 crates/interpreter/src/pc.rs
 create mode 100644 crates/interpreter/src/state.rs
 create mode 100644 crates/interpreter/src/value.rs

diff --git a/Cargo.toml b/Cargo.toml
index 492fefe7..fadbfecb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,2 +1,2 @@
 [workspace]
-members = ["crates/ir", "crates/codegen", "crates/object", "crates/parser", "crates/filecheck", "crates/triple"]
+members = ["crates/ir", "crates/codegen", "crates/object", "crates/parser", "crates/filecheck", "crates/triple", "crates/interpreter"]
diff --git a/crates/interpreter/Cargo.toml b/crates/interpreter/Cargo.toml
new file mode 100644
index 00000000..f74e990a
--- /dev/null
+++ b/crates/interpreter/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "interpreter"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+cranelift-entity = "0.99.1"
+sonatina-codegen = { path = "../codegen" }
+sonatina-ir = { path = "../ir" }
diff --git a/crates/interpreter/src/frame.rs b/crates/interpreter/src/frame.rs
new file mode 100644
index 00000000..ece18682
--- /dev/null
+++ b/crates/interpreter/src/frame.rs
@@ -0,0 +1,187 @@
+//! Call frames: the value bindings and `alloca` memory of one function invocation.
+
+use std::mem;
+
+use cranelift_entity::SecondaryMap;
+
+use sonatina_ir::{
+    module::ModuleCtx,
+    types::{CompoundType, CompoundTypeData},
+    DataFlowGraph, Function, Insn, Type, Value, ValueData,
+};
+
+use crate::{Literal, ProgramCounter};
+
+/// State of a single function invocation.
+pub struct Frame {
+    /// Program counter the caller resumes from when this frame returns.
+    pub ret_addr: ProgramCounter,
+    /// Value bindings; `None` marks a value that has not been bound yet.
+    local_values: SecondaryMap<Value, Option<Literal>>, // 256-bit register
+    alloca_region: Vec<u8>,                             // big endian
+}
+
+impl Frame {
+    /// Creates a frame for `func`, binding its argument values to `args` in order.
+    pub fn new(func: &Function, ret_addr: ProgramCounter, args: Vec<Literal>) -> Self {
+        let mut local_values = SecondaryMap::new();
+        for (v, literal_value) in func.arg_values.iter().zip(args) {
+            local_values[*v] = Some(literal_value);
+        }
+
+        Self {
+            ret_addr,
+            local_values,
+            alloca_region: Vec::new(),
+        }
+    }
+
+    /// Returns the literal bound to `v`, binding an immediate on its first use.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `v` is neither bound in this frame nor an immediate.
+    pub fn load(&mut self, v: Value, dfg: &DataFlowGraph) -> Literal {
+        if let Some(literal) = self.local_values[v] {
+            return literal;
+        }
+
+        let v = match dfg.value_data(v) {
+            ValueData::Insn { insn, .. } => {
+                let result_v = dfg.insn_result(*insn).unwrap();
+                if let Some(literal) = self.local_values[result_v] {
+                    return literal;
+                }
+                result_v
+            }
+            _ => v,
+        };
+        let literal = Literal(dfg.value_imm(v).unwrap().as_i256());
+        self.local_values[v] = Some(literal);
+        literal
+    }
+
+    /// Binds the result value of `insn` to `literal`.
+    ///
+    /// Rebinding is allowed: an instruction inside a loop runs once per iteration.
+    pub fn map(&mut self, literal: Literal, insn: Insn, dfg: &DataFlowGraph) {
+        let v = dfg.insn_result(insn).unwrap();
+        self.local_values[v] = Some(literal);
+    }
+
+    /// Reserves zeroed space for a `ty` and binds its address to the result of `insn`.
+    pub fn alloca(&mut self, ctx: &ModuleCtx, ty: Type, insn: Insn, dfg: &DataFlowGraph) {
+        let v = dfg.insn_result(insn).unwrap();
+
+        let addr = self.alloca_region.len();
+        let size_of_data = byte_size_of_ty(ctx, ty);
+        self.alloca_region.resize(addr + size_of_data, 0u8);
+        self.local_values[v] = Some(Literal::from_usize(addr));
+    }
+
+    /// Computes a GEP address: `args[0]` is the base pointer and the remaining values
+    /// index into its compound pointee, outermost first.
+    pub fn gep(&mut self, ctx: &ModuleCtx, args: &[Value], dfg: &DataFlowGraph) -> Literal {
+        let ptr_v = args[0];
+        let ptr = self.load(ptr_v, dfg);
+        let base_addr = ptr.as_usize();
+        let ptr_ty = dfg.value_ty(ptr_v);
+        debug_assert!(ctx.with_ty_store(|s| s.is_ptr(ptr_ty)));
+
+        let pointee_ty = ctx.with_ty_store(|s| s.deref(ptr_ty)).unwrap();
+        // The pointee must be indexable, i.e. neither an integer nor another pointer.
+        debug_assert!(!pointee_ty.is_integral() && !ctx.with_ty_store(|s| s.is_ptr(pointee_ty)));
+
+        let mut cmpd_ty = to_cmpd_ty(pointee_ty);
+        let mut offset = 0usize;
+        for arg in &args[1..] {
+            let index = self.load(*arg, dfg).as_usize();
+
+            ctx.with_ty_store(|s| match s.resolve_compound(cmpd_ty.unwrap()) {
+                CompoundTypeData::Array { elem, .. } => {
+                    offset += index * byte_size_of_ty(ctx, *elem);
+                    cmpd_ty = to_cmpd_ty(*elem);
+                }
+                CompoundTypeData::Struct(data) => {
+                    for ty in &data.fields[..index] {
+                        offset += byte_size_of_ty(ctx, *ty);
+                    }
+                    cmpd_ty = to_cmpd_ty(data.fields[index]);
+                }
+                _ => unreachable!(),
+            })
+        }
+        Literal::from_usize(base_addr + offset)
+    }
+
+    /// Reads a value of the result type of `insn` from the address held by `ptr` and
+    /// binds it to that result. Nothing is bound when the result type is `Void`.
+    pub fn ldr(&mut self, ctx: &ModuleCtx, ptr: Value, insn: Insn, dfg: &DataFlowGraph) {
+        let addr = self.load(ptr, dfg).as_usize();
+        debug_assert!(self.is_alloca(addr));
+
+        let ty = dfg.insn_result_ty(insn).unwrap();
+        let size = byte_size_of_ty(ctx, ty);
+        let literal_b = self.alloca_region[addr..addr + size].to_vec();
+        let Some(data) = Literal::deserialize(ctx, ty, literal_b) else {
+            return;
+        };
+        self.map(data, insn, dfg);
+    }
+
+    /// Writes `v`, serialized per its type, to the address held by `ptr`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the written range is not inside this frame's `alloca` region.
+    pub fn str(&mut self, ctx: &ModuleCtx, ptr: Value, v: Value, dfg: &DataFlowGraph) {
+        let addr = self.load(ptr, dfg).as_usize();
+        let data = self.load(v, dfg);
+        let data_ty = dfg.value_ty(v);
+        let data_b = data.serialize(ctx, data_ty);
+        self.alloca_region[addr..addr + data_b.len()].copy_from_slice(&data_b);
+    }
+
+    /// Returns whether `lhs` and `rhs` evaluate to the same literal.
+    pub fn eq(&mut self, lhs: Value, rhs: Value, dfg: &DataFlowGraph) -> bool {
+        self.load(lhs, dfg) == self.load(rhs, dfg)
+    }
+
+    fn is_alloca(&self, addr: usize) -> bool {
+        addr < self.alloca_region.len()
+    }
+}
+
+/// Returns the number of bytes a value of type `ty` occupies in `alloca` memory.
+pub fn byte_size_of_ty(ctx: &ModuleCtx, ty: Type) -> usize {
+    match ty {
+        Type::I1 => mem::size_of::<bool>(),
+        Type::I8 => mem::size_of::<i8>(),
+        Type::I16 => mem::size_of::<i16>(),
+        Type::I32 => mem::size_of::<i32>(),
+        Type::I64 => mem::size_of::<i64>(),
+        Type::I128 => mem::size_of::<i128>(),
+        // Must equal the 32 bytes `Literal::serialize` writes, whatever the host layout is.
+        Type::I256 => 256 / 8,
+        Type::Compound(ty) => {
+            use CompoundTypeData::*;
+            ctx.with_ty_store(|s| match s.resolve_compound(ty) {
+                Array { len, elem } => len * byte_size_of_ty(ctx, *elem),
+                Ptr(_) => mem::size_of::<usize>(),
+                Struct(data) => data
+                    .fields
+                    .iter()
+                    .fold(0usize, |acc, ty| acc + byte_size_of_ty(ctx, *ty)),
+            })
+        }
+        Type::Void => mem::size_of::<()>(),
+    }
+}
+
+/// Returns the compound type behind `ty`, or `None` for non-compound types.
+fn to_cmpd_ty(ty: Type) -> Option<CompoundType> {
+    match ty {
+        Type::Compound(ty) => Some(ty),
+        _ => None,
+    }
+}
diff --git a/crates/interpreter/src/lib.rs b/crates/interpreter/src/lib.rs
new file mode 100644
index 00000000..a9f197b6
--- /dev/null
+++ b/crates/interpreter/src/lib.rs
@@ -0,0 +1,11 @@
+//! Interpreter for sonatina IR.
+
+pub mod frame;
+pub mod pc;
+pub mod state;
+pub mod value;
+
+pub use frame::Frame;
+pub use pc::ProgramCounter;
+pub use state::State;
+pub use value::Literal;
diff --git a/crates/interpreter/src/pc.rs b/crates/interpreter/src/pc.rs
new file mode 100644
index 00000000..e095f2ae
--- /dev/null
+++ b/crates/interpreter/src/pc.rs
@@ -0,0 +1,47 @@
+//! Program counter of the interpreter.
+
+use sonatina_ir::{module::FuncRef, Block, Insn, Layout};
+
+/// Position of the interpreter: a function and an instruction inside it.
+#[derive(Clone, Copy)]
+pub struct ProgramCounter {
+    pub func_ref: FuncRef,
+    pub insn: Insn,
+}
+
+impl ProgramCounter {
+    /// Points at the first instruction of the entry block of `entry_func`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `layout` has no entry block or the entry block is empty.
+    pub fn new(entry_func: FuncRef, layout: &Layout) -> Self {
+        let entry = layout.entry_block().unwrap();
+        let insn = layout.first_insn_of(entry).unwrap();
+
+        Self {
+            func_ref: entry_func,
+            insn,
+        }
+    }
+
+    /// Jumps to the first instruction of the callee.
+    pub fn call(&mut self, callee_ref: FuncRef, callee_layout: &Layout) {
+        *self = ProgramCounter::new(callee_ref, callee_layout)
+    }
+
+    /// Advances to the instruction following the current one.
+    pub fn next_insn(&mut self, layout: &Layout) {
+        self.insn = layout.next_insn_of(self.insn).unwrap();
+    }
+
+    /// Jumps to the first instruction of `block`.
+    pub fn branch_to(&mut self, block: Block, layout: &Layout) {
+        self.insn = layout.first_insn_of(block).unwrap();
+    }
+
+    /// Restores the position saved in `ret_addr`.
+    pub fn resume_frame_at(&mut self, ret_addr: Self) {
+        *self = ret_addr;
+    }
+}
diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs
new file mode 100644
index 00000000..95399810
--- /dev/null
+++ b/crates/interpreter/src/state.rs
@@ -0,0 +1,256 @@
+//! The interpreter's machine state and its instruction stepping loop.
+
+use std::ops::{Add, BitAnd, BitOr, BitXor, Mul, Neg, Not, Sub};
+
+use sonatina_ir::{
+    insn::{BinaryOp, CastOp, UnaryOp},
+    module::FuncRef,
+    Block, DataLocationKind, Immediate, InsnData, Module,
+};
+
+use crate::{Frame, Literal, ProgramCounter};
+
+/// Interpreter state: the module, the call stack and the program counter.
+pub struct State {
+    module: Module,
+    frames: Vec<Frame>,
+    pc: ProgramCounter,
+    /// Block left by the most recent branch; selects the incoming value of a phi.
+    prev_block: Option<Block>,
+}
+
+impl State {
+    /// Creates a state that starts executing `entry_func` with no arguments.
+    pub fn new(module: Module, entry_func: FuncRef) -> Self {
+        let func = &module.funcs[entry_func];
+        let pc = ProgramCounter::new(entry_func, &func.layout);
+        let entry_frame = Frame::new(func, pc, vec![]);
+        let frames = vec![entry_frame];
+
+        Self {
+            module,
+            frames,
+            pc,
+            prev_block: None,
+        }
+    }
+
+    /// Runs until the entry function returns and yields its return value, if any.
+    pub fn repl(mut self) -> Option<Literal> {
+        loop {
+            if let Some(arg) = self.step() {
+                return arg;
+            }
+        }
+    }
+
+    /// Executes the instruction at the program counter.
+    ///
+    /// Returns `Some(ret)` once the entry function has returned, where `ret` is its
+    /// optional return value, and `None` while execution continues.
+    pub fn step(&mut self) -> Option<Option<Literal>> {
+        let frame = self.frames.last_mut().unwrap();
+        let insn = self.pc.insn;
+        let ctx = &self.module.ctx;
+        let func = &self.module.funcs[self.pc.func_ref];
+
+        let dfg = &func.dfg;
+        let layout = &func.layout;
+
+        let insn_data = dfg.insn_data(insn);
+
+        use InsnData::*;
+        match insn_data {
+            Unary { code, args } => {
+                let arg = frame.load(args[0], dfg).0;
+                use UnaryOp::*;
+                let result = Literal(match code {
+                    Not => arg.not(),
+                    Neg => arg.neg(),
+                });
+
+                frame.map(result, insn, dfg);
+
+                self.pc.next_insn(layout);
+                None
+            }
+            Binary { code, args } => {
+                let lhs: Immediate = frame.load(args[0], dfg).0.into();
+                let rhs: Immediate = frame.load(args[1], dfg).0.into();
+                use BinaryOp::*;
+                let result = Literal(
+                    match code {
+                        Add => lhs.add(rhs),
+                        Sub => lhs.sub(rhs),
+                        Mul => lhs.mul(rhs),
+                        Udiv => lhs.udiv(rhs),
+                        Sdiv => lhs.sdiv(rhs),
+                        Lt => lhs.lt(rhs),
+                        Gt => lhs.gt(rhs),
+                        Slt => lhs.slt(rhs),
+                        Sgt => lhs.sgt(rhs),
+                        Le => lhs.le(rhs),
+                        Ge => lhs.ge(rhs),
+                        Sle => lhs.sle(rhs),
+                        Sge => lhs.sge(rhs),
+                        Eq => lhs.imm_eq(rhs),
+                        Ne => lhs.imm_ne(rhs),
+                        And => lhs.bitand(rhs),
+                        Or => lhs.bitor(rhs),
+                        Xor => lhs.bitxor(rhs),
+                    }
+                    .as_i256(),
+                );
+
+                frame.map(result, insn, dfg);
+
+                self.pc.next_insn(layout);
+                None
+            }
+            Cast { code, args, ty } => {
+                let arg: Immediate = frame.load(args[0], dfg).0.into();
+                use CastOp::*;
+                let result = Literal(
+                    match code {
+                        Sext => arg.sext(*ty),
+                        Zext => arg.zext(*ty),
+                        Trunc => arg.trunc(*ty),
+                        BitCast => arg,
+                    }
+                    .as_i256(),
+                );
+
+                frame.map(result, insn, dfg);
+
+                self.pc.next_insn(layout);
+                None
+            }
+            Load { args, loc } => {
+                use DataLocationKind::*;
+                match loc {
+                    Memory => {
+                        frame.ldr(ctx, args[0], insn, dfg);
+                    }
+                    Storage => todo!(),
+                }
+
+                self.pc.next_insn(layout);
+                None
+            }
+            Store { args, loc } => {
+                use DataLocationKind::*;
+                match loc {
+                    Memory => {
+                        frame.str(ctx, args[0], args[1], dfg);
+                    }
+                    Storage => todo!(),
+                }
+
+                self.pc.next_insn(layout);
+                None
+            }
+            Call { func, args, .. } => {
+                let literal_args = args.iter().map(|arg| frame.load(*arg, dfg)).collect();
+
+                // Function prologue
+
+                let ret_addr = self.pc;
+
+                let callee = &self.module.funcs[*func];
+                let new_frame = Frame::new(callee, ret_addr, literal_args);
+                self.frames.push(new_frame);
+
+                self.pc.call(*func, &callee.layout);
+                None
+            }
+            Jump { dests, .. } => {
+                let block = layout.insn_block(insn);
+                self.prev_block = Some(block);
+
+                self.pc.branch_to(dests[0], layout);
+                None
+            }
+            Branch { args, dests } => {
+                // A non-zero condition selects `dests[0]`, zero selects `dests[1]`.
+                let cond = frame.load(args[0], dfg);
+                let idx = if cond == Literal::default() { 1 } else { 0 };
+
+                let block = layout.insn_block(insn);
+                self.prev_block = Some(block);
+                self.pc.branch_to(dests[idx], layout);
+                None
+            }
+            BrTable {
+                args,
+                default,
+                table,
+            } => {
+                let block = layout.insn_block(insn);
+                self.prev_block = Some(block);
+
+                let cond = args[0];
+                for (idx, arg) in args[1..].iter().enumerate() {
+                    if frame.eq(cond, *arg, dfg) {
+                        self.pc.branch_to(table[idx], layout);
+                        return None;
+                    }
+                }
+                // `br_table` terminates its block, so there is nothing to fall through to.
+                let Some(block) = *default else {
+                    panic!("br_table: no case matched and there is no default block");
+                };
+                self.pc.branch_to(block, layout);
+                None
+            }
+            Alloca { ty } => {
+                frame.alloca(ctx, *ty, insn, dfg);
+
+                self.pc.next_insn(layout);
+                None
+            }
+            Return { args } => {
+                let arg = args.map(|arg| frame.load(arg, dfg));
+
+                let frame = self.frames.pop().unwrap(); // pop returning frame
+                match self.frames.last_mut() {
+                    Some(caller_frame) => {
+                        // Function epilogue
+
+                        self.pc.resume_frame_at(frame.ret_addr);
+
+                        let caller = &self.module.funcs[self.pc.func_ref];
+                        if let Some(lit) = arg {
+                            caller_frame.map(lit, self.pc.insn, &caller.dfg);
+                        }
+
+                        self.pc.next_insn(&caller.layout);
+                        None
+                    }
+                    None => Some(arg),
+                }
+            }
+            Gep { args } => {
+                let ptr = frame.gep(ctx, args, dfg);
+
+                frame.map(ptr, insn, dfg);
+
+                self.pc.next_insn(layout);
+                None
+            }
+            Phi { values, blocks, .. } => {
+                let prev_block = self.prev_block.unwrap();
+                for (v, block) in values.iter().zip(blocks.iter()) {
+                    if prev_block == *block {
+                        let lit = frame.load(*v, dfg);
+                        frame.map(lit, insn, dfg);
+                        break;
+                    }
+                }
+
+                // Advance like every other non-terminator, or `repl` would spin here.
+                self.pc.next_insn(layout);
+                None
+            }
+        }
+    }
+}
diff --git a/crates/interpreter/src/value.rs b/crates/interpreter/src/value.rs
new file mode 100644
index 00000000..60fccb9f
--- /dev/null
+++ b/crates/interpreter/src/value.rs
@@ -0,0 +1,79 @@
+//! Literal values manipulated by the interpreter.
+
+use sonatina_ir::{module::ModuleCtx, Type, I256, U256};
+
+/// A 256-bit value held in an interpreter register.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct Literal(pub I256);
+
+impl Default for Literal {
+    fn default() -> Self {
+        Literal(I256::zero())
+    }
+}
+
+impl Literal {
+    /// Wraps an address of the `alloca` region.
+    pub fn from_usize(addr: usize) -> Self {
+        Self(I256::from_u256(U256::from(addr)))
+    }
+
+    /// Converts the value to `usize` through its unsigned 256-bit form.
+    pub fn as_usize(&self) -> usize {
+        self.0.to_u256().as_usize()
+    }
+
+    /// Decodes the big-endian bytes `b` as a value of type `ty`.
+    ///
+    /// Returns `None` for `Type::Void`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `b` is empty, or if its length differs from the width of an
+    /// `I8`..`I128` or pointer `ty`.
+    pub fn deserialize(ctx: &ModuleCtx, ty: Type, b: Vec<u8>) -> Option<Self> {
+        Some(Self(match ty {
+            Type::I1 => (b[0] & 0b1).into(),
+            Type::I8 => i8::from_be_bytes(b.try_into().unwrap()).into(),
+            Type::I16 => i16::from_be_bytes(b.try_into().unwrap()).into(),
+            Type::I32 => i32::from_be_bytes(b.try_into().unwrap()).into(),
+            Type::I64 => i64::from_be_bytes(b.try_into().unwrap()).into(),
+            Type::I128 => i128::from_be_bytes(b.try_into().unwrap()).into(),
+            Type::I256 => I256::from_u256(U256::from_big_endian(&b)),
+            Type::Compound(ty) => {
+                debug_assert!(ctx.with_ty_store(|s| s.resolve_compound(ty).is_ptr()));
+                debug_assert!(b.len() == std::mem::size_of::<usize>());
+                U256::from(usize::from_be_bytes(b.try_into().unwrap())).into()
+            }
+            Type::Void => return None,
+        }))
+    }
+
+    /// Encodes the value as big-endian bytes, truncated to the width of `ty`.
+    pub fn serialize(&self, ctx: &ModuleCtx, ty: Type) -> Vec<u8> {
+        match ty {
+            Type::I1 => self.i256().trunc_to_i8().to_be_bytes().to_vec(),
+            Type::I8 => self.i256().trunc_to_i8().to_be_bytes().to_vec(),
+            Type::I16 => self.i256().trunc_to_i16().to_be_bytes().to_vec(),
+            Type::I32 => self.i256().trunc_to_i32().to_be_bytes().to_vec(),
+            Type::I64 => self.i256().trunc_to_i64().to_be_bytes().to_vec(),
+            Type::I128 => self.i256().trunc_to_i128().to_be_bytes().to_vec(),
+            Type::I256 => {
+                let mut b = [0u8; 32];
+                self.i256().to_u256().to_big_endian(&mut b);
+                b.to_vec()
+            }
+            Type::Compound(ty) => {
+                debug_assert!(ctx.with_ty_store(|s| s.resolve_compound(ty).is_ptr()));
+                let mut b = [0u8; 32];
+                self.i256().to_u256().to_big_endian(&mut b);
+                b[32 - std::mem::size_of::<usize>()..].to_vec()
+            }
+            Type::Void => Vec::new(),
+        }
+    }
+
+    /// Returns the wrapped 256-bit integer.
+    pub fn i256(&self) -> I256 {
+        self.0
+    }
+}