diff --git a/lib/arm/arm_machine.rb b/lib/arm/arm_machine.rb
new file mode 100644
index 00000000..2cbeb054
--- /dev/null
+++ b/lib/arm/arm_machine.rb
@@ -0,0 +1,15 @@
+require "vm/machine"
+
+module Arm
+  # ARM flavour of Vm::Machine. Both hooks below currently return fixed placeholder
+  # strings and ignore their argument.
+  class ArmMachine < Vm::Machine
+
+    def word_load value
+      "word"
+    end
+    def function_call call_value
+      "call"
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/arm/assembler.rb b/lib/arm/assembler.rb
new file mode 100644
index 00000000..b095f2bc
--- /dev/null
+++ b/lib/arm/assembler.rb
@@ -0,0 +1,98 @@
+require 'asm/nodes'
+require 'asm/block'
+require 'stream_reader'
+require 'stringio'
+require "asm/string_literal"
+
+module Asm
+
+  # Assembler is the top-level of the code hierarchy, except it is not derived from Code;
+  # instead an Assembler is a list of blocks (and string constants)
+
+  # All code is created in blocks (see there) and there are two styles for that, for forward or backward
+  # referencing. Read method_missing and add_block and Block#code
+
+
+  class Assembler
+
+    def initialize
+      @blocks = []
+      @string_table = {}
+    end
+
+    attr_reader :blocks
+
+    # Assembling to string will return a binary string of the whole program, ie all blocks and the
+    # strings they use
+    # As a memory reference this would be callable, but more likely you will hand it over to
+    # an ObjectWriter as the .text section and then link it. And then execute it :-)
+    def assemble_to_string
+      # put the strings at the end of the assembled code.
+      # adding them fixes their position; strings added by an earlier call are not added again
+      @string_table.values.each do |data|
+        add_block data unless @blocks.include?(data)
+      end
+      io = StringIO.new
+      assemble(io)
+      io.string
+    end
+
+    # Add a string to the string table. Strings are global and constant. So only one copy of each
+    # string exists
+    # Internally StringLiterals are created and stored and during assembly written after the blocks
+    def add_string str
+      code = @string_table[str]
+      return code if code
+      data = Asm::StringLiteral.new(str)
+      @string_table[str] = data
+    end
+
+    # Length of all blocks. Does not take strings into account as they are added after all blocks.
+    # This is used to determine where a block goes when it is added after creation (see add_block)
+    def length
+      @blocks.inject(0) {| sum , item | sum + item.length}
+    end
+
+    # This is how you add a forward declared block. This is called automatically when you
+    # call block with ruby block, but has to be done manually if not
+    def add_block block
+      block.at self.length
+      @blocks << block
+    end
+
+    # return the block of the given name
+    # or raise an exception, as this is meant to be called when the block is available
+    def get_block name
+      block = @blocks.find {|b| b.name == name}
+      raise "No block found for #{name} (in #{blocks.collect{|b|b.name}.join(':')})" unless block
+      block
+    end
+    # this is used to create blocks.
+    # All functions that have no args are interpreted as block names
+    # and if a block is provided, it is evaluated in the (ruby)blocks scope and the block added to the
+    # program immediately.
+    # If no block is provided (forward declaration), you must call code on it later
+    def method_missing(meth, *args, &block)
+      if args.length == 0
+        code = Block.new(meth.to_s , self )
+        if block_given?
+          add_block code
+          code.instance_eval(&block)
+        end
+        return code
+      else
+        super
+      end
+    end
+
+
+    private
+
+    def assemble(io)
+      @blocks.each do |obj|
+        obj.assemble io
+      end
+    end
+  end
+end
+
diff --git a/lib/arm/assembly_error.rb b/lib/arm/assembly_error.rb
new file mode 100644
index 00000000..c3d7dd33
--- /dev/null
+++ b/lib/arm/assembly_error.rb
@@ -0,0 +1,8 @@
+module Asm
+  # Raised when an instruction can not be encoded from the arguments it was given
+  class AssemblyError < StandardError
+    def initialize(message)
+      super(message)
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/arm/block.rb b/lib/arm/block.rb
new file mode 100644
index 00000000..ab783b6a
--- /dev/null
+++ b/lib/arm/block.rb
@@ -0,0 +1,160 @@
+require_relative 'call_instruction'
+require_relative 'stack_instruction'
+require_relative 'logic_instruction'
+require_relative 'memory_instruction'
+
+module Asm
+
+  class Code ; end
+
+  # A Block is the smallest unit of code, a list of instructions as it were
+  # It is also a point to jump/branch to. An address in the final stream.
+  # To allow for forward branches creation does not fix the position.
+  # The position is fixed in one of three ways
+  # - create the block with ruby block, signalling that the instantiation point is the position
+  # - call block.code with the code or if you wish program.add_block (and add your code with calls)
+  # - the assembly process will pin it if it wasn't set
+
+  # creating blocks is done by calling the blocks name/label on either a program or a block
+  # (method missing will catch the call and create the block)
+  # and the easiest way is to go into a ruby block and start writing instructions
+  # Example (backward jump):
+  #   program.loop do          create a new block with label loop
+  #     sub r1 , r1 , 1        count the r1 register down
+  #     bne :loop              jump back to loop when the counter is not zero
+  #   end                      (initialization and actual code missing of course)
+
+  # Example (forward jump)
+  #   else_block = program.else
+  #   program.if do
+  #     tst r1 , 0             test some condition
+  #     beq else_block         pass the block itself, a symbol only finds blocks that are already added
+  #     mov . . .. ..          do whatever the if block does
+  #   end
+  #   else_block.code do
+  #     ldr ....               do whatever else does
+  #   end
+
+  # Blocks are also used to create instructions, and so Block has functions for every cpu instruction
+  # and to make using the api functions easier, there are functions that create registers as well
+  class Block < Code
+
+    def initialize(name , prog)
+      super()
+      @name = name.to_sym
+      @codes = []
+      @position = 0
+      @program = prog
+    end
+    attr_reader :name
+
+    ArmMachine::REGISTERS.each do |reg , number|
+      define_method(reg) { Asm::Register.new(reg , number) }
+    end
+
+    # Converts the dsl arguments to nodes (Integer -> NumLiteral, String -> the programs shared string,
+    # Symbol -> the already added block of that name, Registers and Blocks pass through) and appends
+    # an instance of clazz created with them to this block. Raises for any other kind of argument.
+    def instruction(clazz, opcode , condition_code , update_status , *args)
+      arg_nodes = []
+      args.each do |arg|
+        if (arg.is_a?(Asm::Register))
+          arg_nodes << arg
+        elsif (arg.is_a?(Integer))
+          arg_nodes << Asm::NumLiteral.new(arg)
+        elsif (arg.is_a?(String))
+          arg_nodes << @program.add_string(arg)
+        elsif (arg.is_a?(Asm::Block))
+          arg_nodes << arg
+        elsif (arg.is_a?(Symbol))
+          block = @program.get_block arg
+          arg_nodes << block
+        else
+          raise "Invalid argument #{arg.inspect} for instruction"
+        end
+      end
+      add_code clazz.new(opcode , condition_code , update_status , arg_nodes)
+    end
+
+
+    # Defines the dsl methods for one opcode: inst, inst + "s" (update status flag set) and both
+    # of those again with every condition code of ArmMachine::COND_CODES appended
+    def self.define_instruction(inst , clazz )
+      define_method(inst) do |*args|
+        instruction clazz , inst , :al , 0 , *args
+      end
+      define_method("#{inst}s") do |*args|
+        instruction clazz , inst , :al , 1 , *args
+      end
+      ArmMachine::COND_CODES.keys.each do |suffix|
+        define_method("#{inst}#{suffix}") do |*args|
+          instruction clazz , inst , suffix , 0 , *args
+        end
+        define_method("#{inst}s#{suffix}") do |*args|
+          instruction clazz , inst , suffix , 1 , *args
+        end
+      end
+    end
+
+    [:push, :pop].each do |inst|
+      define_instruction(inst , StackInstruction)
+    end
+
+    [:adc, :add, :and, :bic, :eor, :orr, :rsb, :rsc, :sbc, :sub].each do |inst|
+      define_instruction(inst , LogicInstruction)
+    end
+    [:mov, :mvn].each do |inst|
+      define_instruction(inst , MoveInstruction)
+    end
+    [:cmn, :cmp, :teq, :tst].each do |inst|
+      define_instruction(inst , CompareInstruction)
+    end
+    [:strb, :str , :ldrb, :ldr].each do |inst|
+      define_instruction(inst , MemoryInstruction)
+    end
+    [:b, :bl , :swi].each do |inst|
+      define_instruction(inst , CallInstruction)
+    end
+
+    # coding a block fixes its position in the stream.
+    # You must call with a block, which is instance_eval'd and provides the actual code for the block
+    def code &block
+      @program.add_block self
+      instance_eval(&block)
+    end
+
+    # length of the codes. In arm it would be the length * 4
+    # (strings are stored globally in the Assembler)
+    def length
+      @codes.inject(0) {| sum , item | sum + item.length}
+    end
+
+    # Tells kode its position (the current offset), advances the offset by kodes length and stores kode
+    def add_code(kode)
+      kode.at(@position)
+      length = kode.length
+      @position += length
+      @codes << kode
+    end
+
+    # Lets every stored code assemble itself to io, in the order the codes were added
+    def assemble(io)
+      @codes.each do |obj|
+        obj.assemble io
+      end
+    end
+
+    # this is used to create blocks.
+    # All functions that have no args are interpreted as block names
+    # In fact the block calls are delegated to the program which then instantiates the blocks
+    def method_missing(meth, *args, &block)
+      if args.length == 0
+        @program.send(meth , *args , &block)
+      else
+        super
+      end
+    end
+
+  end
+
+end
\ No newline at end of file
diff --git a/lib/arm/call_instruction.rb b/lib/arm/call_instruction.rb
new file mode 100644
index 00000000..257fcd4a
--- /dev/null
+++ b/lib/arm/call_instruction.rb
@@ -0,0 +1,51 @@
+require_relative "instruction"
+
+module Asm
+  # There are only three call instructions in arm: branch (b), call (bl) and syscall (swi)
+
+  # A branch could be called a jump as it has no notion of returning
+
+  # A call has the bl code as someone thought "branch with link" is a useful name.
+  # The pc is put into the link register to make a return possible
+  # a return is effected by moving the stored link register into the pc, effectively a branch
+
+  # swi (SoftWareInterrupt) or system call is how we call the kernel.
+  # in Arm the register layout is different and so we have to place the syscall code into register 7
+  # Registers 0-6 hold the call values as for a normal c call
+
+  class CallInstruction < Instruction
+
+    # Writes the instruction to io: first the low 3 bytes (branch word offset or swi number, little
+    # endian), then one byte holding the opcode bits and the condition code in its high nibble
+    def assemble(io)
+      case opcode
+      when :b, :bl
+        arg = args[0]
+        if arg.is_a? Block
+          diff = arg.position - self.position - 8
+          arg = NumLiteral.new(diff)
+        end
+        if (arg.is_a?(Asm::NumLiteral))
+          jmp_val = arg.value >> 2
+          # only the low 24 bits of the signed word offset are written, so it has to fit into those
+          raise Asm::AssemblyError.new("branch offset #{arg.value} does not fit into 24 bits") unless jmp_val.between?(-(1 << 23) , (1 << 23) - 1)
+          packed = [jmp_val].pack('l<')
+          io << packed[0,3]
+        else
+          raise "else not coded #{arg.inspect}"
+        end
+        io.write_uint8 OPCODES[opcode] | (COND_CODES[@condition_code] << 4)
+      when :swi
+        arg = args[0]
+        if (arg.is_a?(Asm::NumLiteral))
+          packed = [arg.value].pack('L<')[0,3]
+          io << packed
+          io.write_uint8 0b1111 | (COND_CODES[@condition_code] << 4)
+        else
+          raise Asm::AssemblyError.new("invalid operand argument expected literal not #{arg}")
+        end
+      end
+    end
+
+  end#class
+end
\ No newline at end of file
diff --git a/lib/arm/instruction.rb b/lib/arm/instruction.rb
new file mode 100644
index 00000000..ad4535c0
--- /dev/null
+++ b/lib/arm/instruction.rb
@@ -0,0 +1,46 @@
+require_relative "assembly_error"
+require_relative "arm_machine"
+
+module Asm
+
+  class Code ; end
+
+  # Not surprisingly represents a cpu instruction.
+  # This is an abstract base class, with derived classes
+  # Logic / Move / Compare / Stack / Memory (see there)
+  #
+  # Opcode is a (<= three) letter acronym (same as in assembly code). Though in arm, suffixes can
+  # make the mnemonic longer, the condition code and the status flag are passed in separately
+  # Arguments are registers or labels or string/num Literals
+
+  class Instruction < Code
+    include ArmMachine
+
+    COND_POSTFIXES = Regexp.union( COND_CODES.keys.collect{|k|k.to_s} ).source
+
+    def initialize(opcode , condition_code , update_status , args)
+      @update_status_flag = update_status
+      @condition_code = condition_code.to_sym
+      @opcode = opcode
+      @args = args
+      @operand = 0
+    end
+
+    attr_reader :opcode, :args
+    # Many arm instructions may be conditional, where the default condition is always (al)
+    # ArmMachine::COND_CODES names them, and this attribute reflects it
+    attr_reader :condition_code
+    attr_reader :operand
+
+    # Logic instructions may be executed with or without affecting the status register
+    # Only when an instruction affects the status is a subsequent compare instruction effective
+    # But to make the conditional execution (see cond) work for more than one instruction, one needs to
+    # be able to execute without changing the status
+    attr_reader :update_status_flag
+
+    # arm instructions are pretty sensible, and always 4 bytes (thumb not supported)
+    def length
+      4
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/arm/logic_instruction.rb b/lib/arm/logic_instruction.rb
new file mode 100644
index 00000000..035e1725
--- /dev/null
+++ b/lib/arm/logic_instruction.rb
@@ -0,0 +1,108 @@
+require_relative "instruction"
+
+module Asm
+  # ADDRESSING MODE 1
+  # Logic, Maths, Move and Compare instructions (last three below)
+
+  class LogicInstruction < Instruction
+
+    def initialize(opcode , condition_code , update_status , args)
+      super(opcode , condition_code , update_status , args)
+      @rn = nil
+      @i = 0
+      @rd = args[0]
+    end
+    attr_accessor :i, :rn, :rd
+
+    # Build representation for source value
+    def build
+      @rn = args[1]
+      do_build args[2]
+    end
+
+    #(stays in subclasses, while build is overridden to provide different arguments)
+    def do_build(arg)
+      if arg.is_a?(Asm::StringLiteral)
+        # do pc relative addressing with the difference to the instruction
+        # 8 is for the funny pipeline adjustment (ie pc pointing to fetch and not execute)
+        arg = Asm::NumLiteral.new( arg.position - self.position - 8 )
+      end
+      if (arg.is_a?(Asm::NumLiteral))
+        if (arg.value.fits_u8?)
+          # no shifting needed
+          @operand = arg.value
+          @i = 1
+        elsif (op_with_rot = calculate_u8_with_rr(arg))
+          @operand = op_with_rot
+          @i = 1
+        else
+          raise Asm::AssemblyError.new("cannot fit numeric literal argument in operand #{arg}")
+        end
+      elsif (arg.is_a?(Asm::Register))
+        @operand = arg
+        @i = 0
+      elsif (arg.is_a?(Asm::Shift))
+        rm_ref = arg.argument
+        @i = 0
+        shift_op = {'lsl' => 0b000, 'lsr' => 0b010, 'asr' => 0b100,
+                    'ror' => 0b110, 'rrx' => 0b110}[arg.type]
+        if (arg.type == 'ror' && arg.value.nil?)
+          # ror #0 == rrx
+          raise Asm::AssemblyError.new("cannot rotate by zero #{arg.inspect}")
+        end
+
+        arg1 = arg.value
+        if (arg1.is_a?(Asm::NumLiteral))
+          if (arg1.value >= 32)
+            raise Asm::AssemblyError.new("cannot shift by more than 31 #{arg1.inspect}")
+          end
+          shift_imm = arg1.value
+        elsif (arg1.is_a?(Asm::Register))
+          shift_op |= 0x1
+          shift_imm = arg1.bits << 1
+        elsif (arg.type == 'rrx')
+          shift_imm = 0
+        end
+
+        @operand = (rm_ref.is_a?(Asm::Register) ? rm_ref.bits : rm_ref) | (shift_op << 4) | (shift_imm << 4+3)
+      else
+        raise Asm::AssemblyError.new("invalid operand argument #{arg.inspect}")
+      end
+    end
+
+    # Builds the operand and writes the instruction to io as one 32 bit word (fields are or'ed in below)
+    def assemble(io)
+      build
+      instuction_class = 0b00 # OPC_DATA_PROCESSING
+      val = operand.is_a?(Register) ? operand.bits : operand
+      val |= (rd.bits << 12)
+      val |= (rn.bits << 12+4)
+      val |= (update_status_flag << 12+4+4)#20
+      val |= (op_bit_code << 12+4+4 +1)
+      val |= (i << 12+4+4 +1+4)
+      val |= (instuction_class << 12+4+4 +1+4+1)
+      val |= (cond_bit_code << 12+4+4 +1+4+1+2)
+      io.write_uint32 val
+    end
+  end
+  class CompareInstruction < LogicInstruction
+    def initialize(opcode , condition_code , update_status , args)
+      super(opcode , condition_code , update_status , args)
+      @update_status_flag = 1
+      @rn = args[0]
+      @rd = reg "r0"
+    end
+    def build
+      do_build args[1]
+    end
+  end
+  class MoveInstruction < LogicInstruction
+    def initialize(opcode , condition_code , update_status , args)
+      super(opcode , condition_code , update_status , args)
+      @rn = reg "r0" # register zero = zero bit pattern
+    end
+    def build
+      do_build args[1]
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/arm/memory_instruction.rb b/lib/arm/memory_instruction.rb
new file mode 100644
index 00000000..b5222486
--- /dev/null
+++ b/lib/arm/memory_instruction.rb
@@ -0,0 +1,80 @@
+require "asm/nodes"
+require_relative "instruction"
+
+module Asm
+  # ADDRESSING MODE 2
+  # Implemented: immediate offset with offset=0
+  class MemoryInstruction < Instruction
+
+    def initialize(opcode , condition_code , update_status , args)
+      super(opcode , condition_code , update_status , args)
+      @i = 0 #I flag (third bit)
+      @pre_post_index = 0 #P flag
+      @add_offset = 0 #U flag
+      @byte_access = opcode.to_s[-1] == "b" ? 1 : 0 #B (byte) flag
+      @w = 0 #W flag
+      @is_load = opcode.to_s[0] == "l" ? 1 : 0 #L (load) flag
+      @rn = reg "r0" # register zero = zero bit pattern
+      @rd = reg "r0" # register zero = zero bit pattern
+    end
+    attr_accessor :i, :pre_post_index, :add_offset,
+                  :byte_access, :w, :is_load, :rn, :rd
+
+    # Build representation for target address
+    def build
+      if( @is_load )
+        @rd = args[0]
+        arg = args[1]
+      else # store -- NOTE(review): never reached, 0 and 1 are both truthy in ruby; confirm the intended order
+        @rd = args[1]
+        arg = args[0]
+      end
+      #str / ldr are _serious instructions. With BIG possibilities not half are implemented
+      if (arg.is_a?(Asm::Register))
+        @rn = arg
+        if(arg.offset != 0)
+          @operand = arg.offset
+          if (@operand < 0)
+            @add_offset = 0
+            #TODO test/check/understand
+            @operand *= -1
+          else
+            @add_offset = 1
+          end
+          if (@operand.abs > 4095)
+            raise Asm::AssemblyError.new("reference offset too large/small (max 4095) #{arg.offset}" )
+          end
+        end
+      elsif (arg.is_a?(Asm::Label) || arg.is_a?(Asm::NumLiteral))
+        @pre_post_index = 1
+        @rn = pc
+        @use_addrtable_reloc = true
+        @addrtable_reloc_target = arg
+      else
+        raise Asm::AssemblyError.new("invalid operand argument #{arg.inspect}")
+      end
+    end
+
+    def assemble(io)
+      build
+      #not sure about these 2 constants. They produce the correct output for str r0 , r1
+      # but i can't help thinking that that is because they are not used in that instruction and
+      # so it doesn't matter. Will see
+      @add_offset = 1
+      @pre_post_index = 1
+      instuction_class = 0b01 # OPC_MEMORY_ACCESS
+      val = operand
+      val |= (rd.bits << 12 )
+      val |= (rn.bits << 12+4) #16
+      val |= (is_load << 12+4 +4)
+      val |= (w << 12+4 +4+1)
+      val |= (byte_access << 12+4 +4+1+1)
+      val |= (add_offset << 12+4 +4+1+1+1)
+      val |= (pre_post_index << 12+4 +4+1+1+1+1)#24
+      val |= (i << 12+4 +4+1+1+1+1 +1)
+      val |= (instuction_class<<12+4 +4+1+1+1+1 +1+1)
+      val |= (cond_bit_code << 12+4 +4+1+1+1+1 +1+1+2)
+      io.write_uint32 val
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/arm/nodes.rb b/lib/arm/nodes.rb
new file mode 100644
index 00000000..7297019b
--- /dev/null
+++ b/lib/arm/nodes.rb
@@ -0,0 +1,44 @@
+module Asm
+
+  class Shift
+    attr_accessor :type, :value, :argument
+  end
+
+  # Registers have of course a name (r1-16 for arm)
+  # but also refer to an address. In other words they can be an operand for instructions.
+  # Arm has addressing modes abound, and so can add to a register before actually using it
+  # It can actually shift or indeed shift what it adds, but that is not implemented
+  class Register
+    attr_accessor :name , :offset , :bits
+    def initialize name , bits
+      @name = name
+      @bits = bits
+      @offset = 0
+    end
+
+    # this is for the dsl, so we can write pretty code like r1 + 4
+    # when we want to access the next word (4) after r1
+    def + number
+      @offset = number
+      self
+    end
+  end
+
+  # maybe not used at all as code_gen::instruction raises if used.
+  # instead now using Arrays
+  class RegisterList
+    attr_accessor :registers
+    def initialize regs
+      @registers = regs
+      regs.each{ |reg| raise "not a reg #{reg}" unless reg.is_a?(Asm::Register) }
+    end
+  end
+
+  class NumLiteral
+    attr_accessor :value
+    def initialize val
+      @value = val
+    end
+  end
+
+end
diff --git a/lib/arm/stack_instruction.rb b/lib/arm/stack_instruction.rb
new file mode 100644
index 00000000..77376f45
--- /dev/null
+++ b/lib/arm/stack_instruction.rb
@@ -0,0 +1,58 @@
+require_relative "instruction"
+
+module Asm
+  # ADDRESSING MODE 4
+  class StackInstruction < Instruction
+
+    def initialize(opcode , condition_code , update_status , args)
+      super(opcode , condition_code , update_status , args)
+      @update_status_flag= 0
+      @rn = reg "r0" # register zero = zero bit pattern
+      # downward growing, decrement before memory access
+      # official ARM style stack as used by gas
+      @write_base = 1
+      if (opcode == :push)
+        @pre_post_index = 1
+        @up_down = 0
+        @is_pop = 0
+      else #pop
+        @pre_post_index = 0
+        @up_down = 1
+        @is_pop = 1
+      end
+    end
+    attr_accessor :pre_post_index, :up_down,
+                  :update_status_flag, :write_base, :is_pop, :rn
+
+    def assemble(io)
+      build
+      instuction_class = 0b10 # OPC_STACK
+      cond = @condition_code.is_a?(Symbol) ? COND_CODES[@condition_code] : @condition_code
+      rn = reg "sp" # sp register
+      #assemble of old
+      val = operand
+      val |= (rn.bits << 16)
+      val |= (is_pop << 16+4) #20
+      val |= (write_base << 16+4+ 1)
+      val |= (update_status_flag << 16+4+ 1+1)
+      val |= (up_down << 16+4+ 1+1+1)
+      val |= (pre_post_index << 16+4+ 1+1+1+1)#24
+      val |= (instuction_class << 16+4+ 1+1+1+1 +2)
+      val |= (cond << 16+4+ 1+1+1+1 +2+2)
+      io.write_uint32 val
+    end
+
+    private
+    # Build representation for source value: one bit set per register in the argument list
+    def build
+      if (args.is_a?(Array))
+        @operand = 0
+        args.each do |register|
+          @operand |= (1 << register.bits)
+        end
+      else
+        raise Asm::AssemblyError.new("invalid operand argument #{args.inspect}")
+      end
+    end
+  end
+end
diff --git a/lib/arm/string_literal.rb b/lib/arm/string_literal.rb
new file mode 100644
index 00000000..36f54800
--- /dev/null
+++ b/lib/arm/string_literal.rb
@@ -0,0 +1,30 @@
+require_relative "../vm/code"
+
+module Asm
+  # The name really says it all.
+  # The only interesting thing is storage.
+  # Currently strings are stored "inline" , ie in the code segment.
+  # Mainly because that works and i ain't no elf expert.
+
+  class StringLiteral < Vm::Code
+
+    # currently aligned to 4 bytes (ie padded with 0) and of course 0 at the end
+    def initialize(str)
+      length = str.bytesize
+      # rounding up to the next 4 (always adding one for zero pad), counted in bytes not characters
+      pad = ((length / 4 ) + 1 ) * 4 - length
+      raise "#{pad} #{self}" unless pad >= 1
+      @string = str + "\x00" * pad
+    end
+
+    # the strings length in bytes, including the padding
+    def length
+      @string.bytesize
+    end
+
+    # just writing the string
+    def assemble(io)
+      io << @string
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/asm/code.rb b/lib/vm/code.rb
similarity index 100%
rename from lib/asm/code.rb
rename to lib/vm/code.rb