copied asm here so that stays working while i break (an) arm

This commit is contained in:
Torsten Ruger 2014-05-03 15:13:15 +03:00
parent fae0c1ab8c
commit 1c86ecb84e
12 changed files with 686 additions and 0 deletions

13
lib/arm/arm_machine.rb Normal file
View File

@ -0,0 +1,13 @@
require "vm/machine"
module Arm
# Arm specific machine. Both hooks currently just answer a fixed string.
class ArmMachine < Vm::Machine
  # value is accepted but not used yet
  def word_load(value)
    "word"
  end

  # call_value is accepted but not used yet
  def function_call(call_value)
    "call"
  end
end
end

98
lib/arm/assembler.rb Normal file
View File

@ -0,0 +1,98 @@
require 'asm/nodes'
require 'asm/block'
require 'stream_reader'
require 'stringio'
require "asm/string_literal"
module Asm
# Assembler is the top level of the code hierarchy, although it is not itself derived from Code.
# Instead an Assembler is a list of blocks (and string constants).
# All code is created in blocks (see there) and there are two styles for that, for forward or
# backward referencing. Read method_missing, add_block and Block#code.
class Assembler
  def initialize
    @blocks = []
    @string_table = {}
  end

  attr_reader :blocks

  # Assembling to string will return a binary string of the whole program, ie all blocks and the
  # strings they use.
  # As a memory reference this would be callable, but more likely you will hand it over to
  # an ObjectWriter as the .text section and then link it. And then execute it :-)
  #
  # May be called repeatedly: a string literal is appended to the blocks only once.
  def assemble_to_string
    # put the strings at the end of the assembled code.
    # adding them fixes their position and makes them assemble after the blocks
    @string_table.each_value do |data|
      # identity check, so a second call does not emit (and re-position) the literal again
      add_block(data) unless @blocks.any? { |existing| existing.equal?(data) }
    end
    io = StringIO.new
    assemble(io)
    io.string
  end

  # Add a string to the string table. Strings are global and constant, so only one copy of each
  # string exists.
  # Internally StringLiterals are created and stored, and during assembly written after the blocks.
  # Returns the StringLiteral for str (the existing one if str was added before).
  def add_string(str)
    @string_table[str] ||= Asm::StringLiteral.new(str)
  end

  # Length of all blocks added so far. Strings only count once assemble_to_string has appended them.
  # This is used to determine where a block goes when it is added after creation (see add_block)
  def length
    @blocks.inject(0) { |sum , item| sum + item.length }
  end

  # This is how you add a forward declared block. This is called automatically when you
  # call block with ruby block, but has to be done manually if not.
  # The block is told its position (the current length) and appended.
  def add_block(block)
    block.at(length)
    @blocks << block
  end

  # return the block of the given name
  # or raise an exception, as this is meant to be called when the block is available
  def get_block(name)
    block = @blocks.find { |b| b.name == name }
    raise "No block found for #{name} (in #{blocks.collect{|b|b.name}.join(':')})" unless block
    block
  end

  # this is used to create blocks.
  # All functions that have no args are interpreted as block names
  # and if a block is provided, it is evaluated in the (ruby)blocks scope and the block added to the
  # program immediately.
  # If no block is provided (forward declaration), you must call code on it later
  def method_missing(meth, *args, &block)
    return super unless args.empty?
    code = Block.new(meth.to_s , self)
    if block_given?
      add_block code
      code.instance_eval(&block)
    end
    code
  end

  private

  # write every block (and appended string literal) to io, in order
  def assemble(io)
    @blocks.each { |obj| obj.assemble(io) }
  end
end
end

View File

@ -0,0 +1,8 @@
module Asm
# Raised when an instruction or operand can not be assembled.
class AssemblyError < StandardError
  # the offending node/argument, when the raiser supplied one (nil otherwise)
  attr_reader :node

  # message:: human readable description
  # node::    optional, the node that caused the error (some callers pass it as second argument)
  def initialize(message, node = nil)
    super(message)
    @node = node
  end
end
end

153
lib/arm/block.rb Normal file
View File

@ -0,0 +1,153 @@
require_relative 'call_instruction'
require_relative 'stack_instruction'
require_relative 'logic_instruction'
require_relative 'memory_instruction'
module Asm
# Opens Code with an empty body, so the constant exists for the subclass declared below.
class Code ; end
# A Block is the smallest unit of code, a list of instructions as it were.
# It is also a point to jump/branch to. An address in the final stream.
# To allow for forward branches, creation does not fix the position.
# The position is fixed in one of three ways
# - create the block with ruby block, signalling that the instantiation point is the position
# - call block.code with the code or if you wish program.add_block (and add your code with calls)
# - the assembly process will pin it if it wasn't set
# Creating blocks is done by calling the blocks name/label on either a program or a block
# (method missing will catch the call and create the block)
# and the easiest way is to go into a ruby block and start writing instructions
# Example (backward jump):
#   program.loop do        create a new block with label loop
#     sub r1 , r1 , 1      count the r1 register down
#     bne :loop            jump back to loop when the counter is not zero
#   end                    (initialization and actual code missing of course)
# Example (forward jump)
#   else_block = program.else
#   program.if do
#     test r1 , 0          test some condition
#     beq :else_block
#     mov . . .. ..        do whatever the if block does
#   end
#   else_block.code do
#     ldr ....             do whatever else does
#   end
# Blocks are also used to create instructions, and so Block has functions for every cpu instruction
# and to make using the cpu functions easier, there are functions that create registers as well
class Block < Code
  # name:: label of the block (stored as symbol)
  # prog:: the program (Assembler) the block belongs to
  def initialize(name , prog)
    super()
    @name = name.to_sym
    @codes = []
    @position = 0
    @program = prog
  end

  attr_reader :name

  # one method per register name, each answering a new Register
  ArmMachine::REGISTERS.each do |reg , number|
    define_method(reg) { Asm::Register.new(reg , number) }
  end

  # Create an instruction of the given class and append it to this block.
  # The (ruby) args are converted to nodes:
  # Integers to NumLiterals, Strings to the programs StringLiterals, Symbols to the
  # (already added) block of that name. Registers and Blocks are taken as they are.
  # Anything else raises.
  def instruction(clazz, opcode , condition_code , update_status , *args)
    arg_nodes = args.map do |arg|
      case arg
      when Asm::Register, Asm::Block then arg
      when Integer then Asm::NumLiteral.new(arg)
      when String then @program.add_string(arg)
      when Symbol then @program.get_block(arg)
      else raise "Invalid argument #{arg.inspect} for instruction"
      end
    end
    add_code clazz.new(opcode , condition_code , update_status , arg_nodes)
  end

  # Define the dsl methods for one opcode: the plain name, the status updating version (s suffix)
  # and both again for every condition code suffix.
  def self.define_instruction(inst , clazz )
    define_method(inst) do |*args|
      instruction clazz , inst , :al , 0 , *args
    end
    define_method("#{inst}s") do |*args|
      instruction clazz , inst , :al , 1 , *args
    end
    ArmMachine::COND_CODES.keys.each do |suffix|
      define_method("#{inst}#{suffix}") do |*args|
        instruction clazz , inst , suffix , 0 , *args
      end
      define_method("#{inst}s#{suffix}") do |*args|
        instruction clazz , inst , suffix , 1 , *args
      end
    end
  end

  [:push, :pop].each do |inst|
    define_instruction(inst , StackInstruction)
  end
  [:adc, :add, :and, :bic, :eor, :orr, :rsb, :rsc, :sbc, :sub].each do |inst|
    define_instruction(inst , LogicInstruction)
  end
  [:mov, :mvn].each do |inst|
    define_instruction(inst , MoveInstruction)
  end
  [:cmn, :cmp, :teq, :tst].each do |inst|
    define_instruction(inst , CompareInstruction)
  end
  [:strb, :str , :ldrb, :ldr].each do |inst|
    define_instruction(inst , MemoryInstruction)
  end
  [:b, :bl , :swi].each do |inst|
    define_instruction(inst , CallInstruction)
  end

  # coding a block fixes its position in the stream.
  # You must call with a block, which is instance_eval'd and provides the actual code for the block
  def code(&block)
    @program.add_block self
    # the proc has to be handed over as a block (&), as a plain argument instance_eval
    # would try to treat it as a source string and raise a TypeError
    instance_eval(&block)
  end

  # length of the codes. In arm it would be the length * 4
  # (strings are stored globally in the Assembler)
  def length
    @codes.inject(0) { |sum , item| sum + item.length }
  end

  # Append an instruction: it is told its position (relative to the block start)
  # and the running position is advanced by its length
  def add_code(kode)
    kode.at(@position)
    @position += kode.length
    @codes << kode
  end

  # write all instructions of the block to io, in order
  def assemble(io)
    @codes.each { |obj| obj.assemble(io) }
  end

  # this is used to create blocks.
  # All functions that have no args are interpreted as block names
  # In fact the block calls are delegated to the program which then instantiates the blocks
  def method_missing(meth, *args, &block)
    return super unless args.empty?
    @program.send(meth , *args , &block)
  end
end
end

View File

@ -0,0 +1,49 @@
require_relative "instruction"
module Asm
# Arm knows just three call style instructions: branch (b), call (bl) and syscall (swi)
# - b is a plain jump, there is no notion of returning
# - bl ("branch with link") stores the pc in the link register so that a return is possible.
#   Returning means moving the link register back into the pc, effectively a branch
# - swi (SoftWareInterrupt) is the system call, ie how the kernel is called.
#   The syscall code has to go into register 7, registers 0-6 carry the call values
#   as for a normal c call
class CallInstruction < Instruction
  # writes the 3 operand bytes first and the opcode/condition byte last
  def assemble(io)
    case opcode
    when :b, :bl
      target = args[0]
      # a block as target becomes the distance to it (8 for the pipeline offset of the pc)
      target = NumLiteral.new(target.position - self.position - 8) if target.is_a? Block
      raise "else not coded #{target.inspect}" unless target.is_a?(Asm::NumLiteral)
      # offset counts words. Packed as signed 32-bit, of which only 3 bytes are written
      # TODO add check that the value fits into 24 bits
      word = [target.value >> 2].pack('l')
      io << word[0,3]
      io.write_uint8 OPCODES[opcode] | (COND_CODES[@condition_code] << 4)
    when :swi
      number = args[0]
      unless number.is_a?(Asm::NumLiteral)
        raise Asm::AssemblyError.new("invalid operand argument expected literal not #{number}")
      end
      io << [number.value].pack('L')[0,3]
      io.write_uint8 0b1111 | (COND_CODES[@condition_code] << 4)
    end
  end
end#class
end

46
lib/arm/instruction.rb Normal file
View File

@ -0,0 +1,46 @@
require_relative "assembly_error"
require_relative "arm_machine"
module Asm
# Opens Code with an empty body, so the constant exists for the subclass declared below.
class Code ; end
# A single cpu instruction.
# Abstract base class, the concrete ones are
# Logic / Move / Compare / Stack / Memory (see there)
#
# The opcode is the short acronym known from assembly code, the arguments are registers,
# labels or string/num literals.
class Instruction < Code
  include ArmMachine

  # regexp source matching any of the condition code names
  COND_POSTFIXES = Regexp.union( COND_CODES.keys.map(&:to_s) ).source

  def initialize(opcode , condition_code , update_status , args)
    @opcode = opcode
    @args = args
    @condition_code = condition_code.to_sym
    @update_status_flag = update_status
    @operand = 0
  end

  # opcode and args as given to the constructor.
  # condition_code: many arm instructions may be conditional, the default being always (al),
  #   ArmMachine::COND_CODES names them.
  # operand: starts as 0 and is computed by the derived classes.
  # update_status_flag: logic instructions may run with or without affecting the status register.
  #   A following conditional instruction only sees the result when the status was updated, but to
  #   run several instructions under the same condition one must be able to leave it untouched.
  attr_reader :opcode, :args, :condition_code, :operand, :update_status_flag

  # every arm instruction is 4 bytes long (thumb not supported)
  def length
    4
  end
end
end

View File

@ -0,0 +1,107 @@
require_relative "instruction"
module Asm
# ADDRESSING MODE 1
# Logic, Maths, Move and Compare instructions (the last two are the subclasses below)
class LogicInstruction < Instruction
  def initialize(opcode , condition_code , update_status , args)
    super(opcode , condition_code , update_status , args)
    @rn = nil
    @i = 0
    @rd = args[0]
  end
  attr_accessor :i, :rn, :rd

  # Build representation for source value: second arg is rn, third the operand
  def build
    @rn = args[1]
    do_build args[2]
  end

  # Compute @operand and the immediate flag @i from the given argument.
  # (stays in subclasses, while build is overridden to provide different arguments)
  # Raises Asm::AssemblyError for arguments that can not be encoded.
  def do_build(arg)
    if arg.is_a?(Asm::StringLiteral)
      # do pc relative addressing with the difference to the instruction
      # 8 is for the funny pipeline adjustment (ie pc pointing to fetch and not execute)
      arg = Asm::NumLiteral.new( arg.position - self.position - 8 )
    end
    if arg.is_a?(Asm::NumLiteral)
      if arg.value.fits_u8?
        # no shifting needed
        @operand = arg.value
        @i = 1
      elsif (op_with_rot = calculate_u8_with_rr(arg))
        @operand = op_with_rot
        @i = 1
      else
        raise Asm::AssemblyError.new("cannot fit numeric literal argument in operand #{arg}")
      end
    elsif arg.is_a?(Asm::Register)
      @operand = arg
      @i = 0
    elsif arg.is_a?(Asm::Shift)
      rm_ref = arg.argument
      @i = 0
      # shift type codes, already moved up by one: the lowest bit is the "shift by register" flag
      shift_op = {'lsl' => 0b000, 'lsr' => 0b010, 'asr' => 0b100,
                  'ror' => 0b110, 'rrx' => 0b110}[arg.type]
      if arg.type == 'ror' && arg.value.nil?
        # ror #0 == rrx
        raise Asm::AssemblyError.new("cannot rotate by zero #{arg.inspect}")
      end
      arg1 = arg.value
      if arg1.is_a?(Asm::NumLiteral)
        if arg1.value >= 32
          raise Asm::AssemblyError.new("cannot shift by more than 31 #{arg1.inspect}")
        end
        shift_imm = arg1.value
      elsif arg1.is_a?(Asm::Register)
        # shift amount comes from a register: set the flag bit in the shift code
        shift_op |= 0x1
        # NOTE(review): relies on the register answering to number — confirm against the Register in use
        shift_imm = arg1.number << 1
      elsif arg.type == 'rrx'
        shift_imm = 0
      end
      # NOTE(review): rm_ref is or'ed directly, so it is presumably a register number — TODO confirm
      @operand = rm_ref | (shift_op << 4) | (shift_imm << 4+3)
    else
      raise Asm::AssemblyError.new("invalid operand argument #{arg.inspect}")
    end
  end

  # build the operand and write the instruction as one 32 bit word
  def assemble(io)
    build
    instuction_class = 0b00 # OPC_DATA_PROCESSING
    val = operand.is_a?(Register) ? operand.bits : operand
    val |= (rd.bits << 12)
    val |= (rn.bits << 12+4)
    val |= (update_status_flag << 12+4+4)#20
    val |= (op_bit_code << 12+4+4 +1)
    val |= (i << 12+4+4 +1+4)
    val |= (instuction_class << 12+4+4 +1+4+1)
    val |= (cond_bit_code << 12+4+4 +1+4+1+2)
    io.write_uint32 val
  end
end
# Compares always update the status and have no destination:
# the first arg is rn, the second the operand and rd is set to r0
class CompareInstruction < LogicInstruction
  def initialize(opcode , condition_code , update_status , args)
    super
    @update_status_flag = 1
    @rn = args[0]
    @rd = reg("r0")
  end

  # operand is the second argument
  def build
    do_build(args[1])
  end
end
# Moves have a destination (first arg, set by the superclass) and an operand (second arg).
# rn is not used and set to r0
class MoveInstruction < LogicInstruction
  def initialize(opcode , condition_code , update_status , args)
    super
    @rn = reg("r0") # register zero = zero bit pattern
  end

  # operand is the second argument
  def build
    do_build(args[1])
  end
end
end

View File

@ -0,0 +1,80 @@
require "asm/nodes"
require_relative "instruction"
module Asm
# ADDRESSING MODE 2
# Implemented: immediate offset with offset=0
class MemoryInstruction < Instruction
  def initialize(opcode , condition_code , update_status , args)
    super(opcode , condition_code , update_status , args)
    @i = 0 #I flag (third bit)
    @pre_post_index = 0 #P flag
    @add_offset = 0 #U flag
    @byte_access = opcode.to_s[-1] == "b" ? 1 : 0 #B (byte) flag
    @w = 0 #W flag
    @is_load = opcode.to_s[0] == "l" ? 1 : 0 #L (load) flag
    @rn = reg "r0" # register zero = zero bit pattern
    @rd = reg "r0" # register zero = zero bit pattern
  end
  attr_accessor :i, :pre_post_index, :add_offset,
                :byte_access, :w, :is_load, :rn, :rd

  # Build representation for target address
  # Raises Asm::AssemblyError when the offset is out of range or the argument type is not handled
  def build
    # NOTE(review): @is_load is 0 or 1 and both are truthy in ruby, so the store branch is never
    # taken. Left as is, because the output described as correct in assemble depends on it.
    if( @is_load )
      @rd = args[0]
      arg = args[1]
    else #store
      @rd = args[1]
      arg = args[0]
    end
    #str / ldr are _serious instructions. With BIG possibilities not half are implemented
    if arg.is_a?(Asm::Register)
      @rn = arg
      if arg.offset != 0
        @operand = arg.offset
        if @operand < 0
          @add_offset = 0
          #TODO test/check/understand
          @operand *= -1
        else
          @add_offset = 1
        end
        if @operand.abs > 4095
          # report the offset as it was given (operand has lost its sign by now)
          raise Asm::AssemblyError.new("reference offset too large/small (max 4095) #{arg.offset}" )
        end
      end
    elsif arg.is_a?(Asm::Label) || arg.is_a?(Asm::NumLiteral)
      @pre_post_index = 1
      @rn = pc
      @use_addrtable_reloc = true
      @addrtable_reloc_target = arg
    else
      raise Asm::AssemblyError.new("invalid operand argument #{arg.inspect}")
    end
  end

  # build the operand and write the instruction as one 32 bit word
  def assemble(io)
    build
    #not sure about these 2 constants. They produce the correct output for str r0 , r1
    # but i can't help thinking that that is because they are not used in that instruction and
    # so it doesn't matter. Will see
    # (note that they override whatever build has computed for the two flags)
    @add_offset = 1
    @pre_post_index = 1
    instuction_class = 0b01 # OPC_MEMORY_ACCESS
    val = operand
    val |= (rd.bits << 12 )
    val |= (rn.bits << 12+4) #16
    val |= (is_load << 12+4 +4)
    val |= (w << 12+4 +4+1)
    val |= (byte_access << 12+4 +4+1+1)
    val |= (add_offset << 12+4 +4+1+1+1)
    val |= (pre_post_index << 12+4 +4+1+1+1+1)#24
    val |= (i << 12+4 +4+1+1+1+1 +1)
    val |= (instuction_class<<12+4 +4+1+1+1+1 +1+1)
    val |= (cond_bit_code << 12+4 +4+1+1+1+1 +1+1+2)
    io.write_uint32 val
  end
end
end

44
lib/arm/nodes.rb Normal file
View File

@ -0,0 +1,44 @@
module Asm
# Plain data holder for a shifted operand, all three attributes start out as nil
class Shift
  attr_accessor :type
  attr_accessor :value
  attr_accessor :argument
end
# A register has a name and the bits (number) it is encoded with.
# As registers can also be used as an address, they carry an offset (0 to start with), that
# may be added to the register before it is used.
# (Arm can also shift the register or the offset, but that is not implemented)
class Register
  attr_accessor :name
  attr_accessor :offset
  attr_accessor :bits

  def initialize(name , bits)
    @name , @bits , @offset = name , bits , 0
  end

  # dsl sugar, to be able to write r1 + 4 for the next word (4) after r1
  # Note that this sets the offset of the receiver, and answers the receiver
  def +(number)
    tap { @offset = number }
  end
end
# maybe not used at all as code_gen::instruction raises if used.
# instead now using Arrays
class RegisterList
  attr_accessor :registers

  # regs:: the Asm::Register instances of the list
  # Raises a RuntimeError naming the first element that is not a register
  def initialize(regs)
    regs.each { |reg| raise "not a reg #{reg.inspect}" unless reg.is_a?(Asm::Register) }
    @registers = regs
  end
end
# Wraps a number that is used as an instruction argument
class NumLiteral
  attr_accessor :value

  def initialize(val)
    self.value = val
  end
end
end

View File

@ -0,0 +1,58 @@
require_relative "instruction"
module Asm
# ADDRESSING MODE 4
# push and pop of a list of registers
class StackInstruction < Instruction
  def initialize(opcode , condition_code , update_status , args)
    super(opcode , condition_code , update_status , args)
    @update_status_flag= 0
    @rn = reg "r0" # register zero = zero bit pattern
    # downward growing, decrement before memory access
    # official ARM style stack as used by gas
    @write_base = 1
    pushing = (opcode == :push)
    # everything that is not a push is treated as pop
    @pre_post_index = pushing ? 1 : 0
    @up_down = pushing ? 0 : 1
    @is_pop = pushing ? 0 : 1
  end
  attr_accessor :pre_post_index, :up_down,
                :update_status_flag, :write_base, :is_pop, :rn

  # build the register mask and write the instruction as one 32 bit word
  def assemble(io)
    build
    instuction_class = 0b10 # OPC_STACK
    cond = @condition_code.is_a?(Symbol) ? COND_CODES[@condition_code] : @condition_code
    rn = reg "sp" # sp register
    # the fields, from the lowest bit up
    val = operand |
          (rn.bits << 16) |
          (is_pop << 20) |
          (write_base << 21) |
          (update_status_flag << 22) |
          (up_down << 23) |
          (pre_post_index << 24) |
          (instuction_class << 26) |
          (cond << 28)
    io.write_uint32 val
  end

  private

  # Build representation for source value: one bit set per register in args
  def build
    unless args.is_a?(Array)
      raise Asm::AssemblyError.new("invalid operand argument #{args.inspect}")
    end
    @operand = 0
    args.each { |register| @operand |= (1 << register.bits) }
  end
end
end

30
lib/arm/string_literal.rb Normal file
View File

@ -0,0 +1,30 @@
require_relative "../vm/code"
module Asm
# The name really says it all.
# The only interesting thing is storage.
# Currently strings are stored "inline" , ie in the code segment.
# Mainly because that works and i ain't no elf expert.
class StringLiteral < Vm::Code
  # currently aligned to 4 (ie padded with 0) and of course 0 at the end
  # Sizes are counted in bytes (not characters), as bytes are what gets written,
  # so strings with multi-byte characters stay aligned too
  def initialize(str)
    length = str.bytesize
    # rounding up to the next 4 (always adding one for zero pad)
    pad = ((length / 4 ) + 1 ) * 4 - length
    raise "#{pad} #{self}" unless pad >= 1
    @string = str + "\x00" * pad
  end

  # the strings length in bytes, including the padding
  def length
    @string.bytesize
  end

  # just writing the string
  def assemble(io)
    io << @string
  end
end
end
end