adds first version of the expanded as assembler from mikko

This commit is contained in:
Torsten Ruger 2014-04-14 18:09:56 +03:00
parent 52e9542d73
commit 408b290b8a
32 changed files with 1943 additions and 1 deletions

2
.gitignore vendored
View File

@ -28,7 +28,7 @@ pkg
#
# For MacOS:
#
#.DS_Store
.DS_Store
# For TextMate
#*.tmproj

18
lib/asm/README.markdown Normal file
View File

@ -0,0 +1,18 @@
Assembler in Ruby
=================
Supports ARM, but is aimed quite specifically at the Raspberry Pi (ARM v7, floating point included).
Outputs ELF object files, with relocation support.
Constant table support exists but isn't very good. Some addressing modes
are not supported or only partially supported.
Supported (pseudo)instructions:
- adc, add, and, bic, eor, orr, rsb, rsc, sbc, sub, cmn, cmp, teq, tst,
mov, mvn, strb, str, ldrb, ldr, push, pop, b, bl, bx, swi
- Conditional versions of above
Thanks to Cyndis for starting this arm/elf project in the first place: https://github.com/cyndis/as

View File

@ -0,0 +1,34 @@
# Table of 32-bit words ("address table") emitted into the output stream.
#
# Two kinds of entries are kept: label entries, whose word is written as 0 and
# registered for an R_ARM_ABS32 relocation, and constant entries, whose word is
# the constant itself. Every entry owns an Asm::LabelObject ("here label") that
# is assembled immediately before its word, so callers can refer to the
# position of that word.
class Asm::ARM::AddrTableObject
  def initialize
    @table = [] # [target label, here label] pairs
    @const = [] # [constant value, here label] pairs
  end

  # Returns the here label of the table word reserved for +label+.
  # Asking for the same label again returns the existing here label instead
  # of growing the table with a duplicate word.
  def add_label(label)
    here_label_for(@table, label)
  end

  # Returns the here label of the table word holding +const+.
  # Equal constants share a single table word.
  def add_const(const)
    here_label_for(@const, const)
  end

  # Emits all label words (each with an absolute relocation) followed by all
  # constant words.
  def assemble(io, as)
    @table.each do |target_label, here_label|
      here_label.assemble io, as
      as.add_relocation io.tell, target_label, Asm::ARM::R_ARM_ABS32,
                        Asm::ARM::Instruction::RelocHandler
      io.write_uint32 0
    end
    @const.each do |const, here_label|
      here_label.assemble io, as
      io.write_uint32 const
    end
  end

  private

  # Looks +key+ up in +entries+ (compared with ==) and returns its here label,
  # creating a fresh entry when none exists yet.
  def here_label_for(entries, key)
    existing = entries.assoc(key)
    return existing[1] if existing

    here_label = Asm::LabelObject.new
    entries << [key, here_label]
    here_label
  end
end

View File

@ -0,0 +1,54 @@
require_relative 'assembler'
module Asm
  # NOTE: the namespace is spelled ARM (not Arm) because that is how this
  # module's constants and methods are referenced, including from this very
  # file (Asm::ARM::AddrTableObject below).
  module ARM
    # Relocation constants
    # Note that in this assembler, a relocation simply means any
    # reference to a label that can only be determined at assembly time
    # or later (as in the normal meaning)
    R_ARM_PC24 = 0x01
    R_ARM_ABS32 = 0x02

    # Unofficial (can't be used for extern relocations)
    R_ARM_PC12 = 0xF0

    # Returns the first AddrTableObject among the assembler's objects.
    # Raises Asm::AssemblyError when there is none.
    # TODO actually find the closest somehow
    def self.closest_addrtable(as)
      table = as.objects.find { |obj| obj.is_a?(Asm::ARM::AddrTableObject) }
      raise Asm::AssemblyError.new('could not find addrtable to use', nil) unless table
      table
    end

    # Patches the bytes at the current position of +io+ so that they refer to
    # +addr+, according to the relocation +type+.
    #
    # R_ARM_PC24:  writes the low three bytes of the word offset
    #              (addr - position - 8) >> 2.
    # R_ARM_ABS32: writes +addr+ as a 32-bit value.
    # R_ARM_PC12:  rewrites bits 0-11 (offset magnitude) and bit 23 (set for a
    #              positive offset) of the 32-bit word already present at the
    #              current position.
    #
    # Raises Asm::AssemblyError when a PC12 offset exceeds 2047 and a plain
    # RuntimeError for an unknown type.
    def self.write_resolved_relocation(io, addr, type)
      case type
      when R_ARM_PC24
        # - 8: on ARM the pc reads as the instruction address plus 8
        diff = addr - io.tell - 8
        packed = [diff >> 2].pack('l')
        io << packed[0, 3]
      when R_ARM_ABS32
        io << [addr].pack('l')
      when R_ARM_PC12
        diff = addr - io.tell - 8
        if diff.abs > 2047
          raise Asm::AssemblyError.new('offset too large for R_ARM_PC12 relocation',
                                       nil)
        end
        val = diff.abs
        sign = diff > 0 ? 1 : 0
        curr = io.read_uint32
        # step back over the word just read so it can be overwritten
        io.seek(-4, IO::SEEK_CUR)
        io.write_uint32((curr & ~0b00000000100000000000111111111111) |
                        val | (sign << 23))
      else
        raise 'unknown relocation type'
      end
    end
  end
end

106
lib/asm/arm/builder_a.rb Normal file
View File

@ -0,0 +1,106 @@
module Asm
  module Arm
    # ADDRESSING MODE 1
    # Complete!
    #
    # Collects the bit fields of a data-processing instruction and packs them
    # into one 32-bit word (see #write).
    class BuilderA
      include Asm::ARM::InstructionTools

      # Shift mnemonic => 3-bit field placed at bit 4 of the operand. The
      # lowest of the three bits is set later when the amount is a register.
      SHIFT_OPS = {'lsl' => 0b000, 'lsr' => 0b010, 'asr' => 0b100,
                   'ror' => 0b110, 'rrx' => 0b110}

      def initialize
        @cond = 0b1110
        @inst_class = 0
        @i = 0
        @opcode = 0
        @s = 0
        @rn = 0
        @rd = 0
        @operand = 0
      end

      attr_accessor :cond, :inst_class, :i, :opcode, :s,
                    :rn, :rd, :operand

      # Creates a builder with the instruction class, opcode and S flag set.
      def self.make(inst_class, opcode, s)
        a = new
        a.inst_class = inst_class
        a.opcode = opcode
        a.s = s
        a
      end

      # Tries to express arg.value as an 8-bit value plus a rotation field.
      # Returns u8 | (rotation << 8), or nil when the value cannot be
      # expressed that way by this algorithm.
      def calculate_u8_with_rr(arg)
        # split the 32-bit binary string into leading zeros and significant
        # bits (trailing zeros are dropped)
        parts = arg.value.to_s(2).rjust(32, '0').scan(/^(0*)(.+?)0*$/).flatten
        pre_zeros = parts[0].length
        imm_len = parts[1].length
        if ((pre_zeros + imm_len) % 2 == 1)
          # the rotation field counts in steps of two bits, so pad to an even
          # bit position
          u8_imm = (parts[1] + '0').to_i(2)
          imm_len += 1
        else
          u8_imm = parts[1].to_i(2)
        end
        return nil unless u8_imm.fits_u8?

        rot_imm = (pre_zeros + imm_len) / 2
        return nil if rot_imm > 15

        u8_imm | (rot_imm << 8)
      end

      # Build representation for source value
      #
      # Accepts a numeric literal, a register, or a shifted register
      # (ShiftNode); sets @operand and the I flag accordingly.
      # Raises Asm::AssemblyError for anything that cannot be encoded.
      def build_operand(arg)
        if arg.is_a?(Asm::Parser::NumLiteralArgNode)
          if arg.value.fits_u8?
            # no shifting needed
            @operand = arg.value
            @i = 1
          elsif (op_with_rot = calculate_u8_with_rr(arg))
            @operand = op_with_rot
            @i = 1
          else
            raise Asm::AssemblyError.new(Asm::ERRSTR_NUMERIC_TOO_LARGE, arg)
          end
        elsif arg.is_a?(Asm::Parser::RegisterArgNode)
          @operand = reg_ref(arg)
          @i = 0
        elsif arg.is_a?(Asm::Parser::ShiftNode)
          @operand = build_shift_operand(arg)
          @i = 0
        else
          raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, arg)
        end
      end

      # Packs all fields into one word and writes it to +io+.
      def write(io, as)
        val = operand | (rd << 12) | (rn << 16) |
              (s << 20) | (opcode << 21) |
              (i << 25) | (inst_class << 26) |
              (cond << 28)
        io.write_uint32 val
      end

      private

      # Encodes "Rm, <shift> <amount>" and returns the operand bits.
      def build_shift_operand(arg)
        rm_ref = reg_ref(arg.argument)
        shift_op = SHIFT_OPS[arg.type]
        amount = arg.value
        if arg.type == 'ror' && amount.nil?
          # ror #0 == rrx
          raise Asm::AssemblyError.new('cannot rotate by zero', arg)
        end

        if amount.is_a?(Asm::Parser::NumLiteralArgNode)
          if amount.value >= 32
            raise Asm::AssemblyError.new('cannot shift by more than 31', amount)
          end
          if amount.value < 0
            raise Asm::AssemblyError.new('cannot shift by a negative amount', amount)
          end
          if arg.type == 'ror' && amount.value == 0
            # would silently be encoded as rrx
            raise Asm::AssemblyError.new('cannot rotate by zero', amount)
          end
          shift_imm = amount.value
        elsif amount.is_a?(Asm::Parser::RegisterArgNode)
          shift_op |= 0x1
          shift_imm = reg_ref(amount) << 1
        elsif arg.type == 'rrx'
          shift_imm = 0
        else
          # previously fell through with shift_imm == nil and crashed below
          raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, amount || arg)
        end
        rm_ref | (shift_op << 4) | (shift_imm << 7)
      end
    end
  end
end

113
lib/asm/arm/builder_b.rb Normal file
View File

@ -0,0 +1,113 @@
module Asm
  module Arm
    # ADDRESSING MODE 2
    # Implemented: register base with an optional immediate offset, and
    # pc-relative access to an address-table entry ("=label" / "=#const").
    class BuilderB
      include Asm::ARM::InstructionTools

      def initialize
        @cond = 0b1110
        @inst_class = 0
        @i = 0 #I flag (third bit)
        @pre_post_index = 0 #P flag
        @add_offset = 0 #U flag
        @byte_access = 0 #B flag
        @w = 0 #W flag
        @load_store = 0 #L flag
        @rn = 0
        @rd = 0
        @operand = 0
      end

      attr_accessor :cond, :inst_class, :i, :pre_post_index, :add_offset,
                    :byte_access, :w, :load_store, :rn, :rd, :operand

      # Creates a builder with instruction class, B flag and L flag set.
      def self.make(inst_class, byte_access, load_store)
        a = new
        a.inst_class = inst_class
        a.byte_access = byte_access
        a.load_store = load_store
        a
      end

      # Reference node that additionally carries the operator and right-hand
      # side of a "base +/- offset" expression.
      class MathReferenceArgNode < Asm::Parser::ReferenceArgNode
        attr_accessor :op, :right
      end

      # Normalises the content of a reference: a MathNode is split into
      # argument/op/right, anything else becomes the argument alone.
      def simplify_reference(arg)
        node = MathReferenceArgNode.new
        if arg.is_a?(Asm::Parser::MathNode)
          node.argument = arg.left
          node.op = arg.op
          node.right = arg.right
        else
          node.argument = arg
        end
        node
      end

      # Build representation for target address
      #
      # Raises Asm::AssemblyError when the argument cannot be encoded.
      def build_operand(arg1)
        if arg1.is_a?(Asm::Parser::ReferenceArgNode)
          argr = simplify_reference(arg1.argument)
          arg = argr.argument
          unless arg.is_a?(Asm::Parser::RegisterArgNode)
            raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, arg)
          end

          @i = 0
          @pre_post_index = 1
          @w = 0
          @rn = reg_ref(arg)
          @operand = 0
          if argr.op && argr.right.is_a?(Asm::Parser::NumLiteralArgNode)
            val = argr.right.value
            # honour the operator: "[r1 - #4]" must subtract
            val = -val if argr.op == '-'
            if val < 0
              @add_offset = 0
              val = -val
            else
              @add_offset = 1
            end
            if val > 4095
              raise Asm::AssemblyError.new('reference offset too large/small (max 4095)', argr.right)
            end
            @operand = val
          elsif argr.op
            raise Asm::AssemblyError.new('reference offset must be an integer literal', argr.right)
          end
        elsif arg1.is_a?(Asm::Parser::LabelEquivAddrArgNode) || arg1.is_a?(Asm::Parser::NumEquivAddrArgNode)
          @i = 0
          @pre_post_index = 1
          @w = 0
          @rn = 15 # pc
          @operand = 0
          # the offset is filled in by an R_ARM_PC12 relocation, see #write
          @use_addrtable_reloc = true
          @addrtable_reloc_target = arg1
        else
          raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, arg1)
        end
      end

      # Packs all fields into one word and writes it to +io+. For address
      # table accesses an entry is added to the address table returned by
      # Asm::ARM.closest_addrtable and a PC12 relocation is registered at the
      # instruction's position.
      def write(io, as, ast_asm, inst)
        val = operand | (rd << 12) | (rn << 16) |
              (load_store << 20) | (w << 21) |
              (byte_access << 22) | (add_offset << 23) |
              (pre_post_index << 24) | (i << 25) |
              (inst_class << 26) | (cond << 28)
        if @use_addrtable_reloc
          closest_addrtable = Asm::ARM.closest_addrtable(as)
          if @addrtable_reloc_target.is_a?(Asm::Parser::LabelEquivAddrArgNode)
            obj = ast_asm.object_for_label(@addrtable_reloc_target.label, inst)
            ref_label = closest_addrtable.add_label(obj)
          elsif @addrtable_reloc_target.is_a?(Asm::Parser::NumEquivAddrArgNode)
            ref_label = closest_addrtable.add_const(@addrtable_reloc_target.value)
          end
          as.add_relocation io.tell, ref_label, Asm::ARM::R_ARM_PC12,
                            Asm::ARM::Instruction::RelocHandler
        end
        io.write_uint32 val
      end
    end
  end
end

52
lib/asm/arm/builder_d.rb Normal file
View File

@ -0,0 +1,52 @@
module Asm
  module Arm
    # ADDRESSING MODE 4
    #
    # Collects the fields of a register-list instruction (used for push/pop)
    # and packs them into one 32-bit word.
    class BuilderD
      include Asm::ARM::InstructionTools

      attr_accessor :cond, :inst_class, :pre_post_index, :up_down,
                    :s, :write_base, :store_load, :rn, :operand

      # Creates a builder with the four given flag values set.
      def self.make(pre_post, up_down, write, store_load)
        builder = new
        builder.pre_post_index = pre_post
        builder.up_down = up_down
        builder.write_base = write
        builder.store_load = store_load
        builder
      end

      def initialize
        @cond = 0b1110
        @inst_class = Asm::ARM::Instruction::OPC_STACK
        @pre_post_index = 0
        @up_down = 0
        @s = 0
        @write_base = 0
        @store_load = 0
        @rn = 0
        @operand = 0
      end

      # Build representation for source value
      #
      # Turns a register list into a bit mask with one bit per register
      # number. Raises Asm::AssemblyError for any other argument.
      def build_operand(arg)
        unless arg.is_a?(Asm::Parser::RegisterListArgNode)
          raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, arg)
        end

        @operand = 0
        arg.registers.each do |reg_node|
          @operand |= (1 << reg_ref(reg_node))
        end
      end

      # Packs all fields into one word and writes it to +io+.
      def write(io, as)
        word = operand |
               (rn << 16) |
               (store_load << 20) |
               (write_base << 21) |
               (s << 22) |
               (up_down << 23) |
               (pre_post_index << 24) |
               (inst_class << 26) |
               (cond << 28)
        io.write_uint32 word
      end
    end
  end
end

View File

@ -0,0 +1,126 @@
require_relative 'arm_assembler'
require_relative 'parser'
require 'stringio'
module Asm
  module Arm
    # Ruby DSL front end: instructions are issued as method calls (e.g.
    # `mov r0, 5`) and collected in an Asm::Assembler.
    class CodeGenerator
      def initialize
        @asm = Asm::Assembler.new
        @externs = []
      end

      # Adds raw data to the output.
      def data(str)
        @asm.add_object Asm::DataObject.new(str)
      end

      # One method per register name; each returns a [:reg, name] pair that
      # #instruction turns into a register argument node.
      %w(r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12
         r13 r14 r15 a1 a2 a3 a4 v1 v2 v3 v4 v5 v6
         rfp sl fp ip sp lr pc
      ).each { |reg|
        define_method(reg) {
          [:reg, reg]
        }
      }

      # Builds an instruction node from +name+ and +args+ and adds the
      # resulting instruction to the assembler.
      #
      # Accepted arguments: register pairs as returned by the register
      # methods, Integers (numeric literals), Symbols (label references) and
      # labels created by #label / #label! / #extern.
      # Raises RuntimeError for anything else.
      def instruction(name, *args)
        node = Asm::Parser::InstructionNode.new
        node.opcode = name.to_s
        node.args = args.map { |arg| arg_node_for(arg) }
        @asm.add_object Asm::ARM::Instruction.new(node)
      end

      # One method per mnemonic, plus the "s" (set flags) variant and every
      # condition-suffixed variant of both.
      %w(adc add and bic eor orr rsb rsc sbc sub
         mov mvn cmn cmp teq tst b bl bx swi strb
      ).each { |inst|
        define_method(inst) { |*args|
          instruction inst.to_sym, *args
        }
        define_method(inst+'s') { |*args|
          instruction (inst+'s').to_sym, *args
        }
        %w(al eq ne cs mi hi cc pl ls vc
           lt le ge gt vs
        ).each { |cond_suffix|
          define_method(inst+cond_suffix) { |*args|
            instruction (inst+cond_suffix).to_sym, *args
          }
          define_method(inst+'s'+cond_suffix) { |*args|
            instruction (inst+'s'+cond_suffix).to_sym, *args
          }
        }
      }

      # Label that adds itself to the assembler when #set! is called.
      class GeneratorLabel < Asm::LabelObject
        def initialize(asm)
          super() # initialise the LabelObject state as well
          @asm = asm
        end

        def set!
          @asm.add_object self
        end
      end

      # Named label that is marked extern on creation.
      class GeneratorExternLabel < Asm::LabelObject
        def initialize(name)
          super() # initialise the LabelObject state as well
          @name = name
          extern!
        end

        attr_reader :name
      end

      # Creates a label that is not yet placed; call set! on it to place it.
      def label
        GeneratorLabel.new(@asm)
      end

      # Creates a label and places it at the current position.
      def label!
        lbl = GeneratorLabel.new(@asm)
        lbl.set!
        lbl
      end

      # Returns the extern label for +sym+, creating it on first use.
      def extern(sym)
        known = @externs.find { |extern| extern.name == sym }
        return known if known

        lbl = GeneratorExternLabel.new(sym)
        @externs << lbl
        @asm.add_object lbl
        lbl
      end

      # Assembles everything and returns the resulting bytes as a String.
      def assemble
        io = StringIO.new
        @asm.assemble(io)
        io.string
      end

      def relocations
        @asm.relocations
      end

      private

      # Converts one DSL argument into the object stored in the node's args.
      def arg_node_for(arg)
        if arg.is_a?(Array) && arg[0] == :reg
          Asm::Parser::RegisterArgNode.new { |n| n.name = arg[1] }
        elsif arg.is_a?(Integer)
          Asm::Parser::NumLiteralArgNode.new { |n| n.value = arg }
        elsif arg.is_a?(Symbol)
          Asm::Parser::LabelRefArgNode.new { |n| n.label = arg.to_s }
        elsif arg.is_a?(GeneratorLabel) || arg.is_a?(GeneratorExternLabel)
          arg
        else
          # includes arrays without the :reg tag, which used to be dropped
          # silently
          raise 'Invalid argument `%s\' for instruction' % arg.inspect
        end
      end
    end
  end
end

149
lib/asm/arm/instruction.rb Normal file
View File

@ -0,0 +1,149 @@
module Asm
  module Arm
    # One assembler instruction: splits the mnemonic into opcode, S flag and
    # condition, and encodes itself in #assemble.
    class Asm::ARM::Instruction
      include Asm::ARM::InstructionTools

      COND_POSTFIXES = Regexp.union(%w(eq ne cs cc mi pl vs vc hi ls ge lt gt le al)).source

      # Every mnemonic #assemble knows. 'b' is listed before 'bl' so that an
      # ambiguous spelling such as "bls" keeps being read as b + ls.
      MNEMONICS = %w(adc add and bic eor orr rsb rsc sbc sub
                     mov mvn cmn cmp teq tst
                     strb str ldrb ldr push pop
                     b bl bx swi)

      # <mnemonic>[s][<condition>]
      MNEMONIC_PATTERN =
        /\A(#{Regexp.union(MNEMONICS).source})(s)?(#{COND_POSTFIXES})?\z/

      # node    - parsed instruction node (responds to opcode and args)
      # ast_asm - object used to resolve label references (may be nil when no
      #           label reference arguments are used)
      def initialize(node, ast_asm = nil)
        @node = node
        @ast_asm = ast_asm
        @args = node.args
        mnemonic = node.opcode.downcase
        if (m = MNEMONIC_PATTERN.match(mnemonic))
          # Matching against the known mnemonics keeps "teq", "bics", "adcs",
          # ... intact; blindly stripping a trailing condition turned e.g.
          # "teq" into "t" + "eq".
          @opcode = m[1].to_sym
          @s = !m[2].nil?
          @cond = (m[3] || 'al').to_sym
        else
          # Unknown mnemonic: keep the old suffix stripping so that #assemble
          # reports it as an unknown instruction.
          @cond = :al
          if (mnemonic =~ /(#{COND_POSTFIXES})$/)
            @cond = $1.to_sym
            mnemonic = mnemonic[0..-3]
          end
          if (mnemonic =~ /s$/)
            @s = true
            mnemonic = mnemonic[0..-2]
          else
            @s = false
          end
          @opcode = mnemonic.to_sym
        end
      end

      attr_reader :opcode, :args

      OPC_DATA_PROCESSING = 0b00
      OPC_MEMORY_ACCESS = 0b01
      OPC_STACK = 0b10

      # These are used differently in the
      # instruction encoders
      OPCODES = {
        :adc => 0b0101, :add => 0b0100,
        :and => 0b0000, :bic => 0b1110,
        :eor => 0b0001, :orr => 0b1100,
        :rsb => 0b0011, :rsc => 0b0111,
        :sbc => 0b0110, :sub => 0b0010,
        # for these Rn is sbz (should be zero)
        :mov => 0b1101,
        :mvn => 0b1111,
        # for these Rd is sbz and S=1
        :cmn => 0b1011,
        :cmp => 0b1010,
        :teq => 0b1001,
        :tst => 0b1000,
        :b => 0b1010,
        :bl => 0b1011,
        :bx => 0b00010010
      }

      COND_BITS = {
        :al => 0b1110, :eq => 0b0000,
        :ne => 0b0001, :cs => 0b0010,
        :mi => 0b0100, :hi => 0b1000,
        :cc => 0b0011, :pl => 0b0101,
        :ls => 0b1001, :vc => 0b0111,
        :lt => 0b1011, :le => 0b1101,
        :ge => 0b1010, :gt => 0b1100,
        :vs => 0b0110
      }

      RelocHandler = Asm::ARM.method(:write_resolved_relocation)

      # Writes the encoded instruction to +io+, registering relocations with
      # +as+ where a label has to be resolved later.
      # Raises Asm::AssemblyError for unknown instructions or bad arguments.
      def assemble(io, as)
        s = @s ? 1 : 0
        case opcode
        when :adc, :add, :and, :bic, :eor, :orr, :rsb, :rsc, :sbc, :sub
          a = BuilderA.make(OPC_DATA_PROCESSING, OPCODES[opcode], s)
          a.cond = COND_BITS[@cond]
          a.rd = reg_ref(args[0])
          a.rn = reg_ref(args[1])
          a.build_operand args[2]
          a.write io, as
        when :cmn, :cmp, :teq, :tst
          a = BuilderA.make(OPC_DATA_PROCESSING, OPCODES[opcode], 1)
          a.cond = COND_BITS[@cond]
          a.rn = reg_ref(args[0])
          a.rd = 0
          a.build_operand args[1]
          a.write io, as
        when :mov, :mvn
          a = BuilderA.make(OPC_DATA_PROCESSING, OPCODES[opcode], s)
          a.cond = COND_BITS[@cond]
          a.rn = 0
          a.rd = reg_ref(args[0])
          a.build_operand args[1]
          a.write io, as
        when :strb, :str
          # NOTE: for stores the address comes first and the register second
          a = BuilderB.make(OPC_MEMORY_ACCESS, (opcode == :strb ? 1 : 0), 0)
          a.cond = COND_BITS[@cond]
          a.rd = reg_ref(args[1])
          a.build_operand args[0]
          a.write io, as, @ast_asm, self
        when :ldrb, :ldr
          a = BuilderB.make(OPC_MEMORY_ACCESS, (opcode == :ldrb ? 1 : 0), 1)
          a.cond = COND_BITS[@cond]
          a.rd = reg_ref(args[0])
          a.build_operand args[1]
          a.write io, as, @ast_asm, self
        when :push, :pop
          # downward growing, decrement before memory access
          # official ARM style stack as used by gas
          if (opcode == :push)
            a = BuilderD.make(1, 0, 1, 0)
          else
            a = BuilderD.make(0, 1, 1, 1)
          end
          a.cond = COND_BITS[@cond]
          a.rn = 13 # sp
          a.build_operand args[0]
          a.write io, as
        when :b, :bl
          arg = args[0]
          if arg.is_a?(Asm::Parser::NumLiteralArgNode)
            jmp_val = arg.value >> 2
            packed = [jmp_val].pack('l')
            # signed 32-bit, condense to 24-bit
            # TODO add check that the value fits into 24 bits
            io << packed[0, 3]
          elsif arg.is_a?(Asm::LabelObject) || arg.is_a?(Asm::Parser::LabelRefArgNode)
            arg = @ast_asm.object_for_label(arg.label, self) if arg.is_a?(Asm::Parser::LabelRefArgNode)
            as.add_relocation(io.tell, arg, Asm::ARM::R_ARM_PC24, RelocHandler)
            io << "\x00\x00\x00"
          else
            # without the three offset bytes the output would be one byte
            # long and corrupt everything after it
            raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, arg)
          end
          io.write_uint8 OPCODES[opcode] | (COND_BITS[@cond] << 4)
        when :bx
          rm = reg_ref(args[0])
          io.write_uint32 rm | (0b1111111111110001 << 4) | (OPCODES[:bx] << 20) |
                          (COND_BITS[@cond] << 28)
        when :swi
          arg = args[0]
          if arg.is_a?(Asm::Parser::NumLiteralArgNode)
            packed = [arg.value].pack('L')[0, 3]
            io << packed
            io.write_uint8 0b1111 | (COND_BITS[@cond] << 4)
          else
            raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, arg)
          end
        else
          raise Asm::AssemblyError.new("unknown instruction #{opcode}", @node)
        end
      end
    end
  end
end

View File

@ -0,0 +1,25 @@
module Asm
  module Arm
    # Helpers shared by the instruction class and the operand builders.
    module Asm::ARM::InstructionTools
      # Translates a register argument node into its register number (0-15).
      # Register names are matched case-insensitively.
      #
      # Raises Asm::AssemblyError when +arg+ is not a register node or names
      # an unknown register.
      def reg_ref(arg)
        unless arg.is_a?(Asm::Parser::RegisterArgNode)
          raise Asm::AssemblyError.new('argument must be a register', arg)
        end

        # r0..r15 map onto their own number
        numbers = {}
        16.times { |n| numbers["r#{n}"] = n }
        # alternative names for the same registers
        numbers.update(
          'a1' => 0, 'a2' => 1, 'a3' => 2, 'a4' => 3,
          'v1' => 4, 'v2' => 5, 'v3' => 6, 'v4' => 7, 'v5' => 8, 'v6' => 9,
          'rfp' => 9, 'sl' => 10, 'fp' => 11, 'ip' => 12, 'sp' => 13,
          'lr' => 14, 'pc' => 15
        )

        number = numbers[arg.name.downcase]
        return number if number

        raise Asm::AssemblyError.new('unknown register %s' % arg.name.downcase, arg)
      end
    end
  end
end

43
lib/asm/assembler.rb Normal file
View File

@ -0,0 +1,43 @@
module Asm
  ERRSTR_NUMERIC_TOO_LARGE = 'cannot fit numeric literal argument in operand'
  ERRSTR_INVALID_ARG = 'invalid operand argument'

  # Collects assemblable objects and relocations and writes them to a stream.
  class Assembler
    attr_reader :relocations, :objects

    def initialize
      @objects = []
      @label_objects = []
      @label_callbacks = []
      @relocations = []
    end

    # Appends an object (anything responding to assemble(io, assembler)).
    def add_object(obj)
      @objects << obj
    end

    # Records a relocation; the arguments are handed to Asm::Relocation.new.
    def add_relocation(*args)
      @relocations << Asm::Relocation.new(*args)
    end

    # Stores a callback for a label together with a stream position.
    def register_label_callback(label, io_pos, &block)
      @label_callbacks << [label, io_pos, block]
    end

    # Assembles all objects in order, then applies every relocation at its
    # recorded position. Relocations against extern labels are called with
    # the relocation's own position as address and stay in #relocations; all
    # other relocations are removed once applied.
    def assemble(io)
      @objects.each { |object| object.assemble(io, self) }

      @relocations.delete_if do |relocation|
        io.seek relocation.position
        label = relocation.label
        address = label.extern? ? io.tell : label.address
        relocation.handler.call(io, address, relocation.type)
        !label.extern?
      end
    end
  end
end

12
lib/asm/assembly_error.rb Normal file
View File

@ -0,0 +1,12 @@
module Asm
  # Error raised while assembling. Besides the message it carries the node
  # the error refers to (may be nil).
  class AssemblyError < StandardError
    attr_reader :node

    def initialize(message, node)
      super(message)
      @node = node
    end
  end
end

85
lib/asm/ast_assembler.rb Normal file
View File

@ -0,0 +1,85 @@
module Asm
  # Turns a parsed syntax tree into objects of an Asm::Assembler and keeps
  # track of the symbols (labels) that were seen.
  class AstAssembler
    # asm_arch - architecture module; its Instruction class is instantiated
    #            for every instruction node.
    def initialize(asm_arch)
      @asm_arch = asm_arch
      @symbols = {}
      @inst_label_context = {}
      @asm = Asm::Assembler.new
    end

    def assembler
      @asm
    end

    # Walks the top-level children of +ast+ and adds labels, instructions and
    # directive results to the assembler.
    # Raises Asm::AssemblyError for an unknown directive.
    def load_ast(ast)
      label_breadcrumb = []
      ast.children.each do |cmd|
        if cmd.is_a?(Asm::Parser::LabelNode)
          # the number of leading slashes says how many levels of the current
          # label path are kept as prefix
          m = /^\/+/.match(cmd.name)
          count = m ? m[0].length : 0
          label_breadcrumb = label_breadcrumb[0, count]
          label_breadcrumb << cmd.name[count..-1]
          @asm.add_object object_for_label(label_breadcrumb.join('/'))
        elsif cmd.is_a?(Asm::Parser::InstructionNode)
          inst = @asm_arch::Instruction.new(cmd, self)
          @asm.add_object inst
          # remembered so that label references inside the instruction can be
          # qualified later (see symbol_for_label)
          @inst_label_context[inst] = label_breadcrumb
        elsif cmd.is_a?(Asm::Parser::DirectiveNode)
          load_directive(cmd)
        end
      end
    end

    # instruction is used for label context
    #
    # Returns the symbol hash (:label, :linkage, :name) for +name+, creating
    # it on first use. With an instruction, leading slashes in +name+ select
    # how many levels of that instruction's label path are used as prefix.
    def symbol_for_label(name, instruction=nil)
      if instruction
        context = @inst_label_context[instruction]
        m = /^(\/*)(.+)/.match(name)
        breadcrumb = context[0, m[1].length]
        breadcrumb << m[2]
        qual_name = breadcrumb.join('/')
      else
        qual_name = name
      end
      # Store under the qualified name; storing under the raw name made the
      # lookup below return nil for context-qualified labels.
      @symbols[qual_name] ||= {:label => Asm::LabelObject.new,
                               :linkage => Elf::Constants::STB_LOCAL,
                               :name => qual_name}
    end

    def object_for_label(name, instruction=nil)
      symbol_for_label(name, instruction)[:label]
    end

    def assemble(io)
      @asm.assemble io
    end

    def symbols
      @symbols.values
    end

    def relocations
      @asm.relocations
    end

    private

    # Handles one directive node.
    def load_directive(cmd)
      if cmd.name == 'global'
        symbol_for_label(cmd.value)[:linkage] = Elf::Constants::STB_GLOBAL
      elsif cmd.name == 'extern'
        object_for_label(cmd.value).extern!
      elsif cmd.name == 'hexdata'
        bytes = cmd.value.strip.split(/\s+/).map { |hex| hex.to_i(16) }.pack('C*')
        @asm.add_object Asm::DataObject.new(bytes)
      elsif cmd.name == "asciz"
        # SECURITY: eval runs the directive text as Ruby code, so assembling
        # an untrusted source file can execute arbitrary code. Left as is
        # because replacing it would change which string syntax is accepted.
        str = eval(cmd.value) + "\x00"
        @asm.add_object Asm::DataObject.new(str)
      elsif defined?(Asm::ARM) && cmd.name == 'addrtable'
        @asm.add_object Asm::ARM::AddrTableObject.new
      else
        raise Asm::AssemblyError.new('unknown directive', cmd)
      end
    end
  end
end

149
lib/asm/command_line.rb Normal file
View File

@ -0,0 +1,149 @@
require_relative 'parser'
require_relative 'assembler'
require_relative 'objectwriter'
require 'optparse'
require 'ostruct'
module Asm
  # Command line front end: parses ARGV, assembles the input file and writes
  # an object file.
  class CommandLine
    # Parses ARGV. Prints the usage text and exits when no input file is
    # given or an unsupported target is requested.
    def initialize
      options = OpenStruct.new
      options.output_file = "a.out"
      options.target = :arm
      parser = OptionParser.new do |opts|
        opts.banner = "Usage: as [options] <input file>"
        opts.separator ""
        opts.separator "Options:"
        opts.on("-t", "--target TARGET",
                "Specify target architecture (arm [default], ttk91)") { |o|
          options.target = o.to_sym
          if (not [:arm, :ttk91].include?(options.target))
            puts opts
            exit
          end
        }
        opts.on("-o", "--output FILENAME",
                "Specify output filename for object file") { |o|
          options.output_file = o
        }
        opts.on("-s", "--show-ast",
                "Show parse tree") { |o|
          options.show_ast = true
        }
        opts.on_tail("-h", "--help", "Show this message") {
          puts opts
          exit
        }
      end
      parser.parse!(ARGV)
      options.input_file = ARGV.shift
      if (not options.input_file)
        puts parser
        exit
      end
      @options = options
    end

    attr_reader :options

    # Reads the input ('-' means stdin), parses and assembles it and saves
    # the object file. Exit codes: 2 unreadable input, 3 parse error,
    # 4 assembly error; with --show-ast the tree is printed and the process
    # exits with 0.
    def run
      begin
        if (options.input_file == '-')
          code = $stdin.read
        else
          code = File.read(options.input_file)
        end
      rescue => err
        puts 'as: could not read input file: ' + err.message
        exit 2
      end

      begin
        ast = Asm::Parser.parse(code)
      rescue Asm::ParseError => err
        puts 'as: parse error on line %d, column %d' % [err.line+1, err.column+1]
        print_source_context(code, err.line, err.column, err.message)
        exit 3
      end

      if (options.show_ast)
        require 'pp'
        pp ast
        exit 0
      end

      case options.target
      when :arm
        require_relative 'arm_assembler.rb'
        as_module = Asm::ARM
        as_target = Elf::Constants::TARGET_ARM
      when :ttk91
        require_relative 'ttk91_assembler.rb'
        as_module = Asm::TTK91
        as_target = Elf::Constants::TARGET_TTK91
      end

      asm = Asm::AstAssembler.new(as_module)
      begin
        asm.load_ast ast
        data = StringIO.new
        asm.assemble(data)
        symbols = asm.symbols
      rescue Asm::AssemblyError => err
        if (err.node)
          puts 'as: assembly error on line %d, column %d' % [
            err.node.line+1, err.node.column+1]
          print_source_context(code, err.node.line, err.node.column, err.message)
        else
          puts 'as: ' + err.message
        end
        exit 4
      end

      writer = Asm::ObjectWriter.new(as_target)
      writer.set_text data.string
      reloc_name_ref = {}
      symbols.each { |symbol|
        label = symbol[:label]
        if (label.extern?)
          reloc_name_ref[label] = symbol[:name]
          writer.add_reloc_symbol symbol[:name]
        else
          writer.add_symbol symbol[:name], symbol[:label].address, symbol[:linkage]
        end
      }
      asm.relocations.each { |reloc|
        writer.add_reloc reloc.position, reloc_name_ref[reloc.label], reloc.type
      }
      begin
        writer.save(options.output_file)
      rescue => err
        puts 'as: cannot save output file: ' + err.message
      end
    end

    private

    # Prints the offending source line, a caret under the error column and
    # the message. Tolerates a line index past the end of the source and a
    # column of 0, both of which used to raise while reporting the error.
    def print_source_context(code, line_index, column, message)
      line = code.split("\n")[line_index].to_s
      puts line.gsub(/\s/, ' ')
      puts((' ' * [column - 1, 0].max) + '^')
      puts ' ' + message
    end
  end
end

if (__FILE__ == $0)
  Asm::CommandLine.new.run
end

11
lib/asm/data_object.rb Normal file
View File

@ -0,0 +1,11 @@
module Asm
  # Chunk of raw data that is copied to the output unchanged.
  class DataObject
    def initialize(data)
      @data = data
    end

    # Appends the data to +io+; the assembler argument is not used.
    def assemble(io, as)
      io.<<(@data)
    end
  end
end

32
lib/asm/label_object.rb Normal file
View File

@ -0,0 +1,32 @@
module Asm
  # A position in the output that other objects can refer to. A label is
  # either placed (its address is recorded when it is assembled) or extern.
  class LabelObject
    attr_writer :address

    def initialize
      @address = nil
      @extern = false
    end

    # Marks the label as extern.
    def extern!
      @extern = true
    end

    def extern?
      @extern
    end

    # Address of the label: always 0 for extern labels.
    # Raises RuntimeError when the label has not been given an address yet.
    def address
      return 0 if extern?
      raise 'Tried to use label object that has not been set' if @address.nil?

      @address
    end

    # Records the current position of +io+ as the label's address.
    def assemble(io, as)
      self.address = io.tell
    end
  end
end

48
lib/asm/object_writer.rb Normal file
View File

@ -0,0 +1,48 @@
require_relative 'elfobject'
module Asm
  # Assembles an object file out of a string table, a symbol table, a text
  # section and a relocation table for the text section.
  #
  # NOTE(review): this class refers to both ELF:: and Elf:: - confirm which
  # spelling the section classes and constants are actually defined under.
  class ObjectWriter
    def initialize(target)
      @object = ELF::ObjectFile.new(target)

      string_table = ELF::StringTableSection.new(".strtab")
      @symbol_table = ELF::SymbolTableSection.new(".symtab", string_table)
      @text = ELF::TextSection.new(".text")
      @reloc_table = ELF::RelocationTableSection.new(".text.rel", @symbol_table, @text)

      # sections are added in this order: strtab, symtab, text, text.rel
      [string_table, @symbol_table, @text, @reloc_table].each do |section|
        @object.add_section section
      end
    end

    # Sets the content of the text section and defines "_start" at offset 0.
    def set_text(text)
      @text.text = text
      add_symbol "_start", 0
    end

    # Adds a function symbol located in the text section.
    def add_symbol(name, offset, linkage = Elf::Constants::STB_GLOBAL)
      @symbol_table.add_func_symbol name, offset, @text, linkage
    end

    # Adds a global function symbol at offset 0 that belongs to no section.
    def add_reloc_symbol(name)
      @symbol_table.add_func_symbol name, 0, nil, Elf::Constants::STB_GLOBAL
    end

    def add_reloc(offset, label, type)
      @reloc_table.add_reloc offset, label, type
    end

    # Writes the object file to +filename+ in binary mode.
    def save(filename)
      File.open(filename, 'wb') do |file|
        write file
      end
    end

    def write(io)
      @object.write io
    end
  end
end

269
lib/asm/parser.rb Normal file
View File

@ -0,0 +1,269 @@
require_relative 'str_scanner'
# The error lives in Asm (not AS): it is raised and rescued everywhere as
# Asm::ParseError.
module Asm
  # Raised when the source text cannot be parsed. Remembers the scanner's
  # line and column at the time the error was created.
  class ParseError < StandardError
    # message - description of the problem
    # s       - scanner (anything responding to line and column)
    def initialize(message, s)
      super(message)
      @line = s.line
      @column = s.column
    end

    attr_reader :line, :column
  end
end
# Recursive-descent parser for assembler source text. Every parse_* method
# takes the scanner, returns a node when it recognised its element and nil
# otherwise.
class Asm::Parser
  # Parses +str+ completely; the result is available through #ast.
  def initialize(str)
    scanner = Asm::Scanner.new(str)
    @ast = parse_toplevel scanner
  end

  attr_reader :ast

  # Convenience wrapper: parses +str+ and returns the top-level node.
  def self.parse(str)
    new(str).ast
  end

  # Base class of all nodes. When created with a scanner it records the
  # scanner's prev_line / prev_column, otherwise line and column are 0.
  # The new node is yielded to the block, if one is given.
  class Node
    def initialize(s = nil)
      if (s)
        @line = s.prev_line
        @column = s.prev_column
      else
        @line = 0
        @column = 0
      end
      yield self if block_given?
    end

    attr_reader :line, :column
  end

  class ToplevelNode < Node
    attr_accessor :children
  end

  # Parses elements until the end of the input is reached.
  def parse_toplevel(s)
    node = ToplevelNode.new(s)
    node.children = []
    while (not s.eos?)
      node.children << parse(s)
    end
    node
  end

  # Parses one element: comment, directive, label or instruction (tried in
  # that order), skipping whitespace around it.
  # Raises Asm::ParseError when none of them matches.
  def parse(s)
    s.scan(/\s*/)
    node = nil
    %w(comment directive label instruction).each { |em|
      if (node = send('parse_'+em, s))
        break
      end
    }
    raise Asm::ParseError.new('could not parse element', s) unless node
    s.scan(/\s*/)
    node
  end

  class CommentNode < Node; end

  # ";" up to the end of the line
  def parse_comment(s)
    if (s.scan(/;.*?$/))
      CommentNode.new(s)
    end
  end

  class DirectiveNode < Node
    attr_accessor :name, :value
  end

  # ".name" optionally followed by a value on the same line
  def parse_directive(s)
    if (m = s.scan(/\.(\w+)(?:(?!$)\s+(.+)\s*?$)?/))
      DirectiveNode.new(s) { |n|
        n.name = m[0]
        n.value = m[1]
      }
    end
  end

  class LabelNode < Node
    attr_accessor :name
  end

  # "name:" - the name may start with any number of slashes
  def parse_label(s)
    if (m = s.scan(/(\/*\w+):/))
      LabelNode.new(s) { |n|
        n.name = m[0]
      }
    end
  end

  class InstructionNode < Node
    attr_accessor :opcode, :args
  end

  # A word followed by zero or more comma-separated arguments.
  def parse_instruction(s)
    if (m = s.scan(/(\w+)/))
      node = InstructionNode.new(s) { |n|
        n.opcode = m[0]
        n.args = []
      }
      # no arguments when the opcode is directly followed by the end of the
      # line or a comment
      if (not s.scan(/\s*($|;)/))
        loop {
          arg = parse_arg(s)
          node.args << arg
          break if not s.scan(/\s*,/)
        }
      end
      node
    end
  end

  class ArgNode < Node
  end

  # Parses one argument (reference, register, register list, numeric literal
  # or label reference, tried in that order). A following shift or arithmetic
  # operator wraps the argument in the operator's node.
  # Raises Asm::ParseError when no argument is found.
  def parse_arg(s)
    s.scan(/\s*/)
    node = nil
    %w(reference register register_list num_literal label_ref).each { |em|
      if (node = send('parse_'+em, s))
        break
      end
    }
    raise Asm::ParseError.new('expected argument but none found', s) unless node
    if (node2 = parse_arg_op(s))
      node2.argument = node
      node = node2
    end
    s.scan(/\s*/)
    node
  end

  # Parses an optional shift or arithmetic operator following an argument.
  def parse_arg_op(s)
    s.scan(/\s*/)
    node = nil
    %w(shift math).each do |em|
      if (node = send('parse_'+em, s))
        break
      end
    end
    s.scan(/\s*/)
    node
  end

  class ShiftNode < Node
    attr_accessor :type, :value, :argument
  end

  # Shift mnemonic (case-insensitive, must be followed by whitespace) and,
  # except for rrx, the shift amount. For rrx the value stays nil.
  def parse_shift(s)
    if (m = s.scan(/(lsl|lsr|asr|ror|rrx)\s+/i))
      op = m[0].downcase
      if (op == 'rrx' or arg = parse_arg(s))
        ShiftNode.new(s) { |n|
          n.type = m[0].downcase
          n.value = arg
        }
      else
        nil
      end
    end
  end

  class MathNode < Node
    attr_accessor :left, :right, :op
    alias_method :argument, :left
    alias_method :argument=, :left=
  end

  # "+" or "-" followed by the right-hand argument; the left side is filled
  # in by parse_arg through argument=.
  def parse_math(s)
    if (m = s.scan_str(/[\+\-]/))
      if (arg1 = parse_arg(s))
        MathNode.new(s) do |n|
          n.right = arg1
          n.op = m
        end
      else
        raise Asm::ParseError.new('expected right side for arithmetic op', s)
      end
    end
  end

  REGISTER_NAMES = %w(r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12
                      r13 r14 r15 a1 a2 a3 a4 v1 v2 v3 v4 v5 v6
                      rfp sl fp ip sp lr pc
  )

  # Longest names first and a trailing word boundary: a plain alternation
  # matched "r1" as a prefix of "r10".."r15" and "sp" as a prefix of a label
  # such as "spam".
  REGISTER_REGEXP =
    /(?:#{Regexp.union(*REGISTER_NAMES.sort_by { |name| -name.length }).source})\b/

  class RegisterArgNode < ArgNode
    attr_accessor :name
  end

  def parse_register(s)
    if (m = s.scan_str(REGISTER_REGEXP))
      RegisterArgNode.new(s) { |n|
        n.name = m
      }
    end
  end

  class RegisterListArgNode < ArgNode
    attr_accessor :registers
  end

  # "{" register [","] register ... "}"
  # Returns nil when something other than a register appears in the list.
  def parse_register_list(s)
    if (m = s.scan(/\{/))
      node = RegisterListArgNode.new(s) do |n|
        n.registers = []
      end
      loop do
        s.scan(/\s*/)
        reg = parse_register(s)
        if (not reg)
          return nil
        end
        s.scan(/\s*,?/)
        node.registers << reg
        if (s.scan(/\}/))
          break
        end
      end
      node
    end
  end

  class NumLiteralArgNode < ArgNode
    attr_accessor :value
  end

  class NumEquivAddrArgNode < NumLiteralArgNode
  end

  # "#<number>" or "=#<number>"; the "=" form yields a NumEquivAddrArgNode.
  # The digits are converted with Integer(), so hexadecimal needs the 0x
  # prefix. Raises Asm::ParseError when the conversion fails (e.g. "#ff").
  def parse_num_literal(s)
    if (m = s.scan(/(=?)#(-?(?:0x)?[0-9A-Fa-f]+)/))
      begin
        number = Integer(m[1])
      rescue ArgumentError
        raise Asm::ParseError.new('invalid numeric literal', s)
      end
      (m[0] == '=' ? NumEquivAddrArgNode : NumLiteralArgNode).new(s) { |n|
        n.value = number
      }
    end
  end

  class LabelRefArgNode < ArgNode
    attr_accessor :label, :label_object
  end

  class LabelEquivAddrArgNode < LabelRefArgNode
  end

  # "label" or "=label"; the "=" form yields a LabelEquivAddrArgNode.
  def parse_label_ref(s)
    if (m = s.scan(/(=?)(\/*\w+)/))
      (m[0] == '=' ? LabelEquivAddrArgNode : LabelRefArgNode).new(s) { |n|
        n.label = m[1]
      }
    end
  end

  class ReferenceArgNode < ArgNode
    attr_accessor :argument
  end

  # "[" argument "]"
  def parse_reference(s)
    if (m = s.scan(/\[/))
      arg = parse_arg(s)
      if (arg and s.scan(/\]/))
        ReferenceArgNode.new(s) do |n|
          n.argument = arg
        end
      end
    end
  end
end

if (__FILE__ == $0)
  p Asm::Parser.parse ARGV[0]
end

12
lib/asm/relocation.rb Normal file
View File

@ -0,0 +1,12 @@
module Asm
  # Plain record describing one relocation: where it is (position), what it
  # refers to (label), its type and the handler that applies it.
  class Relocation
    attr_reader :position, :label, :type, :handler

    def initialize(pos, label, type, handler)
      @position, @label, @type, @handler = pos, label, type, handler
    end
  end
end

65
lib/asm/str_scanner.rb Normal file
View File

@ -0,0 +1,65 @@
# Make sure the Asm namespace exists before `class Asm::Scanner` is opened
# below (the namespace used to be declared as AS here).
module Asm; end

# Regexp#match_start is only defined here when not running on Rubinius
# ('rbx').
if (not defined? RUBY_ENGINE or not RUBY_ENGINE == 'rbx')
  class Regexp
    # Matches the pattern against +str+ starting exactly at index +idx+ and
    # returns the MatchData, or nil when the pattern does not match there.
    #
    # The pattern is re-compiled with an \A anchor; its /i, /x and /m options
    # are carried over (they used to be dropped, which made case-insensitive
    # patterns case-sensitive).
    def match_start(str, idx)
      flags = options & (Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE)
      Regexp.compile('\A(?:' + source + ')', flags).match(str[idx..-1])
    end
  end
end
# Minimal string scanner that keeps track of line and column while the
# input is consumed. Relies on Regexp#match_start.
class Asm::Scanner
  attr_accessor :string, :pos, :line, :column, :prev_line, :prev_column

  def initialize(str)
    @string = str
    @pos = 0
    @line = 0
    @column = 0
  end

  # The part of the input that has not been consumed yet.
  def rest
    string[pos..-1]
  end

  # True once the whole input has been consumed.
  def eos?
    pos == string.length
  end

  # Consumes +str+: remembers the current line/column as prev_line /
  # prev_column, then moves pos, line and column past +str+.
  def advance_str(str)
    self.prev_line = line
    self.prev_column = column
    self.pos += str.length
    self.line += str.count("\n")
    last_newline = str.rindex("\n")
    if last_newline
      self.column = str.length - last_newline
    else
      self.column += str.length
    end
  end

  # Consumes +regexp+ at the current position and returns its captures
  # (an Array), or nil when it does not match.
  def scan(regexp)
    match = regexp.match_start(rest, 0)
    return nil unless match

    advance_str match.to_s
    match.captures
  end

  # Like #scan, but returns the matched text instead of the captures.
  def scan_str(regexp)
    match = regexp.match_start(rest, 0)
    return nil unless match

    advance_str match.to_s
    match.to_s
  end

  # Tells whether +regexp+ matches at the current position without
  # consuming anything.
  def lookahead(regexp)
    regexp.match_start(rest, 0) ? true : false
  end
end

123
lib/asm/streamreader.rb Normal file
View File

@ -0,0 +1,123 @@
# Mixin with helpers for reading binary values. The including class has to
# provide __sr_read(length) and, for read_cstr_terminated, __sr_gets(sep).
module StreamReader
  # Reads +count+ values of +size+ bytes each and decodes them with the
  # String#unpack directive +type+. Returns the single value when exactly one
  # (or none) was decoded, otherwise an Array of values.
  def read_binary(size, count, type)
    values = __sr_read(size * count).unpack(type * count)
    values.length > 1 ? values : values[0]
  end

  # reader name suffix => [size in bytes, unpack directive]
  # Defines read_uint32(n=1), read_uint16(n=1), ... for every entry.
  {
    'uint32' => [4, 'L'],
    'uint16' => [2, 'S'],
    'uint8'  => [1, 'C'],
    'uint64' => [8, 'Q'],
    'sint64' => [8, 'q'],
    'float'  => [4, 'F'],
    'double' => [8, 'D'],
    'sint16' => [2, 's'],
    'sint32' => [4, 'l']
  }.each do |suffix, (size, directive)|
    define_method("read_#{suffix}") do |n = 1|
      read_binary(size, n, directive)
    end
  end

  # Reads +length+ bytes and removes all NUL bytes from them.
  def read_cstr_fixed(length)
    __sr_read(length).gsub("\000", '')
  end

  # Reads up to and including the next NUL byte (via __sr_gets).
  def read_cstr_terminated
    __sr_gets(0.chr)
  end

  # Reads a one-byte length followed by that many bytes.
  def read_cstr_prefixed
    __sr_read(read_uint8)
  end

  # Reads +len+ raw bytes.
  def read_data(len)
    __sr_read(len)
  end
end
# Mixin with helpers for writing binary values. The including class has to
# provide __sr_write(string); its result is what the helpers return.
module StreamWriter
  # Encodes every element of +values+ with the Array#pack directive +type+
  # and writes the result.
  def write_binary(values, type)
    __sr_write(values.pack(type * values.length))
  end

  # writer name suffix => pack directive
  # Defines write_uint32(*values), write_uint16(*values), ... for every entry.
  {
    'uint32' => 'L',
    'uint16' => 'S',
    'uint8'  => 'C',
    'uint64' => 'Q',
    'sint64' => 'q',
    'float'  => 'F',
    'double' => 'D',
    'sint16' => 's',
    'sint32' => 'l'
  }.each do |suffix, directive|
    define_method("write_#{suffix}") do |*args|
      write_binary(args, directive)
    end
  end

  # Writes +str+ padded with NUL bytes to at least +len+ characters.
  def write_cstr_fixed(str, len)
    __sr_write(str.ljust(len, 0.chr))
  end

  # Writes +str+ followed by a NUL byte.
  def write_cstr_terminated(str)
    __sr_write(str + 0.chr)
  end

  # Writes the length of +str+ as one byte, followed by +str+.
  def write_cstr_prefixed(str)
    write_uint8(str.length)
    __sr_write(str)
  end

  # Writes +str+ unchanged.
  def write_str(str)
    __sr_write(str)
  end

  # Writes +str+ unchanged.
  def write_data(str)
    __sr_write(str)
  end
end
# Gives every IO the StreamReader / StreamWriter helpers by mapping their
# two primitive operations onto IO#read and IO#write.
class IO
  include StreamReader
  include StreamWriter

  # Primitive used by StreamReader: reads +len+ bytes.
  def __sr_read(len)
    self.read(len)
  end

  # Primitive used by StreamWriter: writes +str+.
  def __sr_write(str)
    self.write(str)
  end
end
require 'stringio'
# Gives StringIO the StreamReader / StreamWriter helpers by mapping their
# two primitive operations onto StringIO#read and StringIO#write.
class StringIO
  include StreamReader
  include StreamWriter

  # Primitive used by StreamReader: reads +len+ bytes.
  def __sr_read(len)
    self.read(len)
  end

  # Primitive used by StreamWriter: writes +str+.
  def __sr_write(str)
    self.write(str)
  end
end

View File

@ -0,0 +1,5 @@
class Numeric
  # Tells whether the number lies in the range 0..255 (both inclusive),
  # i.e. whether it can be stored in an unsigned byte.
  def fits_u8?
    (self >= 0) && (self <= 255)
  end
end

59
lib/elf/constants.rb Normal file
View File

@ -0,0 +1,59 @@
module ELF
  # Numeric constants used when writing ELF object files, grouped by their
  # name prefix.
  module Constants
    # ET_*: object file types
    ET_NONE = 0
    ET_REL  = 1
    ET_EXEC = 2
    ET_DYN  = 3
    ET_CORE = 4

    # EM_*: machine architectures
    EM_NONE  = 0
    EM_M32   = 1
    EM_SPARC = 2
    EM_386   = 3
    EM_68K   = 4
    EM_88K   = 5
    EM_860   = 7
    EM_MIPS  = 8
    EM_ARM   = 40

    # EV_*: format versions
    EV_NONE    = 0
    EV_CURRENT = 1

    # ELFCLASS*: file classes (32 / 64 bit)
    ELFCLASSNONE = 0
    ELFCLASS32   = 1
    ELFCLASS64   = 2

    # ELFDATA*: data encodings (LSB / MSB first)
    ELFDATANONE = 0
    ELFDATA2LSB = 1
    ELFDATA2MSB = 2

    # SHT_*: section types
    SHT_NULL     = 0
    SHT_PROGBITS = 1
    SHT_SYMTAB   = 2
    SHT_STRTAB   = 3
    SHT_RELA     = 4
    SHT_HASH     = 5
    SHT_DYNAMIC  = 6
    SHT_NOTE     = 7
    SHT_NOBITS   = 8
    SHT_REL      = 9
    SHT_SHLIB    = 10
    SHT_DYNSYM   = 11

    # SHF_*: section flags (bit values)
    SHF_WRITE     = 0x1
    SHF_ALLOC     = 0x2
    SHF_EXECINSTR = 0x4

    # STB_*: symbol bindings
    STB_LOCAL  = 0
    STB_GLOBAL = 1
    STB_WEAK   = 2

    # ABI_*: ABI identifiers
    ABI_SYSTEMV = 0
    ABI_ARM     = 0x61

    # Four raw bytes; judging by the name, machine code for an ARM infinite
    # loop - TODO confirm
    ARM_INFLOOP = "\x08\xf0\x4f\xe2"

    # Target descriptions: [file class, data encoding, ABI, machine]
    TARGET_ARM = [ELFCLASS32, ELFDATA2LSB, ABI_ARM, EM_ARM]
    TARGET_X86 = [ELFCLASS32, ELFDATA2LSB, ABI_SYSTEMV, EM_386]
  end
end

18
lib/elf/null_section.rb Normal file
View File

@ -0,0 +1,18 @@
module Elf
  # Section with an empty name that writes no content.
  class NullSection < Section
    def initialize
      super('')
    end

    # Section type: SHT_NULL.
    def type
      Elf::Constants::SHT_NULL
    end

    def alignment
      0
    end

    # Nothing is written for this section.
    def write(io)
      nil
    end
  end
end

89
lib/elf/object_file.rb Normal file
View File

@ -0,0 +1,89 @@
module Elf
  # Collects sections and serialises them as a 32-bit relocatable ELF
  # object file (e_type ET_REL, no program headers).
  #
  # +target+ is a four element array in the order
  # [file class, data encoding, OS/ABI, machine], e.g. one of the TARGET_*
  # constants.
  class ObjectFile
    # The ELF constants are defined in the nested Constants module, so that
    # module itself has to be mixed in for bare names such as EV_CURRENT,
    # ET_REL or SHT_NULL to resolve inside this class.
    include ELF::Constants

    # Size in bytes of the ELF32 file header written by #write.
    ELF_HEADER_SIZE = 52
    # Size in bytes of one ELF32 section header written by #write.
    SECTION_HEADER_SIZE = 40

    # Creates an object file for +target+ that initially contains only the
    # null section (section index 0).
    def initialize(target)
      @target = target
      @sections = []
      add_section NullSection.new
    end

    # Appends +section+ and assigns it its position as section index.
    # Returns that index.
    def add_section(section)
      @sections << section
      section.index = @sections.length - 1
    end

    # Writes the complete object file to +io+: file header, section
    # contents, then the section header table.
    #
    # +io+ must be seekable and provide the write_uint8/16/32 helpers.
    # The section name string table (.shstrtab) is generated for each call
    # and is NOT stored in @sections, so calling #write repeatedly always
    # produces the same file. On return +io+ is positioned at the end of
    # the written data.
    #
    # Raises RuntimeError for sections of type SHT_NOBITS (not supported).
    def write(io)
      shstrtab = StringTableSection.new(".shstrtab")
      sections = @sections + [shstrtab]
      shstrtab.index = sections.length - 1
      # includes the name of .shstrtab itself
      sections.each { |section| shstrtab.add_string section.name }

      sh_offset_pos = write_file_header(io, sections.length)
      section_data = write_section_bodies(io, sections)
      sh_offset = io.tell
      write_section_headers(io, section_data, shstrtab)

      # Back-patch the section header table offset into the file header,
      # then return to the end so later writes cannot clobber the header.
      end_pos = io.tell
      io.seek sh_offset_pos
      io.write_uint32 sh_offset
      io.seek end_pos
    end

    private

    # Writes the ELF32 file header for +section_count+ sections, the last
    # of which is the section name string table. The section header table
    # offset is written as 0; the position of that field is returned so it
    # can be patched once the offset is known.
    def write_file_header(io, section_count)
      io << "\x7fELF"
      io.write_uint8 @target[0]                 # file class
      io.write_uint8 @target[1]                 # data encoding
      io.write_uint8 EV_CURRENT                 # ident version
      io.write_uint8 @target[2]                 # OS/ABI
      io << "\x00" * 8                          # pad
      io.write_uint16 ET_REL
      io.write_uint16 @target[3]                # machine
      io.write_uint32 EV_CURRENT
      io.write_uint32 0                         # entry point
      io.write_uint32 0                         # no program header table
      sh_offset_pos = io.tell
      io.write_uint32 0                         # section header table offset (patched later)
      io.write_uint32 0                         # no flags
      io.write_uint16 ELF_HEADER_SIZE           # header length
      io.write_uint16 0                         # program header length
      io.write_uint16 0                         # program header count
      io.write_uint16 SECTION_HEADER_SIZE       # section header length
      io.write_uint16 section_count             # section header count
      io.write_uint16 section_count - 1         # section name string table index
      sh_offset_pos
    end

    # Writes the contents of every section and returns one record per
    # section holding the section, its file offset and its size in bytes.
    def write_section_bodies(io, sections)
      sections.map do |section|
        offset = io.tell
        section.write(io)
        { :section => section, :offset => offset, :size => io.tell - offset }
      end
    end

    # Writes one section header per record produced by
    # write_section_bodies, looking section names up in +shstrtab+.
    def write_section_headers(io, section_data, shstrtab)
      section_data.each do |data|
        section = data[:section]
        io.write_uint32 shstrtab.index_for(section.name)
        io.write_uint32 section.type
        io.write_uint32 section.flags
        io.write_uint32 section.addr
        if section.type == SHT_NOBITS
          raise 'SHT_NOBITS not handled yet'
        elsif section.type == SHT_NULL
          # the null section header carries no offset and no size
          io.write_uint32 0
          io.write_uint32 0
        else
          io.write_uint32 data[:offset]
          io.write_uint32 data[:size]
        end
        io.write_uint32 section.link
        io.write_uint32 section.info
        io.write_uint32 section.alignment
        io.write_uint32 section.ent_size
      end
    end
  end
end

View File

@ -0,0 +1,41 @@
module Elf
  # Section of type SHT_REL holding the relocation entries that apply to
  # one text section, with symbols resolved through one symbol table.
  class RelocationTableSection < Section
    # +symtab+ is the symbol table the entries refer to, +text_section+ the
    # section the relocations apply to.
    def initialize(name, symtab, text_section)
      super(name)
      @relocs = []
      @symtab = symtab
      @text_section = text_section
    end

    # Records a relocation of kind +type+ at +offset+ against the symbol
    # called +name+.
    def add_reloc(offset, name, type)
      @relocs.push [offset, name, type]
    end

    def type
      Elf::Constants::SHT_REL
    end

    # Each entry consists of two 32-bit words.
    def ent_size
      8
    end

    # Section index of the associated symbol table.
    def link
      @symtab.index
    end

    # Section index of the section the relocations apply to.
    def info
      @text_section.index
    end

    # Writes all recorded entries to +io+: the offset, followed by a word
    # that combines the relocation type (low 8 bits) with the symbol number.
    def write(io)
      @relocs.each do |offset, name, reloc_type|
        symbol_index = @symtab.index_for_name(name)
        io.write_uint32 offset
        # +1 because entry number 0 of the symbol table is the undefined symbol
        io.write_uint32 reloc_type | ((symbol_index + 1) << 8)
      end
    end
  end
end

31
lib/elf/section.rb Normal file
View File

@ -0,0 +1,31 @@
module Elf
  # Base class of all sections. It stores the section name and index and
  # supplies default values for the section header fields; subclasses
  # override whichever fields differ and must provide #type.
  class Section
    attr_accessor :name, :index

    def initialize(name)
      @name = name
    end

    # Section type; there is no sensible default, so subclasses must
    # override this method.
    def type
      raise RuntimeError, 'Reimplement #type'
    end

    # Default header field values.

    def flags
      0
    end

    def addr
      0
    end

    def link
      0
    end

    def info
      0
    end

    def ent_size
      0
    end

    def alignment
      1
    end
  end
end

View File

@ -0,0 +1,29 @@
module Elf
  # Section of type SHT_STRTAB: a pool of NUL-terminated strings that other
  # structures refer to by byte offset.
  class StringTableSection < Section
    def initialize(*args)
      super
      # The table starts with a single NUL so that offset 0 is the empty
      # string. A copy of the literal is used because the buffer is
      # appended to in place.
      @string_data = "\x00".dup
      @indices = {"" => 0}
    end

    # Adds +str+ to the table unless it is already present.
    # Returns nil when the string was already known.
    def add_string(str)
      return if @indices.key?(str)
      # Offsets into the table are byte offsets, so the current size must be
      # measured in bytes (characters and bytes differ for multibyte names).
      @indices[str] = @string_data.bytesize
      @string_data << str << "\x00"
    end

    # Byte offset of +str+ inside the table, or nil if it was never added.
    def index_for(str)
      @indices[str]
    end

    # Writes the whole string pool to +io+.
    def write(io)
      io << @string_data
    end

    def type
      Elf::Constants::SHT_STRTAB
    end
  end
end

View File

@ -0,0 +1,77 @@
module Elf
  # Section of type SHT_SYMTAB. Symbols are kept as
  # [name, value, text_section, linkage] records, local symbols first.
  # The written table always starts with the undefined (null) symbol, so the
  # record at position n of @symbols becomes symbol number n + 1 in the file.
  class SymbolTableSection < Section
    # +strtab+ is the string table section that stores the symbol names.
    def initialize(name, strtab)
      super(name)
      @strtab = strtab
      @symbols = []
    end

    # Registers a symbol called +name+ with the given +value+.
    # +text_section+ is the section the symbol is defined in (nil for an
    # undefined symbol) and +linkage+ one of the STB_* bindings.
    # Local symbols are put in front of all others so that they stay
    # grouped at the start of the table.
    def add_func_symbol(name, value, text_section, linkage)
      @strtab.add_string name
      record = [name, value, text_section, linkage]
      if linkage == Elf::Constants::STB_LOCAL
        @symbols.unshift record
      else
        @symbols.push record
      end
    end

    # Position of the symbol called +name+ within the registered symbols
    # (not counting the leading undefined symbol), or nil if unknown.
    def index_for_name(name)
      @symbols.index { |sym| sym[0] == name }
    end

    def type
      Elf::Constants::SHT_SYMTAB
    end

    # Size in bytes of one symbol table entry.
    def ent_size
      16
    end

    # Section index of the string table holding the symbol names.
    def link
      @strtab.index
    end

    # Value for the sh_info header field: one greater than the symbol table
    # index of the last local symbol. The linkage is the fourth element
    # (index 3) of a symbol record, and the undefined symbol written at
    # index 0 is itself local, hence the extra 1.
    def info
      local_count = @symbols.count { |sym| sym[3] == Elf::Constants::STB_LOCAL }
      local_count + 1
    end

    # Writes the undefined symbol followed by every registered symbol.
    # Entry layout: name offset (32 bit), value (32 bit), size (32 bit,
    # always 0), info byte, other byte (always 0), section index (16 bit).
    def write(io)
      # write undefined symbol
      io.write_uint32 0
      io.write_uint32 0
      io.write_uint32 0
      io.write_uint8 Elf::Constants::STB_LOCAL << 4
      io.write_uint8 0
      io.write_uint16 0
      # write other symbols
      @symbols.each do |name, value, section, linkage|
        io.write_uint32 @strtab.index_for(name)
        io.write_uint32 value
        io.write_uint32 0
        # binding in the upper nibble, symbol type 0 in the lower nibble
        io.write_uint8(linkage << 4)
        io.write_uint8 0
        # section index 0 marks an undefined symbol
        io.write_uint16(section ? section.index : 0)
      end
    end
  end
end

21
lib/elf/text_section.rb Normal file
View File

@ -0,0 +1,21 @@
module Elf
  # Section of type SHT_PROGBITS that carries code: it is flagged as
  # allocated and executable and aligned to 4.
  class TextSection < Section
    # The section contents, as assigned by the caller.
    attr_accessor :text

    def type
      Elf::Constants::SHT_PROGBITS
    end

    def flags
      allocated = Elf::Constants::SHF_ALLOC
      executable = Elf::Constants::SHF_EXECINSTR
      allocated | executable
    end

    def alignment
      4
    end

    # Appends the section contents to +io+.
    def write(io)
      io << text
    end
  end
end

27
test/code_generator.rb Normal file
View File

@ -0,0 +1,27 @@
# Manual smoke test: assembles a small countdown loop with the code
# generator DSL and saves it as an ARM object file in the current directory.
# Runs only when this file is executed directly.
if __FILE__ == $0
  # Load dependencies before anything from the library is used.
  # NOTE(review): nothing here loads the file that defines
  # Asm::ARMCodeGenerator - presumably 'objectwriter' pulls it in; confirm.
  require 'objectwriter'
  require 'tempfile'

  gen = Asm::ARMCodeGenerator.new
  gen.instance_eval do
    mov r0, 5
    loop_start = label
    loop_start.set!
    subs r0, r0, 1
    bne loop_start
    bx lr
  end

  writer = Asm::ObjectWriter.new(Elf::Constants::TARGET_ARM)
  writer.set_text gen.assemble
  begin
    writer.save('arm_as_generated.o')
  rescue => err
    # Report on stderr and exit with a failure status so that callers
    # (shell scripts, CI) can detect that no file was written.
    $stderr.puts 'as: cannot save output file: ' + err.message
    exit 1
  end
end

19
test/elf_object.rb Normal file
View File

@ -0,0 +1,19 @@
# Manual smoke test: builds a minimal ARM object file containing a string
# table, a symbol table and a text section with one global "_start" symbol,
# and writes it to test.o. Runs only when this file is executed directly.
#
# The section classes are namespaced under Elf, while the constants are
# reached through ELF::Constants.
if __FILE__ == $0
  obj = Elf::ObjectFile.new ELF::Constants::TARGET_ARM

  sym_strtab = Elf::StringTableSection.new(".strtab")
  obj.add_section sym_strtab

  symtab = Elf::SymbolTableSection.new(".symtab", sym_strtab)
  obj.add_section symtab

  text_section = Elf::TextSection.new(".text")
  obj.add_section text_section

  symtab.add_func_symbol "_start", 0, text_section, ELF::Constants::STB_GLOBAL

  # The block form closes the file even when writing raises.
  File.open("test.o", "wb") { |fp| obj.write fp }
end
end