From 408b290b8a79b7c3f4c8bf30072b27f238b728d8 Mon Sep 17 00:00:00 2001 From: Torsten Ruger Date: Mon, 14 Apr 2014 18:09:56 +0300 Subject: [PATCH] adds first version of the expanded as assembler from mikko --- .gitignore | 2 +- lib/asm/README.markdown | 18 ++ lib/asm/arm/addr_table_object.rb | 34 ++++ lib/asm/arm/arm_assembler.rb | 54 ++++++ lib/asm/arm/builder_a.rb | 106 +++++++++++ lib/asm/arm/builder_b.rb | 113 ++++++++++++ lib/asm/arm/builder_d.rb | 52 ++++++ lib/asm/arm/code_generator.rb | 126 +++++++++++++ lib/asm/arm/instruction.rb | 149 +++++++++++++++ lib/asm/arm/instruction_tools.rb | 25 +++ lib/asm/assembler.rb | 43 +++++ lib/asm/assembly_error.rb | 12 ++ lib/asm/ast_assembler.rb | 85 +++++++++ lib/asm/command_line.rb | 149 +++++++++++++++ lib/asm/data_object.rb | 11 ++ lib/asm/label_object.rb | 32 ++++ lib/asm/object_writer.rb | 48 +++++ lib/asm/parser.rb | 269 ++++++++++++++++++++++++++++ lib/asm/relocation.rb | 12 ++ lib/asm/str_scanner.rb | 65 +++++++ lib/asm/streamreader.rb | 123 +++++++++++++ lib/crystal.rb | 5 + lib/elf/constants.rb | 59 ++++++ lib/elf/null_section.rb | 18 ++ lib/elf/object_file.rb | 89 +++++++++ lib/elf/relocation_table_section.rb | 41 +++++ lib/elf/section.rb | 31 ++++ lib/elf/string_table_section.rb | 29 +++ lib/elf/symbol_table_section.rb | 77 ++++++++ lib/elf/text_section.rb | 21 +++ test/code_generator.rb | 27 +++ test/elf_object.rb | 19 ++ 32 files changed, 1943 insertions(+), 1 deletion(-) create mode 100644 lib/asm/README.markdown create mode 100644 lib/asm/arm/addr_table_object.rb create mode 100644 lib/asm/arm/arm_assembler.rb create mode 100644 lib/asm/arm/builder_a.rb create mode 100644 lib/asm/arm/builder_b.rb create mode 100644 lib/asm/arm/builder_d.rb create mode 100644 lib/asm/arm/code_generator.rb create mode 100644 lib/asm/arm/instruction.rb create mode 100644 lib/asm/arm/instruction_tools.rb create mode 100644 lib/asm/assembler.rb create mode 100644 lib/asm/assembly_error.rb create mode 100644 
lib/asm/ast_assembler.rb create mode 100644 lib/asm/command_line.rb create mode 100644 lib/asm/data_object.rb create mode 100644 lib/asm/label_object.rb create mode 100644 lib/asm/object_writer.rb create mode 100644 lib/asm/parser.rb create mode 100644 lib/asm/relocation.rb create mode 100644 lib/asm/str_scanner.rb create mode 100644 lib/asm/streamreader.rb create mode 100644 lib/elf/constants.rb create mode 100644 lib/elf/null_section.rb create mode 100644 lib/elf/object_file.rb create mode 100644 lib/elf/relocation_table_section.rb create mode 100644 lib/elf/section.rb create mode 100644 lib/elf/string_table_section.rb create mode 100644 lib/elf/symbol_table_section.rb create mode 100644 lib/elf/text_section.rb create mode 100644 test/code_generator.rb create mode 100644 test/elf_object.rb diff --git a/.gitignore b/.gitignore index a9c888fa..bb341ac4 100644 --- a/.gitignore +++ b/.gitignore @@ -28,7 +28,7 @@ pkg # # For MacOS: # -#.DS_Store +.DS_Store # For TextMate #*.tmproj diff --git a/lib/asm/README.markdown b/lib/asm/README.markdown new file mode 100644 index 00000000..c64135f9 --- /dev/null +++ b/lib/asm/README.markdown @@ -0,0 +1,18 @@ +Assembler in Ruby +================= + +Supporting arm, but aimed quite specifically at raspberry pi, arm v7, floating point included + +Outputs ELF object files, with relocation support. + +Constant table support exists but isn't very good. Some addressing modes +are not supported or only partially supported. 
+ +Supported (pseudo)instructions: + +- adc, add, and, bic, eor, orr, rsb, rsc, sbc, sub, cmn, cmp, teq, tst, + mov, mvn, strb, str, ldrb, ldr, push, pop, b, bl, bx, swi +- Conditional versions of above + +Thanks to Cyndis for starting this arm/elf project in the first place: https://github.com/cyndis/as + diff --git a/lib/asm/arm/addr_table_object.rb b/lib/asm/arm/addr_table_object.rb new file mode 100644 index 00000000..89b21e34 --- /dev/null +++ b/lib/asm/arm/addr_table_object.rb @@ -0,0 +1,34 @@ +class Asm::ARM::AddrTableObject + def initialize + @table = [] + @const = [] + end + + # TODO don't create new entry if there's already an entry for the same label/const + def add_label(label) + d = [label, Asm::LabelObject.new] + @table << d + d[1] + end + + def add_const(const) + d = [const, Asm::LabelObject.new] + @const << d + d[1] + end + + def assemble(io, as) + @table.each do |pair| + target_label, here_label = *pair + here_label.assemble io, as + as.add_relocation io.tell, target_label, Asm::ARM::R_ARM_ABS32, + Asm::ARM::Instruction::RelocHandler + io.write_uint32 0 + end + @const.each do |pair| + const, here_label = *pair + here_label.assemble io, as + io.write_uint32 const + end + end +end diff --git a/lib/asm/arm/arm_assembler.rb b/lib/asm/arm/arm_assembler.rb new file mode 100644 index 00000000..bfbc683c --- /dev/null +++ b/lib/asm/arm/arm_assembler.rb @@ -0,0 +1,54 @@ +require_relative 'assembler' + +module Asm + module Arm + + # Relocation constants + # Note that in this assembler, a relocation simply means any + # reference to a label that can only be determined at assembly time + # or later (as in the normal meaning) + + R_ARM_PC24 = 0x01 + R_ARM_ABS32 = 0x02 + + # Unofficial (cant be used for extern relocations) + R_ARM_PC12 = 0xF0 + + # TODO actually find the closest somehow + def self.closest_addrtable(as) + as.objects.find do |obj| + obj.is_a?(Asm::ARM::AddrTableObject) + end || (raise Asm::AssemblyError.new('could not find addrtable to use', nil)) 
+ end + + def self.write_resolved_relocation(io, addr, type) + case type + when R_ARM_PC24 + diff = addr - io.tell - 8 + packed = [diff >> 2].pack('l') + io << packed[0,3] + when R_ARM_ABS32 + packed = [addr].pack('l') + io << packed + when R_ARM_PC12 + diff = addr - io.tell - 8 + if (diff.abs > 2047) + raise Asm::AssemblyError.new('offset too large for R_ARM_PC12 relocation', + nil) + end + + val = diff.abs + sign = (diff>0)?1:0 + + curr = io.read_uint32 + io.seek(-4, IO::SEEK_CUR) + + io.write_uint32 (curr & ~0b00000000100000000000111111111111) | + val | (sign << 23) + else + raise 'unknown relocation type' + end + end + +end + diff --git a/lib/asm/arm/builder_a.rb b/lib/asm/arm/builder_a.rb new file mode 100644 index 00000000..fb033faa --- /dev/null +++ b/lib/asm/arm/builder_a.rb @@ -0,0 +1,106 @@ +module Asm + module Arm + # ADDRESSING MODE 1 + # Complete! + class BuilderA + include Asm::ARM::InstructionTools + + def initialize + @cond = 0b1110 + @inst_class = 0 + @i = 0 + @opcode = 0 + @s = 0 + @rn = 0 + @rd = 0 + @operand = 0 + end + attr_accessor :cond, :inst_class, :i, :opcode, :s, + :rn, :rd, :operand + + def self.make(inst_class, opcode, s) + a = new + a.inst_class = inst_class + a.opcode = opcode + a.s = s + a + end + + def calculate_u8_with_rr(arg) + parts = arg.value.to_s(2).rjust(32,'0').scan(/^(0*)(.+?)0*$/).flatten + pre_zeros = parts[0].length + imm_len = parts[1].length + if ((pre_zeros+imm_len) % 2 == 1) + u8_imm = (parts[1]+'0').to_i(2) + imm_len += 1 + else + u8_imm = parts[1].to_i(2) + end + if (u8_imm.fits_u8?) + # can do! + rot_imm = (pre_zeros+imm_len) / 2 + if (rot_imm > 15) + return nil + end + return u8_imm | (rot_imm << 8) + else + return nil + end + end + + # Build representation for source value + def build_operand(arg) + if (arg.is_a?(Asm::Parser::NumLiteralArgNode)) + if (arg.value.fits_u8?) 
+ # no shifting needed + @operand = arg.value + @i = 1 + elsif (op_with_rot = calculate_u8_with_rr(arg)) + @operand = op_with_rot + @i = 1 + else + raise Asm::AssemblyError.new(Asm::ERRSTR_NUMERIC_TOO_LARGE, arg) + end + elsif (arg.is_a?(Asm::Parser::RegisterArgNode)) + @operand = reg_ref(arg) + @i = 0 + elsif (arg.is_a?(Asm::Parser::ShiftNode)) + rm_ref = reg_ref(arg.argument) + @i = 0 + shift_op = {'lsl' => 0b000, 'lsr' => 0b010, 'asr' => 0b100, + 'ror' => 0b110, 'rrx' => 0b110}[arg.type] + if (arg.type == 'ror' and arg.value.nil?) + # ror #0 == rrx + raise Asm::AssemblyError.new('cannot rotate by zero', arg) + end + + arg1 = arg.value + if (arg1.is_a?(Asm::Parser::NumLiteralArgNode)) + if (arg1.value >= 32) + raise Asm::AssemblyError.new('cannot shift by more than 31', arg1) + end + shift_imm = arg1.value + elsif (arg1.is_a?(Asm::Parser::RegisterArgNode)) + shift_op |= 0x1; + shift_imm = reg_ref(arg1) << 1 + elsif (arg.type == 'rrx') + shift_imm = 0 + end + + @operand = rm_ref | (shift_op << 4) | (shift_imm << 4+3) + else + raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, arg) + end + end + + def write(io, as) + val = operand | (rd << 12) | (rn << 12+4) | + (s << 12+4+4) | (opcode << 12+4+4+1) | + (i << 12+4+4+1+4) | (inst_class << 12+4+4+1+4+1) | + (cond << 12+4+4+1+4+1+2) + io.write_uint32 val + end + end + + end +end \ No newline at end of file diff --git a/lib/asm/arm/builder_b.rb b/lib/asm/arm/builder_b.rb new file mode 100644 index 00000000..17dddf33 --- /dev/null +++ b/lib/asm/arm/builder_b.rb @@ -0,0 +1,113 @@ +module Asm + module Arm + # ADDRESSING MODE 2 + # Implemented: immediate offset with offset=0 + class BuilderB + include Asm::ARM::InstructionTools + + def initialize + @cond = 0b1110 + @inst_class = 0 + @i = 0 #I flag (third bit) + @pre_post_index = 0 #P flag + @add_offset = 0 #U flag + @byte_access = 0 #B flag + @w = 0 #W flag + @load_store = 0 #L flag + @rn = 0 + @rd = 0 + @operand = 0 + end + attr_accessor :cond, :inst_class, :i, 
:pre_post_index, :add_offset, + :byte_access, :w, :load_store, :rn, :rd, :operand + + def self.make(inst_class, byte_access, load_store) + a = new + a.inst_class = inst_class + a.byte_access = byte_access + a.load_store = load_store + a + end + + class MathReferenceArgNode < Asm::Parser::ReferenceArgNode + attr_accessor :op, :right + end + def simplify_reference(arg) + node = MathReferenceArgNode.new + + if (arg.is_a?(Asm::Parser::MathNode)) + node.argument = arg.left + node.op = arg.op + node.right = arg.right + else + node.argument = arg + end + + node + end + + # Build representation for target address + def build_operand(arg1) + if (arg1.is_a?(Asm::Parser::ReferenceArgNode)) + argr = simplify_reference(arg1.argument) + arg = argr.argument + if (arg.is_a?(Asm::Parser::RegisterArgNode)) + @i = 0 + @pre_post_index = 1 + @w = 0 + @rn = reg_ref(arg) + @operand = 0 + + if (argr.op and argr.right.is_a?(Asm::Parser::NumLiteralArgNode)) + val = argr.right.value + if (val < 0) + @add_offset = 0 + val *= -1 + else + @add_offset = 1 + end + if (val.abs > 4095) + raise Asm::AssemblyError.new('reference offset too large/small (max 4095)', argr.right) + end + @operand = val + elsif (argr.op) + raise Asm::AssemblyError.new('reference offset must be an integer literal', argr.right) + end + else + raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, arg) + end + elsif (arg1.is_a?(Asm::Parser::LabelEquivAddrArgNode) or arg1.is_a?(Asm::Parser::NumEquivAddrArgNode)) + @i = 0 + @pre_post_index = 1 + @w = 0 + @rn = 15 # pc + @operand = 0 + @use_addrtable_reloc = true + @addrtable_reloc_target = arg1 + else + raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, arg1) + end + end + + def write(io, as, ast_asm, inst) + val = operand | (rd << 12) | (rn << 12+4) | + (load_store << 12+4+4) | (w << 12+4+4+1) | + (byte_access << 12+4+4+1+1) | (add_offset << 12+4+4+1+1+1) | + (pre_post_index << 12+4+4+1+1+1+1) | (i << 12+4+4+1+1+1+1+1) | + (inst_class << 12+4+4+1+1+1+1+1+1) | (cond << 
12+4+4+1+1+1+1+1+1+2) + if (@use_addrtable_reloc) + closest_addrtable = Asm::ARM.closest_addrtable(as) + if (@addrtable_reloc_target.is_a?(Asm::Parser::LabelEquivAddrArgNode)) + obj = ast_asm.object_for_label(@addrtable_reloc_target.label, inst) + ref_label = closest_addrtable.add_label(obj) + elsif (@addrtable_reloc_target.is_a?(Asm::Parser::NumEquivAddrArgNode)) + ref_label = closest_addrtable.add_const(@addrtable_reloc_target.value) + end + as.add_relocation io.tell, ref_label, Asm::ARM::R_ARM_PC12, + Asm::ARM::Instruction::RelocHandler + end + io.write_uint32 val + end + end + end +end \ No newline at end of file diff --git a/lib/asm/arm/builder_d.rb b/lib/asm/arm/builder_d.rb new file mode 100644 index 00000000..f15d7b64 --- /dev/null +++ b/lib/asm/arm/builder_d.rb @@ -0,0 +1,52 @@ +module Asm + module Arm + # ADDRESSING MODE 4 + class BuilderD + include Asm::ARM::InstructionTools + + def initialize + @cond = 0b1110 + @inst_class = Asm::ARM::Instruction::OPC_STACK + @pre_post_index = 0 + @up_down = 0 + @s = 0 + @write_base = 0 + @store_load = 0 + @rn = 0 + @operand = 0 + end + attr_accessor :cond, :inst_class, :pre_post_index, :up_down, + :s, :write_base, :store_load, :rn, :operand + + def self.make(pre_post, up_down, write, store_load) + a = new + a.pre_post_index = pre_post + a.up_down = up_down + a.write_base = write + a.store_load = store_load + a + end + + # Build representation for source value + def build_operand(arg) + if (arg.is_a?(Asm::Parser::RegisterListArgNode)) + @operand = 0 + arg.registers.each do |reg_node| + reg = reg_ref(reg_node) + @operand |= (1 << reg) + end + else + raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, arg) + end + end + + def write(io, as) + val = operand | (rn << 16) | (store_load << 16+4) | + (write_base << 16+4+1) | (s << 16+4+1+1) | (up_down << 16+4+1+1+1) | + (pre_post_index << 16+4+1+1+1+1) | (inst_class << 16+4+1+1+1+1+2) | + (cond << 16+4+1+1+1+1+2+2) + io.write_uint32 val + end + end + end +end diff --git 
a/lib/asm/arm/code_generator.rb b/lib/asm/arm/code_generator.rb new file mode 100644 index 00000000..8a89f54d --- /dev/null +++ b/lib/asm/arm/code_generator.rb @@ -0,0 +1,126 @@ +require_relative 'arm_assembler' +require_relative 'parser' +require 'stringio' + +module Asm + module Arm + + class CodeGenerator + def initialize + @asm = Asm::Assembler.new + @externs = [] + end + + def data(str) + @asm.add_object Asm::DataObject.new(str) + end + + %w(r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 + r13 r14 r15 a1 a2 a3 a4 v1 v2 v3 v4 v5 v6 + rfp sl fp ip sp lr pc + ).each { |reg| + define_method(reg) { + [:reg, reg] + } + } + + def instruction(name, *args) + node = Asm::Parser::InstructionNode.new + node.opcode = name.to_s + node.args = [] + + args.each { |arg| + if (arg.is_a?(Array)) + if (arg[0] == :reg) + node.args << Asm::Parser::RegisterArgNode.new { |n| + n.name = arg[1] + } + end + elsif (arg.is_a?(Integer)) + node.args << Asm::Parser::NumLiteralArgNode.new { |n| + n.value = arg + } + elsif (arg.is_a?(Symbol)) + node.args << Asm::Parser::LabelRefArgNode.new { |n| + n.label = arg.to_s + } + elsif (arg.is_a?(GeneratorLabel) or arg.is_a?(GeneratorExternLabel)) + node.args << arg + else + raise 'Invalid argument `%s\' for instruction' % arg.inspect + end + } + + @asm.add_object Asm::ARM::Instruction.new(node) + end + + %w(adc add and bic eor orr rsb rsc sbc sub + mov mvn cmn cmp teq tst b bl bx swi strb + ).each { |inst| + define_method(inst) { |*args| + instruction inst.to_sym, *args + } + define_method(inst+'s') { |*args| + instruction (inst+'s').to_sym, *args + } + %w(al eq ne cs mi hi cc pl ls vc + lt le ge gt vs + ).each { |cond_suffix| + define_method(inst+cond_suffix) { |*args| + instruction (inst+cond_suffix).to_sym, *args + } + define_method(inst+'s'+cond_suffix) { |*args| + instruction (inst+'s'+cond_suffix).to_sym, *args + } + } + } + + class GeneratorLabel < Asm::LabelObject + def initialize(asm) + @asm = asm + end + def set! 
+ @asm.add_object self + end + end + + class GeneratorExternLabel < Asm::LabelObject + def initialize(name) + @name = name + extern! + end + attr_reader :name + end + + def label + GeneratorLabel.new(@asm) + end + + def label! + lbl = GeneratorLabel.new(@asm) + lbl.set! + lbl + end + + def extern(sym) + if (lbl = @externs.find { |extern| extern.name == sym }) + lbl + else + @externs << lbl = GeneratorExternLabel.new(sym) + @asm.add_object lbl + lbl + end + end + + def assemble + io = StringIO.new + @asm.assemble(io) + io.string + end + + def relocations + @asm.relocations + end + end + end +end \ No newline at end of file diff --git a/lib/asm/arm/instruction.rb b/lib/asm/arm/instruction.rb new file mode 100644 index 00000000..e3ab99d9 --- /dev/null +++ b/lib/asm/arm/instruction.rb @@ -0,0 +1,149 @@ +module Asm + module Arm + class Asm::ARM::Instruction + include Asm::ARM::InstructionTools + + COND_POSTFIXES = Regexp.union(%w(eq ne cs cc mi pl vs vc hi ls ge lt gt le al)).source + def initialize(node, ast_asm = nil) + @node = node + @ast_asm = ast_asm + opcode = node.opcode + args = node.args + + opcode = opcode.downcase + @cond = :al + if (opcode =~ /(#{COND_POSTFIXES})$/) + @cond = $1.to_sym + opcode = opcode[0..-3] + end + if (opcode =~ /s$/) + @s = true + opcode = opcode[0..-2] + else + @s = false + end + @opcode = opcode.downcase.to_sym + @args = args + end + attr_reader :opcode, :args + + OPC_DATA_PROCESSING = 0b00 + OPC_MEMORY_ACCESS = 0b01 + OPC_STACK = 0b10 + # These are used differently in the + # instruction encoders + OPCODES = { + :adc => 0b0101, :add => 0b0100, + :and => 0b0000, :bic => 0b1110, + :eor => 0b0001, :orr => 0b1100, + :rsb => 0b0011, :rsc => 0b0111, + :sbc => 0b0110, :sub => 0b0010, + + # for these Rn is sbz (should be zero) + :mov => 0b1101, + :mvn => 0b1111, + # for these Rd is sbz and S=1 + :cmn => 0b1011, + :cmp => 0b1010, + :teq => 0b1001, + :tst => 0b1000, + + :b => 0b1010, + :bl => 0b1011, + :bx => 0b00010010 + } + COND_BITS = { + 
:al => 0b1110, :eq => 0b0000, + :ne => 0b0001, :cs => 0b0010, + :mi => 0b0100, :hi => 0b1000, + :cc => 0b0011, :pl => 0b0101, + :ls => 0b1001, :vc => 0b0111, + :lt => 0b1011, :le => 0b1101, + :ge => 0b1010, :gt => 0b1100, + :vs => 0b0110 + } + + RelocHandler = Asm::ARM.method(:write_resolved_relocation) + + def assemble(io, as) + s = @s ? 1 : 0 + case opcode + when :adc, :add, :and, :bic, :eor, :orr, :rsb, :rsc, :sbc, :sub + a = BuilderA.make(OPC_DATA_PROCESSING, OPCODES[opcode], s) + a.cond = COND_BITS[@cond] + a.rd = reg_ref(args[0]) + a.rn = reg_ref(args[1]) + a.build_operand args[2] + a.write io, as + when :cmn, :cmp, :teq, :tst + a = BuilderA.make(OPC_DATA_PROCESSING, OPCODES[opcode], 1) + a.cond = COND_BITS[@cond] + a.rn = reg_ref(args[0]) + a.rd = 0 + a.build_operand args[1] + a.write io, as + when :mov, :mvn + a = BuilderA.make(OPC_DATA_PROCESSING, OPCODES[opcode], s) + a.cond = COND_BITS[@cond] + a.rn = 0 + a.rd = reg_ref(args[0]) + a.build_operand args[1] + a.write io, as + when :strb, :str + a = BuilderB.make(OPC_MEMORY_ACCESS, (opcode == :strb ? 1 : 0), 0) + a.cond = COND_BITS[@cond] + a.rd = reg_ref(args[1]) + a.build_operand args[0] + a.write io, as, @ast_asm, self + when :ldrb, :ldr + a = BuilderB.make(OPC_MEMORY_ACCESS, (opcode == :ldrb ? 
1 : 0), 1) + a.cond = COND_BITS[@cond] + a.rd = reg_ref(args[0]) + a.build_operand args[1] + a.write io, as, @ast_asm, self + when :push, :pop + # downward growing, decrement before memory access + # official ARM style stack as used by gas + if (opcode == :push) + a = BuilderD.make(1,0,1,0) + else + a = BuilderD.make(0,1,1,1) + end + a.cond = COND_BITS[@cond] + a.rn = 13 # sp + a.build_operand args[0] + a.write io, as + when :b, :bl + arg = args[0] + if (arg.is_a?(Asm::Parser::NumLiteralArgNode)) + jmp_val = arg.value >> 2 + packed = [jmp_val].pack('l') + # signed 32-bit, condense to 24-bit + # TODO add check that the value fits into 24 bits + io << packed[0,3] + elsif (arg.is_a?(Asm::LabelObject) or arg.is_a?(Asm::Parser::LabelRefArgNode)) + arg = @ast_asm.object_for_label(arg.label, self) if arg.is_a?(Asm::Parser::LabelRefArgNode) + as.add_relocation(io.tell, arg, Asm::ARM::R_ARM_PC24, RelocHandler) + io << "\x00\x00\x00" + end + io.write_uint8 OPCODES[opcode] | (COND_BITS[@cond] << 4) + when :bx + rm = reg_ref(args[0]) + io.write_uint32 rm | (0b1111111111110001 << 4) | (OPCODES[:bx] << 16+4) | + (COND_BITS[@cond] << 16+4+8) + when :swi + arg = args[0] + if (arg.is_a?(Asm::Parser::NumLiteralArgNode)) + packed = [arg.value].pack('L')[0,3] + io << packed + io.write_uint8 0b1111 | (COND_BITS[@cond] << 4) + else + raise Asm::AssemblyError.new(Asm::ERRSTR_INVALID_ARG, arg) + end + else + raise Asm::AssemblyError.new("unknown instruction #{opcode}", @node) + end + end + end + end +end \ No newline at end of file diff --git a/lib/asm/arm/instruction_tools.rb b/lib/asm/arm/instruction_tools.rb new file mode 100644 index 00000000..4e437aca --- /dev/null +++ b/lib/asm/arm/instruction_tools.rb @@ -0,0 +1,25 @@ +module Asm + module Arm + module Asm::ARM::InstructionTools + def reg_ref(arg) + if (not arg.is_a?(Asm::Parser::RegisterArgNode)) + raise Asm::AssemblyError.new('argument must be a register', arg) + end + + ref = + {'r0' => 0, 'r1' => 1, 'r2' => 2, 'r3' => 3, 'r4' => 
4, 'r5' => 5, + 'r6' => 6, 'r7' => 7, 'r8' => 8, 'r9' => 9, 'r10' => 10, 'r11' => 11, + 'r12' => 12, 'r13' => 13, 'r14' => 14, 'r15' => 15, 'a1' => 0, 'a2' => 1, + 'a3' => 2, 'a4' => 3, 'v1' => 4, 'v2' => 5, 'v3' => 6, 'v4' => 7, 'v5' => 8, + 'v6' => 9, 'rfp' => 9, 'sl' => 10, 'fp' => 11, 'ip' => 12, 'sp' => 13, + 'lr' => 14, 'pc' => 15}[arg.name.downcase] + + if (not ref) + raise Asm::AssemblyError.new('unknown register %s' % arg.name.downcase, arg) + end + + ref + end + end + end +end \ No newline at end of file diff --git a/lib/asm/assembler.rb b/lib/asm/assembler.rb new file mode 100644 index 00000000..bf40e2c3 --- /dev/null +++ b/lib/asm/assembler.rb @@ -0,0 +1,43 @@ +module Asm + ERRSTR_NUMERIC_TOO_LARGE = 'cannot fit numeric literal argument in operand' + ERRSTR_INVALID_ARG = 'invalid operand argument' + + class Assembler + def initialize + @objects = [] + @label_objects = [] + @label_callbacks = [] + @relocations = [] + end + attr_reader :relocations, :objects + + def add_object(obj) + @objects << obj + end + + def add_relocation(*args) + @relocations << Asm::Relocation.new(*args) + end + + def register_label_callback(label, io_pos, &block) + @label_callbacks << [label, io_pos, block] + end + + def assemble(io) + @objects.each do |obj| + obj.assemble io, self + end + + @relocations.delete_if do |reloc| + io.seek reloc.position + if (reloc.label.extern?) + reloc.handler.call(io, io.tell, reloc.type) + else + reloc.handler.call(io, reloc.label.address, reloc.type) + end + not reloc.label.extern? 
+ end + end + end +end + diff --git a/lib/asm/assembly_error.rb b/lib/asm/assembly_error.rb new file mode 100644 index 00000000..ee4cf3f4 --- /dev/null +++ b/lib/asm/assembly_error.rb @@ -0,0 +1,12 @@ +module Asm + class AssemblyError < StandardError + def initialize(message, node) + super(message) + + @node = node + end + attr_reader :node + end + +end + \ No newline at end of file diff --git a/lib/asm/ast_assembler.rb b/lib/asm/ast_assembler.rb new file mode 100644 index 00000000..3438cb1b --- /dev/null +++ b/lib/asm/ast_assembler.rb @@ -0,0 +1,85 @@ +module Asm + class AstAssembler + def initialize(asm_arch) + @asm_arch = asm_arch + + @symbols = {} + @inst_label_context = {} + + @asm = Asm::Assembler.new + end + + def assembler + @asm + end + + def load_ast(ast) + label_breadcrumb = [] + ast.children.each do |cmd| + if (cmd.is_a?(Asm::Parser::LabelNode)) + m = /^\/+/.match(cmd.name) + count = m ? m[0].length : 0 + label_breadcrumb = label_breadcrumb[0,count] + label_breadcrumb << cmd.name[count..-1] + @asm.add_object object_for_label(label_breadcrumb.join('/')) + elsif (cmd.is_a?(Asm::Parser::InstructionNode)) + inst = @asm_arch::Instruction.new(cmd, self) + @asm.add_object inst + @inst_label_context[inst] = label_breadcrumb + elsif (cmd.is_a?(Asm::Parser::DirectiveNode)) + if (cmd.name == 'global') + symbol_for_label(cmd.value)[:linkage] = Elf::Constants::STB_GLOBAL + elsif (cmd.name == 'extern') + object_for_label(cmd.value).extern! 
+ elsif (cmd.name == 'hexdata') + bytes = cmd.value.strip.split(/\s+/).map do |hex| + hex.to_i(16) + end.pack('C*') + @asm.add_object Asm::DataObject.new(bytes) + elsif (cmd.name == "asciz") + str = eval(cmd.value) + "\x00" + @asm.add_object Asm::DataObject.new(str) + elsif (defined?(Asm::ARM) and cmd.name == 'addrtable') + @asm.add_object Asm::ARM::AddrTableObject.new + else + raise Asm::AssemblyError.new('unknown directive', cmd) + end + end + end + end + + # instruction is user for label context + def symbol_for_label(name, instruction=nil) + if (instruction) + context = @inst_label_context[instruction] + m = /^(\/*)(.+)/.match(name) + breadcrumb = context[0,m[1].length] + breadcrumb << m[2] + qual_name = breadcrumb.join('/') + else + qual_name = name + end + + if (not @symbols[qual_name]) + @symbols[name] = {:label => Asm::LabelObject.new, :linkage => Elf::Constants::STB_LOCAL, :name => qual_name} + end + @symbols[qual_name] + end + + def object_for_label(name, instruction=nil) + symbol_for_label(name, instruction)[:label] + end + + def assemble(io) + @asm.assemble io + end + + def symbols + @symbols.values + end + + def relocations + @asm.relocations + end + end +end \ No newline at end of file diff --git a/lib/asm/command_line.rb b/lib/asm/command_line.rb new file mode 100644 index 00000000..ba665062 --- /dev/null +++ b/lib/asm/command_line.rb @@ -0,0 +1,149 @@ +require_relative 'parser' +require_relative 'assembler' +require_relative 'objectwriter' +require 'optparse' +require 'ostruct' + +module Asm + class CommandLine + def initialize + options = OpenStruct.new + options.output_file = "a.out" + options.target = :arm + + opts = OptionParser.new do |opts| + opts.banner = "Usage: as [options] " + + opts.separator "" + opts.separator "Options:" + + opts.on("-t", "--target TARGET", + "Specify target architecture (arm [default], ttk91)") { |o| + options.target = o.to_sym + if (not [:arm, :ttk91].include?(options.target)) + puts opts + exit + end + } + + 
opts.on("-o", "--output FILENAME", + "Specify output filename for object file") { |o| + options.output_file = o + } + + opts.on("-s", "--show-ast", + "Show parse tree") { |o| + options.show_ast = true + } + + opts.on_tail("-h", "--help", "Show this message") { + puts opts + exit + } + end + + opts.parse!(ARGV) + + options.input_file = ARGV.shift + if (not options.input_file) + puts opts + exit + end + + @options = options + end + attr_reader :options + + def run + begin + if (options.input_file == '-') + code = $stdin.read + else + code = File.read(options.input_file) + end + rescue => err + puts 'as: could not read input file: ' + err.message + exit 2 + end + + begin + ast = Asm::Parser.parse(code) + rescue Asm::ParseError => err + puts 'as: parse error on line %d, column %d' % [err.line+1, err.column+1] + line = code.split("\n")[err.line] + puts line.gsub(/\s/, ' ') + puts ' ' * (err.column-1) + '^' + puts ' ' + err.message + exit 3 + end + + if (options.show_ast) + require 'pp' + pp ast + exit 0 + end + + case options.target + when :arm + require_relative 'arm_assembler.rb' + as_module = Asm::ARM + as_target = Elf::Constants::TARGET_ARM + when :ttk91 + require_relative 'ttk91_assembler.rb' + as_module = Asm::TTK91 + as_target = Elf::Constants::TARGET_TTK91 + end + + asm = Asm::AstAssembler.new(as_module) + begin + asm.load_ast ast + data = StringIO.new + asm.assemble(data) + symbols = asm.symbols + rescue Asm::AssemblyError => err + if (err.node) + puts 'as: assembly error on line %d, column %d' % [ + err.node.line+1, err.node.column+1] + line = code.split("\n")[err.node.line] + puts line.gsub(/\s/, ' ') + puts ' ' * (err.node.column-1) + '^' + puts ' ' + err.message + else + puts 'as: ' + err.message + end + exit 4 + end + + writer = Asm::ObjectWriter.new(as_target) + writer.set_text data.string + + reloc_name_ref = {} + + symbols.each { |symbol| + label = symbol[:label] + if (label.extern?) 
+ reloc_name_ref[label] = symbol[:name] + writer.add_reloc_symbol symbol[:name] + else + writer.add_symbol symbol[:name], symbol[:label].address, symbol[:linkage] + end + } + + asm.relocations.each { |reloc| + writer.add_reloc reloc.position, reloc_name_ref[reloc.label], reloc.type + } + + begin + writer.save(options.output_file) + rescue => err + puts 'as: cannot save output file: ' + err.message + end + end + end + +end + + +if (__FILE__ == $0) + Asm::CommandLine.new.run +end diff --git a/lib/asm/data_object.rb b/lib/asm/data_object.rb new file mode 100644 index 00000000..8c2a01cc --- /dev/null +++ b/lib/asm/data_object.rb @@ -0,0 +1,11 @@ +module Asm + class DataObject + def initialize(data) + @data = data + end + + def assemble(io, as) + io << @data + end + end +end \ No newline at end of file diff --git a/lib/asm/label_object.rb b/lib/asm/label_object.rb new file mode 100644 index 00000000..0834aeeb --- /dev/null +++ b/lib/asm/label_object.rb @@ -0,0 +1,32 @@ +module Asm + + class LabelObject + def initialize + @address = nil + @extern = false + end + attr_writer :address + + def address + return 0 if extern? + + if (@address.nil?) + raise 'Tried to use label object that has not been set' + end + @address + end + + def assemble(io, as) + self.address = io.tell + end + + def extern? + @extern + end + + def extern! 
+ @extern = true + end + end + +end \ No newline at end of file diff --git a/lib/asm/object_writer.rb b/lib/asm/object_writer.rb new file mode 100644 index 00000000..9743c3ca --- /dev/null +++ b/lib/asm/object_writer.rb @@ -0,0 +1,48 @@ +require_relative 'elfobject' + +module Asm + + class ObjectWriter + def initialize(target) + @object = ELF::ObjectFile.new(target) + + sym_strtab = ELF::StringTableSection.new(".strtab") + @object.add_section sym_strtab + @symbol_table = ELF::SymbolTableSection.new(".symtab", sym_strtab) + @object.add_section @symbol_table + + @text = ELF::TextSection.new(".text") + @object.add_section @text + + @reloc_table = ELF::RelocationTableSection.new(".text.rel", @symbol_table, @text) + @object.add_section @reloc_table + end + + def set_text(text) + @text.text = text + add_symbol "_start", 0 + end + + def add_symbol(name, offset, linkage = Elf::Constants::STB_GLOBAL) + @symbol_table.add_func_symbol name, offset, @text, linkage + end + + def add_reloc_symbol(name) + @symbol_table.add_func_symbol name, 0, nil, Elf::Constants::STB_GLOBAL + end + + def add_reloc(offset, label, type) + @reloc_table.add_reloc offset, label, type + end + + def save(filename) + File.open(filename, 'wb') { |fp| + write fp + } + end + + def write(io) + @object.write io + end + end +end \ No newline at end of file diff --git a/lib/asm/parser.rb b/lib/asm/parser.rb new file mode 100644 index 00000000..24bdf581 --- /dev/null +++ b/lib/asm/parser.rb @@ -0,0 +1,269 @@ +require_relative 'str_scanner' + +module AS + class ParseError < StandardError + def initialize(message, s) + super(message) + + @line = s.line + @column = s.column + end + attr_reader :line, :column + end +end + +class Asm::Parser + def initialize(str) + scanner = Asm::Scanner.new(str) + + @ast = parse_toplevel scanner + end + attr_reader :ast + + def self.parse(str) + new(str).ast + end + + class Node + def initialize(s = nil) + if (s) + @line = s.prev_line + @column = s.prev_column + else + @line = 0 + 
module Asm
  # Raised when the assembly source cannot be parsed. Records the scanner's
  # line and column at the moment the error was created.
  class ParseError < StandardError
    attr_reader :line, :column

    # message - description of the failure
    # s       - scanner object; its #line and #column are captured
    def initialize(message, s)
      super(message)
      @line = s.line
      @column = s.column
    end
  end
end

# The error class used to be declared under AS although every raise site
# says Asm::ParseError. Keep the old spelling resolvable for existing code.
module AS
  ParseError = Asm::ParseError unless const_defined?(:ParseError, false)
end

module Asm
  # Recursive-descent parser turning assembly text into a tree of Node
  # objects. Every parse_* method takes the scanner, returns a node on
  # success and nil when the construct is not present at the current
  # position.
  class Parser
    attr_reader :ast

    # Parses +str+ immediately; the resulting tree is available as #ast.
    def initialize(str)
      @ast = parse_toplevel(Scanner.new(str))
    end

    # Convenience wrapper: parses +str+ and returns the toplevel node.
    def self.parse(str)
      new(str).ast
    end

    # Base class of all tree nodes. Remembers the scanner position that
    # preceded the most recent scan (prev_line / prev_column) and yields
    # itself so callers can fill in attributes.
    class Node
      attr_reader :line, :column

      def initialize(s = nil)
        if s
          @line = s.prev_line
          @column = s.prev_column
        else
          @line = 0
          @column = 0
        end
        yield self if block_given?
      end
    end

    class ToplevelNode < Node
      attr_accessor :children
    end

    # Parses elements until the scanner reports end of input.
    def parse_toplevel(s)
      node = ToplevelNode.new(s)
      node.children = []
      node.children << parse(s) until s.eos?
      node
    end

    # Element parsers, tried in this order.
    ELEMENT_PARSERS = [:parse_comment, :parse_directive, :parse_label, :parse_instruction].freeze

    # Parses one toplevel element (comment, directive, label or
    # instruction), skipping surrounding whitespace.
    # Raises ParseError when none of them matches.
    def parse(s)
      s.scan(/\s*/)
      node = first_match(ELEMENT_PARSERS, s)
      raise ParseError.new('could not parse element', s) unless node
      s.scan(/\s*/)
      node
    end

    class CommentNode < Node; end

    # A comment runs from ';' to the end of the line.
    def parse_comment(s)
      CommentNode.new(s) if s.scan(/;.*?$/)
    end

    class DirectiveNode < Node
      attr_accessor :name, :value
    end

    # A directive is '.name' optionally followed by a value on the same line.
    def parse_directive(s)
      m = s.scan(/\.(\w+)(?:(?!$)\s+(.+)\s*?$)?/)
      return unless m
      DirectiveNode.new(s) do |n|
        n.name = m[0]
        n.value = m[1]
      end
    end

    class LabelNode < Node
      attr_accessor :name
    end

    # A label definition is a name followed by ':'.
    def parse_label(s)
      m = s.scan(/(\/*\w+):/)
      return unless m
      LabelNode.new(s) { |n| n.name = m[0] }
    end

    class InstructionNode < Node
      attr_accessor :opcode, :args
    end

    # An instruction is an opcode word followed by comma-separated arguments.
    def parse_instruction(s)
      m = s.scan(/(\w+)/)
      return unless m
      node = InstructionNode.new(s) do |n|
        n.opcode = m[0]
        n.args = []
      end
      # End of line or the start of a comment means "no arguments". The ';'
      # is only looked at, not consumed: consuming it made the comment text
      # that follows parse as another instruction.
      unless s.scan(/\s*(?=$|;)/)
        loop do
          node.args << parse_arg(s)
          break unless s.scan(/\s*,/)
        end
      end
      node
    end

    class ArgNode < Node
    end

    # Argument parsers, tried in this order.
    ARG_PARSERS = [:parse_reference, :parse_register, :parse_register_list,
                   :parse_num_literal, :parse_label_ref].freeze

    # Parses one argument plus an optional trailing shift or arithmetic
    # operator, which then wraps the argument.
    # Raises ParseError when no argument is found.
    def parse_arg(s)
      s.scan(/\s*/)
      node = first_match(ARG_PARSERS, s)
      raise ParseError.new('expected argument but none found', s) unless node

      wrapper = parse_arg_op(s)
      if wrapper
        wrapper.argument = node
        node = wrapper
      end

      s.scan(/\s*/)
      node
    end

    # Operator parsers, tried in this order.
    ARG_OP_PARSERS = [:parse_shift, :parse_math].freeze

    # Parses an optional operator following an argument; nil when absent.
    def parse_arg_op(s)
      s.scan(/\s*/)
      node = first_match(ARG_OP_PARSERS, s)
      s.scan(/\s*/)
      node
    end

    class ShiftNode < Node
      attr_accessor :type, :value, :argument
    end

    # Parses a shift operator; 'rrx' takes no amount, all others take one
    # argument. The type is stored lower-cased.
    def parse_shift(s)
      m = s.scan(/(lsl|lsr|asr|ror|rrx)\s+/i)
      return unless m
      kind = m[0].downcase
      amount = kind == 'rrx' ? nil : parse_arg(s)
      return nil if kind != 'rrx' && !amount
      ShiftNode.new(s) do |n|
        n.type = kind
        n.value = amount
      end
    end

    class MathNode < Node
      attr_accessor :left, :right, :op
      alias_method :argument, :left
      alias_method :argument=, :left=
    end

    # Parses '+' or '-' followed by a right-hand argument; the left side is
    # assigned later by parse_arg through #argument=.
    def parse_math(s)
      op = s.scan_str(/[\+\-]/)
      return unless op
      rhs = parse_arg(s)
      raise ParseError.new('expected right side for arithmetic op', s) unless rhs
      MathNode.new(s) do |n|
        n.right = rhs
        n.op = op
      end
    end

    REGISTER_NAMES = %w(r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12
                        r13 r14 r15 a1 a2 a3 a4 v1 v2 v3 v4 v5 v6
                        rfp sl fp ip sp lr pc).freeze

    # A register name must not be followed by another word character.
    # Without that guard the alternation stopped at "r1" for "r10".."r15"
    # and treated labels such as "spin" as register "sp".
    REGISTER_REGEXP = /(?:#{Regexp.union(*REGISTER_NAMES).source})(?!\w)/

    class RegisterArgNode < ArgNode
      attr_accessor :name
    end

    def parse_register(s)
      name = s.scan_str(REGISTER_REGEXP)
      return unless name
      RegisterArgNode.new(s) { |n| n.name = name }
    end

    class RegisterListArgNode < ArgNode
      attr_accessor :registers
    end

    # Parses '{reg, reg, ...}'. Returns nil as soon as something other than
    # a register shows up inside the braces.
    def parse_register_list(s)
      return unless s.scan(/\{/)
      node = RegisterListArgNode.new(s) { |n| n.registers = [] }
      loop do
        s.scan(/\s*/)
        reg = parse_register(s)
        return nil unless reg
        s.scan(/\s*,?/)
        node.registers << reg
        break if s.scan(/\}/)
      end
      node
    end

    class NumLiteralArgNode < ArgNode
      attr_accessor :value
    end
    class NumEquivAddrArgNode < NumLiteralArgNode
    end

    # Parses '#number' or '=#number'; the text is converted with Integer().
    def parse_num_literal(s)
      m = s.scan(/(=?)#(-?(?:0x)?[0-9A-Fa-f]+)/)
      return unless m
      klass = m[0] == '=' ? NumEquivAddrArgNode : NumLiteralArgNode
      klass.new(s) { |n| n.value = Integer(m[1]) }
    end

    class LabelRefArgNode < ArgNode
      attr_accessor :label, :label_object
    end
    class LabelEquivAddrArgNode < LabelRefArgNode
    end

    # Parses a label reference, optionally prefixed with '='.
    def parse_label_ref(s)
      m = s.scan(/(=?)(\/*\w+)/)
      return unless m
      klass = m[0] == '=' ? LabelEquivAddrArgNode : LabelRefArgNode
      klass.new(s) { |n| n.label = m[1] }
    end

    class ReferenceArgNode < ArgNode
      attr_accessor :argument
    end

    # Parses '[argument]'.
    def parse_reference(s)
      return unless s.scan(/\[/)
      arg = parse_arg(s)
      return unless arg && s.scan(/\]/)
      ReferenceArgNode.new(s) { |n| n.argument = arg }
    end

    private

    # Calls each named parser in turn and returns the first non-nil node.
    def first_match(parser_names, s)
      parser_names.each do |parser_name|
        node = send(parser_name, s)
        return node if node
      end
      nil
    end
  end
end
module Asm
  # Read-only record describing one relocation: where it applies, which
  # label it refers to, its type, and the handler supplied by the creator.
  class Relocation
    attr_reader :position, :label, :type, :handler

    # pos     - stored as #position
    # label   - stored as #label
    # type    - stored as #type
    # handler - stored as #handler
    def initialize(pos, label, type, handler)
      @position, @label, @type, @handler = pos, label, type, handler
    end
  end
end
module Asm
  # Cursor over a source string that matches regular expressions at the
  # current position and tracks line/column (both zero-based) as text is
  # consumed. prev_line / prev_column hold the position before the most
  # recent consumption.
  class Scanner
    attr_accessor :string, :pos, :line, :column, :prev_line, :prev_column

    # Only these option bits are carried over when a pattern is re-anchored.
    CARRIED_OPTIONS = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE

    def initialize(str)
      @string = str
      @pos = 0
      @line = 0
      @column = 0
      # Start at the origin instead of nil so nodes created before the first
      # scan still get a numeric position.
      @prev_line = 0
      @prev_column = 0
    end

    # The not yet consumed part of the input.
    def rest
      string[pos..-1]
    end

    # Moves the cursor past +str+ (text that was just matched) and updates
    # the line/column bookkeeping.
    def advance_str(str)
      self.prev_line = line
      self.prev_column = column
      self.pos += str.length
      last_newline = str.rindex("\n")
      if last_newline
        self.line += str.count("\n")
        # Characters after the last newline; the previous formula was one
        # too high compared with how columns are counted on the first line.
        self.column = str.length - last_newline - 1
      else
        self.column += str.length
      end
    end

    # Matches +regexp+ at the cursor. On success consumes the match and
    # returns its capture groups (an empty array when there are none);
    # returns nil otherwise.
    def scan(regexp)
      match = match_here(regexp)
      return nil unless match
      advance_str match.to_s
      match.captures
    end

    # Like #scan but returns the matched text itself.
    def scan_str(regexp)
      match = match_here(regexp)
      return nil unless match
      advance_str match.to_s
      match.to_s
    end

    # True when +regexp+ matches at the cursor; consumes nothing.
    def lookahead(regexp)
      match_here(regexp) ? true : false
    end

    # True once the whole input has been consumed.
    def eos?
      pos >= string.length
    end

    private

    # Anchors +regexp+ to the start of the remaining input. The pattern's
    # own options are preserved; rebuilding from #source alone silently
    # dropped flags such as /i.
    def match_here(regexp)
      anchored = Regexp.new("\\A(?:#{regexp.source})", regexp.options & CARRIED_OPTIONS)
      anchored.match(rest)
    end
  end
end
require 'stringio'

# Binary read helpers for any object that provides __sr_read(len) and
# __sr_gets(separator). Multi-byte values use Array#pack's native-endian
# directives ('L', 'S', ...), so the byte order follows the host machine.
module StreamReader
  # Reads +count+ values of +size+ bytes each and unpacks them with the
  # pack directive +type+. Returns an array when more than one value was
  # unpacked, otherwise the single value.
  def read_binary(size, count, type)
    raw = __sr_read(size * count)
    values = raw.unpack(type * count)
    values.length > 1 ? values : values[0]
  end

  def read_uint8(n = 1)
    read_binary(1, n, 'C')
  end

  def read_uint16(n = 1)
    read_binary(2, n, 'S')
  end

  def read_uint32(n = 1)
    read_binary(4, n, 'L')
  end

  def read_uint64(n = 1)
    read_binary(8, n, 'Q')
  end

  def read_sint16(n = 1)
    read_binary(2, n, 's')
  end

  def read_sint32(n = 1)
    read_binary(4, n, 'l')
  end

  def read_sint64(n = 1)
    read_binary(8, n, 'q')
  end

  def read_float(n = 1)
    read_binary(4, n, 'F')
  end

  def read_double(n = 1)
    read_binary(8, n, 'D')
  end

  # Reads +length+ bytes and drops every NUL byte from the result.
  def read_cstr_fixed(length)
    __sr_read(length).gsub("\000", '')
  end

  # Reads up to and including the next NUL byte and returns the text
  # without that terminator (the counterpart of write_cstr_terminated).
  # Returns nil at end of stream. This used to fail with NoMethodError
  # because no host class defined __sr_gets.
  def read_cstr_terminated
    text = __sr_gets(0.chr)
    text && text.chomp(0.chr)
  end

  # Reads a one-byte length followed by that many bytes.
  def read_cstr_prefixed
    len = read_uint8
    __sr_read(len)
  end

  # Reads +len+ raw bytes.
  def read_data(len)
    __sr_read(len)
  end
end

# Binary write helpers for any object that provides __sr_write(str).
# Uses the same native-endian pack directives as StreamReader.
module StreamWriter
  # Packs every element of +values+ with the directive +type+ and writes
  # the resulting bytes.
  def write_binary(values, type)
    __sr_write(values.pack(type * values.length))
  end

  def write_uint8(*args)
    write_binary(args, 'C')
  end

  def write_uint16(*args)
    write_binary(args, 'S')
  end

  def write_uint32(*args)
    write_binary(args, 'L')
  end

  def write_uint64(*args)
    write_binary(args, 'Q')
  end

  def write_sint16(*args)
    write_binary(args, 's')
  end

  def write_sint32(*args)
    write_binary(args, 'l')
  end

  def write_sint64(*args)
    write_binary(args, 'q')
  end

  def write_float(*args)
    write_binary(args, 'F')
  end

  def write_double(*args)
    write_binary(args, 'D')
  end

  # Writes +str+ padded on the right with NUL bytes to +len+ characters.
  def write_cstr_fixed(str, len)
    __sr_write(str.ljust(len, 0.chr))
  end

  # Writes +str+ followed by a single NUL byte.
  def write_cstr_terminated(str)
    __sr_write(str + 0.chr)
  end

  # Writes a one-byte length followed by +str+.
  def write_cstr_prefixed(str)
    write_uint8(str.length)
    __sr_write(str)
  end

  def write_str(str)
    __sr_write(str)
  end

  def write_data(str)
    __sr_write(str)
  end
end

class IO
  include StreamReader
  include StreamWriter

  def __sr_read(len)
    read(len)
  end

  def __sr_gets(separator)
    gets(separator)
  end

  def __sr_write(str)
    write(str)
  end
end

class StringIO
  include StreamReader
  include StreamWriter

  def __sr_read(len)
    read(len)
  end

  def __sr_gets(separator)
    gets(separator)
  end

  def __sr_write(str)
    write(str)
  end
end
module Elf
  # Numeric constants used when writing ELF headers, section headers and
  # symbol entries. The section classes and the object writer look these up
  # as Elf::Constants, so that is where they are defined.
  module Constants
    # Object file types (e_type)
    ET_NONE = 0
    ET_REL = 1
    ET_EXEC = 2
    ET_DYN = 3
    ET_CORE = 4

    # Machine identifiers (e_machine)
    EM_NONE = 0
    EM_M32 = 1
    EM_SPARC = 2
    EM_386 = 3
    EM_68K = 4
    EM_88K = 5
    EM_860 = 7
    EM_MIPS = 8
    EM_ARM = 40

    # Format versions
    EV_NONE = 0
    EV_CURRENT = 1

    # File classes
    ELFCLASSNONE = 0
    ELFCLASS32 = 1
    ELFCLASS64 = 2

    # Data encodings
    ELFDATANONE = 0
    ELFDATA2LSB = 1
    ELFDATA2MSB = 2

    # Section types (sh_type)
    SHT_NULL = 0
    SHT_PROGBITS = 1
    SHT_SYMTAB = 2
    SHT_STRTAB = 3
    SHT_RELA = 4
    SHT_HASH = 5
    SHT_DYNAMIC = 6
    SHT_NOTE = 7
    SHT_NOBITS = 8
    SHT_REL = 9
    SHT_SHLIB = 10
    SHT_DYNSYM = 11

    # Section flags (sh_flags)
    SHF_WRITE = 0x1
    SHF_ALLOC = 0x2
    SHF_EXECINSTR = 0x4

    # Symbol bindings
    STB_LOCAL = 0
    STB_GLOBAL = 1
    STB_WEAK = 2

    # OS/ABI identifiers
    ABI_SYSTEMV = 0
    ABI_ARM = 0x61

    # Four raw instruction bytes; judging by the name, an ARM instruction
    # that loops forever.
    ARM_INFLOOP = "\x08\xf0\x4f\xe2".freeze

    # Target descriptions: [file class, data encoding, OS/ABI, machine]
    TARGET_ARM = [ELFCLASS32, ELFDATA2LSB, ABI_ARM, EM_ARM].freeze
    TARGET_X86 = [ELFCLASS32, ELFDATA2LSB, ABI_SYSTEMV, EM_386].freeze
  end
end

# The constants were originally declared as ELF::Constants; keep that name
# working for code that still spells the namespace in upper case.
module ELF
  Constants = Elf::Constants unless const_defined?(:Constants, false)
end
module Elf
  # In-memory model of a 32-bit relocatable ELF file: an ordered list of
  # sections that #write serialises together with the ELF header and the
  # section header table.
  class ObjectFile
    include ELF
    # The numeric constants (EV_CURRENT, ET_REL, SHT_*) are declared in
    # ELF::Constants; including ELF alone does not bring them into scope.
    include ELF::Constants

    # target - four-element array: [file class, data encoding, OS/ABI,
    #          machine], e.g. ELF::Constants::TARGET_ARM
    def initialize(target)
      @target = target

      @sections = []
      add_section NullSection.new
    end

    # Appends +section+ and tells it its index in the section list.
    def add_section(section)
      @sections << section
      section.index = @sections.length - 1
    end

    # Writes the complete object file to +io+. +io+ must support #<<, #tell,
    # #seek and the write_uint8/16/32 helpers.
    #
    # The section-name string table is built per call and is not added to
    # the stored section list, so writing the same object twice produces the
    # same output. When the method returns, +io+ is positioned at the end of
    # the written data.
    #
    # Raises RuntimeError for sections of type SHT_NOBITS.
    def write(io)
      shstrtab = StringTableSection.new('.shstrtab')
      sections = @sections + [shstrtab]
      shstrtab.index = sections.length - 1
      sections.each { |section| shstrtab.add_string section.name }

      # --- ELF header ---------------------------------------------------
      io << "\x7fELF"
      io.write_uint8 @target[0]
      io.write_uint8 @target[1]
      io.write_uint8 EV_CURRENT
      io.write_uint8 @target[2]
      io << "\x00" * 8 # pad

      io.write_uint16 ET_REL
      io.write_uint16 @target[3]
      io.write_uint32 EV_CURRENT
      io.write_uint32 0 # entry point
      io.write_uint32 0 # no program header table
      sh_offset_pos = io.tell
      io.write_uint32 0 # section header table offset, patched below
      io.write_uint32 0 # no flags
      io.write_uint16 52 # header length
      io.write_uint16 0 # program header length
      io.write_uint16 0 # program header count
      io.write_uint16 40 # section header length
      io.write_uint16 sections.length # section header count
      io.write_uint16 shstrtab.index # section name string table index

      # --- section contents ---------------------------------------------
      layout = sections.map do |section|
        offset = io.tell
        section.write(io)
        [section, offset, io.tell - offset]
      end

      # --- section header table -----------------------------------------
      sh_offset = io.tell
      layout.each do |section, offset, size|
        io.write_uint32 shstrtab.index_for(section.name)
        io.write_uint32 section.type
        io.write_uint32 section.flags
        io.write_uint32 section.addr
        if section.type == SHT_NOBITS
          raise 'SHT_NOBITS not handled yet'
        elsif section.type == SHT_NULL
          io.write_uint32 0
          io.write_uint32 0
        else
          io.write_uint32 offset
          io.write_uint32 size
        end
        io.write_uint32 section.link
        io.write_uint32 section.info
        io.write_uint32 section.alignment
        io.write_uint32 section.ent_size
      end
      end_of_file = io.tell

      # Patch the real section header table offset into the header, then
      # return to the end so later writes do not overwrite the header.
      io.seek sh_offset_pos
      io.write_uint32 sh_offset
      io.seek end_of_file
    end
  end
end
module Elf
  # Base class for everything that can be placed in an object file. It
  # supplies neutral defaults for the section header fields; subclasses
  # must provide #type and, to emit contents, #write(io).
  class Section
    attr_accessor :name, :index

    def initialize(name)
      @name = name
    end

    # Section type; subclasses have to override this.
    def type
      raise 'Reimplement #type'
    end

    def flags
      0
    end

    def addr
      0
    end

    def link
      0
    end

    def info
      0
    end

    def alignment
      1
    end

    def ent_size
      0
    end
  end

  # A table of NUL-terminated strings. Offset 0 always holds the empty
  # string; every added string is referred to by its byte offset.
  class StringTableSection < Section
    def initialize(*args)
      super

      # Binary buffer: offsets handed out by #index_for are byte offsets,
      # so the table must be measured in bytes, not characters.
      @string_data = String.new("\x00").force_encoding(Encoding::BINARY)
      @indices = { '' => 0 }
    end

    # Adds +str+ unless it is already present. Returns nil when the string
    # was already known.
    def add_string(str)
      return if @indices[str]

      @indices[str] = @string_data.bytesize
      @string_data << str.dup.force_encoding(Encoding::BINARY) << "\x00"
    end

    # Byte offset of +str+ inside the table, or nil when it was never added.
    def index_for(str)
      @indices[str]
    end

    # Appends the raw table bytes to +io+.
    def write(io)
      io << @string_data
    end

    # lib/elf/constants.rb declares the constants under the ELF namespace.
    def type
      ELF::Constants::SHT_STRTAB
    end
  end
end
module Elf
  # The ELF symbol table. Entry 0 of the written table is the reserved
  # undefined symbol; the symbols held here follow it, local symbols first.
  # Symbol names are stored in the string table given at construction.
  class SymbolTableSection < Section
    # One symbol: its name, value, owning section (nil for an undefined
    # symbol) and binding.
    Entry = Struct.new(:name, :value, :section, :linkage)

    def initialize(name, strtab)
      super(name)

      @strtab = strtab

      @symbols = []
    end

    # Adds a symbol. Local symbols are placed in front of all others so the
    # locals form a contiguous block at the start of the table.
    #
    # name         - symbol name (also added to the string table)
    # value        - symbol value written to the entry
    # text_section - section the symbol belongs to, or nil when undefined
    # linkage      - binding, one of the STB_* constants
    def add_func_symbol(name, value, text_section, linkage)
      @strtab.add_string name
      entry = Entry.new(name, value, text_section, linkage)
      # lib/elf/constants.rb declares the constants under the ELF namespace.
      if linkage == ELF::Constants::STB_LOCAL
        @symbols.unshift entry
      else
        @symbols.push entry
      end
    end

    # Position of the symbol called +name+ among the added symbols (the
    # reserved entry 0 is not counted), or nil when there is none.
    def index_for_name(name)
      @symbols.index { |sym| sym.name == name }
    end

    def type
      ELF::Constants::SHT_SYMTAB
    end

    def ent_size
      16
    end

    def link
      @strtab.index
    end

    # sh_info of a symbol table: one greater than the table index of the
    # last local symbol. The reserved entry 0 is itself local, so the
    # result is at least 1. (The old code compared a non-existent fifth
    # array element and therefore always answered 0.)
    def info
      last_local = @symbols.rindex { |sym| sym.linkage == ELF::Constants::STB_LOCAL }
      # Position in the written table: entry 0 precedes @symbols.
      table_index = last_local ? last_local + 1 : 0
      table_index + 1
    end

    # Writes the reserved entry followed by one 16-byte entry per symbol.
    def write(io)
      # reserved undefined symbol
      io.write_uint32 0
      io.write_uint32 0
      io.write_uint32 0
      io.write_uint8(ELF::Constants::STB_LOCAL << 4)
      io.write_uint8 0
      io.write_uint16 0

      @symbols.each do |sym|
        io.write_uint32 @strtab.index_for(sym.name)
        io.write_uint32 sym.value
        io.write_uint32 0
        # binding in the high nibble, symbol type 0 in the low nibble
        io.write_uint8((sym.linkage << 4) + 0)
        io.write_uint8 0
        # section index; 0 marks an undefined symbol
        io.write_uint16(sym.section ? sym.section.index : 0)
      end
    end
  end
end
# Manual smoke test: builds a minimal ARM object containing an (empty) text
# section and a global `_start` symbol, and writes it to test.o.
#
# The script does not load the library itself; the Elf classes, the ELF
# constants and the stream helpers have to be required by whoever runs it.
#
# The classes are declared in `module Elf` and the constants in
# ELF::Constants, so those are the names used here (the script previously
# asked for ELF::ObjectFile, ELF::TARGET_ARM and ELF::STB_GLOBAL, none of
# which are defined).
if (__FILE__ == $0)
  obj = Elf::ObjectFile.new ELF::Constants::TARGET_ARM

  sym_strtab = Elf::StringTableSection.new(".strtab")
  obj.add_section sym_strtab
  symtab = Elf::SymbolTableSection.new(".symtab", sym_strtab)
  obj.add_section symtab

  text_section = Elf::TextSection.new(".text")
  obj.add_section text_section

  symtab.add_func_symbol "_start", 0, text_section, ELF::Constants::STB_GLOBAL

  # Block form closes the file even when writing raises.
  File.open("test.o", "wb") { |fp| obj.write fp }
end