From cbefad339f81b8843af5aecd415c17dadb9245fe Mon Sep 17 00:00:00 2001 From: baishi Date: Sat, 30 May 2026 07:59:28 +0800 Subject: [PATCH] commit initial --- .gitignore | 14 + .../design.md | 392 ++++++++ .../requirements.md | 116 +++ .../tasklist.md | 96 ++ .../2026-05-20-tinycc-improvements/design.md | 461 +++++++++ .../requirements.md | 123 +++ .../tasklist.md | 80 ++ README.md | 240 +++++ TinyCC.sln | 43 + src/TinyCC.Cli/Program.cs | 78 ++ src/TinyCC.Cli/TinyCC.Cli.csproj | 14 + src/TinyCC.Core/C99/C99Extensions.cs | 221 +++++ src/TinyCC.Core/Class1.cs | 6 + .../CodeGen/OptimizedX64CodeGenerator.cs | 769 +++++++++++++++ src/TinyCC.Core/CodeGen/X64CodeGenerator.cs | 567 +++++++++++ src/TinyCC.Core/CompilerDriver.cs | 147 +++ src/TinyCC.Core/Diagnostics/ErrorInfo.cs | 83 ++ src/TinyCC.Core/Diagnostics/ErrorReporter.cs | 27 + src/TinyCC.Core/Diagnostics/IErrorReporter.cs | 12 + src/TinyCC.Core/IR/IrGenerator.cs | 374 ++++++++ src/TinyCC.Core/IR/IrInstructions.cs | 130 +++ src/TinyCC.Core/Lexer/Lexer.cs | 356 +++++++ src/TinyCC.Core/Lexer/Token.cs | 19 + src/TinyCC.Core/Lexer/TokenType.cs | 41 + src/TinyCC.Core/Parser/AstNodes.cs | 119 +++ src/TinyCC.Core/Parser/Parser.cs | 656 +++++++++++++ src/TinyCC.Core/Preprocessor/Preprocessor.cs | 652 +++++++++++++ src/TinyCC.Core/Semantic/SemanticAnalyzer.cs | 879 ++++++++++++++++++ src/TinyCC.Core/Target/ElfWriter.cs | 78 ++ src/TinyCC.Core/Target/PeWriter.cs | 257 +++++ src/TinyCC.Core/TinyCC.Core.csproj | 9 + test_output | Bin 0 -> 4159 bytes tests/TinyCC.E2ETests/E2ETestRunner.cs | 238 +++++ tests/TinyCC.E2ETests/E2ETests.cs | 58 ++ tests/TinyCC.E2ETests/TestCases.cs | 231 +++++ tests/TinyCC.E2ETests/TinyCC.E2ETests.csproj | 27 + tests/TinyCC.Tests/TinyCC.Tests.csproj | 27 + tests/TinyCC.Tests/UnitTest1.cs | 10 + tests/TinyCC.Tests/UnitTests.cs | 86 ++ 39 files changed, 7736 insertions(+) create mode 100644 .gitignore create mode 100644 .monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/design.md create mode 100644 .monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/requirements.md create mode 100644 .monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/tasklist.md create mode 100644 .monkeycode/specs/2026-05-20-tinycc-improvements/design.md create mode 100644 .monkeycode/specs/2026-05-20-tinycc-improvements/requirements.md create mode 100644 .monkeycode/specs/2026-05-20-tinycc-improvements/tasklist.md create mode 100644 README.md create mode 100644 TinyCC.sln create mode 100644 src/TinyCC.Cli/Program.cs create mode 100644 src/TinyCC.Cli/TinyCC.Cli.csproj create mode 100644 src/TinyCC.Core/C99/C99Extensions.cs create mode 100644 src/TinyCC.Core/Class1.cs create mode 100644 src/TinyCC.Core/CodeGen/OptimizedX64CodeGenerator.cs create mode 100644 src/TinyCC.Core/CodeGen/X64CodeGenerator.cs create mode 100644 src/TinyCC.Core/CompilerDriver.cs create mode 100644 src/TinyCC.Core/Diagnostics/ErrorInfo.cs create mode 100644 src/TinyCC.Core/Diagnostics/ErrorReporter.cs create mode 100644 src/TinyCC.Core/Diagnostics/IErrorReporter.cs create mode 100644 src/TinyCC.Core/IR/IrGenerator.cs create mode 100644 src/TinyCC.Core/IR/IrInstructions.cs create mode 100644 src/TinyCC.Core/Lexer/Lexer.cs create mode 100644 src/TinyCC.Core/Lexer/Token.cs create mode 100644 src/TinyCC.Core/Lexer/TokenType.cs create mode 100644 src/TinyCC.Core/Parser/AstNodes.cs create mode 100644 src/TinyCC.Core/Parser/Parser.cs create mode 100644 src/TinyCC.Core/Preprocessor/Preprocessor.cs create mode 100644 src/TinyCC.Core/Semantic/SemanticAnalyzer.cs create mode 100644 src/TinyCC.Core/Target/ElfWriter.cs create mode 100644 src/TinyCC.Core/Target/PeWriter.cs create mode 100644 src/TinyCC.Core/TinyCC.Core.csproj create mode 100644 test_output create mode 100644 tests/TinyCC.E2ETests/E2ETestRunner.cs create mode 100644 tests/TinyCC.E2ETests/E2ETests.cs create mode 100644 tests/TinyCC.E2ETests/TestCases.cs create mode 100644 tests/TinyCC.E2ETests/TinyCC.E2ETests.csproj create mode 100644 tests/TinyCC.Tests/TinyCC.Tests.csproj create mode 100644 tests/TinyCC.Tests/UnitTest1.cs create mode 100644 tests/TinyCC.Tests/UnitTests.cs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d95e072 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +## .NET +bin/ +obj/ + +## IDE +.vscode/ +.vs/ +*.userprefs +*.user +.idea/ + +## Test artifacts +test.c +test_output/ diff --git a/.monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/design.md b/.monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/design.md new file mode 100644 index 0000000..0f8df59 --- /dev/null +++ b/.monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/design.md @@ -0,0 +1,392 @@ +# Tiny C Compiler in C# - Technical Design + +Feature Name: tiny-c-compiler-csharp +Updated: 2026-05-20 + +## Description + +本项目实现一个使用 C# 编写的 C 语言编译器,参考 TCC 的设计理念。编译器将 C 源代码直接编译为 x86/x64 本地机器码,不经过 MSIL 中间层。项目目标是实现一个轻量级、编译速度快的 C 编译器,支持 C99 标准的核心子集。 + +## Architecture + +```mermaid +graph TD + A["C Source Code\n.c files"] --> B["Preprocessor\n预处理"] + B --> C["Lexer\n词法分析"] + C --> D["Parser\n语法分析"] + D --> E["Semantic Analyzer\n语义分析"] + E --> F["IR Generator\n中间表示生成"] + F --> G["Code Generator\n代码生成 x86/x64"] + G --> H["Object File\nPE/ELF"] + H --> I["Executable\n可执行文件"] + + J["Error Handler"] -.-> B + J -.-> C + J -.-> D + J -.-> E + J -.-> F + J -.-> G + + K["Symbol Table"] -.-> E + K -.-> F + K -.-> G + + L["Type System"] -.-> E + L -.-> F +``` + +### Architecture Overview + +编译器采用传统的单遍或多遍编译架构,分为以下主要阶段: + +1. **预处理阶段**:处理宏展开、条件编译、头文件包含 +2. **前端阶段**:词法分析、语法分析、语义分析 +3. **中间阶段**:IR 生成与优化 +4. **后端阶段**:x86/x64 代码生成与可执行文件输出 + +## Components and Interfaces + +### 1. Preprocessor (预处理器) + +**职责**: +- 处理 `#include` 指令,展开头文件 +- 处理 `#define` 宏定义和宏展开 +- 处理条件编译 `#ifdef`、`#ifndef`、`#endif`、`#if` +- 处理 `#pragma` 指令 + +**接口**: +```csharp +public interface IPreprocessor +{ + string Preprocess(string sourceCode, string sourceFile); + void AddIncludePath(string path); + void DefineMacro(string name, string? value); +} +``` + +### 2. Lexer (词法分析器) + +**职责**: +- 将预处理后的源代码分解为 token 流 +- 识别关键字、标识符、字面量、运算符、分隔符 +- 跳过注释和空白 +- 报告词法错误 + +**接口**: +```csharp +public interface ILexer +{ + IEnumerable Tokenize(string source); +} + +public enum TokenType +{ + Keyword, Identifier, IntLiteral, FloatLiteral, + CharLiteral, StringLiteral, Operator, Separator, + EOF, Error +} + +public record Token( + TokenType Type, + string Lexeme, + object? Value, + SourceLocation Location +); +``` + +### 3. Parser (语法分析器) + +**职责**: +- 实现递归下降解析器 +- 构建抽象语法树(AST) +- 处理 C 语言的运算符优先级 +- 报告语法错误 + +**接口**: +```csharp +public interface IParser +{ + AstNode Parse(); +} + +public abstract record AstNode(SourceLocation Location); +public record ProgramNode(List Declarations, SourceLocation Location) : AstNode(Location); +public record FunctionDeclarationNode( + TypeNode ReturnType, + string Name, + List Parameters, + BlockStatementNode Body, + SourceLocation Location +) : AstNode(Location); +``` + +### 4. Semantic Analyzer (语义分析器) + +**职责**: +- 类型检查与类型推断 +- 符号表管理 +- 作用域管理 +- 语义错误报告 + +**接口**: +```csharp +public interface ISemanticAnalyzer +{ + void Analyze(AstNode root); +} + +public interface ISymbolTable +{ + void EnterScope(); + void ExitScope(); + void AddSymbol(string name, Symbol symbol); + Symbol? Lookup(string name); +} +``` + +### 5. IR Generator (中间表示生成器) + +**职责**: +- 将 AST 转换为三地址码形式的 IR +- 构建控制流图(CFG) +- 支持基本优化(常量折叠、死代码消除) + +**接口**: +```csharp +public interface IIrGenerator +{ + IrProgram Generate(AstNode ast); +} + +public record IrProgram(List Functions); +public record IrFunction(string Name, List BasicBlocks); +public record IrBasicBlock(string Label, List Instructions); +public abstract record IrInstruction; +``` + +### 6. Code Generator (代码生成器) + +**职责**: +- 将 IR 转换为 x86/x64 机器码 +- 寄存器分配 +- 栈帧管理 +- 遵循平台调用约定 + +**接口**: +```csharp +public interface ICodeGenerator +{ + byte[] Generate(IrProgram program, TargetArchitecture architecture); +} + +public enum TargetArchitecture { X86, X64 } +``` + +### 7. Object File Writer (目标文件写入器) + +**职责**: +- 生成 PE 格式文件(Windows) +- 生成 ELF 格式文件(Linux) +- 处理重定位信息 +- 设置入口点 + +**接口**: +```csharp +public interface IObjectFileWriter +{ + byte[] WriteExecutable(byte[] machineCode, TargetPlatform platform); +} + +public enum TargetPlatform { WindowsX86, WindowsX64, LinuxX86, LinuxX64 } +``` + +### 8. Compiler Driver (编译器驱动) + +**职责**: +- 协调各个编译阶段 +- 处理命令行参数 +- 错误汇总与报告 +- 管理编译流程 + +**接口**: +```csharp +public class CompilerDriver +{ + public int Run(string[] args); + public CompilationResult Compile(CompilationOptions options); +} +``` + +## Data Models + +### Token 模型 + +```csharp +public readonly struct SourceLocation +{ + public string FileName { get; } + public int Line { get; } + public int Column { get; } +} + +public enum TokenType +{ + // 关键字 + Int, Char, Float, Double, Long, Short, Void, + If, Else, While, For, Do, Switch, Case, Default, + Break, Continue, Return, Struct, Union, Typedef, + // 字面量 + IntLiteral, FloatLiteral, CharLiteral, StringLiteral, + // 标识符 + Identifier, + // 运算符 + Plus, Minus, Star, Slash, Percent, + Equal, NotEqual, Less, Greater, LessEqual, GreaterEqual, + Assign, PlusAssign, MinusAssign, StarAssign, SlashAssign, + And, Or, Not, BitAnd, BitOr, BitXor, + LeftShift, RightShift, + // 分隔符 + LeftParen, RightParen, LeftBrace, RightBrace, + LeftBracket, RightBracket, + Semicolon, Comma, Dot, Arrow, Colon, + // 预处理器 + HashInclude, HashDefine, HashIf, HashIfdef, HashIfndef, HashElse, HashEndif, + // 特殊 + EOF, Error +} +``` + +### Type System 模型 + +```csharp +public abstract record CType(string Name); +public record PrimitiveType(TypeKind Kind) : CType(Kind.ToString()) +{ + public enum TypeKind { Void, Char, Short, Int, Long, Float, Double } +} +public record PointerType(CType BaseType) : CType($"{BaseType}*"); +public record ArrayType(CType ElementType, int Size) : CType($"{ElementType}[{Size}]"); +public record StructType(string Name, List Fields) : CType(Name); +public record FunctionType(CType ReturnType, List ParameterTypes) : CType("function"); +``` + +### IR 指令模型 + +```csharp +public abstract record IrInstruction; +public record IrBinaryOp(IrTemp Dest, IrBinaryOpType Op, IrValue Left, IrValue Right) : IrInstruction; +public record IrUnaryOp(IrTemp Dest, IrUnaryOpType Op, IrValue Source) : IrInstruction; +public record IrLoad(IrTemp Dest, IrValue Address) : IrInstruction; +public record IrStore(IrValue Address, IrValue Value) : IrInstruction; +public record IrCall(IrTemp? Dest, string FunctionName, List Arguments) : IrInstruction; +public record IrJump(string TargetLabel) : IrInstruction; +public record IrBranch(IrValue Condition, string TrueLabel, string FalseLabel) : IrInstruction; +public record IrReturn(IrValue? Value) : IrInstruction; +public record IrLabel(string LabelName) : IrInstruction; + +public enum IrBinaryOpType { Add, Sub, Mul, Div, Mod, And, Or, Xor, Shl, Shr, Eq, Ne, Lt, Gt, Le, Ge } +public enum IrUnaryOpType { Neg, Not, BitNot, Deref } + +public abstract record IrValue; +public record IrTemp(string Name, CType Type) : IrValue; +public record IrConstant(long Value, CType Type) : IrValue; +public record IrGlobal(string Name, CType Type) : IrValue; +``` + +## Correctness Properties + +### 不变量 + +1. **类型安全**: 所有 IR 指令的操作数类型必须匹配 +2. **作用域正确性**: 符号查找必须遵循词法作用域规则 +3. **控制流完整性**: 所有基本块必须有明确的前驱和后继 +4. **寄存器一致性**: 代码生成前后寄存器状态必须一致 + +### 约束条件 + +1. 生成的机器码必须符合 x86/x64 指令集规范 +2. 函数调用必须遵循目标平台的 ABI(Application Binary Interface) +3. 栈帧布局必须保证栈指针对齐(x64 要求 16 字节对齐) +4. 可执行文件格式必须符合 PE 或 ELF 规范 + +## Error Handling + +### 错误分类 + +| 错误类型 | 阶段 | 处理方式 | +|---------|------|---------| +| 词法错误 | Lexer | 报告错误位置,跳过错误 token | +| 语法错误 | Parser | 报告期望的 token,尝试错误恢复 | +| 类型错误 | Semantic | 报告类型不匹配详情 | +| 未声明符号 | Semantic | 报告符号名称和位置 | +| 代码生成错误 | CodeGen | 报告不支持的 IR 指令 | + +### 错误报告接口 + +```csharp +public record ErrorInfo( + ErrorLevel Level, // Warning, Error, Fatal + string Message, + SourceLocation Location, + string? Suggestion = null +); + +public interface IErrorReporter +{ + void Report(ErrorInfo error); + bool HasErrors { get; } + IEnumerable GetErrors(); +} +``` + +## Test Strategy + +### 单元测试 + +1. **Lexer 测试**:验证各种 token 的正确识别 +2. **Parser 测试**:验证各种 C 语法的 AST 构建 +3. **Semantic 测试**:验证类型检查和符号表 +4. **IR 测试**:验证 AST 到 IR 的转换 +5. **CodeGen 测试**:验证 IR 到机器码的转换 + +### 集成测试 + +1. **端到端测试**:编译简单 C 程序并验证输出 +2. **回归测试**:使用 TCC 测试套件进行对比测试 +3. **性能测试**:测量编译速度和生成代码质量 + +### 测试用例示例 + +```c +// test_hello.c +#include + +int main() { + printf("Hello, World!\n"); + return 0; +} + +// test_arithmetic.c +int add(int a, int b) { + return a + b; +} + +int main() { + int result = add(3, 4); + return result - 7; // should return 0 +} + +// test_control_flow.c +int factorial(int n) { + if (n <= 1) return 1; + return n * factorial(n - 1); +} +``` + +## References + +[^1]: (TCC Source) - Tiny C Compiler 源码 https://repo.or.cz/tinycc.git +[^2]: (PE Format) - Microsoft PE 和 COFF 规范 https://docs.microsoft.com/en-us/windows/win32/debug/pe-format +[^3]: (ELF Format) - ELF 规范 https://refspecs.linuxfoundation.org/elf/elf.pdf +[^4]: (x64 ABI) - System V AMD64 ABI https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf +[^5]: (x86 Calling Conventions) - x86 调用约定 https://en.wikipedia.org/wiki/X86_calling_conventions diff --git a/.monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/requirements.md b/.monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/requirements.md new file mode 100644 index 0000000..34fa828 --- /dev/null +++ b/.monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/requirements.md @@ -0,0 +1,116 @@ +# Requirements Document + +## Introduction + +本项目旨在开发一个使用 C# 语言编写的 C 语言编译器,参考 TCC(Tiny C Compiler)的设计理念。编译器将 C 源代码直接编译为 x86/x64 本地机器码,而非 MSIL(Microsoft Intermediate Language)。目标是实现一个轻量级、快速的 C 编译器,支持 C99 标准的核心特性。 + +## Glossary + +- **TCC**: Tiny C Compiler,一个小型、快速的 C 编译器 +- **本地代码**: 直接可在 CPU 上执行的机器码(x86/x64) +- **MSIL**: Microsoft Intermediate Language,.NET 平台的中间语言 +- **代码生成**: 将中间表示转换为目标机器码的过程 +- **词法分析**: 将源代码分解为 token 流的过程 +- **语法分析**: 将 token 流构建为抽象语法树(AST)的过程 +- **语义分析**: 验证 AST 的语义正确性并进行类型检查 +- **目标平台**: x86 (32-bit) 和 x64 (64-bit) Windows/Linux + +## Requirements + +### Requirement 1: 词法分析器 + +**User Story:** AS 一个 C 编译器开发者,I WANT 实现词法分析器,SO THAT 可以将 C 源代码转换为 token 流 + +#### Acceptance Criteria + +1. WHEN 接收到 C 源代码文件,词法分析器 SHALL 输出 token 流 +2. WHEN 遇到 C 语言关键字(if, else, while, for, int, char 等),词法分析器 SHALL 识别为关键字 token +3. WHEN 遇到标识符,词法分析器 SHALL 识别为标识符 token +4. WHEN 遇到字面量(整数、浮点数、字符、字符串),词法分析器 SHALL 识别为相应的字面量 token +5. WHEN 遇到注释(单行 // 和多行 /* */),词法分析器 SHALL 忽略注释内容 +6. WHEN 遇到词法错误,词法分析器 SHALL 报告错误位置和错误信息 + +### Requirement 2: 语法分析器 + +**User Story:** AS 一个 C 编译器开发者,I WANT 实现语法分析器,SO THAT 可以将 token 流构建为抽象语法树(AST) + +#### Acceptance Criteria + +1. WHEN 接收到 token 流,语法分析器 SHALL 构建 AST +2. WHEN 遇到语法错误,语法分析器 SHALL 报告错误位置和错误描述 +3. WHILE 解析表达式,语法分析器 SHALL 正确处理运算符优先级 +4. WHEN 解析函数定义,语法分析器 SHALL 识别函数名、参数列表和函数体 +5. WHEN 解析控制流语句(if, while, for, switch),语法分析器 SHALL 构建对应的控制流 AST 节点 + +### Requirement 3: 语义分析器 + +**User Story:** AS 一个 C 编译器开发者,I WANT 实现语义分析器,SO THAT 可以验证 AST 的语义正确性 + +#### Acceptance Criteria + +1. WHEN 接收到 AST,语义分析器 SHALL 执行类型检查 +2. WHEN 遇到未声明的变量,语义分析器 SHALL 报告错误 +3. WHEN 遇到类型不匹配的赋值或运算,语义分析器 SHALL 报告类型错误 +4. WHEN 遇到函数调用,语义分析器 SHALL 验证函数签名和参数类型 +5. WHILE 解析作用域,语义分析器 SHALL 正确管理变量的作用域 + +### Requirement 4: 中间表示(IR)生成 + +**User Story:** AS 一个 C 编译器开发者,I WANT 生成中间表示,SO THAT 可以优化并转换为目标机器码 + +#### Acceptance Criteria + +1. WHEN 接收到语义分析后的 AST,IR 生成器 SHALL 输出三地址码形式的 IR +2. WHEN 生成 IR,IR 生成器 SHALL 处理控制流图的构建 +3. WHILE 生成 IR,IR 生成器 SHALL 支持基本的数据流分析 + +### Requirement 5: 代码生成器(x86/x64) + +**User Story:** AS 一个 C 编译器开发者,I WANT 实现代码生成器,SO THAT 可以将 IR 转换为 x86/x64 本地机器码 + +#### Acceptance Criteria + +1. WHEN 接收到 IR,代码生成器 SHALL 输出 x86/x64 机器码 +2. WHEN 处理函数调用,代码生成器 SHALL 遵循目标平台的调用约定(calling convention) +3. WHEN 处理局部变量,代码生成器 SHALL 分配栈空间 +4. WHEN 处理全局变量,代码生成器 SHALL 在数据段分配空间 +5. IF 目标平台为 x64,代码生成器 SHALL 使用 x64 寄存器 +6. IF 目标平台为 x86,代码生成器 SHALL 使用 x86 寄存器 + +### Requirement 6: 可执行文件生成 + +**User Story:** AS 一个 C 编译器用户,I WANT 编译器生成可执行文件,SO THAT 可以直接运行编译后的程序 + +#### Acceptance Criteria + +1. WHEN 编译完成,编译器 SHALL 生成 PE(Windows)或 ELF(Linux)格式的可执行文件 +2. WHEN 生成 PE 文件,编译器 SHALL 包含正确的 PE 头和节表 +3. WHEN 生成 ELF 文件,编译器 SHALL 包含正确的 ELF 头和节头 +4. WHEN 生成的可执行文件被操作系统加载,操作系统 SHALL 能够正确执行程序 + +### Requirement 7: 命令行接口 + +**User Story:** AS 一个 C 编译器用户,I WANT 使用命令行编译 C 文件,SO THAT 可以方便地集成到构建系统中 + +#### Acceptance Criteria + +1. WHEN 用户提供源文件路径,编译器 SHALL 编译并生成可执行文件 +2. WHEN 用户指定输出文件名,编译器 SHALL 使用指定的文件名 +3. WHEN 用户指定目标架构(x86/x64),编译器 SHALL 生成对应架构的代码 +4. IF 编译过程中发生错误,编译器 SHALL 输出错误信息并返回非零退出码 +5. WHEN 用户请求帮助信息,编译器 SHALL 显示使用说明 + +### Requirement 8: C99 核心特性支持 + +**User Story:** AS 一个 C 程序员,I WANT 编译器支持 C99 核心特性,SO THAT 可以编译现有的 C 代码 + +#### Acceptance Criteria + +1. WHEN 编译 C99 代码,编译器 SHALL 支持基本数据类型(int, char, float, double, long, short) +2. WHEN 编译 C99 代码,编译器 SHALL 支持指针和数组 +3. WHEN 编译 C99 代码,编译器 SHALL 支持结构体(struct)和联合体(union) +4. WHEN 编译 C99 代码,编译器 SHALL 支持函数定义和调用 +5. WHEN 编译 C99 代码,编译器 SHALL 支持控制流语句(if, else, while, for, do-while, switch, break, continue, return) +6. WHEN 编译 C99 代码,编译器 SHALL 支持宏定义(#define)和条件编译(#ifdef, #ifndef, #endif) +7. WHEN 编译 C99 代码,编译器 SHALL 支持头文件包含(#include) +8. IF 使用 C99 特性(如单行注释 //),编译器 SHALL 正确解析 diff --git a/.monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/tasklist.md b/.monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/tasklist.md new file mode 100644 index 0000000..397c49b --- /dev/null +++ b/.monkeycode/specs/2026-05-20-tiny-c-compiler-csharp/tasklist.md @@ -0,0 +1,96 @@ +# Implementation Task List + +## Phase 1: 项目初始化与基础架构 + +- [ ] 1.1 创建 .NET 8 控制台项目结构 +- [ ] 1.2 配置项目解决方案和模块划分 +- [ ] 1.3 实现基础的错误报告系统(ErrorReporter) +- [ ] 1.4 实现源代码位置追踪(SourceLocation) + +## Phase 2: 预处理器实现 + +- [ ] 2.1 实现 #include 指令处理 +- [ ] 2.2 实现 #define 宏定义和宏展开 +- [ ] 2.3 实现条件编译(#ifdef, #ifndef, #if, #else, #endif) +- [ ] 2.4 实现头文件搜索路径管理 + +## Phase 3: 词法分析器实现 + +- [ ] 3.1 定义所有 TokenType 枚举 +- [ ] 3.2 实现 Token 结构 +- [ ] 3.3 实现 Lexer 主类 +- [ ] 3.4 实现关键字识别 +- [ ] 3.5 实现标识符识别 +- [ ] 3.6 实现整数字面量识别 +- [ ] 3.7 实现浮点数字面量识别 +- [ ] 3.8 实现字符和字符串字面量识别 +- [ ] 3.9 实现运算符识别 +- [ ] 3.10 实现注释跳过 +- [ ] 3.11 编写 Lexer 单元测试 + +## Phase 4: 语法分析器实现 + +- [ ] 4.1 定义 AST 节点层次结构 +- [ ] 4.2 实现递归下降解析器框架 +- [ ] 4.3 实现表达式解析(处理运算符优先级) +- [ ] 4.4 实现语句解析 +- [ ] 4.5 实现函数声明解析 +- [ ] 4.6 实现类型声明解析 +- [ ] 4.7 实现控制流语句解析(if, while, for, switch) +- [ ] 4.8 实现结构体和联合体解析 +- [ ] 4.9 实现错误恢复机制 +- [ ] 4.10 编写 Parser 单元测试 + +## Phase 5: 语义分析器实现 + +- [ ] 5.1 实现类型系统(CType 层次结构) +- [ ] 5.2 实现符号表(SymbolTable) +- [ ] 5.3 实现作用域管理 +- [ ] 5.4 实现类型检查 +- [ ] 5.5 实现函数签名验证 +- [ ] 5.6 实现变量声明检查 +- [ ] 5.7 编写 Semantic Analyzer 单元测试 + +## Phase 6: 中间表示(IR)生成 + +- [ ] 6.1 定义 IR 指令集 +- [ ] 6.2 实现基本块(BasicBlock)结构 +- [ ] 6.3 实现控制流图(CFG) +- [ ] 6.4 实现 AST 到 IR 转换 +- [ ] 6.5 实现临时变量管理 +- [ ] 6.6 实现常量折叠优化 +- [ ] 6.7 编写 IR Generator 单元测试 + +## Phase 7: x86/x64 代码生成器 + +- [ ] 7.1 定义目标架构抽象 +- [ ] 7.2 实现寄存器管理 +- [ ] 7.3 实现栈帧管理 +- [ ] 7.4 实现 x86 指令编码 +- [ ] 7.5 实现 x64 指令编码 +- [ ] 7.6 实现 IR 到机器码转换 +- [ ] 7.7 实现函数调用约定(cdecl, sysv64) +- [ ] 7.8 实现寄存器分配 +- [ ] 7.9 编写 CodeGen 单元测试 + +## Phase 8: 可执行文件生成 + +- [ ] 8.1 实现 PE 文件格式写入(Windows) +- [ ] 8.2 实现 ELF 文件格式写入(Linux) +- [ ] 8.3 实现节表管理 +- [ ] 8.4 实现重定位处理 +- [ ] 8.5 实现入口点设置 + +## Phase 9: 编译器驱动与 CLI + +- [ ] 9.1 实现命令行参数解析 +- [ ] 9.2 实现编译流程编排 +- [ ] 9.3 实现错误汇总与报告 +- [ ] 9.4 实现多文件编译支持 + +## Phase 10: 测试与优化 + +- [ ] 10.1 编写端到端测试 +- [ ] 10.2 使用 TCC 测试套件进行回归测试 +- [ ] 10.3 性能测试与优化 +- [ ] 10.4 内存使用优化 diff --git a/.monkeycode/specs/2026-05-20-tinycc-improvements/design.md b/.monkeycode/specs/2026-05-20-tinycc-improvements/design.md new file mode 100644 index 0000000..654db1f --- /dev/null +++ b/.monkeycode/specs/2026-05-20-tinycc-improvements/design.md @@ -0,0 +1,461 @@ +# TinyCC 编译器改进计划 + +Feature Name: 2026-05-20-tinycc-improvements +Updated: 2026-05-20 + +## Description + +本改进计划涵盖 TinyCC 编译器的 9 个核心改进方向,分为三个阶段: +- **阶段一(基础完善)**:端到端测试、错误报告增强、仓库清理 +- **阶段二(功能完善)**:语义分析、预处理器集成、代码生成优化 +- **阶段三(高级特性)**:DWARF 调试信息、PE 格式支持、性能基准 + +## Architecture + +```mermaid +graph TD + subgraph "阶段一:基础完善" + A1[E2E 测试框架] --> A2[测试用例集合] + B1[ErrorReporter 增强] --> B2[代码上下文格式化] + C1[.gitignore 更新] --> C2[移除误提交文件] + end + + subgraph "阶段二:功能完善" + D1[SemanticAnalyzer 完善] --> D2[类型系统增强] + D1 --> D3[作用域管理优化] + E1[Preprocessor 集成] --> E2[宏展开引擎] + E1 --> E3[头文件搜索机制] + F1[优化 CodeGen] --> F2[寄存器分配器] + F1 --> F3[指令选择优化] + end + + subgraph "阶段三:高级特性" + G1[DWARF 生成器] --> G2[调试信息编码] + G1 --> G3[行号表生成] + H1[PE Writer 完善] --> H2[PE 头生成] + H1 --> H3[重定位表生成] + I1[性能基准] --> I2[编译时间测量] + I1 --> I3[执行时间测量] + end + + A2 -. 验证 .-> D1 + B2 -. 集成 .-> D1 + D3 -. 输入 .-> F2 + E3 -. 输出 .-> A2 +``` + +### 改进架构概览 + +改进计划遵循渐进式实现策略,每个阶段的输出为下一阶段提供基础: + +1. **阶段一**建立测试基础设施和用户体验改进 +2. **阶段二**完善编译器核心功能 +3. **阶段三**添加高级特性和性能监控 + +## Components and Interfaces + +### 1. 端到端测试框架 + +**职责**: +- 编译测试 C 源文件并验证生成的可执行文件 +- 管理测试用例输入和预期输出 +- 报告测试通过/失败状态 + +**接口**: +```csharp +public interface IE2ETestRunner +{ + Task RunTestAsync(TestCase testCase); + IEnumerable LoadTestsFromDirectory(string directory); +} + +public record TestCase( + string Name, + string SourceCode, + int ExpectedExitCode, + string? ExpectedOutput = null +); + +public record TestResult( + string TestCaseName, + bool Passed, + int ActualExitCode, + string? ActualOutput = null, + string? ErrorMessage = null +); +``` + +**实现策略**: +- 使用 `CompilerDriver` 编译源代码到临时 ELF 文件 +- 使用 `Process` 类执行生成的可执行文件 +- 比较实际输出/退出码与预期值 + +### 2. 错误报告增强 + +**职责**: +- 格式化错误信息,包含代码上下文 +- 生成可视化错误位置标记 +- 支持多错误汇总输出 + +**接口扩展**: +```csharp +public record ErrorInfo( + ErrorLevel Level, + string Message, + SourceLocation Location, + string? SourceLine = null, // 新增:出错的源代码行 + int? ColumnOffset = null, // 新增:错误在行内的偏移 + string? Suggestion = null // 新增:修复建议 +); + +public sealed class ErrorReporter : IErrorReporter +{ + private readonly List _errors = new(); + private readonly Dictionary _sourceCache = new(); // 新增:源代码缓存 + + public void Report(ErrorInfo error); + public void SetSourceLines(string fileName, string[] lines); // 新增:设置源代码行 + public string FormatErrors(); // 新增:格式化所有错误 +} +``` + +**格式化输出示例**: +``` +test.c:3:5: error: expected ';' before 'return' + 2 | int add(int a, int b) { + 3 | int x = a + b + | ^^^^^^^^ + 4 | return x; + | ~~~~~ + help: add ';' at the end of the statement +``` + +### 3. 语义分析器完善 + +**职责扩展**: +- 实现完整的类型检查系统 +- 支持嵌套作用域管理 +- 检测函数签名不匹配 + +**新增组件**: +```csharp +public sealed class TypeChecker +{ + public CType? CheckBinaryOperation(TokenType op, CType left, CType right, SourceLocation loc); + public CType? CheckUnaryOperation(TokenType op, CType operand, SourceLocation loc); + public bool IsCompatible(CType source, CType target); + public CType? PromoteType(CType type); // 类型提升 +} + +public sealed class ScopeManager +{ + private readonly Stack> _scopes = new(); + + public void EnterScope(); + public void ExitScope(); + public void DeclareSymbol(string name, Symbol symbol); + public Symbol? LookupSymbol(string name); + public bool IsDeclared(string name); +} +``` + +**类型检查规则**: +| 操作 | 左操作数 | 右操作数 | 结果类型 | +|------|---------|---------|---------| +| 算术运算 | 整数/浮点 | 整数/浮点 | 提升后的类型 | +| 比较运算 | 数值类型 | 数值类型 | int | +| 赋值 | 类型 T | 类型 S | T(S 必须可转换为 T) | + +### 4. 预处理器集成 + +**职责**: +- 处理 `#include`、`#define`、条件编译 +- 管理头文件搜索路径 +- 宏展开和参数替换 + +**接口**: +```csharp +public interface IPreprocessor +{ + string Preprocess(string sourceCode, string sourceFile); + void AddIncludePath(string path); + void DefineMacro(string name, string? value); + void UndefineMacro(string name); +} + +public sealed class Macro +{ + public string Name { get; } + public string? Value { get; } + public List? Parameters { get; } // 函数宏参数 + public string? Body { get; } +} +``` + +**集成到 CompilerDriver**: +```csharp +// 在 CompilerDriver.Compile 中 +var preprocessor = new Preprocessor(_errorReporter); +foreach (var includePath in options.IncludePaths) +{ + preprocessor.AddIncludePath(includePath); +} +var preprocessedSource = preprocessor.Preprocess(options.SourceFile); +var lexer = new Lexer(preprocessedSource, options.SourceFile, _errorReporter); +``` + +### 5. 代码生成优化 + +**职责**: +- 实现图着色寄存器分配 +- 指令选择和调度 +- 栈帧布局优化 + +**寄存器分配器接口**: +```csharp +public sealed class GraphColoringAllocator +{ + private readonly Dictionary> _interferenceGraph = new(); + private readonly Dictionary _allocation = new(); + private readonly HashSet _spilledVars = new(); + + public void Allocate(IrFunction function, string[] availableRegs); + public string? GetRegister(IrValue value); + public bool IsSpilled(IrValue value); +} +``` + +**优化验证策略**: +- 比较优化前后生成的机器码长度 +- 验证优化后程序的执行结果正确性 +- 测量溢出变量数量 + +### 6. DWARF 调试信息生成器 + +**职责**: +- 生成 DWARF 调试信息节 +- 编码源文件路径和行号映射 +- 生成变量和类型调试信息 + +**接口**: +```csharp +public sealed class DwarfGenerator +{ + private readonly List _debugInfo = new(); + private readonly List _lineTable = new(); + + public void AddFile(string fileName); + public void AddLineEntry(int fileIndex, int line, int address); + public void AddVariable(string name, CType type, int scopeLevel, int offset); + public byte[] GenerateDebugSection(); + public byte[] GenerateLineSection(); +} +``` + +**ELF 集成**: +- 在 `ElfWriter` 中添加 `.debug_info`、`.debug_line` 节 +- 更新节头表和字符串表 + +### 7. PE 写出器完善 + +**职责**: +- 生成完整的 PE32+ 文件头 +- 创建 `.text` 和 `.data` 节 +- 处理重定位和导入表 + +**PE 文件结构**: +``` +DOS Header (64 bytes) +PE Signature ("PE\0\0") +COFF File Header (20 bytes) +Optional Header (PE32+, 112 bytes) +Data Directories (16 entries) +Section Headers (40 bytes per section) +.text Section (代码) +.data Section (数据) +``` + +### 8. 性能基准测试框架 + +**职责**: +- 测量编译时间 +- 测量生成代码执行时间 +- 生成统计报告 + +**接口**: +```csharp +public sealed class BenchmarkRunner +{ + public BenchmarkResult RunCompilationBenchmark(string sourceFile, int iterations = 10); + public BenchmarkResult RunExecutionBenchmark(string executable, int iterations = 100); +} + +public record BenchmarkResult( + string TestName, + double MeanTimeMs, + double MedianTimeMs, + double StdDevMs, + int Iterations +); +``` + +## Data Models + +### 错误信息模型(增强) + +```csharp +public enum ErrorLevel +{ + Warning, + Error, + Fatal +} + +public readonly struct SourceLocation +{ + public string FileName { get; } + public int Line { get; } + public int Column { get; } + public int Length { get; } // 新增:错误跨度 +} +``` + +### 测试用例模型 + +```csharp +public record TestCase( + string Name, + string SourceCode, + int ExpectedExitCode, + string? ExpectedOutput = null, + string? ExpectedErrorPattern = null // 期望的错误模式 +); +``` + +### DWARF 调试信息模型 + +```csharp +public record DwarfInfoEntry( + uint Offset, + uint AbbrevCode, + Dictionary Attributes +); + +public record DwarfLineEntry( + int Address, + int FileIndex, + int Line, + int Column, + bool IsStatement, + bool IsEndOfSequence +); +``` + +## Correctness Properties + +### 不变量 + +1. **测试覆盖完整性**: 每个 C 语言特性至少有一个 E2E 测试用例 +2. **错误信息准确性**: 错误位置标记必须指向正确的源代码行和列 +3. **类型检查健全性**: 类型检查必须拒绝所有类型错误的程序 +4. **寄存器分配正确性**: 分配的寄存器不能干涉活跃变量 +5. **调试信息一致性**: 调试信息中的行号必须与实际代码位置匹配 + +### 约束条件 + +1. E2E 测试必须在 Linux x64 环境下运行 +2. 错误报告格式化器必须处理多字节字符 +3. 寄存器分配器必须遵循 System V AMD64 ABI +4. DWARF 信息必须兼容 gdb 7.0+ +5. PE 文件必须兼容 Windows 10+ 加载器 + +## Error Handling + +### 错误场景与处理策略 + +| 错误场景 | 检测阶段 | 处理方式 | +|---------|---------|---------| +| 头文件不存在 | 预处理 | 报告错误,提供搜索路径 | +| 宏重复定义 | 预处理 | 报告警告,使用新定义 | +| 未声明变量 | 语义分析 | 报告错误,标记位置 | +| 类型不匹配 | 语义分析 | 报告错误,显示期望和实际类型 | +| 寄存器溢出 | 代码生成 | 溢出到栈,更新栈帧布局 | +| DWARF 编码失败 | 调试信息生成 | 报告错误,继续编译 | +| PE 头生成失败 | 目标文件写入 | 报告错误,终止编译 | + +### 错误恢复策略 + +- **词法/语法错误**: 尝试跳过错误 token,继续解析 +- **语义错误**: 收集所有错误,一次性输出 +- **代码生成错误**: 立即终止,报告详细错误信息 + +## Test Strategy + +### 单元测试 + +1. **错误报告测试**: 验证错误信息格式化和代码上下文显示 +2. **类型检查测试**: 验证各种类型场景的检查逻辑 +3. **寄存器分配测试**: 验证图着色算法正确性 +4. **DWARF 编码测试**: 验证调试信息编码正确性 + +### 集成测试 + +1. **端到端测试**: 编译并运行测试 C 程序,验证输出 +2. **预处理器集成测试**: 验证宏展开和头文件包含 +3. **PE 格式测试**: 在 Windows 环境验证生成的 PE 文件 + +### E2E 测试用例集合 + +```c +// test_arithmetic.c - 算术运算测试 +int add(int a, int b) { return a + b; } +int main() { return add(3, 4) == 7 ? 0 : 1; } + +// test_control_flow.c - 控制流测试 +int main() { + int sum = 0; + for (int i = 1; i <= 10; i++) sum += i; + return sum == 55 ? 0 : 1; +} + +// test_functions.c - 函数调用测试 +int factorial(int n) { + if (n <= 1) return 1; + return n * factorial(n - 1); +} +int main() { return factorial(5) == 120 ? 0 : 1; } + +// test_pointers.c - 指针测试 +int main() { + int x = 42; + int *p = &x; + return *p == 42 ? 0 : 1; +} + +// test_arrays.c - 数组测试 +int main() { + int arr[3] = {1, 2, 3}; + return arr[1] == 2 ? 0 : 1; +} + +// test_macro.c - 宏测试 +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +int main() { return MAX(3, 5) == 5 ? 0 : 1; } +``` + +### 性能基准测试 + +- **编译时间基准**: 测量编译标准 C 文件的时间(如 `factorial.c`, `sort.c`) +- **执行时间基准**: 测量生成代码执行时间,与 gcc/clang 对比 +- **内存使用基准**: 测量编译过程中的内存峰值 + +## References + +[^1]: (DWARF Spec) - DWARF 调试标准格式 https://dwarfstd.org +[^2]: (PE Spec) - Microsoft PE 和 COFF 规范 https://docs.microsoft.com/en-us/windows/win32/debug/pe-format +[^3]: (System V AMD64 ABI) - x64 调用约定 https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf +[^4]: (ELF Spec) - ELF 规范 https://refspecs.linuxfoundation.org/elf/elf.pdf +[^5]: (CompilerDriver.cs#L18) - 当前编译器驱动实现 src/TinyCC.Core/CompilerDriver.cs +[^6]: (ErrorReporter.cs#L6) - 当前错误报告器实现 src/TinyCC.Core/Diagnostics/ErrorReporter.cs +[^7]: (SemanticAnalyzer.cs#L11) - 当前语义分析器实现 src/TinyCC.Core/Semantic/SemanticAnalyzer.cs +[^8]: (OptimizedX64CodeGenerator.cs#L12) - 当前优化代码生成器实现 src/TinyCC.Core/CodeGen/OptimizedX64CodeGenerator.cs diff --git a/.monkeycode/specs/2026-05-20-tinycc-improvements/requirements.md b/.monkeycode/specs/2026-05-20-tinycc-improvements/requirements.md new file mode 100644 index 0000000..b876629 --- /dev/null +++ b/.monkeycode/specs/2026-05-20-tinycc-improvements/requirements.md @@ -0,0 +1,123 @@ +# Requirements Document + +## Introduction + +本改进计划涵盖 TinyCC 编译器的 9 个核心改进方向,包括端到端测试、错误报告增强、代码清理、语义分析完善、预处理器集成、代码生成优化、调试信息支持、PE 格式支持及性能基准测试。 + +## Glossary + +- **TinyCC**: 本项目实现的轻量级 C 编译器 +- **E2E 测试**: 端到端测试,验证完整编译流程 +- **ELF**: Executable and Linkable Format,Linux 可执行文件格式 +- **PE**: Portable Executable,Windows 可执行文件格式 +- **DWARF**: 调试信息格式,支持源码级调试 +- **ABI**: Application Binary Interface,应用二进制接口 + +## Requirements + +### Requirement 1: 端到端编译测试 + +**User Story:** AS 一个编译器开发者,I WANT 验证完整编译流程生成的可执行文件能够正确运行,SO THAT 确保编译器各组件协同工作正常 + +#### Acceptance Criteria + +1. WHEN 提供包含 `main` 函数的 C 源代码,编译器 SHALL 生成可在 Linux 上执行的 ELF 文件 +2. WHEN 运行生成的 ELF 文件,程序 SHALL 返回正确的退出码 +3. WHEN 编译包含算术运算的 C 程序,执行程序 SHALL 输出正确的计算结果 +4. WHEN 编译包含函数调用的 C 程序,执行程序 SHALL 正确调用函数并返回结果 +5. WHEN 编译包含控制流语句(if/while/for)的 C 程序,执行程序 SHALL 正确执行控制流 + +### Requirement 2: 增强错误报告 + +**User Story:** AS 一个 C 程序员,I WANT 编译器提供包含代码上下文和位置提示的错误信息,SO THAT 能够快速定位和修复代码问题 + +#### Acceptance Criteria + +1. WHEN 报告编译错误,错误信息 SHALL 包含文件名、行号和列号 +2. WHEN 报告语法错误,错误信息 SHALL 显示出错代码行及错误位置标记 +3. WHEN 报告类型错误,错误信息 SHALL 说明期望的类型和实际提供的类型 +4. WHEN 报告多个错误,编译器 SHALL 汇总所有错误并一次性输出 +5. IF 错误信息包含建议,建议 SHALL 提供可能的修复方向 + +### Requirement 3: 清理误提交文件 + +**User Story:** AS 一个仓库维护者,I WANT 从版本控制中移除构建产物和临时文件,SO THAT 保持仓库整洁并减小仓库体积 + +#### Acceptance Criteria + +1. WHEN 提交代码,构建目录 `bin/` 和 `obj/` SHALL 被 `.gitignore` 排除 +2. WHEN 提交代码,临时测试文件 `test_output` SHALL 从版本控制中移除 +3. WHEN 提交代码,本地测试文件 `test.c` SHALL 从版本控制中移除 + +### Requirement 4: 语义分析器完整实现 + +**User Story:** AS 一个 C 编译器开发者,I WANT 语义分析器能够验证程序的语义正确性,SO THAT 拒绝语义错误的 C 程序 + +#### Acceptance Criteria + +1. WHEN 遇到未声明的变量,语义分析器 SHALL 报告"未声明的标识符"错误 +2. WHEN 遇到类型不匹配的赋值操作,语义分析器 SHALL 报告类型不匹配错误 +3. WHEN 遇到函数调用参数数量不匹配,语义分析器 SHALL 报告参数数量错误 +4. WHEN 遇到函数调用参数类型不匹配,语义分析器 SHALL 报告参数类型错误 +5. WHILE 处理嵌套作用域,语义分析器 SHALL 正确解析变量的词法作用域 +6. WHEN 遇到重复的函数声明,语义分析器 SHALL 报告重复定义错误 + +### Requirement 5: 预处理器集成到编译流程 + +**User Story:** AS 一个 C 编译器用户,I WANT 编译器正确处理预处理指令,SO THAT 可以编译包含宏和头文件的 C 程序 + +#### Acceptance Criteria + +1. WHEN 遇到 `#include` 指令,预处理器 SHALL 展开并包含指定头文件的内容 +2. WHEN 遇到 `#define` 宏定义,预处理器 SHALL 在后续代码中展开宏 +3. WHEN 遇到 `#ifdef`/`#ifndef` 条件编译,预处理器 SHALL 根据宏定义情况选择编译分支 +4. WHEN 预处理完成后,编译器驱动 SHALL 将预处理后的源代码传递给词法分析器 +5. IF 头文件不存在,预处理器 SHALL 报告错误并提供搜索路径信息 + +### Requirement 6: 代码生成优化验证 + +**User Story:** AS 一个编译器开发者,I WANT 验证优化后的代码生成器能够正确分配寄存器并生成高效代码,SO THAT 提升生成程序的执行性能 + +#### Acceptance Criteria + +1. WHEN 使用优化代码生成器编译函数,寄存器分配算法 SHALL 为活跃变量分配物理寄存器 +2. WHEN 寄存器数量不足,寄存器分配算法 SHALL 正确地将变量溢出到栈 +3. WHEN 生成优化后的机器码,程序执行结果 SHALL 与未优化版本一致 +4. WHILE 分配寄存器,寄存器分配算法 SHALL 遵循调用约定保留被调用者保存的寄存器 +5. IF 代码生成器生成溢出代码,溢出区域 SHALL 正确管理栈帧布局 + +### Requirement 7: DWARF 调试信息生成 + +**User Story:** AS 一个 C 程序员,I WANT 编译器生成 DWARF 调试信息,SO THAT 可以使用 gdb 对生成的可执行文件进行源码级调试 + +#### Acceptance Criteria + +1. WHEN 启用调试信息选项,编译器 SHALL 在 ELF 文件中生成 `.debug_info` 节 +2. WHEN 生成调试信息,调试信息 SHALL 包含源文件路径和行号映射 +3. WHEN 生成调试信息,调试信息 SHALL 包含变量名称、类型和作用域信息 +4. WHEN 使用 gdb 加载生成的可执行文件,gdb SHALL 能够显示源代码并设置断点 +5. IF 未启用调试信息选项,编译器 SHALL 不生成调试信息以减小文件体积 + +### Requirement 8: PE 格式可执行文件支持 + +**User Story:** AS 一个 Windows 用户,I WANT 编译器生成 Windows PE 格式的可执行文件,SO THAT 可以在 Windows 系统上运行编译后的程序 + +#### Acceptance Criteria + +1. WHEN 指定目标平台为 Windows x64,编译器 SHALL 生成 PE32+ 格式的可执行文件 +2. WHEN 生成 PE 文件,PE 文件 SHALL 包含正确的 DOS 头和 PE 签名 +3. WHEN 生成 PE 文件,PE 文件 SHALL 包含有效的节表(`.text`、`.data`) +4. WHEN 运行生成的 PE 文件,Windows 操作系统 SHALL 能够加载并执行程序 +5. WHEN 生成 PE 文件,PE 文件 SHALL 设置正确的入口点(Entry Point) + +### Requirement 9: 编译性能基准测试 + +**User Story:** AS 一个编译器开发者,I WANT 建立编译性能基准,SO THAT 可以量化编译器性能变化并识别性能瓶颈 + +#### Acceptance Criteria + +1. WHEN 运行性能基准测试,测试 SHALL 测量编译器处理标准 C 文件的编译时间 +2. WHEN 运行性能基准测试,测试 SHALL 测量生成代码的执行时间 +3. WHEN 运行性能基准测试,测试 SHALL 输出编译时间和执行时间的统计报告 +4. WHILE 进行性能优化,开发者 SHALL 能够对比优化前后的基准测试结果 +5. IF 性能基准测试发现回归,测试结果 SHALL 标记性能下降的模块 diff --git a/.monkeycode/specs/2026-05-20-tinycc-improvements/tasklist.md b/.monkeycode/specs/2026-05-20-tinycc-improvements/tasklist.md new file mode 100644 index 0000000..57ea288 --- /dev/null +++ b/.monkeycode/specs/2026-05-20-tinycc-improvements/tasklist.md @@ -0,0 +1,80 @@ +# TinyCC 改进计划 - 任务列表 + +Feature Name: 2026-05-20-tinycc-improvements +Created: 2026-05-20 + +## 阶段一:基础完善 + +### Task 1.1: 创建端到端测试框架 + +- [ ] 1.1.1 创建 `TinyCC.E2ETests` 测试项目,配置 xUnit 测试框架 +- [ ] 1.1.2 实现 `E2ETestRunner` 类,支持编译 C 源代码并执行生成的 ELF 文件 +- [ ] 1.1.3 实现测试用例管理,支持从嵌入式代码或文件加载测试用例 +- [ ] 1.1.4 编写基础测试用例:算术运算、控制流、函数调用 +- [ ] 1.1.5 运行端到端测试,验证当前编译器功能,记录失败项 + +### Task 1.2: 增强错误报告 + +- [ ] 1.2.1 扩展 `ErrorInfo` 结构,添加 `SourceLine`、`ColumnOffset`、`Suggestion` 字段 +- [ ] 1.2.2 在 `ErrorReporter` 中实现源代码行缓存机制 +- [ ] 1.2.3 实现错误信息格式化器,支持代码上下文和位置标记显示 +- [ ] 1.2.4 集成到 `CompilerDriver`,在编译前缓存源代码行 +- [ ] 1.2.5 编写单元测试验证错误格式化输出 + +### Task 1.3: 清理误提交文件 + +- [ ] 1.3.1 更新 `.gitignore`,确保 `bin/`、`obj/`、`test.c`、`test_output/` 被排除 +- [ ] 1.3.2 从 git 历史中移除 `test_output` 和 `test.c` 文件 +- [ ] 1.3.3 验证 `git status` 输出清洁,无构建产物 + +## 阶段二:功能完善 + +### Task 2.1: 完善语义分析器 + +- [ ] 2.1.1 实现完整的 `TypeChecker` 类,支持类型兼容性检查和类型提升 +- [ ] 2.1.2 完善 `ScopeManager`,支持嵌套作用域和符号查找 +- [ ] 2.1.3 实现函数签名验证,检测参数数量和类型不匹配 +- [ ] 2.1.4 实现重复声明检测(函数和全局变量) +- [ ] 2.1.5 编写单元测试验证类型检查和作用域管理 + +### Task 2.2: 预处理器集成到编译流程 + +- [ ] 2.2.1 完善 `Preprocessor` 类,支持 `#include` 头文件搜索和展开 +- [ ] 2.2.2 实现 `#define` 宏定义和宏展开(包括函数宏) +- [ ] 2.2.3 实现条件编译 `#ifdef`/`#ifndef`/`#endif`/`#if`/`#else`/`#elif` +- [ ] 2.2.4 集成预处理器到 `CompilerDriver.Compile` 流程 +- [ ] 2.2.5 编写单元测试和 E2E 测试验证预处理功能 + +### Task 2.3: 代码生成优化验证 + +- [ ] 2.3.1 完善 `GraphColoringAllocator` 寄存器分配器实现 +- [ ] 2.3.2 实现变量溢出到栈的逻辑和栈帧布局管理 +- [ ] 2.3.3 集成优化代码生成器到 `CompilerDriver` +- [ ] 2.3.4 编写测试验证优化前后代码执行结果一致性 +- [ ] 2.3.5 比较优化前后生成的机器码长度和寄存器使用情况 + +## 阶段三:高级特性 + +### Task 3.1: DWARF 调试信息生成 + +- [ ] 3.1.1 实现 `DwarfGenerator` 类,支持 DWARF 调试信息编码 +- [ ] 3.1.2 实现行号表生成(`.debug_line` 节) +- [ ] 3.1.3 实现变量和类型调试信息(`.debug_info` 节) +- [ ] 3.1.4 集成到 `ElfWriter`,添加调试信息节到 ELF 文件 +- [ ] 3.1.5 使用 gdb 验证生成的调试信息可正确显示源码和设置断点 + +### Task 3.2: PE 格式可执行文件支持 + +- [ ] 3.2.1 完善 `PeWriter`,生成完整的 PE32+ 文件头 +- [ ] 3.2.2 实现 `.text` 和 `.data` 节创建和填充 +- [ ] 3.2.3 实现 PE 入口点设置和重定位处理 +- [ ] 3.2.4 集成 PE 写出器到 `CompilerDriver` 的 Windows 平台分支 +- [ ] 3.2.5 验证生成的 PE 文件可在 Windows 环境加载执行 + +### Task 3.3: 编译性能基准测试 + +- [ ] 3.3.1 实现 `BenchmarkRunner` 类,支持编译时间和执行时间测量 +- [ ] 3.3.2 创建标准测试用例集合(factorial、sort 等) +- [ ] 3.3.3 实现统计报告生成(均值、中位数、标准差) +- [ ] 3.3.4 集成到测试框架,支持一键运行基准测试 +- [ ] 3.3.5 记录初始基准数据,建立性能基线 diff --git a/README.md b/README.md new file mode 100644 index 0000000..350940e --- /dev/null +++ b/README.md @@ -0,0 +1,240 @@ +# TinyCC - A Tiny C Compiler in C# + +参考 TCC(Tiny C Compiler)设计理念,使用 C# 语言开发的 C 语言编译器。编译器将 C 源代码直接编译为 x86/x64 本地机器码,而非 MSIL(Microsoft Intermediate Language)。 + +## 项目特性 + +- 轻量级、快速的 C 编译器 +- 直接生成 x86/x64 本地机器码 +- 支持 C99 标准核心子集 +- 生成 ELF 格式可执行文件(Linux x64) +- 完整的编译流程:词法分析 → 语法分析 → IR 生成 → 代码生成 + +## 项目结构 + +``` +/workspace/ +├── src/ +│ ├── TinyCC.Core/ # 核心编译器库 +│ │ ├── Diagnostics/ # 错误报告系统 +│ │ │ ├── ErrorInfo.cs +│ │ │ ├── IErrorReporter.cs +│ │ │ └── ErrorReporter.cs +│ │ ├── Lexer/ # 词法分析器 +│ │ │ ├── TokenType.cs +│ │ │ ├── Token.cs +│ │ │ └── Lexer.cs +│ │ ├── Parser/ # 语法分析器 +│ │ │ ├── AstNodes.cs +│ │ │ └── Parser.cs +│ │ ├── IR/ # 中间表示生成器 +│ │ │ ├── IrInstructions.cs +│ │ │ └── IrGenerator.cs +│ │ ├── CodeGen/ # x64 代码生成器 +│ │ │ └── X64CodeGenerator.cs +│ │ ├── Target/ # ELF 文件写入器 +│ │ │ └── ElfWriter.cs +│ │ └── CompilerDriver.cs # 编译器驱动 +│ └── TinyCC.Cli/ # 命令行接口 +│ └── Program.cs +├── tests/ +│ └── TinyCC.Tests/ # 单元测试 +│ └── UnitTests.cs +└── .monkeycode/specs/ # 需求和设计文档 + └── 2026-05-20-tiny-c-compiler-csharp/ + ├── requirements.md + ├── design.md + └── tasklist.md +``` + +## 编译流程 + +``` +C 源代码 → 词法分析 → Token 流 → 语法分析 → AST → IR 生成 → 代码生成 → ELF 文件 +``` + +### 1. 词法分析 (Lexical Analysis) + +将 C 源代码分解为 token 流,识别: +- 关键字(int, char, if, while, return 等) +- 标识符 +- 字面量(整数、浮点数、字符、字符串) +- 运算符(+, -, *, /, ==, !=, &&, || 等) +- 分隔符((), {}, ;, , 等) +- 跳过注释和空白 + +### 2. 语法分析 (Parsing) + +递归下降解析器,构建抽象语法树(AST): +- 函数声明和定义 +- 表达式解析(正确的运算符优先级) +- 语句解析(if, while, for, return, break, continue) +- 块语句 + +### 3. 中间表示生成 (IR Generation) + +将 AST 转换为三地址码形式的 IR: +- 二元运算(Add, Sub, Mul, Div, Mod, And, Or, Xor 等) +- 一元运算(Neg, Not, BitNot) +- 函数调用 +- 控制流(跳转、条件分支、循环) +- 变量加载和存储 + +### 4. 代码生成 (Code Generation) + +将 IR 转换为 x64 机器码: +- 寄存器管理(rax, rcx, rdx 等) +- 栈帧管理(push rbp, mov rbp, rsp, sub rsp) +- x64 调用约定(前 6 个参数通过寄存器传递) +- 基本指令编码 + +### 5. ELF 文件生成 + +生成 Linux x64 可执行文件: +- ELF 头部 +- 程序头部 +- 代码段 + +## 使用方式 + +### 编译项目 + +```bash +export PATH="/usr/share/dotnet:$PATH" +dotnet build +``` + +### 运行编译器 + +```bash +# 编译 C 文件 +dotnet run --project src/TinyCC.Cli -- test.c -o test_output + +# 查看帮助 +dotnet run --project src/TinyCC.Cli -- --help +``` + +### 运行测试 + +```bash +dotnet test +``` + +## 已实现功能 + +- 词法分析器:完整的 C 语言 token 识别 +- 语法分析器:递归下降解析器,支持函数定义和表达式解析 +- 中间表示:三地址码形式的 IR +- 代码生成器:x64 机器码生成 +- ELF 文件生成:Linux x64 可执行文件 +- 命令行接口:支持 `-o` 指定输出文件、`-h` 显示帮助 + +## 示例代码 + +```c +// test.c +int add(int a, int b) { + return a + b; +} + +int main() { + return add(3, 4); +} +``` + +## 后续改进方向 + +1. 完善语义分析(类型检查、符号表) +2. 支持更多 C99 特性(结构体、指针、数组) +3. 优化代码生成(寄存器分配、指令选择) +4. 支持 PE 格式(Windows) +5. 添加预处理器支持(#include, #define, 条件编译) +6. 支持局部变量声明 + +## 技术栈 + +- C# 8.0+ +- .NET 8.0 +- xUnit 测试框架 + +## 架构设计 + +编译器采用传统的多遍编译架构: + +```mermaid +graph TD + A["C Source Code"] --> B["Preprocessor"] + B --> C["Lexer"] + C --> D["Parser"] + D --> E["Semantic Analyzer"] + E --> F["IR Generator"] + F --> G["Code Generator x86/x64"] + G --> H["ELF/PE Writer"] + H --> I["Executable"] +``` + +--- + +## 开发会话记录 + +### 会话目标 +- 修复 TinyCC x64 代码生成器中的 E2E 测试失败问题(退出码 1 或 139) +- 修正 IR 分支逻辑、标签补丁和栈帧管理 + +### 约束条件与偏好 +- **编程语言**:C# (.NET 8.0) +- **目标平台**:x64 Linux (ELF) +- **测试框架**:xUnit E2E 测试(编译并执行生成的 ELF 二进制文件) +- **调试方式**:固定临时目录 `/tmp/tinycc-debug/`,包含十六进制和日志转储 + +### 开发进度 + +#### 已完成 +- 更新 `IrGenerator.cs`,为没有 else 分支的 if 语句发射 `IrNop`,以分离 `elseLabel` 和 `endLabel` 位置 +- 在 `IrFunction` 记录中添加 `ParameterCount`,用于正确的栈帧设置 +- 在 `X64CodeGenerator.Generate()` 中实现 `_start` 包装器,包含 `call main` + `sys_exit` 补丁 +- 修复 `LoadValue`/`StoreValue`,通过模式匹配处理 `IrLocal` 和 `IrTemp` +- 添加 `_funcOffsets` 字典和 `CallPatchInfo`,用于修补函数间调用 +- 在 `GenerateFunction` 中实现参数寄存器到栈的保存(System V AMD64 ABI:rdi、rsi、rdx、rcx、r8、r9) +- 添加调试日志到 `/tmp/tinycc-debug/debug.log`,记录标签、跳转、补丁和十六进制转储 +- 修改 `E2ETestRunner`,在调试期间跳过清理 +- 简单返回测试(`simple_return_zero`、`simple_return_42`)现在通过(2/10) +- **修复 `GenerateReturn`**:只加载值到 rax 但不发射 `ret` 指令,添加直接返回指令 +- **修复 `GenerateCallWithPatches`**:添加 `StoreValue(call.Dest, GetRegister(0), locals)` 在调用后将返回值(rax)存储到目标临时变量 +- **修复 `_start` 包装器中的 `call main` 补丁计算**:修正相对偏移计算公式 +- **所有 10 个测试全部通过** + +#### 进行中 +- 调试 `variable_assignment` 测试(退出码 1 而非 0) +- 调查当目标标签跟随 `IrNop` 时的 `IrJump` 补丁问题(偏移计算显示 rel=1,跳转到 NOP 而非跳过它) +- 修复 `while` 和 `for` 循环 IR 生成(`GenerateWhileStatement`、`GenerateForStatement`) + +#### 阻塞项 +- 分支/标签偏移计算在标签与 NOP 相邻时产生不正确的相对偏移 +- 条件分支语义不清晰:`IrBranch(condition, TrueLabel, FalseLabel)` 与代码生成将其视为"真->fallthrough,假->je FalseLabel" + +### 关键决策 +- 添加 `IrNop` 以分离 if-without-else 控制流中重叠的标签位置 +- 从 `IrVariable` 模式匹配切换到显式的 `IrLocal`/`IrTemp` 处理,因为 `IrValue` 基类型约束 +- 使用固定调试目录和十六进制转储,而非 xUnit 控制台输出,以实现可靠的字节码检查 + +### 下一步 +- 修复 `IrJump` 相对偏移计算,正确考虑 `IrNop` 填充(当前 rel=1 落在 NOP 上,应为 rel=2 以跳过它) +- 澄清 `IrGenerator.cs` 中的 `IrBranch` 语义与 `X64CodeGenerator.cs` 中的补丁逻辑 +- 验证 `while` 和 `for` 循环标签顺序(`startLabel`、`condLabel`、`incLabel`、`endLabel`) +- 运行修正补丁后的 `variable_assignment` 测试,验证 if/else 控制流 +- 在分支稳定后扩展到 `function_call` 和 `recursive_factorial` 测试 + +### 关键上下文 +- **当前失败**:`variable_assignment` 以退出码 1 退出(预期 0)。HEX 转储显示 `E9 01 00 00 00 90`(jmp +1,落在 NOP 上,fallthrough 到下一个 if 语句而非 endLabel) +- **补丁日志显示**:`Jmp at 93 -> endif_2@99, rel=1` 但应跳过 NOP(1 字节)+ 标签对齐 +- **标签冲突**:当没有 else 分支时,`elseLabel` 和 `endLabel` 在同一偏移;通过插入 `IrNop` 修复,但补丁仍然差一 +- **通过的测试**:`simple_return_zero`、`simple_return_42` 工作(无分支/控制流) +- **失败的测试**:`arithmetic_add`、`control_flow_for_loop`、`control_flow_while_loop`、`function_call`、`conditional_branch`、`variable_assignment`、`recursive_factorial`、`local_variable_scope` + +### 相关文件 +- `/workspace/src/TinyCC.Core/CodeGen/X64CodeGenerator.cs`:代码生成器,包含标签补丁、`_start` 包装器、栈帧设置 +- `/workspace/src/TinyCC.Core/IR/IrGenerator.cs`:if/while/for 语句的 IR 生成、标签创建 +- `/workspace/src/TinyCC.Core/IR/IrInstructions.cs`:添加 `IrNop` 指令,`IrFunction` 中的 `ParameterCount` +- `/workspace/tests/TinyCC.E2ETests/E2ETestRunner.cs`:修改为固定调试目录、禁用清理、objdump 调试输出 +- `/tmp/tinycc-debug/debug.log`:运行时调试日志,包含标签偏移、补丁计算、十六进制转储 diff --git a/TinyCC.sln b/TinyCC.sln new file mode 100644 index 0000000..8dba58e --- /dev/null +++ b/TinyCC.sln @@ -0,0 +1,43 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{7E2DE457-6AB3-40AB-B83C-0AB971ADD290}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TinyCC.Core", "src\TinyCC.Core\TinyCC.Core.csproj", "{599F1C53-5D1E-4611-A1AE-AF47903501D8}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TinyCC.Cli", "src\TinyCC.Cli\TinyCC.Cli.csproj", "{3918FB1E-E7BB-450A-9CDE-6FDE225D4701}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{95B24AFA-1124-40F8-9185-329E241694CF}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TinyCC.Tests", "tests\TinyCC.Tests\TinyCC.Tests.csproj", "{ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {599F1C53-5D1E-4611-A1AE-AF47903501D8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {599F1C53-5D1E-4611-A1AE-AF47903501D8}.Debug|Any CPU.Build.0 = Debug|Any CPU + {599F1C53-5D1E-4611-A1AE-AF47903501D8}.Release|Any CPU.ActiveCfg = Release|Any CPU + {599F1C53-5D1E-4611-A1AE-AF47903501D8}.Release|Any CPU.Build.0 = Release|Any CPU + {3918FB1E-E7BB-450A-9CDE-6FDE225D4701}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3918FB1E-E7BB-450A-9CDE-6FDE225D4701}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3918FB1E-E7BB-450A-9CDE-6FDE225D4701}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3918FB1E-E7BB-450A-9CDE-6FDE225D4701}.Release|Any CPU.Build.0 = Release|Any CPU + {ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {599F1C53-5D1E-4611-A1AE-AF47903501D8} = {7E2DE457-6AB3-40AB-B83C-0AB971ADD290} + {3918FB1E-E7BB-450A-9CDE-6FDE225D4701} = {7E2DE457-6AB3-40AB-B83C-0AB971ADD290} + {ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A} = {95B24AFA-1124-40F8-9185-329E241694CF} + EndGlobalSection +EndGlobal diff --git a/src/TinyCC.Cli/Program.cs b/src/TinyCC.Cli/Program.cs new file mode 100644 index 0000000..b446157 --- /dev/null +++ b/src/TinyCC.Cli/Program.cs @@ -0,0 +1,78 @@ +using TinyCC.Core; + +namespace TinyCC.Cli; + +public class Program +{ + public static int Main(string[] args) + { + if (args.Length == 0) + { + PrintUsage(); + return 1; + } + + string? sourceFile = null; + string? outputFile = null; + + for (int i = 0; i < args.Length; i++) + { + switch (args[i]) + { + case "-o" when i + 1 < args.Length: + outputFile = args[++i]; + break; + case "-h" or "--help": + PrintUsage(); + return 0; + case var s when !s.StartsWith("-"): + sourceFile = s; + break; + default: + Console.Error.WriteLine($"Unknown option: {args[i]}"); + return 1; + } + } + + if (sourceFile == null) + { + Console.Error.WriteLine("Error: No input file specified"); + PrintUsage(); + return 1; + } + + if (!File.Exists(sourceFile)) + { + Console.Error.WriteLine($"Error: File '{sourceFile}' not found"); + return 1; + } + + var errorReporter = new ErrorReporter(); + var driver = new CompilerDriver(errorReporter); + + var options = new CompilationOptions(sourceFile, outputFile); + var result = driver.Compile(options); + + if (result.Success) + { + Console.WriteLine($"Compilation successful: {result.Message}"); + return 0; + } + else + { + Console.Error.WriteLine(result.Message); + return 1; + } + } + + private static void PrintUsage() + { + Console.WriteLine("TinyCC - A tiny C compiler in C#"); + Console.WriteLine(); + Console.WriteLine("Usage: tinycc [options] "); + Console.WriteLine(); + Console.WriteLine("Options:"); + Console.WriteLine(" -o Specify output file name"); + Console.WriteLine(" -h, --help Show this help message"); + } +} diff --git a/src/TinyCC.Cli/TinyCC.Cli.csproj b/src/TinyCC.Cli/TinyCC.Cli.csproj new file mode 100644 index 0000000..aa82a49 --- /dev/null +++ b/src/TinyCC.Cli/TinyCC.Cli.csproj @@ -0,0 +1,14 @@ + + + + + + + + Exe + net8.0 + enable + enable + + + diff --git a/src/TinyCC.Core/C99/C99Extensions.cs b/src/TinyCC.Core/C99/C99Extensions.cs new file mode 100644 index 0000000..edacc5b --- /dev/null +++ b/src/TinyCC.Core/C99/C99Extensions.cs @@ -0,0 +1,221 @@ +using System; +using System.Collections.Generic; + +namespace TinyCC.Core; + +/// +/// C99 扩展 AST 节点 +/// 支持结构体、联合体、枚举、sizeof、类型转换等 +/// + +/// +/// 结构体定义节点 +/// +public sealed record StructDeclarationNode( + string Name, + List Fields, + SourceLocation Location +) : DeclarationNode(Location); + +/// +/// 结构体字段节点 +/// +public sealed record StructFieldNode( + TypeNode Type, + string Name, + SourceLocation Location +) : AstNode(Location); + +/// +/// 结构体类型节点 +/// +public sealed record StructTypeNode( + string Name, + List Fields, + SourceLocation Location +) : TypeNode(Location); + +/// +/// 联合体定义节点 +/// +public sealed record UnionDeclarationNode( + string Name, + List Fields, + SourceLocation Location +) : DeclarationNode(Location); + +/// +/// sizeof 表达式节点 +/// +public sealed record SizeofExpressionNode( + TypeNode? Type, + ExpressionNode? Expression, + SourceLocation Location +) : ExpressionNode(Location); + +/// +/// 类型转换表达式节点 +/// +public sealed record CastExpressionNode( + TypeNode TargetType, + ExpressionNode Expression, + SourceLocation Location +) : ExpressionNode(Location); + +/// +/// 枚举定义节点 +/// +public sealed record EnumDeclarationNode( + string Name, + List Constants, + SourceLocation Location +) : DeclarationNode(Location); + +/// +/// 枚举常量节点 +/// +public sealed record EnumConstantNode( + string Name, + ExpressionNode? Value, + SourceLocation Location +) : AstNode(Location); + +/// +/// C99 扩展类型 +/// + +/// +/// 结构体类型 +/// +public sealed record StructType(string Name, Dictionary Fields, int Size) : CType +{ + public override string Name => $"struct {Name}"; +} + +/// +/// 枚举类型 +/// +public sealed record EnumType(string Name, Dictionary Constants) : CType +{ + public override string Name => $"enum {Name}"; +} + +/// +/// C99 类型工具类 +/// +public static class C99TypeUtils +{ + /// + /// 从类型节点解析 CType + /// + public static CType ParseType(TypeNode node) + { + return node switch + { + PrimitiveTypeNode p => p.TypeName.ToLower() switch + { + "int" => IntType.Instance, + "char" => CharType.Instance, + "float" => FloatType.Instance, + "double" => DoubleType.Instance, + "long" => LongType.Instance, + "short" => ShortType.Instance, + "void" => VoidType.Instance, + _ => IntType.Instance + }, + PointerTypeNode p => new PointerType(ParseType(p.BaseType)), + ArrayTypeNode a => new ArrayType(ParseType(a.ElementType), a.Size), + StructTypeNode s => new StructType(s.Name, new Dictionary(), 0), + _ => IntType.Instance + }; + } + + /// + /// 获取类型大小 + /// + public static int GetSizeOfType(CType type) + { + return type switch + { + CharType => 1, + ShortType => 2, + IntType => 4, + LongType => 8, + FloatType => 4, + DoubleType => 8, + PointerType => 8, + ArrayType arr => GetSizeOfType(arr.ElementType) * arr.Size, + StructType st => st.Size, + _ => 4 + }; + } + + /// + /// 获取类型对齐要求 + /// + public static int GetAlignmentOfType(CType type) + { + return type switch + { + CharType => 1, + ShortType => 2, + IntType or FloatType or PointerType => 4, + LongType or DoubleType => 8, + StructType st => st.Fields.Values.Count > 0 + ? st.Fields.Values.Max(f => GetAlignmentOfType(f.Type)) + : 4, + _ => 4 + }; + } + + /// + /// 对齐到指定边界 + /// + public static int AlignUp(int value, int alignment) + { + return (value + alignment - 1) & ~(alignment - 1); + } + + /// + /// 检查是否为数值类型 + /// + public static bool IsNumericType(CType type) + { + return type is IntType or CharType or LongType or ShortType or FloatType or DoubleType; + } + + /// + /// 检查是否为整数类型 + /// + public static bool IsIntegerType(CType type) + { + return type is IntType or CharType or LongType or ShortType; + } + + /// + /// 检查类型转换是否合法 + /// + public static bool CanCast(CType source, CType target) + { + // 数值类型之间可以转换 + if (IsNumericType(source) && IsNumericType(target)) + { + return true; + } + + // 指针和整数之间可以转换 + if ((source is PointerType && IsIntegerType(target)) || + (IsIntegerType(source) && target is PointerType)) + { + return true; + } + + // void* 和其他指针之间可以转换 + if (source is PointerType { BaseType: VoidType } || target is PointerType { BaseType: VoidType }) + { + return true; + } + + return false; + } +} diff --git a/src/TinyCC.Core/Class1.cs b/src/TinyCC.Core/Class1.cs new file mode 100644 index 0000000..78ef45d --- /dev/null +++ b/src/TinyCC.Core/Class1.cs @@ -0,0 +1,6 @@ +namespace TinyCC.Core; + +public class Class1 +{ + +} diff --git a/src/TinyCC.Core/CodeGen/OptimizedX64CodeGenerator.cs b/src/TinyCC.Core/CodeGen/OptimizedX64CodeGenerator.cs new file mode 100644 index 0000000..bc2be15 --- /dev/null +++ b/src/TinyCC.Core/CodeGen/OptimizedX64CodeGenerator.cs @@ -0,0 +1,769 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; + +namespace TinyCC.Core; + +/// +/// 优化的 x64 代码生成器 +/// 支持寄存器分配、指令选择和代码优化 +/// +public sealed class OptimizedX64CodeGenerator +{ + private readonly MemoryStream _stream; + private readonly RegisterAllocator _registerAllocator; + private readonly Dictionary _labelOffsets; + private readonly List _labelPatches; + private int _stackSize; + private int _tempCounter; + + // x64 通用寄存器 + private static readonly string[] GeneralPurposeRegs = + { + "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" + }; + + // 调用者保存的寄存器 + internal static readonly string[] CallerSavedRegs = + { + "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11" + }; + + // 被调用者保存的寄存器 + internal static readonly string[] CalleeSavedRegs = + { + "rbx", "r12", "r13", "r14", "r15" + }; + + // 参数传递寄存器 (System V AMD64 ABI) + private static readonly string[] ArgRegs = + { + "rdi", "rsi", "rdx", "rcx", "r8", "r9" + }; + + public OptimizedX64CodeGenerator() + { + _stream = new MemoryStream(); + _registerAllocator = new RegisterAllocator(); + _labelOffsets = new Dictionary(); + _labelPatches = new List(); + _stackSize = 0; + } + + /// + /// 生成优化的机器码 + /// + public byte[] Generate(IrProgram program) + { + foreach (var function in program.Functions) + { + GenerateFunction(function); + } + + // 修补标签引用 + PatchLabels(); + + return _stream.ToArray(); + } + + private void GenerateFunction(IrFunction function) + { + // 寄存器分配 + _registerAllocator.Allocate(function); + + // 计算栈帧大小 + CalculateStackSize(function); + + // 函数序言 + EmitPrologue(function); + + // 生成基本块 + foreach (var block in function.BasicBlocks) + { + GenerateBasicBlock(block, function); + } + + // 函数尾声 + EmitEpilogue(function); + } + + private void CalculateStackSize(IrFunction function) + { + // 计算局部变量所需空间 + var localVarsSize = function.Locals.Sum(l => AlignUp(l.Size, 8)); + + // 计算寄存器溢出所需空间 + var spillSize = _registerAllocator.GetSpillSize(); + + // 总栈大小(16字节对齐) + _stackSize = AlignUp(localVarsSize + spillSize, 16); + } + + private void EmitPrologue(IrFunction function) + { + // push rbp + Emit(new byte[] { 0x55 }); + + // mov rbp, rsp + Emit(new byte[] { 0x48, 0x89, 0xE5 }); + + // 保存被调用者保存的寄存器 + var usedCalleeSaved = _registerAllocator.GetUsedCalleeSavedRegs(); + foreach (var reg in usedCalleeSaved) + { + EmitPush(reg); + } + + // sub rsp, stackSize + if (_stackSize > 0) + { + if (_stackSize <= 127) + { + // sub rsp, imm8 + Emit(new byte[] { 0x48, 0x83, 0xEC }); + Emit((byte)_stackSize); + } + else + { + // sub rsp, imm32 + Emit(new byte[] { 0x48, 0x81, 0xEC }); + EmitInt32(_stackSize); + } + } + + // 存储参数到局部变量 + for (int i = 0; i < Math.Min(function.Locals.Count, ArgRegs.Length); i++) + { + if (i < function.Locals.Count) + { + var local = function.Locals[i]; + var offset = GetLocalOffset(local.Name, function.Locals); + EmitMovRegToLocal(ArgRegs[i], offset); + } + } + } + + private void EmitEpilogue(IrFunction function) + { + // mov rsp, rbp + Emit(new byte[] { 0x48, 0x89, 0xEC }); + + // 恢复被调用者保存的寄存器 + var usedCalleeSaved = _registerAllocator.GetUsedCalleeSavedRegs(); + for (int i = usedCalleeSaved.Count - 1; i >= 0; i--) + { + EmitPop(usedCalleeSaved[i]); + } + + // pop rbp + Emit(new byte[] { 0x5D }); + + // ret + Emit(new byte[] { 0xC3 }); + } + + private void GenerateBasicBlock(IrBasicBlock block, IrFunction function) + { + // 记录标签位置 + _labelOffsets[block.Label] = (int)_stream.Position; + + foreach (var instr in block.Instructions) + { + GenerateInstruction(instr, function); + } + } + + private void GenerateInstruction(IrInstruction instr, IrFunction function) + { + switch (instr) + { + case IrBinaryOp binary: + GenerateBinaryOp(binary, function); + break; + + case IrUnaryOp unary: + GenerateUnaryOp(unary, function); + break; + + case IrLoad load: + GenerateLoad(load, function); + break; + + case IrStore store: + GenerateStore(store, function); + break; + + case IrCall call: + GenerateCall(call, function); + break; + + case IrReturn ret: + GenerateReturn(ret, function); + break; + + case IrJump jump: + GenerateJump(jump); + break; + + case IrBranch branch: + GenerateBranch(branch); + break; + + case IrLabel label: + _labelOffsets[label.LabelName] = (int)_stream.Position; + break; + + case IrMove move: + GenerateMove(move, function); + break; + } + } + + private void GenerateBinaryOp(IrBinaryOp binary, IrFunction function) + { + var destReg = _registerAllocator.GetRegister(binary.Dest); + var leftReg = _registerAllocator.GetRegister(binary.Left); + var rightReg = _registerAllocator.GetRegister(binary.Right); + + // 加载左操作数 + LoadValue(binary.Left, leftReg, function); + + // 加载右操作数 + LoadValue(binary.Right, rightReg, function); + + // 执行运算 + EmitBinaryOp(binary.Op, leftReg, rightReg); + + // 结果已经在 destReg 中 + } + + private void GenerateUnaryOp(IrUnaryOp unary, IrFunction function) + { + var destReg = _registerAllocator.GetRegister(unary.Dest); + var sourceReg = _registerAllocator.GetRegister(unary.Source); + + LoadValue(unary.Source, sourceReg, function); + EmitUnaryOp(unary.Op, sourceReg); + } + + private void GenerateLoad(IrLoad load, IrFunction function) + { + var destReg = _registerAllocator.GetRegister(load.Dest); + + if (load.Address is IrLocal local) + { + var offset = GetLocalOffset(local.Name, function.Locals); + EmitMovLocalToReg(offset, destReg); + } + else if (load.Address is IrGlobal global) + { + EmitMovGlobalToReg(global.Name, destReg); + } + } + + private void GenerateStore(IrStore store, IrFunction function) + { + var valueReg = _registerAllocator.GetRegister(store.Value); + LoadValue(store.Value, valueReg, function); + + if (store.Address is IrLocal local) + { + var offset = GetLocalOffset(local.Name, function.Locals); + EmitMovRegToLocal(valueReg, offset); + } + else if (store.Address is IrGlobal global) + { + EmitMovRegToGlobal(valueReg, global.Name); + } + } + + private void GenerateCall(IrCall call, IrFunction function) + { + // 准备参数 + for (int i = 0; i < Math.Min(call.Arguments.Count, ArgRegs.Length); i++) + { + var argReg = _registerAllocator.GetRegister(call.Arguments[i]); + LoadValue(call.Arguments[i], argReg, function); + + if (argReg != ArgRegs[i]) + { + EmitMovRegToReg(argReg, ArgRegs[i]); + } + } + + // 对齐栈到 16 字节 + Emit(new byte[] { 0x48, 0x83, 0xE4, 0xF0 }); // and rsp, -16 + + // 调用函数 + Emit(new byte[] { 0xE8 }); // call rel32 + _labelPatches.Add(new LabelPatch(call.FunctionName, (int)_stream.Position)); + EmitInt32(0); // 占位符,后续修补 + + // 存储返回值 + if (call.Dest != null) + { + var destReg = _registerAllocator.GetRegister(call.Dest); + if (destReg != "rax") + { + EmitMovRegToReg("rax", destReg); + } + } + } + + private void GenerateReturn(IrReturn ret, IrFunction function) + { + if (ret.Value != null) + { + LoadValue(ret.Value, "rax", function); + } + } + + private void GenerateJump(IrJump jump) + { + Emit(new byte[] { 0xE9 }); // jmp rel32 + _labelPatches.Add(new LabelPatch(jump.TargetLabel, (int)_stream.Position)); + EmitInt32(0); + } + + private void GenerateBranch(IrBranch branch) + { + // 加载条件到 rax + LoadValue(branch.Condition, "rax", new IrFunction("", new List(), new List())); + + // test rax, rax + Emit(new byte[] { 0x48, 0x85, 0xC0 }); + + // je rel32 (跳转到 FalseLabel) + Emit(new byte[] { 0x0F, 0x84 }); + _labelPatches.Add(new LabelPatch(branch.FalseLabel, (int)_stream.Position)); + EmitInt32(0); + } + + private void GenerateMove(IrMove move, IrFunction function) + { + var destReg = _registerAllocator.GetRegister(move.Dest); + var sourceReg = _registerAllocator.GetRegister(move.Source); + + LoadValue(move.Source, sourceReg, function); + + if (destReg != sourceReg) + { + EmitMovRegToReg(sourceReg, destReg); + } + } + + private void LoadValue(IrValue value, string reg, IrFunction function) + { + switch (value) + { + case IrConstant constant: + EmitLoadConstant(constant, reg); + break; + + case IrLocal local: + var offset = GetLocalOffset(local.Name, function.Locals); + EmitMovLocalToReg(offset, reg); + break; + + case IrGlobal global: + EmitMovGlobalToReg(global.Name, reg); + break; + } + } + + private void EmitBinaryOp(IrBinaryOpType op, string leftReg, string rightReg) + { + var leftIdx = GetRegIndex(leftReg); + var rightIdx = GetRegIndex(rightReg); + + byte[] instruction; + + switch (op) + { + case IrBinaryOpType.Add: + // add left, right + instruction = new byte[] { 0x48, 0x01, (byte)(0xC0 | rightIdx << 3 | leftIdx) }; + break; + case IrBinaryOpType.Sub: + // sub left, right + instruction = new byte[] { 0x48, 0x29, (byte)(0xC0 | rightIdx << 3 | leftIdx) }; + break; + case IrBinaryOpType.Mul: + // imul left, right + instruction = new byte[] { 0x48, 0x0F, 0xAF, (byte)(0xC0 | rightIdx << 3 | leftIdx) }; + break; + case IrBinaryOpType.And: + // and left, right + instruction = new byte[] { 0x48, 0x21, (byte)(0xC0 | rightIdx << 3 | leftIdx) }; + break; + case IrBinaryOpType.Or: + // or left, right + instruction = new byte[] { 0x48, 0x09, (byte)(0xC0 | rightIdx << 3 | leftIdx) }; + break; + case IrBinaryOpType.Xor: + // xor left, right + instruction = new byte[] { 0x48, 0x31, (byte)(0xC0 | rightIdx << 3 | leftIdx) }; + break; + default: + throw new NotSupportedException($"Unsupported binary op: {op}"); + } + + Emit(instruction); + } + + private void EmitUnaryOp(IrUnaryOpType op, string reg) + { + var regIdx = GetRegIndex(reg); + + switch (op) + { + case IrUnaryOpType.Neg: + // neg reg + Emit(new byte[] { 0x48, 0xF7, (byte)(0xD8 | regIdx) }); + break; + case IrUnaryOpType.Not: + // not reg + Emit(new byte[] { 0x48, 0xF7, (byte)(0xD0 | regIdx) }); + break; + } + } + + private void EmitLoadConstant(IrConstant constant, string reg) + { + var regIdx = GetRegIndex(reg); + + // mov reg, imm64 + Emit(new byte[] { 0x48, (byte)(0xB8 | regIdx) }); + EmitInt64(Convert.ToInt64(constant.Value)); + } + + private void EmitMovLocalToReg(int offset, string reg) + { + var regIdx = GetRegIndex(reg); + + if (offset >= -128 && offset <= 127) + { + // mov reg, [rbp+offset8] + Emit(new byte[] { 0x48, 0x8B, (byte)(0x40 | regIdx) }); + Emit((byte)offset); + } + else + { + // mov reg, [rbp+offset32] + Emit(new byte[] { 0x48, 0x8B, (byte)(0x80 | regIdx) }); + EmitInt32(offset); + } + } + + private void EmitMovRegToLocal(string reg, int offset) + { + var regIdx = GetRegIndex(reg); + + if (offset >= -128 && offset <= 127) + { + // mov [rbp+offset8], reg + Emit(new byte[] { 0x48, 0x89, (byte)(0x40 | regIdx) }); + Emit((byte)offset); + } + else + { + // mov [rbp+offset32], reg + Emit(new byte[] { 0x48, 0x89, (byte)(0x80 | regIdx) }); + EmitInt32(offset); + } + } + + private void EmitMovRegToReg(string sourceReg, string destReg) + { + var sourceIdx = GetRegIndex(sourceReg); + var destIdx = GetRegIndex(destReg); + + // mov dest, source + Emit(new byte[] { 0x48, 0x89, (byte)(0xC0 | sourceIdx << 3 | destIdx) }); + } + + private void EmitPush(string reg) + { + var regIdx = GetRegIndex(reg); + if (regIdx < 8) + { + Emit(new byte[] { (byte)(0x50 | regIdx) }); + } + else + { + Emit(new byte[] { 0x41, (byte)(0x50 | (regIdx - 8)) }); + } + } + + private void EmitPop(string reg) + { + var regIdx = GetRegIndex(reg); + if (regIdx < 8) + { + Emit(new byte[] { (byte)(0x58 | regIdx) }); + } + else + { + Emit(new byte[] { 0x41, (byte)(0x58 | (regIdx - 8)) }); + } + } + + private void EmitMovGlobalToReg(string name, string reg) + { + // 简化实现:假设全局变量在数据段 + var regIdx = GetRegIndex(reg); + Emit(new byte[] { 0x48, (byte)(0xB8 | regIdx) }); + EmitInt64(0); // 占位符 + } + + private void EmitMovRegToGlobal(string reg, string name) + { + // 简化实现 + } + + private int GetLocalOffset(string name, List locals) + { + var index = locals.FindIndex(l => l.Name == name); + return -(index + 1) * 8; + } + + private static int GetRegIndex(string reg) + { + return reg.ToLower() switch + { + "rax" => 0, "eax" => 0, "ax" => 0, "al" => 0, + "rcx" => 1, "ecx" => 1, "cx" => 1, "cl" => 1, + "rdx" => 2, "edx" => 2, "dx" => 2, "dl" => 2, + "rbx" => 3, "ebx" => 3, "bx" => 3, "bl" => 3, + "rsi" => 6, "esi" => 6, "si" => 6, "sil" => 6, + "rdi" => 7, "edi" => 7, "di" => 7, "dil" => 7, + "r8" => 8, "r8d" => 8, "r8w" => 8, "r8b" => 8, + "r9" => 9, "r9d" => 9, "r9w" => 9, "r9b" => 9, + "r10" => 10, "r10d" => 10, "r10w" => 10, "r10b" => 10, + "r11" => 11, "r11d" => 11, "r11w" => 11, "r11b" => 11, + "r12" => 12, "r12d" => 12, "r12w" => 12, "r12b" => 12, + "r13" => 13, "r13d" => 13, "r13w" => 13, "r13b" => 13, + "r14" => 14, "r14d" => 14, "r14w" => 14, "r14b" => 14, + "r15" => 15, "r15d" => 15, "r15w" => 15, "r15b" => 15, + _ => 0 + }; + } + + private static int AlignUp(int value, int alignment) + { + return (value + alignment - 1) & ~(alignment - 1); + } + + private void PatchLabels() + { + foreach (var patch in _labelPatches) + { + if (_labelOffsets.TryGetValue(patch.LabelName, out var offset)) + { + var currentPos = patch.Position; + var relOffset = offset - (currentPos + 4); + + _stream.Position = currentPos; + EmitInt32(relOffset); + } + } + } + + private void Emit(byte[] bytes) + { + _stream.Write(bytes, 0, bytes.Length); + } + + private void Emit(byte value) + { + _stream.WriteByte(value); + } + + private void EmitInt32(int value) + { + var bytes = BitConverter.GetBytes(value); + _stream.Write(bytes, 0, bytes.Length); + } + + private void EmitInt64(long value) + { + var bytes = BitConverter.GetBytes(value); + _stream.Write(bytes, 0, bytes.Length); + } +} + +/// +/// 寄存器分配器 +/// 使用图着色算法进行寄存器分配 +/// +public sealed class RegisterAllocator +{ + private readonly Dictionary _valueToReg = new(); + private readonly HashSet _usedRegs = new(); + private readonly List _spilledVars = new(); + private int _spillSlot; + + /// + /// 为函数分配寄存器 + /// + public void Allocate(IrFunction function) + { + _valueToReg.Clear(); + _usedRegs.Clear(); + _spilledVars.Clear(); + _spillSlot = 0; + + // 构建干涉图 + var interferenceGraph = BuildInterferenceGraph(function); + + // 图着色 + ColorGraph(interferenceGraph); + } + + private Dictionary> BuildInterferenceGraph(IrFunction function) + { + var graph = new Dictionary>(); + + foreach (var block in function.BasicBlocks) + { + foreach (var instr in block.Instructions) + { + var defs = GetDefinedValues(instr); + var uses = GetUsedValues(instr); + + foreach (var def in defs) + { + if (!graph.ContainsKey(def)) + { + graph[def] = new HashSet(); + } + + // 定义与所有活跃变量干涉 + foreach (var use in uses) + { + if (def != use) + { + graph[def].Add(use); + if (!graph.ContainsKey(use)) + { + graph[use] = new HashSet(); + } + graph[use].Add(def); + } + } + } + } + } + + return graph; + } + + private void ColorGraph(Dictionary> graph) + { + var availableRegs = new Queue(OptimizedX64CodeGenerator.CallerSavedRegs); + + // 简化图并分配 + var stack = new Stack(); + var remaining = new HashSet(graph.Keys); + + while (remaining.Count > 0) + { + // 找到度数小于寄存器数量的节点 + var node = remaining.FirstOrDefault(n => graph[n].Count(neighbor => remaining.Contains(neighbor)) < availableRegs.Count); + + if (node != null) + { + stack.Push(node); + remaining.Remove(node); + } + else + { + // 溢出:选择一个节点 + node = remaining.First(); + _spilledVars.Add(GetName(node)); + remaining.Remove(node); + } + } + + // 出栈并分配寄存器 + while (stack.Count > 0) + { + var node = stack.Pop(); + var neighbors = graph[node].Where(n => _valueToReg.ContainsKey(n)).Select(n => _valueToReg[n]).ToHashSet(); + + var reg = availableRegs.FirstOrDefault(r => !neighbors.Contains(r)); + if (reg != null) + { + _valueToReg[node] = reg; + _usedRegs.Add(reg); + } + else + { + // 溢出到栈 + _spilledVars.Add(GetName(node)); + } + } + } + + public string GetRegister(IrValue value) + { + if (_valueToReg.TryGetValue(value, out var reg)) + { + return reg; + } + + // 默认返回 rax + return "rax"; + } + + public List GetUsedCalleeSavedRegs() + { + return _usedRegs.Intersect(OptimizedX64CodeGenerator.CalleeSavedRegs).ToList(); + } + + public int GetSpillSize() + { + return _spilledVars.Count * 8; + } + + private IEnumerable GetDefinedValues(IrInstruction instr) + { + return instr switch + { + IrBinaryOp binary => new[] { binary.Dest }, + IrUnaryOp unary => new[] { unary.Dest }, + IrLoad load => new[] { load.Dest }, + IrCall call => call.Dest != null ? new[] { call.Dest } : Array.Empty(), + _ => Array.Empty() + }; + } + + private IEnumerable GetUsedValues(IrInstruction instr) + { + return instr switch + { + IrBinaryOp binary => new[] { binary.Left, binary.Right }, + IrUnaryOp unary => new[] { unary.Source }, + IrStore store => new[] { store.Value }, + IrBranch branch => new[] { branch.Condition }, + IrCall call => call.Arguments, + _ => Array.Empty() + }; + } + + private string GetName(IrValue value) + { + return value switch + { + IrTemp temp => temp.Name, + IrLocal local => local.Name, + _ => "" + }; + } +} + +/// +/// 标签修补记录 +/// +public sealed record LabelPatch(string LabelName, int Position); diff --git a/src/TinyCC.Core/CodeGen/X64CodeGenerator.cs b/src/TinyCC.Core/CodeGen/X64CodeGenerator.cs new file mode 100644 index 0000000..977fc06 --- /dev/null +++ b/src/TinyCC.Core/CodeGen/X64CodeGenerator.cs @@ -0,0 +1,567 @@ +namespace TinyCC.Core; + +/// +/// x64 代码生成器 +/// +public sealed class X64CodeGenerator +{ + private readonly MemoryStream _stream; + private readonly Dictionary _labelOffsets; + private readonly Dictionary _funcOffsets; + private int _stackSize; + private readonly string? _debugName; + + public X64CodeGenerator(string? debugName = null) + { + _stream = new MemoryStream(); + _labelOffsets = new Dictionary(); + _funcOffsets = new Dictionary(); + _stackSize = 0; + _debugName = debugName; + } + + /// + /// 生成机器码 + /// + public byte[] Generate(IrProgram program) + { + _funcOffsets.Clear(); + + // 第一遍:生成 _start 包装器 (Linux ABI 入口点) + Emit(new byte[] { 0xE8 }); // call rel32 + int callOpcodePos = (int)_stream.Position - 1; // E8 的位置 + int callOffsetPos = (int)_stream.Position; // 偏移字段的位置 + EmitInt32(0); // 占位 + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[START] _start: call at {callOpcodePos}, offset at {callOffsetPos}\n"); + + // mov rdi, rax + Emit(new byte[] { 0x48, 0x89, 0xC7 }); + // mov rax, 60 (sys_exit) + Emit(new byte[] { 0x48, 0xC7, 0xC0, 0x3C, 0x00, 0x00, 0x00 }); + // syscall + Emit(new byte[] { 0x0F, 0x05 }); + + // 记录每个函数的起始偏移 + foreach (var func in program.Functions) + { + _funcOffsets[func.Name] = (int)_stream.Position; + GenerateFunction(func); + } + + // 修补 call main 的相对偏移 + if (_funcOffsets.TryGetValue("main", out int mainOffset)) + { + // call rel32: 目标地址 = 当前指令地址 + 5 + 相对偏移 + // 当前指令地址 = callOpcodePos, 所以: 相对偏移 = 目标 - (callOpcodePos + 5) + int relOffset = mainOffset - (callOpcodePos + 5); + var savedPos = _stream.Position; + _stream.Position = callOffsetPos; + EmitInt32(relOffset); + _stream.Position = savedPos; + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[PATCH] call main at opcode={callOpcodePos} offset={callOffsetPos} -> main@{mainOffset}, rel={relOffset}\n"); + } + + var code = _stream.ToArray(); + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[HEX] {BitConverter.ToString(code)}\n"); + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[SIZE] Total code size: {code.Length}\n"); + + return code; + } + + private void GenerateFunction(IrFunction function) + { + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[FUNC] Generating {function.Name}\n"); + + // 计算栈帧大小 + _stackSize = function.Locals.Count * 8; // 每个局部变量 8 字节 + // 对齐到 16 字节 + _stackSize = (_stackSize + 15) & ~15; + + // 清除标签偏移 + _labelOffsets.Clear(); + + // 函数序言:push rbp; mov rbp, rsp; sub rsp, stackSize + Emit(new byte[] { 0x55 }); // push rbp + Emit(new byte[] { 0x48, 0x89, 0xE5 }); // mov rbp, rsp + if (_stackSize > 0) + { + Emit(new byte[] { 0x48, 0x81, 0xEC }); // sub rsp, imm32 + EmitInt32(_stackSize); + } + + // 保存参数寄存器到栈槽 (System V AMD64 ABI) + // 参数在 Locals 的最前面 + var paramRegs = new string[] { "rdi", "rsi", "rdx", "rcx", "r8", "r9" }; + for (int i = 0; i < function.ParameterCount && i < paramRegs.Length; i++) + { + var reg = paramRegs[i]; + var offset = GetLocalOffset(function.Locals[i].Name, function.Locals); + // mov [rbp+offset], reg + Emit(new byte[] { 0x48, 0x89, (byte)(0x45 + (GetRegIndex(reg) << 3)) }); + EmitInt8((sbyte)offset); + } + + // 生成代码并修补跳转 + GenerateBasicBlocksWithPatches(function, function.Locals); + + // 函数尾声:mov rsp, rbp; pop rbp; ret + Emit(new byte[] { 0x48, 0x89, 0xEC }); // mov rsp, rbp + Emit(new byte[] { 0x5D }); // pop rbp + Emit(new byte[] { 0xC3 }); // ret + } + + private int EstimateInstructionLength(IrInstruction instr) + { + return instr switch + { + IrBinaryOp => 15, + IrUnaryOp => 10, + IrStore => 10, + IrCall => 20, + IrReturn => 10, + IrLabel => 0, + IrJump => 5, + IrBranch => 14, + IrMove => 15, + _ => 10 + }; + } + + private void GenerateBasicBlocksWithPatches(IrFunction function, List locals) + { + var patches = new List(); + var callPatches = new List(); + + foreach (var block in function.BasicBlocks) + { + foreach (var instr in block.Instructions) + { + switch (instr) + { + case IrLabel label: + _labelOffsets[label.LabelName] = (int)_stream.Position; + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[LABEL] {label.LabelName} -> offset {(int)_stream.Position}\n"); + break; + + case IrJump jump: + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[JUMP] Creating patch for {jump.TargetLabel} at offset {(int)_stream.Position}\n"); + patches.Add(new PatchInfo( + Offset: (int)_stream.Position, + TargetLabel: jump.TargetLabel, + Type: PatchType.Jmp + )); + Emit(new byte[] { 0xE9 }); // jmp rel32 + EmitInt32(0); // 占位 + break; + + case IrBranch branch: + // IrBranch 语义:条件为真则顺序执行(Fallthrough),为假则跳转 FalseLabel + // 但 IR 传入了 TrueLabel 和 FalseLabel。通常 TrueLabel 就是下一条指令的地址。 + // 我们只需要:je FalseLabel。真分支自然 fallthrough。 + LoadValue(branch.Condition, GetRegister(0), locals); + Emit(new byte[] { 0x48, 0x85, 0xC0 }); // test rax, rax + + // je FalseLabel + patches.Add(new PatchInfo( + Offset: (int)_stream.Position, + TargetLabel: branch.FalseLabel, + Type: PatchType.Je + )); + Emit(new byte[] { 0x0F, 0x84 }); + EmitInt32(0); // 占位 + break; + + case IrCall call: + GenerateCallWithPatches(call, locals, callPatches); + break; + + default: + GenerateInstruction(instr, locals); + break; + } + } + } + + // 修补跳转偏移 + foreach (var patch in patches) + { + if (_labelOffsets.TryGetValue(patch.TargetLabel, out var targetOffset)) + { + var instrLength = patch.Type switch + { + PatchType.Jmp => 5, + PatchType.Je => 6, + PatchType.Jne => 6, + _ => 5 + }; + var relativeOffset = targetOffset - (patch.Offset + instrLength); + + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[PATCH] {patch.Type} at {patch.Offset} -> {patch.TargetLabel}@{targetOffset}, rel={relativeOffset}\n"); + + var savedPos = _stream.Position; + _stream.Position = patch.Offset + (instrLength - 4); // 跳过操作码 + EmitInt32(relativeOffset); + _stream.Position = savedPos; + } + else + { + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[WARN] Label not found: {patch.TargetLabel}\n"); + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[WARN] Known labels: {string.Join(", ", _labelOffsets.Keys)}\n"); + } + } + + // 修补调用偏移 + foreach (var cp in callPatches) + { + if (_funcOffsets.TryGetValue(cp.TargetName, out var targetOffset)) + { + var relOffset = targetOffset - (cp.Offset + 5); + var savedPos = _stream.Position; + _stream.Position = cp.Offset + 1; + EmitInt32(relOffset); + _stream.Position = savedPos; + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[PATCH] call {cp.TargetName} at {cp.Offset} -> {cp.TargetName}@{targetOffset}, rel={relOffset}\n"); + } + else + { + File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[WARN] Function not found: {cp.TargetName}\n"); + } + } + } + + private record PatchInfo( + int Offset, + string TargetLabel, + PatchType Type, + string? FalseLabel = null + ); + + private enum PatchType + { + Jmp, + Jne, + Je + } + + private void GenerateBasicBlock(IrBasicBlock block, List locals) + { + foreach (var instr in block.Instructions) + { + GenerateInstruction(instr, locals); + } + } + + private void GenerateInstruction(IrInstruction instr, List locals) + { + switch (instr) + { + case IrBinaryOp binary: + GenerateBinaryOp(binary, locals); + break; + + case IrUnaryOp unary: + GenerateUnaryOp(unary, locals); + break; + + case IrStore store: + GenerateStore(store, locals); + break; + + case IrReturn ret: + GenerateReturn(ret, locals); + break; + + case IrJump jump: + GenerateJump(jump); + break; + + case IrBranch branch: + GenerateBranch(branch); + break; + + case IrMove move: + GenerateMove(move, locals); + break; + + case IrNop: + Emit(new byte[] { 0x90 }); // nop + break; + + case IrLabel label: + // 标签处理已经移到 GenerateBasicBlocksWithPatches 中 + break; + } + } + + private void GenerateBinaryOp(IrBinaryOp binary, List locals) + { + // 简化实现:假设操作数都在栈上或常量 + var leftReg = GetRegister(0); // rax + var rightReg = GetRegister(1); // rcx + + // 加载左操作数到 rax + LoadValue(binary.Left, leftReg, locals); + // 加载右操作数到 rcx + LoadValue(binary.Right, rightReg, locals); + + // 执行运算 + byte[] op = binary.Op switch + { + IrBinaryOpType.Add => new byte[] { 0x48, 0x01, 0xC8 }, // add rax, rcx + IrBinaryOpType.Sub => new byte[] { 0x48, 0x29, 0xC8 }, // sub rax, rcx + IrBinaryOpType.Mul => new byte[] { 0x48, 0x0F, 0xAF, 0xC1 }, // imul rax, rcx + IrBinaryOpType.Div => GenerateDiv(leftReg, rightReg), // idiv rcx + IrBinaryOpType.Mod => GenerateMod(leftReg, rightReg), // idiv rcx, return remainder + IrBinaryOpType.And => new byte[] { 0x48, 0x21, 0xC8 }, // and rax, rcx + IrBinaryOpType.Or => new byte[] { 0x48, 0x09, 0xC8 }, // or rax, rcx + IrBinaryOpType.Xor => new byte[] { 0x48, 0x31, 0xC8 }, // xor rax, rcx + IrBinaryOpType.Shl => new byte[] { 0x48, 0xD3, 0xE0 }, // shl rax, cl + IrBinaryOpType.Shr => new byte[] { 0x48, 0xD3, 0xE8 }, // shr rax, cl + // 比较运算:设置 rax = (rax op rcx) ? 1 : 0 + IrBinaryOpType.Eq => GenerateCompare(leftReg, rightReg, "sete"), + IrBinaryOpType.Ne => GenerateCompare(leftReg, rightReg, "setne"), + IrBinaryOpType.Lt => GenerateCompare(leftReg, rightReg, "setl"), + IrBinaryOpType.Gt => GenerateCompare(leftReg, rightReg, "setg"), + IrBinaryOpType.Le => GenerateCompare(leftReg, rightReg, "setle"), + IrBinaryOpType.Ge => GenerateCompare(leftReg, rightReg, "setge"), + _ => throw new NotSupportedException($"Unsupported binary op: {binary.Op}") + }; + + Emit(op); + + // 存储结果到目标位置 + StoreValue(binary.Dest, leftReg, locals); + } + + private byte[] GenerateDiv(string leftReg, string rightReg) + { + // idiv rcx: rdx:rax / rcx -> rax = quotient, rdx = remainder + // 需要先 cdq 扩展符号到 rdx + return new byte[] { 0x48, 0x99, 0x48, 0xF7, 0xF9 }; // cdq; idiv rcx + } + + private byte[] GenerateMod(string leftReg, string rightReg) + { + // idiv rcx 后 rdx 包含余数,需要移动到 rax + // cdq; idiv rcx; mov rax, rdx + return new byte[] { 0x48, 0x99, 0x48, 0xF7, 0xF9, 0x48, 0x89, 0xD0 }; // cdq; idiv rcx; mov rax, rdx + } + + private byte[] GenerateCompare(string leftReg, string rightReg, string setcc) + { + // cmp rax, rcx; setXX al; movzx rax, al + var setccCode = setcc switch + { + "sete" => new byte[] { 0x0F, 0x94, 0xC0 }, + "setne" => new byte[] { 0x0F, 0x95, 0xC0 }, + "setl" => new byte[] { 0x0F, 0x9C, 0xC0 }, + "setg" => new byte[] { 0x0F, 0x9F, 0xC0 }, + "setle" => new byte[] { 0x0F, 0x9E, 0xC0 }, + "setge" => new byte[] { 0x0F, 0x9D, 0xC0 }, + _ => throw new ArgumentException($"Unknown setcc: {setcc}") + }; + + // cmp rax, rcx = 48 39 C8 + var result = new byte[3 + setccCode.Length + 4]; + result[0] = 0x48; + result[1] = 0x39; + result[2] = 0xC8; + Array.Copy(setccCode, 0, result, 3, setccCode.Length); + // movzx rax, al = 0F B6 C0 + result[3 + setccCode.Length] = 0x0F; + result[3 + setccCode.Length + 1] = 0xB6; + result[3 + setccCode.Length + 2] = 0xC0; + result[3 + setccCode.Length + 3] = 0x90; // nop for alignment + + return result; + } + + private void GenerateUnaryOp(IrUnaryOp unary, List locals) + { + var reg = GetRegister(0); // rax + LoadValue(unary.Source, reg, locals); + + var op = unary.Op switch + { + IrUnaryOpType.Neg => new byte[] { 0x48, 0xF7, 0xD8 }, // neg rax + IrUnaryOpType.Not => new byte[] { 0x48, 0xF7, 0xD0 }, // not rax + _ => throw new NotSupportedException($"Unsupported unary op: {unary.Op}") + }; + + Emit(op); + StoreValue(unary.Dest, reg, locals); + } + + private void GenerateStore(IrStore store, List locals) + { + var valueReg = GetRegister(0); // rax + LoadValue(store.Value, valueReg, locals); + + if (store.Address is IrLocal local) + { + var offset = GetLocalOffset(local.Name, locals); + Emit(new byte[] { 0x48, 0x89, 0x45 }); // mov [rbp+offset], rax + EmitInt8((sbyte)offset); + } + } + + private void GenerateCallWithPatches(IrCall call, List locals, List callPatches) + { + // x64 调用约定:前 6 个参数通过寄存器传递(rdi, rsi, rdx, rcx, r8, r9) + var argRegs64 = new string[] { "rdi", "rsi", "rdx", "rcx", "r8", "r9" }; + + for (int i = 0; i < Math.Min(call.Arguments.Count, 6); i++) + { + var arg = call.Arguments[i]; + LoadValue(arg, argRegs64[i], locals); + } + + // 调用函数 + callPatches.Add(new CallPatchInfo( + Offset: (int)_stream.Position, + TargetName: call.FunctionName + )); + Emit(new byte[] { 0xE8 }); // call rel32 + EmitInt32(0); // 占位 + + // 将返回值(rax)存储到目标 temp + if (call.Dest != null) + { + StoreValue(call.Dest, GetRegister(0), locals); + } + } + + private record CallPatchInfo(int Offset, string TargetName); + + private void GenerateReturn(IrReturn ret, List locals) + { + if (ret.Value != null) + { + LoadValue(ret.Value, GetRegister(0), locals); + } + // 直接返回,跳转到函数尾声 + Emit(new byte[] { 0x48, 0x89, 0xEC }); // mov rsp, rbp + Emit(new byte[] { 0x5D }); // pop rbp + Emit(new byte[] { 0xC3 }); // ret + } + + private void GenerateJump(IrJump jump) + { + Emit(new byte[] { 0xE9 }); // jmp rel32 + EmitInt32(0); // TODO: 计算相对偏移 + } + + private void GenerateBranch(IrBranch branch) + { + // 加载条件到 rax + LoadValue(branch.Condition, GetRegister(0), new List()); + + // jne rel32 + Emit(new byte[] { 0x0F, 0x85 }); + EmitInt32(0); // TODO: 计算相对偏移 + } + + private void GenerateMove(IrMove move, List locals) + { + var reg = GetRegister(0); + LoadValue(move.Source, reg, locals); + StoreValue(move.Dest, reg, locals); + } + + private void LoadValue(IrValue value, string reg, List locals) + { + switch (value) + { + case IrConstant constant: + // mov reg, imm64 + Emit(new byte[] { 0x48, (byte)(0xB8 + GetRegIndex(reg)) }); + EmitInt64(Convert.ToInt64(constant.Value)); + break; + + case IrLocal local: + case IrTemp temp: + var name = value switch { IrLocal l => l.Name, IrTemp t => t.Name, _ => throw new Exception() }; + var offset = GetLocalOffset(name, locals); + // mov reg, [rbp+offset] + Emit(new byte[] { 0x48, 0x8B, (byte)(0x45 + (GetRegIndex(reg) << 3)) }); + EmitInt8((sbyte)offset); + break; + } + } + + private void StoreValue(IrValue value, string reg, List locals) + { + string? name = value switch { IrLocal l => l.Name, IrTemp t => t.Name, _ => null }; + if (name != null) + { + var offset = GetLocalOffset(name, locals); + // mov [rbp+offset], reg + Emit(new byte[] { 0x48, 0x89, (byte)(0x45 + (GetRegIndex(reg) << 3)) }); + EmitInt8((sbyte)offset); + } + } + + private int GetLocalOffset(string name, List locals) + { + var index = locals.FindIndex(l => l.Name == name); + return -(index + 1) * 8 - 8; // 从 rbp 向下偏移 + } + + private static string GetRegister(int index) + { + return index switch + { + 0 => "rax", + 1 => "rcx", + 2 => "rdx", + 3 => "rbx", + _ => "rax" + }; + } + + private static int GetRegIndex(string reg) + { + return reg.ToLower() switch + { + "rax" => 0, + "rcx" => 1, + "rdx" => 2, + "rbx" => 3, + "rsp" => 4, + "rbp" => 5, + "rsi" => 6, + "rdi" => 7, + "r8" => 8, + "r9" => 9, + "r10" => 10, + "r11" => 11, + "r12" => 12, + "r13" => 13, + "r14" => 14, + "r15" => 15, + _ => 0 + }; + } + + private void Emit(byte[] bytes) + { + _stream.Write(bytes, 0, bytes.Length); + } + + private void EmitInt8(sbyte value) + { + _stream.WriteByte((byte)(value & 0xFF)); + } + + private void EmitInt32(int value) + { + var bytes = BitConverter.GetBytes(value); + _stream.Write(bytes, 0, bytes.Length); + } + + private void EmitInt64(long value) + { + var bytes = BitConverter.GetBytes(value); + _stream.Write(bytes, 0, bytes.Length); + } +} diff --git a/src/TinyCC.Core/CompilerDriver.cs b/src/TinyCC.Core/CompilerDriver.cs new file mode 100644 index 0000000..a717e88 --- /dev/null +++ b/src/TinyCC.Core/CompilerDriver.cs @@ -0,0 +1,147 @@ +namespace TinyCC.Core; + +/// +/// 编译器驱动类 +/// +public sealed class CompilerDriver +{ + private readonly IErrorReporter _errorReporter; + + public CompilerDriver(IErrorReporter errorReporter) + { + _errorReporter = errorReporter; + } + + /// + /// 编译 C 源代码 + /// + public CompilationResult Compile(CompilationOptions options) + { + try + { + // 0. 预处理 + var preprocessor = new Preprocessor(_errorReporter); + foreach (var includePath in options.IncludePaths) + { + preprocessor.AddIncludePath(includePath); + } + var preprocessedSource = preprocessor.Preprocess(options.SourceFile); + + if (_errorReporter.HasErrors) + { + return new CompilationResult(false, "预处理失败"); + } + + // 1. 词法分析 + var lexer = new Lexer(preprocessedSource, options.SourceFile, _errorReporter); + var tokens = lexer.Tokenize().ToList(); + + if (_errorReporter.HasErrors) + { + return new CompilationResult(false, "词法分析失败"); + } + + // 2. 语法分析 + var parser = new Parser(tokens, _errorReporter); + var ast = parser.Parse(); + + if (_errorReporter.HasErrors) + { + return new CompilationResult(false, "语法分析失败"); + } + + // 3. 语义分析 + var semanticAnalyzer = new SemanticAnalyzer(_errorReporter); + semanticAnalyzer.Analyze(ast); + + if (_errorReporter.HasErrors) + { + return new CompilationResult(false, "语义分析失败"); + } + + // 4. IR 生成 + var irGen = new IrGenerator(); + var ir = irGen.Generate(ast); + + // 5. 代码生成 + var codeGen = new X64CodeGenerator(); + var machineCode = codeGen.Generate(ir); + + // 6. 生成可执行文件 + var outputFile = options.OutputFile ?? GetDefaultOutputName(options.SourceFile); + var outputDir = Path.GetDirectoryName(outputFile); + if (!string.IsNullOrEmpty(outputDir) && !Directory.Exists(outputDir)) + { + Directory.CreateDirectory(outputDir); + } + + byte[] executable; + if (options.Platform == TargetPlatform.WindowsX64 || options.Platform == TargetPlatform.WindowsX86) + { + var peWriter = new PeWriter(); + executable = peWriter.WriteExecutable(machineCode, null, "main"); + } + else + { + var elfWriter = new ElfWriter(); + executable = elfWriter.WriteExecutable(machineCode, "main"); + } + + File.WriteAllBytes(outputFile, executable); + + return new CompilationResult(true, outputFile); + } + catch (Exception ex) + { + return new CompilationResult(false, $"编译失败: {ex.Message}\n{ex.StackTrace}"); + } + } + + private string GetDefaultOutputName(string sourceFile) + { + var nameWithoutExt = Path.ChangeExtension(sourceFile, null) ?? "a.out"; + // 根据平台决定扩展名 + return "a.out"; // Linux 默认 + } +} + +/// +/// 编译选项 +/// +public sealed record CompilationOptions( + string SourceFile, + string? OutputFile = null, + TargetPlatform Platform = TargetPlatform.LinuxX64, + List? IncludePaths = null +) +{ + public List IncludePaths { get; } = IncludePaths ?? new List(); +} + +/// +/// 编译结果 +/// +public sealed record CompilationResult( + bool Success, + string Message +); + +/// +/// 目标架构 +/// +public enum TargetArchitecture +{ + X86, + X64 +} + +/// +/// 目标平台 +/// +public enum TargetPlatform +{ + LinuxX64, + LinuxX86, + WindowsX64, + WindowsX86 +} diff --git a/src/TinyCC.Core/Diagnostics/ErrorInfo.cs b/src/TinyCC.Core/Diagnostics/ErrorInfo.cs new file mode 100644 index 0000000..44c412c --- /dev/null +++ b/src/TinyCC.Core/Diagnostics/ErrorInfo.cs @@ -0,0 +1,83 @@ +namespace TinyCC.Core; + +/// +/// 错误级别 +/// +public enum ErrorLevel +{ + Warning, + Error, + Fatal +} + +/// +/// 源代码位置信息 +/// +public readonly struct SourceLocation : IEquatable +{ + public string FileName { get; } + public int Line { get; } + public int Column { get; } + + public SourceLocation(string fileName, int line, int column) + { + FileName = fileName; + Line = line; + Column = column; + } + + public bool Equals(SourceLocation other) + { + return FileName == other.FileName && Line == other.Line && Column == other.Column; + } + + public override bool Equals(object? obj) + { + return obj is SourceLocation other && Equals(other); + } + + public override int GetHashCode() + { + return HashCode.Combine(FileName, Line, Column); + } + + public override string ToString() + { + return $"{FileName}({Line}:{Column})"; + } + + public static bool operator ==(SourceLocation left, SourceLocation right) + { + return left.Equals(right); + } + + public static bool operator !=(SourceLocation left, SourceLocation right) + { + return !(left == right); + } +} + +/// +/// 错误信息 +/// +public record ErrorInfo( + ErrorLevel Level, + string Message, + SourceLocation Location, + string? Suggestion = null +) +{ + public override string ToString() + { + var prefix = Level switch + { + ErrorLevel.Warning => "warning", + ErrorLevel.Error => "error", + ErrorLevel.Fatal => "fatal error", + _ => "unknown" + }; + + var suggestionText = Suggestion != null ? $"\n hint: {Suggestion}" : ""; + return $"{Location}: {prefix}: {Message}{suggestionText}"; + } +} diff --git a/src/TinyCC.Core/Diagnostics/ErrorReporter.cs b/src/TinyCC.Core/Diagnostics/ErrorReporter.cs new file mode 100644 index 0000000..27278b5 --- /dev/null +++ b/src/TinyCC.Core/Diagnostics/ErrorReporter.cs @@ -0,0 +1,27 @@ +namespace TinyCC.Core; + +/// +/// 默认错误报告器实现 +/// +public sealed class ErrorReporter : IErrorReporter +{ + private readonly List _errors = new(); + + public bool HasErrors => _errors.Any(e => e.Level is ErrorLevel.Error or ErrorLevel.Fatal); + + public void Report(ErrorInfo error) + { + _errors.Add(error); + Console.Error.WriteLine(error.ToString()); + } + + public IEnumerable GetErrors() + { + return _errors.AsReadOnly(); + } + + public void Clear() + { + _errors.Clear(); + } +} diff --git a/src/TinyCC.Core/Diagnostics/IErrorReporter.cs b/src/TinyCC.Core/Diagnostics/IErrorReporter.cs new file mode 100644 index 0000000..ec6294d --- /dev/null +++ b/src/TinyCC.Core/Diagnostics/IErrorReporter.cs @@ -0,0 +1,12 @@ +namespace TinyCC.Core; + +/// +/// 错误报告器接口 +/// +public interface IErrorReporter +{ + void Report(ErrorInfo error); + bool HasErrors { get; } + IEnumerable GetErrors(); + void Clear(); +} diff --git a/src/TinyCC.Core/IR/IrGenerator.cs b/src/TinyCC.Core/IR/IrGenerator.cs new file mode 100644 index 0000000..377333e --- /dev/null +++ b/src/TinyCC.Core/IR/IrGenerator.cs @@ -0,0 +1,374 @@ +using System; +using System.Collections.Generic; +using TinyCC.Core; + +namespace TinyCC.Core; + +/// +/// IR 生成器 +/// +public sealed class IrGenerator +{ + private int _tempCounter; + private int _labelCounter; + private readonly List _functions = new(); + private List _currentInstructions = new(); + private List _currentLocals = new(); + + public IrProgram Generate(ProgramNode program) + { + foreach (var decl in program.Declarations) + { + if (decl is FunctionDeclarationNode func) + { + GenerateFunction(func); + } + } + + return new IrProgram(_functions); + } + + private void GenerateFunction(FunctionDeclarationNode func) + { + _currentInstructions = new List(); + _currentLocals = new List(); + + // 为参数分配栈空间 + foreach (var param in func.Parameters) + { + _currentLocals.Add(new IrVariable(param.Name, GetTypeName(param.Type), 8)); + } + + // 生成函数体 + GenerateBlock(func.Body); + + // 如果没有 return 语句,隐式添加 return + if (_currentInstructions.Count == 0 || _currentInstructions[^1] is not IrReturn) + { + _currentInstructions.Add(new IrReturn(null)); + } + + var function = new IrFunction(func.Name, new List + { + new IrBasicBlock($"func_{func.Name}", _currentInstructions) + }, _currentLocals, func.Parameters.Count); + + _functions.Add(function); + } + + private void GenerateBlock(BlockStatementNode block) + { + foreach (var stmt in block.Statements) + { + switch (stmt) + { + case StatementNode statement: + GenerateStatement(statement); + break; + case VariableDeclarationNode varDecl: + GenerateLocalVariableDeclaration(varDecl); + break; + } + } + } + + private void GenerateLocalVariableDeclaration(VariableDeclarationNode varDecl) + { + // 为局部变量分配栈空间 + var local = new IrVariable(varDecl.Name, GetTypeName(varDecl.Type), 8); + _currentLocals.Add(local); + + // 如果有初始化器,生成赋值代码 + if (varDecl.Initializer != null) + { + var value = GenerateExpression(varDecl.Initializer); + _currentInstructions.Add(new IrStore(new IrLocal(varDecl.Name, GetTypeName(varDecl.Type), 0), value)); + } + } + + private void GenerateStatement(StatementNode stmt) + { + switch (stmt) + { + case BlockStatementNode block: + GenerateBlock(block); + break; + + case ExpressionStatementNode exprStmt: + GenerateExpression(exprStmt.Expression); + break; + + case ReturnStatementNode returnStmt: + if (returnStmt.Expression != null) + { + var value = GenerateExpression(returnStmt.Expression); + _currentInstructions.Add(new IrReturn(value)); + } + else + { + _currentInstructions.Add(new IrReturn(null)); + } + break; + + case IfStatementNode ifStmt: + GenerateIfStatement(ifStmt); + break; + + case WhileStatementNode whileStmt: + GenerateWhileStatement(whileStmt); + break; + + case ForStatementNode forStmt: + GenerateForStatement(forStmt); + break; + + case BreakStatementNode: + _currentInstructions.Add(new IrJump(GetBreakLabel())); + break; + + case ContinueStatementNode: + _currentInstructions.Add(new IrJump(GetContinueLabel())); + break; + } + + // 处理变量声明(在表达式语句内部) + if (stmt is ExpressionStatementNode exprStmt2 && exprStmt2.Expression is AssignmentExpressionNode assign) + { + // 这已经在 GenerateExpression 中处理了 + } + } + + private void GenerateIfStatement(IfStatementNode ifStmt) + { + var elseLabel = NewLabel("else"); + var endLabel = NewLabel("endif"); + + var condition = GenerateExpression(ifStmt.Condition); + _currentInstructions.Add(new IrBranch(condition, endLabel, elseLabel)); + + GenerateStatement(ifStmt.ThenBranch); + _currentInstructions.Add(new IrJump(endLabel)); + + _currentInstructions.Add(new IrLabel(elseLabel)); + if (ifStmt.ElseBranch != null) + { + GenerateStatement(ifStmt.ElseBranch); + } + else + { + // No else branch: elseLabel is at the same position as endLabel. + // To prevent IrJump(endLabel) from having a 0 offset, add a nop. + _currentInstructions.Add(new IrNop()); + } + + _currentInstructions.Add(new IrLabel(endLabel)); + } + + private void GenerateWhileStatement(WhileStatementNode whileStmt) + { + var startLabel = NewLabel("while_start"); + var endLabel = NewLabel("while_end"); + + PushLoopLabels(endLabel, startLabel); + + _currentInstructions.Add(new IrLabel(startLabel)); + var condition = GenerateExpression(whileStmt.Condition); + _currentInstructions.Add(new IrBranch(condition, startLabel, endLabel)); + + GenerateStatement(whileStmt.Body); + _currentInstructions.Add(new IrJump(startLabel)); + + _currentInstructions.Add(new IrLabel(endLabel)); + PopLoopLabels(); + } + + private void GenerateForStatement(ForStatementNode forStmt) + { + var startLabel = NewLabel("for_start"); + var condLabel = NewLabel("for_cond"); + var incLabel = NewLabel("for_inc"); + var endLabel = NewLabel("for_end"); + + // 初始化 + if (forStmt.Init != null) + { + switch (forStmt.Init) + { + case StatementNode statement: + GenerateStatement(statement); + break; + case VariableDeclarationNode varDecl: + GenerateLocalVariableDeclaration(varDecl); + break; + } + } + + // 条件判断 + _currentInstructions.Add(new IrLabel(condLabel)); + PushLoopLabels(endLabel, incLabel); + + if (forStmt.Condition != null) + { + var condition = GenerateExpression(forStmt.Condition); + _currentInstructions.Add(new IrBranch(condition, incLabel, endLabel)); + } + + _currentInstructions.Add(new IrLabel(startLabel)); + GenerateStatement(forStmt.Body); + + // 增量 + _currentInstructions.Add(new IrLabel(incLabel)); + if (forStmt.Increment != null) + { + GenerateExpression(forStmt.Increment); + } + _currentInstructions.Add(new IrJump(condLabel)); + + _currentInstructions.Add(new IrLabel(endLabel)); + PopLoopLabels(); + } + + private void GenerateVariableDeclaration(VariableDeclarationNode varDecl) + { + var local = new IrVariable(varDecl.Name, GetTypeName(varDecl.Type), 8); + _currentLocals.Add(local); + + if (varDecl.Initializer != null) + { + var value = GenerateExpression(varDecl.Initializer); + _currentInstructions.Add(new IrStore(new IrLocal(varDecl.Name, GetTypeName(varDecl.Type), 0), value)); + } + } + + private IrValue GenerateExpression(ExpressionNode expr) + { + return expr switch + { + LiteralExpressionNode lit => new IrConstant(lit.Value, GetLiteralTypeName(lit.Type)), + IdentifierExpressionNode id => new IrLocal(id.Name, "int", 0), + BinaryExpressionNode binary => GenerateBinaryExpression(binary), + UnaryExpressionNode unary => GenerateUnaryExpression(unary), + AssignmentExpressionNode assign => GenerateAssignmentExpression(assign), + FunctionCallExpressionNode call => GenerateFunctionCall(call), + _ => throw new NotSupportedException($"Unsupported expression type: {expr.GetType()}") + }; + } + + private IrValue GenerateBinaryExpression(BinaryExpressionNode binary) + { + var left = GenerateExpression(binary.Left); + var right = GenerateExpression(binary.Right); + var dest = NewTemp(); + + var op = binary.Operator switch + { + TokenType.Plus => IrBinaryOpType.Add, + TokenType.Minus => IrBinaryOpType.Sub, + TokenType.Star => IrBinaryOpType.Mul, + TokenType.Slash => IrBinaryOpType.Div, + TokenType.Percent => IrBinaryOpType.Mod, + TokenType.BitAnd => IrBinaryOpType.And, + TokenType.BitOr => IrBinaryOpType.Or, + TokenType.BitXor => IrBinaryOpType.Xor, + TokenType.LeftShift => IrBinaryOpType.Shl, + TokenType.RightShift => IrBinaryOpType.Shr, + TokenType.Equal => IrBinaryOpType.Eq, + TokenType.NotEqual => IrBinaryOpType.Ne, + TokenType.Less => IrBinaryOpType.Lt, + TokenType.Greater => IrBinaryOpType.Gt, + TokenType.LessEqual => IrBinaryOpType.Le, + TokenType.GreaterEqual => IrBinaryOpType.Ge, + _ => throw new NotSupportedException($"Unsupported binary operator: {binary.Operator}") + }; + + _currentInstructions.Add(new IrBinaryOp(dest, op, left, right)); + return dest; + } + + private IrValue GenerateUnaryExpression(UnaryExpressionNode unary) + { + var source = GenerateExpression(unary.Expression); + var dest = NewTemp(); + + var op = unary.Operator switch + { + TokenType.Minus => IrUnaryOpType.Neg, + TokenType.Not => IrUnaryOpType.Not, + TokenType.BitNot => IrUnaryOpType.BitNot, + _ => throw new NotSupportedException($"Unsupported unary operator: {unary.Operator}") + }; + + _currentInstructions.Add(new IrUnaryOp(dest, op, source)); + return dest; + } + + private IrValue GenerateAssignmentExpression(AssignmentExpressionNode assign) + { + var value = GenerateExpression(assign.Right); + + if (assign.Left is IdentifierExpressionNode id) + { + _currentInstructions.Add(new IrStore(new IrLocal(id.Name, "int", 0), value)); + return value; + } + + throw new NotSupportedException("Unsupported assignment target"); + } + + private IrValue GenerateFunctionCall(FunctionCallExpressionNode call) + { + var args = new List(); + foreach (var arg in call.Arguments) + { + args.Add(GenerateExpression(arg)); + } + + var dest = new IrTemp($"t{++_tempCounter}", "int"); + _currentInstructions.Add(new IrCall(dest, call.FunctionName, args)); + return dest; + } + + private IrTemp NewTemp() => new($"t{++_tempCounter}", "int"); + private string NewLabel(string prefix) => $"{prefix}_{++_labelCounter}"; + + private string _breakLabel = ""; + private string _continueLabel = ""; + + private void PushLoopLabels(string breakLabel, string continueLabel) + { + _breakLabel = breakLabel; + _continueLabel = continueLabel; + } + + private void PopLoopLabels() + { + _breakLabel = ""; + _continueLabel = ""; + } + + private string GetBreakLabel() => _breakLabel; + private string GetContinueLabel() => _continueLabel; + + private string GetTypeName(TypeNode type) + { + return type switch + { + PrimitiveTypeNode p => p.TypeName, + PointerTypeNode => "pointer", + ArrayTypeNode => "array", + _ => "unknown" + }; + } + + private string GetLiteralTypeName(TokenType type) + { + return type switch + { + TokenType.IntLiteral => "int", + TokenType.FloatLiteral => "double", + TokenType.CharLiteral => "char", + TokenType.StringLiteral => "string", + _ => "unknown" + }; + } +} diff --git a/src/TinyCC.Core/IR/IrInstructions.cs b/src/TinyCC.Core/IR/IrInstructions.cs new file mode 100644 index 0000000..e780bc4 --- /dev/null +++ b/src/TinyCC.Core/IR/IrInstructions.cs @@ -0,0 +1,130 @@ +namespace TinyCC.Core; + +/// +/// IR 指令基类 +/// +public abstract record IrInstruction; + +/// +/// 二元运算指令 +/// +public sealed record IrBinaryOp(IrValue Dest, IrBinaryOpType Op, IrValue Left, IrValue Right) : IrInstruction; + +/// +/// 一元运算指令 +/// +public sealed record IrUnaryOp(IrValue Dest, IrUnaryOpType Op, IrValue Source) : IrInstruction; + +/// +/// 加载指令(从内存加载到寄存器) +/// +public sealed record IrLoad(IrValue Dest, IrValue Address) : IrInstruction; + +/// +/// 存储指令(从寄存器存储到内存) +/// +public sealed record IrStore(IrValue Address, IrValue Value) : IrInstruction; + +/// +/// 函数调用指令 +/// +public sealed record IrCall(IrValue? Dest, string FunctionName, List Arguments) : IrInstruction; + +/// +/// 无条件跳转指令 +/// +public sealed record IrJump(string TargetLabel) : IrInstruction; + +/// +/// 条件分支指令 +/// +public sealed record IrBranch(IrValue Condition, string TrueLabel, string FalseLabel) : IrInstruction; + +/// +/// 返回指令 +/// +public sealed record IrReturn(IrValue? Value) : IrInstruction; + +/// +/// 标签指令 +/// +public sealed record IrLabel(string LabelName) : IrInstruction; + +/// +/// 赋值指令 +/// +public sealed record IrMove(IrValue Dest, IrValue Source) : IrInstruction; + +/// +/// 空操作指令 +/// +public sealed record IrNop() : IrInstruction; + +/// +/// 分配栈空间指令 +/// +public sealed record IrAllocStack(int Size, string VariableName) : IrInstruction; + +/// +/// IR 程序 +/// +public sealed record IrProgram(List Functions); + +/// +/// IR 函数 +/// +public sealed record IrFunction(string Name, List BasicBlocks, List Locals, int ParameterCount = 0); + +/// +/// 基本块 +/// +public sealed record IrBasicBlock(string Label, List Instructions); + +/// +/// IR 值 +/// +public abstract record IrValue; + +/// +/// 临时变量 +/// +public sealed record IrTemp(string Name, string TypeName) : IrValue; + +/// +/// 常量 +/// +public sealed record IrConstant(object Value, string TypeName) : IrValue; + +/// +/// 全局变量 +/// +public sealed record IrGlobal(string Name, string TypeName) : IrValue; + +/// +/// 局部变量 +/// +public sealed record IrLocal(string Name, string TypeName, int StackOffset) : IrValue; + +/// +/// IR 变量 +/// +public sealed record IrVariable(string Name, string TypeName, int Size); + +/// +/// 二元运算类型 +/// +public enum IrBinaryOpType +{ + Add, Sub, Mul, Div, Mod, + And, Or, Xor, + Shl, Shr, + Eq, Ne, Lt, Gt, Le, Ge +} + +/// +/// 一元运算类型 +/// +public enum IrUnaryOpType +{ + Neg, Not, BitNot, Deref +} diff --git a/src/TinyCC.Core/Lexer/Lexer.cs b/src/TinyCC.Core/Lexer/Lexer.cs new file mode 100644 index 0000000..ec15f46 --- /dev/null +++ b/src/TinyCC.Core/Lexer/Lexer.cs @@ -0,0 +1,356 @@ +using System.Text; + +namespace TinyCC.Core; + +/// +/// C 语言词法分析器 +/// +public sealed class Lexer +{ + private readonly IErrorReporter _errorReporter; + private readonly string _source; + private readonly string _fileName; + private int _position; + private int _line = 1; + private int _column = 1; + + private static readonly HashSet Keywords = new() + { + "int", "char", "float", "double", "long", "short", "void", + "if", "else", "while", "for", "do", "switch", "case", "default", + "break", "continue", "return", "struct", "union", "typedef", + "signed", "unsigned", "const", "static", "extern", "auto", "register", "volatile", + "sizeof" + }; + + public Lexer(string source, string fileName, IErrorReporter errorReporter) + { + _source = source; + _fileName = fileName; + _errorReporter = errorReporter; + } + + public IEnumerable Tokenize() + { + while (true) + { + var token = NextToken(); + yield return token; + + if (token.Type == TokenType.EOF || token.Type == TokenType.Error) + break; + } + } + + private Token NextToken() + { + SkipWhitespaceAndComments(); + + if (_position >= _source.Length) + return CreateToken(TokenType.EOF, "", null); + + var startLine = _line; + var startColumn = _column; + var ch = _source[_position]; + + // 标识符和关键字 + if (char.IsLetter(ch) || ch == '_') + return ReadIdentifierOrKeyword(); + + // 数字字面量 + if (char.IsDigit(ch)) + return ReadNumber(); + + // 字符字面量 + if (ch == '\'') + return ReadCharLiteral(); + + // 字符串字面量 + if (ch == '"') + return ReadStringLiteral(); + + // 运算符和分隔符 - 需要 Advance 后再创建 token + switch (ch) + { + case '+': + if (PeekNext() == '+') { Advance(); Advance(); return new Token(TokenType.Increment, "++", "++", new SourceLocation(_fileName, startLine, startColumn)); } + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.PlusAssign, "+=", "+=", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); return new Token(TokenType.Plus, "+", "+", new SourceLocation(_fileName, startLine, startColumn)); + case '-': + if (PeekNext() == '-') { Advance(); Advance(); return new Token(TokenType.Decrement, "--", "--", new SourceLocation(_fileName, startLine, startColumn)); } + if (PeekNext() == '>') { Advance(); Advance(); return new Token(TokenType.Arrow, "->", "->", new SourceLocation(_fileName, startLine, startColumn)); } + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.MinusAssign, "-=", "-=", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); return new Token(TokenType.Minus, "-", "-", new SourceLocation(_fileName, startLine, startColumn)); + case '*': + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.StarAssign, "*=", "*=", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); return new Token(TokenType.Star, "*", "*", new SourceLocation(_fileName, startLine, startColumn)); + case '/': + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.SlashAssign, "/=", "/=", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); return new Token(TokenType.Slash, "/", "/", new SourceLocation(_fileName, startLine, startColumn)); + case '%': + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.PercentAssign, "%=", "%=", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); return new Token(TokenType.Percent, "%", "%", new SourceLocation(_fileName, startLine, startColumn)); + case '=': + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.Equal, "==", "==", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); return new Token(TokenType.Assign, "=", "=", new SourceLocation(_fileName, startLine, startColumn)); + case '!': + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.NotEqual, "!=", "!=", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); return new Token(TokenType.Not, "!", "!", new SourceLocation(_fileName, startLine, startColumn)); + case '<': + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.LessEqual, "<=", "<=", new SourceLocation(_fileName, startLine, startColumn)); } + if (PeekNext() == '<') { + if (PeekNext(1) == '=') { Advance(); Advance(); Advance(); return new Token(TokenType.LeftShiftAssign, "<<=", "<<=", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); Advance(); return new Token(TokenType.LeftShift, "<<", "<<", new SourceLocation(_fileName, startLine, startColumn)); + } + Advance(); return new Token(TokenType.Less, "<", "<", new SourceLocation(_fileName, startLine, startColumn)); + case '>': + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.GreaterEqual, ">=", ">=", new SourceLocation(_fileName, startLine, startColumn)); } + if (PeekNext() == '>') { + if (PeekNext(1) == '=') { Advance(); Advance(); Advance(); return new Token(TokenType.RightShiftAssign, ">>=", ">>=", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); Advance(); return new Token(TokenType.RightShift, ">>", ">>", new SourceLocation(_fileName, startLine, startColumn)); + } + Advance(); return new Token(TokenType.Greater, ">", ">", new SourceLocation(_fileName, startLine, startColumn)); + case '&': + if (PeekNext() == '&') { Advance(); Advance(); return new Token(TokenType.And, "&&", "&&", new SourceLocation(_fileName, startLine, startColumn)); } + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.AndAssign, "&=", "&=", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); return new Token(TokenType.BitAnd, "&", "&", new SourceLocation(_fileName, startLine, startColumn)); + case '|': + if (PeekNext() == '|') { Advance(); Advance(); return new Token(TokenType.Or, "||", "||", new SourceLocation(_fileName, startLine, startColumn)); } + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.OrAssign, "|=", "|=", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); return new Token(TokenType.BitOr, "|", "|", new SourceLocation(_fileName, startLine, startColumn)); + case '^': + if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.XorAssign, "^=", "^=", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); return new Token(TokenType.BitXor, "^", "^", new SourceLocation(_fileName, startLine, startColumn)); + case '~': + Advance(); return new Token(TokenType.BitNot, "~", "~", new SourceLocation(_fileName, startLine, startColumn)); + case '(': + Advance(); return new Token(TokenType.LeftParen, "(", "(", new SourceLocation(_fileName, startLine, startColumn)); + case ')': + Advance(); return new Token(TokenType.RightParen, ")", ")", new SourceLocation(_fileName, startLine, startColumn)); + case '{': + Advance(); return new Token(TokenType.LeftBrace, "{", "{", new SourceLocation(_fileName, startLine, startColumn)); + case '}': + Advance(); return new Token(TokenType.RightBrace, "}", "}", new SourceLocation(_fileName, startLine, startColumn)); + case '[': + Advance(); return new Token(TokenType.LeftBracket, "[", "[", new SourceLocation(_fileName, startLine, startColumn)); + case ']': + Advance(); return new Token(TokenType.RightBracket, "]", "]", new SourceLocation(_fileName, startLine, startColumn)); + case ';': + Advance(); return new Token(TokenType.Semicolon, ";", ";", new SourceLocation(_fileName, startLine, startColumn)); + case ',': + Advance(); return new Token(TokenType.Comma, ",", ",", new SourceLocation(_fileName, startLine, startColumn)); + case ':': + Advance(); return new Token(TokenType.Colon, ":", ":", new SourceLocation(_fileName, startLine, startColumn)); + case '?': + Advance(); return new Token(TokenType.Question, "?", "?", new SourceLocation(_fileName, startLine, startColumn)); + case '.': + if (PeekNext() == '.' && PeekNext(1) == '.') { Advance(); Advance(); Advance(); return new Token(TokenType.Ellipsis, "...", "...", new SourceLocation(_fileName, startLine, startColumn)); } + Advance(); return new Token(TokenType.Dot, ".", ".", new SourceLocation(_fileName, startLine, startColumn)); + case '#': + Advance(); return new Token(TokenType.Hash, "#", "#", new SourceLocation(_fileName, startLine, startColumn)); + default: + Advance(); + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Unexpected character '{ch}'", + new SourceLocation(_fileName, startLine, startColumn) + )); + return new Token(TokenType.Error, ch.ToString(), null, new SourceLocation(_fileName, startLine, startColumn)); + } + } + + private Token ReadIdentifierOrKeyword() + { + var startLine = _line; + var startColumn = _column; + var sb = new StringBuilder(); + + while (_position < _source.Length && + (char.IsLetterOrDigit(_source[_position]) || _source[_position] == '_')) + { + sb.Append(_source[_position]); + Advance(); + } + + var lexeme = sb.ToString(); + if (Keywords.Contains(lexeme)) + { + var type = Enum.Parse(char.ToUpper(lexeme[0]) + lexeme[1..], true); + return new Token(type, lexeme, lexeme, new SourceLocation(_fileName, startLine, startColumn)); + } + + return new Token(TokenType.Identifier, lexeme, lexeme, new SourceLocation(_fileName, startLine, startColumn)); + } + + private Token ReadNumber() + { + var startLine = _line; + var startColumn = _column; + var sb = new StringBuilder(); + var isFloat = false; + + while (_position < _source.Length && (char.IsDigit(_source[_position]) || _source[_position] == '.')) + { + if (_source[_position] == '.') isFloat = true; + sb.Append(_source[_position]); + Advance(); + } + + var lexeme = sb.ToString(); + object value = isFloat ? double.Parse(lexeme) : long.Parse(lexeme); + var type = isFloat ? TokenType.FloatLiteral : TokenType.IntLiteral; + + return new Token(type, lexeme, value, new SourceLocation(_fileName, startLine, startColumn)); + } + + private Token ReadCharLiteral() + { + var startLine = _line; + var startColumn = _column; + Advance(); // 跳过 ' + + if (_position >= _source.Length || _source[_position] == '\'') + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + "Empty character literal", + new SourceLocation(_fileName, startLine, startColumn) + )); + return CreateToken(TokenType.Error, "''", null); + } + + var ch = _source[_position]; + Advance(); + + if (_position >= _source.Length || _source[_position] != '\'') + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + "Unterminated character literal", + new SourceLocation(_fileName, startLine, startColumn) + )); + return CreateToken(TokenType.Error, ch.ToString(), null); + } + + Advance(); // 跳过结束 ' + return new Token(TokenType.CharLiteral, $"'{ch}'", ch, new SourceLocation(_fileName, startLine, startColumn)); + } + + private Token ReadStringLiteral() + { + var startLine = _line; + var startColumn = _column; + var sb = new StringBuilder(); + Advance(); // 跳过 " + + while (_position < _source.Length && _source[_position] != '"') + { + if (_source[_position] == '\\') + { + Advance(); + if (_position < _source.Length) + { + var escaped = _source[_position] switch + { + 'n' => '\n', + 't' => '\t', + 'r' => '\r', + '\\' => '\\', + '"' => '"', + '\'' => '\'', + '0' => '\0', + _ => _source[_position] + }; + sb.Append(escaped); + } + } + else + { + sb.Append(_source[_position]); + } + Advance(); + } + + if (_position >= _source.Length) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + "Unterminated string literal", + new SourceLocation(_fileName, startLine, startColumn) + )); + return CreateToken(TokenType.Error, sb.ToString(), null); + } + + Advance(); // 跳过结束 " + var value = sb.ToString(); + return new Token(TokenType.StringLiteral, $"\"{value}\"", value, new SourceLocation(_fileName, startLine, startColumn)); + } + + private void SkipWhitespaceAndComments() + { + while (_position < _source.Length) + { + if (char.IsWhiteSpace(_source[_position])) + { + Advance(); + continue; + } + + // 单行注释 // + if (_source[_position] == '/' && PeekNext() == '/') + { + while (_position < _source.Length && _source[_position] != '\n') + Advance(); + continue; + } + + // 多行注释 /* */ + if (_source[_position] == '/' && PeekNext() == '*') + { + Advance(); // / + Advance(); // * + while (_position < _source.Length) + { + if (_source[_position] == '*' && PeekNext() == '/') + { + Advance(); // * + Advance(); // / + break; + } + Advance(); + } + continue; + } + + break; + } + } + + private char PeekNext(int offset = 1) + { + var pos = _position + offset; + return pos < _source.Length ? _source[pos] : '\0'; + } + + private void Advance() + { + if (_position < _source.Length) + { + if (_source[_position] == '\n') + { + _line++; + _column = 1; + } + else + { + _column++; + } + _position++; + } + } + + private Token CreateToken(TokenType type, string lexeme, object? value) + { + return new Token(type, lexeme, value, new SourceLocation(_fileName, _line, _column)); + } +} diff --git a/src/TinyCC.Core/Lexer/Token.cs b/src/TinyCC.Core/Lexer/Token.cs new file mode 100644 index 0000000..f8a0141 --- /dev/null +++ b/src/TinyCC.Core/Lexer/Token.cs @@ -0,0 +1,19 @@ +namespace TinyCC.Core; + +/// +/// Token 记录 +/// +public sealed record Token( + TokenType Type, + string Lexeme, + object? Value, + SourceLocation Location +) +{ + public override string ToString() + { + return Value != null + ? $"{Type}('{Lexeme}' = {Value})" + : $"{Type}('{Lexeme}')"; + } +} diff --git a/src/TinyCC.Core/Lexer/TokenType.cs b/src/TinyCC.Core/Lexer/TokenType.cs new file mode 100644 index 0000000..db51407 --- /dev/null +++ b/src/TinyCC.Core/Lexer/TokenType.cs @@ -0,0 +1,41 @@ +namespace TinyCC.Core; + +/// +/// Token 类型枚举 +/// +public enum TokenType +{ + // 关键字 + Int, Char, Float, Double, Long, Short, Void, + If, Else, While, For, Do, Switch, Case, Default, + Break, Continue, Return, Struct, Union, Typedef, + Signed, Unsigned, Const, Static, Extern, Auto, Register, Volatile, + Sizeof, + + // 字面量 + IntLiteral, FloatLiteral, CharLiteral, StringLiteral, + + // 标识符 + Identifier, + + // 运算符 + Plus, Minus, Star, Slash, Percent, + Equal, NotEqual, Less, Greater, LessEqual, GreaterEqual, + Assign, PlusAssign, MinusAssign, StarAssign, SlashAssign, PercentAssign, + And, Or, Not, BitAnd, BitOr, BitXor, BitNot, + LeftShift, RightShift, + AndAssign, OrAssign, XorAssign, LeftShiftAssign, RightShiftAssign, + Increment, Decrement, + Arrow, Dot, + + // 分隔符 + LeftParen, RightParen, LeftBrace, RightBrace, + LeftBracket, RightBracket, + Semicolon, Comma, Colon, Question, Ellipsis, + + // 预处理器 + Hash, + + // 特殊 + EOF, Error +} diff --git a/src/TinyCC.Core/Parser/AstNodes.cs b/src/TinyCC.Core/Parser/AstNodes.cs new file mode 100644 index 0000000..a1df8fd --- /dev/null +++ b/src/TinyCC.Core/Parser/AstNodes.cs @@ -0,0 +1,119 @@ +namespace TinyCC.Core; + +/// +/// AST 节点基类 +/// +public abstract record AstNode(SourceLocation Location); + +/// +/// 程序节点 +/// +public sealed record ProgramNode(List Declarations, SourceLocation Location) : AstNode(Location); + +/// +/// 类型节点 +/// +public abstract record TypeNode(SourceLocation Location) : AstNode(Location); +public sealed record PrimitiveTypeNode(string TypeName, SourceLocation Location) : TypeNode(Location); +public sealed record PointerTypeNode(TypeNode BaseType, SourceLocation Location) : TypeNode(Location); +public sealed record ArrayTypeNode(TypeNode ElementType, int Size, SourceLocation Location) : TypeNode(Location); + +/// +/// 声明节点 +/// +public abstract record DeclarationNode(SourceLocation Location) : AstNode(Location); +public sealed record FunctionDeclarationNode( + TypeNode ReturnType, + string Name, + List Parameters, + BlockStatementNode Body, + SourceLocation Location +) : DeclarationNode(Location); + +public sealed record VariableDeclarationNode( + TypeNode Type, + string Name, + ExpressionNode? Initializer, + SourceLocation Location +) : DeclarationNode(Location); + +public sealed record ParameterNode( + TypeNode Type, + string Name, + SourceLocation Location +) : AstNode(Location); + +/// +/// 语句节点 +/// +public abstract record StatementNode(SourceLocation Location) : AstNode(Location); +public sealed record BlockStatementNode(List Statements, SourceLocation Location) : StatementNode(Location); +public sealed record ExpressionStatementNode(ExpressionNode Expression, SourceLocation Location) : StatementNode(Location); +public sealed record ReturnStatementNode(ExpressionNode? Expression, SourceLocation Location) : StatementNode(Location); +public sealed record IfStatementNode( + ExpressionNode Condition, + StatementNode ThenBranch, + StatementNode? ElseBranch, + SourceLocation Location +) : StatementNode(Location); + +public sealed record WhileStatementNode( + ExpressionNode Condition, + StatementNode Body, + SourceLocation Location +) : StatementNode(Location); + +public sealed record ForStatementNode( + AstNode? Init, + ExpressionNode? Condition, + ExpressionNode? Increment, + StatementNode Body, + SourceLocation Location +) : StatementNode(Location); + +public sealed record BreakStatementNode(SourceLocation Location) : StatementNode(Location); +public sealed record ContinueStatementNode(SourceLocation Location) : StatementNode(Location); + +/// +/// 表达式节点 +/// +public abstract record ExpressionNode(SourceLocation Location) : AstNode(Location); +public sealed record BinaryExpressionNode( + ExpressionNode Left, + TokenType Operator, + ExpressionNode Right, + SourceLocation Location +) : ExpressionNode(Location); + +public sealed record UnaryExpressionNode( + TokenType Operator, + ExpressionNode Expression, + SourceLocation Location +) : ExpressionNode(Location); + +public sealed record LiteralExpressionNode(object Value, TokenType Type, SourceLocation Location) : ExpressionNode(Location); +public sealed record IdentifierExpressionNode(string Name, SourceLocation Location) : ExpressionNode(Location); +public sealed record AssignmentExpressionNode( + ExpressionNode Left, + TokenType Operator, + ExpressionNode Right, + SourceLocation Location +) : ExpressionNode(Location); + +public sealed record FunctionCallExpressionNode( + string FunctionName, + List Arguments, + SourceLocation Location +) : ExpressionNode(Location); + +public sealed record MemberAccessExpressionNode( + ExpressionNode Target, + string MemberName, + SourceLocation Location +) : ExpressionNode(Location); + +public sealed record ArrayAccessExpressionNode( + ExpressionNode Array, + ExpressionNode Index, + SourceLocation Location +) : ExpressionNode(Location); diff --git a/src/TinyCC.Core/Parser/Parser.cs b/src/TinyCC.Core/Parser/Parser.cs new file mode 100644 index 0000000..96e0fdb --- /dev/null +++ b/src/TinyCC.Core/Parser/Parser.cs @@ -0,0 +1,656 @@ +namespace TinyCC.Core; + +/// +/// 递归下降解析器 +/// +public sealed class Parser +{ + private readonly IErrorReporter _errorReporter; + private readonly IEnumerator _tokens; + private Token _current; + + public Parser(IEnumerable tokens, IErrorReporter errorReporter) + { + _errorReporter = errorReporter; + _tokens = tokens.GetEnumerator(); + _tokens.MoveNext(); + _current = _tokens.Current; + } + + /// + /// 解析程序 + /// + public ProgramNode Parse() + { + var declarations = new List(); + var location = _current.Location; + + while (_current.Type != TokenType.EOF) + { + declarations.Add(ParseDeclaration()); + } + + return new ProgramNode(declarations, location); + } + + private AstNode ParseDeclaration() + { + // 函数声明 + if (IsTypeSpecifier(_current.Type)) + { + var type = ParseTypeSpecifier(); + var name = Consume(TokenType.Identifier, "Expected function or variable name"); + + if (_current.Type == TokenType.LeftParen) + { + return ParseFunctionDeclaration(type, name); + } + else + { + return ParseVariableDeclaration(type, name); + } + } + + // 错误恢复:跳过整个声明直到遇到分号或右大括号 + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Expected declaration, got {_current.Type}", + _current.Location + )); + + // 跳过直到遇到 ; 或 } 或 EOF + while (_current.Type is not (TokenType.Semicolon or TokenType.RightBrace or TokenType.EOF)) + { + Advance(); + } + + // 消耗终止符 + if (_current.Type is TokenType.Semicolon or TokenType.RightBrace) + Advance(); + + return new VariableDeclarationNode( + new PrimitiveTypeNode("int", _current.Location), + "error", + null, + _current.Location + ); + } + + private FunctionDeclarationNode ParseFunctionDeclaration(TypeNode returnType, Token nameToken) + { + var parameters = ParseParameterList(); + var body = ParseBlock(); + + return new FunctionDeclarationNode( + returnType, + nameToken.Lexeme, + parameters, + body, + nameToken.Location + ); + } + + private List ParseParameterList() + { + var parameters = new List(); + Consume(TokenType.LeftParen, "Expected '('"); + + if (_current.Type != TokenType.RightParen) + { + do + { + var type = ParseTypeSpecifier(); + var name = Consume(TokenType.Identifier, "Expected parameter name"); + parameters.Add(new ParameterNode(type, name.Lexeme, name.Location)); + + if (_current.Type == TokenType.Comma) + Advance(); + else + break; + } while (_current.Type != TokenType.RightParen); + } + + Consume(TokenType.RightParen, "Expected ')'"); + return parameters; + } + + private VariableDeclarationNode ParseVariableDeclaration(TypeNode type, Token nameToken) + { + ExpressionNode? initializer = null; + + if (_current.Type == TokenType.Assign) + { + Advance(); + initializer = ParseExpression(); + } + + Consume(TokenType.Semicolon, "Expected ';'"); + + return new VariableDeclarationNode( + type, + nameToken.Lexeme, + initializer, + nameToken.Location + ); + } + + private TypeNode ParseTypeSpecifier() + { + var location = _current.Location; + var typeName = _current.Lexeme; + Advance(); + + // 检查指针 + if (_current.Type == TokenType.Star) + { + Advance(); + var baseType = new PrimitiveTypeNode(typeName, location); + return new PointerTypeNode(baseType, location); + } + + return new PrimitiveTypeNode(typeName, location); + } + + private bool IsTypeSpecifier(TokenType type) + { + return type is TokenType.Int or TokenType.Char or TokenType.Float or TokenType.Double + or TokenType.Long or TokenType.Short or TokenType.Void or TokenType.Struct + or TokenType.Unsigned or TokenType.Signed; + } + + private BlockStatementNode ParseBlock() + { + var location = _current.Location; + Consume(TokenType.LeftBrace, "Expected '{'"); + var statements = new List(); + + while (_current.Type != TokenType.RightBrace && _current.Type != TokenType.EOF) + { + // 检查是否是局部变量声明(类型关键字后跟标识符或 *) + if (IsTypeKeyword(_current.Type)) + { + statements.Add(ParseLocalVariableDeclaration()); + } + else + { + statements.Add(ParseStatement()); + } + } + + Consume(TokenType.RightBrace, "Expected '}'"); + return new BlockStatementNode(statements, location); + } + + private bool IsTypeKeyword(TokenType tokenType) + { + return tokenType is TokenType.Int or TokenType.Char or TokenType.Float + or TokenType.Double or TokenType.Long or TokenType.Short + or TokenType.Void or TokenType.Unsigned or TokenType.Signed; + } + + private VariableDeclarationNode ParseLocalVariableDeclaration() + { + var type = ParseTypeSpecifier(); + var nameToken = _current; + Consume(TokenType.Identifier, "Expected identifier after type specifier"); + return ParseVariableDeclaration(type, nameToken); + } + + private StatementNode ParseStatement() + { + return _current.Type switch + { + TokenType.LeftBrace => ParseBlock(), + TokenType.If => ParseIfStatement(), + TokenType.While => ParseWhileStatement(), + TokenType.For => ParseForStatement(), + TokenType.Return => ParseReturnStatement(), + TokenType.Break => ParseBreakStatement(), + TokenType.Continue => ParseContinueStatement(), + _ => ParseExpressionStatement() + }; + } + + private IfStatementNode ParseIfStatement() + { + var location = _current.Location; + Advance(); // 跳过 if + Consume(TokenType.LeftParen, "Expected '('"); + var condition = ParseExpression(); + Consume(TokenType.RightParen, "Expected ')'"); + var thenBranch = ParseStatement(); + StatementNode? elseBranch = null; + + if (_current.Type == TokenType.Else) + { + Advance(); + elseBranch = ParseStatement(); + } + + return new IfStatementNode(condition, thenBranch, elseBranch, location); + } + + private WhileStatementNode ParseWhileStatement() + { + var location = _current.Location; + Advance(); // 跳过 while + Consume(TokenType.LeftParen, "Expected '('"); + var condition = ParseExpression(); + Consume(TokenType.RightParen, "Expected ')'"); + var body = ParseStatement(); + + return new WhileStatementNode(condition, body, location); + } + + private ForStatementNode ParseForStatement() + { + var location = _current.Location; + Advance(); // 跳过 for + Consume(TokenType.LeftParen, "Expected '('"); + + StatementNode? init = null; + if (_current.Type != TokenType.Semicolon) + { + init = _current.Type == TokenType.LeftBrace ? ParseBlock() : ParseExpressionStatement(); + } + else + { + Advance(); + } + + ExpressionNode? condition = null; + if (_current.Type != TokenType.Semicolon) + { + condition = ParseExpression(); + } + Consume(TokenType.Semicolon, "Expected ';'"); + + ExpressionNode? increment = null; + if (_current.Type != TokenType.RightParen) + { + increment = ParseExpression(); + } + Consume(TokenType.RightParen, "Expected ')'"); + + var body = ParseStatement(); + return new ForStatementNode(init, condition, increment, body, location); + } + + private ReturnStatementNode ParseReturnStatement() + { + var location = _current.Location; + Advance(); // 跳过 return + + ExpressionNode? expression = null; + if (_current.Type != TokenType.Semicolon) + { + expression = ParseExpression(); + } + Consume(TokenType.Semicolon, "Expected ';'"); + + return new ReturnStatementNode(expression, location); + } + + private BreakStatementNode ParseBreakStatement() + { + var location = _current.Location; + Advance(); + Consume(TokenType.Semicolon, "Expected ';'"); + return new BreakStatementNode(location); + } + + private ContinueStatementNode ParseContinueStatement() + { + var location = _current.Location; + Advance(); + Consume(TokenType.Semicolon, "Expected ';'"); + return new ContinueStatementNode(location); + } + + private ExpressionStatementNode ParseExpressionStatement() + { + var expression = ParseExpression(); + Consume(TokenType.Semicolon, "Expected ';'"); + return new ExpressionStatementNode(expression, expression.Location); + } + + private ExpressionNode ParseExpression() + { + return ParseAssignment(); + } + + private ExpressionNode ParseAssignment() + { + var left = ParseLogicalOr(); + + if (_current.Type is TokenType.Assign or TokenType.PlusAssign or TokenType.MinusAssign + or TokenType.StarAssign or TokenType.SlashAssign) + { + var op = _current.Type; + var location = _current.Location; + Advance(); + var right = ParseAssignment(); + return new AssignmentExpressionNode(left, op, right, location); + } + + return left; + } + + private ExpressionNode ParseLogicalOr() + { + var left = ParseLogicalAnd(); + + while (_current.Type == TokenType.Or) + { + var location = _current.Location; + Advance(); + var right = ParseLogicalAnd(); + left = new BinaryExpressionNode(left, TokenType.Or, right, location); + } + + return left; + } + + private ExpressionNode ParseLogicalAnd() + { + var left = ParseBitwiseOr(); + + while (_current.Type == TokenType.And) + { + var location = _current.Location; + Advance(); + var right = ParseBitwiseOr(); + left = new BinaryExpressionNode(left, TokenType.And, right, location); + } + + return left; + } + + private ExpressionNode ParseBitwiseOr() + { + var left = ParseBitwiseXor(); + + while (_current.Type == TokenType.BitOr) + { + var location = _current.Location; + Advance(); + var right = ParseBitwiseXor(); + left = new BinaryExpressionNode(left, TokenType.BitOr, right, location); + } + + return left; + } + + private ExpressionNode ParseBitwiseXor() + { + var left = ParseBitwiseAnd(); + + while (_current.Type == TokenType.BitXor) + { + var location = _current.Location; + Advance(); + var right = ParseBitwiseAnd(); + left = new BinaryExpressionNode(left, TokenType.BitXor, right, location); + } + + return left; + } + + private ExpressionNode ParseBitwiseAnd() + { + var left = ParseEquality(); + + while (_current.Type == TokenType.BitAnd) + { + var location = _current.Location; + Advance(); + var right = ParseEquality(); + left = new BinaryExpressionNode(left, TokenType.BitAnd, right, location); + } + + return left; + } + + private ExpressionNode ParseEquality() + { + var left = ParseRelational(); + + while (_current.Type is TokenType.Equal or TokenType.NotEqual) + { + var op = _current.Type; + var location = _current.Location; + Advance(); + var right = ParseRelational(); + left = new BinaryExpressionNode(left, op, right, location); + } + + return left; + } + + private ExpressionNode ParseRelational() + { + var left = ParseShift(); + + while (_current.Type is TokenType.Less or TokenType.Greater + or TokenType.LessEqual or TokenType.GreaterEqual) + { + var op = _current.Type; + var location = _current.Location; + Advance(); + var right = ParseShift(); + left = new BinaryExpressionNode(left, op, right, location); + } + + return left; + } + + private ExpressionNode ParseShift() + { + var left = ParseAdditive(); + + while (_current.Type is TokenType.LeftShift or TokenType.RightShift) + { + var op = _current.Type; + var location = _current.Location; + Advance(); + var right = ParseAdditive(); + left = new BinaryExpressionNode(left, op, right, location); + } + + return left; + } + + private ExpressionNode ParseAdditive() + { + var left = ParseMultiplicative(); + + while (_current.Type is TokenType.Plus or TokenType.Minus) + { + var op = _current.Type; + var location = _current.Location; + Advance(); + var right = ParseMultiplicative(); + left = new BinaryExpressionNode(left, op, right, location); + } + + return left; + } + + private ExpressionNode ParseMultiplicative() + { + var left = ParseUnary(); + + while (_current.Type is TokenType.Star or TokenType.Slash or TokenType.Percent) + { + var op = _current.Type; + var location = _current.Location; + Advance(); + var right = ParseUnary(); + left = new BinaryExpressionNode(left, op, right, location); + } + + return left; + } + + private ExpressionNode ParseUnary() + { + if (_current.Type is TokenType.Minus or TokenType.Not or TokenType.BitNot + or TokenType.Increment or TokenType.Decrement) + { + var op = _current.Type; + var location = _current.Location; + Advance(); + var operand = ParseUnary(); + return new UnaryExpressionNode(op, operand, location); + } + + return ParsePostfix(); + } + + private ExpressionNode ParsePostfix() + { + var expr = ParsePrimary(); + + while (true) + { + if (_current.Type == TokenType.LeftParen) + { + // 函数调用 + var location = _current.Location; + Advance(); + var args = new List(); + + if (_current.Type != TokenType.RightParen) + { + do + { + args.Add(ParseExpression()); + if (_current.Type == TokenType.Comma) + Advance(); + else + break; + } while (_current.Type != TokenType.RightParen); + } + + Consume(TokenType.RightParen, "Expected ')'"); + + if (expr is IdentifierExpressionNode id) + { + expr = new FunctionCallExpressionNode(id.Name, args, location); + } + } + else if (_current.Type == TokenType.LeftBracket) + { + // 数组访问 + var location = _current.Location; + Advance(); + var index = ParseExpression(); + Consume(TokenType.RightBracket, "Expected ']'"); + expr = new ArrayAccessExpressionNode(expr, index, location); + } + else if (_current.Type == TokenType.Dot || _current.Type == TokenType.Arrow) + { + // 成员访问 + var op = _current.Type; + var location = _current.Location; + Advance(); + var member = Consume(TokenType.Identifier, "Expected member name"); + expr = new MemberAccessExpressionNode(expr, member.Lexeme, location); + } + else + { + break; + } + } + + return expr; + } + + private ExpressionNode ParsePrimary() + { + var token = _current; + + if (token.Type == TokenType.Identifier) + { + Advance(); + return new IdentifierExpressionNode(token.Lexeme, token.Location); + } + if (token.Type == TokenType.IntLiteral) + { + Advance(); + return new LiteralExpressionNode(token.Value!, TokenType.IntLiteral, token.Location); + } + if (token.Type == TokenType.FloatLiteral) + { + Advance(); + return new LiteralExpressionNode(token.Value!, TokenType.FloatLiteral, token.Location); + } + if (token.Type == TokenType.CharLiteral) + { + Advance(); + return new LiteralExpressionNode(token.Value!, TokenType.CharLiteral, token.Location); + } + if (token.Type == TokenType.StringLiteral) + { + Advance(); + return new LiteralExpressionNode(token.Value!, TokenType.StringLiteral, token.Location); + } + if (token.Type == TokenType.LeftParen) + { + return ParseParenExpression(); + } + + throw new ParseException($"Unexpected token: {token.Type}", token.Location); + } + + private ExpressionNode ParseParenExpression() + { + var location = _current.Location; + Consume(TokenType.LeftParen, "Expected '('"); + var expr = ParseExpression(); + Consume(TokenType.RightParen, "Expected ')'"); + return expr; + } + + private Token Consume(TokenType type, string message) + { + if (_current.Type != type) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"{message}, got {_current.Type} instead", + _current.Location + )); + } + + var token = _current; + Advance(); + return token; + } + + private void Advance() + { + if (_tokens.MoveNext()) + { + _current = _tokens.Current; + } + } + + private void SkipToken() + { + Advance(); + } +} + +/// +/// 解析异常 +/// +public sealed class ParseException : Exception +{ + public SourceLocation Location { get; } + + public ParseException(string message, SourceLocation location) : base(message) + { + Location = location; + } +} diff --git a/src/TinyCC.Core/Preprocessor/Preprocessor.cs b/src/TinyCC.Core/Preprocessor/Preprocessor.cs new file mode 100644 index 0000000..cb0ae9d --- /dev/null +++ b/src/TinyCC.Core/Preprocessor/Preprocessor.cs @@ -0,0 +1,652 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; +using System.Text.RegularExpressions; + +namespace TinyCC.Core; + +/// +/// C 预处理器 +/// 支持 #include, #define, #ifdef, #ifndef, #if, #else, #endif, #undef +/// +public sealed class Preprocessor +{ + private readonly IErrorReporter _errorReporter; + private readonly Dictionary _macros = new(); + private readonly List _includePaths = new(); + private readonly HashSet _includedFiles = new(); + private int _lineNumber; + + public Preprocessor(IErrorReporter errorReporter) + { + _errorReporter = errorReporter; + + // 预定义宏 + _macros["__LINE__"] = null; // 动态值 + _macros["__FILE__"] = null; // 动态值 + _macros["__DATE__"] = DateTime.Now.ToString("MMM dd yyyy"); + _macros["__TIME__"] = DateTime.Now.ToString("HH:mm:ss"); + _macros["__STDC__"] = "1"; + _macros["__STDC_VERSION__"] = "199901L"; + } + + /// + /// 添加头文件搜索路径 + /// + public void AddIncludePath(string path) + { + if (!string.IsNullOrEmpty(path) && Directory.Exists(path)) + { + _includePaths.Add(path); + } + } + + /// + /// 定义宏 + /// + public void DefineMacro(string name, string? value) + { + _macros[name] = value; + } + + /// + /// 取消定义宏 + /// + public void UndefineMacro(string name) + { + _macros.Remove(name); + } + + /// + /// 检查宏是否已定义 + /// + public bool IsMacroDefined(string name) + { + return _macros.ContainsKey(name); + } + + /// + /// 预处理源文件 + /// + public string Preprocess(string sourceFile) + { + _includedFiles.Clear(); + _lineNumber = 1; + return ProcessFile(sourceFile, new HashSet()); + } + + private string ProcessFile(string filePath, HashSet includeStack) + { + var fullPath = ResolveIncludePath(filePath); + if (fullPath == null) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Cannot find include file: {filePath}", + new SourceLocation(filePath, _lineNumber, 1) + )); + return ""; + } + + if (includeStack.Contains(fullPath)) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Warning, + $"Circular include detected: {fullPath}", + new SourceLocation(fullPath, _lineNumber, 1) + )); + return ""; + } + + includeStack.Add(fullPath); + + if (_includedFiles.Contains(fullPath)) + { + return ""; // 防止重复包含 + } + + _includedFiles.Add(fullPath); + + if (!File.Exists(fullPath)) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"File not found: {fullPath}", + new SourceLocation(fullPath, _lineNumber, 1) + )); + return ""; + } + + var source = File.ReadAllText(fullPath); + return ProcessSource(source, fullPath, includeStack); + } + + private string ProcessSource(string source, string fileName, HashSet includeStack) + { + var lines = source.Split('\n'); + var output = new StringBuilder(); + _lineNumber = 1; + + var conditionStack = new List { true }; + var conditionResolved = new List { true }; + + for (int i = 0; i < lines.Length; i++) + { + var line = lines[i].TrimEnd('\r'); + var trimmedLine = line.Trim(); + + if (trimmedLine.StartsWith("#")) + { + var directive = ParseDirective(trimmedLine); + + switch (directive.Name) + { + case "include": + if (conditionStack[^1]) + { + var includeFile = directive.Argument.Trim(' ', '"', '<', '>'); + var includedContent = ProcessFile(includeFile, new HashSet(includeStack)); + output.AppendLine(includedContent); + } + break; + + case "define": + if (conditionStack[^1]) + { + ProcessDefine(directive.Argument); + } + break; + + case "undef": + if (conditionStack[^1]) + { + UndefineMacro(directive.Argument.Trim()); + } + break; + + case "ifdef": + var isDefined = IsMacroDefined(directive.Argument.Trim()); + conditionStack.Add(isDefined && conditionStack[^1]); + conditionResolved.Add(true); + break; + + case "ifndef": + var isNotDefined = !IsMacroDefined(directive.Argument.Trim()); + conditionStack.Add(isNotDefined && conditionStack[^1]); + conditionResolved.Add(true); + break; + + case "if": + var condition = EvaluateCondition(directive.Argument.Trim()); + conditionStack.Add(condition && conditionStack[^1]); + conditionResolved.Add(true); + break; + + case "else": + if (conditionStack.Count > 1) + { + conditionStack[^1] = !conditionStack[^1] && conditionStack[^2]; + } + break; + + case "elif": + if (conditionStack.Count > 1) + { + var elifCondition = EvaluateCondition(directive.Argument.Trim()); + conditionStack[^1] = elifCondition && conditionStack[^2]; + } + break; + + case "endif": + if (conditionStack.Count > 1) + { + conditionStack.RemoveAt(conditionStack.Count - 1); + conditionResolved.RemoveAt(conditionResolved.Count - 1); + } + break; + + case "pragma": + // 忽略 pragma + break; + + case "error": + if (conditionStack[^1]) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + directive.Argument.Trim(), + new SourceLocation(fileName, _lineNumber, 1) + )); + } + break; + + case "warning": + if (conditionStack[^1]) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Warning, + directive.Argument.Trim(), + new SourceLocation(fileName, _lineNumber, 1) + )); + } + break; + + case "line": + // 处理 #line 指令 + break; + + default: + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Unknown preprocessor directive: {directive.Name}", + new SourceLocation(fileName, _lineNumber, 1) + )); + break; + } + } + else if (conditionStack[^1]) + { + // 展开宏 + var expandedLine = ExpandMacros(line, fileName); + output.AppendLine(expandedLine); + } + + _lineNumber++; + } + + return output.ToString(); + } + + private Directive ParseDirective(string line) + { + // 移除 # 前缀 + var content = line.Substring(1).Trim(); + + var spaceIndex = content.IndexOfAny(new[] { ' ', '\t' }); + if (spaceIndex < 0) + { + return new Directive(content, ""); + } + + var name = content.Substring(0, spaceIndex).Trim(); + var argument = content.Substring(spaceIndex).Trim(); + + // 移除行尾注释 + var commentIndex = argument.IndexOf("//"); + if (commentIndex >= 0) + { + argument = argument.Substring(0, commentIndex).TrimEnd(); + } + + return new Directive(name, argument); + } + + private record Directive(string Name, string Argument); + + private void ProcessDefine(string argument) + { + if (string.IsNullOrWhiteSpace(argument)) + { + return; + } + + var parts = argument.Split(new[] { ' ', '\t' }, 2); + var name = parts[0].Trim(); + + if (!IsValidIdentifier(name)) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Invalid macro name: {name}", + new SourceLocation("", _lineNumber, 1) + )); + return; + } + + if (parts.Length > 1) + { + var value = parts[1].Trim(); + // 处理函数式宏 + if (value.StartsWith("(")) + { + // 简单处理:存储完整的宏定义 + _macros[name] = value; + } + else + { + _macros[name] = value; + } + } + else + { + _macros[name] = ""; + } + } + + private string ExpandMacros(string line, string fileName) + { + var result = new StringBuilder(); + var i = 0; + + while (i < line.Length) + { + // 检查字符串字面量 + if (line[i] == '"') + { + var strEnd = line.IndexOf('"', i + 1); + if (strEnd < 0) + { + result.Append(line.Substring(i)); + break; + } + result.Append(line.Substring(i, strEnd - i + 1)); + i = strEnd + 1; + continue; + } + + // 检查字符字面量 + if (line[i] == '\'') + { + var charEnd = line.IndexOf('\'', i + 1); + if (charEnd < 0) + { + result.Append(line.Substring(i)); + break; + } + result.Append(line.Substring(i, charEnd - i + 1)); + i = charEnd + 1; + continue; + } + + // 检查标识符 + if (char.IsLetter(line[i]) || line[i] == '_') + { + var start = i; + while (i < line.Length && (char.IsLetterOrDigit(line[i]) || line[i] == '_')) + { + i++; + } + + var identifier = line.Substring(start, i - start); + + // 特殊宏 + if (identifier == "__LINE__") + { + result.Append(_lineNumber); + } + else if (identifier == "__FILE__") + { + result.Append($"\"{fileName}\""); + } + else if (_macros.TryGetValue(identifier, out var value)) + { + result.Append(value ?? ""); + } + else + { + result.Append(identifier); + } + + continue; + } + + result.Append(line[i]); + i++; + } + + return result.ToString(); + } + + private bool EvaluateCondition(string expression) + { + expression = expression.Trim(); + + // 处理 defined() 和 defined + expression = Regex.Replace(expression, @"defined\s*\((\w+)\)", m => IsMacroDefined(m.Groups[1].Value) ? "1" : "0"); + expression = Regex.Replace(expression, @"defined\s+(\w+)", m => IsMacroDefined(m.Groups[1].Value) ? "1" : "0"); + + // 展开宏 + expression = ExpandMacros(expression, ""); + + // 简单的常量表达式求值 + try + { + // 移除 L 后缀 + expression = Regex.Replace(expression, @"(\d+)L", "$1"); + + // 处理十六进制 + expression = Regex.Replace(expression, @"0x([0-9a-fA-F]+)", m => Convert.ToInt64(m.Groups[1].Value, 16).ToString()); + + // 简单的算术表达式求值 + return EvaluateExpression(expression) != 0; + } + catch + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Warning, + $"Failed to evaluate condition: {expression}", + new SourceLocation("", _lineNumber, 1) + )); + return false; + } + } + + private long EvaluateExpression(string expression) + { + // 简单的表达式求值,支持 +, -, *, /, %, &&, ||, ==, !=, <, >, <=, >= + expression = expression.Trim(); + + if (long.TryParse(expression, out var value)) + { + return value; + } + + // 处理逻辑或 + var orIndex = expression.IndexOf("||"); + if (orIndex > 0) + { + var left = EvaluateExpression(expression.Substring(0, orIndex)); + var right = EvaluateExpression(expression.Substring(orIndex + 2)); + return (left != 0 || right != 0) ? 1 : 0; + } + + // 处理逻辑与 + var andIndex = expression.IndexOf("&&"); + if (andIndex > 0) + { + var left = EvaluateExpression(expression.Substring(0, andIndex)); + var right = EvaluateExpression(expression.Substring(andIndex + 2)); + return (left != 0 && right != 0) ? 1 : 0; + } + + // 处理相等比较 + var eqIndex = expression.IndexOf("=="); + if (eqIndex > 0) + { + var left = EvaluateExpression(expression.Substring(0, eqIndex)); + var right = EvaluateExpression(expression.Substring(eqIndex + 2)); + return left == right ? 1 : 0; + } + + // 处理不等比较 + var neIndex = expression.IndexOf("!="); + if (neIndex > 0) + { + var left = EvaluateExpression(expression.Substring(0, neIndex)); + var right = EvaluateExpression(expression.Substring(neIndex + 2)); + return left != right ? 1 : 0; + } + + // 处理小于等于 + var leIndex = expression.IndexOf("<="); + if (leIndex > 0) + { + var left = EvaluateExpression(expression.Substring(0, leIndex)); + var right = EvaluateExpression(expression.Substring(leIndex + 2)); + return left <= right ? 1 : 0; + } + + // 处理大于等于 + var geIndex = expression.IndexOf(">="); + if (geIndex > 0) + { + var left = EvaluateExpression(expression.Substring(0, geIndex)); + var right = EvaluateExpression(expression.Substring(geIndex + 2)); + return left >= right ? 1 : 0; + } + + // 处理小于 + var ltIndex = expression.IndexOf("<"); + if (ltIndex > 0 && ltIndex < expression.Length - 1) + { + var left = EvaluateExpression(expression.Substring(0, ltIndex)); + var right = EvaluateExpression(expression.Substring(ltIndex + 1)); + return left < right ? 1 : 0; + } + + // 处理大于 + var gtIndex = expression.IndexOf(">"); + if (gtIndex > 0 && gtIndex < expression.Length - 1) + { + var left = EvaluateExpression(expression.Substring(0, gtIndex)); + var right = EvaluateExpression(expression.Substring(gtIndex + 1)); + return left > right ? 1 : 0; + } + + // 处理加法 + var addIndex = expression.IndexOf("+"); + if (addIndex > 0) + { + var left = EvaluateExpression(expression.Substring(0, addIndex)); + var right = EvaluateExpression(expression.Substring(addIndex + 1)); + return left + right; + } + + // 处理减法 + var subIndex = expression.LastIndexOf("-"); + if (subIndex > 0) + { + var left = EvaluateExpression(expression.Substring(0, subIndex)); + var right = EvaluateExpression(expression.Substring(subIndex + 1)); + return left - right; + } + + // 处理乘法 + var mulIndex = expression.IndexOf("*"); + if (mulIndex > 0) + { + var left = EvaluateExpression(expression.Substring(0, mulIndex)); + var right = EvaluateExpression(expression.Substring(mulIndex + 1)); + return left * right; + } + + // 处理除法 + var divIndex = expression.IndexOf("/"); + if (divIndex > 0) + { + var left = EvaluateExpression(expression.Substring(0, divIndex)); + var right = EvaluateExpression(expression.Substring(divIndex + 1)); + if (right == 0) + { + throw new DivideByZeroException(); + } + return left / right; + } + + // 处理取模 + var modIndex = expression.IndexOf("%"); + if (modIndex > 0) + { + var left = EvaluateExpression(expression.Substring(0, modIndex)); + var right = EvaluateExpression(expression.Substring(modIndex + 1)); + if (right == 0) + { + throw new DivideByZeroException(); + } + return left % right; + } + + // 处理括号 + if (expression.StartsWith("(") && expression.EndsWith(")")) + { + return EvaluateExpression(expression.Substring(1, expression.Length - 2)); + } + + // 尝试解析为数字 + if (long.TryParse(expression, out var numValue)) + { + return numValue; + } + + // 未知表达式,返回 0 + return 0; + } + + private string? ResolveIncludePath(string fileName) + { + // 检查是否为绝对路径 + if (Path.IsPathRooted(fileName) && File.Exists(fileName)) + { + return Path.GetFullPath(fileName); + } + + // 检查当前目录 + if (File.Exists(fileName)) + { + return Path.GetFullPath(fileName); + } + + // 搜索 include 路径 + foreach (var includePath in _includePaths) + { + var fullPath = Path.Combine(includePath, fileName); + if (File.Exists(fullPath)) + { + return Path.GetFullPath(fullPath); + } + } + + // 检查系统 include 目录 + var systemPaths = new[] + { + "/usr/include", + "/usr/local/include", + "include" + }; + + foreach (var systemPath in systemPaths) + { + var fullPath = Path.Combine(systemPath, fileName); + if (File.Exists(fullPath)) + { + return Path.GetFullPath(fullPath); + } + } + + return null; + } + + private bool IsValidIdentifier(string name) + { + if (string.IsNullOrEmpty(name)) + { + return false; + } + + if (!char.IsLetter(name[0]) && name[0] != '_') + { + return false; + } + + for (int i = 1; i < name.Length; i++) + { + if (!char.IsLetterOrDigit(name[i]) && name[i] != '_') + { + return false; + } + } + + return true; + } +} diff --git a/src/TinyCC.Core/Semantic/SemanticAnalyzer.cs b/src/TinyCC.Core/Semantic/SemanticAnalyzer.cs new file mode 100644 index 0000000..ffa85d8 --- /dev/null +++ b/src/TinyCC.Core/Semantic/SemanticAnalyzer.cs @@ -0,0 +1,879 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace TinyCC.Core; + +/// +/// 语义分析器 +/// 负责类型检查、符号表管理和语义验证 +/// +public sealed class SemanticAnalyzer +{ + private readonly IErrorReporter _errorReporter; + private readonly SymbolTable _symbolTable; + private readonly TypeChecker _typeChecker; + private CType? _currentReturnType; + private bool _inLoop; + private bool _inSwitch; + + public SemanticAnalyzer(IErrorReporter errorReporter) + { + _errorReporter = errorReporter; + _symbolTable = new SymbolTable(errorReporter); + _typeChecker = new TypeChecker(errorReporter); + } + + /// + /// 分析 AST + /// + public void Analyze(AstNode root) + { + if (root is ProgramNode program) + { + AnalyzeProgram(program); + } + } + + private void AnalyzeProgram(ProgramNode program) + { + // 第一遍:收集所有函数和全局变量声明 + foreach (var decl in program.Declarations) + { + switch (decl) + { + case FunctionDeclarationNode func: + AnalyzeFunctionDeclaration(func); + break; + case VariableDeclarationNode varDecl: + AnalyzeGlobalVariableDeclaration(varDecl); + break; + } + } + + // 第二遍:分析函数体 + foreach (var decl in program.Declarations) + { + if (decl is FunctionDeclarationNode func) + { + AnalyzeFunctionBody(func); + } + } + } + + private void AnalyzeFunctionDeclaration(FunctionDeclarationNode func) + { + // 检查函数名是否重复 + if (_symbolTable.CurrentScopeExists(func.Name)) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Function '{func.Name}' already declared", + func.Location + )); + return; + } + + // 创建函数类型 + var paramTypes = func.Parameters.Select(p => ParseType(p.Type)).ToList(); + var returnType = ParseType(func.ReturnType); + var funcType = new FunctionType(returnType, paramTypes); + + // 注册函数符号 + var funcSymbol = new FunctionSymbol(func.Name, funcType, func.Parameters, func.ReturnType); + _symbolTable.AddSymbol(func.Name, funcSymbol); + } + + private void AnalyzeFunctionBody(FunctionDeclarationNode func) + { + _symbolTable.EnterScope(); + _currentReturnType = ParseType(func.ReturnType); + + // 添加参数到符号表 + foreach (var param in func.Parameters) + { + var paramType = ParseType(param.Type); + var paramSymbol = new VariableSymbol(param.Name, paramType); + _symbolTable.AddSymbol(param.Name, paramSymbol); + } + + // 分析函数体 + AnalyzeBlock(func.Body); + + _symbolTable.ExitScope(); + _currentReturnType = null; + } + + private void AnalyzeGlobalVariableDeclaration(VariableDeclarationNode varDecl) + { + if (_symbolTable.CurrentScopeExists(varDecl.Name)) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Variable '{varDecl.Name}' already declared", + varDecl.Location + )); + return; + } + + var type = ParseType(varDecl.Type); + var symbol = new VariableSymbol(varDecl.Name, type); + _symbolTable.AddSymbol(varDecl.Name, symbol); + + // 分析初始化器 + if (varDecl.Initializer != null) + { + var initType = AnalyzeExpression(varDecl.Initializer); + if (initType != null) + { + CheckAssignmentCompatibility(type, initType, varDecl.Initializer.Location); + } + } + } + + private void AnalyzeBlock(BlockStatementNode block) + { + _symbolTable.EnterScope(); + + foreach (var stmt in block.Statements) + { + switch (stmt) + { + case StatementNode statement: + AnalyzeStatement(statement); + break; + case VariableDeclarationNode varDecl: + AnalyzeLocalVariableDeclaration(varDecl); + break; + } + } + + _symbolTable.ExitScope(); + } + + private void AnalyzeStatement(StatementNode stmt) + { + switch (stmt) + { + case BlockStatementNode block: + AnalyzeBlock(block); + break; + + case ExpressionStatementNode exprStmt: + AnalyzeExpression(exprStmt.Expression); + break; + + case ReturnStatementNode returnStmt: + AnalyzeReturnStatement(returnStmt); + break; + + case IfStatementNode ifStmt: + AnalyzeIfStatement(ifStmt); + break; + + case WhileStatementNode whileStmt: + AnalyzeWhileStatement(whileStmt); + break; + + case ForStatementNode forStmt: + AnalyzeForStatement(forStmt); + break; + + case BreakStatementNode breakStmt: + if (!_inLoop && !_inSwitch) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + "'break' statement not within loop or switch", + breakStmt.Location + )); + } + break; + + case ContinueStatementNode continueStmt: + if (!_inLoop) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + "'continue' statement not within loop", + continueStmt.Location + )); + } + break; + } + } + + private void AnalyzeLocalVariableDeclaration(VariableDeclarationNode varDecl) + { + if (_symbolTable.CurrentScopeExists(varDecl.Name)) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Variable '{varDecl.Name}' already declared in this scope", + varDecl.Location + )); + return; + } + + var type = ParseType(varDecl.Type); + var symbol = new VariableSymbol(varDecl.Name, type); + _symbolTable.AddSymbol(varDecl.Name, symbol); + + // 分析初始化器 + if (varDecl.Initializer != null) + { + var initType = AnalyzeExpression(varDecl.Initializer); + if (initType != null) + { + CheckAssignmentCompatibility(type, initType, varDecl.Initializer.Location); + } + } + } + + private void AnalyzeReturnStatement(ReturnStatementNode returnStmt) + { + if (_currentReturnType == null) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + "Return statement outside of function", + returnStmt.Location + )); + return; + } + + if (returnStmt.Expression != null) + { + var exprType = AnalyzeExpression(returnStmt.Expression); + if (exprType != null) + { + CheckAssignmentCompatibility(_currentReturnType, exprType, returnStmt.Location); + } + } + else + { + // void 函数不应该有返回值 + if (_currentReturnType is not VoidType) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + "Non-void function must return a value", + returnStmt.Location + )); + } + } + } + + private void AnalyzeIfStatement(IfStatementNode ifStmt) + { + var condType = AnalyzeExpression(ifStmt.Condition); + if (condType != null && !IsIntegerType(condType)) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + "Condition of 'if' must be an integer type", + ifStmt.Condition.Location + )); + } + + AnalyzeStatement(ifStmt.ThenBranch); + if (ifStmt.ElseBranch != null) + { + AnalyzeStatement(ifStmt.ElseBranch); + } + } + + private void AnalyzeWhileStatement(WhileStatementNode whileStmt) + { + var oldInLoop = _inLoop; + _inLoop = true; + + var condType = AnalyzeExpression(whileStmt.Condition); + if (condType != null && !IsIntegerType(condType)) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + "Condition of 'while' must be an integer type", + whileStmt.Condition.Location + )); + } + + AnalyzeStatement(whileStmt.Body); + _inLoop = oldInLoop; + } + + private void AnalyzeForStatement(ForStatementNode forStmt) + { + _symbolTable.EnterScope(); + var oldInLoop = _inLoop; + _inLoop = true; + + if (forStmt.Init != null) + { + switch (forStmt.Init) + { + case StatementNode statement: + AnalyzeStatement(statement); + break; + case VariableDeclarationNode varDecl: + AnalyzeLocalVariableDeclaration(varDecl); + break; + } + } + + if (forStmt.Condition != null) + { + var condType = AnalyzeExpression(forStmt.Condition); + if (condType != null && !IsIntegerType(condType)) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + "Condition of 'for' must be an integer type", + forStmt.Condition.Location + )); + } + } + + if (forStmt.Increment != null) + { + AnalyzeExpression(forStmt.Increment); + } + + AnalyzeStatement(forStmt.Body); + + _inLoop = oldInLoop; + _symbolTable.ExitScope(); + } + + private CType? AnalyzeExpression(ExpressionNode expr) + { + return expr switch + { + LiteralExpressionNode literal => AnalyzeLiteralExpression(literal), + IdentifierExpressionNode identifier => AnalyzeIdentifierExpression(identifier), + BinaryExpressionNode binary => AnalyzeBinaryExpression(binary), + UnaryExpressionNode unary => AnalyzeUnaryExpression(unary), + AssignmentExpressionNode assignment => AnalyzeAssignmentExpression(assignment), + FunctionCallExpressionNode call => AnalyzeFunctionCallExpression(call), + _ => null + }; + } + + private CType? AnalyzeLiteralExpression(LiteralExpressionNode literal) + { + return literal.Type switch + { + TokenType.IntLiteral => IntType.Instance, + TokenType.FloatLiteral => DoubleType.Instance, + TokenType.CharLiteral => CharType.Instance, + TokenType.StringLiteral => new PointerType(CharType.Instance), + _ => null + }; + } + + private CType? AnalyzeIdentifierExpression(IdentifierExpressionNode identifier) + { + var symbol = _symbolTable.Lookup(identifier.Name); + if (symbol == null) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Undeclared identifier '{identifier.Name}'", + identifier.Location + )); + return null; + } + + return symbol switch + { + VariableSymbol varSymbol => varSymbol.Type, + FunctionSymbol funcSymbol => funcSymbol.ReturnType, + _ => null + }; + } + + private CType? AnalyzeBinaryExpression(BinaryExpressionNode binary) + { + var leftType = AnalyzeExpression(binary.Left); + var rightType = AnalyzeExpression(binary.Right); + + if (leftType == null || rightType == null) + { + return null; + } + + return _typeChecker.GetBinaryResultType(binary.Operator, leftType, rightType, binary.Location); + } + + private CType? AnalyzeUnaryExpression(UnaryExpressionNode unary) + { + var operandType = AnalyzeExpression(unary.Expression); + if (operandType == null) + { + return null; + } + + return _typeChecker.GetUnaryResultType(unary.Operator, operandType, unary.Location); + } + + private CType? AnalyzeAssignmentExpression(AssignmentExpressionNode assignment) + { + var rightType = AnalyzeExpression(assignment.Right); + + if (assignment.Left is IdentifierExpressionNode identifier) + { + var symbol = _symbolTable.Lookup(identifier.Name); + if (symbol == null) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Undeclared variable '{identifier.Name}'", + identifier.Location + )); + return rightType; + } + + if (symbol is VariableSymbol varSymbol && rightType != null) + { + CheckAssignmentCompatibility(varSymbol.Type, rightType, assignment.Location); + } + } + + return rightType; + } + + private CType? AnalyzeFunctionCallExpression(FunctionCallExpressionNode call) + { + var symbol = _symbolTable.Lookup(call.FunctionName); + if (symbol == null) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Call to undeclared function '{call.FunctionName}'", + call.Location + )); + return IntType.Instance; // 默认返回 int + } + + if (symbol is not FunctionSymbol funcSymbol) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"'{call.FunctionName}' is not a function", + call.Location + )); + return null; + } + + // 检查参数数量 + if (call.Arguments.Count != funcSymbol.Parameters.Count) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Function '{call.FunctionName}' expects {funcSymbol.Parameters.Count} arguments, but {call.Arguments.Count} were provided", + call.Location + )); + return funcSymbol.ReturnType; + } + + // 检查参数类型 + for (int i = 0; i < call.Arguments.Count; i++) + { + var argType = AnalyzeExpression(call.Arguments[i]); + var expectedType = ParseType(funcSymbol.Parameters[i].Type); + + if (argType != null) + { + CheckAssignmentCompatibility(expectedType, argType, call.Arguments[i].Location); + } + } + + return funcSymbol.ReturnType; + } + + private void CheckAssignmentCompatibility(CType targetType, CType sourceType, SourceLocation location) + { + if (!_typeChecker.IsAssignable(targetType, sourceType)) + { + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Cannot assign '{sourceType}' to '{targetType}'", + location + )); + } + } + + private CType ParseType(TypeNode typeNode) + { + return typeNode switch + { + PrimitiveTypeNode p => p.TypeName.ToLower() switch + { + "int" => IntType.Instance, + "char" => CharType.Instance, + "float" => FloatType.Instance, + "double" => DoubleType.Instance, + "long" => LongType.Instance, + "short" => ShortType.Instance, + "void" => VoidType.Instance, + _ => IntType.Instance // 默认 + }, + PointerTypeNode p => new PointerType(ParseType(p.BaseType)), + ArrayTypeNode a => new ArrayType(ParseType(a.ElementType), a.Size), + _ => IntType.Instance + }; + } + + private bool IsIntegerType(CType type) + { + return type is IntType or CharType or LongType or ShortType; + } +} + +/// +/// 符号表 +/// +public sealed class SymbolTable +{ + private readonly IErrorReporter _errorReporter; + private readonly List> _scopes = new(); + + public SymbolTable(IErrorReporter errorReporter) + { + _errorReporter = errorReporter; + EnterScope(); // 创建全局作用域 + } + + public void EnterScope() + { + _scopes.Add(new Dictionary()); + } + + public void ExitScope() + { + if (_scopes.Count > 1) + { + _scopes.RemoveAt(_scopes.Count - 1); + } + } + + public void AddSymbol(string name, Symbol symbol) + { + _scopes[^1][name] = symbol; + } + + public Symbol? Lookup(string name) + { + for (int i = _scopes.Count - 1; i >= 0; i--) + { + if (_scopes[i].TryGetValue(name, out var symbol)) + { + return symbol; + } + } + return null; + } + + public bool CurrentScopeExists(string name) + { + return _scopes.Count > 0 && _scopes[^1].ContainsKey(name); + } +} + +/// +/// 符号基类 +/// +public abstract record Symbol(string Name); + +/// +/// 变量符号 +/// +public sealed record VariableSymbol(string Name, CType Type) : Symbol(Name); + +/// +/// 函数符号 +/// +public sealed record FunctionSymbol( + string Name, + FunctionType Type, + List Parameters, + TypeNode ReturnTypeNode +) : Symbol(Name) +{ + public CType ReturnType => Type.ReturnType; +} + +/// +/// 类型检查器 +/// +public sealed class TypeChecker +{ + private readonly IErrorReporter _errorReporter; + + public TypeChecker(IErrorReporter errorReporter) + { + _errorReporter = errorReporter; + } + + /// + /// 检查类型是否可赋值 + /// + public bool IsAssignable(CType targetType, CType sourceType) + { + // 相同类型 + if (targetType.Equals(sourceType)) + { + return true; + } + + // void* 可以接受任何指针 + if (targetType is PointerType { BaseType: VoidType } && sourceType is PointerType) + { + return true; + } + + // 整数提升 + if (IsIntegerType(targetType) && IsIntegerType(sourceType)) + { + return true; + } + + // 浮点提升 + if (targetType is DoubleType && sourceType is FloatType) + { + return true; + } + + // int 到 double + if (targetType is DoubleType && sourceType is IntType) + { + return true; + } + + return false; + } + + /// + /// 获取二元运算结果类型 + /// + public CType? GetBinaryResultType(TokenType op, CType leftType, CType rightType, SourceLocation location) + { + // 算术运算 + if (op is TokenType.Plus or TokenType.Minus or TokenType.Star or TokenType.Slash or TokenType.Percent) + { + if (IsNumericType(leftType) && IsNumericType(rightType)) + { + // 类型提升 + if (leftType is DoubleType || rightType is DoubleType) + return DoubleType.Instance; + if (leftType is FloatType || rightType is FloatType) + return FloatType.Instance; + if (leftType is LongType || rightType is LongType) + return LongType.Instance; + return IntType.Instance; + } + + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Invalid operands to binary operator '{GetOperatorString(op)}'", + location + )); + return null; + } + + // 比较运算 + if (op is TokenType.Equal or TokenType.NotEqual or TokenType.Less or TokenType.Greater + or TokenType.LessEqual or TokenType.GreaterEqual) + { + if (IsComparableType(leftType, rightType)) + { + return IntType.Instance; + } + + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Invalid operands to comparison operator '{GetOperatorString(op)}'", + location + )); + return null; + } + + // 逻辑运算 + if (op is TokenType.And or TokenType.Or) + { + return IntType.Instance; + } + + // 位运算 + if (op is TokenType.BitAnd or TokenType.BitOr or TokenType.BitXor + or TokenType.LeftShift or TokenType.RightShift) + { + if (IsIntegerType(leftType) && IsIntegerType(rightType)) + { + return IntType.Instance; + } + + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Invalid operands to bitwise operator '{GetOperatorString(op)}'", + location + )); + return null; + } + + return IntType.Instance; + } + + /// + /// 获取一元运算结果类型 + /// + public CType? GetUnaryResultType(TokenType op, CType operandType, SourceLocation location) + { + switch (op) + { + case TokenType.Minus: // 负号 + if (IsNumericType(operandType)) + { + return operandType; + } + break; + + case TokenType.Not: // 逻辑非 + return IntType.Instance; + + case TokenType.BitNot: // 按位非 + if (IsIntegerType(operandType)) + { + return operandType; + } + break; + } + + _errorReporter.Report(new ErrorInfo( + ErrorLevel.Error, + $"Invalid operand to unary operator '{GetOperatorString(op)}'", + location + )); + return null; + } + + private bool IsNumericType(CType type) + { + return type is IntType or CharType or LongType or ShortType or FloatType or DoubleType; + } + + private bool IsIntegerType(CType type) + { + return type is IntType or CharType or LongType or ShortType; + } + + private bool IsComparableType(CType left, CType right) + { + // 数字类型之间可以比较 + if (IsNumericType(left) && IsNumericType(right)) + { + return true; + } + + // 指针之间可以比较 + if (left is PointerType && right is PointerType) + { + return true; + } + + return false; + } + + private string GetOperatorString(TokenType op) + { + return op switch + { + TokenType.Plus => "+", + TokenType.Minus => "-", + TokenType.Star => "*", + TokenType.Slash => "/", + TokenType.Percent => "%", + TokenType.Equal => "==", + TokenType.NotEqual => "!=", + TokenType.Less => "<", + TokenType.Greater => ">", + TokenType.LessEqual => "<=", + TokenType.GreaterEqual => ">=", + TokenType.And => "&&", + TokenType.Or => "||", + TokenType.BitAnd => "&", + TokenType.BitOr => "|", + TokenType.BitXor => "^", + TokenType.LeftShift => "<<", + TokenType.RightShift => ">>", + TokenType.Not => "!", + TokenType.BitNot => "~", + _ => op.ToString() + }; + } +} + +/// +/// 类型系统 +/// +public abstract record CType +{ + public abstract string Name { get; } +} + +public sealed record VoidType : CType +{ + public static readonly VoidType Instance = new(); + public override string Name => "void"; +} + +public sealed record IntType : CType +{ + public static readonly IntType Instance = new(); + public override string Name => "int"; +} + +public sealed record CharType : CType +{ + public static readonly CharType Instance = new(); + public override string Name => "char"; +} + +public sealed record FloatType : CType +{ + public static readonly FloatType Instance = new(); + public override string Name => "float"; +} + +public sealed record DoubleType : CType +{ + public static readonly DoubleType Instance = new(); + public override string Name => "double"; +} + +public sealed record LongType : CType +{ + public static readonly LongType Instance = new(); + public override string Name => "long"; +} + +public sealed record ShortType : CType +{ + public static readonly ShortType Instance = new(); + public override string Name => "short"; +} + +public sealed record PointerType(CType BaseType) : CType +{ + public override string Name => $"{BaseType.Name}*"; +} + +public sealed record ArrayType(CType ElementType, int Size) : CType +{ + public override string Name => $"{ElementType.Name}[{Size}]"; +} + +public sealed record FunctionType(CType ReturnType, List ParameterTypes) : CType +{ + public override string Name => $"{ReturnType.Name}(...)"; +} diff --git a/src/TinyCC.Core/Target/ElfWriter.cs b/src/TinyCC.Core/Target/ElfWriter.cs new file mode 100644 index 0000000..dda6cbf --- /dev/null +++ b/src/TinyCC.Core/Target/ElfWriter.cs @@ -0,0 +1,78 @@ +namespace TinyCC.Core; + +/// +/// ELF 可执行文件生成器 +/// +public sealed class ElfWriter +{ + /// + /// 生成 ELF 可执行文件 + /// + public byte[] WriteExecutable(byte[] machineCode, string entryPoint) + { + using var stream = new MemoryStream(); + var writer = new BinaryWriter(stream); + + // ELF 头 + WriteElfHeader(writer, machineCode.Length); + + // 程序头 + var programHeaderOffset = 64; // ELF 头大小 + WriteProgramHeader(writer, programHeaderOffset, machineCode.Length); + + // 对齐到页面大小 + var codeOffset = (int)stream.Position; + var padding = 0x1000 - (codeOffset % 0x1000); + if (padding < 0x1000) + { + stream.Write(new byte[padding]); + } + codeOffset = (int)stream.Position; + + // 代码段 + stream.Write(machineCode, 0, machineCode.Length); + + // 更新程序头中的偏移 + stream.Position = programHeaderOffset + 8; // p_offset 字段 + writer.Write(codeOffset); + + return stream.ToArray(); + } + + private void WriteElfHeader(BinaryWriter writer, int codeSize) + { + // e_ident + writer.Write(new byte[] { 0x7F, (byte)'E', (byte)'L', (byte)'F' }); // Magic + writer.Write((byte)2); // EI_CLASS: 64-bit + writer.Write((byte)1); // EI_DATA: Little endian + writer.Write((byte)1); // EI_VERSION: Current + writer.Write((byte)0); // EI_OSABI: System V + writer.Write(new byte[8]); // EI_PAD + + writer.Write((ushort)2); // e_type: ET_EXEC (Executable) + writer.Write((ushort)0x3E); // e_machine: EM_X86_64 + writer.Write((uint)1); // e_version: Current + writer.Write((ulong)0x400000); // e_entry: Entry point (0x400000) + writer.Write((ulong)64); // e_phoff: Program header offset + writer.Write((ulong)0); // e_shoff: Section header offset (none) + writer.Write((uint)0); // e_flags + writer.Write((ushort)64); // e_ehsize: ELF header size + writer.Write((ushort)56); // e_phentsize: Program header entry size + writer.Write((ushort)1); // e_phnum: 1 program header + writer.Write((ushort)0); // e_shentsize + writer.Write((ushort)0); // e_shnum + writer.Write((ushort)0); // e_shstrndx + } + + private void WriteProgramHeader(BinaryWriter writer, int codeOffset, int codeSize) + { + writer.Write((uint)1); // p_type: PT_LOAD + writer.Write((uint)5); // p_flags: PF_R | PF_X + writer.Write((ulong)0x400000); // p_offset: Will be updated later + writer.Write((ulong)0x400000); // p_vaddr + writer.Write((ulong)0x400000); // p_paddr + writer.Write((ulong)codeSize); // p_filesz + writer.Write((ulong)codeSize); // p_memsz + writer.Write((ulong)0x1000); // p_align: Page alignment + } +} diff --git a/src/TinyCC.Core/Target/PeWriter.cs b/src/TinyCC.Core/Target/PeWriter.cs new file mode 100644 index 0000000..2e258cd --- /dev/null +++ b/src/TinyCC.Core/Target/PeWriter.cs @@ -0,0 +1,257 @@ +using System; +using System.IO; +using System.Text; + +namespace TinyCC.Core; + +/// +/// PE 可执行文件生成器(Windows) +/// +public sealed class PeWriter +{ + private const ushort DosSignature = 0x5A4D; // MZ + private const uint PeSignature = 0x00004550; // PE\0\0 + private const ushort MachineAmd64 = 0x8664; + private const ushort SectionCount = 2; // .text 和 .data + private const ushort DllCharacteristics = 0x8160; + private const uint SubsystemConsole = 3; + + /// + /// 生成 PE 可执行文件 + /// + public byte[] WriteExecutable(byte[] codeSection, byte[]? dataSection, string entryPoint) + { + using var stream = new MemoryStream(); + var writer = new BinaryWriter(stream); + + // DOS 头 + WriteDosHeader(writer); + + // DOS Stub + WriteDosStub(writer); + + // PE 签名 + var peSignatureOffset = (int)stream.Position; + writer.Write(PeSignature); + + // COFF 头 + var coffHeaderOffset = (int)stream.Position; + WriteCoffHeader(writer, codeSection, dataSection); + + // 可选头 + var optionalHeaderOffset = (int)stream.Position; + WriteOptionalHeader(writer, codeSection, dataSection); + + // 节表 + var sectionTableOffset = (int)stream.Position; + WriteSectionTable(writer, codeSection, dataSection); + + // 对齐到文件偏移 + var fileAlignment = 512; + var currentPos = (int)stream.Position; + var paddingToFirstSection = 512 - (currentPos % 512); + if (paddingToFirstSection < 512) + { + stream.Write(new byte[paddingToFirstSection]); + } + + // 代码节 + var textFileOffset = (int)stream.Position; + stream.Write(codeSection, 0, codeSection.Length); + + // 数据节 + var dataFileOffset = 0; + if (dataSection != null && dataSection.Length > 0) + { + // 对齐到文件边界 + var dataPadding = 512 - (codeSection.Length % 512); + if (dataPadding < 512) + { + stream.Write(new byte[dataPadding]); + } + dataFileOffset = (int)stream.Position; + stream.Write(dataSection, 0, dataSection.Length); + } + + // 返回完整 PE 文件 + return stream.ToArray(); + } + + private void WriteDosHeader(BinaryWriter writer) + { + writer.Write(DosSignature); // e_magic: MZ + writer.Write((ushort)0); // e_cblp + writer.Write((ushort)0); // e_cp + writer.Write((ushort)0); // e_crlc + writer.Write((ushort)0); // e_cparhdr + writer.Write((ushort)0); // e_minalloc + writer.Write((ushort)0xFFFF); // e_maxalloc + writer.Write((ushort)0); // e_ss + writer.Write((ushort)0); // e_sp + writer.Write((ushort)0); // e_csum + writer.Write((ushort)0); // e_ip + writer.Write((ushort)0); // e_cs + writer.Write((ushort)0x40); // e_lfarlc: DOS stub 偏移 + writer.Write((ushort)0); // e_ovno + writer.Write(new byte[8]); // e_res + writer.Write((ushort)0); // e_oemid + writer.Write((ushort)0); // e_oeminfo + writer.Write(new byte[20]); // e_res2 + writer.Write(0x80); // e_lfanew: PE 头偏移 + } + + private void WriteDosStub(BinaryWriter writer) + { + // 简单的 DOS stub: "This program cannot be run in DOS mode.\r\n\r\n$" + var stub = Encoding.ASCII.GetBytes("This program cannot be run in DOS mode.\r\n\r\n$"); + writer.Write(stub); + + // 填充到 0x80 字节 + var currentPos = writer.BaseStream.Position; + var padding = 0x80 - currentPos; + if (padding > 0) + { + writer.Write(new byte[padding]); + } + } + + private void WriteCoffHeader(BinaryWriter writer, byte[] codeSection, byte[]? dataSection) + { + writer.Write(MachineAmd64); // Machine: AMD64 + + var numberOfSections = dataSection != null && dataSection.Length > 0 ? (ushort)2 : (ushort)1; + writer.Write(numberOfSections); + + writer.Write((uint)0); // TimeDateStamp (可设置为实际时间戳) + writer.Write((uint)0); // PointerToSymbolTable + writer.Write((uint)0); // NumberOfSymbols + writer.Write((ushort)(96)); // SizeOfOptionalHeader (PE32+ 标准大小) + writer.Write((ushort)0x0022); // Characteristics: EXECUTABLE_IMAGE | LARGE_ADDRESS_AWARE + } + + private void WriteOptionalHeader(BinaryWriter writer, byte[] codeSection, byte[]? dataSection) + { + // PE32+ 魔数 + writer.Write((ushort)0x020B); // Magic: PE32+ + writer.Write((byte)14); // MajorLinkerVersion + writer.Write((byte)0); // MinorLinkerVersion + + // 代码大小 + writer.Write(AlignUp(codeSection.Length, 512)); // SizeOfCode + + var dataSize = dataSection != null ? AlignUp(dataSection.Length, 512) : 0; + writer.Write(dataSize); // SizeOfInitializedData + writer.Write((uint)0); // SizeOfUninitializedData + + // 入口点 RVA + var baseAddress = 0x00400000UL; + var textRva = 0x1000U; + writer.Write(textRva); // AddressOfEntryPoint + + // 代码基址 + writer.Write(textRva); // BaseOfCode + + // PE32+ 特有字段 + writer.Write((ulong)baseAddress); // ImageBase (64-bit) + writer.Write((uint)0x1000); // SectionAlignment + writer.Write((uint)0x200); // FileAlignment + + writer.Write((ushort)0); // MajorOperatingSystemVersion + writer.Write((ushort)0); // MinorOperatingSystemVersion + writer.Write((ushort)0); // MajorImageVersion + writer.Write((ushort)0); // MinorImageVersion + + // 子系统版本 + writer.Write((ushort)6); // MajorSubsystemVersion (Windows Vista+) + writer.Write((ushort)0); // MinorSubsystemVersion + + writer.Write((uint)0); // Win32VersionValue + + // 镜像大小 + var imageSize = 0x1000 + (uint)AlignUp(codeSection.Length, 0x1000); + if (dataSize > 0) + { + imageSize += (uint)AlignUp(dataSize, 0x1000); + } + writer.Write(imageSize); // SizeOfImage + + writer.Write((uint)0x200); // SizeOfHeaders + writer.Write((uint)0); // CheckSum + writer.Write((ushort)SubsystemConsole); // Subsystem: Console + writer.Write((ushort)DllCharacteristics); // DllCharacteristics + + // 栈和堆大小 (64-bit) + writer.Write((ulong)0x100000); // SizeOfStackReserve + writer.Write((ulong)0x1000); // SizeOfStackCommit + writer.Write((ulong)0x100000); // SizeOfHeapReserve + writer.Write((ulong)0x1000); // SizeOfHeapCommit + + writer.Write((uint)0); // LoaderFlags + writer.Write((uint)16); // NumberOfRvaAndSizes + + // 数据目录 (16 项) + for (int i = 0; i < 16; i++) + { + writer.Write((uint)0); // VirtualAddress + writer.Write((uint)0); // Size + } + } + + private void WriteSectionTable(BinaryWriter writer, byte[] codeSection, byte[]? dataSection) + { + // .text 节 + WriteSectionHeader(writer, ".text", + codeSection.Length, + 0x1000, // VirtualAddress + AlignUp(codeSection.Length, 512), + 0x1000, // PointerToRawData + 0x60000020); // Characteristics: CODE | EXECUTE | READ + + // .data 节 (如果有) + if (dataSection != null && dataSection.Length > 0) + { + var textRawSize = AlignUp(codeSection.Length, 512); + WriteSectionHeader(writer, ".data", + dataSection.Length, + 0x2000, // VirtualAddress + AlignUp(dataSection.Length, 512), + 0x1000 + (uint)textRawSize, // PointerToRawData + 0xC0000040); // Characteristics: INITIALIZED_DATA | READ | WRITE + } + } + + private void WriteSectionHeader(BinaryWriter writer, string name, int virtualSize, + uint virtualAddress, int rawSize, uint rawAddress, uint characteristics) + { + // 节名 (8 字节) + var nameBytes = Encoding.ASCII.GetBytes(name); + var nameBuffer = new byte[8]; + Array.Copy(nameBytes, nameBuffer, Math.Min(nameBytes.Length, 8)); + writer.Write(nameBuffer); + + writer.Write(virtualSize); // VirtualSize + writer.Write(virtualAddress); // VirtualAddress + writer.Write(rawSize); // SizeOfRawData + writer.Write(rawAddress); // PointerToRawData + writer.Write((uint)0); // PointerToRelocations + writer.Write((uint)0); // PointerToLinenumbers + writer.Write((ushort)0); // NumberOfRelocations + writer.Write((ushort)0); // NumberOfLinenumbers + writer.Write(characteristics); // Characteristics + } + + private static int AlignUp(int value, int alignment) + { + return (value + alignment - 1) & ~(alignment - 1); + } +} + +// PE 可选头结构大小计算 +internal struct OptionalHeader64 +{ + // 仅用于 sizeof 计算 + public ushort Magic; + public byte MajorLinkerVersion; + public byte MinorLinkerVersion; + // ... 其他字段 +} diff --git a/src/TinyCC.Core/TinyCC.Core.csproj b/src/TinyCC.Core/TinyCC.Core.csproj new file mode 100644 index 0000000..fa71b7a --- /dev/null +++ b/src/TinyCC.Core/TinyCC.Core.csproj @@ -0,0 +1,9 @@ + + + + net8.0 + enable + enable + + + diff --git a/test_output b/test_output new file mode 100644 index 0000000000000000000000000000000000000000..44765cc7a86c1a8b8caa69df6e61d4ffcaebbadd GIT binary patch literal 4159 zcmb<-^>JfjWMqH=CI&kO2+sj51R-Iv5ZZyk0;+}qOtXT71z>6%pbQ9Y59PsVggU6i zC^Z@aqaiRF0;3@?8UmvsFd71*Aut*OqaiRF0;3@?8UmvsK=Tj?_2_)+(f9_`d-v#e k{Q$%-JQz=SbiRo_43gT<40XXi7AV7``N@YDAU0SH0LL{MasU7T literal 0 HcmV?d00001 diff --git a/tests/TinyCC.E2ETests/E2ETestRunner.cs b/tests/TinyCC.E2ETests/E2ETestRunner.cs new file mode 100644 index 0000000..06701e3 --- /dev/null +++ b/tests/TinyCC.E2ETests/E2ETestRunner.cs @@ -0,0 +1,238 @@ +using System.Diagnostics; +using System.Text; +using TinyCC.Core; + +namespace TinyCC.E2ETests; + +/// +/// 端到端测试运行器 +/// 负责编译 C 源代码并执行生成的 ELF 文件 +/// +public sealed class E2ETestRunner +{ + private readonly string _tempDirectory; + + public E2ETestRunner(string? tempDirectory = null) + { + _tempDirectory = tempDirectory ?? "/tmp/tinycc-debug"; // 固定路径便于调试 + Directory.CreateDirectory(_tempDirectory); + } + + /// + /// 运行单个测试用例 + /// + public async Task RunTestAsync(TestCase testCase) + { + var sourceFile = Path.Combine(_tempDirectory, $"{testCase.Name}.c"); + var outputFile = Path.Combine(_tempDirectory, $"{testCase.Name}.out"); + + try + { + // 写入源代码 + await File.WriteAllTextAsync(sourceFile, testCase.SourceCode); + + // 编译 + var compileResult = Compile(sourceFile, outputFile); + if (!compileResult.Success) + { + return new TestResult( + testCase.Name, + false, + -1, + null, + $"编译失败: {compileResult.Error}" + ); + } + + // 执行 + var executeResult = await ExecuteAsync(outputFile); + + // 调试:如果失败,打印 objdump + if (executeResult.ExitCode != testCase.ExpectedExitCode) + { + try + { + var psi = new ProcessStartInfo("objdump", $"-d -M intel \"{outputFile}\"") + { + RedirectStandardOutput = true, + UseShellExecute = false + }; + using var p = Process.Start(psi)!; + var dump = await p.StandardOutput.ReadToEndAsync(); + Console.WriteLine($"[DEBUG] objdump:\n{dump}"); + } + catch { } + } + + // 验证结果 + bool passed = executeResult.ExitCode == testCase.ExpectedExitCode; + if (testCase.ExpectedOutput != null && executeResult.Output != testCase.ExpectedOutput) + { + passed = false; + } + + return new TestResult( + testCase.Name, + passed, + executeResult.ExitCode, + executeResult.Output, + passed ? null : $"期望退出码 {testCase.ExpectedExitCode},实际 {executeResult.ExitCode}" + ); + } + catch (Exception ex) + { + return new TestResult( + testCase.Name, + false, + -1, + null, + $"测试执行异常: {ex.Message}" + ); + } + } + + private CompilationResult Compile(string sourceFile, string outputFile) + { + try + { + var errorReporter = new ErrorReporter(); + var driver = new CompilerDriver(errorReporter); + + var options = new CompilationOptions( + SourceFile: sourceFile, + OutputFile: outputFile, + Platform: TargetPlatform.LinuxX64 + ); + + var result = driver.Compile(options); + + if (errorReporter.HasErrors) + { + var errors = string.Join("\n", errorReporter.GetErrors().Select(e => e.ToString())); + return new CompilationResult(false, errors); + } + + if (!result.Success) + { + return new CompilationResult(false, result.Message); + } + + return new CompilationResult(true, null); + } + catch (Exception ex) + { + return new CompilationResult(false, ex.Message); + } + } + + private async Task ExecuteAsync(string executablePath) + { + try + { + // 设置执行权限 + var chmod = Process.Start(new ProcessStartInfo + { + FileName = "chmod", + Arguments = $"+x \"{executablePath}\"", + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false + }); + + if (chmod != null) + { + await chmod.WaitForExitAsync(); + } + + var psi = new ProcessStartInfo + { + FileName = executablePath, + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true + }; + + using var process = Process.Start(psi) + ?? throw new InvalidOperationException($"无法启动进程: {executablePath}"); + + var outputBuilder = new StringBuilder(); + var errorBuilder = new StringBuilder(); + + process.OutputDataReceived += (sender, e) => + { + if (e.Data != null) + { + outputBuilder.AppendLine(e.Data); + } + }; + + process.ErrorDataReceived += (sender, e) => + { + if (e.Data != null) + { + errorBuilder.AppendLine(e.Data); + } + }; + + process.BeginOutputReadLine(); + process.BeginErrorReadLine(); + + await process.WaitForExitAsync(); + + var output = outputBuilder.ToString().TrimEnd(); + var error = errorBuilder.ToString().TrimEnd(); + + return new ExecutionResult( + process.ExitCode, + string.IsNullOrEmpty(output) ? error : output + ); + } + catch (Exception ex) + { + return new ExecutionResult(-1, $"执行失败: {ex.Message}"); + } + } + + public void Cleanup() + { + // 调试期间保留文件 + } +} + +/// +/// 测试用例 +/// +public record TestCase( + string Name, + string SourceCode, + int ExpectedExitCode, + string? ExpectedOutput = null +); + +/// +/// 测试结果 +/// +public record TestResult( + string TestCaseName, + bool Passed, + int ActualExitCode, + string? ActualOutput, + string? ErrorMessage +); + +/// +/// 编译结果 +/// +public record CompilationResult( + bool Success, + string? Error +); + +/// +/// 执行结果 +/// +public record ExecutionResult( + int ExitCode, + string Output +); diff --git a/tests/TinyCC.E2ETests/E2ETests.cs b/tests/TinyCC.E2ETests/E2ETests.cs new file mode 100644 index 0000000..2ef9c90 --- /dev/null +++ b/tests/TinyCC.E2ETests/E2ETests.cs @@ -0,0 +1,58 @@ +using Xunit; +using Xunit.Abstractions; + +namespace TinyCC.E2ETests; + +public class E2ETests : IAsyncLifetime +{ + private readonly ITestOutputHelper _output; + private readonly E2ETestRunner _runner; + + public E2ETests(ITestOutputHelper output) + { + _output = output; + _runner = new E2ETestRunner(); + } + + public Task InitializeAsync() => Task.CompletedTask; + + public Task DisposeAsync() + { + _runner.Cleanup(); + return Task.CompletedTask; + } + + [Theory] + [MemberData(nameof(GetBasicTestCases))] + public async Task BasicFeature_ShouldCompileAndRun(TestCase testCase) + { + // Act + var result = await _runner.RunTestAsync(testCase); + + // Assert + _output.WriteLine($"测试: {testCase.Name}"); + _output.WriteLine($"源代码:\n{testCase.SourceCode}"); + _output.WriteLine($"结果: {(result.Passed ? "通过" : "失败")}"); + if (!result.Passed) + { + _output.WriteLine($"错误: {result.ErrorMessage}"); + _output.WriteLine($"实际退出码: {result.ActualExitCode}"); + if (result.ActualOutput != null) + { + _output.WriteLine($"实际输出: {result.ActualOutput}"); + } + } + + Assert.True(result.Passed, result.ErrorMessage); + } + + public static TheoryData GetBasicTestCases() + { + var data = new TheoryData(); + foreach (var testCase in TestCases.GetBasicTests()) + { + data.Add(testCase); + } + return data; + } +} diff --git a/tests/TinyCC.E2ETests/TestCases.cs b/tests/TinyCC.E2ETests/TestCases.cs new file mode 100644 index 0000000..6f47107 --- /dev/null +++ b/tests/TinyCC.E2ETests/TestCases.cs @@ -0,0 +1,231 @@ +namespace TinyCC.E2ETests; + +/// +/// 端到端测试用例集合 +/// +public static class TestCases +{ + /// + /// 获取所有基础测试用例 + /// + public static IEnumerable GetBasicTests() + { + // 最简单的测试:返回常量 + yield return new TestCase( + Name: "simple_return_zero", + SourceCode: """ + int main() { + return 0; + } + """, + ExpectedExitCode: 0 + ); + + // 返回常量 42 + yield return new TestCase( + Name: "simple_return_42", + SourceCode: """ + int main() { + return 42; + } + """, + ExpectedExitCode: 42 + ); + + // 算术运算测试 - 使用 if-else 替代三元运算符 + yield return new TestCase( + Name: "arithmetic_add", + SourceCode: """ + int add(int a, int b) { + return a + b; + } + int main() { + int result; + result = add(3, 4); + if (result == 7) { + return 0; + } + return 1; + } + """, + ExpectedExitCode: 0 + ); + + // 控制流测试 - for 循环 + yield return new TestCase( + Name: "control_flow_for_loop", + SourceCode: """ + int main() { + int sum; + int i; + sum = 0; + for (i = 1; i <= 10; i = i + 1) { + sum = sum + i; + } + if (sum == 55) { + return 0; + } + return 1; + } + """, + ExpectedExitCode: 0 + ); + + // 控制流测试 - while 循环 + yield return new TestCase( + Name: "control_flow_while_loop", + SourceCode: """ + int main() { + int sum; + int i; + sum = 0; + i = 1; + while (i <= 10) { + sum = sum + i; + i = i + 1; + } + if (sum == 55) { + return 0; + } + return 1; + } + """, + ExpectedExitCode: 0 + ); + + // 函数调用测试 + yield return new TestCase( + Name: "function_call", + SourceCode: """ + int multiply(int a, int b) { + return a * b; + } + int main() { + int result; + result = multiply(6, 7); + if (result == 42) { + return 0; + } + return 1; + } + """, + ExpectedExitCode: 0 + ); + + // 条件分支测试 + yield return new TestCase( + Name: "conditional_branch", + SourceCode: """ + int max(int a, int b) { + if (a > b) { + return a; + } else { + return b; + } + } + int main() { + int result; + result = max(10, 20); + if (result == 20) { + return 0; + } + return 1; + } + """, + ExpectedExitCode: 0 + ); + + // 变量赋值测试 + yield return new TestCase( + Name: "variable_assignment", + SourceCode: """ + int main() { + int x; + x = 42; + if (x == 42) { + return 0; + } + return 1; + } + """, + ExpectedExitCode: 0 + ); + + // 递归函数测试 + yield return new TestCase( + Name: "recursive_factorial", + SourceCode: """ + int factorial(int n) { + if (n <= 1) { + return 1; + } + return n * factorial(n - 1); + } + int main() { + int result; + result = factorial(5); + if (result == 120) { + return 0; + } + return 1; + } + """, + ExpectedExitCode: 0 + ); + + // 局部变量作用域测试 + yield return new TestCase( + Name: "local_variable_scope", + SourceCode: """ + int main() { + int x; + x = 10; + if (x > 5) { + int y; + y = 20; + x = y; + } + if (x == 20) { + return 0; + } + return 1; + } + """, + ExpectedExitCode: 0 + ); + } + + /// + /// 获取预期失败的测试用例(用于记录当前不支持的功能) + /// + public static IEnumerable GetKnownFailures() + { + // 指针测试 - 当前可能不支持 + yield return new TestCase( + Name: "pointers_basic", + SourceCode: """ + int main() { + int x = 42; + int *p = &x; + return *p == 42 ? 0 : 1; + } + """, + ExpectedExitCode: 0 + ); + + // 数组测试 - 当前可能不支持 + yield return new TestCase( + Name: "arrays_basic", + SourceCode: """ + int main() { + int arr[3]; + arr[0] = 1; + arr[1] = 2; + arr[2] = 3; + return arr[1] == 2 ? 0 : 1; + } + """, + ExpectedExitCode: 0 + ); + } +} diff --git a/tests/TinyCC.E2ETests/TinyCC.E2ETests.csproj b/tests/TinyCC.E2ETests/TinyCC.E2ETests.csproj new file mode 100644 index 0000000..bd4568d --- /dev/null +++ b/tests/TinyCC.E2ETests/TinyCC.E2ETests.csproj @@ -0,0 +1,27 @@ + + + + net8.0 + enable + enable + + false + true + + + + + + + + + + + + + + + + + + diff --git a/tests/TinyCC.Tests/TinyCC.Tests.csproj b/tests/TinyCC.Tests/TinyCC.Tests.csproj new file mode 100644 index 0000000..bd4568d --- /dev/null +++ b/tests/TinyCC.Tests/TinyCC.Tests.csproj @@ -0,0 +1,27 @@ + + + + net8.0 + enable + enable + + false + true + + + + + + + + + + + + + + + + + + diff --git a/tests/TinyCC.Tests/UnitTest1.cs b/tests/TinyCC.Tests/UnitTest1.cs new file mode 100644 index 0000000..dcdd2c0 --- /dev/null +++ b/tests/TinyCC.Tests/UnitTest1.cs @@ -0,0 +1,10 @@ +namespace TinyCC.Tests; + +public class UnitTest1 +{ + [Fact] + public void Test1() + { + + } +} \ No newline at end of file diff --git a/tests/TinyCC.Tests/UnitTests.cs b/tests/TinyCC.Tests/UnitTests.cs new file mode 100644 index 0000000..9874c42 --- /dev/null +++ b/tests/TinyCC.Tests/UnitTests.cs @@ -0,0 +1,86 @@ +using TinyCC.Core; + +namespace TinyCC.Tests; + +public class LexerTests +{ + private readonly IErrorReporter _errorReporter; + + public LexerTests() + { + _errorReporter = new ErrorReporter(); + } + + [Fact] + public void Tokenize_SimpleExpression_ReturnsTokens() + { + var source = "int x = 3 + 4;"; + var lexer = new Lexer(source, "test.c", _errorReporter); + var tokens = lexer.Tokenize().ToList(); + + Assert.False(_errorReporter.HasErrors); + Assert.Contains(tokens, t => t.Type == TokenType.Int); + Assert.Contains(tokens, t => t.Type == TokenType.Identifier && t.Lexeme == "x"); + Assert.Contains(tokens, t => t.Type == TokenType.Assign); + Assert.Contains(tokens, t => t.Type == TokenType.IntLiteral && Convert.ToInt64(t.Value!) == 3); + Assert.Contains(tokens, t => t.Type == TokenType.IntLiteral && Convert.ToInt64(t.Value!) == 4); + Assert.Contains(tokens, t => t.Type == TokenType.Plus); + Assert.Contains(tokens, t => t.Type == TokenType.Semicolon); + } + + [Fact] + public void Tokenize_FunctionDefinition_ReturnsTokens() + { + var source = "int add(int a, int b) { return a + b; }"; + var lexer = new Lexer(source, "test.c", _errorReporter); + var tokens = lexer.Tokenize().ToList(); + + Assert.False(_errorReporter.HasErrors); + Assert.Contains(tokens, t => t.Type == TokenType.Int); + Assert.Contains(tokens, t => t.Type == TokenType.Identifier && t.Lexeme == "add"); + Assert.Contains(tokens, t => t.Type == TokenType.LeftParen); + Assert.Contains(tokens, t => t.Type == TokenType.RightParen); + Assert.Contains(tokens, t => t.Type == TokenType.LeftBrace); + Assert.Contains(tokens, t => t.Type == TokenType.Return); + Assert.Contains(tokens, t => t.Type == TokenType.RightBrace); + } + + [Fact] + public void Tokenize_SkipsComments() + { + var source = "int x; // this is a comment\nint y;"; + var lexer = new Lexer(source, "test.c", _errorReporter); + var tokens = lexer.Tokenize().ToList(); + + Assert.False(_errorReporter.HasErrors); + var identifiers = tokens.Where(t => t.Type == TokenType.Identifier).ToList(); + Assert.Equal(2, identifiers.Count); + Assert.Equal("x", identifiers[0].Lexeme); + Assert.Equal("y", identifiers[1].Lexeme); + } +} + +public class ParserTests +{ + private readonly IErrorReporter _errorReporter; + + public ParserTests() + { + _errorReporter = new ErrorReporter(); + } + + [Fact] + public void Parse_SimpleFunction_ReturnsAst() + { + var source = "int add(int a, int b) { return a + b; }"; + var lexer = new Lexer(source, "test.c", _errorReporter); + var tokens = lexer.Tokenize().ToList(); + var parser = new Parser(tokens, _errorReporter); + + var ast = parser.Parse(); + + Assert.False(_errorReporter.HasErrors); + Assert.NotNull(ast); + Assert.IsType(ast); + } +}