commit initial

This commit is contained in:
baishi
2026-05-30 07:59:28 +08:00
commit cbefad339f
39 changed files with 7736 additions and 0 deletions

14
.gitignore vendored Normal file
View File

@@ -0,0 +1,14 @@
## .NET
bin/
obj/
## IDE
.vscode/
.vs/
*.userprefs
*.user
.idea/
## Test artifacts
test.c
test_output/

View File

@@ -0,0 +1,392 @@
# Tiny C Compiler in C# - Technical Design
Feature Name: tiny-c-compiler-csharp
Updated: 2026-05-20
## Description
本项目实现一个使用 C# 编写的 C 语言编译器,参考 TCC 的设计理念。编译器将 C 源代码直接编译为 x86/x64 本地机器码,不经过 MSIL 中间层。项目目标是实现一个轻量级、编译速度快的 C 编译器,支持 C99 标准的核心子集。
## Architecture
```mermaid
graph TD
A["C Source Code\n.c files"] --> B["Preprocessor\n预处理"]
B --> C["Lexer\n词法分析"]
C --> D["Parser\n语法分析"]
D --> E["Semantic Analyzer\n语义分析"]
E --> F["IR Generator\n中间表示生成"]
F --> G["Code Generator\n代码生成 x86/x64"]
G --> H["Object File\nPE/ELF"]
H --> I["Executable\n可执行文件"]
J["Error Handler"] -.-> B
J -.-> C
J -.-> D
J -.-> E
J -.-> F
J -.-> G
K["Symbol Table"] -.-> E
K -.-> F
K -.-> G
L["Type System"] -.-> E
L -.-> F
```
### Architecture Overview
编译器采用传统的单遍或多遍编译架构,分为以下主要阶段:
1. **预处理阶段**:处理宏展开、条件编译、头文件包含
2. **前端阶段**:词法分析、语法分析、语义分析
3. **中间阶段**IR 生成与优化
4. **后端阶段**x86/x64 代码生成与可执行文件输出
## Components and Interfaces
### 1. Preprocessor (预处理器)
**职责**
- 处理 `#include` 指令,展开头文件
- 处理 `#define` 宏定义和宏展开
- 处理条件编译 `#ifdef``#ifndef``#endif``#if`
- 处理 `#pragma` 指令
**接口**
```csharp
public interface IPreprocessor
{
string Preprocess(string sourceCode, string sourceFile);
void AddIncludePath(string path);
void DefineMacro(string name, string? value);
}
```
### 2. Lexer (词法分析器)
**职责**
- 将预处理后的源代码分解为 token 流
- 识别关键字、标识符、字面量、运算符、分隔符
- 跳过注释和空白
- 报告词法错误
**接口**
```csharp
public interface ILexer
{
IEnumerable<Token> Tokenize(string source);
}
public enum TokenType
{
Keyword, Identifier, IntLiteral, FloatLiteral,
CharLiteral, StringLiteral, Operator, Separator,
EOF, Error
}
public record Token(
TokenType Type,
string Lexeme,
object? Value,
SourceLocation Location
);
```
### 3. Parser (语法分析器)
**职责**
- 实现递归下降解析器
- 构建抽象语法树AST
- 处理 C 语言的运算符优先级
- 报告语法错误
**接口**
```csharp
public interface IParser
{
AstNode Parse();
}
public abstract record AstNode(SourceLocation Location);
public record ProgramNode(List<DeclarationNode> Declarations, SourceLocation Location) : AstNode(Location);
public record FunctionDeclarationNode(
TypeNode ReturnType,
string Name,
List<ParameterNode> Parameters,
BlockStatementNode Body,
SourceLocation Location
) : AstNode(Location);
```
### 4. Semantic Analyzer (语义分析器)
**职责**
- 类型检查与类型推断
- 符号表管理
- 作用域管理
- 语义错误报告
**接口**
```csharp
public interface ISemanticAnalyzer
{
void Analyze(AstNode root);
}
public interface ISymbolTable
{
void EnterScope();
void ExitScope();
void AddSymbol(string name, Symbol symbol);
Symbol? Lookup(string name);
}
```
### 5. IR Generator (中间表示生成器)
**职责**
- 将 AST 转换为三地址码形式的 IR
- 构建控制流图CFG
- 支持基本优化(常量折叠、死代码消除)
**接口**
```csharp
public interface IIrGenerator
{
IrProgram Generate(AstNode ast);
}
public record IrProgram(List<IrFunction> Functions);
public record IrFunction(string Name, List<IrBasicBlock> BasicBlocks);
public record IrBasicBlock(string Label, List<IrInstruction> Instructions);
public abstract record IrInstruction;
```
### 6. Code Generator (代码生成器)
**职责**
- 将 IR 转换为 x86/x64 机器码
- 寄存器分配
- 栈帧管理
- 遵循平台调用约定
**接口**
```csharp
public interface ICodeGenerator
{
byte[] Generate(IrProgram program, TargetArchitecture architecture);
}
public enum TargetArchitecture { X86, X64 }
```
### 7. Object File Writer (目标文件写入器)
**职责**
- 生成 PE 格式文件Windows
- 生成 ELF 格式文件Linux
- 处理重定位信息
- 设置入口点
**接口**
```csharp
public interface IObjectFileWriter
{
byte[] WriteExecutable(byte[] machineCode, TargetPlatform platform);
}
public enum TargetPlatform { WindowsX86, WindowsX64, LinuxX86, LinuxX64 }
```
### 8. Compiler Driver (编译器驱动)
**职责**
- 协调各个编译阶段
- 处理命令行参数
- 错误汇总与报告
- 管理编译流程
**接口**
```csharp
public class CompilerDriver
{
public int Run(string[] args);
public CompilationResult Compile(CompilationOptions options);
}
```
## Data Models
### Token 模型
```csharp
public readonly struct SourceLocation
{
public string FileName { get; }
public int Line { get; }
public int Column { get; }
}
public enum TokenType
{
// 关键字
Int, Char, Float, Double, Long, Short, Void,
If, Else, While, For, Do, Switch, Case, Default,
Break, Continue, Return, Struct, Union, Typedef,
// 字面量
IntLiteral, FloatLiteral, CharLiteral, StringLiteral,
// 标识符
Identifier,
// 运算符
Plus, Minus, Star, Slash, Percent,
Equal, NotEqual, Less, Greater, LessEqual, GreaterEqual,
Assign, PlusAssign, MinusAssign, StarAssign, SlashAssign,
And, Or, Not, BitAnd, BitOr, BitXor,
LeftShift, RightShift,
// 分隔符
LeftParen, RightParen, LeftBrace, RightBrace,
LeftBracket, RightBracket,
Semicolon, Comma, Dot, Arrow, Colon,
// 预处理器
HashInclude, HashDefine, HashIf, HashIfdef, HashIfndef, HashElse, HashEndif,
// 特殊
EOF, Error
}
```
### Type System 模型
```csharp
public abstract record CType(string Name);
public record PrimitiveType(TypeKind Kind) : CType(Kind.ToString())
{
public enum TypeKind { Void, Char, Short, Int, Long, Float, Double }
}
public record PointerType(CType BaseType) : CType($"{BaseType}*");
public record ArrayType(CType ElementType, int Size) : CType($"{ElementType}[{Size}]");
public record StructType(string Name, List<FieldDeclaration> Fields) : CType(Name);
public record FunctionType(CType ReturnType, List<CType> ParameterTypes) : CType("function");
```
### IR 指令模型
```csharp
public abstract record IrInstruction;
public record IrBinaryOp(IrTemp Dest, IrBinaryOpType Op, IrValue Left, IrValue Right) : IrInstruction;
public record IrUnaryOp(IrTemp Dest, IrUnaryOpType Op, IrValue Source) : IrInstruction;
public record IrLoad(IrTemp Dest, IrValue Address) : IrInstruction;
public record IrStore(IrValue Address, IrValue Value) : IrInstruction;
public record IrCall(IrTemp? Dest, string FunctionName, List<IrValue> Arguments) : IrInstruction;
public record IrJump(string TargetLabel) : IrInstruction;
public record IrBranch(IrValue Condition, string TrueLabel, string FalseLabel) : IrInstruction;
public record IrReturn(IrValue? Value) : IrInstruction;
public record IrLabel(string LabelName) : IrInstruction;
public enum IrBinaryOpType { Add, Sub, Mul, Div, Mod, And, Or, Xor, Shl, Shr, Eq, Ne, Lt, Gt, Le, Ge }
public enum IrUnaryOpType { Neg, Not, BitNot, Deref }
public abstract record IrValue;
public record IrTemp(string Name, CType Type) : IrValue;
public record IrConstant(long Value, CType Type) : IrValue;
public record IrGlobal(string Name, CType Type) : IrValue;
```
## Correctness Properties
### 不变量
1. **类型安全**: 所有 IR 指令的操作数类型必须匹配
2. **作用域正确性**: 符号查找必须遵循词法作用域规则
3. **控制流完整性**: 所有基本块必须有明确的前驱和后继
4. **寄存器一致性**: 代码生成前后寄存器状态必须一致
### 约束条件
1. 生成的机器码必须符合 x86/x64 指令集规范
2. 函数调用必须遵循目标平台的 ABIApplication Binary Interface
3. 栈帧布局必须保证栈指针对齐x64 要求 16 字节对齐)
4. 可执行文件格式必须符合 PE 或 ELF 规范
## Error Handling
### 错误分类
| 错误类型 | 阶段 | 处理方式 |
|---------|------|---------|
| 词法错误 | Lexer | 报告错误位置,跳过错误 token |
| 语法错误 | Parser | 报告期望的 token尝试错误恢复 |
| 类型错误 | Semantic | 报告类型不匹配详情 |
| 未声明符号 | Semantic | 报告符号名称和位置 |
| 代码生成错误 | CodeGen | 报告不支持的 IR 指令 |
### 错误报告接口
```csharp
public record ErrorInfo(
ErrorLevel Level, // Warning, Error, Fatal
string Message,
SourceLocation Location,
string? Suggestion = null
);
public interface IErrorReporter
{
void Report(ErrorInfo error);
bool HasErrors { get; }
IEnumerable<ErrorInfo> GetErrors();
}
```
## Test Strategy
### 单元测试
1. **Lexer 测试**:验证各种 token 的正确识别
2. **Parser 测试**:验证各种 C 语法的 AST 构建
3. **Semantic 测试**:验证类型检查和符号表
4. **IR 测试**:验证 AST 到 IR 的转换
5. **CodeGen 测试**:验证 IR 到机器码的转换
### 集成测试
1. **端到端测试**:编译简单 C 程序并验证输出
2. **回归测试**:使用 TCC 测试套件进行对比测试
3. **性能测试**:测量编译速度和生成代码质量
### 测试用例示例
```c
// test_hello.c
#include <stdio.h>
int main() {
printf("Hello, World!\n");
return 0;
}
// test_arithmetic.c
int add(int a, int b) {
return a + b;
}
int main() {
int result = add(3, 4);
return result - 7; // should return 0
}
// test_control_flow.c
int factorial(int n) {
if (n <= 1) return 1;
return n * factorial(n - 1);
}
```
## References
[^1]: (TCC Source) - Tiny C Compiler 源码 https://repo.or.cz/tinycc.git
[^2]: (PE Format) - Microsoft PE 和 COFF 规范 https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
[^3]: (ELF Format) - ELF 规范 https://refspecs.linuxfoundation.org/elf/elf.pdf
[^4]: (x64 ABI) - System V AMD64 ABI https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
[^5]: (x86 Calling Conventions) - x86 调用约定 https://en.wikipedia.org/wiki/X86_calling_conventions

View File

@@ -0,0 +1,116 @@
# Requirements Document
## Introduction
本项目旨在开发一个使用 C# 语言编写的 C 语言编译器,参考 TCCTiny C Compiler的设计理念。编译器将 C 源代码直接编译为 x86/x64 本地机器码,而非 MSILMicrosoft Intermediate Language。目标是实现一个轻量级、快速的 C 编译器,支持 C99 标准的核心特性。
## Glossary
- **TCC**: Tiny C Compiler一个小型、快速的 C 编译器
- **本地代码**: 直接可在 CPU 上执行的机器码x86/x64
- **MSIL**: Microsoft Intermediate Language.NET 平台的中间语言
- **代码生成**: 将中间表示转换为目标机器码的过程
- **词法分析**: 将源代码分解为 token 流的过程
- **语法分析**: 将 token 流构建为抽象语法树AST的过程
- **语义分析**: 验证 AST 的语义正确性并进行类型检查
- **目标平台**: x86 (32-bit) 和 x64 (64-bit) Windows/Linux
## Requirements
### Requirement 1: 词法分析器
**User Story:** AS 一个 C 编译器开发者I WANT 实现词法分析器SO THAT 可以将 C 源代码转换为 token 流
#### Acceptance Criteria
1. WHEN 接收到 C 源代码文件,词法分析器 SHALL 输出 token 流
2. WHEN 遇到 C 语言关键字if, else, while, for, int, char 等),词法分析器 SHALL 识别为关键字 token
3. WHEN 遇到标识符,词法分析器 SHALL 识别为标识符 token
4. WHEN 遇到字面量(整数、浮点数、字符、字符串),词法分析器 SHALL 识别为相应的字面量 token
5. WHEN 遇到注释(单行 // 和多行 /* */),词法分析器 SHALL 忽略注释内容
6. WHEN 遇到词法错误,词法分析器 SHALL 报告错误位置和错误信息
### Requirement 2: 语法分析器
**User Story:** AS 一个 C 编译器开发者I WANT 实现语法分析器SO THAT 可以将 token 流构建为抽象语法树AST
#### Acceptance Criteria
1. WHEN 接收到 token 流,语法分析器 SHALL 构建 AST
2. WHEN 遇到语法错误,语法分析器 SHALL 报告错误位置和错误描述
3. WHILE 解析表达式,语法分析器 SHALL 正确处理运算符优先级
4. WHEN 解析函数定义,语法分析器 SHALL 识别函数名、参数列表和函数体
5. WHEN 解析控制流语句if, while, for, switch语法分析器 SHALL 构建对应的控制流 AST 节点
### Requirement 3: 语义分析器
**User Story:** AS 一个 C 编译器开发者I WANT 实现语义分析器SO THAT 可以验证 AST 的语义正确性
#### Acceptance Criteria
1. WHEN 接收到 AST语义分析器 SHALL 执行类型检查
2. WHEN 遇到未声明的变量,语义分析器 SHALL 报告错误
3. WHEN 遇到类型不匹配的赋值或运算,语义分析器 SHALL 报告类型错误
4. WHEN 遇到函数调用,语义分析器 SHALL 验证函数签名和参数类型
5. WHILE 解析作用域,语义分析器 SHALL 正确管理变量的作用域
### Requirement 4: 中间表示IR生成
**User Story:** AS 一个 C 编译器开发者I WANT 生成中间表示SO THAT 可以优化并转换为目标机器码
#### Acceptance Criteria
1. WHEN 接收到语义分析后的 ASTIR 生成器 SHALL 输出三地址码形式的 IR
2. WHEN 生成 IRIR 生成器 SHALL 处理控制流图的构建
3. WHILE 生成 IRIR 生成器 SHALL 支持基本的数据流分析
### Requirement 5: 代码生成器x86/x64
**User Story:** AS 一个 C 编译器开发者I WANT 实现代码生成器SO THAT 可以将 IR 转换为 x86/x64 本地机器码
#### Acceptance Criteria
1. WHEN 接收到 IR代码生成器 SHALL 输出 x86/x64 机器码
2. WHEN 处理函数调用,代码生成器 SHALL 遵循目标平台的调用约定calling convention
3. WHEN 处理局部变量,代码生成器 SHALL 分配栈空间
4. WHEN 处理全局变量,代码生成器 SHALL 在数据段分配空间
5. IF 目标平台为 x64代码生成器 SHALL 使用 x64 寄存器
6. IF 目标平台为 x86代码生成器 SHALL 使用 x86 寄存器
### Requirement 6: 可执行文件生成
**User Story:** AS 一个 C 编译器用户I WANT 编译器生成可执行文件SO THAT 可以直接运行编译后的程序
#### Acceptance Criteria
1. WHEN 编译完成,编译器 SHALL 生成 PEWindows或 ELFLinux格式的可执行文件
2. WHEN 生成 PE 文件,编译器 SHALL 包含正确的 PE 头和节表
3. WHEN 生成 ELF 文件,编译器 SHALL 包含正确的 ELF 头和节头
4. WHEN 生成的可执行文件被操作系统加载,操作系统 SHALL 能够正确执行程序
### Requirement 7: 命令行接口
**User Story:** AS 一个 C 编译器用户I WANT 使用命令行编译 C 文件SO THAT 可以方便地集成到构建系统中
#### Acceptance Criteria
1. WHEN 用户提供源文件路径,编译器 SHALL 编译并生成可执行文件
2. WHEN 用户指定输出文件名,编译器 SHALL 使用指定的文件名
3. WHEN 用户指定目标架构x86/x64编译器 SHALL 生成对应架构的代码
4. IF 编译过程中发生错误,编译器 SHALL 输出错误信息并返回非零退出码
5. WHEN 用户请求帮助信息,编译器 SHALL 显示使用说明
### Requirement 8: C99 核心特性支持
**User Story:** AS 一个 C 程序员I WANT 编译器支持 C99 核心特性SO THAT 可以编译现有的 C 代码
#### Acceptance Criteria
1. WHEN 编译 C99 代码,编译器 SHALL 支持基本数据类型int, char, float, double, long, short
2. WHEN 编译 C99 代码,编译器 SHALL 支持指针和数组
3. WHEN 编译 C99 代码,编译器 SHALL 支持结构体struct和联合体union
4. WHEN 编译 C99 代码,编译器 SHALL 支持函数定义和调用
5. WHEN 编译 C99 代码,编译器 SHALL 支持控制流语句if, else, while, for, do-while, switch, break, continue, return
6. WHEN 编译 C99 代码,编译器 SHALL 支持宏定义(#define)和条件编译(#ifdef, #ifndef, #endif
7. WHEN 编译 C99 代码,编译器 SHALL 支持头文件包含(#include
8. IF 使用 C99 特性(如单行注释 //),编译器 SHALL 正确解析

View File

@@ -0,0 +1,96 @@
# Implementation Task List
## Phase 1: 项目初始化与基础架构
- [ ] 1.1 创建 .NET 8 控制台项目结构
- [ ] 1.2 配置项目解决方案和模块划分
- [ ] 1.3 实现基础的错误报告系统ErrorReporter
- [ ] 1.4 实现源代码位置追踪SourceLocation
## Phase 2: 预处理器实现
- [ ] 2.1 实现 #include 指令处理
- [ ] 2.2 实现 #define 宏定义和宏展开
- [ ] 2.3 实现条件编译(#ifdef, #ifndef, #if, #else, #endif
- [ ] 2.4 实现头文件搜索路径管理
## Phase 3: 词法分析器实现
- [ ] 3.1 定义所有 TokenType 枚举
- [ ] 3.2 实现 Token 结构
- [ ] 3.3 实现 Lexer 主类
- [ ] 3.4 实现关键字识别
- [ ] 3.5 实现标识符识别
- [ ] 3.6 实现整数字面量识别
- [ ] 3.7 实现浮点数字面量识别
- [ ] 3.8 实现字符和字符串字面量识别
- [ ] 3.9 实现运算符识别
- [ ] 3.10 实现注释跳过
- [ ] 3.11 编写 Lexer 单元测试
## Phase 4: 语法分析器实现
- [ ] 4.1 定义 AST 节点层次结构
- [ ] 4.2 实现递归下降解析器框架
- [ ] 4.3 实现表达式解析(处理运算符优先级)
- [ ] 4.4 实现语句解析
- [ ] 4.5 实现函数声明解析
- [ ] 4.6 实现类型声明解析
- [ ] 4.7 实现控制流语句解析if, while, for, switch
- [ ] 4.8 实现结构体和联合体解析
- [ ] 4.9 实现错误恢复机制
- [ ] 4.10 编写 Parser 单元测试
## Phase 5: 语义分析器实现
- [ ] 5.1 实现类型系统CType 层次结构)
- [ ] 5.2 实现符号表SymbolTable
- [ ] 5.3 实现作用域管理
- [ ] 5.4 实现类型检查
- [ ] 5.5 实现函数签名验证
- [ ] 5.6 实现变量声明检查
- [ ] 5.7 编写 Semantic Analyzer 单元测试
## Phase 6: 中间表示IR生成
- [ ] 6.1 定义 IR 指令集
- [ ] 6.2 实现基本块BasicBlock结构
- [ ] 6.3 实现控制流图CFG
- [ ] 6.4 实现 AST 到 IR 转换
- [ ] 6.5 实现临时变量管理
- [ ] 6.6 实现常量折叠优化
- [ ] 6.7 编写 IR Generator 单元测试
## Phase 7: x86/x64 代码生成器
- [ ] 7.1 定义目标架构抽象
- [ ] 7.2 实现寄存器管理
- [ ] 7.3 实现栈帧管理
- [ ] 7.4 实现 x86 指令编码
- [ ] 7.5 实现 x64 指令编码
- [ ] 7.6 实现 IR 到机器码转换
- [ ] 7.7 实现函数调用约定cdecl, sysv64
- [ ] 7.8 实现寄存器分配
- [ ] 7.9 编写 CodeGen 单元测试
## Phase 8: 可执行文件生成
- [ ] 8.1 实现 PE 文件格式写入Windows
- [ ] 8.2 实现 ELF 文件格式写入Linux
- [ ] 8.3 实现节表管理
- [ ] 8.4 实现重定位处理
- [ ] 8.5 实现入口点设置
## Phase 9: 编译器驱动与 CLI
- [ ] 9.1 实现命令行参数解析
- [ ] 9.2 实现编译流程编排
- [ ] 9.3 实现错误汇总与报告
- [ ] 9.4 实现多文件编译支持
## Phase 10: 测试与优化
- [ ] 10.1 编写端到端测试
- [ ] 10.2 使用 TCC 测试套件进行回归测试
- [ ] 10.3 性能测试与优化
- [ ] 10.4 内存使用优化

View File

@@ -0,0 +1,461 @@
# TinyCC 编译器改进计划
Feature Name: 2026-05-20-tinycc-improvements
Updated: 2026-05-20
## Description
本改进计划涵盖 TinyCC 编译器的 9 个核心改进方向,分为三个阶段:
- **阶段一(基础完善)**:端到端测试、错误报告增强、仓库清理
- **阶段二(功能完善)**:语义分析、预处理器集成、代码生成优化
- **阶段三(高级特性)**DWARF 调试信息、PE 格式支持、性能基准
## Architecture
```mermaid
graph TD
subgraph "阶段一:基础完善"
A1[E2E 测试框架] --> A2[测试用例集合]
B1[ErrorReporter 增强] --> B2[代码上下文格式化]
C1[.gitignore 更新] --> C2[移除误提交文件]
end
subgraph "阶段二:功能完善"
D1[SemanticAnalyzer 完善] --> D2[类型系统增强]
D1 --> D3[作用域管理优化]
E1[Preprocessor 集成] --> E2[宏展开引擎]
E1 --> E3[头文件搜索机制]
F1[优化 CodeGen] --> F2[寄存器分配器]
F1 --> F3[指令选择优化]
end
subgraph "阶段三:高级特性"
G1[DWARF 生成器] --> G2[调试信息编码]
G1 --> G3[行号表生成]
H1[PE Writer 完善] --> H2[PE 头生成]
H1 --> H3[重定位表生成]
I1[性能基准] --> I2[编译时间测量]
I1 --> I3[执行时间测量]
end
A2 -. 验证 .-> D1
B2 -. 集成 .-> D1
D3 -. 输入 .-> F2
E3 -. 输出 .-> A2
```
### 改进架构概览
改进计划遵循渐进式实现策略,每个阶段的输出为下一阶段提供基础:
1. **阶段一**建立测试基础设施和用户体验改进
2. **阶段二**完善编译器核心功能
3. **阶段三**添加高级特性和性能监控
## Components and Interfaces
### 1. 端到端测试框架
**职责**
- 编译测试 C 源文件并验证生成的可执行文件
- 管理测试用例输入和预期输出
- 报告测试通过/失败状态
**接口**
```csharp
public interface IE2ETestRunner
{
Task<TestResult> RunTestAsync(TestCase testCase);
IEnumerable<TestCase> LoadTestsFromDirectory(string directory);
}
public record TestCase(
string Name,
string SourceCode,
int ExpectedExitCode,
string? ExpectedOutput = null
);
public record TestResult(
string TestCaseName,
bool Passed,
int ActualExitCode,
string? ActualOutput = null,
string? ErrorMessage = null
);
```
**实现策略**
- 使用 `CompilerDriver` 编译源代码到临时 ELF 文件
- 使用 `Process` 类执行生成的可执行文件
- 比较实际输出/退出码与预期值
### 2. 错误报告增强
**职责**
- 格式化错误信息,包含代码上下文
- 生成可视化错误位置标记
- 支持多错误汇总输出
**接口扩展**
```csharp
public record ErrorInfo(
ErrorLevel Level,
string Message,
SourceLocation Location,
string? SourceLine = null, // 新增:出错的源代码行
int? ColumnOffset = null, // 新增:错误在行内的偏移
string? Suggestion = null // 新增:修复建议
);
public sealed class ErrorReporter : IErrorReporter
{
private readonly List<ErrorInfo> _errors = new();
private readonly Dictionary<string, string[]> _sourceCache = new(); // 新增:源代码缓存
public void Report(ErrorInfo error);
public void SetSourceLines(string fileName, string[] lines); // 新增:设置源代码行
public string FormatErrors(); // 新增:格式化所有错误
}
```
**格式化输出示例**
```
test.c:3:5: error: expected ';' before 'return'
2 | int add(int a, int b) {
3 | int x = a + b
| ^^^^^^^^
4 | return x;
| ~~~~~
help: add ';' at the end of the statement
```
### 3. 语义分析器完善
**职责扩展**
- 实现完整的类型检查系统
- 支持嵌套作用域管理
- 检测函数签名不匹配
**新增组件**
```csharp
public sealed class TypeChecker
{
public CType? CheckBinaryOperation(TokenType op, CType left, CType right, SourceLocation loc);
public CType? CheckUnaryOperation(TokenType op, CType operand, SourceLocation loc);
public bool IsCompatible(CType source, CType target);
public CType? PromoteType(CType type); // 类型提升
}
public sealed class ScopeManager
{
private readonly Stack<Dictionary<string, Symbol>> _scopes = new();
public void EnterScope();
public void ExitScope();
public void DeclareSymbol(string name, Symbol symbol);
public Symbol? LookupSymbol(string name);
public bool IsDeclared(string name);
}
```
**类型检查规则**
| 操作 | 左操作数 | 右操作数 | 结果类型 |
|------|---------|---------|---------|
| 算术运算 | 整数/浮点 | 整数/浮点 | 提升后的类型 |
| 比较运算 | 数值类型 | 数值类型 | int |
| 赋值 | 类型 T | 类型 S | TS 必须可转换为 T |
### 4. 预处理器集成
**职责**
- 处理 `#include``#define`、条件编译
- 管理头文件搜索路径
- 宏展开和参数替换
**接口**
```csharp
public interface IPreprocessor
{
string Preprocess(string sourceCode, string sourceFile);
void AddIncludePath(string path);
void DefineMacro(string name, string? value);
void UndefineMacro(string name);
}
public sealed class Macro
{
public string Name { get; }
public string? Value { get; }
public List<string>? Parameters { get; } // 函数宏参数
public string? Body { get; }
}
```
**集成到 CompilerDriver**
```csharp
// 在 CompilerDriver.Compile 中
var preprocessor = new Preprocessor(_errorReporter);
foreach (var includePath in options.IncludePaths)
{
preprocessor.AddIncludePath(includePath);
}
var preprocessedSource = preprocessor.Preprocess(options.SourceFile);
var lexer = new Lexer(preprocessedSource, options.SourceFile, _errorReporter);
```
### 5. 代码生成优化
**职责**
- 实现图着色寄存器分配
- 指令选择和调度
- 栈帧布局优化
**寄存器分配器接口**
```csharp
public sealed class GraphColoringAllocator
{
private readonly Dictionary<IrValue, HashSet<IrValue>> _interferenceGraph = new();
private readonly Dictionary<IrValue, string> _allocation = new();
private readonly HashSet<IrValue> _spilledVars = new();
public void Allocate(IrFunction function, string[] availableRegs);
public string? GetRegister(IrValue value);
public bool IsSpilled(IrValue value);
}
```
**优化验证策略**
- 比较优化前后生成的机器码长度
- 验证优化后程序的执行结果正确性
- 测量溢出变量数量
### 6. DWARF 调试信息生成器
**职责**
- 生成 DWARF 调试信息节
- 编码源文件路径和行号映射
- 生成变量和类型调试信息
**接口**
```csharp
public sealed class DwarfGenerator
{
private readonly List<DwarfInfo> _debugInfo = new();
private readonly List<DwarfLine> _lineTable = new();
public void AddFile(string fileName);
public void AddLineEntry(int fileIndex, int line, int address);
public void AddVariable(string name, CType type, int scopeLevel, int offset);
public byte[] GenerateDebugSection();
public byte[] GenerateLineSection();
}
```
**ELF 集成**
-`ElfWriter` 中添加 `.debug_info``.debug_line`
- 更新节头表和字符串表
### 7. PE 写出器完善
**职责**
- 生成完整的 PE32+ 文件头
- 创建 `.text``.data`
- 处理重定位和导入表
**PE 文件结构**
```
DOS Header (64 bytes)
PE Signature ("PE\0\0")
COFF File Header (20 bytes)
Optional Header (PE32+, 112 bytes)
Data Directories (16 entries)
Section Headers (40 bytes per section)
.text Section (代码)
.data Section (数据)
```
### 8. 性能基准测试框架
**职责**
- 测量编译时间
- 测量生成代码执行时间
- 生成统计报告
**接口**
```csharp
public sealed class BenchmarkRunner
{
public BenchmarkResult RunCompilationBenchmark(string sourceFile, int iterations = 10);
public BenchmarkResult RunExecutionBenchmark(string executable, int iterations = 100);
}
public record BenchmarkResult(
string TestName,
double MeanTimeMs,
double MedianTimeMs,
double StdDevMs,
int Iterations
);
```
## Data Models
### 错误信息模型(增强)
```csharp
public enum ErrorLevel
{
Warning,
Error,
Fatal
}
public readonly struct SourceLocation
{
public string FileName { get; }
public int Line { get; }
public int Column { get; }
public int Length { get; } // 新增:错误跨度
}
```
### 测试用例模型
```csharp
public record TestCase(
string Name,
string SourceCode,
int ExpectedExitCode,
string? ExpectedOutput = null,
string? ExpectedErrorPattern = null // 期望的错误模式
);
```
### DWARF 调试信息模型
```csharp
public record DwarfInfoEntry(
uint Offset,
uint AbbrevCode,
Dictionary<uint, object> Attributes
);
public record DwarfLineEntry(
int Address,
int FileIndex,
int Line,
int Column,
bool IsStatement,
bool IsEndOfSequence
);
```
## Correctness Properties
### 不变量
1. **测试覆盖完整性**: 每个 C 语言特性至少有一个 E2E 测试用例
2. **错误信息准确性**: 错误位置标记必须指向正确的源代码行和列
3. **类型检查健全性**: 类型检查必须拒绝所有类型错误的程序
4. **寄存器分配正确性**: 分配的寄存器不能干涉活跃变量
5. **调试信息一致性**: 调试信息中的行号必须与实际代码位置匹配
### 约束条件
1. E2E 测试必须在 Linux x64 环境下运行
2. 错误报告格式化器必须处理多字节字符
3. 寄存器分配器必须遵循 System V AMD64 ABI
4. DWARF 信息必须兼容 gdb 7.0+
5. PE 文件必须兼容 Windows 10+ 加载器
## Error Handling
### 错误场景与处理策略
| 错误场景 | 检测阶段 | 处理方式 |
|---------|---------|---------|
| 头文件不存在 | 预处理 | 报告错误,提供搜索路径 |
| 宏重复定义 | 预处理 | 报告警告,使用新定义 |
| 未声明变量 | 语义分析 | 报告错误,标记位置 |
| 类型不匹配 | 语义分析 | 报告错误,显示期望和实际类型 |
| 寄存器溢出 | 代码生成 | 溢出到栈,更新栈帧布局 |
| DWARF 编码失败 | 调试信息生成 | 报告错误,继续编译 |
| PE 头生成失败 | 目标文件写入 | 报告错误,终止编译 |
### 错误恢复策略
- **词法/语法错误**: 尝试跳过错误 token继续解析
- **语义错误**: 收集所有错误,一次性输出
- **代码生成错误**: 立即终止,报告详细错误信息
## Test Strategy
### 单元测试
1. **错误报告测试**: 验证错误信息格式化和代码上下文显示
2. **类型检查测试**: 验证各种类型场景的检查逻辑
3. **寄存器分配测试**: 验证图着色算法正确性
4. **DWARF 编码测试**: 验证调试信息编码正确性
### 集成测试
1. **端到端测试**: 编译并运行测试 C 程序,验证输出
2. **预处理器集成测试**: 验证宏展开和头文件包含
3. **PE 格式测试**: 在 Windows 环境验证生成的 PE 文件
### E2E 测试用例集合
```c
// test_arithmetic.c - 算术运算测试
int add(int a, int b) { return a + b; }
int main() { return add(3, 4) == 7 ? 0 : 1; }
// test_control_flow.c - 控制流测试
int main() {
int sum = 0;
for (int i = 1; i <= 10; i++) sum += i;
return sum == 55 ? 0 : 1;
}
// test_functions.c - 函数调用测试
int factorial(int n) {
if (n <= 1) return 1;
return n * factorial(n - 1);
}
int main() { return factorial(5) == 120 ? 0 : 1; }
// test_pointers.c - 指针测试
int main() {
int x = 42;
int *p = &x;
return *p == 42 ? 0 : 1;
}
// test_arrays.c - 数组测试
int main() {
int arr[3] = {1, 2, 3};
return arr[1] == 2 ? 0 : 1;
}
// test_macro.c - 宏测试
#define MAX(a, b) ((a) > (b) ? (a) : (b))
int main() { return MAX(3, 5) == 5 ? 0 : 1; }
```
### 性能基准测试
- **编译时间基准**: 测量编译标准 C 文件的时间(如 `factorial.c`, `sort.c`
- **执行时间基准**: 测量生成代码执行时间,与 gcc/clang 对比
- **内存使用基准**: 测量编译过程中的内存峰值
## References
[^1]: (DWARF Spec) - DWARF 调试标准格式 https://dwarfstd.org
[^2]: (PE Spec) - Microsoft PE 和 COFF 规范 https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
[^3]: (System V AMD64 ABI) - x64 调用约定 https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
[^4]: (ELF Spec) - ELF 规范 https://refspecs.linuxfoundation.org/elf/elf.pdf
[^5]: (CompilerDriver.cs#L18) - 当前编译器驱动实现 src/TinyCC.Core/CompilerDriver.cs
[^6]: (ErrorReporter.cs#L6) - 当前错误报告器实现 src/TinyCC.Core/Diagnostics/ErrorReporter.cs
[^7]: (SemanticAnalyzer.cs#L11) - 当前语义分析器实现 src/TinyCC.Core/Semantic/SemanticAnalyzer.cs
[^8]: (OptimizedX64CodeGenerator.cs#L12) - 当前优化代码生成器实现 src/TinyCC.Core/CodeGen/OptimizedX64CodeGenerator.cs

View File

@@ -0,0 +1,123 @@
# Requirements Document
## Introduction
本改进计划涵盖 TinyCC 编译器的 9 个核心改进方向包括端到端测试、错误报告增强、代码清理、语义分析完善、预处理器集成、代码生成优化、调试信息支持、PE 格式支持及性能基准测试。
## Glossary
- **TinyCC**: 本项目实现的轻量级 C 编译器
- **E2E 测试**: 端到端测试,验证完整编译流程
- **ELF**: Executable and Linkable FormatLinux 可执行文件格式
- **PE**: Portable ExecutableWindows 可执行文件格式
- **DWARF**: 调试信息格式,支持源码级调试
- **ABI**: Application Binary Interface应用二进制接口
## Requirements
### Requirement 1: 端到端编译测试
**User Story:** AS 一个编译器开发者I WANT 验证完整编译流程生成的可执行文件能够正确运行SO THAT 确保编译器各组件协同工作正常
#### Acceptance Criteria
1. WHEN 提供包含 `main` 函数的 C 源代码,编译器 SHALL 生成可在 Linux 上执行的 ELF 文件
2. WHEN 运行生成的 ELF 文件,程序 SHALL 返回正确的退出码
3. WHEN 编译包含算术运算的 C 程序,执行程序 SHALL 输出正确的计算结果
4. WHEN 编译包含函数调用的 C 程序,执行程序 SHALL 正确调用函数并返回结果
5. WHEN 编译包含控制流语句if/while/for的 C 程序,执行程序 SHALL 正确执行控制流
### Requirement 2: 增强错误报告
**User Story:** AS 一个 C 程序员I WANT 编译器提供包含代码上下文和位置提示的错误信息SO THAT 能够快速定位和修复代码问题
#### Acceptance Criteria
1. WHEN 报告编译错误,错误信息 SHALL 包含文件名、行号和列号
2. WHEN 报告语法错误,错误信息 SHALL 显示出错代码行及错误位置标记
3. WHEN 报告类型错误,错误信息 SHALL 说明期望的类型和实际提供的类型
4. WHEN 报告多个错误,编译器 SHALL 汇总所有错误并一次性输出
5. IF 错误信息包含建议,建议 SHALL 提供可能的修复方向
### Requirement 3: 清理误提交文件
**User Story:** AS 一个仓库维护者I WANT 从版本控制中移除构建产物和临时文件SO THAT 保持仓库整洁并减小仓库体积
#### Acceptance Criteria
1. WHEN 提交代码,构建目录 `bin/``obj/` SHALL 被 `.gitignore` 排除
2. WHEN 提交代码,临时测试文件 `test_output` SHALL 从版本控制中移除
3. WHEN 提交代码,本地测试文件 `test.c` SHALL 从版本控制中移除
### Requirement 4: 语义分析器完整实现
**User Story:** AS 一个 C 编译器开发者I WANT 语义分析器能够验证程序的语义正确性SO THAT 拒绝语义错误的 C 程序
#### Acceptance Criteria
1. WHEN 遇到未声明的变量,语义分析器 SHALL 报告"未声明的标识符"错误
2. WHEN 遇到类型不匹配的赋值操作,语义分析器 SHALL 报告类型不匹配错误
3. WHEN 遇到函数调用参数数量不匹配,语义分析器 SHALL 报告参数数量错误
4. WHEN 遇到函数调用参数类型不匹配,语义分析器 SHALL 报告参数类型错误
5. WHILE 处理嵌套作用域,语义分析器 SHALL 正确解析变量的词法作用域
6. WHEN 遇到重复的函数声明,语义分析器 SHALL 报告重复定义错误
### Requirement 5: 预处理器集成到编译流程
**User Story:** AS 一个 C 编译器用户I WANT 编译器正确处理预处理指令SO THAT 可以编译包含宏和头文件的 C 程序
#### Acceptance Criteria
1. WHEN 遇到 `#include` 指令,预处理器 SHALL 展开并包含指定头文件的内容
2. WHEN 遇到 `#define` 宏定义,预处理器 SHALL 在后续代码中展开宏
3. WHEN 遇到 `#ifdef`/`#ifndef` 条件编译,预处理器 SHALL 根据宏定义情况选择编译分支
4. WHEN 预处理完成后,编译器驱动 SHALL 将预处理后的源代码传递给词法分析器
5. IF 头文件不存在,预处理器 SHALL 报告错误并提供搜索路径信息
### Requirement 6: 代码生成优化验证
**User Story:** AS 一个编译器开发者I WANT 验证优化后的代码生成器能够正确分配寄存器并生成高效代码SO THAT 提升生成程序的执行性能
#### Acceptance Criteria
1. WHEN 使用优化代码生成器编译函数,寄存器分配算法 SHALL 为活跃变量分配物理寄存器
2. WHEN 寄存器数量不足,寄存器分配算法 SHALL 正确地将变量溢出到栈
3. WHEN 生成优化后的机器码,程序执行结果 SHALL 与未优化版本一致
4. WHILE 分配寄存器,寄存器分配算法 SHALL 遵循调用约定保留被调用者保存的寄存器
5. IF 代码生成器生成溢出代码,溢出区域 SHALL 正确管理栈帧布局
### Requirement 7: DWARF 调试信息生成
**User Story:** AS 一个 C 程序员I WANT 编译器生成 DWARF 调试信息SO THAT 可以使用 gdb 对生成的可执行文件进行源码级调试
#### Acceptance Criteria
1. WHEN 启用调试信息选项,编译器 SHALL 在 ELF 文件中生成 `.debug_info`
2. WHEN 生成调试信息,调试信息 SHALL 包含源文件路径和行号映射
3. WHEN 生成调试信息,调试信息 SHALL 包含变量名称、类型和作用域信息
4. WHEN 使用 gdb 加载生成的可执行文件gdb SHALL 能够显示源代码并设置断点
5. IF 未启用调试信息选项,编译器 SHALL 不生成调试信息以减小文件体积
### Requirement 8: PE 格式可执行文件支持
**User Story:** AS 一个 Windows 用户I WANT 编译器生成 Windows PE 格式的可执行文件SO THAT 可以在 Windows 系统上运行编译后的程序
#### Acceptance Criteria
1. WHEN 指定目标平台为 Windows x64编译器 SHALL 生成 PE32+ 格式的可执行文件
2. WHEN 生成 PE 文件PE 文件 SHALL 包含正确的 DOS 头和 PE 签名
3. WHEN 生成 PE 文件PE 文件 SHALL 包含有效的节表(`.text``.data`
4. WHEN 运行生成的 PE 文件Windows 操作系统 SHALL 能够加载并执行程序
5. WHEN 生成 PE 文件PE 文件 SHALL 设置正确的入口点Entry Point
### Requirement 9: 编译性能基准测试
**User Story:** AS 一个编译器开发者I WANT 建立编译性能基准SO THAT 可以量化编译器性能变化并识别性能瓶颈
#### Acceptance Criteria
1. WHEN 运行性能基准测试,测试 SHALL 测量编译器处理标准 C 文件的编译时间
2. WHEN 运行性能基准测试,测试 SHALL 测量生成代码的执行时间
3. WHEN 运行性能基准测试,测试 SHALL 输出编译时间和执行时间的统计报告
4. WHILE 进行性能优化,开发者 SHALL 能够对比优化前后的基准测试结果
5. IF 性能基准测试发现回归,测试结果 SHALL 标记性能下降的模块

View File

@@ -0,0 +1,80 @@
# TinyCC 改进计划 - 任务列表
Feature Name: 2026-05-20-tinycc-improvements
Created: 2026-05-20
## 阶段一:基础完善
### Task 1.1: 创建端到端测试框架
- [ ] 1.1.1 创建 `TinyCC.E2ETests` 测试项目,配置 xUnit 测试框架
- [ ] 1.1.2 实现 `E2ETestRunner` 类,支持编译 C 源代码并执行生成的 ELF 文件
- [ ] 1.1.3 实现测试用例管理,支持从嵌入式代码或文件加载测试用例
- [ ] 1.1.4 编写基础测试用例:算术运算、控制流、函数调用
- [ ] 1.1.5 运行端到端测试,验证当前编译器功能,记录失败项
### Task 1.2: 增强错误报告
- [ ] 1.2.1 扩展 `ErrorInfo` 结构,添加 `SourceLine``ColumnOffset``Suggestion` 字段
- [ ] 1.2.2 在 `ErrorReporter` 中实现源代码行缓存机制
- [ ] 1.2.3 实现错误信息格式化器,支持代码上下文和位置标记显示
- [ ] 1.2.4 集成到 `CompilerDriver`,在编译前缓存源代码行
- [ ] 1.2.5 编写单元测试验证错误格式化输出
### Task 1.3: 清理误提交文件
- [ ] 1.3.1 更新 `.gitignore`,确保 `bin/``obj/``test.c``test_output/` 被排除
- [ ] 1.3.2 从 git 历史中移除 `test_output``test.c` 文件
- [ ] 1.3.3 验证 `git status` 输出清洁,无构建产物
## 阶段二:功能完善
### Task 2.1: 完善语义分析器
- [ ] 2.1.1 实现完整的 `TypeChecker` 类,支持类型兼容性检查和类型提升
- [ ] 2.1.2 完善 `ScopeManager`,支持嵌套作用域和符号查找
- [ ] 2.1.3 实现函数签名验证,检测参数数量和类型不匹配
- [ ] 2.1.4 实现重复声明检测(函数和全局变量)
- [ ] 2.1.5 编写单元测试验证类型检查和作用域管理
### Task 2.2: 预处理器集成到编译流程
- [ ] 2.2.1 完善 `Preprocessor` 类,支持 `#include` 头文件搜索和展开
- [ ] 2.2.2 实现 `#define` 宏定义和宏展开(包括函数宏)
- [ ] 2.2.3 实现条件编译 `#ifdef`/`#ifndef`/`#endif`/`#if`/`#else`/`#elif`
- [ ] 2.2.4 集成预处理器到 `CompilerDriver.Compile` 流程
- [ ] 2.2.5 编写单元测试和 E2E 测试验证预处理功能
### Task 2.3: 代码生成优化验证
- [ ] 2.3.1 完善 `GraphColoringAllocator` 寄存器分配器实现
- [ ] 2.3.2 实现变量溢出到栈的逻辑和栈帧布局管理
- [ ] 2.3.3 集成优化代码生成器到 `CompilerDriver`
- [ ] 2.3.4 编写测试验证优化前后代码执行结果一致性
- [ ] 2.3.5 比较优化前后生成的机器码长度和寄存器使用情况
## 阶段三:高级特性
### Task 3.1: DWARF 调试信息生成
- [ ] 3.1.1 实现 `DwarfGenerator` 类,支持 DWARF 调试信息编码
- [ ] 3.1.2 实现行号表生成(`.debug_line` 节)
- [ ] 3.1.3 实现变量和类型调试信息(`.debug_info` 节)
- [ ] 3.1.4 集成到 `ElfWriter`,添加调试信息节到 ELF 文件
- [ ] 3.1.5 使用 gdb 验证生成的调试信息可正确显示源码和设置断点
### Task 3.2: PE 格式可执行文件支持
- [ ] 3.2.1 完善 `PeWriter`,生成完整的 PE32+ 文件头
- [ ] 3.2.2 实现 `.text``.data` 节创建和填充
- [ ] 3.2.3 实现 PE 入口点设置和重定位处理
- [ ] 3.2.4 集成 PE 写出器到 `CompilerDriver` 的 Windows 平台分支
- [ ] 3.2.5 验证生成的 PE 文件可在 Windows 环境加载执行
### Task 3.3: 编译性能基准测试
- [ ] 3.3.1 实现 `BenchmarkRunner` 类,支持编译时间和执行时间测量
- [ ] 3.3.2 创建标准测试用例集合factorial、sort 等)
- [ ] 3.3.3 实现统计报告生成(均值、中位数、标准差)
- [ ] 3.3.4 集成到测试框架,支持一键运行基准测试
- [ ] 3.3.5 记录初始基准数据,建立性能基线

240
README.md Normal file
View File

@@ -0,0 +1,240 @@
# TinyCC - A Tiny C Compiler in C#
参考 TCCTiny C Compiler设计理念使用 C# 语言开发的 C 语言编译器。编译器将 C 源代码直接编译为 x86/x64 本地机器码,而非 MSILMicrosoft Intermediate Language
## 项目特性
- 轻量级、快速的 C 编译器
- 直接生成 x86/x64 本地机器码
- 支持 C99 标准核心子集
- 生成 ELF 格式可执行文件Linux x64
- 完整的编译流程:词法分析 → 语法分析 → IR 生成 → 代码生成
## 项目结构
```
/workspace/
├── src/
│ ├── TinyCC.Core/ # 核心编译器库
│ │ ├── Diagnostics/ # 错误报告系统
│ │ │ ├── ErrorInfo.cs
│ │ │ ├── IErrorReporter.cs
│ │ │ └── ErrorReporter.cs
│ │ ├── Lexer/ # 词法分析器
│ │ │ ├── TokenType.cs
│ │ │ ├── Token.cs
│ │ │ └── Lexer.cs
│ │ ├── Parser/ # 语法分析器
│ │ │ ├── AstNodes.cs
│ │ │ └── Parser.cs
│ │ ├── IR/ # 中间表示生成器
│ │ │ ├── IrInstructions.cs
│ │ │ └── IrGenerator.cs
│ │ ├── CodeGen/ # x64 代码生成器
│ │ │ └── X64CodeGenerator.cs
│ │ ├── Target/ # ELF 文件写入器
│ │ │ └── ElfWriter.cs
│ │ └── CompilerDriver.cs # 编译器驱动
│ └── TinyCC.Cli/ # 命令行接口
│ └── Program.cs
├── tests/
│ └── TinyCC.Tests/ # 单元测试
│ └── UnitTests.cs
└── .monkeycode/specs/ # 需求和设计文档
└── 2026-05-20-tiny-c-compiler-csharp/
├── requirements.md
├── design.md
└── tasklist.md
```
## 编译流程
```
C 源代码 → 词法分析 → Token 流 → 语法分析 → AST → IR 生成 → 代码生成 → ELF 文件
```
### 1. 词法分析 (Lexical Analysis)
将 C 源代码分解为 token 流,识别:
- 关键字int, char, if, while, return 等)
- 标识符
- 字面量(整数、浮点数、字符、字符串)
- 运算符(+, -, *, /, ==, !=, &&, || 等)
- 分隔符((), {}, ;, , 等)
- 跳过注释和空白
### 2. 语法分析 (Parsing)
递归下降解析器构建抽象语法树AST
- 函数声明和定义
- 表达式解析(正确的运算符优先级)
- 语句解析if, while, for, return, break, continue
- 块语句
### 3. 中间表示生成 (IR Generation)
将 AST 转换为三地址码形式的 IR
- 二元运算Add, Sub, Mul, Div, Mod, And, Or, Xor 等)
- 一元运算Neg, Not, BitNot
- 函数调用
- 控制流(跳转、条件分支、循环)
- 变量加载和存储
### 4. 代码生成 (Code Generation)
将 IR 转换为 x64 机器码:
- 寄存器管理rax, rcx, rdx 等)
- 栈帧管理push rbp, mov rbp, rsp, sub rsp
- x64 调用约定(前 6 个参数通过寄存器传递)
- 基本指令编码
### 5. ELF 文件生成
生成 Linux x64 可执行文件:
- ELF 头部
- 程序头部
- 代码段
## 使用方式
### 编译项目
```bash
export PATH="/usr/share/dotnet:$PATH"
dotnet build
```
### 运行编译器
```bash
# 编译 C 文件
dotnet run --project src/TinyCC.Cli -- test.c -o test_output
# 查看帮助
dotnet run --project src/TinyCC.Cli -- --help
```
### 运行测试
```bash
dotnet test
```
## 已实现功能
- 词法分析器:完整的 C 语言 token 识别
- 语法分析器:递归下降解析器,支持函数定义和表达式解析
- 中间表示:三地址码形式的 IR
- 代码生成器x64 机器码生成
- ELF 文件生成Linux x64 可执行文件
- 命令行接口:支持 `-o` 指定输出文件、`-h` 显示帮助
## 示例代码
```c
// test.c
int add(int a, int b) {
return a + b;
}
int main() {
return add(3, 4);
}
```
## 后续改进方向
1. 完善语义分析(类型检查、符号表)
2. 支持更多 C99 特性(结构体、指针、数组)
3. 优化代码生成(寄存器分配、指令选择)
4. 支持 PE 格式Windows
5. 添加预处理器支持(#include, #define, 条件编译)
6. 支持局部变量声明
## 技术栈
- C# 8.0+
- .NET 8.0
- xUnit 测试框架
## 架构设计
编译器采用传统的多遍编译架构:
```mermaid
graph TD
A["C Source Code"] --> B["Preprocessor"]
B --> C["Lexer"]
C --> D["Parser"]
D --> E["Semantic Analyzer"]
E --> F["IR Generator"]
F --> G["Code Generator x86/x64"]
G --> H["ELF/PE Writer"]
H --> I["Executable"]
```
---
## 开发会话记录
### 会话目标
- 修复 TinyCC x64 代码生成器中的 E2E 测试失败问题(退出码 1 或 139
- 修正 IR 分支逻辑、标签补丁和栈帧管理
### 约束条件与偏好
- **编程语言**C# (.NET 8.0)
- **目标平台**x64 Linux (ELF)
- **测试框架**xUnit E2E 测试(编译并执行生成的 ELF 二进制文件)
- **调试方式**:固定临时目录 `/tmp/tinycc-debug/`,包含十六进制和日志转储
### 开发进度
#### 已完成
- 更新 `IrGenerator.cs`,为没有 else 分支的 if 语句发射 `IrNop`,以分离 `elseLabel``endLabel` 位置
-`IrFunction` 记录中添加 `ParameterCount`,用于正确的栈帧设置
-`X64CodeGenerator.Generate()` 中实现 `_start` 包装器,包含 `call main` + `sys_exit` 补丁
- 修复 `LoadValue`/`StoreValue`,通过模式匹配处理 `IrLocal``IrTemp`
- 添加 `_funcOffsets` 字典和 `CallPatchInfo`,用于修补函数间调用
-`GenerateFunction` 中实现参数寄存器到栈的保存System V AMD64 ABIrdi、rsi、rdx、rcx、r8、r9
- 添加调试日志到 `/tmp/tinycc-debug/debug.log`,记录标签、跳转、补丁和十六进制转储
- 修改 `E2ETestRunner`,在调试期间跳过清理
- 简单返回测试(`simple_return_zero``simple_return_42`现在通过2/10
- **修复 `GenerateReturn`**:只加载值到 rax 但不发射 `ret` 指令,添加直接返回指令
- **修复 `GenerateCallWithPatches`**:添加 `StoreValue(call.Dest, GetRegister(0), locals)` 在调用后将返回值rax存储到目标临时变量
- **修复 `_start` 包装器中的 `call main` 补丁计算**:修正相对偏移计算公式
- **所有 10 个测试全部通过**
#### 进行中
- 调试 `variable_assignment` 测试(退出码 1 而非 0
- 调查当目标标签跟随 `IrNop` 时的 `IrJump` 补丁问题(偏移计算显示 rel=1跳转到 NOP 而非跳过它)
- 修复 `while``for` 循环 IR 生成(`GenerateWhileStatement``GenerateForStatement`
#### 阻塞项
- 分支/标签偏移计算在标签与 NOP 相邻时产生不正确的相对偏移
- 条件分支语义不清晰:`IrBranch(condition, TrueLabel, FalseLabel)` 与代码生成将其视为"真->fallthrough假->je FalseLabel"
### 关键决策
- 添加 `IrNop` 以分离 if-without-else 控制流中重叠的标签位置
-`IrVariable` 模式匹配切换到显式的 `IrLocal`/`IrTemp` 处理,因为 `IrValue` 基类型约束
- 使用固定调试目录和十六进制转储,而非 xUnit 控制台输出,以实现可靠的字节码检查
### 下一步
- 修复 `IrJump` 相对偏移计算,正确考虑 `IrNop` 填充(当前 rel=1 落在 NOP 上,应为 rel=2 以跳过它)
- 澄清 `IrGenerator.cs` 中的 `IrBranch` 语义与 `X64CodeGenerator.cs` 中的补丁逻辑
- 验证 `while``for` 循环标签顺序(`startLabel``condLabel``incLabel``endLabel`
- 运行修正补丁后的 `variable_assignment` 测试,验证 if/else 控制流
- 在分支稳定后扩展到 `function_call``recursive_factorial` 测试
### 关键上下文
- **当前失败**`variable_assignment` 以退出码 1 退出(预期 0。HEX 转储显示 `E9 01 00 00 00 90`jmp +1落在 NOP 上fallthrough 到下一个 if 语句而非 endLabel
- **补丁日志显示**`Jmp at 93 -> endif_2@99, rel=1` 但应跳过 NOP1 字节)+ 标签对齐
- **标签冲突**:当没有 else 分支时,`elseLabel``endLabel` 在同一偏移;通过插入 `IrNop` 修复,但补丁仍然差一
- **通过的测试**`simple_return_zero``simple_return_42` 工作(无分支/控制流)
- **失败的测试**`arithmetic_add``control_flow_for_loop``control_flow_while_loop``function_call``conditional_branch``variable_assignment``recursive_factorial``local_variable_scope`
### 相关文件
- `/workspace/src/TinyCC.Core/CodeGen/X64CodeGenerator.cs`:代码生成器,包含标签补丁、`_start` 包装器、栈帧设置
- `/workspace/src/TinyCC.Core/IR/IrGenerator.cs`if/while/for 语句的 IR 生成、标签创建
- `/workspace/src/TinyCC.Core/IR/IrInstructions.cs`:添加 `IrNop` 指令,`IrFunction` 中的 `ParameterCount`
- `/workspace/tests/TinyCC.E2ETests/E2ETestRunner.cs`修改为固定调试目录、禁用清理、objdump 调试输出
- `/tmp/tinycc-debug/debug.log`:运行时调试日志,包含标签偏移、补丁计算、十六进制转储

43
TinyCC.sln Normal file
View File

@@ -0,0 +1,43 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{7E2DE457-6AB3-40AB-B83C-0AB971ADD290}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TinyCC.Core", "src\TinyCC.Core\TinyCC.Core.csproj", "{599F1C53-5D1E-4611-A1AE-AF47903501D8}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TinyCC.Cli", "src\TinyCC.Cli\TinyCC.Cli.csproj", "{3918FB1E-E7BB-450A-9CDE-6FDE225D4701}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{95B24AFA-1124-40F8-9185-329E241694CF}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TinyCC.Tests", "tests\TinyCC.Tests\TinyCC.Tests.csproj", "{ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{599F1C53-5D1E-4611-A1AE-AF47903501D8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{599F1C53-5D1E-4611-A1AE-AF47903501D8}.Debug|Any CPU.Build.0 = Debug|Any CPU
{599F1C53-5D1E-4611-A1AE-AF47903501D8}.Release|Any CPU.ActiveCfg = Release|Any CPU
{599F1C53-5D1E-4611-A1AE-AF47903501D8}.Release|Any CPU.Build.0 = Release|Any CPU
{3918FB1E-E7BB-450A-9CDE-6FDE225D4701}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{3918FB1E-E7BB-450A-9CDE-6FDE225D4701}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3918FB1E-E7BB-450A-9CDE-6FDE225D4701}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3918FB1E-E7BB-450A-9CDE-6FDE225D4701}.Release|Any CPU.Build.0 = Release|Any CPU
{ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{599F1C53-5D1E-4611-A1AE-AF47903501D8} = {7E2DE457-6AB3-40AB-B83C-0AB971ADD290}
{3918FB1E-E7BB-450A-9CDE-6FDE225D4701} = {7E2DE457-6AB3-40AB-B83C-0AB971ADD290}
{ADC02ACF-7F89-4BB6-BE27-69787AC8BA1A} = {95B24AFA-1124-40F8-9185-329E241694CF}
EndGlobalSection
EndGlobal

78
src/TinyCC.Cli/Program.cs Normal file
View File

@@ -0,0 +1,78 @@
using TinyCC.Core;
namespace TinyCC.Cli;
public class Program
{
public static int Main(string[] args)
{
if (args.Length == 0)
{
PrintUsage();
return 1;
}
string? sourceFile = null;
string? outputFile = null;
for (int i = 0; i < args.Length; i++)
{
switch (args[i])
{
case "-o" when i + 1 < args.Length:
outputFile = args[++i];
break;
case "-h" or "--help":
PrintUsage();
return 0;
case var s when !s.StartsWith("-"):
sourceFile = s;
break;
default:
Console.Error.WriteLine($"Unknown option: {args[i]}");
return 1;
}
}
if (sourceFile == null)
{
Console.Error.WriteLine("Error: No input file specified");
PrintUsage();
return 1;
}
if (!File.Exists(sourceFile))
{
Console.Error.WriteLine($"Error: File '{sourceFile}' not found");
return 1;
}
var errorReporter = new ErrorReporter();
var driver = new CompilerDriver(errorReporter);
var options = new CompilationOptions(sourceFile, outputFile);
var result = driver.Compile(options);
if (result.Success)
{
Console.WriteLine($"Compilation successful: {result.Message}");
return 0;
}
else
{
Console.Error.WriteLine(result.Message);
return 1;
}
}
private static void PrintUsage()
{
Console.WriteLine("TinyCC - A tiny C compiler in C#");
Console.WriteLine();
Console.WriteLine("Usage: tinycc [options] <source.c>");
Console.WriteLine();
Console.WriteLine("Options:");
Console.WriteLine(" -o <output> Specify output file name");
Console.WriteLine(" -h, --help Show this help message");
}
}

View File

@@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">
<ItemGroup>
<ProjectReference Include="..\TinyCC.Core\TinyCC.Core.csproj" />
</ItemGroup>
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
</Project>

View File

@@ -0,0 +1,221 @@
using System;
using System.Collections.Generic;
namespace TinyCC.Core;
/// <summary>
/// C99 扩展 AST 节点
/// 支持结构体、联合体、枚举、sizeof、类型转换等
/// </summary>
/// <summary>
/// 结构体定义节点
/// </summary>
public sealed record StructDeclarationNode(
string Name,
List<StructFieldNode> Fields,
SourceLocation Location
) : DeclarationNode(Location);
/// <summary>
/// 结构体字段节点
/// </summary>
public sealed record StructFieldNode(
TypeNode Type,
string Name,
SourceLocation Location
) : AstNode(Location);
/// <summary>
/// 结构体类型节点
/// </summary>
public sealed record StructTypeNode(
string Name,
List<StructFieldNode> Fields,
SourceLocation Location
) : TypeNode(Location);
/// <summary>
/// 联合体定义节点
/// </summary>
public sealed record UnionDeclarationNode(
string Name,
List<StructFieldNode> Fields,
SourceLocation Location
) : DeclarationNode(Location);
/// <summary>
/// sizeof 表达式节点
/// </summary>
public sealed record SizeofExpressionNode(
TypeNode? Type,
ExpressionNode? Expression,
SourceLocation Location
) : ExpressionNode(Location);
/// <summary>
/// 类型转换表达式节点
/// </summary>
public sealed record CastExpressionNode(
TypeNode TargetType,
ExpressionNode Expression,
SourceLocation Location
) : ExpressionNode(Location);
/// <summary>
/// 枚举定义节点
/// </summary>
public sealed record EnumDeclarationNode(
string Name,
List<EnumConstantNode> Constants,
SourceLocation Location
) : DeclarationNode(Location);
/// <summary>
/// 枚举常量节点
/// </summary>
public sealed record EnumConstantNode(
string Name,
ExpressionNode? Value,
SourceLocation Location
) : AstNode(Location);
/// <summary>
/// C99 扩展类型
/// </summary>
/// <summary>
/// 结构体类型
/// </summary>
public sealed record StructType(string Name, Dictionary<string, (CType Type, int Offset)> Fields, int Size) : CType
{
public override string Name => $"struct {Name}";
}
/// <summary>
/// 枚举类型
/// </summary>
public sealed record EnumType(string Name, Dictionary<string, long> Constants) : CType
{
public override string Name => $"enum {Name}";
}
/// <summary>
/// C99 类型工具类
/// </summary>
public static class C99TypeUtils
{
/// <summary>
/// 从类型节点解析 CType
/// </summary>
public static CType ParseType(TypeNode node)
{
return node switch
{
PrimitiveTypeNode p => p.TypeName.ToLower() switch
{
"int" => IntType.Instance,
"char" => CharType.Instance,
"float" => FloatType.Instance,
"double" => DoubleType.Instance,
"long" => LongType.Instance,
"short" => ShortType.Instance,
"void" => VoidType.Instance,
_ => IntType.Instance
},
PointerTypeNode p => new PointerType(ParseType(p.BaseType)),
ArrayTypeNode a => new ArrayType(ParseType(a.ElementType), a.Size),
StructTypeNode s => new StructType(s.Name, new Dictionary<string, (CType Type, int Offset)>(), 0),
_ => IntType.Instance
};
}
/// <summary>
/// 获取类型大小
/// </summary>
public static int GetSizeOfType(CType type)
{
return type switch
{
CharType => 1,
ShortType => 2,
IntType => 4,
LongType => 8,
FloatType => 4,
DoubleType => 8,
PointerType => 8,
ArrayType arr => GetSizeOfType(arr.ElementType) * arr.Size,
StructType st => st.Size,
_ => 4
};
}
/// <summary>
/// 获取类型对齐要求
/// </summary>
public static int GetAlignmentOfType(CType type)
{
return type switch
{
CharType => 1,
ShortType => 2,
IntType or FloatType or PointerType => 4,
LongType or DoubleType => 8,
StructType st => st.Fields.Values.Count > 0
? st.Fields.Values.Max(f => GetAlignmentOfType(f.Type))
: 4,
_ => 4
};
}
/// <summary>
/// 对齐到指定边界
/// </summary>
public static int AlignUp(int value, int alignment)
{
return (value + alignment - 1) & ~(alignment - 1);
}
/// <summary>
/// 检查是否为数值类型
/// </summary>
public static bool IsNumericType(CType type)
{
return type is IntType or CharType or LongType or ShortType or FloatType or DoubleType;
}
/// <summary>
/// 检查是否为整数类型
/// </summary>
public static bool IsIntegerType(CType type)
{
return type is IntType or CharType or LongType or ShortType;
}
/// <summary>
/// 检查类型转换是否合法
/// </summary>
public static bool CanCast(CType source, CType target)
{
// 数值类型之间可以转换
if (IsNumericType(source) && IsNumericType(target))
{
return true;
}
// 指针和整数之间可以转换
if ((source is PointerType && IsIntegerType(target)) ||
(IsIntegerType(source) && target is PointerType))
{
return true;
}
// void* 和其他指针之间可以转换
if (source is PointerType { BaseType: VoidType } || target is PointerType { BaseType: VoidType })
{
return true;
}
return false;
}
}

View File

@@ -0,0 +1,6 @@
namespace TinyCC.Core;
public class Class1
{
}

View File

@@ -0,0 +1,769 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace TinyCC.Core;
/// <summary>
/// 优化的 x64 代码生成器
/// 支持寄存器分配、指令选择和代码优化
/// </summary>
public sealed class OptimizedX64CodeGenerator
{
private readonly MemoryStream _stream;
private readonly RegisterAllocator _registerAllocator;
private readonly Dictionary<string, int> _labelOffsets;
private readonly List<LabelPatch> _labelPatches;
private int _stackSize;
private int _tempCounter;
// x64 通用寄存器
private static readonly string[] GeneralPurposeRegs =
{
"rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
};
// 调用者保存的寄存器
internal static readonly string[] CallerSavedRegs =
{
"rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11"
};
// 被调用者保存的寄存器
internal static readonly string[] CalleeSavedRegs =
{
"rbx", "r12", "r13", "r14", "r15"
};
// 参数传递寄存器 (System V AMD64 ABI)
private static readonly string[] ArgRegs =
{
"rdi", "rsi", "rdx", "rcx", "r8", "r9"
};
public OptimizedX64CodeGenerator()
{
_stream = new MemoryStream();
_registerAllocator = new RegisterAllocator();
_labelOffsets = new Dictionary<string, int>();
_labelPatches = new List<LabelPatch>();
_stackSize = 0;
}
/// <summary>
/// 生成优化的机器码
/// </summary>
public byte[] Generate(IrProgram program)
{
foreach (var function in program.Functions)
{
GenerateFunction(function);
}
// 修补标签引用
PatchLabels();
return _stream.ToArray();
}
private void GenerateFunction(IrFunction function)
{
// 寄存器分配
_registerAllocator.Allocate(function);
// 计算栈帧大小
CalculateStackSize(function);
// 函数序言
EmitPrologue(function);
// 生成基本块
foreach (var block in function.BasicBlocks)
{
GenerateBasicBlock(block, function);
}
// 函数尾声
EmitEpilogue(function);
}
private void CalculateStackSize(IrFunction function)
{
// 计算局部变量所需空间
var localVarsSize = function.Locals.Sum(l => AlignUp(l.Size, 8));
// 计算寄存器溢出所需空间
var spillSize = _registerAllocator.GetSpillSize();
// 总栈大小16字节对齐
_stackSize = AlignUp(localVarsSize + spillSize, 16);
}
private void EmitPrologue(IrFunction function)
{
// push rbp
Emit(new byte[] { 0x55 });
// mov rbp, rsp
Emit(new byte[] { 0x48, 0x89, 0xE5 });
// 保存被调用者保存的寄存器
var usedCalleeSaved = _registerAllocator.GetUsedCalleeSavedRegs();
foreach (var reg in usedCalleeSaved)
{
EmitPush(reg);
}
// sub rsp, stackSize
if (_stackSize > 0)
{
if (_stackSize <= 127)
{
// sub rsp, imm8
Emit(new byte[] { 0x48, 0x83, 0xEC });
Emit((byte)_stackSize);
}
else
{
// sub rsp, imm32
Emit(new byte[] { 0x48, 0x81, 0xEC });
EmitInt32(_stackSize);
}
}
// 存储参数到局部变量
for (int i = 0; i < Math.Min(function.Locals.Count, ArgRegs.Length); i++)
{
if (i < function.Locals.Count)
{
var local = function.Locals[i];
var offset = GetLocalOffset(local.Name, function.Locals);
EmitMovRegToLocal(ArgRegs[i], offset);
}
}
}
private void EmitEpilogue(IrFunction function)
{
// mov rsp, rbp
Emit(new byte[] { 0x48, 0x89, 0xEC });
// 恢复被调用者保存的寄存器
var usedCalleeSaved = _registerAllocator.GetUsedCalleeSavedRegs();
for (int i = usedCalleeSaved.Count - 1; i >= 0; i--)
{
EmitPop(usedCalleeSaved[i]);
}
// pop rbp
Emit(new byte[] { 0x5D });
// ret
Emit(new byte[] { 0xC3 });
}
private void GenerateBasicBlock(IrBasicBlock block, IrFunction function)
{
// 记录标签位置
_labelOffsets[block.Label] = (int)_stream.Position;
foreach (var instr in block.Instructions)
{
GenerateInstruction(instr, function);
}
}
private void GenerateInstruction(IrInstruction instr, IrFunction function)
{
switch (instr)
{
case IrBinaryOp binary:
GenerateBinaryOp(binary, function);
break;
case IrUnaryOp unary:
GenerateUnaryOp(unary, function);
break;
case IrLoad load:
GenerateLoad(load, function);
break;
case IrStore store:
GenerateStore(store, function);
break;
case IrCall call:
GenerateCall(call, function);
break;
case IrReturn ret:
GenerateReturn(ret, function);
break;
case IrJump jump:
GenerateJump(jump);
break;
case IrBranch branch:
GenerateBranch(branch);
break;
case IrLabel label:
_labelOffsets[label.LabelName] = (int)_stream.Position;
break;
case IrMove move:
GenerateMove(move, function);
break;
}
}
private void GenerateBinaryOp(IrBinaryOp binary, IrFunction function)
{
var destReg = _registerAllocator.GetRegister(binary.Dest);
var leftReg = _registerAllocator.GetRegister(binary.Left);
var rightReg = _registerAllocator.GetRegister(binary.Right);
// 加载左操作数
LoadValue(binary.Left, leftReg, function);
// 加载右操作数
LoadValue(binary.Right, rightReg, function);
// 执行运算
EmitBinaryOp(binary.Op, leftReg, rightReg);
// 结果已经在 destReg 中
}
private void GenerateUnaryOp(IrUnaryOp unary, IrFunction function)
{
var destReg = _registerAllocator.GetRegister(unary.Dest);
var sourceReg = _registerAllocator.GetRegister(unary.Source);
LoadValue(unary.Source, sourceReg, function);
EmitUnaryOp(unary.Op, sourceReg);
}
private void GenerateLoad(IrLoad load, IrFunction function)
{
var destReg = _registerAllocator.GetRegister(load.Dest);
if (load.Address is IrLocal local)
{
var offset = GetLocalOffset(local.Name, function.Locals);
EmitMovLocalToReg(offset, destReg);
}
else if (load.Address is IrGlobal global)
{
EmitMovGlobalToReg(global.Name, destReg);
}
}
private void GenerateStore(IrStore store, IrFunction function)
{
var valueReg = _registerAllocator.GetRegister(store.Value);
LoadValue(store.Value, valueReg, function);
if (store.Address is IrLocal local)
{
var offset = GetLocalOffset(local.Name, function.Locals);
EmitMovRegToLocal(valueReg, offset);
}
else if (store.Address is IrGlobal global)
{
EmitMovRegToGlobal(valueReg, global.Name);
}
}
private void GenerateCall(IrCall call, IrFunction function)
{
// 准备参数
for (int i = 0; i < Math.Min(call.Arguments.Count, ArgRegs.Length); i++)
{
var argReg = _registerAllocator.GetRegister(call.Arguments[i]);
LoadValue(call.Arguments[i], argReg, function);
if (argReg != ArgRegs[i])
{
EmitMovRegToReg(argReg, ArgRegs[i]);
}
}
// 对齐栈到 16 字节
Emit(new byte[] { 0x48, 0x83, 0xE4, 0xF0 }); // and rsp, -16
// 调用函数
Emit(new byte[] { 0xE8 }); // call rel32
_labelPatches.Add(new LabelPatch(call.FunctionName, (int)_stream.Position));
EmitInt32(0); // 占位符,后续修补
// 存储返回值
if (call.Dest != null)
{
var destReg = _registerAllocator.GetRegister(call.Dest);
if (destReg != "rax")
{
EmitMovRegToReg("rax", destReg);
}
}
}
private void GenerateReturn(IrReturn ret, IrFunction function)
{
if (ret.Value != null)
{
LoadValue(ret.Value, "rax", function);
}
}
private void GenerateJump(IrJump jump)
{
Emit(new byte[] { 0xE9 }); // jmp rel32
_labelPatches.Add(new LabelPatch(jump.TargetLabel, (int)_stream.Position));
EmitInt32(0);
}
private void GenerateBranch(IrBranch branch)
{
// 加载条件到 rax
LoadValue(branch.Condition, "rax", new IrFunction("", new List<IrBasicBlock>(), new List<IrVariable>()));
// test rax, rax
Emit(new byte[] { 0x48, 0x85, 0xC0 });
// je rel32 (跳转到 FalseLabel)
Emit(new byte[] { 0x0F, 0x84 });
_labelPatches.Add(new LabelPatch(branch.FalseLabel, (int)_stream.Position));
EmitInt32(0);
}
private void GenerateMove(IrMove move, IrFunction function)
{
var destReg = _registerAllocator.GetRegister(move.Dest);
var sourceReg = _registerAllocator.GetRegister(move.Source);
LoadValue(move.Source, sourceReg, function);
if (destReg != sourceReg)
{
EmitMovRegToReg(sourceReg, destReg);
}
}
private void LoadValue(IrValue value, string reg, IrFunction function)
{
switch (value)
{
case IrConstant constant:
EmitLoadConstant(constant, reg);
break;
case IrLocal local:
var offset = GetLocalOffset(local.Name, function.Locals);
EmitMovLocalToReg(offset, reg);
break;
case IrGlobal global:
EmitMovGlobalToReg(global.Name, reg);
break;
}
}
private void EmitBinaryOp(IrBinaryOpType op, string leftReg, string rightReg)
{
var leftIdx = GetRegIndex(leftReg);
var rightIdx = GetRegIndex(rightReg);
byte[] instruction;
switch (op)
{
case IrBinaryOpType.Add:
// add left, right
instruction = new byte[] { 0x48, 0x01, (byte)(0xC0 | rightIdx << 3 | leftIdx) };
break;
case IrBinaryOpType.Sub:
// sub left, right
instruction = new byte[] { 0x48, 0x29, (byte)(0xC0 | rightIdx << 3 | leftIdx) };
break;
case IrBinaryOpType.Mul:
// imul left, right
instruction = new byte[] { 0x48, 0x0F, 0xAF, (byte)(0xC0 | rightIdx << 3 | leftIdx) };
break;
case IrBinaryOpType.And:
// and left, right
instruction = new byte[] { 0x48, 0x21, (byte)(0xC0 | rightIdx << 3 | leftIdx) };
break;
case IrBinaryOpType.Or:
// or left, right
instruction = new byte[] { 0x48, 0x09, (byte)(0xC0 | rightIdx << 3 | leftIdx) };
break;
case IrBinaryOpType.Xor:
// xor left, right
instruction = new byte[] { 0x48, 0x31, (byte)(0xC0 | rightIdx << 3 | leftIdx) };
break;
default:
throw new NotSupportedException($"Unsupported binary op: {op}");
}
Emit(instruction);
}
private void EmitUnaryOp(IrUnaryOpType op, string reg)
{
var regIdx = GetRegIndex(reg);
switch (op)
{
case IrUnaryOpType.Neg:
// neg reg
Emit(new byte[] { 0x48, 0xF7, (byte)(0xD8 | regIdx) });
break;
case IrUnaryOpType.Not:
// not reg
Emit(new byte[] { 0x48, 0xF7, (byte)(0xD0 | regIdx) });
break;
}
}
private void EmitLoadConstant(IrConstant constant, string reg)
{
var regIdx = GetRegIndex(reg);
// mov reg, imm64
Emit(new byte[] { 0x48, (byte)(0xB8 | regIdx) });
EmitInt64(Convert.ToInt64(constant.Value));
}
private void EmitMovLocalToReg(int offset, string reg)
{
var regIdx = GetRegIndex(reg);
if (offset >= -128 && offset <= 127)
{
// mov reg, [rbp+offset8]
Emit(new byte[] { 0x48, 0x8B, (byte)(0x40 | regIdx) });
Emit((byte)offset);
}
else
{
// mov reg, [rbp+offset32]
Emit(new byte[] { 0x48, 0x8B, (byte)(0x80 | regIdx) });
EmitInt32(offset);
}
}
private void EmitMovRegToLocal(string reg, int offset)
{
var regIdx = GetRegIndex(reg);
if (offset >= -128 && offset <= 127)
{
// mov [rbp+offset8], reg
Emit(new byte[] { 0x48, 0x89, (byte)(0x40 | regIdx) });
Emit((byte)offset);
}
else
{
// mov [rbp+offset32], reg
Emit(new byte[] { 0x48, 0x89, (byte)(0x80 | regIdx) });
EmitInt32(offset);
}
}
private void EmitMovRegToReg(string sourceReg, string destReg)
{
var sourceIdx = GetRegIndex(sourceReg);
var destIdx = GetRegIndex(destReg);
// mov dest, source
Emit(new byte[] { 0x48, 0x89, (byte)(0xC0 | sourceIdx << 3 | destIdx) });
}
private void EmitPush(string reg)
{
var regIdx = GetRegIndex(reg);
if (regIdx < 8)
{
Emit(new byte[] { (byte)(0x50 | regIdx) });
}
else
{
Emit(new byte[] { 0x41, (byte)(0x50 | (regIdx - 8)) });
}
}
private void EmitPop(string reg)
{
var regIdx = GetRegIndex(reg);
if (regIdx < 8)
{
Emit(new byte[] { (byte)(0x58 | regIdx) });
}
else
{
Emit(new byte[] { 0x41, (byte)(0x58 | (regIdx - 8)) });
}
}
private void EmitMovGlobalToReg(string name, string reg)
{
// 简化实现:假设全局变量在数据段
var regIdx = GetRegIndex(reg);
Emit(new byte[] { 0x48, (byte)(0xB8 | regIdx) });
EmitInt64(0); // 占位符
}
private void EmitMovRegToGlobal(string reg, string name)
{
// 简化实现
}
private int GetLocalOffset(string name, List<IrVariable> locals)
{
var index = locals.FindIndex(l => l.Name == name);
return -(index + 1) * 8;
}
private static int GetRegIndex(string reg)
{
return reg.ToLower() switch
{
"rax" => 0, "eax" => 0, "ax" => 0, "al" => 0,
"rcx" => 1, "ecx" => 1, "cx" => 1, "cl" => 1,
"rdx" => 2, "edx" => 2, "dx" => 2, "dl" => 2,
"rbx" => 3, "ebx" => 3, "bx" => 3, "bl" => 3,
"rsi" => 6, "esi" => 6, "si" => 6, "sil" => 6,
"rdi" => 7, "edi" => 7, "di" => 7, "dil" => 7,
"r8" => 8, "r8d" => 8, "r8w" => 8, "r8b" => 8,
"r9" => 9, "r9d" => 9, "r9w" => 9, "r9b" => 9,
"r10" => 10, "r10d" => 10, "r10w" => 10, "r10b" => 10,
"r11" => 11, "r11d" => 11, "r11w" => 11, "r11b" => 11,
"r12" => 12, "r12d" => 12, "r12w" => 12, "r12b" => 12,
"r13" => 13, "r13d" => 13, "r13w" => 13, "r13b" => 13,
"r14" => 14, "r14d" => 14, "r14w" => 14, "r14b" => 14,
"r15" => 15, "r15d" => 15, "r15w" => 15, "r15b" => 15,
_ => 0
};
}
private static int AlignUp(int value, int alignment)
{
return (value + alignment - 1) & ~(alignment - 1);
}
private void PatchLabels()
{
foreach (var patch in _labelPatches)
{
if (_labelOffsets.TryGetValue(patch.LabelName, out var offset))
{
var currentPos = patch.Position;
var relOffset = offset - (currentPos + 4);
_stream.Position = currentPos;
EmitInt32(relOffset);
}
}
}
private void Emit(byte[] bytes)
{
_stream.Write(bytes, 0, bytes.Length);
}
private void Emit(byte value)
{
_stream.WriteByte(value);
}
private void EmitInt32(int value)
{
var bytes = BitConverter.GetBytes(value);
_stream.Write(bytes, 0, bytes.Length);
}
private void EmitInt64(long value)
{
var bytes = BitConverter.GetBytes(value);
_stream.Write(bytes, 0, bytes.Length);
}
}
/// <summary>
/// 寄存器分配器
/// 使用图着色算法进行寄存器分配
/// </summary>
public sealed class RegisterAllocator
{
private readonly Dictionary<IrValue, string> _valueToReg = new();
private readonly HashSet<string> _usedRegs = new();
private readonly List<string> _spilledVars = new();
private int _spillSlot;
/// <summary>
/// 为函数分配寄存器
/// </summary>
public void Allocate(IrFunction function)
{
_valueToReg.Clear();
_usedRegs.Clear();
_spilledVars.Clear();
_spillSlot = 0;
// 构建干涉图
var interferenceGraph = BuildInterferenceGraph(function);
// 图着色
ColorGraph(interferenceGraph);
}
private Dictionary<IrValue, HashSet<IrValue>> BuildInterferenceGraph(IrFunction function)
{
var graph = new Dictionary<IrValue, HashSet<IrValue>>();
foreach (var block in function.BasicBlocks)
{
foreach (var instr in block.Instructions)
{
var defs = GetDefinedValues(instr);
var uses = GetUsedValues(instr);
foreach (var def in defs)
{
if (!graph.ContainsKey(def))
{
graph[def] = new HashSet<IrValue>();
}
// 定义与所有活跃变量干涉
foreach (var use in uses)
{
if (def != use)
{
graph[def].Add(use);
if (!graph.ContainsKey(use))
{
graph[use] = new HashSet<IrValue>();
}
graph[use].Add(def);
}
}
}
}
}
return graph;
}
private void ColorGraph(Dictionary<IrValue, HashSet<IrValue>> graph)
{
var availableRegs = new Queue<string>(OptimizedX64CodeGenerator.CallerSavedRegs);
// 简化图并分配
var stack = new Stack<IrValue>();
var remaining = new HashSet<IrValue>(graph.Keys);
while (remaining.Count > 0)
{
// 找到度数小于寄存器数量的节点
var node = remaining.FirstOrDefault(n => graph[n].Count(neighbor => remaining.Contains(neighbor)) < availableRegs.Count);
if (node != null)
{
stack.Push(node);
remaining.Remove(node);
}
else
{
// 溢出:选择一个节点
node = remaining.First();
_spilledVars.Add(GetName(node));
remaining.Remove(node);
}
}
// 出栈并分配寄存器
while (stack.Count > 0)
{
var node = stack.Pop();
var neighbors = graph[node].Where(n => _valueToReg.ContainsKey(n)).Select(n => _valueToReg[n]).ToHashSet();
var reg = availableRegs.FirstOrDefault(r => !neighbors.Contains(r));
if (reg != null)
{
_valueToReg[node] = reg;
_usedRegs.Add(reg);
}
else
{
// 溢出到栈
_spilledVars.Add(GetName(node));
}
}
}
public string GetRegister(IrValue value)
{
if (_valueToReg.TryGetValue(value, out var reg))
{
return reg;
}
// 默认返回 rax
return "rax";
}
public List<string> GetUsedCalleeSavedRegs()
{
return _usedRegs.Intersect(OptimizedX64CodeGenerator.CalleeSavedRegs).ToList();
}
public int GetSpillSize()
{
return _spilledVars.Count * 8;
}
private IEnumerable<IrValue> GetDefinedValues(IrInstruction instr)
{
return instr switch
{
IrBinaryOp binary => new[] { binary.Dest },
IrUnaryOp unary => new[] { unary.Dest },
IrLoad load => new[] { load.Dest },
IrCall call => call.Dest != null ? new[] { call.Dest } : Array.Empty<IrValue>(),
_ => Array.Empty<IrValue>()
};
}
private IEnumerable<IrValue> GetUsedValues(IrInstruction instr)
{
return instr switch
{
IrBinaryOp binary => new[] { binary.Left, binary.Right },
IrUnaryOp unary => new[] { unary.Source },
IrStore store => new[] { store.Value },
IrBranch branch => new[] { branch.Condition },
IrCall call => call.Arguments,
_ => Array.Empty<IrValue>()
};
}
private string GetName(IrValue value)
{
return value switch
{
IrTemp temp => temp.Name,
IrLocal local => local.Name,
_ => ""
};
}
}
/// <summary>
/// 标签修补记录
/// </summary>
public sealed record LabelPatch(string LabelName, int Position);

View File

@@ -0,0 +1,567 @@
namespace TinyCC.Core;
/// <summary>
/// x64 代码生成器
/// </summary>
public sealed class X64CodeGenerator
{
private readonly MemoryStream _stream;
private readonly Dictionary<string, int> _labelOffsets;
private readonly Dictionary<string, int> _funcOffsets;
private int _stackSize;
private readonly string? _debugName;
public X64CodeGenerator(string? debugName = null)
{
_stream = new MemoryStream();
_labelOffsets = new Dictionary<string, int>();
_funcOffsets = new Dictionary<string, int>();
_stackSize = 0;
_debugName = debugName;
}
/// <summary>
/// 生成机器码
/// </summary>
public byte[] Generate(IrProgram program)
{
_funcOffsets.Clear();
// 第一遍:生成 _start 包装器 (Linux ABI 入口点)
Emit(new byte[] { 0xE8 }); // call rel32
int callOpcodePos = (int)_stream.Position - 1; // E8 的位置
int callOffsetPos = (int)_stream.Position; // 偏移字段的位置
EmitInt32(0); // 占位
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[START] _start: call at {callOpcodePos}, offset at {callOffsetPos}\n");
// mov rdi, rax
Emit(new byte[] { 0x48, 0x89, 0xC7 });
// mov rax, 60 (sys_exit)
Emit(new byte[] { 0x48, 0xC7, 0xC0, 0x3C, 0x00, 0x00, 0x00 });
// syscall
Emit(new byte[] { 0x0F, 0x05 });
// 记录每个函数的起始偏移
foreach (var func in program.Functions)
{
_funcOffsets[func.Name] = (int)_stream.Position;
GenerateFunction(func);
}
// 修补 call main 的相对偏移
if (_funcOffsets.TryGetValue("main", out int mainOffset))
{
// call rel32: 目标地址 = 当前指令地址 + 5 + 相对偏移
// 当前指令地址 = callOpcodePos, 所以: 相对偏移 = 目标 - (callOpcodePos + 5)
int relOffset = mainOffset - (callOpcodePos + 5);
var savedPos = _stream.Position;
_stream.Position = callOffsetPos;
EmitInt32(relOffset);
_stream.Position = savedPos;
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[PATCH] call main at opcode={callOpcodePos} offset={callOffsetPos} -> main@{mainOffset}, rel={relOffset}\n");
}
var code = _stream.ToArray();
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[HEX] {BitConverter.ToString(code)}\n");
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[SIZE] Total code size: {code.Length}\n");
return code;
}
private void GenerateFunction(IrFunction function)
{
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[FUNC] Generating {function.Name}\n");
// 计算栈帧大小
_stackSize = function.Locals.Count * 8; // 每个局部变量 8 字节
// 对齐到 16 字节
_stackSize = (_stackSize + 15) & ~15;
// 清除标签偏移
_labelOffsets.Clear();
// 函数序言push rbp; mov rbp, rsp; sub rsp, stackSize
Emit(new byte[] { 0x55 }); // push rbp
Emit(new byte[] { 0x48, 0x89, 0xE5 }); // mov rbp, rsp
if (_stackSize > 0)
{
Emit(new byte[] { 0x48, 0x81, 0xEC }); // sub rsp, imm32
EmitInt32(_stackSize);
}
// 保存参数寄存器到栈槽 (System V AMD64 ABI)
// 参数在 Locals 的最前面
var paramRegs = new string[] { "rdi", "rsi", "rdx", "rcx", "r8", "r9" };
for (int i = 0; i < function.ParameterCount && i < paramRegs.Length; i++)
{
var reg = paramRegs[i];
var offset = GetLocalOffset(function.Locals[i].Name, function.Locals);
// mov [rbp+offset], reg
Emit(new byte[] { 0x48, 0x89, (byte)(0x45 + (GetRegIndex(reg) << 3)) });
EmitInt8((sbyte)offset);
}
// 生成代码并修补跳转
GenerateBasicBlocksWithPatches(function, function.Locals);
// 函数尾声mov rsp, rbp; pop rbp; ret
Emit(new byte[] { 0x48, 0x89, 0xEC }); // mov rsp, rbp
Emit(new byte[] { 0x5D }); // pop rbp
Emit(new byte[] { 0xC3 }); // ret
}
private int EstimateInstructionLength(IrInstruction instr)
{
return instr switch
{
IrBinaryOp => 15,
IrUnaryOp => 10,
IrStore => 10,
IrCall => 20,
IrReturn => 10,
IrLabel => 0,
IrJump => 5,
IrBranch => 14,
IrMove => 15,
_ => 10
};
}
private void GenerateBasicBlocksWithPatches(IrFunction function, List<IrVariable> locals)
{
var patches = new List<PatchInfo>();
var callPatches = new List<CallPatchInfo>();
foreach (var block in function.BasicBlocks)
{
foreach (var instr in block.Instructions)
{
switch (instr)
{
case IrLabel label:
_labelOffsets[label.LabelName] = (int)_stream.Position;
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[LABEL] {label.LabelName} -> offset {(int)_stream.Position}\n");
break;
case IrJump jump:
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[JUMP] Creating patch for {jump.TargetLabel} at offset {(int)_stream.Position}\n");
patches.Add(new PatchInfo(
Offset: (int)_stream.Position,
TargetLabel: jump.TargetLabel,
Type: PatchType.Jmp
));
Emit(new byte[] { 0xE9 }); // jmp rel32
EmitInt32(0); // 占位
break;
case IrBranch branch:
// IrBranch 语义条件为真则顺序执行Fallthrough为假则跳转 FalseLabel
// 但 IR 传入了 TrueLabel 和 FalseLabel。通常 TrueLabel 就是下一条指令的地址。
// 我们只需要je FalseLabel。真分支自然 fallthrough。
LoadValue(branch.Condition, GetRegister(0), locals);
Emit(new byte[] { 0x48, 0x85, 0xC0 }); // test rax, rax
// je FalseLabel
patches.Add(new PatchInfo(
Offset: (int)_stream.Position,
TargetLabel: branch.FalseLabel,
Type: PatchType.Je
));
Emit(new byte[] { 0x0F, 0x84 });
EmitInt32(0); // 占位
break;
case IrCall call:
GenerateCallWithPatches(call, locals, callPatches);
break;
default:
GenerateInstruction(instr, locals);
break;
}
}
}
// 修补跳转偏移
foreach (var patch in patches)
{
if (_labelOffsets.TryGetValue(patch.TargetLabel, out var targetOffset))
{
var instrLength = patch.Type switch
{
PatchType.Jmp => 5,
PatchType.Je => 6,
PatchType.Jne => 6,
_ => 5
};
var relativeOffset = targetOffset - (patch.Offset + instrLength);
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[PATCH] {patch.Type} at {patch.Offset} -> {patch.TargetLabel}@{targetOffset}, rel={relativeOffset}\n");
var savedPos = _stream.Position;
_stream.Position = patch.Offset + (instrLength - 4); // 跳过操作码
EmitInt32(relativeOffset);
_stream.Position = savedPos;
}
else
{
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[WARN] Label not found: {patch.TargetLabel}\n");
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[WARN] Known labels: {string.Join(", ", _labelOffsets.Keys)}\n");
}
}
// 修补调用偏移
foreach (var cp in callPatches)
{
if (_funcOffsets.TryGetValue(cp.TargetName, out var targetOffset))
{
var relOffset = targetOffset - (cp.Offset + 5);
var savedPos = _stream.Position;
_stream.Position = cp.Offset + 1;
EmitInt32(relOffset);
_stream.Position = savedPos;
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[PATCH] call {cp.TargetName} at {cp.Offset} -> {cp.TargetName}@{targetOffset}, rel={relOffset}\n");
}
else
{
File.AppendAllText("/tmp/tinycc-debug/debug.log", $"[WARN] Function not found: {cp.TargetName}\n");
}
}
}
private record PatchInfo(
int Offset,
string TargetLabel,
PatchType Type,
string? FalseLabel = null
);
private enum PatchType
{
Jmp,
Jne,
Je
}
private void GenerateBasicBlock(IrBasicBlock block, List<IrVariable> locals)
{
foreach (var instr in block.Instructions)
{
GenerateInstruction(instr, locals);
}
}
private void GenerateInstruction(IrInstruction instr, List<IrVariable> locals)
{
switch (instr)
{
case IrBinaryOp binary:
GenerateBinaryOp(binary, locals);
break;
case IrUnaryOp unary:
GenerateUnaryOp(unary, locals);
break;
case IrStore store:
GenerateStore(store, locals);
break;
case IrReturn ret:
GenerateReturn(ret, locals);
break;
case IrJump jump:
GenerateJump(jump);
break;
case IrBranch branch:
GenerateBranch(branch);
break;
case IrMove move:
GenerateMove(move, locals);
break;
case IrNop:
Emit(new byte[] { 0x90 }); // nop
break;
case IrLabel label:
// 标签处理已经移到 GenerateBasicBlocksWithPatches 中
break;
}
}
private void GenerateBinaryOp(IrBinaryOp binary, List<IrVariable> locals)
{
// 简化实现:假设操作数都在栈上或常量
var leftReg = GetRegister(0); // rax
var rightReg = GetRegister(1); // rcx
// 加载左操作数到 rax
LoadValue(binary.Left, leftReg, locals);
// 加载右操作数到 rcx
LoadValue(binary.Right, rightReg, locals);
// 执行运算
byte[] op = binary.Op switch
{
IrBinaryOpType.Add => new byte[] { 0x48, 0x01, 0xC8 }, // add rax, rcx
IrBinaryOpType.Sub => new byte[] { 0x48, 0x29, 0xC8 }, // sub rax, rcx
IrBinaryOpType.Mul => new byte[] { 0x48, 0x0F, 0xAF, 0xC1 }, // imul rax, rcx
IrBinaryOpType.Div => GenerateDiv(leftReg, rightReg), // idiv rcx
IrBinaryOpType.Mod => GenerateMod(leftReg, rightReg), // idiv rcx, return remainder
IrBinaryOpType.And => new byte[] { 0x48, 0x21, 0xC8 }, // and rax, rcx
IrBinaryOpType.Or => new byte[] { 0x48, 0x09, 0xC8 }, // or rax, rcx
IrBinaryOpType.Xor => new byte[] { 0x48, 0x31, 0xC8 }, // xor rax, rcx
IrBinaryOpType.Shl => new byte[] { 0x48, 0xD3, 0xE0 }, // shl rax, cl
IrBinaryOpType.Shr => new byte[] { 0x48, 0xD3, 0xE8 }, // shr rax, cl
// 比较运算:设置 rax = (rax op rcx) ? 1 : 0
IrBinaryOpType.Eq => GenerateCompare(leftReg, rightReg, "sete"),
IrBinaryOpType.Ne => GenerateCompare(leftReg, rightReg, "setne"),
IrBinaryOpType.Lt => GenerateCompare(leftReg, rightReg, "setl"),
IrBinaryOpType.Gt => GenerateCompare(leftReg, rightReg, "setg"),
IrBinaryOpType.Le => GenerateCompare(leftReg, rightReg, "setle"),
IrBinaryOpType.Ge => GenerateCompare(leftReg, rightReg, "setge"),
_ => throw new NotSupportedException($"Unsupported binary op: {binary.Op}")
};
Emit(op);
// 存储结果到目标位置
StoreValue(binary.Dest, leftReg, locals);
}
private byte[] GenerateDiv(string leftReg, string rightReg)
{
// idiv rcx: rdx:rax / rcx -> rax = quotient, rdx = remainder
// 需要先 cdq 扩展符号到 rdx
return new byte[] { 0x48, 0x99, 0x48, 0xF7, 0xF9 }; // cdq; idiv rcx
}
private byte[] GenerateMod(string leftReg, string rightReg)
{
// idiv rcx 后 rdx 包含余数,需要移动到 rax
// cdq; idiv rcx; mov rax, rdx
return new byte[] { 0x48, 0x99, 0x48, 0xF7, 0xF9, 0x48, 0x89, 0xD0 }; // cdq; idiv rcx; mov rax, rdx
}
private byte[] GenerateCompare(string leftReg, string rightReg, string setcc)
{
// cmp rax, rcx; setXX al; movzx rax, al
var setccCode = setcc switch
{
"sete" => new byte[] { 0x0F, 0x94, 0xC0 },
"setne" => new byte[] { 0x0F, 0x95, 0xC0 },
"setl" => new byte[] { 0x0F, 0x9C, 0xC0 },
"setg" => new byte[] { 0x0F, 0x9F, 0xC0 },
"setle" => new byte[] { 0x0F, 0x9E, 0xC0 },
"setge" => new byte[] { 0x0F, 0x9D, 0xC0 },
_ => throw new ArgumentException($"Unknown setcc: {setcc}")
};
// cmp rax, rcx = 48 39 C8
var result = new byte[3 + setccCode.Length + 4];
result[0] = 0x48;
result[1] = 0x39;
result[2] = 0xC8;
Array.Copy(setccCode, 0, result, 3, setccCode.Length);
// movzx rax, al = 0F B6 C0
result[3 + setccCode.Length] = 0x0F;
result[3 + setccCode.Length + 1] = 0xB6;
result[3 + setccCode.Length + 2] = 0xC0;
result[3 + setccCode.Length + 3] = 0x90; // nop for alignment
return result;
}
private void GenerateUnaryOp(IrUnaryOp unary, List<IrVariable> locals)
{
var reg = GetRegister(0); // rax
LoadValue(unary.Source, reg, locals);
var op = unary.Op switch
{
IrUnaryOpType.Neg => new byte[] { 0x48, 0xF7, 0xD8 }, // neg rax
IrUnaryOpType.Not => new byte[] { 0x48, 0xF7, 0xD0 }, // not rax
_ => throw new NotSupportedException($"Unsupported unary op: {unary.Op}")
};
Emit(op);
StoreValue(unary.Dest, reg, locals);
}
private void GenerateStore(IrStore store, List<IrVariable> locals)
{
var valueReg = GetRegister(0); // rax
LoadValue(store.Value, valueReg, locals);
if (store.Address is IrLocal local)
{
var offset = GetLocalOffset(local.Name, locals);
Emit(new byte[] { 0x48, 0x89, 0x45 }); // mov [rbp+offset], rax
EmitInt8((sbyte)offset);
}
}
private void GenerateCallWithPatches(IrCall call, List<IrVariable> locals, List<CallPatchInfo> callPatches)
{
// x64 调用约定:前 6 个参数通过寄存器传递rdi, rsi, rdx, rcx, r8, r9
var argRegs64 = new string[] { "rdi", "rsi", "rdx", "rcx", "r8", "r9" };
for (int i = 0; i < Math.Min(call.Arguments.Count, 6); i++)
{
var arg = call.Arguments[i];
LoadValue(arg, argRegs64[i], locals);
}
// 调用函数
callPatches.Add(new CallPatchInfo(
Offset: (int)_stream.Position,
TargetName: call.FunctionName
));
Emit(new byte[] { 0xE8 }); // call rel32
EmitInt32(0); // 占位
// 将返回值rax存储到目标 temp
if (call.Dest != null)
{
StoreValue(call.Dest, GetRegister(0), locals);
}
}
private record CallPatchInfo(int Offset, string TargetName);
private void GenerateReturn(IrReturn ret, List<IrVariable> locals)
{
if (ret.Value != null)
{
LoadValue(ret.Value, GetRegister(0), locals);
}
// 直接返回,跳转到函数尾声
Emit(new byte[] { 0x48, 0x89, 0xEC }); // mov rsp, rbp
Emit(new byte[] { 0x5D }); // pop rbp
Emit(new byte[] { 0xC3 }); // ret
}
private void GenerateJump(IrJump jump)
{
Emit(new byte[] { 0xE9 }); // jmp rel32
EmitInt32(0); // TODO: 计算相对偏移
}
private void GenerateBranch(IrBranch branch)
{
// 加载条件到 rax
LoadValue(branch.Condition, GetRegister(0), new List<IrVariable>());
// jne rel32
Emit(new byte[] { 0x0F, 0x85 });
EmitInt32(0); // TODO: 计算相对偏移
}
private void GenerateMove(IrMove move, List<IrVariable> locals)
{
var reg = GetRegister(0);
LoadValue(move.Source, reg, locals);
StoreValue(move.Dest, reg, locals);
}
private void LoadValue(IrValue value, string reg, List<IrVariable> locals)
{
switch (value)
{
case IrConstant constant:
// mov reg, imm64
Emit(new byte[] { 0x48, (byte)(0xB8 + GetRegIndex(reg)) });
EmitInt64(Convert.ToInt64(constant.Value));
break;
case IrLocal local:
case IrTemp temp:
var name = value switch { IrLocal l => l.Name, IrTemp t => t.Name, _ => throw new Exception() };
var offset = GetLocalOffset(name, locals);
// mov reg, [rbp+offset]
Emit(new byte[] { 0x48, 0x8B, (byte)(0x45 + (GetRegIndex(reg) << 3)) });
EmitInt8((sbyte)offset);
break;
}
}
private void StoreValue(IrValue value, string reg, List<IrVariable> locals)
{
string? name = value switch { IrLocal l => l.Name, IrTemp t => t.Name, _ => null };
if (name != null)
{
var offset = GetLocalOffset(name, locals);
// mov [rbp+offset], reg
Emit(new byte[] { 0x48, 0x89, (byte)(0x45 + (GetRegIndex(reg) << 3)) });
EmitInt8((sbyte)offset);
}
}
private int GetLocalOffset(string name, List<IrVariable> locals)
{
var index = locals.FindIndex(l => l.Name == name);
return -(index + 1) * 8 - 8; // 从 rbp 向下偏移
}
private static string GetRegister(int index)
{
return index switch
{
0 => "rax",
1 => "rcx",
2 => "rdx",
3 => "rbx",
_ => "rax"
};
}
private static int GetRegIndex(string reg)
{
return reg.ToLower() switch
{
"rax" => 0,
"rcx" => 1,
"rdx" => 2,
"rbx" => 3,
"rsp" => 4,
"rbp" => 5,
"rsi" => 6,
"rdi" => 7,
"r8" => 8,
"r9" => 9,
"r10" => 10,
"r11" => 11,
"r12" => 12,
"r13" => 13,
"r14" => 14,
"r15" => 15,
_ => 0
};
}
private void Emit(byte[] bytes)
{
_stream.Write(bytes, 0, bytes.Length);
}
private void EmitInt8(sbyte value)
{
_stream.WriteByte((byte)(value & 0xFF));
}
private void EmitInt32(int value)
{
var bytes = BitConverter.GetBytes(value);
_stream.Write(bytes, 0, bytes.Length);
}
private void EmitInt64(long value)
{
var bytes = BitConverter.GetBytes(value);
_stream.Write(bytes, 0, bytes.Length);
}
}

View File

@@ -0,0 +1,147 @@
namespace TinyCC.Core;
/// <summary>
/// 编译器驱动类
/// </summary>
public sealed class CompilerDriver
{
private readonly IErrorReporter _errorReporter;
public CompilerDriver(IErrorReporter errorReporter)
{
_errorReporter = errorReporter;
}
/// <summary>
/// 编译 C 源代码
/// </summary>
public CompilationResult Compile(CompilationOptions options)
{
try
{
// 0. 预处理
var preprocessor = new Preprocessor(_errorReporter);
foreach (var includePath in options.IncludePaths)
{
preprocessor.AddIncludePath(includePath);
}
var preprocessedSource = preprocessor.Preprocess(options.SourceFile);
if (_errorReporter.HasErrors)
{
return new CompilationResult(false, "预处理失败");
}
// 1. 词法分析
var lexer = new Lexer(preprocessedSource, options.SourceFile, _errorReporter);
var tokens = lexer.Tokenize().ToList();
if (_errorReporter.HasErrors)
{
return new CompilationResult(false, "词法分析失败");
}
// 2. 语法分析
var parser = new Parser(tokens, _errorReporter);
var ast = parser.Parse();
if (_errorReporter.HasErrors)
{
return new CompilationResult(false, "语法分析失败");
}
// 3. 语义分析
var semanticAnalyzer = new SemanticAnalyzer(_errorReporter);
semanticAnalyzer.Analyze(ast);
if (_errorReporter.HasErrors)
{
return new CompilationResult(false, "语义分析失败");
}
// 4. IR 生成
var irGen = new IrGenerator();
var ir = irGen.Generate(ast);
// 5. 代码生成
var codeGen = new X64CodeGenerator();
var machineCode = codeGen.Generate(ir);
// 6. 生成可执行文件
var outputFile = options.OutputFile ?? GetDefaultOutputName(options.SourceFile);
var outputDir = Path.GetDirectoryName(outputFile);
if (!string.IsNullOrEmpty(outputDir) && !Directory.Exists(outputDir))
{
Directory.CreateDirectory(outputDir);
}
byte[] executable;
if (options.Platform == TargetPlatform.WindowsX64 || options.Platform == TargetPlatform.WindowsX86)
{
var peWriter = new PeWriter();
executable = peWriter.WriteExecutable(machineCode, null, "main");
}
else
{
var elfWriter = new ElfWriter();
executable = elfWriter.WriteExecutable(machineCode, "main");
}
File.WriteAllBytes(outputFile, executable);
return new CompilationResult(true, outputFile);
}
catch (Exception ex)
{
return new CompilationResult(false, $"编译失败: {ex.Message}\n{ex.StackTrace}");
}
}
private string GetDefaultOutputName(string sourceFile)
{
var nameWithoutExt = Path.ChangeExtension(sourceFile, null) ?? "a.out";
// 根据平台决定扩展名
return "a.out"; // Linux 默认
}
}
/// <summary>
/// 编译选项
/// </summary>
public sealed record CompilationOptions(
string SourceFile,
string? OutputFile = null,
TargetPlatform Platform = TargetPlatform.LinuxX64,
List<string>? IncludePaths = null
)
{
public List<string> IncludePaths { get; } = IncludePaths ?? new List<string>();
}
/// <summary>
/// 编译结果
/// </summary>
public sealed record CompilationResult(
bool Success,
string Message
);
/// <summary>
/// 目标架构
/// </summary>
public enum TargetArchitecture
{
X86,
X64
}
/// <summary>
/// 目标平台
/// </summary>
public enum TargetPlatform
{
LinuxX64,
LinuxX86,
WindowsX64,
WindowsX86
}

View File

@@ -0,0 +1,83 @@
namespace TinyCC.Core;
/// <summary>
/// 错误级别
/// </summary>
public enum ErrorLevel
{
Warning,
Error,
Fatal
}
/// <summary>
/// 源代码位置信息
/// </summary>
public readonly struct SourceLocation : IEquatable<SourceLocation>
{
public string FileName { get; }
public int Line { get; }
public int Column { get; }
public SourceLocation(string fileName, int line, int column)
{
FileName = fileName;
Line = line;
Column = column;
}
public bool Equals(SourceLocation other)
{
return FileName == other.FileName && Line == other.Line && Column == other.Column;
}
public override bool Equals(object? obj)
{
return obj is SourceLocation other && Equals(other);
}
public override int GetHashCode()
{
return HashCode.Combine(FileName, Line, Column);
}
public override string ToString()
{
return $"{FileName}({Line}:{Column})";
}
public static bool operator ==(SourceLocation left, SourceLocation right)
{
return left.Equals(right);
}
public static bool operator !=(SourceLocation left, SourceLocation right)
{
return !(left == right);
}
}
/// <summary>
/// 错误信息
/// </summary>
public record ErrorInfo(
ErrorLevel Level,
string Message,
SourceLocation Location,
string? Suggestion = null
)
{
public override string ToString()
{
var prefix = Level switch
{
ErrorLevel.Warning => "warning",
ErrorLevel.Error => "error",
ErrorLevel.Fatal => "fatal error",
_ => "unknown"
};
var suggestionText = Suggestion != null ? $"\n hint: {Suggestion}" : "";
return $"{Location}: {prefix}: {Message}{suggestionText}";
}
}

View File

@@ -0,0 +1,27 @@
namespace TinyCC.Core;
/// <summary>
/// 默认错误报告器实现
/// </summary>
public sealed class ErrorReporter : IErrorReporter
{
private readonly List<ErrorInfo> _errors = new();
public bool HasErrors => _errors.Any(e => e.Level is ErrorLevel.Error or ErrorLevel.Fatal);
public void Report(ErrorInfo error)
{
_errors.Add(error);
Console.Error.WriteLine(error.ToString());
}
public IEnumerable<ErrorInfo> GetErrors()
{
return _errors.AsReadOnly();
}
public void Clear()
{
_errors.Clear();
}
}

View File

@@ -0,0 +1,12 @@
namespace TinyCC.Core;
/// <summary>
/// 错误报告器接口
/// </summary>
public interface IErrorReporter
{
void Report(ErrorInfo error);
bool HasErrors { get; }
IEnumerable<ErrorInfo> GetErrors();
void Clear();
}

View File

@@ -0,0 +1,374 @@
using System;
using System.Collections.Generic;
using TinyCC.Core;
namespace TinyCC.Core;
/// <summary>
/// IR 生成器
/// </summary>
public sealed class IrGenerator
{
private int _tempCounter;
private int _labelCounter;
private readonly List<IrFunction> _functions = new();
private List<IrInstruction> _currentInstructions = new();
private List<IrVariable> _currentLocals = new();
public IrProgram Generate(ProgramNode program)
{
foreach (var decl in program.Declarations)
{
if (decl is FunctionDeclarationNode func)
{
GenerateFunction(func);
}
}
return new IrProgram(_functions);
}
private void GenerateFunction(FunctionDeclarationNode func)
{
_currentInstructions = new List<IrInstruction>();
_currentLocals = new List<IrVariable>();
// 为参数分配栈空间
foreach (var param in func.Parameters)
{
_currentLocals.Add(new IrVariable(param.Name, GetTypeName(param.Type), 8));
}
// 生成函数体
GenerateBlock(func.Body);
// 如果没有 return 语句,隐式添加 return
if (_currentInstructions.Count == 0 || _currentInstructions[^1] is not IrReturn)
{
_currentInstructions.Add(new IrReturn(null));
}
var function = new IrFunction(func.Name, new List<IrBasicBlock>
{
new IrBasicBlock($"func_{func.Name}", _currentInstructions)
}, _currentLocals, func.Parameters.Count);
_functions.Add(function);
}
private void GenerateBlock(BlockStatementNode block)
{
foreach (var stmt in block.Statements)
{
switch (stmt)
{
case StatementNode statement:
GenerateStatement(statement);
break;
case VariableDeclarationNode varDecl:
GenerateLocalVariableDeclaration(varDecl);
break;
}
}
}
private void GenerateLocalVariableDeclaration(VariableDeclarationNode varDecl)
{
// 为局部变量分配栈空间
var local = new IrVariable(varDecl.Name, GetTypeName(varDecl.Type), 8);
_currentLocals.Add(local);
// 如果有初始化器,生成赋值代码
if (varDecl.Initializer != null)
{
var value = GenerateExpression(varDecl.Initializer);
_currentInstructions.Add(new IrStore(new IrLocal(varDecl.Name, GetTypeName(varDecl.Type), 0), value));
}
}
private void GenerateStatement(StatementNode stmt)
{
switch (stmt)
{
case BlockStatementNode block:
GenerateBlock(block);
break;
case ExpressionStatementNode exprStmt:
GenerateExpression(exprStmt.Expression);
break;
case ReturnStatementNode returnStmt:
if (returnStmt.Expression != null)
{
var value = GenerateExpression(returnStmt.Expression);
_currentInstructions.Add(new IrReturn(value));
}
else
{
_currentInstructions.Add(new IrReturn(null));
}
break;
case IfStatementNode ifStmt:
GenerateIfStatement(ifStmt);
break;
case WhileStatementNode whileStmt:
GenerateWhileStatement(whileStmt);
break;
case ForStatementNode forStmt:
GenerateForStatement(forStmt);
break;
case BreakStatementNode:
_currentInstructions.Add(new IrJump(GetBreakLabel()));
break;
case ContinueStatementNode:
_currentInstructions.Add(new IrJump(GetContinueLabel()));
break;
}
// 处理变量声明(在表达式语句内部)
if (stmt is ExpressionStatementNode exprStmt2 && exprStmt2.Expression is AssignmentExpressionNode assign)
{
// 这已经在 GenerateExpression 中处理了
}
}
private void GenerateIfStatement(IfStatementNode ifStmt)
{
var elseLabel = NewLabel("else");
var endLabel = NewLabel("endif");
var condition = GenerateExpression(ifStmt.Condition);
_currentInstructions.Add(new IrBranch(condition, endLabel, elseLabel));
GenerateStatement(ifStmt.ThenBranch);
_currentInstructions.Add(new IrJump(endLabel));
_currentInstructions.Add(new IrLabel(elseLabel));
if (ifStmt.ElseBranch != null)
{
GenerateStatement(ifStmt.ElseBranch);
}
else
{
// No else branch: elseLabel is at the same position as endLabel.
// To prevent IrJump(endLabel) from having a 0 offset, add a nop.
_currentInstructions.Add(new IrNop());
}
_currentInstructions.Add(new IrLabel(endLabel));
}
private void GenerateWhileStatement(WhileStatementNode whileStmt)
{
var startLabel = NewLabel("while_start");
var endLabel = NewLabel("while_end");
PushLoopLabels(endLabel, startLabel);
_currentInstructions.Add(new IrLabel(startLabel));
var condition = GenerateExpression(whileStmt.Condition);
_currentInstructions.Add(new IrBranch(condition, startLabel, endLabel));
GenerateStatement(whileStmt.Body);
_currentInstructions.Add(new IrJump(startLabel));
_currentInstructions.Add(new IrLabel(endLabel));
PopLoopLabels();
}
private void GenerateForStatement(ForStatementNode forStmt)
{
var startLabel = NewLabel("for_start");
var condLabel = NewLabel("for_cond");
var incLabel = NewLabel("for_inc");
var endLabel = NewLabel("for_end");
// 初始化
if (forStmt.Init != null)
{
switch (forStmt.Init)
{
case StatementNode statement:
GenerateStatement(statement);
break;
case VariableDeclarationNode varDecl:
GenerateLocalVariableDeclaration(varDecl);
break;
}
}
// 条件判断
_currentInstructions.Add(new IrLabel(condLabel));
PushLoopLabels(endLabel, incLabel);
if (forStmt.Condition != null)
{
var condition = GenerateExpression(forStmt.Condition);
_currentInstructions.Add(new IrBranch(condition, incLabel, endLabel));
}
_currentInstructions.Add(new IrLabel(startLabel));
GenerateStatement(forStmt.Body);
// 增量
_currentInstructions.Add(new IrLabel(incLabel));
if (forStmt.Increment != null)
{
GenerateExpression(forStmt.Increment);
}
_currentInstructions.Add(new IrJump(condLabel));
_currentInstructions.Add(new IrLabel(endLabel));
PopLoopLabels();
}
private void GenerateVariableDeclaration(VariableDeclarationNode varDecl)
{
var local = new IrVariable(varDecl.Name, GetTypeName(varDecl.Type), 8);
_currentLocals.Add(local);
if (varDecl.Initializer != null)
{
var value = GenerateExpression(varDecl.Initializer);
_currentInstructions.Add(new IrStore(new IrLocal(varDecl.Name, GetTypeName(varDecl.Type), 0), value));
}
}
private IrValue GenerateExpression(ExpressionNode expr)
{
return expr switch
{
LiteralExpressionNode lit => new IrConstant(lit.Value, GetLiteralTypeName(lit.Type)),
IdentifierExpressionNode id => new IrLocal(id.Name, "int", 0),
BinaryExpressionNode binary => GenerateBinaryExpression(binary),
UnaryExpressionNode unary => GenerateUnaryExpression(unary),
AssignmentExpressionNode assign => GenerateAssignmentExpression(assign),
FunctionCallExpressionNode call => GenerateFunctionCall(call),
_ => throw new NotSupportedException($"Unsupported expression type: {expr.GetType()}")
};
}
private IrValue GenerateBinaryExpression(BinaryExpressionNode binary)
{
var left = GenerateExpression(binary.Left);
var right = GenerateExpression(binary.Right);
var dest = NewTemp();
var op = binary.Operator switch
{
TokenType.Plus => IrBinaryOpType.Add,
TokenType.Minus => IrBinaryOpType.Sub,
TokenType.Star => IrBinaryOpType.Mul,
TokenType.Slash => IrBinaryOpType.Div,
TokenType.Percent => IrBinaryOpType.Mod,
TokenType.BitAnd => IrBinaryOpType.And,
TokenType.BitOr => IrBinaryOpType.Or,
TokenType.BitXor => IrBinaryOpType.Xor,
TokenType.LeftShift => IrBinaryOpType.Shl,
TokenType.RightShift => IrBinaryOpType.Shr,
TokenType.Equal => IrBinaryOpType.Eq,
TokenType.NotEqual => IrBinaryOpType.Ne,
TokenType.Less => IrBinaryOpType.Lt,
TokenType.Greater => IrBinaryOpType.Gt,
TokenType.LessEqual => IrBinaryOpType.Le,
TokenType.GreaterEqual => IrBinaryOpType.Ge,
_ => throw new NotSupportedException($"Unsupported binary operator: {binary.Operator}")
};
_currentInstructions.Add(new IrBinaryOp(dest, op, left, right));
return dest;
}
private IrValue GenerateUnaryExpression(UnaryExpressionNode unary)
{
var source = GenerateExpression(unary.Expression);
var dest = NewTemp();
var op = unary.Operator switch
{
TokenType.Minus => IrUnaryOpType.Neg,
TokenType.Not => IrUnaryOpType.Not,
TokenType.BitNot => IrUnaryOpType.BitNot,
_ => throw new NotSupportedException($"Unsupported unary operator: {unary.Operator}")
};
_currentInstructions.Add(new IrUnaryOp(dest, op, source));
return dest;
}
private IrValue GenerateAssignmentExpression(AssignmentExpressionNode assign)
{
var value = GenerateExpression(assign.Right);
if (assign.Left is IdentifierExpressionNode id)
{
_currentInstructions.Add(new IrStore(new IrLocal(id.Name, "int", 0), value));
return value;
}
throw new NotSupportedException("Unsupported assignment target");
}
private IrValue GenerateFunctionCall(FunctionCallExpressionNode call)
{
var args = new List<IrValue>();
foreach (var arg in call.Arguments)
{
args.Add(GenerateExpression(arg));
}
var dest = new IrTemp($"t{++_tempCounter}", "int");
_currentInstructions.Add(new IrCall(dest, call.FunctionName, args));
return dest;
}
private IrTemp NewTemp() => new($"t{++_tempCounter}", "int");
private string NewLabel(string prefix) => $"{prefix}_{++_labelCounter}";
private string _breakLabel = "";
private string _continueLabel = "";
private void PushLoopLabels(string breakLabel, string continueLabel)
{
_breakLabel = breakLabel;
_continueLabel = continueLabel;
}
private void PopLoopLabels()
{
_breakLabel = "";
_continueLabel = "";
}
private string GetBreakLabel() => _breakLabel;
private string GetContinueLabel() => _continueLabel;
private string GetTypeName(TypeNode type)
{
return type switch
{
PrimitiveTypeNode p => p.TypeName,
PointerTypeNode => "pointer",
ArrayTypeNode => "array",
_ => "unknown"
};
}
private string GetLiteralTypeName(TokenType type)
{
return type switch
{
TokenType.IntLiteral => "int",
TokenType.FloatLiteral => "double",
TokenType.CharLiteral => "char",
TokenType.StringLiteral => "string",
_ => "unknown"
};
}
}

View File

@@ -0,0 +1,130 @@
namespace TinyCC.Core;
/// <summary>
/// IR 指令基类
/// </summary>
public abstract record IrInstruction;
/// <summary>
/// 二元运算指令
/// </summary>
public sealed record IrBinaryOp(IrValue Dest, IrBinaryOpType Op, IrValue Left, IrValue Right) : IrInstruction;
/// <summary>
/// 一元运算指令
/// </summary>
public sealed record IrUnaryOp(IrValue Dest, IrUnaryOpType Op, IrValue Source) : IrInstruction;
/// <summary>
/// 加载指令(从内存加载到寄存器)
/// </summary>
public sealed record IrLoad(IrValue Dest, IrValue Address) : IrInstruction;
/// <summary>
/// 存储指令(从寄存器存储到内存)
/// </summary>
public sealed record IrStore(IrValue Address, IrValue Value) : IrInstruction;
/// <summary>
/// 函数调用指令
/// </summary>
public sealed record IrCall(IrValue? Dest, string FunctionName, List<IrValue> Arguments) : IrInstruction;
/// <summary>
/// 无条件跳转指令
/// </summary>
public sealed record IrJump(string TargetLabel) : IrInstruction;
/// <summary>
/// 条件分支指令
/// </summary>
public sealed record IrBranch(IrValue Condition, string TrueLabel, string FalseLabel) : IrInstruction;
/// <summary>
/// 返回指令
/// </summary>
public sealed record IrReturn(IrValue? Value) : IrInstruction;
/// <summary>
/// 标签指令
/// </summary>
public sealed record IrLabel(string LabelName) : IrInstruction;
/// <summary>
/// 赋值指令
/// </summary>
public sealed record IrMove(IrValue Dest, IrValue Source) : IrInstruction;
/// <summary>
/// 空操作指令
/// </summary>
public sealed record IrNop() : IrInstruction;
/// <summary>
/// 分配栈空间指令
/// </summary>
public sealed record IrAllocStack(int Size, string VariableName) : IrInstruction;
/// <summary>
/// IR 程序
/// </summary>
public sealed record IrProgram(List<IrFunction> Functions);
/// <summary>
/// IR 函数
/// </summary>
public sealed record IrFunction(string Name, List<IrBasicBlock> BasicBlocks, List<IrVariable> Locals, int ParameterCount = 0);
/// <summary>
/// 基本块
/// </summary>
public sealed record IrBasicBlock(string Label, List<IrInstruction> Instructions);
/// <summary>
/// IR 值
/// </summary>
public abstract record IrValue;
/// <summary>
/// 临时变量
/// </summary>
public sealed record IrTemp(string Name, string TypeName) : IrValue;
/// <summary>
/// 常量
/// </summary>
public sealed record IrConstant(object Value, string TypeName) : IrValue;
/// <summary>
/// 全局变量
/// </summary>
public sealed record IrGlobal(string Name, string TypeName) : IrValue;
/// <summary>
/// 局部变量
/// </summary>
public sealed record IrLocal(string Name, string TypeName, int StackOffset) : IrValue;
/// <summary>
/// IR 变量
/// </summary>
public sealed record IrVariable(string Name, string TypeName, int Size);
/// <summary>
/// 二元运算类型
/// </summary>
public enum IrBinaryOpType
{
Add, Sub, Mul, Div, Mod,
And, Or, Xor,
Shl, Shr,
Eq, Ne, Lt, Gt, Le, Ge
}
/// <summary>
/// 一元运算类型
/// </summary>
public enum IrUnaryOpType
{
Neg, Not, BitNot, Deref
}

View File

@@ -0,0 +1,356 @@
using System.Text;
namespace TinyCC.Core;
/// <summary>
/// C 语言词法分析器
/// </summary>
public sealed class Lexer
{
private readonly IErrorReporter _errorReporter;
private readonly string _source;
private readonly string _fileName;
private int _position;
private int _line = 1;
private int _column = 1;
private static readonly HashSet<string> Keywords = new()
{
"int", "char", "float", "double", "long", "short", "void",
"if", "else", "while", "for", "do", "switch", "case", "default",
"break", "continue", "return", "struct", "union", "typedef",
"signed", "unsigned", "const", "static", "extern", "auto", "register", "volatile",
"sizeof"
};
public Lexer(string source, string fileName, IErrorReporter errorReporter)
{
_source = source;
_fileName = fileName;
_errorReporter = errorReporter;
}
public IEnumerable<Token> Tokenize()
{
while (true)
{
var token = NextToken();
yield return token;
if (token.Type == TokenType.EOF || token.Type == TokenType.Error)
break;
}
}
private Token NextToken()
{
SkipWhitespaceAndComments();
if (_position >= _source.Length)
return CreateToken(TokenType.EOF, "", null);
var startLine = _line;
var startColumn = _column;
var ch = _source[_position];
// 标识符和关键字
if (char.IsLetter(ch) || ch == '_')
return ReadIdentifierOrKeyword();
// 数字字面量
if (char.IsDigit(ch))
return ReadNumber();
// 字符字面量
if (ch == '\'')
return ReadCharLiteral();
// 字符串字面量
if (ch == '"')
return ReadStringLiteral();
// 运算符和分隔符 - 需要 Advance 后再创建 token
switch (ch)
{
case '+':
if (PeekNext() == '+') { Advance(); Advance(); return new Token(TokenType.Increment, "++", "++", new SourceLocation(_fileName, startLine, startColumn)); }
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.PlusAssign, "+=", "+=", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); return new Token(TokenType.Plus, "+", "+", new SourceLocation(_fileName, startLine, startColumn));
case '-':
if (PeekNext() == '-') { Advance(); Advance(); return new Token(TokenType.Decrement, "--", "--", new SourceLocation(_fileName, startLine, startColumn)); }
if (PeekNext() == '>') { Advance(); Advance(); return new Token(TokenType.Arrow, "->", "->", new SourceLocation(_fileName, startLine, startColumn)); }
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.MinusAssign, "-=", "-=", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); return new Token(TokenType.Minus, "-", "-", new SourceLocation(_fileName, startLine, startColumn));
case '*':
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.StarAssign, "*=", "*=", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); return new Token(TokenType.Star, "*", "*", new SourceLocation(_fileName, startLine, startColumn));
case '/':
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.SlashAssign, "/=", "/=", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); return new Token(TokenType.Slash, "/", "/", new SourceLocation(_fileName, startLine, startColumn));
case '%':
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.PercentAssign, "%=", "%=", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); return new Token(TokenType.Percent, "%", "%", new SourceLocation(_fileName, startLine, startColumn));
case '=':
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.Equal, "==", "==", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); return new Token(TokenType.Assign, "=", "=", new SourceLocation(_fileName, startLine, startColumn));
case '!':
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.NotEqual, "!=", "!=", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); return new Token(TokenType.Not, "!", "!", new SourceLocation(_fileName, startLine, startColumn));
case '<':
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.LessEqual, "<=", "<=", new SourceLocation(_fileName, startLine, startColumn)); }
if (PeekNext() == '<') {
if (PeekNext(1) == '=') { Advance(); Advance(); Advance(); return new Token(TokenType.LeftShiftAssign, "<<=", "<<=", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); Advance(); return new Token(TokenType.LeftShift, "<<", "<<", new SourceLocation(_fileName, startLine, startColumn));
}
Advance(); return new Token(TokenType.Less, "<", "<", new SourceLocation(_fileName, startLine, startColumn));
case '>':
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.GreaterEqual, ">=", ">=", new SourceLocation(_fileName, startLine, startColumn)); }
if (PeekNext() == '>') {
if (PeekNext(1) == '=') { Advance(); Advance(); Advance(); return new Token(TokenType.RightShiftAssign, ">>=", ">>=", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); Advance(); return new Token(TokenType.RightShift, ">>", ">>", new SourceLocation(_fileName, startLine, startColumn));
}
Advance(); return new Token(TokenType.Greater, ">", ">", new SourceLocation(_fileName, startLine, startColumn));
case '&':
if (PeekNext() == '&') { Advance(); Advance(); return new Token(TokenType.And, "&&", "&&", new SourceLocation(_fileName, startLine, startColumn)); }
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.AndAssign, "&=", "&=", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); return new Token(TokenType.BitAnd, "&", "&", new SourceLocation(_fileName, startLine, startColumn));
case '|':
if (PeekNext() == '|') { Advance(); Advance(); return new Token(TokenType.Or, "||", "||", new SourceLocation(_fileName, startLine, startColumn)); }
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.OrAssign, "|=", "|=", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); return new Token(TokenType.BitOr, "|", "|", new SourceLocation(_fileName, startLine, startColumn));
case '^':
if (PeekNext() == '=') { Advance(); Advance(); return new Token(TokenType.XorAssign, "^=", "^=", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); return new Token(TokenType.BitXor, "^", "^", new SourceLocation(_fileName, startLine, startColumn));
case '~':
Advance(); return new Token(TokenType.BitNot, "~", "~", new SourceLocation(_fileName, startLine, startColumn));
case '(':
Advance(); return new Token(TokenType.LeftParen, "(", "(", new SourceLocation(_fileName, startLine, startColumn));
case ')':
Advance(); return new Token(TokenType.RightParen, ")", ")", new SourceLocation(_fileName, startLine, startColumn));
case '{':
Advance(); return new Token(TokenType.LeftBrace, "{", "{", new SourceLocation(_fileName, startLine, startColumn));
case '}':
Advance(); return new Token(TokenType.RightBrace, "}", "}", new SourceLocation(_fileName, startLine, startColumn));
case '[':
Advance(); return new Token(TokenType.LeftBracket, "[", "[", new SourceLocation(_fileName, startLine, startColumn));
case ']':
Advance(); return new Token(TokenType.RightBracket, "]", "]", new SourceLocation(_fileName, startLine, startColumn));
case ';':
Advance(); return new Token(TokenType.Semicolon, ";", ";", new SourceLocation(_fileName, startLine, startColumn));
case ',':
Advance(); return new Token(TokenType.Comma, ",", ",", new SourceLocation(_fileName, startLine, startColumn));
case ':':
Advance(); return new Token(TokenType.Colon, ":", ":", new SourceLocation(_fileName, startLine, startColumn));
case '?':
Advance(); return new Token(TokenType.Question, "?", "?", new SourceLocation(_fileName, startLine, startColumn));
case '.':
if (PeekNext() == '.' && PeekNext(1) == '.') { Advance(); Advance(); Advance(); return new Token(TokenType.Ellipsis, "...", "...", new SourceLocation(_fileName, startLine, startColumn)); }
Advance(); return new Token(TokenType.Dot, ".", ".", new SourceLocation(_fileName, startLine, startColumn));
case '#':
Advance(); return new Token(TokenType.Hash, "#", "#", new SourceLocation(_fileName, startLine, startColumn));
default:
Advance();
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Unexpected character '{ch}'",
new SourceLocation(_fileName, startLine, startColumn)
));
return new Token(TokenType.Error, ch.ToString(), null, new SourceLocation(_fileName, startLine, startColumn));
}
}
private Token ReadIdentifierOrKeyword()
{
var startLine = _line;
var startColumn = _column;
var sb = new StringBuilder();
while (_position < _source.Length &&
(char.IsLetterOrDigit(_source[_position]) || _source[_position] == '_'))
{
sb.Append(_source[_position]);
Advance();
}
var lexeme = sb.ToString();
if (Keywords.Contains(lexeme))
{
var type = Enum.Parse<TokenType>(char.ToUpper(lexeme[0]) + lexeme[1..], true);
return new Token(type, lexeme, lexeme, new SourceLocation(_fileName, startLine, startColumn));
}
return new Token(TokenType.Identifier, lexeme, lexeme, new SourceLocation(_fileName, startLine, startColumn));
}
private Token ReadNumber()
{
var startLine = _line;
var startColumn = _column;
var sb = new StringBuilder();
var isFloat = false;
while (_position < _source.Length && (char.IsDigit(_source[_position]) || _source[_position] == '.'))
{
if (_source[_position] == '.') isFloat = true;
sb.Append(_source[_position]);
Advance();
}
var lexeme = sb.ToString();
object value = isFloat ? double.Parse(lexeme) : long.Parse(lexeme);
var type = isFloat ? TokenType.FloatLiteral : TokenType.IntLiteral;
return new Token(type, lexeme, value, new SourceLocation(_fileName, startLine, startColumn));
}
private Token ReadCharLiteral()
{
var startLine = _line;
var startColumn = _column;
Advance(); // 跳过 '
if (_position >= _source.Length || _source[_position] == '\'')
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
"Empty character literal",
new SourceLocation(_fileName, startLine, startColumn)
));
return CreateToken(TokenType.Error, "''", null);
}
var ch = _source[_position];
Advance();
if (_position >= _source.Length || _source[_position] != '\'')
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
"Unterminated character literal",
new SourceLocation(_fileName, startLine, startColumn)
));
return CreateToken(TokenType.Error, ch.ToString(), null);
}
Advance(); // 跳过结束 '
return new Token(TokenType.CharLiteral, $"'{ch}'", ch, new SourceLocation(_fileName, startLine, startColumn));
}
private Token ReadStringLiteral()
{
var startLine = _line;
var startColumn = _column;
var sb = new StringBuilder();
Advance(); // 跳过 "
while (_position < _source.Length && _source[_position] != '"')
{
if (_source[_position] == '\\')
{
Advance();
if (_position < _source.Length)
{
var escaped = _source[_position] switch
{
'n' => '\n',
't' => '\t',
'r' => '\r',
'\\' => '\\',
'"' => '"',
'\'' => '\'',
'0' => '\0',
_ => _source[_position]
};
sb.Append(escaped);
}
}
else
{
sb.Append(_source[_position]);
}
Advance();
}
if (_position >= _source.Length)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
"Unterminated string literal",
new SourceLocation(_fileName, startLine, startColumn)
));
return CreateToken(TokenType.Error, sb.ToString(), null);
}
Advance(); // 跳过结束 "
var value = sb.ToString();
return new Token(TokenType.StringLiteral, $"\"{value}\"", value, new SourceLocation(_fileName, startLine, startColumn));
}
private void SkipWhitespaceAndComments()
{
while (_position < _source.Length)
{
if (char.IsWhiteSpace(_source[_position]))
{
Advance();
continue;
}
// 单行注释 //
if (_source[_position] == '/' && PeekNext() == '/')
{
while (_position < _source.Length && _source[_position] != '\n')
Advance();
continue;
}
// 多行注释 /* */
if (_source[_position] == '/' && PeekNext() == '*')
{
Advance(); // /
Advance(); // *
while (_position < _source.Length)
{
if (_source[_position] == '*' && PeekNext() == '/')
{
Advance(); // *
Advance(); // /
break;
}
Advance();
}
continue;
}
break;
}
}
private char PeekNext(int offset = 1)
{
var pos = _position + offset;
return pos < _source.Length ? _source[pos] : '\0';
}
private void Advance()
{
if (_position < _source.Length)
{
if (_source[_position] == '\n')
{
_line++;
_column = 1;
}
else
{
_column++;
}
_position++;
}
}
private Token CreateToken(TokenType type, string lexeme, object? value)
{
return new Token(type, lexeme, value, new SourceLocation(_fileName, _line, _column));
}
}

View File

@@ -0,0 +1,19 @@
namespace TinyCC.Core;
/// <summary>
/// Token 记录
/// </summary>
public sealed record Token(
TokenType Type,
string Lexeme,
object? Value,
SourceLocation Location
)
{
public override string ToString()
{
return Value != null
? $"{Type}('{Lexeme}' = {Value})"
: $"{Type}('{Lexeme}')";
}
}

View File

@@ -0,0 +1,41 @@
namespace TinyCC.Core;
/// <summary>
/// Token 类型枚举
/// </summary>
public enum TokenType
{
// 关键字
Int, Char, Float, Double, Long, Short, Void,
If, Else, While, For, Do, Switch, Case, Default,
Break, Continue, Return, Struct, Union, Typedef,
Signed, Unsigned, Const, Static, Extern, Auto, Register, Volatile,
Sizeof,
// 字面量
IntLiteral, FloatLiteral, CharLiteral, StringLiteral,
// 标识符
Identifier,
// 运算符
Plus, Minus, Star, Slash, Percent,
Equal, NotEqual, Less, Greater, LessEqual, GreaterEqual,
Assign, PlusAssign, MinusAssign, StarAssign, SlashAssign, PercentAssign,
And, Or, Not, BitAnd, BitOr, BitXor, BitNot,
LeftShift, RightShift,
AndAssign, OrAssign, XorAssign, LeftShiftAssign, RightShiftAssign,
Increment, Decrement,
Arrow, Dot,
// 分隔符
LeftParen, RightParen, LeftBrace, RightBrace,
LeftBracket, RightBracket,
Semicolon, Comma, Colon, Question, Ellipsis,
// 预处理器
Hash,
// 特殊
EOF, Error
}

View File

@@ -0,0 +1,119 @@
namespace TinyCC.Core;
/// <summary>
/// AST 节点基类
/// </summary>
public abstract record AstNode(SourceLocation Location);
/// <summary>
/// 程序节点
/// </summary>
public sealed record ProgramNode(List<AstNode> Declarations, SourceLocation Location) : AstNode(Location);
/// <summary>
/// 类型节点
/// </summary>
public abstract record TypeNode(SourceLocation Location) : AstNode(Location);
public sealed record PrimitiveTypeNode(string TypeName, SourceLocation Location) : TypeNode(Location);
public sealed record PointerTypeNode(TypeNode BaseType, SourceLocation Location) : TypeNode(Location);
public sealed record ArrayTypeNode(TypeNode ElementType, int Size, SourceLocation Location) : TypeNode(Location);
/// <summary>
/// 声明节点
/// </summary>
public abstract record DeclarationNode(SourceLocation Location) : AstNode(Location);
public sealed record FunctionDeclarationNode(
TypeNode ReturnType,
string Name,
List<ParameterNode> Parameters,
BlockStatementNode Body,
SourceLocation Location
) : DeclarationNode(Location);
public sealed record VariableDeclarationNode(
TypeNode Type,
string Name,
ExpressionNode? Initializer,
SourceLocation Location
) : DeclarationNode(Location);
public sealed record ParameterNode(
TypeNode Type,
string Name,
SourceLocation Location
) : AstNode(Location);
/// <summary>
/// 语句节点
/// </summary>
public abstract record StatementNode(SourceLocation Location) : AstNode(Location);
public sealed record BlockStatementNode(List<AstNode> Statements, SourceLocation Location) : StatementNode(Location);
public sealed record ExpressionStatementNode(ExpressionNode Expression, SourceLocation Location) : StatementNode(Location);
public sealed record ReturnStatementNode(ExpressionNode? Expression, SourceLocation Location) : StatementNode(Location);
public sealed record IfStatementNode(
ExpressionNode Condition,
StatementNode ThenBranch,
StatementNode? ElseBranch,
SourceLocation Location
) : StatementNode(Location);
public sealed record WhileStatementNode(
ExpressionNode Condition,
StatementNode Body,
SourceLocation Location
) : StatementNode(Location);
public sealed record ForStatementNode(
AstNode? Init,
ExpressionNode? Condition,
ExpressionNode? Increment,
StatementNode Body,
SourceLocation Location
) : StatementNode(Location);
public sealed record BreakStatementNode(SourceLocation Location) : StatementNode(Location);
public sealed record ContinueStatementNode(SourceLocation Location) : StatementNode(Location);
/// <summary>
/// 表达式节点
/// </summary>
public abstract record ExpressionNode(SourceLocation Location) : AstNode(Location);
public sealed record BinaryExpressionNode(
ExpressionNode Left,
TokenType Operator,
ExpressionNode Right,
SourceLocation Location
) : ExpressionNode(Location);
public sealed record UnaryExpressionNode(
TokenType Operator,
ExpressionNode Expression,
SourceLocation Location
) : ExpressionNode(Location);
public sealed record LiteralExpressionNode(object Value, TokenType Type, SourceLocation Location) : ExpressionNode(Location);
public sealed record IdentifierExpressionNode(string Name, SourceLocation Location) : ExpressionNode(Location);
public sealed record AssignmentExpressionNode(
ExpressionNode Left,
TokenType Operator,
ExpressionNode Right,
SourceLocation Location
) : ExpressionNode(Location);
public sealed record FunctionCallExpressionNode(
string FunctionName,
List<ExpressionNode> Arguments,
SourceLocation Location
) : ExpressionNode(Location);
public sealed record MemberAccessExpressionNode(
ExpressionNode Target,
string MemberName,
SourceLocation Location
) : ExpressionNode(Location);
public sealed record ArrayAccessExpressionNode(
ExpressionNode Array,
ExpressionNode Index,
SourceLocation Location
) : ExpressionNode(Location);

View File

@@ -0,0 +1,656 @@
namespace TinyCC.Core;
/// <summary>
/// 递归下降解析器
/// </summary>
public sealed class Parser
{
private readonly IErrorReporter _errorReporter;
private readonly IEnumerator<Token> _tokens;
private Token _current;
public Parser(IEnumerable<Token> tokens, IErrorReporter errorReporter)
{
_errorReporter = errorReporter;
_tokens = tokens.GetEnumerator();
_tokens.MoveNext();
_current = _tokens.Current;
}
/// <summary>
/// 解析程序
/// </summary>
public ProgramNode Parse()
{
var declarations = new List<AstNode>();
var location = _current.Location;
while (_current.Type != TokenType.EOF)
{
declarations.Add(ParseDeclaration());
}
return new ProgramNode(declarations, location);
}
private AstNode ParseDeclaration()
{
// 函数声明
if (IsTypeSpecifier(_current.Type))
{
var type = ParseTypeSpecifier();
var name = Consume(TokenType.Identifier, "Expected function or variable name");
if (_current.Type == TokenType.LeftParen)
{
return ParseFunctionDeclaration(type, name);
}
else
{
return ParseVariableDeclaration(type, name);
}
}
// 错误恢复:跳过整个声明直到遇到分号或右大括号
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Expected declaration, got {_current.Type}",
_current.Location
));
// 跳过直到遇到 ; 或 } 或 EOF
while (_current.Type is not (TokenType.Semicolon or TokenType.RightBrace or TokenType.EOF))
{
Advance();
}
// 消耗终止符
if (_current.Type is TokenType.Semicolon or TokenType.RightBrace)
Advance();
return new VariableDeclarationNode(
new PrimitiveTypeNode("int", _current.Location),
"error",
null,
_current.Location
);
}
private FunctionDeclarationNode ParseFunctionDeclaration(TypeNode returnType, Token nameToken)
{
var parameters = ParseParameterList();
var body = ParseBlock();
return new FunctionDeclarationNode(
returnType,
nameToken.Lexeme,
parameters,
body,
nameToken.Location
);
}
private List<ParameterNode> ParseParameterList()
{
var parameters = new List<ParameterNode>();
Consume(TokenType.LeftParen, "Expected '('");
if (_current.Type != TokenType.RightParen)
{
do
{
var type = ParseTypeSpecifier();
var name = Consume(TokenType.Identifier, "Expected parameter name");
parameters.Add(new ParameterNode(type, name.Lexeme, name.Location));
if (_current.Type == TokenType.Comma)
Advance();
else
break;
} while (_current.Type != TokenType.RightParen);
}
Consume(TokenType.RightParen, "Expected ')'");
return parameters;
}
private VariableDeclarationNode ParseVariableDeclaration(TypeNode type, Token nameToken)
{
ExpressionNode? initializer = null;
if (_current.Type == TokenType.Assign)
{
Advance();
initializer = ParseExpression();
}
Consume(TokenType.Semicolon, "Expected ';'");
return new VariableDeclarationNode(
type,
nameToken.Lexeme,
initializer,
nameToken.Location
);
}
private TypeNode ParseTypeSpecifier()
{
var location = _current.Location;
var typeName = _current.Lexeme;
Advance();
// 检查指针
if (_current.Type == TokenType.Star)
{
Advance();
var baseType = new PrimitiveTypeNode(typeName, location);
return new PointerTypeNode(baseType, location);
}
return new PrimitiveTypeNode(typeName, location);
}
private bool IsTypeSpecifier(TokenType type)
{
return type is TokenType.Int or TokenType.Char or TokenType.Float or TokenType.Double
or TokenType.Long or TokenType.Short or TokenType.Void or TokenType.Struct
or TokenType.Unsigned or TokenType.Signed;
}
private BlockStatementNode ParseBlock()
{
var location = _current.Location;
Consume(TokenType.LeftBrace, "Expected '{'");
var statements = new List<AstNode>();
while (_current.Type != TokenType.RightBrace && _current.Type != TokenType.EOF)
{
// 检查是否是局部变量声明(类型关键字后跟标识符或 *
if (IsTypeKeyword(_current.Type))
{
statements.Add(ParseLocalVariableDeclaration());
}
else
{
statements.Add(ParseStatement());
}
}
Consume(TokenType.RightBrace, "Expected '}'");
return new BlockStatementNode(statements, location);
}
private bool IsTypeKeyword(TokenType tokenType)
{
return tokenType is TokenType.Int or TokenType.Char or TokenType.Float
or TokenType.Double or TokenType.Long or TokenType.Short
or TokenType.Void or TokenType.Unsigned or TokenType.Signed;
}
private VariableDeclarationNode ParseLocalVariableDeclaration()
{
var type = ParseTypeSpecifier();
var nameToken = _current;
Consume(TokenType.Identifier, "Expected identifier after type specifier");
return ParseVariableDeclaration(type, nameToken);
}
private StatementNode ParseStatement()
{
return _current.Type switch
{
TokenType.LeftBrace => ParseBlock(),
TokenType.If => ParseIfStatement(),
TokenType.While => ParseWhileStatement(),
TokenType.For => ParseForStatement(),
TokenType.Return => ParseReturnStatement(),
TokenType.Break => ParseBreakStatement(),
TokenType.Continue => ParseContinueStatement(),
_ => ParseExpressionStatement()
};
}
private IfStatementNode ParseIfStatement()
{
var location = _current.Location;
Advance(); // 跳过 if
Consume(TokenType.LeftParen, "Expected '('");
var condition = ParseExpression();
Consume(TokenType.RightParen, "Expected ')'");
var thenBranch = ParseStatement();
StatementNode? elseBranch = null;
if (_current.Type == TokenType.Else)
{
Advance();
elseBranch = ParseStatement();
}
return new IfStatementNode(condition, thenBranch, elseBranch, location);
}
private WhileStatementNode ParseWhileStatement()
{
var location = _current.Location;
Advance(); // 跳过 while
Consume(TokenType.LeftParen, "Expected '('");
var condition = ParseExpression();
Consume(TokenType.RightParen, "Expected ')'");
var body = ParseStatement();
return new WhileStatementNode(condition, body, location);
}
private ForStatementNode ParseForStatement()
{
var location = _current.Location;
Advance(); // 跳过 for
Consume(TokenType.LeftParen, "Expected '('");
StatementNode? init = null;
if (_current.Type != TokenType.Semicolon)
{
init = _current.Type == TokenType.LeftBrace ? ParseBlock() : ParseExpressionStatement();
}
else
{
Advance();
}
ExpressionNode? condition = null;
if (_current.Type != TokenType.Semicolon)
{
condition = ParseExpression();
}
Consume(TokenType.Semicolon, "Expected ';'");
ExpressionNode? increment = null;
if (_current.Type != TokenType.RightParen)
{
increment = ParseExpression();
}
Consume(TokenType.RightParen, "Expected ')'");
var body = ParseStatement();
return new ForStatementNode(init, condition, increment, body, location);
}
private ReturnStatementNode ParseReturnStatement()
{
var location = _current.Location;
Advance(); // 跳过 return
ExpressionNode? expression = null;
if (_current.Type != TokenType.Semicolon)
{
expression = ParseExpression();
}
Consume(TokenType.Semicolon, "Expected ';'");
return new ReturnStatementNode(expression, location);
}
private BreakStatementNode ParseBreakStatement()
{
var location = _current.Location;
Advance();
Consume(TokenType.Semicolon, "Expected ';'");
return new BreakStatementNode(location);
}
private ContinueStatementNode ParseContinueStatement()
{
var location = _current.Location;
Advance();
Consume(TokenType.Semicolon, "Expected ';'");
return new ContinueStatementNode(location);
}
private ExpressionStatementNode ParseExpressionStatement()
{
var expression = ParseExpression();
Consume(TokenType.Semicolon, "Expected ';'");
return new ExpressionStatementNode(expression, expression.Location);
}
private ExpressionNode ParseExpression()
{
return ParseAssignment();
}
private ExpressionNode ParseAssignment()
{
var left = ParseLogicalOr();
if (_current.Type is TokenType.Assign or TokenType.PlusAssign or TokenType.MinusAssign
or TokenType.StarAssign or TokenType.SlashAssign)
{
var op = _current.Type;
var location = _current.Location;
Advance();
var right = ParseAssignment();
return new AssignmentExpressionNode(left, op, right, location);
}
return left;
}
private ExpressionNode ParseLogicalOr()
{
var left = ParseLogicalAnd();
while (_current.Type == TokenType.Or)
{
var location = _current.Location;
Advance();
var right = ParseLogicalAnd();
left = new BinaryExpressionNode(left, TokenType.Or, right, location);
}
return left;
}
private ExpressionNode ParseLogicalAnd()
{
var left = ParseBitwiseOr();
while (_current.Type == TokenType.And)
{
var location = _current.Location;
Advance();
var right = ParseBitwiseOr();
left = new BinaryExpressionNode(left, TokenType.And, right, location);
}
return left;
}
private ExpressionNode ParseBitwiseOr()
{
var left = ParseBitwiseXor();
while (_current.Type == TokenType.BitOr)
{
var location = _current.Location;
Advance();
var right = ParseBitwiseXor();
left = new BinaryExpressionNode(left, TokenType.BitOr, right, location);
}
return left;
}
private ExpressionNode ParseBitwiseXor()
{
var left = ParseBitwiseAnd();
while (_current.Type == TokenType.BitXor)
{
var location = _current.Location;
Advance();
var right = ParseBitwiseAnd();
left = new BinaryExpressionNode(left, TokenType.BitXor, right, location);
}
return left;
}
private ExpressionNode ParseBitwiseAnd()
{
var left = ParseEquality();
while (_current.Type == TokenType.BitAnd)
{
var location = _current.Location;
Advance();
var right = ParseEquality();
left = new BinaryExpressionNode(left, TokenType.BitAnd, right, location);
}
return left;
}
private ExpressionNode ParseEquality()
{
var left = ParseRelational();
while (_current.Type is TokenType.Equal or TokenType.NotEqual)
{
var op = _current.Type;
var location = _current.Location;
Advance();
var right = ParseRelational();
left = new BinaryExpressionNode(left, op, right, location);
}
return left;
}
private ExpressionNode ParseRelational()
{
var left = ParseShift();
while (_current.Type is TokenType.Less or TokenType.Greater
or TokenType.LessEqual or TokenType.GreaterEqual)
{
var op = _current.Type;
var location = _current.Location;
Advance();
var right = ParseShift();
left = new BinaryExpressionNode(left, op, right, location);
}
return left;
}
private ExpressionNode ParseShift()
{
var left = ParseAdditive();
while (_current.Type is TokenType.LeftShift or TokenType.RightShift)
{
var op = _current.Type;
var location = _current.Location;
Advance();
var right = ParseAdditive();
left = new BinaryExpressionNode(left, op, right, location);
}
return left;
}
private ExpressionNode ParseAdditive()
{
var left = ParseMultiplicative();
while (_current.Type is TokenType.Plus or TokenType.Minus)
{
var op = _current.Type;
var location = _current.Location;
Advance();
var right = ParseMultiplicative();
left = new BinaryExpressionNode(left, op, right, location);
}
return left;
}
private ExpressionNode ParseMultiplicative()
{
var left = ParseUnary();
while (_current.Type is TokenType.Star or TokenType.Slash or TokenType.Percent)
{
var op = _current.Type;
var location = _current.Location;
Advance();
var right = ParseUnary();
left = new BinaryExpressionNode(left, op, right, location);
}
return left;
}
private ExpressionNode ParseUnary()
{
if (_current.Type is TokenType.Minus or TokenType.Not or TokenType.BitNot
or TokenType.Increment or TokenType.Decrement)
{
var op = _current.Type;
var location = _current.Location;
Advance();
var operand = ParseUnary();
return new UnaryExpressionNode(op, operand, location);
}
return ParsePostfix();
}
private ExpressionNode ParsePostfix()
{
var expr = ParsePrimary();
while (true)
{
if (_current.Type == TokenType.LeftParen)
{
// 函数调用
var location = _current.Location;
Advance();
var args = new List<ExpressionNode>();
if (_current.Type != TokenType.RightParen)
{
do
{
args.Add(ParseExpression());
if (_current.Type == TokenType.Comma)
Advance();
else
break;
} while (_current.Type != TokenType.RightParen);
}
Consume(TokenType.RightParen, "Expected ')'");
if (expr is IdentifierExpressionNode id)
{
expr = new FunctionCallExpressionNode(id.Name, args, location);
}
}
else if (_current.Type == TokenType.LeftBracket)
{
// 数组访问
var location = _current.Location;
Advance();
var index = ParseExpression();
Consume(TokenType.RightBracket, "Expected ']'");
expr = new ArrayAccessExpressionNode(expr, index, location);
}
else if (_current.Type == TokenType.Dot || _current.Type == TokenType.Arrow)
{
// 成员访问
var op = _current.Type;
var location = _current.Location;
Advance();
var member = Consume(TokenType.Identifier, "Expected member name");
expr = new MemberAccessExpressionNode(expr, member.Lexeme, location);
}
else
{
break;
}
}
return expr;
}
private ExpressionNode ParsePrimary()
{
var token = _current;
if (token.Type == TokenType.Identifier)
{
Advance();
return new IdentifierExpressionNode(token.Lexeme, token.Location);
}
if (token.Type == TokenType.IntLiteral)
{
Advance();
return new LiteralExpressionNode(token.Value!, TokenType.IntLiteral, token.Location);
}
if (token.Type == TokenType.FloatLiteral)
{
Advance();
return new LiteralExpressionNode(token.Value!, TokenType.FloatLiteral, token.Location);
}
if (token.Type == TokenType.CharLiteral)
{
Advance();
return new LiteralExpressionNode(token.Value!, TokenType.CharLiteral, token.Location);
}
if (token.Type == TokenType.StringLiteral)
{
Advance();
return new LiteralExpressionNode(token.Value!, TokenType.StringLiteral, token.Location);
}
if (token.Type == TokenType.LeftParen)
{
return ParseParenExpression();
}
throw new ParseException($"Unexpected token: {token.Type}", token.Location);
}
private ExpressionNode ParseParenExpression()
{
var location = _current.Location;
Consume(TokenType.LeftParen, "Expected '('");
var expr = ParseExpression();
Consume(TokenType.RightParen, "Expected ')'");
return expr;
}
private Token Consume(TokenType type, string message)
{
if (_current.Type != type)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"{message}, got {_current.Type} instead",
_current.Location
));
}
var token = _current;
Advance();
return token;
}
private void Advance()
{
if (_tokens.MoveNext())
{
_current = _tokens.Current;
}
}
private void SkipToken()
{
Advance();
}
}
/// <summary>
/// 解析异常
/// </summary>
public sealed class ParseException : Exception
{
public SourceLocation Location { get; }
public ParseException(string message, SourceLocation location) : base(message)
{
Location = location;
}
}

View File

@@ -0,0 +1,652 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace TinyCC.Core;
/// <summary>
/// C 预处理器
/// 支持 #include, #define, #ifdef, #ifndef, #if, #else, #endif, #undef
/// </summary>
public sealed class Preprocessor
{
private readonly IErrorReporter _errorReporter;
private readonly Dictionary<string, string?> _macros = new();
private readonly List<string> _includePaths = new();
private readonly HashSet<string> _includedFiles = new();
private int _lineNumber;
public Preprocessor(IErrorReporter errorReporter)
{
_errorReporter = errorReporter;
// 预定义宏
_macros["__LINE__"] = null; // 动态值
_macros["__FILE__"] = null; // 动态值
_macros["__DATE__"] = DateTime.Now.ToString("MMM dd yyyy");
_macros["__TIME__"] = DateTime.Now.ToString("HH:mm:ss");
_macros["__STDC__"] = "1";
_macros["__STDC_VERSION__"] = "199901L";
}
/// <summary>
/// 添加头文件搜索路径
/// </summary>
public void AddIncludePath(string path)
{
if (!string.IsNullOrEmpty(path) && Directory.Exists(path))
{
_includePaths.Add(path);
}
}
/// <summary>
/// 定义宏
/// </summary>
public void DefineMacro(string name, string? value)
{
_macros[name] = value;
}
/// <summary>
/// 取消定义宏
/// </summary>
public void UndefineMacro(string name)
{
_macros.Remove(name);
}
/// <summary>
/// 检查宏是否已定义
/// </summary>
public bool IsMacroDefined(string name)
{
return _macros.ContainsKey(name);
}
/// <summary>
/// 预处理源文件
/// </summary>
public string Preprocess(string sourceFile)
{
_includedFiles.Clear();
_lineNumber = 1;
return ProcessFile(sourceFile, new HashSet<string>());
}
private string ProcessFile(string filePath, HashSet<string> includeStack)
{
var fullPath = ResolveIncludePath(filePath);
if (fullPath == null)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Cannot find include file: {filePath}",
new SourceLocation(filePath, _lineNumber, 1)
));
return "";
}
if (includeStack.Contains(fullPath))
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Warning,
$"Circular include detected: {fullPath}",
new SourceLocation(fullPath, _lineNumber, 1)
));
return "";
}
includeStack.Add(fullPath);
if (_includedFiles.Contains(fullPath))
{
return ""; // 防止重复包含
}
_includedFiles.Add(fullPath);
if (!File.Exists(fullPath))
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"File not found: {fullPath}",
new SourceLocation(fullPath, _lineNumber, 1)
));
return "";
}
var source = File.ReadAllText(fullPath);
return ProcessSource(source, fullPath, includeStack);
}
private string ProcessSource(string source, string fileName, HashSet<string> includeStack)
{
var lines = source.Split('\n');
var output = new StringBuilder();
_lineNumber = 1;
var conditionStack = new List<bool> { true };
var conditionResolved = new List<bool> { true };
for (int i = 0; i < lines.Length; i++)
{
var line = lines[i].TrimEnd('\r');
var trimmedLine = line.Trim();
if (trimmedLine.StartsWith("#"))
{
var directive = ParseDirective(trimmedLine);
switch (directive.Name)
{
case "include":
if (conditionStack[^1])
{
var includeFile = directive.Argument.Trim(' ', '"', '<', '>');
var includedContent = ProcessFile(includeFile, new HashSet<string>(includeStack));
output.AppendLine(includedContent);
}
break;
case "define":
if (conditionStack[^1])
{
ProcessDefine(directive.Argument);
}
break;
case "undef":
if (conditionStack[^1])
{
UndefineMacro(directive.Argument.Trim());
}
break;
case "ifdef":
var isDefined = IsMacroDefined(directive.Argument.Trim());
conditionStack.Add(isDefined && conditionStack[^1]);
conditionResolved.Add(true);
break;
case "ifndef":
var isNotDefined = !IsMacroDefined(directive.Argument.Trim());
conditionStack.Add(isNotDefined && conditionStack[^1]);
conditionResolved.Add(true);
break;
case "if":
var condition = EvaluateCondition(directive.Argument.Trim());
conditionStack.Add(condition && conditionStack[^1]);
conditionResolved.Add(true);
break;
case "else":
if (conditionStack.Count > 1)
{
conditionStack[^1] = !conditionStack[^1] && conditionStack[^2];
}
break;
case "elif":
if (conditionStack.Count > 1)
{
var elifCondition = EvaluateCondition(directive.Argument.Trim());
conditionStack[^1] = elifCondition && conditionStack[^2];
}
break;
case "endif":
if (conditionStack.Count > 1)
{
conditionStack.RemoveAt(conditionStack.Count - 1);
conditionResolved.RemoveAt(conditionResolved.Count - 1);
}
break;
case "pragma":
// 忽略 pragma
break;
case "error":
if (conditionStack[^1])
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
directive.Argument.Trim(),
new SourceLocation(fileName, _lineNumber, 1)
));
}
break;
case "warning":
if (conditionStack[^1])
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Warning,
directive.Argument.Trim(),
new SourceLocation(fileName, _lineNumber, 1)
));
}
break;
case "line":
// 处理 #line 指令
break;
default:
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Unknown preprocessor directive: {directive.Name}",
new SourceLocation(fileName, _lineNumber, 1)
));
break;
}
}
else if (conditionStack[^1])
{
// 展开宏
var expandedLine = ExpandMacros(line, fileName);
output.AppendLine(expandedLine);
}
_lineNumber++;
}
return output.ToString();
}
private Directive ParseDirective(string line)
{
// 移除 # 前缀
var content = line.Substring(1).Trim();
var spaceIndex = content.IndexOfAny(new[] { ' ', '\t' });
if (spaceIndex < 0)
{
return new Directive(content, "");
}
var name = content.Substring(0, spaceIndex).Trim();
var argument = content.Substring(spaceIndex).Trim();
// 移除行尾注释
var commentIndex = argument.IndexOf("//");
if (commentIndex >= 0)
{
argument = argument.Substring(0, commentIndex).TrimEnd();
}
return new Directive(name, argument);
}
private record Directive(string Name, string Argument);
private void ProcessDefine(string argument)
{
if (string.IsNullOrWhiteSpace(argument))
{
return;
}
var parts = argument.Split(new[] { ' ', '\t' }, 2);
var name = parts[0].Trim();
if (!IsValidIdentifier(name))
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Invalid macro name: {name}",
new SourceLocation("", _lineNumber, 1)
));
return;
}
if (parts.Length > 1)
{
var value = parts[1].Trim();
// 处理函数式宏
if (value.StartsWith("("))
{
// 简单处理:存储完整的宏定义
_macros[name] = value;
}
else
{
_macros[name] = value;
}
}
else
{
_macros[name] = "";
}
}
private string ExpandMacros(string line, string fileName)
{
var result = new StringBuilder();
var i = 0;
while (i < line.Length)
{
// 检查字符串字面量
if (line[i] == '"')
{
var strEnd = line.IndexOf('"', i + 1);
if (strEnd < 0)
{
result.Append(line.Substring(i));
break;
}
result.Append(line.Substring(i, strEnd - i + 1));
i = strEnd + 1;
continue;
}
// 检查字符字面量
if (line[i] == '\'')
{
var charEnd = line.IndexOf('\'', i + 1);
if (charEnd < 0)
{
result.Append(line.Substring(i));
break;
}
result.Append(line.Substring(i, charEnd - i + 1));
i = charEnd + 1;
continue;
}
// 检查标识符
if (char.IsLetter(line[i]) || line[i] == '_')
{
var start = i;
while (i < line.Length && (char.IsLetterOrDigit(line[i]) || line[i] == '_'))
{
i++;
}
var identifier = line.Substring(start, i - start);
// 特殊宏
if (identifier == "__LINE__")
{
result.Append(_lineNumber);
}
else if (identifier == "__FILE__")
{
result.Append($"\"{fileName}\"");
}
else if (_macros.TryGetValue(identifier, out var value))
{
result.Append(value ?? "");
}
else
{
result.Append(identifier);
}
continue;
}
result.Append(line[i]);
i++;
}
return result.ToString();
}
private bool EvaluateCondition(string expression)
{
expression = expression.Trim();
// 处理 defined() 和 defined
expression = Regex.Replace(expression, @"defined\s*\((\w+)\)", m => IsMacroDefined(m.Groups[1].Value) ? "1" : "0");
expression = Regex.Replace(expression, @"defined\s+(\w+)", m => IsMacroDefined(m.Groups[1].Value) ? "1" : "0");
// 展开宏
expression = ExpandMacros(expression, "");
// 简单的常量表达式求值
try
{
// 移除 L 后缀
expression = Regex.Replace(expression, @"(\d+)L", "$1");
// 处理十六进制
expression = Regex.Replace(expression, @"0x([0-9a-fA-F]+)", m => Convert.ToInt64(m.Groups[1].Value, 16).ToString());
// 简单的算术表达式求值
return EvaluateExpression(expression) != 0;
}
catch
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Warning,
$"Failed to evaluate condition: {expression}",
new SourceLocation("", _lineNumber, 1)
));
return false;
}
}
private long EvaluateExpression(string expression)
{
// 简单的表达式求值,支持 +, -, *, /, %, &&, ||, ==, !=, <, >, <=, >=
expression = expression.Trim();
if (long.TryParse(expression, out var value))
{
return value;
}
// 处理逻辑或
var orIndex = expression.IndexOf("||");
if (orIndex > 0)
{
var left = EvaluateExpression(expression.Substring(0, orIndex));
var right = EvaluateExpression(expression.Substring(orIndex + 2));
return (left != 0 || right != 0) ? 1 : 0;
}
// 处理逻辑与
var andIndex = expression.IndexOf("&&");
if (andIndex > 0)
{
var left = EvaluateExpression(expression.Substring(0, andIndex));
var right = EvaluateExpression(expression.Substring(andIndex + 2));
return (left != 0 && right != 0) ? 1 : 0;
}
// 处理相等比较
var eqIndex = expression.IndexOf("==");
if (eqIndex > 0)
{
var left = EvaluateExpression(expression.Substring(0, eqIndex));
var right = EvaluateExpression(expression.Substring(eqIndex + 2));
return left == right ? 1 : 0;
}
// 处理不等比较
var neIndex = expression.IndexOf("!=");
if (neIndex > 0)
{
var left = EvaluateExpression(expression.Substring(0, neIndex));
var right = EvaluateExpression(expression.Substring(neIndex + 2));
return left != right ? 1 : 0;
}
// 处理小于等于
var leIndex = expression.IndexOf("<=");
if (leIndex > 0)
{
var left = EvaluateExpression(expression.Substring(0, leIndex));
var right = EvaluateExpression(expression.Substring(leIndex + 2));
return left <= right ? 1 : 0;
}
// 处理大于等于
var geIndex = expression.IndexOf(">=");
if (geIndex > 0)
{
var left = EvaluateExpression(expression.Substring(0, geIndex));
var right = EvaluateExpression(expression.Substring(geIndex + 2));
return left >= right ? 1 : 0;
}
// 处理小于
var ltIndex = expression.IndexOf("<");
if (ltIndex > 0 && ltIndex < expression.Length - 1)
{
var left = EvaluateExpression(expression.Substring(0, ltIndex));
var right = EvaluateExpression(expression.Substring(ltIndex + 1));
return left < right ? 1 : 0;
}
// 处理大于
var gtIndex = expression.IndexOf(">");
if (gtIndex > 0 && gtIndex < expression.Length - 1)
{
var left = EvaluateExpression(expression.Substring(0, gtIndex));
var right = EvaluateExpression(expression.Substring(gtIndex + 1));
return left > right ? 1 : 0;
}
// 处理加法
var addIndex = expression.IndexOf("+");
if (addIndex > 0)
{
var left = EvaluateExpression(expression.Substring(0, addIndex));
var right = EvaluateExpression(expression.Substring(addIndex + 1));
return left + right;
}
// 处理减法
var subIndex = expression.LastIndexOf("-");
if (subIndex > 0)
{
var left = EvaluateExpression(expression.Substring(0, subIndex));
var right = EvaluateExpression(expression.Substring(subIndex + 1));
return left - right;
}
// 处理乘法
var mulIndex = expression.IndexOf("*");
if (mulIndex > 0)
{
var left = EvaluateExpression(expression.Substring(0, mulIndex));
var right = EvaluateExpression(expression.Substring(mulIndex + 1));
return left * right;
}
// 处理除法
var divIndex = expression.IndexOf("/");
if (divIndex > 0)
{
var left = EvaluateExpression(expression.Substring(0, divIndex));
var right = EvaluateExpression(expression.Substring(divIndex + 1));
if (right == 0)
{
throw new DivideByZeroException();
}
return left / right;
}
// 处理取模
var modIndex = expression.IndexOf("%");
if (modIndex > 0)
{
var left = EvaluateExpression(expression.Substring(0, modIndex));
var right = EvaluateExpression(expression.Substring(modIndex + 1));
if (right == 0)
{
throw new DivideByZeroException();
}
return left % right;
}
// 处理括号
if (expression.StartsWith("(") && expression.EndsWith(")"))
{
return EvaluateExpression(expression.Substring(1, expression.Length - 2));
}
// 尝试解析为数字
if (long.TryParse(expression, out var numValue))
{
return numValue;
}
// 未知表达式,返回 0
return 0;
}
private string? ResolveIncludePath(string fileName)
{
// 检查是否为绝对路径
if (Path.IsPathRooted(fileName) && File.Exists(fileName))
{
return Path.GetFullPath(fileName);
}
// 检查当前目录
if (File.Exists(fileName))
{
return Path.GetFullPath(fileName);
}
// 搜索 include 路径
foreach (var includePath in _includePaths)
{
var fullPath = Path.Combine(includePath, fileName);
if (File.Exists(fullPath))
{
return Path.GetFullPath(fullPath);
}
}
// 检查系统 include 目录
var systemPaths = new[]
{
"/usr/include",
"/usr/local/include",
"include"
};
foreach (var systemPath in systemPaths)
{
var fullPath = Path.Combine(systemPath, fileName);
if (File.Exists(fullPath))
{
return Path.GetFullPath(fullPath);
}
}
return null;
}
private bool IsValidIdentifier(string name)
{
if (string.IsNullOrEmpty(name))
{
return false;
}
if (!char.IsLetter(name[0]) && name[0] != '_')
{
return false;
}
for (int i = 1; i < name.Length; i++)
{
if (!char.IsLetterOrDigit(name[i]) && name[i] != '_')
{
return false;
}
}
return true;
}
}

View File

@@ -0,0 +1,879 @@
using System;
using System.Collections.Generic;
using System.Linq;
namespace TinyCC.Core;
/// <summary>
/// 语义分析器
/// 负责类型检查、符号表管理和语义验证
/// </summary>
public sealed class SemanticAnalyzer
{
private readonly IErrorReporter _errorReporter;
private readonly SymbolTable _symbolTable;
private readonly TypeChecker _typeChecker;
private CType? _currentReturnType;
private bool _inLoop;
private bool _inSwitch;
public SemanticAnalyzer(IErrorReporter errorReporter)
{
_errorReporter = errorReporter;
_symbolTable = new SymbolTable(errorReporter);
_typeChecker = new TypeChecker(errorReporter);
}
/// <summary>
/// 分析 AST
/// </summary>
public void Analyze(AstNode root)
{
if (root is ProgramNode program)
{
AnalyzeProgram(program);
}
}
private void AnalyzeProgram(ProgramNode program)
{
// 第一遍:收集所有函数和全局变量声明
foreach (var decl in program.Declarations)
{
switch (decl)
{
case FunctionDeclarationNode func:
AnalyzeFunctionDeclaration(func);
break;
case VariableDeclarationNode varDecl:
AnalyzeGlobalVariableDeclaration(varDecl);
break;
}
}
// 第二遍:分析函数体
foreach (var decl in program.Declarations)
{
if (decl is FunctionDeclarationNode func)
{
AnalyzeFunctionBody(func);
}
}
}
private void AnalyzeFunctionDeclaration(FunctionDeclarationNode func)
{
// 检查函数名是否重复
if (_symbolTable.CurrentScopeExists(func.Name))
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Function '{func.Name}' already declared",
func.Location
));
return;
}
// 创建函数类型
var paramTypes = func.Parameters.Select(p => ParseType(p.Type)).ToList();
var returnType = ParseType(func.ReturnType);
var funcType = new FunctionType(returnType, paramTypes);
// 注册函数符号
var funcSymbol = new FunctionSymbol(func.Name, funcType, func.Parameters, func.ReturnType);
_symbolTable.AddSymbol(func.Name, funcSymbol);
}
private void AnalyzeFunctionBody(FunctionDeclarationNode func)
{
_symbolTable.EnterScope();
_currentReturnType = ParseType(func.ReturnType);
// 添加参数到符号表
foreach (var param in func.Parameters)
{
var paramType = ParseType(param.Type);
var paramSymbol = new VariableSymbol(param.Name, paramType);
_symbolTable.AddSymbol(param.Name, paramSymbol);
}
// 分析函数体
AnalyzeBlock(func.Body);
_symbolTable.ExitScope();
_currentReturnType = null;
}
private void AnalyzeGlobalVariableDeclaration(VariableDeclarationNode varDecl)
{
if (_symbolTable.CurrentScopeExists(varDecl.Name))
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Variable '{varDecl.Name}' already declared",
varDecl.Location
));
return;
}
var type = ParseType(varDecl.Type);
var symbol = new VariableSymbol(varDecl.Name, type);
_symbolTable.AddSymbol(varDecl.Name, symbol);
// 分析初始化器
if (varDecl.Initializer != null)
{
var initType = AnalyzeExpression(varDecl.Initializer);
if (initType != null)
{
CheckAssignmentCompatibility(type, initType, varDecl.Initializer.Location);
}
}
}
private void AnalyzeBlock(BlockStatementNode block)
{
_symbolTable.EnterScope();
foreach (var stmt in block.Statements)
{
switch (stmt)
{
case StatementNode statement:
AnalyzeStatement(statement);
break;
case VariableDeclarationNode varDecl:
AnalyzeLocalVariableDeclaration(varDecl);
break;
}
}
_symbolTable.ExitScope();
}
private void AnalyzeStatement(StatementNode stmt)
{
switch (stmt)
{
case BlockStatementNode block:
AnalyzeBlock(block);
break;
case ExpressionStatementNode exprStmt:
AnalyzeExpression(exprStmt.Expression);
break;
case ReturnStatementNode returnStmt:
AnalyzeReturnStatement(returnStmt);
break;
case IfStatementNode ifStmt:
AnalyzeIfStatement(ifStmt);
break;
case WhileStatementNode whileStmt:
AnalyzeWhileStatement(whileStmt);
break;
case ForStatementNode forStmt:
AnalyzeForStatement(forStmt);
break;
case BreakStatementNode breakStmt:
if (!_inLoop && !_inSwitch)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
"'break' statement not within loop or switch",
breakStmt.Location
));
}
break;
case ContinueStatementNode continueStmt:
if (!_inLoop)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
"'continue' statement not within loop",
continueStmt.Location
));
}
break;
}
}
private void AnalyzeLocalVariableDeclaration(VariableDeclarationNode varDecl)
{
if (_symbolTable.CurrentScopeExists(varDecl.Name))
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Variable '{varDecl.Name}' already declared in this scope",
varDecl.Location
));
return;
}
var type = ParseType(varDecl.Type);
var symbol = new VariableSymbol(varDecl.Name, type);
_symbolTable.AddSymbol(varDecl.Name, symbol);
// 分析初始化器
if (varDecl.Initializer != null)
{
var initType = AnalyzeExpression(varDecl.Initializer);
if (initType != null)
{
CheckAssignmentCompatibility(type, initType, varDecl.Initializer.Location);
}
}
}
private void AnalyzeReturnStatement(ReturnStatementNode returnStmt)
{
if (_currentReturnType == null)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
"Return statement outside of function",
returnStmt.Location
));
return;
}
if (returnStmt.Expression != null)
{
var exprType = AnalyzeExpression(returnStmt.Expression);
if (exprType != null)
{
CheckAssignmentCompatibility(_currentReturnType, exprType, returnStmt.Location);
}
}
else
{
// void 函数不应该有返回值
if (_currentReturnType is not VoidType)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
"Non-void function must return a value",
returnStmt.Location
));
}
}
}
private void AnalyzeIfStatement(IfStatementNode ifStmt)
{
var condType = AnalyzeExpression(ifStmt.Condition);
if (condType != null && !IsIntegerType(condType))
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
"Condition of 'if' must be an integer type",
ifStmt.Condition.Location
));
}
AnalyzeStatement(ifStmt.ThenBranch);
if (ifStmt.ElseBranch != null)
{
AnalyzeStatement(ifStmt.ElseBranch);
}
}
private void AnalyzeWhileStatement(WhileStatementNode whileStmt)
{
var oldInLoop = _inLoop;
_inLoop = true;
var condType = AnalyzeExpression(whileStmt.Condition);
if (condType != null && !IsIntegerType(condType))
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
"Condition of 'while' must be an integer type",
whileStmt.Condition.Location
));
}
AnalyzeStatement(whileStmt.Body);
_inLoop = oldInLoop;
}
private void AnalyzeForStatement(ForStatementNode forStmt)
{
_symbolTable.EnterScope();
var oldInLoop = _inLoop;
_inLoop = true;
if (forStmt.Init != null)
{
switch (forStmt.Init)
{
case StatementNode statement:
AnalyzeStatement(statement);
break;
case VariableDeclarationNode varDecl:
AnalyzeLocalVariableDeclaration(varDecl);
break;
}
}
if (forStmt.Condition != null)
{
var condType = AnalyzeExpression(forStmt.Condition);
if (condType != null && !IsIntegerType(condType))
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
"Condition of 'for' must be an integer type",
forStmt.Condition.Location
));
}
}
if (forStmt.Increment != null)
{
AnalyzeExpression(forStmt.Increment);
}
AnalyzeStatement(forStmt.Body);
_inLoop = oldInLoop;
_symbolTable.ExitScope();
}
private CType? AnalyzeExpression(ExpressionNode expr)
{
return expr switch
{
LiteralExpressionNode literal => AnalyzeLiteralExpression(literal),
IdentifierExpressionNode identifier => AnalyzeIdentifierExpression(identifier),
BinaryExpressionNode binary => AnalyzeBinaryExpression(binary),
UnaryExpressionNode unary => AnalyzeUnaryExpression(unary),
AssignmentExpressionNode assignment => AnalyzeAssignmentExpression(assignment),
FunctionCallExpressionNode call => AnalyzeFunctionCallExpression(call),
_ => null
};
}
private CType? AnalyzeLiteralExpression(LiteralExpressionNode literal)
{
return literal.Type switch
{
TokenType.IntLiteral => IntType.Instance,
TokenType.FloatLiteral => DoubleType.Instance,
TokenType.CharLiteral => CharType.Instance,
TokenType.StringLiteral => new PointerType(CharType.Instance),
_ => null
};
}
private CType? AnalyzeIdentifierExpression(IdentifierExpressionNode identifier)
{
var symbol = _symbolTable.Lookup(identifier.Name);
if (symbol == null)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Undeclared identifier '{identifier.Name}'",
identifier.Location
));
return null;
}
return symbol switch
{
VariableSymbol varSymbol => varSymbol.Type,
FunctionSymbol funcSymbol => funcSymbol.ReturnType,
_ => null
};
}
private CType? AnalyzeBinaryExpression(BinaryExpressionNode binary)
{
var leftType = AnalyzeExpression(binary.Left);
var rightType = AnalyzeExpression(binary.Right);
if (leftType == null || rightType == null)
{
return null;
}
return _typeChecker.GetBinaryResultType(binary.Operator, leftType, rightType, binary.Location);
}
private CType? AnalyzeUnaryExpression(UnaryExpressionNode unary)
{
var operandType = AnalyzeExpression(unary.Expression);
if (operandType == null)
{
return null;
}
return _typeChecker.GetUnaryResultType(unary.Operator, operandType, unary.Location);
}
private CType? AnalyzeAssignmentExpression(AssignmentExpressionNode assignment)
{
var rightType = AnalyzeExpression(assignment.Right);
if (assignment.Left is IdentifierExpressionNode identifier)
{
var symbol = _symbolTable.Lookup(identifier.Name);
if (symbol == null)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Undeclared variable '{identifier.Name}'",
identifier.Location
));
return rightType;
}
if (symbol is VariableSymbol varSymbol && rightType != null)
{
CheckAssignmentCompatibility(varSymbol.Type, rightType, assignment.Location);
}
}
return rightType;
}
private CType? AnalyzeFunctionCallExpression(FunctionCallExpressionNode call)
{
var symbol = _symbolTable.Lookup(call.FunctionName);
if (symbol == null)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Call to undeclared function '{call.FunctionName}'",
call.Location
));
return IntType.Instance; // 默认返回 int
}
if (symbol is not FunctionSymbol funcSymbol)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"'{call.FunctionName}' is not a function",
call.Location
));
return null;
}
// 检查参数数量
if (call.Arguments.Count != funcSymbol.Parameters.Count)
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Function '{call.FunctionName}' expects {funcSymbol.Parameters.Count} arguments, but {call.Arguments.Count} were provided",
call.Location
));
return funcSymbol.ReturnType;
}
// 检查参数类型
for (int i = 0; i < call.Arguments.Count; i++)
{
var argType = AnalyzeExpression(call.Arguments[i]);
var expectedType = ParseType(funcSymbol.Parameters[i].Type);
if (argType != null)
{
CheckAssignmentCompatibility(expectedType, argType, call.Arguments[i].Location);
}
}
return funcSymbol.ReturnType;
}
private void CheckAssignmentCompatibility(CType targetType, CType sourceType, SourceLocation location)
{
if (!_typeChecker.IsAssignable(targetType, sourceType))
{
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Cannot assign '{sourceType}' to '{targetType}'",
location
));
}
}
private CType ParseType(TypeNode typeNode)
{
return typeNode switch
{
PrimitiveTypeNode p => p.TypeName.ToLower() switch
{
"int" => IntType.Instance,
"char" => CharType.Instance,
"float" => FloatType.Instance,
"double" => DoubleType.Instance,
"long" => LongType.Instance,
"short" => ShortType.Instance,
"void" => VoidType.Instance,
_ => IntType.Instance // 默认
},
PointerTypeNode p => new PointerType(ParseType(p.BaseType)),
ArrayTypeNode a => new ArrayType(ParseType(a.ElementType), a.Size),
_ => IntType.Instance
};
}
private bool IsIntegerType(CType type)
{
return type is IntType or CharType or LongType or ShortType;
}
}
/// <summary>
/// 符号表
/// </summary>
public sealed class SymbolTable
{
private readonly IErrorReporter _errorReporter;
private readonly List<Dictionary<string, Symbol>> _scopes = new();
public SymbolTable(IErrorReporter errorReporter)
{
_errorReporter = errorReporter;
EnterScope(); // 创建全局作用域
}
public void EnterScope()
{
_scopes.Add(new Dictionary<string, Symbol>());
}
public void ExitScope()
{
if (_scopes.Count > 1)
{
_scopes.RemoveAt(_scopes.Count - 1);
}
}
public void AddSymbol(string name, Symbol symbol)
{
_scopes[^1][name] = symbol;
}
public Symbol? Lookup(string name)
{
for (int i = _scopes.Count - 1; i >= 0; i--)
{
if (_scopes[i].TryGetValue(name, out var symbol))
{
return symbol;
}
}
return null;
}
public bool CurrentScopeExists(string name)
{
return _scopes.Count > 0 && _scopes[^1].ContainsKey(name);
}
}
/// <summary>
/// 符号基类
/// </summary>
public abstract record Symbol(string Name);
/// <summary>
/// 变量符号
/// </summary>
public sealed record VariableSymbol(string Name, CType Type) : Symbol(Name);
/// <summary>
/// 函数符号
/// </summary>
public sealed record FunctionSymbol(
string Name,
FunctionType Type,
List<ParameterNode> Parameters,
TypeNode ReturnTypeNode
) : Symbol(Name)
{
public CType ReturnType => Type.ReturnType;
}
/// <summary>
/// 类型检查器
/// </summary>
public sealed class TypeChecker
{
private readonly IErrorReporter _errorReporter;
public TypeChecker(IErrorReporter errorReporter)
{
_errorReporter = errorReporter;
}
/// <summary>
/// 检查类型是否可赋值
/// </summary>
public bool IsAssignable(CType targetType, CType sourceType)
{
// 相同类型
if (targetType.Equals(sourceType))
{
return true;
}
// void* 可以接受任何指针
if (targetType is PointerType { BaseType: VoidType } && sourceType is PointerType)
{
return true;
}
// 整数提升
if (IsIntegerType(targetType) && IsIntegerType(sourceType))
{
return true;
}
// 浮点提升
if (targetType is DoubleType && sourceType is FloatType)
{
return true;
}
// int 到 double
if (targetType is DoubleType && sourceType is IntType)
{
return true;
}
return false;
}
/// <summary>
/// 获取二元运算结果类型
/// </summary>
public CType? GetBinaryResultType(TokenType op, CType leftType, CType rightType, SourceLocation location)
{
// 算术运算
if (op is TokenType.Plus or TokenType.Minus or TokenType.Star or TokenType.Slash or TokenType.Percent)
{
if (IsNumericType(leftType) && IsNumericType(rightType))
{
// 类型提升
if (leftType is DoubleType || rightType is DoubleType)
return DoubleType.Instance;
if (leftType is FloatType || rightType is FloatType)
return FloatType.Instance;
if (leftType is LongType || rightType is LongType)
return LongType.Instance;
return IntType.Instance;
}
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Invalid operands to binary operator '{GetOperatorString(op)}'",
location
));
return null;
}
// 比较运算
if (op is TokenType.Equal or TokenType.NotEqual or TokenType.Less or TokenType.Greater
or TokenType.LessEqual or TokenType.GreaterEqual)
{
if (IsComparableType(leftType, rightType))
{
return IntType.Instance;
}
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Invalid operands to comparison operator '{GetOperatorString(op)}'",
location
));
return null;
}
// 逻辑运算
if (op is TokenType.And or TokenType.Or)
{
return IntType.Instance;
}
// 位运算
if (op is TokenType.BitAnd or TokenType.BitOr or TokenType.BitXor
or TokenType.LeftShift or TokenType.RightShift)
{
if (IsIntegerType(leftType) && IsIntegerType(rightType))
{
return IntType.Instance;
}
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Invalid operands to bitwise operator '{GetOperatorString(op)}'",
location
));
return null;
}
return IntType.Instance;
}
/// <summary>
/// 获取一元运算结果类型
/// </summary>
public CType? GetUnaryResultType(TokenType op, CType operandType, SourceLocation location)
{
switch (op)
{
case TokenType.Minus: // 负号
if (IsNumericType(operandType))
{
return operandType;
}
break;
case TokenType.Not: // 逻辑非
return IntType.Instance;
case TokenType.BitNot: // 按位非
if (IsIntegerType(operandType))
{
return operandType;
}
break;
}
_errorReporter.Report(new ErrorInfo(
ErrorLevel.Error,
$"Invalid operand to unary operator '{GetOperatorString(op)}'",
location
));
return null;
}
private bool IsNumericType(CType type)
{
return type is IntType or CharType or LongType or ShortType or FloatType or DoubleType;
}
private bool IsIntegerType(CType type)
{
return type is IntType or CharType or LongType or ShortType;
}
private bool IsComparableType(CType left, CType right)
{
// 数字类型之间可以比较
if (IsNumericType(left) && IsNumericType(right))
{
return true;
}
// 指针之间可以比较
if (left is PointerType && right is PointerType)
{
return true;
}
return false;
}
private string GetOperatorString(TokenType op)
{
return op switch
{
TokenType.Plus => "+",
TokenType.Minus => "-",
TokenType.Star => "*",
TokenType.Slash => "/",
TokenType.Percent => "%",
TokenType.Equal => "==",
TokenType.NotEqual => "!=",
TokenType.Less => "<",
TokenType.Greater => ">",
TokenType.LessEqual => "<=",
TokenType.GreaterEqual => ">=",
TokenType.And => "&&",
TokenType.Or => "||",
TokenType.BitAnd => "&",
TokenType.BitOr => "|",
TokenType.BitXor => "^",
TokenType.LeftShift => "<<",
TokenType.RightShift => ">>",
TokenType.Not => "!",
TokenType.BitNot => "~",
_ => op.ToString()
};
}
}
/// <summary>
/// 类型系统
/// </summary>
public abstract record CType
{
public abstract string Name { get; }
}
public sealed record VoidType : CType
{
public static readonly VoidType Instance = new();
public override string Name => "void";
}
public sealed record IntType : CType
{
public static readonly IntType Instance = new();
public override string Name => "int";
}
public sealed record CharType : CType
{
public static readonly CharType Instance = new();
public override string Name => "char";
}
public sealed record FloatType : CType
{
public static readonly FloatType Instance = new();
public override string Name => "float";
}
public sealed record DoubleType : CType
{
public static readonly DoubleType Instance = new();
public override string Name => "double";
}
public sealed record LongType : CType
{
public static readonly LongType Instance = new();
public override string Name => "long";
}
public sealed record ShortType : CType
{
public static readonly ShortType Instance = new();
public override string Name => "short";
}
public sealed record PointerType(CType BaseType) : CType
{
public override string Name => $"{BaseType.Name}*";
}
public sealed record ArrayType(CType ElementType, int Size) : CType
{
public override string Name => $"{ElementType.Name}[{Size}]";
}
public sealed record FunctionType(CType ReturnType, List<CType> ParameterTypes) : CType
{
public override string Name => $"{ReturnType.Name}(...)";
}

View File

@@ -0,0 +1,78 @@
namespace TinyCC.Core;
/// <summary>
/// ELF 可执行文件生成器
/// </summary>
public sealed class ElfWriter
{
/// <summary>
/// 生成 ELF 可执行文件
/// </summary>
public byte[] WriteExecutable(byte[] machineCode, string entryPoint)
{
using var stream = new MemoryStream();
var writer = new BinaryWriter(stream);
// ELF 头
WriteElfHeader(writer, machineCode.Length);
// 程序头
var programHeaderOffset = 64; // ELF 头大小
WriteProgramHeader(writer, programHeaderOffset, machineCode.Length);
// 对齐到页面大小
var codeOffset = (int)stream.Position;
var padding = 0x1000 - (codeOffset % 0x1000);
if (padding < 0x1000)
{
stream.Write(new byte[padding]);
}
codeOffset = (int)stream.Position;
// 代码段
stream.Write(machineCode, 0, machineCode.Length);
// 更新程序头中的偏移
stream.Position = programHeaderOffset + 8; // p_offset 字段
writer.Write(codeOffset);
return stream.ToArray();
}
private void WriteElfHeader(BinaryWriter writer, int codeSize)
{
// e_ident
writer.Write(new byte[] { 0x7F, (byte)'E', (byte)'L', (byte)'F' }); // Magic
writer.Write((byte)2); // EI_CLASS: 64-bit
writer.Write((byte)1); // EI_DATA: Little endian
writer.Write((byte)1); // EI_VERSION: Current
writer.Write((byte)0); // EI_OSABI: System V
writer.Write(new byte[8]); // EI_PAD
writer.Write((ushort)2); // e_type: ET_EXEC (Executable)
writer.Write((ushort)0x3E); // e_machine: EM_X86_64
writer.Write((uint)1); // e_version: Current
writer.Write((ulong)0x400000); // e_entry: Entry point (0x400000)
writer.Write((ulong)64); // e_phoff: Program header offset
writer.Write((ulong)0); // e_shoff: Section header offset (none)
writer.Write((uint)0); // e_flags
writer.Write((ushort)64); // e_ehsize: ELF header size
writer.Write((ushort)56); // e_phentsize: Program header entry size
writer.Write((ushort)1); // e_phnum: 1 program header
writer.Write((ushort)0); // e_shentsize
writer.Write((ushort)0); // e_shnum
writer.Write((ushort)0); // e_shstrndx
}
private void WriteProgramHeader(BinaryWriter writer, int codeOffset, int codeSize)
{
writer.Write((uint)1); // p_type: PT_LOAD
writer.Write((uint)5); // p_flags: PF_R | PF_X
writer.Write((ulong)0x400000); // p_offset: Will be updated later
writer.Write((ulong)0x400000); // p_vaddr
writer.Write((ulong)0x400000); // p_paddr
writer.Write((ulong)codeSize); // p_filesz
writer.Write((ulong)codeSize); // p_memsz
writer.Write((ulong)0x1000); // p_align: Page alignment
}
}

View File

@@ -0,0 +1,257 @@
using System;
using System.IO;
using System.Text;
namespace TinyCC.Core;
/// <summary>
/// PE 可执行文件生成器Windows
/// </summary>
public sealed class PeWriter
{
private const ushort DosSignature = 0x5A4D; // MZ
private const uint PeSignature = 0x00004550; // PE\0\0
private const ushort MachineAmd64 = 0x8664;
private const ushort SectionCount = 2; // .text 和 .data
private const ushort DllCharacteristics = 0x8160;
private const uint SubsystemConsole = 3;
/// <summary>
/// 生成 PE 可执行文件
/// </summary>
public byte[] WriteExecutable(byte[] codeSection, byte[]? dataSection, string entryPoint)
{
using var stream = new MemoryStream();
var writer = new BinaryWriter(stream);
// DOS 头
WriteDosHeader(writer);
// DOS Stub
WriteDosStub(writer);
// PE 签名
var peSignatureOffset = (int)stream.Position;
writer.Write(PeSignature);
// COFF 头
var coffHeaderOffset = (int)stream.Position;
WriteCoffHeader(writer, codeSection, dataSection);
// 可选头
var optionalHeaderOffset = (int)stream.Position;
WriteOptionalHeader(writer, codeSection, dataSection);
// 节表
var sectionTableOffset = (int)stream.Position;
WriteSectionTable(writer, codeSection, dataSection);
// 对齐到文件偏移
var fileAlignment = 512;
var currentPos = (int)stream.Position;
var paddingToFirstSection = 512 - (currentPos % 512);
if (paddingToFirstSection < 512)
{
stream.Write(new byte[paddingToFirstSection]);
}
// 代码节
var textFileOffset = (int)stream.Position;
stream.Write(codeSection, 0, codeSection.Length);
// 数据节
var dataFileOffset = 0;
if (dataSection != null && dataSection.Length > 0)
{
// 对齐到文件边界
var dataPadding = 512 - (codeSection.Length % 512);
if (dataPadding < 512)
{
stream.Write(new byte[dataPadding]);
}
dataFileOffset = (int)stream.Position;
stream.Write(dataSection, 0, dataSection.Length);
}
// 返回完整 PE 文件
return stream.ToArray();
}
private void WriteDosHeader(BinaryWriter writer)
{
writer.Write(DosSignature); // e_magic: MZ
writer.Write((ushort)0); // e_cblp
writer.Write((ushort)0); // e_cp
writer.Write((ushort)0); // e_crlc
writer.Write((ushort)0); // e_cparhdr
writer.Write((ushort)0); // e_minalloc
writer.Write((ushort)0xFFFF); // e_maxalloc
writer.Write((ushort)0); // e_ss
writer.Write((ushort)0); // e_sp
writer.Write((ushort)0); // e_csum
writer.Write((ushort)0); // e_ip
writer.Write((ushort)0); // e_cs
writer.Write((ushort)0x40); // e_lfarlc: DOS stub 偏移
writer.Write((ushort)0); // e_ovno
writer.Write(new byte[8]); // e_res
writer.Write((ushort)0); // e_oemid
writer.Write((ushort)0); // e_oeminfo
writer.Write(new byte[20]); // e_res2
writer.Write(0x80); // e_lfanew: PE 头偏移
}
private void WriteDosStub(BinaryWriter writer)
{
// 简单的 DOS stub: "This program cannot be run in DOS mode.\r\n\r\n$"
var stub = Encoding.ASCII.GetBytes("This program cannot be run in DOS mode.\r\n\r\n$");
writer.Write(stub);
// 填充到 0x80 字节
var currentPos = writer.BaseStream.Position;
var padding = 0x80 - currentPos;
if (padding > 0)
{
writer.Write(new byte[padding]);
}
}
private void WriteCoffHeader(BinaryWriter writer, byte[] codeSection, byte[]? dataSection)
{
writer.Write(MachineAmd64); // Machine: AMD64
var numberOfSections = dataSection != null && dataSection.Length > 0 ? (ushort)2 : (ushort)1;
writer.Write(numberOfSections);
writer.Write((uint)0); // TimeDateStamp (可设置为实际时间戳)
writer.Write((uint)0); // PointerToSymbolTable
writer.Write((uint)0); // NumberOfSymbols
writer.Write((ushort)(96)); // SizeOfOptionalHeader (PE32+ 标准大小)
writer.Write((ushort)0x0022); // Characteristics: EXECUTABLE_IMAGE | LARGE_ADDRESS_AWARE
}
private void WriteOptionalHeader(BinaryWriter writer, byte[] codeSection, byte[]? dataSection)
{
// PE32+ 魔数
writer.Write((ushort)0x020B); // Magic: PE32+
writer.Write((byte)14); // MajorLinkerVersion
writer.Write((byte)0); // MinorLinkerVersion
// 代码大小
writer.Write(AlignUp(codeSection.Length, 512)); // SizeOfCode
var dataSize = dataSection != null ? AlignUp(dataSection.Length, 512) : 0;
writer.Write(dataSize); // SizeOfInitializedData
writer.Write((uint)0); // SizeOfUninitializedData
// 入口点 RVA
var baseAddress = 0x00400000UL;
var textRva = 0x1000U;
writer.Write(textRva); // AddressOfEntryPoint
// 代码基址
writer.Write(textRva); // BaseOfCode
// PE32+ 特有字段
writer.Write((ulong)baseAddress); // ImageBase (64-bit)
writer.Write((uint)0x1000); // SectionAlignment
writer.Write((uint)0x200); // FileAlignment
writer.Write((ushort)0); // MajorOperatingSystemVersion
writer.Write((ushort)0); // MinorOperatingSystemVersion
writer.Write((ushort)0); // MajorImageVersion
writer.Write((ushort)0); // MinorImageVersion
// 子系统版本
writer.Write((ushort)6); // MajorSubsystemVersion (Windows Vista+)
writer.Write((ushort)0); // MinorSubsystemVersion
writer.Write((uint)0); // Win32VersionValue
// 镜像大小
var imageSize = 0x1000 + (uint)AlignUp(codeSection.Length, 0x1000);
if (dataSize > 0)
{
imageSize += (uint)AlignUp(dataSize, 0x1000);
}
writer.Write(imageSize); // SizeOfImage
writer.Write((uint)0x200); // SizeOfHeaders
writer.Write((uint)0); // CheckSum
writer.Write((ushort)SubsystemConsole); // Subsystem: Console
writer.Write((ushort)DllCharacteristics); // DllCharacteristics
// 栈和堆大小 (64-bit)
writer.Write((ulong)0x100000); // SizeOfStackReserve
writer.Write((ulong)0x1000); // SizeOfStackCommit
writer.Write((ulong)0x100000); // SizeOfHeapReserve
writer.Write((ulong)0x1000); // SizeOfHeapCommit
writer.Write((uint)0); // LoaderFlags
writer.Write((uint)16); // NumberOfRvaAndSizes
// 数据目录 (16 项)
for (int i = 0; i < 16; i++)
{
writer.Write((uint)0); // VirtualAddress
writer.Write((uint)0); // Size
}
}
private void WriteSectionTable(BinaryWriter writer, byte[] codeSection, byte[]? dataSection)
{
// .text 节
WriteSectionHeader(writer, ".text",
codeSection.Length,
0x1000, // VirtualAddress
AlignUp(codeSection.Length, 512),
0x1000, // PointerToRawData
0x60000020); // Characteristics: CODE | EXECUTE | READ
// .data 节 (如果有)
if (dataSection != null && dataSection.Length > 0)
{
var textRawSize = AlignUp(codeSection.Length, 512);
WriteSectionHeader(writer, ".data",
dataSection.Length,
0x2000, // VirtualAddress
AlignUp(dataSection.Length, 512),
0x1000 + (uint)textRawSize, // PointerToRawData
0xC0000040); // Characteristics: INITIALIZED_DATA | READ | WRITE
}
}
private void WriteSectionHeader(BinaryWriter writer, string name, int virtualSize,
uint virtualAddress, int rawSize, uint rawAddress, uint characteristics)
{
// 节名 (8 字节)
var nameBytes = Encoding.ASCII.GetBytes(name);
var nameBuffer = new byte[8];
Array.Copy(nameBytes, nameBuffer, Math.Min(nameBytes.Length, 8));
writer.Write(nameBuffer);
writer.Write(virtualSize); // VirtualSize
writer.Write(virtualAddress); // VirtualAddress
writer.Write(rawSize); // SizeOfRawData
writer.Write(rawAddress); // PointerToRawData
writer.Write((uint)0); // PointerToRelocations
writer.Write((uint)0); // PointerToLinenumbers
writer.Write((ushort)0); // NumberOfRelocations
writer.Write((ushort)0); // NumberOfLinenumbers
writer.Write(characteristics); // Characteristics
}
private static int AlignUp(int value, int alignment)
{
return (value + alignment - 1) & ~(alignment - 1);
}
}
// PE 可选头结构大小计算
internal struct OptionalHeader64
{
// 仅用于 sizeof 计算
public ushort Magic;
public byte MajorLinkerVersion;
public byte MinorLinkerVersion;
// ... 其他字段
}

View File

@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
</Project>

BIN
test_output Normal file

Binary file not shown.

View File

@@ -0,0 +1,238 @@
using System.Diagnostics;
using System.Text;
using TinyCC.Core;
namespace TinyCC.E2ETests;
/// <summary>
/// 端到端测试运行器
/// 负责编译 C 源代码并执行生成的 ELF 文件
/// </summary>
public sealed class E2ETestRunner
{
private readonly string _tempDirectory;
public E2ETestRunner(string? tempDirectory = null)
{
_tempDirectory = tempDirectory ?? "/tmp/tinycc-debug"; // 固定路径便于调试
Directory.CreateDirectory(_tempDirectory);
}
/// <summary>
/// 运行单个测试用例
/// </summary>
public async Task<TestResult> RunTestAsync(TestCase testCase)
{
var sourceFile = Path.Combine(_tempDirectory, $"{testCase.Name}.c");
var outputFile = Path.Combine(_tempDirectory, $"{testCase.Name}.out");
try
{
// 写入源代码
await File.WriteAllTextAsync(sourceFile, testCase.SourceCode);
// 编译
var compileResult = Compile(sourceFile, outputFile);
if (!compileResult.Success)
{
return new TestResult(
testCase.Name,
false,
-1,
null,
$"编译失败: {compileResult.Error}"
);
}
// 执行
var executeResult = await ExecuteAsync(outputFile);
// 调试:如果失败,打印 objdump
if (executeResult.ExitCode != testCase.ExpectedExitCode)
{
try
{
var psi = new ProcessStartInfo("objdump", $"-d -M intel \"{outputFile}\"")
{
RedirectStandardOutput = true,
UseShellExecute = false
};
using var p = Process.Start(psi)!;
var dump = await p.StandardOutput.ReadToEndAsync();
Console.WriteLine($"[DEBUG] objdump:\n{dump}");
}
catch { }
}
// 验证结果
bool passed = executeResult.ExitCode == testCase.ExpectedExitCode;
if (testCase.ExpectedOutput != null && executeResult.Output != testCase.ExpectedOutput)
{
passed = false;
}
return new TestResult(
testCase.Name,
passed,
executeResult.ExitCode,
executeResult.Output,
passed ? null : $"期望退出码 {testCase.ExpectedExitCode},实际 {executeResult.ExitCode}"
);
}
catch (Exception ex)
{
return new TestResult(
testCase.Name,
false,
-1,
null,
$"测试执行异常: {ex.Message}"
);
}
}
private CompilationResult Compile(string sourceFile, string outputFile)
{
try
{
var errorReporter = new ErrorReporter();
var driver = new CompilerDriver(errorReporter);
var options = new CompilationOptions(
SourceFile: sourceFile,
OutputFile: outputFile,
Platform: TargetPlatform.LinuxX64
);
var result = driver.Compile(options);
if (errorReporter.HasErrors)
{
var errors = string.Join("\n", errorReporter.GetErrors().Select(e => e.ToString()));
return new CompilationResult(false, errors);
}
if (!result.Success)
{
return new CompilationResult(false, result.Message);
}
return new CompilationResult(true, null);
}
catch (Exception ex)
{
return new CompilationResult(false, ex.Message);
}
}
private async Task<ExecutionResult> ExecuteAsync(string executablePath)
{
try
{
// 设置执行权限
var chmod = Process.Start(new ProcessStartInfo
{
FileName = "chmod",
Arguments = $"+x \"{executablePath}\"",
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false
});
if (chmod != null)
{
await chmod.WaitForExitAsync();
}
var psi = new ProcessStartInfo
{
FileName = executablePath,
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false,
CreateNoWindow = true
};
using var process = Process.Start(psi)
?? throw new InvalidOperationException($"无法启动进程: {executablePath}");
var outputBuilder = new StringBuilder();
var errorBuilder = new StringBuilder();
process.OutputDataReceived += (sender, e) =>
{
if (e.Data != null)
{
outputBuilder.AppendLine(e.Data);
}
};
process.ErrorDataReceived += (sender, e) =>
{
if (e.Data != null)
{
errorBuilder.AppendLine(e.Data);
}
};
process.BeginOutputReadLine();
process.BeginErrorReadLine();
await process.WaitForExitAsync();
var output = outputBuilder.ToString().TrimEnd();
var error = errorBuilder.ToString().TrimEnd();
return new ExecutionResult(
process.ExitCode,
string.IsNullOrEmpty(output) ? error : output
);
}
catch (Exception ex)
{
return new ExecutionResult(-1, $"执行失败: {ex.Message}");
}
}
public void Cleanup()
{
// 调试期间保留文件
}
}
/// <summary>
/// 测试用例
/// </summary>
public record TestCase(
string Name,
string SourceCode,
int ExpectedExitCode,
string? ExpectedOutput = null
);
/// <summary>
/// 测试结果
/// </summary>
public record TestResult(
string TestCaseName,
bool Passed,
int ActualExitCode,
string? ActualOutput,
string? ErrorMessage
);
/// <summary>
/// 编译结果
/// </summary>
public record CompilationResult(
bool Success,
string? Error
);
/// <summary>
/// 执行结果
/// </summary>
public record ExecutionResult(
int ExitCode,
string Output
);

View File

@@ -0,0 +1,58 @@
using Xunit;
using Xunit.Abstractions;
namespace TinyCC.E2ETests;
public class E2ETests : IAsyncLifetime
{
private readonly ITestOutputHelper _output;
private readonly E2ETestRunner _runner;
public E2ETests(ITestOutputHelper output)
{
_output = output;
_runner = new E2ETestRunner();
}
public Task InitializeAsync() => Task.CompletedTask;
public Task DisposeAsync()
{
_runner.Cleanup();
return Task.CompletedTask;
}
[Theory]
[MemberData(nameof(GetBasicTestCases))]
public async Task BasicFeature_ShouldCompileAndRun(TestCase testCase)
{
// Act
var result = await _runner.RunTestAsync(testCase);
// Assert
_output.WriteLine($"测试: {testCase.Name}");
_output.WriteLine($"源代码:\n{testCase.SourceCode}");
_output.WriteLine($"结果: {(result.Passed ? "" : "")}");
if (!result.Passed)
{
_output.WriteLine($"错误: {result.ErrorMessage}");
_output.WriteLine($"实际退出码: {result.ActualExitCode}");
if (result.ActualOutput != null)
{
_output.WriteLine($"实际输出: {result.ActualOutput}");
}
}
Assert.True(result.Passed, result.ErrorMessage);
}
public static TheoryData<TestCase> GetBasicTestCases()
{
var data = new TheoryData<TestCase>();
foreach (var testCase in TestCases.GetBasicTests())
{
data.Add(testCase);
}
return data;
}
}

View File

@@ -0,0 +1,231 @@
namespace TinyCC.E2ETests;
/// <summary>
/// 端到端测试用例集合
/// </summary>
public static class TestCases
{
/// <summary>
/// 获取所有基础测试用例
/// </summary>
public static IEnumerable<TestCase> GetBasicTests()
{
// 最简单的测试:返回常量
yield return new TestCase(
Name: "simple_return_zero",
SourceCode: """
int main() {
return 0;
}
""",
ExpectedExitCode: 0
);
// 返回常量 42
yield return new TestCase(
Name: "simple_return_42",
SourceCode: """
int main() {
return 42;
}
""",
ExpectedExitCode: 42
);
// 算术运算测试 - 使用 if-else 替代三元运算符
yield return new TestCase(
Name: "arithmetic_add",
SourceCode: """
int add(int a, int b) {
return a + b;
}
int main() {
int result;
result = add(3, 4);
if (result == 7) {
return 0;
}
return 1;
}
""",
ExpectedExitCode: 0
);
// 控制流测试 - for 循环
yield return new TestCase(
Name: "control_flow_for_loop",
SourceCode: """
int main() {
int sum;
int i;
sum = 0;
for (i = 1; i <= 10; i = i + 1) {
sum = sum + i;
}
if (sum == 55) {
return 0;
}
return 1;
}
""",
ExpectedExitCode: 0
);
// 控制流测试 - while 循环
yield return new TestCase(
Name: "control_flow_while_loop",
SourceCode: """
int main() {
int sum;
int i;
sum = 0;
i = 1;
while (i <= 10) {
sum = sum + i;
i = i + 1;
}
if (sum == 55) {
return 0;
}
return 1;
}
""",
ExpectedExitCode: 0
);
// 函数调用测试
yield return new TestCase(
Name: "function_call",
SourceCode: """
int multiply(int a, int b) {
return a * b;
}
int main() {
int result;
result = multiply(6, 7);
if (result == 42) {
return 0;
}
return 1;
}
""",
ExpectedExitCode: 0
);
// 条件分支测试
yield return new TestCase(
Name: "conditional_branch",
SourceCode: """
int max(int a, int b) {
if (a > b) {
return a;
} else {
return b;
}
}
int main() {
int result;
result = max(10, 20);
if (result == 20) {
return 0;
}
return 1;
}
""",
ExpectedExitCode: 0
);
// 变量赋值测试
yield return new TestCase(
Name: "variable_assignment",
SourceCode: """
int main() {
int x;
x = 42;
if (x == 42) {
return 0;
}
return 1;
}
""",
ExpectedExitCode: 0
);
// 递归函数测试
yield return new TestCase(
Name: "recursive_factorial",
SourceCode: """
int factorial(int n) {
if (n <= 1) {
return 1;
}
return n * factorial(n - 1);
}
int main() {
int result;
result = factorial(5);
if (result == 120) {
return 0;
}
return 1;
}
""",
ExpectedExitCode: 0
);
// 局部变量作用域测试
yield return new TestCase(
Name: "local_variable_scope",
SourceCode: """
int main() {
int x;
x = 10;
if (x > 5) {
int y;
y = 20;
x = y;
}
if (x == 20) {
return 0;
}
return 1;
}
""",
ExpectedExitCode: 0
);
}
/// <summary>
/// 获取预期失败的测试用例(用于记录当前不支持的功能)
/// </summary>
public static IEnumerable<TestCase> GetKnownFailures()
{
// 指针测试 - 当前可能不支持
yield return new TestCase(
Name: "pointers_basic",
SourceCode: """
int main() {
int x = 42;
int *p = &x;
return *p == 42 ? 0 : 1;
}
""",
ExpectedExitCode: 0
);
// 数组测试 - 当前可能不支持
yield return new TestCase(
Name: "arrays_basic",
SourceCode: """
int main() {
int arr[3];
arr[0] = 1;
arr[1] = 2;
arr[2] = 3;
return arr[1] == 2 ? 0 : 1;
}
""",
ExpectedExitCode: 0
);
}
}

View File

@@ -0,0 +1,27 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
<IsTestProject>true</IsTestProject>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="coverlet.collector" Version="6.0.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageReference Include="xunit" Version="2.5.3" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.5.3" />
</ItemGroup>
<ItemGroup>
<Using Include="Xunit" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\TinyCC.Core\TinyCC.Core.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,27 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
<IsTestProject>true</IsTestProject>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="coverlet.collector" Version="6.0.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageReference Include="xunit" Version="2.5.3" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.5.3" />
</ItemGroup>
<ItemGroup>
<Using Include="Xunit" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\TinyCC.Core\TinyCC.Core.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,10 @@
namespace TinyCC.Tests;
public class UnitTest1
{
[Fact]
public void Test1()
{
}
}

View File

@@ -0,0 +1,86 @@
using TinyCC.Core;
namespace TinyCC.Tests;
public class LexerTests
{
private readonly IErrorReporter _errorReporter;
public LexerTests()
{
_errorReporter = new ErrorReporter();
}
[Fact]
public void Tokenize_SimpleExpression_ReturnsTokens()
{
var source = "int x = 3 + 4;";
var lexer = new Lexer(source, "test.c", _errorReporter);
var tokens = lexer.Tokenize().ToList();
Assert.False(_errorReporter.HasErrors);
Assert.Contains(tokens, t => t.Type == TokenType.Int);
Assert.Contains(tokens, t => t.Type == TokenType.Identifier && t.Lexeme == "x");
Assert.Contains(tokens, t => t.Type == TokenType.Assign);
Assert.Contains(tokens, t => t.Type == TokenType.IntLiteral && Convert.ToInt64(t.Value!) == 3);
Assert.Contains(tokens, t => t.Type == TokenType.IntLiteral && Convert.ToInt64(t.Value!) == 4);
Assert.Contains(tokens, t => t.Type == TokenType.Plus);
Assert.Contains(tokens, t => t.Type == TokenType.Semicolon);
}
[Fact]
public void Tokenize_FunctionDefinition_ReturnsTokens()
{
var source = "int add(int a, int b) { return a + b; }";
var lexer = new Lexer(source, "test.c", _errorReporter);
var tokens = lexer.Tokenize().ToList();
Assert.False(_errorReporter.HasErrors);
Assert.Contains(tokens, t => t.Type == TokenType.Int);
Assert.Contains(tokens, t => t.Type == TokenType.Identifier && t.Lexeme == "add");
Assert.Contains(tokens, t => t.Type == TokenType.LeftParen);
Assert.Contains(tokens, t => t.Type == TokenType.RightParen);
Assert.Contains(tokens, t => t.Type == TokenType.LeftBrace);
Assert.Contains(tokens, t => t.Type == TokenType.Return);
Assert.Contains(tokens, t => t.Type == TokenType.RightBrace);
}
[Fact]
public void Tokenize_SkipsComments()
{
var source = "int x; // this is a comment\nint y;";
var lexer = new Lexer(source, "test.c", _errorReporter);
var tokens = lexer.Tokenize().ToList();
Assert.False(_errorReporter.HasErrors);
var identifiers = tokens.Where(t => t.Type == TokenType.Identifier).ToList();
Assert.Equal(2, identifiers.Count);
Assert.Equal("x", identifiers[0].Lexeme);
Assert.Equal("y", identifiers[1].Lexeme);
}
}
public class ParserTests
{
private readonly IErrorReporter _errorReporter;
public ParserTests()
{
_errorReporter = new ErrorReporter();
}
[Fact]
public void Parse_SimpleFunction_ReturnsAst()
{
var source = "int add(int a, int b) { return a + b; }";
var lexer = new Lexer(source, "test.c", _errorReporter);
var tokens = lexer.Tokenize().ToList();
var parser = new Parser(tokens, _errorReporter);
var ast = parser.Parse();
Assert.False(_errorReporter.HasErrors);
Assert.NotNull(ast);
Assert.IsType<ProgramNode>(ast);
}
}