mini-dog-c编译器开发 - 05 解释器与代码生成

原创于 2026-05-04 20:11:16 发布 · 334 阅读

6 ·

本内容遵循CC 4.0 BY-SA版权协议

GEO检测

标签

#c语言 #编译器

编译器专栏收录该内容

6 篇文章

订阅专栏

本篇为 mini-dog-c 编译器开发系列第五篇，介绍解释器的实现，包括环境管理、表达式求值和函数调用。

1. 解释器 vs 编译器

很多人容易混淆"解释器"和"编译器"，其实区别很简单：

编译器：把源代码翻译成另一种形式（通常是机器码或字节码），然后由硬件或虚拟机执行
解释器：直接遍历源代码的中间表示（这里是 AST），一边分析一边执行，不生成额外的目标代码

mini-dog-c 的"编译"流程实际上是：源代码 → Token → AST → 解释执行，没有生成机器码或字节码这一步。

2. 值（Value）系统

解释器需要一种运行时表示数据的方式。mini-dog-c 定义了一个 Value 类型：

/* Runtime type tag stored in every Value; it selects the active union member. */
typedef enum {
    VALUE_INT      = 0,  /* integer */
    VALUE_DOUBLE   = 1,  /* floating-point number */
    VALUE_CHAR     = 2,  /* single character */
    VALUE_BOOL     = 3,  /* boolean */
    VALUE_STRING   = 4,  /* string */
    VALUE_FUNCTION = 5,  /* function */
    VALUE_NULL     = 6   /* null value */
} ValueType;

/*
 * Tagged runtime value. `type` says which member of `data` is meaningful;
 * the other members of the union must not be read.
 */
typedef struct {
    ValueType type;
    union {
        int int_value;          /* payload for VALUE_INT */
        double double_value;    /* payload for VALUE_DOUBLE */
        char char_value;        /* payload for VALUE_CHAR */
        bool bool_value;        /* payload for VALUE_BOOL */
        struct {
            char *string;       /* copy made by value_create_string via strdup_custom */
            int length;         /* strlen() of the source string at creation time */
        } string_value;         /* payload for VALUE_STRING */
        struct {
            char *name;         /* function name */
            char **params;      /* parameter names; eval_call binds arguments to params[i] */
            int param_count;    /* number of entries in params */
            ASTNode *body;      /* block evaluated by eval_call through eval_block */
            void *closure;      /* the visible value_create_function callers pass NULL here */
        } function;             /* payload for VALUE_FUNCTION */
    } data;
} Value;

每种字面量类型都对应一个 value_create_* 函数：

/**
 * Allocate a new integer Value.
 *
 * @param v  Integer payload.
 * @return   Heap-allocated Value owned by the caller (release with
 *           value_free), or NULL when the allocation fails.
 */
Value *value_create_int(int v) {
    Value *value = malloc(sizeof *value);
    if (value == NULL) {
        return NULL;  /* out of memory: report it instead of dereferencing NULL */
    }
    value->type = VALUE_INT;
    value->data.int_value = v;
    return value;
}

/**
 * Allocate a new string Value holding a private copy of `str`.
 *
 * @param str  NUL-terminated source text; a NULL pointer is treated as the
 *             empty string so that strlen() is never called on NULL.
 * @return     Heap-allocated Value owned by the caller (release with
 *             value_free), or NULL when an allocation fails.
 */
Value *value_create_string(const char *str) {
    const char *text = (str != NULL) ? str : "";

    Value *value = malloc(sizeof *value);
    if (value == NULL) {
        return NULL;
    }

    /* NOTE(review): assumes strdup_custom returns NULL on allocation failure - confirm. */
    char *copy = strdup_custom(text);
    if (copy == NULL) {
        free(value);  /* do not hand out a half-initialised Value */
        return NULL;
    }

    value->type = VALUE_STRING;
    value->data.string_value.string = copy;
    value->data.string_value.length = (int)strlen(text);
    return value;
}

3. 环境（Environment）

变量需要存储在哪里？答案是环境。环境是一个作用域概念，保存了变量名到值的映射：

/*
 * The typedef must come before the struct body: the body already uses the
 * name `Env` for its `parent` member, which is not a known type otherwise.
 */
typedef struct Env Env;

/* One scope: a fixed-capacity table mapping variable names to values. */
struct Env {
    Env *parent;                  /* enclosing scope; env_get falls back to it on a miss */
    char *names[ENV_VAR_MAX];     /* variable names (copies made with strdup_custom) */
    Value *values[ENV_VAR_MAX];   /* values[i] belongs to names[i] */
    int count;                    /* number of slots in use in this scope */
};

全局环境：解释器初始化时创建一个全局环境，用于存储全局变量和函数定义。

局部环境：函数调用时创建一个新的局部环境，父环境指向调用者的环境：

/**
 * Create an empty scope.
 *
 * @param parent  Enclosing scope consulted by env_get on a miss; may be NULL
 *                for the outermost scope.
 * @return        Newly allocated environment with no variables, or NULL when
 *                the allocation fails.
 */
Env *env_create(Env *parent) {
    Env *env = malloc(sizeof *env);
    if (env == NULL) {
        return NULL;  /* out of memory: report it instead of dereferencing NULL */
    }
    env->parent = parent;
    env->count = 0;   /* names[]/values[] are only read below `count` */
    return env;
}

/**
 * Append a new variable to `env` (the current scope only).
 *
 * @param env    Scope to extend.
 * @param name   Variable name; a private copy is stored.
 * @param value  Value pointer stored as-is in the scope.
 * @return       true when the variable was stored. false when `env` or
 *               `name` is NULL, the scope already holds ENV_VAR_MAX
 *               variables, or the name could not be copied; in that case
 *               nothing is stored and `value` remains the caller's to free.
 */
bool env_define(Env *env, const char *name, Value *value) {
    if (env == NULL || name == NULL) {
        return false;
    }
    /* The tables have exactly ENV_VAR_MAX slots; writing past them is UB. */
    if (env->count < 0 || env->count >= ENV_VAR_MAX) {
        return false;
    }

    char *name_copy = strdup_custom(name);
    if (name_copy == NULL) {
        return false;
    }

    env->names[env->count] = name_copy;
    env->values[env->count] = value;
    env->count++;
    return true;
}

/**
 * Look a variable up by name, starting in `env` and walking outwards
 * through the parent chain.
 *
 * @return  The pointer stored in the first scope that defines `name`
 *          (not a copy), or NULL when no scope on the chain defines it.
 */
Value *env_get(Env *env, const char *name) {
    Env *scope = env;

    do {
        int slot = 0;
        while (slot < scope->count) {
            if (strcmp(scope->names[slot], name) == 0) {
                return scope->values[slot];
            }
            slot++;
        }
        scope = scope->parent;  /* not found here: try the enclosing scope */
    } while (scope != NULL);

    return NULL;
}

/**
 * Store `value` under `name` in `env`.
 *
 * Only `env` itself is searched; parent scopes are not consulted. When the
 * name already exists in this scope its slot is overwritten (the previous
 * pointer is simply replaced, not freed here). Otherwise the variable is
 * created in this scope through env_define.
 *
 * @return  true when an existing slot was overwritten, otherwise whatever
 *          env_define returns.
 */
bool env_set(Env *env, const char *name, Value *value) {
    int slot = 0;

    /* Advance to the first slot whose name matches, or to the end. */
    while (slot < env->count && strcmp(env->names[slot], name) != 0) {
        slot++;
    }

    if (slot >= env->count) {
        return env_define(env, name, value);
    }

    env->values[slot] = value;
    return true;
}

4. 二元表达式求值

二元表达式的求值模式很固定：先求左操作数，再求右操作数，然后根据操作符类型计算结果：

static Value *eval_binary(Evaluator *eval, Env *env, TokenType op,
                          ASTNode *left_node, ASTNode *right_node) {
    Value *left  = eval_expression(eval, env, left_node);
    Value *right = eval_expression(eval, env, right_node);

    Value *result = NULL;

    switch (op) {
        case TOKEN_PLUS:
            if (left->type == VALUE_STRING && right->type == VALUE_STRING) {
                // 字符串连接
                size_t len = strlen(left->data.string_value.string)
                           + strlen(right->data.string_value.string) + 1;
                char *buf = (char *)malloc(len);
                strcpy(buf, left->data.string_value.string);
                strcat(buf, right->data.string_value.string);
                result = value_create_string(buf);
                free(buf);
            } else if (left->type == VALUE_INT && right->type == VALUE_INT) {
                result = value_create_int(left->data.int_value + right->data.int_value);
            } else {
                double lv = left->type == VALUE_INT ? left->data.int_value
                                                    : left->data.double_value;
                double rv = right->type == VALUE_INT ? right->data.int_value
                                                     : right->data.double_value;
                result = value_create_double(lv + rv);
            }
            break;

        case TOKEN_MINUS:
            if (left->type == VALUE_INT && right->type == VALUE_INT)
                result = value_create_int(left->data.int_value - right->data.int_value);
            else {
                double lv = left->type == VALUE_INT ? left->data.int_value : left->data.double_value;
                double rv = right->type == VALUE_INT ? right->data.int_value : right->data.double_value;
                result = value_create_double(lv - rv);
            }
            break;

        case TOKEN_STAR:
            if (left->type == VALUE_INT && right->type == VALUE_INT)
                result = value_create_int(left->data.int_value * right->data.int_value);
            else {
                double lv = left->type == VALUE_INT ? left->data.int_value : left->data.double_value;
                double rv = right->type == VALUE_INT ? right->data.int_value : right->data.double_value;
                result = value_create_double(lv * rv);
            }
            break;

        case TOKEN_SLASH:
            if (right->type == VALUE_INT && right->data.int_value == 0)
                result = value_create_null();  // 除零返回 null
            else if (left->type == VALUE_INT && right->type == VALUE_INT)
                result = value_create_int(left->data.int_value / right->data.int_value);
            else {
                double lv = left->type == VALUE_INT ? left->data.int_value : left->data.double_value;
                double rv = right->type == VALUE_INT ? right->data.int_value : right->data.double_value;
                result = value_create_double(lv / rv);
            }
            break;

        case TOKEN_EQ:
            if (left->type == VALUE_INT && right->type == VALUE_INT)
                result = value_create_bool(left->data.int_value == right->data.int_value);
            else if (left->type == VALUE_BOOL && right->type == VALUE_BOOL)
                result = value_create_bool(left->data.bool_value == right->data.bool_value);
            else
                result = value_create_bool(false);
            break;

        case TOKEN_NE:
            if (left->type == VALUE_INT && right->type == VALUE_INT)
                result = value_create_bool(left->data.int_value != right->data.int_value);
            else if (left->type == VALUE_BOOL && right->type == VALUE_BOOL)
                result = value_create_bool(left->data.bool_value != right->data.bool_value);
            else
                result = value_create_bool(true);
            break;

        case TOKEN_LT:
            if (left->type == VALUE_INT && right->type == VALUE_INT)
                result = value_create_bool(left->data.int_value < right->data.int_value);
            else {
                double lv = left->type == VALUE_INT ? left->data.int_value : left->data.double_value;
                double rv = right->type == VALUE_INT ? right->data.int_value : right->data.double_value;
                result = value_create_bool(lv < rv);
            }
            break;

        case TOKEN_GT:
            if (left->type == VALUE_INT && right->type == VALUE_INT)
                result = value_create_bool(left->data.int_value > right->data.int_value);
            else {
                double lv = left->type == VALUE_INT ? left->data.int_value : left->data.double_value;
                double rv = right->type == VALUE_INT ? right->data.int_value : right->data.double_value;
                result = value_create_bool(lv > rv);
            }
            break;
    }

    value_free(left);
    value_free(right);
    return result;
}

关键细节：操作数用完后要 value_free 释放，否则会造成内存泄漏。同时注意 eval_expression 返回的值所有权归调用者，所以 eval_binary 负责释放左右操作数，结果的所有权交给上一级。

5. 函数调用

函数调用是最复杂的部分，需要：

从环境中查找函数
创建新的局部环境，绑定参数
执行函数体
返回结果

/**
 * Call the function bound to `name`.
 *
 * Steps: look the function up in `env`, create a local scope whose parent
 * is the caller's `env`, bind each argument to the matching parameter, run
 * the body with eval_block, then free the local scope.
 *
 * Arguments are evaluated in the caller's environment, not in the new local
 * scope. Only the first min(arg_count, param_count) arguments are evaluated
 * and bound.
 *
 * @param eval       Evaluator state, passed through to the eval_* helpers.
 * @param env        Caller's environment.
 * @param name       Name of the function to call.
 * @param args       Argument expressions.
 * @param arg_count  Number of entries in `args`.
 * @return           The body's result, or a null Value when `name` is not a
 *                   function, the local scope cannot be created, the
 *                   function has no body, or the body yields nothing.
 */
static Value *eval_call(Evaluator *eval, Env *env, const char *name,
                        ASTNode **args, int arg_count) {
    Value *func = env_get(env, name);
    if (!func || func->type != VALUE_FUNCTION)
        return value_create_null();

    /* New local scope; its parent is the caller's environment. */
    Env *local_env = env_create(env);
    if (!local_env)
        return value_create_null();

    /* Bind parameters. */
    for (int i = 0; i < arg_count && i < func->data.function.param_count; i++) {
        Value *arg_val = eval_expression(eval, env, args[i]);
        if (!env_define(local_env, func->data.function.params[i], arg_val)) {
            /* The scope did not take the value, so it is still ours to free. */
            if (arg_val) value_free(arg_val);
        }
    }

    /* Run the body; a function without a body simply yields null. */
    Value *result = NULL;
    if (func->data.function.body)
        result = eval_block(eval, local_env, func->data.function.body);

    env_free(local_env);
    return result ? result : value_create_null();
}

一个重要细节：参数求值时使用的是调用者的环境（env），而不是新创建的局部环境（local_env）。这样 a = 10; foo(a) 中的实参 a 会在调用者的作用域中解析，得到 10。

6. 代码块求值

/*
 * Worker behind eval_block. Runs the statements of `block` in order and
 * reports through `*did_return` whether a return statement was executed
 * (directly or inside a nested if-branch / block), so that callers can tell
 * "returned early" apart from "ran to the end".
 */
static Value *eval_block_tracked(Evaluator *eval, Env *env, ASTNode *block,
                                 bool *did_return) {
    Value *result = NULL;
    *did_return = false;

    if (block == NULL)
        return value_create_null();

    for (int i = 0; i < block->data.block.statement_count; i++) {
        ASTNode *stmt = block->data.block.statements[i];

        switch (stmt->type) {
            case AST_VAR_DECL: {
                Value *init = eval_expression(eval, env, stmt->data.var_decl.initializer);
                if (!env_define(env, stmt->data.var_decl.name, init)) {
                    /* Not stored, so the value is still ours to release. */
                    if (init) value_free(init);
                }
                break;
            }

            case AST_RETURN_STMT:
                if (result) value_free(result);
                *did_return = true;
                return stmt->data.return_stmt.value
                    ? eval_expression(eval, env, stmt->data.return_stmt.value)
                    : value_create_null();

            case AST_IF_STMT: {
                Value *cond = eval_expression(eval, env,
                                              stmt->data.if_stmt.condition);
                /* Only a boolean `true` selects the then-branch. */
                bool cond_true = (cond && cond->type == VALUE_BOOL
                                  && cond->data.bool_value);
                if (cond) value_free(cond);
                ASTNode *branch = cond_true
                    ? stmt->data.if_stmt.then_branch
                    : stmt->data.if_stmt.else_branch;

                if (result) value_free(result);
                result = NULL;

                if (branch) {
                    bool branch_returned = false;
                    result = eval_block_tracked(eval, env, branch, &branch_returned);
                    if (branch_returned) {
                        /* A return inside the branch ends this block too. */
                        *did_return = true;
                        return result;
                    }
                    /* No return was hit: keep going with the next statement. */
                } else {
                    result = value_create_null();
                }
                break;
            }

            case AST_BLOCK_STMT: {
                /* Nested block: same environment, same return propagation. */
                bool nested_returned = false;
                if (result) value_free(result);
                result = eval_block_tracked(eval, env, stmt, &nested_returned);
                if (nested_returned) {
                    *did_return = true;
                    return result;
                }
                break;
            }

            case AST_EXPR_STMT: {
                if (result) value_free(result);
                /* NOTE(review): the expression is read through
                 * data.return_stmt.value, as in the original code; presumably
                 * expression statements share that layout - confirm. */
                result = stmt->data.return_stmt.value
                    ? eval_expression(eval, env, stmt->data.return_stmt.value)
                    : value_create_null();
                break;
            }

            case AST_FN_DECL: {
                Value *fn = value_create_function(
                    stmt->data.fn_decl.name,
                    stmt->data.fn_decl.params,
                    stmt->data.fn_decl.param_count,
                    stmt->data.fn_decl.body,
                    NULL
                );
                if (!env_define(env, stmt->data.fn_decl.name, fn)) {
                    if (fn) value_free(fn);
                }
                break;
            }

            default:
                /* Other statement kinds are ignored here. */
                break;
        }
    }
    return result ? result : value_create_null();
}

/**
 * Execute the statements of `block` in `env`.
 *
 * A return statement stops execution immediately and its value becomes the
 * result, also when it sits inside an if-branch or a nested block. An `if`
 * whose branch finishes without returning no longer ends the enclosing
 * block: execution continues with the statement after the `if`.
 *
 * @param eval   Evaluator state, passed through to eval_expression.
 * @param env    Environment used for every statement; no new scope is made.
 * @param block  Block node whose data.block statements are executed.
 * @return       Value of the return statement, otherwise the value of the
 *               last expression / if / nested-block statement, otherwise a
 *               null Value. The caller owns the result.
 */
static Value *eval_block(Evaluator *eval, Env *env, ASTNode *block) {
    bool did_return = false;
    return eval_block_tracked(eval, env, block, &did_return);
}

注意：return 语句会立即返回，通过层层 return 把控制流和值直接传出去，不需要遍历完整个块。

7. 主循环

/**
 * Run the program held in eval->ast, one top-level statement at a time,
 * using eval->global_env as the environment.
 *
 * Variable and function declarations are bound in the global environment.
 * A return statement, an if statement whose selected branch exists, and a
 * block statement each end the run and supply its result.
 *
 * @return  The value produced by the statement that ended the run, or a
 *          null Value when every statement was processed without one.
 */
Value *evaluator_run(Evaluator *eval) {
    int index = 0;

    while (index < eval->ast->data.program.statement_count) {
        ASTNode *node = eval->ast->data.program.statements[index];
        index++;

        if (node->type == AST_VAR_DECL) {
            Value *initial = eval_expression(eval, eval->global_env,
                                             node->data.var_decl.initializer);
            env_define(eval->global_env, node->data.var_decl.name, initial);
        } else if (node->type == AST_FN_DECL) {
            Value *callable = value_create_function(
                node->data.fn_decl.name,
                node->data.fn_decl.params,
                node->data.fn_decl.param_count,
                node->data.fn_decl.body, NULL
            );
            env_define(eval->global_env, node->data.fn_decl.name, callable);
        } else if (node->type == AST_EXPR_STMT) {
            /* Evaluated for its side effects only; the returned value is not
             * released here.
             * NOTE(review): freeing it looks tempting, but the visible
             * assignment case hands back the pointer stored in the
             * environment, so a free here could leave that entry dangling -
             * confirm the ownership contract before changing this. */
            (void)eval_expression(eval, eval->global_env,
                                  node->data.return_stmt.value);
        } else if (node->type == AST_RETURN_STMT) {
            if (node->data.return_stmt.value) {
                return eval_expression(eval, eval->global_env,
                                       node->data.return_stmt.value);
            }
            return value_create_null();
        } else if (node->type == AST_IF_STMT) {
            Value *test = eval_expression(eval, eval->global_env,
                                          node->data.if_stmt.condition);
            /* Only a boolean `true` selects the then-branch. */
            bool taken = (test && test->type == VALUE_BOOL
                          && test->data.bool_value);
            if (test) value_free(test);

            ASTNode *chosen = node->data.if_stmt.else_branch;
            if (taken) {
                chosen = node->data.if_stmt.then_branch;
            }
            /* NOTE(review): the run ends here whenever a branch exists,
             * whether or not that branch executed a return statement. */
            if (chosen) {
                return eval_block(eval, eval->global_env, chosen);
            }
        } else if (node->type == AST_BLOCK_STMT) {
            return eval_block(eval, eval->global_env, node);
        }
    }

    return value_create_null();
}

8. 内存管理：所有权与生命周期

解释器最难的部分是内存管理。每个 eval_expression 调用返回一个 Value*，调用者负责释放。规则如下：

操作	语义（值由谁释放）
`eval_binary`	释放左右操作数，返回结果（由上层释放）
`eval_unary`	释放操作数，返回结果
`eval_expression`（字面量）	返回新分配的值
`env_get`	返回借用的指针，不释放
`env_set`	接管传入值的所有权

赋值表达式是内存管理最容易出错的地方：

/* Assignment expression: evaluate the right-hand side, store it under the
 * target name with env_set, and yield the stored value. */
case AST_ASSIGN_EXPR: {
    Value *v = eval_expression(eval, env, node->data.assign_expr.value);
    if (env_set(env, node->data.assign_expr.name, v)) {
        // env_set succeeded: the value now lives in the environment, so it is not freed here.
        // NOTE(review): `stored` is the very pointer the environment holds, yet
        // eval_binary and eval_block call value_free on values returned by
        // eval_expression; returning it may leave the environment with a freed
        // value - confirm whether a copy should be returned instead.
        Value *stored = env_get(env, node->data.assign_expr.name);
        return stored ? stored : value_create_null();
    } else {
        value_free(v);  // env_set failed: the value was not stored, so release it here
        return value_create_null();
    }
}

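env_get 返回借用（borrowed）引用，所有权仍属于环境，所以 AST_ASSIGN_EXPR 直接返回 env_get 的结果时自己不需要释放；如果 env_set 失败，值没有存入环境，则必须由这里释放。需要注意的是，这与前面"eval_expression 的返回值由调用者释放"的约定存在冲突：如果调用者（例如 eval_binary 或 eval_block）对赋值表达式的结果调用 value_free，环境中就会留下悬空指针。更稳妥的做法是返回该值的一份拷贝，让返回值的所有权规则保持一致。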

9. 示例程序执行过程

以一个具体程序为例：

let x = 10;
let y = 20;

fn add(a, b) {
    return a + b;
}

let sum = add(x, y);

Step 1：词法分析，生成 Token 序列。

Step 2：语法分析，生成 AST：

Program:
  VarDecl: x = IntLiteral(10)
  VarDecl: y = IntLiteral(20)
  FnDecl: add(params: [a, b])
    Block:
      ReturnStmt:
        BinaryExpr(+) Ident(a) Ident(b)
  VarDecl: sum = CallExpr(add, [Ident(x), Ident(y)])

Step 3：解释执行。

evaluator_run 遍历顶层语句：

let x = 10：在全局环境定义 x = Value(10)
let y = 20：在全局环境定义 y = Value(20)
fn add(a, b) { ... }：在全局环境定义 add = Value(Fn)
let sum = add(x, y)：
- eval_expression 求值右侧 CallExpr
- eval_call 查找 add，创建新环境 local_env
- 绑定 a = Value(10)，b = Value(20)
- eval_block 执行 return a + b
  - eval_expression 求值 a + b → Value(30)
  - 遇到 return，立即返回 Value(30)
- Value(30) 存入环境，变量 sum = Value(30)