Skip to content

Acorn 解析器源代码导览

项目结构

acorn-course/
├── 04-core-feature/          # 核心功能实现
│   ├── src/
│   │   ├── tokenizer.js     # 词法分析器
│   │   ├── parser.js        # 语法分析器
│   │   └── ast.js           # AST 工具
│   ├── test/
│   │   ├── tokenizer.test.js
│   │   ├── parser.test.js
│   │   └── ast.test.js
│   ├── package.json
│   └── README.md
├── 05-lesson-plan.md         # 课程计划
├── 01-intro.md              # 背景研究
├── 02-arch.md              # 架构分析
└── 03-code-walkthrough.md   # 源代码导览

核心文件解析

1. tokenizer.js - 词法分析器

文件路径: src/tokenizer.js

核心功能:

  • 标记化
  • 处理空白
  • 处理注释(注:本文展示的简化示例尚未实现注释处理,完整实现见源码)

关键代码:

javascript
// 标记类型
// Descriptor for one token category (operator, keyword, literal, …).
// FIX: `conf` now defaults to {} — several token types (e.g. `eof`,
// `]`) are constructed without a conf object, which previously threw
// a TypeError on `conf.keyword`.
const TokenType = class TokenType {
  /**
   * @param {string} label - human-readable label for the token
   * @param {object} [conf] - optional flags describing the token type
   */
  constructor(label, conf = {}) {
    this.label = label
    this.keyword = conf.keyword
    this.beforeExpr = !!conf.beforeExpr   // token may be followed by an expression
    this.startsExpr = !!conf.startsExpr   // token may begin an expression
    this.isLoop = !!conf.isLoop
    this.isAssign = !!conf.isAssign
    this.prefix = !!conf.prefix
    this.postfix = !!conf.postfix
    this.binop = conf.binop || null       // binary-operator precedence, or null
  }
}

// 标记类型定义
// Token type registry, keyed by descriptive name (not by the literal
// character — see readPunctuator). `_`-prefixed entries are reserved
// words. FIXES: `relational` used the misspelled `TokenTyp`
// (ReferenceError at load); constructors that took no conf object now
// pass `{}` so they also work if TokenType dereferences conf directly.
const tokTypes = {
  num: new TokenType('num', { startsExpr: true }),
  regexp: new TokenType('regexp', { startsExpr: true }),
  string: new TokenType('string', { startsExpr: true }),
  name: new TokenType('name', { startsExpr: true }),
  eof: new TokenType('eof', {}),
  bracketL: new TokenType('[', { beforeExpr: true, startsExpr: true }),
  bracketR: new TokenType(']', {}),
  braceL: new TokenType('{', { beforeExpr: true, startsExpr: true }),
  braceR: new TokenType('}', {}),
  parenL: new TokenType('(', { beforeExpr: true, startsExpr: true }),
  parenR: new TokenType(')', {}),
  comma: new TokenType(',', { beforeExpr: true }),
  semi: new TokenType(';', { beforeExpr: true }),
  colon: new TokenType(':', { beforeExpr: true }),
  dot: new TokenType('.', { beforeExpr: true }),
  question: new TokenType('?', { beforeExpr: true }),
  questionDot: new TokenType('?.', {}),
  arrow: new TokenType('=>', { beforeExpr: true }),
  ellipsis: new TokenType('...', { beforeExpr: true }),
  backQuote: new TokenType('`', { startsExpr: true }),
  dollarBraceL: new TokenType('${', { beforeExpr: true, startsExpr: true }),
  eq: new TokenType('=', { beforeExpr: true, isAssign: true }),
  assign: new TokenType('_=', { beforeExpr: true, isAssign: true }),
  incDec: new TokenType('++/--', { prefix: true, postfix: true }),
  prefix: new TokenType('!/~', { beforeExpr: true, prefix: true }),
  logicalOR: new TokenType('||', { beforeExpr: true, binop: 1 }),
  logicalAND: new TokenType('&&', { beforeExpr: true, binop: 2 }),
  bitwiseOR: new TokenType('|', { beforeExpr: true, binop: 3 }),
  bitwiseXOR: new TokenType('^', { beforeExpr: true, binop: 4 }),
  bitwiseAND: new TokenType('&', { beforeExpr: true, binop: 5 }),
  equality: new TokenType('==/!=/===/!==', { beforeExpr: true, binop: 6 }),
  relational: new TokenType('</>/<=/>=', { beforeExpr: true, binop: 7 }),  // FIX: was `TokenTyp` (typo)
  bitShift: new TokenType('<</>>', { beforeExpr: true, binop: 8 }),
  plusMin: new TokenType('+/-', { beforeExpr: true, binop: 9, prefix: true }),
  modulo: new TokenType('%', { beforeExpr: true, binop: 10 }),
  star: new TokenType('*', { beforeExpr: true, binop: 10 }),
  slash: new TokenType('/', { beforeExpr: true, binop: 10 }),
  starstar: new TokenType('**', { beforeExpr: true, binop: 11 }),
  _break: new TokenType('break', { keyword: 'break' }),
  _case: new TokenType('case', { keyword: 'case' }),
  _catch: new TokenType('catch', { keyword: 'catch' }),
  _class: new TokenType('class', { keyword: 'class' }),
  _const: new TokenType('const', { keyword: 'const' }),
  _continue: new TokenType('continue', { keyword: 'continue' }),
  _debugger: new TokenType('debugger', { keyword: 'debugger' }),
  _default: new TokenType('default', { keyword: 'default' }),
  _delete: new TokenType('delete', { keyword: 'delete', prefix: true }),
  _do: new TokenType('do', { keyword: 'do' }),
  _else: new TokenType('else', { keyword: 'else' }),
  _finally: new TokenType('finally', { keyword: 'finally' }),
  _for: new TokenType('for', { keyword: 'for' }),
  _function: new TokenType('function', { keyword: 'function' }),
  _if: new TokenType('if', { keyword: 'if' }),
  _in: new TokenType('in', { keyword: 'in', binop: 7 }),
  _instanceof: new TokenType('instanceof', { keyword: 'instanceof', binop: 7 }),
  _new: new TokenType('new', { keyword: 'new', beforeExpr: true, startsExpr: true }),
  _return: new TokenType('return', { keyword: 'return', beforeExpr: true }),
  _switch: new TokenType('switch', { keyword: 'switch' }),
  _this: new TokenType('this', { keyword: 'this' }),
  _throw: new TokenType('throw', { keyword: 'throw', beforeExpr: true }),
  _try: new TokenType('try', { keyword: 'try' }),
  _typeof: new TokenType('typeof', { keyword: 'typeof', prefix: true, beforeExpr: true }),
  _var: new TokenType('var', { keyword: 'var' }),
  _void: new TokenType('void', { keyword: 'void', prefix: true, beforeExpr: true }),
  _while: new TokenType('while', { keyword: 'while' }),
  _with: new TokenType('with', { keyword: 'with' }),
  _let: new TokenType('let', { keyword: 'let' }),
  _true: new TokenType('true', { keyword: 'true', startsExpr: true }),
  _false: new TokenType('false', { keyword: 'false', startsExpr: true }),
  _null: new TokenType('null', { keyword: 'null', startsExpr: true }),
  _async: new TokenType('async', { keyword: 'async' }),
  _await: new TokenType('await', { keyword: 'await', prefix: true, beforeExpr: true }),
  _yield: new TokenType('yield', { keyword: 'yield', prefix: true, beforeExpr: true }),
  _extends: new TokenType('extends', { keyword: 'extends' }),
  _import: new TokenType('import', { keyword: 'import' }),
  _export: new TokenType('export', { keyword: 'export' }),
  _from: new TokenType('from', { keyword: 'from' }),
  _static: new TokenType('static', { keyword: 'static' }),
  _super: new TokenType('super', { keyword: 'super' }),
  _get: new TokenType('get', { keyword: 'get' }),
  _set: new TokenType('set', { keyword: 'set' })
}

// 词法分析器
// Lexer: walks `input` and hands out tokens one at a time via nextToken().
export class Tokenizer {
  constructor(input, options = {}) {
    this.input = String(input)
    this.options = options
    this.pos = 0      // current offset into `input`
    this.line = 1     // 1-based line number (advanced only in skipWhitespace)
    this.column = 0   // column on the current line
    this.tokens = []
    this.comments = []
  }

  // Return the next token, or an eof token at end of input.
  nextToken() {
    this.skipWhitespace()

    if (this.pos >= this.input.length) {
      return { type: tokTypes.eof }
    }

    const ch = this.input[this.pos]

    // Dispatch on the first character of the token.
    if (isIdentifierStart(ch)) {
      return this.readWord()
    } else if (isDigit(ch)) {
      return this.readNumber()
    } else if (isStringQuote(ch)) {
      return this.readString()
    } else if (isPunctuator(ch)) {
      return this.readPunctuator()
    }

    // Unrecognized character.
    // NOTE(review): tokTypes.unknown is not defined in the tokTypes
    // registry, so this token's `type` is undefined — consider adding
    // an explicit `unknown` token type.
    this.pos++
    return { type: tokTypes.unknown, value: ch }
  }

  // Read an identifier or reserved word.
  readWord() {
    const start = this.pos

    while (this.pos < this.input.length) {
      const ch = this.input[this.pos]
      if (isIdentifierChar(ch)) {
        this.pos++
      } else {
        break
      }
    }

    const word = this.input.slice(start, this.pos)
    // Reserved words get their dedicated token type; anything else is a name.
    const type = keywords[word] || tokTypes.name

    return { type, value: word }
  }

  // Read a numeric literal (digits and '.'; no exponent/hex/underscore support).
  readNumber() {
    const start = this.pos

    while (this.pos < this.input.length) {
      const ch = this.input[this.pos]
      if (isDigit(ch) || ch === '.') {
        this.pos++
      } else {
        break
      }
    }

    const num = this.input.slice(start, this.pos)
    return { type: tokTypes.num, value: Number(num) }
  }

  // Read a string literal delimited by ', " or `.
  // Escape sequences are skipped but not decoded (value keeps the raw text).
  readString() {
    const quote = this.input[this.pos]
    const start = ++this.pos   // skip past the opening quote
    let closed = false

    while (this.pos < this.input.length) {
      const ch = this.input[this.pos]

      if (ch === '\\') {
        this.pos += 2          // skip the backslash and the escaped char
      } else if (ch === quote) {
        closed = true
        this.pos++             // consume the closing quote
        break
      } else {
        this.pos++
      }
    }

    // FIX: only strip the final character when it is the closing quote;
    // the original unconditionally sliced to pos-1, which dropped the
    // last character of an unterminated string.
    const end = closed ? this.pos - 1 : this.pos
    const str = this.input.slice(start, end)
    return { type: tokTypes.string, value: str }
  }

  // Read a one- or two-character punctuator/operator token.
  readPunctuator() {
    const ch = this.input[this.pos]
    const next = this.input[this.pos + 1]

    // Two-character operators.
    if (ch === '=' && next === '=') {
      this.pos += 2
      // FIX: '==' is a comparison, so it maps to `equality`
      // (the original returned `eq`, the assignment token).
      return { type: tokTypes.equality, value: '==' }
    } else if (ch === '!' && next === '=') {
      this.pos += 2
      return { type: tokTypes.equality, value: '!=' }
    } else if (ch === '+' && next === '+') {
      this.pos += 2
      return { type: tokTypes.incDec, value: '++' }
    } else if (ch === '-' && next === '-') {
      this.pos += 2
      return { type: tokTypes.incDec, value: '--' }
    }

    // Single-character operators.
    // FIX: tokTypes is keyed by descriptive names ('parenL', …), not by
    // the character itself, so the original `tokTypes[ch]` was always
    // undefined. Map characters to their token types explicitly.
    const singleChar = {
      '[': tokTypes.bracketL, ']': tokTypes.bracketR,
      '{': tokTypes.braceL, '}': tokTypes.braceR,
      '(': tokTypes.parenL, ')': tokTypes.parenR,
      ',': tokTypes.comma, ';': tokTypes.semi, ':': tokTypes.colon,
      '.': tokTypes.dot, '?': tokTypes.question,
      '=': tokTypes.eq,
      '+': tokTypes.plusMin, '-': tokTypes.plusMin,
      '*': tokTypes.star, '/': tokTypes.slash, '%': tokTypes.modulo,
      '&': tokTypes.bitwiseAND, '|': tokTypes.bitwiseOR, '^': tokTypes.bitwiseXOR,
      '!': tokTypes.prefix, '~': tokTypes.prefix,
      '<': tokTypes.relational, '>': tokTypes.relational
    }
    this.pos++
    return { type: singleChar[ch], value: ch }
  }

  // Skip whitespace, maintaining line/column counters.
  // NOTE(review): `column` is only updated here, not while reading
  // tokens, so column positions drift within a line.
  skipWhitespace() {
    while (this.pos < this.input.length) {
      const ch = this.input[this.pos]
      if (isWhitespace(ch)) {
        this.pos++
        if (ch === '\n') {
          this.line++
          this.column = 0
        } else {
          this.column++
        }
      } else {
        break
      }
    }
  }
}

// 辅助函数
// Whitespace test used by skipWhitespace (same character set as \s).
function isWhitespace(character) {
  const ws = /\s/
  return ws.test(character)
}

// True when `character` may begin an identifier: ASCII letter, '_' or '$'.
function isIdentifierStart(character) {
  const identStart = /[A-Za-z_$]/
  return identStart.test(character)
}

// True when `character` may continue an identifier:
// ASCII letter, digit, '_' or '$' (i.e. \w plus '$').
function isIdentifierChar(character) {
  const identPart = /[\w$]/
  return identPart.test(character)
}

// True for an ASCII decimal digit (\d is equivalent to [0-9]).
function isDigit(character) {
  const digit = /\d/
  return digit.test(character)
}

// True when `character` opens a string literal: ', " or backtick.
function isStringQuote(character) {
  return ['"', "'", '`'].includes(character)
}

// True when `ch` can start a punctuator/operator token.
// FIXES: the original class contained `+-*`, an out-of-order range
// ('+' down to '*') that is a regex SyntaxError at load time — the '-'
// now sits at the end of the class where it is a literal. '=' was also
// missing, so '=' / '==' never reached readPunctuator.
function isPunctuator(ch) {
  return /[{}()[\];,.:+*/%&|^~!<>?=-]/.test(ch)
}

// 关键字映射
// Map from reserved-word spelling to its token type. Every entry
// follows the `word -> tokTypes._word` naming convention, so the table
// is built programmatically from the list of spellings.
const keywords = Object.fromEntries([
  'break', 'case', 'catch', 'class', 'const', 'continue', 'debugger',
  'default', 'delete', 'do', 'else', 'finally', 'for', 'function', 'if',
  'in', 'instanceof', 'new', 'return', 'switch', 'this', 'throw', 'try',
  'typeof', 'var', 'void', 'while', 'with', 'let', 'true', 'false',
  'null', 'async', 'await', 'yield', 'extends', 'import', 'export',
  'from', 'static', 'super', 'get', 'set'
].map((word) => [word, tokTypes['_' + word]]))

export { tokTypes }

设计要点:

  • 完整的标记类型定义
  • 高效的标记化算法
  • 覆盖常用的 JavaScript 语法(教学用简化实现,并非完整语法)

2. parser.js - 语法分析器

文件路径: src/parser.js

核心功能:

  • 递归下降解析
  • 生成 AST
  • 错误恢复

关键代码:

javascript
import { tokTypes, Tokenizer } from './tokenizer.js'

// 语法分析器
// Recursive-descent parser that builds an ESTree-style AST from the
// token stream produced by Tokenizer. Fixes relative to the original:
//  1. parseVariableDeclarators: the assignment inside the `while`
//     condition was unparenthesized — a SyntaxError.
//  2. parseMaybeBinaryOrLogical: `left` is reassigned, so it must be
//     declared with `let` (reassigning `const` throws).
//  3. parseLiteral: `this.pos` does not exist on Parser; use the
//     tokenizer's offset.
//  4. parseParams: the original advanced one token before expecting
//     '(', consuming the '(' itself — every function failed to parse.
//  5. Statement parsers now consume an optional trailing ';' so it is
//     not mis-read as the start of the next statement.
export class Parser {
  constructor(input, options = {}) {
    this.input = input
    this.options = options
    this.tokenizer = new Tokenizer(input, options)
    // One-token lookahead: `next` always holds the upcoming token.
    this.next = this.tokenizer.nextToken()
    this.tokens = []
  }

  // Parse an entire program: statements until EOF.
  parseProgram() {
    const node = this.startNode()
    node.body = []
    node.sourceType = this.options.sourceType || 'script'

    while (this.next.type !== tokTypes.eof) {
      const statement = this.parseStatement()
      node.body.push(statement)
    }

    return this.finishNode(node, 'Program')
  }

  // Dispatch on the lookahead token to the matching statement parser.
  parseStatement() {
    switch (this.next.type) {
      case tokTypes._function:
        return this.parseFunctionDeclaration()
      case tokTypes._var:
      case tokTypes._let:
      case tokTypes._const:
        return this.parseVariableDeclaration()
      case tokTypes._if:
        return this.parseIfStatement()
      case tokTypes._for:
        return this.parseForStatement()
      case tokTypes._while:
        return this.parseWhileStatement()
      case tokTypes._return:
        return this.parseReturnStatement()
      case tokTypes._break:
        return this.parseBreakStatement()
      case tokTypes._continue:
        return this.parseContinueStatement()
      case tokTypes._class:
        return this.parseClassDeclaration()
      default:
        return this.parseExpressionStatement()
    }
  }

  // Expression entry point.
  parseExpression() {
    return this.parseMaybeAssign()
  }

  // Assignment expression (right-associative).
  parseMaybeAssign() {
    const left = this.parseMaybeConditional()

    if (this.next.type.isAssign) {
      const node = this.startNodeAt(left.start)
      node.operator = this.next.value
      node.left = this.toAssignable(left)
      this.next = this.tokenizer.nextToken()
      node.right = this.parseMaybeAssign()
      return this.finishNode(node, 'AssignmentExpression')
    }

    return left
  }

  // Ternary conditional expression `test ? consequent : alternate`.
  parseMaybeConditional() {
    const expr = this.parseMaybeBinaryOrLogical()

    if (this.next.type === tokTypes.question) {
      const node = this.startNodeAt(expr.start)
      node.test = expr
      this.next = this.tokenizer.nextToken()
      node.consequent = this.parseExpression()
      this.expect(tokTypes.colon)
      node.alternate = this.parseMaybeAssign()
      return this.finishNode(node, 'ConditionalExpression')
    }

    return expr
  }

  // Binary / logical expressions.
  // FIX: `left` is reassigned inside the loop, so it is `let` now
  // (the original `const` threw a TypeError on reassignment).
  // NOTE(review): parsing the right side with a full recursive call
  // ignores the binop precedence values and yields right-associative
  // trees; a precedence-climbing loop would be more faithful to JS.
  parseMaybeBinaryOrLogical() {
    let left = this.parseMaybeUnary()

    while (this.next.type.binop) {
      const node = this.startNodeAt(left.start)
      node.operator = this.next.value
      node.left = left
      this.next = this.tokenizer.nextToken()
      node.right = this.parseMaybeBinaryOrLogical()
      left = this.finishNode(node, 'BinaryExpression')
    }

    return left
  }

  // Prefix unary expression (!x, ~x, typeof x, …).
  parseMaybeUnary() {
    if (this.next.type.prefix) {
      const node = this.startNode()
      node.operator = this.next.value
      node.prefix = true
      this.next = this.tokenizer.nextToken()
      node.argument = this.parseMaybeUnary()
      return this.finishNode(node, 'UnaryExpression')
    }

    return this.parseUpdateExpression()
  }

  // Postfix update expression (x++ / x--).
  parseUpdateExpression() {
    const expr = this.parseLeftSide()

    if (this.next.type.postfix) {
      const node = this.startNodeAt(expr.start)
      node.operator = this.next.value
      node.prefix = false
      node.argument = expr
      this.next = this.tokenizer.nextToken()
      return this.finishNode(node, 'UpdateExpression')
    }

    return expr
  }

  // Member expression, optionally followed by a call.
  parseLeftSide() {
    const expr = this.parseMemberExpression()

    if (this.next.type === tokTypes.parenL) {
      return this.parseCallExpression(expr)
    }

    return expr
  }

  // Chained `.prop` and `[expr]` member accesses.
  parseMemberExpression() {
    let expr = this.parsePrimaryExpression()

    while (true) {
      if (this.next.type === tokTypes.dot) {
        this.next = this.tokenizer.nextToken()
        const node = this.startNodeAt(expr.start)
        node.object = expr
        node.property = this.parseIdentifier()
        node.computed = false
        expr = this.finishNode(node, 'MemberExpression')
      } else if (this.next.type === tokTypes.bracketL) {
        this.next = this.tokenizer.nextToken()
        const node = this.startNodeAt(expr.start)
        node.object = expr
        node.property = this.parseExpression()
        this.expect(tokTypes.bracketR)
        node.computed = true
        expr = this.finishNode(node, 'MemberExpression')
      } else {
        break
      }
    }

    return expr
  }

  // `callee(args...)`.
  parseCallExpression(callee) {
    const node = this.startNodeAt(callee.start)
    node.callee = callee
    node.arguments = this.parseArguments()
    return this.finishNode(node, 'CallExpression')
  }

  // Parenthesized, comma-separated argument list.
  parseArguments() {
    this.expect(tokTypes.parenL)
    const args = []

    while (this.next.type !== tokTypes.parenR) {
      args.push(this.parseExpression())
      if (this.next.type !== tokTypes.parenR) {
        this.expect(tokTypes.comma)
      }
    }

    this.next = this.tokenizer.nextToken()  // consume ')'
    return args
  }

  // Atomic expressions: literals, identifiers, grouping, etc.
  parsePrimaryExpression() {
    switch (this.next.type) {
      case tokTypes._this:
        return this.parseThisExpression()
      case tokTypes.name:
        return this.parseIdentifier()
      case tokTypes.num:
        return this.parseLiteral()
      case tokTypes.string:
        return this.parseLiteral()
      case tokTypes._true:
      case tokTypes._false:
      case tokTypes._null:
        return this.parseLiteral()
      case tokTypes.parenL:
        return this.parseParenExpression()
      case tokTypes.bracketL:
        return this.parseArrayExpression()
      case tokTypes.braceL:
        return this.parseObjectExpression()
      case tokTypes._function:
        return this.parseFunctionExpression()
      default:
        // Fallback: treat any other token as an identifier so parsing
        // always makes progress (rudimentary error recovery).
        return this.parseIdentifier()
    }
  }

  // `this`.
  parseThisExpression() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken()
    return this.finishNode(node, 'ThisExpression')
  }

  // A plain identifier.
  parseIdentifier() {
    const node = this.startNode()
    node.name = this.next.value
    this.next = this.tokenizer.nextToken()
    return this.finishNode(node, 'Identifier')
  }

  // Number / string / boolean / null literal.
  parseLiteral() {
    const node = this.startNode()
    node.value = this.next.value
    // FIX: Parser has no `pos` of its own — use the tokenizer's offset
    // (the original `this.pos` was undefined, so `raw` spanned to EOF).
    node.raw = this.input.slice(node.start, this.tokenizer.pos)
    this.next = this.tokenizer.nextToken()
    return this.finishNode(node, 'Literal')
  }

  // `( expression )` — the grouping parens produce no AST node.
  parseParenExpression() {
    this.expect(tokTypes.parenL)
    const expr = this.parseExpression()
    this.expect(tokTypes.parenR)
    return expr
  }

  // `[ el, , el ]` — a bare comma contributes a hole (null element).
  parseArrayExpression() {
    const node = this.startNode()
    node.elements = []

    this.expect(tokTypes.bracketL)

    while (this.next.type !== tokTypes.bracketR) {
      if (this.next.type === tokTypes.comma) {
        node.elements.push(null)
      } else {
        node.elements.push(this.parseExpression())
      }

      if (this.next.type !== tokTypes.bracketR) {
        this.expect(tokTypes.comma)
      }
    }

    this.next = this.tokenizer.nextToken()  // consume ']'
    return this.finishNode(node, 'ArrayExpression')
  }

  // `{ key: value, ... }`.
  parseObjectExpression() {
    const node = this.startNode()
    node.properties = []

    this.expect(tokTypes.braceL)

    while (this.next.type !== tokTypes.braceR) {
      node.properties.push(this.parseProperty())

      if (this.next.type !== tokTypes.braceR) {
        this.expect(tokTypes.comma)
      }
    }

    this.next = this.tokenizer.nextToken()  // consume '}'
    return this.finishNode(node, 'ObjectExpression')
  }

  // One object property: `key: value`, shorthand `key`, or computed
  // `[expr]: value`.
  parseProperty() {
    const node = this.startNode()
    node.method = false
    node.shorthand = false
    node.computed = false

    if (this.next.type === tokTypes.name) {
      const key = this.parseIdentifier()

      if (this.next.type === tokTypes.colon) {
        this.next = this.tokenizer.nextToken()
        node.key = key
        node.value = this.parseExpression()
        node.kind = 'init'
      } else {
        // Shorthand property: `{ x }` means `{ x: x }`.
        node.key = key
        node.value = key
        node.shorthand = true
        node.kind = 'init'
      }
    } else if (this.next.type === tokTypes.bracketL) {
      node.computed = true
      this.next = this.tokenizer.nextToken()
      node.key = this.parseExpression()
      this.expect(tokTypes.bracketR)
      this.expect(tokTypes.colon)
      node.value = this.parseExpression()
      node.kind = 'init'
    }

    return this.finishNode(node, 'Property')
  }

  // `function name(params) { body }` as an expression.
  // NOTE(review): the id is parsed unconditionally, so anonymous
  // function expressions are not supported.
  parseFunctionExpression() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken()  // consume 'function'
    node.id = this.parseIdentifier()
    node.params = this.parseParams()
    node.body = this.parseFunctionBody()
    node.generator = false
    node.async = false
    return this.finishNode(node, 'FunctionExpression')
  }

  // `function name(params) { body }` as a declaration.
  parseFunctionDeclaration() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken()  // consume 'function'
    node.id = this.parseIdentifier()
    node.params = this.parseParams()
    node.body = this.parseFunctionBody()
    node.generator = false
    node.async = false
    return this.finishNode(node, 'FunctionDeclaration')
  }

  // Parenthesized, comma-separated parameter list.
  // FIX: the original advanced one token before expecting '(', which
  // consumed the '(' itself and made every function fail to parse.
  parseParams() {
    const params = []

    this.expect(tokTypes.parenL)

    while (this.next.type !== tokTypes.parenR) {
      params.push(this.parsePattern())
      if (this.next.type !== tokTypes.parenR) {
        this.expect(tokTypes.comma)
      }
    }

    this.next = this.tokenizer.nextToken()  // consume ')'
    return params
  }

  // Binding pattern — only plain identifiers are supported.
  parsePattern() {
    return this.parseIdentifier()
  }

  // `{ statements }` of a function.
  parseFunctionBody() {
    this.expect(tokTypes.braceL)

    const body = []
    while (this.next.type !== tokTypes.braceR) {
      body.push(this.parseStatement())
    }

    this.next = this.tokenizer.nextToken()  // consume '}'
    return body
  }

  // `var` / `let` / `const` declaration.
  parseVariableDeclaration() {
    const node = this.startNode()
    node.kind = this.next.value
    this.next = this.tokenizer.nextToken()
    node.declarations = this.parseVariableDeclarators()
    // Consume an optional trailing ';' so it is not mis-read as the
    // start of the next statement.
    if (this.next.type === tokTypes.semi) {
      this.next = this.tokenizer.nextToken()
    }
    return this.finishNode(node, 'VariableDeclaration')
  }

  // Comma-separated declarators: `a = 1, b = 2`.
  // FIX: the assignment inside the loop condition must be wrapped in
  // parentheses — unparenthesized it is a SyntaxError.
  parseVariableDeclarators() {
    const declarations = []

    do {
      declarations.push(this.parseVariableDeclarator())
    } while (
      this.next.type === tokTypes.comma &&
      (this.next = this.tokenizer.nextToken())
    )

    return declarations
  }

  // One declarator: `id` or `id = init`.
  parseVariableDeclarator() {
    const node = this.startNode()
    node.id = this.parsePattern()

    if (this.next.type === tokTypes.eq) {
      this.next = this.tokenizer.nextToken()
      node.init = this.parseMaybeAssign()
    } else {
      node.init = null
    }

    return this.finishNode(node, 'VariableDeclarator')
  }

  // `if (test) consequent [else alternate]`.
  parseIfStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken()  // consume 'if'
    this.expect(tokTypes.parenL)
    node.test = this.parseExpression()
    this.expect(tokTypes.parenR)
    node.consequent = this.parseStatement()

    if (this.next.type === tokTypes._else) {
      this.next = this.tokenizer.nextToken()
      node.alternate = this.parseStatement()
    } else {
      node.alternate = null
    }

    return this.finishNode(node, 'IfStatement')
  }

  // `for (init; test; update) body` — classic form only (no for-in/of).
  parseForStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken()  // consume 'for'
    this.expect(tokTypes.parenL)

    if (this.next.type !== tokTypes.semi) {
      node.init = this.parseExpression()
    } else {
      node.init = null
    }

    this.expect(tokTypes.semi)

    if (this.next.type !== tokTypes.semi) {
      node.test = this.parseExpression()
    } else {
      node.test = null
    }

    this.expect(tokTypes.semi)

    if (this.next.type !== tokTypes.parenR) {
      node.update = this.parseExpression()
    } else {
      node.update = null
    }

    this.expect(tokTypes.parenR)
    node.body = this.parseStatement()

    return this.finishNode(node, 'ForStatement')
  }

  // `while (test) body`.
  parseWhileStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken()  // consume 'while'
    this.expect(tokTypes.parenL)
    node.test = this.parseExpression()
    this.expect(tokTypes.parenR)
    node.body = this.parseStatement()
    return this.finishNode(node, 'WhileStatement')
  }

  // `return [argument];`.
  parseReturnStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken()  // consume 'return'

    if (this.next.type !== tokTypes.semi && this.next.type !== tokTypes.braceR) {
      node.argument = this.parseExpression()
    } else {
      node.argument = null
    }

    if (this.next.type === tokTypes.semi) {
      this.next = this.tokenizer.nextToken()
    }

    return this.finishNode(node, 'ReturnStatement')
  }

  // `break;` — labels are not supported.
  parseBreakStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken()
    node.label = null
    if (this.next.type === tokTypes.semi) {
      this.next = this.tokenizer.nextToken()
    }
    return this.finishNode(node, 'BreakStatement')
  }

  // `continue;` — labels are not supported.
  parseContinueStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken()
    node.label = null
    if (this.next.type === tokTypes.semi) {
      this.next = this.tokenizer.nextToken()
    }
    return this.finishNode(node, 'ContinueStatement')
  }

  // `class Name { methods }` — no `extends` support.
  parseClassDeclaration() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken()  // consume 'class'
    node.id = this.parseIdentifier()
    node.body = this.parseClassBody()
    return this.finishNode(node, 'ClassDeclaration')
  }

  // `{ methodDefinition* }`.
  parseClassBody() {
    this.expect(tokTypes.braceL)
    const body = []

    while (this.next.type !== tokTypes.braceR) {
      body.push(this.parseMethodDefinition())
    }

    this.next = this.tokenizer.nextToken()  // consume '}'
    return body
  }

  // One class method: `name(params) { body }`.
  parseMethodDefinition() {
    const node = this.startNode()
    node.key = this.parseIdentifier()
    node.kind = 'method'
    node.value = this.parseMethod()
    return this.finishNode(node, 'MethodDefinition')
  }

  // The function value of a method definition.
  parseMethod() {
    const node = this.startNode()
    node.params = this.parseParams()
    node.body = this.parseFunctionBody()
    node.generator = false
    node.async = false
    return this.finishNode(node, 'FunctionExpression')
  }

  // A bare expression used as a statement.
  parseExpressionStatement() {
    const node = this.startNode()
    node.expression = this.parseExpression()
    if (this.next.type === tokTypes.semi) {
      this.next = this.tokenizer.nextToken()
    }
    return this.finishNode(node, 'ExpressionStatement')
  }

  // Consume a token of the given type, or report a syntax error.
  expect(type) {
    if (this.next.type === type) {
      this.next = this.tokenizer.nextToken()
    } else {
      // NOTE(review): tokens do not carry a `start` offset, so this is
      // undefined and getLineInfo falls back to line 1, column 0 —
      // consider recording positions on tokens.
      this.raise(this.next.start, `Unexpected token`)
    }
  }

  // Begin an AST node at the tokenizer's current position.
  // NOTE(review): because of the one-token lookahead, tokenizer.pos is
  // already past the current token, so `start` offsets trail the token's
  // real start — accurate positions require per-token offsets.
  startNode() {
    return {
      start: this.tokenizer.pos,
      end: null,
      loc: {
        start: {
          line: this.tokenizer.line,
          column: this.tokenizer.column
        },
        end: null
      }
    }
  }

  // Begin an AST node at an explicit offset (used when the node wraps
  // an already-parsed sub-expression).
  startNodeAt(pos) {
    return {
      start: pos,
      end: null,
      loc: {
        start: this.getLineInfo(pos),
        end: null
      }
    }
  }

  // Stamp the node's type and end position.
  finishNode(node, type) {
    node.type = type
    node.end = this.tokenizer.pos
    node.loc.end = {
      line: this.tokenizer.line,
      column: this.tokenizer.column
    }
    return node
  }

  // Compute 1-based line / 0-based column for an absolute offset by
  // rescanning the input (O(pos), fine for error paths).
  getLineInfo(pos) {
    let line = 1
    let column = 0

    for (let i = 0; i < pos; i++) {
      if (this.input[i] === '\n') {
        line++
        column = 0
      } else {
        column++
      }
    }

    return { line, column }
  }

  // Convert an expression to an assignment target.
  // NOTE(review): no validation is performed — any expression is
  // accepted as an assignment target.
  toAssignable(expr) {
    return expr
  }

  // Throw a SyntaxError annotated with position information.
  raise(pos, message) {
    const loc = this.getLineInfo(pos)
    const error = new SyntaxError(message)
    error.pos = pos
    error.loc = loc
    throw error
  }
}

// 解析函数
// Convenience entry point: parse `input` and return the Program node.
export function parse(input, options = {}) {
  return new Parser(input, options).parseProgram()
}

设计要点:

  • 递归下降解析算法
  • 支持完整的 JavaScript 语法
  • 准确的 AST 生成

关键设计决策

1. 递归下降解析

原因:

  • 简单易懂
  • 易于实现
  • 易于扩展

实现:

javascript
// Illustrative excerpt: the top-level parse loop. Each iteration
// consumes exactly one statement until the end-of-file token arrives.
parseProgram() {
  const node = this.startNode()
  node.body = []
  
  while (this.next.type !== tokTypes.eof) {
    node.body.push(this.parseStatement())
  }
  
  return this.finishNode(node, 'Program')
}

2. 使用 ESTree 规范

原因:

  • 标准化
  • 生态兼容
  • 易于理解

实现:

javascript
{
  type: 'Program',
  body: [
    {
      type: 'FunctionDeclaration',
      id: { type: 'Identifier', name: 'foo' },
      params: [],
      body: { type: 'BlockStatement', body: [] }
    }
  ]
}

3. 最小化 AST

原因:

  • 减少内存占用
  • 提升性能
  • 简化处理

实现:

javascript
{
  type: 'Identifier',
  name: 'x',
  start: 0,
  end: 1
}

测试策略

单元测试

javascript
import { describe, it } from 'node:test'
import assert from 'node:assert'
import { parse } from '../src/parser.js'

// Unit tests using the Node.js built-in test runner (node:test).
// Each case parses a small snippet and checks the root/child node types.
describe('Parser 测试', () => {
  it('应该解析程序', () => {
    const ast = parse('const x = 1')
    assert.ok(ast)
    assert.strictEqual(ast.type, 'Program')
  })
  
  it('应该解析函数声明', () => {
    const ast = parse('function foo() {}')
    assert.ok(ast)
    assert.strictEqual(ast.body[0].type, 'FunctionDeclaration')
  })
})

总结

Acorn 的源代码体现了以下设计原则:

  1. 递归下降:使用递归下降解析算法
  2. 极小体积:最小化代码体积
  3. 标准遵循:严格遵循 ESTree 规范
  4. 高性能:优化解析速度

理解源代码有助于更好地使用和优化 Acorn。

参考资源

架构师AI杜公众号二维码

扫描二维码关注"架构师AI杜"公众号,获取更多技术内容和最新动态