Acorn 解析器架构分析

整体架构

Acorn 采用递归下降解析算法，通过词法分析、语法分析和 AST 生成三个阶段解析 JavaScript 代码。

┌─────────────────────────────────────────────┐
│              Acorn Parser                   │
├─────────────────────────────────────────────┤
│  1. Tokenizer (词法分析器)                    │
│     - Tokenization (标记化)                 │
│     - Whitespace Handling (空白处理)         │
│     - Comment Handling (注释处理)            │
├─────────────────────────────────────────────┤
│  2. Parser (语法分析器)                       │
│     - Recursive Descent (递归下降)           │
│     - AST Generation (AST 生成)             │
│     - Error Recovery (错误恢复)              │
├─────────────────────────────────────────────┤
│  3. AST (抽象语法树)                         │
│     - ESTree Spec (ESTree 规范)              │
│     - Location Info (位置信息)               │
│     - Source Map (源映射)                    │
└─────────────────────────────────────────────┘

核心组件

1. Tokenizer

词法分析器负责将源代码转换为标记（Token）。

职责： 标记化，处理空白，处理注释。

关键方法：

javascript

/**
 * Minimal lexer that turns source text into tokens, one call at a time.
 *
 * Tracks the current offset (`pos`) plus a 1-based `line` and 0-based
 * `column`. Every token produced here carries `start`/`end` offsets so that
 * consumers can report positions.
 *
 * Relies on names that are not part of this excerpt and must be in scope:
 * `tokTypes`, `keywords`, the character classifiers (`isIdentifierStart`,
 * `isIdentifierChar`, `isDigit`, `isStringQuote`, `isPunctuator`,
 * `isWhitespace`) and the methods `readNumber`, `readString`,
 * `readPunctuator`.
 */
class Tokenizer {
  /**
   * @param {string} input - Source text to tokenize.
   * @param {object} options - Stored as-is; not read by the methods shown here.
   */
  constructor(input, options) {
    this.input = input
    this.options = options
    this.pos = 0
    this.line = 1
    this.column = 0
  }

  /**
   * Skips leading whitespace and returns the next token.
   *
   * @returns {object} A token; `{ type: tokTypes.eof }` once the input is
   *   exhausted, `{ type: tokTypes.unknown }` for an unrecognised character.
   */
  nextToken() {
    this.skipWhitespace()

    if (this.pos >= this.input.length) {
      return { type: tokTypes.eof, start: this.pos, end: this.pos }
    }

    const ch = this.input[this.pos]

    // Dispatch on the first character of the token
    if (isIdentifierStart(ch)) {
      return this.readWord()
    } else if (isDigit(ch)) {
      return this.readNumber()
    } else if (isStringQuote(ch)) {
      return this.readString()
    } else if (isPunctuator(ch)) {
      return this.readPunctuator()
    }

    // Anything else: consume exactly one character so the caller always
    // makes progress, and keep the column counter in step with `pos`.
    const start = this.pos
    this.pos++
    this.column++
    return { type: tokTypes.unknown, value: ch, start, end: this.pos }
  }

  /**
   * Reads an identifier or keyword starting at the current position.
   *
   * @returns {object} Token whose type is the keyword's token type when the
   *   word is an own key of `keywords`, otherwise `tokTypes.name`.
   */
  readWord() {
    const start = this.pos

    while (this.pos < this.input.length && isIdentifierChar(this.input[this.pos])) {
      this.pos++
    }

    // assumes isIdentifierChar never accepts a newline, so the whole word
    // stays on the current line — TODO confirm
    this.column += this.pos - start

    const word = this.input.slice(start, this.pos)
    // Own-property check: a plain-object table would otherwise resolve words
    // such as `constructor` or `toString` to inherited Object.prototype members.
    const isKeyword = Object.prototype.hasOwnProperty.call(keywords, word)
    const type = (isKeyword && keywords[word]) || tokTypes.name

    return { type, value: word, start, end: this.pos }
  }

  /**
   * Advances past whitespace, updating `line` and `column`.
   * Only '\n' is treated as a line break here.
   */
  skipWhitespace() {
    while (this.pos < this.input.length) {
      const ch = this.input[this.pos]
      if (!isWhitespace(ch)) {
        break
      }
      this.pos++
      if (ch === '\n') {
        this.line++
        this.column = 0
      } else {
        this.column++
      }
    }
  }
}

优势：快速（高效的标记化算法）、准确（准确的标记识别）、流式（支持流式处理）。

2. Parser

语法分析器负责将标记转换为 AST。

职责： 递归下降解析，生成 AST，错误恢复（注意：标准 Acorn 在遇到首个语法错误时即抛出 SyntaxError，容错解析由独立的 acorn-loose 包提供）。

关键方法：

javascript

/**
 * Simplified recursive-descent parser that turns a token stream into an
 * ESTree-style AST.
 *
 * `this.next` always holds the current (not yet consumed) token. Helpers
 * that are called but not part of this excerpt: `startNode`, `startNodeAt`,
 * `finishNode`, `parseIdentifier`, `parsePattern`, `parseMaybeConditional`,
 * `toAssignable`, `raise` and the remaining `parse*Statement` methods.
 * `tokTypes` and `Tokenizer` must be in scope.
 */
class Parser {
  /**
   * @param {string} input - Source text; handed to the Tokenizer.
   * @param {object} options - Stored and forwarded to the Tokenizer.
   */
  constructor(input, options) {
    this.input = input
    this.options = options
    this.tokenizer = new Tokenizer(input, options)
    // Prime the one-token lookahead
    this.next = this.tokenizer.nextToken()
  }

  /**
   * Parses statements until end of input.
   * @returns {object} The finished `Program` node.
   */
  parseProgram() {
    const node = this.startNode()
    node.body = []

    while (this.next.type !== tokTypes.eof) {
      node.body.push(this.parseStatement())
    }

    return this.finishNode(node, 'Program')
  }

  /**
   * Dispatches on the current token type to the matching statement parser;
   * anything unrecognised is parsed as an expression statement.
   */
  parseStatement() {
    switch (this.next.type) {
      case tokTypes._function:
        return this.parseFunctionDeclaration()
      case tokTypes._var:
      case tokTypes._let:
      case tokTypes._const:
        return this.parseVariableDeclaration()
      case tokTypes._if:
        return this.parseIfStatement()
      case tokTypes._for:
        return this.parseForStatement()
      case tokTypes._return:
        return this.parseReturnStatement()
      default:
        return this.parseExpressionStatement()
    }
  }

  /** Parses an expression (entry point of the expression grammar). */
  parseExpression() {
    return this.parseMaybeAssign()
  }

  /**
   * Parses a conditional-level expression and, when an assignment operator
   * follows, wraps it in a right-associative `AssignmentExpression`.
   */
  parseMaybeAssign() {
    const left = this.parseMaybeConditional()

    if (this.next.type.isAssign) {
      const node = this.startNodeAt(left.start)
      node.operator = this.next.value
      node.left = this.toAssignable(left)
      this.next = this.tokenizer.nextToken()
      // Recursing (instead of looping) makes `a = b = c` group as `a = (b = c)`
      node.right = this.parseMaybeAssign()
      return this.finishNode(node, 'AssignmentExpression')
    }

    return left
  }

  /**
   * Parses `function name(params) { body }`; expects `this.next` to be the
   * `function` keyword.
   * @returns {object} The finished `FunctionDeclaration` node, whose `body`
   *   is a `BlockStatement` node as ESTree requires.
   */
  parseFunctionDeclaration() {
    const node = this.startNode()
    // Consume the `function` keyword
    this.next = this.tokenizer.nextToken()
    node.id = this.parseIdentifier()
    node.params = this.parseParams()

    // Start the block node while `this.next` is still the opening brace
    const block = this.startNode()
    block.body = this.parseFunctionBody()
    node.body = this.finishNode(block, 'BlockStatement')

    return this.finishNode(node, 'FunctionDeclaration')
  }

  /**
   * Parses a parenthesised, comma-separated parameter list; expects
   * `this.next` to be the opening parenthesis.
   * @returns {object[]} The parameter pattern nodes.
   */
  parseParams() {
    const params = []

    // `expect` consumes the `(` itself; advancing beforehand would skip it.
    this.expect(tokTypes.parenL)

    while (this.next.type !== tokTypes.parenR) {
      if (this.next.type === tokTypes.eof) {
        // Unterminated parameter list
        this.raise(this.next.start, 'Unexpected token')
      }
      params.push(this.parsePattern())
      if (this.next.type !== tokTypes.parenR) {
        this.expect(tokTypes.comma)
      }
    }

    // Consume the `)`
    this.next = this.tokenizer.nextToken()
    return params
  }

  /**
   * Parses `{ statement* }`; expects `this.next` to be the opening brace.
   * @returns {object[]} The statement nodes inside the braces.
   */
  parseFunctionBody() {
    this.expect(tokTypes.braceL)

    const body = []
    while (this.next.type !== tokTypes.braceR) {
      if (this.next.type === tokTypes.eof) {
        // Unterminated body: report it rather than parsing past the end
        this.raise(this.next.start, 'Unexpected token')
      }
      body.push(this.parseStatement())
    }

    // Consume the `}`
    this.next = this.tokenizer.nextToken()
    return body
  }

  /**
   * Consumes the current token when it has the given type; otherwise reports
   * an error at the current token via `raise`.
   * @param {object} type - Expected token type (a `tokTypes` member).
   */
  expect(type) {
    if (this.next.type === type) {
      this.next = this.tokenizer.nextToken()
    } else {
      this.raise(this.next.start, 'Unexpected token')
    }
  }
}

优势：完整（支持完整的 JavaScript 语法）、准确（准确的 AST 生成）、高效（高效的解析算法）。

3. AST

AST 遵循 ESTree 规范。

结构：

javascript

// ESTree-style AST corresponding to: function foo(x) { return x }
{
  type: 'Program',
  // Top-level statements of the program
  body: [
    {
      type: 'FunctionDeclaration',
      // Function name
      id: {
        type: 'Identifier',
        name: 'foo'
      },
      // Parameter list: a single identifier `x`
      params: [
        {
          type: 'Identifier',
          name: 'x'
        }
      ],
      // The function body is a BlockStatement node, not a bare array
      body: {
        type: 'BlockStatement',
        body: [
          {
            // `return x`
            type: 'ReturnStatement',
            argument: {
              type: 'Identifier',
              name: 'x'
            }
          }
        ]
      }
    }
  ]
}

递归下降解析

解析流程

Program
  ↓
Statement
  ↓
Expression
  ↓
Term
  ↓
Factor

示例

javascript

// 解析: x = 1 + 2
parseExpression()
  ↓
parseMaybeAssign()
  ↓
parseMaybeConditional()
  ↓
parseMaybeBinary()
  ↓
parseMaybeBinaryOrLogical()
  ↓
parseLeftSide()
  ↓
parseIdentifier()

错误处理

错误报告

javascript

raise(pos, message) {
  const loc = this.getLineInfo(pos)
  const error = new SyntaxError(message)
  error.pos = pos
  error.loc = loc
  error.raisedAt = this.pos
  throw error
}

getLineInfo(pos) {
  let line = 1
  let column = 0
  
  for (let i = 0; i < pos; i++) {
    if (this.input[i] === '\n') {
      line++
      column = 0
    } else {
      column++
    }
  }
  
  return { line, column }
}

错误恢复

javascript

parseStatement() {
  try {
    return this.parseStatementInternal()
  } catch (error) {
    // 恢复到下一个语句
    this.skipToNextStatement()
    return this.parseEmptyStatement()
  }
}

性能优化

1. 最小化 AST

javascript

// Store only the necessary information: the node type, the identifier name,
// and `start`/`end` (presumably source offsets of the node — confirm).
{
  type: 'Identifier',
  name: 'x',
  start: 0,
  end: 1
}

2. 快速查找

javascript

// Keyword lookup table: maps keyword text to its token type.
// Built on a null-prototype object so that a `keywords[word]` lookup for an
// identifier such as `constructor` or `toString` yields undefined instead of
// an inherited Object.prototype member.
const keywords = Object.assign(Object.create(null), {
  function: tokTypes._function,
  var: tokTypes._var,
  let: tokTypes._let,
  const: tokTypes._const
})

3. 缓存标记

javascript

// Token cache, keyed by position.
// NOTE(review): presumably runs where `this` is the tokenizer/parser
// instance (e.g. inside its constructor) — confirm.
this.tokenCache = new Map()

// Returns the token at `pos`, reading it via `this.readToken` only on the
// first request and serving it from `this.tokenCache` afterwards.
// Must be invoked with `this` bound to the object that owns the cache.
function getToken(pos) {
  const isCached = this.tokenCache.has(pos)

  if (!isCached) {
    const freshToken = this.readToken(pos)
    this.tokenCache.set(pos, freshToken)
    return freshToken
  }

  return this.tokenCache.get(pos)
}

总结

Acorn 的架构体现了四个核心特点：递归下降——使用经典的递归下降解析算法，代码结构清晰，易于理解和维护；极小体积——通过精心设计的代码结构实现了最小化的代码体积；高性能——通过算法优化实现了卓越的解析速度；标准遵循——严格遵循 ESTree 规范，确保输出的 AST 与其他工具兼容。理解 Acorn 的架构有助于更好地使用和优化 Acorn。

参考资源

Acorn 源码，ESTree 规范，递归下降解析。

扫描二维码关注"架构师AI杜"公众号，获取更多技术内容和最新动态

Acorn 解析器架构分析 ​

整体架构 ​

核心组件 ​

1. Tokenizer ​

2. Parser ​

3. AST ​

递归下降解析 ​

解析流程 ​

示例 ​

错误处理 ​

错误报告 ​

错误恢复 ​

性能优化 ​

1. 最小化 AST ​

2. 快速查找 ​

3. 缓存标记 ​

总结 ​

参考资源 ​

Acorn 解析器架构分析

整体架构

核心组件

1. Tokenizer

2. Parser

3. AST

递归下降解析

解析流程

示例

错误处理

错误报告

错误恢复

性能优化

1. 最小化 AST

2. 快速查找

3. 缓存标记

总结

参考资源