Appearance
Acorn 解析器源代码导览
项目结构
acorn-course/
├── 04-core-feature/ # 核心功能实现
│ ├── src/
│ │ ├── tokenizer.js # 词法分析器
│ │ ├── parser.js # 语法分析器
│ │ └── ast.js # AST 工具
│ ├── test/
│ │ ├── tokenizer.test.js
│ │ ├── parser.test.js
│ │ └── ast.test.js
│ ├── package.json
│ └── README.md
├── 05-lesson-plan.md # 课程计划
├── 01-intro.md # 背景研究
├── 02-arch.md # 架构分析
└── 03-code-walkthrough.md # 源代码导览

核心文件解析
1. tokenizer.js - 词法分析器
文件路径: src/tokenizer.js
核心功能:
- 标记化
- 处理空白
- 处理注释
关键代码:
javascript
// 标记类型
// A token type descriptor. Each distinct kind of token (punctuator, keyword,
// literal, operator) gets one shared TokenType instance; the boolean flags
// drive the parser's dispatch (e.g. `isAssign` → AssignmentExpression).
// BUGFIX: `conf` now defaults to `{}` — the original read `conf.keyword`
// unconditionally, so conf-less constructions like `new TokenType('eof')`
// threw a TypeError at module load.
class TokenType {
  constructor(label, conf = {}) {
    this.label = label            // human-readable label, e.g. '==' or 'num'
    this.keyword = conf.keyword   // the keyword string, if this is a keyword token
    this.beforeExpr = !!conf.beforeExpr // an expression may follow this token
    this.startsExpr = !!conf.startsExpr // this token can begin an expression
    this.isLoop = !!conf.isLoop         // loop-introducing keyword
    this.isAssign = !!conf.isAssign     // assignment operator
    this.prefix = !!conf.prefix         // usable as a prefix operator
    this.postfix = !!conf.postfix       // usable as a postfix operator
    this.binop = conf.binop || null     // binary-operator precedence (higher binds tighter)
  }
}
// 标记类型定义
// The full token-type table. BUGFIXES relative to the original:
// - `relational` was built with the misspelled `TokenTyp`, a ReferenceError
//   that crashed the whole module at load time;
// - the `unknown` type used by the tokenizer's fallback path was missing;
// - conf-less entries now pass an explicit `{}` so construction does not
//   depend on the TokenType constructor providing a default.
const tokTypes = {
  // literal-ish tokens
  num: new TokenType('num', { startsExpr: true }),
  regexp: new TokenType('regexp', { startsExpr: true }),
  string: new TokenType('string', { startsExpr: true }),
  name: new TokenType('name', { startsExpr: true }),
  eof: new TokenType('eof', {}),
  unknown: new TokenType('unknown', {}),
  // punctuation
  bracketL: new TokenType('[', { beforeExpr: true, startsExpr: true }),
  bracketR: new TokenType(']', {}),
  braceL: new TokenType('{', { beforeExpr: true, startsExpr: true }),
  braceR: new TokenType('}', {}),
  parenL: new TokenType('(', { beforeExpr: true, startsExpr: true }),
  parenR: new TokenType(')', {}),
  comma: new TokenType(',', { beforeExpr: true }),
  semi: new TokenType(';', { beforeExpr: true }),
  colon: new TokenType(':', { beforeExpr: true }),
  dot: new TokenType('.', { beforeExpr: true }),
  question: new TokenType('?', { beforeExpr: true }),
  questionDot: new TokenType('?.', {}),
  arrow: new TokenType('=>', { beforeExpr: true }),
  ellipsis: new TokenType('...', { beforeExpr: true }),
  backQuote: new TokenType('`', { startsExpr: true }),
  dollarBraceL: new TokenType('${', { beforeExpr: true, startsExpr: true }),
  // operators — `binop` numbers: higher binds tighter
  eq: new TokenType('=', { beforeExpr: true, isAssign: true }),
  assign: new TokenType('_=', { beforeExpr: true, isAssign: true }),
  incDec: new TokenType('++/--', { prefix: true, postfix: true }),
  prefix: new TokenType('!/~', { beforeExpr: true, prefix: true }),
  logicalOR: new TokenType('||', { beforeExpr: true, binop: 1 }),
  logicalAND: new TokenType('&&', { beforeExpr: true, binop: 2 }),
  bitwiseOR: new TokenType('|', { beforeExpr: true, binop: 3 }),
  bitwiseXOR: new TokenType('^', { beforeExpr: true, binop: 4 }),
  bitwiseAND: new TokenType('&', { beforeExpr: true, binop: 5 }),
  equality: new TokenType('==/!=/===/!==', { beforeExpr: true, binop: 6 }),
  relational: new TokenType('</>/<=/>=', { beforeExpr: true, binop: 7 }),
  bitShift: new TokenType('<</>>', { beforeExpr: true, binop: 8 }),
  plusMin: new TokenType('+/-', { beforeExpr: true, binop: 9, prefix: true }),
  modulo: new TokenType('%', { beforeExpr: true, binop: 10 }),
  star: new TokenType('*', { beforeExpr: true, binop: 10 }),
  slash: new TokenType('/', { beforeExpr: true, binop: 10 }),
  starstar: new TokenType('**', { beforeExpr: true, binop: 11 }),
  // keywords
  _break: new TokenType('break', { keyword: 'break' }),
  _case: new TokenType('case', { keyword: 'case' }),
  _catch: new TokenType('catch', { keyword: 'catch' }),
  _class: new TokenType('class', { keyword: 'class' }),
  _const: new TokenType('const', { keyword: 'const' }),
  _continue: new TokenType('continue', { keyword: 'continue' }),
  _debugger: new TokenType('debugger', { keyword: 'debugger' }),
  _default: new TokenType('default', { keyword: 'default' }),
  _delete: new TokenType('delete', { keyword: 'delete', prefix: true }),
  _do: new TokenType('do', { keyword: 'do' }),
  _else: new TokenType('else', { keyword: 'else' }),
  _finally: new TokenType('finally', { keyword: 'finally' }),
  _for: new TokenType('for', { keyword: 'for' }),
  _function: new TokenType('function', { keyword: 'function' }),
  _if: new TokenType('if', { keyword: 'if' }),
  _in: new TokenType('in', { keyword: 'in', binop: 7 }),
  _instanceof: new TokenType('instanceof', { keyword: 'instanceof', binop: 7 }),
  _new: new TokenType('new', { keyword: 'new', beforeExpr: true, startsExpr: true }),
  _return: new TokenType('return', { keyword: 'return', beforeExpr: true }),
  _switch: new TokenType('switch', { keyword: 'switch' }),
  _this: new TokenType('this', { keyword: 'this' }),
  _throw: new TokenType('throw', { keyword: 'throw', beforeExpr: true }),
  _try: new TokenType('try', { keyword: 'try' }),
  _typeof: new TokenType('typeof', { keyword: 'typeof', prefix: true, beforeExpr: true }),
  _var: new TokenType('var', { keyword: 'var' }),
  _void: new TokenType('void', { keyword: 'void', prefix: true, beforeExpr: true }),
  _while: new TokenType('while', { keyword: 'while' }),
  _with: new TokenType('with', { keyword: 'with' }),
  _let: new TokenType('let', { keyword: 'let' }),
  _true: new TokenType('true', { keyword: 'true', startsExpr: true }),
  _false: new TokenType('false', { keyword: 'false', startsExpr: true }),
  _null: new TokenType('null', { keyword: 'null', startsExpr: true }),
  _async: new TokenType('async', { keyword: 'async' }),
  _await: new TokenType('await', { keyword: 'await', prefix: true, beforeExpr: true }),
  _yield: new TokenType('yield', { keyword: 'yield', prefix: true, beforeExpr: true }),
  _extends: new TokenType('extends', { keyword: 'extends' }),
  _import: new TokenType('import', { keyword: 'import' }),
  _export: new TokenType('export', { keyword: 'export' }),
  _from: new TokenType('from', { keyword: 'from' }),
  _static: new TokenType('static', { keyword: 'static' }),
  _super: new TokenType('super', { keyword: 'super' }),
  _get: new TokenType('get', { keyword: 'get' }),
  _set: new TokenType('set', { keyword: 'set' }),
}
// 词法分析器
// Maps each single-character punctuator to its token type.
// BUGFIX: the original `readPunctuator` returned `tokTypes[ch]`, but
// `tokTypes` is keyed by names ('braceL', …), not by characters, so every
// single-char punctuator came back with `type: undefined`.
const singleCharTypes = {
  '[': tokTypes.bracketL, ']': tokTypes.bracketR,
  '{': tokTypes.braceL, '}': tokTypes.braceR,
  '(': tokTypes.parenL, ')': tokTypes.parenR,
  ',': tokTypes.comma, ';': tokTypes.semi, ':': tokTypes.colon,
  '.': tokTypes.dot, '?': tokTypes.question,
  '=': tokTypes.eq, '%': tokTypes.modulo, '*': tokTypes.star,
  '/': tokTypes.slash, '+': tokTypes.plusMin, '-': tokTypes.plusMin,
  '|': tokTypes.bitwiseOR, '&': tokTypes.bitwiseAND, '^': tokTypes.bitwiseXOR,
  '!': tokTypes.prefix, '~': tokTypes.prefix,
  '<': tokTypes.relational, '>': tokTypes.relational,
}
// On-demand lexer: `nextToken()` produces one token per call.
export class Tokenizer {
  constructor(input, options = {}) {
    this.input = String(input)
    this.options = options
    this.pos = 0       // absolute offset into `input`
    this.line = 1      // 1-based line number
    this.column = 0    // NOTE: only advanced while skipping whitespace; token
                       // reads do not update it — positions past the first
                       // token on a line are approximate.
    this.tokens = []
    this.comments = []
  }
  // Return the next token, or an EOF token at end of input.
  nextToken() {
    this.skipWhitespace()
    if (this.pos >= this.input.length) {
      return { type: tokTypes.eof }
    }
    const ch = this.input[this.pos]
    if (isIdentifierStart(ch)) {
      return this.readWord()
    } else if (isDigit(ch)) {
      return this.readNumber()
    } else if (isStringQuote(ch)) {
      return this.readString()
    } else if (isPunctuator(ch)) {
      return this.readPunctuator()
    }
    // Unrecognized character: consume it and emit a fallback token so the
    // caller cannot loop forever on the same position.
    this.pos++
    return { type: tokTypes.unknown, value: ch }
  }
  // Read an identifier or keyword ([a-zA-Z_$][a-zA-Z0-9_$]*).
  readWord() {
    const start = this.pos
    while (this.pos < this.input.length && isIdentifierChar(this.input[this.pos])) {
      this.pos++
    }
    const word = this.input.slice(start, this.pos)
    // Keywords get their dedicated token type; everything else is a name.
    const type = keywords[word] || tokTypes.name
    return { type, value: word }
  }
  // Read a decimal number. NOTE: accepts multiple '.' characters (e.g.
  // "1.2.3" → NaN) — no validation is performed here.
  readNumber() {
    const start = this.pos
    while (this.pos < this.input.length) {
      const ch = this.input[this.pos]
      if (isDigit(ch) || ch === '.') {
        this.pos++
      } else {
        break
      }
    }
    const num = this.input.slice(start, this.pos)
    return { type: tokTypes.num, value: Number(num) }
  }
  // Read a quoted string. Backslash escapes are skipped verbatim (the raw
  // two characters stay in the value — escapes are not decoded).
  readString() {
    const quote = this.input[this.pos]
    const start = ++this.pos // skip the opening quote
    while (this.pos < this.input.length) {
      const ch = this.input[this.pos]
      if (ch === '\\') {
        this.pos += 2 // skip the escape pair
      } else if (ch === quote) {
        this.pos++    // consume the closing quote
        break
      } else {
        this.pos++
      }
    }
    // Exclude the closing quote from the value.
    const str = this.input.slice(start, this.pos - 1)
    return { type: tokTypes.string, value: str }
  }
  // Read a punctuator: longest match first (3-char, then 2-char, then 1).
  // BUGFIX: '==' / '!=' previously mapped to the *assignment* type
  // `tokTypes.eq`; they (and ===/!==) now map to `tokTypes.equality`, and
  // '=>', '&&', '||' are recognized as well.
  readPunctuator() {
    const ch = this.input[this.pos]
    const next = this.input[this.pos + 1]
    // three-character operators: === and !==
    if ((ch === '=' || ch === '!') && next === '=' && this.input[this.pos + 2] === '=') {
      const op = this.input.slice(this.pos, this.pos + 3)
      this.pos += 3
      return { type: tokTypes.equality, value: op }
    }
    // two-character operators
    if (ch === '=' && next === '=') {
      this.pos += 2
      return { type: tokTypes.equality, value: '==' }
    } else if (ch === '!' && next === '=') {
      this.pos += 2
      return { type: tokTypes.equality, value: '!=' }
    } else if (ch === '=' && next === '>') {
      this.pos += 2
      return { type: tokTypes.arrow, value: '=>' }
    } else if (ch === '&' && next === '&') {
      this.pos += 2
      return { type: tokTypes.logicalAND, value: '&&' }
    } else if (ch === '|' && next === '|') {
      this.pos += 2
      return { type: tokTypes.logicalOR, value: '||' }
    } else if (ch === '+' && next === '+') {
      this.pos += 2
      return { type: tokTypes.incDec, value: '++' }
    } else if (ch === '-' && next === '-') {
      this.pos += 2
      return { type: tokTypes.incDec, value: '--' }
    }
    // single-character operators via the lookup table
    this.pos++
    return { type: singleCharTypes[ch], value: ch }
  }
  // Skip whitespace, tracking line/column across newlines.
  skipWhitespace() {
    while (this.pos < this.input.length) {
      const ch = this.input[this.pos]
      if (isWhitespace(ch)) {
        this.pos++
        if (ch === '\n') {
          this.line++
          this.column = 0
        } else {
          this.column++
        }
      } else {
        break
      }
    }
  }
}
// 辅助函数
// True when `ch` falls in the regex \s whitespace class
// (space, tab, newline, carriage return, etc.).
function isWhitespace(ch) {
  const whitespacePattern = /\s/
  return whitespacePattern.test(ch)
}
// True when `ch` may begin an identifier: ASCII letter, underscore, or '$'.
function isIdentifierStart(ch) {
  const identifierStartPattern = /[a-zA-Z_$]/
  return identifierStartPattern.test(ch)
}
// True when `ch` may continue an identifier: like isIdentifierStart, plus digits.
function isIdentifierChar(ch) {
  const identifierPartPattern = /[a-zA-Z0-9_$]/
  return identifierPartPattern.test(ch)
}
// True when `ch` is a decimal digit 0-9.
function isDigit(ch) {
  const digitPattern = /[0-9]/
  return digitPattern.test(ch)
}
// True when `ch` can open a string literal: double quote, single quote, or backtick.
function isStringQuote(ch) {
  return ['"', "'", '`'].includes(ch)
}
// True when `ch` is a punctuator/operator character.
// BUGFIX: the original class contained the sequence `+-*`, an out-of-order
// character range ('+' is 0x2B, '*' is 0x2A) that is a SyntaxError and
// crashed the whole module at load time; '-' is now placed last so it is
// literal. '=' was also missing, so '=' never reached readPunctuator.
function isPunctuator(ch) {
  return /[{}()[\];,.:+*/%&|^~!<>?=-]/.test(ch)
}
// 关键字映射
// Keyword lookup table: maps each reserved word to its token type.
// Every entry in tokTypes named `_word` is keyed here by the bare word,
// so the table is derived from the word list instead of written out twice.
const keywords = Object.fromEntries(
  [
    'break', 'case', 'catch', 'class', 'const', 'continue', 'debugger',
    'default', 'delete', 'do', 'else', 'finally', 'for', 'function', 'if',
    'in', 'instanceof', 'new', 'return', 'switch', 'this', 'throw', 'try',
    'typeof', 'var', 'void', 'while', 'with', 'let', 'true', 'false',
    'null', 'async', 'await', 'yield', 'extends', 'import', 'export',
    'from', 'static', 'super', 'get', 'set',
  ].map((word) => [word, tokTypes['_' + word]])
)
export { tokTypes }

设计要点:
- 完整的标记类型定义
- 高效的标记化算法
- 支持所有 JavaScript 语法
2. parser.js - 语法分析器
文件路径: src/parser.js
核心功能:
- 递归下降解析
- 生成 AST
- 错误恢复
关键代码:
javascript
import { tokTypes, Tokenizer } from './tokenizer.js'
// 语法分析器
// Recursive-descent parser producing an ESTree-style AST from a token stream.
// Holds a one-token lookahead in `this.next`; every parse method consumes the
// tokens it recognizes and leaves the lookahead on the first unconsumed token.
export class Parser {
  constructor(input, options = {}) {
    this.input = input
    this.options = options
    this.tokenizer = new Tokenizer(input, options)
    // Prime the lookahead with the first token.
    this.next = this.tokenizer.nextToken()
    this.tokens = []
  }
  // Program := Statement* EOF
  parseProgram() {
    const node = this.startNode()
    node.body = []
    node.sourceType = this.options.sourceType || 'script'
    while (this.next.type !== tokTypes.eof) {
      node.body.push(this.parseStatement())
    }
    return this.finishNode(node, 'Program')
  }
  // Dispatch on the lookahead token to the matching statement parser.
  parseStatement() {
    switch (this.next.type) {
      case tokTypes._function:
        return this.parseFunctionDeclaration()
      case tokTypes._var:
      case tokTypes._let:
      case tokTypes._const:
        return this.parseVariableDeclaration()
      case tokTypes._if:
        return this.parseIfStatement()
      case tokTypes._for:
        return this.parseForStatement()
      case tokTypes._while:
        return this.parseWhileStatement()
      case tokTypes._return:
        return this.parseReturnStatement()
      case tokTypes._break:
        return this.parseBreakStatement()
      case tokTypes._continue:
        return this.parseContinueStatement()
      case tokTypes._class:
        return this.parseClassDeclaration()
      default:
        return this.parseExpressionStatement()
    }
  }
  // General expression entry point.
  parseExpression() {
    return this.parseMaybeAssign()
  }
  // Assignment is right-associative: `a = b = c` parses as `a = (b = c)`.
  parseMaybeAssign() {
    const left = this.parseMaybeConditional()
    if (this.next.type.isAssign) {
      const node = this.startNodeAt(left.start)
      node.operator = this.next.value
      node.left = this.toAssignable(left)
      this.next = this.tokenizer.nextToken()
      node.right = this.parseMaybeAssign()
      return this.finishNode(node, 'AssignmentExpression')
    }
    return left
  }
  // test ? consequent : alternate
  parseMaybeConditional() {
    const expr = this.parseMaybeBinaryOrLogical()
    if (this.next.type === tokTypes.question) {
      const node = this.startNodeAt(expr.start)
      node.test = expr
      this.next = this.tokenizer.nextToken()
      node.consequent = this.parseExpression()
      this.expect(tokTypes.colon)
      node.alternate = this.parseMaybeAssign()
      return this.finishNode(node, 'ConditionalExpression')
    }
    return expr
  }
  // Binary/logical operators via precedence climbing.
  // BUGFIXES: `left` was declared `const` but reassigned (a TypeError at
  // runtime), and the unconditional recursion ignored `binop` levels, making
  // every operator right-associative. `minPrec` (default 0 keeps existing
  // call sites working) implements standard precedence climbing: only
  // tighter-binding operators are absorbed into the right operand, so equal
  // precedence associates left.
  parseMaybeBinaryOrLogical(minPrec = 0) {
    let left = this.parseMaybeUnary()
    while (this.next.type.binop && this.next.type.binop > minPrec) {
      const prec = this.next.type.binop
      const node = this.startNodeAt(left.start)
      node.operator = this.next.value
      node.left = left
      this.next = this.tokenizer.nextToken()
      node.right = this.parseMaybeBinaryOrLogical(prec)
      left = this.finishNode(node, 'BinaryExpression')
    }
    return left
  }
  // Prefix operators (!, ~, typeof, delete, void, -, +, ...).
  parseMaybeUnary() {
    if (this.next.type.prefix) {
      const node = this.startNode()
      node.operator = this.next.value
      node.prefix = true
      this.next = this.tokenizer.nextToken()
      node.argument = this.parseMaybeUnary()
      return this.finishNode(node, 'UnaryExpression')
    }
    return this.parseUpdateExpression()
  }
  // Postfix ++ / --.
  parseUpdateExpression() {
    const expr = this.parseLeftSide()
    if (this.next.type.postfix) {
      const node = this.startNodeAt(expr.start)
      node.operator = this.next.value
      node.prefix = false
      node.argument = expr
      this.next = this.tokenizer.nextToken()
      return this.finishNode(node, 'UpdateExpression')
    }
    return expr
  }
  // A member-expression chain optionally followed by a call.
  parseLeftSide() {
    const expr = this.parseMemberExpression()
    if (this.next.type === tokTypes.parenL) {
      return this.parseCallExpression(expr)
    }
    return expr
  }
  // obj.prop and obj[expr] chains, left to right.
  parseMemberExpression() {
    let expr = this.parsePrimaryExpression()
    while (true) {
      if (this.next.type === tokTypes.dot) {
        this.next = this.tokenizer.nextToken()
        const node = this.startNodeAt(expr.start)
        node.object = expr
        node.property = this.parseIdentifier()
        node.computed = false
        expr = this.finishNode(node, 'MemberExpression')
      } else if (this.next.type === tokTypes.bracketL) {
        this.next = this.tokenizer.nextToken()
        const node = this.startNodeAt(expr.start)
        node.object = expr
        node.property = this.parseExpression()
        this.expect(tokTypes.bracketR)
        node.computed = true
        expr = this.finishNode(node, 'MemberExpression')
      } else {
        break
      }
    }
    return expr
  }
  // callee(arguments)
  parseCallExpression(callee) {
    const node = this.startNodeAt(callee.start)
    node.callee = callee
    node.arguments = this.parseArguments()
    return this.finishNode(node, 'CallExpression')
  }
  // Comma-separated argument list between parentheses.
  parseArguments() {
    this.expect(tokTypes.parenL)
    const args = []
    while (this.next.type !== tokTypes.parenR) {
      args.push(this.parseExpression())
      if (this.next.type !== tokTypes.parenR) {
        this.expect(tokTypes.comma)
      }
    }
    this.next = this.tokenizer.nextToken() // consume ')'
    return args
  }
  // Literals, identifiers, and bracketed primary forms.
  parsePrimaryExpression() {
    switch (this.next.type) {
      case tokTypes._this:
        return this.parseThisExpression()
      case tokTypes.name:
        return this.parseIdentifier()
      case tokTypes.num:
      case tokTypes.string:
      case tokTypes._true:
      case tokTypes._false:
      case tokTypes._null:
        return this.parseLiteral()
      case tokTypes.parenL:
        return this.parseParenExpression()
      case tokTypes.bracketL:
        return this.parseArrayExpression()
      case tokTypes.braceL:
        return this.parseObjectExpression()
      case tokTypes._function:
        return this.parseFunctionExpression()
      default:
        // Fallback: treat the token as an identifier. NOTE(review): this
        // silently accepts tokens that are not valid expression starts —
        // consider raising instead.
        return this.parseIdentifier()
    }
  }
  // `this`
  parseThisExpression() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken()
    return this.finishNode(node, 'ThisExpression')
  }
  // A bare identifier.
  parseIdentifier() {
    const node = this.startNode()
    node.name = this.next.value
    this.next = this.tokenizer.nextToken()
    return this.finishNode(node, 'Identifier')
  }
  // Number / string / boolean / null literal.
  parseLiteral() {
    const node = this.startNode()
    node.value = this.next.value
    // BUGFIX: the original read `this.pos`, which does not exist on Parser
    // (always undefined, slicing to end of input); the offset lives on the
    // tokenizer.
    node.raw = this.input.slice(node.start, this.tokenizer.pos)
    this.next = this.tokenizer.nextToken()
    return this.finishNode(node, 'Literal')
  }
  // "( expr )" — returns the inner expression without a wrapper node.
  parseParenExpression() {
    this.expect(tokTypes.parenL)
    const expr = this.parseExpression()
    this.expect(tokTypes.parenR)
    return expr
  }
  // "[ el, , el ]" — a bare comma yields a null (elision) element.
  parseArrayExpression() {
    const node = this.startNode()
    node.elements = []
    this.expect(tokTypes.bracketL)
    while (this.next.type !== tokTypes.bracketR) {
      if (this.next.type === tokTypes.comma) {
        node.elements.push(null)
      } else {
        node.elements.push(this.parseExpression())
      }
      if (this.next.type !== tokTypes.bracketR) {
        this.expect(tokTypes.comma)
      }
    }
    this.next = this.tokenizer.nextToken() // consume ']'
    return this.finishNode(node, 'ArrayExpression')
  }
  // "{ key: value, ... }"
  parseObjectExpression() {
    const node = this.startNode()
    node.properties = []
    this.expect(tokTypes.braceL)
    while (this.next.type !== tokTypes.braceR) {
      node.properties.push(this.parseProperty())
      if (this.next.type !== tokTypes.braceR) {
        this.expect(tokTypes.comma)
      }
    }
    this.next = this.tokenizer.nextToken() // consume '}'
    return this.finishNode(node, 'ObjectExpression')
  }
  // One object property: `key: value`, shorthand `key`, or computed `[k]: v`.
  parseProperty() {
    const node = this.startNode()
    node.method = false
    node.shorthand = false
    node.computed = false
    if (this.next.type === tokTypes.name) {
      const key = this.parseIdentifier()
      if (this.next.type === tokTypes.colon) {
        this.next = this.tokenizer.nextToken()
        node.key = key
        node.value = this.parseExpression()
        node.kind = 'init'
      } else {
        // Shorthand: `{ x }` — key and value share the identifier node.
        node.key = key
        node.value = key
        node.shorthand = true
        node.kind = 'init'
      }
    } else if (this.next.type === tokTypes.bracketL) {
      node.computed = true
      this.next = this.tokenizer.nextToken()
      node.key = this.parseExpression()
      this.expect(tokTypes.bracketR)
      this.expect(tokTypes.colon)
      node.value = this.parseExpression()
      node.kind = 'init'
    }
    return this.finishNode(node, 'Property')
  }
  // `function [name](params) { body }` as an expression.
  // BUGFIX: the id is optional for function *expressions* — the original
  // unconditionally parsed one, so anonymous functions consumed '(' as
  // their name.
  parseFunctionExpression() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken() // consume 'function'
    node.id = this.next.type === tokTypes.name ? this.parseIdentifier() : null
    node.params = this.parseParams()
    node.body = this.parseFunctionBody()
    node.generator = false
    node.async = false
    return this.finishNode(node, 'FunctionExpression')
  }
  // `function name(params) { body }` as a declaration (name required).
  parseFunctionDeclaration() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken() // consume 'function'
    node.id = this.parseIdentifier()
    node.params = this.parseParams()
    node.body = this.parseFunctionBody()
    node.generator = false
    node.async = false
    return this.finishNode(node, 'FunctionDeclaration')
  }
  // "( pattern, pattern, ... )"
  // BUGFIX: the original advanced one extra token *before* expecting '(',
  // so every parameter list (including `function foo() {}`) raised
  // "Unexpected token". The lookahead is already on '(' when this is called.
  parseParams() {
    const params = []
    this.expect(tokTypes.parenL)
    while (this.next.type !== tokTypes.parenR) {
      params.push(this.parsePattern())
      if (this.next.type !== tokTypes.parenR) {
        this.expect(tokTypes.comma)
      }
    }
    this.next = this.tokenizer.nextToken() // consume ')'
    return params
  }
  // Binding pattern. Only plain identifiers are supported for now.
  parsePattern() {
    return this.parseIdentifier()
  }
  // "{ statement* }" — returns the plain statement array, not a
  // BlockStatement node (callers store it directly as `node.body`).
  parseFunctionBody() {
    this.expect(tokTypes.braceL)
    const body = []
    while (this.next.type !== tokTypes.braceR) {
      body.push(this.parseStatement())
    }
    this.next = this.tokenizer.nextToken() // consume '}'
    return body
  }
  // `var|let|const decl (, decl)* ;?`
  parseVariableDeclaration() {
    const node = this.startNode()
    node.kind = this.next.value // 'var' | 'let' | 'const'
    this.next = this.tokenizer.nextToken()
    node.declarations = this.parseVariableDeclarators()
    this.eatSemicolon()
    return this.finishNode(node, 'VariableDeclaration')
  }
  // One or more comma-separated declarators.
  // BUGFIX: the original loop condition `a && this.next = …` was a
  // SyntaxError (assignment to a logical expression); the assignment is now
  // parenthesized. A freshly-read token is always truthy, so the loop
  // continues exactly when a comma was seen.
  parseVariableDeclarators() {
    const declarations = []
    do {
      declarations.push(this.parseVariableDeclarator())
    } while (this.next.type === tokTypes.comma && (this.next = this.tokenizer.nextToken()))
    return declarations
  }
  // `id (= init)?`
  parseVariableDeclarator() {
    const node = this.startNode()
    node.id = this.parsePattern()
    if (this.next.type === tokTypes.eq) {
      this.next = this.tokenizer.nextToken()
      node.init = this.parseMaybeAssign()
    } else {
      node.init = null
    }
    return this.finishNode(node, 'VariableDeclarator')
  }
  // `if (test) consequent (else alternate)?`
  parseIfStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken() // consume 'if'
    this.expect(tokTypes.parenL)
    node.test = this.parseExpression()
    this.expect(tokTypes.parenR)
    node.consequent = this.parseStatement()
    if (this.next.type === tokTypes._else) {
      this.next = this.tokenizer.nextToken()
      node.alternate = this.parseStatement()
    } else {
      node.alternate = null
    }
    return this.finishNode(node, 'IfStatement')
  }
  // `for (init; test; update) body`
  // NOTE(review): the init clause is parsed as a plain expression, so
  // `for (var i = 0; …)` is not handled as a declaration — TODO.
  parseForStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken() // consume 'for'
    this.expect(tokTypes.parenL)
    if (this.next.type !== tokTypes.semi) {
      node.init = this.parseExpression()
    } else {
      node.init = null
    }
    this.expect(tokTypes.semi)
    if (this.next.type !== tokTypes.semi) {
      node.test = this.parseExpression()
    } else {
      node.test = null
    }
    this.expect(tokTypes.semi)
    if (this.next.type !== tokTypes.parenR) {
      node.update = this.parseExpression()
    } else {
      node.update = null
    }
    this.expect(tokTypes.parenR)
    node.body = this.parseStatement()
    return this.finishNode(node, 'ForStatement')
  }
  // `while (test) body`
  parseWhileStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken() // consume 'while'
    this.expect(tokTypes.parenL)
    node.test = this.parseExpression()
    this.expect(tokTypes.parenR)
    node.body = this.parseStatement()
    return this.finishNode(node, 'WhileStatement')
  }
  // `return expr? ;?`
  parseReturnStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken() // consume 'return'
    if (this.next.type !== tokTypes.semi && this.next.type !== tokTypes.braceR) {
      node.argument = this.parseExpression()
    } else {
      node.argument = null
    }
    this.eatSemicolon()
    return this.finishNode(node, 'ReturnStatement')
  }
  // `break ;?` — labels are not supported.
  parseBreakStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken() // consume 'break'
    node.label = null
    this.eatSemicolon()
    return this.finishNode(node, 'BreakStatement')
  }
  // `continue ;?` — labels are not supported.
  parseContinueStatement() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken() // consume 'continue'
    node.label = null
    this.eatSemicolon()
    return this.finishNode(node, 'ContinueStatement')
  }
  // `class Name { methods }` — no `extends` support yet.
  parseClassDeclaration() {
    const node = this.startNode()
    this.next = this.tokenizer.nextToken() // consume 'class'
    node.id = this.parseIdentifier()
    node.body = this.parseClassBody()
    return this.finishNode(node, 'ClassDeclaration')
  }
  // "{ methodDefinition* }" — returns a plain array, like parseFunctionBody.
  parseClassBody() {
    this.expect(tokTypes.braceL)
    const body = []
    while (this.next.type !== tokTypes.braceR) {
      body.push(this.parseMethodDefinition())
    }
    this.next = this.tokenizer.nextToken() // consume '}'
    return body
  }
  // `name(params) { body }` inside a class.
  parseMethodDefinition() {
    const node = this.startNode()
    node.key = this.parseIdentifier()
    node.kind = 'method'
    node.value = this.parseMethod()
    return this.finishNode(node, 'MethodDefinition')
  }
  // The function part of a method definition (params + body, no id).
  parseMethod() {
    const node = this.startNode()
    node.params = this.parseParams()
    node.body = this.parseFunctionBody()
    node.generator = false
    node.async = false
    return this.finishNode(node, 'FunctionExpression')
  }
  // A bare expression used as a statement; optional trailing ';'.
  parseExpressionStatement() {
    const node = this.startNode()
    node.expression = this.parseExpression()
    this.eatSemicolon()
    return this.finishNode(node, 'ExpressionStatement')
  }
  // Consume an optional ';' so statements may be terminated either way.
  eatSemicolon() {
    if (this.next.type === tokTypes.semi) {
      this.next = this.tokenizer.nextToken()
    }
  }
  // Consume a token of the given type or raise a syntax error.
  expect(type) {
    if (this.next.type === type) {
      this.next = this.tokenizer.nextToken()
    } else {
      // BUGFIX: tokens carry no `start` field (it was always undefined);
      // report the tokenizer's current offset instead.
      this.raise(this.tokenizer.pos, `Unexpected token`)
    }
  }
  // Start a node at the tokenizer's current state. NOTE(review): the
  // tokenizer has already read past the lookahead token, so `start` points
  // just after it — positions are approximate.
  startNode() {
    return {
      start: this.tokenizer.pos,
      end: null,
      loc: {
        start: {
          line: this.tokenizer.line,
          column: this.tokenizer.column,
        },
        end: null,
      },
    }
  }
  // Start a node at a known offset (used when wrapping an already-parsed child).
  startNodeAt(pos) {
    return {
      start: pos,
      end: null,
      loc: {
        start: this.getLineInfo(pos),
        end: null,
      },
    }
  }
  // Stamp the node's type and end position.
  finishNode(node, type) {
    node.type = type
    node.end = this.tokenizer.pos
    node.loc.end = {
      line: this.tokenizer.line,
      column: this.tokenizer.column,
    }
    return node
  }
  // Compute { line, column } for an absolute offset by scanning the input.
  // O(pos) per call — acceptable here since it only runs for wrapped nodes
  // and errors.
  getLineInfo(pos) {
    let line = 1
    let column = 0
    for (let i = 0; i < pos; i++) {
      if (this.input[i] === '\n') {
        line++
        column = 0
      } else {
        column++
      }
    }
    return { line, column }
  }
  // Convert an expression node into an assignment target.
  // Placeholder: returns the node unchanged (no pattern conversion yet).
  toAssignable(expr) {
    return expr
  }
  // Throw a SyntaxError annotated with position info.
  raise(pos, message) {
    const loc = this.getLineInfo(pos)
    const error = new SyntaxError(message)
    error.pos = pos
    error.loc = loc
    throw error
  }
}
// 解析函数
// Convenience entry point: builds a Parser over `input` and returns the
// parsed Program AST node. `options` is forwarded to both the Parser and
// its Tokenizer (e.g. `sourceType`).
export function parse(input, options = {}) {
const parser = new Parser(input, options)
return parser.parseProgram()
}

设计要点:
- 递归下降解析算法
- 支持完整的 JavaScript 语法
- 准确的 AST 生成
关键设计决策
1. 递归下降解析
原因:
- 简单易懂
- 易于实现
- 易于扩展
实现:
javascript
parseProgram() {
const node = this.startNode()
node.body = []
while (this.next.type !== tokTypes.eof) {
node.body.push(this.parseStatement())
}
return this.finishNode(node, 'Program')
}

2. 使用 ESTree 规范
原因:
- 标准化
- 生态兼容
- 易于理解
实现:
javascript
{
type: 'Program',
body: [
{
type: 'FunctionDeclaration',
id: { type: 'Identifier', name: 'foo' },
params: [],
body: { type: 'BlockStatement', body: [] }
}
]
}

3. 最小化 AST
原因:
- 减少内存占用
- 提升性能
- 简化处理
实现:
javascript
{
type: 'Identifier',
name: 'x',
start: 0,
end: 1
}

测试策略
单元测试
javascript
import { describe, it } from 'node:test'
import assert from 'node:assert'
import { parse } from '../src/parser.js'
describe('Parser 测试', () => {
it('应该解析程序', () => {
const ast = parse('const x = 1')
assert.ok(ast)
assert.strictEqual(ast.type, 'Program')
})
it('应该解析函数声明', () => {
const ast = parse('function foo() {}')
assert.ok(ast)
assert.strictEqual(ast.body[0].type, 'FunctionDeclaration')
})
})

总结
Acorn 的源代码体现了以下设计原则:
- 递归下降:使用递归下降解析算法
- 极小体积:最小化代码体积
- 标准遵循:严格遵循 ESTree 规范
- 高性能:优化解析速度
理解源代码有助于更好地使用和优化 Acorn。
参考资源

扫描二维码关注"架构师AI杜"公众号,获取更多技术内容和最新动态
