Appearance
Node.js 集群模式面试题
1. 什么是集群模式?
问题:什么是 Node.js 的集群模式?为什么要使用它?
答案: 集群模式允许 Node.js 应用利用多核 CPU 的优势,通过创建多个工作进程来处理请求,从而提高应用的性能和可用性。
为什么使用集群模式:
- 利用多核 CPU:Node.js 的 JavaScript 代码默认在单个线程中执行,单个进程通常只能充分利用一个 CPU 核心
- 提高吞吐量:多个进程可以同时处理请求
- 增强可用性:某个进程崩溃不会影响其他进程
- 零停机部署:可以逐个重启工作进程
工作原理:
┌─────────────────────────────────────┐
│            Master 进程              │
│  ┌───────────────────────────────┐  │
│  │          负载均衡器           │  │
│  └───────────────┬───────────────┘  │
└──────────────────┼──────────────────┘
                   │
       ┌───────────┼───────────┐
       │           │           │
┌──────▼───┐  ┌────▼────┐  ┌───▼─────┐
│ Worker 1 │  │ Worker 2│  │ Worker 3│
│  (进程)  │  │ (进程)  │  │ (进程)  │
└──────────┘  └─────────┘  └─────────┘

2. cluster 模块的基本使用
问题:如何使用 cluster 模块创建集群?
答案:
基本示例:
javascript
const cluster = require('cluster');
const http = require('http');
const os = require('os');
const numCPUs = os.cpus().length;
if (cluster.isMaster) {
console.log(`Master ${process.pid} is running`);
// 创建工作进程
for (let i = 0; i < numCPUs; i++) {
cluster.fork();
}
// 监听工作进程事件
cluster.on('exit', (worker, code, signal) => {
  console.log(`Worker ${worker.process.pid} died`);
  // exitedAfterDisconnect is set when the exit was requested through
  // worker.disconnect() / worker.kill(); forking a replacement in that case
  // would undo an intentional shutdown.
  if (worker.exitedAfterDisconnect) {
    return;
  }
  // Replace a worker that died unexpectedly.
  cluster.fork();
});
} else {
// 工作进程创建 HTTP 服务器
http.createServer((req, res) => {
res.writeHead(200);
res.end(`Hello from Worker ${process.pid}\n`);
}).listen(8000);
console.log(`Worker ${process.pid} started`);
}

Express 应用使用集群:
javascript
const cluster = require('cluster');
const os = require('os');
const express = require('express');
const numCPUs = os.cpus().length;
if (cluster.isMaster) {
console.log(`Master ${process.pid} is running`);
// 创建多个工作进程
for (let i = 0; i < numCPUs; i++) {
cluster.fork();
}
cluster.on('exit', (worker) => {
  // A worker stopped on purpose (worker.disconnect() / worker.kill()) must
  // not be replaced, and must not be reported as "restarting".
  if (worker.exitedAfterDisconnect) {
    console.log(`Worker ${worker.process.pid} exited after disconnect`);
    return;
  }
  console.log(`Worker ${worker.process.pid} died, restarting...`);
  cluster.fork();
});
} else {
const app = express();
app.get('/', (req, res) => {
res.send(`Hello from Worker ${process.pid}`);
});
app.listen(3000, () => {
console.log(`Worker ${process.pid} started on port 3000`);
});
}

3. 集群的负载均衡策略
问题:Node.js 集群使用什么负载均衡策略?
答案:
负载均衡策略:
- 轮询(Round Robin) - 默认策略(Windows 除外):
javascript
// Master 进程负责分发连接
// 按顺序将连接分配给工作进程
const cluster = require('cluster');
if (cluster.isMaster) {
// 设置调度策略(默认就是 SCHED_RR)
cluster.schedulingPolicy = cluster.SCHED_RR;
for (let i = 0; i < 4; i++) {
cluster.fork();
}
}

- 共享端口(Shared Socket):
javascript
// Master 监听端口,并把已接受的连接句柄通过 IPC 传给选中的工作进程
// (若改用 cluster.SCHED_NONE,则由操作系统负责在工作进程间分配连接)
const cluster = require('cluster');
const net = require('net');
if (cluster.isMaster) {
// 创建监听器
const server = net.createServer({ pauseOnConnect: true }, (connection) => {
// 获取工作进程
const worker = getWorker(); // 自定义选择策略
worker.send('connection', connection);
});
server.listen(8000);
} else {
// Receive connection handles that the master passes over IPC.
process.on('message', (msg, connection) => {
  if (msg !== 'connection') {
    return;
  }
  // The socket can be closed while the handle is in transit, in which case
  // it arrives as undefined; there is nothing to handle then.
  if (!connection) {
    return;
  }
  // NOTE(review): the master creates the server with pauseOnConnect, so
  // handleConnection presumably has to call connection.resume() — confirm.
  handleConnection(connection);
});
}

自定义负载均衡:
javascript
const cluster = require('cluster');
if (cluster.isMaster) {
const workers = [];
// 创建工作进程
for (let i = 0; i < 4; i++) {
const worker = cluster.fork();
workers.push({
worker,
connections: 0,
cpu: 0
});
}
// 根据负载选择工作进程
/**
 * Pick the worker whose tracked entry reports the fewest connections.
 * On a tie the entry that appears first in `workers` wins.
 *
 * @returns the `worker` field of the least-loaded entry.
 */
function selectWorker() {
  let leastBusy = workers[0];
  for (const candidate of workers.slice(1)) {
    if (candidate.connections < leastBusy.connections) {
      leastBusy = candidate;
    }
  }
  return leastBusy.worker;
}
// 监听工作进程消息
Object.values(cluster.workers).forEach(worker => {
worker.on('message', (msg) => {
if (msg.type === 'stats') {
const w = workers.find(w => w.worker === worker);
if (w) {
w.connections = msg.connections;
w.cpu = msg.cpu;
}
}
});
});
}

4. 工作进程间通信
问题:集群中的工作进程如何通信?
答案:
进程间通信(IPC):
javascript
const cluster = require('cluster');
if (cluster.isMaster) {
const worker = cluster.fork();
// Master 向 Worker 发送消息
worker.send({
type: 'config',
data: { port: 3000, env: 'production' }
});
// 接收 Worker 消息
worker.on('message', (msg) => {
console.log('Message from worker:', msg);
});
} else {
// Worker 接收消息
process.on('message', (msg) => {
if (msg.type === 'config') {
console.log('Received config:', msg.data);
}
});
// Worker 向 Master 发送消息
process.send({
type: 'status',
pid: process.pid,
memory: process.memoryUsage()
});
}

共享数据:
javascript
const cluster = require('cluster');
// 使用 Redis 共享数据
const Redis = require('ioredis');
const redis = new Redis();
if (cluster.isMaster) {
// 创建工作进程
for (let i = 0; i < 4; i++) {
cluster.fork();
}
} else {
const express = require('express');
const app = express();
app.get('/counter', async (req, res) => {
// 使用 Redis 原子操作
const count = await redis.incr('counter');
res.json({ count, pid: process.pid });
});
app.listen(3000);
}

5. 集群的优雅关闭
问题:如何实现集群的优雅关闭?
答案:
优雅关闭策略:
javascript
const cluster = require('cluster');
const http = require('http');
if (cluster.isMaster) {
// 创建工作进程
for (let i = 0; i < 4; i++) {
cluster.fork();
}
// 优雅关闭处理
// Set once the shutdown sequence has started, so a second signal (e.g. SIGINT
// followed by SIGTERM) does not register duplicate listeners and timers.
let isShuttingDown = false;

/**
 * Gracefully stop the cluster.
 *
 * Asks every current worker to shut down, exits with code 0 once all of them
 * have exited (immediately when there are none), and force-exits with code 1
 * if that has not happened within 30 seconds.
 */
function shutdown() {
  if (isShuttingDown) {
    return;
  }
  isShuttingDown = true;
  console.log('Shutting down gracefully...');

  // Snapshot the workers we are waiting for. Tracking ids instead of a bare
  // counter means only exits of these workers count toward completion.
  const pendingIds = new Set(Object.keys(cluster.workers));
  if (pendingIds.size === 0) {
    console.log('All workers terminated');
    process.exit(0);
    return;
  }

  // Register the listener before notifying workers so no exit is missed.
  cluster.on('exit', (worker) => {
    pendingIds.delete(String(worker.id));
    if (pendingIds.size === 0) {
      console.log('All workers terminated');
      process.exit(0);
    }
  });

  // Ask each worker to stop accepting new connections. Sending on a closed
  // IPC channel fails, so skip workers that are already disconnected.
  for (const id of pendingIds) {
    const worker = cluster.workers[id];
    if (worker.isConnected()) {
      worker.send('shutdown');
    }
  }

  // Force the exit if workers do not finish in time. The timer is unref'd so
  // it never keeps the process alive on its own.
  const forceExitTimer = setTimeout(() => {
    console.error('Forced shutdown');
    process.exit(1);
  }, 30000);
  forceExitTimer.unref();
}
process.on('SIGTERM', shutdown);
process.on('SIGINT', shutdown);
} else {
const server = http.createServer((req, res) => {
res.end('Hello');
});
server.listen(8000);
// 接收关闭信号
// Handle the shutdown request sent by the master process.
process.on('message', (msg) => {
  if (msg !== 'shutdown') {
    return;
  }
  console.log(`Worker ${process.pid} shutting down...`);
  // Stop accepting new connections. The callback runs only after every open
  // connection has ended, so there is nothing left to wait for once it fires.
  server.close(() => {
    console.log(`Worker ${process.pid} server closed`);
    process.exit(0);
  });
  // Idle keep-alive sockets would otherwise hold the close callback back
  // until they time out. The method is feature-checked because older Node.js
  // releases do not provide it.
  if (typeof server.closeIdleConnections === 'function') {
    server.closeIdleConnections();
  }
});
}

6. 集群监控和管理
问题:如何监控和管理集群?
答案:
监控实现:
javascript
const cluster = require('cluster');
const os = require('os');
if (cluster.isMaster) {
const workers = new Map();
// 创建工作进程
for (let i = 0; i < os.cpus().length; i++) {
const worker = cluster.fork();
workers.set(worker.id, {
worker,
stats: {
requests: 0,
errors: 0,
startTime: Date.now()
}
});
}
// 定期收集统计信息
setInterval(() => {
workers.forEach((data, id) => {
data.worker.send({ type: 'getStats' });
});
}, 5000);
// 处理工作进程消息
// Merge stats reported by a worker into its tracked entry and log the result.
cluster.on('message', (sender, payload) => {
  if (payload.type !== 'stats') {
    return;
  }
  const entry = workers.get(sender.id);
  if (!entry) {
    return;
  }
  // Reported fields override the stored ones; fields the worker does not
  // report (such as startTime) are kept.
  entry.stats = Object.assign({}, entry.stats, payload.data);
  console.log(`Worker ${sender.id} stats:`, entry.stats);
});
// 健康检查
// Every 30 seconds, inspect each tracked worker and kill those whose error rate is too high.
setInterval(() => {
workers.forEach((data, id) => {
// Milliseconds since this entry's recorded startTime; computed but not used below.
const uptime = Date.now() - data.stats.startTime;
// With zero errors and zero requests this is 0 / 0 = NaN, and NaN > 0.1 is false,
// so a worker that has served nothing is left alone.
const errorRate = data.stats.errors / data.stats.requests;
if (errorRate > 0.1) { // error rate above 10%
console.warn(`Worker ${id} error rate too high, restarting...`);
data.worker.kill('SIGTERM');
}
});
}, 30000);
// 自动重启
// Replace every worker that exits and keep the tracking map in sync.
cluster.on('exit', (worker, code, signal) => {
  console.log(`Worker ${worker.id} died`);
  // Drop the dead worker's entry. Otherwise the map grows on every restart
  // and the periodic stats/health loops keep calling send()/kill() on a
  // worker that no longer exists.
  workers.delete(worker.id);
  const newWorker = cluster.fork();
  // The replacement starts with fresh counters and its own start time.
  workers.set(newWorker.id, {
    worker: newWorker,
    stats: {
      requests: 0,
      errors: 0,
      startTime: Date.now(),
    },
  });
});
} else {
let stats = {
requests: 0,
errors: 0
};
const http = require('http');
const server = http.createServer((req, res) => {
stats.requests++;
try {
// 处理请求
res.end('Hello');
} catch (error) {
stats.errors++;
res.statusCode = 500;
res.end('Error');
}
});
server.listen(8000);
// 响应统计请求
process.on('message', (msg) => {
if (msg.type === 'getStats') {
process.send({
type: 'stats',
data: {
...stats,
memory: process.memoryUsage(),
uptime: process.uptime()
}
});
}
});
}

7. 集群 vs PM2
问题:使用原生 cluster 模块和 PM2 有什么区别?
答案:
对比:
| 特性 | cluster 模块 | PM2 |
|---|---|---|
| 易用性 | 需要手动编写代码 | 命令行工具,开箱即用 |
| 功能 | 基本的进程管理 | 完整的进程管理、监控、日志 |
| 零停机部署 | 需要手动实现 | 内置支持 |
| 监控 | 需要自行实现 | 内置监控和 Web 界面 |
| 日志管理 | 需要自行实现 | 自动日志分割和管理 |
| 集群模式 | 需要编写代码 | pm2 start app.js -i max |
PM2 使用示例:
javascript
// ecosystem.config.js
module.exports = {
apps: [{
name: 'my-app',
script: './app.js',
instances: 'max', // 使用所有 CPU 核心
exec_mode: 'cluster',
env: {
NODE_ENV: 'development'
},
env_production: {
NODE_ENV: 'production'
},
max_memory_restart: '1G',
restart_delay: 3000,
max_restarts: 10,
min_uptime: '10s'
}]
};

bash
# 启动
pm2 start ecosystem.config.js
# 生产环境
pm2 start ecosystem.config.js --env production
# 监控
pm2 monit
# 日志
pm2 logs
# 重启
pm2 reload all

8. 集群模式的最佳实践
问题:使用集群模式有哪些最佳实践?
答案:
最佳实践:
- 状态外置:
javascript
// 不好:在工作进程中存储状态
let counter = 0;
app.get('/count', (req, res) => {
counter++;
res.json({ count: counter });
});
// 好:使用外部存储
const redis = require('redis');
const client = redis.createClient();
app.get('/count', async (req, res) => {
const count = await client.incr('counter');
res.json({ count });
});

- 会话共享:
javascript
const session = require('express-session');
const RedisStore = require('connect-redis')(session);
app.use(session({
store: new RedisStore({ client: redisClient }),
secret: 'keyboard cat',
resave: false,
saveUninitialized: false
}));

- 日志标记:
javascript
// 在日志中包含进程 ID
const winston = require('winston');
const logger = winston.createLogger({
format: winston.format.combine(
winston.format.label({ label: `PID:${process.pid}` }),
winston.format.timestamp(),
winston.format.json()
),
transports: [
new winston.transports.Console()
]
});
app.use((req, res, next) => {
logger.info(`${req.method} ${req.url}`);
next();
});

- 优雅的错误处理:
javascript
// 捕获未处理的错误
// Last-resort handler: log the error, try to drain the server, then exit.
process.on('uncaughtException', (err) => {
  console.error('Uncaught Exception:', err);
  // Stop accepting connections and exit once in-flight requests finish.
  server.close(() => {
    process.exit(1);
  });
  // server.close() only calls back after every open connection has ended, so
  // a lingering connection would keep a process in an unknown state alive
  // indefinitely. Cap the wait; the timer is unref'd so it never holds the
  // event loop open by itself.
  const forceExitTimer = setTimeout(() => {
    process.exit(1);
  }, 10000);
  forceExitTimer.unref();
});
process.on('unhandledRejection', (reason, promise) => {
console.error('Unhandled Rejection at:', promise, 'reason:', reason);
});

- 健康检查端点:
javascript
app.get('/health', (req, res) => {
res.json({
status: 'ok',
pid: process.pid,
uptime: process.uptime(),
memory: process.memoryUsage(),
timestamp: Date.now()
});
});

9. 集群的常见问题
问题:使用集群模式时常见的问题有哪些?如何解决?
答案:
常见问题:
- 端口占用:
javascript
// 错误:所有进程绑定到同一端口会冲突
app.listen(3000); // 错误!
// 正确:使用 cluster 模块的共享端口
if (cluster.isMaster) {
// Master 不绑定端口
for (let i = 0; i < numCPUs; i++) {
cluster.fork();
}
} else {
// Worker 共享端口
app.listen(3000); // 正确!
}

- 内存泄漏:
javascript
// 监控内存使用
if (cluster.isWorker) {
setInterval(() => {
const usage = process.memoryUsage();
if (usage.heapUsed > 500 * 1024 * 1024) { // 超过 500MB
console.warn('Memory usage high, exiting...');
process.exit(1); // Master 会重启工作进程
}
}, 60000);
}

- 数据库连接过多:
javascript
// 使用连接池限制连接数
const pool = mysql.createPool({
connectionLimit: 10, // 限制每个进程的连接数
host: 'localhost',
user: 'root',
password: 'password',
database: 'mydb'
});

- 文件描述符耗尽:
bash
# 增加文件描述符限制
ulimit -n 65535
# 或在 systemd 服务中配置
[Service]
LimitNOFILE=65535

10. 现代替代方案
问题:除了 cluster 模块,还有哪些现代的集群方案?
答案:
现代方案:
- Worker Threads(适合 CPU 密集型):
javascript
const { Worker, isMainThread, parentPort, workerData } = require('worker_threads');
if (isMainThread) {
// 主线程
const worker = new Worker(__filename, {
workerData: { start: 1, end: 1e9 }
});
worker.on('message', result => {
console.log('Result:', result);
});
} else {
// Worker 线程
const { start, end } = workerData;
let sum = 0;
for (let i = start; i <= end; i++) {
sum += i;
}
parentPort.postMessage(sum);
}

- 容器化部署(Docker + Kubernetes):
yaml
# deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: nodejs-app
spec:
replicas: 4 # 运行 4 个实例
selector:
matchLabels:
app: nodejs-app
template:
metadata:
labels:
app: nodejs-app
spec:
containers:
- name: nodejs
image: myapp:latest
resources:
limits:
memory: "512Mi"
cpu: "500m"

- Serverless(AWS Lambda、Vercel):
javascript
// 无需管理集群,平台自动扩展
exports.handler = async (event, context) => {
return {
statusCode: 200,
body: JSON.stringify({ message: 'Hello' })
};
};

- 反向代理(Nginx):
nginx
upstream nodejs_backend {
least_conn; # 最少连接数负载均衡
server 127.0.0.1:3000;
server 127.0.0.1:3001;
server 127.0.0.1:3002;
server 127.0.0.1:3003;
}
server {
listen 80;
location / {
proxy_pass http://nodejs_backend;
proxy_http_version 1.1;
}
}