Skip to content

第78天:个人助理Agent-需求分析与架构设计

学习目标

  • 掌握Agent需求分析
  • 学习Agent架构设计
  • 理解任务规划机制
  • 掌握工具调用设计
  • 学习记忆系统设计

需求分析

用户需求

python
class AgentUserRequirements:
    """Catalogue of what end users expect from a personal-assistant agent.

    ``requirements`` maps a category key to a dict holding a human-readable
    ``description`` and the list of concrete ``needs`` in that category.
    """

    def __init__(self):
        # (category key, description, needs) rows; expanded into the nested
        # dict layout right below.
        catalogue = (
            ("productivity", "生产力需求", [
                "任务管理",
                "日程安排",
                "信息检索",
                "文档处理",
                "邮件管理",
            ]),
            ("personalization", "个性化需求", [
                "学习用户偏好",
                "适应工作习惯",
                "提供个性化建议",
                "记住重要信息",
                "理解上下文",
            ]),
            ("autonomy", "自主性需求", [
                "自主规划任务",
                "自动执行操作",
                "主动提供建议",
                "自我优化",
                "处理异常情况",
            ]),
            ("integration", "集成需求", [
                "集成常用工具",
                "跨平台同步",
                "API接口",
                "第三方服务",
                "本地应用",
            ]),
        )
        self.requirements = {
            category: {"description": label, "needs": needs}
            for category, label, needs in catalogue
        }

    def analyze_requirements(self) -> Dict:
        """Return each category with its description, needs and derived priority.

        The ``needs`` list in the result is the same list object stored in
        ``self.requirements`` (it is not copied).
        """
        return {
            category: {
                "description": info["description"],
                "needs": info["needs"],
                "priority": self._calculate_priority(info["needs"]),
            }
            for category, info in self.requirements.items()
        }

    def _calculate_priority(self, needs: List[str]) -> str:
        """Grade a list of needs by how many contain a high-priority keyword.

        Returns "高" for three or more matching needs, "中" for exactly two,
        and "低" otherwise. A need counts once no matter how many keywords
        it contains.
        """
        markers = (
            "任务管理",
            "日程安排",
            "自主规划",
            "自动执行",
            "集成常用工具",
        )

        hits = 0
        for need in needs:
            for marker in markers:
                if marker in need:
                    hits += 1
                    break  # one hit per need is enough

        if hits >= 3:
            return "高"
        if hits >= 2:
            return "中"
        return "低"

功能需求

python
class AgentFunctionalRequirements:
    """Functional modules of the agent and the features each one provides.

    ``modules`` maps a module key to a dict with a ``description`` and the
    list of ``features`` (feature names) belonging to that module.
    """

    # Feature name -> one-line explanation, used by ``_describe_feature``.
    _FEATURE_DESCRIPTIONS = {
        "自然语言理解": "理解用户的自然语言输入",
        "意图识别": "识别用户的意图和目标",
        "实体提取": "提取关键实体和信息",
        "上下文理解": "理解对话上下文",
        "多模态输入": "支持文本、语音、图像等多种输入",
        "任务分解": "将复杂任务分解为子任务",
        "步骤规划": "规划任务执行的步骤",
        "资源分配": "分配执行任务所需的资源",
        "时间安排": "安排任务执行的时间",
        "优先级排序": "根据重要性排序任务",
        "工具调用": "调用外部工具完成任务",
        "任务执行": "执行具体的任务操作",
        "状态监控": "监控任务执行状态",
        "错误处理": "处理执行过程中的错误",
        "结果验证": "验证任务执行结果",
        "短期记忆": "存储临时信息",
        "长期记忆": "存储重要信息和知识",
        "知识库": "存储和管理知识",
        "经验学习": "从执行经验中学习",
        "记忆检索": "检索相关的记忆信息",
        "用户偏好学习": "学习用户的偏好和习惯",
        "模式识别": "识别用户行为模式",
        "反馈学习": "从用户反馈中学习",
        "自我优化": "优化自身行为和策略",
        "知识更新": "更新和扩充知识库",
    }

    # Features graded "高"; every other feature is graded "中".
    _HIGH_PRIORITY_FEATURES = (
        "自然语言理解",
        "意图识别",
        "任务分解",
        "工具调用",
        "任务执行",
        "短期记忆",
        "长期记忆",
        "用户偏好学习",
    )

    def __init__(self):
        # (module key, description, features) rows, expanded below.
        module_table = (
            ("perception", "感知模块", [
                "自然语言理解",
                "意图识别",
                "实体提取",
                "上下文理解",
                "多模态输入",
            ]),
            ("planning", "规划模块", [
                "任务分解",
                "步骤规划",
                "资源分配",
                "时间安排",
                "优先级排序",
            ]),
            ("execution", "执行模块", [
                "工具调用",
                "任务执行",
                "状态监控",
                "错误处理",
                "结果验证",
            ]),
            ("memory", "记忆模块", [
                "短期记忆",
                "长期记忆",
                "知识库",
                "经验学习",
                "记忆检索",
            ]),
            ("learning", "学习模块", [
                "用户偏好学习",
                "模式识别",
                "反馈学习",
                "自我优化",
                "知识更新",
            ]),
        )
        self.modules = {
            key: {"description": label, "features": features}
            for key, label, features in module_table
        }

    def get_functional_spec(self) -> Dict:
        """Expand every module's feature names into name/description/priority records."""
        spec = {}

        for module_name, module_info in self.modules.items():
            feature_records = []
            for feature in module_info["features"]:
                feature_records.append({
                    "name": feature,
                    "description": self._describe_feature(feature),
                    "priority": self._determine_priority(feature),
                })

            spec[module_name] = {
                "description": module_info["description"],
                "features": feature_records,
            }

        return spec

    def _describe_feature(self, feature: str) -> str:
        """Return the explanation for ``feature``, or the name itself when unknown."""
        return self._FEATURE_DESCRIPTIONS.get(feature, feature)

    def _determine_priority(self, feature: str) -> str:
        """Return "高" for features on the high-priority list, otherwise "中"."""
        return "高" if feature in self._HIGH_PRIORITY_FEATURES else "中"

Agent架构设计

整体架构

python
class AgentArchitecture:
    """Layered architecture of the agent and the data flow between layers.

    ``layers`` maps a layer key to a dict with the layer's display ``name``
    and the list of ``components`` it contains.
    """

    def __init__(self):
        # (layer key, display name, components) rows, expanded below.
        layer_table = (
            ("interface", "接口层", [
                "自然语言接口",
                "语音接口",
                "图像接口",
                "API接口",
                "Web界面",
            ]),
            ("perception", "感知层", [
                "NLU引擎",
                "意图识别器",
                "实体提取器",
                "上下文理解器",
                "多模态处理器",
            ]),
            ("planning", "规划层", [
                "任务分解器",
                "规划器",
                "调度器",
                "优化器",
                "决策器",
            ]),
            ("execution", "执行层", [
                "工具管理器",
                "执行器",
                "监控器",
                "错误处理器",
                "验证器",
            ]),
            ("memory", "记忆层", [
                "短期记忆",
                "长期记忆",
                "知识库",
                "经验库",
                "记忆管理器",
            ]),
            ("learning", "学习层", [
                "偏好学习器",
                "模式识别器",
                "反馈处理器",
                "优化器",
                "知识更新器",
            ]),
        )
        self.layers = {
            key: {"name": title, "components": parts}
            for key, title, parts in layer_table
        }

    def get_architecture(self) -> Dict:
        """Return the layer mapping (the stored dict itself, not a copy)."""
        return self.layers

    def get_data_flow(self) -> List[Dict]:
        """Return the directed edges between layers as from/to/description dicts.

        A fresh list is built on every call.
        """
        # (source layer, target layer, what travels along the edge)
        links = (
            ("interface", "perception", "用户输入"),
            ("perception", "planning", "理解结果"),
            ("planning", "execution", "执行计划"),
            ("execution", "memory", "存储执行结果"),
            ("memory", "learning", "提供学习数据"),
            ("learning", "memory", "更新记忆"),
            ("memory", "planning", "提供上下文"),
        )

        flow = []
        for source, target, label in links:
            flow.append({"from": source, "to": target, "description": label})
        return flow

核心组件

python
class AgentCoreComponents:
    """Registry describing the agent's core building blocks.

    Every entry in ``components`` carries ``name``, ``description`` and
    ``capabilities``, followed by one component-specific list
    (``models``, ``tools``, ``types``, ``algorithms`` or ``methods``).
    """

    def __init__(self):
        def describe(name, description, capabilities, **extras):
            # Common fields first, then the component-specific list, so the
            # key order of each entry is name/description/capabilities/<extra>.
            entry = {
                "name": name,
                "description": description,
                "capabilities": capabilities,
            }
            entry.update(extras)
            return entry

        self.components = {
            "llm_engine": describe(
                "大语言模型引擎",
                "核心推理引擎,负责理解和生成",
                ["自然语言理解", "任务推理", "决策生成", "文本生成"],
                models=["GPT-4o", "Claude-3.5-Sonnet", "Gemini-1.5-Pro"],
            ),
            "tool_registry": describe(
                "工具注册表",
                "管理和注册所有可用工具",
                ["工具注册", "工具发现", "工具调用", "工具监控"],
                tools=["搜索工具", "日历工具", "邮件工具", "文件工具", "API工具"],
            ),
            "memory_system": describe(
                "记忆系统",
                "存储和管理Agent的记忆",
                ["记忆存储", "记忆检索", "记忆更新", "记忆清理"],
                types=["短期记忆", "长期记忆", "知识库", "经验库"],
            ),
            "planning_engine": describe(
                "规划引擎",
                "规划和分解任务",
                ["任务分解", "步骤规划", "资源分配", "时间安排"],
                algorithms=["层次规划", "前向搜索", "回溯算法", "启发式搜索"],
            ),
            "learning_engine": describe(
                "学习引擎",
                "学习和优化Agent行为",
                ["偏好学习", "模式识别", "反馈学习", "自我优化"],
                methods=["强化学习", "监督学习", "无监督学习", "迁移学习"],
            ),
        }

    def get_components(self) -> Dict:
        """Return the component registry (the stored dict itself, not a copy)."""
        return self.components

任务规划机制

任务分解

python
class TaskDecomposer:
    """Split a free-form task into subtasks via an LLM and build their graph.

    ``llm_client`` must expose an OpenAI-style
    ``chat.completions.create(...)`` call. Both synchronous and asynchronous
    clients are accepted; a synchronous client blocks the event loop for the
    duration of the request.
    """

    def __init__(self, llm_client):
        self.llm_client = llm_client

    async def decompose_task(
        self,
        task: str,
        context: Optional[Dict] = None
    ) -> List[Dict]:
        """Ask the LLM to decompose ``task`` and return the subtask dicts.

        Args:
            task: Description of the task to decompose.
            context: Optional extra information; it is interpolated into the
                prompt as-is ("无" when empty or missing).

        Returns:
            The list found under the ``"subtasks"`` key of the JSON reply
            (a bare JSON list is accepted as well). An empty list is returned
            when the request fails, the reply is not valid JSON, or it does
            not contain a list of subtasks; the failure is logged.
        """
        import inspect
        import json
        import logging

        # The reply is read from the "subtasks" key below, so the prompt has
        # to name that key explicitly; otherwise the model is free to pick
        # any key and a perfectly valid reply would be parsed as "no subtasks".
        prompt = f"""请将以下任务分解为可执行的子任务:

任务:{task}

上下文:
{context if context else "无"}

请以JSON对象格式返回,顶层键为"subtasks",其值为子任务列表,每个子任务包含:
- id: 子任务ID
- name: 子任务名称
- description: 子任务描述
- dependencies: 依赖的子任务ID列表
- estimated_time: 预估时间(分钟)
- priority: 优先级(high/medium/low)"""

        try:
            completion = self.llm_client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "你是一个专业的任务分解器"},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                response_format={"type": "json_object"}
            )
            # Async clients hand back a coroutine; without awaiting it the
            # attribute access below would fail and every call would return [].
            if inspect.isawaitable(completion):
                completion = await completion

            payload = json.loads(completion.choices[0].message.content)
        except Exception:
            # Best-effort contract: callers get an empty list, but the cause
            # is recorded instead of disappearing silently.
            logging.getLogger(__name__).exception("Task decomposition failed")
            return []

        subtasks = payload.get("subtasks", []) if isinstance(payload, dict) else payload
        return subtasks if isinstance(subtasks, list) else []

    def build_task_graph(
        self,
        subtasks: List[Dict]
    ) -> Dict:
        """Turn subtask dicts into a ``{"nodes": [...], "edges": [...]}`` graph.

        Each subtask becomes a node; each entry of its ``dependencies``
        becomes an edge pointing from the dependency to the subtask.

        Args:
            subtasks: Subtask dicts as returned by ``decompose_task``. ``id``
                is mandatory. Because the dicts come from model output, the
                remaining fields fall back to neutral defaults when absent
                (empty name/description, 0 minutes, "medium" priority, no
                dependencies).

        Raises:
            KeyError: If a subtask has no ``id``.
        """
        graph = {
            "nodes": [],
            "edges": []
        }

        for subtask in subtasks:
            subtask_id = subtask["id"]

            graph["nodes"].append({
                "id": subtask_id,
                "name": subtask.get("name", ""),
                "description": subtask.get("description", ""),
                "estimated_time": subtask.get("estimated_time", 0),
                "priority": subtask.get("priority", "medium")
            })

            # ``or []`` also covers an explicit ``"dependencies": null``.
            for dep_id in subtask.get("dependencies") or []:
                graph["edges"].append({
                    "from": dep_id,
                    "to": subtask_id
                })

        return graph

任务规划

python
class TaskPlanner:
    """Ask an LLM to order the nodes of a task graph into an execution plan.

    ``llm_client`` must expose an OpenAI-style
    ``chat.completions.create(...)`` call. Both synchronous and asynchronous
    clients are accepted; a synchronous client blocks the event loop for the
    duration of the request.
    """

    def __init__(self, llm_client):
        self.llm_client = llm_client

    async def plan_execution(
        self,
        task_graph: Dict,
        constraints: Optional[Dict] = None
    ) -> Dict:
        """Return an execution plan for ``task_graph``.

        Args:
            task_graph: Dict with ``"nodes"`` (each having ``id``, ``name``
                and ``estimated_time``) and ``"edges"`` (each having ``from``
                and ``to``).
            constraints: Optional constraints, interpolated into the prompt
                as-is ("无" when empty or missing).

        Returns:
            The JSON object produced by the model; the prompt asks for the
            keys ``sequence``, ``total_time`` and ``critical_path``. When the
            request fails or the reply is not a JSON object, a fallback plan
            is returned instead: nodes in their given order, ``total_time``
            0 and an empty ``critical_path``. The failure is logged.
        """
        import inspect
        import json
        import logging

        prompt = f"""请规划以下任务的执行顺序:

任务图:
{self._format_task_graph(task_graph)}

约束条件:
{constraints if constraints else "无"}

请以JSON格式返回执行计划,包含:
- sequence: 执行顺序(子任务ID列表)
- total_time: 总预估时间
- critical_path: 关键路径"""

        try:
            completion = self.llm_client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "你是一个专业的任务规划器"},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                response_format={"type": "json_object"}
            )
            # Async clients hand back a coroutine; without awaiting it the
            # attribute access below would fail and the fallback would always
            # be returned.
            if inspect.isawaitable(completion):
                completion = await completion

            plan = json.loads(completion.choices[0].message.content)
        except Exception:
            logging.getLogger(__name__).exception("Task planning failed")
            return self._fallback_plan(task_graph)

        # A JSON list or scalar is not a usable plan.
        if not isinstance(plan, dict):
            logging.getLogger(__name__).warning(
                "Task planning returned %s instead of a JSON object",
                type(plan).__name__,
            )
            return self._fallback_plan(task_graph)

        return plan

    @staticmethod
    def _fallback_plan(task_graph: Dict) -> Dict:
        """Build the plan used when the LLM gives no usable answer."""
        return {
            "sequence": [node["id"] for node in task_graph["nodes"]],
            "total_time": 0,
            "critical_path": []
        }

    def _format_task_graph(self, task_graph: Dict) -> str:
        """Render nodes and dependency edges as the bullet lists used in the prompt."""
        nodes_str = "\n".join(
            f"- {node['id']}: {node['name']} ({node['estimated_time']}分钟)"
            for node in task_graph["nodes"]
        )

        edges_str = "\n".join(
            f"- {edge['from']} -> {edge['to']}"
            for edge in task_graph["edges"]
        )

        return f"节点:\n{nodes_str}\n\n依赖关系:\n{edges_str}"

工具调用设计

MCP工具集成

python
class MCPToolManager:
    def __init__(self):
        self.tools = {}
        self.tool_schemas = {}
    
    def register_tool(
        self,
        name: str,
        schema: Dict,
        handler: callable
    ):
        self.tools[name] = handler
        self.tool_schemas[name] = schema
    
    async def call_tool(
        self,
        name: str,
        parameters: Dict
    ) -> Dict:
        if name not in self.tools:
            raise ValueError(f"工具不存在: {name}")
        
        schema = self.tool_schemas[name]
        
        self._validate_parameters(parameters, schema)
        
        handler = self.tools[name]
        
        try:
            result = await handler(parameters)
            
            return {
                "success": True,
                "result": result
            }
        
        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }
    
    def _validate_parameters(
        self,
        parameters: Dict,
        schema: Dict
    ):
        from pydantic import BaseModel, ValidationError
        
        try:
            model = type(
                "ToolParameters",
                (BaseModel,),
                {
                    "__annotations__": {
                        key: self._get_type(value)
                        for key, value in schema["properties"].items()
                    }
                }
            )
            
            model(**parameters)
        
        except ValidationError as e:
            raise ValueError(f"参数验证失败: {e}")
    
    def _get_type(self, property_schema: Dict) -> type:
        type_mapping = {
            "string": str,
            "number": float,
            "integer": int,
            "boolean": bool,
            "array": list,
            "object": dict
        }
        
        return type_mapping.get(
            property_schema.get("type", "string"),
            str
        )
    
    def get_tool_schemas(self) -> Dict:
        return self.tool_schemas
    
    def discover_tools(self, prompt: str) -> List[str]:
        relevant_tools = []
        
        for tool_name, schema in self.tool_schemas.items():
            description = schema.get("description", "")
            
            if self._is_relevant(prompt, description):
                relevant_tools.append(tool_name)
        
        return relevant_tools
    
    def _is_relevant(self, prompt: str, description: str) -> bool:
        keywords = prompt.lower().split()
        description_lower = description.lower()
        
        return any(
            keyword in description_lower
            for keyword in keywords
        )

常用工具实现

python
class CommonTools:
    """Ready-made tool handlers; each takes one parameter dict and returns a dict.

    The calendar and e-mail handlers are stubs: they only echo the request
    back with a generated id and do not contact any service.
    """

    @staticmethod
    async def search_web(parameters: Dict) -> Dict:
        """Query the DuckDuckGo API and return its ``RelatedTopics``.

        Args:
            parameters: Must contain ``query``.

        Returns:
            ``{"query": ..., "results": [...]}``.

        Raises:
            KeyError: If ``query`` is missing.
            RuntimeError: If the request or the decoding of its reply fails.
        """
        query = parameters["query"]

        import requests

        try:
            # NOTE: ``requests`` is synchronous, so this call blocks the event
            # loop; the timeout bounds how long that can last.
            response = requests.get(
                "https://api.duckduckgo.com/",
                # ``format=json`` requests a JSON reply, which the
                # ``response.json()`` call below depends on.
                params={"q": query, "format": "json"},
                timeout=10
            )
            response.raise_for_status()
            related_topics = response.json().get("RelatedTopics", [])
        except Exception as e:
            raise RuntimeError(f"搜索失败: {str(e)}") from e

        return {
            "query": query,
            "results": related_topics
        }

    @staticmethod
    async def create_calendar_event(parameters: Dict) -> Dict:
        """Return a record describing a newly created calendar event (stub).

        Args:
            parameters: Must contain ``title`` and ``start_time``; ``end_time``
                (default ``None``) and ``description`` (default ``""``) are
                optional.

        Raises:
            KeyError: If ``title`` or ``start_time`` is missing.
        """
        import uuid

        title = parameters["title"]
        start_time = parameters["start_time"]

        return {
            # A random UUID gives every event its own id. ``hash(title)`` is
            # salted per process, can be negative, and is identical for two
            # events that merely share a title.
            "event_id": f"evt_{uuid.uuid4().hex}",
            "title": title,
            "start_time": start_time,
            "end_time": parameters.get("end_time"),
            "description": parameters.get("description", ""),
            "status": "created"
        }

    @staticmethod
    async def send_email(parameters: Dict) -> Dict:
        """Return a record describing a sent e-mail (stub).

        Args:
            parameters: Must contain ``to``, ``subject`` and ``body``.

        Raises:
            KeyError: If any of the three keys is missing.
        """
        import uuid

        to = parameters["to"]
        subject = parameters["subject"]
        # Indexed (not ``.get``) so that ``body`` stays mandatory even though
        # this stub does not include it in the returned record.
        body = parameters["body"]  # noqa: F841

        return {
            # Random id for the same reasons as in ``create_calendar_event``.
            "email_id": f"email_{uuid.uuid4().hex}",
            "to": to,
            "subject": subject,
            "status": "sent"
        }

    @staticmethod
    async def read_file(parameters: Dict) -> Dict:
        """Read a UTF-8 text file and return its content.

        Args:
            parameters: Must contain ``file_path``.

        Raises:
            KeyError: If ``file_path`` is missing.
            RuntimeError: If the file cannot be opened or decoded.
        """
        file_path = parameters["file_path"]

        # NOTE(security): the path is used exactly as supplied. If parameters
        # can originate from model output or other untrusted input, restrict
        # them to an allowed directory before exposing this tool.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except Exception as e:
            raise RuntimeError(f"读取文件失败: {str(e)}") from e

        return {
            "file_path": file_path,
            "content": content,
            "status": "success"
        }

    @staticmethod
    async def write_file(parameters: Dict) -> Dict:
        """Write text to a file as UTF-8, replacing any existing content.

        Args:
            parameters: Must contain ``file_path`` and ``content``.

        Raises:
            KeyError: If ``file_path`` or ``content`` is missing.
            RuntimeError: If the file cannot be opened or written.
        """
        file_path = parameters["file_path"]
        content = parameters["content"]

        # NOTE(security): the path is used exactly as supplied and the file is
        # truncated; see the note in ``read_file``.
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
        except Exception as e:
            raise RuntimeError(f"写入文件失败: {str(e)}") from e

        return {
            "file_path": file_path,
            "status": "success"
        }

记忆系统设计

记忆存储

python
class MemoryStorage:
    """In-process store for the agent's four kinds of memory.

    Attributes are plain containers: ``short_term_memory`` (bounded list,
    oldest first), ``long_term_memory`` (key -> record), ``knowledge_base``
    (key -> record) and ``experience_base`` (list, oldest first).
    Timestamps are ``datetime.now().isoformat()`` strings.
    """

    def __init__(self, short_term_capacity: int = 100):
        """Create an empty store.

        Args:
            short_term_capacity: Maximum number of short-term memories kept;
                the oldest entries are dropped once it is exceeded.
        """
        self.short_term_memory = []
        self.long_term_memory = {}
        self.knowledge_base = {}
        self.experience_base = []
        self.short_term_capacity = short_term_capacity
        # Ever-increasing sequence number for short-term ids. Deriving the id
        # from the list length repeats the same id for every entry once the
        # list is full, because the length no longer grows.
        self._short_term_counter = 0

    def add_short_term(
        self,
        content: str,
        metadata: Optional[Dict] = None
    ) -> str:
        """Append a short-term memory and return its id (``stm_<n>``)."""
        memory = {
            "id": f"stm_{self._short_term_counter}",
            "content": content,
            "metadata": metadata or {},
            "timestamp": datetime.now().isoformat()
        }
        self._short_term_counter += 1

        self.short_term_memory.append(memory)

        # Drop the oldest entries beyond the capacity.
        overflow = len(self.short_term_memory) - self.short_term_capacity
        if overflow > 0:
            del self.short_term_memory[:overflow]

        return memory["id"]

    def add_long_term(
        self,
        key: str,
        content: str,
        metadata: Optional[Dict] = None
    ):
        """Store (or overwrite) the long-term memory under ``key``."""
        # One clock reading so a fresh record has created_at == accessed_at.
        now = datetime.now().isoformat()

        self.long_term_memory[key] = {
            "content": content,
            "metadata": metadata or {},
            "created_at": now,
            "accessed_at": now
        }

    def add_knowledge(
        self,
        key: str,
        knowledge: str,
        confidence: float = 1.0
    ):
        """Store (or overwrite) a knowledge entry under ``key``."""
        self.knowledge_base[key] = {
            "knowledge": knowledge,
            "confidence": confidence,
            "created_at": datetime.now().isoformat()
        }

    def add_experience(
        self,
        experience: Dict
    ):
        """Append a timestamped copy of ``experience``.

        The caller's dict is left untouched; the stored copy gets a
        ``timestamp`` key (replacing one that may already be present).
        """
        self.experience_base.append({
            **experience,
            "timestamp": datetime.now().isoformat()
        })

    def retrieve_short_term(
        self,
        limit: int = 10
    ) -> List[Dict]:
        """Return up to ``limit`` most recent short-term memories, oldest first.

        A ``limit`` of zero or less yields an empty list.
        """
        # Guard needed because ``seq[-0:]`` is the whole sequence.
        if limit <= 0:
            return []

        return self.short_term_memory[-limit:]

    def retrieve_long_term(
        self,
        key: str
    ) -> Optional[Dict]:
        """Return the record stored under ``key`` and refresh its ``accessed_at``.

        Returns ``None`` when the key is unknown. The returned dict is the
        stored record itself, not a copy.
        """
        memory = self.long_term_memory.get(key)
        if memory is None:
            return None

        memory["accessed_at"] = datetime.now().isoformat()
        return memory

    def retrieve_knowledge(
        self,
        query: str
    ) -> List[Dict]:
        """Return knowledge entries whose key contains ``query`` (case-insensitive).

        Only keys are searched, not the knowledge text. Each result is a new
        dict holding ``key`` plus the stored fields.
        """
        needle = query.lower()

        return [
            {"key": key, **knowledge}
            for key, knowledge in self.knowledge_base.items()
            if needle in key.lower()
        ]

    def retrieve_experience(
        self,
        query: str,
        limit: int = 5
    ) -> List[Dict]:
        """Return up to ``limit`` experiences mentioning ``query``, newest first.

        An experience matches when ``query`` occurs (case-insensitively) in
        its ``str()`` representation. A ``limit`` of zero or less yields an
        empty list.
        """
        if limit <= 0:
            return []

        needle = query.lower()
        results = []

        for exp in reversed(self.experience_base):
            if needle in str(exp).lower():
                results.append(exp)
                if len(results) >= limit:
                    break

        return results

记忆检索

python
class MemoryRetriever:
    """Collect the memories relevant to a query from a ``MemoryStorage``.

    Short-term memories are filtered by an LLM; knowledge and experience are
    looked up through the storage's own query methods. ``llm_client`` must
    expose an OpenAI-style ``chat.completions.create(...)`` call; both
    synchronous and asynchronous clients are accepted.
    """

    # The annotation is quoted so that defining this class does not require
    # ``MemoryStorage`` to be bound yet.
    def __init__(self, storage: "MemoryStorage", llm_client):
        self.storage = storage
        self.llm_client = llm_client

    async def retrieve_relevant_memories(
        self,
        query: str,
        context: Optional[Dict] = None
    ) -> Dict:
        """Return the memories related to ``query``, grouped by memory kind.

        Args:
            query: Text to search for.
            context: Accepted for interface compatibility; currently unused.

        Returns:
            Dict with the keys ``short_term``, ``knowledge`` and
            ``experience``.
        """
        recent = self.storage.retrieve_short_term()
        knowledge = self.storage.retrieve_knowledge(query)
        experience = self.storage.retrieve_experience(query)

        return {
            "short_term": await self._filter_relevant(query, recent),
            "knowledge": knowledge,
            "experience": experience
        }

    async def _filter_relevant(
        self,
        query: str,
        memories: List[Dict]
    ) -> List[Dict]:
        """Keep the memories whose id the LLM names as relevant to ``query``.

        Returns an empty list for empty input. If the LLM call fails, all
        ``memories`` are returned unfiltered (best effort) and the failure is
        logged.
        """
        import inspect
        import logging
        import re

        if not memories:
            return []

        prompt = f"""请从以下记忆中筛选出与查询相关的记忆:

查询:{query}

记忆:
{self._format_memories(memories)}

请返回相关记忆的ID列表,用逗号分隔。"""

        try:
            completion = self.llm_client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "你是一个专业的记忆检索器"},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=200
            )
            # Async clients hand back a coroutine that has to be awaited.
            if inspect.isawaitable(completion):
                completion = await completion

            reply = completion.choices[0].message.content or ""
        except Exception:
            logging.getLogger(__name__).exception(
                "Memory filtering failed; returning unfiltered memories"
            )
            return memories

        def mentioned(memory_id) -> bool:
            # Look for the id as a standalone token rather than splitting the
            # reply on ",": the model may answer with full-width commas, line
            # breaks or surrounding prose. The look-arounds stop "stm_1" from
            # matching inside "stm_10".
            pattern = (
                r"(?<![A-Za-z0-9_])"
                + re.escape(str(memory_id))
                + r"(?![A-Za-z0-9_])"
            )
            return re.search(pattern, reply) is not None

        return [memory for memory in memories if mentioned(memory["id"])]

    def _format_memories(self, memories: List[Dict]) -> str:
        """Render memories as ``- <id>: <content>`` lines for the prompt."""
        return "\n".join(
            f"- {mem['id']}: {mem['content']}"
            for mem in memories
        )

实践练习

练习1:分析Agent需求

python
def analyze_agent_requirements():
    """Exercise 1: return ``(user requirement analysis, functional spec)``.

    The first element comes from ``AgentUserRequirements.analyze_requirements``
    and the second from ``AgentFunctionalRequirements.get_functional_spec``.
    """
    return (
        AgentUserRequirements().analyze_requirements(),
        AgentFunctionalRequirements().get_functional_spec(),
    )

练习2:设计Agent架构

python
def design_agent_architecture():
    """Exercise 2: return ``(layer architecture, core components)``.

    The first element comes from ``AgentArchitecture.get_architecture`` and
    the second from ``AgentCoreComponents.get_components``.
    """
    return (
        AgentArchitecture().get_architecture(),
        AgentCoreComponents().get_components(),
    )

练习3:实现记忆系统

python
def implement_memory_system():
    """Exercise 3: create and return a new, empty ``MemoryStorage``."""
    return MemoryStorage()

总结

本节我们学习了个人助理Agent的需求分析与架构设计:

  1. Agent需求分析
  2. Agent架构设计
  3. 任务规划机制
  4. 工具调用设计
  5. 记忆系统设计

Agent是AI应用的高级形式,需要综合运用多种技术。

参考资源