"""数字人 Agent 与交互体验设计完整实现"""
import time
import json
import hashlib
import secrets
from typing import Dict, List, Any, Optional, Tuple, Set
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
import numpy as np
from collections import deque, defaultdict
import statistics
import threading
import asyncio
from abc import ABC, abstractmethod
import uuid
import math
class EmotionType(Enum):
    """Emotion categories; each member's value is its lowercase English label."""

    NEUTRAL = "neutral"      # neutral
    HAPPY = "happy"          # happiness
    SAD = "sad"              # sadness
    ANGRY = "angry"          # anger
    SURPRISED = "surprised"  # surprise
    FEARFUL = "fearful"      # fear
    DISGUSTED = "disgusted"  # disgust
class InteractionMode(Enum):
    """Interaction channels; each member's value is its snake_case label."""

    VOICE = "voice"                # speech
    TEXT = "text"                  # typed text
    GESTURE = "gesture"            # hand/body gestures
    EYE_TRACKING = "eye_tracking"  # eye tracking
    MULTIMODAL = "multimodal"      # several channels combined
class PersonalityTrait(Enum):
    """Personality trait dimensions; each member's value is its lowercase name."""

    OPENNESS = "openness"                    # openness
    CONSCIENTIOUSNESS = "conscientiousness"  # conscientiousness
    EXTRAVERSION = "extraversion"            # extraversion
    AGREEABLENESS = "agreeableness"          # agreeableness
    NEUROTICISM = "neuroticism"              # neuroticism
@dataclass
class FacialExpression:
    """A facial expression: an emotion rendered as blend-shape weights."""

    expression_id: str               # identifier of this expression
    emotion: EmotionType             # emotion being expressed
    intensity: float                 # 0.0 - 1.0
    blend_shapes: Dict[str, float]   # blend-shape name -> weight
    duration_ms: int                 # duration, in milliseconds
    transition_ms: int               # transition time, in milliseconds
@dataclass
class BodyGesture:
    """A body gesture described by per-joint 3-tuples of floats."""

    gesture_id: str                                          # identifier of this gesture
    name: str                                                # gesture name
    joint_positions: Dict[str, Tuple[float, float, float]]   # joint name -> 3 floats
    duration_ms: int                                         # duration, in milliseconds
    context: str                                             # context label for the gesture
@dataclass
class VoiceProfile:
    """Voice characteristics used when the digital human speaks."""

    profile_id: str   # identifier of this voice profile
    pitch: float      # pitch
    speed: float      # speaking rate
    volume: float     # loudness
    tone: str         # timbre
    language: str     # language tag (the demo in this file uses "zh-CN")
    accent: str       # accent label
@dataclass
class DigitalHumanProfile:
    """Profile of a digital human: identity, personality, voice and capabilities."""

    human_id: str                                      # identifier of the digital human
    name: str                                          # display name
    age: int                                           # age
    gender: str                                        # gender label
    appearance_model: str                              # name of the appearance model
    personality_traits: Dict[PersonalityTrait, float]  # trait -> score
    voice_profile: VoiceProfile                        # voice characteristics
    default_emotion: EmotionType                       # emotion a new conversation starts with
    knowledge_domains: List[str]                       # knowledge domain labels
    interaction_modes: List[InteractionMode]           # supported interaction channels
@dataclass
class ConversationContext:
    """State of one conversation between a user and a digital human."""

    context_id: str                              # identifier of this conversation
    user_id: str                                 # identifier of the user
    digital_human_id: str                        # identifier of the digital human
    conversation_history: List[Dict[str, Any]]   # message records, oldest first
    current_emotion: EmotionType                 # digital human's current emotion
    user_emotion: Optional[EmotionType]          # user's last known emotion, if any
    interaction_mode: InteractionMode            # channel used for this conversation
    started_at: datetime                         # when the conversation was created
    updated_at: datetime                         # when the conversation was last updated
class EmotionEngine:
    """
    Emotion engine.

    Supports:
    1. Emotion recognition (keyword matching on text)
    2. Emotion generation (from personality traits)
    3. Emotion transition (from a textual stimulus)
    4. Emotion expression (facial blend-shape weights)
    """

    # Keyword tables for the simplified recogniser. ASCII keywords are matched
    # as whole words; all other keywords are matched as substrings.
    _HAPPY_KEYWORDS = ('开心', '高兴', '快乐', '好', '棒', 'excellent', 'great', 'happy')
    _SAD_KEYWORDS = ('难过', '伤心', '悲伤', '不好', 'sad', 'unhappy')
    _ANGRY_KEYWORDS = ('生气', '愤怒', '讨厌', 'angry', 'mad')

    # Per-emotion multipliers applied to the intensity for each blend shape.
    # Emotions without an entry leave every blend shape at 0.0.
    _BLEND_SHAPE_FACTORS = {
        EmotionType.HAPPY: (
            ("mouth_smile", 1.0), ("cheek_raise", 0.7), ("eye_narrow", 0.5),
        ),
        EmotionType.SAD: (
            ("mouth_frown", 1.0), ("eyebrow_up", 0.8), ("eye_narrow", 0.3),
        ),
        EmotionType.ANGRY: (
            ("eyebrow_down", 1.0), ("eye_narrow", 1.0), ("nose_wrinkle", 0.5),
        ),
        EmotionType.SURPRISED: (
            ("eyebrow_up", 1.0), ("eye_wide", 1.0), ("mouth_smile", 0.3),
        ),
    }

    def __init__(self):
        self.emotion_states: Dict[str, EmotionType] = {}
        self.emotion_intensity: Dict[str, float] = {}
        # Each history is capped at the 50 most recent entries.
        self.emotion_history: Dict[str, deque] = defaultdict(lambda: deque(maxlen=50))
        self.lock = threading.Lock()
        # Emotion transition matrix
        self.transition_matrix = self._init_transition_matrix()

    def _init_transition_matrix(self) -> Dict[EmotionType, Dict[EmotionType, float]]:
        """Build the emotion transition matrix (current emotion -> next emotion -> weight).

        Only the NEUTRAL and HAPPY rows are populated; the remaining rows are
        still to be filled in.
        """
        return {
            EmotionType.NEUTRAL: {
                EmotionType.NEUTRAL: 0.5,
                EmotionType.HAPPY: 0.2,
                EmotionType.SAD: 0.1,
                EmotionType.ANGRY: 0.05,
                EmotionType.SURPRISED: 0.1,
                EmotionType.FEARFUL: 0.03,
                EmotionType.DISGUSTED: 0.02
            },
            EmotionType.HAPPY: {
                EmotionType.HAPPY: 0.6,
                EmotionType.NEUTRAL: 0.2,
                EmotionType.SURPRISED: 0.15,
                EmotionType.SAD: 0.03,
                EmotionType.ANGRY: 0.01,
                EmotionType.FEARFUL: 0.005,
                EmotionType.DISGUSTED: 0.005
            },
            # ... transition weights for the other emotions
        }

    @staticmethod
    def _count_keyword_hits(text: str, keywords) -> int:
        """Count how many distinct keywords occur in the (lower-cased) text.

        ASCII keywords must appear as whole words so that, for example,
        'happy' is not found inside 'unhappy' and 'mad' is not found inside
        'made'. Other keywords are matched as substrings.
        """
        # Replace every character that is not an ASCII letter/digit with a
        # space, then split to obtain the set of ASCII words in the text.
        ascii_words = set(
            "".join(ch if ch.isascii() and ch.isalnum() else " " for ch in text).split()
        )
        hits = 0
        for keyword in keywords:
            if keyword.isascii():
                found = keyword in ascii_words
            else:
                found = keyword in text
            hits += found
        return hits

    def recognize_emotion(self, text: str,
                          voice_features: Optional[Dict[str, float]] = None) -> EmotionType:
        """Recognise an emotion from text by keyword matching.

        Args:
            text: User utterance. Empty or None yields NEUTRAL.
            voice_features: Accepted for interface compatibility; not used by
                this simplified implementation.

        Returns:
            HAPPY, SAD or ANGRY depending on which keyword table has the most
            hits (ties resolved in that order), or NEUTRAL when nothing matches.
        """
        if not text:
            return EmotionType.NEUTRAL
        text_lower = text.lower()

        # Blank out negative phrases before counting positive keywords so that
        # '好' inside '不好' is not counted as a happy hit. General negation
        # (e.g. '不' followed by another happy keyword) is not handled.
        positive_text = text_lower
        for phrase in self._SAD_KEYWORDS + self._ANGRY_KEYWORDS:
            if not phrase.isascii():
                positive_text = positive_text.replace(phrase, " ")

        scores = (
            (EmotionType.HAPPY, self._count_keyword_hits(positive_text, self._HAPPY_KEYWORDS)),
            (EmotionType.SAD, self._count_keyword_hits(text_lower, self._SAD_KEYWORDS)),
            (EmotionType.ANGRY, self._count_keyword_hits(text_lower, self._ANGRY_KEYWORDS)),
        )
        # max() returns the first maximal item, which keeps the
        # HAPPY > SAD > ANGRY tie-breaking order.
        best_emotion, best_score = max(scores, key=lambda pair: pair[1])
        if best_score == 0:
            return EmotionType.NEUTRAL
        return best_emotion

    def generate_emotion(self, context: str, personality: Dict[PersonalityTrait, float]) -> EmotionType:
        """Generate an emotion from personality traits.

        Args:
            context: Accepted for interface compatibility; not used by this
                simplified implementation.
            personality: Trait -> score mapping; missing traits default to 0.5.

        Returns:
            HAPPY when extraversion > 0.7, otherwise SAD when
            neuroticism > 0.7, otherwise NEUTRAL.
        """
        extraversion = personality.get(PersonalityTrait.EXTRAVERSION, 0.5)
        neuroticism = personality.get(PersonalityTrait.NEUROTICISM, 0.5)
        # High extraversion is checked first, so it wins over high neuroticism.
        if extraversion > 0.7:
            return EmotionType.HAPPY
        if neuroticism > 0.7:
            return EmotionType.SAD
        return EmotionType.NEUTRAL

    def transition_emotion(self, current: EmotionType, stimulus: str) -> EmotionType:
        """Return the emotion that follows `current` given a textual stimulus.

        Simplified implementation: only keyword triggers are evaluated; the
        transition matrix is not consulted here.

        Returns:
            SURPRISED or FEARFUL when the stimulus contains the matching
            keyword, otherwise `current` unchanged.
        """
        stimulus_lower = stimulus.lower()
        if '惊喜' in stimulus or 'surprise' in stimulus_lower:
            return EmotionType.SURPRISED
        if '害怕' in stimulus or 'fear' in stimulus_lower:
            return EmotionType.FEARFUL
        # Default: keep the current emotion.
        return current

    def generate_expression(self, emotion: EmotionType, intensity: float) -> FacialExpression:
        """Build a facial expression for an emotion.

        Args:
            emotion: Emotion to express.
            intensity: Strength of the expression; values outside 0.0 - 1.0
                are clamped into that range.

        Returns:
            A FacialExpression whose duration is 2000-5000 ms and whose
            transition is 200-500 ms, both growing with intensity.
        """
        # Clamp so that out-of-range input cannot produce negative durations
        # or blend-shape weights outside 0.0 - 1.0.
        intensity = max(0.0, min(1.0, intensity))
        # NOTE: the id uses whole seconds, so two expressions for the same
        # emotion created within one second share an id.
        expression_id = f"expr_{emotion.value}_{int(time.time())}"
        return FacialExpression(
            expression_id=expression_id,
            emotion=emotion,
            intensity=intensity,
            blend_shapes=self._emotion_to_blend_shapes(emotion, intensity),
            duration_ms=2000 + int(intensity * 3000),
            transition_ms=200 + int(intensity * 300)
        )

    def _emotion_to_blend_shapes(self, emotion: EmotionType, intensity: float) -> Dict[str, float]:
        """Map an emotion and intensity to blend-shape weights.

        Returns:
            A dict containing all eight blend-shape names. Shapes not driven
            by the emotion stay at 0.0; emotions without a factor table entry
            (e.g. NEUTRAL, FEARFUL, DISGUSTED) return all zeros.
        """
        blend_shapes = {
            "mouth_smile": 0.0,
            "mouth_frown": 0.0,
            "eyebrow_up": 0.0,
            "eyebrow_down": 0.0,
            "eye_wide": 0.0,
            "eye_narrow": 0.0,
            "cheek_raise": 0.0,
            "nose_wrinkle": 0.0
        }
        for shape_name, factor in self._BLEND_SHAPE_FACTORS.get(emotion, ()):
            blend_shapes[shape_name] = intensity * factor
        return blend_shapes
class ConversationManager:
    """
    Conversation manager for one digital human.

    Supports:
    1. Conversation context management
    2. Template-based reply generation
    3. Multi-turn history tracking
    4. Replies personalised with the profile's name and voice
    """

    # Trigger keywords. ASCII keywords are matched as whole words so that
    # 'hi' does not fire on 'this'; other keywords are matched as substrings.
    _GREETING_KEYWORDS = ('你好', 'hi', 'hello', '嗨')
    _FAREWELL_KEYWORDS = ('再见', 'bye', 'goodbye')

    def __init__(self, digital_human_profile: DigitalHumanProfile):
        self.profile = digital_human_profile
        self.conversations: Dict[str, ConversationContext] = {}
        self.response_templates: Dict[str, List[str]] = self._load_response_templates()
        self.lock = threading.Lock()
        # One engine is reused for every reply instead of building a new one per message.
        self.emotion_engine = EmotionEngine()

    def _load_response_templates(self) -> Dict[str, List[str]]:
        """Return the reply templates, keyed by category.

        Greeting and farewell templates are passed through
        ``str.format(name=...)``, so the placeholder must be spelled exactly
        ``{name}``; a space inside the braces makes ``format`` raise KeyError.
        """
        return {
            "greeting": [
                "你好!我是{name},很高兴见到你!",
                "嗨!有什么我可以帮助你的吗?",
                "欢迎来到这里!我是{name}。"
            ],
            "farewell": [
                "再见!期待下次与你交流!",
                "祝你有美好的一天!",
                "有任何问题随时找我!"
            ],
            "acknowledgment": [
                "我明白了。",
                "好的,请继续说。",
                "嗯,我在听。"
            ],
            "empathy": [
                "我能理解你的感受。",
                "这确实不容易。",
                "我在这里支持你。"
            ]
        }

    @staticmethod
    def _mentions_any(text: str, keywords) -> bool:
        """Return True when the text contains at least one of the keywords.

        Matching is case-insensitive. ASCII keywords must appear as whole
        words; other keywords are matched as substrings.
        """
        text_lower = (text or "").lower()
        # Replace every character that is not an ASCII letter/digit with a
        # space, then split to obtain the set of ASCII words in the text.
        ascii_words = set(
            "".join(ch if ch.isascii() and ch.isalnum() else " " for ch in text_lower).split()
        )
        return any(
            (keyword in ascii_words) if keyword.isascii() else (keyword in text_lower)
            for keyword in keywords
        )

    def start_conversation(self, user_id: str, initial_message: str) -> str:
        """Create a conversation and record the user's first message.

        No reply is generated here; call ``process_message`` for that.

        Returns:
            The id of the newly created conversation context.
        """
        context_id = str(uuid.uuid4())
        now = datetime.now()
        context = ConversationContext(
            context_id=context_id,
            user_id=user_id,
            digital_human_id=self.profile.human_id,
            conversation_history=[{
                "role": "user",
                "content": initial_message,
                "timestamp": now.isoformat()
            }],
            current_emotion=self.profile.default_emotion,
            user_emotion=None,
            interaction_mode=InteractionMode.TEXT,
            started_at=now,
            updated_at=now
        )
        with self.lock:
            self.conversations[context_id] = context
        return context_id

    def process_message(self, context_id: str, user_message: str,
                        emotion: Optional[EmotionType] = None) -> Dict[str, Any]:
        """Record a user message, generate a reply and update the context.

        Args:
            context_id: Id returned by ``start_conversation``.
            user_message: The user's utterance.
            emotion: The user's emotion if known; treated as NEUTRAL when
                omitted (the history entry then records ``None``).

        Returns:
            The response dict with keys ``text``, ``emotion``, ``expression``,
            ``voice_profile`` and ``response_time_ms``, or
            ``{"error": "Conversation not found"}`` for an unknown id.
        """
        with self.lock:
            context = self.conversations.get(context_id)
        if context is None:
            return {"error": "Conversation not found"}

        # Append the user turn to the history.
        context.conversation_history.append({
            "role": "user",
            "content": user_message,
            "timestamp": datetime.now().isoformat(),
            "emotion": emotion.value if emotion else None
        })

        # Fall back to NEUTRAL when the caller did not supply an emotion.
        if emotion is None:
            emotion = EmotionType.NEUTRAL
        context.user_emotion = emotion
        context.updated_at = datetime.now()

        response = self._generate_response(context, user_message, emotion)

        # The digital human adopts the emotion of its own reply.
        context.current_emotion = response["emotion"]
        context.conversation_history.append({
            "role": "assistant",
            "content": response["text"],
            "timestamp": datetime.now().isoformat(),
            "emotion": response["emotion"].value
        })
        return response

    def _generate_response(self, context: ConversationContext,
                           user_message: str, user_emotion: EmotionType) -> Dict[str, Any]:
        """Pick a reply template and build the response payload.

        Priority: greeting keywords, then farewell keywords, then empathy when
        the user is SAD, otherwise a plain acknowledgment.

        Returns:
            Dict with ``text`` (str), ``emotion`` (EmotionType), ``expression``
            (FacialExpression at intensity 0.7), ``voice_profile`` and
            ``response_time_ms`` (random int in [500, 1500)).
        """
        if self._mentions_any(user_message, self._GREETING_KEYWORDS):
            category, response_emotion, uses_name = "greeting", EmotionType.HAPPY, True
        elif self._mentions_any(user_message, self._FAREWELL_KEYWORDS):
            category, response_emotion, uses_name = "farewell", EmotionType.NEUTRAL, True
        elif user_emotion == EmotionType.SAD:
            category, response_emotion, uses_name = "empathy", EmotionType.SAD, False
        else:
            category, response_emotion, uses_name = "acknowledgment", EmotionType.NEUTRAL, False

        # np.random.choice returns a NumPy string scalar; convert to a plain str.
        response_text = str(np.random.choice(self.response_templates[category]))
        if uses_name:
            response_text = response_text.format(name=self.profile.name)

        expression = self.emotion_engine.generate_expression(response_emotion, 0.7)
        return {
            "text": response_text,
            "emotion": response_emotion,
            "expression": expression,
            "voice_profile": self.profile.voice_profile,
            # Plain int instead of a NumPy integer so the value is JSON-serialisable.
            "response_time_ms": int(np.random.randint(500, 1500))
        }

    def get_conversation_summary(self, context_id: str) -> Dict[str, Any]:
        """Return summary statistics for a conversation.

        Returns:
            Dict with the ids, message count, current/user emotion values,
            elapsed minutes since the conversation started and the interaction
            mode, or ``{"error": "Conversation not found"}`` for an unknown id.
        """
        with self.lock:
            context = self.conversations.get(context_id)
        if context is None:
            return {"error": "Conversation not found"}
        return {
            "context_id": context_id,
            "user_id": context.user_id,
            "message_count": len(context.conversation_history),
            "current_emotion": context.current_emotion.value,
            "user_emotion": context.user_emotion.value if context.user_emotion else None,
            "duration_minutes": (datetime.now() - context.started_at).total_seconds() / 60,
            "interaction_mode": context.interaction_mode.value
        }
# Usage example
if __name__ == "__main__":
    print("=== 数字人 Agent 与交互体验设计 ===\n")
    print("=== 创建数字人档案 ===")

    # Voice characteristics of the demo persona.
    voice_profile = VoiceProfile(
        profile_id="voice_001",
        pitch=1.0,
        speed=1.0,
        volume=0.8,
        tone="warm",
        language="zh-CN",
        accent="standard",
    )

    # Personality scores of the demo persona.
    trait_scores = {
        PersonalityTrait.OPENNESS: 0.8,
        PersonalityTrait.CONSCIENTIOUSNESS: 0.7,
        PersonalityTrait.EXTRAVERSION: 0.9,
        PersonalityTrait.AGREEABLENESS: 0.85,
        PersonalityTrait.NEUROTICISM: 0.3,
    }

    # The digital human itself.
    digital_human = DigitalHumanProfile(
        human_id="dh_001",
        name="小智",
        age=25,
        gender="female",
        appearance_model="realistic_young_female",
        personality_traits=trait_scores,
        voice_profile=voice_profile,
        default_emotion=EmotionType.NEUTRAL,
        knowledge_domains=["customer_service", "technology", "lifestyle"],
        interaction_modes=[InteractionMode.VOICE, InteractionMode.TEXT, InteractionMode.GESTURE],
    )

    print(f"数字人:{digital_human.name}")
    print(f"年龄:{digital_human.age}")
    print("人格特质:")
    for trait, value in digital_human.personality_traits.items():
        print(f" - {trait.value}: {value:.2f}")
    print(f"交互模式:{[mode.value for mode in digital_human.interaction_modes]}")

    print("\n=== 创建对话管理器 ===")
    conv_manager = ConversationManager(digital_human)

    print("\n=== 开始对话 ===")
    context_id = conv_manager.start_conversation(
        user_id="user_123",
        initial_message="你好!",
    )
    print(f"对话 ID: {context_id}")

    print("\n=== 模拟对话 ===")
    # Scripted user turns: (utterance, emotion reported for the user).
    scripted_turns = [
        ("你好!", EmotionType.HAPPY),
        ("我今天心情不太好。", EmotionType.SAD),
        ("谢谢你的关心!", EmotionType.HAPPY),
        ("再见!", EmotionType.NEUTRAL),
    ]
    for turn_index, (utterance, user_mood) in enumerate(scripted_turns):
        reply = conv_manager.process_message(context_id, utterance, user_mood)
        is_first_turn = turn_index == 0
        # Every turn after the first is preceded by a blank line.
        separator = "" if is_first_turn else "\n"
        print(f"{separator}用户:{utterance}")
        print(f"数字人:{reply['text']}")
        print(f"情感:{reply['emotion'].value}")
        # Only the first turn also shows the expression id.
        if is_first_turn:
            print(f"表情:{reply['expression'].expression_id}")
        print(f"响应时间:{reply['response_time_ms']}ms")

    print("\n=== 对话摘要 ===")
    summary = conv_manager.get_conversation_summary(context_id)
    print(f"对话 ID: {summary['context_id']}")
    print(f"消息数:{summary['message_count']}")
    print(f"持续时间:{summary['duration_minutes']:.2f}分钟")
    print(f"当前情感:{summary['current_emotion']}")
    print(f"用户情感:{summary['user_emotion']}")
    print(f"交互模式:{summary['interaction_mode']}")

    # Closing remarks, printed one per line.
    closing_lines = (
        "\n关键观察:",
        "1. 数字人建模:3D 建模、动作捕捉、表情合成、实时渲染",
        "2. Agent 智能:人格设定、智能对话、情感计算、行为决策",
        "3. 多模交互:语音、手势、眼动、情感反馈",
        "4. 体验优化:UX 设计、可用性测试、用户研究、持续迭代",
        "5. 数字生命:建模 + 智能 + 交互 + 体验 = 可信赖",
        "\n数字生命的使命:让交互更自然、更智能、更有温度",
    )
    for line in closing_lines:
        print(line)