ToT Beam Search 推理器完整实现
import openai
from typing import List, Dict, Optional, Tuple
from dataclasses import dataclass
from enum import Enum
import json
import math
@dataclass
class ThoughtNode:
"""思维节点"""
id: int
content: str
depth: int
parent_id: Optional[int]
value: float # 启发式评分 (0-1)
path_value: float # 路径累积评分
is_complete: bool = False
class ToTBeamSearch:
"""
ToT Beam Search 推理器
集束搜索策略:每步保留 top-k 路径
"""
def __init__(self, model: str = "gpt-4",
beam_width: int = 3,
max_depth: int = 5,
temperature: float = 0.7):
"""
初始化
Args:
model: LLM 模型
beam_width: 集束宽度 (保留的候选数)
max_depth: 最大深度
temperature: 生成温度
"""
self.model = model
self.beam_width = beam_width
self.max_depth = max_depth
self.temperature = temperature
self.nodes: Dict[int, ThoughtNode] = {}
self.current_id = 0
def _call_llm(self, prompt: str, temperature: float = None) -> str:
"""调用 LLM"""
if temperature is None:
temperature = self.temperature
response = openai.ChatCompletion.create(
model=self.model,
messages=[{"role": "user", "content": prompt}],
temperature=temperature
)
return response.choices[0].message.content
def generate_candidates(self, current_thought: str, problem: str,
context: str) -> List[str]:
"""生成候选思维(每步生成多个候选)"""
prompt = f"""
问题:{problem}
当前思维:{current_thought}
上下文:{context}
请生成{self.beam_width * 2}个可能的下一步思考(生成更多以便筛选)。
每个思考应该:
1. 是当前思考的自然延续
2. 推进问题解决
3. 简洁明了(1-2 句话)
输出格式(JSON 数组):
["下一步思考 1", "下一步思考 2", "下一步思考 3", ...]
"""
response_text = self._call_llm(prompt, temperature=0.7)
candidates = json.loads(response_text)
return candidates
def evaluate_candidate(self, candidate: str, problem: str,
context: str) -> float:
"""评估候选思维质量(返回 0-1 评分)"""
prompt = f"""
问题:{problem}
上下文:{context}
候选思维:{candidate}
请评估这个候选思维的质量(0-1 分):
1. 是否有助于解决问题?
2. 是否有逻辑错误?
3. 是否接近解决方案?
只输出一个数字(0.0-1.0):
"""
response_text = self._call_llm(prompt, temperature=0.3)
# 提取数字
try:
score = float(response_text.strip())
return max(0.0, min(1.0, score)) # 限制在 0-1 范围
except:
return 0.5 # 默认值
def search(self, problem: str) -> Optional[str]:
"""
使用 Beam Search 解决问题
Args:
problem: 问题描述
Returns:
solution: 解决方案或 None
"""
print(f"开始 Beam Search:{problem[:50]}...")
print("="*70 + "\n")
print(f"集束宽度:{self.beam_width}")
print(f"最大深度:{self.max_depth}\n")
# 生成初始候选
initial_prompt = f"""
问题:{problem}
请生成{self.beam_width * 2}个初始思考方向。
每个思考方向应该:
1. 代表一种可能的解决策略
2. 简洁明了(1-2 句话)
3. 彼此不同
输出格式(JSON 数组):
["思考方向 1", "思考方向 2", ...]
"""
response_text = self._call_llm(initial_prompt, temperature=0.7)
initial_candidates = json.loads(response_text)
# 评估初始候选
beam = [] # (path_value, node_id, path)
for i, candidate in enumerate(initial_candidates):
score = self.evaluate_candidate(candidate, problem, "")
node_id = self.current_id
self.nodes[node_id] = ThoughtNode(
id=node_id,
content=candidate,
depth=1,
parent_id=None,
value=score,
path_value=score,
is_complete=False
)
self.current_id += 1
beam.append((score, node_id, [node_id]))
# 按路径评分排序,保留 top-k
beam.sort(key=lambda x: x[0], reverse=True)
beam = beam[:self.beam_width]
print(f"初始候选:{len(initial_candidates)} 个")
print(f"保留 top-{self.beam_width}: {[self.nodes[nid].content[:30] for _, nid, _ in beam]}")
print()
best_solution = None
best_score = 0.0
# Beam Search 主循环
for depth in range(1, self.max_depth):
print(f"深度 {depth}/{self.max_depth}")
print("-"*50)
next_beam = []
# 对 beam 中每个路径进行扩展
for path_score, node_id, path in beam:
current_node = self.nodes[node_id]
context = self._get_context(path)
print(f" 扩展路径:{current_node.content[:40]}...")
print(f" 路径评分:{path_score:.3f}")
# 生成候选
candidates = self.generate_candidates(
current_node.content, problem, context
)
# 评估并添加到 next_beam
for candidate in candidates:
score = self.evaluate_candidate(candidate, problem, context)
new_path_score = path_score * 0.9 + score * 0.1 # 加权平均
new_node_id = self.current_id
self.nodes[new_node_id] = ThoughtNode(
id=new_node_id,
content=candidate,
depth=depth + 1,
parent_id=node_id,
value=score,
path_value=new_path_score,
is_complete=(depth + 1 >= self.max_depth)
)
self.current_id += 1
next_beam.append((new_path_score, new_node_id, path + [new_node_id]))
# 检查是否完成
if score > 0.95:
if new_path_score > best_score:
best_solution = self._get_context(path + [new_node_id])
best_score = new_path_score
print(f" ✓ 找到高质量解:{score:.3f}")
# 排序并保留 top-k
next_beam.sort(key=lambda x: x[0], reverse=True)
beam = next_beam[:self.beam_width]
print(f" 保留 top-{self.beam_width} 路径")
print(f" 最佳路径评分:{beam[0][0]:.3f}\n" if beam else " 无可用路径\n")
if not beam:
break
print("="*70)
print(f"\n搜索完成,共创建 {len(self.nodes)} 个节点")
print(f"最佳解决方案评分:{best_score:.3f}")
return best_solution
def _get_context(self, path: List[int]) -> str:
"""获取路径上下文"""
context_parts = []
for node_id in path:
context_parts.append(self.nodes[node_id].content)
return "\n".join(context_parts)
def get_statistics(self) -> Dict:
"""获取搜索统计"""
total = len(self.nodes)
avg_value = sum(n.value for n in self.nodes.values()) / total if total > 0 else 0
max_depth = max(n.depth for n in self.nodes.values()) if total > 0 else 0
return {
"total_nodes": total,
"avg_value": avg_value,
"max_depth": max_depth,
"beam_width": self.beam_width
}
# 使用示例
if __name__ == "__main__":
# 初始化推理器
reasoner = ToTBeamSearch(
beam_width=3,
max_depth=4
)
# 示例问题:创意写作
problem = """
写一篇微型小说,主题是"时间旅行者的遗憾"。
要求:
1. 有完整的故事结构(开头、发展、高潮、结尾)
2. 情感真挚
3. 长度适中(200-300 字)
"""
print("ToT Beam Search 示例:创意写作")
print("="*70 + "\n")
print(f"任务:{problem.strip()}\n")
# 求解
solution = reasoner.search(problem)
if solution:
print("\n找到的解决方案:")
print("-"*70)
print(solution)
print("-"*70)
else:
print("\n未找到满意解决方案")
# 统计
stats = reasoner.get_statistics()
print("\n搜索统计:")
print(f" 总节点数:{stats['total_nodes']}")
print(f" 平均评分:{stats['avg_value']:.3f}")
print(f" 最大深度:{stats['max_depth']}")
print(f" 集束宽度:{stats['beam_width']}")
print("\n" + "="*70)
print("\n关键观察:")
print("1. Beam Search 平衡探索与利用(beam_width 控制)")
print("2. 计算效率显著优于穷举搜索")
print("3. 启发式评估确保质量")
print("4. 适用于创意写作、推理、规划等任务")
print("5. beam_width 是关键超参数(太小易丢失最优解,太大计算成本高)")