# 结果校验完整实现 (complete implementation of result validation)
import dataclasses
import hashlib
import json
import re
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional, Callable
class ValidationStatus(Enum):
    """Possible outcomes of a single validation check."""

    # The check ran and the value satisfied it.
    PASSED = "passed"
    # The check ran and the value did not satisfy it.
    FAILED = "failed"
    # The check flagged something that is not an outright failure.
    WARNING = "warning"
    # The check was not carried out.
    SKIPPED = "skipped"
class ValidationType(Enum):
    """Categories of validation a rule can belong to."""

    # Actual value agrees with an expected value.
    CORRECTNESS = "correctness"
    # All required fields are present.
    COMPLETENESS = "completeness"
    # Values agree across several data sources.
    CONSISTENCY = "consistency"
    # Value has the required textual format.
    FORMAT = "format"
    # Value lies within given bounds.
    RANGE = "range"
    # Business rules.
    BUSINESS = "business"
@dataclass
class ValidationResult:
    """Outcome of one validation check.

    The first four attributes are required; the rest default to ``None``,
    the creation time, or ``"info"`` respectively.
    """

    # Unique identifier of this individual check.
    validation_id: str
    # Category of the check that produced this result.
    validation_type: "ValidationType"
    # Whether the check passed, failed, warned or was skipped.
    status: "ValidationStatus"
    # Human-readable description of the outcome.
    message: str
    # Name of the field that was checked, if any.
    # NOTE: from this line on, the name ``field`` inside the class body refers
    # to this attribute's default (None), not to ``dataclasses.field``. Any
    # later default must therefore go through the ``dataclasses`` module.
    field: Optional[str] = None
    # What the check expected to see.
    expected: Any = None
    # What the check actually saw.
    actual: Any = None
    # Moment the result object was created (naive local time).
    timestamp: datetime = dataclasses.field(default_factory=datetime.now)
    # One of: info, warning, error, critical.
    severity: str = "info"
@dataclass
class ValidationReport:
    """Aggregated outcome of all validation checks run against one target.

    Counters and ``overall_status`` are maintained by ``add_result``; append
    results through that method rather than mutating ``results`` directly.
    """

    # Unique identifier of the report.
    report_id: str
    # Free-form description of what was validated.
    target: str
    # Number of results recorded so far, regardless of status.
    total_validations: int = 0
    passed: int = 0
    failed: int = 0
    warnings: int = 0
    skipped: int = 0
    # Individual results in the order they were added.
    results: List[ValidationResult] = field(default_factory=list)
    # Moment the report object was created (naive local time).
    start_time: datetime = field(default_factory=datetime.now)
    # Set by the caller once validation has finished; None until then.
    end_time: Optional[datetime] = None
    # Worst outcome seen so far (PASSED < WARNING < FAILED).
    overall_status: ValidationStatus = ValidationStatus.PASSED

    def add_result(self, result: ValidationResult) -> None:
        """Record a result and update the counters and overall status.

        The overall status only ever gets worse:

        * a FAILED result with severity ``"error"`` or ``"critical"`` makes
          the report FAILED;
        * a FAILED result with a lower severity, or a WARNING result, makes
          the report WARNING unless it is already FAILED;
        * PASSED and SKIPPED results leave the overall status untouched.

        Args:
            result: The validation result to add.
        """
        self.results.append(result)
        self.total_validations += 1

        if result.status == ValidationStatus.PASSED:
            self.passed += 1
        elif result.status == ValidationStatus.FAILED:
            self.failed += 1
            if result.severity in ("error", "critical"):
                self.overall_status = ValidationStatus.FAILED
            elif self.overall_status != ValidationStatus.FAILED:
                # Low-severity failure: visible in the status, but not fatal.
                self.overall_status = ValidationStatus.WARNING
        elif result.status == ValidationStatus.WARNING:
            self.warnings += 1
            if self.overall_status == ValidationStatus.PASSED:
                self.overall_status = ValidationStatus.WARNING
        elif result.status == ValidationStatus.SKIPPED:
            self.skipped += 1

    def get_summary(self) -> Dict:
        """Return the report's headline figures as a plain dictionary.

        Returns:
            A dict with the keys ``report_id``, ``target``, ``total``,
            ``passed``, ``failed``, ``warnings``, ``skipped``, ``pass_rate``
            (percentage string with two decimals, or ``"N/A"`` when nothing
            was validated), ``overall_status`` (the enum's string value) and
            ``duration`` (string form of ``end_time - start_time``, or
            ``None`` while ``end_time`` is unset).
        """
        if self.total_validations > 0:
            pass_rate = f"{(self.passed / self.total_validations * 100):.2f}%"
        else:
            pass_rate = "N/A"

        duration = str(self.end_time - self.start_time) if self.end_time else None

        return {
            "report_id": self.report_id,
            "target": self.target,
            "total": self.total_validations,
            "passed": self.passed,
            "failed": self.failed,
            "warnings": self.warnings,
            "skipped": self.skipped,
            "pass_rate": pass_rate,
            "overall_status": self.overall_status.value,
            "duration": duration,
        }
class ResultValidator:
    """Run validation checks on result data and collect them into reports.

    Built-in checks:
    1. Correctness  - the actual value equals the expected value
    2. Completeness - required fields are present and not None
    3. Consistency  - key fields agree across several data sources
    4. Format       - a string matches a regular expression
    5. Range        - a value lies within optional bounds

    Business-rule validators can be registered via ``register_validator``.
    ``run_validation`` does not invoke registered validators; a rule whose
    type has no built-in check is recorded in the report as SKIPPED.
    """

    def __init__(self):
        # One (initially empty) list of registered callables per validation type.
        self.validators: Dict[ValidationType, List[Callable]] = {
            validation_type: [] for validation_type in ValidationType
        }
        # Every report produced by run_validation, keyed by report_id.
        self.reports: Dict[str, ValidationReport] = {}

    @staticmethod
    def _new_id() -> str:
        """Return a fresh random UUID4 string for a result or report."""
        return str(uuid.uuid4())

    def register_validator(self, validation_type: ValidationType, validator: Callable):
        """Register a custom validator callable under the given type.

        Args:
            validation_type: Category the validator belongs to.
            validator: The callable to store.
        """
        self.validators[validation_type].append(validator)

    def validate_correctness(
        self,
        actual: Any,
        expected: Any,
        field: str = "result",
        tolerance: float = 0.0
    ) -> ValidationResult:
        """Check that ``actual`` equals ``expected``.

        Args:
            actual: The observed value.
            expected: The value it should have.
            field: Name of the field being checked.
            tolerance: Maximum absolute difference allowed when both values
                are int/float; ignored for every other type.

        Returns:
            ValidationResult: PASSED (severity "info") or FAILED (severity
            "error").
        """
        if isinstance(actual, (int, float)) and isinstance(expected, (int, float)):
            # Numeric comparison honours the tolerance.
            passed = abs(actual - expected) <= tolerance
            message = (
                f"Value {actual} {'within' if passed else 'outside'} "
                f"tolerance {tolerance} of expected {expected}"
            )
        else:
            passed = actual == expected
            # The message names the container type when both sides share it.
            label = "Value"
            for kind, kind_label in ((str, "String"), (dict, "Dictionary"), (list, "List")):
                if isinstance(actual, kind) and isinstance(expected, kind):
                    label = kind_label
                    break
            message = f"{label} {'matches' if passed else 'does not match'} expected"

        return ValidationResult(
            validation_id=self._new_id(),
            validation_type=ValidationType.CORRECTNESS,
            status=ValidationStatus.PASSED if passed else ValidationStatus.FAILED,
            message=message,
            field=field,
            expected=expected,
            actual=actual,
            severity="error" if not passed else "info"
        )

    def validate_completeness(
        self,
        data: Dict,
        required_fields: List[str],
        field: str = "data"
    ) -> ValidationResult:
        """Check that every required field is present and not None.

        Args:
            data: The dictionary to inspect.
            required_fields: Keys that must be present with a non-None value.
            field: Name to report the check under.

        Returns:
            ValidationResult: ``expected`` holds the required fields and
            ``actual`` the list of missing ones (empty when the check passes).
        """
        missing_fields = [name for name in required_fields if data.get(name) is None]
        passed = not missing_fields

        return ValidationResult(
            validation_id=self._new_id(),
            validation_type=ValidationType.COMPLETENESS,
            status=ValidationStatus.PASSED if passed else ValidationStatus.FAILED,
            message=f"{'All required fields present' if passed else f'Missing fields: {missing_fields}'}",
            field=field,
            expected=required_fields,
            actual=missing_fields,
            severity="error" if not passed else "info"
        )

    def validate_consistency(
        self,
        data_sources: List[Dict],
        key_fields: List[str],
        field: str = "data"
    ) -> ValidationResult:
        """Check that the key fields agree across all data sources.

        Args:
            data_sources: Dictionaries to compare; fewer than two yields a
                SKIPPED result.
            key_fields: Keys whose values must be equal in every source. A
                key missing from a source counts as the value None.
            field: Name to report the check under.

        Returns:
            ValidationResult: ``actual`` lists one ``{"field", "values"}``
            entry per inconsistent key.
        """
        if len(data_sources) < 2:
            return ValidationResult(
                validation_id=self._new_id(),
                validation_type=ValidationType.CONSISTENCY,
                status=ValidationStatus.SKIPPED,
                message="Need at least 2 data sources for consistency check",
                field=field
            )

        inconsistencies = []
        for key_field in key_fields:
            values = [source.get(key_field) for source in data_sources]
            reference = values[0]
            # Compare by equality rather than via set() so that unhashable
            # values (lists, dicts) can be checked as well.
            if any(value != reference for value in values[1:]):
                inconsistencies.append({
                    "field": key_field,
                    "values": values
                })

        passed = not inconsistencies

        return ValidationResult(
            validation_id=self._new_id(),
            validation_type=ValidationType.CONSISTENCY,
            status=ValidationStatus.PASSED if passed else ValidationStatus.FAILED,
            message=f"{'All data sources consistent' if passed else f'Inconsistencies found: {inconsistencies}'}",
            field=field,
            expected="Consistent values",
            actual=inconsistencies,
            severity="error" if not passed else "info"
        )

    def validate_format(
        self,
        value: str,
        pattern: str,
        field: str = "value"
    ) -> ValidationResult:
        """Check that ``value`` matches a regular expression.

        The pattern is applied with ``re.match``, i.e. anchored at the start
        of the string only; add ``$`` to the pattern to anchor the end.

        Args:
            value: The string to check. A non-string value fails the check
                instead of raising.
            pattern: Regular expression pattern.
            field: Name of the field being checked.

        Returns:
            ValidationResult: PASSED (severity "info") or FAILED (severity
            "warning").

        Raises:
            re.error: If ``pattern`` is not a valid regular expression.
        """
        if isinstance(value, str):
            passed = re.match(pattern, value) is not None
            message = f"Value {'matches' if passed else 'does not match'} pattern {pattern}"
        else:
            # re.match would raise TypeError here; report a failed check instead.
            passed = False
            message = (
                f"Value of type {type(value).__name__} is not a string "
                f"and cannot match pattern {pattern}"
            )

        return ValidationResult(
            validation_id=self._new_id(),
            validation_type=ValidationType.FORMAT,
            status=ValidationStatus.PASSED if passed else ValidationStatus.FAILED,
            message=message,
            field=field,
            expected=pattern,
            actual=value,
            severity="warning" if not passed else "info"
        )

    def validate_range(
        self,
        value: Any,
        min_value: Any = None,
        max_value: Any = None,
        field: str = "value"
    ) -> ValidationResult:
        """Check that ``value`` lies within the inclusive range given.

        Args:
            value: The value to check.
            min_value: Lower bound, or None for no lower bound.
            max_value: Upper bound, or None for no upper bound.
            field: Name of the field being checked.

        Returns:
            ValidationResult: PASSED (severity "info") or FAILED (severity
            "error"). A value that cannot be ordered against a bound (for
            example None, or a string against a number) fails the check
            instead of raising.
        """
        try:
            below = min_value is not None and value < min_value
            above = max_value is not None and value > max_value
        except TypeError:
            passed = False
            message = f"Value {value!r} is not comparable with range [{min_value}, {max_value}]"
        else:
            passed = not (below or above)
            message = f"Value {value} {'within' if passed else 'outside'} range [{min_value}, {max_value}]"

        return ValidationResult(
            validation_id=self._new_id(),
            validation_type=ValidationType.RANGE,
            status=ValidationStatus.PASSED if passed else ValidationStatus.FAILED,
            message=message,
            field=field,
            expected=f"[{min_value}, {max_value}]",
            actual=value,
            severity="error" if not passed else "info"
        )

    def _apply_rule(
        self,
        validation_type: ValidationType,
        rule: Dict,
        data: Dict,
        field: str
    ) -> Optional[ValidationResult]:
        """Run the built-in check for one rule; return None if there is none."""
        if validation_type == ValidationType.CORRECTNESS:
            return self.validate_correctness(
                actual=data.get(field),
                expected=rule["expected"],
                field=field,
                tolerance=rule.get("tolerance", 0.0)
            )
        if validation_type == ValidationType.COMPLETENESS:
            return self.validate_completeness(
                data=data,
                required_fields=rule["required_fields"],
                field=field
            )
        if validation_type == ValidationType.CONSISTENCY:
            return self.validate_consistency(
                data_sources=rule["data_sources"],
                key_fields=rule["key_fields"],
                field=field
            )
        if validation_type == ValidationType.FORMAT:
            return self.validate_format(
                value=data.get(field, ""),
                pattern=rule["pattern"],
                field=field
            )
        if validation_type == ValidationType.RANGE:
            return self.validate_range(
                value=data.get(field),
                min_value=rule.get("min_value"),
                max_value=rule.get("max_value"),
                field=field
            )
        return None

    def run_validation(
        self,
        target: str,
        data: Dict,
        validation_rules: List[Dict]
    ) -> ValidationReport:
        """Apply a list of rules to ``data`` and return the resulting report.

        Each rule is a dict with a ``"type"`` (a ``ValidationType`` value),
        an optional ``"field"`` (default ``"data"``) and the type-specific
        keys: ``expected``/``tolerance`` (correctness), ``required_fields``
        (completeness), ``data_sources``/``key_fields`` (consistency),
        ``pattern`` (format), ``min_value``/``max_value`` (range).

        An exception raised while applying a rule is recorded as a FAILED
        result with severity "critical". A rule whose type has no built-in
        check is recorded as SKIPPED. The finished report is also stored in
        ``self.reports`` under its ``report_id``.

        Args:
            target: Description of what is being validated.
            data: The data to validate.
            validation_rules: The rules to apply, in order.

        Returns:
            ValidationReport: The completed report with ``end_time`` set.

        Raises:
            KeyError: If a rule has no ``"type"`` key.
            ValueError: If a rule's type is not a ``ValidationType`` value.
        """
        report = ValidationReport(
            report_id=self._new_id(),
            target=target
        )

        for rule in validation_rules:
            validation_type = ValidationType(rule["type"])
            field = rule.get("field", "data")

            try:
                result = self._apply_rule(validation_type, rule, data, field)
            except Exception as exc:
                # Boundary handler: one broken rule must not abort the run.
                result = ValidationResult(
                    validation_id=self._new_id(),
                    validation_type=validation_type,
                    status=ValidationStatus.FAILED,
                    message=f"Validation error: {exc}",
                    field=field,
                    severity="critical"
                )

            if result is None:
                # Make the unevaluated rule visible instead of dropping it.
                result = ValidationResult(
                    validation_id=self._new_id(),
                    validation_type=validation_type,
                    status=ValidationStatus.SKIPPED,
                    message=f"No built-in check for validation type '{validation_type.value}'",
                    field=field
                )

            report.add_result(result)

        report.end_time = datetime.now()
        self.reports[report.report_id] = report

        return report
# Usage example
def _run_demo():
    """Validate a sample user record and print the resulting report."""
    checker = ResultValidator()

    # Sample record to validate.
    sample_user = {
        "user_id": "U123456",
        "name": "张三",
        "email": "zhangsan@example.com",
        "age": 25,
        "balance": 1000.50
    }

    # One rule per built-in check exercised by the demo.
    completeness_rule = {
        "type": "completeness",
        "field": "user",
        "required_fields": ["user_id", "name", "email", "age"]
    }
    format_rule = {
        "type": "format",
        "field": "email",
        "pattern": r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$"
    }
    range_rule = {
        "type": "range",
        "field": "age",
        "min_value": 0,
        "max_value": 150
    }
    correctness_rule = {
        "type": "correctness",
        "field": "user_id",
        "expected": "U123456"
    }
    rules = [completeness_rule, format_rule, range_rule, correctness_rule]

    print("=== 执行结果校验 ===")
    outcome = checker.run_validation(
        target="User Data Validation",
        data=sample_user,
        validation_rules=rules
    )

    # Print the headline figures of the report.
    overview = outcome.get_summary()
    headline = (
        f"\n校验报告:{overview['report_id'][:8]}...",
        f"目标:{overview['target']}",
        f"总校验数:{overview['total']}",
        f"通过:{overview['passed']}",
        f"失败:{overview['failed']}",
        f"警告:{overview['warnings']}",
        f"通过率:{overview['pass_rate']}",
        f"整体状态:{overview['overall_status']}",
    )
    for line in headline:
        print(line)

    # Print each individual result; failures also show expected vs. actual.
    print("\n=== 详细结果 ===")
    for entry in outcome.results:
        if entry.status == ValidationStatus.PASSED:
            marker = "✓"
        else:
            marker = "✗"
        print(f"{marker} [{entry.validation_type.value}] {entry.message}")
        if entry.status != ValidationStatus.FAILED:
            continue
        print(f" 期望:{entry.expected}")
        print(f" 实际:{entry.actual}")

    # Closing remarks.
    remarks = (
        "\n关键观察:",
        "1. 多维度校验:正确性、完整性、格式、范围",
        "2. 自动报告:生成详细校验报告,包含通过率",
        "3. 分级告警:info/warning/error/critical 四级严重性",
        "4. 可扩展:支持自定义校验规则",
        "5. 及时发现问题:执行后立即验证,避免错误扩散",
    )
    for remark in remarks:
        print(remark)


if __name__ == "__main__":
    _run_demo()