run_mock_parity_diff.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. #!/usr/bin/env python3
  2. from __future__ import annotations
  3. import json
  4. import os
  5. import subprocess
  6. import sys
  7. import tempfile
  8. from collections import defaultdict
  9. from pathlib import Path
  10. def load_manifest(path: Path) -> list[dict]:
  11. return json.loads(path.read_text())
  12. def load_parity_text(path: Path) -> str:
  13. return path.read_text()
  14. def ensure_refs_exist(manifest: list[dict], parity_text: str) -> list[tuple[str, str]]:
  15. missing: list[tuple[str, str]] = []
  16. for entry in manifest:
  17. for ref in entry.get("parity_refs", []):
  18. if ref not in parity_text:
  19. missing.append((entry["name"], ref))
  20. return missing
  21. def run_harness(rust_root: Path) -> dict:
  22. with tempfile.TemporaryDirectory(prefix="mock-parity-report-") as temp_dir:
  23. report_path = Path(temp_dir) / "report.json"
  24. env = os.environ.copy()
  25. env["MOCK_PARITY_REPORT_PATH"] = str(report_path)
  26. subprocess.run(
  27. [
  28. "cargo",
  29. "test",
  30. "-p",
  31. "rusty-claude-cli",
  32. "--test",
  33. "mock_parity_harness",
  34. "--",
  35. "--nocapture",
  36. ],
  37. cwd=rust_root,
  38. check=True,
  39. env=env,
  40. )
  41. return json.loads(report_path.read_text())
  42. def main() -> int:
  43. script_path = Path(__file__).resolve()
  44. rust_root = script_path.parent.parent
  45. repo_root = rust_root.parent
  46. manifest = load_manifest(rust_root / "mock_parity_scenarios.json")
  47. parity_text = load_parity_text(repo_root / "PARITY.md")
  48. missing_refs = ensure_refs_exist(manifest, parity_text)
  49. if missing_refs:
  50. print("Missing PARITY.md references:", file=sys.stderr)
  51. for scenario_name, ref in missing_refs:
  52. print(f" - {scenario_name}: {ref}", file=sys.stderr)
  53. return 1
  54. should_run = "--no-run" not in sys.argv[1:]
  55. report = run_harness(rust_root) if should_run else None
  56. report_by_name = {
  57. entry["name"]: entry for entry in report.get("scenarios", [])
  58. } if report else {}
  59. print("Mock parity diff checklist")
  60. print(f"Repo root: {repo_root}")
  61. print(f"Scenario manifest: {rust_root / 'mock_parity_scenarios.json'}")
  62. print(f"PARITY source: {repo_root / 'PARITY.md'}")
  63. print()
  64. for entry in manifest:
  65. scenario_name = entry["name"]
  66. scenario_report = report_by_name.get(scenario_name)
  67. status = "PASS" if scenario_report else ("MAPPED" if not should_run else "MISSING")
  68. print(f"[{status}] {scenario_name} ({entry['category']})")
  69. print(f" description: {entry['description']}")
  70. print(f" parity refs: {' | '.join(entry['parity_refs'])}")
  71. if scenario_report:
  72. print(
  73. " result: iterations={iterations} requests={requests} tool_uses={tool_uses} tool_errors={tool_errors}".format(
  74. iterations=scenario_report["iterations"],
  75. requests=scenario_report["request_count"],
  76. tool_uses=", ".join(scenario_report["tool_uses"]) or "none",
  77. tool_errors=scenario_report["tool_error_count"],
  78. )
  79. )
  80. print(f" final: {scenario_report['final_message']}")
  81. print()
  82. coverage = defaultdict(list)
  83. for entry in manifest:
  84. for ref in entry["parity_refs"]:
  85. coverage[ref].append(entry["name"])
  86. print("PARITY coverage map")
  87. for ref, scenarios in coverage.items():
  88. print(f"- {ref}")
  89. print(f" scenarios: {', '.join(scenarios)}")
  90. if report and report.get("scenarios"):
  91. first = report["scenarios"][0]
  92. print()
  93. print("First scenario result")
  94. print(f"- name: {first['name']}")
  95. print(f"- iterations: {first['iterations']}")
  96. print(f"- requests: {first['request_count']}")
  97. print(f"- tool_uses: {', '.join(first['tool_uses']) or 'none'}")
  98. print(f"- tool_errors: {first['tool_error_count']}")
  99. print(f"- final_message: {first['final_message']}")
  100. print()
  101. print(
  102. "Harness summary: {scenario_count} scenarios, {request_count} requests".format(
  103. scenario_count=report["scenario_count"],
  104. request_count=report["request_count"],
  105. )
  106. )
  107. return 0
  108. if __name__ == "__main__":
  109. raise SystemExit(main())