# ghibili.py

import json
import os
import re
import sys

import requests
  4. def collect_chinese_titles(root_dir):
  5. """Traverse the directory tree and collect all unique Chinese movie titles (directory names)."""
  6. titles = set()
  7. for dirpath, dirnames, _ in os.walk(root_dir):
  8. for dirname in dirnames:
  9. titles.add(dirname)
  10. return list(titles)
  11. def match_english_title(chinese_title):
  12. """
  13. Match the Chinese title to an English title.
  14. This can use a hard-coded mapping or call DeepSeek API for translation.
  15. """
  16. # Example hard-coded mapping (expand as needed)
  17. hardcoded = {
  18. "2013-辉夜姬物语": "The Tale of the Princess Kaguya",
  19. "2004-哈尔移动城堡": "Howl's Moving Castle",
  20. "1995-侧耳倾听": "Whisper of the Heart",
  21. "1992-飞天红猪侠": "Porco Rosso",
  22. "1988-萤火虫之墓": "Grave of the Fireflies",
  23. "1989-魔女宅急便": "Kiki's Delivery Service",
  24. "1994-百变狸猫": "Pom Poko",
  25. "1984-风之谷": "Nausicaä of the Valley of the Wind",
  26. "1979-鲁邦三世 卡里奥斯特罗城": "Lupin III: The Castle of Cagliostro",
  27. "2011-虞美人盛开的山坡": "From Up on Poppy Hill",
  28. "2008-《悬崖上的金鱼姬》《崖上的波妞》": "Ponyo",
  29. "2006-地海战记": "Tales from Earthsea",
  30. "2010 借东西的小矮人亚莉亚蒂": "The Secret World of Arrietty",
  31. "2002-猫的报恩": "The Cat Returns",
  32. "1999-我的邻居山田君": "My Neighbors the Yamadas",
  33. "1988-龙猫": "My Neighbor Totoro",
  34. "2001-千Yu千寻": "Spirited Away",
  35. "1993-听到涛声": "Ocean Waves",
  36. "2014-记忆中的玛妮": "When Marnie Was There",
  37. "1991-岁月的童话": "Only Yesterday",
  38. "1997-幽灵公主": "Princess Mononoke",
  39. "2016-红海龟": "The Red Turtle",
  40. "2013-起风了": "The Wind Rises"
  41. }
  42. if chinese_title in hardcoded:
  43. return hardcoded[chinese_title]
  44. # Example DeepSeek API call (pseudo-code, replace with actual API details)
  45. # Uncomment and fill in your DeepSeek API key and endpoint if available
  46. # try:
  47. # response = requests.post(
  48. # "https://api.deepseek.com/translate",
  49. # json={"text": chinese_title, "source_lang": "zh", "target_lang": "en"},
  50. # headers={"Authorization": "Bearer YOUR_DEEPSEEK_API_KEY"}
  51. # )
  52. # if response.ok:
  53. # return response.json().get("translation")
  54. # except Exception as e:
  55. # print(f"DeepSeek API error for {chinese_title}: {e}")
  56. return None # No match found
  57. def search_imdb_id(english_title):
  58. """Search IMDb for the movie and return the IMDb ID."""
  59. if not english_title:
  60. return None
  61. params = {"q": english_title, "s": "tt", "ttype": "ft", "ref_": "fn_ft"}
  62. headers = {
  63. "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
  64. }
  65. try:
  66. resp = requests.get("https://www.imdb.com/find", params=params, headers=headers, timeout=10)
  67. resp.raise_for_status()
  68. import re
  69. matches = re.findall(r'/title/(tt\d+)/', resp.text)
  70. if matches:
  71. return matches[0]
  72. except Exception as e:
  73. print(f"IMDb search error for {english_title}: {e}")
  74. return None
  75. def main(root_dir):
  76. chinese_titles = collect_chinese_titles(root_dir)
  77. results = []
  78. unmatched = []
  79. print(f"Found {len(chinese_titles)} unique Chinese titles.")
  80. for chinese_title in chinese_titles:
  81. english_title = match_english_title(chinese_title)
  82. imdb_id = search_imdb_id(english_title)
  83. if english_title and imdb_id:
  84. results.append({
  85. "chinese": chinese_title,
  86. "english": english_title,
  87. "imdb_id": imdb_id
  88. })
  89. else:
  90. unmatched.append({
  91. "chinese": chinese_title,
  92. "english": english_title,
  93. "imdb_id": imdb_id if imdb_id else None,
  94. "reason": "No English match" if not english_title else "No IMDb match"
  95. })
  96. print(f"Matched: {len(results)}")
  97. print(f"Unmatched: {len(unmatched)}")
  98. with open("ghibli_imdb_results.json", "w", encoding="utf-8") as f:
  99. json.dump({"matched": results, "unmatched": unmatched}, f, ensure_ascii=False, indent=2)
  100. if __name__ == "__main__":
  101. import sys
  102. if len(sys.argv) < 2:
  103. print("Usage: python ghiblil.py <directory /media/yazoo/luks-67672a15-a412-4a17-bb01-c76509e21243/crm/crm-media/anime/赠品:宫崎骏+新海城动画")
  104. else:
  105. main(sys.argv[1])