ghibili2.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. # -*- coding: utf-8 -*-
  2. import os
  3. import json
  4. import requests
  5. def collect_chinese_titles(root_dir):
  6. """Traverse the directory tree and collect all unique Chinese movie titles (directory names)."""
  7. titles = set()
  8. for dirpath, dirnames, _ in os.walk(root_dir):
  9. for dirname in dirnames:
  10. titles.add(dirname)
  11. return list(titles)
  12. def match_english_title(chinese_title):
  13. """
  14. Match the Chinese title to an English title.
  15. This can use a hard-coded mapping or call DeepSeek API for translation.
  16. """
  17. hardcoded = {
  18. "2013-辉夜姬物语": "The Tale of the Princess Kaguya",
  19. "2004-哈尔移动城堡": "Howl's Moving Castle",
  20. "1995-侧耳倾听": "Whisper of the Heart",
  21. "1992-飞天红猪侠": "Porco Rosso",
  22. "1988-萤火虫之墓": "Grave of the Fireflies",
  23. "1989-魔女宅急便": "Kiki's Delivery Service",
  24. "1994-百变狸猫": "Pom Poko",
  25. "1984-风之谷": "Nausicaä of the Valley of the Wind",
  26. "1979-鲁邦三世 卡里奥斯特罗城": "Lupin III: The Castle of Cagliostro",
  27. "2011-虞美人盛开的山坡": "From Up on Poppy Hill",
  28. "2008-《悬崖上的金鱼姬》《崖上的波妞》": "Ponyo",
  29. "2006-地海战记": "Tales from Earthsea",
  30. "2010 借东西的小矮人亚莉亚蒂": "The Secret World of Arrietty",
  31. "2002-猫的报恩": "The Cat Returns",
  32. "1999-我的邻居山田君": "My Neighbors the Yamadas",
  33. "1988-龙猫": "My Neighbor Totoro",
  34. "2001-千Yu千寻": "Spirited Away",
  35. "1993-听到涛声": "Ocean Waves",
  36. "2014-记忆中的玛妮": "When Marnie Was There",
  37. "1991-岁月的童话": "Only Yesterday",
  38. "1997-幽灵公主": "Princess Mononoke",
  39. "2016-红海龟": "The Red Turtle",
  40. "2013-起风了": "The Wind Rises"
  41. }
  42. if chinese_title in hardcoded:
  43. return hardcoded[chinese_title]
  44. # Optionally add DeepSeek API translation here
  45. return None # No match found
  46. def search_imdb_id(english_title):
  47. """Search IMDb for the movie and return the IMDb ID."""
  48. if not english_title:
  49. return None
  50. params = {"q": english_title, "s": "tt", "ttype": "ft", "ref_": "fn_ft"}
  51. headers = {
  52. "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
  53. }
  54. try:
  55. resp = requests.get("https://www.imdb.com/find", params=params, headers=headers, timeout=10)
  56. resp.raise_for_status()
  57. import re
  58. matches = re.findall(r'/title/(tt\d+)/', resp.text)
  59. if matches:
  60. return matches[0]
  61. except Exception as e:
  62. print(f"IMDb search error for {english_title}: {e}")
  63. return None
  64. def search_tmdb_id(english_title, api_key):
  65. """Search TMDB for the movie and return the TMDB ID and IMDb ID if available."""
  66. if not english_title or not api_key:
  67. return None, None
  68. url = "https://api.themoviedb.org/3/search/movie"
  69. params = {
  70. "api_key": api_key,
  71. "query": english_title,
  72. "language": "en-US"
  73. }
  74. try:
  75. resp = requests.get(url, params=params, timeout=10)
  76. resp.raise_for_status()
  77. data = resp.json()
  78. if data.get("results"):
  79. movie = data["results"][0]
  80. tmdb_id = movie.get("id")
  81. imdb_id = None
  82. # Try to get IMDb ID from TMDB details
  83. details_url = f"https://api.themoviedb.org/3/movie/{tmdb_id}"
  84. details_params = {"api_key": api_key}
  85. details_resp = requests.get(details_url, params=details_params, timeout=10)
  86. if details_resp.ok:
  87. imdb_id = details_resp.json().get("imdb_id")
  88. return tmdb_id, imdb_id
  89. except Exception as e:
  90. print(f"TMDB search error for {english_title}: {e}")
  91. return None, None
  92. def main(root_dir, use_tmdb=False, tmdb_api_key=None):
  93. chinese_titles = collect_chinese_titles(root_dir)
  94. results = []
  95. unmatched = []
  96. print(f"Found {len(chinese_titles)} unique Chinese titles.")
  97. for chinese_title in chinese_titles:
  98. english_title = match_english_title(chinese_title)
  99. imdb_id = None
  100. tmdb_id = None
  101. if use_tmdb and tmdb_api_key:
  102. try:
  103. search_str = chinese_title.split("-")[1]
  104. print(f"Using TMDB API for ID search for title {search_str} ")
  105. tmdb_id, imdb_id = search_tmdb_id(search_str, tmdb_api_key)
  106. except IndexError as e:
  107. continue
  108. else:
  109. imdb_id = search_imdb_id(english_title)
  110. if english_title and (imdb_id or tmdb_id):
  111. entry = {
  112. "chinese": chinese_title,
  113. "english": english_title,
  114. }
  115. if imdb_id:
  116. entry["imdb_id"] = imdb_id
  117. if tmdb_id:
  118. entry["tmdb_id"] = tmdb_id
  119. results.append(entry)
  120. else:
  121. unmatched.append({
  122. "chinese": chinese_title,
  123. "english": english_title,
  124. "imdb_id": imdb_id if imdb_id else None,
  125. "tmdb_id": tmdb_id if tmdb_id else None,
  126. "reason": "No English match" if not english_title else "No ID match"
  127. })
  128. print(f"Matched: {len(results)}")
  129. print(f"Unmatched: {len(unmatched)}")
  130. with open("ghibli_imdb_results.json", "w", encoding="utf-8") as f:
  131. json.dump({"matched": results, "unmatched": unmatched}, f, ensure_ascii=False, indent=2)
  132. if __name__ == "__main__":
  133. import sys
  134. # Usage: python ghibili.py <directory> [--tmdb TMDB_API_KEY]
  135. use_tmdb = False
  136. tmdb_api_key = '36dfc5c362f731b9b777e1f30028fbb0'
  137. if len(sys.argv) < 2:
  138. print("Usage: python ghiblil.py <directory> [--tmdb TMDB_API_KEY]")
  139. else:
  140. root_dir = sys.argv[1]
  141. if len(sys.argv) >= 4 and sys.argv[2] == "--tmdb":
  142. use_tmdb = True
  143. tmdb_api_key = sys.argv[3]
  144. main(root_dir, use_tmdb, tmdb_api_key)