# -*- coding: utf-8 -*-
"""Map movie folders named in Chinese to English titles and IMDb/TMDB IDs.

The script walks a directory tree, treats every directory name as a movie
title, translates it through a hard-coded table, looks up an IMDb ID (by
scraping the IMDb search page) or a TMDB ID (through the TMDB REST API), and
writes the matched and unmatched titles to a JSON report.
"""
import json
import os
import re
import sys

try:
    import requests
except ImportError:  # keep the offline helpers importable without requests
    requests = None

# Directory name -> English title. Keys must equal the directory names exactly.
_ENGLISH_TITLES = {
    "2013-辉夜姬物语": "The Tale of the Princess Kaguya",
    "2004-哈尔移动城堡": "Howl's Moving Castle",
    "1995-侧耳倾听": "Whisper of the Heart",
    "1992-飞天红猪侠": "Porco Rosso",
    "1988-萤火虫之墓": "Grave of the Fireflies",
    "1989-魔女宅急便": "Kiki's Delivery Service",
    "1994-百变狸猫": "Pom Poko",
    "1984-风之谷": "Nausicaä of the Valley of the Wind",
    "1979-鲁邦三世 卡里奥斯特罗城": "Lupin III: The Castle of Cagliostro",
    "2011-虞美人盛开的山坡": "From Up on Poppy Hill",
    "2008-《悬崖上的金鱼姬》《崖上的波妞》": "Ponyo",
    "2006-地海战记": "Tales from Earthsea",
    "2010 借东西的小矮人亚莉亚蒂": "The Secret World of Arrietty",
    "2002-猫的报恩": "The Cat Returns",
    "1999-我的邻居山田君": "My Neighbors the Yamadas",
    "1988-龙猫": "My Neighbor Totoro",
    # NOTE(review): the "Yu" looks like a transliteration of one character;
    # presumably it mirrors the real directory name - confirm before editing.
    "2001-千Yu千寻": "Spirited Away",
    "1993-听到涛声": "Ocean Waves",
    "2014-记忆中的玛妮": "When Marnie Was There",
    "1991-岁月的童话": "Only Yesterday",
    "1997-幽灵公主": "Princess Mononoke",
    "2016-红海龟": "The Red Turtle",
    "2013-起风了": "The Wind Rises",
}

_IMDB_FIND_URL = "https://www.imdb.com/find"
_TMDB_SEARCH_URL = "https://api.themoviedb.org/3/search/movie"
_TMDB_MOVIE_URL = "https://api.themoviedb.org/3/movie/{tmdb_id}"
_REQUEST_TIMEOUT = 10  # seconds, applied to every HTTP request
_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/122.0.0.0 Safari/537.36"
)

# First "/title/tt1234567/" link found in the IMDb search result page.
_IMDB_TITLE_ID_RE = re.compile(r'/title/(tt\d+)/')
# Leading four-digit year followed by dash and/or whitespace, e.g. "2013-".
_YEAR_PREFIX_RE = re.compile(r'^\d{4}[\s-]+(.+)$')


def _require_requests():
    """Raise a descriptive ImportError when the requests package is missing."""
    if requests is None:
        raise ImportError(
            "The 'requests' package is required for online lookups "
            "(pip install requests)."
        )


def collect_chinese_titles(root_dir):
    """Return the unique directory names found anywhere below *root_dir*.

    Every directory name in the tree is treated as a movie title. The result
    is sorted so that repeated runs process and report titles in the same
    order.
    """
    titles = set()
    for _, dirnames, _ in os.walk(root_dir):
        titles.update(dirnames)
    return sorted(titles)


def match_english_title(chinese_title):
    """Return the English title for *chinese_title*, or None if unknown.

    Only the hard-coded table is consulted; a translation service could be
    plugged in here for titles the table does not cover.
    """
    return _ENGLISH_TITLES.get(chinese_title)


def strip_year_prefix(title):
    """Return *title* without a leading ``YYYY-`` / ``YYYY `` year prefix.

    Titles that do not start with such a prefix are returned unchanged
    (apart from surrounding whitespace), so no title is ever discarded.
    """
    match = _YEAR_PREFIX_RE.match(title)
    return (match.group(1) if match else title).strip()


def search_imdb_id(english_title):
    """Search IMDb for *english_title* and return the first IMDb ID found.

    Returns None when the title is empty, the request fails, or the result
    page contains no title link. Request errors are printed, not raised.
    """
    if not english_title:
        return None
    _require_requests()
    params = {"q": english_title, "s": "tt", "ttype": "ft", "ref_": "fn_ft"}
    headers = {"User-Agent": _USER_AGENT}
    try:
        resp = requests.get(
            _IMDB_FIND_URL,
            params=params,
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
        resp.raise_for_status()
    except requests.RequestException as exc:
        print(f"IMDb search error for {english_title}: {exc}")
        return None
    match = _IMDB_TITLE_ID_RE.search(resp.text)
    return match.group(1) if match else None


def search_tmdb_id(english_title, api_key):
    """Search TMDB for a movie and return ``(tmdb_id, imdb_id)``.

    The first search result is used. The IMDb ID comes from a second
    "movie details" request; if only that request fails, the TMDB ID is still
    returned with ``imdb_id`` set to None. ``(None, None)`` is returned when
    the title or key is empty, the search fails, or nothing is found.
    Errors are printed, not raised.
    """
    if not english_title or not api_key:
        return None, None
    _require_requests()
    search_params = {
        "api_key": api_key,
        "query": english_title,
        "language": "en-US",
    }
    try:
        resp = requests.get(
            _TMDB_SEARCH_URL, params=search_params, timeout=_REQUEST_TIMEOUT
        )
        resp.raise_for_status()
        results = resp.json().get("results")
    except (requests.RequestException, ValueError) as exc:
        print(f"TMDB search error for {english_title}: {exc}")
        return None, None
    if not results:
        return None, None

    tmdb_id = results[0].get("id")
    if tmdb_id is None:
        return None, None

    # Separate try block: a failed details lookup must not throw away the
    # TMDB ID that the search already produced.
    imdb_id = None
    try:
        details_resp = requests.get(
            _TMDB_MOVIE_URL.format(tmdb_id=tmdb_id),
            params={"api_key": api_key},
            timeout=_REQUEST_TIMEOUT,
        )
        if details_resp.ok:
            imdb_id = details_resp.json().get("imdb_id")
    except (requests.RequestException, ValueError) as exc:
        print(f"TMDB search error for {english_title}: {exc}")
    return tmdb_id, imdb_id


def main(root_dir, use_tmdb=False, tmdb_api_key=None,
         output_path="ghibli_imdb_results.json"):
    """Look up IDs for every title under *root_dir* and write a JSON report.

    Args:
        root_dir: Directory tree whose directory names are movie titles.
        use_tmdb: Query the TMDB API instead of scraping IMDb. Only honoured
            when *tmdb_api_key* is also given.
        tmdb_api_key: TMDB API key used when *use_tmdb* is true.
        output_path: File that receives the report, a JSON object with the
            keys ``"matched"`` and ``"unmatched"``.
    """
    chinese_titles = collect_chinese_titles(root_dir)
    results = []
    unmatched = []
    print(f"Found {len(chinese_titles)} unique Chinese titles.")

    for chinese_title in chinese_titles:
        english_title = match_english_title(chinese_title)
        imdb_id = None
        tmdb_id = None
        if use_tmdb and tmdb_api_key:
            # TMDB is queried with the directory title minus its year prefix.
            search_str = strip_year_prefix(chinese_title)
            print(f"Using TMDB API for ID search for title {search_str} ")
            tmdb_id, imdb_id = search_tmdb_id(search_str, tmdb_api_key)
        else:
            imdb_id = search_imdb_id(english_title)

        if english_title and (imdb_id or tmdb_id):
            entry = {"chinese": chinese_title, "english": english_title}
            if imdb_id:
                entry["imdb_id"] = imdb_id
            if tmdb_id:
                entry["tmdb_id"] = tmdb_id
            results.append(entry)
        else:
            unmatched.append({
                "chinese": chinese_title,
                "english": english_title,
                "imdb_id": imdb_id or None,
                "tmdb_id": tmdb_id or None,
                "reason": ("No English match" if not english_title
                           else "No ID match"),
            })

    print(f"Matched: {len(results)}")
    print(f"Unmatched: {len(unmatched)}")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump({"matched": results, "unmatched": unmatched}, f,
                  ensure_ascii=False, indent=2)


if __name__ == "__main__":
    # Usage: python <script> ROOT_DIR [--tmdb [TMDB_API_KEY]]
    if len(sys.argv) < 2:
        script = os.path.basename(sys.argv[0])
        print(f"Usage: python {script} ROOT_DIR [--tmdb [TMDB_API_KEY]]")
        sys.exit(1)

    root_dir = sys.argv[1]
    use_tmdb = len(sys.argv) >= 3 and sys.argv[2] == "--tmdb"
    tmdb_api_key = None
    if use_tmdb:
        # Never hard-code the key: take it from the command line, or from
        # the TMDB_API_KEY environment variable when it is omitted.
        if len(sys.argv) >= 4:
            tmdb_api_key = sys.argv[3]
        else:
            tmdb_api_key = os.environ.get("TMDB_API_KEY")
        if not tmdb_api_key:
            print("No TMDB API key provided; falling back to IMDb search.")
    main(root_dir, use_tmdb, tmdb_api_key)