# -*- coding: utf-8 -*-
"""Match a tree of Chinese-titled movie folders to IMDb / TMDB identifiers.

The script walks a root directory, treats every sub-directory name as a
Chinese movie title, resolves an English title plus IMDb/TMDB IDs for it and
writes the matched and unmatched entries to a JSON report.
"""
import json
import os
import re
import sys

# NOTE: the third-party ``requests`` package is imported inside the functions
# that perform HTTP calls, so the filesystem and title-mapping helpers stay
# importable (and testable) on machines where it is not installed.

# Directory names containing any of these markers are not treated as titles.
EXCLUDE_KEYWORDS = ('删除文件', '默认日语', '切换语言')

# Accepted (lower-cased) file-name endings: ".mkv"/".mp4", optionally followed
# by one stray digit 1-9 (e.g. ".mkv1", ".mp41").
_MEDIA_SUFFIXES = tuple(
    ext + tail
    for ext in ('.mkv', '.mp4')
    for tail in ('', '1', '2', '3', '4', '5', '6', '7', '8', '9')
)

# Files whose name contains one of these (case-sensitive) markers are skipped.
_SKIP_FILE_MARKERS = ('国语', '720P')

# Hard-coded folder-name -> English-title mapping. Keys are compared verbatim
# against directory names, so they must stay exactly as the folders are named.
# NOTE(review): "2001-千Yu千寻" looks unusual - confirm it matches the folder.
_ENGLISH_TITLES = {
    "2013-辉夜姬物语": "The Tale of the Princess Kaguya",
    "2004-哈尔移动城堡": "Howl's Moving Castle",
    "1995-侧耳倾听": "Whisper of the Heart",
    "1992-飞天红猪侠": "Porco Rosso",
    "1988-萤火虫之墓": "Grave of the Fireflies",
    "1989-魔女宅急便": "Kiki's Delivery Service",
    "1994-百变狸猫": "Pom Poko",
    "1984-风之谷": "Nausicaä of the Valley of the Wind",
    "1979-鲁邦三世 卡里奥斯特罗城": "Lupin III: The Castle of Cagliostro",
    "2011-虞美人盛开的山坡": "From Up on Poppy Hill",
    "2008-《悬崖上的金鱼姬》《崖上的波妞》": "Ponyo",
    "2006-地海战记": "Tales from Earthsea",
    "2010-借东西的小矮人亚莉亚蒂": "The Secret World of Arrietty",
    "2002-猫的报恩": "The Cat Returns",
    "1999-我的邻居山田君": "My Neighbors the Yamadas",
    "1988-龙猫": "My Neighbor Totoro",
    "2001-千Yu千寻": "Spirited Away",
    "1993-听到涛声": "Ocean Waves",
    "2014-记忆中的玛妮": "When Marnie Was There",
    "1991-岁月的童话": "Only Yesterday",
    "1997-幽灵公主": "Princess Mononoke",
    "2016-红海龟": "The Red Turtle",
    "2013-起风了": "The Wind Rises",
}

_IMDB_FIND_URL = "https://www.imdb.com/find"
_IMDB_TITLE_ID_RE = re.compile(r'/title/(tt\d+)/')
_TMDB_SEARCH_URL = "https://api.themoviedb.org/3/search/movie"
_TMDB_MOVIE_URL = "https://api.themoviedb.org/3/movie"
_HTTP_TIMEOUT = 10  # seconds, applied to every HTTP request
_DEFAULT_OUTPUT = "ghibli_imdb_results.json"


def _is_wanted_media(filename):
    """Return True if *filename* looks like a wanted .mkv/.mp4 file."""
    if not filename.lower().endswith(_MEDIA_SUFFIXES):
        return False
    return not any(marker in filename for marker in _SKIP_FILE_MARKERS)


def _list_media_files(directory):
    """Return sorted full paths of the wanted media entries in *directory*.

    An unreadable directory yields an empty list (with a warning on stderr)
    so that one bad folder does not abort the whole scan.
    """
    try:
        names = sorted(os.listdir(directory))
    except OSError as exc:
        print(f"Warning: cannot list {directory}: {exc}", file=sys.stderr)
        return []
    return [os.path.join(directory, name) for name in names
            if _is_wanted_media(name)]


def collect_chinese_titles_and_paths(root_dir):
    """Collect unique directory names (titles) below *root_dir*.

    Every directory at any depth whose name contains none of
    ``EXCLUDE_KEYWORDS`` counts as a title. Excluded directories are still
    descended into. For each title the media files directly inside it are
    listed: names ending in .mkv/.mp4 (optionally followed by one digit 1-9),
    excluding names that contain '国语' or '720P'.

    Returns:
        ``(titles, paths, media_files)`` where *titles* is a list of unique
        names in first-seen order, and *paths* / *media_files* map each name
        to its full path and its list of media file paths. When the same name
        occurs in several places, the last one visited wins in both mappings.
    """
    titles = []
    paths = {}
    media_files = {}
    for dirpath, dirnames, _filenames in os.walk(root_dir):
        # Sorting in place makes the traversal (and the report) deterministic.
        dirnames.sort()
        for dirname in dirnames:
            if any(keyword in dirname for keyword in EXCLUDE_KEYWORDS):
                continue
            full_path = os.path.join(dirpath, dirname)
            if dirname not in paths:
                # Only the first sighting adds a title, keeping the list unique.
                titles.append(dirname)
            paths[dirname] = full_path
            media_files[dirname] = _list_media_files(full_path)
    return titles, paths, media_files


def match_english_title(chinese_title):
    """Return the English title for *chinese_title*, or None if unknown.

    Only the hard-coded mapping is consulted; a translation service could be
    plugged in here later.
    """
    return _ENGLISH_TITLES.get(chinese_title)


def search_imdb_id(english_title):
    """Search IMDb for *english_title* and return the first IMDb ID found.

    Returns None when the title is empty, nothing is found, or the request
    fails (the error is printed).
    """
    if not english_title:
        return None
    import requests  # third-party; imported lazily, see note at top of file

    params = {"q": english_title, "s": "tt", "ttype": "ft", "ref_": "fn_ft"}
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    }
    # Best-effort lookup: any failure is reported and treated as "no match".
    try:
        resp = requests.get(_IMDB_FIND_URL, params=params, headers=headers,
                            timeout=_HTTP_TIMEOUT)
        resp.raise_for_status()
        found = _IMDB_TITLE_ID_RE.search(resp.text)
        if found:
            return found.group(1)
    except Exception as e:
        print(f"IMDb search error for {english_title}: {e}")
    return None


def search_tmdb_id(english_title, api_key):
    """Search TMDB by English title.

    Returns:
        ``(tmdb_id, imdb_id)`` of the first search hit; *imdb_id* is None
        when the details request is not successful. ``(None, None)`` when an
        argument is empty, nothing is found, or a request raises.
    """
    if not english_title or not api_key:
        return None, None
    import requests  # third-party; imported lazily, see note at top of file

    search_params = {
        "api_key": api_key,
        "query": english_title,
        "language": "en-US",
    }
    try:
        resp = requests.get(_TMDB_SEARCH_URL, params=search_params,
                            timeout=_HTTP_TIMEOUT)
        resp.raise_for_status()
        hits = resp.json().get("results")
        if hits:
            tmdb_id = hits[0].get("id")
            imdb_id = None
            # The search result has no IMDb ID; the details endpoint does.
            details_resp = requests.get(f"{_TMDB_MOVIE_URL}/{tmdb_id}",
                                        params={"api_key": api_key},
                                        timeout=_HTTP_TIMEOUT)
            if details_resp.ok:
                imdb_id = details_resp.json().get("imdb_id")
            return tmdb_id, imdb_id
    except Exception as e:
        print(f"TMDB search error for {english_title}: {e}")
    return None, None


def _lookup_tmdb_by_chinese_title(search_str, api_key):
    """Search TMDB in zh-CN and fetch the English details of the first hit.

    Returns ``(tmdb_id, imdb_id, english_title)``. Values obtained before a
    failure are kept, so a failing details request still yields the TMDB ID.
    """
    import requests  # third-party; imported lazily, see note at top of file

    tmdb_id = imdb_id = english_title = None
    search_params = {
        "api_key": api_key,
        "query": search_str,
        "language": "zh-CN",
    }
    try:
        resp = requests.get(_TMDB_SEARCH_URL, params=search_params,
                            timeout=_HTTP_TIMEOUT)
        resp.raise_for_status()
        hits = resp.json().get("results")
        if hits:
            tmdb_id = hits[0].get("id")
            details_resp = requests.get(
                f"{_TMDB_MOVIE_URL}/{tmdb_id}",
                params={"api_key": api_key, "language": "en-US"},
                timeout=_HTTP_TIMEOUT,
            )
            if details_resp.ok:
                details = details_resp.json()
                english_title = details.get("title")
                imdb_id = details.get("imdb_id")
    except Exception as e:
        print(f"TMDB search error for {search_str}: {e}")
    return tmdb_id, imdb_id, english_title


def main(root_dir, use_tmdb=False, tmdb_api_key=None,
         output_path=_DEFAULT_OUTPUT):
    """Scan *root_dir*, resolve IDs for every title and write a JSON report.

    Args:
        root_dir: Directory tree whose sub-directory names are the titles.
        use_tmdb: Look titles up on TMDB (requires *tmdb_api_key*) instead of
            searching IMDb with the hard-coded English title.
        tmdb_api_key: TMDB v3 API key; ignored unless *use_tmdb* is true.
        output_path: Where the ``{"matched": [...], "unmatched": [...]}``
            report is written.
    """
    chinese_titles, dir_paths, media_files = collect_chinese_titles_and_paths(root_dir)
    results = []
    unmatched = []
    print(f"Found {len(chinese_titles)} unique Chinese titles.")

    for chinese_title in chinese_titles:
        english_title = match_english_title(chinese_title)
        tmdb_id = None
        dir_path = dir_paths.get(chinese_title, "")
        media_list = media_files.get(chinese_title, [])

        if use_tmdb and tmdb_api_key:
            # Folder names look like "<year>-<title>"; search by the part
            # after the first "-" and fall back to the whole name.
            _year, dash, remainder = chinese_title.partition("-")
            search_str = remainder if dash else chinese_title
            print(f"Using TMDB API for ID search for title {search_str}")
            tmdb_id, imdb_id, fetched_english_title = \
                _lookup_tmdb_by_chinese_title(search_str, tmdb_api_key)
        else:
            imdb_id = search_imdb_id(english_title)
            fetched_english_title = english_title

        # Prefer the title fetched online over the hard-coded one.
        final_english_title = fetched_english_title or english_title

        if final_english_title and (imdb_id or tmdb_id):
            entry = {
                "chinese": chinese_title,
                "english": final_english_title,
                "path": dir_path,
                "media_files": media_list,
            }
            if imdb_id:
                entry["imdb_id"] = imdb_id
            if tmdb_id:
                entry["tmdb_id"] = tmdb_id
            results.append(entry)
        else:
            unmatched.append({
                "chinese": chinese_title,
                "english": final_english_title,
                "imdb_id": imdb_id or None,
                "tmdb_id": tmdb_id or None,
                "path": dir_path,
                "media_files": media_list,
                "reason": "No English match" if not final_english_title else "No ID match",
            })

    print(f"Matched: {len(results)}")
    print(f"Unmatched: {len(unmatched)}")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump({"matched": results, "unmatched": unmatched}, f,
                  ensure_ascii=False, indent=2)


if __name__ == "__main__":
    # Usage:   python <script> ROOT_DIR [--tmdb TMDB_API_KEY]
    # Example: python3 <script> /path/to/anime --tmdb <YOUR_TMDB_API_KEY>
    # Never hard-code the TMDB key in this file; pass it on the command line.
    if len(sys.argv) < 2:
        script = os.path.basename(sys.argv[0])
        print(f"Usage: python {script} ROOT_DIR [--tmdb TMDB_API_KEY]",
              file=sys.stderr)
        sys.exit(1)
    root_dir = sys.argv[1]
    use_tmdb = len(sys.argv) >= 4 and sys.argv[2] == "--tmdb"
    tmdb_api_key = sys.argv[3] if use_tmdb else None
    main(root_dir, use_tmdb, tmdb_api_key)