# -*- coding: utf-8 -*-
# gogadmin / ghibili

import os
import json
import requests

def collect_chinese_titles(root_dir):
    """Return the distinct directory names found anywhere below ``root_dir``.

    Every sub-directory name, at any depth, is treated as a movie title.
    Names that occur more than once are reported once. The order of the
    returned list is unspecified because the names are gathered in a set.
    """
    seen_names = set()
    for _parent, subdirs, _files in os.walk(root_dir):
        # Only directory names matter here; files are ignored entirely.
        seen_names.update(subdirs)
    return list(seen_names)

def match_english_title(chinese_title):
    """
    Look up the English title for a "<year>-<Chinese title>" directory name.

    Only the built-in table below is consulted, and the name must equal one
    of its keys exactly. Returns the English title, or None when the name is
    not listed.
    """
    known_titles = {
        "2013-辉夜姬物语": "The Tale of the Princess Kaguya",
        "2004-哈尔移动城堡": "Howl's Moving Castle",
        "1995-侧耳倾听": "Whisper of the Heart",
        "1992-飞天红猪侠": "Porco Rosso",
        "1988-萤火虫之墓": "Grave of the Fireflies",
        "1989-魔女宅急便": "Kiki's Delivery Service",
        "1994-百变狸猫": "Pom Poko",
        "1984-风之谷": "Nausicaä of the Valley of the Wind",
        "1979-鲁邦三世 卡里奥斯特罗城": "Lupin III: The Castle of Cagliostro",
        "2011-虞美人盛开的山坡": "From Up on Poppy Hill",
        "2008-《悬崖上的金鱼姬》《崖上的波妞》": "Ponyo",
        "2006-地海战记": "Tales from Earthsea",
        "2010-借东西的小矮人亚莉亚蒂": "The Secret World of Arrietty",
        "2002-猫的报恩": "The Cat Returns",
        "1999-我的邻居山田君": "My Neighbors the Yamadas",
        "1988-龙猫": "My Neighbor Totoro",
        "2001-千Yu千寻": "Spirited Away",
        "1993-听到涛声": "Ocean Waves",
        "2014-记忆中的玛妮": "When Marnie Was There",
        "1991-岁月的童话": "Only Yesterday",
        "1997-幽灵公主": "Princess Mononoke",
        "2016-红海龟": "The Red Turtle",
        "2013-起风了": "The Wind Rises",
    }
    # A translation service (e.g. the DeepSeek API) could be consulted here
    # for names missing from the table; for now they simply yield None.
    return known_titles.get(chinese_title)

def search_imdb_id(english_title):
    """Query IMDb's title search page and return the first IMDb ID it mentions.

    Returns None when ``english_title`` is empty, when the request fails
    (the error is printed, not raised), or when the response text contains
    no ``/title/tt<digits>/`` link.
    """
    if not english_title:
        return None
    import re

    query = {"q": english_title, "s": "tt", "ttype": "ft", "ref_": "fn_ft"}
    # Browser-like User-Agent; presumably needed so IMDb serves the normal
    # search page to a script — TODO confirm.
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    }
    try:
        response = requests.get(
            "https://www.imdb.com/find",
            params=query,
            headers=browser_headers,
            timeout=10,
        )
        response.raise_for_status()
        # The first title link in the page is taken as the best match.
        first_hit = re.search(r'/title/(tt\d+)/', response.text)
        if first_hit:
            return first_hit.group(1)
    except Exception as e:
        # Best effort: one failed lookup must not abort the whole run.
        print(f"IMDb search error for {english_title}: {e}")
    return None

def search_tmdb_id(english_title, api_key):
    """Look up a movie on TMDB by its English title.

    Returns a ``(tmdb_id, imdb_id)`` tuple taken from the first search
    result. ``imdb_id`` is None when the follow-up details request does not
    succeed. ``(None, None)`` is returned when either argument is empty,
    when the search has no results, or when a request raises (the error is
    printed, not raised).
    """
    if not (english_title and api_key):
        return None, None
    try:
        search_resp = requests.get(
            "https://api.themoviedb.org/3/search/movie",
            params={
                "api_key": api_key,
                "query": english_title,
                "language": "en-US",
            },
            timeout=10,
        )
        search_resp.raise_for_status()
        hits = search_resp.json().get("results")
        if not hits:
            return None, None
        tmdb_id = hits[0].get("id")
        # The search result carries no IMDb ID, so ask the details endpoint.
        details_resp = requests.get(
            f"https://api.themoviedb.org/3/movie/{tmdb_id}",
            params={"api_key": api_key},
            timeout=10,
        )
        imdb_id = details_resp.json().get("imdb_id") if details_resp.ok else None
        return tmdb_id, imdb_id
    except Exception as e:
        # Best effort: report the problem and fall through to "not found".
        print(f"TMDB search error for {english_title}: {e}")
    return None, None
def collect_chinese_titles_and_paths(root_dir):
    """Collect the unique directory names below ``root_dir`` and a path for each.

    Every sub-directory name, at any depth, is treated as a movie title.

    Returns:
        A ``(titles, paths)`` tuple. ``titles`` lists each distinct directory
        name once, in the order it was first encountered by ``os.walk``.
        ``paths`` maps each name to the full path of a directory with that
        name; when the same name occurs in several places, the one visited
        last wins.
    """
    paths = {}
    for dirpath, dirnames, _ in os.walk(root_dir):
        for dirname in dirnames:
            paths[dirname] = os.path.join(dirpath, dirname)
    # Take the titles from the dict keys so a name that appears in several
    # sub-trees is reported (and later looked up) only once.
    return list(paths), paths


def _lookup_tmdb_by_chinese_title(search_str, api_key):
    """Search TMDB in zh-CN for ``search_str`` and fetch the first hit's details.

    Returns ``(tmdb_id, imdb_id, english_title)``. Anything that could not be
    obtained is None. If a request fails part-way through, the error is
    printed and whatever was already found is still returned.
    """
    tmdb_id = imdb_id = english_title = None
    try:
        resp = requests.get(
            "https://api.themoviedb.org/3/search/movie",
            params={
                "api_key": api_key,
                "query": search_str,
                "language": "zh-CN",
            },
            timeout=10,
        )
        resp.raise_for_status()
        hits = resp.json().get("results")
        if hits:
            tmdb_id = hits[0].get("id")
            # Ask for the details in en-US to get the English title and the
            # IMDb ID in one request.
            details_resp = requests.get(
                f"https://api.themoviedb.org/3/movie/{tmdb_id}",
                params={"api_key": api_key, "language": "en-US"},
                timeout=10,
            )
            if details_resp.ok:
                details = details_resp.json()
                english_title = details.get("title")
                imdb_id = details.get("imdb_id")
    except Exception as e:
        # Best effort: one failed lookup must not abort the whole run.
        print(f"TMDB search error for {search_str}: {e}")
    return tmdb_id, imdb_id, english_title


def main(root_dir, use_tmdb=False, tmdb_api_key=None, output_path="ghibli_imdb_results.json"):
    """Match every directory below ``root_dir`` to a movie and write a JSON report.

    Args:
        root_dir: Directory tree whose sub-directory names are movie titles.
        use_tmdb: When true and ``tmdb_api_key`` is set, look titles up on
            TMDB (searching with the part of the name after the first "-").
            Otherwise the hard-coded English title is searched on IMDb.
        tmdb_api_key: TMDB API key; only used when ``use_tmdb`` is true.
        output_path: Where the JSON report is written.

    The report has two lists: ``matched`` (an English title and at least one
    ID were found) and ``unmatched`` (with a ``reason`` field).
    """
    chinese_titles, dir_paths = collect_chinese_titles_and_paths(root_dir)
    results = []
    unmatched = []
    print(f"Found {len(chinese_titles)} unique Chinese titles.")
    for chinese_title in chinese_titles:
        english_title = match_english_title(chinese_title)
        dir_path = dir_paths.get(chinese_title, "")
        tmdb_id = None

        if use_tmdb and tmdb_api_key:
            # Names look like "<year>-<title>"; search with the title part,
            # or with the whole name when there is no "-" separator.
            _, dash, remainder = chinese_title.partition("-")
            search_str = remainder if dash else chinese_title
            print(f"Using TMDB API for ID search for title {search_str}")
            tmdb_id, imdb_id, fetched_english_title = _lookup_tmdb_by_chinese_title(
                search_str, tmdb_api_key
            )
        else:
            imdb_id = search_imdb_id(english_title)
            fetched_english_title = english_title

        # Prefer the title fetched from TMDB; fall back to the hard-coded one.
        final_english_title = fetched_english_title or english_title

        if final_english_title and (imdb_id or tmdb_id):
            entry = {
                "chinese": chinese_title,
                "english": final_english_title,
                "path": dir_path,
            }
            if imdb_id:
                entry["imdb_id"] = imdb_id
            if tmdb_id:
                entry["tmdb_id"] = tmdb_id
            results.append(entry)
        else:
            unmatched.append({
                "chinese": chinese_title,
                "english": final_english_title,
                "imdb_id": imdb_id or None,
                "tmdb_id": tmdb_id or None,
                "path": dir_path,
                "reason": "No English match" if not final_english_title else "No ID match",
            })
    print(f"Matched: {len(results)}")
    print(f"Unmatched: {len(unmatched)}")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump({"matched": results, "unmatched": unmatched}, f, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    import sys
    # Usage: python ghibili.py <directory> [--tmdb TMDB_API_KEY]
    # The TMDB key comes only from the command line. No key is kept in the
    # source: without --tmdb the key is never read, so a default here would
    # be nothing but a leaked credential.
    use_tmdb = False
    tmdb_api_key = None
    if len(sys.argv) < 2:
        print("Usage: python ghibili.py <directory> [--tmdb TMDB_API_KEY]")
        # Non-zero status so calling scripts can detect the usage error.
        sys.exit(1)
    root_dir = sys.argv[1]
    if len(sys.argv) >= 4 and sys.argv[2] == "--tmdb":
        use_tmdb = True
        tmdb_api_key = sys.argv[3]
    main(root_dir, use_tmdb, tmdb_api_key)