# -*- coding: utf-8 -*-
# gogadmin / ghibili

import os
import json
import requests

def collect_chinese_titles_and_paths(root_dir):
    """Walk ``root_dir`` and collect movie directories and their media files.

    Every directory reported by ``os.walk`` (at any depth) whose name does not
    contain one of the exclusion keywords is treated as a movie title.

    Args:
        root_dir: Directory to traverse.

    Returns:
        A ``(titles, paths, media_files)`` tuple:

        * ``titles`` -- directory names in discovery order, each listed once.
        * ``paths`` -- maps a directory name to its full path. When the same
          name occurs in several places, the last one visited wins.
        * ``media_files`` -- maps a directory name to the sorted full paths of
          its direct entries whose lower-cased name ends in ``.mkv`` or
          ``.mp4``, optionally followed by one stray digit 1-9 (``.mkv1``,
          ``.mp49``, ...). Entries whose name contains '国语' or '720P'
          (case-sensitive) are skipped. The list is empty when the directory
          cannot be listed.
    """
    exclude_keywords = ('删除文件', '默认日语', '切换语言')
    skipped_markers = ('国语', '720P')
    # '.mkv', '.mkv1' ... '.mkv9', '.mp4', '.mp41' ... '.mp49'
    media_suffixes = tuple(
        ext + tail
        for ext in ('.mkv', '.mp4')
        for tail in ('', *'123456789')
    )

    titles = []
    paths = {}
    media_files = {}
    for dirpath, dirnames, _filenames in os.walk(root_dir):
        for dirname in dirnames:
            if any(keyword in dirname for keyword in exclude_keywords):
                continue
            full_path = os.path.join(dirpath, dirname)
            # Keep ``titles`` unique so callers do not process a name twice.
            if dirname not in paths:
                titles.append(dirname)
            paths[dirname] = full_path
            try:
                # Sorted so the result does not depend on filesystem order.
                entries = sorted(os.listdir(full_path))
            except OSError as exc:
                # Best effort: report the unreadable directory and carry on.
                print(f"Cannot list {full_path}: {exc}")
                entries = []
            media_files[dirname] = [
                os.path.join(full_path, entry)
                for entry in entries
                if entry.lower().endswith(media_suffixes)
                and not any(marker in entry for marker in skipped_markers)
            ]
    return titles, paths, media_files

def match_english_title(chinese_title):
    """Return the English title for a known movie directory name.

    The lookup is an exact match against a fixed table keyed by the
    "<year>-<Chinese title>" directory names. No translation service is
    called; a DeepSeek API fallback could be added for unknown names.

    Args:
        chinese_title: Directory name to look up.

    Returns:
        The English title, or ``None`` when the name is not in the table.
    """
    known_titles = {
        "2013-辉夜姬物语": "The Tale of the Princess Kaguya",
        "2004-哈尔移动城堡": "Howl's Moving Castle",
        "1995-侧耳倾听": "Whisper of the Heart",
        "1992-飞天红猪侠": "Porco Rosso",
        "1988-萤火虫之墓": "Grave of the Fireflies",
        "1989-魔女宅急便": "Kiki's Delivery Service",
        "1994-百变狸猫": "Pom Poko",
        "1984-风之谷": "Nausicaä of the Valley of the Wind",
        "1979-鲁邦三世 卡里奥斯特罗城": "Lupin III: The Castle of Cagliostro",
        "2011-虞美人盛开的山坡": "From Up on Poppy Hill",
        "2008-《悬崖上的金鱼姬》《崖上的波妞》": "Ponyo",
        "2006-地海战记": "Tales from Earthsea",
        "2010-借东西的小矮人亚莉亚蒂": "The Secret World of Arrietty",
        "2002-猫的报恩": "The Cat Returns",
        "1999-我的邻居山田君": "My Neighbors the Yamadas",
        "1988-龙猫": "My Neighbor Totoro",
        "2001-千Yu千寻": "Spirited Away",
        "1993-听到涛声": "Ocean Waves",
        "2014-记忆中的玛妮": "When Marnie Was There",
        "1991-岁月的童话": "Only Yesterday",
        "1997-幽灵公主": "Princess Mononoke",
        "2016-红海龟": "The Red Turtle",
        "2013-起风了": "The Wind Rises",
    }
    # dict.get yields None for names that are not in the table.
    return known_titles.get(chinese_title)

def search_imdb_id(english_title):
    """Query IMDb's find page and return the first title ID in the response.

    Args:
        english_title: Title to search for. A falsy value short-circuits
            to ``None`` without any request being made.

    Returns:
        The first ``tt<digits>`` identifier found in a ``/title/.../`` link
        of the response body, or ``None`` when nothing matches or the request
        fails. Failures are printed rather than raised.
    """
    if not english_title:
        return None

    import re

    query = {"q": english_title, "s": "tt", "ttype": "ft", "ref_": "fn_ft"}
    # A browser-like User-Agent is sent with the request.
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    }
    try:
        response = requests.get(
            "https://www.imdb.com/find",
            params=query,
            headers=browser_headers,
            timeout=10,
        )
        response.raise_for_status()
        first_hit = re.search(r'/title/(tt\d+)/', response.text)
        if first_hit is not None:
            return first_hit.group(1)
    except Exception as e:
        print(f"IMDb search error for {english_title}: {e}")
    return None

def search_tmdb_id(english_title, api_key):
    """Search TMDB by English title and return the IDs of the first result.

    Args:
        english_title: Title used as the search query (``language=en-US``).
        api_key: TMDB API key, sent as the ``api_key`` query parameter.

    Returns:
        ``(tmdb_id, imdb_id)`` for the first search result. ``imdb_id`` is
        taken from the movie-details endpoint and is ``None`` when that
        request does not succeed. ``(None, None)`` is returned when either
        argument is falsy, when there are no results, or when an exception
        occurs (the error is printed, not raised).
    """
    if not (english_title and api_key):
        return None, None

    try:
        search_resp = requests.get(
            "https://api.themoviedb.org/3/search/movie",
            params={
                "api_key": api_key,
                "query": english_title,
                "language": "en-US",
            },
            timeout=10,
        )
        search_resp.raise_for_status()
        hits = search_resp.json().get("results")
        if hits:
            tmdb_id = hits[0].get("id")
            # The search result carries no IMDb ID; ask the details endpoint.
            details_resp = requests.get(
                f"https://api.themoviedb.org/3/movie/{tmdb_id}",
                params={"api_key": api_key},
                timeout=10,
            )
            imdb_id = details_resp.json().get("imdb_id") if details_resp.ok else None
            return tmdb_id, imdb_id
    except Exception as e:
        print(f"TMDB search error for {english_title}: {e}")
    return None, None


def _tmdb_lookup_by_chinese_title(search_str, tmdb_api_key):
    """Search TMDB (``language=zh-CN``) and fetch English details of the first hit.

    Returns ``(tmdb_id, imdb_id, english_title)``. Values gathered before a
    failure are kept; the error itself is printed, not raised.
    """
    tmdb_id = imdb_id = english = None
    try:
        search_resp = requests.get(
            "https://api.themoviedb.org/3/search/movie",
            params={
                "api_key": tmdb_api_key,
                "query": search_str,
                "language": "zh-CN",
            },
            timeout=10,
        )
        search_resp.raise_for_status()
        hits = search_resp.json().get("results")
        if hits:
            tmdb_id = hits[0].get("id")
            # The details endpoint supplies the English title and IMDb ID.
            details_resp = requests.get(
                f"https://api.themoviedb.org/3/movie/{tmdb_id}",
                params={"api_key": tmdb_api_key, "language": "en-US"},
                timeout=10,
            )
            if details_resp.ok:
                details = details_resp.json()
                english = details.get("title")
                imdb_id = details.get("imdb_id")
    except Exception as e:
        print(f"TMDB search error for {search_str}: {e}")
    return tmdb_id, imdb_id, english


def main(root_dir, use_tmdb=False, tmdb_api_key=None):
    """Scan ``root_dir``, resolve movie IDs and write the result as JSON.

    For each title returned by ``collect_chinese_titles_and_paths`` the
    hard-coded English title is looked up first. Then:

    * if ``use_tmdb`` is true and ``tmdb_api_key`` is non-empty, TMDB is
      searched with the part of the directory name after the first "-"
      (or the whole name when there is no "-");
    * otherwise IMDb is searched with the hard-coded English title.

    A title counts as matched when it has an English title and at least one
    ID. Everything else is listed as unmatched with a ``reason``.

    Args:
        root_dir: Directory tree to scan.
        use_tmdb: Use the TMDB API instead of the IMDb search page.
        tmdb_api_key: TMDB API key; TMDB mode is skipped when it is falsy.

    Side effects:
        Prints progress and writes ``ghibli_imdb_results.json`` to the
        current working directory.
    """
    chinese_titles, dir_paths, media_files = collect_chinese_titles_and_paths(root_dir)
    print(f"Found {len(chinese_titles)} unique Chinese titles.")

    matched, unmatched = [], []
    for chinese_title in chinese_titles:
        hardcoded_title = match_english_title(chinese_title)
        location = dir_paths.get(chinese_title, "")
        files = media_files.get(chinese_title, [])

        if use_tmdb and tmdb_api_key:
            # Directory names look like "<year>-<title>"; search on the title part.
            _prefix, dash, remainder = chinese_title.partition("-")
            search_str = remainder if dash else chinese_title
            print(f"Using TMDB API for ID search for title {search_str}")
            tmdb_id, imdb_id, fetched_title = _tmdb_lookup_by_chinese_title(
                search_str, tmdb_api_key
            )
        else:
            tmdb_id = None
            imdb_id = search_imdb_id(hardcoded_title)
            fetched_title = hardcoded_title

        # A fetched English title takes precedence over the hard-coded one.
        final_title = fetched_title or hardcoded_title

        if final_title and (imdb_id or tmdb_id):
            record = {
                "chinese": chinese_title,
                "english": final_title,
                "path": location,
                "media_files": files,
            }
            if imdb_id:
                record["imdb_id"] = imdb_id
            if tmdb_id:
                record["tmdb_id"] = tmdb_id
            matched.append(record)
            continue

        unmatched.append({
            "chinese": chinese_title,
            "english": final_title,
            "imdb_id": imdb_id or None,
            "tmdb_id": tmdb_id or None,
            "path": location,
            "media_files": files,
            "reason": "No ID match" if final_title else "No English match",
        })

    print(f"Matched: {len(matched)}")
    print(f"Unmatched: {len(unmatched)}")
    with open("ghibli_imdb_results.json", "w", encoding="utf-8") as out:
        json.dump({"matched": matched, "unmatched": unmatched}, out, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    import sys
    # Usage: python ghibili.py <directory> [--tmdb TMDB_API_KEY]
    # Example: python3 ghibili.py /media/yazoo/luks-67672a15-a412-4a17-bb01-c76509e21243/crm/crm-media/anime/赠品：宫崎骏+新海城动画 --tmdb <TMDB_API_KEY>
    # The TMDB key is taken from the command line only; never hard-code a
    # real key in this file.
    if len(sys.argv) < 2:
        print("Usage: python ghibili.py <directory> [--tmdb TMDB_API_KEY]")
        # Non-zero status so shells and scripts can detect the usage error.
        sys.exit(1)
    root_dir = sys.argv[1]
    # TMDB mode needs both the flag and a key argument after it; otherwise
    # the script falls back to the IMDb search.
    use_tmdb = len(sys.argv) >= 4 and sys.argv[2] == "--tmdb"
    tmdb_api_key = sys.argv[3] if use_tmdb else None
    main(root_dir, use_tmdb, tmdb_api_key)