# transform_files.py

  1. import json
  2. import os
  3. import re
  4. def sanitize_filename(name):
  5. # Remove illegal characters for most filesystems and Jellyfin
  6. name = re.sub(r'[\\/:*?"<>|]', '', name)
  7. name = name.replace('【', '').replace('】', '')
  8. name = name.replace('《', '').replace('》', '')
  9. name = name.replace('(', '').replace(')', '')
  10. name = name.replace(' ', '.')
  11. name = re.sub(r'\.+', '.', name) # Replace multiple dots with one
  12. return name.strip('.')
  13. def transform_entry(entry):
  14. # Try to extract year and english title for Jellyfin
  15. year = None
  16. title = entry.get("english") or entry.get("chinese")
  17. chinese = entry.get("chinese", "")
  18. tmdb_id = entry.get("tmdb_id")
  19. # Extract year from chinese field if present
  20. m = re.match(r"(\d{4})[- ]", chinese)
  21. if m:
  22. year = m.group(1)
  23. # Build new filename: English.Title.Year.[tmdbid-<id>].ext
  24. transformed_files = []
  25. for f in entry.get("media_files", []):
  26. ext = os.path.splitext(f)[1]
  27. # If extension ends with '1' (e.g., .mp41, .mkv1), strip the '1'
  28. if len(ext) > 1 and ext[-1] == "1":
  29. ext = ext[:-1]
  30. if title:
  31. base = sanitize_filename(title)
  32. parts = [base]
  33. if year:
  34. parts.append(year)
  35. if tmdb_id:
  36. parts.append(f"[tmdbid-{tmdb_id}]")
  37. new_name = ".".join(parts) + ext
  38. transformed_files.append({
  39. "original": f,
  40. "suggested": os.path.join(os.path.dirname(f), new_name)
  41. })
  42. else:
  43. transformed_files.append({
  44. "original": f,
  45. "suggested": f
  46. })
  47. # Add Jellyfin-friendly fields
  48. result = dict(entry)
  49. result["jellyfin_title"] = sanitize_filename(title) if title else None
  50. result["jellyfin_year"] = year
  51. result["jellyfin_media_files"] = transformed_files
  52. return result
  53. # def transform_entry(entry):
  54. # # Try to extract year and english title for Jellyfin
  55. # year = None
  56. # title = entry.get("english") or entry.get("chinese")
  57. # chinese = entry.get("chinese", "")
  58. # tmdb_id = entry.get("tmdb_id")
  59. # # Extract year from chinese field if present
  60. # m = re.match(r"(\d{4})[- ]", chinese)
  61. # if m:
  62. # year = m.group(1)
  63. # # Build new filename: English.Title.Year.[tmdbid-<id>].ext
  64. # transformed_files = []
  65. # for f in entry.get("media_files", []):
  66. # ext = os.path.splitext(f)[1]
  67. # if title:
  68. # base = sanitize_filename(title)
  69. # parts = [base]
  70. # if year:
  71. # parts.append(year)
  72. # if tmdb_id:
  73. # parts.append(f"[tmdbid-{tmdb_id}]")
  74. # new_name = ".".join(parts) + ext
  75. # transformed_files.append({
  76. # "original": f,
  77. # "suggested": os.path.join(os.path.dirname(f), new_name)
  78. # })
  79. # else:
  80. # transformed_files.append({
  81. # "original": f,
  82. # "suggested": f
  83. # })
  84. # # Add Jellyfin-friendly fields
  85. # result = dict(entry)
  86. # result["jellyfin_title"] = sanitize_filename(title) if title else None
  87. # result["jellyfin_year"] = year
  88. # result["jellyfin_media_files"] = transformed_files
  89. # return result
  90. # def transform_entry(entry):
  91. # # Try to extract year and english title for Jellyfin
  92. # year = None
  93. # title = entry.get("english") or entry.get("chinese")
  94. # chinese = entry.get("chinese", "")
  95. # # Extract year from chinese field if present
  96. # m = re.match(r"(\d{4})[- ]", chinese)
  97. # if m:
  98. # year = m.group(1)
  99. # # Build new filename: English.Title.(Year).ext
  100. # transformed_files = []
  101. # for f in entry.get("media_files", []):
  102. # ext = os.path.splitext(f)[1]
  103. # if title:
  104. # if year:
  105. # new_name = f"{sanitize_filename(title)}.{year}{ext}"
  106. # else:
  107. # new_name = f"{sanitize_filename(title)}{ext}"
  108. # transformed_files.append({
  109. # "original": f,
  110. # "suggested": os.path.join(os.path.dirname(f), new_name)
  111. # })
  112. # else:
  113. # transformed_files.append({
  114. # "original": f,
  115. # "suggested": f
  116. # })
  117. # # Add Jellyfin-friendly fields
  118. # result = dict(entry)
  119. # result["jellyfin_title"] = sanitize_filename(title) if title else None
  120. # result["jellyfin_year"] = year
  121. # result["jellyfin_media_files"] = transformed_files
  122. # return result
  123. def main():
  124. with open("ghibli_imdb_results.json", "r", encoding="utf-8") as f:
  125. data = json.load(f)
  126. transformed = {"matched": [], "unmatched": []}
  127. for section in ["matched", "unmatched"]:
  128. for entry in data.get(section, []):
  129. transformed[section].append(transform_entry(entry))
  130. with open("ghibli_jellyfin_ready.json", "w", encoding="utf-8") as f:
  131. json.dump(transformed, f, ensure_ascii=False, indent=2)
  132. if __name__ == "__main__":
  133. main()