import os
import sys

# Extensions (lower-case, with leading dot) that are subject to the size check.
# NOTE(review): '.mht', '.txt' and '.url' are not media formats but were in the
# original list, so they are kept to preserve which files match.
_MEDIA_EXTS = (
    '.mkv', '.mp4', '.mht', '.txt', '.url', '.gif',
    '.avi', '.mov', '.flv', '.wmv', '.webm',
)


def _name_matches_pattern(fname):
    """Return True if the file name alone marks it as a match (no size check)."""
    return fname.endswith("__") or "padding" in fname.lower()


def find_small_media_files(root_dir, max_size_mb=50, output_file="small_media_files.txt"):
    """Recursively list small media files and padding-style files under a directory.

    A file is reported when either:

    * its name ends with one of the known extensions (case-insensitive) and
      its size is strictly less than ``max_size_mb`` mebibytes, or
    * its name ends with ``"__"`` or contains ``"padding"`` (case-insensitive),
      regardless of size.

    The matching paths are written to ``output_file`` (UTF-8, one per line) and
    a summary line is printed to stdout.

    Args:
        root_dir: Directory to walk recursively.
        max_size_mb: Size threshold in MiB; files must be strictly smaller.
        output_file: Path of the text file that receives the results.

    Returns:
        The list of matching file paths, in the order they were found.

    Raises:
        OSError: If ``output_file`` cannot be opened for writing.
    """
    max_size_bytes = max_size_mb * 1024 * 1024
    found = []

    for dirpath, _, filenames in os.walk(root_dir):
        for fname in filenames:
            fpath = os.path.join(dirpath, fname)

            # Name-only patterns need no filesystem access, so test them first.
            if _name_matches_pattern(fname):
                found.append(fpath)
                continue

            if not fname.lower().endswith(_MEDIA_EXTS):
                continue

            # getsize() raises OSError for dangling symlinks, files removed
            # while walking, or unreadable entries; skip those rather than
            # aborting the whole scan.
            try:
                size = os.path.getsize(fpath)
            except OSError:
                continue

            if size < max_size_bytes:
                found.append(fpath)

    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(path + "\n" for path in found)

    print(f"Found {len(found)} files matching criteria. Results saved to {output_file}")
    return found


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python find_small_media.py <root_dir> [max_size_mb] [output_file]")
    else:
        root = sys.argv[1]
        # NOTE(review): the command-line default (1 MB) differs from the
        # function's own default (50 MB); kept as-is, confirm it is intended.
        max_mb = int(sys.argv[2]) if len(sys.argv) > 2 else 1
        out_file = sys.argv[3] if len(sys.argv) > 3 else "small_media_files.txt"
        find_small_media_files(root, max_mb, out_file)