| 1234567891011121314151617181920212223242526272829303132333435363738 |
- import os
- import sys
def find_small_media_files(root_dir, max_size_mb=50, output_file="small_media_files.txt"):
    """Walk *root_dir* and record files that look like small media or padding files.

    A file is recorded when either of the following holds:

    * its name (case-insensitively) ends with one of the listed extensions
      AND its size is strictly below ``max_size_mb`` mebibytes, or
    * its name ends with ``"__"`` or contains ``"padding"`` (case-insensitive),
      regardless of its size.

    Files whose size cannot be read (broken symlink, removed during the scan,
    permission denied) are skipped instead of aborting the whole walk.

    Args:
        root_dir: Directory to scan recursively with ``os.walk``.
        max_size_mb: Size threshold in MiB for the extension-based match.
        output_file: Path of the UTF-8 text file that receives one matching
            path per line. It is overwritten if it already exists.

    Returns:
        None. Results are written to *output_file* and a summary line is
        printed to stdout.
    """
    max_size_bytes = max_size_mb * 1024 * 1024
    # NOTE(review): the list also contains non-media extensions ('.mht',
    # '.txt', '.url'); kept as in the original - confirm this is intended.
    media_exts = (
        '.mkv', '.mp4', '.mht', '.txt', '.url', '.gif',
        '.avi', '.mov', '.flv', '.wmv', '.webm',
    )
    found = []
    for dirpath, _, filenames in os.walk(root_dir):
        for fname in filenames:
            fpath = os.path.join(dirpath, fname)
            lowered = fname.lower()

            # Name-pattern matches need no stat call, so they can never fail.
            matches_pattern = fname.endswith("__") or "padding" in lowered
            if not matches_pattern:
                if not lowered.endswith(media_exts):
                    continue
                try:
                    # getsize follows symlinks and raises OSError for dangling
                    # links, vanished files, or unreadable entries.
                    if os.path.getsize(fpath) >= max_size_bytes:
                        continue
                except OSError:
                    continue

            found.append(fpath)

    with open(output_file, "w", encoding="utf-8") as f:
        for path in found:
            f.write(path + "\n")
    print(f"Found {len(found)} files matching criteria. Results saved to {output_file}")
if __name__ == "__main__":
    # Command-line entry point:
    #   python find_small_media.py <directory> [max_size_mb] [output_file]
    usage = "Usage: python find_small_media.py <directory> [max_size_mb] [output_file]"
    if len(sys.argv) < 2:
        print(usage)
    else:
        root = sys.argv[1]
        try:
            # NOTE(review): the CLI default is 1 MB while the function's own
            # default is 50 MB - confirm the difference is intended.
            max_mb = int(sys.argv[2]) if len(sys.argv) > 2 else 1
        except ValueError:
            # A non-integer threshold used to end in a raw traceback; report
            # it clearly and exit with the same non-zero status instead.
            print(f"Error: max_size_mb must be an integer, got {sys.argv[2]!r}", file=sys.stderr)
            print(usage, file=sys.stderr)
            sys.exit(1)
        out_file = sys.argv[3] if len(sys.argv) > 3 else "small_media_files.txt"
        find_small_media_files(root, max_mb, out_file)
|