-
Notifications
You must be signed in to change notification settings - Fork 0
/
fsmov.py
447 lines (353 loc) · 14.8 KB
/
fsmov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
#!/usr/bin/python
import csv
import json
import os
import shlex
import subprocess
import sys
import time
from collections import defaultdict
from multiprocessing.dummy import Pool as ThreadPool
################################################################################
## Config
import signal
include_ext = ( # File types to match
'.mov',
'.mp4',
'.mpg'
'.avi',
'.m4v',
'.mkv',
'.mpeg',
'.qt',
'.r3d'
# '.dat'
)
csv_file_name = 'scan_output.csv' # Output file
frame_grab_dir = 'frames' # Frame extraction directory
frame_size = '480x300' # Frame size
prefix_num_dirs = 2 # Number of directory names to prefix frame file with
copy_results = True # Whether to copy to clipboard afterwards (will overwrite clipboard!)
use_threads = True # Whether to use threading (might prevent exiting the program until completion)
thread_count = 8 # Number of threads. Ideally matches the number of CPU cores (4-16)
################################################################################
## State
error_list = [] # Any errors that happen while reading files
pool = None
################################################################################
## Functions
# Order results for spreadsheet import
def order_file_details(metadata, clip_index, formatted_path, path):
## !!
# add details to the results
# Change the order or insert items here. This will be the final order of results
movie_name = os.path.basename(path)
return [
formatted_path[0], # last 3 directories of path
formatted_path[1],
formatted_path[2],
movie_name, # movie file name
metadata['size'], # file size (b,kb,mb, or gb)
metadata['creation_date'], # file creation data
'', # blank for notes
clip_index, # clip number
metadata['duration'], # duration (HH:MM:SS)
metadata['width-height'], # width and height
path.replace("/Volumes", '') # full path (minus "/Volumes")
]
# Process a directory of files. Most of the magic happens here
def process_directory(directory_path, files):
clip = 1
total_clips = len(files)
results = []
def process_file(file_tuple):
filename = file_tuple[0]
clip = file_tuple[1]
# for each file
path = os.path.join(directory_path, filename)
# get details
metadata = get_video_metadata(path)
clip_str = "%i/%i" % (clip, total_clips)
# Split the path into components
formatted_path = directory_path.replace("/Volumes/", "").split("/")[-3:]
while len(formatted_path) < 3:
# ensure formatted path has at least 3 items
formatted_path.insert(0, '')
results.append(order_file_details(metadata, clip_str, formatted_path, path))
# increment clip
clip += 1
return order_file_details(metadata, clip_str, formatted_path, path)
pool = ThreadPool(thread_count)
results = pool.map(process_file, list(map(
lambda x, y: (x, y), files, range(1, len(files)+1)
)))
pool.close()
pool.join()
return results
# Output the final results as csv and print
def save_results(processed_results):
# save csv
with open(csv_file_name, 'w') as csvfile:
csvwriter = csv.writer(csvfile,
delimiter='\t',
quotechar='|',
quoting=csv.QUOTE_MINIMAL)
# for row in processed_results:
csvwriter.writerows(processed_results)
# Search for files and save
def find_files(root_directory):
processed_results = []
empty_dirs = []
root_directory = os.path.realpath(os.path.expanduser(root_directory))
if not os.path.exists(root_directory):
error_exit("Invalid path! %s" % root_directory)
dir_files_map = recursive_walk(root_directory, use_threads)
print("%i directories found, processing..." % len(dir_files_map))
for directory, files in dir_files_map: #dir_files_map.items():
if len(files):
processed_results += process_directory(directory, files)
else:
empty_dirs += [directory]
print("**** %i results ****\n" % len(processed_results))
save_results(processed_results)
# print the resulting file
with open(csv_file_name, 'r') as csvfile:
print(csvfile.read())
print("Output saved to %s <3\n" % csv_file_name)
# print directories without results
if empty_dirs:
print("**** %i empty directories ****" % len(empty_dirs))
# print directories without results
if error_list:
print("**** %i errors ****" % len(error_list))
print("\n--".join(error_list[:50]))
################################################################################
## Utilities
def filter_files(filenames):
results = []
for filename in filenames:
# only pick files that match a filter
if str(filename).lower().endswith(include_ext):
results.append(filename)
if len(results):
return results
dir_count = 1
match_count = 0
# Walk through a directory structure recursively
def recursive_walk(base_dir, top_level=False, threaded=False):
# matches is a mapping of directory name to a list of movies
matches = [] #OrderedDict()
global match_count, dir_count
for root, dirnames, filenames in os.walk(base_dir):
results = filter_files(filenames)
if results:
matches.append((root, results))
# matches[root] = results
match_count += len(results)
# if top_level:
# directories = map(lambda x: os.path.join(root, x), dirnames)
# for directory in directories:
# matches += recursive_walk(directory, threaded=True)
# break
if threaded or top_level:
directories = map(lambda x: os.path.join(root, x), dirnames)
try:
matches = [matches] + pool.map(recursive_walk, directories)
pool.close()
pool.join()
except (KeyboardInterrupt, SystemExit):
pool.terminate()
pool.join()
raise KeyboardInterrupt
# matches = list(filter(lambda x: len(matches), matches))
matches = list(reduce(lambda x, y: x + y, matches))
break
dir_count += 1
if dir_count % 1000 == 0:
print("%i directories scanned, %i movies" % (dir_count, match_count))
return matches
# Function to extract the metadata of the input video file using ffprobe (ffmpeg)
def get_video_metadata(path_to_video_file):
results = defaultdict(str)
file_stat = os.stat(path_to_video_file)
results['size'] = get_human_readable_size(file_stat.st_size, 2)
results['creation_date'] = time.ctime(file_stat.st_ctime)
try:
# Use ffprobe to get metadata
cmd = "%s -v quiet -print_format json -show_streams" % ffprobe_path
args = shlex.split(cmd)
args.append(path_to_video_file)
# run the ffprobe process, decode stdout into utf-8 & convert to JSON
ffprobe_output = subprocess.check_output(args).decode('utf-8')
ffprobe_output = json.loads(ffprobe_output)
except Exception as e:
# ffprobe error, probably an unsupported codec
error_list.append("--Unable to read %s" % path_to_video_file + str(e))
results['error'] = str(e)
return results
try:
# uncomment to see all the metadata available (fps, bitrate, etc):
# import pprint
# pp = pprint.PrettyPrinter(indent=2)
# pp.pprint(ffprobe_output)
# Get size and duration
duration = float(ffprobe_output['streams'][0]['duration'])
rounded_duration = int(round(duration))
results['width-height'] = "%sx%s" % (
ffprobe_output['streams'][0]['width'],
ffprobe_output['streams'][0]['height']
)
# Extract the time (from duration seconds)
seconds = int(rounded_duration) % 60
minutes = int(rounded_duration / 60.0) % 60
hours = int(rounded_duration / (60.0 * 60.0))
results['duration'] = "%02i:%02i:%02i" % (
hours,
minutes,
seconds
)
try:
# frame grab
parts = os.path.split(path_to_video_file)
prefix = '_'.join(parts[0].split('/')[-prefix_num_dirs:])
image_file_name = "%s/%s_%s.jpg" % (frame_grab_dir,
prefix,
parts[1])
cmd = "%s -ss %i -i %s -vframes 1 -s %s -y -f image2" % (ffmpeg_path,
round(rounded_duration / 2),
path_to_video_file.replace(' ', '\ '),
frame_size)
args = shlex.split(cmd)
args.append(image_file_name)
print(cmd)
# run the ffprobe process, decode stdout into utf-8 & convert to JSON
ffprobe_output = subprocess.check_output(args).decode('utf-8')
print(ffprobe_output)
except Exception as e:
error_list.append("--Error, possible corrupt: %s - %s" % (path_to_video_file, str(e)))
results['error'] = str(e)
# print(results)
except Exception as e:
error_list.append("--Error, possible corrupt: %s - %s" % (path_to_video_file, str(e)))
results['error'] = str(e)
return results
# Function to extract the metadata of the input video file using ffprobe (ffmpeg)
def get_video_frame(path_to_video_file):
results = defaultdict(str)
file_stat = os.stat(path_to_video_file)
results['size'] = get_human_readable_size(file_stat.st_size, 2)
results['creation_date'] = time.ctime(file_stat.st_ctime)
try:
# Use ffprobe to get metadata
parts = os.path.split(path_to_video_file)
image_file_name = "%s/%s_%s.jpg" % (frame_grab_dir,
os.path.basename(parts[0]), parts[1])
cmd = "%s -ss 125 -i %i -vframe 1 -s 480x300 -f image2 %s" % (ffmpeg_path, round(rounded_duration/2), image_file_name)
args = shlex.split(cmd)
args.append(path_to_video_file)
# run the ffprobe process, decode stdout into utf-8 & convert to JSON
ffprobe_output = subprocess.check_output(args).decode('utf-8')
ffprobe_output = json.loads(ffprobe_output)
except Exception as e:
# ffprobe error, probably an unsupported codec
error_list.append("--Unable to read %s" % path_to_video_file + str(e))
results['error'] = str(e)
return results
try:
# uncomment to see all the metadata available (fps, bitrate, etc):
# import pprint
# pp = pprint.PrettyPrinter(indent=2)
# pp.pprint(ffprobe_output)
# Get size and duration
duration = float(ffprobe_output['streams'][0]['duration'])
rounded_duration = int(round(duration))
results['width-height'] = "%sx%s" % (
ffprobe_output['streams'][0]['width'],
ffprobe_output['streams'][0]['height']
)
# Extract the time (from duration seconds)
seconds = int(rounded_duration) % 60
minutes = int(rounded_duration / 60.0) % 60
hours = int(rounded_duration / (60.0 * 60.0))
results['duration'] = "%02i:%02i:%02i" % (
hours,
minutes,
seconds
)
# print(results)
except Exception as e:
error_list.append("--Error, possible corrupt: %s - %s" % (path_to_video_file, str(e)))
results['error'] = str(e)
return results
# ffmpeg -ss 125 -i %s -vframe 1 -s 480x300 -f image2 %s/%s
# Convert bytes to human readable format
def get_human_readable_size(size, precision=2):
suffixes = ['B', 'KB', 'MB', 'GB', 'TB']
suffix_idx = 0
while size > 1024:
suffix_idx += 1 # increment the index of the suffix
size = size / 1024.0 # apply the division
if suffix_idx == 0:
precision = 0
return "%.*f %s" % (precision, size, suffixes[suffix_idx])
# Print error message and exit
def error_exit(msg):
print(msg)
exit(2)
# Test if a program exists (ffprobe, in our case)
def which(program):
def is_exe(fpath):
return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
fpath, fname = os.path.split(program)
if fpath:
if is_exe(program):
return program
else:
for path in os.environ["PATH"].split(os.pathsep):
path = path.strip('"')
exe_file = os.path.join(path, program)
if is_exe(exe_file):
return exe_file
return None
ffprobe_path = which("ffprobe")
ffmpeg_path = which("ffmpeg")
################################################################################
## Entry point
# Run the thing
if __name__ == "__main__":
if len(sys.argv) < 2 or not sys.argv[1]:
# require at least 1 argument
error_exit(
"Please include the path to the directory to scan! Usage:\n"
" python %s /path/to/directory/to/scan" % __file__)
exit(2)
if not ffprobe_path:
# ffprobe isn't installed
error_exit(
"You'll need to install ffmpeg first. Homebrew package manager is the easiest way:\n\n"
" /usr/bin/ruby -e \"$(curl -fsSL "
"https://raw.githubusercontent.com/Homebrew/install/master/install)\"\n"
" brew install ffmpeg --with-fdk-aac --with-ffplay --with-freetype --with-libass "
"--with-libquvi --with-libvorbis --with-libvpx --with-opus --with-x265\n"
"\nSee: https://trac.ffmpeg.org/wiki/CompilationGuide/MacOSX#ffmpegthroughHomebrew\n"
"(make sure installing homebrew is ok with company policies)")
if not os.path.isabs(frame_grab_dir):
frame_grab_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), frame_grab_dir)
if not os.path.exists(frame_grab_dir):
os.mkdir(frame_grab_dir)
original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
pool = ThreadPool(thread_count)
signal.signal(signal.SIGINT, original_sigint_handler)
# Do it!
find_files(sys.argv[1])
print("\nComplete.\n"
"To paste into Google sheets:\n"
"Cmd+Shift+V")
# Copy to clipboard
if copy_results:
print("\nResults copied to clipboard")
command = 'cat %s | pbcopy' % csv_file_name
os.system(command)
else:
print("\nTo copy output to clipboard:\n"
"cat %s | pbcopy\n\n" % csv_file_name)