Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configs for user/topic filters and folder/file format #25

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions config_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,13 @@
# R"SUMMARY", # Summary file of the recording in JSON file format.
]

# If True, recordings will be grouped in folders by their owning user.
GROUP_BY_USER = True

# If True, recordings will be grouped in folders by their topics
GROUP_BY_TOPIC = True
# Group records in a folder hierarchy using the order below.
# Reorder or comment out any of the folder groups below to control the folder hierarchy created to organize the downloaded recording files.
GROUP_FOLDERS_BY = [
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GROUP_BY or FOLDER_HIERARCHY

# R"YEAR_MONTH", # Recordings will be grouped in folders by their recording start date in yyyy-mm format.
R"USER_EMAIL", # Recordings will be grouped in folders by their owning user's email address.
R"TOPIC", # Recordings will be grouped in folders by their topics.
]

# If True, each instance of recording will be in its own folder (which may contain multiple files).
# Note: One "meeting" can have multiple recording instances.
Expand All @@ -55,6 +57,18 @@
# This works when "Record a separate audio file of each participant" is enabled.
INCLUDE_PARTICIPANT_AUDIO = True

# Recording file name format to use when saving files. Reorder or comment out any file name format pieces below to control the file naming pattern.
# The selected pieces are joined, in the order listed, with FILE_NAME_SEPERATOR.
# Example: 2023-12-25t143021z__name-of-the-meeting__audio_transcript__ff625374.VTT
FILE_NAME_FORMAT = [
R"RECORDING_START_DATETIME", # Recording start datetime
R"RECORDING_NAME", # Recording name
R"RECORDING_TYPE", # Recording type
R"FILE_ID", # Recording unique file ID
]
Comment on lines +60 to +67
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit problematic
I use the file_id in the end to detect already downloaded file. I don't think it's a good idea to let users remove it...


# Separator character(s) to place in between the file name format pieces when building the recording file names.
FILE_NAME_SEPERATOR = "__"

# Set to True for more verbose output
VERBOSE_OUTPUT = False

Expand Down
75 changes: 50 additions & 25 deletions zoom_batch_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,15 +144,15 @@ def download_recordings(users, from_date, to_date):

for user_email, user_name in users:
user_description = get_user_description(user_email, user_name)
user_host_folder = get_user_host_folder(user_email)
host_folder = CONFIG.OUTPUT_PATH

utils.print_bright(
f'Downloading recordings from user {user_description} - Starting at {date_to_str(from_date)} '
f'and up to {date_to_str(to_date)} (inclusive).'
)

meetings = get_meetings(get_meeting_uuids(user_email, from_date, to_date))
user_file_count, user_total_size, user_skipped_count = download_recordings_from_meetings(meetings, user_host_folder)
user_file_count, user_total_size, user_skipped_count = download_recordings_from_meetings(meetings, host_folder, user_email)

utils.print_bright('######################################################################')
print()
Expand All @@ -166,12 +166,6 @@ def download_recordings(users, from_date, to_date):
def get_user_description(user_email, user_name):
    """Return a display label for a user.

    Args:
        user_email: The user's email address.
        user_name: The user's name; may be empty or None.

    Returns:
        ``'<email> (<name>)'`` when ``user_name`` is truthy, otherwise
        ``user_email`` unchanged.
    """
    if user_name:
        return f'{user_email} ({user_name})'
    return user_email

def get_user_host_folder(user_email):
    """Return the folder under which a user's recordings are stored.

    Args:
        user_email: Email address used as the per-user folder name.

    Returns:
        ``CONFIG.OUTPUT_PATH`` joined with ``user_email`` when
        ``CONFIG.GROUP_BY_USER`` is truthy, otherwise ``CONFIG.OUTPUT_PATH``.
    """
    if not CONFIG.GROUP_BY_USER:
        return CONFIG.OUTPUT_PATH
    return os.path.join(CONFIG.OUTPUT_PATH, user_email)

def date_to_str(date):
    """Format a date (or datetime) as ``YYYY-MM-DD``.

    Args:
        date: Any object exposing ``strftime``, e.g. ``datetime.date``.

    Returns:
        The formatted date string.
    """
    return date.strftime('%Y-%m-%d')

Expand Down Expand Up @@ -214,7 +208,7 @@ def get_meetings(meeting_uuids):

return meetings

def download_recordings_from_meetings(meetings, host_folder):
def download_recordings_from_meetings(meetings, host_folder, user_email):
file_count, total_size, skipped_count = 0, 0, 0

for meeting in meetings:
Expand All @@ -240,30 +234,52 @@ def download_recordings_from_meetings(meetings, host_folder):

url = recording_file['download_url']
topic = utils.slugify(meeting['topic'])
ext = recording_file.get('file_extension') or os.path.splitext(recording_file['file_name'])[1]
recording_name = utils.slugify(f'{topic}__{recording_file["recording_start"]}')
file_id = recording_file['id']
file_name_suffix = os.path.splitext(recording_file['file_name'])[0] + '__' if 'file_name' in recording_file else ''
recording_type_suffix = recording_file["recording_type"] + '__' if 'recording_type' in recording_file else ''
file_name = utils.slugify(
f'{recording_name}__{recording_type_suffix}{file_name_suffix}{file_id[-8:]}'
) + '.' + ext
recording_name = utils.slugify(f'{topic}')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

utils.slugify(topic)


file_name = build_file_name(recording_file, topic)
file_size = int(recording_file.get('file_size'))

if download_recording_file(url, host_folder, file_name, file_size, topic, recording_name):
if download_recording_file(url, host_folder, file_name, file_size, topic, recording_name, recording_file["recording_start"], user_email):
total_size += file_size
file_count += 1
else:
skipped_count += 1

return file_count, total_size, skipped_count

def download_recording_file(download_url, host_folder, file_name, file_size, topic, recording_name):
def build_file_name(recording_file, topic):
    """Build the file name under which one recording file is saved.

    The name is assembled from the pieces listed in
    ``CONFIG.FILE_NAME_FORMAT`` (in that order), joined with
    ``CONFIG.FILE_NAME_SEPERATOR``, passed through ``utils.slugify`` and
    suffixed with the file extension.

    Args:
        recording_file: Mapping describing the recording file. ``id`` and
            ``recording_start`` are required; ``file_name``,
            ``recording_type`` and ``file_extension`` are used when present.
        topic: Meeting topic, used for the ``RECORDING_NAME`` piece.

    Returns:
        The file name including its extension (no extension is appended when
        none can be determined).

    Raises:
        KeyError: If ``recording_file`` lacks ``id`` or ``recording_start``.
    """
    separator = str(CONFIG.FILE_NAME_SEPERATOR)

    # 'file_name' is optional; split it once for both the stem and the
    # fallback extension so a missing key never raises.
    original_name = recording_file.get('file_name') or ''
    original_stem, original_ext = os.path.splitext(original_name)

    # The original file name (when present) follows the topic as its own
    # separator-delimited part instead of being glued straight onto it.
    name_parts = [utils.slugify(f'{topic}')]
    if original_stem:
        name_parts.append(original_stem)

    pieces_by_key = {
        'RECORDING_START_DATETIME': utils.slugify(f'{recording_file["recording_start"]}'),
        'RECORDING_NAME': separator.join(name_parts),
        'RECORDING_TYPE': recording_file.get('recording_type') or '',
        'FILE_ID': recording_file['id'][-8:],
    }

    # Unknown format keys are ignored; empty pieces are dropped so they do
    # not leave doubled separators in the result.
    file_name_pieces = [
        pieces_by_key[key]
        for key in CONFIG.FILE_NAME_FORMAT
        if pieces_by_key.get(key)
    ]
    base_name = utils.slugify(separator.join(file_name_pieces))

    # os.path.splitext keeps the leading dot on the extension; strip it so
    # the fallback path does not yield 'name..ext'.
    file_extension = (recording_file.get('file_extension') or original_ext).lstrip('.')

    return f'{base_name}.{file_extension}' if file_extension else base_name

def download_recording_file(download_url, host_folder, file_name, file_size, topic, recording_name, recording_start, user_email):
folder_path = create_folder_path(host_folder, topic, recording_name, recording_start, user_email)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would call this outside this method

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no need to pass all these parameters to the download method

file_path = os.path.join(folder_path, file_name)

if CONFIG.VERBOSE_OUTPUT:
print()
utils.print_dim(f'URL: {download_url}')

file_path = create_path(host_folder, file_name, topic, recording_name)
utils.print_dim(f'Folder: {folder_path}')

if os.path.exists(file_path) and abs(os.path.getsize(file_path) - file_size) <= CONFIG.FILE_SIZE_MISMATCH_TOLERANCE:
utils.print_dim(f'Skipping existing file: {file_name}')
Expand All @@ -287,16 +303,25 @@ def download_recording_file(download_url, host_folder, file_name, file_size, top

return True

def create_path(host_folder, file_name, topic, recording_name):
def create_folder_path(host_folder, topic, recording_name, recording_start, user_email):
folder_path = host_folder

if CONFIG.GROUP_BY_TOPIC:
folder_path = os.path.join(folder_path, topic)
for group_by in CONFIG.GROUP_FOLDERS_BY:
if group_by == "YEAR_MONTH":
recording_start_date = datetime.datetime.strptime(recording_start, '%Y-%m-%dT%H:%M:%SZ')
year_month = recording_start_date.strftime('%Y-%m')
folder_path = os.path.join(folder_path, year_month)
if group_by == "USER_EMAIL":
folder_path = os.path.join(folder_path, user_email)
if group_by == "TOPIC":
folder_path = os.path.join(folder_path, topic)

if CONFIG.GROUP_BY_RECORDING:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this still here?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be part of your grouping mechanism, no?

folder_path = os.path.join(folder_path, recording_name)

os.makedirs(folder_path, exist_ok=True)
return os.path.join(folder_path, file_name)

return folder_path

def do_with_token(do):
def do_as_get(token):
Expand Down