-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Configs for user/topic filters and folder/file format #25
base: master
Are you sure you want to change the base?
Changes from all commits
63fc1fd
db82b83
d96f662
0cec807
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
.venv | ||
.venv | ||
config.py | ||
__pycache__ | ||
.DS_Store | ||
.vscode |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,10 +8,16 @@ | |
|
||
# Date range (inclusive) for downloads, None value for Days gets replaced by first/last day of the month. | ||
START_DAY, START_MONTH, START_YEAR = None, 5, 2020 | ||
END_DAY, END_MONTH, END_YEAR = None , 3, 2022 | ||
END_DAY, END_MONTH, END_YEAR = None, 3, 2022 | ||
|
||
# Put here emails of the users you want to check for recordings. If empty, all users under the account will be checked. | ||
USERS = [ | ||
USERS_INCLUDE = [ | ||
# R"####@####.####", | ||
# R"####@####.####", | ||
] | ||
|
||
# Put here emails of the users you want to exclude from checking for recordings. Optional. | ||
USERS_EXCLUDE = [ | ||
# R"####@####.####", | ||
# R"####@####.####", | ||
] | ||
Comment on lines
-14
to
23
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The logic here isn't clear to me from the docs. What takes precedence? |
||
|
@@ -22,6 +28,9 @@ | |
# R"############", | ||
] | ||
|
||
# If True, topics that partially match your topic filters are downloaded. If False, only meetings with exact topic matches are downloaded. | ||
PARTIAL_MATCH_TOPICS = False | ||
|
||
# Put here the file types you wish to download. If empty, no file type filtering will happen. | ||
RECORDING_FILE_TYPES = [ | ||
# R"MP4", # Video file of the recording. | ||
|
@@ -32,11 +41,13 @@ | |
# R"SUMMARY", # Summary file of the recording in JSON file format. | ||
] | ||
|
||
# If True, recordings will be grouped in folders by their owning user. | ||
GROUP_BY_USER = True | ||
|
||
# If True, recordings will be grouped in folders by their topics | ||
GROUP_BY_TOPIC = True | ||
# Group records in a folder hierarchy using the order below. | ||
# Reorder or comment out any of the folder groups below to control the folder hierarchy created to organize the downloaded recording files. | ||
GROUP_FOLDERS_BY = [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
# R"YEAR_MONTH", # Recordings will be grouped in folders by their recording start date in yyyy-mm format. | ||
R"USER_EMAIL", # Recordings will be grouped in folders by their owning user's email address. | ||
R"TOPIC", # Recordings will be grouped in folders by their topics. | ||
] | ||
|
||
# If True, each instance of recording will be in its own folder (which may contain multiple files). | ||
# Note: One "meeting" can have multiple recording instances. | ||
|
@@ -46,6 +57,18 @@ | |
# This works when "Record a separate audio file of each participant" is enabled. | ||
INCLUDE_PARTICIPANT_AUDIO = True | ||
|
||
# Recording file name format to use when saving files. Reorder or comment out any file name format pieces below to control the file naming pattern. | ||
# Example: 2023-12-25t143021z__name-of-the-meeting__audio_transcript__ff625374.VTT | ||
FILE_NAME_FORMAT = [ | ||
R"RECORDING_START_DATETIME", # Recording start datetime | ||
R"RECORDING_NAME", # Recording name | ||
R"RECORDING_TYPE", # Recording type | ||
R"FILE_ID", # Recording unique file ID | ||
] | ||
Comment on lines
+60
to
+67
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a bit problematic |
||
|
||
# Separator character(s) to place in between the file name format pieces when building the recording file names. | ||
FILE_NAME_SEPERATOR = "__" | ||
|
||
# Set to True for more verbose output | ||
VERBOSE_OUTPUT = False | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,9 @@ def main(): | |
|
||
print_filter_warnings() | ||
|
||
if CONFIG.PARTIAL_MATCH_TOPICS: | ||
CONFIG.TOPICS = [topic.lower() for topic in CONFIG.TOPICS] | ||
|
||
from_date = datetime.datetime(CONFIG.START_YEAR, CONFIG.START_MONTH, CONFIG.START_DAY or 1) | ||
to_date = datetime.datetime( | ||
CONFIG.END_YEAR, CONFIG.END_MONTH, CONFIG.END_DAY or monthrange(CONFIG.END_YEAR, CONFIG.END_MONTH)[1], | ||
|
@@ -38,8 +41,11 @@ def print_filter_warnings(): | |
if CONFIG.TOPICS: | ||
utils.print_bright(f'Topics filter is active {CONFIG.TOPICS}') | ||
did_print = True | ||
if CONFIG.USERS: | ||
utils.print_bright(f'Users filter is active {CONFIG.USERS}') | ||
if CONFIG.USERS_INCLUDE: | ||
utils.print_bright(f'Users include filter is active {CONFIG.USERS_INCLUDE}') | ||
did_print = True | ||
if CONFIG.USERS_EXCLUDE: | ||
utils.print_bright(f'Users exclude filter is active {CONFIG.USERS_EXCLUDE}') | ||
did_print = True | ||
if CONFIG.RECORDING_FILE_TYPES: | ||
utils.print_bright(f'Recording file types filter is active {CONFIG.RECORDING_FILE_TYPES}') | ||
|
@@ -49,16 +55,29 @@ def print_filter_warnings(): | |
print() | ||
|
||
def get_users(): | ||
if CONFIG.USERS: | ||
return [(email, '') for email in CONFIG.USERS] | ||
|
||
return paginate_reduce( | ||
'https://api.zoom.us/v2/users?status=active', [], | ||
lambda users, page: users + [(user['email'], get_user_name(user)) for user in page['users']] | ||
) + paginate_reduce( | ||
'https://api.zoom.us/v2/users?status=inactive', [], | ||
lambda users, page: users + [(user['email'], get_user_name(user)) for user in page['users']] | ||
) | ||
if CONFIG.USERS_INCLUDE: | ||
users = [(email, '') for email in CONFIG.USERS_INCLUDE] | ||
else: | ||
users = paginate_reduce( | ||
'https://api.zoom.us/v2/users?status=active', [], | ||
lambda users, page: users + [(user['email'], get_user_name(user)) for user in page['users']] | ||
) + paginate_reduce( | ||
'https://api.zoom.us/v2/users?status=inactive', [], | ||
lambda users, page: users + [(user['email'], get_user_name(user)) for user in page['users']] | ||
) | ||
|
||
if CONFIG.VERBOSE_OUTPUT: | ||
utils.print_dim('Found matching users:') | ||
|
||
for user_email, user_name in users: | ||
if user_email in CONFIG.USERS_EXCLUDE: | ||
users.pop(users.index((user_email, user_name))) | ||
continue | ||
|
||
if CONFIG.VERBOSE_OUTPUT: | ||
utils.print_dim(f'{user_name} <{user_email}>') | ||
Comment on lines
+69
to
+78
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would just run a filter on the list, then print it as is |
||
|
||
return users | ||
|
||
def paginate_reduce(url, initial, reduce): | ||
initial_url = utils.add_url_params(url, {'page_size': 300}) | ||
|
@@ -125,15 +144,15 @@ def download_recordings(users, from_date, to_date): | |
|
||
for user_email, user_name in users: | ||
user_description = get_user_description(user_email, user_name) | ||
user_host_folder = get_user_host_folder(user_email) | ||
host_folder = CONFIG.OUTPUT_PATH | ||
|
||
utils.print_bright( | ||
f'Downloading recordings from user {user_description} - Starting at {date_to_str(from_date)} ' | ||
f'and up to {date_to_str(to_date)} (inclusive).' | ||
) | ||
|
||
meetings = get_meetings(get_meeting_uuids(user_email, from_date, to_date)) | ||
user_file_count, user_total_size, user_skipped_count = download_recordings_from_meetings(meetings, user_host_folder) | ||
user_file_count, user_total_size, user_skipped_count = download_recordings_from_meetings(meetings, host_folder, user_email) | ||
|
||
utils.print_bright('######################################################################') | ||
print() | ||
|
@@ -147,12 +166,6 @@ def download_recordings(users, from_date, to_date): | |
def get_user_description(user_email, user_name): | ||
return f'{user_email} ({user_name})' if (user_name) else user_email | ||
|
||
def get_user_host_folder(user_email): | ||
if CONFIG.GROUP_BY_USER: | ||
return os.path.join(CONFIG.OUTPUT_PATH, user_email) | ||
else: | ||
return CONFIG.OUTPUT_PATH | ||
|
||
def date_to_str(date): | ||
return date.strftime('%Y-%m-%d') | ||
|
||
|
@@ -180,6 +193,8 @@ def get_meeting_uuids(user_email, start_date, end_date): | |
local_start_date = local_end_date + datetime.timedelta(days=1) | ||
progress_bar.update(1) | ||
|
||
utils.print_dim(f"Meetings found: {len(meeting_uuids)}") | ||
|
||
return meeting_uuids | ||
|
||
def get_meetings(meeting_uuids): | ||
|
@@ -189,14 +204,23 @@ def get_meetings(meeting_uuids): | |
url = f'https://api.zoom.us/v2/meetings/{utils.double_encode(meeting_uuid)}/recordings' | ||
meetings.append(get_with_token(lambda t: requests.get(url=url, headers=get_headers(t))).json()) | ||
|
||
utils.print_dim(f"Recordings found: {len(meetings)}") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This isn't the number of recordings |
||
|
||
return meetings | ||
|
||
def download_recordings_from_meetings(meetings, host_folder): | ||
def download_recordings_from_meetings(meetings, host_folder, user_email): | ||
file_count, total_size, skipped_count = 0, 0, 0 | ||
|
||
for meeting in meetings: | ||
if CONFIG.TOPICS and meeting['topic'] not in CONFIG.TOPICS and utils.slugify(meeting['topic']) not in CONFIG.TOPICS: | ||
continue | ||
if CONFIG.TOPICS and meeting['topic']: | ||
if CONFIG.PARTIAL_MATCH_TOPICS: | ||
topic_lower = str.lower(meeting['topic']) | ||
topic_lower_slug = utils.slugify(meeting['topic']) | ||
Comment on lines
+217
to
+218
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. put outside the |
||
if not any(topic in topic_lower for topic in CONFIG.TOPICS) and not any(topic in topic_lower_slug for topic in CONFIG.TOPICS): | ||
continue | ||
else: | ||
if meeting['topic'] not in CONFIG.TOPICS and utils.slugify(meeting['topic']) not in CONFIG.TOPICS: | ||
continue | ||
|
||
recording_files = meeting.get('recording_files') or [] | ||
participant_audio_files = meeting.get('participant_audio_files') or [] if CONFIG.INCLUDE_PARTICIPANT_AUDIO else [] | ||
|
@@ -210,30 +234,52 @@ def download_recordings_from_meetings(meetings, host_folder): | |
|
||
url = recording_file['download_url'] | ||
topic = utils.slugify(meeting['topic']) | ||
ext = recording_file.get('file_extension') or os.path.splitext(recording_file['file_name'])[1] | ||
recording_name = utils.slugify(f'{topic}__{recording_file["recording_start"]}') | ||
file_id = recording_file['id'] | ||
file_name_suffix = os.path.splitext(recording_file['file_name'])[0] + '__' if 'file_name' in recording_file else '' | ||
recording_type_suffix = recording_file["recording_type"] + '__' if 'recording_type' in recording_file else '' | ||
file_name = utils.slugify( | ||
f'{recording_name}__{recording_type_suffix}{file_name_suffix}{file_id[-8:]}' | ||
) + '.' + ext | ||
recording_name = utils.slugify(f'{topic}') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
file_name = build_file_name(recording_file, topic) | ||
file_size = int(recording_file.get('file_size')) | ||
|
||
if download_recording_file(url, host_folder, file_name, file_size, topic, recording_name): | ||
if download_recording_file(url, host_folder, file_name, file_size, topic, recording_name, recording_file["recording_start"], user_email): | ||
total_size += file_size | ||
file_count += 1 | ||
else: | ||
skipped_count += 1 | ||
|
||
return file_count, total_size, skipped_count | ||
|
||
def download_recording_file(download_url, host_folder, file_name, file_size, topic, recording_name): | ||
def build_file_name(recording_file, topic):
    """Build the file name used when saving a single recording file.

    The name is assembled from the pieces listed in CONFIG.FILE_NAME_FORMAT (in
    that order), joined with CONFIG.FILE_NAME_SEPERATOR, passed through
    utils.slugify, and suffixed with '.' plus the file extension.

    Args:
        recording_file: Recording file entry (dict). 'recording_start' and 'id'
            are required; 'recording_type', 'file_name' and 'file_extension'
            are optional.
        topic: Meeting topic, used as the recording name piece.

    Returns:
        The file name including its extension.

    Raises:
        KeyError: If 'recording_start' or 'id' is missing, or if neither
            'file_extension' nor 'file_name' is present.
    """
    separator = CONFIG.FILE_NAME_SEPERATOR

    # Original file name without its extension, when the entry carries one.
    file_stem = os.path.splitext(recording_file['file_name'])[0] if 'file_name' in recording_file else ''

    # Join topic and original file name with the configured separator instead of
    # gluing them together and appending a hard-coded '__'.
    name_parts = [utils.slugify(f'{topic}'), file_stem]

    pieces_by_key = {
        'RECORDING_START_DATETIME': utils.slugify(f'{recording_file["recording_start"]}'),
        'RECORDING_NAME': separator.join(part for part in name_parts if part),
        'RECORDING_TYPE': recording_file.get('recording_type') or '',
        'FILE_ID': recording_file['id'][-8:],
    }

    # Unknown format keys and empty pieces are skipped so that a missing value
    # (e.g. no recording type) never produces a doubled separator.
    file_name_pieces = [
        pieces_by_key[key]
        for key in CONFIG.FILE_NAME_FORMAT
        if pieces_by_key.get(key)
    ]

    # os.path.splitext() keeps the leading dot on the extension; strip it so the
    # fallback does not yield "name..ext".
    file_extension = (
        recording_file.get('file_extension')
        or os.path.splitext(recording_file['file_name'])[1].lstrip('.')
    )

    return utils.slugify(separator.join(file_name_pieces)) + '.' + file_extension
|
||
def download_recording_file(download_url, host_folder, file_name, file_size, topic, recording_name, recording_start, user_email): | ||
folder_path = create_folder_path(host_folder, topic, recording_name, recording_start, user_email) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would call this outside this method There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no need to pass all these parameters to the download method |
||
file_path = os.path.join(folder_path, file_name) | ||
|
||
if CONFIG.VERBOSE_OUTPUT: | ||
print() | ||
utils.print_dim(f'URL: {download_url}') | ||
|
||
file_path = create_path(host_folder, file_name, topic, recording_name) | ||
utils.print_dim(f'Folder: {folder_path}') | ||
|
||
if os.path.exists(file_path) and abs(os.path.getsize(file_path) - file_size) <= CONFIG.FILE_SIZE_MISMATCH_TOLERANCE: | ||
utils.print_dim(f'Skipping existing file: {file_name}') | ||
|
@@ -257,16 +303,25 @@ def download_recording_file(download_url, host_folder, file_name, file_size, top | |
|
||
return True | ||
|
||
def create_path(host_folder, file_name, topic, recording_name): | ||
def create_folder_path(host_folder, topic, recording_name, recording_start, user_email): | ||
folder_path = host_folder | ||
|
||
if CONFIG.GROUP_BY_TOPIC: | ||
folder_path = os.path.join(folder_path, topic) | ||
for group_by in CONFIG.GROUP_FOLDERS_BY: | ||
if group_by == "YEAR_MONTH": | ||
recording_start_date = datetime.datetime.strptime(recording_start, '%Y-%m-%dT%H:%M:%SZ') | ||
year_month = recording_start_date.strftime('%Y-%m') | ||
folder_path = os.path.join(folder_path, year_month) | ||
if group_by == "USER_EMAIL": | ||
folder_path = os.path.join(folder_path, user_email) | ||
if group_by == "TOPIC": | ||
folder_path = os.path.join(folder_path, topic) | ||
|
||
if CONFIG.GROUP_BY_RECORDING: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this still here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should be part of your grouping mechanism, no? |
||
folder_path = os.path.join(folder_path, recording_name) | ||
|
||
os.makedirs(folder_path, exist_ok=True) | ||
return os.path.join(folder_path, file_name) | ||
|
||
return folder_path | ||
|
||
def do_with_token(do): | ||
def do_as_get(token): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pls revert the added spaces