-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Configs for user/topic filters and folder/file format #25
base: master
Are you sure you want to change the base?
Changes from 1 commit
63fc1fd
db82b83
d96f662
0cec807
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,11 +41,13 @@ | |
# R"SUMMARY", # Summary file of the recording in JSON file format. | ||
] | ||
|
||
# If True, recordings will be grouped in folders by their owning user. | ||
GROUP_BY_USER = True | ||
|
||
# If True, recordings will be grouped in folders by their topics | ||
GROUP_BY_TOPIC = True | ||
# Group records in a folder hierarchy using the order below. | ||
# Reorder or comment out any of the folder groups below to control the folder hierarchy created to organize the downloaded recording files. | ||
GROUP_FOLDERS_BY = [ | ||
# R"YEAR_MONTH", # Recordings will be grouped in folders by their recording start date in yyyy-mm format. | ||
R"USER_EMAIL", # Recordings will be grouped in folders by their owning user's email address. | ||
R"TOPIC", # Recordings will be grouped in folders by their topics. | ||
] | ||
|
||
# If True, each instance of recording will be in its own folder (which may contain multiple files). | ||
# Note: One "meeting" can have multiple recording instances. | ||
|
@@ -55,6 +57,18 @@ | |
# This works when "Record a separate audio file of each participant" is enabled. | ||
INCLUDE_PARTICIPANT_AUDIO = True | ||
|
||
# Recording file name format to use when saving files. Reorder or comment out any file name format pieces below to control the file naming pattern. | ||
# Example: 2023-12-25t143021z__name-of-the-meeting__audio_transcript__ff625374.VTT | ||
FILE_NAME_FORMAT = [ | ||
R"RECORDING_START_DATETIME", # Recording start datetime | ||
R"RECORDING_NAME", # Recording name | ||
R"RECORDING_TYPE", # Recording type | ||
R"FILE_ID", # Recording unique file ID | ||
] | ||
Comment on lines
+60
to
+67
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a bit problematic |
||
|
||
# Separator character(s) to place in between the file name format pieces when building the recording file names. | ||
FILE_NAME_SEPERATOR = "__" | ||
|
||
# Set to True for more verbose output | ||
VERBOSE_OUTPUT = False | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -144,15 +144,15 @@ def download_recordings(users, from_date, to_date): | |
|
||
for user_email, user_name in users: | ||
user_description = get_user_description(user_email, user_name) | ||
user_host_folder = get_user_host_folder(user_email) | ||
host_folder = CONFIG.OUTPUT_PATH | ||
|
||
utils.print_bright( | ||
f'Downloading recordings from user {user_description} - Starting at {date_to_str(from_date)} ' | ||
f'and up to {date_to_str(to_date)} (inclusive).' | ||
) | ||
|
||
meetings = get_meetings(get_meeting_uuids(user_email, from_date, to_date)) | ||
user_file_count, user_total_size, user_skipped_count = download_recordings_from_meetings(meetings, user_host_folder) | ||
user_file_count, user_total_size, user_skipped_count = download_recordings_from_meetings(meetings, host_folder, user_email) | ||
|
||
utils.print_bright('######################################################################') | ||
print() | ||
|
@@ -166,12 +166,6 @@ def download_recordings(users, from_date, to_date): | |
def get_user_description(user_email, user_name):
    """Return a display label for a user.

    The label is the email address alone when ``user_name`` is falsy,
    otherwise the email address followed by the name in parentheses.
    """
    if not user_name:
        return user_email
    return f'{user_email} ({user_name})'
|
||
def get_user_host_folder(user_email): | ||
if CONFIG.GROUP_BY_USER: | ||
return os.path.join(CONFIG.OUTPUT_PATH, user_email) | ||
else: | ||
return CONFIG.OUTPUT_PATH | ||
|
||
def date_to_str(date):
    """Render ``date`` as a ``YYYY-MM-DD`` string via its ``strftime`` method."""
    year_month_day = '%Y-%m-%d'
    return date.strftime(year_month_day)
|
||
|
@@ -214,7 +208,7 @@ def get_meetings(meeting_uuids): | |
|
||
return meetings | ||
|
||
def download_recordings_from_meetings(meetings, host_folder): | ||
def download_recordings_from_meetings(meetings, host_folder, user_email): | ||
file_count, total_size, skipped_count = 0, 0, 0 | ||
|
||
for meeting in meetings: | ||
|
@@ -240,30 +234,52 @@ def download_recordings_from_meetings(meetings, host_folder): | |
|
||
url = recording_file['download_url'] | ||
topic = utils.slugify(meeting['topic']) | ||
ext = recording_file.get('file_extension') or os.path.splitext(recording_file['file_name'])[1] | ||
recording_name = utils.slugify(f'{topic}__{recording_file["recording_start"]}') | ||
file_id = recording_file['id'] | ||
file_name_suffix = os.path.splitext(recording_file['file_name'])[0] + '__' if 'file_name' in recording_file else '' | ||
recording_type_suffix = recording_file["recording_type"] + '__' if 'recording_type' in recording_file else '' | ||
file_name = utils.slugify( | ||
f'{recording_name}__{recording_type_suffix}{file_name_suffix}{file_id[-8:]}' | ||
) + '.' + ext | ||
recording_name = utils.slugify(f'{topic}') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
file_name = build_file_name(recording_file, topic) | ||
file_size = int(recording_file.get('file_size')) | ||
|
||
if download_recording_file(url, host_folder, file_name, file_size, topic, recording_name): | ||
if download_recording_file(url, host_folder, file_name, file_size, topic, recording_name, recording_file["recording_start"], user_email): | ||
total_size += file_size | ||
file_count += 1 | ||
else: | ||
skipped_count += 1 | ||
|
||
return file_count, total_size, skipped_count | ||
|
||
def download_recording_file(download_url, host_folder, file_name, file_size, topic, recording_name): | ||
def build_file_name(recording_file, topic):
    """Build the on-disk file name for one recording file.

    The name is assembled from the pieces listed in
    ``CONFIG.FILE_NAME_FORMAT`` (in that order), joined with
    ``CONFIG.FILE_NAME_SEPERATOR``, passed through ``utils.slugify`` and
    finally given the recording's file extension.

    Args:
        recording_file: Mapping describing the recording file. ``id`` and
            ``recording_start`` are required; ``recording_type``,
            ``file_name`` and ``file_extension`` are optional.
        topic: Meeting topic used as the recording name.

    Returns:
        The file name, including the extension when one can be determined.

    Raises:
        KeyError: If ``recording_file`` lacks ``id`` or ``recording_start``.
    """
    separator = f'{CONFIG.FILE_NAME_SEPERATOR}'

    # 'file_name' is optional; split it once and reuse the stem and extension.
    source_stem, source_extension = os.path.splitext(recording_file.get('file_name') or '')

    recording_name = utils.slugify(f'{topic}')
    if source_stem:
        # Keep the source stem as its own separator-delimited piece instead of
        # gluing it onto the recording name with a dangling hard-coded '__'.
        recording_name = f'{recording_name}{separator}{source_stem}'

    pieces_by_key = {
        'RECORDING_START_DATETIME': utils.slugify(f'{recording_file["recording_start"]}'),
        'RECORDING_NAME': recording_name,
        'RECORDING_TYPE': recording_file.get('recording_type') or '',
        'FILE_ID': recording_file['id'][-8:],
    }

    # Unknown format keys are ignored, and empty pieces are dropped so a
    # missing value does not leave a doubled separator in the name.
    file_name_pieces = [
        piece
        for piece in (pieces_by_key.get(key, '') for key in CONFIG.FILE_NAME_FORMAT)
        if piece
    ]
    file_name = utils.slugify(separator.join(file_name_pieces))

    # os.path.splitext keeps the leading dot; strip it so the result is never '..ext'.
    file_extension = (recording_file.get('file_extension') or source_extension).lstrip('.')

    return f'{file_name}.{file_extension}' if file_extension else file_name
|
||
def download_recording_file(download_url, host_folder, file_name, file_size, topic, recording_name, recording_start, user_email): | ||
folder_path = create_folder_path(host_folder, topic, recording_name, recording_start, user_email) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would call this outside this method There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no need to pass all these parameters to the download method |
||
file_path = os.path.join(folder_path, file_name) | ||
|
||
if CONFIG.VERBOSE_OUTPUT: | ||
print() | ||
utils.print_dim(f'URL: {download_url}') | ||
|
||
file_path = create_path(host_folder, file_name, topic, recording_name) | ||
utils.print_dim(f'Folder: {folder_path}') | ||
|
||
if os.path.exists(file_path) and abs(os.path.getsize(file_path) - file_size) <= CONFIG.FILE_SIZE_MISMATCH_TOLERANCE: | ||
utils.print_dim(f'Skipping existing file: {file_name}') | ||
|
@@ -287,16 +303,25 @@ def download_recording_file(download_url, host_folder, file_name, file_size, top | |
|
||
return True | ||
|
||
def create_path(host_folder, file_name, topic, recording_name): | ||
def create_folder_path(host_folder, topic, recording_name, recording_start, user_email): | ||
folder_path = host_folder | ||
|
||
if CONFIG.GROUP_BY_TOPIC: | ||
folder_path = os.path.join(folder_path, topic) | ||
for group_by in CONFIG.GROUP_FOLDERS_BY: | ||
if group_by == "YEAR_MONTH": | ||
recording_start_date = datetime.datetime.strptime(recording_start, '%Y-%m-%dT%H:%M:%SZ') | ||
year_month = recording_start_date.strftime('%Y-%m') | ||
folder_path = os.path.join(folder_path, year_month) | ||
if group_by == "USER_EMAIL": | ||
folder_path = os.path.join(folder_path, user_email) | ||
if group_by == "TOPIC": | ||
folder_path = os.path.join(folder_path, topic) | ||
|
||
if CONFIG.GROUP_BY_RECORDING: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this still here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should be part of your grouping mechanism, no? |
||
folder_path = os.path.join(folder_path, recording_name) | ||
|
||
os.makedirs(folder_path, exist_ok=True) | ||
return os.path.join(folder_path, file_name) | ||
|
||
return folder_path | ||
|
||
def do_with_token(do): | ||
def do_as_get(token): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GROUP_BY
or FOLDER_HIERARCHY