diff --git a/.gitignore b/.gitignore index 9535c31..30b37b0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ /data/ /cache/ *.log +__pycache__ +ffmpeg.exe diff --git a/main_windows.py b/main_windows.py index e9a747c..dd3c8b6 100644 --- a/main_windows.py +++ b/main_windows.py @@ -2,6 +2,7 @@ import sys import argparse import time +import re parser = argparse.ArgumentParser(add_help=False) @@ -36,7 +37,6 @@ - qrcode (qrcode login) - Pillow (Add answer to problem; Convert PPT to PDF) -required system binaries: - aria2c (Download files multi-threaded & resume support) - ffmpeg with nvenc support (Concatenate video segments and convert to HEVC) """) @@ -144,7 +144,10 @@ os.makedirs(f"{DOWNLOAD_FOLDER}/{folder_name}", exist_ok=True) os.makedirs(f"{CACHE_FOLDER}/{folder_name}", exist_ok=True) - name_prefix += folder_name + "/" + + name_prefix += folder_name.rstrip() + "/" + # Remove illegal characters for Windows filenames + name_prefix = re.sub(r'[<>:"\\|?*]', '_', name_prefix) if args.lesson_name_filter is not None: lesson_data['data']['activities'] = [l for l in lesson_data['data']['activities'] if @@ -179,7 +182,10 @@ def download_lesson_video(lesson: dict, name_prefix: str = ""): lesson_video_data = rainclassroom_sess.get( f"https://{YKT_HOST}/api/v3/lesson-summary/replay?lesson_id={lesson['courseware_id']}").json() - name_prefix += "-" + lesson['title'] + + name_prefix += "-" + lesson['title'].rstrip() + # Remove illegal characters for Windows filenames + name_prefix = re.sub(r'[<>:"\\|?*]', '_', name_prefix) if 'live' not in lesson_video_data['data']: print(f"Skipping {name_prefix} - No Video", file=sys.stderr) @@ -216,7 +222,10 @@ def download_lesson_ppt(lesson: dict, name_prefix: str = ""): lesson_data = rainclassroom_sess.get( f"https://{YKT_HOST}/api/v3/lesson-summary/student?lesson_id={lesson['courseware_id']}").json() - name_prefix += "-" + lesson['title'] + name_prefix += "-" + lesson['title'].rstrip() + + # Remove illegal characters for Windows filenames + name_prefix = re.sub(r'[<>:"\\|?*]', '_', name_prefix) if 'presentations' not in lesson_data['data']: print(f"Skipping {name_prefix} - No PPT", file=sys.stderr) diff --git a/ppt_processing.py b/ppt_processing.py index ab15529..2486310 100644 --- a/ppt_processing.py +++ b/ppt_processing.py @@ -1,6 +1,7 @@ import os import time import subprocess +import re CACHE_FOLDER = 'cache' DOWNLOAD_FOLDER = 'data' @@ -11,7 +12,9 @@ # ppt_raw_data = rainclassroom_sess.get( # f"https://{YKT_HOST}/api/v3/lesson-summary/student/presentation?presentation_id={ppt_id}&lesson_id={lesson_id}").json() - name_prefix += "-" + ppt_raw_data['data']['presentation']['title'] + name_prefix += "-" + ppt_raw_data['data']['presentation']['title'].rstrip() + # Remove illegal characters for Windows filenames + name_prefix = re.sub(r'[<>:"\\|?*]', '_', name_prefix) # If PDF is present, skip if os.path.exists(f"{DOWNLOAD_FOLDER}/{name_prefix}.pdf"):