# -*- coding: utf-8 -*-
"""Command-line entry script of the RainClassroom downloader.

Running this module parses the command line, checks the optional
dependencies, resolves the download back-end (IDM or aria2c), logs in (session
cookie or QR code) and loads the course list used further down the file.
"""

import os
import subprocess
import sys
import argparse
import time
import re
import traceback
import option
import shutil
import importlib

if sys.platform == 'win32':
    # 65001 is the UTF-8 console code page on Windows.
    os.system('chcp 65001')

# The built-in help action is disabled because -h/--help is handled manually
# below, where a longer banner is printed before the argparse help text.
parser = argparse.ArgumentParser(add_help=False)

parser.add_argument("-h", "--help", action="store_true", help="Show this help message and exit")
parser.add_argument("-c", "--session-cookie", help="Session Cookie", required=False)
parser.add_argument("-y", "--ykt-host", help="RainClassroom Host", required=False, default="pro.yuketang.cn")
parser.add_argument("-i", "--idm", action="store_true", help="Use IDMan.exe")
parser.add_argument("-ni", "--no-idm", action="store_true", help="Don't use IDMan.exe, implied when the system is not Windows")
parser.add_argument("-a", "--all", action="store_true", help="Download all content without asking")
parser.add_argument("-na", "--no-all", action="store_true", help="Ask before downloading each course")
parser.add_argument("-nv", "--no-video", action="store_true", help="Don't Download Video")
parser.add_argument("-np", "--no-ppt", action="store_true", help="Don't Download PPT")
parser.add_argument("-npc", "--no-convert-ppt-to-pdf", action="store_true", help="Don't Convert PPT to PDF")
parser.add_argument("-npa", "--no-ppt-answer", action="store_true", help="Don't Store PPT Problem Answer")
parser.add_argument("--course-name-filter", action="store", help="Filter Course Name", default=None)
parser.add_argument("--lesson-name-filter", action="store", help="Filter Lesson Name", default=None)

args = parser.parse_args()

# Positive switches derived from the "--no-*" flags; later code reads these.
args.video = not args.no_video
args.ppt = not args.no_ppt
args.ppt_to_pdf = not args.no_convert_ppt_to_pdf
args.ppt_problem_answer = not args.no_ppt_answer

# Show the banner when --help is given or when no argument was passed at all.
if args.help or len(sys.argv) == 1:
    print("""RainClassroom Video Downloader

requirements:
    - Python >= 3.12
    - requests
    - websocket-client (qrcode login)
    - qrcode (qrcode login)
    - Pillow (Add answer to problem; Convert PPT to PDF)

    - aria2c (Download files multi-threaded & resume support)
    - ffmpeg with nvenc support (Concatenate video segments and convert to HEVC)
""")
    print(parser.format_help())

    if sys.platform == 'win32':
        print('\nYOU SHALL RUN THIS EXECUTABLE FROM POWERSHELL WITH ARGUMENT!!')
        os.system('pause')

    # sys.exit() instead of the site-provided exit(): the latter is meant for
    # the interactive shell and is not guaranteed to exist in every runtime.
    sys.exit()


def _require_module(module_name: str, display_name: str, pip_name: str) -> None:
    """Exit with status 1 and an install hint when ``module_name`` cannot be imported."""
    try:
        importlib.import_module(module_name)
    except ImportError:
        print(f"{display_name} is not installed. Please install it using 'pip install {pip_name}'",
              file=sys.stderr)
        sys.exit(1)


# Check for dependencies
_require_module("requests", "requests", "requests")

if args.session_cookie is None:
    # The QR-code login below needs these two; a supplied cookie skips it.
    _require_module("websocket", "websocket-client", "websocket-client")
    _require_module("qrcode", "qrcode", "qrcode")

if args.ppt_to_pdf or args.ppt_problem_answer:
    _require_module("PIL", "PIL", "pillow")

# Contradictory switches are a usage error: stop instead of silently
# preferring one of them.
if args.all and args.no_all:
    print("'-a' and '-na' cannot be used together", file=sys.stderr)
    sys.exit(1)
if args.idm and args.no_idm:
    print("'-i' and '-ni' cannot be used together", file=sys.stderr)
    sys.exit(1)

if args.all:
    allin_flag = 1
elif args.no_all:
    allin_flag = 0
else:
    allin_flag = option.ask_for_allin()

# Only infer --no-idm when the user expressed no preference; an explicit
# --idm is honoured (with a warning) and an explicit --no-idm needs no notice.
if sys.platform != 'win32' and not args.idm and not args.no_idm:
    print("Inferring --no-idm flag as the system is not Windows")
    args.no_idm = True

if args.idm:
    idm_flag = 1
elif args.no_idm:
    idm_flag = 0
else:
    idm_flag = option.ask_for_idm()

# Warn before the availability check so the warning is visible even when the
# check below terminates the program.
if idm_flag and sys.platform != 'win32':
    print("WARNING: Are you sure that you want to use IDM on a non-Windows system?", file=sys.stderr)

if idm_flag and shutil.which('IDMan.exe') is None:
    print("IDMan.exe is not found. Please install IDM and add it to PATH, or specify '--no-idm' flag",
          file=sys.stderr)
    sys.exit(1)

# Prefer aria2c from PATH; fall back to a binary next to the working directory.
args.aria2c_path = "aria2c"
if shutil.which("aria2c") is None and os.path.exists("aria2c.exe"):
    args.aria2c_path = os.path.join(os.getcwd(), "aria2c")
    print(f"aria2c is not found in PATH, using local binary at {args.aria2c_path}")

if not idm_flag:
    if shutil.which(args.aria2c_path) is None:
        print("aria2c is not found. Please install aria2 and add it to PATH, or use IDM instead",
              file=sys.stderr)
        sys.exit(1)

    print("IDM is not enabled, aria2c will be used for downloading")

import requests
import json

# --- --- --- Section Init --- --- --- #
# Login to RainClassroom
userinfo = {}
rainclassroom_sess = requests.session()

YKT_HOST = args.ykt_host
DOWNLOAD_FOLDER = "data"
CACHE_FOLDER = "cache"

os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
os.makedirs(CACHE_FOLDER, exist_ok=True)

# --- --- --- Section Load Session --- --- --- #

if args.session_cookie is not None:
    rainclassroom_sess.cookies['sessionid'] = args.session_cookie

# --- --- --- Section Login --- --- --- #
else:
    import websocket
    import qrcode


    def on_message(ws, message):
        """Store the latest server message and render the QR code it carries."""
        global userinfo
        userinfo = json.loads(message)
        # A message carrying 'subscribe_status' ends the handshake.
        if 'subscribe_status' in userinfo:
            ws.close()
            return

        qr = qrcode.QRCode()
        qr.add_data(userinfo["qrcode"])
        # Flush screen first
        print("\033c")
        qr.print_ascii(out=sys.stdout)
        print("请扫描二维码登录")


    def on_error(ws, error):
        """Print websocket errors; the login check below handles the outcome."""
        print(error)


    def on_open(ws):
        """Ask the server to start a QR-code login as soon as the socket opens."""
        ws.send(data=json.dumps({"op": "requestlogin", "role": "web", "version": 1.4, "type": "qrcode", "from": "web"}))


    # WebSocket exchange that drives the QR-code login
    ws = websocket.WebSocketApp(f"wss://{YKT_HOST}/wsapp/",
                                on_message=on_message,
                                on_error=on_error)
    ws.on_open = on_open
    ws.run_forever()

    # Without both fields the login did not complete (e.g. the socket failed);
    # fail with a clear message instead of a KeyError below.
    if 'UserID' not in userinfo or 'Auth' not in userinfo:
        print("QR code login did not complete, no credentials were received", file=sys.stderr)
        sys.exit(1)

    # Log in
    rainclassroom_sess.get(f"https://{YKT_HOST}/v/course_meta/user_info")
    rainclassroom_sess.post(f"https://{YKT_HOST}/pc/web_login",
                            data=json.dumps({'UserID': userinfo['UserID'], 'Auth': userinfo['Auth']}))

    # Store session
    with open(f"{DOWNLOAD_FOLDER}/session.txt", "a", encoding='utf-8') as f:
        f.write(rainclassroom_sess.cookies['sessionid'] + "\n")

# --- --- --- Section Get Course List --- --- --- #

# Fetch the user's own course list (visible courses plus archived ones)
shown_courses = rainclassroom_sess.get(f"https://{YKT_HOST}/v2/api/web/courses/list?identity=2").json()

hidden_courses = rainclassroom_sess.get(f"https://{YKT_HOST}/v2/api/web/classroom_archive").json()

# Archived entries expose the id as 'id'; copy it to 'classroom_id', the key
# the rest of the file reads.
for course in hidden_courses['data']['classrooms']:
    course['classroom_id'] = course['id']

courses = shown_courses['data']['list'] + hidden_courses['data']['classrooms']

if args.course_name_filter is not None:
    courses = [c for c in courses if args.course_name_filter in c['name']]

rainclassroom_sess.cookies['xtbz'] = 'ykt'
# --- --- --- Section Get Lesson List --- --- --- #


def _move_legacy_folder(root: str, legacy_name: str, folder_name: str) -> None:
    """Rename ``root/legacy_name`` to ``root/folder_name`` if only the former exists."""
    legacy_path = f"{root}/{legacy_name}"
    target_path = f"{root}/{folder_name}"
    # os.rename raises when the destination already exists (always on Windows,
    # for non-empty directories elsewhere), so an existing target is left alone.
    if os.path.exists(legacy_path) and not os.path.exists(target_path):
        os.rename(legacy_path, target_path)


def _run_video_pass(activities: list, course: dict, name_prefix: str) -> None:
    """Attempt the video download of every supported activity once."""
    # Built at call time because the handlers are defined further down the file.
    handlers = {
        2: ('Script type detected!', download_lesson_video_type2),
        3: ('Normal type detected!', download_lesson_video),
        14: ('Normal type detected!', download_lesson_video),
        15: ('MOOCv2 type detected!', download_lesson_video_type15),
        17: ('MOOCv1 type detected!', download_lesson_video_type17),
    }
    total = len(activities)

    for index, lesson in enumerate(activities):
        handler = handlers.get(lesson['type'])
        if handler is None:
            continue
        banner, download = handler

        lesson['classroom_id'] = course['classroom_id']

        # Best effort: one failing lesson must not stop the remaining ones.
        try:
            print(banner)
            download(lesson, name_prefix + str(total - index))
        except Exception:
            print(traceback.format_exc())
            print(f"Failed to download video for {name_prefix} - {lesson['title']}", file=sys.stderr)


def _run_ppt_pass(activities: list, course: dict, name_prefix: str) -> None:
    """Attempt the PPT download of every non-MOOC activity once."""
    total = len(activities)

    for index, lesson in enumerate(activities):
        if lesson['type'] in (15, 17):
            print("mooc type has no ppts!")
            continue
        lesson['classroom_id'] = course['classroom_id']

        # Best effort: one failing lesson must not stop the remaining ones.
        try:
            download_lesson_ppt(lesson, name_prefix + str(total - index))
        except Exception:
            print(traceback.format_exc())
            print(f"Failed to download PPT for {name_prefix} - {lesson['title']}", file=sys.stderr)


def get_lesson_list(course: dict, name_prefix: str = ""):
    """Download the videos and/or PPTs of every activity of one course.

    Args:
        course: Course entry from the course list; ``classroom_id``, ``name``
            and ``teacher['name']`` are read.
        name_prefix: Path prefix prepended to the per-course folder name.

    Each enabled kind (``args.video``, ``args.ppt``) is processed twice, since
    the server may not have every item ready on the first pass.
    """
    lesson_data = rainclassroom_sess.get(
        f"https://{YKT_HOST}/v2/api/web/logs/learn/{course['classroom_id']}?actype=-1&page=0&offset=500&sort=-1").json()

    folder_name = f"{course['name']}-{course['teacher']['name']}"
    folder_name = option.windows_filesame_sanitizer(folder_name)

    if idm_flag:
        folder_name = folder_name.replace('/', '\\')
        folder_name = re.sub(r'[“”]', '_', folder_name)

    print('folder name would be:', folder_name)

    # Folders named after the bare course name are renamed to the current
    # "<course>-<teacher>" scheme, then both trees are ensured to exist.
    for root in (DOWNLOAD_FOLDER, CACHE_FOLDER):
        _move_legacy_folder(root, course['name'], folder_name)
    for root in (DOWNLOAD_FOLDER, CACHE_FOLDER):
        os.makedirs(f"{root}/{folder_name}", exist_ok=True)

    name_prefix += folder_name.rstrip() + "/"
    name_prefix = option.windows_filesame_sanitizer(name_prefix)

    activities = lesson_data['data']['activities']
    if args.lesson_name_filter is not None:
        activities = [item for item in activities if args.lesson_name_filter in item['title']]

    for enabled, run_pass in ((args.video, _run_video_pass), (args.ppt, _run_ppt_pass)):
        if not enabled:
            continue
        run_pass(activities, course, name_prefix)
        print('sbykt may not prepare cold data in one run, rescanning for missing ones')
        run_pass(activities, course, name_prefix)
# --- --- --- Section Download Lesson Video --- --- --- #

from video_processing import download_segments_in_parallel, concatenate_segments


def download_lesson_video(lesson: dict, name_prefix: str = ""):
    """Download and concatenate the replay video of a regular lesson.

    The v3 replay endpoint is tried first; when its reply has no ``live``
    entry the v1 timeline endpoint is used instead.

    Args:
        lesson: Activity entry; ``courseware_id`` and ``title`` are read.
        name_prefix: Output path prefix; ``-<title>`` is appended to it.

    A failed segment download is recorded in ``error.log`` inside the
    download folder. Exceptions from the concatenation step propagate.
    """
    lesson_video_data = rainclassroom_sess.get(
        f"https://{YKT_HOST}/api/v3/lesson-summary/replay?lesson_id={lesson['courseware_id']}").json()

    name_prefix += "-" + lesson['title'].rstrip()
    name_prefix = option.windows_filesame_sanitizer(name_prefix)

    if idm_flag:
        name_prefix = re.sub(r'[“”]', '_', name_prefix)

    # A reply without 'data' is treated like one without 'live': fall back.
    v3_data = lesson_video_data.get('data') or {}

    if 'live' in v3_data:
        fallback_flag = 0
        segment_count = len(v3_data['live'] or [])
    else:
        print("v3 protocol detection failed, falling back to v1")

        fallback_flag = 1

        lesson_video_data = rainclassroom_sess.get(
            f"https://{YKT_HOST}/v/lesson/get_lesson_replay_timeline/?lesson_id={lesson['courseware_id']}").json()
        v1_data = lesson_video_data.get('data') or {}
        segment_count = len(v1_data.get('live_timeline') or [])

    if segment_count == 0:
        print(f"Skipping {name_prefix} - No Video", file=sys.stderr)
        return

    if os.path.exists(f"{DOWNLOAD_FOLDER}/{name_prefix}.mp4"):
        print(f"Skipping {name_prefix} - Video already present")
        time.sleep(0.25)
        return

    # Download segments in parallel
    try:
        download_segments_in_parallel(idm_flag, fallback_flag, CACHE_FOLDER, lesson_video_data, name_prefix)
    except Exception:
        print(traceback.format_exc())
        print(f"Failed to download {name_prefix}", file=sys.stderr)
        print('concatenate cannot start due to previous failure')
        # Names are usually non-ASCII; write UTF-8 rather than the locale default.
        with open(f"{DOWNLOAD_FOLDER}/error.log", "a", encoding='utf-8') as f:
            f.write(f"{name_prefix}\n")
        return

    # Short pause before merging, kept from the original flow
    # (presumably lets the downloader release its files - TODO confirm).
    time.sleep(1)
    print(f"Concatenating {name_prefix}")
    concatenate_segments(CACHE_FOLDER, DOWNLOAD_FOLDER, name_prefix, segment_count)
def _download_type15_leaf(lesson: dict, leaf_id, raw_name: str, settle_seconds: float) -> None:
    """Download and concatenate the video behind a single MOOC leaf.

    Args:
        lesson: Activity entry; ``classroom_id`` is read.
        leaf_id: Identifier of the leaf inside the courseware.
        raw_name: Unsanitized output name for this leaf.
        settle_seconds: Pause between download and concatenation.
    """
    target_name = option.windows_filesame_sanitizer(raw_name)
    if idm_flag:
        target_name = re.sub(r'[“”]', '_', target_name)

    # Same check download_lesson_video performs; it keeps the second
    # ("rescanning") pass from fetching finished leaves again.
    # NOTE(review): assumes concatenate_segments writes <name>.mp4 into
    # DOWNLOAD_FOLDER, as that check implies - confirm.
    if os.path.exists(f"{DOWNLOAD_FOLDER}/{target_name}.mp4"):
        print(f"Skipping {target_name} - Video already present")
        return

    classroom_id = str(lesson['classroom_id'])
    leaf_info = rainclassroom_sess.get(
        f"https://{YKT_HOST}/mooc-api/v1/lms/learn/leaf_info/{classroom_id}/{leaf_id}/",
        headers={
            "Xtbz": "ykt",
            "Classroom-Id": classroom_id
        }
    ).json()

    # Leaves without a media id are skipped instead of raising KeyError,
    # which would abort every remaining leaf of the courseware.
    content_info = (leaf_info.get('data') or {}).get('content_info') or {}
    media_id = (content_info.get('media') or {}).get('ccid')
    if not media_id:
        print('no media detected, skipping!')
        return

    media_data = rainclassroom_sess.get(
        f"https://{YKT_HOST}/api/open/audiovideo/playurl?video_id={media_id}&provider=cc&is_single=0&format=json"
    ).json()

    sources = media_data['data']['playurl']['sources']
    if not sources:
        print('no media detected, skipping!')
        return

    # The number after the 7-character key prefix ranks the sources; the
    # highest one is used (keys presumably look like "quality20" - TODO confirm).
    best_key = max(sources, key=lambda key: int(key[7:]))
    download_url_list = sources[best_key]

    # Download segments in parallel
    try:
        download_segments_in_parallel(idm_flag, 2, CACHE_FOLDER, download_url_list, target_name)
    except Exception:
        print(traceback.format_exc())
        print(f"Failed to download {target_name}", file=sys.stderr)
        print('concatenate cannot start due to previous failure')
        # Log the leaf that failed, in UTF-8 since names are usually non-ASCII.
        with open(f"{DOWNLOAD_FOLDER}/error.log", "a", encoding='utf-8') as f:
            f.write(f"{target_name}\n")
        return

    time.sleep(settle_seconds)
    if len(download_url_list) > 0:
        print(f"Concatenating {target_name}")
        concatenate_segments(CACHE_FOLDER, DOWNLOAD_FOLDER, target_name, len(download_url_list))
    else:
        print('concatenate cannot start due to previous failure')


def download_lesson_video_type15(lesson: dict, name_prefix: str = ""):
    """Download every video of a type-15 ("MOOCv2") courseware.

    The courseware is a list of chapters. Each chapter may hold leaves
    directly (``leaf_list``) and sections (``section_list``) that hold leaves
    of their own; every leaf is handled independently.

    Args:
        lesson: Activity entry; ``courseware_id`` and ``classroom_id`` are read.
        name_prefix: Output path prefix shared by all leaves.
    """
    mooc_data = rainclassroom_sess.get(
        f"https://{YKT_HOST}/c27/online_courseware/xty/kls/pub_news/{lesson['courseware_id']}/",
        headers={
            "Xtbz": "ykt",
            "Classroom-Id": str(lesson['classroom_id'])
        }
    ).json()

    for chapter in mooc_data['data']['content_info']:
        chapter_name = chapter['name']

        # Leaves attached directly to the chapter.
        for orphan in chapter.get('leaf_list') or []:
            _download_type15_leaf(
                lesson, orphan['id'],
                name_prefix + chapter_name + " - " + orphan['title'],
                0.25)

        # Leaves grouped under the chapter's sections.
        for section in chapter.get('section_list') or []:
            section_name = section['name']

            for leaf in section.get('leaf_list') or []:
                _download_type15_leaf(
                    lesson, leaf['id'],
                    name_prefix + chapter_name + " - " + section_name + " - " + leaf['title'],
                    1)
def download_lesson_video_type17(lesson: dict, name_prefix: str = ""):
    """Download the single video of a type-17 ("MOOCv1") courseware.

    Args:
        lesson: Activity entry; ``courseware_id`` and ``classroom_id`` are read.
        name_prefix: Output path prefix; the courseware name is appended.

    A failed segment download is recorded in ``error.log`` inside the
    download folder. Exceptions from the concatenation step propagate.
    """
    mooc_data = rainclassroom_sess.get(
        f"https://{YKT_HOST}/c27/online_courseware/xty/kls/pub_news/{lesson['courseware_id']}/",
        headers={
            "Xtbz": "ykt",
            "Classroom-Id": str(lesson['classroom_id'])
        }
    ).json()

    # Check that 'content_info' exists before looking inside it; the reverse
    # order raised KeyError in exactly the case this guard is meant to handle.
    courseware_info = (mooc_data.get('data') or {}).get('content_info')
    if not courseware_info or 'name' not in courseware_info:
        print('no media detected, skipping!')
        return

    only_lesson_name = courseware_info['name']
    only_lesson_id = courseware_info['id']

    name_prefix_lesson = name_prefix + only_lesson_name
    name_prefix_lesson = option.windows_filesame_sanitizer(name_prefix_lesson)

    if idm_flag:
        name_prefix_lesson = re.sub(r'[“”]', '_', name_prefix_lesson)

    classroom_id = str(lesson['classroom_id'])
    mooc_lesson_data = rainclassroom_sess.get(
        f"https://{YKT_HOST}/mooc-api/v1/lms/learn/leaf_info/{classroom_id}/{only_lesson_id}/",
        headers={
            "Xtbz": "ykt",
            "Classroom-Id": classroom_id
        }
    ).json()

    # A leaf without a media id carries no video; skip it rather than raise.
    leaf_info = (mooc_lesson_data.get('data') or {}).get('content_info') or {}
    mooc_media_id = (leaf_info.get('media') or {}).get('ccid')
    if not mooc_media_id:
        print('no media detected, skipping!')
        return

    mooc_media_data = rainclassroom_sess.get(
        f"https://{YKT_HOST}/api/open/audiovideo/playurl?video_id={mooc_media_id}&provider=cc&is_single=0&format=json"
    ).json()

    sources = mooc_media_data['data']['playurl']['sources']
    if not sources:
        print('no media detected, skipping!')
        return

    # The number after the 7-character key prefix ranks the sources; the
    # highest one is used (keys presumably look like "quality20" - TODO confirm).
    best_key = max(sources, key=lambda key: int(key[7:]))
    download_url_list = sources[best_key]

    # Download segments in parallel
    try:
        download_segments_in_parallel(idm_flag, 2, CACHE_FOLDER, download_url_list, name_prefix_lesson)
    except Exception:
        print(traceback.format_exc())
        print(f"Failed to download {name_prefix_lesson}", file=sys.stderr)
        print('concatenate cannot start due to previous failure')
        # Log the full output name, in UTF-8 since names are usually non-ASCII.
        with open(f"{DOWNLOAD_FOLDER}/error.log", "a", encoding='utf-8') as f:
            f.write(f"{name_prefix_lesson}\n")
        return

    time.sleep(1)
    if len(download_url_list) > 0:
        print(f"Concatenating {name_prefix_lesson}")
        concatenate_segments(CACHE_FOLDER, DOWNLOAD_FOLDER, name_prefix_lesson, len(download_url_list))
    else:
        print('concatenate cannot start due to previous failure')
def download_lesson_video_type2(lesson: dict, name_prefix: str = ""):
    """Download the videos embedded in the slides of a type-2 ("script") lesson.

    Args:
        lesson: Activity entry; ``courseware_id`` and ``classroom_id`` are read.
        name_prefix: Output path prefix; ``-<lesson title>`` is appended, then
            `` - <page index> - <file title>`` for every video shape.

    A failed segment download is recorded in ``error.log`` inside the
    download folder and the remaining shapes are still processed.
    """
    # Sample identifiers: "id": 6036907, "courseware_id": "1055476"
    # https://pro.yuketang.cn/v2/api/web/cards/detlist/1055476?classroom_id=3058049

    lesson_data = rainclassroom_sess.get(
        f"https://{YKT_HOST}/v2/api/web/cards/detlist/{lesson['courseware_id']}?classroom_id={lesson['classroom_id']}").json()
    name_prefix += "-" + lesson_data['data']['Title'].strip()

    name_prefix = option.windows_filesame_sanitizer(name_prefix)

    for slide in lesson_data['data']['Slides']:
        slide_id = slide['PageIndex']
        for shape in slide['Shapes']:
            # Only shapes of type 1 that carry a file title are downloaded.
            if shape['ShapeType'] != 1 or 'file_title' not in shape:
                continue

            playurls = shape.get('playurls') or {}
            if not playurls:
                print('no media detected, skipping!')
                continue

            file_title = shape['file_title']
            # The number after the 7-character key prefix ranks the sources
            # (keys presumably look like "quality20" - TODO confirm).
            best_key = max(playurls, key=lambda key: int(key[7:]))
            download_url_list = playurls[best_key]

            name_prefix_shape = name_prefix + f" - {slide_id} - {file_title}"
            name_prefix_shape = option.windows_filesame_sanitizer(name_prefix_shape)

            if idm_flag:
                name_prefix_shape = re.sub(r'[“”]', '_', name_prefix_shape)

            # Download segments in parallel
            try:
                download_segments_in_parallel(idm_flag, 2, CACHE_FOLDER, download_url_list, name_prefix_shape)
            except Exception:
                print(traceback.format_exc())
                print(f"Failed to download {name_prefix_shape}", file=sys.stderr)
                print('concatenate cannot start due to previous failure')
                # Log the shape that failed, in UTF-8 since names are usually non-ASCII.
                with open(f"{DOWNLOAD_FOLDER}/error.log", "a", encoding='utf-8') as f:
                    f.write(f"{name_prefix_shape}\n")
                continue

            time.sleep(1)
            # The URLs come from shape['playurls']; gating on a 'playurl' key
            # (singular) meant this step was skipped unless the API also
            # happened to send that key.
            if len(download_url_list) > 0:
                print(f"Concatenating {name_prefix_shape}")
                concatenate_segments(CACHE_FOLDER, DOWNLOAD_FOLDER, name_prefix_shape, len(download_url_list))
            else:
                print('concatenate cannot start due to previous failure')
from ppt_processing import download_ppt


def download_lesson_ppt(lesson: dict, name_prefix: str = ""):
    """Download every presentation attached to a lesson.

    The v3 summary endpoint is tried first; when its reply has no
    ``presentations`` entry the v1 ``lessonafter`` endpoints are used.

    Args:
        lesson: Activity entry; ``courseware_id``, ``classroom_id`` and
            ``title`` are read.
        name_prefix: Output path prefix; ``-<title>-<index>`` is appended for
            each presentation.

    A failure on one presentation is printed and the next one is attempted.
    """
    lesson_data = rainclassroom_sess.get(
        f"https://{YKT_HOST}/api/v3/lesson-summary/student?lesson_id={lesson['courseware_id']}").json()
    name_prefix += "-" + lesson['title'].rstrip()

    name_prefix = option.windows_filesame_sanitizer(name_prefix)

    # A reply without 'data' is treated like one without 'presentations'.
    summary = lesson_data.get('data') or {}

    if 'presentations' in summary:
        protocol = 3
        presentations = summary['presentations'] or []
    else:
        print("v3 protocol detection failed, falling back to v1")

        protocol = 1
        ppt_info = rainclassroom_sess.get(
            f"https://{YKT_HOST}/v2/api/web/lessonafter/{lesson['courseware_id']}/presentation?classroom_id={lesson['classroom_id']}").json()
        presentations = ppt_info.get('data') or []
        # An empty list used to raise IndexError on [0]; it simply means no PPT.
        if not presentations or 'id' not in presentations[0]:
            print(f"Skipping {name_prefix} - No PPT", file=sys.stderr)
            return

    for index, ppt in enumerate(presentations):
        # PPT
        try:
            if protocol == 3:
                detail_url = (f"https://{YKT_HOST}/api/v3/lesson-summary/student/presentation"
                              f"?presentation_id={ppt['id']}&lesson_id={lesson['courseware_id']}")
            else:
                detail_url = (f"https://{YKT_HOST}/v2/api/web/lessonafter/presentation/{ppt['id']}"
                              f"?classroom_id={lesson['classroom_id']}")

            ppt_raw_data = rainclassroom_sess.get(detail_url).json()
            download_ppt(protocol, args.ppt_problem_answer, args.ppt_to_pdf, CACHE_FOLDER, DOWNLOAD_FOLDER,
                         args.aria2c_path, ppt_raw_data, name_prefix + f"-{index}")

        except Exception:
            # The handler must not raise itself, so the title is looked up defensively.
            ppt_title = ppt.get('title', index) if isinstance(ppt, dict) else index
            print(traceback.format_exc())
            print(f"Failed to download PPT {name_prefix} - {ppt_title}", file=sys.stderr)
# --- --- --- Section Main --- --- --- #

print('successfully parsed account info!')

# Walk the course list; unless "all in" mode is active the user is asked per
# course and a truthy answer skips that course.
for course in courses:
    skip_flag = 0
    try:
        print(course)
        if not allin_flag:
            skip_flag = option.ask_for_input()
        if skip_flag:
            continue
        get_lesson_list(course)
    except Exception:
        # Report the failure and carry on with the next course.
        print(traceback.format_exc())
        print(f"Failed to parse {course['name']}", file=sys.stderr)
action="store_true", help="All in") -parser.add_argument("-na", "--no-all", action="store_true", help="No All in") -parser.add_argument("--video", action="store_true", help="Download Video") -parser.add_argument("--ppt", action="store_true", help="Download PPT") -parser.add_argument("--ppt-to-pdf", action="store_true", help="Convert PPT to PDF", default=True) -parser.add_argument("--ppt-problem-answer", action="store_true", help="Store PPT Problem Answer", default=True) -parser.add_argument("--course-name-filter", action="store", help="Filter Course Name", default=None) -parser.add_argument("--lesson-name-filter", action="store", help="Filter Lesson Name", default=None) - -# Check for no arguments and display help if none are given -if len(sys.argv) == 1: - parser.print_help() - print('\nYOU SHALL RUN THIS EXECUTABLE FROM POWERSHELL WITH ARGUMENT!!') - print('YOU SHALL RUN THIS EXECUTABLE FROM POWERSHELL WITH ARGUMENT!!') - print('YOU SHALL RUN THIS EXECUTABLE FROM POWERSHELL WITH ARGUMENT!!') - sys.exit() - -args = parser.parse_args() - -# Check if no arguments are provided or only --help is provided -if args.help or len(vars(args)) == 0: - print("""RainClassroom Video Downloader - -requirements: - - Python >= 3.12 - - requests - - websocket-client (qrcode login) - - qrcode (qrcode login) - - Pillow (Add answer to problem; Convert PPT to PDF) - - - aria2c (Download files multi-threaded & resume support) - - ffmpeg with nvenc support (Concatenate video segments and convert to HEVC) -""") - print(parser.format_help()) - exit() - -import requests -import json - -# --- --- --- Section Init --- --- --- # -# Login to RainClassroom -userinfo = {} -rainclassroom_sess = requests.session() - -YKT_HOST = args.ykt_host -DOWNLOAD_FOLDER = "data" -CACHE_FOLDER = "cache" - -os.makedirs(DOWNLOAD_FOLDER, exist_ok=True) -os.makedirs(CACHE_FOLDER, exist_ok=True) - -# --- --- --- Section Load Session --- --- --- # - -if args.session_cookie is not None: - 
rainclassroom_sess.cookies['sessionid'] = args.session_cookie - -# --- --- --- Section Login --- --- --- # -else: - import websocket - import qrcode - - - def on_message(ws, message): - global userinfo - userinfo = json.loads(message) - if 'subscribe_status' in userinfo: - ws.close() - return - - qr = qrcode.QRCode() - qr.add_data(userinfo["qrcode"]) - # Flush screen first - print("\033c") - qr.print_ascii(out=sys.stdout) - print("请扫描二维码登录") - - - def on_error(ws, error): - print(error) - - - def on_open(ws): - ws.send(data=json.dumps({"op": "requestlogin", "role": "web", "version": 1.4, "type": "qrcode", "from": "web"})) - - - # websocket数据交互 - ws = websocket.WebSocketApp(f"wss://{YKT_HOST}/wsapp/", - on_message=on_message, - on_error=on_error) - ws.on_open = on_open - ws.run_forever() - - # 登录 - req = rainclassroom_sess.get(f"https://{YKT_HOST}/v/course_meta/user_info") - rainclassroom_sess.post(f"https://{YKT_HOST}/pc/web_login", - data=json.dumps({'UserID': userinfo['UserID'], 'Auth': userinfo['Auth']})) - - # Store session - with open(f"{DOWNLOAD_FOLDER}/session.txt", "a", encoding='utf-8') as f: - f.write(rainclassroom_sess.cookies['sessionid'] + "\n") - -# --- --- --- Section Get Course List --- --- --- # - -# 获取自己的课程列表 -shown_courses = rainclassroom_sess.get(f"https://{YKT_HOST}/v2/api/web/courses/list?identity=2").json() - -hidden_courses = rainclassroom_sess.get(f"https://{YKT_HOST}/v2/api/web/classroom_archive").json() - -for course in hidden_courses['data']['classrooms']: - course['classroom_id'] = course['id'] - -courses = shown_courses['data']['list'] + hidden_courses['data']['classrooms'] - -if args.course_name_filter is not None: - courses = [c for c in courses if args.course_name_filter in c['name']] - -rainclassroom_sess.cookies['xtbz'] = 'ykt' - - -# --- --- --- Section Get Lesson List --- --- --- # - - -def get_lesson_list(course: dict, name_prefix: str = ""): - lesson_data = rainclassroom_sess.get( - 
f"https://{YKT_HOST}/v2/api/web/logs/learn/{course['classroom_id']}?actype=-1&page=0&offset=500&sort=-1").json() - - folder_name = f"{course['name']}-{course['teacher']['name']}" - folder_name = option.windows_filesame_sanitizer(folder_name) - - if idm_flag: - folder_name = folder_name.replace('/', '\\') - folder_name = re.sub(r'[“”]', '_', folder_name) - - print('folder name would be:', folder_name) - - # Rename old folder - if os.path.exists(f"{DOWNLOAD_FOLDER}/{course['name']}"): - os.rename(f"{DOWNLOAD_FOLDER}/{course['name']}", f"{DOWNLOAD_FOLDER}/{folder_name}") - - if os.path.exists(f"{CACHE_FOLDER}/{course['name']}"): - os.rename(f"{CACHE_FOLDER}/{course['name']}", f"{CACHE_FOLDER}/{folder_name}") - - os.makedirs(f"{DOWNLOAD_FOLDER}/{folder_name}", exist_ok=True) - os.makedirs(f"{CACHE_FOLDER}/{folder_name}", exist_ok=True) - - name_prefix += folder_name.rstrip() + "/" - name_prefix = option.windows_filesame_sanitizer(name_prefix) - - if args.lesson_name_filter is not None: - lesson_data['data']['activities'] = [l for l in lesson_data['data']['activities'] if - args.lesson_name_filter in l['title']] - - length = len(lesson_data['data']['activities']) - - if args.video: - for index, lesson in enumerate(lesson_data['data']['activities']): - if not lesson['type'] in [2, 3, 14, 15, 17]: - continue - - lesson['classroom_id'] = course['classroom_id'] - - # Lesson - try: - if lesson['type'] == 2: - print('Script type detected!') - download_lesson_video_type2(lesson, name_prefix + str(length - index)) - elif lesson['type'] in [3, 14]: - print('Normal type detected!') - download_lesson_video(lesson, name_prefix + str(length - index)) - elif lesson['type'] == 15: - print('MOOCv2 type detected!') - download_lesson_video_type15(lesson, name_prefix + str(length - index)) - elif lesson['type'] == 17: - print('MOOCv1 type detected!') - download_lesson_video_type17(lesson, name_prefix + str(length - index)) - except Exception: - print(traceback.format_exc()) - 
print(f"Failed to download video for {name_prefix} - {lesson['title']}", file=sys.stderr) - - print('sbykt may not prepare cold data in one run, rescanning for missing ones') - - for index, lesson in enumerate(lesson_data['data']['activities']): - if not lesson['type'] in [2, 3, 14, 15, 17]: - continue - - lesson['classroom_id'] = course['classroom_id'] - - # Lesson - try: - if lesson['type'] == 2: - print('Script type detected!') - download_lesson_video_type2(lesson, name_prefix + str(length - index)) - elif lesson['type'] in [3, 14]: - print('Normal type detected!') - download_lesson_video(lesson, name_prefix + str(length - index)) - elif lesson['type'] == 15: - print('MOOCv2 type detected!') - download_lesson_video_type15(lesson, name_prefix + str(length - index)) - elif lesson['type'] == 17: - print('MOOCv1 type detected!') - download_lesson_video_type17(lesson, name_prefix + str(length - index)) - except Exception: - print(traceback.format_exc()) - print(f"Failed to download video for {name_prefix} - {lesson['title']}", file=sys.stderr) - - if args.ppt: - for index, lesson in enumerate(lesson_data['data']['activities']): - if lesson['type'] in (15, 17): - print("mooc type has no ppts!") - continue - lesson['classroom_id'] = course['classroom_id'] - - # Lesson - try: - download_lesson_ppt(lesson, name_prefix + str(length - index)) - except Exception: - print(traceback.format_exc()) - print(f"Failed to download PPT for {name_prefix} - {lesson['title']}", file=sys.stderr) - - print('sbykt may not prepare cold data in one run, rescanning for missing ones') - - for index, lesson in enumerate(lesson_data['data']['activities']): - if lesson['type'] in (15, 17): - print("mooc type has no ppts!") - continue - lesson['classroom_id'] = course['classroom_id'] - - # Lesson - try: - download_lesson_ppt(lesson, name_prefix + str(length - index)) - except Exception: - print(traceback.format_exc()) - print(f"Failed to download PPT for {name_prefix} - {lesson['title']}", 
file=sys.stderr) - - -# --- --- --- Section Download Lesson Video --- --- --- # - -from video_processing import download_segments_in_parallel, concatenate_segments - - -def download_lesson_video(lesson: dict, name_prefix: str = ""): - lesson_video_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/api/v3/lesson-summary/replay?lesson_id={lesson['courseware_id']}").json() - - name_prefix += "-" + lesson['title'].rstrip() - name_prefix = option.windows_filesame_sanitizer(name_prefix) - - if idm_flag: - name_prefix = re.sub(r'[“”]', '_', name_prefix) - - if 'live' not in lesson_video_data['data']: - print(f"v3 protocol detection failed, falling back to v1") - - fallback_flag = 1 - - lesson_video_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/v/lesson/get_lesson_replay_timeline/?lesson_id={lesson['courseware_id']}").json() - - if 'live_timeline' not in lesson_video_data['data'] or len(lesson_video_data['data']['live_timeline']) == 0: - print(f"Skipping {name_prefix} - No Video", file=sys.stderr) - return - else: - fallback_flag = 0 - - if len(lesson_video_data['data']['live']) == 0: - print(f"Skipping {name_prefix} - No Video", file=sys.stderr) - return - - if os.path.exists(f"{DOWNLOAD_FOLDER}/{name_prefix}.mp4"): - print(f"Skipping {name_prefix} - Video already present") - time.sleep(0.25) - return - - has_error = False - - # Download segments in parallel - try: - download_segments_in_parallel(idm_flag, fallback_flag, CACHE_FOLDER, lesson_video_data, name_prefix) - except Exception: - print(traceback.format_exc()) - print(f"Failed to download {name_prefix}", file=sys.stderr) - has_error = True - - # Start concatenation if downloads were successful - if not has_error: - time.sleep(1) - if 'live' in lesson_video_data['data'] and len(lesson_video_data['data']['live']) > 0: - print(f"Concatenating {name_prefix}") - concatenate_segments(CACHE_FOLDER, DOWNLOAD_FOLDER, name_prefix, len(lesson_video_data['data']['live'])) - elif 'live_timeline' in 
lesson_video_data['data'] and len(lesson_video_data['data']['live_timeline']) > 0: - print(f"Concatenating {name_prefix}") - concatenate_segments(CACHE_FOLDER, DOWNLOAD_FOLDER, name_prefix, - len(lesson_video_data['data']['live_timeline'])) - else: - print('concatenate cannot start due to previous failure') - else: - print('concatenate cannot start due to previous failure') - - if has_error: - with open(f"{DOWNLOAD_FOLDER}/error.log", "a") as f: - f.write(f"{name_prefix}\n") - - -def download_lesson_video_type15(lesson: dict, name_prefix: str = ""): - mooc_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/c27/online_courseware/xty/kls/pub_news/{lesson['courseware_id']}/", - headers={ - "Xtbz": "ykt", - "Classroom-Id": str(lesson['classroom_id']) - } - ).json() - - for chapter in mooc_data['data']['content_info']: - chapter_name = chapter['name'] - - for orphan in chapter['leaf_list']: - orphan_title = orphan['title'] - orphan_id = orphan['id'] - has_error = False - - name_prefix_orphan = name_prefix + chapter_name + " - " + orphan_title - name_prefix_orphan = option.windows_filesame_sanitizer(name_prefix_orphan) - - if idm_flag: - name_prefix_orphan = re.sub(r'[“”]', '_', name_prefix_orphan) - - mooc_orphan_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/mooc-api/v1/lms/learn/leaf_info/{str(lesson['classroom_id'])}/{str(orphan_id)}/", - headers={ - "Xtbz": "ykt", - "Classroom-Id": str(lesson['classroom_id']) - } - ).json() - - if 'data' not in mooc_orphan_data or 'content_info' not in mooc_orphan_data['data']: - print('no media detected, skipping!') - continue - - mooc_orphan_media_id = mooc_orphan_data['data']['content_info']['media']['ccid'] - mooc_orphan_media_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/api/open/audiovideo/playurl?video_id={mooc_orphan_media_id}&provider=cc&is_single=0&format=json" - ).json() - - quality_keys = list( - map(lambda x: (int(x[7:]), x), mooc_orphan_media_data['data']['playurl']['sources'].keys())) - 
quality_keys.sort(key=lambda x: x[0], reverse=True) - download_url_list = mooc_orphan_media_data['data']['playurl']['sources'][quality_keys[0][1]] - # print(download_url_list) - - # Download segments in parallel - try: - download_segments_in_parallel(idm_flag, 2, CACHE_FOLDER, download_url_list, name_prefix_orphan) - except Exception: - print(traceback.format_exc()) - print(f"Failed to download {name_prefix}", file=sys.stderr) - has_error = True - - # Start concatenation if downloads were successful - if not has_error: - time.sleep(0.25) - if 'playurl' in mooc_orphan_media_data['data'] and len(download_url_list) > 0: - print(f"Concatenating {name_prefix}") - concatenate_segments(CACHE_FOLDER, DOWNLOAD_FOLDER, name_prefix_orphan, len(download_url_list)) - else: - print('concatenate cannot start due to previous failure') - else: - print('concatenate cannot start due to previous failure') - - if has_error: - with open(f"{DOWNLOAD_FOLDER}/error.log", "a") as f: - f.write(f"{name_prefix}\n") - - for section in chapter['section_list']: - section_name = section['name'] - - for lesson_d in section['leaf_list']: - lesson_name = lesson_d['title'] - lesson_id = lesson_d['id'] - has_error = False - - name_prefix_lesson = name_prefix + chapter_name + " - " + section_name + " - " + lesson_name - name_prefix_lesson = option.windows_filesame_sanitizer(name_prefix_lesson) - - if idm_flag: - name_prefix_lesson = re.sub(r'[“”]', '_', name_prefix_lesson) - - mooc_lesson_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/mooc-api/v1/lms/learn/leaf_info/{str(lesson['classroom_id'])}/{str(lesson_id)}/", - headers={ - "Xtbz": "ykt", - "Classroom-Id": str(lesson['classroom_id']) - } - ).json() - - if 'data' not in mooc_lesson_data or 'content_info' not in mooc_lesson_data['data']: - print('no media detected, skipping!') - continue - - mooc_media_id = mooc_lesson_data['data']['content_info']['media']['ccid'] - - mooc_media_data = rainclassroom_sess.get( - 
f"https://{YKT_HOST}/api/open/audiovideo/playurl?video_id={mooc_media_id}&provider=cc&is_single=0&format=json" - ).json() - - quality_keys = list( - map(lambda x: (int(x[7:]), x), mooc_media_data['data']['playurl']['sources'].keys())) - quality_keys.sort(key=lambda x: x[0], reverse=True) - download_url_list = mooc_media_data['data']['playurl']['sources'][quality_keys[0][1]] - # print(download_url_list) - - # Download segments in parallel - try: - download_segments_in_parallel(idm_flag, 2, CACHE_FOLDER, download_url_list, name_prefix_lesson) - except Exception: - print(traceback.format_exc()) - print(f"Failed to download {name_prefix}", file=sys.stderr) - has_error = True - - # Start concatenation if downloads were successful - if not has_error: - time.sleep(1) - if 'playurl' in mooc_media_data['data'] and len(download_url_list) > 0: - print(f"Concatenating {name_prefix}") - concatenate_segments(CACHE_FOLDER, DOWNLOAD_FOLDER, name_prefix_lesson, len(download_url_list)) - else: - print('concatenate cannot start due to previous failure') - else: - print('concatenate cannot start due to previous failure') - - if has_error: - with open(f"{DOWNLOAD_FOLDER}/error.log", "a") as f: - f.write(f"{name_prefix}\n") - - -def download_lesson_video_type17(lesson: dict, name_prefix: str = ""): - mooc_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/c27/online_courseware/xty/kls/pub_news/{lesson['courseware_id']}/", - headers={ - "Xtbz": "ykt", - "Classroom-Id": str(lesson['classroom_id']) - } - ).json() - - if 'name' not in mooc_data['data']['content_info'] or 'content_info' not in mooc_data['data']: - print('no media detected, skipping!') - return - - only_lesson_name = mooc_data['data']['content_info']['name'] - only_lesson_id = mooc_data['data']['content_info']['id'] - - has_error = False - - name_prefix_lesson = name_prefix + only_lesson_name - name_prefix_lesson = option.windows_filesame_sanitizer(name_prefix_lesson) - - if idm_flag: - name_prefix_lesson = re.sub(r'[“”]', 
'_', name_prefix_lesson) - - mooc_lesson_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/mooc-api/v1/lms/learn/leaf_info/{str(lesson['classroom_id'])}/{str(only_lesson_id)}/", - headers={ - "Xtbz": "ykt", - "Classroom-Id": str(lesson['classroom_id']) - } - ).json() - - if 'data' not in mooc_lesson_data or 'content_info' not in mooc_lesson_data['data']: - print('no media detected, skipping!') - return - - mooc_media_id = mooc_lesson_data['data']['content_info']['media']['ccid'] - - mooc_media_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/api/open/audiovideo/playurl?video_id={mooc_media_id}&provider=cc&is_single=0&format=json" - ).json() - - quality_keys = list(map(lambda x: (int(x[7:]), x), mooc_media_data['data']['playurl']['sources'].keys())) - quality_keys.sort(key=lambda x: x[0], reverse=True) - download_url_list = mooc_media_data['data']['playurl']['sources'][quality_keys[0][1]] - # print(download_url_list) - - # Download segments in parallel - try: - download_segments_in_parallel(idm_flag, 2, CACHE_FOLDER, download_url_list, name_prefix_lesson) - except Exception: - print(traceback.format_exc()) - print(f"Failed to download {name_prefix}", file=sys.stderr) - has_error = True - - # Start concatenation if downloads were successful - if not has_error: - time.sleep(1) - if 'playurl' in mooc_media_data['data'] and len(download_url_list) > 0: - print(f"Concatenating {name_prefix}") - concatenate_segments(CACHE_FOLDER, DOWNLOAD_FOLDER, name_prefix_lesson, len(download_url_list)) - else: - print('concatenate cannot start due to previous failure') - else: - print('concatenate cannot start due to previous failure') - - if has_error: - with open(f"{DOWNLOAD_FOLDER}/error.log", "a") as f: - f.write(f"{name_prefix}\n") - - -def download_lesson_video_type2(lesson: dict, name_prefix: str = ""): - # "id": 6036907, "courseware_id": "1055476" - # https://pro.yuketang.cn/v2/api/web/cards/detlist/1055476?classroom_id=3058049 - - lesson_data = rainclassroom_sess.get( 
- f"https://{YKT_HOST}/v2/api/web/cards/detlist/{lesson['courseware_id']}?classroom_id={lesson['classroom_id']}").json() - name_prefix += "-" + lesson_data['data']['Title'].strip() - - name_prefix = option.windows_filesame_sanitizer(name_prefix) - - for slide in lesson_data['data']['Slides']: - slide_id = slide['PageIndex'] - for shape in slide['Shapes']: - if shape['ShapeType'] == 1 and 'file_title' in shape: - file_title = shape['file_title'] - quality_keys = list(map(lambda x: (int(x[7:]), x), shape['playurls'].keys())) - quality_keys.sort(key=lambda x: x[0], reverse=True) - download_url_list = shape['playurls'][quality_keys[0][1]] - - name_prefix_shape = name_prefix + f" - {slide_id} - {file_title}" - name_prefix_shape = option.windows_filesame_sanitizer(name_prefix_shape) - - if idm_flag: - name_prefix_shape = re.sub(r'[“”]', '_', name_prefix_shape) - - # Download segments in parallel - try: - download_segments_in_parallel(idm_flag, 2, CACHE_FOLDER, download_url_list, name_prefix_shape) - has_error = False - except Exception: - print(traceback.format_exc()) - print(f"Failed to download {name_prefix}", file=sys.stderr) - has_error = True - - # Start concatenation if downloads were successful - if not has_error: - time.sleep(1) - if 'playurl' in shape and len(download_url_list) > 0: - print(f"Concatenating {name_prefix}") - concatenate_segments(CACHE_FOLDER, DOWNLOAD_FOLDER, name_prefix_shape, len(download_url_list)) - else: - print('concatenate cannot start due to previous failure') - else: - print('concatenate cannot start due to previous failure') - - if has_error: - with open(f"{DOWNLOAD_FOLDER}/error.log", "a") as f: - f.write(f"{name_prefix}\n") - - -from ppt_processing import download_ppt - - -def download_lesson_ppt(lesson: dict, name_prefix: str = ""): - lesson_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/api/v3/lesson-summary/student?lesson_id={lesson['courseware_id']}").json() - name_prefix += "-" + lesson['title'].rstrip() - - name_prefix = 
option.windows_filesame_sanitizer(name_prefix) - - if 'presentations' not in lesson_data['data']: - print(f"v3 protocol detection failed, falling back to v1") - - ppt_info = rainclassroom_sess.get( - f"https://{YKT_HOST}/v2/api/web/lessonafter/{lesson['courseware_id']}/presentation?classroom_id={lesson['classroom_id']}").json() - if 'id' not in ppt_info['data'][0]: - print(f"Skipping {name_prefix} - No PPT", file=sys.stderr) - return - - for index, ppt in enumerate(ppt_info['data']): - # PPT - try: - ppt_raw_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/v2/api/web/lessonafter/presentation/{ppt['id']}?classroom_id={lesson['classroom_id']}").json() - download_ppt(1, args.ppt_problem_answer, args.ppt_to_pdf, CACHE_FOLDER, DOWNLOAD_FOLDER, - ppt_raw_data, name_prefix + f"-{index}") - - except Exception as e: - print(traceback.format_exc()) - print(f"Failed to download PPT {name_prefix} - {ppt['title']}", file=sys.stderr) - - else: - for index, ppt in enumerate(lesson_data['data']['presentations']): - # PPT - try: - ppt_raw_data = rainclassroom_sess.get( - f"https://{YKT_HOST}/api/v3/lesson-summary/student/presentation?presentation_id={ppt['id']}&lesson_id={lesson['courseware_id']}").json() - download_ppt(3, args.ppt_problem_answer, args.ppt_to_pdf, CACHE_FOLDER, DOWNLOAD_FOLDER, - ppt_raw_data, name_prefix + f"-{index}") - - except Exception as e: - print(traceback.format_exc()) - print(f"Failed to download PPT {name_prefix} - {ppt['title']}", file=sys.stderr) - - - - -# --- --- --- Section Main --- --- --- # - - -import option as opt - -print('successfully parsed account info!') - -if args.all and args.no_all: - print("'-a' and '-na' cannot be used together") -if args.idm and args.no_idm: - print("'-idm' and '-no_idm' cannot be used together") - -if args.all: - allin_flag = 1 -elif args.no_all: - allin_flag = 0 -else: - allin_flag = opt.ask_for_allin() - -if args.idm: - idm_flag = 1 -elif args.no_idm: - idm_flag = 0 -else: - idm_flag = opt.ask_for_idm() - -for 
course in courses: - skip_flag = 0 - try: - print(course) - if not allin_flag: - skip_flag = opt.ask_for_input() - if skip_flag: - continue - else: - get_lesson_list(course) - else: - get_lesson_list(course) - except Exception as e: - print(traceback.format_exc()) - print(f"Failed to parse {course['name']}", file=sys.stderr) diff --git a/ppt_processing.py b/ppt_processing.py index a7580bf..479bf5e 100644 --- a/ppt_processing.py +++ b/ppt_processing.py @@ -3,9 +3,12 @@ import subprocess import re import option +import sys + +WINDOWS = sys.platform == 'win32' -def download_ppt(version, arg_ans, arg_pdf, CACHE_FOLDER, DOWNLOAD_FOLDER, ppt_raw_data, name_prefix: str = ""): +def download_ppt(version, arg_ans, arg_pdf, CACHE_FOLDER, DOWNLOAD_FOLDER, ARIA2C_PATH, ppt_raw_data, name_prefix: str = ""): print(f"Downloading {name_prefix}") if version == 1: @@ -43,10 +46,13 @@ f.write(f"{slide['cover']}\n out={DOWNLOAD_FOLDER}/{name_prefix}/{slide['index']}.jpg\n") images.append(f"{DOWNLOAD_FOLDER}/{name_prefix}/{slide['index']}.jpg") - ppt_download_command = (f".\\aria2c -i {CACHE_FOLDER}/ppt_download.txt -x 16 -s 16 -c " + ppt_download_command = (f"{ARIA2C_PATH} -i {CACHE_FOLDER}/ppt_download.txt -x 16 -s 16 -c " f"-l aria2c_ppt.log --log-level warn") - subprocess.run(['powershell', '-Command', ppt_download_command], text=True) + if WINDOWS: + subprocess.run(['powershell', '-Command', ppt_download_command], text=True) + else: + subprocess.run(ppt_download_command, shell=True) from PIL import Image diff --git a/video_processing.py b/video_processing.py index 8985d38..1db3361 100644 --- a/video_processing.py +++ b/video_processing.py @@ -5,15 +5,23 @@ import time import traceback from concurrent.futures import ThreadPoolExecutor, as_completed +import shutil + +FFMPEG_PATH = "ffmpeg" if shutil.which("ffmpeg") else os.path.join(os.getcwd(), "ffmpeg") +ARIA2C_PATH = "aria2c" if shutil.which("aria2c") else os.path.join(os.getcwd(), "aria2c") +WINDOWS = sys.platform == 'win32' def 
download_segment(CACHE_FOLDER, url: str, order: int, name_prefix: str = ""): print(f"Downloading {name_prefix} - {order}") - video_download_command = (f".\\aria2c -o '{CACHE_FOLDER}/{name_prefix}-{order}.mp4'" + video_download_command = (f"{ARIA2C_PATH} -o '{CACHE_FOLDER}/{name_prefix}-{order}.mp4'" f" -x 16 -s 16 '{url}' -c -l aria2c_video.log --log-level warn") - result = subprocess.run(['powershell', '-Command', video_download_command], text=True) + if WINDOWS: + result = subprocess.run(['powershell', '-Command', video_download_command], text=True) + else: + result = subprocess.run(video_download_command, shell=True) return result @@ -91,14 +99,14 @@ save_dir = os.path.dirname(output_path) save_name = os.path.basename(output_path) - if 'mp3' in url: + if 'mp3' in url or not WINDOWS: if idm_flag: video_download_command = ( f"idman /n /d \"{url}\" /p \"$(pwd)\" /f '{CACHE_FOLDER}/{name_prefix}-{order}.mp4'" ) else: video_download_command = ( - f".\\ffmpeg -i '{url}' -c:v copy -c:a copy -n '{CACHE_FOLDER}/{name_prefix}-{order}.mp4' " + f"{FFMPEG_PATH} -i '{url}' -c:v copy -c:a copy -n '{CACHE_FOLDER}/{name_prefix}-{order}.mp4' " f"-hide_banner -loglevel error -stats" ) @@ -109,7 +117,10 @@ f"--check-segments-count false --download-retry-count 15 --thread-count 64" ) - result = subprocess.run(['powershell', '-Command', video_download_command], text=True) + if WINDOWS: + result = subprocess.run(['powershell', '-Command', video_download_command], text=True) + else: + result = subprocess.run(video_download_command, shell=True) return result @@ -244,7 +255,7 @@ # First video concatenation command using CUDA acceleration video_concatenating_command = ( - f"ffmpeg -f concat -safe 0 -hwaccel cuda -hwaccel_output_format cuda " + f"{FFMPEG_PATH} -f concat -safe 0 -hwaccel cuda -hwaccel_output_format cuda " f"-i '{CACHE_FOLDER}/concat.txt' " f"-c:v av1_nvenc -cq 36 -g 200 -bf 7 -b_strategy 1 -sc_threshold 80 -me_range 16 " f"-surfaces 64 -bufsize 12800k -refs 16 -r 7.5 
-temporal-aq 1 -rc-lookahead 127 " @@ -253,7 +264,10 @@ ) # Run the first command - result = subprocess.run(['powershell', '-Command', video_concatenating_command], text=True) + if WINDOWS: + result = subprocess.run(['powershell', '-Command', video_concatenating_command], text=True) + else: + result = subprocess.run(video_concatenating_command, shell=True) # If the first command fails, try the fallback if result.returncode != 0: @@ -261,7 +275,7 @@ # Fallback video concatenation command using cuvid acceleration video_concatenating_command_fallback = ( - f"ffmpeg -f concat -safe 0 " + f"{FFMPEG_PATH} -f concat -safe 0 " f"-i '{CACHE_FOLDER}/concat.txt' " f"-c:v av1_nvenc -cq 36 -g 200 -bf 7 -b_strategy 1 -sc_threshold 80 -me_range 16 " f"-surfaces 64 -bufsize 12800k -refs 16 -r 7.5 -temporal-aq 1 -rc-lookahead 127 " @@ -270,7 +284,10 @@ ) # Run the fallback command - fallback_result = subprocess.run(['powershell', '-Command', video_concatenating_command_fallback], text=True) + if WINDOWS: + fallback_result = subprocess.run(['powershell', '-Command', video_concatenating_command_fallback], text=True) + else: + fallback_result = subprocess.run(video_concatenating_command_fallback, shell=True) # Check if the fallback also fails if fallback_result.returncode != 0: