diff --git a/main.py b/main.py index 4104d67..adfab01 100644 --- a/main.py +++ b/main.py @@ -194,12 +194,39 @@ with open(f"{DOWNLOAD_FOLDER}/session.txt", "a", encoding='utf-8') as f: f.write("\n" + rainclassroom_sess.cookies['sessionid'] + "\n") +# --- --- --- Generic Error Handling --- --- --- # + +class APIError(Exception): + pass + +def check_response(r: dict): + if 'success' in r: + e = not r['success'] + + elif 'errcode' in r: + e = r['errcode'] != 0 + + elif 'code' in r: + e = r['code'] != 0 + + else: + print(json.dumps(r)) + print("Unknown API return status") + e = False + + if e: + print(json.dumps(r)) + raise APIError() + + # --- --- --- Section Get Course List --- --- --- # # 获取自己的课程列表 shown_courses = rainclassroom_sess.get(f"https://{YKT_HOST}/v2/api/web/courses/list?identity=2").json() +check_response(shown_courses) hidden_courses = rainclassroom_sess.get(f"https://{YKT_HOST}/v2/api/web/classroom_archive").json() +check_response(hidden_courses) for course in hidden_courses['data']['classrooms']: course['classroom_id'] = course['id'] @@ -246,6 +273,7 @@ def get_lesson_list(course: dict, name_prefix: str = ""): lesson_data = rainclassroom_sess.get( f"https://{YKT_HOST}/v2/api/web/logs/learn/{course['classroom_id']}?actype=-1&page=0&offset=500&sort=-1").json() + check_response(lesson_data) folder_name = f"{course['name']}-{course['teacher']['name']}" folder_name = option.windows_filesame_sanitizer(folder_name) @@ -276,7 +304,23 @@ length = len(lesson_data['data']['activities']) + def parse_single_lesson(index: int, lesson: dict): + if lesson['type'] == 2: + print('Script type detected!') + download_lesson_video_type2(lesson, name_prefix + str(length - index)) + elif lesson['type'] == 14: + print('Normal type detected!') + download_lesson_video(lesson, name_prefix + str(length - index)) + elif lesson['type'] == 15: + print('MOOCv2 type detected!') + download_lesson_video_type15(lesson, name_prefix + str(length - index)) + elif lesson['type'] == 17: + print('MOOCv1 type detected!') + download_lesson_video_type17(lesson, name_prefix + str(length - index)) + if args.video: + failed_lessons = [] + for index, lesson in enumerate(lesson_data['data']['activities']): if not lesson['type'] in [2, 14, 15, 17]: continue @@ -285,46 +329,41 @@ # Lesson try: - if lesson['type'] == 2: - print('Script type detected!') - download_lesson_video_type2(lesson, name_prefix + str(length - index)) - elif lesson['type'] == 14: - print('Normal type detected!') - download_lesson_video(lesson, name_prefix + str(length - index)) - elif lesson['type'] == 15: - print('MOOCv2 type detected!') - download_lesson_video_type15(lesson, name_prefix + str(length - index)) - elif lesson['type'] == 17: - print('MOOCv1 type detected!') - download_lesson_video_type17(lesson, name_prefix + str(length - index)) + parse_single_lesson(index, lesson) except Exception: print(traceback.format_exc()) print(f"Failed to download video for {name_prefix} - {lesson['title']}", file=sys.stderr) + failed_lessons.append((index, lesson)) - print('sbykt may not prepare cold data in one run, rescanning for missing ones') + if len(failed_lessons) > 0: + print('Retrying failed lessons') - for index, lesson in enumerate(lesson_data['data']['activities']): - if not lesson['type'] in [14, 15, 17]: - continue + for retry_count in range(3): + if len(failed_lessons) == 0: + break - lesson['classroom_id'] = course['classroom_id'] + print(f"Retry #{retry_count + 1}") + still_failed_lessons = [] + for index, lesson in failed_lessons: + try: + parse_single_lesson(index, lesson) + except Exception: + print(traceback.format_exc()) + print(f"Failed to download video for {name_prefix} - {lesson['title']}", file=sys.stderr) + still_failed_lessons.append((index, lesson)) - # Lesson - try: - if lesson['type'] == 14: - print('Normal type detected!') - download_lesson_video(lesson, name_prefix + str(length - index)) - elif lesson['type'] == 15: - print('MOOCv2 type detected!') - download_lesson_video_type15(lesson, name_prefix + str(length - index)) - elif lesson['type'] == 17: - print('MOOCv1 type detected!') - download_lesson_video_type17(lesson, name_prefix + str(length - index)) - except Exception: - print(traceback.format_exc()) - print(f"Failed to download video for {name_prefix} - {lesson['title']}", file=sys.stderr) + failed_lessons = still_failed_lessons + + if len(failed_lessons) > 0: + with open(f"{DOWNLOAD_FOLDER}/error.log", "a") as f: + for index, lesson in failed_lessons: + f.write(f"Video for {name_prefix} - {lesson['title']}\n") + f.write(json.dumps(lesson) + "\n\n\n") + + print(f"Video for {name_prefix} - {lesson['title']} failed to download", file=sys.stderr) if args.ppt: + failed_lessons = [] for index, lesson in enumerate(lesson_data['data']['activities']): if lesson['type'] in (15, 17): print("mooc type has no ppts!") @@ -337,31 +376,43 @@ except Exception: print(traceback.format_exc()) print(f"Failed to download PPT for {name_prefix} - {lesson['title']}", file=sys.stderr) + failed_lessons.append((index, lesson)) + + if len(failed_lessons) > 0: + print('Retrying failed lessons') - print('sbykt may not prepare cold data in one run, rescanning for missing ones') + for retry_count in range(3): + if len(failed_lessons) == 0: + break - for index, lesson in enumerate(lesson_data['data']['activities']): - if lesson['type'] in (15, 17): - print("mooc type has no ppts!") - continue - lesson['classroom_id'] = course['classroom_id'] + print(f"Retry #{retry_count + 1}") + still_failed_lessons = [] + for index, lesson in failed_lessons: + try: + download_lesson_ppt(lesson, name_prefix + str(length - index)) + except Exception: + print(traceback.format_exc()) + print(f"Failed to download PPT for {name_prefix} - {lesson['title']}", file=sys.stderr) + still_failed_lessons.append((index, lesson)) - # Lesson - try: - download_lesson_ppt(lesson, name_prefix + str(length - index)) - except Exception: - print(traceback.format_exc()) - print(f"Failed to download PPT for {name_prefix} - {lesson['title']}", file=sys.stderr) + failed_lessons = still_failed_lessons + + if len(failed_lessons) > 0: + with open(f"{DOWNLOAD_FOLDER}/error.log", "a") as f: + for index, lesson in failed_lessons: + f.write(f"PPT for {name_prefix} - {lesson['title']}\n") + f.write(json.dumps(lesson) + "\n\n\n") + print(f"PPT for {name_prefix} - {lesson['title']} failed to download", file=sys.stderr) # --- --- --- Section Download Lesson Video --- --- --- # from video_processing import download_segments_in_parallel, concatenate_segments - def download_lesson_video(lesson: dict, name_prefix: str = ""): lesson_video_data = rainclassroom_sess.get( f"https://{YKT_HOST}/api/v3/lesson-summary/replay?lesson_id={lesson['courseware_id']}").json() + check_response(lesson_video_data) name_prefix += "-" + lesson['title'].rstrip() name_prefix = option.windows_filesame_sanitizer(name_prefix) @@ -376,6 +427,7 @@ lesson_video_data = rainclassroom_sess.get( f"https://{YKT_HOST}/v/lesson/get_lesson_replay_timeline/?lesson_id={lesson['courseware_id']}").json() + check_response(lesson_video_data) if 'live_timeline' not in lesson_video_data['data'] or len(lesson_video_data['data']['live_timeline']) == 0: print(f"Skipping {name_prefix} - No Video", file=sys.stderr) @@ -430,6 +482,7 @@ "Classroom-Id": str(lesson['classroom_id']) } ).json() + check_response(mooc_data) for chapter in mooc_data['data']['content_info']: chapter_name = chapter['name'] @@ -452,6 +505,7 @@ "Classroom-Id": str(lesson['classroom_id']) } ).json() + check_response(mooc_orphan_data) if 'data' not in mooc_orphan_data or 'content_info' not in mooc_orphan_data['data']: print('no media detected, skipping!') @@ -461,6 +515,7 @@ mooc_orphan_media_data = rainclassroom_sess.get( f"https://{YKT_HOST}/api/open/audiovideo/playurl?video_id={mooc_orphan_media_id}&provider=cc&is_single=0&format=json" ).json() + check_response(mooc_orphan_media_data) quality_keys = list(map(lambda x: (int(x[7:]), x), mooc_orphan_media_data['data']['playurl']['sources'].keys())) quality_keys.sort(key=lambda x: x[0], reverse=True) @@ -511,6 +566,7 @@ "Classroom-Id": str(lesson['classroom_id']) } ).json() + check_response(mooc_lesson_data) if 'data' not in mooc_lesson_data or 'content_info' not in mooc_lesson_data['data']: print('no media detected, skipping!') @@ -521,6 +577,7 @@ mooc_media_data = rainclassroom_sess.get( f"https://{YKT_HOST}/api/open/audiovideo/playurl?video_id={mooc_media_id}&provider=cc&is_single=0&format=json" ).json() + check_response(mooc_media_data) quality_keys = list(map(lambda x: (int(x[7:]), x), mooc_media_data['data']['playurl']['sources'].keys())) quality_keys.sort(key=lambda x: x[0], reverse=True) @@ -559,6 +616,7 @@ "Classroom-Id": str(lesson['classroom_id']) } ).json() + check_response(mooc_data) if 'name' not in mooc_data['data']['content_info'] or 'content_info' not in mooc_data['data']: print('no media detected, skipping!') @@ -582,6 +640,7 @@ "Classroom-Id": str(lesson['classroom_id']) } ).json() + check_response(mooc_lesson_data) if 'data' not in mooc_lesson_data or 'content_info' not in mooc_lesson_data['data']: print('no media detected, skipping!') @@ -592,6 +651,7 @@ mooc_media_data = rainclassroom_sess.get( f"https://{YKT_HOST}/api/open/audiovideo/playurl?video_id={mooc_media_id}&provider=cc&is_single=0&format=json" ).json() + check_response(mooc_media_data) quality_keys = list(map(lambda x: (int(x[7:]), x), mooc_media_data['data']['playurl']['sources'].keys())) quality_keys.sort(key=lambda x: x[0], reverse=True) @@ -628,6 +688,7 @@ lesson_data = rainclassroom_sess.get( f"https://{YKT_HOST}/v2/api/web/cards/detlist/{lesson['courseware_id']}?classroom_id={lesson['classroom_id']}").json() + check_response(lesson_data) name_prefix += "-" + lesson_data['data']['Title'].strip() name_prefix = option.windows_filesame_sanitizer(name_prefix) @@ -678,6 +739,7 @@ def download_lesson_ppt(lesson: dict, name_prefix: str = ""): lesson_data = rainclassroom_sess.get( f"https://{YKT_HOST}/api/v3/lesson-summary/student?lesson_id={lesson['courseware_id']}").json() + check_response(lesson_data) name_prefix += "-" + lesson['title'].rstrip() name_prefix = option.windows_filesame_sanitizer(name_prefix) @@ -687,6 +749,8 @@ ppt_info = rainclassroom_sess.get( f"https://{YKT_HOST}/v2/api/web/lessonafter/{lesson['courseware_id']}/presentation?classroom_id={lesson['classroom_id']}").json() + check_response(ppt_info) + if 'id' not in ppt_info['data'][0]: print(f"Skipping {name_prefix} - No PPT", file=sys.stderr) return @@ -696,6 +760,7 @@ try: ppt_raw_data = rainclassroom_sess.get( f"https://{YKT_HOST}/v2/api/web/lessonafter/presentation/{ppt['id']}?classroom_id={lesson['classroom_id']}").json() + check_response(ppt_raw_data) download_ppt(1, args.ppt_problem_answer, args.ppt_to_pdf, CACHE_FOLDER, DOWNLOAD_FOLDER, args.aria2c_path, ppt_raw_data, name_prefix + f"-{index}") @@ -709,6 +774,7 @@ try: ppt_raw_data = rainclassroom_sess.get( f"https://{YKT_HOST}/api/v3/lesson-summary/student/presentation?presentation_id={ppt['id']}&lesson_id={lesson['courseware_id']}").json() + check_response(ppt_raw_data) download_ppt(3, args.ppt_problem_answer, args.ppt_to_pdf, CACHE_FOLDER, DOWNLOAD_FOLDER, args.aria2c_path, ppt_raw_data, name_prefix + f"-{index}")