Newer
Older
thu-learn-downloader-optimized / downloader.py
import dataclasses
import os

import tqdm
import tqdm.contrib
import tqdm.contrib.concurrent

import thu_learn_lib
import thu_learn_lib.ty
import thu_learn_lib.utils


@dataclasses.dataclass
class DownloadTask:
    """A single queued download: who fetches what, and where it is stored."""

    # Downloader whose helper performs the request.
    session: "Downloader"
    # Address the file is fetched from.
    url: str
    # Name of the file to create inside ``prefix``.
    filename: str
    # Directory the file is written to; the current directory by default.
    prefix: str = dataclasses.field(default=".")


class Downloader:
    """Queue and download course documents and homework through a LearnHelper."""

    # Client used for login, course/file/homework listings and HTTP GETs.
    helper: thu_learn_lib.LearnHelper
    # Root directory under which all synced content is written.
    prefix: str
    # Size limit in MB; larger files are skipped by download(). A falsy value
    # (None or 0) disables the check.
    file_size_limit: float = None  # MB
    # Whether sync_course() syncs course documents.
    sync_docs: bool = True
    # Whether sync_course() syncs homework.
    sync_work: bool = True
    # Whether sync_homework() includes the submission and grading sections.
    sync_submit: bool = True
    # Pending downloads; stays None until schedule_download() creates the list.
    download_tasks: list[DownloadTask] = None

    def __init__(
        self,
        username: str,
        password: str,
        prefix: str = "thu-learn",
        file_size_limit: float = None,
        sync_docs: bool = True,
        sync_work: bool = True,
        sync_submit: bool = True,
    ) -> None:
        """Create the helper, store the sync options and log in.

        Args:
            username: Account name passed to ``thu_learn_lib.LearnHelper``.
            password: Password passed to ``thu_learn_lib.LearnHelper``.
            prefix: Root directory for everything that gets synced.
            file_size_limit: Size limit in MB; ``None`` (or 0) disables it.
            sync_docs: Sync course documents.
            sync_work: Sync homework.
            sync_submit: Include submission and grading details for homework.

        Raises:
            AssertionError: If ``helper.login()`` returns a falsy value.
        """
        self.helper = thu_learn_lib.LearnHelper(
            username=username,
            password=password,
        )
        self.prefix = prefix
        self.file_size_limit = file_size_limit
        self.sync_docs = sync_docs
        self.sync_work = sync_work
        self.sync_submit = sync_submit

        # An ``assert`` statement is stripped under ``python -O``, which would
        # drop the login call itself. Call it unconditionally and raise by
        # hand; AssertionError is kept so callers see the same exception type.
        if not self.helper.login():
            raise AssertionError("login failed")

    @staticmethod
    def download(
        self: "Downloader",
        url: str,
        filename: str,
        prefix: str = ".",
        position: int = 0,
    ) -> bool:
        """Stream ``url`` into ``prefix`` under the slugified ``filename``.

        This is a static method that takes the downloader explicitly as
        ``self``; ``retry_download`` calls it as
        ``Downloader.download(self=task.session, ...)``.

        Args:
            self: Downloader whose helper performs the request and whose
                ``file_size_limit`` applies.
            url: Address to fetch.
            filename: Target file name; slugified before use.
            prefix: Target directory, created if missing.
            position: Row of the tqdm progress bar.

        Returns:
            True if the file is already present with the advertised size, was
            deliberately skipped because of the size limit, or was written.
            False if streaming or writing raised an exception.
        """
        response = self.helper.get(url=url, stream=True)
        try:
            # A missing content-length header is treated as size 0.
            file_size = int(response.headers.get("content-length", 0))
            if self.file_size_limit:
                if file_size > self.file_size_limit * 1024 * 1024:
                    print(f"Skip file {filename}")
                    # A deliberate skip is not a failure: returning False here
                    # made retry_download() request the same file again
                    # max_retries times and then report it as failed.
                    return True
            filename = thu_learn_lib.utils.slugify(filename)
            path = os.path.join(prefix, filename)
            if os.path.exists(path) and os.path.getsize(path) == file_size:
                # Same size as advertised: consider the file already synced.
                return True
            os.makedirs(prefix, exist_ok=True)
            chunk_size = 8192  # 8KB
            try:
                with open(file=path, mode="wb") as file, tqdm.tqdm(
                    # finish_download() hands out positions starting at 6, so
                    # the label shows the task's index within the batch.
                    desc=f"{position - 6} {filename}",
                    total=file_size,
                    leave=False,
                    unit="B",
                    unit_scale=True,
                    dynamic_ncols=True,
                    position=position,
                ) as progress_bar:
                    for content in response.iter_content(chunk_size):
                        file.write(content)
                        progress_bar.update(len(content))
            except Exception:
                # Only ordinary errors count as a failed attempt.
                # KeyboardInterrupt and SystemExit are not Exception
                # subclasses, so they propagate with their original traceback.
                return False
            return True
        finally:
            # With stream=True the connection stays open until the body is
            # consumed or the response is closed; release it on every path.
            # NOTE(review): assumes the helper returns a requests-style
            # response exposing close() — confirm against thu_learn_lib.
            response.close()

    @staticmethod
    def retry_download(
        task: DownloadTask,
        position: int = 0,
        max_retries: int = 5,
    ) -> bool:
        """Attempt ``task`` up to ``max_retries`` times.

        Args:
            task: The queued download to perform.
            position: Progress-bar row forwarded to ``download``.
            max_retries: Maximum number of attempts.

        Returns:
            True as soon as one attempt succeeds; False (after printing a
            failure message) when every attempt fails.
        """
        # Lazy generator: any() stops at the first successful attempt, so no
        # further downloads are started after a success.
        attempts = (
            Downloader.download(
                self=task.session,
                url=task.url,
                filename=task.filename,
                prefix=task.prefix,
                position=position,
            )
            for _ in range(max_retries)
        )
        if any(attempts):
            return True
        print(f"Failed to download file {task.filename}")
        return False

    def schedule_download(
        self,
        url: str,
        filename: str,
        prefix: str = ".",
    ) -> None:
        """Queue a download to be performed by the next ``finish_download``.

        Args:
            url: Address to fetch.
            filename: Name of the file to create.
            prefix: Directory the file is written to.
        """
        queued = DownloadTask(
            session=self,
            url=url,
            filename=filename,
            prefix=prefix,
        )
        if self.download_tasks:
            self.download_tasks.append(queued)
        else:
            # No queue yet (None) or an empty one: start a fresh list.
            self.download_tasks = [queued]

    def finish_download(self, desc: str = "download") -> bool:
        """Run every queued download in parallel and empty the queue.

        Args:
            desc: Label of the overall progress bar.

        Returns:
            True if nothing was queued or every task succeeded, else False.
        """
        pending = self.download_tasks
        if not pending:
            return True

        # One progress-bar row per task, starting below the overall bars.
        first_row = 6
        rows = range(first_row, first_row + len(pending))
        outcomes = tqdm.contrib.concurrent.process_map(
            Downloader.retry_download,
            pending,
            rows,
            desc=desc,
            leave=False,
            dynamic_ncols=True,
            position=4,
        )
        success = all(outcomes)
        pending.clear()
        return success

    def sync_semester(
        self,
        semester_id: str,
        course_type: thu_learn_lib.ty.CourseType = thu_learn_lib.ty.CourseType.STUDENT,
    ) -> bool:
        """Sync every course of one semester.

        Args:
            semester_id: Semester whose course list is fetched; also used as
                the progress-bar label.
            course_type: Course type passed to ``helper.get_course_list``.

        Returns:
            False if any ``sync_course`` call returned False, otherwise True
            (previously the method fell off the end and returned None despite
            its ``bool`` annotation).
        """
        course_list = self.helper.get_course_list(
            semester_id=semester_id,
            course_type=course_type,
        )
        success = True
        for course in tqdm.tqdm(
            iterable=course_list,
            desc=semester_id,
            leave=False,
            dynamic_ncols=True,
            position=2,
        ):
            # Only an explicit False counts as a failure; a course sync that
            # reports no status (None) is not treated as an error.
            if self.sync_course(course=course, semester_id=semester_id) is False:
                success = False
        return success

    def sync_course(
        self,
        course: thu_learn_lib.ty.CourseInfo,
        semester_id: str,
    ) -> bool:
        """Sync the documents and/or homework of one course.

        Documents are synced when ``self.sync_docs`` is set and homework when
        ``self.sync_work`` is set; each part is followed by its own
        ``finish_download`` batch labelled with the course's English name.

        Args:
            course: Course to sync.
            semester_id: Forwarded to ``sync_file`` and ``sync_homework``.

        Returns:
            True if every download batch succeeded (or nothing was enabled),
            False if any batch reported a failure. Previously the batch
            results were discarded and None was returned.
        """
        success = True
        if self.sync_docs:
            # Fetch the listing only when documents are actually synced.
            file_list = self.helper.get_file_list(
                course_id=course.id,
                course_type=course.course_type,
            )
            for file in file_list:
                self.sync_file(file, semester_id=semester_id, course=course)
            # finish_download() is the left operand so it always runs, even
            # when an earlier batch has already failed.
            success = self.finish_download(desc=course.english_name) and success
        if self.sync_work:
            homework_list = self.helper.get_homework_list(course_id=course.id)
            for homework in homework_list:
                self.sync_homework(
                    homework=homework, semester_id=semester_id, course=course
                )
            success = self.finish_download(desc=course.english_name) and success
        return success

    def sync_file(
        self,
        file: thu_learn_lib.ty.File,
        semester_id: str,
        course: thu_learn_lib.ty.CourseInfo,
    ) -> bool:
        """Queue one course document for download.

        The file is placed under
        ``<prefix>/<course_number>-<english_name>/documents/<clazz>/`` with
        every path component slugified.

        Args:
            file: Document to download.
            semester_id: Unused here; kept for a signature parallel to
                ``sync_homework``.
            course: Course the document belongs to.

        Returns:
            Always True; the download itself happens in ``finish_download``.
        """
        prefix = os.path.join(
            self.prefix,
            thu_learn_lib.utils.slugify(
                f"{course.course_number}-{course.english_name}"
            ),
            thu_learn_lib.utils.slugify("documents"),
            thu_learn_lib.utils.slugify(file.clazz),
        )
        # The original conditional expression wrapped the whole concatenation,
        # so a file without a type got the empty filename "" and the download
        # targeted the directory itself. Only the extension is optional.
        filename = thu_learn_lib.utils.slugify(file.title)
        if file.file_type:
            filename += f".{thu_learn_lib.utils.slugify(file.file_type)}"
        self.schedule_download(
            url=file.download_url,
            filename=filename,
            prefix=prefix,
        )
        return True

    def sync_homework(
        self,
        homework: thu_learn_lib.ty.Homework,
        semester_id: str,
        course: thu_learn_lib.ty.CourseInfo,
    ) -> bool:
        """Write a README.md for one homework and queue its attachments.

        Everything goes to
        ``<prefix>/<course_number>-<english_name>/work/<title>/`` with every
        path component slugified. The README holds the assignment text,
        reference answer and deadline and, when ``self.sync_submit`` is set,
        the submission and the grading details. Attachments are queued via
        ``schedule_download`` and linked from the README.

        Args:
            homework: Homework to export.
            semester_id: Unused here; kept for a signature parallel to
                ``sync_file``.
            course: Course the homework belongs to.

        Returns:
            Always True once the README has been written (previously the
            method returned None despite its ``bool`` annotation).
        """
        prefix = os.path.join(
            self.prefix,
            thu_learn_lib.utils.slugify(
                f"{course.course_number}-{course.english_name}"
            ),
            thu_learn_lib.utils.slugify("work"),
            thu_learn_lib.utils.slugify(homework.title),
        )
        os.makedirs(prefix, exist_ok=True)
        lines = []

        def attach(kind, attachment) -> None:
            """Queue ``attachment`` for download and link it from the README."""
            extension = os.path.splitext(attachment.name)[-1]
            attachment_filename = thu_learn_lib.utils.slugify(
                f"{kind}-{homework.title}{extension}"
            )
            self.schedule_download(
                url=attachment.download_url,
                prefix=prefix,
                filename=attachment_filename,
            )
            lines.extend(
                [
                    "### Attach.",
                    "",
                    f"[{attachment.name}]({attachment_filename})",
                    "",
                ]
            )

        lines.extend(
            [
                "## Contents and Requirements",
                "",
                "### Title",
                "",
                f"{homework.title}",
                "",
                "### Description",
                "",
                f"{homework.description}",
                "",
            ]
        )
        if homework.attachment:
            attach("attach", homework.attachment)

        lines.extend(["### ANS", "", f"{homework.answer_content}", ""])
        if homework.answer_attachment:
            attach("ans", homework.answer_attachment)

        # A missing deadline is rendered as the text "None".
        deadline = homework.deadline.isoformat() if homework.deadline else None
        lines.extend(["### Deadline (GMT+8)", "", f"{deadline}", ""])

        if self.sync_submit:
            lines.extend(
                [
                    "## My coursework submitted",
                    "",
                    "### Content",
                    "",
                    f"{homework.submitted_content}",
                    "",
                ]
            )
            if homework.submitted_attachment:
                attach("submit", homework.submitted_attachment)

            # A missing grade time is rendered as the text "None".
            graded_at = (
                homework.grade_time.isoformat() if homework.grade_time else None
            )
            lines.extend(
                [
                    "## Instructors' comments",
                    "",
                    "### By",
                    "",
                    f"{homework.grader_name}",
                    "",
                    "### Date",
                    "",
                    f"{graded_at}",
                    "",
                    "### Grade",
                    "",
                    f"{homework.grade}",
                    "",
                    "### Comment",
                    "",
                    f"{homework.grade_content}",
                    "",
                ]
            )
            if homework.grade_attachment:
                attach("comment", homework.grade_attachment)

        # Explicit UTF-8: without it the locale encoding is used, which can
        # fail on (or mis-encode) non-ASCII homework text on some platforms.
        with open(os.path.join(prefix, "README.md"), "w", encoding="utf-8") as file:
            file.writelines(f"{line}\n" for line in lines)
        return True