diff --git a/.gitignore b/.gitignore index db2cd19..81da02f 100644 --- a/.gitignore +++ b/.gitignore @@ -14,9 +14,7 @@ .Python build/ develop-eggs/ -dist/*.txt -dist/downloads -dist/*.py +dist/ downloads/ eggs/ .eggs/ diff --git a/README.md b/README.md index 1cb683c..f1a4f1b 100644 --- a/README.md +++ b/README.md @@ -67,5 +67,22 @@ python.exe -m thu_learn_downloader.main ## exe packing -- run in project root: -.\.venv\Scripts\pyinstaller.exe --onefile --clean --add-data "thu_learn_downloader/openssl.conf;thu_learn_downloader" .\thu_learn_downloader\main.py --runtime-hook .\thu_learn_downloader\hook.py \ No newline at end of file +- run in project root: +- first, install the Playwright Chromium browser directly inside the venv: +```powershell +$env:PLAYWRIGHT_BROWSERS_PATH="0" +playwright install chromium +``` +(see: https://playwright.dev/python/docs/library#pyinstaller) +Then build the executable: +```powershell +.\.venv\Scripts\pyinstaller.exe --onefile --clean --add-data "thu_learn_downloader/openssl.conf;thu_learn_downloader" .\thu_learn_downloader\main.py --runtime-hook .\thu_learn_downloader\hook.py +``` +## exe recommended usage +```powershell +.\thu-learn-downloader-windows-x86_64.exe -u [USERNAME] -p [PASSWORD] -s [SEMESTER] +``` +WARNING: the password is passed in plain text on the command line and will NOT be redacted. + +Then complete 2FA in the browser window that opens. +The download of the selected semester then starts automatically; the program quits if no valid semester is specified. 
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 01fba06..8533911 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,4 +28,5 @@ urllib3~=2.2.3 typer~=0.12.5 python-dateutil~=2.9.0.post0 -tenacity~=9.0.0 \ No newline at end of file +tenacity~=9.0.0 +playwright~=1.53.0 \ No newline at end of file diff --git a/thu_learn_downloader/client/learn.py b/thu_learn_downloader/client/learn.py index 4b822da..f91896a 100644 --- a/thu_learn_downloader/client/learn.py +++ b/thu_learn_downloader/client/learn.py @@ -1,13 +1,13 @@ import functools -import urllib.parse +import re from collections.abc import Sequence -from urllib.parse import ParseResult from bs4 import BeautifulSoup, Tag +from playwright.sync_api import sync_playwright from requests import Response +from requests.cookies import RequestsCookieJar from thu_learn_downloader.common.typing import cast - from . import url from .client import Client, Language from .semester import Semester @@ -24,35 +24,35 @@ soup: BeautifulSoup = BeautifulSoup( markup=response.text, features="html.parser" ) - login_form: Tag = cast(Tag, soup.select_one(selector="#loginForm")) - action: str = cast(str, login_form["action"]) - response: Response = self.client.post( - url=action, data={"i_user": username, "i_pass": password, "atOnce": True}, verify=False - ) - soup: BeautifulSoup = BeautifulSoup( - markup=response.text, features="html.parser" - ) - a: Tag = cast(Tag, soup.select_one(selector="a")) - href: str = cast(str, a["href"]) - parse_result: ParseResult = urllib.parse.urlparse(url=href) - query: dict[str, list[str]] = urllib.parse.parse_qs(qs=parse_result.query) - print("Query received:", query) + login_button: Tag = cast(Tag, soup.select_one(selector="#loginButtonId")) + onclick: str = cast(str, login_button["onclick"]) + login_url: str = cast(str, re.search(r"'(https?://[^']+)'", onclick).group(1)) - status = query.get("status", ["unknown"])[0] - ticket = query.get("ticket", [None])[0] - if 
ticket is None: - print("Login probably failed — no ticket received.") - print("Full query dict:", query) - return + jar = RequestsCookieJar() - self.client.get(url=href, verify=False) - self.client.get( - url=url.make_url(path="/b/j_spring_security_thauth_roaming_entry"), - params={"ticket": ticket}, - verify = False - ) - self.client.get(url=url.make_url(path="/f/wlxt/index/course/student/"), verify=False) - assert status == "SUCCESS" + with sync_playwright() as p: + browser = p.chromium.launch(headless=False) + context = browser.new_context() + page = context.new_page() + page.goto(login_url) + page.fill("#i_user", username) + page.fill("#i_pass", password) + page.evaluate("doLogin()") + + page.wait_for_url(re.compile(r"learn\.tsinghua\.edu\.cn/.*"), timeout=300_000) + + cookies = context.cookies() + for cookie in cookies: + jar.set( + name=cookie['name'], + value=cookie['value'], + domain=cookie.get('domain'), + path=cookie.get('path', '/'), + ) + + browser.close() + + self.client.cookies.update(jar) @functools.cached_property # def semesters(self) -> Sequence[Semester]: @@ -71,6 +71,8 @@ print("Request failed with status:", response.status_code) return [] + print(response.text) + try: data = response.json() print("Parsed JSON:", data) # Debugging output