From e3ee78f3e32b1e387fe98bddaf858082cb816d4e Mon Sep 17 00:00:00 2001
From: yasirarism <55983182+yasirarism@users.noreply.github.com>
Date: Mon, 22 May 2023 14:34:13 +0000
Subject: [PATCH] Test fix scraper

---
 misskaty/helper/kuso_utils.py   | 90 ++++++++++++++++++---------------
 misskaty/plugins/web_scraper.py |  4 +-
 2 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/misskaty/helper/kuso_utils.py b/misskaty/helper/kuso_utils.py
index d78efe7c..c9216ad4 100644
--- a/misskaty/helper/kuso_utils.py
+++ b/misskaty/helper/kuso_utils.py
@@ -3,7 +3,9 @@ import re
 import chevron
 from telegraph.aio import Telegraph
 import logging
-from aiohttp import ClientSession
+import traceback  # used by the new except blocks below
+from html import escape  # used for the Telegraph page title
+from misskaty.helper.http import http
 from misskaty import BOT_USERNAME
 from bs4 import BeautifulSoup as bs4
@@ -13,64 +13,73 @@ headers = {"Accept": "*/*", "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64;
 
 
 async def kusonimeBypass(url: str, slug=None):
-    hasil = {}
+    result = {}
     _url = url
-    request = ClientSession(headers=headers)
     if slug:
         noslug_url = "https://kusonime.com/{slug}"
         _url = noslug_url.format({"slug": slug})
     try:
-        test = await request.get(_url)
-        page = await test.text()
-        soup = bs4(page, "html.parser")
+        page = await http.get(_url, headers=headers)
+        soup = bs4(page.text, "lxml")
         thumb = soup.find("div", {"class": "post-thumb"}).find("img").get("src")
         data = []
         # title = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > p:nth-child(3) > strong")[0].text.strip()
-        title = soup.find("h1", {"class": "jdlz"}).text  # fix title njing haha
+        try:
+            title = soup.find("h1", {"class": "jdlz"}).text  # fix title njing haha
+            season = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(3)")[0].text.split(":").pop().strip()
+            tipe = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(5)")[0].text.split(":").pop().strip()
+            status_anime = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(6)")[0].text.split(":").pop().strip()
+            ep = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(7)")[0].text.split(":").pop().strip()
+            score = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(8)")[0].text.split(":").pop().strip()
+            duration = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(9)")[0].text.split(":").pop().strip()
+            rilis = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(10)")[0].text.split(":").pop().strip()
+        except Exception:
+            e = traceback.format_exc()
+            LOGGER.error(e)
+            title, season, tipe, status_anime, ep, score, duration, rilis = "None", "None", "None", "None", 0, 0, 0, "None"
+        num = 1
         genre = []
         for _genre in soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(2)"):
             gen = _genre.text.split(":").pop().strip().split(", ")
             genre = gen
-        status_anime = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(6)")[0].text.split(":").pop().strip()
-        for num, smokedl in enumerate(soup.find("div", {"class": "dlbod"}).find_all("div", {"class": "smokeddl"}), start=1):
-            titl = soup.select(f"#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.dlbod > div:nth-child({num}) > div.smokettl")[0].text
-            titl = re.sub("Download", "", titl).strip()
-            mendata = {"name": titl, "links": []}
-            for smokeurl in smokedl.find_all("div", {"class": "smokeurl"}):
+        for smokedl in soup.find("div", {"class": "dlbodz"}).find_all("div", {"class": "smokeddlrh"}):
+            mendata = {"name": title, "links": []}
+            for smokeurl in smokedl.find_all("div", {"class": "smokeurlrh"}):
                 quality = smokeurl.find("strong").text
                 links = []
                 for link in smokeurl.find_all("a"):
                     url = link.get("href")
                     client = link.text
                     links.append({"client": client, "url": url})
-                mendata["links"].append(dict(quality=quality, link_download=links))
+                mendata["links"].append({"quality": quality, "link_download": links})
             data.append(mendata)
-        hasil |= {
-            "error": False,
-            "title": title,
-            "thumb": thumb,
-            "genre": genre,
-            "genre_string": ", ".join(genre),
-            "status_anim": status_anime,
-            "data": data,
-        }
-    except:
-        hasil |= {"error": True, "error_message": "kuso bypass error"}
+            num += 1
+        result.update({"error": False, "title": title, "thumb": thumb, "genre": genre, "genre_string": ", ".join(genre), "status_anime": status_anime, "season": season, "tipe": tipe, "ep": ep, "score": score, "duration": duration, "rilis": rilis, "data": data})
+    except Exception:
+        err = traceback.format_exc()
+        LOGGER.error(err)
+        result.update({"error": True, "error_message": err})
     finally:
-        await request.close()
-        return hasil
+        pass  # the shared http client stays open; nothing to clean up per request
+    return result
 
 
-async def byPassPh(url: str, msg_id: int):
+async def byPassPh(url: str, name: str):
     kusonime = await kusonimeBypass(url)
-    results = {"error": True, "error_message": "Post to or create TGH error"}
-    if not kusonime["error"]:
-        template = """
+    results = {"error": True, "error_message": kusonime}
+    template = """
 Title : {{title}}
 Genre : {{genre_string}}
-Status : {{status_anime}}
+Season : {{season}}
+Type : {{tipe}}
+Status : {{status_anime}}
+Total Episode : {{ep}}
+Score : {{score}}
+Duration : {{duration}}
+Released on : {{rilis}}
 {{#data}}
 {{name}}
 {{#links}}
@@ -82,12 +91,13 @@ async def byPassPh(url: str, msg_id: int):
 {{/data}}
 """.strip()
-        html = chevron.render(template, kusonime)
+    if not kusonime["error"]:
+        html = chevron.render(template, kusonime)
         telegraph = Telegraph()
-        if telegraph.get_access_token() is None:
-            await telegraph.create_account(short_name=BOT_USERNAME)
-        page = await telegraph.create_page(f"{kusonime.get('title')}-{msg_id}", html_content=html)
-        results |= {"error": False, "url": f'https://telegra.ph/{page["path"]}'}
+        if not telegraph.get_access_token():
+            await telegraph.create_account(short_name=BOT_USERNAME)
+        page = await telegraph.create_page(f"{kusonime.get('title')} By {escape(name)}", html_content=html)
+        results.update({"error": False, "url": "https://telegra.ph/{}".format(page["path"])})
         del results["error_message"]
     return results
@@ -99,5 +109,5 @@ class Kusonime:
     async def byPass(self, url):
         return await kusonimeBypass(url)
 
-    async def telegraph(self, url, msg_id):
-        return await byPassPh(url, msg_id)
+    async def telegraph(self, url, name):
+        return await byPassPh(url, name)
diff --git a/misskaty/plugins/web_scraper.py b/misskaty/plugins/web_scraper.py
index 25a6cf3c..23bf2f35 100644
--- a/misskaty/plugins/web_scraper.py
+++ b/misskaty/plugins/web_scraper.py
@@ -887,7 +887,7 @@ async def gomovpage_callback(client, callback_query, strings):
 @app.on_callback_query(filters.create(lambda _, __, query: "kusoextract#" in query.data))
 @ratelimiter
 @use_chat_lang()
-async def kusonime_scrap(_, callback_query, strings):
+async def kusonime_scrap(client, callback_query, strings):
     if callback_query.from_user.id != int(callback_query.data.split("#")[3]):
         return await callback_query.answer(strings("unauth"), True)
     idlink = int(callback_query.data.split("#")[2])
@@ -906,7 +906,7 @@ async def kusonime_scrap(_, callback_query, strings):
         ph = init_url.get("ph_url")
         await callback_query.message.edit_msg(strings("res_scrape").format(link=link, kl=ph), reply_markup=keyboard, disable_web_page_preview=False)
         return
-    tgh = await kuso.telegraph(link, message_id)
+    tgh = await kuso.telegraph(link, client.me.username)
     if tgh["error"]:
         await callback_query.message.edit_msg(f"ERROR: {tgh['error_message']}", reply_markup=keyboard)
         return
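
The first kuso_utils.py hunk swaps the per-call aiohttp ClientSession for the project-wide http client imported from misskaty.helper.http. A minimal sketch of the before/after fetch pattern, assuming http is a long-lived async client with an httpx-style interface (response .text is a plain property), which is what the new page.text usage implies:

from aiohttp import ClientSession
from misskaty.helper.http import http  # assumed: shared, long-lived async client

headers = {"Accept": "*/*", "User-agent": "Mozilla/5.0"}

async def fetch_old(url: str) -> str:
    # Old pattern: open and tear down a dedicated session on every scrape.
    session = ClientSession(headers=headers)
    try:
        resp = await session.get(url)
        return await resp.text()  # aiohttp: text() is a coroutine
    finally:
        await session.close()

async def fetch_new(url: str) -> str:
    # New pattern: reuse the shared client; no per-call setup or teardown.
    resp = await http.get(url, headers=headers)
    return resp.text  # httpx-style: .text is a property, not a coroutine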
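
Rendering is unchanged in principle: chevron (a Mustache implementation) fills the template from the dict returned by kusonimeBypass, and the {{#data}} / {{#links}} / {{#link_download}} sections iterate the nested lists built in the scrape loop. A toy render with the same shape, using made-up values:

import chevron

template = """
{{title}} ({{season}})
{{#data}}
{{name}}
{{#links}}
{{quality}}:{{#link_download}} {{client}}{{/link_download}}
{{/links}}
{{/data}}
""".strip()

context = {
    "title": "Example Anime",
    "season": "Spring 2023",
    "data": [
        {
            "name": "Example Anime BD",
            "links": [
                {"quality": "720p", "link_download": [{"client": "GDrive", "url": "https://example.com/a"}]},
                {"quality": "1080p", "link_download": [{"client": "Mega", "url": "https://example.com/b"}]},
            ],
        }
    ],
}

# Roughly prints:
# Example Anime (Spring 2023)
# Example Anime BD
# 720p: GDrive
# 1080p: Mega
print(chevron.render(template, context))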
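
On the plugin side, web_scraper.py now passes the bot's username (client.me.username) instead of the callback message id, and only talks to the small Kusonime wrapper at the bottom of kuso_utils.py. A rough sketch of the resulting call chain, with a placeholder kusonime.com link and bot name:

import asyncio

from misskaty.helper.kuso_utils import Kusonime

async def demo():
    kuso = Kusonime()
    link = "https://kusonime.com/some-anime/"  # placeholder URL

    # Raw scrape: the dict built by kusonimeBypass(), with error/title/data keys.
    scraped = await kuso.byPass(link)
    if scraped["error"]:
        print("scrape failed:", scraped["error_message"])
        return

    # Scrape, render with chevron, and publish to Telegraph; the second argument
    # is now a display name (the bot username) rather than a message id.
    published = await kuso.telegraph(link, "MissKatyRoBot")  # placeholder name
    if not published["error"]:
        print(published["url"])  # e.g. https://telegra.ph/<generated-path>

asyncio.run(demo())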