mirror of
https://github.com/yasirarism/MissKatyPyro.git
synced 2026-01-03 11:04:51 +00:00
Tes fix scraper
This commit is contained in:
parent
10c243f33b
commit
e3ee78f3e3
2 changed files with 52 additions and 42 deletions
|
|
@ -3,7 +3,7 @@ import re
|
||||||
import chevron
|
import chevron
|
||||||
from telegraph.aio import Telegraph
|
from telegraph.aio import Telegraph
|
||||||
import logging
|
import logging
|
||||||
from aiohttp import ClientSession
|
from misskaty.helper.http import http
|
||||||
from misskaty import BOT_USERNAME
|
from misskaty import BOT_USERNAME
|
||||||
from bs4 import BeautifulSoup as bs4
|
from bs4 import BeautifulSoup as bs4
|
||||||
|
|
||||||
|
|
@ -13,64 +13,73 @@ headers = {"Accept": "*/*", "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64;
|
||||||
|
|
||||||
|
|
||||||
async def kusonimeBypass(url: str, slug=None):
|
async def kusonimeBypass(url: str, slug=None):
|
||||||
hasil = {}
|
result = {}
|
||||||
_url = url
|
_url = url
|
||||||
request = ClientSession(headers=headers)
|
|
||||||
if slug:
|
if slug:
|
||||||
noslug_url = "https://kusonime.com/{slug}"
|
noslug_url = "https://kusonime.com/{slug}"
|
||||||
_url = noslug_url.format({"slug": slug})
|
_url = noslug_url.format({"slug": slug})
|
||||||
try:
|
try:
|
||||||
test = await request.get(_url)
|
page = await http.get(_url, headers=headers)
|
||||||
page = await test.text()
|
soup = BeautifulSoup(page.text, "lxml")
|
||||||
soup = bs4(page, "html.parser")
|
|
||||||
thumb = soup.find("div", {"class": "post-thumb"}).find("img").get("src")
|
thumb = soup.find("div", {"class": "post-thumb"}).find("img").get("src")
|
||||||
data = []
|
data = []
|
||||||
# title = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > p:nth-child(3) > strong")[0].text.strip()
|
# title = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > p:nth-child(3) > strong")[0].text.strip()
|
||||||
title = soup.find("h1", {"class": "jdlz"}).text # fix title njing haha
|
try:
|
||||||
|
title = soup.find("h1", {"class": "jdlz"}).text # fix title njing haha
|
||||||
|
season = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(3)")[0].text.split(":").pop().strip()
|
||||||
|
tipe = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(5)")[0].text.split(":").pop().strip()
|
||||||
|
status_anime = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(6)")[0].text.split(":").pop().strip()
|
||||||
|
ep = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(7)")[0].text.split(":").pop().strip()
|
||||||
|
score = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(8)")[0].text.split(":").pop().strip()
|
||||||
|
duration = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(9)")[0].text.split(":").pop().strip()
|
||||||
|
rilis = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(10)")[0].text.split(":").pop().strip()
|
||||||
|
except Exception:
|
||||||
|
e = traceback.format_exc()
|
||||||
|
LOGGER.error(e)
|
||||||
|
title, season, tipe, status_anime, ep, score, duration, rilis = "None", "None", "None", "None", 0, 0, 0, "None"
|
||||||
|
num = 1
|
||||||
genre = []
|
genre = []
|
||||||
for _genre in soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(2)"):
|
for _genre in soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(2)"):
|
||||||
gen = _genre.text.split(":").pop().strip().split(", ")
|
gen = _genre.text.split(":").pop().strip().split(", ")
|
||||||
genre = gen
|
genre = gen
|
||||||
status_anime = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(6)")[0].text.split(":").pop().strip()
|
for smokedl in soup.find("div", {"class": "dlbodz"}).find_all("div", {"class": "smokeddlrh"}):
|
||||||
for num, smokedl in enumerate(soup.find("div", {"class": "dlbod"}).find_all("div", {"class": "smokeddl"}), start=1):
|
mendata = {"name": title, "links": []}
|
||||||
titl = soup.select(f"#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.dlbod > div:nth-child({num}) > div.smokettl")[0].text
|
for smokeurl in smokedl.find_all("div", {"class": "smokeurlrh"}):
|
||||||
titl = re.sub("Download", "", titl).strip()
|
|
||||||
mendata = {"name": titl, "links": []}
|
|
||||||
for smokeurl in smokedl.find_all("div", {"class": "smokeurl"}):
|
|
||||||
quality = smokeurl.find("strong").text
|
quality = smokeurl.find("strong").text
|
||||||
links = []
|
links = []
|
||||||
for link in smokeurl.find_all("a"):
|
for link in smokeurl.find_all("a"):
|
||||||
url = link.get("href")
|
url = link.get("href")
|
||||||
client = link.text
|
client = link.text
|
||||||
links.append({"client": client, "url": url})
|
links.append({"client": client, "url": url})
|
||||||
mendata["links"].append(dict(quality=quality, link_download=links))
|
mendata["links"].append({"quality": quality, "link_download": links})
|
||||||
data.append(mendata)
|
data.append(mendata)
|
||||||
hasil |= {
|
num += 1
|
||||||
"error": False,
|
result.update({"error": False, "title": title, "thumb": thumb, "genre": genre, "genre_string": ", ".join(genre), "status_anime": status_anime, "season": season, "tipe": tipe, "ep": ep, "score": score, "duration": duration, "rilis": rilis, "data": data})
|
||||||
"title": title,
|
except Exception:
|
||||||
"thumb": thumb,
|
err = traceback.format_exc()
|
||||||
"genre": genre,
|
LOGGER.error(err)
|
||||||
"genre_string": ", ".join(genre),
|
result.update({"error": True, "error_message": err})
|
||||||
"status_anim": status_anime,
|
|
||||||
"data": data,
|
|
||||||
}
|
|
||||||
except:
|
|
||||||
hasil |= {"error": True, "error_message": "kuso bypass error"}
|
|
||||||
finally:
|
finally:
|
||||||
await request.close()
|
await http.delete(_url)
|
||||||
return hasil
|
return result
|
||||||
|
|
||||||
|
|
||||||
async def byPassPh(url: str, msg_id: int):
|
aasync def byPassPh(url: str, name: str):
|
||||||
kusonime = await kusonimeBypass(url)
|
kusonime = await kusonimeBypass(url)
|
||||||
results = {"error": True, "error_message": "Post to or create TGH error"}
|
results = {"error": True, "error_message": kusonime}
|
||||||
if not kusonime["error"]:
|
template = """
|
||||||
template = """
|
|
||||||
<img src={{{thumb}}}>
|
<img src={{{thumb}}}>
|
||||||
|
|
||||||
<p><b>Title</b> : <code>{{title}}</code></p>
|
<p><b>Title</b> : <code>{{title}}</code></p>
|
||||||
<p><b>Genre</b> : <code>{{genre_string}}</code></p>
|
<p><b>Genre</b> : <code>{{genre_string}}</code></p>
|
||||||
<p><b>Status</b> : <code>{{status_anime}}</code></p>
|
<br><br><p><b>Season</b> : <code>{{season}}</code></p>
|
||||||
<br>
|
<br><br><p><b>Type</b> : <code>{{tipe}}</code></p>
|
||||||
|
<br><br><p><b>Status</b> : <code>{{status_anime}}</code></p>
|
||||||
|
<br><br><p><b>Total Episode</b> : <code>{{ep}}</code></p>
|
||||||
|
<br><br><p><b>Score</b> : <code>{{score}}</code></p>
|
||||||
|
<br><br><p><b>Duration</b> : <code>{{duration}}</code></p>
|
||||||
|
<br><br><p><b>Released on</b> : <code>{{rilis}}</code></p>
|
||||||
|
<br><br>
|
||||||
{{#data}}
|
{{#data}}
|
||||||
<h4>{{name}}</h4>
|
<h4>{{name}}</h4>
|
||||||
{{#links}}
|
{{#links}}
|
||||||
|
|
@ -82,12 +91,13 @@ async def byPassPh(url: str, msg_id: int):
|
||||||
<br>
|
<br>
|
||||||
{{/data}}
|
{{/data}}
|
||||||
""".strip()
|
""".strip()
|
||||||
html = chevron.render(template, kusonime)
|
if not kusonime["error"]:
|
||||||
|
html = render(template, kusonime)
|
||||||
telegraph = Telegraph()
|
telegraph = Telegraph()
|
||||||
if telegraph.get_access_token() is None:
|
if not telegraph.get_access_token():
|
||||||
await telegraph.create_account(short_name=BOT_USERNAME)
|
await telegraph.create_account(short_name=bot_name)
|
||||||
page = await telegraph.create_page(f"{kusonime.get('title')}-{msg_id}", html_content=html)
|
page = await telegraph.create_page(f"{kusonime.get('title')} By {escape(name)}", html_content=html)
|
||||||
results |= {"error": False, "url": f'https://telegra.ph/{page["path"]}'}
|
results.update({"error": False, "url": "https://telegra.ph/{}".format(page["path"])})
|
||||||
del results["error_message"]
|
del results["error_message"]
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
@ -99,5 +109,5 @@ class Kusonime:
|
||||||
async def byPass(self, url):
|
async def byPass(self, url):
|
||||||
return await kusonimeBypass(url)
|
return await kusonimeBypass(url)
|
||||||
|
|
||||||
async def telegraph(self, url, msg_id):
|
async def telegraph(self, url, name):
|
||||||
return await byPassPh(url, msg_id)
|
return await byPassPh(url, name)
|
||||||
|
|
|
||||||
|
|
@ -887,7 +887,7 @@ async def gomovpage_callback(client, callback_query, strings):
|
||||||
@app.on_callback_query(filters.create(lambda _, __, query: "kusoextract#" in query.data))
|
@app.on_callback_query(filters.create(lambda _, __, query: "kusoextract#" in query.data))
|
||||||
@ratelimiter
|
@ratelimiter
|
||||||
@use_chat_lang()
|
@use_chat_lang()
|
||||||
async def kusonime_scrap(_, callback_query, strings):
|
async def kusonime_scrap(client, callback_query, strings):
|
||||||
if callback_query.from_user.id != int(callback_query.data.split("#")[3]):
|
if callback_query.from_user.id != int(callback_query.data.split("#")[3]):
|
||||||
return await callback_query.answer(strings("unauth"), True)
|
return await callback_query.answer(strings("unauth"), True)
|
||||||
idlink = int(callback_query.data.split("#")[2])
|
idlink = int(callback_query.data.split("#")[2])
|
||||||
|
|
@ -906,7 +906,7 @@ async def kusonime_scrap(_, callback_query, strings):
|
||||||
ph = init_url.get("ph_url")
|
ph = init_url.get("ph_url")
|
||||||
await callback_query.message.edit_msg(strings("res_scrape").format(link=link, kl=ph), reply_markup=keyboard, disable_web_page_preview=False)
|
await callback_query.message.edit_msg(strings("res_scrape").format(link=link, kl=ph), reply_markup=keyboard, disable_web_page_preview=False)
|
||||||
return
|
return
|
||||||
tgh = await kuso.telegraph(link, message_id)
|
tgh = await kuso.telegraph(link, client.me.username)
|
||||||
if tgh["error"]:
|
if tgh["error"]:
|
||||||
await callback_query.message.edit_msg(f"ERROR: {tgh['error_message']}", reply_markup=keyboard)
|
await callback_query.message.edit_msg(f"ERROR: {tgh['error_message']}", reply_markup=keyboard)
|
||||||
return
|
return
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue