tes fix kuso extract

yasirarism 2023-09-26 22:36:46 +07:00 committed by GitHub
parent f0f7e1a682
commit 4dddaea6e6
2 changed files with 121 additions and 177 deletions


@@ -8,111 +8,56 @@ from telegraph.aio import Telegraph
 from misskaty import BOT_USERNAME
 from misskaty.helper.http import fetch
+from misskaty.helper.media_helper import post_to_telegraph
 LOGGER = logging.getLogger("MissKaty")
-headers = {
-    "Accept": "*/*",
-    "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582",
-}
-async def kusonimeBypass(url: str, slug=None):
+async def kusonimeBypass(url: str):
     result = {}
-    _url = url
-    if slug:
-        noslug_url = "https://kusonime.com/{slug}"
-        _url = noslug_url.format({"slug": slug})
+    page = await fetch.get(url)
+    if page.status_code != 200:
+        raise Exception(f"ERROR: Hostname might be blocked by server!")
     try:
-        page = await fetch.get(_url, headers=headers)
         soup = BeautifulSoup(page.text, "lxml")
         thumb = soup.find("div", {"class": "post-thumb"}).find("img").get("src")
         data = []
-        # title = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > p:nth-child(3) > strong")[0].text.strip()
         try:
             title = soup.find("h1", {"class": "jdlz"}).text  # fix title njing haha
-            season = (
-                soup.select(
-                    "#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(3)"
-                )[0]
-                .text.split(":")
-                .pop()
-                .strip()
-            )
-            tipe = (
-                soup.select(
-                    "#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(5)"
-                )[0]
-                .text.split(":")
-                .pop()
-                .strip()
-            )
-            status_anime = (
-                soup.select(
-                    "#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(6)"
-                )[0]
-                .text.split(":")
-                .pop()
-                .strip()
-            )
-            ep = (
-                soup.select(
-                    "#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(7)"
-                )[0]
-                .text.split(":")
-                .pop()
-                .strip()
-            )
-            score = (
-                soup.select(
-                    "#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(8)"
-                )[0]
-                .text.split(":")
-                .pop()
-                .strip()
-            )
-            duration = (
-                soup.select(
-                    "#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(9)"
-                )[0]
-                .text.split(":")
-                .pop()
-                .strip()
-            )
-            rilis = (
-                soup.select(
-                    "#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(10)"
-                )[0]
-                .text.split(":")
-                .pop()
-                .strip()
-            )
+            season = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(3)")[0].text.split(":").pop().strip()
+            tipe = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(5)")[0].text.split(":").pop().strip()
+            status_anime = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(6)")[0].text.split(":").pop().strip()
+            ep = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(7)")[0].text.split(":").pop().strip()
+            score = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(8)")[0].text.split(":").pop().strip()
+            duration = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(9)")[0].text.split(":").pop().strip()
+            rilis = soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(10)")[0].text.split(":").pop().strip()
         except Exception:
             e = traceback.format_exc()
             LOGGER.error(e)
-            title, season, tipe, status_anime, ep, score, duration, rilis = (
-                "None",
-                "None",
-                "None",
-                "None",
-                0,
-                0,
-                0,
-                "None",
-            )
+            title, season, tipe, status_anime, ep, score, duration, rilis = "None", "None", "None", "None", 0, 0, 0, "None"
+        num = 1
         genre = []
-        for _genre in soup.select(
-            "#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(2)"
-        ):
+        for _genre in soup.select("#venkonten > div.vezone > div.venser > div.venutama > div.lexot > div.info > p:nth-child(2)"):
             gen = _genre.text.split(":").pop().strip().split(", ")
             genre = gen
-        for _, smokedl in enumerate(
-            soup.find("div", {"class": "dlbodz"}).find_all(
-                "div", {"class": "smokeddlrh"}
-            ),
-            start=1,
-        ):
+        for smokedl in soup.find("div", {"class": "dlbodz"}).find_all("div", {"class": "smokeddlrh"}):
+            if not smokedl:
+                continue
             mendata = {"name": title, "links": []}
+            for smokeurl in smokedl.find_all("div", {"class": "smokeurl"}):
+                if not smokeurl:
+                    continue
+                quality = smokeurl.find("strong").text
+                links = []
+                for link in smokeurl.find_all("a"):
+                    url = link.get("href")
+                    client = link.text
+                    links.append({"client": client, "url": url})
+                mendata["links"].append({"quality": quality, "link_download": links})
             for smokeurl in smokedl.find_all("div", {"class": "smokeurlrh"}):
+                if not smokeurl:
+                    continue
                 quality = smokeurl.find("strong").text
                 links = []
                 for link in smokeurl.find_all("a"):
@@ -121,45 +66,62 @@ async def kusonimeBypass(url: str, slug=None):
                     links.append({"client": client, "url": url})
                 mendata["links"].append({"quality": quality, "link_download": links})
             data.append(mendata)
-        result |= {
-            "error": False,
-            "title": title,
-            "thumb": thumb,
-            "genre": genre,
-            "genre_string": ", ".join(genre),
-            "status_anime": status_anime,
-            "season": season,
-            "tipe": tipe,
-            "ep": ep,
-            "score": score,
-            "duration": duration,
-            "rilis": rilis,
-            "data": data,
-        }
-    except Exception:
+            num += 1
+        for smokedl in soup.find("div", {"class": "dlbodz"}).find_all("div", {"class": "smokeddl"}):
+            if not smokedl:
+                continue
+            mendata = {"name": title, "links": []}
+            for smokeurl in smokedl.find_all("div", {"class": "smokeurl"}):
+                if not smokeurl:
+                    continue
+                quality = smokeurl.find("strong").text
+                links = []
+                for link in smokeurl.find_all("a"):
+                    url = link.get("href")
+                    client = link.text
+                    links.append({"client": client, "url": url})
+                mendata["links"].append({"quality": quality, "link_download": links})
+            for smokeurl in smokedl.find_all("div", {"class": "smokeurlrh"}):
+                if not smokeurl:
+                    continue
+                quality = smokeurl.find("strong").text
+                links = []
+                for link in smokeurl.find_all("a"):
+                    url = link.get("href")
+                    client = link.text
+                    links.append({"client": client, "url": url})
+                mendata["links"].append({"quality": quality, "link_download": links})
+            data.append(mendata)
+            num += 1
+        result.update({"title": title, "thumb": thumb, "genre": genre, "genre_string": ", ".join(genre), "status_anime": status_anime, "season": season, "tipe": tipe, "ep": ep, "score": score, "duration": duration, "rilis": rilis, "data": data})
+    except Exception as e:
+        if len(result) != 0:
+            result.clear()
         err = traceback.format_exc()
-        LOGGER.error(err)
-        result |= {"error": True, "error_message": err}
-        await http.delete(_url)
-    return result
+        page.close()
+        LOGGER.error(f"class: {e.__class__.__name__}, {err}")
+        raise Exception(f"ERROR: {err}")
+    finally:
+        page.close()
+    return result


-async def byPassPh(url: str, name: str):
+async def byPassPh(url: str, name: str) -> Optional[str]:
     kusonime = await kusonimeBypass(url)
-    results = {"error": True, "error_message": kusonime}
-    if not kusonime["error"]:
-        template = """
+    if not isinstance(kusonime, dict):
+        return kusonime
+    template = """
 <img src={{{thumb}}}>
 <p><b>Title</b> : <code>{{title}}</code></p>
 <p><b>Genre</b> : <code>{{genre_string}}</code></p>
-<br><br><p><b>Season</b> : <code>{{season}}</code></p>
-<br><br><p><b>Type</b> : <code>{{tipe}}</code></p>
-<br><br><p><b>Status</b> : <code>{{status_anime}}</code></p>
-<br><br><p><b>Total Episode</b> : <code>{{ep}}</code></p>
-<br><br><p><b>Score</b> : <code>{{score}}</code></p>
-<br><br><p><b>Duration</b> : <code>{{duration}}</code></p>
-<br><br><p><b>Released on</b> : <code>{{rilis}}</code></p>
+<br><p><b>Season</b> : <code>{{season}}</code></p>
+<br><p><b>Type</b> : <code>{{tipe}}</code></p>
+<br><p><b>Status</b> : <code>{{status_anime}}</code></p>
+<br><p><b>Total Episode</b> : <code>{{ep}}</code></p>
+<br><p><b>Score</b> : <code>{{score}}</code></p>
+<br><p><b>Duration</b> : <code>{{duration}}</code></p>
+<br><p><b>Released on</b> : <code>{{rilis}}</code></p>
 <br><br>
 {{#data}}
 <h4>{{name}}</h4>
@@ -172,16 +134,10 @@ async def byPassPh(url: str, name: str):
 <br>
 {{/data}}
 """.strip()
-        html = chevron.render(template, kusonime)
-        telegraph = Telegraph()
-        if not telegraph.get_access_token():
-            await telegraph.create_account(short_name=BOT_USERNAME)
-        page = await telegraph.create_page(
-            f"{kusonime.get('title')} By {escape(name)}", html_content=html
-        )
-        results |= {"error": False, "url": f'https://telegra.ph/{page["path"]}'}
-        del results["error_message"]
-    return results
+    plink = await post_to_telegraph(
+        False, f"{kusonime.get('title')} By {escape(name)}", render(template, kusonime)
+    )
+    return "https://telegra.ph/{}".format(plink)


 class Kusonime:
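A hedged usage sketch of the reworked extractor follows. It is not part of the commit: the import path of kusonimeBypass is not visible in this diff, and the caller below is hypothetical.

# Hypothetical caller, for illustration only. After this commit, kusonimeBypass()
# takes just the post URL (the slug parameter is gone) and raises on a non-200
# response or a parse failure instead of returning {"error": True, ...}.
import asyncio

async def demo():
    try:
        result = await kusonimeBypass("https://kusonime.com/example-batch/")
    except Exception as err:
        print(f"extract failed: {err}")
        return
    print(result["title"], "-", result["genre_string"])
    for item in result["data"]:
        print(item["name"], "with", len(item["links"]), "quality blocks")

# asyncio.run(demo())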


@@ -185,13 +185,12 @@ async def getDataPahe(msg, kueri, CurrentPage, strings):
 async def getDataKuso(msg, kueri, CurrentPage, user, strings):
     if not SCRAP_DICT.get(msg.id):
         kusodata = []
-        try:
-            data = await fetch.get(
-                f"{web['kusonime']}/?s={kueri}", follow_redirects=True
-            )
-        except Exception as err:
+        data = await fetch.get(
+            f"{web['kusonime']}/?s={kueri}", follow_redirects=True
+        )
+        if data.status_code != 200:
             await msg.edit_msg(strings("err_getweb").format(err=err))
-            return None, None
+            return None, 0, None, None
         res = BeautifulSoup(data, "lxml").find_all("h2", {"class": "episodeye"})
         for i in res:
             ress = i.find_all("a")[0]
@@ -238,11 +237,10 @@ async def getDataKuso(msg, kueri, CurrentPage, user, strings):
 async def getDataMovieku(msg, kueri, CurrentPage, strings):
     if not SCRAP_DICT.get(msg.id):
         moviekudata = []
-        try:
-            data = await fetch.get(
-                f"{web['movieku']}/?s={kueri}", follow_redirects=True
-            )
-        except Exception as err:
+        data = await fetch.get(
+            f"{web['movieku']}/?s={kueri}", follow_redirects=True
+        )
+        if data.status_code != 200:
             await msg.edit_msg(strings("err_getweb").format(err=err))
             return None, None
         r = BeautifulSoup(data, "lxml")
@@ -279,12 +277,10 @@ async def getDataMovieku(msg, kueri, CurrentPage, strings):
 async def getDataNodrakor(msg, kueri, CurrentPage, user, strings):
     if not SCRAP_DICT.get(msg.id):
         nodrakordata = []
-        try:
-            data = await fetch.get(
-                f"{web['nodrakor']}/?s={kueri}",
-                follow_redirects=True,
-            )
-        except Exception as err:
+        data = await fetch.get(
+            f"{web['nodrakor']}/?s={kueri}", follow_redirects=True,
+        )
+        if data.status_code != 200:
             await msg.edit_msg(strings("err_getweb").format(err=err))
             return None, 0, None
         text = BeautifulSoup(data, "lxml")
@@ -331,12 +327,10 @@ async def getDataNodrakor(msg, kueri, CurrentPage, user, strings):
 async def getDataSavefilm21(msg, kueri, CurrentPage, user, strings):
     if not SCRAP_DICT.get(msg.id):
         sfdata = []
-        try:
-            data = await fetch.get(
-                f"{web['savefilm21']}/?s={kueri}",
-                follow_redirects=True,
-            )
-        except Exception as err:
+        data = await fetch.get(
+            f"{web['savefilm21']}/?s={kueri}", follow_redirects=True,
+        )
+        if data.status_code != 200:
             await msg.edit_msg(strings("err_getweb").format(err=err))
             return None, 0, None
         text = BeautifulSoup(data, "lxml")
@@ -382,15 +376,13 @@ async def getDataSavefilm21(msg, kueri, CurrentPage, user, strings):
 # Lendrive GetData
 async def getDataLendrive(msg, kueri, CurrentPage, user, strings):
     if not SCRAP_DICT.get(msg.id):
-        try:
-            if kueri:
-                data = await fetch.get(
-                    f"{web['lendrive']}/?s={kueri}",
-                    follow_redirects=True,
-                )
-            else:
-                data = await fetch.get(web["lendrive"], follow_redirects=True)
-        except Exception as err:
+        if kueri:
+            data = await fetch.get(
+                f"{web['lendrive']}/?s={kueri}", follow_redirects=True,
+            )
+        else:
+            data = await fetch.get(web["lendrive"], follow_redirects=True)
+        if data.status_code != 200:
             await msg.edit_msg(strings("err_getweb").format(err=err))
             return None, 0, None
         res = BeautifulSoup(data, "lxml")
@@ -442,12 +434,10 @@ async def getDataLendrive(msg, kueri, CurrentPage, user, strings):
 # MelongMovie GetData
 async def getDataMelong(msg, kueri, CurrentPage, user, strings):
     if not SCRAP_DICT.get(msg.id):
-        try:
-            data = await fetch.get(
-                f"{web['melongmovie']}/?s={kueri}",
-                follow_redirects=True,
-            )
-        except Exception as err:
+        data = await fetch.get(
+            f"{web['melongmovie']}/?s={kueri}", follow_redirects=True,
+        )
+        if data.status_code != 200:
             await msg.edit_msg(strings("err_getweb").format(err=err))
             return None, 0, None
         bs4 = BeautifulSoup(data, "lxml")
@@ -492,13 +482,12 @@ async def getDataMelong(msg, kueri, CurrentPage, user, strings):
 # GoMov GetData
 async def getDataGomov(msg, kueri, CurrentPage, user, strings):
     if not SCRAP_DICT.get(msg.id):
-        try:
-            gomovv = await fetch.get(
-                f"{web['gomov']}/?s={kueri}", follow_redirects=True
-            )
-        except Exception as err:
+        gomovv = await fetch.get(
+            f"{web['gomov']}/?s={kueri}", follow_redirects=True
+        )
+        if gomovv.status_code != 200:
             await msg.edit_msg(strings("err_getweb").format(err=err))
-            return None, None
+            return None, 0, None
         text = BeautifulSoup(gomovv, "lxml")
         entry = text.find_all(class_="entry-header")
         if entry[0].text.strip() == "Nothing Found":
@@ -548,12 +537,11 @@ async def getDataGomov(msg, kueri, CurrentPage, user, strings):
 async def getSame(msg, query, current_page, strings):
     if not SCRAP_DICT.get(msg.id):
         cfse = cloudscraper.create_scraper()
-        try:
-            if query:
-                data = cfse.get(f"{web['samehadaku']}/?s={query}")
-            else:
-                data = cfse.get(web["samehadaku"])
-        except Exception as err:
+        if query:
+            data = cfse.get(f"{web['samehadaku']}/?s={query}")
+        else:
+            data = cfse.get(web["samehadaku"])
+        if data.status_code != 200:
             await msg.edit_msg(strings("err_getweb").format(err=err))
             return None, None
         res = BeautifulSoup(data.text, "lxml").find_all(class_="animposx")
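Every getData* helper in the second file gets the same treatment: the try/except around the search request is replaced with an explicit status-code check. A minimal standalone sketch of that pattern, using httpx directly (the project's fetch helper and msg.edit_msg are not reproduced here, so the names and URL are illustrative):

# Illustrative sketch of the pattern adopted in this commit: detect a failed
# search request via its HTTP status code instead of catching an exception.
import asyncio
import httpx

async def search(base_url: str, query: str):
    async with httpx.AsyncClient(follow_redirects=True) as client:
        data = await client.get(f"{base_url}/?s={query}")
    if data.status_code != 200:
        # the bot would edit the Telegram message with an error string here
        return None, None
    return data.text, data.status_code

# asyncio.run(search("https://example.com", "one piece"))

One detail visible in the hunks above: the unchanged msg.edit_msg(... .format(err=err)) lines still reference err, which appears to be unbound once the except Exception as err: clause is removed.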