Mirror of https://github.com/yasirarism/MissKatyPyro.git, synced 2026-01-03 11:04:51 +00:00
refactor scraper movieku
parent 4e50e99ded
commit c90561b96d

1 changed file with 30 additions and 21 deletions
@@ -16,7 +16,7 @@ import httpx
 from bs4 import BeautifulSoup
 from cachetools import TTLCache
 from pykeyboard import InlineButton, InlineKeyboard
-from pyrogram.errors import MessageTooLong, QueryIdInvalid
+from pyrogram.errors import QueryIdInvalid
 from pyrogram.types import Message
 
 from database import dbname
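The dropped MessageTooLong import tracks the behavioral change in the second hunk below: rather than sending optimistically and catching Pyrogram's MessageTooLong, the scraper now counts the links it collected and routes oversized results to Telegraph before replying. A minimal sketch of the two flows, assuming hypothetical `send` and `publish` coroutines in place of the bot's real `message.reply_msg` and `post_to_telegraph` helpers:

from pyrogram.errors import MessageTooLong

# Old flow: optimistically send, fall back to Telegraph only when
# Pyrogram rejects the message as too long.
async def reply_old(send, publish, text):
    try:
        await send(text)
    except MessageTooLong:
        url = await publish(text.replace("\n", "<br>"))
        await send(f"Your result is too long, I have pasted your result on Telegraph:\n{url}")

# New flow (this commit): count the embedded links up front; past the
# threshold, publish to Telegraph instead of attempting the send at all.
async def reply_new(send, publish, lines):
    total_links = sum(line.count('<a href=') for line in lines)
    if total_links > 70:  # threshold the commit settles on
        url = await publish("<br>".join(lines))
        return await send(f"Your result is too long, I have pasted your result on Telegraph:\n{url}")
    await send("\n".join(lines))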
@@ -1450,28 +1450,37 @@ async def muviku_scrap(_, message, strings):
         html = await fetch.get(link)
         html.raise_for_status()
         soup = BeautifulSoup(html.text, "lxml")
-        res = soup.find_all(class_="smokeurl")
-        data = []
-        for div in res:
-            paragraphs = div.find_all('p')
-            for p in paragraphs:
-                resolution = p.find('strong').text
-                links = p.find_all('a')
-                for link in links:
-                    href = link.get('href')
-                    title = link.text
-                    data.append({
-                        "resolusi": resolution,
-                        "link": href,
-                        "title": title,
-                    })
+        data = {}
+        output = []
+        total_links = 0
+        valid_resolutions = {'1080p', '720p', '480p', '360p'}
+        current_title = None
+
+        for element in soup.find_all(['h3', 'p']):
+            if element.name == 'h3' and 'smokettl' in element.get('class', []):
+                current_title = element.text.strip()
+                if current_title not in data:
+                    data[current_title] = []
+            elif element.name == 'p' and current_title:
+                strong_tag = element.find('strong')
+                if strong_tag:
+                    resolution = strong_tag.text.strip()
+                    if resolution in valid_resolutions:
+                        links = ', '.join([f'<a href="{a["href"]}">{a.text.strip()}</a>' for a in element.find_all('a')])
+                        data[current_title].append(f"{resolution} {links}")
+
+        for title, resolutions in data.items():
+            output.append(title)
+            output.extend(resolutions)
+            output.append('')
+            for res in resolutions:
+                total_links += res.count('<a href=')
         if not data:
             return await message.reply(strings("no_result"))
-        res = "".join(f"<b>Host: <a href='{i['link']}'>{i['resolusi']} {i['title']}</a></b>\n\n" for i in data)
-        await message.reply_msg(res)
-    except MessageTooLong:
-        url = await post_to_telegraph(False, link, res.replace("\n", "<br>"))
-        await message.reply_msg(f"Your result is too long, i have pasted your result on Telegraph:\n{url}")
+        if total_links > 70:
+            url = await post_to_telegraph(False, link, "<br>".join(output))
+            return await message.reply_msg(f"Your result is too long, I have pasted your result on Telegraph:\n{url}")
+        await message.reply_msg("\n".join(output))
     except IndexError:
         return await message.reply(
             strings("invalid_cmd_scrape").format(cmd=message.command[0])
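For reference, the rewritten parser walks the page's h3.smokettl title headings and their following p blocks in document order, grouping download links under each title instead of flattening everything into one list. A standalone sketch of that traversal, run against invented movieku-style sample markup (the HTML below is a made-up illustration, not a real page):

from bs4 import BeautifulSoup

# Invented sample markup mimicking the smokettl/strong/anchor structure
# the scraper targets; real movieku pages follow the same shape.
SAMPLE = """
<h3 class="smokettl">Some Movie (2023)</h3>
<p><strong>1080p</strong> <a href="https://host.example/a">Mirror 1</a></p>
<p><strong>480p</strong> <a href="https://host.example/b">Mirror 2</a></p>
"""

# html.parser stands in for the bot's lxml backend to keep the sketch
# dependency-free; the traversal itself matches the commit.
soup = BeautifulSoup(SAMPLE, "html.parser")
valid_resolutions = {'1080p', '720p', '480p', '360p'}
data = {}
current_title = None
for element in soup.find_all(['h3', 'p']):
    if element.name == 'h3' and 'smokettl' in element.get('class', []):
        current_title = element.text.strip()
        if current_title not in data:
            data[current_title] = []
    elif element.name == 'p' and current_title:
        strong_tag = element.find('strong')
        if strong_tag and strong_tag.text.strip() in valid_resolutions:
            links = ', '.join(
                f'<a href="{a["href"]}">{a.text.strip()}</a>'
                for a in element.find_all('a')
            )
            data[current_title].append(f"{strong_tag.text.strip()} {links}")

for title, resolutions in data.items():
    print(title)
    for res in resolutions:
        print(" ", res)
# Some Movie (2023)
#   1080p <a href="https://host.example/a">Mirror 1</a>
#   480p <a href="https://host.example/b">Mirror 2</a>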