refactor scraper movieku

This commit is contained in:
Yasir Aris M 2024-09-24 09:39:20 +07:00 committed by GitHub
parent 4e50e99ded
commit c90561b96d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -16,7 +16,7 @@ import httpx
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from cachetools import TTLCache from cachetools import TTLCache
from pykeyboard import InlineButton, InlineKeyboard from pykeyboard import InlineButton, InlineKeyboard
from pyrogram.errors import MessageTooLong, QueryIdInvalid from pyrogram.errors import QueryIdInvalid
from pyrogram.types import Message from pyrogram.types import Message
from database import dbname from database import dbname
@ -1450,28 +1450,37 @@ async def muviku_scrap(_, message, strings):
html = await fetch.get(link) html = await fetch.get(link)
html.raise_for_status() html.raise_for_status()
soup = BeautifulSoup(html.text, "lxml") soup = BeautifulSoup(html.text, "lxml")
res = soup.find_all(class_="smokeurl") data = {}
data = [] output = []
for div in res: total_links = 0
paragraphs = div.find_all('p') valid_resolutions = {'1080p', '720p', '480p', '360p'}
for p in paragraphs: current_title = None
resolution = p.find('strong').text
links = p.find_all('a') for element in soup.find_all(['h3', 'p']):
for link in links: if element.name == 'h3' and 'smokettl' in element.get('class', []):
href = link.get('href') current_title = element.text.strip()
title = link.text if current_title not in data:
data.append({ data[current_title] = []
"resolusi": resolution, elif element.name == 'p' and current_title:
"link": href, strong_tag = element.find('strong')
"title": title, if strong_tag:
}) resolution = strong_tag.text.strip()
if resolution in valid_resolutions:
links = ', '.join([f'<a href="{a["href"]}">{a.text.strip()}</a>' for a in element.find_all('a')])
data[current_title].append(f"{resolution} {links}")
for title, resolutions in data.items():
output.append(title)
output.extend(resolutions)
output.append('')
for res in resolutions:
total_links += res.count('<a href=')
if not data: if not data:
return await message.reply(strings("no_result")) return await message.reply(strings("no_result"))
res = "".join(f"<b>Host: <a href='{i['link']}'>{i['resolusi']} {i['title']}</a></b>\n\n" for i in data) if len(total_links) > 70:
await message.reply_msg(res) url = await post_to_telegraph(False, link, "<br>".join(output))
except MessageTooLong: return await message.reply_msg(f"Your result is too long, i have pasted your result on Telegraph:\n{url}")
url = await post_to_telegraph(False, link, res.replace("\n", "<br>")) await message.reply_msg("\n".join(output))
await message.reply_msg(f"Your result is too long, i have pasted your result on Telegraph:\n{url}")
except IndexError: except IndexError:
return await message.reply( return await message.reply(
strings("invalid_cmd_scrape").format(cmd=message.command[0]) strings("invalid_cmd_scrape").format(cmd=message.command[0])