refactor scraper movieku

This commit is contained in:
Yasir Aris M 2024-09-24 09:39:20 +07:00 committed by GitHub
parent 4e50e99ded
commit c90561b96d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -16,7 +16,7 @@ import httpx
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from cachetools import TTLCache from cachetools import TTLCache
from pykeyboard import InlineButton, InlineKeyboard from pykeyboard import InlineButton, InlineKeyboard
from pyrogram.errors import MessageTooLong, QueryIdInvalid from pyrogram.errors import QueryIdInvalid
from pyrogram.types import Message from pyrogram.types import Message
from database import dbname from database import dbname
@ -1450,28 +1450,37 @@ async def muviku_scrap(_, message, strings):
html = await fetch.get(link) html = await fetch.get(link)
html.raise_for_status() html.raise_for_status()
soup = BeautifulSoup(html.text, "lxml") soup = BeautifulSoup(html.text, "lxml")
res = soup.find_all(class_="smokeurl") data = {}
data = [] output = []
for div in res: total_links = 0
paragraphs = div.find_all('p') valid_resolutions = {'1080p', '720p', '480p', '360p'}
for p in paragraphs: current_title = None
resolution = p.find('strong').text
links = p.find_all('a') for element in soup.find_all(['h3', 'p']):
for link in links: if element.name == 'h3' and 'smokettl' in element.get('class', []):
href = link.get('href') current_title = element.text.strip()
title = link.text if current_title not in data:
data.append({ data[current_title] = []
"resolusi": resolution, elif element.name == 'p' and current_title:
"link": href, strong_tag = element.find('strong')
"title": title, if strong_tag:
}) resolution = strong_tag.text.strip()
if resolution in valid_resolutions:
links = ', '.join([f'<a href="{a["href"]}">{a.text.strip()}</a>' for a in element.find_all('a')])
data[current_title].append(f"{resolution} {links}")
for title, resolutions in data.items():
output.append(title)
output.extend(resolutions)
output.append('')
for res in resolutions:
total_links += res.count('<a href=')
if not data: if not data:
return await message.reply(strings("no_result")) return await message.reply(strings("no_result"))
res = "".join(f"<b>Host: <a href='{i['link']}'>{i['resolusi']} {i['title']}</a></b>\n\n" for i in data) if len(total_links) > 70:
await message.reply_msg(res) url = await post_to_telegraph(False, link, "<br>".join(output))
except MessageTooLong: return await message.reply_msg(f"Your result is too long, i have pasted your result on Telegraph:\n{url}")
url = await post_to_telegraph(False, link, res.replace("\n", "<br>")) await message.reply_msg("\n".join(output))
await message.reply_msg(f"Your result is too long, i have pasted your result on Telegraph:\n{url}")
except IndexError: except IndexError:
return await message.reply( return await message.reply(
strings("invalid_cmd_scrape").format(cmd=message.command[0]) strings("invalid_cmd_scrape").format(cmd=message.command[0])