diff --git a/misskaty/plugins/misc_tools.py b/misskaty/plugins/misc_tools.py
index 5ec69f66..40dd7cd1 100644
--- a/misskaty/plugins/misc_tools.py
+++ b/misskaty/plugins/misc_tools.py
@@ -1,3 +1,11 @@
+"""
+ * @author yasir
+ * @date 2022-12-01 09:12:27
+ * @lastModified 2022-12-01 09:32:31
+ * @projectName MissKatyPyro
+ * Copyright @YasirPedia All rights reserved
+"""
+
 import os, re
 import aiohttp
 from bs4 import BeautifulSoup
diff --git a/misskaty/plugins/scrapwebsite.py b/misskaty/plugins/scrapwebsite.py
index 68fd3b2a..287e9302 100644
--- a/misskaty/plugins/scrapwebsite.py
+++ b/misskaty/plugins/scrapwebsite.py
@@ -1,24 +1,32 @@
+"""
+ * @author yasir
+ * @date 2022-12-01 09:12:27
+ * @lastModified 2022-12-01 09:32:31
+ * @projectName MissKatyPyro
+ * Copyright @YasirPedia All rights reserved
+"""
+
 # This plugin to scrape from melongmovie, and lk21
 from bs4 import BeautifulSoup
-import aiohttp
 import re
-import requests
 import traceback
 from misskaty import app
 from pyrogram import filters
 from pyrogram.errors import MessageTooLong
-from info import COMMAND_HANDLER
+from misskaty.vars import COMMAND_HANDLER, BOT_USERNAME
 from misskaty.core.decorator.errors import capture_err
 from misskaty.helper.tools import rentry
+from misskaty.helper.http import http
 
 __MODULE__ = "WebScraper"
 __HELP__ = """
 /melongmovie - Scrape website data from MelongMovie Web. If without query will give latest movie list.
-/lk21 [query ] - Scrape website data from LayarKaca21. If without query will give latest movie list.
-/terbit21 [query ] - Scrape website data from Terbit21. If without query will give latest movie list.
-/savefilm21 [query ] - Scrape website data from Savefilm21. If without query will give latest movie list.
-/movieku [query ] - Scrape website data from Movieku.cc
-/gomov [query ] - Scrape website data from GoMov. If without query will give latest movie list.
+/lk21 [query] - Scrape website data from LayarKaca21. If without query will give latest movie list.
+/pahe [query] - Scrape website data from Pahe.li. If without query will give latest post list.
+/terbit21 [query] - Scrape website data from Terbit21. If without query will give latest movie list.
+/savefilm21 [query] - Scrape website data from Savefilm21. If without query will give latest movie list.
+/movieku [query] - Scrape website data from Movieku.cc
+/gomov [query] - Scrape website data from GoMov. If without query will give latest movie list.
""" @@ -36,12 +44,7 @@ async def nodrakor(_, message): headers = { "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582" } - html = requests.get( - f"https://109.234.34.246/?s={judul}", - headers=headers, - allow_redirects=False, - verify=False, - ) + html = await http.get(f"https://109.234.34.246/?s={judul}", headers=headers) soup = BeautifulSoup(html.text, "lxml") res = soup.find_all(class_="content-thumbnail text-center") data = [] @@ -53,7 +56,7 @@ async def nodrakor(_, message): return await msg.edit("Oops, data film tidak ditemukan.") res = "".join(f"{i['judul']}\n{i['link']}\n\n" for i in data) await msg.edit( - f"Hasil Pencarian di Nodrakor:\n{res}\nScraped by @MissKatyRoBot" + f"Hasil Pencarian di Nodrakor:\n{res}\nScraped by @{BOT_USERNAME}" ) except Exception as e: await msg.edit(f"ERROR: {str(e)}") @@ -63,10 +66,9 @@ async def nodrakor(_, message): @app.on_message(filters.command(["ngefilm21"], COMMAND_HANDLER)) @capture_err async def ngefilm21(_, message): - try: - title = message.text.split(" ", maxsplit=1)[1] - except IndexError: - title = "" + if len(message.command) == 1: + return await message.reply("Masukkan query yang akan dicari..!!") + title = message.text.split(" ", maxsplit=1)[1] msg = await message.reply("Sedang proses scrap, mohon tunggu..") try: @@ -74,7 +76,9 @@ async def ngefilm21(_, message): "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582" } - html = requests.get(f"http://185.237.253.209/search?q={title}", headers=headers) + html = await http.get( + f"http://185.237.253.209/search?q={title}", headers=headers + ) soup = BeautifulSoup(html.text, "lxml") res = soup.find_all("h2") data = [] @@ -92,21 +96,19 @@ async def ngefilm21(_, message): await msg.edit(f"ERROR: {str(e)}") +# Scrape Web From Movieku.CC @app.on_message(filters.command(["movieku"], COMMAND_HANDLER)) @capture_err async def movikucc(_, message): - try: - judul = message.text.split(" ", maxsplit=1)[1] - except IndexError: - judul = "" - + if len(message.command) == 1: + return await message.reply("Masukkan query yang akan dicari..!!") + judul = message.text.split(" ", maxsplit=1)[1] msg = await message.reply("Sedang proses scrap, mohon tunggu..") try: headers = { "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582" } - - html = requests.get(f"https://107.152.39.187/?s={judul}", headers=headers) + html = await http.get(f"https://107.152.39.187/?s={judul}", headers=headers) soup = BeautifulSoup(html.text, "lxml") data = soup.find_all(class_="bx") res = "".join( @@ -127,15 +129,14 @@ async def savefilm21(_, message): judul = message.text.split(" ", maxsplit=1)[1] except IndexError: judul = "" - msg = await message.reply("Sedang proses scrap, mohon tunggu..") try: headers = { "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582" } - html = requests.get( - f"http://38.242.196.210/?s={judul}", headers=headers, allow_redirects=False + html = await http.get( + f"http://185.99.135.215/?s={judul}", headers=headers, follow_redirects=False ) soup = BeautifulSoup(html.text, "lxml") res = soup.find_all(class_="entry-title") @@ -171,7 +172,7 @@ async def melongmovie(_, message): "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) 
         }
 
-        html = requests.get(f"http://167.99.31.48/?s={judul}", headers=headers)
+        html = await http.get(f"http://167.99.31.48/?s={judul}", headers=headers)
         soup = BeautifulSoup(html.text, "lxml")
         data = []
         for res in soup.select(".box"):
@@ -195,97 +196,118 @@
         await msg.edit(f"ERROR: {str(e)}")
 
 
+@app.on_message(filters.command(["pahe"], COMMAND_HANDLER))
+@capture_err
+async def pahe_scrap(_, message):
+    judul = message.text.split(" ", maxsplit=1)[1] if len(message.command) > 1 else ""
+    pesan = await message.reply("Please wait, scraping data..")
+    r = await http.get(f"https://yasirapi.eu.org/pahe?q={judul}")
+    res = r.json()
+    if not res["result"]:
+        return await pesan.edit("Yahh, no result found.")
+    data = "".join(
+        f"**{count}. {i['judul']}**\n{i['link']}\n\n"
+        for count, i in enumerate(res["result"], start=1)
+    )
+    try:
+        await pesan.edit(
+            f"**Daftar rilis movie terbaru di web Pahe**:\n{data}",
+            disable_web_page_preview=True,
+        )
+    except MessageTooLong:
+        msg = await rentry(data)
+        await pesan.edit(
+            f"Karena hasil scrape terlalu panjang, maka hasil scrape di post ke rentry.\n\n{msg}"
+        )
+
+
 @app.on_message(filters.command(["terbit21"], COMMAND_HANDLER))
 @capture_err
 async def terbit21_scrap(_, message):
     if len(message.command) == 1:
-        async with aiohttp.ClientSession() as session:
-            r = await session.get(f"https://yasirapi.eu.org/terbit21")
-            res = await r.json()
-            data = "".join(
-                f"**Judul: {i['judul']}**\n`{i['kategori']}`\n{i['link']}\n**Download:** [Klik Disini]({i['dl']})\n\n"
-                for i in res["result"]
-            )
-            try:
-                return await message.reply(
-                    f"**Daftar rilis movie terbaru di web Terbit21**:\n{data}",
-                    disable_web_page_preview=True,
-                )
-            except MessageTooLong:
-                msg = await rentry(data)
-                return await message.reply(
-                    f"Karena hasil scrape terlalu panjang, maka hasil scrape di post ke rentry.\n\n{msg}"
-                )
-    judul = message.text.split(" ", maxsplit=1)[1]
-    msg = await message.reply(f"Mencari film di Terbit21 dg keyword {judul}..")
-    async with aiohttp.ClientSession() as session:
-        r = await session.get(f"https://yasirapi.eu.org/terbit21?q={judul}")
-        res = await r.json()
+        r = await http.get("https://yasirapi.eu.org/terbit21")
+        res = r.json()
         data = "".join(
             f"**Judul: {i['judul']}**\n`{i['kategori']}`\n{i['link']}\n**Download:** [Klik Disini]({i['dl']})\n\n"
             for i in res["result"]
         )
-        if not res["result"]:
-            return await msg.edit("Yahh, ga ada hasil ditemukan")
         try:
-            await msg.edit(
-                f"Hasil pencarian query {judul} di lk21:\n{data}",
+            return await message.reply(
+                f"**Daftar rilis movie terbaru di web Terbit21**:\n{data}",
                 disable_web_page_preview=True,
             )
         except MessageTooLong:
-            pesan = await rentry(data)
-            await msg.edit(
-                f"Karena hasil scrape terlalu panjang, maka hasil scrape di post ke rentry.\n\n{pesan}"
+            msg = await rentry(data)
+            return await message.reply(
+                f"Karena hasil scrape terlalu panjang, maka hasil scrape di post ke rentry.\n\n{msg}"
            )
+    judul = message.text.split(" ", maxsplit=1)[1]
+    msg = await message.reply(f"Mencari film di Terbit21 dg keyword {judul}..")
+    r = await http.get(f"https://yasirapi.eu.org/terbit21?q={judul}")
+    res = r.json()
+    data = "".join(
+        f"**Judul: {i['judul']}**\n`{i['kategori']}`\n{i['link']}\n**Download:** [Klik Disini]({i['dl']})\n\n"
+        for i in res["result"]
+    )
+    if not res["result"]:
+        return await msg.edit("Yahh, ga ada hasil ditemukan")
+    try:
+        await msg.edit(
+            f"Hasil pencarian query {judul} di Terbit21:\n{data}",
+            disable_web_page_preview=True,
+        )
+    except MessageTooLong:
+        pesan = await rentry(data)
+        await msg.edit(
+            f"Karena hasil scrape terlalu panjang, maka hasil scrape di post ke rentry.\n\n{pesan}"
+        )
 
 
 @app.on_message(filters.command(["lk21"], COMMAND_HANDLER))
 @capture_err
 async def lk21_scrap(_, message):
     if len(message.command) == 1:
-        msg = await message.reply(f"Mendapatkan daftar post film terbaru di lk21")
-        async with aiohttp.ClientSession() as session:
-            r = await session.get(f"https://yasirapi.eu.org/lk21")
-            res = await r.json()
-            if res.get("detail", None):
-                return await msg.edit(f"ERROR: {res['detail']}")
-            data = "".join(
-                f"**Judul: {i['judul']}**\n`{i['kategori']}`\n{i['link']}\n**Download:** [Klik Disini]({i['dl']})\n\n"
-                for i in res["result"]
-            )
-            try:
-                return await msg.edit(
-                    f"**Daftar rilis movie terbaru di web LK21**:\n{data}",
-                    disable_web_page_preview=True,
-                )
-            except MessageTooLong:
-                msg = await rentry(data)
-                await msg.edit(
-                    f"Karena hasil scrape terlalu panjang, maka hasil scrape di post ke rentry.\n\n{msg}"
-                )
-    judul = message.text.split(" ", maxsplit=1)[1]
-    msg = await message.reply(f"Mencari film di lk21 dg keyword {judul}..")
-    async with aiohttp.ClientSession() as session:
-        r = await session.get(f"https://yasirapi.eu.org/lk21?q={judul}")
-        res = await r.json()
+        msg = await message.reply("Mendapatkan daftar post film terbaru di lk21")
+        r = await http.get("https://yasirapi.eu.org/lk21")
+        res = r.json()
         if res.get("detail", None):
             return await msg.edit(f"ERROR: {res['detail']}")
         data = "".join(
             f"**Judul: {i['judul']}**\n`{i['kategori']}`\n{i['link']}\n**Download:** [Klik Disini]({i['dl']})\n\n"
             for i in res["result"]
         )
-        if not res["result"]:
-            return await msg.edit("Yahh, ga ada hasil ditemukan")
         try:
-            await msg.edit(
-                f"Hasil pencarian query {judul} di lk21:\n{data}",
+            return await msg.edit(
+                f"**Daftar rilis movie terbaru di web LK21**:\n{data}",
                 disable_web_page_preview=True,
             )
         except MessageTooLong:
             pesan = await rentry(data)
             return await msg.edit(
                 f"Karena hasil scrape terlalu panjang, maka hasil scrape di post ke rentry.\n\n{pesan}"
             )
+    judul = message.text.split(" ", maxsplit=1)[1]
+    msg = await message.reply(f"Mencari film di lk21 dg keyword {judul}..")
+    r = await http.get(f"https://yasirapi.eu.org/lk21?q={judul}")
+    res = r.json()
+    if res.get("detail", None):
+        return await msg.edit(f"ERROR: {res['detail']}")
+    data = "".join(
+        f"**Judul: {i['judul']}**\n`{i['kategori']}`\n{i['link']}\n**Download:** [Klik Disini]({i['dl']})\n\n"
+        for i in res["result"]
+    )
+    if not res["result"]:
+        return await msg.edit("Yahh, ga ada hasil ditemukan")
+    try:
+        await msg.edit(
+            f"Hasil pencarian query {judul} di lk21:\n{data}",
+            disable_web_page_preview=True,
+        )
+    except MessageTooLong:
+        pesan = await rentry(data)
+        return await msg.edit(
+            f"Karena hasil scrape terlalu panjang, maka hasil scrape di post ke rentry.\n\n{pesan}"
+        )
@@ -296,13 +318,13 @@ async def gomov_scrap(_, message):
     except IndexError:
         judul = ""
 
-    msg = await message.reply(f"Scraping GoMov Website..")
+    msg = await message.reply("Scraping GoMov Website..")
 
     try:
         headers = {
             "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
         }
-        html = requests.get(f"https://185.173.38.216/?s={judul}", headers=headers)
requests.get(f"https://185.173.38.216/?s={judul}", headers=headers) + html = await http.get(f"https://185.173.38.216/?s={judul}", headers=headers) soup = BeautifulSoup(html.text, "lxml") entry = soup.find_all(class_="entry-header") DATA = [] @@ -318,7 +340,7 @@ async def gomov_scrap(_, message): for num, i in enumerate(DATA, start=1) ) await msg.edit( - f"Hasil Pencarian di website GoMov:\n{res}\nScraped by @MissKatyRoBot" + f"Hasil Pencarian di website GoMov:\n{res}\nScraped by @{BOT_USERNAME}" ) except Exception: exc = traceback.format_exc() @@ -334,7 +356,7 @@ async def savefilm21_scrap(_, message): "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582" } - html = requests.get(link, headers=headers, allow_redirects=False) + html = await http.get(link, headers=headers, follow_redirects=False) soup = BeautifulSoup(html.text, "lxml") res = soup.find_all(class_="button button-shadow") res = "".join(f"{i.text}\n{i['href']}\n\n" for i in res) @@ -356,7 +378,7 @@ async def nodrakor_scrap(_, message): "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582" } - html = requests.get(link, headers=headers, allow_redirects=False, verify=False) + html = await http.get(link, headers=headers, follow_redirects=False) soup = BeautifulSoup(html.text, "lxml") hasil = soup.find_all(class_="gmr-download-wrap clearfix")[0] await message.reply(f"Hasil Scrap dari {link}:\n{hasil}") @@ -368,6 +390,7 @@ async def nodrakor_scrap(_, message): await message.reply(f"ERROR: {str(e)}") +# Scrape Link Download Movieku.CC @app.on_message(filters.command(["movieku_scrap"], COMMAND_HANDLER)) @capture_err async def muviku_scrap(_, message): @@ -377,7 +400,7 @@ async def muviku_scrap(_, message): "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582" } - html = requests.get(link, headers=headers) + html = await http.get(link, headers=headers) soup = BeautifulSoup(html.text, "lxml") res = soup.find_all(class_="smokeurl") data = [] @@ -411,7 +434,7 @@ async def melong_scrap(_, message): "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582" } - html = requests.get(link, headers=headers) + html = await http.get(link, headers=headers) soup = BeautifulSoup(html.text, "lxml") for ep in soup.findAll(text=re.compile(r"(?i)episode\s+\d+|LINK DOWNLOAD")): hardsub = ep.findPrevious("div") @@ -433,7 +456,7 @@ async def gomov_dl(_, message): "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582" } - html = requests.get(link, headers=headers, verify=False) + html = await http.get(link, headers=headers) soup = BeautifulSoup(html.text, "lxml") entry = soup.find(class_="gmr-download-wrap clearfix") hasil = soup.find(class_="title-download").text @@ -444,5 +467,5 @@ async def gomov_dl(_, message): await message.reply(hasil) except IndexError: await message.reply( - "Gunakan command /melong [link] untuk scrap link download" + f"Gunakan command /{message.command[0]} [link] untuk scrap link download" )