From 9c4f3e3c784c9f757ee7cfbdcc485b896671a6f3 Mon Sep 17 00:00:00 2001
From: yasir
Date: Wed, 11 Jan 2023 14:31:26 +0700
Subject: [PATCH] Fix more

---
 misskaty/plugins/web_scraper.py | 231 ++++++++++++++++++++++++++------
 1 file changed, 191 insertions(+), 40 deletions(-)

diff --git a/misskaty/plugins/web_scraper.py b/misskaty/plugins/web_scraper.py
index 45fc6e32..9a23b26b 100644
--- a/misskaty/plugins/web_scraper.py
+++ b/misskaty/plugins/web_scraper.py
@@ -10,6 +10,7 @@ import logging
 from bs4 import BeautifulSoup
 from pykeyboard import InlineKeyboard, InlineButton
 from pyrogram import filters
+from pyrogram.types import InlineKeyboardButton, InlineKeyboardMarkup
 from misskaty.helper.http import http
 from misskaty import app
 from misskaty.vars import COMMAND_HANDLER
@@ -43,90 +44,121 @@ def split_arr(arr, size: 5):
     return arrs
 
 # Terbit21 GetData
-async def getDataTerbit21(msg, idpesan, kueri, CurrentPage):
-    if not SCRAP_DICT.get(idpesan):
+async def getDataTerbit21(msg, kueri, CurrentPage):
+    if not SCRAP_DICT.get(msg.id):
         if not kueri:
             terbitjson = (await http.get('https://yasirapi.eu.org/terbit21')).json()
         else:
             terbitjson = (await http.get(f'https://yasirapi.eu.org/terbit21?q={kueri}')).json()
         if not terbitjson.get("result"):
-            return await msg.reply("Sorry, could not find any results!")
-        SCRAP_DICT[idpesan] = [split_arr(terbitjson["result"], 6), kueri]
+            return await msg.edit("Sorry, could not find any results!")
+        SCRAP_DICT[msg.id] = [split_arr(terbitjson["result"], 6), kueri]
     try:
         index = int(CurrentPage - 1)
-        PageLen = len(SCRAP_DICT[idpesan][0])
+        PageLen = len(SCRAP_DICT[msg.id][0])
         if kueri:
             TerbitRes = f"#Terbit21 Results For: {kueri}\n\n"
         else:
             TerbitRes = "#Terbit21 Latest:\nšŸŒ€ Use /terbit21 [title] to start search with title.\n\n"
-        for c, i in enumerate(SCRAP_DICT[idpesan][0][index], start=1):
+        for c, i in enumerate(SCRAP_DICT[msg.id][0][index], start=1):
             TerbitRes += f"{c}. {i['judul']}\nCategory: {i['kategori']}\n"
             TerbitRes += "\n" if re.search(r"Complete|Ongoing", i["kategori"]) else f"šŸ’  Download\n\n"
         IGNORE_CHAR = "[]"
         TerbitRes = ''.join(i for i in TerbitRes if not i in IGNORE_CHAR)
         return TerbitRes, PageLen
     except (IndexError, KeyError):
-        await msg.reply("Sorry, could not find any results!")
+        await msg.edit("Sorry, could not find any results!")
 
 # LK21 GetData
-async def getDatalk21(msg, idpesan, kueri, CurrentPage):
-    if not SCRAP_DICT.get(idpesan):
+async def getDatalk21(msg, kueri, CurrentPage):
+    if not SCRAP_DICT.get(msg.id):
         if not kueri:
             lk21json = (await http.get('https://yasirapi.eu.org/lk21')).json()
         else:
             lk21json = (await http.get(f'https://yasirapi.eu.org/lk21?q={kueri}')).json()
         if not lk21json.get("result"):
-            return await msg.reply("Sorry could not find any matching results!")
-        SCRAP_DICT[idpesan] = [split_arr(lk21json["result"], 6), kueri]
+            return await msg.edit("Sorry could not find any matching results!")
+        SCRAP_DICT[msg.id] = [split_arr(lk21json["result"], 6), kueri]
     try:
         index = int(CurrentPage - 1)
-        PageLen = len(SCRAP_DICT[idpesan][0])
+        PageLen = len(SCRAP_DICT[msg.id][0])
         if kueri:
             lkResult = f"#Layarkaca21 Results For: {kueri}\n\n"
         else:
             lkResult = "#Layarkaca21 Latest:\nšŸŒ€ Use /lk21 [title] to start search with title.\n\n"
-        for c, i in enumerate(SCRAP_DICT[idpesan][0][index], start=1):
+        for c, i in enumerate(SCRAP_DICT[msg.id][0][index], start=1):
             lkResult += f"{c}. {i['judul']}\nCategory: {i['kategori']}\n"
             lkResult += "\n" if re.search(r"Complete|Ongoing", i["kategori"]) else f"šŸ’  Download\n\n"
         IGNORE_CHAR = "[]"
         lkResult = ''.join(i for i in lkResult if not i in IGNORE_CHAR)
         return lkResult, PageLen
     except (IndexError, KeyError):
-        await msg.reply("Sorry could not find any matching results!")
+        await msg.edit("Sorry could not find any matching results!")
 
 # Pahe GetData
-async def getDataPahe(msg, idpesan, kueri, CurrentPage):
-    if not SCRAP_DICT.get(idpesan):
+async def getDataPahe(msg, kueri, CurrentPage):
+    if not SCRAP_DICT.get(msg.id):
         pahejson = (await http.get(f'https://yasirapi.eu.org/pahe?q={kueri}')).json()
         if not pahejson.get("result"):
-            return await msg.reply("Sorry could not find any matching results!", quote=True)
-        SCRAP_DICT[idpesan] = [split_arr(pahejson["result"], 6), kueri]
+            return await msg.edit("Sorry could not find any matching results!")
+        SCRAP_DICT[msg.id] = [split_arr(pahejson["result"], 6), kueri]
     try:
         index = int(CurrentPage - 1)
-        PageLen = len(SCRAP_DICT[idpesan][0])
+        PageLen = len(SCRAP_DICT[msg.id][0])
         paheResult = f"#Pahe Results For: {kueri}\n\n" if kueri else f"#Pahe Latest:\nšŸŒ€ Use /pahe [title] to start search with title.\n\n"
-        for c, i in enumerate(SCRAP_DICT[idpesan][0][index], start=1):
+        for c, i in enumerate(SCRAP_DICT[msg.id][0][index], start=1):
             paheResult += f"{c}. {i['judul']}\n\n"
         IGNORE_CHAR = "[]"
         paheResult = ''.join(i for i in paheResult if not i in IGNORE_CHAR)
         return paheResult, PageLen
     except (IndexError, KeyError):
-        await msg.reply("Sorry could not find any matching results!")
+        await msg.edit("Sorry could not find any matching results!")
+
+# MelongMovie GetData
+async def getDataMelong(msg, kueri, CurrentPage):
+    if not SCRAP_DICT.get(msg.id):
+        data = await http.get(f'http://167.99.31.48/?s={kueri}', headers=headers)
+        bs4 = BeautifulSoup(data.text, "lxml")
+        melongdata = []
+        for res in bs4.select(".box"):
+            dd = res.select("a")
+            url = dd[0]["href"]
+            title = dd[0]["title"]
+            try:
+                quality = dd[0].find(class_="quality").text
+            except AttributeError:
+                quality = "N/A"
+            melongdata.append({"judul": title, "link": url, "quality": quality})
+        if not melongdata:
+            return await msg.edit("Sorry could not find any results!")
+        SCRAP_DICT[msg.id] = [split_arr(melongdata, 6), kueri]
+    try:
+        index = int(CurrentPage - 1)
+        PageLen = len(SCRAP_DICT[msg.id][0])
+
+        melongResult = f"#MelongMovie Results For: {kueri}\n\n" if kueri else f"#MelongMovie Latest:\nšŸŒ€ Use /melongmovie [title] to start search with title.\n\n"
+        for c, i in enumerate(SCRAP_DICT[msg.id][0][index], start=1):
+            melongResult += f"{c}. {i['judul']}\nQuality: {i['quality']}\nExtract: /melongmovie_scrap {i['link']}\n\n"
+        IGNORE_CHAR = "[]"
+        melongResult = ''.join(i for i in melongResult if not i in IGNORE_CHAR)
+        return melongResult, PageLen
+    except (IndexError, KeyError):
+        await msg.edit("Sorry could not find any matching results!")
 
 # GoMov GetData
-async def getDataGomov(msg, idpesan, kueri, CurrentPage):
-    if not SCRAP_DICT.get(idpesan):
-        gomovv = await http.get(f'https://185.173.38.216/?s=')
+async def getDataGomov(msg, kueri, CurrentPage):
+    if not SCRAP_DICT.get(msg.id):
+        gomovv = await http.get(f'https://185.173.38.216/?s={kueri}', headers=headers)
         text = BeautifulSoup(gomovv.text, "lxml")
         entry = text.find_all(class_="entry-header")
         if "Nothing Found" in entry[0].text:
             if not kueri:
-                return await msg.reply("404 Not FOUND!", quote=True)
+                return await msg.edit("404 Not FOUND!")
             else:
-                return await msg.reply(f"404 Not FOUND For: {kueri}", quote=True)
+                return await msg.edit(f"404 Not FOUND For: {kueri}")
         data = []
         for i in entry:
             genre = i.find(class_="gmr-movie-on").text
@@ -134,20 +166,20 @@ async def getDataGomov(msg, kueri, CurrentPage):
             judul = i.find(class_="entry-title").find("a").text
             link = i.find(class_="entry-title").find("a").get("href")
             data.append({"judul": judul, "link": link, "genre": genre})
-        SCRAP_DICT[idpesan] = [split_arr(data, 6), kueri]
+        SCRAP_DICT[msg.id] = [split_arr(data, 6), kueri]
     try:
         index = int(CurrentPage - 1)
-        PageLen = len(SCRAP_DICT[idpesan][0])
-        gomovResult = f"#Gomov Results For: {kueri}\n\n" if kueri else f"#Gomov Latest:\nšŸŒ€ Use /{msg.command[0]} [title] to start search with title.\n\n"
-        for c, i in enumerate(SCRAP_DICT[idpesan][0][index], start=1):
+        PageLen = len(SCRAP_DICT[msg.id][0])
+        gomovResult = f"#Gomov Results For: {kueri}\n\n" if kueri else f"#Gomov Latest:\nšŸŒ€ Use /gomov [title] to start search with title.\n\n"
+        for c, i in enumerate(SCRAP_DICT[msg.id][0][index], start=1):
             gomovResult += f"{c}. {i['judul']}\nGenre: {i['genre']}\n"
-            gomovResult += "\n" if re.search(r"Series", i["genre"]) else f"Extract: /{msg.command[0]}_scrap {i['link']}\n\n"
+            gomovResult += "\n" if re.search(r"Series", i["genre"]) else f"Extract: /gomov_scrap {i['link']}\n\n"
         IGNORE_CHAR = "[]"
         gomovResult = ''.join(i for i in gomovResult if not i in IGNORE_CHAR)
         return gomovResult, PageLen
     except (IndexError, KeyError):
-        await msg.reply("Sorry could not find any matching results!")
+        await msg.edit("Sorry could not find any matching results!")
 
 # Terbit21 CMD
 @app.on_message(filters.command(['terbit21'], COMMAND_HANDLER))
 async def terbit21_s(client, message):
@@ -157,7 +189,7 @@
     kueri = None
     pesan = await message.reply("Getting data from Terbit21..")
     CurrentPage = 1
-    terbitres, PageLen = await getDataTerbit21(message, pesan.id, kueri, CurrentPage)
+    terbitres, PageLen = await getDataTerbit21(pesan, kueri, CurrentPage)
     keyboard = InlineKeyboard()
     keyboard.paginate(PageLen, CurrentPage, 'page_terbit21#{number}' + f'#{pesan.id}#{message.from_user.id}')
     keyboard.row(
@@ -174,7 +206,7 @@ async def lk21_s(client, message):
     kueri = None
     pesan = await message.reply("Getting data from LK21..")
     CurrentPage = 1
-    lkres, PageLen = await getDatalk21(message, pesan.id, kueri, CurrentPage)
+    lkres, PageLen = await getDatalk21(pesan, kueri, CurrentPage)
     keyboard = InlineKeyboard()
     keyboard.paginate(PageLen, CurrentPage, 'page_lk21#{number}' + f'#{pesan.id}#{message.from_user.id}')
     keyboard.row(
@@ -191,7 +223,7 @@ async def pahe_s(client, message):
     kueri = ""
     pesan = await message.reply("Getting data from Pahe Web..")
     CurrentPage = 1
-    paheres, PageLen = await getDataPahe(message, pesan.id, kueri, CurrentPage)
+    paheres, PageLen = await getDataPahe(pesan, kueri, CurrentPage)
     keyboard = InlineKeyboard()
     keyboard.paginate(PageLen, CurrentPage, 'page_pahe#{number}' + f'#{pesan.id}#{message.from_user.id}')
     keyboard.row(
@@ -207,7 +239,7 @@ async def gomov_s(client, message):
     kueri = ""
     pesan = await message.reply("Getting data from Gomov Web..")
     CurrentPage = 1
-    gomovres, PageLen = await getDataGomov(message, pesan.id, kueri, CurrentPage)
+    gomovres, PageLen = await getDataGomov(pesan, kueri, CurrentPage)
     keyboard = InlineKeyboard()
     keyboard.paginate(PageLen, CurrentPage, 'page_gomov#{number}' + f'#{pesan.id}#{message.from_user.id}')
     keyboard.row(
@@ -228,7 +260,7 @@ async def terbit21page_callback(client, callback_query):
         return await callback_query.answer("Invalid callback data, please send CMD again..")
 
     try:
-        terbitres, PageLen = await getDataTerbit21(callback_query.message, message_id, kueri, CurrentPage)
+        terbitres, PageLen = await getDataTerbit21(callback_query.message, kueri, CurrentPage)
     except TypeError:
         return
 
@@ -252,7 +284,7 @@ async def lk21page_callback(client, callback_query):
         return await callback_query.answer("Invalid callback data, please send CMD again..")
 
     try:
-        lkres, PageLen = await getDatalk21(callback_query.message, message_id, kueri, CurrentPage)
+        lkres, PageLen = await getDatalk21(callback_query.message, kueri, CurrentPage)
     except TypeError:
         return
 
@@ -276,7 +308,7 @@ async def pahepage_callback(client, callback_query):
         return await callback_query.answer("Invalid callback data, please send CMD again..")
 
     try:
-        lkres, PageLen = await getDataPahe(callback_query.message, message_id, kueri, CurrentPage)
+        lkres, PageLen = await getDataPahe(callback_query.message, kueri, CurrentPage)
     except TypeError:
         return
 
@@ -289,7 +321,7 @@ async def pahepage_callback(client, callback_query):
 
 # Gomov Page Callback
 @app.on_callback_query(filters.create(lambda _, __, query: 'page_gomov#' in query.data))
-async def pahepage_callback(client, callback_query):
+async def gomovpage_callback(client, callback_query):
     if callback_query.from_user.id != int(callback_query.data.split('#')[3]):
         return await callback_query.answer("Not yours..", True)
     message_id = int(callback_query.data.split('#')[2])
@@ -300,7 +332,7 @@ async def pahepage_callback(client, callback_query):
         return await callback_query.answer("Invalid callback data, please send CMD again..")
 
     try:
-        gomovres, PageLen = await getDataGomov(callback_query.message, message_id, kueri, CurrentPage)
+        gomovres, PageLen = await getDataGomov(callback_query.message, kueri, CurrentPage)
     except TypeError:
         return
 
@@ -309,4 +341,123 @@ async def pahepage_callback(client, callback_query):
     keyboard.row(
         InlineButton("āŒ Close", f"close#{callback_query.from_user.id}")
     )
-    await editPesan(callback_query.message, gomovres, reply_markup=keyboard)
\ No newline at end of file
+    await editPesan(callback_query.message, gomovres, reply_markup=keyboard)
+
+### Scrape DDL Link From Web ###
+
+# Savefilm21 DDL
+@app.on_message(filters.command(["savefilm21_scrap"], COMMAND_HANDLER))
+async def savefilm21_scrap(_, message):
+    try:
+        link = message.text.split(" ", maxsplit=1)[1]
+        html = await http.get(link, headers=headers)
+        soup = BeautifulSoup(html.text, "lxml")
+        res = soup.find_all(class_="button button-shadow")
+        res = "".join(f"{i.text}\n{i['href']}\n\n" for i in res)
+        await message.reply(
+            f"Scrape result from {link}:\n\n{res}",
+            disable_web_page_preview=True,
+            reply_markup=InlineKeyboardMarkup(
+                [
+                    [
+                        InlineKeyboardButton(
+                            text="āŒ Close",
+                            callback_data=f"close#{message.from_user.id}",
+                        )
+                    ]
+                ]
+            ),
+        )
+    except IndexError:
+        return await message.reply(f"Use /{message.command[0]} [link] to scrape download links")
+    except Exception as e:
+        await message.reply(f"ERROR: {str(e)}")
+
+# Scrape DDL Link Nodrakor
+@app.on_message(filters.command(["nodrakor_scrap"], COMMAND_HANDLER))
+async def nodrakor_scrap(_, message):
+    try:
+        link = message.text.split(" ", maxsplit=1)[1]
+        html = await http.get(link, headers=headers)
+        soup = BeautifulSoup(html.text, "lxml")
+        hasil = soup.find_all(class_="gmr-download-wrap clearfix")[0]
+        await message.reply(f"Scrape result from {link}:\n{hasil}")
+    except IndexError:
+        return await message.reply(f"Use /{message.command[0]} [link] to scrape download links")
+    except Exception as e:
+        await message.reply(f"ERROR: {str(e)}")
+
+
+# Scrape Link Download Movieku.CC
+@app.on_message(filters.command(["movieku_scrap"], COMMAND_HANDLER))
+async def muviku_scrap(_, message):
+    try:
+        link = message.text.split(" ", maxsplit=1)[1]
+        html = await http.get(link, headers=headers)
+        soup = BeautifulSoup(html.text, "lxml")
+        res = soup.find_all(class_="smokeurl")
+        data = []
+        for i in res:
+            for a in i.find_all("a"):
+                link = a["href"]
+                kualitas = a.text
+                # print(f"{kualitas}\n{link}")
+                data.append({"link": link, "kualitas": kualitas})
+        if not data:
+            return await message.reply("Oops, movie data not found.")
+        res = "".join(f"Host: {i['kualitas']}\n{i['link']}\n\n" for i in data)
+        await message.reply(res)
+    except IndexError:
+        return await message.reply(f"Use /{message.command[0]} [link] to scrape download links")
+    except Exception as e:
+        await message.reply(f"ERROR: {str(e)}")
+
+# Scrape DDL Link Melongmovie
+@app.on_message(filters.command(["melongmovie_scrap"], COMMAND_HANDLER))
+async def melong_scrap(_, message):
+    try:
+        link = message.text.split(" ", maxsplit=1)[1]
+        headers = {"User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"}
+
+        html = await http.get(link, headers=headers)
+        soup = BeautifulSoup(html.text, "lxml")
+        for ep in soup.findAll(text=re.compile(r"(?i)episode\s+\d+|LINK DOWNLOAD")):
+            hardsub = ep.findPrevious("div")
+            softsub = ep.findNext("div")
+            rep = f"{hardsub}\n{softsub}"
+            await message.reply(rep)
+    except IndexError:
+        await message.reply(f"Use /{message.command[0]} [link] to scrape download links")
+
+# Scrape DDL Link Gomov & Zonafilm
+@app.on_message(filters.command(["gomov_scrap", "zonafilm_scrap"], COMMAND_HANDLER))
+async def gomov_zonafilm_dl(_, message):
+    try:
+        link = message.text.split(" ", maxsplit=1)[1]
+        headers = {"User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"}
+
+        html = await http.get(link, headers=headers)
+        soup = BeautifulSoup(html.text, "lxml")
+        entry = soup.find(class_="gmr-download-wrap clearfix")
+        hasil = soup.find(class_="title-download").text
+        for i in entry.find(class_="list-inline gmr-download-list clearfix").find_all("li"):
+            title = i.find("a").text
+            link = i.find("a")["href"]
+            hasil += f"\n{title}\n{link}\n"
+        await message.reply(
+            hasil,
+            reply_markup=InlineKeyboardMarkup(
+                [
+                    [
+                        InlineKeyboardButton(
+                            text="āŒ Close",
+                            callback_data=f"close#{message.from_user.id}",
+                        )
+                    ]
+                ]
+            ),
+        )
+    except IndexError:
+        await message.reply(f"Use /{message.command[0]} [link] to scrape download links")
+    except Exception as err:
+        await message.reply(f"ERROR: {err}")
\ No newline at end of file