-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
467 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
marked | ||
https://fitgirl-repacks.site/all-my-repacks-a-z | ||
https://fitgirl-repacks.site/ | ||
ul | ||
lcp_catlist | ||
lcp_nextlink | ||
smart_push_smio_not_allow | ||
marked | ||
https://gog-games.com/search/all/1/title/asc/any | ||
https://gog-games.com | ||
div | ||
game-blocks grid-view | ||
btn | ||
null | ||
marked | ||
https://masquerade.site/a-z/ | ||
https://masquerade.site | ||
div | ||
letter-section | ||
null | ||
null | ||
marked | ||
https://nsw2u.xyz/switch-posts | ||
https://nsw2u.xyz/ | ||
div | ||
letter-section | ||
null | ||
null | ||
marked | ||
https://madloader.com/switch-nsp-games-collection | ||
https://madloader.com/ | ||
div | ||
entry-inner | ||
null | ||
null | ||
marked | ||
https://nxbrew.com/list-of-games/ | ||
https://nxbrew.com/ | ||
div | ||
letter-section | ||
null | ||
null | ||
marked | ||
https://www.xcinsp.com/ | ||
https://www.xcinsp.com/ | ||
ul | ||
dhswp-html-sitemap-post-list dhswp-post-list | ||
null | ||
null | ||
marked | ||
https://archive.org/download/Sony-Playstation-USA-Redump.org-2019-05-27/ | ||
https://archive.org/download/Sony-Playstation-USA-Redump.org-2019-05-27/ | ||
table | ||
directory-listing-table | ||
null | ||
null | ||
marked | ||
https://hexrom.com/roms/playstation/ | ||
https://hexrom.com/ | ||
div | ||
col-lg-4 col-sm-6 col-xs-12 | ||
next page-numbers | ||
null | ||
marked | ||
https://hexrom.com/roms/playstation-2/ | ||
https://hexrom.com/ | ||
div | ||
col-lg-4 col-sm-6 col-xs-12 | ||
next page-numbers | ||
null | ||
marked | ||
https://dlpsgame.org/list-all-game-ps2/ | ||
https://dlpsgame.org/ | ||
div | ||
listing-item | ||
null | ||
null | ||
marked | ||
https://gamesmountain.com/playstation_3 | ||
https://gamesmountain.com/ | ||
h2 | ||
entry-title | ||
next page-numbers | ||
null | ||
marked | ||
https://dlpsgame.org/list-all-game-ps3/ | ||
https://dlpsgame.org/ | ||
li | ||
listing-item | ||
null | ||
null | ||
marked | ||
https://dlpsgame.org/list-all-game-ps4/ | ||
https://dlpsgame.org/ | ||
li | ||
listing-item | ||
null | ||
null | ||
marked | ||
https://dlxbgame.net/list-all-game-xbox-iso/ | ||
https://dlxbgame.net/ | ||
div | ||
listing-item | ||
null | ||
null | ||
marked | ||
https://hexrom.com/roms/microsoft-xbox/ | ||
https://hexrom.com/ | ||
div | ||
col-lg-4 col-sm-6 col-xs-12 | ||
next page-numbers | ||
null | ||
marked | ||
https://gamesmountain.com/xbox_360_game | ||
https://gamesmountain.com/ | ||
h2 | ||
entry-title | ||
next page-numbers | ||
null | ||
marked | ||
https://hexrom.com/roms/xbox-360/ | ||
https://hexrom.com/ | ||
div | ||
col-lg-4 col-sm-6 col-xs-12 | ||
next page-numbers | ||
null | ||
marked | ||
https://nswgame.com/list-all-game-wii/ | ||
https://nswgame.com/ | ||
li | ||
listing-item | ||
null | ||
null | ||
marked | ||
https://www.emulatorgames.net/roms/nintendo-wii/ | ||
https://www.emulatorgames.net/ | ||
ul | ||
site-list | ||
page-item | ||
null | ||
marked | ||
https://romskingdom.com/en/download-roms/nintendo-wii | ||
https://romskingdom.com/ | ||
div | ||
row row-md | ||
next | ||
null | ||
marked | ||
https://archive.org/download/mame-merged/mame-merged/ | ||
https://archive.org/download/mame-merged/mame-merged/ | ||
table | ||
directory-listing-table | ||
null | ||
null | ||
marked | ||
https://ia801800.us.archive.org/view_archive.php?archive=/14/items/2020_01_06_fbn/roms/arcade.zip | ||
|
||
table | ||
archext | ||
null | ||
null | ||
marked | ||
https://archive.org/download/No-Intro-Collection_2016-01-03_Fixed/ | ||
https://archive.org/download/No-Intro-Collection_2016-01-03_Fixed/ | ||
table | ||
directory-listing-table | ||
null | ||
null | ||
marked | ||
https://archive.org/download/tosec-2021-02-14-to-2021-08-08-update/TOSEC%20%282021-02-14%20to%202021-08-08%29/ | ||
https://archive.org/download/tosec-2021-02-14-to-2021-08-08-update/TOSEC%20%282021-02-14%20to%202021-08-08%29/ | ||
table | ||
directory-listing-table | ||
null | ||
null |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import json | ||
# Load the scraped links written by the scraper step.
try:
    with open("output.json", encoding="utf-8") as json_file:
        strings = json.load(json_file)
except (OSError, json.JSONDecodeError):
    # Best effort: report the problem and skip cleaning instead of crashing,
    # so the rest of this module still runs.
    strings = None
    print('Json file to be cleaned not found.')

# Manual alternative (disabled): remove one target string from one key at a time.
# target_key = "switch-1-105518"
# target_string = "https://nsw2u.xyz/aaa-clock-switch-nsp"
# while True:
#     target_key = input('Enter Key (leave blank to repeat previous):') or target_key
#     if target_key == 'q':
#         print('Stopping and Saving')
#         break
#     target_string = input('Enter String (leave blank to repeat previous):') or target_string
#     strings[target_key] = [s for s in strings[target_key] if s != target_string]

# Automated version, used on multiple keys and targets.
# Format: 'key': ['target1', 'target2'].  A key may appear only once: a dict
# literal silently keeps just the last value of a repeated key, so all targets
# for 'marked' are listed together.
targets = {
    'marked': [
        'https://masquerade.site#a-z-listing-1',
        'https://nsw2u.xyz/#a-z-listing-2',
        'https://madloader.com/request/',
        'https://nxbrew.com/#a-z-listing-1',
        'https://archive.org/download/mame-merged/mame-merged/../',
    ],
}


def clean_strings(strings, targets):
    """Remove unwanted links from ``strings`` in place.

    Args:
        strings: dict mapping a key to a list of link strings.
        targets: dict mapping a key to the list of links to drop from it.

    Returns:
        The list of keys in ``targets`` that are absent from ``strings``
        (those keys are skipped rather than aborting the whole clean-up).
    """
    missing = []
    for target_key, target_strings in targets.items():
        if target_key not in strings:
            missing.append(target_key)
            continue
        unwanted = set(target_strings)
        strings[target_key] = [s for s in strings[target_key] if s not in unwanted]
    return missing


if strings is not None:
    if clean_strings(strings, targets):
        print('Key(s) marked for cleaning nonexistent, completed.')
    # Dump to a new json file for the next step to pick up.
    with open("outputcleaned.json", "w", encoding="utf-8") as file:
        file.write(json.dumps(strings))
import forsearch |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import json | ||
import random | ||
import string | ||
def listToString(s):
    """Return the strings in ``s`` joined by single spaces.

    An empty sequence yields an empty string; no trailing space is added.
    """
    return " ".join(s)
input_file = 'outputcleaned.json'  # input file
output_file = 'outputsearchready.json'
N = 10  # ID length
ID_ALPHABET = string.ascii_uppercase + string.digits


def make_id(length=N):
    """Return a random ID of ``length`` uppercase letters and digits."""
    return ''.join(random.choice(ID_ALPHABET) for _ in range(length))


def basename_from_link(link):
    """Derive a display name from the last path segment of ``link``.

    The last non-empty segment (looking back one segment when the link ends
    with "/") is split on "-" if it contains one, otherwise on "_"; each word
    is title-cased, the words are joined with spaces and "%" is stripped.
    """
    parts = link.split("/")
    slug = parts[-1]
    # A trailing "/" leaves an empty last segment; fall back to the one before
    # it, guarding against links that contain no "/" at all.
    if slug == '' and len(parts) > 1:
        slug = parts[-2]
    separator = "-" if "-" in slug else "_"
    words = [word.title() for word in slug.split(separator)]
    return " ".join(words).replace("%", "")


# Load the cleaned links: a dict mapping each key to a list of link strings.
with open(input_file) as f:
    data = json.load(f)

dic = {}  # to store overall output
for key, links in data.items():
    dic[key] = [
        {
            "id": make_id(),
            "basename": basename_from_link(link),
            "link": link.replace("https://", ""),
        }
        for link in links
    ]

with open(output_file, "w") as outfile:
    json.dump(dic, outfile)

# Only the 'marked' entries are kept in the final file, so overwrite it with
# that list alone (a missing 'marked' key raises KeyError, as before).
data = dic['marked']
with open("outputsearchready.json", "w", encoding="utf-8") as file:
    file.write(json.dumps(data))
import sendtosearch |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#Libraries and importing | ||
from bs4 import BeautifulSoup | ||
from selenium import webdriver | ||
from selenium.webdriver.chrome.options import Options | ||
from selenium.webdriver.support.ui import WebDriverWait | ||
from selenium.webdriver.support import expected_conditions as EC | ||
from selenium.webdriver.common.by import By | ||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities | ||
from selenium.common.exceptions import TimeoutException | ||
import random | ||
import json | ||
import time | ||
import decouple | ||
from decouple import config | ||
|
||
# Value passed to uc.Remote below as the remote WebDriver endpoint; read
# through python-decouple's config().
SELENIUMCLIENT = config('SELENIUMCLIENT')
print('starting process')

# Chrome settings shared by the local and remote driver variants.
uc = webdriver
chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')  # uncomment to run headless; the extension must then be removed too, not recommended
chrome_options.add_extension('extension_1_38_0_0.crx')
for chrome_flag in (
    '--no-sandbox',
    '--disable-dev-shm-usage',
    "user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
):
    chrome_options.add_argument(chrome_flag)

# wd = uc.Chrome(executable_path='chromedriver', options=chrome_options)  # if local
wd = uc.Remote(SELENIUMCLIENT, options=chrome_options)  # if for remote

# Scraped links, keyed by record name; filled in by link_container().
json_data = {}
#getting the links and setting up json | ||
def link_container(site_name,container_tag,class_tag,html,domain):
    """Collect the links found inside matching containers into ``json_data``.

    Args:
        site_name: key of ``json_data`` under which the links are stored.
        container_tag: tag name of the elements that hold the links.
        class_tag: CSS class the container elements must have.
        html: page source to parse.
        domain: prefix prepended to any href that does not already contain it.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Store under the key that was passed in, not whatever the module-level
    # ``name`` variable happens to hold at call time.
    collected = json_data.setdefault(site_name, [])
    for container in soup.find_all(container_tag, class_=class_tag):
        # href=True skips anchors without an href, which would raise KeyError.
        for link in container.find_all("a", href=True):
            href = link['href']
            # Plain concatenation is kept on purpose: the cleaner step matches
            # on the exact strings this produces.
            collected.append(href if domain in href else domain + href)
#to getting the html of webpage | ||
def request_page(url, settle_seconds=5):
    """Load ``url`` in the shared driver and return the page source.

    Args:
        url: address to open.
        settle_seconds: fixed pause after navigation before the source is
            read (defaults to the previous hard-coded 5 seconds).

    Returns:
        The driver's ``page_source`` after the pause.
    """
    wd.get(url)
    time.sleep(settle_seconds)
    return wd.page_source
#to get the next element | ||
def return_next_ele(html,check_element,next_page):
    """Dismiss the optional bypass button and locate the "next page" link.

    Args:
        html: unused; kept so existing callers keep working.
        check_element: id of a button to click first, or "null" for none.
        next_page: exact ``class`` attribute of the next-page anchor, or
            "null" when the site has no pagination.

    Returns:
        The last matching anchor element on the page, or ``[]`` when none
        becomes clickable (callers compare against ``[]``).
    """
    if check_element != "null":
        bypass_xpath = '//button[@id="{}"]'.format(check_element)
        try:
            # Single attempt: the button is optional, so a timeout is ignored.
            bypass_button = WebDriverWait(wd, 10).until(
                EC.element_to_be_clickable((By.XPATH, bypass_xpath)))
            bypass_button.click()
        except TimeoutException:
            pass

    # "null" is the no-pagination sentinel in the input data; skip the two
    # 10-second waits that could never succeed.
    if next_page == "null":
        return []

    next_xpath = '//a[@class="{}"]'.format(next_page)
    for _attempt in range(2):
        try:
            WebDriverWait(wd, 10).until(
                EC.element_to_be_clickable((By.XPATH, next_xpath)))
        except TimeoutException:
            continue
        # Several anchors may share the class; the last one is used.
        return wd.find_elements(By.XPATH, next_xpath)[-1]
    return []
#getting data from input file | ||
# Read the input file record by record.  Each record is seven lines:
# name, start url, domain prefix, container tag, container class,
# next-link class, bypass button id.
try:
    with open('Input Data.txt', 'r') as input_file:
        while True:
            name = input_file.readline().replace("\n", "")
            # An empty name marks the end of the input; check it before
            # touching json_data so no "" key is created.
            if not name:
                break
            if name not in json_data:
                json_data[name] = []
            url = input_file.readline().replace("\n", "")
            domain = input_file.readline().replace("\n", "")
            container = input_file.readline().replace("\n", "")
            clas = input_file.readline().replace("\n", "")
            next_link = input_file.readline().replace("\n", "")
            bypass = input_file.readline().replace("\n", "")

            # Load the first page, then follow the next-page link.
            html = request_page(url)
            next_element = return_next_ele(html, bypass, next_link)
            old_url = ""
            current_url = wd.current_url
            # The body must run at least once even when there is no
            # next-page link, so single-page sites are still collected.
            while True:
                # Clicking "next" did not change the URL: stop.
                if old_url == current_url:
                    break
                old_url = current_url
                link_container(name, container, clas, wd.page_source, domain)
                time.sleep(random.randint(1, 5))
                if next_element == []:
                    break
                next_element.click()
                html = wd.page_source
                next_element = return_next_ele(html, bypass, next_link)
                current_url = wd.current_url

    # Output the collected data to output.json.
    with open("output.json", "w") as output_file:
        output_file.write(json.dumps(json_data))
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window and would leave the remote session open.
    wd.quit()
import cleaner |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
beautifulsoup4 == 4.10.0 | ||
selenium == 4.1.0 | ||
undetected-chromedriver == 3.0.6 | ||
requests-html == 0.10.0 | ||
output == 1.0.1 | ||
json2table | ||
pysftp | ||
requests | ||
meilisearch | ||
pysftp | ||
python-decouple |
Oops, something went wrong.