Commit

released v1.0.0
Wamy-Dev authored Mar 15, 2022
1 parent 60e4bd9 commit c9e9456
Showing 7 changed files with 467 additions and 0 deletions.
175 changes: 175 additions & 0 deletions Input Data.txt
@@ -0,0 +1,175 @@
marked
https://fitgirl-repacks.site/all-my-repacks-a-z
https://fitgirl-repacks.site/
ul
lcp_catlist
lcp_nextlink
smart_push_smio_not_allow
marked
https://gog-games.com/search/all/1/title/asc/any
https://gog-games.com
div
game-blocks grid-view
btn
null
marked
https://masquerade.site/a-z/
https://masquerade.site
div
letter-section
null
null
marked
https://nsw2u.xyz/switch-posts
https://nsw2u.xyz/
div
letter-section
null
null
marked
https://madloader.com/switch-nsp-games-collection
https://madloader.com/
div
entry-inner
null
null
marked
https://nxbrew.com/list-of-games/
https://nxbrew.com/
div
letter-section
null
null
marked
https://www.xcinsp.com/
https://www.xcinsp.com/
ul
dhswp-html-sitemap-post-list dhswp-post-list
null
null
marked
https://archive.org/download/Sony-Playstation-USA-Redump.org-2019-05-27/
https://archive.org/download/Sony-Playstation-USA-Redump.org-2019-05-27/
table
directory-listing-table
null
null
marked
https://hexrom.com/roms/playstation/
https://hexrom.com/
div
col-lg-4 col-sm-6 col-xs-12
next page-numbers
null
marked
https://hexrom.com/roms/playstation-2/
https://hexrom.com/
div
col-lg-4 col-sm-6 col-xs-12
next page-numbers
null
marked
https://dlpsgame.org/list-all-game-ps2/
https://dlpsgame.org/
div
listing-item
null
null
marked
https://gamesmountain.com/playstation_3
https://gamesmountain.com/
h2
entry-title
next page-numbers
null
marked
https://dlpsgame.org/list-all-game-ps3/
https://dlpsgame.org/
li
listing-item
null
null
marked
https://dlpsgame.org/list-all-game-ps4/
https://dlpsgame.org/
li
listing-item
null
null
marked
https://dlxbgame.net/list-all-game-xbox-iso/
https://dlxbgame.net/
div
listing-item
null
null
marked
https://hexrom.com/roms/microsoft-xbox/
https://hexrom.com/
div
col-lg-4 col-sm-6 col-xs-12
next page-numbers
null
marked
https://gamesmountain.com/xbox_360_game
https://gamesmountain.com/
h2
entry-title
next page-numbers
null
marked
https://hexrom.com/roms/xbox-360/
https://hexrom.com/
div
col-lg-4 col-sm-6 col-xs-12
next page-numbers
null
marked
https://nswgame.com/list-all-game-wii/
https://nswgame.com/
li
listing-item
null
null
marked
https://www.emulatorgames.net/roms/nintendo-wii/
https://www.emulatorgames.net/
ul
site-list
page-item
null
marked
https://romskingdom.com/en/download-roms/nintendo-wii
https://romskingdom.com/
div
row row-md
next
null
marked
https://archive.org/download/mame-merged/mame-merged/
https://archive.org/download/mame-merged/mame-merged/
table
directory-listing-table
null
null
marked
https://ia801800.us.archive.org/view_archive.php?archive=/14/items/2020_01_06_fbn/roms/arcade.zip

table
archext
null
null
marked
https://archive.org/download/No-Intro-Collection_2016-01-03_Fixed/
https://archive.org/download/No-Intro-Collection_2016-01-03_Fixed/
table
directory-listing-table
null
null
marked
https://archive.org/download/tosec-2021-02-14-to-2021-08-08-update/TOSEC%20%282021-02-14%20to%202021-08-08%29/
https://archive.org/download/tosec-2021-02-14-to-2021-08-08-update/TOSEC%20%282021-02-14%20to%202021-08-08%29/
table
directory-listing-table
null
null
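For reference, each record in Input Data.txt is seven lines: an output key ("marked"), then the page URL, the site domain, the container tag, the container's CSS class, the next-page link class, and a bypass-button id ("null" where unused). A minimal sketch of a standalone parser for this layout — the field names are assumptions inferred from grabber.py's read order, not defined by the repo:

# Sketch: parse Input Data.txt into (key, fields) records.
# Field names are assumptions inferred from grabber.py's read order.
def read_records(path="Input Data.txt"):
    fields = ("url", "domain", "container_tag", "class_tag", "next_link", "bypass")
    with open(path) as f:
        lines = [ln.rstrip("\n") for ln in f]
    for i in range(0, len(lines) - 6, 7):
        yield lines[i], dict(zip(fields, lines[i + 1:i + 7]))

for key, rec in read_records():
    print(key, rec["url"])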
30 changes: 30 additions & 0 deletions cleaner.py
@@ -0,0 +1,30 @@
import json

#loads the json file produced by grabber.py
try:
    with open("output.json") as json_file:
        strings = json.load(json_file)
except FileNotFoundError:
    print('Json file to be cleaned not found.')

#Single version, used best on a single key or target
#target_key = "switch-1-105518"
#target_string = "https://nsw2u.xyz/aaa-clock-switch-nsp"
#while True:  # user-input version
#    target_key = s if (s := input('Enter Key (leave blank to repeat previous):')) else target_key
#    if target_key == 'q':
#        print('Stopping and Saving')
#        break
#    target_string = s if (s := input('Enter String (leave blank to repeat previous):')) else target_string
#    strings[target_key] = [s for s in strings[target_key] if s != target_string]

#Automated version, used on multiple keys and targets.
#A dict cannot hold the same key twice, so all targets for one key go in a
#single list. Add pairs here; format: 'key2': ['target1', 'target2']
try:
    targets = {'marked': ['https://masquerade.site#a-z-listing-1',
                          'https://nsw2u.xyz/#a-z-listing-2',
                          'https://madloader.com/request/',
                          'https://nxbrew.com/#a-z-listing-1',
                          'https://archive.org/download/mame-merged/mame-merged/../']}
    for target_key, target_strings in targets.items():
        strings[target_key] = [s for s in strings[target_key] if s not in target_strings]
    with open("outputcleaned.json", "w", encoding="utf-8") as file:  # dumps to new json file to be used in totable.py
        file.write(json.dumps(strings))
except KeyError:
    print('Key(s) marked for cleaning nonexistent, completed.')

import forsearch  # chains into the next pipeline step
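To see the cleaning step in isolation, a quick sketch with illustrative data (the example URLs here are made up, not from the repo):

# Sketch of the cleaner's core loop on illustrative data.
strings = {"marked": ["https://a.example/keep", "https://a.example/drop"]}
targets = {"marked": ["https://a.example/drop"]}
for key, bad in targets.items():
    strings[key] = [s for s in strings[key] if s not in bad]
print(strings)  # {'marked': ['https://a.example/keep']}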
Binary file added extension_1_38_0_0.crx
57 changes: 57 additions & 0 deletions forsearch.py
@@ -0,0 +1,57 @@
import json
import random
import string

input_file = 'outputcleaned.json'   # input file
output_file = 'outputsearchready.json'

N = 10    # ID length
count = 0
dic = {}  # stores the overall output

# Opening JSON file
with open(input_file) as f:
    data = json.load(f)

for k in data.keys():
    lst = []
    count += 1
    for sub_k in data[k]:  # access each entry
        # generate an ID of N random uppercase letters/digits
        ID = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(N))
        # take the last non-empty path segment of the URL as the slug
        j = sub_k.split("/")[-1]
        if j == '':
            j = sub_k.split("/")[-2]
        # split the slug on hyphens (or underscores) and title-case each word
        name_parts = j.split("-") if "-" in j else j.split("_")
        name = " ".join(nam.title() for nam in name_parts)
        # print(sub_k.replace("https://", ""), j, name)
        lst.append({"id": ID, "basename": name.replace("%", ""), "link": sub_k.replace("https://", "")})
    dic[k] = lst
    # uncomment the next two lines to stop after three objects (debug check);
    # leave commented for output on all objects
    #if count == 3:
    #    break

with open(output_file, "w") as outfile:
    json.dump(dic, outfile)

# Re-open the result, unwrap the 'marked' key, and overwrite the file so the
# final output is the bare list of entries
with open(output_file) as json_file:
    loaded = json.load(json_file)
data = loaded.pop('marked')
with open(output_file, "w", encoding="utf-8") as out:  # dumps to new json file to be used in totable.py
    out.write(json.dumps(data))

import sendtosearch  # chains into the next pipeline step
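For one URL, the transformation forsearch.py applies looks like this (a sketch using the example link from cleaner.py's comments; the ID is random at runtime):

import json, random, string
link = "https://nsw2u.xyz/aaa-clock-switch-nsp"
slug = link.split("/")[-1] or link.split("/")[-2]   # last non-empty path segment
words = slug.split("-") if "-" in slug else slug.split("_")
name = " ".join(w.title() for w in words)           # "Aaa Clock Switch Nsp"
entry = {
    "id": "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10)),
    "basename": name.replace("%", ""),
    "link": link.replace("https://", ""),
}
print(json.dumps(entry))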
112 changes: 112 additions & 0 deletions grabber.py
@@ -0,0 +1,112 @@
#Libraries and importing
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
import random
import json
import time
from decouple import config

SELENIUMCLIENT = config('SELENIUMCLIENT')
#starting timer
print('starting process')
#setting up chrome settings
uc = webdriver  # alias left over from undetected-chromedriver, which shares this API
chrome_options = webdriver.ChromeOptions()
#chrome_options.add_argument('--headless')  # uncomment to run in headless mode; the extension must also be removed for this to work, not recommended
chrome_options.add_extension('extension_1_38_0_0.crx')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36")
#wd = uc.Chrome(executable_path='chromedriver', options=chrome_options)  # if local
wd = uc.Remote(SELENIUMCLIENT, options=chrome_options)  # if remote

json_data = {}

#collect the links on a page and append them to the output dict
def link_container(site_name, container_tag, class_tag, html, domain):
    soup = BeautifulSoup(html, 'html.parser')
    containers = soup.find_all(container_tag, class_=class_tag)
    for container in containers:
        for link in container.find_all("a"):
            if domain not in link['href']:
                json_data[site_name].append(domain + link['href'])  # relative link: prepend the domain
            else:
                json_data[site_name].append(link['href'])

#get the html of a webpage
def request_page(url):
    wd.get(url)
    time.sleep(5)
    return wd.page_source

#get the next-page element, clicking a bypass button first if one is given
def return_next_ele(check_element, next_page):
    if check_element != "null":
        retries = 1
        while retries <= 1:
            try:
                temp = WebDriverWait(wd, 10).until(
                    EC.element_to_be_clickable((By.XPATH, '//button[@id="{}"]'.format(check_element))))
                temp.click()
                break
            except TimeoutException:
                retries += 1
    next_ele = []
    retries = 1
    while retries <= 2:
        try:
            WebDriverWait(wd, 10).until(
                EC.element_to_be_clickable((By.XPATH, '//a[@class="{}"]'.format(next_page))))
            next_ele = wd.find_elements(By.XPATH, '//a[@class="{}"]'.format(next_page))[-1]
            break
        except TimeoutException:
            retries += 1
    return next_ele

#getting data from input file: one key line, then six field lines per record
input_file = open('Input Data.txt', 'r')
name = input_file.readline().replace("\n", "")
json_data[name] = []
while True:
    url = input_file.readline().replace("\n", "")
    domain = input_file.readline().replace("\n", "")
    container = input_file.readline().replace("\n", "")
    clas = input_file.readline().replace("\n", "")
    next_link = input_file.readline().replace("\n", "")
    bypass = input_file.readline().replace("\n", "")
    #getting page and getting links for output file
    html = request_page(url)
    next_ele = return_next_ele(bypass, next_link)
    old_url = ""
    current_url = wd.current_url
    while next_ele is not None:  # the original 'next is not []' was always True; the loop exits via the breaks below
        if old_url == current_url:
            break
        old_url = current_url
        link_container(name, container, clas, wd.page_source, domain)
        time.sleep(random.randint(1, 5))
        if next_ele == []:  # no next-page link found: single page, stop after collecting it
            break
        next_ele.click()
        next_ele = return_next_ele(bypass, next_link)
        current_url = wd.current_url
    name = input_file.readline().replace("\n", "")
    if not name:
        break  # checked before registering the key, so EOF doesn't add an empty '' entry
    if name not in json_data:
        json_data[name] = []
input_file.close()

#outputting the data to output.json
with open("output.json", "w") as output_file:
    output_file.write(json.dumps(json_data))
wd.quit()  # quit() ends the (remote) session; close() would only close the window

import cleaner  # chains into the next pipeline step
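grabber.py reads SELENIUMCLIENT through python-decouple's config(), so it expects a .env file (or an environment variable) alongside the scripts. A minimal sketch — the address below is an assumption for a local Selenium Grid, not something the repo specifies:

# .env (read by python-decouple) — the URL is a placeholder assumption
SELENIUMCLIENT=http://localhost:4444/wd/hub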
11 changes: 11 additions & 0 deletions requirements.txt
@@ -0,0 +1,11 @@
beautifulsoup4 == 4.10.0
selenium == 4.1.0
undetected-chromedriver == 3.0.6
requests-html == 0.10.0
output == 1.0.1
json2table
pysftp
requests
meilisearch
python-decouple
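Because each script imports the next (grabber.py → cleaner.py → forsearch.py → sendtosearch.py), the whole pipeline runs from a single entry point: install the dependencies with pip install -r requirements.txt, then run python grabber.py.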