Skip to content

Commit

Permalink
Update TensorScraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
popcord authored Aug 14, 2024
1 parent 51ea4bc commit e2fffed
Showing 1 changed file with 38 additions and 8 deletions.
46 changes: 38 additions & 8 deletions TensorScraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
while True:
os.system("cls || clear")
# Prompt user for selection
# Prompt the user for which model category to scrape.
# NOTE: option 5 is spelled "LYCORIS" to match the value assigned below
# (TO_SCRAP = "LYCORIS") and the JSON filename "lycoris_data.json";
# the previous prompt misspelled it as "LYCORYS".
TO_SCRAP = input("What do you want to scrap?\n1 - Models/Checkpoints\n2 - LORAs\n3 - DORAs\n4 - LOCONs\n5 - LYCORIS\n6 - EMBEDDING\n>>> ")

# Process user input
if TO_SCRAP == "1":
Expand All @@ -21,10 +21,30 @@
TO_SCRAP = "LORA"
JSON_FILENAME = "loras_data.json" # Update JSON filename for LORAs
break
elif TO_SCRAP == "3":
TO_SCRAP = "DORA"
JSON_FILENAME = "doras_data.json" # Update JSON filename for DORAs
break
elif TO_SCRAP == "4":
TO_SCRAP = "LOCON"
JSON_FILENAME = "locons_data.json" # Update JSON filename for LOCONs
break
elif TO_SCRAP == "5":
TO_SCRAP = "LYCORIS"
JSON_FILENAME = "lycoris_data.json" # Update JSON filename for LYCORIS
break
elif TO_SCRAP == "6":
TO_SCRAP = "EMBEDDING"
JSON_FILENAME = "embedding_data.json" # Update JSON filename for EMBEDDING
break
else:
print("Invalid input. Please select only 1 or 2")
print("Invalid input. Please select only between 1-6")
time.sleep(3)


# How to use
print("How to use: To find what you're looking for, simply scroll until you find something that suits your needs. For more precise results, you can also use research tools, tags, and filters available on the website.\n"+"="*10)



def save_webpage(url, filename, stype):
Expand All @@ -44,7 +64,7 @@ def save_webpage(url, filename, stype):
driver.get(url)

# Wait for user confirmation
input("#"*50+"\nPress Enter when ready to save the webpage...\n")
input("#"*50+"\nPress Enter when you are ready to save the webpage...\n")
time.sleep(WAIT_TIME) # Let the page load completely

# Get the page source
Expand All @@ -70,12 +90,14 @@ def parse_html_to_json(html_content):
with open(JSON_FILENAME, 'r', encoding='utf-8') as json_file:
data = json.load(json_file)
else:
data = {'SDXL': {}, 'SD': {}}
data = {'SD': {}, 'SDXL': {}, 'SD3': {}, 'Kolors': {}, 'HunyuanDiT': {}}

# Find all <a> tags
for a_tag in soup.find_all('a', class_='group'):
href = a_tag.get('href')
model_id = href.split('/')[-1] if href else None
model_id = href.split('/')[-2] if href else None
if model_id is not None and not model_id.isdigit():
model_id = href.split('/')[-1] if href else None

h3_tag = a_tag.find('h3')
if h3_tag:
Expand All @@ -92,10 +114,19 @@ def parse_html_to_json(html_content):

# Sort the scraped model into its base-model bucket using the badge text
# found in div_tag. Each branch records only the model_id (the URL prefix
# is added by the consumer), and skips names already present so re-runs
# do not overwrite earlier entries.
# BUG FIX: the Kolors and HunyuanDiT branches previously tested their own
# key but wrote into data['SD3'] (copy-paste error), so those models were
# mis-filed under SD3 and the 'Kolors'/'HunyuanDiT' buckets stayed empty.
if div_tag and " XL " in div_tag.text:
    if model_name not in data['SDXL']:
        data['SDXL'][model_name] = f"{model_id}"
elif div_tag and " SD3 " in div_tag.text:
    if model_name not in data['SD3']:
        data['SD3'][model_name] = f"{model_id}"
elif div_tag and " Kolors " in div_tag.text:
    if model_name not in data['Kolors']:
        data['Kolors'][model_name] = f"{model_id}"
elif div_tag and " HunyuanDiT " in div_tag.text:
    if model_name not in data['HunyuanDiT']:
        data['HunyuanDiT'][model_name] = f"{model_id}"
else:
    # No recognized badge: default bucket is classic SD.
    if model_name not in data['SD']:
        data['SD'][model_name] = f"{model_id}"


return data
Expand Down Expand Up @@ -128,7 +159,6 @@ def main():
os.system("cls || clear")
if os.path.exists(HTML_FILENAME):
os.remove(HTML_FILENAME)
print(data)
input("Press Enter to exit...")


Expand Down

0 comments on commit e2fffed

Please sign in to comment.