Andre Saddler 2024-09-27 01:35:35 -04:00 committed by GitHub
commit 54d206cd5d
2 changed files with 182 additions and 63 deletions

mkbsd.py (243 changed lines)

@@ -1,76 +1,193 @@
 # Licensed under the WTFPL License
-import os
-import time
-import aiohttp
+import argparse
 import asyncio
-from urllib.parse import urlparse
-url = 'https://storage.googleapis.com/panels-api/data/20240916/media-1a-i-p~s'
+import json
+import multiprocessing as mp
+import os
+import re
+import time
+import zipfile
+from collections import defaultdict
+from urllib.parse import unquote
-async def delay(ms):
-    await asyncio.sleep(ms / 1000)
+import aiohttp
+import imagehash
+from PIL import Image
-async def download_image(session, image_url, file_path):
+async def fetch_json_data(url):
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url) as response:
+            if response.status == 200:
+                text = await response.text()
+                try:
+                    return json.loads(text)
+                except json.JSONDecodeError:
+                    raise Exception(f"Failed to parse JSON data from {url}")
+            else:
+                raise Exception(f"Failed to fetch data. Status code: {response.status}")
+def extract_urls(element):
+    urls = []
+    if isinstance(element, dict):
+        for key, value in element.items():
+            if key == "url":
+                urls.append(value)
+            else:
+                urls.extend(extract_urls(value))
+    elif isinstance(element, list):
+        for item in element:
+            urls.extend(extract_urls(item))
+    return urls
+async def download_file(session, url):
+    file_name = os.path.basename(unquote(url.split("?")[0]))
+    file_name = clean_filename(file_name)
+    file_path = os.path.join("downloads", file_name)
+    if not os.path.exists(file_path):
+        try:
+            async with session.get(url) as response:
+                if response.status == 200:
+                    with open(file_path, "wb") as f:
+                        while True:
+                            chunk = await response.content.read(8192)
+                            if not chunk:
+                                break
+                            f.write(chunk)
+                    return f"Downloaded: {file_name}"
+                else:
+                    return f"Failed to download {file_name}: HTTP {response.status}"
+        except Exception as e:
+            return f"Error downloading {file_name}: {str(e)}"
+    else:
+        return f"Skipped (already exists): {file_name}"
+def clean_filename(filename):
+    sanitized_name = filename.replace("~", " ")
+    sanitized_name = re.sub(r'[<>:"/\\|?*]', "_", sanitized_name)
+    sanitized_name = re.sub(r"[\s_]+", " ", sanitized_name).strip()
+    return sanitized_name
+def zip_directory(path, zip_name):
+    with zipfile.ZipFile(zip_name, "w", zipfile.ZIP_DEFLATED) as zipf:
+        for root, _, files in os.walk(path):
+            for file in files:
+                file_path = os.path.join(root, file)
+                arcname = os.path.relpath(file_path, path)
+                zipf.write(file_path, arcname)
+    print(f"Created zip file: {zip_name}")
+def compute_hash(filepath):
     try:
-        async with session.get(image_url) as response:
-            if response.status != 200:
-                raise Exception(f"Failed to download image: {response.status}")
-            content = await response.read()
-            with open(file_path, 'wb') as f:
-                f.write(content)
+        with Image.open(filepath) as img:
+            return imagehash.phash(img, hash_size=8), filepath
     except Exception as e:
-        print(f"Error downloading image: {str(e)}")
+        print(f"Error processing {filepath}: {e}")
+        return None
+def find_duplicate_images(directory, threshold=2):
+    image_files = [
+        os.path.join(directory, f)
+        for f in os.listdir(directory)
+        if f.lower().endswith((".jpg", ".jpeg", ".png"))
+    ]
+    image_files.sort(key=os.path.getsize)
+    with mp.Pool(mp.cpu_count()) as pool:
+        results = pool.map(compute_hash, image_files)
+    hash_groups = defaultdict(list)
+    for result in filter(None, results):
+        hash_value, filepath = result
+        hash_groups[hash_value].append(filepath)
+    duplicates = []
+    for hash_value, filepaths in hash_groups.items():
+        if len(filepaths) > 1:
+            for i in range(len(filepaths)):
+                for j in range(i + 1, len(filepaths)):
+                    duplicates.append((filepaths[i], filepaths[j]))
+    return duplicates
+def remove_duplicates(duplicates):
+    for image1, image2 in duplicates:
+        try:
+            if os.path.getsize(image1) < os.path.getsize(image2):
+                os.remove(image1)
+                print(f"Removed duplicate: {image1}")
+            else:
+                os.remove(image2)
+                print(f"Removed duplicate: {image2}")
+        except Exception as e:
+            print(f"Error removing duplicate: {e}")
 async def main():
+    parser = argparse.ArgumentParser(
+        description="Download images from JSON data and remove duplicates."
+    )
+    parser.add_argument(
+        "--zip", action="store_true", help="Create a zip file of the downloaded images"
+    )
+    parser.add_argument(
+        "--zip-name",
+        type=str,
+        help="Custom name for the zip file (default: downloads.zip)",
+    )
+    parser.add_argument(
+        "--remove-duplicates",
+        action="store_true",
+        help="Remove duplicate images after download",
+    )
+    args = parser.parse_args()
+    json_url = "https://storage.googleapis.com/panels-cdn/data/20240730/all.json"
     try:
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url) as response:
-                if response.status != 200:
-                    raise Exception(f"⛔ Failed to fetch JSON file: {response.status}")
-                json_data = await response.json()
-                data = json_data.get('data')
-                if not data:
-                    raise Exception('⛔ JSON does not have a "data" property at its root.')
-                download_dir = os.path.join(os.getcwd(), 'downloads')
-                if not os.path.exists(download_dir):
-                    os.makedirs(download_dir)
-                    print(f"📁 Created directory: {download_dir}")
-                file_index = 1
-                for key, subproperty in data.items():
-                    if subproperty and subproperty.get('dhd'):
-                        image_url = subproperty['dhd']
-                        print(f"🔍 Found image URL!")
-                        parsed_url = urlparse(image_url)
-                        ext = os.path.splitext(parsed_url.path)[-1] or '.jpg'
-                        filename = f"{file_index}{ext}"
-                        file_path = os.path.join(download_dir, filename)
-                        await download_image(session, image_url, file_path)
-                        print(f"🖼️ Saved image to {file_path}")
-                        file_index += 1
-                        await delay(250)
+        json_data = await fetch_json_data(json_url)
     except Exception as e:
-        print(f"Error: {str(e)}")
+        print(f"Error: {e}")
+        return
+    urls = extract_urls(json_data)
+    print(f"Found {len(urls)} URLs")
+    if not os.path.exists("downloads"):
+        os.makedirs("downloads")
+    start_time = time.time()
+    async with aiohttp.ClientSession() as session:
+        tasks = [download_file(session, url) for url in urls]
+        for batch in [tasks[i : i + 50] for i in range(0, len(tasks), 50)]:
+            results = await asyncio.gather(*batch)
+            for result in results:
+                print(result)
+    end_time = time.time()
+    print(f"Download completed in {end_time - start_time:.2f} seconds")
+    if args.remove_duplicates:
+        print("Searching for duplicate images...")
+        duplicates = find_duplicate_images("downloads")
+        if duplicates:
+            print(f"Found {len(duplicates)} pairs of duplicate images.")
+            remove_duplicates(duplicates)
+        else:
+            print("No duplicate images found.")
+    if args.zip:
+        zip_name = args.zip_name if args.zip_name else "downloads.zip"
+        if not zip_name.endswith(".zip"):
+            zip_name += ".zip"
+        zip_directory("downloads", zip_name)
 def ascii_art():
     print("""
  /$$      /$$ /$$   /$$ /$$$$$$$   /$$$$$$  /$$$$$$$
 | $$$    /$$$| $$  /$$/| $$__  $$ /$$__  $$| $$__  $$
 | $$$$  /$$$$| $$ /$$/ | $$  \\ $$| $$  \\__/| $$  \\ $$
 | $$ $$/$$ $$| $$$$$/  | $$$$$$$ |  $$$$$$ | $$  | $$
 | $$  $$$| $$| $$  $$  | $$__  $$ \\____  $$| $$  | $$
 | $$\\  $ | $$| $$\\  $$ | $$  \\ $$ /$$  \\ $$| $$  | $$
 | $$ \\/  | $$| $$ \\  $$| $$$$$$$/|  $$$$$$/| $$$$$$$/
 |__/     |__/|__/  \\__/|_______/  \\______/ |_______/""")
     print("")
     print("🤑 Starting downloads from your favorite sellout grifter's wallpaper app...")
 if __name__ == "__main__":
     ascii_art()
     time.sleep(5)
     asyncio.run(main())

requirements.txt (new file, 2 additions)

@@ -0,0 +1,2 @@
+imagehash
+aiohttp
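
For reference, a possible invocation of the updated script using the flags introduced in this commit. The pip/python commands and the example archive name wallpapers.zip are assumptions for illustration, not part of the diff; the flag names come from the argparse setup above.

    # install the new dependencies (Pillow is pulled in by imagehash)
    pip install -r requirements.txt
    # download everything, prune near-duplicate images, and bundle the result
    python mkbsd.py --remove-duplicates --zip --zip-name wallpapers.zip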