Compare commits


1 commit

Author          SHA1         Message                            Date
Andre Saddler   2f91ea4aa4   Merge ebd7a47ebe into 82e50c64f0   2024-09-26 10:45:52 -04:00
2 changed files with 27 additions and 46 deletions

mkbsd.py

@@ -1,30 +1,28 @@
 import argparse
-import asyncio
 import json
 import multiprocessing as mp
 import os
 import re
 import time
 import zipfile
 from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor
 from urllib.parse import unquote
 
-import aiohttp
 import imagehash
+import requests
 from PIL import Image
 
 
-async def fetch_json_data(url):
-    async with aiohttp.ClientSession() as session:
-        async with session.get(url) as response:
-            if response.status == 200:
-                text = await response.text()
-                try:
-                    return json.loads(text)
-                except json.JSONDecodeError:
-                    raise Exception(f"Failed to parse JSON data from {url}")
-            else:
-                raise Exception(f"Failed to fetch data. Status code: {response.status}")
+# python mkbsd.py [--zip] [--zip-name CUSTOM_NAME] [--remove-duplicates]
+def fetch_json_data(url):
+    response = requests.get(url)
+    if response.status_code == 200:
+        return response.json()
+    else:
+        raise Exception(
+            f"Failed to fetch JSON data. Status code: {response.status_code}"
+        )
 
 
 def extract_urls(element):
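
The synchronous replacement folds parsing into response.json(), which raises its own error on a malformed body rather than the old version's explicit "Failed to parse JSON data" message. A sketch preserving that distinction with the synchronous call (illustrative, not part of the commit; response.json() raises a ValueError subclass on bad JSON):

import requests

def fetch_json_data(url):
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch JSON data. Status code: {response.status_code}")
    try:
        return response.json()
    except ValueError:
        # keep the old async version's distinct parse-failure message
        raise Exception(f"Failed to parse JSON data from {url}")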
@@ -41,27 +39,19 @@ def extract_urls(element):
     return urls
 
 
-async def download_file(session, url):
+def download_file(url):
     file_name = os.path.basename(unquote(url.split("?")[0]))
     file_name = clean_filename(file_name)
     file_path = os.path.join("downloads", file_name)
     if not os.path.exists(file_path):
-        try:
-            async with session.get(url) as response:
-                if response.status == 200:
-                    with open(file_path, "wb") as f:
-                        while True:
-                            chunk = await response.content.read(8192)
-                            if not chunk:
-                                break
-                            f.write(chunk)
-                    return f"Downloaded: {file_name}"
-                else:
-                    return f"Failed to download {file_name}: HTTP {response.status}"
-        except Exception as e:
-            return f"Error downloading {file_name}: {str(e)}"
+        print(f"Downloading {url}")
+        response = requests.get(url, stream=True)
+        with open(file_path, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
     else:
-        return f"Skipped (already exists): {file_name}"
+        print(f"Skipping {url}")
+    return file_path
 
 
 def clean_filename(filename):
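
The new download_file streams with iter_content but, unlike the async version, never checks the response status, so an HTTP error page can end up saved as an image file. A sketch of the same flow with the check kept, as a drop-in variant relying on the module's existing imports and its clean_filename helper (illustrative, not part of the commit; raise_for_status() is standard requests API):

def download_file(url):
    file_name = clean_filename(os.path.basename(unquote(url.split("?")[0])))
    file_path = os.path.join("downloads", file_name)
    if not os.path.exists(file_path):
        print(f"Downloading {url}")
        response = requests.get(url, stream=True)
        response.raise_for_status()  # refuse to write 4xx/5xx bodies to disk
        with open(file_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    else:
        print(f"Skipping {url}")
    return file_path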
@@ -130,7 +120,7 @@ def remove_duplicates(duplicates):
         print(f"Error removing duplicate: {e}")
 
 
-async def main():
+def main():
     parser = argparse.ArgumentParser(
         description="Download images from JSON data and remove duplicates."
     )
@@ -151,7 +141,7 @@ async def main():
     json_url = "https://storage.googleapis.com/panels-cdn/data/20240730/all.json"
 
     try:
-        json_data = await fetch_json_data(json_url)
+        json_data = fetch_json_data(json_url)
     except Exception as e:
         print(f"Error: {e}")
         return
@@ -162,16 +152,8 @@ async def main():
     if not os.path.exists("downloads"):
         os.makedirs("downloads")
 
     start_time = time.time()
-    async with aiohttp.ClientSession() as session:
-        tasks = [download_file(session, url) for url in urls]
-        for batch in [tasks[i : i + 50] for i in range(0, len(tasks), 50)]:
-            results = await asyncio.gather(*batch)
-            for result in results:
-                print(result)
-
-    end_time = time.time()
-    print(f"Download completed in {end_time - start_time:.2f} seconds")
-
+    with ThreadPoolExecutor(max_workers=10) as executor:
+        executor.map(download_file, urls)
     if args.remove_duplicates:
         print("Searching for duplicate images...")
@@ -190,4 +172,4 @@ async def main():
 
 
 if __name__ == "__main__":
-    asyncio.run(main())
+    main()

requirements.txt

@@ -1,2 +1 @@
 imagehash
-aiohttp
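
A side note on the dependency file: the new synchronous code imports requests and PIL (Pillow), yet the trimmed requirements.txt lists only imagehash. If those packages are not assumed to be preinstalled, a fuller listing would presumably be (hypothetical, unpinned):

imagehash
requests
Pillow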