import os

import requests
from bs4 import BeautifulSoup

# GitHub folder URL (HTML view) and the matching raw-content base URL
BASE_URL = "https://github.com/DrewThomasson/ebook2audiobook/tree/main/voices"
RAW_BASE_URL = "https://raw.githubusercontent.com/DrewThomasson/ebook2audiobook/main/voices"

# Directory to save downloaded files
DOWNLOAD_DIR = "voices"
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

def get_file_links(folder_url):
    """Get the raw file links from a GitHub folder."""
    response = requests.get(folder_url)
    if response.status_code != 200:
        print(f"Failed to fetch folder content: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    links = []
    # NOTE: this relies on GitHub's server-rendered file listing; the
    # 'js-navigation-open' anchors may not appear in current GitHub HTML
    # (see the API-based sketch below for a more robust alternative).
    for a_tag in soup.find_all('a', class_='js-navigation-open'):
        href = a_tag.get('href')
        if href and '/blob/' in href:  # File link
            # The href looks like /<owner>/<repo>/blob/<branch>/voices/<file>,
            # so keep only the filename and join it with the raw folder URL.
            file_name = href.split('/')[-1]
            links.append(f"{RAW_BASE_URL}/{file_name}")
    return links
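
# The HTML-scraping approach above depends on GitHub's page markup, which
# changes over time. The sketch below (not part of the original script) lists
# the same folder through GitHub's REST "contents" API, which returns JSON
# entries that include a direct download_url for each file. The owner, repo,
# branch, and path are taken from BASE_URL above.
API_URL = "https://api.github.com/repos/DrewThomasson/ebook2audiobook/contents/voices?ref=main"

def get_file_links_via_api(api_url=API_URL):
    """Sketch: list raw file URLs for a GitHub folder via the contents API."""
    response = requests.get(api_url)
    if response.status_code != 200:
        print(f"Failed to query GitHub API: {response.status_code}")
        return []
    entries = response.json()
    # Each entry describes one item in the folder; 'file' entries carry a
    # direct raw download URL.
    return [entry["download_url"] for entry in entries if entry.get("type") == "file"]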

def download_file(url, save_dir):
    """Download a file from a URL to the specified directory."""
    local_filename = os.path.join(save_dir, url.split('/')[-1])
    with requests.get(url, stream=True) as response:
        if response.status_code == 200:
            with open(local_filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print(f"Downloaded: {local_filename}")
        else:
            print(f"Failed to download {url}: {response.status_code}")

def main():
    print("Fetching file links...")
    file_links = get_file_links(BASE_URL)
    if not file_links:
        print("No files found to download.")
        return

    print("Downloading files...")
    for file_link in file_links:
        download_file(file_link, DOWNLOAD_DIR)

# Ensure the script runs only when executed directly
if __name__ == "__main__":
    main()