import os

import requests

# GitHub repository details for the voices folder.
# Folder on GitHub: https://github.com/DrewThomasson/ebook2audiobook/tree/main/voices
REPO = "DrewThomasson/ebook2audiobook"
BRANCH = "main"
FOLDER = "voices"

# GitHub contents API endpoint for the folder. Scraping the HTML page for
# 'js-navigation-open' anchors no longer works (GitHub renders file listings
# client-side), and joining the scraped '/blob/' path onto the raw base URL
# duplicated the 'main/voices' segment, so the contents API is used instead.
API_URL = f"https://api.github.com/repos/{REPO}/contents/{FOLDER}?ref={BRANCH}"

# Directory to save downloaded files
DOWNLOAD_DIR = "voices"
os.makedirs(DOWNLOAD_DIR, exist_ok=True)


def get_file_links(api_url):
    """Return raw download URLs for every file in a GitHub folder."""
    response = requests.get(api_url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch folder content: {response.status_code}")
        return []

    links = []
    for item in response.json():
        if item["type"] == "file":
            # 'download_url' already points at raw.githubusercontent.com
            links.append(item["download_url"])
        elif item["type"] == "dir":
            # Recurse into subfolders; 'url' is the API endpoint for the dir.
            links.extend(get_file_links(item["url"]))
    return links


def download_file(url, save_dir):
    """Download a file from a URL into the specified directory.

    Note: files from nested folders are flattened into save_dir, so
    identical filenames overwrite each other.
    """
    local_filename = os.path.join(save_dir, url.split("/")[-1])
    with requests.get(url, stream=True, timeout=30) as response:
        if response.status_code != 200:
            print(f"Failed to download {url}: {response.status_code}")
            return
        with open(local_filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    print(f"Downloaded: {local_filename}")


def main():
    print("Fetching file links...")
    file_links = get_file_links(API_URL)
    if not file_links:
        print("No files found to download.")
        return

    print("Downloading files...")
    for file_link in file_links:
        download_file(file_link, DOWNLOAD_DIR)


# Ensure the script runs only when executed directly
if __name__ == "__main__":
    main()
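
# A minimal usage sketch (the filename download_voices.py is an assumption,
# not part of the original script):
#
#   pip install requests
#   python download_voices.py
#
# Files land in ./voices relative to the working directory. Unauthenticated
# GitHub API requests are rate-limited to 60 per hour per IP; if that limit
# is hit, a personal access token can be supplied via GitHub's documented
# Authorization header (GITHUB_TOKEN is a hypothetical environment variable):
#
#   headers = {"Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}"}
#   requests.get(API_URL, headers=headers, timeout=30)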