ebook2audiobook / download_github_folder.py
drewThomasson's picture
Update download_github_folder.py
8407240 verified
raw
history blame
1.88 kB
import os
from urllib.parse import unquote, urlsplit

import requests
from bs4 import BeautifulSoup
# GitHub web page of the folder whose file listing is scraped
BASE_URL = "https://github.com/DrewThomasson/ebook2audiobook/tree/main/voices"
# Raw-content base URL for the same folder (same owner/repo/branch/path on the raw host)
RAW_BASE_URL = "https://raw.githubusercontent.com/DrewThomasson/ebook2audiobook/main/voices"
# Directory to save downloaded files (relative path, so resolved against the current working directory)
DOWNLOAD_DIR = "voices"
# NOTE: this runs at import time, so merely importing the module creates the directory.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)
def get_file_links(folder_url, timeout=30):
    """Get the raw file links from a GitHub folder.

    Fetches the HTML page at ``folder_url``, collects every file anchor
    (an ``<a class="js-navigation-open">`` whose href contains ``/blob/``)
    and maps each one to a download URL under ``RAW_BASE_URL``.

    Args:
        folder_url: URL of the GitHub folder page to scrape.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot hang the script indefinitely.

    Returns:
        A list of raw download URLs (strings). The list is empty when the
        page cannot be fetched or contains no matching file links.
    """
    try:
        response = requests.get(folder_url, timeout=timeout)
    except requests.RequestException as exc:
        # Report network failures the same way as a bad status code.
        print(f"Failed to fetch folder content: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch folder content: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    links = []
    # NOTE(review): this depends on GitHub emitting file rows as
    # <a class="js-navigation-open"> in the served HTML; if the page markup
    # differs, no links are found -- confirm against the live page.
    for a_tag in soup.find_all('a', class_='js-navigation-open'):
        href = a_tag.get('href')
        if not href or '/blob/' not in href:
            continue  # not a file link
        # The text after "/blob/" is "<branch>/<folder>/<file>", and
        # RAW_BASE_URL already ends with "<branch>/<folder>". Appending the
        # whole remainder would duplicate that prefix, so keep only the file
        # name. Assumes the listing links to direct children of the folder.
        file_name = href.rstrip('/').rsplit('/', 1)[-1]
        links.append(f"{RAW_BASE_URL}/{file_name}")
    return links
def download_file(url, save_dir, timeout=30):
    """Download a file from a URL to the specified directory.

    The local file name is the percent-decoded last path segment of ``url``
    (query string ignored). The body is streamed into a temporary ``.part``
    file that is renamed to the final name only after the transfer finishes,
    so an interrupted download never leaves a truncated file under the real
    name.

    Args:
        url: Direct URL of the file to fetch.
        save_dir: Existing directory in which the file is stored.
        timeout: Seconds to wait for the server (connecting, and between
            received bytes) before giving up.

    Returns:
        None. Success or failure is reported on stdout.
    """
    # basename() after unquote() keeps an encoded "/" (%2F) from escaping save_dir.
    file_name = os.path.basename(unquote(urlsplit(url).path))
    local_filename = os.path.join(save_dir, file_name)
    partial_filename = f"{local_filename}.part"

    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download {url}: {response.status_code}")
                return
            with open(partial_filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
    except requests.RequestException as exc:
        # Connection errors and mid-stream failures: drop the incomplete data.
        if os.path.exists(partial_filename):
            os.remove(partial_filename)
        print(f"Failed to download {url}: {exc}")
        return

    os.replace(partial_filename, local_filename)
    print(f"Downloaded: {local_filename}")
def main():
    """Scrape the folder listing at BASE_URL and download each file found.

    Prints progress messages; when the listing yields no links, reports that
    and does nothing else.
    """
    print("Fetching file links...")
    raw_urls = get_file_links(BASE_URL)

    if raw_urls:
        print("Downloading files...")
        for raw_url in raw_urls:
            download_file(raw_url, DOWNLOAD_DIR)
    else:
        print("No files found to download.")
# Start the download only when this file is executed directly as a script,
# not when it is imported from another module.
if __name__ == "__main__":
    main()