import os
import requests
from bs4 import BeautifulSoup
# GitHub folder URL
BASE_URL = "https://github.com/DrewThomasson/ebook2audiobook/tree/main/voices"
RAW_BASE_URL = "https://raw.githubusercontent.com/DrewThomasson/ebook2audiobook/main/voices"
# Directory to save downloaded files
DOWNLOAD_DIR = "voices"
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

def get_file_links(folder_url):
    """Get the raw file links from a GitHub folder page."""
    response = requests.get(folder_url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch folder content: {response.status_code}")
        return []
    soup = BeautifulSoup(response.text, 'html.parser')
    links = []
    # File entries link to /blob/ paths; directories link to /tree/ paths.
    for a_tag in soup.find_all('a', class_='js-navigation-open'):
        href = a_tag.get('href')
        if href and '/blob/' in href:  # File link
            file_path = href.split('/blob/')[-1]
            links.append(f"{RAW_BASE_URL}/{file_path}")
    return links
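
# Note: the scraper above targets GitHub's older HTML (the 'js-navigation-open'
# anchor class); the current GitHub UI renders directory listings from embedded
# JSON, so the selector may find nothing. Below is a minimal alternative sketch
# using GitHub's public contents API. The endpoint and the 'type' /
# 'download_url' fields are part of the documented REST API; the function name
# and default arguments are illustrative. It can be swapped into main() if
# scraping returns no links.
def get_file_links_api(owner="DrewThomasson", repo="ebook2audiobook",
                       path="voices", ref="main"):
    """Get raw file links for a repo folder via the GitHub contents API."""
    api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref}"
    response = requests.get(api_url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch folder content: {response.status_code}")
        return []
    # A directory listing comes back as a JSON array; file entries carry a
    # direct 'download_url' on raw.githubusercontent.com.
    return [item["download_url"] for item in response.json()
            if item["type"] == "file"]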

def download_file(url, save_dir):
    """Download a file from a URL to the specified directory."""
    local_filename = os.path.join(save_dir, url.split('/')[-1])
    with requests.get(url, stream=True, timeout=60) as response:
        if response.status_code == 200:
            with open(local_filename, 'wb') as f:
                # Stream in 8 KiB chunks to avoid loading large files into memory.
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print(f"Downloaded: {local_filename}")
        else:
            print(f"Failed to download {url}: {response.status_code}")

def main():
    print("Fetching file links...")
    file_links = get_file_links(BASE_URL)
    if not file_links:
        print("No files found to download.")
        return
    print("Downloading files...")
    for file_link in file_links:
        download_file(file_link, DOWNLOAD_DIR)

# Ensure the script runs only when executed directly
if __name__ == "__main__":
    main()