import os

import requests

# GitHub repository details for the voices folder.
# Folder on GitHub: https://github.com/DrewThomasson/ebook2audiobook/tree/main/voices
REPO = "DrewThomasson/ebook2audiobook"
BRANCH = "main"
FOLDER = "voices"

# GitHub contents API endpoint for the folder. Scraping the HTML page for
# 'js-navigation-open' anchors no longer works (GitHub renders file listings
# client-side), and joining the scraped '/blob/' path onto the raw base URL
# duplicated the 'main/voices' segment, so the contents API is used instead.
API_URL = f"https://api.github.com/repos/{REPO}/contents/{FOLDER}?ref={BRANCH}"

# Directory to save downloaded files
DOWNLOAD_DIR = "voices"
os.makedirs(DOWNLOAD_DIR, exist_ok=True)


def get_file_links(api_url):
    """Return raw download URLs for every file in a GitHub folder."""
    response = requests.get(api_url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch folder content: {response.status_code}")
        return []

    links = []
    for item in response.json():
        if item["type"] == "file":
            # 'download_url' already points at raw.githubusercontent.com
            links.append(item["download_url"])
        elif item["type"] == "dir":
            # Recurse into subfolders; 'url' is the API endpoint for the dir.
            links.extend(get_file_links(item["url"]))
    return links


def download_file(url, save_dir):
    """Download a file from a URL into the specified directory.

    Note: files from nested folders are flattened into save_dir, so
    identical filenames overwrite each other.
    """
    local_filename = os.path.join(save_dir, url.split("/")[-1])
    with requests.get(url, stream=True, timeout=30) as response:
        if response.status_code != 200:
            print(f"Failed to download {url}: {response.status_code}")
            return
        with open(local_filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    print(f"Downloaded: {local_filename}")


def main():
    print("Fetching file links...")
    file_links = get_file_links(API_URL)
    if not file_links:
        print("No files found to download.")
        return

    print("Downloading files...")
    for file_link in file_links:
        download_file(file_link, DOWNLOAD_DIR)


# Ensure the script runs only when executed directly
if __name__ == "__main__":
    main()
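
# A minimal usage sketch (the filename download_voices.py is an assumption,
# not part of the original script):
#
#   pip install requests
#   python download_voices.py
#
# Files land in ./voices relative to the working directory. Unauthenticated
# GitHub API requests are rate-limited to 60 per hour per IP; if that limit
# is hit, a personal access token can be supplied via GitHub's documented
# Authorization header (GITHUB_TOKEN is a hypothetical environment variable):
#
#   headers = {"Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}"}
#   requests.get(API_URL, headers=headers, timeout=30)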