ebook2audiobook

Sleeping

App Files Files Community

ebook2audiobook / download_github_folder.py

drewThomasson

Update download_github_folder.py

8407240 verified about 1 month ago

raw

history blame

1.88 kB

	import os
	import requests
	from bs4 import BeautifulSoup

	# GitHub folder page that is fetched and scanned for file links
	BASE_URL = "https://github.com/DrewThomasson/ebook2audiobook/tree/main/voices"
	# Prefix used to build the raw download URL of each file that is found
	RAW_BASE_URL = "https://raw.githubusercontent.com/DrewThomasson/ebook2audiobook/main/voices"

	# Directory to save downloaded files.
	# NOTE: it is created as a module-level side effect, i.e. as soon as this
	# file is imported or run; exist_ok=True makes repeated runs harmless.
	DOWNLOAD_DIR = "voices"
	os.makedirs(DOWNLOAD_DIR, exist_ok=True)

	def get_file_links(folder_url):
	    """Collect raw-download URLs for the files listed on a GitHub folder page.

	    Fetches ``folder_url``, scans the returned HTML for anchors that carry the
	    ``js-navigation-open`` class and whose ``href`` contains ``/blob/``, and
	    maps each of them to ``RAW_BASE_URL/<file name>``.

	    Args:
	        folder_url: URL of the GitHub folder page to scan.

	    Returns:
	        A list of raw-file URLs. The list is empty when the page cannot be
	        fetched (network error or non-200 status) or when no matching anchor
	        is present in the HTML.
	    """
	    try:
	        # A timeout keeps the script from hanging forever on a stalled socket.
	        response = requests.get(folder_url, timeout=30)
	    except requests.RequestException as exc:
	        # Same best-effort contract as the non-200 branch: report, return [].
	        print(f"Failed to fetch folder content: {exc}")
	        return []
	    if response.status_code != 200:
	        print(f"Failed to fetch folder content: {response.status_code}")
	        return []

	    soup = BeautifulSoup(response.text, 'html.parser')
	    links = []
	    # NOTE(review): this depends on GitHub serving file anchors with the
	    # 'js-navigation-open' class in the static HTML -- confirm that the
	    # current page markup still does so.
	    for a_tag in soup.find_all('a', class_='js-navigation-open'):
	        href = a_tag.get('href')
	        if not href or '/blob/' not in href:
	            continue  # not a file link
	        # Everything after '/blob/' is '<branch>/<folder path>/<file>', while
	        # RAW_BASE_URL already ends with the branch and folder. Appending the
	        # whole remainder would duplicate that part of the path, so only the
	        # final path segment (the file name) is appended.
	        file_name = href.rstrip('/').rsplit('/', 1)[-1]
	        links.append(f"{RAW_BASE_URL}/{file_name}")
	    return links

	def download_file(url, save_dir):
	    """Download a file from a URL to the specified directory.

	    The local file name is the last ``/``-separated segment of ``url``. The
	    body is streamed in 8 KiB chunks into ``<name>.part`` and renamed to the
	    final name only after the whole response has been written, so an
	    interrupted transfer never leaves a truncated file under the real name.

	    Args:
	        url: Address of the file to fetch.
	        save_dir: Existing directory the file is written into.

	    Returns:
	        None. Success or failure is reported on stdout; network errors and
	        non-200 responses are reported and do not raise, so a caller looping
	        over many URLs continues with the next one.
	    """
	    local_filename = os.path.join(save_dir, url.split('/')[-1])
	    partial_filename = local_filename + ".part"
	    try:
	        # The timeout bounds the connect and each read, not the whole transfer.
	        with requests.get(url, stream=True, timeout=30) as response:
	            if response.status_code != 200:
	                print(f"Failed to download {url}: {response.status_code}")
	                return
	            with open(partial_filename, 'wb') as f:
	                for chunk in response.iter_content(chunk_size=8192):
	                    f.write(chunk)
	    except requests.RequestException as exc:
	        print(f"Failed to download {url}: {exc}")
	        # Drop whatever was written before the connection failed.
	        if os.path.exists(partial_filename):
	            os.remove(partial_filename)
	        return

	    # Publish the complete file under its final name in one step.
	    os.replace(partial_filename, local_filename)
	    print(f"Downloaded: {local_filename}")

	def main():
	    """Look up the files under BASE_URL and save each one into DOWNLOAD_DIR.

	    Prints progress messages along the way; when the lookup yields nothing,
	    a notice is printed and no download is attempted.
	    """
	    print("Fetching file links...")
	    discovered = get_file_links(BASE_URL)

	    if discovered:
	        print("Downloading files...")
	        for raw_url in discovered:
	            download_file(raw_url, DOWNLOAD_DIR)
	    else:
	        print("No files found to download.")

	# Entry point: start the download only when this file is executed as a
	# script, not when it is imported from another module.
	if __name__ == "__main__":
	    main()