drewThomasson committed on
Commit
6db045e
·
verified ·
1 Parent(s): 96cdee1

Create download_github_folder.py

Browse files
Files changed (1) hide show
  1. download_github_folder.py +53 -0
download_github_folder.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
# Web page that lists the contents of the repository folder.
BASE_URL = "https://github.com/DrewThomasson/ebook2audiobook/tree/main/voices"
# Prefix from which raw download URLs for that folder are built.
RAW_BASE_URL = "https://raw.githubusercontent.com/DrewThomasson/ebook2audiobook/main/voices"

# Local directory that receives the downloaded files. It is created as soon
# as this module is loaded; exist_ok makes repeated runs harmless.
DOWNLOAD_DIR = "voices"
os.makedirs(DOWNLOAD_DIR, exist_ok=True)
12
+
13
def get_file_links(folder_url):
    """Collect raw-download URLs for the files listed on a GitHub folder page.

    The page at ``folder_url`` is fetched and scanned for
    ``<a class="js-navigation-open">`` anchors whose ``href`` contains
    ``/blob/``; each such anchor is treated as a file entry.

    NOTE(review): this relies on the folder page containing those anchors in
    its static HTML -- confirm against the markup GitHub currently serves.

    Args:
        folder_url: URL of the GitHub folder page to scan.

    Returns:
        A list of URLs of the form ``RAW_BASE_URL/<file name>``. The list is
        empty when the page cannot be fetched or no file anchors are found.
    """
    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # server that stops responding.
        response = requests.get(folder_url, timeout=30)
    except requests.RequestException as exc:
        # Report connection-level failures the same way as a bad status.
        print(f"Failed to fetch folder content: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to fetch folder content: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    links = []
    for a_tag in soup.find_all('a', class_='js-navigation-open'):
        href = a_tag.get('href')
        if href and '/blob/' in href:  # File link
            # Everything after '/blob/' is '<branch>/<folder>/<file>', while
            # RAW_BASE_URL already ends with the branch and folder. Appending
            # that whole tail would repeat 'main/voices' in the URL, so only
            # the final path segment (the file name) is appended.
            file_name = href.rstrip('/').rsplit('/', 1)[-1]
            links.append(f"{RAW_BASE_URL}/{file_name}")
    return links
28
+
29
def download_file(url, save_dir):
    """Download a file from a URL into the specified directory.

    The local file name is the last segment of the URL's path with
    percent-escapes decoded, so ``my%20voice.wav`` is stored as
    ``my voice.wav`` and a query string never becomes part of the name.
    Progress and failures are reported with ``print``; nothing is returned
    and request errors are not re-raised, so one bad file does not stop a
    batch of downloads.

    Args:
        url: Address of the file to fetch.
        save_dir: Directory in which the file is written; it must already
            exist.
    """
    # Local import: URL parsing is needed only by this function.
    from urllib.parse import unquote, urlsplit

    # Take the name from the path component only, decode it, then reduce it
    # to a bare file name so a decoded '/' cannot point outside save_dir.
    remote_name = unquote(urlsplit(url).path.rstrip('/').rsplit('/', 1)[-1])
    base_name = os.path.basename(remote_name)
    if not base_name:
        print(f"Failed to download {url}: no file name in URL")
        return
    local_filename = os.path.join(save_dir, base_name)

    try:
        # stream=True keeps the body out of memory; the timeout prevents an
        # unresponsive server from blocking the run forever.
        with requests.get(url, stream=True, timeout=30) as response:
            if response.status_code == 200:
                with open(local_filename, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                print(f"Downloaded: {local_filename}")
            else:
                print(f"Failed to download {url}: {response.status_code}")
    except requests.RequestException as exc:
        # If the connection drops mid-transfer, a partially written file may
        # remain at local_filename.
        print(f"Failed to download {url}: {exc}")
40
+
41
def main():
    """Look up the files listed at BASE_URL and save each into DOWNLOAD_DIR."""
    print("Fetching file links...")
    targets = get_file_links(BASE_URL)

    if targets:
        print("Downloading files...")
        for target in targets:
            download_file(target, DOWNLOAD_DIR)
    else:
        # Nothing was listed (or the listing could not be fetched).
        print("No files found to download.")
51
+
52
# Start the download only when this file is executed as a script.
if __name__ == "__main__":
    main()