File size: 4,356 Bytes
bf378bb
 
 
834f51c
bf378bb
834f51c
 
 
 
bf378bb
 
834f51c
 
 
 
bf378bb
834f51c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf378bb
 
 
 
834f51c
bf378bb
 
 
834f51c
bf378bb
834f51c
bf378bb
834f51c
bf378bb
 
 
 
 
 
 
 
834f51c
bf378bb
 
 
834f51c
bf378bb
834f51c
 
bf378bb
834f51c
 
 
 
 
 
 
bf378bb
 
834f51c
bf378bb
834f51c
 
bf378bb
834f51c
 
bf378bb
834f51c
 
bf378bb
834f51c
 
bf378bb
834f51c
 
 
 
 
bf378bb
834f51c
 
 
 
bf378bb
834f51c
 
 
bf378bb
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import openai
import streamlit as st
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
from langchain.text_splitter import RecursiveCharacterTextSplitter

# OpenAI API key input, rendered in the sidebar as a password field.
# The value is stripped because keys pasted from a browser or terminal often
# carry stray whitespace or a trailing newline, which would otherwise be sent
# as part of the credential.
openai.api_key = st.sidebar.text_input('Enter your OpenAI API Key', type='password').strip()

def _extract_video_id(youtube_url):
    """Return the video ID contained in a YouTube URL (a bare ID is returned unchanged)."""
    # Function-scope import keeps this block self-contained.
    from urllib.parse import parse_qs, urlparse

    url = youtube_url.strip()
    # urlparse only separates host, path and query when a scheme is present.
    parsed = urlparse(url if "://" in url else f"https://{url}")

    # Standard watch URLs: https://www.youtube.com/watch?v=<id>&t=42s
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    segments = [segment for segment in parsed.path.split("/") if segment]
    host = (parsed.hostname or "").lower()

    # Short links: https://youtu.be/<id>?si=...
    if (host == "youtu.be" or host.endswith(".youtu.be")) and segments:
        return segments[0]

    # Path-based layouts: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    if len(segments) >= 2 and segments[0] in {"embed", "shorts", "live", "v"}:
        return segments[1]

    # Bare ID or unknown layout: keep the original "text after v=" behaviour,
    # but drop any trailing parameters or fragment.
    return url.split("v=")[-1].split("&")[0].split("#")[0]


def get_transcript(youtube_url):
    """Fetch the transcript text of a YouTube video.

    A manually created transcript is preferred; when none exists, the first
    available (auto-generated) transcript is used instead.

    Args:
        youtube_url: Link to the video (watch, youtu.be, embed, shorts or
            live form) or a bare video ID.

    Returns:
        A ``(full_transcript, language_code)`` tuple, where ``full_transcript``
        is the transcript text joined with single spaces and ``language_code``
        is the language code reported by the selected transcript. On any
        failure an error is shown via ``st.error`` and ``(None, None)`` is
        returned; no exception propagates to the caller.
    """
    try:
        video_id = _extract_video_id(youtube_url)
        if not video_id:
            st.error("Error retrieving transcript: could not find a video ID in the link.")
            return None, None

        # Iterate the transcript list directly instead of calling
        # find_manually_created_transcript(), which requires a list of
        # language codes that is not known up front.
        available = list(YouTubeTranscriptApi.list_transcripts(video_id))
        if not available:
            st.error("Error retrieving transcript: No suitable transcript found.")
            return None, None

        # Prefer a manual transcript; otherwise every entry is generated and
        # the first one is taken.
        transcript = next(
            (candidate for candidate in available if not candidate.is_generated),
            available[0],
        )
        language_code = transcript.language_code

        # Older library releases yield dicts from fetch(); newer ones yield
        # snippet objects exposing a ``text`` attribute. Support both.
        full_transcript = " ".join(
            part["text"] if isinstance(part, dict) else part.text
            for part in transcript.fetch()
        )
        return full_transcript, language_code

    except TranscriptsDisabled:
        st.error("Subtitles are disabled for this video. Cannot retrieve a transcript.")
        return None, None
    except VideoUnavailable:
        st.error("The video is unavailable. Please check the link.")
        return None, None
    except Exception as e:
        # UI boundary: report anything unexpected instead of crashing the app.
        st.error(f"Error retrieving transcript: {str(e)}")
        return None, None

def summarize_with_langchain_and_openai(transcript, language_code, model_name='gpt-3.5-turbo',
                                        *, chunk_size=2000, max_chunks=4, temperature=1):
    """Summarize the beginning of a transcript with an OpenAI chat model.

    The transcript is split into chunks and only the first ``max_chunks``
    chunks are sent to the model, so long transcripts are truncated.

    Args:
        transcript: Full transcript text.
        language_code: Language code inserted into the prompt as the
            requested output language.
        model_name: Name of the OpenAI chat model to call.
        chunk_size: Maximum characters per chunk passed to the text splitter.
        max_chunks: Number of leading chunks to summarize; ``None`` keeps
            all of them.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The summary text taken from the first choice of the API response.

    Raises:
        ValueError: If there is no transcript text to summarize.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0)
    texts = text_splitter.split_text(transcript)
    text_to_summarize = " ".join(texts[:max_chunks])

    # Sending an empty text would make the model invent a summary.
    if not text_to_summarize.strip():
        raise ValueError("The transcript is empty; there is nothing to summarize.")

    # Prepare the prompt for summarization
    system_prompt = 'I want you to act as a Life Coach that can create good summaries!'
    prompt = f'''Summarize the following text in {language_code}.
    Text: {text_to_summarize}

    Add a title to the summary in {language_code}. 
    Include an INTRODUCTION, BULLET POINTS if possible, and a CONCLUSION in {language_code}.'''

    # Start summarizing using OpenAI
    response = openai.ChatCompletion.create(
        model=model_name,
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': prompt}
        ],
        temperature=temperature
    )

    return response['choices'][0]['message']['content']

def main():
    """Render the Streamlit page and run the summarization workflow.

    Flow: read the video link, require an OpenAI API key, and on "Start"
    fetch the transcript, summarize it, show the summary and offer it as a
    download. All failures are reported in the page via ``st.error``.
    """
    st.title('YouTube Video Summarizer')

    # YouTube video input
    link = st.text_input('Enter the link of the YouTube video you want to summarize:')

    # Nothing can be summarized without an API key.
    if not openai.api_key:
        st.error("Please enter your OpenAI API key to proceed.")
        return

    if not st.button('Start'):
        return

    link = link.strip()
    if not link:
        st.error('Please enter a valid YouTube link.')
        return

    progress = st.progress(0)
    status_text = st.empty()

    try:
        status_text.text('Loading the transcript...')
        progress.progress(25)

        # Getting both the transcript and language_code
        transcript, language_code = get_transcript(link)

        if transcript is None:
            # get_transcript already displayed the reason; remove the stale
            # "loading" indicators so they do not linger next to the error.
            status_text.empty()
            progress.empty()
            return

        status_text.text('Creating summary...')
        progress.progress(75)

        model_name = 'gpt-3.5-turbo'
        summary = summarize_with_langchain_and_openai(transcript, language_code, model_name)

        status_text.text('Summary:')
        st.markdown(summary)
        progress.progress(100)

        # The summary is plain text; serving it under a .pdf name produced a
        # file no PDF reader can open, so it is offered as a text file.
        # NOTE(review): clicking the download button reruns the script, and
        # st.button('Start') is then False, so the summary disappears from the
        # page. Persist it in st.session_state if it should remain visible.
        st.download_button(
            'Download Summary as Text',
            summary,
            file_name='summary.txt',
            mime='text/plain',
        )

    except Exception as e:
        # UI boundary: clear the indicators and show the failure in the page.
        status_text.empty()
        progress.empty()
        st.error(f"An error occurred: {str(e)}")

# Run the app only when this file is executed as the main script, so that
# importing the module does not call main().
if __name__ == "__main__":
    main()