Spaces:
Sleeping
Sleeping
add youtube feature
Browse files
app.py
CHANGED
@@ -33,6 +33,8 @@ from huggingface_hub import snapshot_download
|
|
33 |
|
34 |
from gradio import Markdown
|
35 |
|
|
|
|
|
36 |
all_key_names = ['C major', 'G major', 'D major', 'A major',
|
37 |
'E major', 'B major', 'F major', 'Bb major',
|
38 |
'Eb major', 'Ab major', 'Db major', 'Gb major',
|
@@ -709,6 +711,29 @@ def gradio_generate(input_video, input_primer, input_key):
|
|
709 |
output_filename = video2music.generate(input_video, input_primer, input_key)
|
710 |
return str(output_filename)
|
711 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
712 |
|
713 |
title="Video2Music: Suitable Music Generation from Videos using an Affective Multimodal Transformer model"
|
714 |
description_text = """
|
@@ -718,10 +743,15 @@ Generate background music using Video2Music by providing an input video.
|
|
718 |
<a href="https://arxiv.org/abs/2311.00968">Read our paper.</a>
|
719 |
<p/>
|
720 |
"""
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
|
|
|
|
|
|
|
|
|
|
725 |
|
726 |
css = '''
|
727 |
#duplicate-button {
|
@@ -732,21 +762,86 @@ border-radius: 100vh;
|
|
732 |
}
|
733 |
'''
|
734 |
|
735 |
-
# Gradio interface
|
736 |
-
gr_interface = gr.Interface(
|
737 |
-
fn=gradio_generate,
|
738 |
-
inputs=[input_video, input_primer, input_key ],
|
739 |
-
outputs=[output_video],
|
740 |
-
description=description_text,
|
741 |
-
allow_flagging='never',
|
742 |
-
cache_examples=True,
|
743 |
-
)
|
744 |
|
745 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
746 |
# with gr.Blocks() as demo:
|
747 |
with gr.Blocks(css=css) as demo:
|
748 |
title=gr.HTML(f"<h1><center>{title}</center></h1>")
|
749 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
750 |
|
751 |
#demo.queue()
|
752 |
# demo.launch(debug=True)
|
|
|
33 |
|
34 |
from gradio import Markdown
|
35 |
|
36 |
+
from pytube import YouTube
|
37 |
+
|
38 |
all_key_names = ['C major', 'G major', 'D major', 'A major',
|
39 |
'E major', 'B major', 'F major', 'Bb major',
|
40 |
'Eb major', 'Ab major', 'Db major', 'Gb major',
|
|
|
711 |
output_filename = video2music.generate(input_video, input_primer, input_key)
|
712 |
return str(output_filename)
|
713 |
|
714 |
+
def gradio_generate2(input_youtube, input_primer, input_key):
    """Download a YouTube video and generate background music for it.

    The video is saved as ``./youtube/<video_id>.mp4`` and then passed to
    ``video2music.generate`` together with the primer and key. The
    ``./youtube`` directory is deleted and recreated on every call, so it
    only ever holds the most recent download.

    Args:
        input_youtube: URL of the YouTube video to use as input.
        input_primer: Primer value, forwarded unchanged to
            ``video2music.generate``.
        input_key: Key value, forwarded unchanged to
            ``video2music.generate``.

    Returns:
        ``str()`` of the value returned by ``video2music.generate``.

    Raises:
        gr.Error: If the URL is empty, no stream is available, or the
            download fails for any reason.
    """
    url = (input_youtube or "").strip()
    if not url:
        raise gr.Error("Please provide a YouTube URL.")

    # Start from an empty download directory on every request.
    youtube_dir = Path("./youtube")
    if youtube_dir.exists():
        shutil.rmtree(str(youtube_dir))
    youtube_dir.mkdir(parents=True)

    try:
        yt = YouTube(url)
        # Prefer 240p; fall back to the lowest resolution on offer.
        stream = yt.streams.get_by_resolution("240p")
        if stream is None:
            stream = yt.streams.get_lowest_resolution()
        if stream is None:
            raise ValueError("no downloadable stream found")
        fname = yt.video_id + ".mp4"
        stream.download(output_path=str(youtube_dir), filename=fname)
    except Exception as err:
        # Previously the failure was printed and ignored, after which
        # generation ran on a file that did not exist. Stop here instead
        # and report the cause to the user.
        raise gr.Error(f"Could not download the YouTube video: {err}") from err

    input_video = youtube_dir / fname

    output_filename = video2music.generate(input_video, input_primer, input_key)
    return str(output_filename)
|
736 |
+
|
737 |
|
738 |
title="Video2Music: Suitable Music Generation from Videos using an Affective Multimodal Transformer model"
|
739 |
description_text = """
|
|
|
743 |
<a href="https://arxiv.org/abs/2311.00968">Read our paper.</a>
|
744 |
<p/>
|
745 |
"""
|
746 |
+
|
747 |
+
|
748 |
+
# input_video = gr.Video(label="Input Video")
|
749 |
+
# input_primer = gr.Textbox(label="Input Primer", value="C Am F G")
|
750 |
+
# input_key = gr.Dropdown(choices=["C major", "A minor"], value="C major", label="Input Key")
|
751 |
+
# output_video = gr.Video(label="Output Video")
|
752 |
+
# input_youtube = gr.Textbox(label="YouTube URL")
|
753 |
+
|
754 |
+
|
755 |
|
756 |
css = '''
|
757 |
#duplicate-button {
|
|
|
762 |
}
|
763 |
'''
|
764 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
765 |
|
766 |
|
767 |
+
# Gradio interface
|
768 |
+
# gr_interface = gr.Interface(
|
769 |
+
# fn=gradio_generate,
|
770 |
+
# inputs=[input_video, input_primer, input_key ],
|
771 |
+
# outputs=[output_video],
|
772 |
+
# description=description_text,
|
773 |
+
# allow_flagging='never',
|
774 |
+
# cache_examples=True,
|
775 |
+
# )
|
776 |
+
|
777 |
+
# gr_interface2 = gr.Interface(
|
778 |
+
# fn=gradio_generate2,
|
779 |
+
# inputs=[input_youtube, input_primer, input_key ],
|
780 |
+
# outputs=[output_video],
|
781 |
+
# description=description_text,
|
782 |
+
# allow_flagging='never',
|
783 |
+
# cache_examples=True,
|
784 |
+
# )
|
785 |
+
|
786 |
+
def filter(choice):
    """Return visibility updates for the upload row and the YouTube row.

    Args:
        choice: Label of the selected input method.

    Returns:
        A two-item list of ``gr.update(visible=...)`` objects, the first
        for the upload row and the second for the YouTube row, or ``None``
        when ``choice`` is not one of the two known labels.
    """
    # (upload row visible, YouTube row visible) for each known label.
    visibility_by_choice = {
        "Upload Video": (True, False),
        "YouTube URL": (False, True),
    }
    flags = visibility_by_choice.get(choice)
    if flags is None:
        return None
    return [gr.update(visible=shown) for shown in flags]
|
791 |
+
|
792 |
+
|
793 |
# with gr.Blocks() as demo:
|
794 |
with gr.Blocks(css=css) as demo:
|
795 |
title=gr.HTML(f"<h1><center>{title}</center></h1>")
|
796 |
+
gr.Markdown(
|
797 |
+
"""
|
798 |
+
This is the demo for Video2Music: Suitable Music Generation from Videos using an Affective Multimodal Transformer model.
|
799 |
+
[Read our paper](https://arxiv.org/abs/2311.00968).
|
800 |
+
"""
|
801 |
+
)
|
802 |
+
with gr.Row():
|
803 |
+
with gr.Column():
|
804 |
+
radio = gr.Radio(["Upload Video", "YouTube URL"], value="Upload Video", label = "Choose the input method")
|
805 |
+
with gr.Row(visible=True) as mainA:
|
806 |
+
with gr.Column(visible=True) as colA:
|
807 |
+
with gr.Row(visible=True) as rowA:
|
808 |
+
|
809 |
+
with gr.Column():
|
810 |
+
input_video = gr.Video(label="Input Video", height=250)
|
811 |
+
with gr.Column():
|
812 |
+
with gr.Row():
|
813 |
+
input_primer = gr.Textbox(label="Input Primer", value="C Am F G")
|
814 |
+
input_key = gr.Dropdown(choices=["C major", "A minor"], value="C major", label="Input Key")
|
815 |
+
with gr.Column():
|
816 |
+
btn = gr.Button("Generate")
|
817 |
+
|
818 |
+
with gr.Row(visible=False) as rowB:
|
819 |
+
with gr.Column():
|
820 |
+
input_video_yt = gr.Textbox(label="YouTube URL")
|
821 |
+
with gr.Column():
|
822 |
+
with gr.Row():
|
823 |
+
input_primer_yt = gr.Textbox(label="Input Primer", value="C Am F G")
|
824 |
+
input_key_yt = gr.Dropdown(choices=["C major", "A minor"], value="C major", label="Input Key")
|
825 |
+
with gr.Column():
|
826 |
+
btn_yt = gr.Button("Generate")
|
827 |
+
|
828 |
+
with gr.Column():
|
829 |
+
with gr.Row():
|
830 |
+
output_video = gr.Video(label="Output Video")
|
831 |
+
|
832 |
+
radio.change(filter, radio, [rowA, rowB])
|
833 |
+
|
834 |
+
btn.click(
|
835 |
+
fn=gradio_generate,
|
836 |
+
inputs=[input_video,input_primer,input_key],
|
837 |
+
outputs=[output_video],
|
838 |
+
)
|
839 |
+
|
840 |
+
btn_yt.click(
|
841 |
+
fn=gradio_generate2,
|
842 |
+
inputs=[input_video_yt,input_primer_yt,input_key_yt],
|
843 |
+
outputs=[output_video],
|
844 |
+
)
|
845 |
|
846 |
#demo.queue()
|
847 |
# demo.launch(debug=True)
|