OpenSUNO

Paused

App Files Files Community

ginipick committed 13 days ago

Commit

6a9adba

verified ·

1 Parent(s): a7b49e3

Update app-backup.py

Browse files

Files changed (1) hide show

app-backup.py +175 -189

app-backup.py CHANGED Viewed

@@ -20,6 +20,31 @@ logging.basicConfig(
     ]
 )
 def analyze_lyrics(lyrics, repeat_chorus=2):
     lines = [line.strip() for line in lyrics.split('\n') if line.strip()]
@@ -36,42 +61,55 @@ def analyze_lyrics(lyrics, repeat_chorus=2):
         'chorus': [],
         'bridge': []
     }
-    for line in lines:
         lower_line = line.lower()
         if '[verse]' in lower_line:
             current_section = 'verse'
             sections['verse'] += 1
             continue
         elif '[chorus]' in lower_line:
             current_section = 'chorus'
             sections['chorus'] += 1
             continue
         elif '[bridge]' in lower_line:
             current_section = 'bridge'
             sections['bridge'] += 1
             continue
-        # 현재 섹션에 라인 추가
-        if current_section:
-            section_lines[current_section].append(line)
-    # 만약 코러스가 1회만 있고, repeat_chorus > 1이면 반복해서 붙이기
-    # chorus 섹션 전체 블록을 복제
-    if sections['chorus'] == 1 and repeat_chorus > 1:
-        chorus_block = section_lines['chorus'][:]
         for _ in range(repeat_chorus - 1):
-            section_lines['chorus'].extend(chorus_block)
-    # 라인 수 재계산
-    new_total_lines = sum(len(section_lines[sec]) for sec in section_lines)
-    return sections, (sections['verse'] + sections['chorus'] + sections['bridge']), new_total_lines, {
-        'verse': len(section_lines['verse']),
-        'chorus': len(section_lines['chorus']),
-        'bridge': len(section_lines['bridge'])
-    }
 def calculate_generation_params(lyrics):
     sections, total_sections, total_lines, section_lines = analyze_lyrics(lyrics)
@@ -83,30 +121,31 @@ def calculate_generation_params(lyrics):
         'bridge': 5    # bridge는 한 줄당 5초
     }
-    # 각 섹션별 예상 시간 계산
-    section_durations = {
-        'verse': section_lines['verse'] * time_per_line['verse'],
-        'chorus': section_lines['chorus'] * time_per_line['chorus'],
-        'bridge': section_lines['bridge'] * time_per_line['bridge']
-    }
-    total_duration = sum(section_durations.values())
-    total_duration = max(60, total_duration)  # 최소 60초
-    # 토큰 계산 (더 보수적인 값 사용)
-    base_tokens = 3000  # 기본 토큰 수
-    tokens_per_line = 200  # 줄당 토큰 수
-    total_tokens = base_tokens + (total_lines * tokens_per_line)
-    # 섹션 기반 세그먼트 수 계산
     if sections['chorus'] > 0:
-        num_segments = 3  # 코러스가 있는 경우 3개 세그먼트
     else:
-        num_segments = 2  # 코러스가 없는 경우 2개 세그먼트
-    # 토큰 수 제한
-    max_tokens = min(8000, total_tokens)  # 최대 8000 토큰으로 제한
     return {
         'max_tokens': max_tokens,
@@ -118,43 +157,15 @@ def calculate_generation_params(lyrics):
         'has_chorus': sections['chorus'] > 0
     }
-def get_audio_duration(file_path):
-    try:
-        import librosa
-        duration = librosa.get_duration(path=file_path)
-        return duration
-    except Exception as e:
-        logging.error(f"Failed to get audio duration: {e}")
-        return None
-# 언어 감지 및 모델 선택 함수
 def detect_and_select_model(text):
-    if re.search(r'[\u3131-\u318E\uAC00-\uD7A3]', text):  # 한글
         return "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
-    elif re.search(r'[\u4e00-\u9fff]', text):  # 중국어
         return "m-a-p/YuE-s1-7B-anneal-zh-cot"
-    elif re.search(r'[\u3040-\u309F\u30A0-\u30FF]', text):  # 일본어
         return "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
-    else:  # 영어/기타
-        return "m-a-p/YuE-s1-7B-anneal-en-cot"
-# GPU 설정 최적화
-def optimize_gpu_settings():
-    if torch.cuda.is_available():
-        torch.backends.cuda.matmul.allow_tf32 = True
-        torch.backends.cudnn.benchmark = True
-        torch.backends.cudnn.deterministic = False
-        torch.backends.cudnn.enabled = True
-        torch.cuda.empty_cache()
-        torch.cuda.set_device(0)
-        logging.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
-        logging.info(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
     else:
-        logging.warning("GPU not available!")
 def install_flash_attn():
     try:
@@ -176,17 +187,13 @@ def install_flash_attn():
         except ImportError:
             logging.info("Installing flash-attn...")
-        try:
-            subprocess.run(
-                ["pip", "install", "flash-attn", "--no-build-isolation"],
-                check=True,
-                capture_output=True
-            )
-            logging.info("flash-attn installed successfully!")
-            return True
-        except subprocess.CalledProcessError:
-            logging.warning("Failed to install flash-attn via pip, skipping...")
-            return False
     except Exception as e:
         logging.warning(f"Failed to install flash-attn: {e}")
@@ -194,19 +201,27 @@ def install_flash_attn():
 def initialize_system():
     optimize_gpu_settings()
-    has_flash_attn = install_flash_attn()
-    from huggingface_hub import snapshot_download
-    folder_path = './inference/xcodec_mini_infer'
-    os.makedirs(folder_path, exist_ok=True)
-    logging.info(f"Created folder at: {folder_path}")
-    snapshot_download(
-        repo_id="m-a-p/xcodec_mini_infer",
-        local_dir="./inference/xcodec_mini_infer",
-        resume_download=True
-    )
     try:
         os.chdir("./inference")
@@ -215,7 +230,7 @@ def initialize_system():
         logging.error(f"Directory error: {e}")
         raise
-@lru_cache(maxsize=50)
 def get_cached_file_path(content_hash, prefix):
     return create_temp_file(content_hash, prefix)
@@ -247,84 +262,50 @@ def get_last_mp3_file(output_dir):
     mp3_files_with_path.sort(key=os.path.getmtime, reverse=True)
     return mp3_files_with_path[0]
-def optimize_model_selection(lyrics, genre):
-    model_path = detect_and_select_model(lyrics)
-    params = calculate_generation_params(lyrics)
-    # 코러스 존재 여부에 따른 설정 조정
-    has_chorus = params['sections']['chorus'] > 0
-    # 토큰 수 계산
-    tokens_per_segment = params['max_tokens'] // params['num_segments']
-    model_config = {
-        "m-a-p/YuE-s1-7B-anneal-en-cot": {
-            "max_tokens": params['max_tokens'],
-            "temperature": 0.8,
-            "batch_size": 8,
-            "num_segments": params['num_segments'],
-            "estimated_duration": params['estimated_duration']
-        },
-        "m-a-p/YuE-s1-7B-anneal-jp-kr-cot": {
-            "max_tokens": params['max_tokens'],
-            "temperature": 0.7,
-            "batch_size": 8,
-            "num_segments": params['num_segments'],
-            "estimated_duration": params['estimated_duration']
-        },
-        "m-a-p/YuE-s1-7B-anneal-zh-cot": {
-            "max_tokens": params['max_tokens'],
-            "temperature": 0.7,
-            "batch_size": 8,
-            "num_segments": params['num_segments'],
-            "estimated_duration": params['estimated_duration']
-        }
-    }
-    # 코러스가 있는 경우 토큰 수 증가
-    if has_chorus:
-        for config in model_config.values():
-            config['max_tokens'] = int(config['max_tokens'] * 1.5)  # 50% 더 많은 토큰 할당
-    return model_path, model_config[model_path], params
 def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
     genre_txt_path = None
     lyrics_txt_path = None
     try:
-        # 모델 선택 및 설정
         model_path, config, params = optimize_model_selection(lyrics_txt_content, genre_txt_content)
         logging.info(f"Selected model: {model_path}")
         logging.info(f"Lyrics analysis: {params}")
-        # 코러스 섹션 확인 및 로깅
         has_chorus = params['sections']['chorus'] > 0
         estimated_duration = params.get('estimated_duration', 90)
-        # 토큰 수와 세그먼트 수 조정
         if has_chorus:
-            actual_max_tokens = min(8000, int(config['max_tokens'] * 1.2))  # 20% 증가, 최대 8000
-            actual_num_segments = 3
         else:
-            actual_max_tokens = config['max_tokens']
-            actual_num_segments = 2
         logging.info(f"Estimated duration: {estimated_duration} seconds")
         logging.info(f"Has chorus sections: {has_chorus}")
         logging.info(f"Using segments: {actual_num_segments}, tokens: {actual_max_tokens}")
-        # 임시 파일 생성
         genre_txt_path = create_temp_file(genre_txt_content, prefix="genre_")
         lyrics_txt_path = create_temp_file(lyrics_txt_content, prefix="lyrics_")
         output_dir = "./output"
         os.makedirs(output_dir, exist_ok=True)
         empty_output_folder(output_dir)
-        # 기본 명령어 구성
         command = [
             "python", "infer.py",
             "--stage1_model", model_path,
@@ -332,19 +313,13 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
             "--genre_txt", genre_txt_path,
             "--lyrics_txt", lyrics_txt_path,
             "--run_n_segments", str(actual_num_segments),
-            "--stage2_batch_size", "4",  # 배치 사이즈 감소
             "--output_dir", output_dir,
             "--cuda_idx", "0",
-            "--max_new_tokens", str(actual_max_tokens)
         ]
-        # GPU 설정
-        if torch.cuda.is_available():
-            command.append("--disable_offload_model")
-        # GPU 설정
-        # CUDA 환경 변수 설정
         env = os.environ.copy()
         if torch.cuda.is_available():
             env.update({
@@ -352,7 +327,8 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
                 "CUDA_HOME": "/usr/local/cuda",
                 "PATH": f"/usr/local/cuda/bin:{env.get('PATH', '')}",
                 "LD_LIBRARY_PATH": f"/usr/local/cuda/lib64:{env.get('LD_LIBRARY_PATH', '')}",
-                "PYTORCH_CUDA_ALLOC_CONF": f"max_split_size_mb:512"
             })
         # transformers 캐시 마이그레이션 처리
@@ -362,7 +338,6 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
         except Exception as e:
             logging.warning(f"Cache migration warning (non-critical): {e}")
-        # 명령 실행
         process = subprocess.run(
             command,
             env=env,
@@ -371,7 +346,6 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
             text=True
         )
-        # 실행 결과 로깅
         logging.info(f"Command output: {process.stdout}")
         if process.stderr:
             logging.error(f"Command error: {process.stderr}")
@@ -381,7 +355,6 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
             logging.error(f"Command: {' '.join(command)}")
             raise RuntimeError(f"Inference failed: {process.stderr}")
-        # 결과 처리
         last_mp3 = get_last_mp3_file(output_dir)
         if last_mp3:
             try:
@@ -391,7 +364,6 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
                     logging.info(f"Audio duration: {duration:.2f} seconds")
                     logging.info(f"Expected duration: {estimated_duration} seconds")
-                    # 생성된 음악이 너무 짧은 경우 경고
                     if duration < estimated_duration * 0.8:
                         logging.warning(f"Generated audio is shorter than expected: {duration:.2f}s < {estimated_duration:.2f}s")
             except Exception as e:
@@ -405,27 +377,55 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
         logging.error(f"Inference error: {e}")
         raise
     finally:
-        # 임시 파일 정리
-        if genre_txt_path and os.path.exists(genre_txt_path):
-            try:
-                os.remove(genre_txt_path)
-                logging.debug(f"Removed temporary file: {genre_txt_path}")
-            except Exception as e:
-                logging.warning(f"Failed to remove temporary file {genre_txt_path}: {e}")
-        if lyrics_txt_path and os.path.exists(lyrics_txt_path):
-            try:
-                os.remove(lyrics_txt_path)
-                logging.debug(f"Removed temporary file: {lyrics_txt_path}")
-            except Exception as e:
-                logging.warning(f"Failed to remove temporary file {lyrics_txt_path}: {e}")
 def main():
-    # Gradio 인터페이스
     with gr.Blocks() as demo:
         with gr.Column():
             gr.Markdown("# Open SUNO: Full-Song Generation (Multi-Language Support)")
             with gr.Row():
                 with gr.Column():
@@ -462,10 +462,8 @@ def main():
                     submit_btn = gr.Button("Generate Music", variant="primary")
                     music_out = gr.Audio(label="Generated Audio")
-            # 다국어 예제
             gr.Examples(
                 examples=[
-                    # 영어 예제
                     [
                         "female blues airy vocal bright vocal piano sad romantic guitar jazz",
                         """[verse]
@@ -490,36 +488,27 @@ Guiding me back homeward, making my heart rejoice
 Don't let this moment fade, hold me close tonight
 With you here beside me, everything's alright
 Can't imagine life alone, don't want to let you go
-Stay with me forever, let our love just flow
-                        """
                     ],
-                    # 한국어 예제
                     [
                         "K-pop bright energetic synth dance electronic",
                         """[verse]
 언젠가 마주한 눈빛 속에서
-우린 서로를 알아보았지
 [chorus]
 다시 한 번 내게 말해줘
-너의 진심을 숨기지 말아 줘
 [verse]
 어두운 밤을 지날 때마다
-너의 목소리를 떠올려
 [chorus]
 다시 한 번 내게 말해줘
-너의 진심을 숨기지 말아 줘
-                        """
                     ]
                 ],
                 inputs=[genre_txt, lyrics_txt]
             )
-        # 시스템 초기화
         initialize_system()
         def update_info(lyrics):
@@ -533,9 +522,6 @@ Stay with me forever, let our love just flow
                 f"Verses: {sections['verse']}, Chorus: {sections['chorus']} (Expected full length including chorus)"
             )
-        # 이벤트 핸들러
         lyrics_txt.change(
             fn=update_info,
             inputs=[lyrics_txt],
@@ -558,5 +544,5 @@ if __name__ == "__main__":
         share=True,
         show_api=True,
         show_error=True,
-        max_threads=2
-    )

     ]
 )
+def optimize_gpu_settings():
+    if torch.cuda.is_available():
+        # GPU 메모리 관리 최적화
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.benchmark = True
+        torch.backends.cudnn.enabled = True
+        torch.backends.cudnn.deterministic = False
+        # L40S에 최적화된 메모리 설정
+        torch.cuda.empty_cache()
+        torch.cuda.set_device(0)
+        # CUDA 스트림 최적화
+        torch.cuda.Stream(0)
+        # 메모리 할당 최적화
+        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
+        logging.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
+        logging.info(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
+        # L40S 특화 설정
+        if 'L40S' in torch.cuda.get_device_name(0):
+            torch.cuda.set_per_process_memory_fraction(0.95)
 def analyze_lyrics(lyrics, repeat_chorus=2):
     lines = [line.strip() for line in lyrics.split('\n') if line.strip()]
         'chorus': [],
         'bridge': []
     }
+    last_section = None
+    # 마지막 섹션 태그 찾기
+    for i, line in enumerate(lines):
+        if '[verse]' in line.lower() or '[chorus]' in line.lower() or '[bridge]' in line.lower():
+            last_section = i
+    for i, line in enumerate(lines):
         lower_line = line.lower()
+        # 섹션 태그 처리
         if '[verse]' in lower_line:
+            if current_section:  # 이전 섹션의 라인들 저장
+                section_lines[current_section].extend(lines[last_section_start:i])
             current_section = 'verse'
             sections['verse'] += 1
+            last_section_start = i + 1
             continue
         elif '[chorus]' in lower_line:
+            if current_section:
+                section_lines[current_section].extend(lines[last_section_start:i])
             current_section = 'chorus'
             sections['chorus'] += 1
+            last_section_start = i + 1
             continue
         elif '[bridge]' in lower_line:
+            if current_section:
+                section_lines[current_section].extend(lines[last_section_start:i])
             current_section = 'bridge'
             sections['bridge'] += 1
+            last_section_start = i + 1
             continue
+    # 마지막 섹션의 라인들 추가
+    if current_section and last_section_start < len(lines):
+        section_lines[current_section].extend(lines[last_section_start:])
+    # 코러스 반복 처리
+    if sections['chorus'] > 0 and repeat_chorus > 1:
+        original_chorus = section_lines['chorus'][:]
         for _ in range(repeat_chorus - 1):
+            section_lines['chorus'].extend(original_chorus)
+    # 섹션별 라인 수 확인 로깅
+    logging.info(f"Section line counts - Verse: {len(section_lines['verse'])}, "
+                f"Chorus: {len(section_lines['chorus'])}, "
+                f"Bridge: {len(section_lines['bridge'])}")
+    return sections, (sections['verse'] + sections['chorus'] + sections['bridge']), len(lines), section_lines
 def calculate_generation_params(lyrics):
     sections, total_sections, total_lines, section_lines = analyze_lyrics(lyrics)
         'bridge': 5    # bridge는 한 줄당 5초
     }
+    # 각 섹션별 예상 시간 계산 (마지막 섹션 포함)
+    section_durations = {}
+    for section_type in ['verse', 'chorus', 'bridge']:
+        lines_count = len(section_lines[section_type])
+        section_durations[section_type] = lines_count * time_per_line[section_type]
+    # 전체 시간 계산 (여유 시간 추가)
+    total_duration = sum(duration for duration in section_durations.values())
+    total_duration = max(60, int(total_duration * 1.2))  # 20% 여유 시간 추가
+    # 토큰 계산 (마지막 섹션을 위한 추가 토큰)
+    base_tokens = 3000
+    tokens_per_line = 200
+    extra_tokens = 1000  # 마지막 섹션을 위한 추가 토큰
+    total_tokens = base_tokens + (total_lines * tokens_per_line) + extra_tokens
+    # 세그먼트 수 계산 (마지막 섹션을 위한 추가 세그먼트)
     if sections['chorus'] > 0:
+        num_segments = 4  # 코러스가 있는 경우 4개 세그먼트
     else:
+        num_segments = 3  # 코러스가 없는 경우 3개 세그먼트
+    # 토큰 수 제한 (더 큰 제한)
+    max_tokens = min(12000, total_tokens)  # 최대 토큰 수 증가
     return {
         'max_tokens': max_tokens,
         'has_chorus': sections['chorus'] > 0
     }
 def detect_and_select_model(text):
+    if re.search(r'[\u3131-\u318E\uAC00-\uD7A3]', text):
         return "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
+    elif re.search(r'[\u4e00-\u9fff]', text):
         return "m-a-p/YuE-s1-7B-anneal-zh-cot"
+    elif re.search(r'[\u3040-\u309F\u30A0-\u30FF]', text):
         return "m-a-p/YuE-s1-7B-anneal-jp-kr-cot"
     else:
+        return "m-a-p/YuE-s1-7B-anneal-en-cot"
 def install_flash_attn():
     try:
         except ImportError:
             logging.info("Installing flash-attn...")
+        subprocess.run(
+            ["pip", "install", "flash-attn", "--no-build-isolation"],
+            check=True,
+            capture_output=True
+        )
+        logging.info("flash-attn installed successfully!")
+        return True
     except Exception as e:
         logging.warning(f"Failed to install flash-attn: {e}")
 def initialize_system():
     optimize_gpu_settings()
+    with ThreadPoolExecutor(max_workers=4) as executor:
+        futures = []
+        futures.append(executor.submit(install_flash_attn))
+        from huggingface_hub import snapshot_download
+        folder_path = './inference/xcodec_mini_infer'
+        os.makedirs(folder_path, exist_ok=True)
+        logging.info(f"Created folder at: {folder_path}")
+        futures.append(executor.submit(
+            snapshot_download,
+            repo_id="m-a-p/xcodec_mini_infer",
+            local_dir="./inference/xcodec_mini_infer",
+            resume_download=True
+        ))
+        for future in futures:
+            future.result()
     try:
         os.chdir("./inference")
         logging.error(f"Directory error: {e}")
         raise
+@lru_cache(maxsize=100)
 def get_cached_file_path(content_hash, prefix):
     return create_temp_file(content_hash, prefix)
     mp3_files_with_path.sort(key=os.path.getmtime, reverse=True)
     return mp3_files_with_path[0]
+def get_audio_duration(file_path):
+    try:
+        import librosa
+        duration = librosa.get_duration(path=file_path)
+        return duration
+    except Exception as e:
+        logging.error(f"Failed to get audio duration: {e}")
+        return None
 def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
     genre_txt_path = None
     lyrics_txt_path = None
     try:
         model_path, config, params = optimize_model_selection(lyrics_txt_content, genre_txt_content)
         logging.info(f"Selected model: {model_path}")
         logging.info(f"Lyrics analysis: {params}")
         has_chorus = params['sections']['chorus'] > 0
         estimated_duration = params.get('estimated_duration', 90)
+        # 세그먼트 및 토큰 수 설정
         if has_chorus:
+            actual_max_tokens = min(12000, int(config['max_tokens'] * 1.3))  # 30% 더 많은 토큰
+            actual_num_segments = min(5, params['num_segments'] + 2)  # 추가 세그먼트
         else:
+            actual_max_tokens = min(10000, int(config['max_tokens'] * 1.2))
+            actual_num_segments = min(4, params['num_segments'] + 1)
         logging.info(f"Estimated duration: {estimated_duration} seconds")
         logging.info(f"Has chorus sections: {has_chorus}")
         logging.info(f"Using segments: {actual_num_segments}, tokens: {actual_max_tokens}")
         genre_txt_path = create_temp_file(genre_txt_content, prefix="genre_")
         lyrics_txt_path = create_temp_file(lyrics_txt_content, prefix="lyrics_")
         output_dir = "./output"
         os.makedirs(output_dir, exist_ok=True)
         empty_output_folder(output_dir)
+        # 수정된 command - 지원되지 않는 인수 제거
         command = [
             "python", "infer.py",
             "--stage1_model", model_path,
             "--genre_txt", genre_txt_path,
             "--lyrics_txt", lyrics_txt_path,
             "--run_n_segments", str(actual_num_segments),
+            "--stage2_batch_size", "16",
             "--output_dir", output_dir,
             "--cuda_idx", "0",
+            "--max_new_tokens", str(actual_max_tokens),
+            "--disable_offload_model"  # GPU 메모리 최적화를 위해 추가
         ]
         env = os.environ.copy()
         if torch.cuda.is_available():
             env.update({
                 "CUDA_HOME": "/usr/local/cuda",
                 "PATH": f"/usr/local/cuda/bin:{env.get('PATH', '')}",
                 "LD_LIBRARY_PATH": f"/usr/local/cuda/lib64:{env.get('LD_LIBRARY_PATH', '')}",
+                "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512",
+                "CUDA_LAUNCH_BLOCKING": "0"
             })
         # transformers 캐시 마이그레이션 처리
         except Exception as e:
             logging.warning(f"Cache migration warning (non-critical): {e}")
         process = subprocess.run(
             command,
             env=env,
             text=True
         )
         logging.info(f"Command output: {process.stdout}")
         if process.stderr:
             logging.error(f"Command error: {process.stderr}")
             logging.error(f"Command: {' '.join(command)}")
             raise RuntimeError(f"Inference failed: {process.stderr}")
         last_mp3 = get_last_mp3_file(output_dir)
         if last_mp3:
             try:
                     logging.info(f"Audio duration: {duration:.2f} seconds")
                     logging.info(f"Expected duration: {estimated_duration} seconds")
                     if duration < estimated_duration * 0.8:
                         logging.warning(f"Generated audio is shorter than expected: {duration:.2f}s < {estimated_duration:.2f}s")
             except Exception as e:
         logging.error(f"Inference error: {e}")
         raise
     finally:
+        for path in [genre_txt_path, lyrics_txt_path]:
+            if path and os.path.exists(path):
+                try:
+                    os.remove(path)
+                    logging.debug(f"Removed temporary file: {path}")
+                except Exception as e:
+                    logging.warning(f"Failed to remove temporary file {path}: {e}")
+def optimize_model_selection(lyrics, genre):
+    model_path = detect_and_select_model(lyrics)
+    params = calculate_generation_params(lyrics)
+    has_chorus = params['sections']['chorus'] > 0
+    tokens_per_segment = params['max_tokens'] // params['num_segments']
+    model_config = {
+        "m-a-p/YuE-s1-7B-anneal-en-cot": {
+            "max_tokens": params['max_tokens'],
+            "temperature": 0.8,
+            "batch_size": 16,
+            "num_segments": params['num_segments'],
+            "estimated_duration": params['estimated_duration']
+        },
+        "m-a-p/YuE-s1-7B-anneal-jp-kr-cot": {
+            "max_tokens": params['max_tokens'],
+            "temperature": 0.7,
+            "batch_size": 16,
+            "num_segments": params['num_segments'],
+            "estimated_duration": params['estimated_duration']
+        },
+        "m-a-p/YuE-s1-7B-anneal-zh-cot": {
+            "max_tokens": params['max_tokens'],
+            "temperature": 0.7,
+            "batch_size": 16,
+            "num_segments": params['num_segments'],
+            "estimated_duration": params['estimated_duration']
+        }
+    }
+    if has_chorus:
+        for config in model_config.values():
+            config['max_tokens'] = int(config['max_tokens'] * 1.5)
+    return model_path, model_config[model_path], params
 def main():
     with gr.Blocks() as demo:
         with gr.Column():
             gr.Markdown("# Open SUNO: Full-Song Generation (Multi-Language Support)")
             with gr.Row():
                 with gr.Column():
                     submit_btn = gr.Button("Generate Music", variant="primary")
                     music_out = gr.Audio(label="Generated Audio")
             gr.Examples(
                 examples=[
                     [
                         "female blues airy vocal bright vocal piano sad romantic guitar jazz",
                         """[verse]
 Don't let this moment fade, hold me close tonight
 With you here beside me, everything's alright
 Can't imagine life alone, don't want to let you go
+Stay with me forever, let our love just flow"""
                     ],
                     [
                         "K-pop bright energetic synth dance electronic",
                         """[verse]
 언젠가 마주한 눈빛 속에서
 [chorus]
 다시 한 번 내게 말해줘
 [verse]
 어두운 밤을 지날 때마다
 [chorus]
 다시 한 번 내게 말해줘
+"""
                     ]
                 ],
                 inputs=[genre_txt, lyrics_txt]
             )
         initialize_system()
         def update_info(lyrics):
                 f"Verses: {sections['verse']}, Chorus: {sections['chorus']} (Expected full length including chorus)"
             )
         lyrics_txt.change(
             fn=update_info,
             inputs=[lyrics_txt],
         share=True,
         show_api=True,
         show_error=True,
+        max_threads=8
+    )