1- import os , sys , subprocess , logging
1+ import os
2+ import sys
3+ import subprocess
4+ import logging
25from logging .handlers import RotatingFileHandler
36from contextlib import suppress
47
5- import yt_dlp , gradio as gr , librosa , numpy as np , soundfile as sf
8+ import yt_dlp
9+ import gradio as gr
10+ import librosa
11+ import numpy as np
12+ import soundfile as sf
613from pydub import AudioSegment
714
815try :
@@ -43,8 +50,7 @@ def setup_logging(level=logging.DEBUG, log_file="adrvc.log"):
4350 logging .error ("scrpt.py not found in %s" , current_dir )
4451 raise FileNotFoundError ("scrpt.py not found in current directory." )
4552
46-
47-
53+ # --- Helper Functions ---
4854def load_audio (file_path ):
4955 if file_path and os .path .exists (file_path ):
5056 try :
@@ -54,7 +60,6 @@ def load_audio(file_path):
5460 logging .warning ("Audio file not found or failed to load: %s" , file_path )
5561 return None
5662
57-
5863def get_model_folders ():
5964 """Fetch model file paths and extract their parent folder names."""
6065 names = [
@@ -73,7 +78,6 @@ def refresh_folders():
7378 """Refresh folder list."""
7479 return gr .update (choices = get_model_folders ())
7580
76- # --- Helper Functions ---
7781def download_youtube_audio (url , download_dir ):
7882 os .makedirs (download_dir , exist_ok = True )
7983 ydl_opts = {
@@ -110,7 +114,7 @@ def separator_uvr(input_audio, output_dir):
110114 if len (sep_files ) < 2 :
111115 raise RuntimeError ("UVR separation failed (instrumental/vocals)." )
112116
113- # Resolve file paths (in case the returned paths are not absolute)
117+ # Resolve file paths
114118 file0 = sep_files [0 ] if os .path .isabs (sep_files [0 ]) else os .path .join (output_dir , sep_files [0 ])
115119 file1 = sep_files [1 ] if os .path .isabs (sep_files [1 ]) else os .path .join (output_dir , sep_files [1 ])
116120 instrumental = os .path .join (output_dir , 'Instrumental.wav' )
@@ -136,7 +140,6 @@ def separator_uvr(input_audio, output_dir):
136140
137141 return lead , backing , instrumental
138142
139-
140143def run_rvc (f0_up_key , filter_radius , rms_mix_rate , index_rate , hop_length , protect ,
141144 f0_method , input_path , output_path , pth_file , index_file , split_audio ,
142145 clean_audio , clean_strength , export_format , f0_autotune ,
@@ -176,7 +179,6 @@ def run_rvc(f0_up_key, filter_radius, rms_mix_rate, index_rate, hop_length, prot
176179 logging .error ("RVC process failed with error: %s" , e .stderr )
177180 raise RuntimeError ("RVC process failed" ) from e
178181
179-
180182def run_advanced_rvc (model_name , youtube_url , export_format , f0_method , f0_up_key , filter_radius ,
181183 rms_mix_rate , protect , index_rate , hop_length , clean_strength , split_audio ,
182184 clean_audio , f0_autotune , backing_vocal_infer , embedder_model , embedder_model_custom ):
@@ -244,7 +246,7 @@ def run_advanced_rvc(model_name, youtube_url, export_format, f0_method, f0_up_ke
244246
245247 lead_audio = load_audio (rvc_lead )
246248 instrumental_audio = load_audio (instrumental )
247- # If RVC was run for backing vocals, use that result; otherwise, use the original backing.
249+ # Use the RVC-converted backing when backing_vocal_infer is set; otherwise fall back to the original (unconverted) backing.
248250 backing_audio = load_audio (rvc_backing ) if backing_vocal_infer else load_audio (backing )
249251
250252 if not instrumental_audio :
@@ -264,69 +266,93 @@ def run_advanced_rvc(model_name, youtube_url, export_format, f0_method, f0_up_ke
264266 logging .exception ("Error during advanced RVC pipeline: %s" , e )
265267 return f"An error occurred: { e } " , None , None , None
266268
267-
268-
269269# --- Gradio UI ---
270270def inference_tab ():
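271+ # Gradio 3.49.0: gr.Tabs() is meant to hold labelled gr.Tab() children, so all controls are grouped under one "Advanced RVC" tab. NOTE(review): presumably called inside a gr.Blocks() context — confirm at the call site.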
271272 with gr .Tabs ():
272-
273- with gr .Row ():
274- model_name_input = gr .Dropdown (choices = get_model_folders (), label = "Select Model Folder" , interactive = True )
275- refresh_button = gr .Button ("Refresh" )
276- with gr .Row ():
277- youtube_url_input = gr .Textbox (label = "YouTube URL" , value = "https://youtu.be/eCkWlRL3_N0?si=y6xHAs1m8fYVLTUV" )
278- with gr .Row ():
279- export_format_input = gr .Dropdown (
280- label = "Export Format" , choices = ["WAV" , "MP3" , "FLAC" , "OGG" , "M4A" ], value = "WAV"
273+ with gr .Tab ("Advanced RVC" ):
274+ with gr .Row ():
275+ model_name_input = gr .Dropdown (
276+ choices = get_model_folders (),
277+ label = "Select Model Folder" ,
278+ interactive = True
279+ )
280+ refresh_button = gr .Button ("Refresh" )
281+ with gr .Row ():
282+ youtube_url_input = gr .Textbox (
283+ label = "YouTube URL" ,
284+ value = "https://youtu.be/eCkWlRL3_N0?si=y6xHAs1m8fYVLTUV"
285+ )
286+ with gr .Row ():
287+ export_format_input = gr .Dropdown (
288+ label = "Export Format" ,
289+ choices = ["WAV" , "MP3" , "FLAC" , "OGG" , "M4A" ],
290+ value = "WAV"
291+ )
292+ f0_method_input = gr .Dropdown (
293+ label = "F0 Method" ,
294+ choices = ["crepe" , "crepe-tiny" , "rmvpe" , "fcpe" , "hybrid[rmvpe+fcpe]" ],
295+ value = "hybrid[rmvpe+fcpe]"
296+ )
297+ with gr .Row ():
298+ f0_up_key_input = gr .Slider (
299+ label = "F0 Up Key" , minimum = - 24 , maximum = 24 , step = 1 , value = 0
300+ )
301+ filter_radius_input = gr .Slider (
302+ label = "Filter Radius" , minimum = 0 , maximum = 10 , step = 1 , value = 3
303+ )
304+ rms_mix_rate_input = gr .Slider (
305+ label = "RMS Mix Rate" , minimum = 0.0 , maximum = 1.0 , step = 0.1 , value = 0.8
306+ )
307+ protect_input = gr .Slider (
308+ label = "Protect" , minimum = 0.0 , maximum = 0.5 , step = 0.1 , value = 0.5
309+ )
310+ with gr .Row ():
311+ index_rate_input = gr .Slider (
312+ label = "Index Rate" , minimum = 0.0 , maximum = 1.0 , step = 0.1 , value = 0.6
313+ )
314+ hop_length_input = gr .Slider (
315+ label = "Hop Length" , minimum = 1 , maximum = 512 , step = 1 , value = 128
316+ )
317+ clean_strength_input = gr .Slider (
318+ label = "Clean Strength" , minimum = 0.0 , maximum = 1.0 , step = 0.1 , value = 0.7
319+ )
320+ split_audio_input = gr .Checkbox (label = "Split Audio" , value = False )
321+ with gr .Row ():
322+ clean_audio_input = gr .Checkbox (label = "Clean Audio" , value = False )
323+ f0_autotune_input = gr .Checkbox (label = "F0 Autotune" , value = False )
324+ backing_vocal_infer_input = gr .Checkbox (label = "Infer Backing Vocals" , value = False )
325+ with gr .Row ():
326+ embedder_model_input = gr .Dropdown (
327+ label = "Embedder Model" ,
328+ choices = ["contentvec" , "chinese-hubert-base" , "japanese-hubert-base" , "korean-hubert-base" , "custom" ],
329+ value = "contentvec"
330+ )
331+ embedder_model_custom_input = gr .Textbox (
332+ label = "Custom Embedder Model" , value = ""
333+ )
334+ with gr .Row ():
335+ run_button = gr .Button ("Convert" )
336+ with gr .Row ():
337+ output_message = gr .Textbox (label = "Status" )
338+ output_audio = gr .Audio (label = "Final Mixed Audio" , type = "filepath" )
339+ with gr .Row ():
340+ output_lead = gr .Audio (label = "Output Lead Ai Cover" , type = "filepath" )
341+ output_backing = gr .Audio (label = "Output Backing Ai Cover" , type = "filepath" )
342+
343+ refresh_button .click (
344+ refresh_folders ,
345+ outputs = model_name_input
281346 )
282- f0_method_input = gr .Dropdown (
283- label = "F0 Method" ,
284- choices = ["crepe" , "crepe-tiny" , "rmvpe" , "fcpe" , "hybrid[rmvpe+fcpe]" ],
285- value = "hybrid[rmvpe+fcpe]"
347+ run_button .click (
348+ run_advanced_rvc ,
349+ inputs = [
350+ model_name_input , youtube_url_input , export_format_input , f0_method_input ,
351+ f0_up_key_input , filter_radius_input , rms_mix_rate_input , protect_input ,
352+ index_rate_input , hop_length_input , clean_strength_input , split_audio_input ,
353+ clean_audio_input , f0_autotune_input , backing_vocal_infer_input ,
354+ embedder_model_input , embedder_model_custom_input
355+ ],
356+ outputs = [output_message , output_audio , output_lead , output_backing ]
286357 )
287- with gr .Row ():
288- f0_up_key_input = gr .Slider (label = "F0 Up Key" , minimum = - 24 , maximum = 24 , step = 1 , value = 0 )
289- filter_radius_input = gr .Slider (label = "Filter Radius" , minimum = 0 , maximum = 10 , step = 1 , value = 3 )
290- rms_mix_rate_input = gr .Slider (label = "RMS Mix Rate" , minimum = 0.0 , maximum = 1.0 , step = 0.1 , value = 0.8 )
291- protect_input = gr .Slider (label = "Protect" , minimum = 0.0 , maximum = 0.5 , step = 0.1 , value = 0.5 )
292- with gr .Row ():
293- index_rate_input = gr .Slider (label = "Index Rate" , minimum = 0.0 , maximum = 1.0 , step = 0.1 , value = 0.6 )
294- hop_length_input = gr .Slider (label = "Hop Length" , minimum = 1 , maximum = 512 , step = 1 , value = 128 )
295- clean_strength_input = gr .Slider (label = "Clean Strength" , minimum = 0.0 , maximum = 1.0 , step = 0.1 , value = 0.7 )
296- split_audio_input = gr .Checkbox (label = "Split Audio" , value = False )
297- with gr .Row ():
298- clean_audio_input = gr .Checkbox (label = "Clean Audio" , value = False )
299- f0_autotune_input = gr .Checkbox (label = "F0 Autotune" , value = False )
300- backing_vocal_infer_input = gr .Checkbox (label = "Infer Backing Vocals" , value = False )
301- with gr .Row ():
302- embedder_model_input = gr .Dropdown (
303- label = "Embedder Model" ,
304- choices = ["contentvec" , "chinese-hubert-base" , "japanese-hubert-base" , "korean-hubert-base" , "custom" ],
305- value = "contentvec"
306- )
307- embedder_model_custom_input = gr .Textbox (label = "Custom Embedder Model" , value = "" )
308- with gr .Row ():
309- run_button = gr .Button ("Convert" )
310- with gr .Row ():
311- output_message = gr .Textbox (label = "Status" )
312- output_audio = gr .Audio (label = "Final Mixed Audio" , type = "filepath" )
313- with gr .Row ():
314- output_lead = gr .Audio (label = "Output Lead Ai Cover" , type = "filepath" )
315- output_backing = gr .Audio (label = "Output Backing Ai Cover" , type = "filepath" )
316-
317- refresh_button .click (
318- refresh_folders ,
319- outputs = model_name_input
320- )
321- run_button .click (
322- run_advanced_rvc ,
323- inputs = [
324- model_name_input , youtube_url_input , export_format_input , f0_method_input ,
325- f0_up_key_input , filter_radius_input , rms_mix_rate_input , protect_input ,
326- index_rate_input , hop_length_input , clean_strength_input , split_audio_input ,
327- clean_audio_input , f0_autotune_input , backing_vocal_infer_input ,
328- embedder_model_input , embedder_model_custom_input
329- ],
330- outputs = [output_message , output_audio , output_lead , output_backing ]
331- )
332358
0 commit comments