Text-to-Speech
Transformers
Safetensors
parler_tts
text2text-generation
annotation
ylacombe committed on
Commit
c302b00
1 Parent(s): 36da796

Update README.md (#3)

Browse files

- Update README.md (66e3f72fc0637aeec2b5913a7f899ea5465ebe3c)

Files changed (1) hide show
  1. README.md +9 -9
README.md CHANGED
@@ -124,10 +124,10 @@ description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder.
124
  prompt = "Hey, how are you doing today?"
125
  description = "A female speaker with a British accent delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
126
 
127
- input_ids = description_tokenizer(description, return_tensors="pt").input_ids.to(device)
128
- prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
129
 
130
- generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
131
  audio_arr = generation.cpu().numpy().squeeze()
132
  sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
133
  ```
@@ -163,10 +163,10 @@ description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder.
163
  prompt = "अरे, तुम आज कैसे हो?"
164
  description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
165
 
166
- input_ids = description_tokenizer(description, return_tensors="pt").input_ids.to(device)
167
- prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
168
 
169
- generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
170
  audio_arr = generation.cpu().numpy().squeeze()
171
  sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
172
  ```
@@ -191,10 +191,10 @@ description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder.
191
  prompt = "अरे, तुम आज कैसे हो?"
192
  description = "Divya's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise."
193
 
194
- input_ids = description_tokenizer(description, return_tensors="pt").input_ids.to(device)
195
- prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
196
 
197
- generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
198
  audio_arr = generation.cpu().numpy().squeeze()
199
  sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
200
  ```
 
124
  prompt = "Hey, how are you doing today?"
125
  description = "A female speaker with a British accent delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
126
 
127
+ description_input_ids = description_tokenizer(description, return_tensors="pt").to(device)
128
+ prompt_input_ids = tokenizer(prompt, return_tensors="pt").to(device)
129
 
130
+ generation = model.generate(input_ids=description_input_ids.input_ids, attention_mask=description_input_ids.attention_mask, prompt_input_ids=prompt_input_ids.input_ids, prompt_attention_mask=prompt_input_ids.attention_mask)
131
  audio_arr = generation.cpu().numpy().squeeze()
132
  sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
133
  ```
 
163
  prompt = "अरे, तुम आज कैसे हो?"
164
  description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
165
 
166
+ description_input_ids = description_tokenizer(description, return_tensors="pt").to(device)
167
+ prompt_input_ids = tokenizer(prompt, return_tensors="pt").to(device)
168
 
169
+ generation = model.generate(input_ids=description_input_ids.input_ids, attention_mask=description_input_ids.attention_mask, prompt_input_ids=prompt_input_ids.input_ids, prompt_attention_mask=prompt_input_ids.attention_mask)
170
  audio_arr = generation.cpu().numpy().squeeze()
171
  sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
172
  ```
 
191
  prompt = "अरे, तुम आज कैसे हो?"
192
  description = "Divya's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise."
193
 
194
+ description_input_ids = description_tokenizer(description, return_tensors="pt").to(device)
195
+ prompt_input_ids = tokenizer(prompt, return_tensors="pt").to(device)
196
 
197
+ generation = model.generate(input_ids=description_input_ids.input_ids, attention_mask=description_input_ids.attention_mask, prompt_input_ids=prompt_input_ids.input_ids, prompt_attention_mask=prompt_input_ids.attention_mask)
198
  audio_arr = generation.cpu().numpy().squeeze()
199
  sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
200
  ```