<!-- specialized-agents / static/indic-text-to-speech.html · pvanand: "Update static/indic-text-to-speech.html" · f317957 (verified) -->
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Indic Audio Tools</title>
  <!-- Vue 2 is required: the inline script uses the `new Vue({ el, data })` API.
       NOTE(review): the exact version pin was lost to e-mail obfuscation
       ("[email protected]" in the URL); the 2.x line is used here, confirm the intended release. -->
  <script src="https://cdn.jsdelivr.net/npm/vue@2/dist/vue.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js"></script>
  <!-- Provides the global `Sanscript` used to transliterate Sanskrit input.
       NOTE(review): the version pin was lost the same way; confirm the intended release. -->
  <script src="https://cdn.jsdelivr.net/npm/@sanskrit-coders/sanscript@1/sanscript.min.js"></script>
  <link href="https://unpkg.com/tailwindcss@^2/dist/tailwind.min.css" rel="stylesheet">
  <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css" rel="stylesheet">
  <!-- Plyr supplies the custom audio player UI (stylesheet + script). -->
  <link rel="stylesheet" href="https://cdn.plyr.io/3.6.8/plyr.css">
  <script src="https://cdn.plyr.io/3.6.8/plyr.polyfilled.js"></script>
</head>
<body class="bg-gray-100">
<div id="app" class="container mx-auto px-4 py-8">
<h1 class="text-3xl font-bold text-center mb-8 text-indigo-600">Indic Audio Tools</h1>
<!-- Tabs: two toggle buttons that switch `activeTab`; aria-pressed exposes the selected one to assistive technology -->
<div class="flex justify-center mb-6">
  <div class="inline-flex rounded-md shadow-sm" role="group" aria-label="Tool selection">
    <!-- The label colour is left to the state classes so it stays readable while a white (inactive) tab has focus -->
    <button type="button" @click="activeTab = 'transcription'"
      :class="{ 'bg-indigo-600 text-white': activeTab === 'transcription', 'bg-white text-indigo-600': activeTab !== 'transcription' }"
      :aria-pressed="activeTab === 'transcription' ? 'true' : 'false'"
      class="px-4 py-2 text-sm font-medium border border-indigo-600 rounded-l-lg focus:z-10 focus:ring-2 focus:ring-indigo-500 transition-colors duration-200">
      Audio Transcription
    </button>
    <button type="button" @click="activeTab = 'tts'"
      :class="{ 'bg-indigo-600 text-white': activeTab === 'tts', 'bg-white text-indigo-600': activeTab !== 'tts' }"
      :aria-pressed="activeTab === 'tts' ? 'true' : 'false'"
      class="px-4 py-2 text-sm font-medium border border-indigo-600 rounded-r-lg focus:z-10 focus:ring-2 focus:ring-indigo-500 transition-colors duration-200">
      Text-to-Speech
    </button>
  </div>
</div>
<!-- Transcription Tab Content -->
<div v-if="activeTab === 'transcription'" class="max-w-2xl mx-auto bg-white rounded-lg shadow-md p-6">
  <!-- Source 1: upload an existing audio file -->
  <div class="mb-4">
    <label for="audioFile" class="block text-sm font-medium text-gray-700 mb-2">Select Audio File</label>
    <input type="file" id="audioFile" @change="handleFileChange" accept="audio/*" class="w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500">
  </div>
  <!-- Source 2: record from the microphone -->
  <div class="mb-4 flex items-center">
    <!-- Idle and recording colours are mutually exclusive, so the two sets of utilities never compete on stylesheet order -->
    <button type="button" @click="toggleRecording" class="mr-4 px-4 py-2 text-white rounded-md focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-red-500" :class="isRecording ? 'bg-gray-600 hover:bg-gray-700' : 'bg-red-500 hover:bg-red-600'">
      <i class="fas" :class="isRecording ? 'fa-stop' : 'fa-microphone'" aria-hidden="true"></i>
      {{ isRecording ? 'Stop Recording' : 'Record Audio' }}
    </button>
    <span v-if="isRecording" class="text-red-500">Recording... {{ recordingTime }}s</span>
    <span v-else-if="audioBlob" class="text-green-600">Recording complete ({{ recordingTime }}s)</span>
  </div>
  <!-- Shown only for an uploaded file; the button is disabled while a request is in flight -->
  <div class="flex justify-center" v-if="audioFile">
    <button type="button" @click="transcribeAudio" class="px-4 py-2 bg-indigo-600 text-white rounded-md hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500" :disabled="isLoading">
      <span v-if="!isLoading">Transcribe Uploaded Audio</span>
      <span v-else class="flex items-center">
        <svg class="animate-spin -ml-1 mr-3 h-5 w-5 text-white" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" aria-hidden="true">
          <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
          <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
        </svg>
        Transcribing...
      </span>
    </button>
  </div>
  <div v-if="transcription" class="mt-6 bg-gray-100 p-4 rounded-lg">
    <h3 class="text-lg font-semibold text-gray-700 mb-2">Transcription Result</h3>
    <p class="text-gray-600">{{ transcription }}</p>
  </div>
</div>
<!-- Text-to-Speech Tab Content -->
<div v-if="activeTab === 'tts'" class="max-w-2xl mx-auto bg-white rounded-lg shadow-md p-6">
  <!-- Language picker: options come from `languages`, plus a fixed Sanskrit entry -->
  <div class="mb-4">
    <label for="language" class="block text-sm font-medium text-gray-700 mb-2">Select Language</label>
    <select id="language" v-model="selectedLanguage" @change="updateExample" class="w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500">
      <option v-for="lang in languages" :key="lang.code" :value="lang.code">{{ lang.name }}</option>
      <option value="san">Sanskrit (Devanagari)</option>
    </select>
  </div>
  <!-- The placeholder shows the example sentence for the selected language -->
  <div class="mb-4">
    <label for="inputText" class="block text-sm font-medium text-gray-700 mb-2">Enter Text</label>
    <textarea id="inputText" v-model="inputText" rows="4" class="w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500" :placeholder="currentExample"></textarea>
  </div>
  <div class="flex justify-center">
    <button type="button" @click="generateSpeech" class="px-4 py-2 bg-indigo-600 text-white rounded-md hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500" :disabled="isLoading">
      <span v-if="!isLoading">Generate Speech</span>
      <span v-else class="flex items-center">
        <svg class="animate-spin -ml-1 mr-3 h-5 w-5 text-white" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" aria-hidden="true">
          <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
          <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
        </svg>
        Generating...
      </span>
    </button>
  </div>
  <div v-if="audioUrl" class="mt-6 bg-gray-100 p-4 rounded-lg">
    <div class="flex items-center justify-between mb-2">
      <h3 class="text-lg font-semibold text-gray-700">Generated Audio</h3>
      <button type="button" @click="saveAudio" class="px-3 py-1 bg-green-600 text-white text-sm rounded-md hover:bg-green-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-green-500">
        Save Audio
      </button>
    </div>
    <!-- `controls` keeps the audio playable through the native UI if the Plyr script is unavailable -->
    <audio ref="audioPlayer" :src="audioUrl" controls></audio>
  </div>
</div>
</div>
<script>
new Vue({
el: '#app',
// Reactive state for the whole page; both tabs share this one object.
data: {
// Which tab panel is rendered: 'transcription' or 'tts'.
activeTab: 'transcription',
// Transcription data
// File picked through the file input; reset to null when a recording finishes.
audioFile: null,
// Blob assembled from the recorded chunks; reset to null when a file is picked.
audioBlob: null,
// Transcript text shown in the result panel.
transcription: '',
// Request-in-flight flag, shared by the transcription and TTS requests.
isLoading: false,
isRecording: false,
mediaRecorder: null,
// Chunks collected from the recorder's dataavailable events.
audioChunks: [],
// Elapsed recording time in seconds, incremented by recordingInterval.
recordingTime: 0,
// Handle returned by setInterval for the recording timer.
recordingInterval: null,
// TTS data
// Language list loaded from the /languages endpoint; entries are read as { code, name }.
languages: [],
selectedLanguage: '',
inputText: '',
// Object URL of the generated audio blob, or null when there is none.
audioUrl: null,
// Plyr instance wrapping the generated-audio element.
player: null,
// Example sentence per language code; used as the textarea placeholder and
// as the fallback text when the input is empty.
examples: {
hin: "भारत विविधता में एकता का देश है। यहाँ अनेक भाषाएँ, धर्म और संस्कृतियाँ एक साथ फलती-फूलती हैं।",
ben: "বাংলা সাহিত্যের ঐতিহ্য বিশ্বব্যাপী স্বীকৃত। রবীন্দ্রনাথ ঠাকুর এর একজন মহান প্রতিনিধি।",
mar: "महाराष्ट्र हे भारतातील एक प्रगत राज्य आहे. मुंबई ही या राज्याची राजधानी आणि आर्थिक केंद्र आहे।",
tel: "తెలుగు భాష మధురమైనది మరియు సంగీతాత్మకమైనది. ఆంధ్ర ప్రదేశ్ మరియు తెలంగాణ రాష్ట్రాల అధికార భాష ఇదే.",
tam: "தமிழ் மொழி மிகவும் பழமையான மொழிகளில் ஒன்றாகும். இது தென்னிந்தியாவில் பரவலாகப் பேசப்படுகிறது.",
guj: "ગુજરાત એ ભારતનું એક સમૃદ્ધ રાજ્ય છે. અહીં આધુનિકતા અને પરંપરા સાથે ચાલે છે.",
"urd-script_arabic": "اردو ایک خوبصورت زبان ہے جو پاکستان اور بھارت میں بولی جاتی ہے۔ اس کی شاعری دنیا بھر میں مشہور ہے۔",
kan: "ಕರ್ನಾಟಕ ರಾಜ್ಯವು ಅದ್ಭುತ ಸಂಸ್ಕೃತಿ ಮತ್ತು ಪ್ರಕೃತಿ ಸೌಂದರ್ಯಕ್ಕೆ ಹೆಸರುವಾಸಿಯಾಗಿದೆ. ಇದರ ರಾಜಧಾನಿ ಬೆಂಗಳೂರು ಭಾರತದ ತಂತ್ರಜ್ಞಾನ ಕೇಂದ್ರವಾಗಿದೆ.",
mal: "കേരളം ഇന്ത്യയുടെ തെക്കുപടിഞ്ഞാറൻ സംസ്ഥാനമാണ്. ഇവിടെ പ്രകൃതിയുടെ സൗന്ദര്യവും സമൃദ്ധമായ സംസ്കാരവും കാണാം.",
pan: "ਪੰਜਾਬ ਦੀ ਧਰਤੀ ਬਹੁਤ ਉਪਜਾਊ ਹੈ। ਇੱਥੇ ਦੀ ਖੇਤੀਬਾੜੀ ਅਤੇ ਸੱਭਿਆਚਾਰ ਪੂਰੇ ਭਾਰਤ ਵਿੱਚ ਪ੍ਰਸਿੱਧ ਹਨ।",
san: "संस्कृतं भारतस्य प्राचीनतमा भाषा अस्ति। इयं देवभाषा इति अपि कथ्यते। अस्याः साहित्यं विपुलं वैविध्यपूर्णं च विद्यते।"
},
// Example for the currently selected language ('' when none is defined).
currentExample: ''
},
mounted() {
this.fetchLanguages();
},
updated() {
this.$nextTick(() => {
if (this.audioUrl && this.$refs.audioPlayer) {
if (this.player) {
this.player.destroy();
}
this.player = new Plyr(this.$refs.audioPlayer, {
controls: ['play', 'progress', 'current-time', 'mute', 'volume']
});
}
});
},
methods: {
// Transcription methods
handleFileChange(event) {
this.audioFile = event.target.files[0];
this.audioBlob = null;
this.transcription = '';
},
async toggleRecording() {
if (this.isRecording) {
this.stopRecording();
} else {
await this.startRecording();
}
},
async startRecording() {
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
this.mediaRecorder = new MediaRecorder(stream);
this.audioChunks = [];
this.mediaRecorder.ondataavailable = (event) => {
this.audioChunks.push(event.data);
};
this.mediaRecorder.onstop = () => {
this.audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
this.audioFile = null;
this.transcribeAudio(); // Automatically transcribe after recording stops
};
this.mediaRecorder.start();
this.isRecording = true;
this.recordingTime = 0;
this.recordingInterval = setInterval(() => {
this.recordingTime++;
}, 1000);
} catch (error) {
console.error('Error accessing microphone:', error);
alert('Unable to access the microphone. Please make sure it\'s connected and you\'ve granted permission.');
}
},
stopRecording() {
if (this.mediaRecorder) {
this.mediaRecorder.stop();
this.isRecording = false;
clearInterval(this.recordingInterval);
}
},
transcribeAudio() {
if (!this.audioFile && !this.audioBlob) {
alert('Please select an audio file or record audio');
return;
}
this.isLoading = true;
const formData = new FormData();
if (this.audioFile) {
formData.append('file', this.audioFile);
} else {
formData.append('file', this.audioBlob, 'recorded_audio.wav');
}
axios.post('https://pvanand-audio-chat-indic.hf.space/transcribe/', formData, {
headers: {
'Content-Type': 'multipart/form-data'
}
})
.then(response => {
this.transcription = response.data.transcript;
this.isLoading = false;
})
.catch(error => {
console.error('Error transcribing audio:', error);
alert('Error transcribing audio. Please try again.');
this.isLoading = false;
});
},
// TTS methods
fetchLanguages() {
axios.get('https://pvanand-audio-chat-indic.hf.space/languages')
.then(response => {
this.languages = response.data;
if (this.languages.length > 0) {
this.selectedLanguage = this.languages[0].code;
this.updateExample();
}
})
.catch(error => console.error('Error fetching languages:', error));
},
updateExample() {
this.currentExample = this.examples[this.selectedLanguage] || '';
this.inputText = '';
this.audioUrl = null;
},
generateSpeech() {
this.isLoading = true;
let text = this.inputText.trim() || this.currentExample;
let lang = this.selectedLanguage;
if (this.selectedLanguage === 'san') {
if (typeof Sanscript !== 'undefined') {
text = Sanscript.t(text, 'devanagari', 'kannada');
lang = 'kan'; // Use Kannada endpoint for Sanskrit
} else {
console.error('Sanscript library not available');
alert('Sanskrit conversion is not available at the moment.');
this.isLoading = false;
return;
}
}
const url = `https://pvanand-audio-chat-indic.hf.space/tts?text=${encodeURIComponent(text)}&lang=${lang}`;
axios.get(url, { responseType: 'blob' })
.then(response => {
const blob = new Blob([response.data], { type: 'audio/mp3' });
this.audioUrl = URL.createObjectURL(blob);
this.isLoading = false;
})
.catch(error => {
console.error('Error generating speech:', error);
this.isLoading = false;
});
},
saveAudio() {
const link = document.createElement('a');
link.href = this.audioUrl;
link.download = 'generated_speech.mp3';
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
}
}
});
</script>
</body>
</html>