Spaces:
Sleeping
Sleeping
Update static/multi-agent.html
Browse files- static/multi-agent.html +16 -0
static/multi-agent.html
CHANGED
@@ -436,8 +436,24 @@ img, video {
|
|
436 |
}
|
437 |
}
|
438 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
439 |
async function convertHtmlToFile(html, fileType, customFileName = '') {
|
|
|
440 |
const url = 'https://pvanand-web-scraping.hf.space/html_to_' + fileType;
|
|
|
|
|
441 |
function sanitizeFileName(name) {
|
442 |
return (name.replace(/[^a-z0-9\s]/gi, '')
|
443 |
.toLowerCase()
|
|
|
436 |
}
|
437 |
}
|
438 |
|
439 |
+
/**
 * Derive a human-readable file name candidate from an HTML fragment.
 *
 * Uses the text of the first <h1>-<h4> element when that element contains
 * any text; otherwise falls back to the first seven words of the whole
 * document's text.
 *
 * @param {string} html - HTML markup to inspect.
 * @returns {string} Plain text with tags removed and whitespace collapsed;
 *   an empty string when `html` is not a string or contains no text.
 */
function extractCustomFileName(html) {
    if (typeof html !== 'string') {
        return '';
    }

    // Replace tags with spaces so adjacent words do not fuse together,
    // then collapse whitespace runs (including newlines) into single spaces.
    const toPlainText = (markup) => markup.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();

    // [\s\S] lets the heading content span several lines; the \1 backreference
    // requires the closing tag to be the same level as the opening tag.
    const headingMatch = html.match(/<h([1-4])\b[^>]*>([\s\S]*?)<\/h\1\s*>/i);
    if (headingMatch) {
        const headingText = toPlainText(headingMatch[2]);
        // An empty heading (or one holding only markup) is useless as a name,
        // so fall through to the word-based fallback instead of returning ''.
        if (headingText !== '') {
            return headingText;
        }
    }

    // If no usable heading was found, use the first 7 words of the text.
    return toPlainText(html).split(' ').slice(0, 7).join(' ');
}
|
451 |
+
|
452 |
async function convertHtmlToFile(html, fileType, customFileName = '') {
|
453 |
+
|
454 |
const url = 'https://pvanand-web-scraping.hf.space/html_to_' + fileType;
|
455 |
+
customFileName = extractCustomFileName(html);
|
456 |
+
|
457 |
function sanitizeFileName(name) {
|
458 |
return (name.replace(/[^a-z0-9\s]/gi, '')
|
459 |
.toLowerCase()
|