Upload 12 files

Browse files

Files changed (12) hide show

1 +21 -0
2ocr.sh +32 -0
aidocs.py +155 -0
jpegdir.py +98 -0
random/index.html +100 -0
shove.sh +38 -0
showfiles +98 -0
skel.py +143 -0
summ +0 -0
summarize2 +415 -0
tetris32b.html +275 -0
vttclean.py +74 -0

1 ADDED Viewed

	@@ -0,0 +1,21 @@

+FAILURE: Build failed with an exception.
+* What went wrong:
+Directory '/Users/jim/work/hacks' does not contain a Gradle build.
+A Gradle build's root directory should contain one of the possible settings files: settings.gradle, settings.gradle.kts, settings.gradle.dcl. It may also contain one of the possible build files: build.gradle, build.gradle.kts, build.gradle.dcl.
+To create a new Gradle build in this directory run 'gradle init'
+For more information about the 'init' task, please refer to https://docs.gradle.org/8.12-rc-1/userguide/build_init_plugin.html in the Gradle documentation.
+For more details on creating a Gradle build, please refer to https://docs.gradle.org/8.12-rc-1/userguide/tutorial_using_tasks.html in the Gradle documentation.
+* Try:
+> Run gradle init to create a new Gradle build in this directory.
+> Run with --stacktrace option to get the stack trace.
+> Run with --info or --debug option to get more log output.
+> Get more help at https://help.gradle.org.
+BUILD FAILED in 413ms

2ocr.sh ADDED Viewed

	@@ -0,0 +1,32 @@

+#!/bin/bash
+# Directory containing TIFF files
+INPUT_DIR="atreatiseonlawp00chitgoog_tif"
+OUTPUT_PDF="output_searchable.pdf"
+TEMP_DIR="temp_ocr"
+# Create a temporary directory to store processed files
+mkdir -p "$TEMP_DIR"
+# Process each TIFF file
+for file in "$INPUT_DIR"/*.tif; do
+  # Extract the filename without extension
+  filename=$(basename "$file" .tif)
+  # Run Tesseract on each file and output a PDF for each page
+  tesseract "$file" "$TEMP_DIR/$filename" -l eng pdf
+done
+# Combine all individual page PDFs into a single PDF
+if command -v pdfunite >/dev/null 2>&1; then
+  # If pdfunite is available (from poppler-utils), use it
+  pdfunite "$TEMP_DIR"/*.pdf "$OUTPUT_PDF"
+else
+  # Fallback to using ImageMagick's `convert` if `pdfunite` isn't available
+  convert "$TEMP_DIR"/*.pdf "$OUTPUT_PDF"
+fi
+# Clean up temporary directory
+rm -r "$TEMP_DIR"
+echo "Searchable PDF created as $OUTPUT_PDF"

aidocs.py ADDED Viewed

	@@ -0,0 +1,155 @@

+from dataclasses import dataclass, field
+from typing import Optional, List, Dict, Set, Literal, Tuple, NamedTuple, Union
+from pathlib import Path
+import re
+import fnmatch
+import glob
+from itertools import chain
+@dataclass
+class PathPattern:
+    """Represents either a direct mapping or a wildcard pattern."""
+    pattern: str
+    target_template: Optional[str] = None
+    @classmethod
+    def parse(cls, spec: str) -> 'PathPattern':
+        """Parse path specification into pattern and optional target."""
+        if ':' in spec:
+            source, target = spec.split(':', 1)
+            return cls(source, target)
+        return cls(spec)
+    def resolve(self, root_dir: Path) -> List[PathMapping]:
+        """Resolve pattern into concrete path mappings."""
+        if self.target_template is not None:
+            # Direct mapping case
+            return [PathMapping(Path(self.pattern), Path(self.target_template))]
+        # Wildcard pattern case
+        matches = []
+        for path in glob.glob(self.pattern, recursive=True):
+            source = Path(path)
+            if source.is_file():
+                # For files, maintain relative structure
+                relative = source.relative_to(root_dir) if root_dir in source.parents else source
+                matches.append(PathMapping(source, relative))
+        return matches
+    def validate(self) -> None:
+        """Validate pattern constraints."""
+        if self.target_template:
+            # Check for path traversal in target
+            if '..' in self.target_template:
+                raise ValueError(f"Target path '{self.target_template}' cannot contain '..'")
+            # Normalize path separators
+            if '\\' in self.target_template:
+                raise ValueError(f"Target path must use forward slashes")
+        # Validate wildcard pattern
+        if any(c in self.pattern for c in '<>|"'):
+            raise ValueError(f"Invalid characters in pattern: {self.pattern}")
+class WikiTransformer:
+    def __init__(self, size_limit: 'SizeSpec', output_dir: Path,
+                 merge_strategy: MergeStrategy,
+                 debug: bool = False):
+        self.validator = SizeValidator(size_limit)
+        self.output_dir = output_dir
+        self.merge_strategy = merge_strategy
+        self.debug = debug
+        self.console = Console()
+        self.log = self._setup_logging()
+        self.processed_inodes: Set[int] = set()
+        self.root_dir = Path.cwd()
+    async def resolve_patterns(self, patterns: List[str]) -> List[PathMapping]:
+        """Resolve all patterns into concrete mappings."""
+        mappings = []
+        for spec in patterns:
+            try:
+                pattern = PathPattern.parse(spec)
+                pattern.validate()
+                resolved = pattern.resolve(self.root_dir)
+                if not resolved:
+                    self.log.warning(f"Pattern '{spec}' matched no files")
+                mappings.extend(resolved)
+            except ValueError as e:
+                self.log.error(f"Invalid pattern '{spec}': {e}")
+                continue
+        return mappings
+    async def transform(self, patterns: List[str]):
+        """Transform source trees based on patterns and mappings."""
+        mappings = await self.resolve_patterns(patterns)
+        if not mappings:
+            raise ValueError("No valid paths matched the specified patterns")
+        if not self.merge_strategy.validate_target(self.output_dir):
+            raise ValueError(
+                f"Target filesystem doesn't support {self.merge_strategy.link_type} links"
+            )
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        with Progress() as progress:
+            task = progress.add_task(
+                "[green]Processing files...",
+                total=len(mappings)
+            )
+            for mapping in mappings:
+                try:
+                    await self.process_mapping(mapping)
+                    progress.update(task, advance=1)
+                except Exception as e:
+                    self.log.error(f"Failed to process {mapping}: {e}")
+@click.command()
+@click.argument('patterns', nargs=-1, required=True,
+                help="Path patterns (e.g., 'src:docs/api' or '**/*.md')")
+@click.option('-l', '--limit', type=SIZE, default='1M',
+              help='Per-document size limit (e.g., 500K, 2M, 1G)')
+@click.option('-d', '--debug', is_flag=True, help='Enable debug logging')
+@click.option('-o', '--output-dir', type=click.Path(), default='wiki',
+              help='Output directory')
+@click.option('--link-type', type=click.Choice(['symlink', 'hardlink', 'copy']),
+              default='symlink', help='File linking strategy')
+@click.option('--follow-links/--no-follow-links', default=False,
+              help='Follow symbolic links during traversal')
+def main(patterns: List[str], limit: SizeSpec, debug: bool,
+         output_dir: str, link_type: str, follow_links: bool):
+    """Transform files into wiki structure using patterns or mappings.
+    PATTERNS can be either:
+    1. Colon-separated mappings: 'source:target'
+    2. Wildcard patterns: '**/*.md', 'docs/**/*.rst'
+    Examples:
+        # Explicit mapping
+        wiki_transform.py src/api:docs/api docs/intro:guide/start
+        # Wildcard patterns
+        wiki_transform.py '**/*.md' 'docs/**/*.rst'
+        # Mixed usage
+        wiki_transform.py src:api '**/*.md' 'legacy:archive'
+    """
+    strategy = MergeStrategy(
+        link_type=None if link_type == 'copy' else link_type,
+        follow_links=follow_links
+    )
+    transformer = WikiTransformer(
+        size_limit=limit,
+        output_dir=Path(output_dir),
+        merge_strategy=strategy,
+        debug=debug
+    )
+    asyncio.run(transformer.transform(patterns))
+if __name__ == '__main__':
+    main()

jpegdir.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import os
+from PIL import Image
+import pytesseract
+from pathlib import Path
+import json
+from typing import Dict, List
+from concurrent.futures import ProcessPoolExecutor
+import multiprocessing
+def process_image(args) -> tuple:
+    """
+    Process a single image file.
+    Args:
+        args: Tuple of (filename, input_dir, output_dir)
+    Returns:
+        Tuple of (filename, extracted_text)
+    """
+    filename, input_dir, output_dir = args
+    try:
+        # Full path to image
+        image_path = os.path.join(input_dir, filename)
+        # Open and process image
+        with Image.open(image_path) as img:
+            # Extract text using pytesseract
+            text = pytesseract.image_to_string(img)
+            # Save individual text file
+            text_filename = Path(filename).stem + '.txt'
+            text_path = os.path.join(output_dir, text_filename)
+            with open(text_path, 'w', encoding='utf-8') as f:
+                f.write(text)
+        print(f"Processed: {filename}")
+        return filename, text
+    except Exception as e:
+        print(f"Error processing {filename}: {str(e)}")
+        return filename, f"ERROR: {str(e)}"
+def process_directory(input_dir: str, output_dir: str, max_workers: int = None) -> Dict[str, str]:
+    """
+    Process all JPEG files in a directory and perform OCR using multiple processes.
+    Args:
+        input_dir: Directory containing JPEG files
+        output_dir: Directory to save OCR results
+        max_workers: Maximum number of worker processes (defaults to CPU count)
+    Returns:
+        Dictionary mapping filenames to extracted text
+    """
+    # Create output directory if it doesn't exist
+    Path(output_dir).mkdir(parents=True, exist_ok=True)
+    # If max_workers not specified, use CPU count
+    if max_workers is None:
+        max_workers = multiprocessing.cpu_count()
+    # Supported image extensions
+    valid_extensions = {'.jpg', '.jpeg', '.JPG', '.JPEG'}
+    # Get list of valid image files
+    image_files = [
+        f for f in os.listdir(input_dir)
+        if Path(f).suffix in valid_extensions
+    ]
+    # Prepare arguments for worker processes
+    work_args = [(f, input_dir, output_dir) for f in image_files]
+    # Process files concurrently
+    results = {}
+    with ProcessPoolExecutor(max_workers=max_workers) as executor:
+        for filename, text in executor.map(process_image, work_args):
+            results[filename] = text
+    # Save consolidated results to JSON
+    json_path = os.path.join(output_dir, 'ocr_results.json')
+    with open(json_path, 'w', encoding='utf-8') as f:
+        json.dump(results, f, indent=2, ensure_ascii=False)
+    return results
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description='Perform OCR on all JPEG files in a directory')
+    parser.add_argument('input_dir', help='Input directory containing JPEG files')
+    parser.add_argument('output_dir', help='Output directory for OCR results')
+    parser.add_argument('--workers', type=int, help='Number of worker processes (default: CPU count)',
+                      default=None)
+    args = parser.parse_args()
+    results = process_directory(args.input_dir, args.output_dir, args.workers)
+    print(f"\nProcessed {len(results)} files. Results saved to {args.output_dir}")

random/index.html ADDED Viewed

	@@ -0,0 +1,100 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Force-Directed Voronoi Diagram</title>
+  <script src="https://d3js.org/d3.v7.min.js"></script>
+  <style>
+    body {
+        margin: 0;
+        overflow: hidden;
+    }
+    svg {
+        display: block;
+    }
+  </style>
+</head>
+<body>
+<script>
+  // Dimensions
+  const width = window.innerWidth;
+  const height = window.innerHeight;
+  // Create SVG container
+  const svg = d3.select("body")
+      .append("svg")
+      .attr("width", width)
+      .attr("height", height);
+  // Random initial dataset
+  let data = d3.range(20).map(() => ({
+      x: Math.random() * width,
+      y: Math.random() * height,
+      value: Math.random()
+  }));
+  // Force simulation
+  const simulation = d3.forceSimulation(data)
+      .force("x", d3.forceX(d => d.x).strength(0.5))
+      .force("y", d3.forceY(d => d.y).strength(0.5))
+      .force("collide", d3.forceCollide(50))
+      .on("tick", update);
+  // Voronoi diagram generator
+  const voronoi = d3.voronoi()
+      .x(d => d.x)
+      .y(d => d.y)
+      .extent([[0, 0], [width, height]]);
+  // Group for Voronoi cells
+  const voronoiGroup = svg.append("g");
+  // Group for circles
+  const circleGroup = svg.append("g");
+  function update() {
+      const diagram = voronoi(data);
+      // Update Voronoi cells
+      const cells = voronoiGroup.selectAll("path")
+          .data(data);
+      cells.enter()
+          .append("path")
+          .merge(cells)
+          .attr("d", (d, i) => diagram.renderCell(i))
+          .attr("fill", d => d3.interpolateRainbow(d.value))
+          .attr("stroke", "#000");
+      cells.exit().remove();
+      // Update circles
+      const circles = circleGroup.selectAll("circle")
+          .data(data);
+      circles.enter()
+          .append("circle")
+          .merge(circles)
+          .attr("r", 5)
+          .attr("fill", "black")
+          .attr("cx", d => d.x)
+          .attr("cy", d => d.y);
+      circles.exit().remove();
+  }
+  // Add a new data point every 2 seconds
+  setInterval(() => {
+      data.push({
+          x: Math.random() * width,
+          y: Math.random() * height,
+          value: Math.random()
+      });
+      simulation.nodes(data);
+      simulation.alpha(1).restart();
+  }, 2000);
+</script>
+</body>
+</html>

shove.sh ADDED Viewed

	@@ -0,0 +1,38 @@

+#!/bin/bash
+# Variables
+BATCH_SIZE=30
+COUNTER=0
+OUTPUT_DIR="batches"
+RESULTS_FILE="ocr_results.txt"
+# Create output directory if not exists
+mkdir -p "$OUTPUT_DIR"
+# Clear results file
+: > "$RESULTS_FILE"
+# Loop through PNG files in batches of $BATCH_SIZE
+for FILE in output-*.png; do
+    # Add file to batch array
+    BATCH_FILES+=("$FILE")
+    COUNTER=$((COUNTER + 1))
+    # Process batch when size is reached or on the last file
+    if (( COUNTER % BATCH_SIZE == 0 || COUNTER == $(ls output-*.png | wc -l) )); then
+        # Create batch file name
+        BATCH_NAME="${OUTPUT_DIR}/batch_$((COUNTER / BATCH_SIZE)).png"
+        # Use ffmpeg to concatenate files vertically
+        ffmpeg -y -i "concat:$(printf '%s|' "${BATCH_FILES[@]}" | sed 's/|$//')" -vf vstack "$BATCH_NAME"
+        # Run easyocr on the concatenated image
+        echo "Processing $BATCH_NAME..."
+        easyocr -l en -f "$BATCH_NAME" --gpu True >> "$RESULTS_FILE"
+        # Reset batch files array
+        BATCH_FILES=()
+    fi
+done
+echo "OCR processing complete. Results saved to $RESULTS_FILE."

showfiles ADDED Viewed

	@@ -0,0 +1,98 @@

+#!/bin/bash
+# Mission Statement:
+# This script displays the contents of specified files with formatted headers.
+# It provides optional file size limits through the -k flag (specified in KB).
+# Without the -k flag, files are shown in their entirety.
+# With -k flag, files larger than the specified size are truncated with a warning.
+# The script handles both Linux and MacOS systems using compatible stat commands.
+# Color output is available via the -c flag for better visual organization.
+# ANSI color codes
+BLUE='\033[0;34m'
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+# Print the command-line help and exit with status 1.
+usage() {
+    # "$0" is quoted so a script path containing spaces still prints correctly
+    echo "Usage: $(basename "$0") [-k size_in_kb] [-c] files..."
+    echo "  -k: Maximum file size in KB (optional)"
+    echo "  -c: Enable color output"
+    exit 1
+}
+# Initialize variables
+COLOR=false
+MAX_SIZE_KB=""
+# Parse command line options
+while getopts "k:c" opt; do
+    case $opt in
+        k)
+            # The value is later used in integer tests and arithmetic, so
+            # reject anything that is not a plain non-negative integer.
+            case "$OPTARG" in
+                ''|*[!0-9]*)
+                    echo "Error: -k expects a whole number of KB, got '$OPTARG'" >&2
+                    usage
+                    ;;
+            esac
+            # 10# forces base 10 so a value like 010 is not read as octal
+            MAX_SIZE_KB=$((10#$OPTARG))
+            ;;
+        c) COLOR=true;;
+        ?) usage;;
+    esac
+done
+# Shift past the options
+shift $((OPTIND-1))
+# Check if any files were specified
+if [ $# -eq 0 ]; then
+    usage
+fi
+# Print the size of file $1 in bytes (callers convert to KB themselves).
+# BSD stat on MacOS and GNU stat elsewhere take different format flags.
+get_file_size() {
+    case "$OSTYPE" in
+        darwin*) stat -f %z "$1" ;;
+        *)       stat --format=%s "$1" ;;
+    esac
+}
+# Print a banner line with the name of file $1 and its size in whole KB,
+# colored when COLOR is true.
+show_header() {
+    local path="$1"
+    local bytes=$(get_file_size "$path")
+    local kb=$((bytes / 1024))
+    local banner="=== File: $path (${kb}KB) ==="
+    if $COLOR; then
+        banner="${BLUE}=== File: ${GREEN}$path${BLUE} (${kb}KB) ===${NC}"
+    fi
+    echo -e "\n$banner"
+}
+# Process each file: print a header, then the content, truncated to the
+# -k limit when one was given.
+for file in "$@"; do
+    if [ ! -f "$file" ]; then
+        if $COLOR; then
+            echo -e "${RED}Error: '$file' does not exist or is not a regular file${NC}" >&2
+        else
+            echo "Error: '$file' does not exist or is not a regular file" >&2
+        fi
+        continue
+    fi
+    show_header "$file"
+    # No limit requested: show the whole file
+    if [ -z "$MAX_SIZE_KB" ]; then
+        cat -- "$file"
+        continue
+    fi
+    size_bytes=$(get_file_size "$file")
+    size_kb=$((size_bytes / 1024))
+    limit_bytes=$((MAX_SIZE_KB * 1024))
+    # Compare in bytes: comparing the truncated KB values let a file of up to
+    # 1023 bytes over the limit through untruncated.
+    if [ "$size_bytes" -gt "$limit_bytes" ]; then
+        if $COLOR; then
+            echo -e "${RED}File size ($size_kb KB) exceeds limit ($MAX_SIZE_KB KB). Showing first $MAX_SIZE_KB KB:${NC}"
+        else
+            echo "File size ($size_kb KB) exceeds limit ($MAX_SIZE_KB KB). Showing first $MAX_SIZE_KB KB:"
+        fi
+        head -c "$limit_bytes" -- "$file"
+        echo -e "\n[Truncated...]"
+    else
+        cat -- "$file"
+    fi
+done

skel.py ADDED Viewed

	@@ -0,0 +1,143 @@

+#!python3
+import unittest
+from pathlib import Path
+import tempfile
+import os
+class TestSkeletonMapper(unittest.TestCase):
+    def setUp(self):
+        self.temp_dir = tempfile.mkdtemp()
+        self.patterns = create_language_patterns()
+    def create_test_file(self, content: str, extension: str) -> str:
+        path = Path(self.temp_dir) / f"test{extension}"
+        path.write_text(content)
+        return str(path)
+    def test_kotlin_edge_cases(self):
+        kotlin_code = '''
+        @DslMarker
+        annotation class NioProxyDsl
+        interface EnhancedNioProxy<T : Any> {
+            val original: T
+            fun verifyIdentity(): Boolean = enhanced.equals(original)
+        }
+        class ProxyContext {
+            private val _events = MutableSharedFlow<ProxyEvent>()
+        }
+        '''
+        file_path = self.create_test_file(kotlin_code, ".kt")
+        results = extract_skeleton(file_path, self.patterns)
+        # BUG 1: Missing generic type parameters in class/interface detection
+        self.assertIn("interface EnhancedNioProxy<T : Any>", results['interface'])
+        # BUG 2: Property detection fails with initialization
+        self.assertIn("val original: T", results['property'])
+        # BUG 3: Annotation detection drops parameters
+        self.assertIn("@DslMarker", results['annotation'])
+def fix_kotlin_patterns():
+    return {
+        'class': r'^\s*(?:data\s+)?class\s+(\w+)(?:<[^>]+>)?',
+        'function': r'^\s*fun\s+(\w+)(?:<[^>]+>)?',
+        'property': r'^\s*(?:var|val)\s+(\w+)(?:\s*:\s*[^=]+)?(?:\s*=.+)?',
+        'interface': r'^\s*interface\s+(\w+)(?:<[^>]+>)?',
+        'annotation': r'^\s*@(\w+)(?:\s*[\w\s.()]+)?',
+        'suspend': r'^\s*suspend\s+fun\s+\w+',
+    }
+# Critical fixes for main implementation
+def patch_implementation():
+    """
+    Critical patches for identified issues
+    """
+    # 1. Fix subprocess handling for large files
+    def safe_grep(cmd: str, timeout: int = 30) -> str:
+        try:
+            return subprocess.run(
+                cmd,
+                shell=True,
+                text=True,
+                capture_output=True,
+                timeout=timeout
+            ).stdout
+        except subprocess.TimeoutExpired:
+            return ""
+    # 2. Fix pattern escaping in grep command
+    def escape_grep_pattern(pattern: str) -> str:
+        return pattern.replace('(', '\\(').replace(')', '\\)')
+    # 3. Add file encoding handling
+    def read_file_safe(file_path: str) -> str:
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                return f.read()
+        except UnicodeDecodeError:
+            try:
+                with open(file_path, 'r', encoding='latin-1') as f:
+                    return f.read()
+            except Exception:
+                return ""
+    return safe_grep, escape_grep_pattern, read_file_safe
+# Shell script fixes
+def generate_fixed_shell_script():
+    return '''
+    #!/bin/bash
+    # Fixed file handling
+    while IFS= read -r -d '' file; do
+        if [[ ! -f "$file" ]]; then
+            continue
+        fi
+        # Handle filenames with spaces and special chars
+        file_ext="${file##*.}"
+        file_name=$(printf '%q' "$file")
+        # Prevent grep pattern injection
+        safe_grep() {
+            local pattern=$1
+            local file=$2
+            grep -E "^[[:space:]]*${pattern}" "$file" 2>/dev/null || true
+        }
+        case "$file_ext" in
+            kt|kts)
+                safe_grep "(@\\w+|class\\s+\\w+|interface\\s+\\w+|fun\\s+\\w+)" "$file_name"
+                ;;
+            # ... other extensions
+        esac
+    done < <(find . -type f -print0)
+    '''
+# Runtime monitoring hooks
+def add_monitoring():
+    import time
+    import psutil
+    def monitor_execution(func):
+        def wrapper(*args, **kwargs):
+            start = time.time()
+            process = psutil.Process()
+            mem_before = process.memory_info().rss
+            result = func(*args, **kwargs)
+            elapsed = time.time() - start
+            mem_after = process.memory_info().rss
+            mem_delta = mem_after - mem_before
+            if elapsed > 5.0 or mem_delta > 100*1024*1024:  # 100MB
+                print(f"Warning: High resource usage in {func.__name__}")
+            return result
+        return wrapper
+    return monitor_execution

summ ADDED Viewed

File without changes

summarize2 ADDED Viewed

	@@ -0,0 +1,415 @@

+#!/usr/bin/env bash
+# CHARTER: This script is designed to process audio/video content, generate transcripts,
+# summaries, and audio files. The following tasks are defined and must not be altered by any LLM:
+# 1. Download content if a URL is provided
+# 2. Generate or use existing VTT transcript
+# 3. Clean the VTT transcript
+# 4. Generate a summary of the transcript
+# 5. Create a 16k VBR Opus OGG file for audio tracks (unless audio download is disabled)
+# 6. Output both the cleaned VTT text and the summary
+# 7. Exclude the WAV file from the results
+# 8. Include the OGG file in the results only if both WAV and OGG were created
+# This charter is unalterable and defines the core functionality of the script.
+# Configuration (adjust these paths)
+WHISPCC="$HOME/work/whisper.cpp" # ./main to run ; ./models for models
+MODEL_PATH="$WHISPCC/models/ggml-small.en-tdrz.bin"
+OUTPUT_DIR="$HOME/processed_audio"
+CACHE_DIR="/tmp/summarize_cache"
+# NOTE(review): this first value is immediately overwritten by the next
+# line, so the deepseek model is the one actually used.
+OLLAMA_MODEL="llama3.1:latest"
+OLLAMA_MODEL="deepseek-coder-v2:16b"
+# Prompts for different segments
+FIRST_PROMPT="Summarize this beginning part of a transcript in one sentence, then provide bullet points with timestamps (00:00:00 sentence)."
+MIDDLE_PROMPT="Summarize the key points of this part of the transcript in bullet points with timestamps (00:00:00 sentence)."
+LAST_PROMPT="Summarize the main takeaways of this final part of the transcript in bullet points with timestamps (00:00:00 sentence)."
+# Global variable to track job queue; each entry is a command string that
+# process_job_queue later runs with eval
+JOB_QUEUE=()
+# Ensure output and cache directories exist
+mkdir -p "$OUTPUT_DIR" "$CACHE_DIR"
+# Parse command line options
+USE_FABRIC=false
+DISABLE_AUDIO=false
+DURATION=""
+while getopts "fnad:" opt; do
+  case $opt in
+    f)
+      USE_FABRIC=true
+      ;;
+    n)
+      DISABLE_AUDIO=true
+      ;;
+    a)
+      DISABLE_AUDIO=false
+      ;;
+    d)
+      DURATION="$OPTARG"
+      ;;
+    \?)
+      echo "Invalid option: -$OPTARG" >&2
+      exit 1
+      ;;
+  esac
+done
+shift $((OPTIND-1))
+# Function to get MD5 hash of a file
+get_md5() {
+    md5sum "$1" | cut -d' ' -f1
+}
+# Function to cache a file under CACHE_DIR, keyed by its MD5 hash.
+#   $1 = file to cache, $2 = extension appended to the cache file name
+# Prints the cache file path on stdout (diagnostics go to stderr).
+# Returns 0 on success and 1 on any failure.
+cache_file() {
+    local INPUT_FILE="$1"
+    local EXTENSION="$2"
+    # Check if the input file exists and is not empty
+    if [ ! -s "$INPUT_FILE" ]; then
+        echo "Error: Input file is empty or does not exist." >&2
+        return 1
+    fi
+    # Assigned separately from `local` so an empty hash can be detected
+    local MD5
+    MD5=$(get_md5 "$INPUT_FILE")
+    if [ -z "$MD5" ]; then
+        echo "Error: Failed to compute MD5 of input file." >&2
+        return 1
+    fi
+    # Two levels of subdirectories taken from the first four hash characters
+    local CACHE_SUBDIR="$CACHE_DIR/${MD5:0:2}/${MD5:2:2}"
+    local SAFE_FILENAME=$(echo "$INPUT_FILE" | sed 's/[^a-zA-Z0-9._-]/_/g')
+    local CACHE_FILE="$CACHE_SUBDIR/${MD5}_${SAFE_FILENAME}${EXTENSION}"
+    echo "Cache operation: MD5 sum = $MD5" >&2
+    echo "Cache file: $CACHE_FILE" >&2
+    # Create cache subdirectory if it doesn't exist
+    if ! mkdir -p "$CACHE_SUBDIR"; then
+        echo "Error: Failed to create cache subdirectory." >&2
+        return 1
+    fi
+    # Prefer a hardlink; hardlinks cannot cross filesystems (the cache lives
+    # under /tmp), so fall back to a plain copy when linking fails.
+    if ln -f "$INPUT_FILE" "$CACHE_FILE" 2>/dev/null || cp -f "$INPUT_FILE" "$CACHE_FILE"; then
+        echo "Cache file created: $CACHE_FILE" >&2
+        echo "$CACHE_FILE"
+        return 0
+    else
+        echo "Error: Failed to create cache file." >&2
+        return 1
+    fi
+}
+# Turn string $1 into a lowercase, ASCII-only file name: transliterate with
+# iconv, then replace every character outside A-Z a-z 0-9 . _ - with '_'.
+sanitize_filename() {
+    local raw_name="$1"
+    echo "$raw_name" \
+        | iconv -c -t ascii//translit \
+        | sed 's/[^A-Za-z0-9._-]/_/g' \
+        | tr '[:upper:]' '[:lower:]'
+}
+# Function to clean text from a VTT file
+clean_text() {
+    sed 's/<[^>]*>//g' | tr -s ' ' | sed 's/^[ \t]*//;s/[ \t]*$//'
+}
+# Function to summarize a segment of text
+summarize_segment() {
+    local SEGMENT_TEXT="$1"
+    local PROMPT="$2"
+    local SUMMARY_OUTPUT=""
+    # Count the number of lines in the input
+    local LINE_COUNT=$(echo "$SEGMENT_TEXT" | wc -l)
+    # If the input has less than 12 lines, remove cache and return a simple response
+    if [ "$LINE_COUNT" -lt 12 ]; then
+        local MD5=$(echo "$SEGMENT_TEXT" | md5sum | cut -d' ' -f1)
+        local CACHE_SUBDIR="$CACHE_DIR/${MD5:0:2}/${MD5:2:2}"
+        rm -f "$CACHE_SUBDIR/$MD5"*
+        echo "The input is too short for meaningful summarization. Cache entry removed. Here's the original text:"
+        echo "$SEGMENT_TEXT"
+        return 0
+    fi
+    if $USE_FABRIC; then
+        SUMMARY_OUTPUT=$(fabric -p summarize "$SEGMENT_TEXT" 2>&1)
+    else
+        # Use ollama for summarization
+        SUMMARY_OUTPUT=$(ollama run "$OLLAMA_MODEL" "$PROMPT" "$SEGMENT_TEXT" 2>&1)
+    fi
+    if [ $? -ne 0 ]; then
+        echo "Error in summarization: $SUMMARY_OUTPUT" >&2
+        return 1
+    fi
+    echo "$SUMMARY_OUTPUT"
+}
+# Function to add a job to the queue
+add_job() {
+    JOB_QUEUE+=("$@")
+}
+# Function to update the progress display for a job.
+#   $1 = zero-based job index, $2 = total steps, $3 = current step,
+#   $4 = message to show
+update_job_progress() {
+    local JOB_INDEX="$1"
+    local TOTAL_STEPS="$2"
+    local CURRENT_STEP="$3"
+    local JOB_MESSAGE="$4"
+    # JOB_COUNT is only set (as a local) inside process_job_queue; fall back
+    # to the queue length so a call from anywhere else still prints a total.
+    local JOB_TOTAL="${JOB_COUNT:-${#JOB_QUEUE[@]}}"
+    # ... (Implementation for updating the TUI progress bar)
+    # You can use a library like 'whiptail' or 'dialog' for TUI elements
+    # Example using echo for now:
+    echo "Job $((JOB_INDEX+1))/$JOB_TOTAL: $JOB_MESSAGE ($CURRENT_STEP/$TOTAL_STEPS)"
+}
+# Run every queued job, in the order it was added.
+# NOTE(review): each entry is executed with eval, so file names or URLs
+# embedded in a job string are re-parsed by the shell -- quotes or $(...) in
+# them would be interpreted. Consider queueing argument arrays instead.
+process_job_queue() {
+    local JOB_COUNT=${#JOB_QUEUE[@]}
+    local job
+    echo "Processing job queue ($JOB_COUNT jobs)..."
+    for job in "${JOB_QUEUE[@]}"; do
+        eval "$job"
+    done
+}
+# Function to process a single segment
+process_segment() {
+    local SEGMENT_TEXT="$1"
+    local PROMPT="$2"
+    local OUTPUT_FILE="$3"
+    local SUMMARY_OUTPUT=""
+    # Count the number of lines in the input
+    local LINE_COUNT=$(echo "$SEGMENT_TEXT" | wc -l)
+    # If the input has less than 12 lines, remove cache and return a simple response
+    if [ "$LINE_COUNT" -lt 12 ]; then
+        local MD5=$(echo "$SEGMENT_TEXT" | md5sum | cut -d' ' -f1)
+        local CACHE_SUBDIR="$CACHE_DIR/${MD5:0:2}/${MD5:2:2}"
+        rm -f "$CACHE_SUBDIR/$MD5"*
+        echo "The input is too short for meaningful summarization. Cache entry removed. Here's the original text:"
+        echo "$SEGMENT_TEXT" > "$OUTPUT_FILE"
+        return 0
+    fi
+    if $USE_FABRIC; then
+        SUMMARY_OUTPUT=$(fabric -p summarize "$SEGMENT_TEXT" 2>&1)
+    else
+        # Use ollama for summarization
+        SUMMARY_OUTPUT=$(ollama run "$OLLAMA_MODEL" "$PROMPT" "$SEGMENT_TEXT" 2>&1)
+    fi
+    if [ $? -ne 0 ]; then
+        echo "Error in summarization: $SUMMARY_OUTPUT" >&2
+        return 1
+    fi
+    # Write the summary to the specified output file
+    echo "$SUMMARY_OUTPUT" > "$OUTPUT_FILE"
+}
+# Helper for process_vtt: print the summary of one transcript segment.
+#   $1 = prompt (only used with ollama), $2 = segment text
+_summarize_vtt_segment() {
+    if $USE_FABRIC; then
+        fabric -p summarize "$2"
+    else
+        ollama run "$OLLAMA_MODEL" "$1" "$2"
+    fi
+}
+# Function to process a VTT file: clean the transcript, summarize it in
+# three segments and write the result to OUTPUT_DIR/<name>_summary.txt.
+#   $1 = VTT file, $2 = source URL (accepted but not used here)
+# Exits the script with status 1 when cleaning or summarizing fails.
+process_vtt() {
+    local VTT_FILE=$1
+    local URL=$2
+    local TEMP_DIR
+    TEMP_DIR=$(mktemp -d) || {
+        echo "Error: Failed to create a temporary directory." >&2
+        exit 1
+    }
+    local BASE_NAME="${TEMP_DIR}/temp" # Temporary base name
+    local CLEANED_TRANSCRIPT="${BASE_NAME}_cleaned.txt"
+    local SUMMARY_FILE="${OUTPUT_DIR}/$(basename "$VTT_FILE" .vtt)_summary.txt"
+    echo "Processing VTT file: $VTT_FILE"
+    # Clean the VTT transcript
+    if ! python3 "$(dirname "$0")/vttclean.py" "$VTT_FILE" > "$CLEANED_TRANSCRIPT" 2>"${CLEANED_TRANSCRIPT}.error"; then
+        echo "Error: Failed to clean the VTT file. Error log:" >&2
+        cat "${CLEANED_TRANSCRIPT}.error" >&2
+        rm -rf "$TEMP_DIR"
+        exit 1
+    fi
+    # Check if the cleaned transcript is empty
+    if [ ! -s "$CLEANED_TRANSCRIPT" ]; then
+        echo "Error: Cleaned transcript is empty." >&2
+        rm -rf "$TEMP_DIR"
+        exit 1
+    fi
+    # Generate summary
+    echo "Summarizing transcript..."
+    local TOTAL_LINES
+    TOTAL_LINES=$(wc -l < "$CLEANED_TRANSCRIPT")
+    local SEGMENT_SIZE=$((TOTAL_LINES / 3))
+    # Transcripts shorter than three lines would otherwise get size 0
+    if (( SEGMENT_SIZE < 1 )); then
+        SEGMENT_SIZE=1
+    fi
+    local FIRST_SEGMENT MIDDLE_SEGMENT LAST_SEGMENT
+    FIRST_SEGMENT=$(head -n "$SEGMENT_SIZE" "$CLEANED_TRANSCRIPT")
+    MIDDLE_SEGMENT=$(sed -n "$((SEGMENT_SIZE + 1)),$((2 * SEGMENT_SIZE))p" "$CLEANED_TRANSCRIPT")
+    # The last segment runs to the end of the file, so the remainder lines
+    # of an uneven split are summarized instead of being dropped.
+    LAST_SEGMENT=$(tail -n "+$((2 * SEGMENT_SIZE + 1))" "$CLEANED_TRANSCRIPT")
+    # Everything printed inside the braces, including the "Generating..."
+    # lines, ends up in the summary file. Empty segments are skipped.
+    {
+        if [ -n "$FIRST_SEGMENT" ]; then
+            echo "Generating summary for first segment..."
+            _summarize_vtt_segment "$FIRST_PROMPT" "$FIRST_SEGMENT"
+        fi
+        if [ -n "$MIDDLE_SEGMENT" ]; then
+            echo "Generating summary for middle segment..."
+            _summarize_vtt_segment "$MIDDLE_PROMPT" "$MIDDLE_SEGMENT"
+        fi
+        if [ -n "$LAST_SEGMENT" ]; then
+            echo "Generating summary for last segment..."
+            _summarize_vtt_segment "$LAST_PROMPT" "$LAST_SEGMENT"
+        fi
+    } > "$SUMMARY_FILE"
+    if [ ! -s "$SUMMARY_FILE" ]; then
+        echo "Error: Summary generation failed." >&2
+        rm -rf "$TEMP_DIR"
+        exit 1
+    fi
+    echo "Summarization complete."
+    # Display the content of the summary file
+    echo "Summary content:"
+    echo "----------------------------------------"
+    cat "$SUMMARY_FILE"
+    echo "----------------------------------------"
+    # Clean up
+    rm -rf "$TEMP_DIR"
+}
+# Function to calculate the time difference between two timestamps in HH:MM:SS format
+time_difference() {
+    local TIME1="$1"  # Format: HH:MM:SS
+    local TIME2="$2"  # Format: HH:MM:SS
+    # Extract hours, minutes, and seconds from timestamps
+    local TIME1_HOUR=$(echo "$TIME1" | cut -d: -f1)
+    local TIME1_MINUTE=$(echo "$TIME1" | cut -d: -f2)
+    local TIME1_SECOND=$(echo "$TIME1" | cut -d: -f3)
+    local TIME2_HOUR=$(echo "$TIME2" | cut -d: -f1)
+    local TIME2_MINUTE=$(echo "$TIME2" | cut -d: -f2)
+    local TIME2_SECOND=$(echo "$TIME2" | cut -d: -f3)
+    # Calculate total seconds for each timestamp
+    local TIME1_TOTAL_SECONDS=$((TIME1_HOUR * 3600 + TIME1_MINUTE * 60 + TIME1_SECOND))
+    local TIME2_TOTAL_SECONDS=$((TIME2_HOUR * 3600 + TIME2_MINUTE * 60 + TIME2_SECOND))
+    # Calculate the difference in seconds
+    local DIFF_SECONDS=$((TIME1_TOTAL_SECONDS - TIME2_TOTAL_SECONDS))
+    # Return the difference (could be negative if TIME2 is later than TIME1)
+    echo "$DIFF_SECONDS"
+}
+# Main script logic
+if [ $# -eq 0 ]; then
+    echo "Error: No input provided. Please provide a valid URL, VTT file, or a local audio file."
+    exit 1
+fi
+if [[ "$1" == *.vtt ]]; then
+    echo "Processing as VTT file..."
+    add_job "process_vtt \"$1\" \"$1\""
+elif [[ "$1" == *"http"* ]]; then
+    echo "Processing as YouTube URL..."
+    # Extract the video title
+    VIDEO_TITLE=$(yt-dlp --get-title "$1")
+    FINAL_BASE_NAME=$(sanitize_filename "$VIDEO_TITLE")
+    # Attempt to download subtitles first
+    yt-dlp -N 3 --skip-download --write-auto-sub --sub-lang en \
+           --cookies-from-browser brave --output "$OUTPUT_DIR/${FINAL_BASE_NAME}.%(ext)s" "$1"
+    VTT_FILE=$(find "$OUTPUT_DIR" -name "${FINAL_BASE_NAME}.vtt" | head -n 1)
+    if [ -n "$VTT_FILE" ]; then
+        echo "Subtitles found, processing VTT file..."
+        add_job "process_vtt \"$VTT_FILE\" \"$1\""
+    else
+        echo "No subtitles found, downloading audio and generating transcript..."
+        if [ "$DISABLE_AUDIO" = false ]; then
+            if ! yt-dlp -N 3 -x --audio-format wav --postprocessor-args "-ar 16k" \
+                   --cookies-from-browser brave --output "$OUTPUT_DIR/${FINAL_BASE_NAME}.%(ext)s" "$1"; then
+                echo "Error: Failed to download audio using yt-dlp. Check the URL and your internet connection." >&2
+                exit 1
+            fi
+            WAV_FILE=$(find "$OUTPUT_DIR" -name "${FINAL_BASE_NAME}.wav" | head -n 1)
+            if [ -z "$WAV_FILE" ]; then
+                echo "Error: WAV file not found after download. Check yt-dlp output." >&2
+                exit 1
+            fi
+            echo "Running Whisper-CPP to generate VTT transcript..."
+            if ! "$WHISPCC"/main -ovtt -tdrz -m "$MODEL_PATH" "$WAV_FILE"; then
+                echo "Error: Whisper-CPP transcription failed. Check the model path and audio file." >&2
+                exit 1
+            fi
+            VTT_FILE="${WAV_FILE%.*}.vtt"
+            add_job "process_vtt \"$VTT_FILE\" \"$1\""
+            # Convert WAV to OGG Opus
+            echo "Converting WAV to OGG Opus..."
+            OGG_FILE="${WAV_FILE%.wav}.ogg"
+            if ! ffmpeg -i "$WAV_FILE" -c:a libopus -b:a 16k -vbr on -compression_level 10 -y "$OGG_FILE"; then
+                echo "Error: Failed to convert to OGG format." >&2
+                exit 1
+            fi
+            echo " - Audio: $OGG_FILE"
+            # Remove the WAV file
+            rm "$WAV_FILE"
+        fi
+    fi
+elif [ -f "$1" ]; then
+    echo "Processing as local audio file..."
+    INPUT_FILE="$1"
+    WAV_FILE="${INPUT_FILE%.*}.wav"
+    # Convert to WAV first if not already WAV
+    if [[ "$INPUT_FILE" != *.wav ]]; then
+        echo "Converting input to WAV format..."
+        if ! ffmpeg -i "$INPUT_FILE" -ar 16000 -ac 1 -c:a pcm_s16le ${DURATION:+-t "$DURATION"} -y "$WAV_FILE"; then
+            echo "Error: Failed to convert input to WAV format." >&2
+            exit 1
+        fi
+    else
+        WAV_FILE="$INPUT_FILE"
+    fi
+    echo "Running Whisper-CPP to generate VTT transcript..."
+    if ! "$WHISPCC"/main -ovtt -tdrz -m "$MODEL_PATH" "$WAV_FILE" ; then
+        echo "Error: Whisper-CPP transcription failed." >&2
+        exit 1
+    fi
+    VTT_FILE="${WAV_FILE%.wav}.vtt"
+    mv "${WAV_FILE}.vtt" "$VTT_FILE"
+    add_job "process_vtt \"$VTT_FILE\" \"$1\""
+    if [ "$DISABLE_AUDIO" = false ]; then
+        # Convert to OGG Opus
+        echo "Converting to OGG Opus..."
+        OGG_FILE="${WAV_FILE%.*}.ogg"
+        if ! ffmpeg -i "$WAV_FILE" -c:a libopus -b:a 16k -vbr on -compression_level 10 -y "$OGG_FILE"; then
+            echo "Error: Failed to convert to OGG format." >&2
+            exit 1
+        fi
+        echo " - Audio: $OGG_FILE"
+        # Remove the WAV file per CHARTER point 7
+        rm "$WAV_FILE"
+    fi
+else
+    echo "Error: Invalid input. Provide a valid URL, VTT file, or a local audio file."
+    exit 1
+fi
+process_job_queue

tetris32b.html ADDED Viewed

	@@ -0,0 +1,275 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Tetris Game</title>
+    <style>
+        body {
+            display: flex;
+            justify-content: center;
+            align-items: center;
+            height: 100vh;
+            margin: 0;
+            background-color: #282c34;
+        }
+        canvas {
+            border: 1px solid #fff;
+        }
+    </style>
+</head>
+<body>
+<canvas id="tetris" width="320" height="640"></canvas>
+<script>
+    // Look up the game canvas and obtain its 2D drawing context.
+    const canvas = document.getElementById('tetris');
+    const context = canvas.getContext('2d');
+    // Scale the context by 20 so that one drawing unit covers a 20x20 pixel cell;
+    // the drawing code can then paint each grid cell as a 1x1 rectangle.
+    context.scale(20, 20);
+    function arenaSweep() {
+        let rowCount = 1;
+        outer: for (let y = arena.length - 1; y > 0; --y) {
+            for (let x = 0; x < arena[y].length; ++x) {
+                if (arena[y][x] === 0) {
+                    continue outer;
+                }
+            }
+            const row = arena.splice(y, 1)[0].fill(0);
+            arena.unshift(row);
+            ++y;
+            player.score += rowCount * 10;
+            rowCount *= 2;
+        }
+    }
+    function collide(arena, player) {
+        const [m, o] = [player.matrix, player.pos];
+        for (let y = 0; y < m.length; ++y) {
+            for (let x = 0; x < m[y].length; ++x) {
+                if (m[y][x] !== 0 &&
+                    (arena[y + o.y] &&
+                     arena[y + o.y][x + o.x]) !== 0) {
+                        return true;
+                }
+            }
+        }
+        return false;
+    }
+    function createMatrix(w, h) {
+        const matrix = [];
+        while (h--) {
+            matrix.push(new Array(w).fill(0));
+        }
+        return matrix;
+    }
+    function createPiece(type) {
+        if (type === 'T') {
+            return [
+                [0, 0, 0],
+                [1, 1, 1],
+                [0, 1, 0],
+            ];
+        } else if (type === 'O') {
+            return [
+                [2, 2],
+                [2, 2],
+            ];
+        } else if (type === 'L') {
+            return [
+                [0, 3, 0],
+                [0, 3, 0],
+                [0, 3, 3],
+            ];
+        } else if (type === 'J') {
+            return [
+                [0, 4, 0],
+                [0, 4, 0],
+                [4, 4, 0],
+            ];
+        } else if (type === 'I') {
+            return [
+                [0, 5, 0, 0],
+                [0, 5, 0, 0],
+                [0, 5, 0, 0],
+                [0, 5, 0, 0],
+            ];
+        } else if (type === 'S') {
+            return [
+                [0, 6, 6],
+                [6, 6, 0],
+                [0, 0, 0],
+            ];
+        } else if (type === 'Z') {
+            return [
+                [7, 7, 0],
+                [0, 7, 7],
+                [0, 0, 0],
+            ];
+        }
+    }
+    function draw() {
+        context.fillStyle = '#282c34';
+        context.fillRect(0, 0, canvas.width, canvas.height);
+        drawMatrix(arena, { x: 0, y: 0 });
+        drawMatrix(player.matrix, player.pos);
+    }
+    function drawMatrix(matrix, offset) {
+        matrix.forEach((row, y) => {
+            row.forEach((value, x) => {
+                if (value !== 0) {
+                    context.fillStyle = colors[value];
+                    context.fillRect(x + offset.x,
+                                     y + offset.y,
+                                     1, 1);
+                }
+            });
+        });
+    }
+    function merge(arena, player) {
+        player.matrix.forEach((row, y) => {
+            row.forEach((value, x) => {
+                if (value !== 0) {
+                    arena[y + player.pos.y][x + player.pos.x] = value;
+                }
+            });
+        });
+    }
+    function playerDrop() {
+        player.pos.y++;
+        if (collide(arena, player)) {
+            player.pos.y--;
+            merge(arena, player);
+            playerReset();
+            arenaSweep();
+            updateScore();
+        }
+        dropCounter = 0;
+    }
+    function playerMove(dir) {
+        player.pos.x += dir;
+        if (collide(arena, player)) {
+            player.pos.x -= dir;
+        }
+    }
+    function playerRotate(dir) {
+        const pos = player.pos.x;
+        let offset = 1;
+        rotate(player.matrix, dir);
+        while (collide(arena, player)) {
+            player.pos.x += offset;
+            offset = -(offset + (offset > 0 ? 1 : -1));
+            if (offset > player.matrix[0].length) {
+                rotate(player.matrix, -dir);
+                player.pos.x = pos;
+                return;
+            }
+        }
+    }
+    function rotate(matrix, dir) {
+        for (let y = 0; y < matrix.length; ++y) {
+            for (let x = 0; x < y; ++x) {
+                [
+                    matrix[x][y],
+                    matrix[y][x],
+                ] = [
+                    matrix[y][x],
+                    matrix[x][y],
+                ];
+            }
+        }
+        if (dir > 0) {
+            matrix.forEach(row => row.reverse());
+        } else {
+            matrix.reverse();
+        }
+    }
+    function playerReset() {
+        const pieces = 'ILJOTSZ';
+        player.matrix = createPiece(pieces[pieces.length * Math.random() | 0]);
+        player.pos.y = 0;
+        player.pos.x = (arena[0].length / 2 | 0) -
+                       (player.matrix[0].length / 2 | 0);
+        if (collide(arena, player)) {
+            arena.forEach(row => row.fill(0));
+            player.score = 0;
+            updateScore();
+        }
+    }
+    let dropCounter = 0;
+    let dropInterval = 1000;
+    let lastTime = 0;
+    function update(time = 0) {
+        const deltaTime = time - lastTime;
+        dropCounter += deltaTime;
+        if (dropCounter > dropInterval) {
+            playerDrop();
+        }
+        lastTime = time;
+        draw();
+        requestAnimationFrame(update);
+    }
+    function updateScore() {
+        document.getElementById('score').innerText = player.score;
+    }
+    // Fill colour for each cell value, indexed by the numbers used in the piece
+    // matrices built by createPiece. Index 0 is never drawn (0 marks an empty cell).
+    const colors = [
+        null,
+        '#FF0D72', // 1: T piece
+        '#0DC2FF', // 2: O piece
+        '#0DFF72', // 3: L piece
+        '#F538FF', // 4: J piece
+        '#FF8E0D', // 5: I piece
+        '#FFE138', // 6: S piece
+        '#3877FF', // 7: Z piece
+    ];
+    const arena = createMatrix(12, 20);
+    const player = {
+        pos: {x: 0, y: 0},
+        matrix: null,
+        score: 0,
+    };
+    document.addEventListener('keydown', event => {
+        if (event.keyCode === 37) {
+            playerMove(-1);
+        } else if (event.keyCode === 39) {
+            playerMove(1);
+        } else if (event.keyCode === 40) {
+            playerDrop();
+        } else if (event.keyCode === 81) {
+            playerRotate(-1);
+        } else if (event.keyCode === 87) {
+            playerRotate(1);
+        }
+    });
+    playerReset();
+    updateScore();
+    update();
+</script>
+</body>
+</html>

vttclean.py ADDED Viewed

	@@ -0,0 +1,74 @@

+#!/usr/bin/python3
+import re
+import datetime
+import glob
+import sys
+def clean_text(text):
+    # Remove HTML tags
+    text = re.sub(r'<[^>]+>', '', text)
+    # Remove multiple spaces
+    text = re.sub(r'\s+', ' ', text)
+    # Remove leading/trailing whitespace
+    return text.strip()
+def is_prefix(a, b):
+    """Return True if *b* starts with *a* (note the order: prefix first)."""
+    return b.startswith(a)
+def process_vtt(content):
+    # Remove WEBVTT header and metadata
+    content = re.sub(r'^WEBVTT\n.*?\n\n', '', content, flags=re.DOTALL)
+    # Split into captions
+    captions = re.split(r'\n\n+', content)
+    processed_captions = []
+    buffer = []
+    def flush_buffer():
+        if buffer:
+            processed_captions.append(buffer[-1])  # Keep the last (most complete) line
+            buffer.clear()
+    for caption in captions:
+        lines = caption.split('\n')
+        if len(lines) >= 2:
+            # Extract only the start time and remove milliseconds
+            timestamp_match = re.match(r'(\d{2}:\d{2}:\d{2})\.(\d{3})', lines[0])
+            if timestamp_match:
+                timestamp = f"{timestamp_match.group(1)}.{timestamp_match.group(2)}"
+                text = ' '.join(lines[1:])
+                clean_caption = clean_text(text)
+                if clean_caption:
+                    current_line = f"{timestamp} {clean_caption}"
+                    if not buffer:
+                        buffer.append(current_line)
+                    else:
+                        _, prev_text = buffer[-1].split(' ', 1)
+                        if is_prefix(prev_text, clean_caption):
+                            buffer.append(current_line)
+                        else:
+                            flush_buffer()
+                            buffer.append(current_line)
+    flush_buffer()  # Don't forget to flush the buffer at the end
+    return '\n'.join(processed_captions)
+if __name__ == "__main__":
+    try:
+        if len(sys.argv) < 2:
+            print("Usage: python vttclean.py <file_pattern>", file=sys.stderr)
+            sys.exit(1)
+        file_pattern = sys.argv[1]
+        for filename in glob.glob(file_pattern):
+            with open(filename, 'r', encoding='utf-8') as file:
+                content = file.read()
+                result = process_vtt(content)
+                print(result)
+    except Exception as e:
+        print(f"Error processing input: {e}", file=sys.stderr)
+        sys.exit(1)