Upload 12 files
Browse files- 1 +21 -0
- 2ocr.sh +32 -0
- aidocs.py +155 -0
- jpegdir.py +98 -0
- random/index.html +100 -0
- shove.sh +38 -0
- showfiles +98 -0
- skel.py +143 -0
- summ +0 -0
- summarize2 +415 -0
- tetris32b.html +275 -0
- vttclean.py +74 -0
1
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
FAILURE: Build failed with an exception.
|
3 |
+
|
4 |
+
* What went wrong:
|
5 |
+
Directory '/Users/jim/work/hacks' does not contain a Gradle build.
|
6 |
+
|
7 |
+
A Gradle build's root directory should contain one of the possible settings files: settings.gradle, settings.gradle.kts, settings.gradle.dcl.It may also contain one of the possible build files: build.gradle, build.gradle.kts, build.gradle.dcl.
|
8 |
+
|
9 |
+
To create a new Gradle build in this directory run 'gradle init'
|
10 |
+
|
11 |
+
For more information about the 'init' task, please refer to https://docs.gradle.org/8.12-rc-1/userguide/build_init_plugin.html in the Gradle documentation.
|
12 |
+
|
13 |
+
For more details on creating a Gradle build, please refer to https://docs.gradle.org/8.12-rc-1/userguide/tutorial_using_tasks.html in the Gradle documentation.
|
14 |
+
|
15 |
+
* Try:
|
16 |
+
> Run gradle init to create a new Gradle build in this directory.
|
17 |
+
> Run with --stacktrace option to get the stack trace.
|
18 |
+
> Run with --info or --debug option to get more log output.
|
19 |
+
> Get more help at https://help.gradle.org.
|
20 |
+
|
21 |
+
BUILD FAILED in 413ms
|
2ocr.sh
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Convert a directory of TIFF page scans into one searchable PDF by
# OCR-ing each page with Tesseract and concatenating the per-page PDFs.

set -euo pipefail

# Directory containing TIFF files
INPUT_DIR="atreatiseonlawp00chitgoog_tif"
OUTPUT_PDF="output_searchable.pdf"
TEMP_DIR="temp_ocr"

# BUG FIX: without nullglob the loop would run once on the literal
# pattern "$INPUT_DIR/*.tif" when no TIFF files exist.
shopt -s nullglob
tif_files=("$INPUT_DIR"/*.tif)
if [ ${#tif_files[@]} -eq 0 ]; then
    echo "Error: no .tif files found in $INPUT_DIR" >&2
    exit 1
fi

# Create a temporary directory to store processed files
mkdir -p "$TEMP_DIR"

# Run Tesseract on each file, producing one searchable PDF per page
for file in "${tif_files[@]}"; do
    # Extract the filename without extension
    filename=$(basename "$file" .tif)
    tesseract "$file" "$TEMP_DIR/$filename" -l eng pdf
done

# Combine all individual page PDFs into a single PDF
if command -v pdfunite >/dev/null 2>&1; then
    # pdfunite (poppler-utils) preserves the embedded OCR text layer
    pdfunite "$TEMP_DIR"/*.pdf "$OUTPUT_PDF"
else
    # Fallback to ImageMagick `convert`. NOTE(review): convert rasterizes
    # the pages and drops the OCR text layer, so the fallback result is
    # not actually searchable — confirm whether this is acceptable.
    convert "$TEMP_DIR"/*.pdf "$OUTPUT_PDF"
fi

# Clean up temporary directory
rm -r "$TEMP_DIR"

echo "Searchable PDF created as $OUTPUT_PDF"
|
aidocs.py
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
import fnmatch
import glob
import re
from dataclasses import dataclass, field
from itertools import chain
from pathlib import Path
from typing import Optional, List, Dict, Set, Literal, Tuple, NamedTuple, Union
|
8 |
+
|
9 |
+
@dataclass
class PathPattern:
    """Represents either a direct mapping ('src:target') or a wildcard pattern.

    Attributes:
        pattern: source path or glob pattern.
        target_template: explicit target path when the spec was colon-separated;
            None for pure wildcard patterns.
    """
    pattern: str
    target_template: Optional[str] = None

    @classmethod
    def parse(cls, spec: str) -> 'PathPattern':
        """Parse a path specification into pattern and optional target.

        Only the first ':' splits, so targets may themselves contain colons.
        """
        if ':' in spec:
            source, target = spec.split(':', 1)
            return cls(source, target)
        return cls(spec)

    def resolve(self, root_dir: Path) -> List['PathMapping']:
        """Resolve this pattern into concrete path mappings.

        BUG FIX: the return annotation must be a string — PathMapping is not
        defined before this class, and an unquoted annotation is evaluated at
        class-creation time, raising NameError on import.
        """
        if self.target_template is not None:
            # Direct mapping case: one explicit source -> target pair.
            return [PathMapping(Path(self.pattern), Path(self.target_template))]

        # Wildcard pattern case: expand via glob (supports '**').
        matches = []
        for path in glob.glob(self.pattern, recursive=True):
            source = Path(path)
            if source.is_file():
                # For files under root_dir, keep their relative structure;
                # files outside it map to their own path unchanged.
                relative = source.relative_to(root_dir) if root_dir in source.parents else source
                matches.append(PathMapping(source, relative))
        return matches

    def validate(self) -> None:
        """Validate pattern constraints; raises ValueError on violation."""
        if self.target_template:
            # Reject path traversal in target
            if '..' in self.target_template:
                raise ValueError(f"Target path '{self.target_template}' cannot contain '..'")

            # Targets must use forward slashes on every platform
            if '\\' in self.target_template:
                raise ValueError(f"Target path must use forward slashes")

        # Reject characters that are invalid in portable filenames
        if any(c in self.pattern for c in '<>|"'):
            raise ValueError(f"Invalid characters in pattern: {self.pattern}")
|
53 |
+
|
54 |
+
class WikiTransformer:
    """Resolves path patterns and transforms matched files into a wiki tree."""

    def __init__(self, size_limit: 'SizeSpec', output_dir: Path,
                 merge_strategy: MergeStrategy,
                 debug: bool = False):
        # Order matters: logging setup may consult self.debug.
        self.validator = SizeValidator(size_limit)
        self.output_dir = output_dir
        self.merge_strategy = merge_strategy
        self.debug = debug
        self.console = Console()
        self.log = self._setup_logging()
        self.processed_inodes: Set[int] = set()
        self.root_dir = Path.cwd()

    async def resolve_patterns(self, patterns: List[str]) -> List[PathMapping]:
        """Resolve all pattern specs into concrete mappings.

        Invalid specs are logged and skipped rather than aborting the run.
        """
        collected: List[PathMapping] = []
        for spec in patterns:
            try:
                parsed = PathPattern.parse(spec)
                parsed.validate()
                found = parsed.resolve(self.root_dir)
                if not found:
                    self.log.warning(f"Pattern '{spec}' matched no files")
                collected.extend(found)
            except ValueError as e:
                self.log.error(f"Invalid pattern '{spec}': {e}")
                continue
        return collected

    async def transform(self, patterns: List[str]):
        """Transform source trees based on patterns and mappings.

        Raises ValueError when nothing matched or the target filesystem
        cannot support the configured link type.
        """
        mappings = await self.resolve_patterns(patterns)
        if not mappings:
            raise ValueError("No valid paths matched the specified patterns")

        if not self.merge_strategy.validate_target(self.output_dir):
            raise ValueError(
                f"Target filesystem doesn't support {self.merge_strategy.link_type} links"
            )

        self.output_dir.mkdir(parents=True, exist_ok=True)

        # One progress tick per mapping; failures are logged, not fatal.
        with Progress() as progress:
            task = progress.add_task(
                "[green]Processing files...",
                total=len(mappings)
            )
            for mapping in mappings:
                try:
                    await self.process_mapping(mapping)
                    progress.update(task, advance=1)
                except Exception as e:
                    self.log.error(f"Failed to process {mapping}: {e}")
|
109 |
+
|
110 |
+
@click.command()
# BUG FIX: click arguments do not accept a `help` keyword (only options do);
# passing one raises TypeError at import time. The usage text belongs in the
# command docstring, which click renders instead.
@click.argument('patterns', nargs=-1, required=True)
@click.option('-l', '--limit', type=SIZE, default='1M',
              help='Per-document size limit (e.g., 500K, 2M, 1G)')
@click.option('-d', '--debug', is_flag=True, help='Enable debug logging')
@click.option('-o', '--output-dir', type=click.Path(), default='wiki',
              help='Output directory')
@click.option('--link-type', type=click.Choice(['symlink', 'hardlink', 'copy']),
              default='symlink', help='File linking strategy')
@click.option('--follow-links/--no-follow-links', default=False,
              help='Follow symbolic links during traversal')
def main(patterns: List[str], limit: SizeSpec, debug: bool,
         output_dir: str, link_type: str, follow_links: bool):
    """Transform files into wiki structure using patterns or mappings.

    PATTERNS can be either:
    1. Colon-separated mappings: 'source:target'
    2. Wildcard patterns: '**/*.md', 'docs/**/*.rst'

    Examples:
        # Explicit mapping
        wiki_transform.py src/api:docs/api docs/intro:guide/start

        # Wildcard patterns
        wiki_transform.py '**/*.md' 'docs/**/*.rst'

        # Mixed usage
        wiki_transform.py src:api '**/*.md' 'legacy:archive'
    """
    # 'copy' is modeled as "no link type" inside the merge strategy.
    strategy = MergeStrategy(
        link_type=None if link_type == 'copy' else link_type,
        follow_links=follow_links
    )

    transformer = WikiTransformer(
        size_limit=limit,
        output_dir=Path(output_dir),
        merge_strategy=strategy,
        debug=debug
    )

    # The transformer is async; drive it to completion here.
    asyncio.run(transformer.transform(patterns))

if __name__ == '__main__':
    main()
|
jpegdir.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from PIL import Image
|
3 |
+
import pytesseract
|
4 |
+
from pathlib import Path
|
5 |
+
import json
|
6 |
+
from typing import Dict, List
|
7 |
+
from concurrent.futures import ProcessPoolExecutor
|
8 |
+
import multiprocessing
|
9 |
+
|
10 |
+
def process_image(args) -> tuple:
    """
    Process a single image file: OCR it and save the text alongside.

    Args:
        args: Tuple of (filename, input_dir, output_dir)
    Returns:
        Tuple of (filename, extracted_text). On failure the text is an
        "ERROR: ..." string instead of raising, so one bad file never
        kills the whole worker pool.
    """
    filename, input_dir, output_dir = args
    try:
        # Full path to image
        image_path = os.path.join(input_dir, filename)

        # Open and process image
        with Image.open(image_path) as img:
            # Extract text using pytesseract
            text = pytesseract.image_to_string(img)

        # Save individual text file next to the consolidated results
        text_filename = Path(filename).stem + '.txt'
        text_path = os.path.join(output_dir, text_filename)
        with open(text_path, 'w', encoding='utf-8') as f:
            f.write(text)

        # BUG FIX: the progress messages printed a literal placeholder
        # instead of interpolating the file being processed.
        print(f"Processed: {filename}")
        return filename, text

    except Exception as e:
        print(f"Error processing {filename}: {str(e)}")
        return filename, f"ERROR: {str(e)}"
|
41 |
+
|
42 |
+
def process_directory(input_dir: str, output_dir: str, max_workers: int = None) -> Dict[str, str]:
    """
    Process all JPEG files in a directory and perform OCR using multiple processes.

    Args:
        input_dir: Directory containing JPEG files
        output_dir: Directory to save OCR results
        max_workers: Maximum number of worker processes (defaults to CPU count)

    Returns:
        Dictionary mapping filenames to extracted text
    """
    # Create output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # If max_workers not specified, use CPU count
    if max_workers is None:
        max_workers = multiprocessing.cpu_count()

    # BUG FIX: compare suffixes case-insensitively — the original literal
    # set {'.jpg', '.JPG', ...} missed mixed-case names such as '.Jpg'.
    valid_extensions = {'.jpg', '.jpeg'}

    # Sorted so processing (and the JSON output) is deterministic.
    image_files = sorted(
        f for f in os.listdir(input_dir)
        if Path(f).suffix.lower() in valid_extensions
    )

    # Prepare arguments for worker processes
    work_args = [(f, input_dir, output_dir) for f in image_files]

    # Process files concurrently; process_image never raises, so map()
    # yields a result for every input.
    results = {}
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        for filename, text in executor.map(process_image, work_args):
            results[filename] = text

    # Save consolidated results to JSON
    json_path = os.path.join(output_dir, 'ocr_results.json')
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    return results
|
85 |
+
|
86 |
+
if __name__ == "__main__":
    import argparse

    # CLI wrapper: OCR every JPEG in input_dir, writing one .txt per image
    # plus a consolidated ocr_results.json into output_dir.
    parser = argparse.ArgumentParser(description='Perform OCR on all JPEG files in a directory')
    parser.add_argument('input_dir', help='Input directory containing JPEG files')
    parser.add_argument('output_dir', help='Output directory for OCR results')
    parser.add_argument('--workers', type=int, help='Number of worker processes (default: CPU count)',
                        default=None)

    args = parser.parse_args()

    results = process_directory(args.input_dir, args.output_dir, args.workers)
    print(f"\nProcessed {len(results)} files. Results saved to {args.output_dir}")
|
random/index.html
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Force-Directed Voronoi Diagram</title>
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <style>
        body {
            margin: 0;
            overflow: hidden;
        }
        svg {
            display: block;
        }
    </style>
</head>
<body>
    <script>
        // Dimensions
        const width = window.innerWidth;
        const height = window.innerHeight;

        // Create SVG container
        const svg = d3.select("body")
            .append("svg")
            .attr("width", width)
            .attr("height", height);

        // Random initial dataset
        let data = d3.range(20).map(() => ({
            x: Math.random() * width,
            y: Math.random() * height,
            value: Math.random()
        }));

        // Force simulation
        const simulation = d3.forceSimulation(data)
            .force("x", d3.forceX(d => d.x).strength(0.5))
            .force("y", d3.forceY(d => d.y).strength(0.5))
            .force("collide", d3.forceCollide(50))
            .on("tick", update);

        // Group for Voronoi cells
        const voronoiGroup = svg.append("g");

        // Group for circles
        const circleGroup = svg.append("g");

        function update() {
            // BUG FIX: d3.voronoi() was removed after d3 v4; the loaded d3 v7
            // bundles d3-delaunay instead. Rebuild the diagram each tick from
            // the current node positions.
            const delaunay = d3.Delaunay.from(data, d => d.x, d => d.y);
            const diagram = delaunay.voronoi([0, 0, width, height]);

            // Update Voronoi cells (selection.join replaces enter/merge/exit)
            voronoiGroup.selectAll("path")
                .data(data)
                .join("path")
                .attr("d", (d, i) => diagram.renderCell(i))
                .attr("fill", d => d3.interpolateRainbow(d.value))
                .attr("stroke", "#000");

            // Update circles
            circleGroup.selectAll("circle")
                .data(data)
                .join("circle")
                .attr("r", 5)
                .attr("fill", "black")
                .attr("cx", d => d.x)
                .attr("cy", d => d.y);
        }

        // Add a new data point every 2 seconds
        setInterval(() => {
            data.push({
                x: Math.random() * width,
                y: Math.random() * height,
                value: Math.random()
            });

            simulation.nodes(data);
            simulation.alpha(1).restart();
        }, 2000);
    </script>
</body>
</html>
|
shove.sh
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Stack output-*.png pages into vertical strips of BATCH_SIZE images and
# OCR each strip with easyocr, appending all text to RESULTS_FILE.

set -euo pipefail

# Variables
BATCH_SIZE=30
COUNTER=0
OUTPUT_DIR="batches"
RESULTS_FILE="ocr_results.txt"
# BUG FIX: initialize the array; under `set -u` an unset array aborts.
BATCH_FILES=()

# Create output directory if not exists
mkdir -p "$OUTPUT_DIR"

# Clear results file
: > "$RESULTS_FILE"

# Count the inputs once instead of re-running `ls | wc -l` every iteration.
shopt -s nullglob
ALL_FILES=(output-*.png)
TOTAL=${#ALL_FILES[@]}
if (( TOTAL == 0 )); then
    echo "No output-*.png files found." >&2
    exit 1
fi

# Stack one batch vertically and OCR the composite image.
process_batch() {
    local batch_name="$1"; shift
    # BUG FIX: the "concat:" protocol cannot decode multiple PNGs, and
    # `-vf vstack` needs one input stream per image. Feed each file with
    # its own -i and stack them via -filter_complex.
    local inputs=()
    local f
    for f in "$@"; do
        inputs+=(-i "$f")
    done
    ffmpeg -y "${inputs[@]}" -filter_complex "vstack=inputs=$#" "$batch_name"

    echo "Processing $batch_name..."
    easyocr -l en -f "$batch_name" --gpu True >> "$RESULTS_FILE"
}

# Loop through PNG files in batches of $BATCH_SIZE
for FILE in "${ALL_FILES[@]}"; do
    BATCH_FILES+=("$FILE")
    COUNTER=$((COUNTER + 1))

    # Process batch when size is reached or on the last file
    if (( COUNTER % BATCH_SIZE == 0 || COUNTER == TOTAL )); then
        # BUG FIX: ceiling division — the old COUNTER/BATCH_SIZE gave the
        # final partial batch the same index as the previous full one,
        # overwriting its image.
        BATCH_NAME="${OUTPUT_DIR}/batch_$(( (COUNTER + BATCH_SIZE - 1) / BATCH_SIZE )).png"
        process_batch "$BATCH_NAME" "${BATCH_FILES[@]}"

        # Reset batch files array
        BATCH_FILES=()
    fi
done

echo "OCR processing complete. Results saved to $RESULTS_FILE."
|
showfiles
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash

# Mission Statement:
# This script displays the contents of specified files with formatted headers.
# It provides optional file size limits through the -k flag (specified in KB).
# Without the -k flag, files are shown in their entirety.
# With -k flag, files larger than the specified size are truncated with a warning.
# The script handles both Linux and MacOS systems using compatible stat commands.
# Color output is available via the -c flag for better visual organization.

# ANSI color codes
BLUE='\033[0;34m'
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m' # No Color

usage() {
    # BUG FIX: quote "$0" so script paths containing spaces don't word-split.
    echo "Usage: $(basename "$0") [-k size_in_kb] [-c] files..."
    echo "  -k: Maximum file size in KB (optional)"
    echo "  -c: Enable color output"
    exit 1
}

# Initialize variables
COLOR=false
MAX_SIZE_KB=""

# Parse command line options
while getopts "k:c" opt; do
    case $opt in
        k) MAX_SIZE_KB="$OPTARG";;
        c) COLOR=true;;
        ?) usage;;
    esac
done

# Shift past the options
shift $((OPTIND-1))

# Check if any files were specified
if [ $# -eq 0 ]; then
    usage
fi

# BUG FIX: validate -k up front; non-numeric input used to blow up later
# inside the arithmetic comparison mid-listing.
if [ -n "$MAX_SIZE_KB" ] && ! [[ "$MAX_SIZE_KB" =~ ^[0-9]+$ ]]; then
    echo "Error: -k requires a positive integer (got '$MAX_SIZE_KB')" >&2
    exit 1
fi

# Get file size in bytes (compatible with both Linux and MacOS)
get_file_size() {
    if [[ "$OSTYPE" == "darwin"* ]]; then
        stat -f %z "$1"
    else
        stat --format=%s "$1"
    fi
}

# Format and display file header
show_header() {
    local file="$1"
    local size_bytes
    size_bytes=$(get_file_size "$file")
    local size_kb=$((size_bytes / 1024))

    if $COLOR; then
        echo -e "\n${BLUE}=== File: ${GREEN}$file${BLUE} (${size_kb}KB) ===${NC}"
    else
        echo -e "\n=== File: $file (${size_kb}KB) ==="
    fi
}

# Process each file
for file in "$@"; do
    if [ ! -f "$file" ]; then
        if $COLOR; then
            echo -e "${RED}Error: '$file' does not exist or is not a regular file${NC}" >&2
        else
            echo "Error: '$file' does not exist or is not a regular file" >&2
        fi
        continue
    fi

    show_header "$file"

    if [ -n "$MAX_SIZE_KB" ]; then
        size_bytes=$(get_file_size "$file")
        size_kb=$((size_bytes / 1024))

        # Quote both operands of -gt: an empty expansion breaks the test.
        if [ "$size_kb" -gt "$MAX_SIZE_KB" ]; then
            if $COLOR; then
                echo -e "${RED}File size ($size_kb KB) exceeds limit ($MAX_SIZE_KB KB). Showing first $MAX_SIZE_KB KB:${NC}"
            else
                echo "File size ($size_kb KB) exceeds limit ($MAX_SIZE_KB KB). Showing first $MAX_SIZE_KB KB:"
            fi
            head -c $((MAX_SIZE_KB * 1024)) "$file"
            echo -e "\n[Truncated...]"
        else
            cat "$file"
        fi
    else
        cat "$file"
    fi
done
|
skel.py
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!python3
|
2 |
+
import unittest
|
3 |
+
from pathlib import Path
|
4 |
+
import tempfile
|
5 |
+
import os
|
6 |
+
|
7 |
+
class TestSkeletonMapper(unittest.TestCase):
    """Regression tests for the skeleton extractor's Kotlin handling.

    NOTE(review): depends on create_language_patterns() and
    extract_skeleton() being defined elsewhere in the project — neither is
    visible in this file; confirm they are importable here. The mkdtemp()
    directory is never removed — confirm whether a tearDown is wanted.
    """

    def setUp(self):
        # Fresh scratch directory and pattern table for each test.
        self.temp_dir = tempfile.mkdtemp()
        self.patterns = create_language_patterns()

    def create_test_file(self, content: str, extension: str) -> str:
        """Write content to <temp_dir>/test<extension>; return the path."""
        path = Path(self.temp_dir) / f"test{extension}"
        path.write_text(content)
        return str(path)

    def test_kotlin_edge_cases(self):
        # Kotlin snippet exercising generics, initialized properties and
        # annotations — the three known problem areas asserted below.
        kotlin_code = '''
@DslMarker
annotation class NioProxyDsl

interface EnhancedNioProxy<T : Any> {
    val original: T
    fun verifyIdentity(): Boolean = enhanced.equals(original)
}

class ProxyContext {
    private val _events = MutableSharedFlow<ProxyEvent>()
}
'''
        file_path = self.create_test_file(kotlin_code, ".kt")
        results = extract_skeleton(file_path, self.patterns)

        # BUG 1: Missing generic type parameters in class/interface detection
        self.assertIn("interface EnhancedNioProxy<T : Any>", results['interface'])

        # BUG 2: Property detection fails with initialization
        self.assertIn("val original: T", results['property'])

        # BUG 3: Annotation detection drops parameters
        self.assertIn("@DslMarker", results['annotation'])
|
42 |
+
|
43 |
+
def fix_kotlin_patterns():
    """Return corrected line-anchored regexes for Kotlin skeleton extraction.

    Each pattern captures the declared identifier in group 1 and tolerates
    an optional generic parameter list (e.g. ``<T : Any>``) where Kotlin
    allows one. Keys name the declaration kind they match.
    """
    return {
        'class': r'^\s*(?:data\s+)?class\s+(\w+)(?:<[^>]+>)?',
        'function': r'^\s*fun\s+(\w+)(?:<[^>]+>)?',
        'property': r'^\s*(?:var|val)\s+(\w+)(?:\s*:\s*[^=]+)?(?:\s*=.+)?',
        'interface': r'^\s*interface\s+(\w+)(?:<[^>]+>)?',
        'annotation': r'^\s*@(\w+)(?:\s*[\w\s.()]+)?',
        'suspend': r'^\s*suspend\s+fun\s+\w+',
    }
|
52 |
+
|
53 |
+
# Critical fixes for main implementation
|
54 |
+
def patch_implementation():
    """
    Critical patches for identified issues.

    Returns:
        A (safe_grep, escape_grep_pattern, read_file_safe) tuple of
        hardened helper callables.
    """
    # BUG FIX: subprocess is never imported at module level in this file,
    # so calling safe_grep raised NameError. Import it where it is used.
    import subprocess

    # 1. Fix subprocess handling for large files: bound runtime, return ""
    # instead of hanging on a pathological input.
    def safe_grep(cmd: str, timeout: int = 30) -> str:
        # NOTE(review): shell=True runs `cmd` through the shell — callers
        # must never pass untrusted input here.
        try:
            return subprocess.run(
                cmd,
                shell=True,
                text=True,
                capture_output=True,
                timeout=timeout
            ).stdout
        except subprocess.TimeoutExpired:
            return ""

    # 2. Fix pattern escaping in grep command (parentheses only).
    def escape_grep_pattern(pattern: str) -> str:
        return pattern.replace('(', '\\(').replace(')', '\\)')

    # 3. Add file encoding handling: UTF-8 first, Latin-1 fallback, ""
    # if even that fails. Missing files still raise (FileNotFoundError).
    def read_file_safe(file_path: str) -> str:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except UnicodeDecodeError:
            try:
                with open(file_path, 'r', encoding='latin-1') as f:
                    return f.read()
            except Exception:
                return ""

    return safe_grep, escape_grep_pattern, read_file_safe
|
88 |
+
|
89 |
+
# Shell script fixes
|
90 |
+
def generate_fixed_shell_script():
    """Return a bash snippet with hardened file iteration and grep quoting.

    The script NUL-delimits filenames from find (handles spaces/newlines)
    and routes grep patterns through a helper to avoid pattern injection.
    """
    return '''
#!/bin/bash

# Fixed file handling
while IFS= read -r -d '' file; do
    if [[ ! -f "$file" ]]; then
        continue
    fi

    # Handle filenames with spaces and special chars
    file_ext="${file##*.}"
    file_name=$(printf '%q' "$file")

    # Prevent grep pattern injection
    safe_grep() {
        local pattern=$1
        local file=$2
        grep -E "^[[:space:]]*${pattern}" "$file" 2>/dev/null || true
    }

    case "$file_ext" in
        kt|kts)
            safe_grep "(@\\w+|class\\s+\\w+|interface\\s+\\w+|fun\\s+\\w+)" "$file_name"
            ;;
        # ... other extensions
    esac
done < <(find . -type f -print0)
'''
|
119 |
+
|
120 |
+
# Runtime monitoring hooks
|
121 |
+
def add_monitoring():
    """Build a decorator that warns about slow or memory-hungry calls."""
    import time
    import psutil

    def monitor_execution(func):
        def wrapper(*args, **kwargs):
            # Snapshot wall clock and resident memory before the call.
            t0 = time.time()
            proc = psutil.Process()
            rss_before = proc.memory_info().rss

            result = func(*args, **kwargs)

            took = time.time() - t0
            rss_delta = proc.memory_info().rss - rss_before

            # Thresholds: 5s wall time or 100MB resident growth.
            if took > 5.0 or rss_delta > 100*1024*1024:
                print(f"Warning: High resource usage in {func.__name__}")

            return result
        return wrapper

    return monitor_execution
|
summ
ADDED
File without changes
|
summarize2
ADDED
@@ -0,0 +1,415 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash

# CHARTER: This script is designed to process audio/video content, generate transcripts,
# summaries, and audio files. The following tasks are defined and must not be altered by any LLM:
# 1. Download content if a URL is provided
# 2. Generate or use existing VTT transcript
# 3. Clean the VTT transcript
# 4. Generate a summary of the transcript
# 5. Create a 16k VBR Opus OGG file for audio tracks (unless audio download is disabled)
# 6. Output both the cleaned VTT text and the summary
# 7. Exclude the WAV file from the results
# 8. Include the OGG file in the results only if both WAV and OGG were created
# This charter is unalterable and defines the core functionality of the script.

# Configuration (adjust these paths)
WHISPCC="$HOME/work/whisper.cpp" # ./main to run ; ./models for models
MODEL_PATH="$WHISPCC/models/ggml-small.en-tdrz.bin"
OUTPUT_DIR="$HOME/processed_audio"
CACHE_DIR="/tmp/summarize_cache"
OLLAMA_MODEL="llama3.1:latest"
# NOTE(review): this second assignment silently overrides llama3.1 — the
# last value wins, so deepseek-coder-v2:16b is the model actually used.
# Confirm which is intended, or gate the choice behind a flag.
OLLAMA_MODEL="deepseek-coder-v2:16b"

# Prompts for different segments
FIRST_PROMPT="Summarize this beginning part of a transcript in one sentence, then provide bullet points with timestamps (00:00:00 sentence)."
MIDDLE_PROMPT="Summarize the key points of this part of the transcript in bullet points with timestamps (00:00:00 sentence)."
LAST_PROMPT="Summarize the main takeaways of this final part of the transcript in bullet points with timestamps (00:00:00 sentence)."

# Global variable to track job queue
JOB_QUEUE=()

# Ensure output and cache directories exist
mkdir -p "$OUTPUT_DIR" "$CACHE_DIR"

# Parse command line options:
#   -f  use fabric for summarization
#   -n  disable audio output
#   -a  re-enable audio output (overrides an earlier -n)
#   -d  duration limit, passed through via $DURATION
USE_FABRIC=false
DISABLE_AUDIO=false
DURATION=""
while getopts "fnad:" opt; do
    case $opt in
        f)
            USE_FABRIC=true
            ;;
        n)
            DISABLE_AUDIO=true
            ;;
        a)
            DISABLE_AUDIO=false
            ;;
        d)
            DURATION="$OPTARG"
            ;;
        \?)
            echo "Invalid option: -$OPTARG" >&2
            exit 1
            ;;
    esac
done
shift $((OPTIND-1))
|
59 |
+
|
60 |
+
# Print the MD5 digest of a file (hash only, filename stripped).
get_md5() {
    local target="$1"
    md5sum "$target" | awk '{print $1}'
}
|
64 |
+
|
65 |
+
# Function to cache a file using hardlinks (atomic).
# Prints the cache path on stdout; ALL diagnostics go to stderr so callers
# can capture the path with $(cache_file ...). Returns non-zero on failure.
cache_file() {
    local INPUT_FILE="$1"
    local EXTENSION="$2"

    # Check if the input file exists and is not empty
    if [ ! -s "$INPUT_FILE" ]; then
        echo "Error: Input file is empty or does not exist." >&2
        return 1
    fi

    # Content hash drives the cache location: two fan-out directory levels
    # keyed on the first four hex digits of the MD5.
    local MD5=$(get_md5 "$INPUT_FILE")
    local CACHE_SUBDIR="$CACHE_DIR/${MD5:0:2}/${MD5:2:2}"
    # Replace any shell-unfriendly characters in the original path.
    local SAFE_FILENAME=$(echo "$INPUT_FILE" | sed 's/[^a-zA-Z0-9._-]/_/g')
    local CACHE_FILE="$CACHE_SUBDIR/${MD5}_${SAFE_FILENAME}${EXTENSION}"

    echo "Cache operation: MD5 sum = $MD5" >&2
    echo "Cache file: $CACHE_FILE" >&2

    # Create cache subdirectory if it doesn't exist
    if ! mkdir -p "$CACHE_SUBDIR"; then
        echo "Error: Failed to create cache subdirectory." >&2
        return 1
    fi

    # Attempt to create the hardlink; ln -f replaces any existing entry,
    # which keeps the operation effectively atomic on the same filesystem.
    if ln -f "$INPUT_FILE" "$CACHE_FILE"; then
        echo "Cache file created: $CACHE_FILE" >&2
        echo "$CACHE_FILE"
        return 0
    else
        echo "Error: Failed to create cache file." >&2
        return 1
    fi
}
|
100 |
+
|
101 |
+
# Reduce an arbitrary string to lowercase ASCII [a-z0-9._-] so it is safe to
# use as a filename; every other character becomes '_'.
sanitize_filename() {
    local STRING="$1"
    echo "$STRING" \
        | iconv -c -t ascii//translit \
        | tr '[:upper:]' '[:lower:]' \
        | sed 's/[^a-z0-9._-]/_/g'
}
|
106 |
+
|
107 |
+
# Filter stdin: strip inline <...> tags, squeeze runs of spaces, and trim
# leading/trailing whitespace from each line.
clean_text() {
    sed -e 's/<[^>]*>//g' -e 's/  */ /g' -e 's/^[ \t]*//' -e 's/[ \t]*$//'
}
|
111 |
+
|
112 |
+
# Summarize one chunk of transcript text and print the summary on stdout,
# using fabric (-f) or ollama. Returns 1 if the summarizer fails.
# NOTE(review): near-duplicate of process_segment(), which writes to a file
# instead of stdout — consider consolidating them.
summarize_segment() {
    local SEGMENT_TEXT="$1"   # transcript chunk to summarize
    local PROMPT="$2"         # instruction prompt passed to ollama
    local SUMMARY_OUTPUT=""

    # Count the number of lines in the input
    local LINE_COUNT=$(echo "$SEGMENT_TEXT" | wc -l)

    # If the input has less than 12 lines, remove any cache entry keyed by the
    # text's own MD5 and return the original text untouched.
    if [ "$LINE_COUNT" -lt 12 ]; then
        local MD5=$(echo "$SEGMENT_TEXT" | md5sum | cut -d' ' -f1)
        local CACHE_SUBDIR="$CACHE_DIR/${MD5:0:2}/${MD5:2:2}"
        rm -f "$CACHE_SUBDIR/$MD5"*
        echo "The input is too short for meaningful summarization. Cache entry removed. Here's the original text:"
        echo "$SEGMENT_TEXT"
        return 0
    fi

    if $USE_FABRIC; then
        SUMMARY_OUTPUT=$(fabric -p summarize "$SEGMENT_TEXT" 2>&1)
    else
        # Use ollama for summarization
        SUMMARY_OUTPUT=$(ollama run "$OLLAMA_MODEL" "$PROMPT" "$SEGMENT_TEXT" 2>&1)
    fi

    # $? reflects the assignment above, which propagates the exit status of
    # the command substitution (fabric/ollama).
    if [ $? -ne 0 ]; then
        echo "Error in summarization: $SUMMARY_OUTPUT" >&2
        return 1
    fi

    echo "$SUMMARY_OUTPUT"
}
|
145 |
+
|
146 |
+
# Append one or more command strings to the global job queue; they are later
# executed in order by process_job_queue.
add_job() {
    JOB_QUEUE=("${JOB_QUEUE[@]}" "$@")
}
|
150 |
+
|
151 |
+
# Render a one-line progress message for a queued job.
# $1 = zero-based job index, $2 = total steps, $3 = current step, $4 = label.
# Relies on $JOB_COUNT being visible at call time; bash's dynamic scoping
# would expose process_job_queue's local JOB_COUNT if called from there, but
# that call was removed — NOTE(review): currently has no caller, and outside
# process_job_queue it would print "Job N/: ...".
update_job_progress() {
    local JOB_INDEX="$1"
    local TOTAL_STEPS="$2"
    local CURRENT_STEP="$3"
    local JOB_MESSAGE="$4"

    # ... (Implementation for updating the TUI progress bar)
    # You can use a library like 'whiptail' or 'dialog' for TUI elements
    # Example using echo for now:
    echo "Job $((JOB_INDEX+1))/$JOB_COUNT: $JOB_MESSAGE ($CURRENT_STEP/$TOTAL_STEPS)"
}
|
163 |
+
|
164 |
+
# Execute every queued command string in FIFO order via eval.
process_job_queue() {
    local JOB_COUNT=${#JOB_QUEUE[@]}
    echo "Processing job queue ($JOB_COUNT jobs)..."
    local JOB
    for JOB in "${JOB_QUEUE[@]}"; do
        eval "$JOB"
    done
}
|
173 |
+
|
174 |
+
# Summarize one chunk of transcript text and write the summary to a file,
# using fabric (-f) or ollama. Returns 1 if the summarizer fails.
# NOTE(review): near-duplicate of summarize_segment(), which prints to stdout
# instead — consider consolidating them.
process_segment() {
    local SEGMENT_TEXT="$1"   # transcript chunk to summarize
    local PROMPT="$2"         # instruction prompt passed to ollama
    local OUTPUT_FILE="$3"    # destination file for the summary
    local SUMMARY_OUTPUT=""

    # Count the number of lines in the input
    local LINE_COUNT=$(echo "$SEGMENT_TEXT" | wc -l)

    # If the input has less than 12 lines, remove any cache entry keyed by the
    # text's own MD5; the notice goes to stdout while the original text is
    # written to the output file.
    if [ "$LINE_COUNT" -lt 12 ]; then
        local MD5=$(echo "$SEGMENT_TEXT" | md5sum | cut -d' ' -f1)
        local CACHE_SUBDIR="$CACHE_DIR/${MD5:0:2}/${MD5:2:2}"
        rm -f "$CACHE_SUBDIR/$MD5"*
        echo "The input is too short for meaningful summarization. Cache entry removed. Here's the original text:"
        echo "$SEGMENT_TEXT" > "$OUTPUT_FILE"
        return 0
    fi

    if $USE_FABRIC; then
        SUMMARY_OUTPUT=$(fabric -p summarize "$SEGMENT_TEXT" 2>&1)
    else
        # Use ollama for summarization
        SUMMARY_OUTPUT=$(ollama run "$OLLAMA_MODEL" "$PROMPT" "$SEGMENT_TEXT" 2>&1)
    fi

    # $? reflects the assignment above, which propagates the exit status of
    # the command substitution (fabric/ollama).
    if [ $? -ne 0 ]; then
        echo "Error in summarization: $SUMMARY_OUTPUT" >&2
        return 1
    fi

    # Write the summary to the specified output file
    echo "$SUMMARY_OUTPUT" > "$OUTPUT_FILE"
}
|
209 |
+
|
210 |
+
# Clean a VTT subtitle file, split the transcript into thirds, summarize each
# third with fabric or ollama, and write/display the combined summary as
# <OUTPUT_DIR>/<basename>_summary.txt. Exits the whole script (not just the
# function) on any failure.
# NOTE(review): $URL ($2) is accepted but never used here — confirm whether it
# was meant to be embedded in the summary. The mktemp dir is only removed on
# the success path; the early `exit 1` branches leak it.
process_vtt() {
    local VTT_FILE=$1
    local URL=$2
    local TEMP_DIR=$(mktemp -d)
    local BASE_NAME="${TEMP_DIR}/temp" # Temporary base name
    local CLEANED_TRANSCRIPT="${BASE_NAME}_cleaned.txt"
    local SUMMARY_FILE="${OUTPUT_DIR}/$(basename "$VTT_FILE" .vtt)_summary.txt"

    echo "Processing VTT file: $VTT_FILE"

    # Clean the VTT transcript with the sibling vttclean.py (same directory
    # as this script); its stderr is captured for diagnostics.
    if ! python3 "$(dirname "$0")/vttclean.py" "$VTT_FILE" > "$CLEANED_TRANSCRIPT" 2>"${CLEANED_TRANSCRIPT}.error"; then
        echo "Error: Failed to clean the VTT file. Error log:" >&2
        cat "${CLEANED_TRANSCRIPT}.error" >&2
        exit 1
    fi

    # Check if the cleaned transcript is empty
    if [ ! -s "$CLEANED_TRANSCRIPT" ]; then
        echo "Error: Cleaned transcript is empty." >&2
        exit 1
    fi

    # Generate summary: split the transcript into three roughly equal parts
    # (integer division — up to 2 trailing lines fall outside the last third).
    echo "Summarizing transcript..."
    local TOTAL_LINES=$(wc -l < "$CLEANED_TRANSCRIPT")
    local SEGMENT_SIZE=$((TOTAL_LINES / 3))
    local FIRST_SEGMENT=$(head -n $SEGMENT_SIZE "$CLEANED_TRANSCRIPT")
    local MIDDLE_SEGMENT=$(sed -n "$((SEGMENT_SIZE + 1)),$((2 * SEGMENT_SIZE))p" "$CLEANED_TRANSCRIPT")
    local LAST_SEGMENT=$(tail -n $SEGMENT_SIZE "$CLEANED_TRANSCRIPT")

    # All three summaries (plus the progress lines) are captured into the
    # summary file by the group redirection below.
    {
        echo "Generating summary for first segment..."
        if $USE_FABRIC; then
            fabric -p summarize "$FIRST_SEGMENT"
        else
            ollama run "$OLLAMA_MODEL" "$FIRST_PROMPT" "$FIRST_SEGMENT"
        fi

        echo "Generating summary for middle segment..."
        if $USE_FABRIC; then
            fabric -p summarize "$MIDDLE_SEGMENT"
        else
            ollama run "$OLLAMA_MODEL" "$MIDDLE_PROMPT" "$MIDDLE_SEGMENT"
        fi

        echo "Generating summary for last segment..."
        if $USE_FABRIC; then
            fabric -p summarize "$LAST_SEGMENT"
        else
            ollama run "$OLLAMA_MODEL" "$LAST_PROMPT" "$LAST_SEGMENT"
        fi
    } > "$SUMMARY_FILE"

    if [ ! -s "$SUMMARY_FILE" ]; then
        echo "Error: Summary generation failed." >&2
        exit 1
    fi

    echo "Summarization complete."

    # Display the content of the summary file
    echo "Summary content:"
    echo "----------------------------------------"
    cat "$SUMMARY_FILE"
    echo "----------------------------------------"

    # Clean up
    rm -rf "$TEMP_DIR"
}
|
281 |
+
|
282 |
+
# Calculate the signed difference (TIME1 - TIME2) in whole seconds between two
# timestamps in HH:MM:SS format; the result is printed on stdout and is
# negative when TIME2 is later than TIME1.
time_difference() {
    local TIME1="$1" # Format: HH:MM:SS
    local TIME2="$2" # Format: HH:MM:SS

    # Extract hours, minutes, and seconds from timestamps
    local TIME1_HOUR=$(echo "$TIME1" | cut -d: -f1)
    local TIME1_MINUTE=$(echo "$TIME1" | cut -d: -f2)
    local TIME1_SECOND=$(echo "$TIME1" | cut -d: -f3)

    local TIME2_HOUR=$(echo "$TIME2" | cut -d: -f1)
    local TIME2_MINUTE=$(echo "$TIME2" | cut -d: -f2)
    local TIME2_SECOND=$(echo "$TIME2" | cut -d: -f3)

    # Calculate total seconds for each timestamp.
    # BUG FIX: force base-10 with the 10# prefix — zero-padded fields such as
    # "08" or "09" are otherwise treated as octal constants by shell
    # arithmetic and abort the expansion ("value too great for base").
    local TIME1_TOTAL_SECONDS=$((10#$TIME1_HOUR * 3600 + 10#$TIME1_MINUTE * 60 + 10#$TIME1_SECOND))
    local TIME2_TOTAL_SECONDS=$((10#$TIME2_HOUR * 3600 + 10#$TIME2_MINUTE * 60 + 10#$TIME2_SECOND))

    # Calculate the difference in seconds
    local DIFF_SECONDS=$((TIME1_TOTAL_SECONDS - TIME2_TOTAL_SECONDS))

    # Return the difference (could be negative if TIME2 is later than TIME1)
    echo "$DIFF_SECONDS"
}
|
306 |
+
|
307 |
+
# Main script logic: dispatch on the positional argument —
#   *.vtt          -> summarize the subtitle file directly
#   *http*         -> YouTube URL: prefer auto-subtitles, else download audio
#                     and transcribe it with whisper.cpp
#   existing file  -> local audio: convert, transcribe, summarize
# Work is queued via add_job and executed at the end by process_job_queue.
if [ $# -eq 0 ]; then
    echo "Error: No input provided. Please provide a valid URL, VTT file, or a local audio file."
    exit 1
fi

if [[ "$1" == *.vtt ]]; then
    echo "Processing as VTT file..."
    add_job "process_vtt \"$1\" \"$1\""
elif [[ "$1" == *"http"* ]]; then
    echo "Processing as YouTube URL..."

    # Extract the video title
    VIDEO_TITLE=$(yt-dlp --get-title "$1")
    FINAL_BASE_NAME=$(sanitize_filename "$VIDEO_TITLE")

    # Attempt to download subtitles first (auto-generated English captions;
    # cookies come from the Brave browser profile)
    yt-dlp -N 3 --skip-download --write-auto-sub --sub-lang en \
        --cookies-from-browser brave --output "$OUTPUT_DIR/${FINAL_BASE_NAME}.%(ext)s" "$1"

    # NOTE(review): yt-dlp typically writes auto-subs with a language infix
    # (e.g. "<name>.en.vtt"); confirm this -name pattern actually matches,
    # otherwise the subtitle path is silently skipped.
    VTT_FILE=$(find "$OUTPUT_DIR" -name "${FINAL_BASE_NAME}.vtt" | head -n 1)

    if [ -n "$VTT_FILE" ]; then
        echo "Subtitles found, processing VTT file..."
        add_job "process_vtt \"$VTT_FILE\" \"$1\""
    else
        echo "No subtitles found, downloading audio and generating transcript..."
        if [ "$DISABLE_AUDIO" = false ]; then
            # Download audio as 16 kHz WAV (whisper.cpp's expected input rate)
            if ! yt-dlp -N 3 -x --audio-format wav --postprocessor-args "-ar 16k" \
                --cookies-from-browser brave --output "$OUTPUT_DIR/${FINAL_BASE_NAME}.%(ext)s" "$1"; then
                echo "Error: Failed to download audio using yt-dlp. Check the URL and your internet connection." >&2
                exit 1
            fi

            WAV_FILE=$(find "$OUTPUT_DIR" -name "${FINAL_BASE_NAME}.wav" | head -n 1)

            if [ -z "$WAV_FILE" ]; then
                echo "Error: WAV file not found after download. Check yt-dlp output." >&2
                exit 1
            fi

            # -ovtt: emit VTT; -tdrz: tinydiarize speaker-turn model
            echo "Running Whisper-CPP to generate VTT transcript..."
            if ! "$WHISPCC"/main -ovtt -tdrz -m "$MODEL_PATH" "$WAV_FILE"; then
                echo "Error: Whisper-CPP transcription failed. Check the model path and audio file." >&2
                exit 1
            fi
            VTT_FILE="${WAV_FILE%.*}.vtt"

            add_job "process_vtt \"$VTT_FILE\" \"$1\""

            # Convert WAV to OGG Opus (small archival copy of the audio)
            echo "Converting WAV to OGG Opus..."
            OGG_FILE="${WAV_FILE%.wav}.ogg"
            if ! ffmpeg -i "$WAV_FILE" -c:a libopus -b:a 16k -vbr on -compression_level 10 -y "$OGG_FILE"; then
                echo "Error: Failed to convert to OGG format." >&2
                exit 1
            fi
            echo " - Audio: $OGG_FILE"
            # Remove the WAV file
            rm "$WAV_FILE"
        fi
    fi
elif [ -f "$1" ]; then
    echo "Processing as local audio file..."
    INPUT_FILE="$1"
    WAV_FILE="${INPUT_FILE%.*}.wav"

    # Convert to WAV first if not already WAV (16 kHz mono PCM for whisper;
    # -t $DURATION truncates when -d was given)
    if [[ "$INPUT_FILE" != *.wav ]]; then
        echo "Converting input to WAV format..."
        if ! ffmpeg -i "$INPUT_FILE" -ar 16000 -ac 1 -c:a pcm_s16le ${DURATION:+-t "$DURATION"} -y "$WAV_FILE"; then
            echo "Error: Failed to convert input to WAV format." >&2
            exit 1
        fi
    else
        WAV_FILE="$INPUT_FILE"
    fi

    echo "Running Whisper-CPP to generate VTT transcript..."
    if ! "$WHISPCC"/main -ovtt -tdrz -m "$MODEL_PATH" "$WAV_FILE" ; then
        echo "Error: Whisper-CPP transcription failed." >&2
        exit 1
    fi

    # whisper.cpp names its output "<input>.vtt" (i.e. foo.wav.vtt);
    # rename it to foo.vtt
    VTT_FILE="${WAV_FILE%.wav}.vtt"
    mv "${WAV_FILE}.vtt" "$VTT_FILE"
    add_job "process_vtt \"$VTT_FILE\" \"$1\""

    if [ "$DISABLE_AUDIO" = false ]; then
        # Convert to OGG Opus
        echo "Converting to OGG Opus..."
        OGG_FILE="${WAV_FILE%.*}.ogg"
        if ! ffmpeg -i "$WAV_FILE" -c:a libopus -b:a 16k -vbr on -compression_level 10 -y "$OGG_FILE"; then
            echo "Error: Failed to convert to OGG format." >&2
            exit 1
        fi
        echo " - Audio: $OGG_FILE"
        # Remove the WAV file per CHARTER point 7
        rm "$WAV_FILE"
    fi



else
    echo "Error: Invalid input. Provide a valid URL, VTT file, or a local audio file."
    exit 1
fi

process_job_queue
|
tetris32b.html
ADDED
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Tetris Game</title>
    <style>
        /* Center the playfield in the viewport on a dark background. */
        body {
            display: flex;
            justify-content: center;
            align-items: center;
            height: 100vh;
            margin: 0;
            background-color: #282c34;
        }
        canvas {
            border: 1px solid #fff;
        }
    </style>
</head>
<body>
    <!-- NOTE(review): updateScore() looks up an element with id="score",
         but no such element exists in this document — confirm whether a
         score display was meant to be added here. -->
    <canvas id="tetris" width="320" height="640"></canvas>
    <script>
        const canvas = document.getElementById('tetris');
        const context = canvas.getContext('2d');

        // All drawing below is in cell units: one matrix cell = 20x20 px.
        context.scale(20, 20);
|
28 |
+
|
29 |
+
// Remove every completely filled row, pushing a fresh empty row onto the top
// of the arena for each one. The reward doubles per row cleared in one sweep
// (10, 20, 40, ...).
function arenaSweep() {
    let reward = 1;
    for (let y = arena.length - 1; y > 0; --y) {
        if (!arena[y].every(cell => cell !== 0)) {
            continue;
        }
        const cleared = arena.splice(y, 1)[0].fill(0);
        arena.unshift(cleared);
        ++y; // re-check this index: a new row just shifted into it

        player.score += reward * 10;
        reward *= 2;
    }
}
|
46 |
+
|
47 |
+
// Report whether any filled cell of the player's piece overlaps a filled
// arena cell or lies outside the arena. A missing row yields undefined from
// the && chain, and `undefined !== 0` is true, so out-of-bounds counts as a
// collision.
function collide(arena, player) {
    const matrix = player.matrix;
    const pos = player.pos;
    for (let y = 0; y < matrix.length; ++y) {
        const pieceRow = matrix[y];
        for (let x = 0; x < pieceRow.length; ++x) {
            if (pieceRow[x] === 0) {
                continue;
            }
            const arenaRow = arena[y + pos.y];
            if ((arenaRow && arenaRow[x + pos.x]) !== 0) {
                return true;
            }
        }
    }
    return false;
}
|
60 |
+
|
61 |
+
// Build an h-row by w-column grid filled with 0 (empty cell).
function createMatrix(w, h) {
    return Array.from({ length: h }, () => new Array(w).fill(0));
}
|
68 |
+
|
69 |
+
// Return a fresh matrix for the requested tetromino. Each non-zero cell value
// doubles as the piece's index into the shared `colors` palette. An unknown
// letter yields undefined, exactly like the original if/else chain.
function createPiece(type) {
    const shapes = {
        'T': [[0, 0, 0], [1, 1, 1], [0, 1, 0]],
        'O': [[2, 2], [2, 2]],
        'L': [[0, 3, 0], [0, 3, 0], [0, 3, 3]],
        'J': [[0, 4, 0], [0, 4, 0], [4, 4, 0]],
        'I': [[0, 5, 0, 0], [0, 5, 0, 0], [0, 5, 0, 0], [0, 5, 0, 0]],
        'S': [[0, 6, 6], [6, 6, 0], [0, 0, 0]],
        'Z': [[7, 7, 0], [0, 7, 7], [0, 0, 0]],
    };
    return shapes[type];
}
|
114 |
+
|
115 |
+
// Repaint the whole frame: background, then the settled arena, then the
// falling piece on top.
function draw() {
    context.fillStyle = '#282c34';
    context.fillRect(0, 0, canvas.width, canvas.height);

    const layers = [
        [arena, { x: 0, y: 0 }],
        [player.matrix, player.pos],
    ];
    for (const [matrix, offset] of layers) {
        drawMatrix(matrix, offset);
    }
}
|
122 |
+
|
123 |
+
// Paint every non-empty cell of `matrix`, translated by `offset`; the cell
// value indexes the shared `colors` palette.
function drawMatrix(matrix, offset) {
    for (let y = 0; y < matrix.length; ++y) {
        for (let x = 0; x < matrix[y].length; ++x) {
            const value = matrix[y][x];
            if (value === 0) {
                continue;
            }
            context.fillStyle = colors[value];
            context.fillRect(x + offset.x, y + offset.y, 1, 1);
        }
    }
}
|
135 |
+
|
136 |
+
// Stamp the player's piece into the arena at its current position; empty
// (zero) cells of the piece leave the arena untouched.
function merge(arena, player) {
    const { matrix, pos } = player;
    for (let y = 0; y < matrix.length; ++y) {
        for (let x = 0; x < matrix[y].length; ++x) {
            if (matrix[y][x] !== 0) {
                arena[y + pos.y][x + pos.x] = matrix[y][x];
            }
        }
    }
}
|
145 |
+
|
146 |
+
// Advance the piece one row. On contact: undo the move, lock the piece into
// the arena, spawn the next piece, clear any full rows, and refresh the
// score display — in that order.
function playerDrop() {
    player.pos.y++;
    if (collide(arena, player)) {
        player.pos.y--;
        merge(arena, player);
        playerReset();
        arenaSweep();
        updateScore();
    }
    // Restart the auto-drop timer after any drop, manual or automatic.
    dropCounter = 0;
}
|
157 |
+
|
158 |
+
// Shift the piece horizontally by `dir` (+1 right, -1 left); the move is
// undone when it would overlap or leave the arena.
function playerMove(dir) {
    player.pos.x += dir;
    if (!collide(arena, player)) {
        return;
    }
    player.pos.x -= dir;
}
|
164 |
+
|
165 |
+
// Rotate the falling piece, trying alternating left/right "wall kick"
// offsets (+1, -2, +3, -4, ...) to resolve collisions; if no offset within
// the piece's width works, the rotation and position are reverted.
function playerRotate(dir) {
    const pos = player.pos.x;
    let offset = 1;
    rotate(player.matrix, dir);
    while (collide(arena, player)) {
        player.pos.x += offset;
        offset = -(offset + (offset > 0 ? 1 : -1));
        if (offset > player.matrix[0].length) {
            // No fit found: undo the rotation and restore the x position.
            rotate(player.matrix, -dir);
            player.pos.x = pos;
            return;
        }
    }
}
|
179 |
+
|
180 |
+
// Rotate a square matrix in place: transpose it, then mirror — reversing
// each row for clockwise (dir > 0), or the row order for counter-clockwise.
function rotate(matrix, dir) {
    for (let y = 0; y < matrix.length; ++y) {
        for (let x = 0; x < y; ++x) {
            const tmp = matrix[x][y];
            matrix[x][y] = matrix[y][x];
            matrix[y][x] = tmp;
        }
    }

    if (dir > 0) {
        for (const row of matrix) {
            row.reverse();
        }
    } else {
        matrix.reverse();
    }
}
|
199 |
+
|
200 |
+
// Spawn a random tetromino horizontally centered at the top of the arena.
// If it collides immediately the board is full: wipe the arena, zero the
// score, and refresh the display (game over / restart).
function playerReset() {
    const pieces = 'ILJOTSZ';
    player.matrix = createPiece(pieces[pieces.length * Math.random() | 0]);
    player.pos.y = 0;
    // `| 0` truncates toward zero (integer division).
    player.pos.x = (arena[0].length / 2 | 0) -
                   (player.matrix[0].length / 2 | 0);
    if (collide(arena, player)) {
        arena.forEach(row => row.fill(0));
        player.score = 0;
        updateScore();
    }
}
|
212 |
+
|
213 |
+
// Milliseconds accumulated since the piece last auto-dropped, and the
// threshold (1 s) that triggers the next automatic drop.
let dropCounter = 0;
let dropInterval = 1000;

// Timestamp of the previous animation frame, for delta timing in update().
let lastTime = 0;
|
217 |
+
|
218 |
+
// Main animation loop driven by requestAnimationFrame; `time` is the frame
// timestamp in milliseconds (0 on the very first, manual call).
function update(time = 0) {
    const deltaTime = time - lastTime;

    // Auto-drop the piece once enough time has accumulated.
    dropCounter += deltaTime;
    if (dropCounter > dropInterval) {
        playerDrop();
    }

    lastTime = time;

    draw();
    requestAnimationFrame(update);
}
|
231 |
+
|
232 |
+
// Reflect the current score in the page.
// BUG FIX: this document contains no element with id="score" (the <body>
// holds only the game canvas), so the original unconditionally dereferenced
// null and threw on the first call. Guard the lookup so the game keeps
// running; the score simply isn't shown until a #score element exists.
function updateScore() {
    const scoreElement = document.getElementById('score');
    if (scoreElement) {
        scoreElement.innerText = player.score;
    }
}
|
235 |
+
|
236 |
+
// Palette indexed by cell value; index 0 is null because 0 means "empty".
// createPiece() fills each tetromino with its matching palette index.
const colors = [
    null,
    '#FF0D72',
    '#0DC2FF',
    '#0DFF72',
    '#F538FF',
    '#FF8E0D',
    '#FFE138',
    '#3877FF',
];

// The playfield: 12 columns x 20 rows of cell values.
const arena = createMatrix(12, 20);

// Mutable state of the falling piece.
const player = {
    pos: {x: 0, y: 0},   // top-left corner of the piece matrix in the arena
    matrix: null,        // current tetromino shape; set by playerReset()
    score: 0,
};
|
254 |
+
|
255 |
+
// Keyboard controls: arrow keys move / soft-drop, Q/W rotate.
// NOTE(review): event.keyCode is deprecated in favor of event.key/event.code;
// kept unchanged here.
document.addEventListener('keydown', event => {
    if (event.keyCode === 37) {        // left arrow
        playerMove(-1);
    } else if (event.keyCode === 39) { // right arrow
        playerMove(1);
    } else if (event.keyCode === 40) { // down arrow: soft drop
        playerDrop();
    } else if (event.keyCode === 81) { // Q: rotate counter-clockwise
        playerRotate(-1);
    } else if (event.keyCode === 87) { // W: rotate clockwise
        playerRotate(1);
    }
});

// Boot: spawn the first piece, draw the score, start the animation loop.
playerReset();
updateScore();
update();

</script>
</body>
</html>
|
vttclean.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
|
3 |
+
import re
|
4 |
+
import datetime
|
5 |
+
import glob
|
6 |
+
import sys
|
7 |
+
|
8 |
+
def clean_text(text):
    """Strip HTML-style tags, collapse whitespace runs to single spaces, and
    trim leading/trailing whitespace."""
    without_tags = re.sub(r'<[^>]+>', '', text)
    collapsed = re.sub(r'\s+', ' ', without_tags)
    return collapsed.strip()
|
15 |
+
|
16 |
+
def is_prefix(a, b):
    """Return True when string *b* begins with string *a*."""
    return b[:len(a)] == a
|
18 |
+
|
19 |
+
def process_vtt(content):
    """Collapse a WEBVTT document into one ``<start-time> <text>`` line per
    caption, de-duplicating the rolling captions produced by auto-generated
    subtitles (each cue repeats a growing prefix of its predecessor's text;
    only the last, most complete line of such a run is kept)."""
    # Remove WEBVTT header and metadata
    content = re.sub(r'^WEBVTT\n.*?\n\n', '', content, flags=re.DOTALL)

    # Split into captions (cues are separated by blank lines)
    captions = re.split(r'\n\n+', content)

    processed_captions = []
    buffer = []  # current run of cues where each text extends the previous one

    def flush_buffer():
        # Emit only the final line of the run, then start a new run.
        if buffer:
            processed_captions.append(buffer[-1])  # Keep the last (most complete) line
            buffer.clear()

    for caption in captions:
        lines = caption.split('\n')
        if len(lines) >= 2:
            # Extract the cue's start time (HH:MM:SS.mmm). Note the
            # milliseconds ARE kept in the output (the original comment
            # claimed they were removed).
            timestamp_match = re.match(r'(\d{2}:\d{2}:\d{2})\.(\d{3})', lines[0])
            if timestamp_match:
                timestamp = f"{timestamp_match.group(1)}.{timestamp_match.group(2)}"
                text = ' '.join(lines[1:])
                clean_caption = clean_text(text)
                if clean_caption:
                    current_line = f"{timestamp} {clean_caption}"

                    if not buffer:
                        buffer.append(current_line)
                    else:
                        # Split off the timestamp to compare caption texts;
                        # extend the run while each new text starts with the
                        # previous one.
                        _, prev_text = buffer[-1].split(' ', 1)
                        if is_prefix(prev_text, clean_caption):
                            buffer.append(current_line)
                        else:
                            flush_buffer()
                            buffer.append(current_line)

    flush_buffer()  # Don't forget to flush the buffer at the end

    return '\n'.join(processed_captions)
|
59 |
+
|
60 |
+
if __name__ == "__main__":
    # CLI entry point: expand the glob pattern given as argv[1] and print the
    # cleaned, de-duplicated transcript of every matching VTT file to stdout.
    try:
        if len(sys.argv) < 2:
            print("Usage: python vttclean.py <file_pattern>", file=sys.stderr)
            sys.exit(1)

        file_pattern = sys.argv[1]
        for filename in glob.glob(file_pattern):
            with open(filename, 'r', encoding='utf-8') as file:
                content = file.read()
                result = process_vtt(content)
                print(result)
    except Exception as e:
        # Any failure (unreadable file, malformed VTT, encoding error) is
        # reported on stderr and exits with status 1.
        print(f"Error processing input: {e}", file=sys.stderr)
        sys.exit(1)
|