|
from dataclasses import dataclass, field |
|
from typing import Optional, List, Dict, Set, Literal, Tuple, NamedTuple, Union |
|
from pathlib import Path |
|
import re |
|
import fnmatch |
|
import glob |
|
from itertools import chain |
|
|
|
@dataclass
class PathPattern:
    """A single path specification: an explicit mapping or a wildcard pattern.

    A spec written as ``source:target`` is an explicit mapping; a spec
    without a colon is treated as a glob pattern.
    """

    # Source path (explicit mapping) or glob expression (wildcard pattern).
    pattern: str
    # Target path of an explicit mapping; None marks a wildcard pattern.
    target_template: Optional[str] = None

    @classmethod
    def parse(cls, spec: str) -> 'PathPattern':
        """Parse a path specification into a pattern and an optional target.

        Only the first ':' separates source from target, so any further
        colons remain part of the target.
        """
        source, separator, target = spec.partition(':')
        if separator:
            return cls(source, target)
        return cls(spec)

    def resolve(self, root_dir: Path) -> List['PathMapping']:
        """Resolve this pattern into concrete path mappings.

        Args:
            root_dir: Matched files located beneath this directory are
                mapped to their path relative to it.

        Returns:
            For an explicit mapping, a one-element list pairing the source
            with the target (the source is not checked for existence).
            For a wildcard pattern, one mapping per matching regular file;
            the list is empty when nothing matches.
        """
        if self.target_template is not None:
            return [PathMapping(Path(self.pattern), Path(self.target_template))]

        matches = []
        # Relative patterns are expanded against the current working
        # directory; '**' recurses because recursive=True.
        for path in glob.glob(self.pattern, recursive=True):
            source = Path(path)
            if not source.is_file():
                continue  # directories and other non-files are not mapped
            # Files outside root_dir keep the path exactly as glob returned it.
            relative = source.relative_to(root_dir) if root_dir in source.parents else source
            matches.append(PathMapping(source, relative))
        return matches

    def validate(self) -> None:
        """Validate pattern constraints.

        Raises:
            ValueError: If the target has a '..' path component, if the
                target contains a backslash, or if the pattern contains
                one of the characters ``< > | "``.
        """
        if self.target_template:
            # Reject '..' only as a whole path component: a plain substring
            # test would also refuse harmless names such as 'notes..old'.
            # Splitting on both separators keeps '..\\x' reported as traversal.
            if '..' in re.split(r'[\\/]', self.target_template):
                raise ValueError(f"Target path '{self.target_template}' cannot contain '..'")

            if '\\' in self.target_template:
                raise ValueError("Target path must use forward slashes")

        if any(c in self.pattern for c in '<>|"'):
            raise ValueError(f"Invalid characters in pattern: {self.pattern}")
|
|
|
class WikiTransformer:
    """Resolve path patterns and process every resulting mapping."""

    def __init__(self, size_limit: 'SizeSpec', output_dir: Path,
                 merge_strategy: 'MergeStrategy',
                 debug: bool = False):
        """Store the configuration and create the helper objects.

        Args:
            size_limit: Limit handed to ``SizeValidator``.
            output_dir: Directory that ``transform`` creates before processing.
            merge_strategy: Object whose ``validate_target`` and ``link_type``
                are consulted by ``transform``.
            debug: Debug flag, stored on the instance.
        """
        self.validator = SizeValidator(size_limit)
        self.output_dir = output_dir
        self.merge_strategy = merge_strategy
        self.debug = debug
        self.console = Console()
        # NOTE(review): _setup_logging is not defined in the visible part of
        # this class; confirm where it comes from.
        self.log = self._setup_logging()
        self.processed_inodes: Set[int] = set()
        # Patterns are resolved against the working directory at construction time.
        self.root_dir = Path.cwd()

    async def resolve_patterns(self, patterns: List[str]) -> List['PathMapping']:
        """Resolve all patterns into concrete mappings.

        A spec that raises ``ValueError`` while being parsed, validated or
        resolved is logged as an error and skipped. A spec that resolves to
        nothing is logged as a warning.
        """
        mappings = []
        for spec in patterns:
            try:
                pattern = PathPattern.parse(spec)
                pattern.validate()
                resolved = pattern.resolve(self.root_dir)
            except ValueError as e:
                self.log.error(f"Invalid pattern '{spec}': {e}")
                continue

            if not resolved:
                self.log.warning(f"Pattern '{spec}' matched no files")
            mappings.extend(resolved)
        return mappings

    async def transform(self, patterns: List[str]):
        """Transform source trees based on patterns and mappings.

        Raises:
            ValueError: If no pattern yields a mapping, or if
                ``merge_strategy.validate_target(output_dir)`` is falsy.
        """
        mappings = await self.resolve_patterns(patterns)

        if not mappings:
            raise ValueError("No valid paths matched the specified patterns")

        if not self.merge_strategy.validate_target(self.output_dir):
            raise ValueError(
                f"Target filesystem doesn't support {self.merge_strategy.link_type} links"
            )

        self.output_dir.mkdir(parents=True, exist_ok=True)

        with Progress() as progress:
            task = progress.add_task(
                "[green]Processing files...",
                total=len(mappings)
            )

            for mapping in mappings:
                try:
                    # NOTE(review): process_mapping is not defined in the
                    # visible part of this class; confirm where it comes from.
                    await self.process_mapping(mapping)
                except Exception as e:
                    # Best effort: one failing mapping must not stop the rest.
                    self.log.error(f"Failed to process {mapping}: {e}")
                # Advance for failures too, so the bar reaches its total
                # instead of stalling short whenever a mapping fails.
                progress.update(task, advance=1)
|
|
|
# click.argument() does not accept a ``help`` keyword (passing one raises
# TypeError when the decorator runs); PATTERNS is described in the docstring.
@click.command()
@click.argument('patterns', nargs=-1, required=True)
@click.option('-l', '--limit', type=SIZE, default='1M',
              help='Per-document size limit (e.g., 500K, 2M, 1G)')
@click.option('-d', '--debug', is_flag=True, help='Enable debug logging')
@click.option('-o', '--output-dir', type=click.Path(), default='wiki',
              help='Output directory')
@click.option('--link-type', type=click.Choice(['symlink', 'hardlink', 'copy']),
              default='symlink', help='File linking strategy')
@click.option('--follow-links/--no-follow-links', default=False,
              help='Follow symbolic links during traversal')
def main(patterns: Tuple[str, ...], limit: 'SizeSpec', debug: bool,
         output_dir: str, link_type: str, follow_links: bool):
    """Transform files into wiki structure using patterns or mappings.

    \b
    PATTERNS can be either:
      1. Colon-separated mappings: 'source:target'
      2. Wildcard patterns: '**/*.md', 'docs/**/*.rst'

    \b
    Examples:
      # Explicit mapping
      wiki_transform.py src/api:docs/api docs/intro:guide/start

    \b
      # Wildcard patterns
      wiki_transform.py '**/*.md' 'docs/**/*.rst'

    \b
      # Mixed usage
      wiki_transform.py src:api '**/*.md' 'legacy:archive'
    """
    # The "\b" lines above stop click from rewrapping the list and the
    # example commands when it renders --help.

    # asyncio is not among the module-level imports visible at the top of
    # this file, so it is imported here to keep this entry point runnable.
    import asyncio

    # 'copy' is passed to MergeStrategy as link_type=None.
    strategy = MergeStrategy(
        link_type=None if link_type == 'copy' else link_type,
        follow_links=follow_links
    )

    transformer = WikiTransformer(
        size_limit=limit,
        output_dir=Path(output_dir),
        merge_strategy=strategy,
        debug=debug
    )

    # click delivers an nargs=-1 argument as a tuple; transform is declared
    # to take a list.
    asyncio.run(transformer.transform(list(patterns)))
|
|
|
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|