import asyncio
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Set, Literal, Tuple, NamedTuple, Union
from pathlib import Path
import re
import fnmatch
import glob
from itertools import chain

# NOTE(review): `click`, `Console`, `Progress`, `PathMapping`, `MergeStrategy`,
# `SizeValidator`, `SizeSpec` and `SIZE` are used below but are not imported or
# defined in the part of the file visible here -- confirm they are provided
# elsewhere before this module is imported.


@dataclass
class PathPattern:
    """Represents either a direct mapping or a wildcard pattern.

    A spec of the form ``source:target`` is a direct mapping; a spec without
    a colon is treated as a glob pattern.
    """

    pattern: str
    target_template: Optional[str] = None

    @classmethod
    def parse(cls, spec: str) -> 'PathPattern':
        """Parse a path specification into a pattern and optional target.

        The spec is split at the FIRST colon only, so everything after it
        (including further colons) becomes the target.
        """
        source, colon, target = spec.partition(':')
        if colon:
            return cls(source, target)
        return cls(spec)

    def resolve(self, root_dir: Path) -> 'List[PathMapping]':
        """Resolve this pattern into concrete path mappings.

        Args:
            root_dir: Directory that matched files are made relative to when
                it is one of their parents.

        Returns:
            A single ``PathMapping(source, target)`` for a direct mapping, or
            one ``PathMapping(file, relative_path)`` per regular file matched
            by the glob pattern (possibly an empty list).
        """
        if self.target_template is not None:
            # Direct mapping case: no filesystem lookup is performed.
            return [PathMapping(Path(self.pattern), Path(self.target_template))]

        # Wildcard pattern case: expand with glob ('**' recurses).
        matches = []
        for path in glob.glob(self.pattern, recursive=True):
            source = Path(path)
            if not source.is_file():
                continue
            # Keep the structure relative to root_dir when the match lives
            # under it; otherwise the matched path is used unchanged.
            if root_dir in source.parents:
                relative = source.relative_to(root_dir)
            else:
                relative = source
            matches.append(PathMapping(source, relative))
        return matches

    def validate(self) -> None:
        """Validate pattern constraints.

        Raises:
            ValueError: If the target contains a ``..`` path component or a
                backslash, or if the pattern contains one of ``< > | "``.
        """
        if self.target_template:
            # Reject path traversal. Compare whole components so that a name
            # which merely contains two dots (e.g. 'v1..2') is still allowed.
            if '..' in self.target_template.split('/'):
                raise ValueError(
                    f"Target path '{self.target_template}' cannot contain '..'"
                )
            # Targets must already use forward slashes; backslashes are
            # rejected rather than converted.
            if '\\' in self.target_template:
                raise ValueError("Target path must use forward slashes")
            # NOTE(review): absolute targets (leading '/') are not rejected
            # here -- confirm whether downstream code expects that.

        # Validate the source/wildcard pattern.
        if any(c in self.pattern for c in '<>|"'):
            raise ValueError(f"Invalid characters in pattern: {self.pattern}")


class WikiTransformer:
    """Resolves path patterns and processes each resulting mapping."""

    def __init__(self, size_limit: 'SizeSpec', output_dir: Path,
                 merge_strategy: 'MergeStrategy', debug: bool = False):
        """Store configuration and set up validator, console and logger.

        Args:
            size_limit: Passed to ``SizeValidator``.
            output_dir: Directory created by :meth:`transform`.
            merge_strategy: Strategy object whose ``validate_target`` and
                ``link_type`` are used by :meth:`transform`.
            debug: Debug flag stored on the instance.
        """
        self.validator = SizeValidator(size_limit)
        self.output_dir = output_dir
        self.merge_strategy = merge_strategy
        self.debug = debug
        self.console = Console()
        self.log = self._setup_logging()
        self.processed_inodes: Set[int] = set()
        # Patterns are resolved relative to the directory the tool runs in.
        self.root_dir = Path.cwd()

    async def resolve_patterns(self, patterns: List[str]) -> 'List[PathMapping]':
        """Resolve all patterns into concrete mappings.

        Invalid specs (those raising ``ValueError``) are logged and skipped;
        patterns that match nothing are logged as warnings.
        """
        mappings = []
        for spec in patterns:
            try:
                pattern = PathPattern.parse(spec)
                pattern.validate()
                resolved = pattern.resolve(self.root_dir)
            except ValueError as e:
                self.log.error(f"Invalid pattern '{spec}': {e}")
                continue
            if not resolved:
                self.log.warning(f"Pattern '{spec}' matched no files")
            mappings.extend(resolved)
        return mappings

    async def transform(self, patterns: List[str]):
        """Transform source trees based on patterns and mappings.

        Raises:
            ValueError: If no pattern produced a mapping, or if the merge
                strategy's ``validate_target`` rejects the output directory.

        A failure while processing an individual mapping is logged and does
        not stop the remaining mappings from being processed.
        """
        mappings = await self.resolve_patterns(patterns)
        if not mappings:
            raise ValueError("No valid paths matched the specified patterns")

        if not self.merge_strategy.validate_target(self.output_dir):
            raise ValueError(
                f"Target filesystem doesn't support {self.merge_strategy.link_type} links"
            )

        self.output_dir.mkdir(parents=True, exist_ok=True)

        with Progress() as progress:
            task = progress.add_task(
                "[green]Processing files...", total=len(mappings)
            )
            for mapping in mappings:
                try:
                    await self.process_mapping(mapping)
                except Exception as e:
                    self.log.error(f"Failed to process {mapping}: {e}")
                # Advance for every handled mapping, failed or not, so the
                # bar reaches its total even when some mappings fail.
                progress.update(task, advance=1)


# click arguments do not accept ``help``; PATTERNS is documented in the
# command docstring instead.
@click.command()
@click.argument('patterns', nargs=-1, required=True)
@click.option('-l', '--limit', type=SIZE, default='1M',
              help='Per-document size limit (e.g., 500K, 2M, 1G)')
@click.option('-d', '--debug', is_flag=True, help='Enable debug logging')
@click.option('-o', '--output-dir', type=click.Path(), default='wiki',
              help='Output directory')
@click.option('--link-type',
              type=click.Choice(['symlink', 'hardlink', 'copy']),
              default='symlink', help='File linking strategy')
@click.option('--follow-links/--no-follow-links', default=False,
              help='Follow symbolic links during traversal')
def main(patterns: List[str], limit: 'SizeSpec', debug: bool, output_dir: str,
         link_type: str, follow_links: bool):
    """Transform files into wiki structure using patterns or mappings.

    PATTERNS can be either:
    1. Colon-separated mappings: 'source:target'
    2. Wildcard patterns: '**/*.md', 'docs/**/*.rst'

    Examples:
        # Explicit mapping
        wiki_transform.py src/api:docs/api docs/intro:guide/start

        # Wildcard patterns
        wiki_transform.py '**/*.md' 'docs/**/*.rst'

        # Mixed usage
        wiki_transform.py src:api '**/*.md' 'legacy:archive'
    """
    # 'copy' is represented as "no link type" on the strategy.
    strategy = MergeStrategy(
        link_type=None if link_type == 'copy' else link_type,
        follow_links=follow_links
    )
    transformer = WikiTransformer(
        size_limit=limit,
        output_dir=Path(output_dir),
        merge_strategy=strategy,
        debug=debug
    )
    # click supplies variadic arguments as a tuple; hand over a real list.
    asyncio.run(transformer.transform(list(patterns)))


if __name__ == '__main__':
    main()