# NOTE: repository-page residue, kept as comments so the module parses:
#   jnorthrup's picture
#   Upload 12 files
#   14daa4c verified
import asyncio
import fnmatch
import glob
import re
from dataclasses import dataclass, field
from itertools import chain
from pathlib import Path
from typing import Optional, List, Dict, Set, Literal, Tuple, NamedTuple, Union
@dataclass
class PathPattern:
    """Represents either a direct mapping or a wildcard pattern.

    A spec containing a colon (``source:target``) is a direct mapping; any
    other spec is a glob pattern that :meth:`resolve` expands.

    Attributes:
        pattern: Source path (direct mapping) or glob pattern (wildcard).
        target_template: Target path of a direct mapping, or ``None`` when
            the spec is a wildcard pattern.
    """

    pattern: str
    target_template: Optional[str] = None

    @classmethod
    def parse(cls, spec: str) -> 'PathPattern':
        """Parse a path specification into a pattern and optional target.

        Args:
            spec: Either ``source:target`` or a bare pattern. Only the first
                colon separates source from target; later colons stay in
                the target.

        Returns:
            A new ``PathPattern``; ``target_template`` is ``None`` when
            ``spec`` contains no colon.
        """
        source, colon, target = spec.partition(':')
        if colon:
            return cls(source, target)
        return cls(spec)

    # The return annotation is quoted because PathMapping is not defined
    # before this class; an unquoted name would be evaluated at definition
    # time and raise NameError.
    def resolve(self, root_dir: Path) -> 'List[PathMapping]':
        """Resolve the pattern into concrete path mappings.

        Args:
            root_dir: Directory that matched files are made relative to when
                it is one of their parents.

        Returns:
            For a direct mapping, a single ``PathMapping(source, target)``.
            For a wildcard, one mapping per matching regular file (possibly
            none); matches that are not files are skipped.
        """
        if self.target_template is not None:
            # Direct mapping case: no filesystem lookup is performed.
            return [PathMapping(Path(self.pattern), Path(self.target_template))]

        # Wildcard pattern case. The pattern is handed to glob.glob as-is,
        # so a relative pattern is expanded against the current directory.
        matches = []
        for path in glob.glob(self.pattern, recursive=True):
            source = Path(path)
            if not source.is_file():
                continue
            # Maintain the structure relative to root_dir when the file
            # lives under it; otherwise keep the path as matched.
            relative = source.relative_to(root_dir) if root_dir in source.parents else source
            matches.append(PathMapping(source, relative))
        return matches

    def validate(self) -> None:
        """Validate pattern constraints.

        Raises:
            ValueError: If the target has a ``..`` path component, the
                target contains a backslash, or the pattern contains any of
                the characters ``< > | "``.
        """
        if self.target_template is not None:
            # Reject path traversal. Compare whole components so that names
            # which merely contain two dots (e.g. 'v1..2') stay valid.
            if '..' in self.target_template.split('/'):
                raise ValueError(f"Target path '{self.target_template}' cannot contain '..'")
            # Targets must be written with forward slashes.
            if '\\' in self.target_template:
                raise ValueError("Target path must use forward slashes")
        # Validate wildcard pattern
        if any(c in self.pattern for c in '<>|"'):
            raise ValueError(f"Invalid characters in pattern: {self.pattern}")
class WikiTransformer:
    """Resolve path patterns into mappings and process each mapping.

    NOTE(review): ``_setup_logging`` and ``process_mapping`` are called by
    the methods below but are not defined in this excerpt; they are
    presumably provided elsewhere on this class -- confirm.
    """

    # 'MergeStrategy' is quoted like 'SizeSpec' so the annotation is not
    # evaluated at definition time, before the name may exist.
    def __init__(self, size_limit: 'SizeSpec', output_dir: Path,
                 merge_strategy: 'MergeStrategy',
                 debug: bool = False):
        """Store configuration and create the collaborating helpers.

        Args:
            size_limit: Size specification passed to ``SizeValidator``.
            output_dir: Directory created by :meth:`transform`.
            merge_strategy: Strategy object; :meth:`transform` calls its
                ``validate_target`` and reads its ``link_type``.
            debug: Debug flag, stored as ``self.debug``.
        """
        self.validator = SizeValidator(size_limit)
        self.output_dir = output_dir
        self.merge_strategy = merge_strategy
        self.debug = debug
        self.console = Console()
        self.log = self._setup_logging()
        # Not used by the methods visible here; presumably tracks inodes
        # that were already handled -- confirm against process_mapping.
        self.processed_inodes: Set[int] = set()
        # Patterns are resolved relative to the directory the process was
        # started in.
        self.root_dir = Path.cwd()

    async def resolve_patterns(self, patterns: List[str]) -> 'List[PathMapping]':
        """Resolve all patterns into concrete mappings.

        Invalid specs are logged as errors and skipped; specs that match
        nothing are logged as warnings.

        Args:
            patterns: Path specifications accepted by ``PathPattern.parse``.

        Returns:
            The mappings of every valid spec, in input order.
        """
        mappings = []
        for spec in patterns:
            # Keep the try body to the calls that signal a bad spec.
            try:
                pattern = PathPattern.parse(spec)
                pattern.validate()
                resolved = pattern.resolve(self.root_dir)
            except ValueError as e:
                self.log.error(f"Invalid pattern '{spec}': {e}")
                continue
            if not resolved:
                self.log.warning(f"Pattern '{spec}' matched no files")
            mappings.extend(resolved)
        return mappings

    async def transform(self, patterns: List[str]):
        """Transform source trees based on patterns and mappings.

        Args:
            patterns: Path specifications, see :meth:`resolve_patterns`.

        Raises:
            ValueError: If no mapping could be resolved, or if the merge
                strategy's ``validate_target`` rejects the output directory.
        """
        mappings = await self.resolve_patterns(patterns)
        if not mappings:
            raise ValueError("No valid paths matched the specified patterns")
        if not self.merge_strategy.validate_target(self.output_dir):
            raise ValueError(
                f"Target filesystem doesn't support {self.merge_strategy.link_type} links"
            )
        self.output_dir.mkdir(parents=True, exist_ok=True)
        with Progress() as progress:
            task = progress.add_task(
                "[green]Processing files...",
                total=len(mappings)
            )
            for mapping in mappings:
                try:
                    await self.process_mapping(mapping)
                except Exception as e:
                    # Best effort: one failing mapping must not abort the
                    # remaining ones.
                    self.log.error(f"Failed to process {mapping}: {e}")
                finally:
                    # Count failed mappings too; otherwise the bar can
                    # never reach its total once anything fails.
                    progress.update(task, advance=1)
# click arguments do not accept ``help=`` (passing it raises TypeError when
# the decorator runs); PATTERNS is documented in the command docstring.
@click.command()
@click.argument('patterns', nargs=-1, required=True)
@click.option('-l', '--limit', type=SIZE, default='1M',
              help='Per-document size limit (e.g., 500K, 2M, 1G)')
@click.option('-d', '--debug', is_flag=True, help='Enable debug logging')
@click.option('-o', '--output-dir', type=click.Path(), default='wiki',
              help='Output directory')
@click.option('--link-type', type=click.Choice(['symlink', 'hardlink', 'copy']),
              default='symlink', help='File linking strategy')
@click.option('--follow-links/--no-follow-links', default=False,
              help='Follow symbolic links during traversal')
def main(patterns: Tuple[str, ...], limit: 'SizeSpec', debug: bool,
         output_dir: str, link_type: str, follow_links: bool):
    """Transform files into wiki structure using patterns or mappings.

    PATTERNS are path patterns (e.g., 'src:docs/api' or '**/*.md') and can
    be either:

    \b
    1. Colon-separated mappings: 'source:target'
    2. Wildcard patterns: '**/*.md', 'docs/**/*.rst'

    \b
    Examples:
      # Explicit mapping
      wiki_transform.py src/api:docs/api docs/intro:guide/start
      # Wildcard patterns
      wiki_transform.py '**/*.md' 'docs/**/*.rst'
      # Mixed usage
      wiki_transform.py src:api '**/*.md' 'legacy:archive'
    """
    # 'copy' is passed to MergeStrategy as link_type=None.
    strategy = MergeStrategy(
        link_type=None if link_type == 'copy' else link_type,
        follow_links=follow_links
    )
    transformer = WikiTransformer(
        size_limit=limit,
        output_dir=Path(output_dir),
        merge_strategy=strategy,
        debug=debug
    )
    # click delivers nargs=-1 arguments as a tuple; transform() is
    # annotated to take a list.
    asyncio.run(transformer.transform(list(patterns)))


if __name__ == '__main__':
    main()