#!/usr/bin/env python3
# SPDX-FileCopyrightText: Chris Pressey, the creator of this work, has dedicated it to the public domain.
# For more information, please refer to <https://unlicense.org/>
# SPDX-License-Identifier: Unlicense
import os
import sys
import argparse
from pathlib import Path
from typing import Iterator, Set, List
def parse_args() -> argparse.Namespace:
    """
    Build the command line interface and parse the process arguments.

    The returned namespace carries two attributes:
    ``directories`` holds one or more positional arguments, each converted
    to a Path; ``exclude_dir`` holds the values of every ``--exclude-dir``
    option given (an empty list when the option is absent).
    """
    cli = argparse.ArgumentParser(
        description='Traverse directory trees and output contents of text files found therein.',
    )
    # Positional arguments; nargs='+' makes at least one mandatory.
    cli.add_argument('directories', nargs='+', type=Path, help='Directories to process')
    # Repeatable option: every occurrence appends one more name to the list.
    cli.add_argument(
        '--exclude-dir',
        action='append',
        default=[],
        help='Directory names to exclude (can be specified multiple times)',
    )
    namespace = cli.parse_args()
    return namespace
def is_text_file(file_path: Path) -> bool:
    """
    Guess whether a file holds text by decoding its beginning as UTF-8.

    Only the first 1024 characters are read. Returns True when that read
    succeeds, and False when decoding fails or when the file cannot be
    opened or read for any other reason.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            handle.read(1024)  # A sample from the start of the file is enough
    except Exception:
        # Covers UnicodeDecodeError (not valid UTF-8) as well as any failure
        # to open or read the file; all of them mean "not a text file" here.
        return False
    else:
        return True
def should_skip_path(path: Path, excluded_dirs: Set[str]) -> bool:
    """
    Decide whether the traversal should ignore ``path``.

    Returns True when the final path component starts with '.', when that
    component is a member of ``excluded_dirs``, or when the path is a
    symbolic link. Returns False otherwise.
    """
    entry_name = path.name
    # Checks run left to right and stop at the first match, so the
    # filesystem query for the symlink test only happens when both
    # name-based checks have passed.
    return (
        entry_name.startswith('.')
        or entry_name in excluded_dirs
        or path.is_symlink()
    )
def find_text_files(directory: Path, excluded_dirs: Set[str]) -> Iterator[Path]:
    """
    Recursively find all text files below the given directory.

    Entries for which should_skip_path() returns True (hidden names,
    excluded names, symlinks) are ignored. Children are visited in sorted
    order so that the sequence of yielded files is reproducible.

    Errors raised while listing or inspecting entries are reported on
    stderr rather than propagated; such an error ends the scan of the
    directory in which it occurred.

    Yields a Path for each regular file accepted by is_text_file().
    """
    try:
        # iterdir() yields children in arbitrary order; sorting makes the
        # output deterministic across runs and filesystems.
        for path in sorted(directory.iterdir()):
            if should_skip_path(path, excluded_dirs):
                continue
            if path.is_file():
                if is_text_file(path):
                    yield path
            elif path.is_dir():
                yield from find_text_files(path, excluded_dirs)
    except PermissionError as e:
        print(f"Permission denied accessing {e.filename}", file=sys.stderr)
    except Exception as e:
        # Best-effort traversal: report the problem and carry on with
        # whatever the caller iterates next.
        print(f"Error accessing path: {e}", file=sys.stderr)
def print_file_contents(file_path: Path) -> None:
    """
    Print a header containing the file path, followed by its contents.

    The file is read completely as UTF-8 before anything is written to
    stdout, so a file that cannot be opened or decoded produces only an
    error message on stderr and never a header without contents.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Read first: decoding may fail anywhere in the file, and the
            # header must not be emitted for a file that cannot be shown.
            contents = f.read()
        print(f"\n{'#' * 10} {file_path}\n")
        print(contents)
    except Exception as e:
        # Best-effort per file: report the failure and let the caller
        # continue with the next file.
        print(f"Error reading {file_path}: {e}", file=sys.stderr)
def process_directory(directory: Path, excluded_dirs: Set[str]) -> None:
    """
    Print the contents of every text file found under one path.

    A path that does not exist is reported on stderr. A path that exists
    but is not a directory is handed straight to print_file_contents().
    For a directory, each file produced by find_text_files() is printed;
    an exception raised during that traversal is reported on stderr.
    """
    if not directory.exists():
        print(f"Error: {directory} does not exist", file=sys.stderr)
    elif directory.is_dir():
        try:
            for text_file in find_text_files(directory, excluded_dirs):
                print_file_contents(text_file)
        except Exception as err:
            print(f"Error processing directory {directory}: {err}", file=sys.stderr)
    else:
        # An existing non-directory path is printed as a single file.
        print_file_contents(directory)
def main():
    """
    Script entry point: parse the command line and process each path given.
    """
    options = parse_args()
    # A set keeps the per-entry exclusion test cheap during traversal
    skipped_names = set(options.exclude_dir)
    # Handle the requested paths one at a time, in command line order
    for target in options.directories:
        process_directory(target, skipped_names)


if __name__ == '__main__':
    main()