diff --git a/README.md b/README.md index bb6295c..f2e850a 100644 --- a/README.md +++ b/README.md @@ -19,37 +19,71 @@ make build Or use docker: ```bash -docker run -v $(pwd):/app krzko/codemap +docker run -v $(pwd):/app ghcr.io/krzko/codemap apply --dry-run ``` -## Usage +## Commands + +### Apply Annotations -Basic usage: ```bash -# Process files in current directory -codemap +# Add annotations to files in current directory +codemap apply -# Process files in a specific directory -codemap -dir=/path/to/your/project +# Add annotations with dry-run (show what would change) +codemap apply --dry-run -# Remove annotations -codemap -clean +# Add annotations to specific directory +codemap apply -d /path/to/project -# Enable verbose logging -codemap -verbose +# Add annotations with verbose output +codemap apply -V ``` -### Command Line Options +### Clean Annotations + +```bash +# Remove annotations from files in current directory +codemap clean + +# Remove annotations with dry-run +codemap clean --dry-run + +# Clean specific directory +codemap clean -d /path/to/project +``` + +### List Files + +```bash +# List files that would be processed +codemap list +# or +codemap ls + +# List files in specific directory +codemap list -d /path/to/project +``` + +### Show Statistics + +```bash +# Show annotation statistics for current directory +codemap stats + +# Show stats for specific directory +codemap stats -d /path/to/project +``` -- `-dir`: Directory to process (default: current directory) -- `-clean`: Remove existing annotations -- `-verbose`: Enable verbose logging -- `-types`: Comma-separated list of file extensions
to process (default: "go,py,js,jsx,ts,tsx") -- `-version`: Print version information +### Common Options -### Example +All commands support these options: +- `-d, --dir`: Directory to process (default: current directory) +- `-t, --types`: Comma-separated list of file extensions (default: "go,py,js,jsx,ts,tsx") +- `-V, --verbose`: Enable verbose logging +- `-v, --version`: Display version information (global flag only, e.g. `codemap --version`) -Processing a file will add a single line annotation at the top: +### Example Annotation Before: ```go @@ -80,6 +114,7 @@ func main() { - Python (.py) - JavaScript (.js, .jsx) - TypeScript (.ts, .tsx) +- Dockerfile ### Default Exclusions @@ -112,21 +147,21 @@ Files: ## Examples ```bash -# Process all supported files in current directory -codemap +# Add annotations with dry-run +codemap apply --dry-run -# Process only Go files in current directory -codemap -types=go +# Process only Go files +codemap apply -t go -# Process Python and JavaScript files in a specific directory -codemap -dir=/path/to/project -types=py,js +# Clean TypeScript files with dry-run +codemap clean -t ts,tsx --dry-run -# Clean annotations from all files in current directory -codemap -clean +# List all Python and JavaScript files +codemap list -t py,js -# Clean annotations from TypeScript files -codemap -clean -types=ts,tsx +# Show stats for Go files +codemap stats -t go -# Process all supported files with verbose logging -codemap -verbose +# Process all files with verbose output +codemap apply -V ``` diff --git a/cmd/codemap/main.go b/cmd/codemap/main.go index 638c7b2..781b51c 100644 --- a/cmd/codemap/main.go +++ b/cmd/codemap/main.go @@ -1,14 +1,12 @@ package main import ( - "flag" - "fmt" "log" "os" - "path/filepath" "runtime" - "github.com/krzko/codemap/internal/processor" + "github.com/krzko/codemap/internal/cli" + ucli "github.com/urfave/cli/v2" ) var ( @@ -17,62 +15,20 @@ var ( date = "unknown" ) -func init() { - log.SetFlags(log.Ltime) -} - -func displayVersion() { - fmt.Printf("codemap
version %s\n", version) - fmt.Printf(" Build date: %s\n", date) - fmt.Printf(" Git commit: %s\n", commit) - fmt.Printf(" Go version: %s\n", runtime.Version()) - fmt.Printf(" OS/Arch: %s/%s\n", runtime.GOOS, runtime.GOARCH) -} - func main() { - showVersion := flag.Bool("version", false, "Display version information") - - dir := flag.String("dir", ".", "Directory to process (defaults to current directory)") - clean := flag.Bool("clean", false, "Remove existing annotations") - verbose := flag.Bool("verbose", false, "Enable verbose logging") - fileTypes := flag.String("types", "go,py,js,jsx,ts,tsx", "Comma-separated list of file types to process") - - versionShort := flag.Bool("v", false, "Display version information") - - flag.Parse() - - // Check for version flag - if *showVersion || *versionShort { - displayVersion() - os.Exit(0) + app := &ucli.App{ + Name: "codemap", + Usage: "Annotate code files with structural information for LLMs", + Version: version, + Metadata: map[string]interface{}{ + "commit": commit, + "buildDate": date, + "goVersion": runtime.Version(), + }, + Commands: cli.Commands(), } - if *verbose { - log.SetFlags(log.Ltime | log.Lshortfile) + if err := app.Run(os.Args); err != nil { + log.Fatal(err) } - - absPath, err := filepath.Abs(*dir) - if err != nil { - log.Printf("Warning: Could not resolve absolute path for %s: %v", *dir, err) - absPath = *dir - } - - log.Printf("Processing directory: %s", absPath) - log.Printf("Looking for file types: %s", *fileTypes) - - opts := processor.DefaultOptions() - opts.Directory = *dir - opts.Clean = *clean - - proc, err := processor.New(opts) - if err != nil { - log.Fatalf("Failed to initialise processor: %v", err) - } - - if err := proc.Process(); err != nil { - log.Fatalf("Failed to process directory: %v", err) - os.Exit(1) - } - - log.Println("Processing completed successfully") } diff --git a/go.mod b/go.mod index eb23d43..9eae9ec 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,13 @@ module 
github.com/krzko/codemap go 1.23.2 -require github.com/gobwas/glob v0.2.3 +require ( + github.com/gobwas/glob v0.2.3 + github.com/urfave/cli/v2 v2.27.5 +) + +require ( + github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect +) diff --git a/go.sum b/go.sum index 39fa9fa..ced7627 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,10 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= +github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w= +github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= diff --git a/internal/cli/apply.go b/internal/cli/apply.go new file mode 100644 index 0000000..8ddce64 --- /dev/null +++ b/internal/cli/apply.go @@ -0,0 +1,55 @@ +package cli + +import ( + "fmt" + "log" + + "github.com/urfave/cli/v2" +) + +func ApplyCommand() *cli.Command { + return &cli.Command{ + Name: "apply", + Usage: "Add annotations to files", + Flags: append(commonFlags, + &cli.BoolFlag{ + Name: "dry-run", + Aliases: []string{"n"}, + Usage: "Show what would be done without making changes", + }, + &cli.BoolFlag{ + Name: "recursive", + Aliases: []string{"r"}, + Usage: "Process directories 
recursively", + Value: true, + }, + ), + Action: runApply, + } +} + +func runApply(c *cli.Context) error { + proc, err := createProcessor(c) + if err != nil { + return fmt.Errorf("failed to initialize processor: %w", err) + } + + files, err := proc.ListFiles() + if err != nil { + return fmt.Errorf("failed to list files: %w", err) + } + + if c.Bool("dry-run") { + log.Printf("Would process %d files in %s", len(files), c.String("dir")) + for _, file := range files { + log.Printf("Would annotate: %s", file) + } + return nil + } + + if err := proc.Process(); err != nil { + return fmt.Errorf("failed to process files: %w", err) + } + + return nil +} diff --git a/internal/cli/clean.go b/internal/cli/clean.go new file mode 100644 index 0000000..5ba34d9 --- /dev/null +++ b/internal/cli/clean.go @@ -0,0 +1,49 @@ +package cli + +import ( + "fmt" + "log" + + "github.com/urfave/cli/v2" +) + +func CleanCommand() *cli.Command { + return &cli.Command{ + Name: "clean", + Usage: "Remove annotations from files", + Flags: append(commonFlags, + &cli.BoolFlag{ + Name: "dry-run", + Aliases: []string{"n"}, + Usage: "Show what would be done without making changes", + }, + ), + Action: runClean, + } +} + +func runClean(c *cli.Context) error { + proc, err := createProcessor(c) + if err != nil { + return fmt.Errorf("failed to initialize processor: %w", err) + } + + files, err := proc.ListFiles() + if err != nil { + return fmt.Errorf("failed to list files: %w", err) + } + + if c.Bool("dry-run") { + log.Printf("Would clean %d files in %s", len(files), c.String("dir")) + for _, file := range files { + log.Printf("Would clean: %s", file) + } + return nil + } + + if err := proc.Clean(); err != nil { + return fmt.Errorf("failed to clean files: %w", err) + } + + return nil +} diff --git a/internal/cli/commands.go b/internal/cli/commands.go new file mode 100644 index 0000000..50b98ce --- /dev/null +++ b/internal/cli/commands.go @@ -0,0 +1,34 @@ +package cli + +import ( + "github.com/urfave/cli/v2" +) 
+ +func Commands() []*cli.Command { + return []*cli.Command{ + ApplyCommand(), + CleanCommand(), + ListCommand(), + StatsCommand(), + } +} + +var commonFlags = []cli.Flag{ + &cli.StringFlag{ + Name: "dir", + Aliases: []string{"d"}, + Usage: "Directory to process", + Value: ".", + }, + &cli.StringFlag{ + Name: "types", + Aliases: []string{"t"}, + Usage: "Comma-separated list of file types to process", + Value: "go,py,js,jsx,ts,tsx", + }, + &cli.BoolFlag{ + Name: "verbose", + Aliases: []string{"V"}, + Usage: "Enable verbose logging", + }, +} diff --git a/internal/cli/common.go b/internal/cli/common.go new file mode 100644 index 0000000..2081554 --- /dev/null +++ b/internal/cli/common.go @@ -0,0 +1,67 @@ +package cli + +import ( + "log" + "strings" + + "github.com/krzko/codemap/internal/processor" + "github.com/urfave/cli/v2" +) + +// createProcessor builds a processor from the flags on the CLI context. +// +// The "types" flag is split on commas; every entry is trimmed and given a +// leading "." when it lacks one. The "recursive" flag is applied only when +// the running command declares it, so commands without that flag keep the +// value from processor.DefaultOptions. As a side effect, the standard +// logger's flags are set according to the "verbose" flag. +func createProcessor(c *cli.Context) (*processor.Processor, error) { + opts := processor.DefaultOptions() + opts.Directory = c.String("dir") + opts.Verbose = c.Bool("verbose") + + // c.Bool reports false for a flag the command never declared, which + // would silently switch recursion off; only override when it exists. + if commandHasFlag(c, "recursive") { + opts.Recursive = c.Bool("recursive") + } + + // Parse file types + if types := c.String("types"); types != "" { + typeList := strings.Split(types, ",") + for i, t := range typeList { + t = strings.TrimSpace(t) + if !strings.HasPrefix(t, ".") { + t = "." + t + } + // Always store the normalised value so entries that already + // start with "." are trimmed too. + typeList[i] = t + } + opts.SupportedTypes = typeList + } + + if c.Bool("verbose") { + log.SetFlags(log.Ltime | log.Lshortfile) + } else { + log.SetFlags(log.Ltime) + } + + return processor.New(opts) +} + +// commandHasFlag reports whether the command being run declares a flag +// with the given name or alias. +func commandHasFlag(c *cli.Context, name string) bool { + if c.Command == nil { + return false + } + for _, f := range c.Command.Flags { + for _, n := range f.Names() { + if n == name { + return true + } + } + } + return false +}
diff --git a/internal/cli/list.go b/internal/cli/list.go new file mode 100644 index 0000000..048cbef --- /dev/null +++ b/internal/cli/list.go @@ -0,0 +1,36 @@ +package cli + +import ( + "fmt" + + "github.com/urfave/cli/v2" +) + +func ListCommand() *cli.Command { + return &cli.Command{ + Name: "list", + Aliases: []string{"ls"}, + Usage: "List files that would be processed", + Flags: commonFlags, + Action: runList, + } +} + +func runList(c *cli.Context) error { + proc, err := createProcessor(c) + if err != nil { + return fmt.Errorf("failed to initialize processor: %w", err) + } + + files, err := proc.ListFiles() + if err != nil { + return fmt.Errorf("failed to list files: %w", err) + } + + fmt.Printf("Found %d files in %s:\n", len(files), c.String("dir")) + for _, file := range files { + fmt.Println(file) + } + + return nil +} diff --git a/internal/cli/stats.go b/internal/cli/stats.go new file mode 100644 index 0000000..3f0840e --- /dev/null +++ b/internal/cli/stats.go @@ -0,0 +1,41 @@ +package cli + +import ( + "fmt" + + "github.com/urfave/cli/v2" +) + +func StatsCommand() *cli.Command { + return &cli.Command{ + Name: "stats", + Usage: "Show statistics about annotations", + Flags: commonFlags, + Action: runStats, + } +} + +func runStats(c *cli.Context) error { + proc, err := createProcessor(c) + if err != nil { + return fmt.Errorf("failed to initialize processor: %w", err) + } + + stats, err := proc.GetStats() + if err != nil { + return fmt.Errorf("failed to get statistics: %w", err) + } + + // Print statistics + fmt.Printf("Statistics for %s:\n", c.String("dir")) + fmt.Printf("Total files processed: %d\n", stats.TotalFiles) + fmt.Printf("Files with annotations: %d\n", stats.AnnotatedFiles) + fmt.Printf("Files without annotations: %d\n", stats.UnannotatedFiles) + +
fmt.Println("\nBreakdown by language:") + for lang, count := range stats.FilesByLanguage { + fmt.Printf(" %s: %d files\n", lang, count) + } + + return nil +} diff --git a/internal/languages/dockerfile.go b/internal/languages/dockerfile.go new file mode 100644 index 0000000..becdbf5 --- /dev/null +++ b/internal/languages/dockerfile.go @@ -0,0 +1,23 @@ +package languages + +type Dockerfile struct{} + +func (d *Dockerfile) FileExtensions() []string { + return []string{".dockerfile", ""} // Empty string for files named exactly "Dockerfile" +} + +func (d *Dockerfile) CommentStart() string { + return "#" +} + +func (d *Dockerfile) CommentEnd() string { + return "" +} + +func (d *Dockerfile) MultiLineCommentStart() string { + return "#" +} + +func (d *Dockerfile) IsSpecialComment(line string) bool { + return false +} \ No newline at end of file diff --git a/internal/processor/options.go b/internal/processor/options.go index 87d8367..f97ac69 100644 --- a/internal/processor/options.go +++ b/internal/processor/options.go @@ -17,6 +17,8 @@ type Options struct { MaxWorkers int // SupportedTypes lists the file extensions to process SupportedTypes []string + // Verbose enables detailed logging + Verbose bool } func DefaultOptions() Options { @@ -48,8 +50,18 @@ func DefaultOptions() Options { "*.sum", "*.mod", }, - Concurrent: true, - MaxWorkers: 4, - SupportedTypes: []string{".go", ".py", ".js", ".jsx", ".ts", ".tsx"}, + Concurrent: true, + MaxWorkers: 4, + SupportedTypes: []string{ + ".go", + ".py", + ".js", + ".jsx", + ".ts", + ".tsx", + ".dockerfile", + "", + }, + Verbose: false, } } \ No newline at end of file diff --git a/internal/processor/processor.go b/internal/processor/processor.go index ece5c92..9a876a7 100644 --- a/internal/processor/processor.go +++ b/internal/processor/processor.go @@ -18,6 +18,13 @@ type Processor struct { walker *walker.Walker } +type Stats struct { + TotalFiles int + AnnotatedFiles int + UnannotatedFiles int + FilesByLanguage map[string]int +} + 
// New creates a new Processor instance func New(opts Options) (*Processor, error) { w, err := walker.New( @@ -36,14 +43,93 @@ func New(opts Options) (*Processor, error) { }, nil } +// Clean removes annotations from every supported file; per-file failures are logged and skipped, so it returns an error only when listing fails +func (p *Processor) Clean() error { + files, err := p.ListFiles() + if err != nil { + return err + } + + log.Printf("Found %d total files", len(files)) + + supportedCount := 0 + for _, file := range files { + // Skip unsupported files + if !p.isSupported(file) { + if p.opts.Verbose { + log.Printf("Skipping unsupported file: %s", file) + } + continue + } + + if err := p.annotator.RemoveAnnotation(file); err != nil { + // Best effort: always report the failure and keep going; the + // final count only includes files cleaned without error. + log.Printf("Error cleaning %s: %v", file, err) + continue + } + supportedCount++ + } + + log.Printf("Successfully processed %d supported files", supportedCount) + return nil +} + +// GetStats returns file totals, per-language counts and annotated/unannotated counts for the listed files; a read error aborts the scan +func (p *Processor) GetStats() (*Stats, error) { + files, err := p.ListFiles() + if err != nil { + return nil, err + } + + stats := &Stats{ + TotalFiles: len(files), + FilesByLanguage: make(map[string]int), + } + + for _, file := range files { + lang := p.determineLanguage(file) + stats.FilesByLanguage[lang]++ + + content, err := os.ReadFile(file) + if err != nil { + return nil, err + } + + if p.annotator.HasAnnotation(string(content)) { + stats.AnnotatedFiles++ + } else { + stats.UnannotatedFiles++ + } + } + + return stats, nil +} + +// ListFiles returns a list of files that would be processed +func (p *Processor) ListFiles() ([]string, error) { + return p.walker.Walk() +} + // Process handles the file processing func (p *Processor) Process() error { - files, err := p.walker.Walk() + files, err := p.ListFiles() if err != nil { - return fmt.Errorf("failed to walk directory: %w", err) + return err } - log.Printf("Found %d files to process", len(files)) + log.Printf("Found %d total files", len(files)) + + supportedFiles := []string{} + for _, file := range files { + if p.isSupported(file) { +
supportedFiles = append(supportedFiles, file) + } else if p.opts.Verbose { + log.Printf("Skipping unsupported file: %s", file) + } + } + + log.Printf("Found %d supported files", len(supportedFiles)) if p.opts.Clean { log.Printf("Running in clean mode - removing annotations") @@ -53,11 +139,11 @@ func (p *Processor) Process() error { if p.opts.Concurrent { log.Printf("Processing files concurrently with %d workers", p.opts.MaxWorkers) - return p.processConcurrent(files) + return p.processConcurrent(supportedFiles) } log.Printf("Processing files sequentially") - return p.processSequential(files) + return p.processSequential(supportedFiles) } func (p *Processor) processConcurrent(files []string) error { @@ -133,8 +219,11 @@ func (p *Processor) processFile(path string) error { return nil } -// Add the missing methods func (p *Processor) determineLanguage(path string) string { + if filepath.Base(path) == "Dockerfile" { + return "Dockerfile" + } + ext := filepath.Ext(path) switch ext { case ".go": @@ -145,6 +234,8 @@ func (p *Processor) determineLanguage(path string) string { return "JavaScript" case ".ts", ".tsx": return "TypeScript" + case ".dockerfile": + return "Dockerfile" default: return "Unknown" } @@ -179,6 +270,11 @@ func (p *Processor) determinePackageName(path string) string { return p.readGoPackageName(path) } + // For Dockerfile, use "docker" as package name + if filepath.Base(path) == "Dockerfile" || filepath.Ext(path) == ".dockerfile" { + return "docker" + } + // For other files, use the directory name as package name return filepath.Base(filepath.Dir(path)) } diff --git a/pkg/annotator/annotator.go b/pkg/annotator/annotator.go index 8488e6d..cb44d9a 100644 --- a/pkg/annotator/annotator.go +++ b/pkg/annotator/annotator.go @@ -23,12 +23,14 @@ type DefaultAnnotator struct { func New() Annotator { return &DefaultAnnotator{ languages: map[string]languages.Language{ - ".go": &languages.GoLang{}, - ".py": &languages.Python{}, - ".js": &languages.JavaScript{}, - 
".jsx": &languages.JavaScript{}, - ".ts": &languages.JavaScript{}, - ".tsx": &languages.JavaScript{}, + ".go": &languages.GoLang{}, + ".py": &languages.Python{}, + ".js": &languages.JavaScript{}, + ".jsx": &languages.JavaScript{}, + ".ts": &languages.JavaScript{}, + ".tsx": &languages.JavaScript{}, + ".dockerfile": &languages.Dockerfile{}, + "": &languages.Dockerfile{}, }, } } @@ -46,7 +48,7 @@ func (a *DefaultAnnotator) AddAnnotation(info FileInfo) error { } // Check if annotation already exists - if a.hasAnnotation(string(content), lang) { + if a.hasAnnotationWithLang(string(content), lang) { relPath, err := filepath.Rel(".", info.Path) if err != nil { relPath = info.Path @@ -88,7 +90,7 @@ func (a *DefaultAnnotator) RemoveAnnotation(path string) error { return err } - if !a.hasAnnotation(string(content), lang) { + if !a.hasAnnotationWithLang(string(content), lang) { relPath, err := filepath.Rel(".", path) if err != nil { relPath = path @@ -140,7 +142,19 @@ func (a *DefaultAnnotator) createAnnotation(lang languages.Language, info FileIn info.Language) } -func (a *DefaultAnnotator) hasAnnotation(content string, lang languages.Language) bool { +// HasAnnotation checks if a file has a codemap annotation +func (a *DefaultAnnotator) HasAnnotation(content string) bool { + scanner := bufio.NewScanner(strings.NewReader(content)) + // Only check the first line + if !scanner.Scan() { + return false + } + firstLine := scanner.Text() + return strings.Contains(firstLine, annotationPattern) +} + +// hasAnnotationWithLang is an internal helper that checks for language-specific annotation +func (a *DefaultAnnotator) hasAnnotationWithLang(content string, lang languages.Language) bool { scanner := bufio.NewScanner(strings.NewReader(content)) // Only check the first line if !scanner.Scan() { diff --git a/pkg/annotator/types.go b/pkg/annotator/types.go index 6823d6a..aa0fcd9 100644 --- a/pkg/annotator/types.go +++ b/pkg/annotator/types.go @@ -7,9 +7,12 @@ type FileInfo struct { 
PackageName string } +// Annotator interface defines the methods for file annotation handling type Annotator interface { // AddAnnotation adds file structure information to the file AddAnnotation(info FileInfo) error // RemoveAnnotation removes existing annotation from the file RemoveAnnotation(path string) error + // HasAnnotation checks if a file has a codemap annotation + HasAnnotation(content string) bool } \ No newline at end of file