Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 3 additions & 50 deletions .github/workflows/test-summarize.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
strategy:
matrix:
os: [Ubuntu-latest, macOS-latest]
go-version: ['1.23.8', '1.24.2']
go-version: ['1.24.0']
fail-fast: false # Continue testing all combinations even if one fails

steps:
Expand Down Expand Up @@ -47,13 +47,12 @@ jobs:
run: |
mkdir -p ${{ github.workspace }}/anotherProject
echo -e "package main\n\nfunc main() {\n println(\"Hello, World!\")\n}" > ${{ github.workspace }}/anotherProject/hello.go
mkdir -p ${{ github.workspace }}/summaries
shell: bash

- name: Test 1 Step 5 Run summarize with command-line arguments
run: |
cd ${{ github.workspace }}/anotherProject
${{ github.workspace }}/summarize -d . -o ${{ github.workspace }}/summaries
cd ${{ github.workspace }}
${{ github.workspace }}/summarize -d anotherProject -o ${{ github.workspace }}/summaries
ls -lh ${{ github.workspace }}/summaries/
shell: bash

Expand Down Expand Up @@ -90,49 +89,3 @@ jobs:
echo "Contents of $SUMMARY_FILE:"
cat "$SUMMARY_FILE"
shell: bash

# Step 9: Verify the summary contains the hello.go source code.
# Locates the "## hello.go" section of the generated summary, extracts the
# fenced code block that follows it, and diffs it against the original file.
- name: Test 1 Step 9 Verify summary contains hello.go source code (command-line usage)
  run: |
    SUMMARY_FILE="${{ steps.find-summary-cli.outputs.summary_file }}"
    RANDOM_FILE="hello.go"
    RANDOM_FILE_ABS="${{ github.workspace }}/anotherProject/hello.go"
    echo "Checking if $SUMMARY_FILE contains the source code of $RANDOM_FILE"

    # Line number of the section header for the file under test.
    SECTION_START=$(grep -n "^## $RANDOM_FILE$" "$SUMMARY_FILE" | cut -d: -f1)
    if [ -z "$SECTION_START" ]; then
      echo "Error: Could not find section for $RANDOM_FILE in $SUMMARY_FILE"
      echo "Listing all section headers in $SUMMARY_FILE:"
      grep "^## " "$SUMMARY_FILE"
      exit 1
    fi

    # The code is taken to start three lines below the header; the closing
    # fence is searched for from that line onward.
    CODE_START=$((SECTION_START + 3))
    # `|| true` keeps a no-match grep from aborting the step under pipefail.
    CODE_END=$(tail -n +$CODE_START "$SUMMARY_FILE" | grep -n "^\`\`\`$" | head -n 1 | cut -d: -f1 || true)
    if [ -z "$CODE_END" ] || [ "$CODE_END" -eq 0 ]; then
      echo "Error: Could not find code block end for $RANDOM_FILE in $SUMMARY_FILE"
      echo "Dumping lines after section start for debugging (up to 20 lines):"
      tail -n +$SECTION_START "$SUMMARY_FILE" | head -n 20
      exit 1
    fi
    CODE_LINES=$((CODE_END - 1))
    if [ $CODE_LINES -le 0 ]; then
      echo "Error: Invalid code block length ($CODE_LINES lines) for $RANDOM_FILE"
      echo "Dumping lines after section start for debugging (up to 20 lines):"
      tail -n +$SECTION_START "$SUMMARY_FILE" | head -n 20
      exit 1
    fi
    tail -n +$CODE_START "$SUMMARY_FILE" > temp_code_block.txt
    head -n $CODE_LINES temp_code_block.txt > extracted_code.txt
    rm temp_code_block.txt

    cat "$RANDOM_FILE_ABS" > original_code.txt

    # Run diff as the `if` condition: the step's bash runs with errexit, so a
    # bare failing diff would end the script before the report below is printed.
    if ! diff -wB extracted_code.txt original_code.txt > diff_output.txt; then
      echo "Error: The source code in the summary does not match the original file"
      cat diff_output.txt
      exit 1
    fi
    echo "Success: The source code of $RANDOM_FILE in $SUMMARY_FILE matches the original file"
  shell: bash
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ summarize
.idea
.DS_Store
*.log
summaries/
summaries/
bin/
70 changes: 70 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Generic Makefile for any Go project.

# Path of the main package handed to `go build`.
MAIN_PATH := .
# Directory that receives the compiled binaries.
BIN_DIR := bin
# Binary name: the last path component of the resolved MAIN_PATH.
APP_NAME := $(shell basename "$(shell realpath $(MAIN_PATH))")

# Flags passed to every `go build` invocation:
#   -s  strip symbols (reduces binary size)
#   -w  omit DWARF debugging information
LDFLAGS := -ldflags "-s -w"

# These targets name commands rather than files; each build target is listed
# exactly once so that none of them can be shadowed by a same-named file.
.PHONY: all clean summary install darwin-amd64 darwin-arm64 linux-amd64 linux-arm64 windows-amd64

# Make sure the output directory exists before a build writes into it.
# NOTE(review): this looks like the first ordinary target in the file, which
# would make it the default goal of a bare `make` - confirm that is intended.
$(BIN_DIR):
	@mkdir -p $@

# Cross-compile every supported platform, then run the install target.
all: \
  darwin-amd64 \
  darwin-arm64 \
  linux-amd64 \
  linux-arm64 \
  windows-amd64 \
  install

# Print a summary of the project sources, fetching the summarize tool first
# when it is not already on PATH.
summary:
	@command -v summarize > /dev/null || \
		go install github.com/andreimerlescu/summarize@latest
	@summarize -i "go,Makefile,mod" -debug -print

# Copy the binary built for the platform reported by `go env` into Go's bin
# directory, adding the .exe suffix on Windows.
#
# - Uses POSIX `[ ... = ... ]` tests so the recipe works under make's default
#   /bin/sh as well as bash.
# - `go env GOBIN` prints nothing unless GOBIN is set, so fall back to
#   GOPATH/bin (assumes GOPATH holds a single entry - TODO confirm).
# - The destination path is reported after the copy succeeds.
install: $(BIN_DIR)
	@goos="$$(go env GOOS)"; \
	goarch="$$(go env GOARCH)"; \
	dest="$$(go env GOBIN)"; \
	if [ -z "$$dest" ]; then dest="$$(go env GOPATH)/bin"; fi; \
	ext=""; \
	if [ "$$goos" = "windows" ]; then ext=".exe"; fi; \
	mkdir -p "$$dest" && \
	cp "$(BIN_DIR)/$(APP_NAME)-$$goos-$$goarch$$ext" "$$dest/$(APP_NAME)$$ext" && \
	echo "NEW: $$dest/$(APP_NAME)$$ext"

# macOS on Intel (amd64). The target name doubles as the binary's suffix.
darwin-amd64: $(BIN_DIR)
	@GOOS=darwin GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-$@ $(MAIN_PATH)
	@echo "NEW: $(BIN_DIR)/$(APP_NAME)-$@"

# Build for macOS Silicon (arm64)
# The confirmation line reports the same path that `go build -o` writes.
darwin-arm64: $(BIN_DIR)
	@GOOS=darwin GOARCH=arm64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-darwin-arm64 $(MAIN_PATH)
	@echo "NEW: $(BIN_DIR)/$(APP_NAME)-darwin-arm64"

# Build for Linux ARM64
# The confirmation line reports the same path that `go build -o` writes.
linux-arm64: $(BIN_DIR)
	@GOOS=linux GOARCH=arm64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-linux-arm64 $(MAIN_PATH)
	@echo "NEW: $(BIN_DIR)/$(APP_NAME)-linux-arm64"

# Linux on amd64. The target name doubles as the binary's suffix.
linux-amd64: $(BIN_DIR)
	@GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-$@ $(MAIN_PATH)
	@echo "NEW: $(BIN_DIR)/$(APP_NAME)-$@"

# Build for Windows AMD64
# The binary carries the same <app>-<os>-<arch> suffix as the other platforms,
# which is also the file name the install target copies on Windows.
windows-amd64: $(BIN_DIR)
	@GOOS=windows GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-windows-amd64.exe $(MAIN_PATH)
	@echo "NEW: $(BIN_DIR)/$(APP_NAME)-windows-amd64.exe"

# Remove the build output directory and report that it is gone.
clean:
	@rm -rf $(BIN_DIR) && echo "REMOVED: $(BIN_DIR)"

# Project-specific targets

# Run the test suite through the test.sh script in the current directory.
test:
	./test.sh

.PHONY: test
189 changes: 174 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,55 @@
# Summarize

A go utility that will capture files with an extension pattern into a single markdown formatted
file that looks like:
The **Summarize** package was designed for developers who wish to leverage the use of Artificial Intelligence while
working on a project. The `summarize` command gives you a powerful interface that is managed by arguments and environment
variables that define the extensions to include/exclude, and a list of substrings to avoid while parsing paths. The binary has
concurrency built into it and has limits for the output file. It ignores its default output directory so it won't
recursively build summaries upon itself. It defaults to writing to a new directory that it'll try to create in the
current working directory called `summaries`, that I recommend that you add to your `.gitignore` and `.dockerignore`.

I've found it useful to leverage the `make summary` command in all of my projects. This way, if I need to ask an AI a
question about a piece of code, I can capture the source code of the entire directory quickly and then just `cat` the
output file path provided and _voila_! The `-print` argument allows you to display the summary contents in the STDOUT
instead of the `Summary generated: summaries/summary.2025.07.29.08.59.03.UTC.md` that it would normally generate.

The **Environment** can be used to control the native behavior of the `summarize` binary, such that you won't be required
to type the arguments out each time. If you use _JSON_ all the time, you can enable its output format on every command
by setting the `SUMMARIZE_ALWAYS_JSON` variable. If you always want to write the summary, you can use the `SUMMARIZE_ALWAYS_WRITE`
variable. If you want to always print the summary to STDOUT instead of the success message, you can use the variable
`SUMMARIZE_ALWAYS_PRINT`. If you want to compress the rendered summary every time, you can use the variable
`SUMMARIZE_ALWAYS_COMPRESS`. These `SUMMARIZE_ALWAYS_*` environment variables are responsible for customizing the
runtime of the `summarize` application.

When the `summarize` binary runs, it'll do its best to ignore files that it can't render to a text file. This includes
images, videos, binary files, and text files that are commonly linked to secrets.

The developer experience of `summarize` is designed for quick use: just run `summarize` from
wherever you wish to summarize. The `-d` for **source directory** defaults to `.` and the `-o`/`-f` for **output path**
defaults to a new timestamped file (`-f`) in the (`-o`) `summaries/` directory from the `.` context. The `-i` and `-x` are used to
define which file extensions, such as `go,ts,py`, to <b>i</b>nclude and e<b>x</b>clude. The `-s` is used to
**skip** over substrings within a scanned path. Dotfiles can completely be ignored by all paths by using `-ndf` as a flag.

Performance of the application can be tuned using the `-mf=<int>` to assign **Max Files** that will concurrently be
processed. The default is 369. The `-max=<int64>` represents a limit on how large the rendered summary can become.

Once the program finishes running, the rendered file will look similar to:

```md
# Project Summary

### `filename.ext`
<AI prompt description>

### `filename.go`

<File Info>

<full source code>

### `filename.ext`
### `filename.cs`

<File Info>

<full source code>

... etc.

Expand Down Expand Up @@ -49,19 +88,139 @@ cd ~/work/anotherProject
summarize -d anotherProject -o /home/user/summaries/anotherProject
```

Since `figtree` is designed to be very functional, its lightweight but feature
intense design through simple biology memetics makes it well suited for this program.

## Options

| Name | Argument | Type | Usage |
|-----------------|----------|----------|--------------------------------------------------------|
| `kSourceDir` | -d` | `string` | Source directory path. |
| `kOutputDir` | -o` | `string` | Summary destination output directory path. |
| `kExcludeExt` | `-x` | `list` | Comma separated string list of extensions to exclude. |
| `kSkipContains` | `-s` | `list` | Comma separated string to filename substrings to skip. |
| `kIncludeExt` | `-i` | `list` | Comma separated string of extensions to include. |
| `kFilename` | `-f` | `string` | Summary filename (writes to `-o` dir). |
| Name | Argument | Type | Usage |
|------------------|----------|----------|-------------------------------------------------------------------|
| `kSourceDir` | `-d` | `string` | Source directory path. |
| `kOutputDir` | `-o` | `string` | Summary destination output directory path. |
| `kExcludeExt` | `-x` | `list` | Comma separated string list of extensions to exclude. |
| `kSkipContains` | `-s` | `list` | Comma separated string to filename substrings to skip. |
| `kIncludeExt` | `-i` | `list` | Comma separated string of extensions to include. |
| `kFilename` | `-f` | `string` | Summary filename (writes to `-o` dir). |
| `kVersion` | `-v` | `bool` | When `true`, the binary version is shown |
| `kCompress` | `-gz` | `bool` | When `true`, **gzip** is used on the contents of the summary |
| `kMaxOutputSize` | `-max` | `int64` | Maximum size of the generated summary allowed |
| `kPrint` | `-print` | `bool` | Uses STDOUT to write contents of summary |
| `kWrite` | `-write` | `bool` | Uses the filesystem to save contents of summary |
| `kDebug` | `-debug` | `bool` | When `true`, extra content is written to STDOUT aside from report |


## Environment

| Environment Variable | Type | Default Value | Usage |
|-----------------------------|----------|------------------------|-------------------------------------------------------------------------------------------------------------|
| `SUMMARIZE_CONFIG_FILE` | `String` | `./config.yaml` | Contents of the YAML Configuration to use for [figtree](https://github.com/andreimerlescu/figtree). |
| `SUMMARIZE_IGNORE_CONTAINS` | `List` | \* see below | Add items to this default list by creating your own new list here, they get concatenated. |
| `SUMMARIZE_INCLUDE_EXT` | `List` | \*\* see below \* | Add extensions to include in the summary in this environment variable, comma separated. |
| `SUMMARIZE_EXCLUDE_EXT` | `List` | \*\*\* see below \* \* | Add exclusionary extensions to ignore to this environment variable, comma separated. |
| `SUMMARIZE_ALWAYS_PRINT` | `Bool` | `false` | When `true`, the `-print` will write the summary to STDOUT. |
| `SUMMARIZE_ALWAYS_WRITE` | `Bool` | `false` | When `true`, the `-write` will write to a new file on the disk. |
| `SUMMARIZE_ALWAYS_JSON` | `Bool` | `false` | When `true`, the `-json` flag will render JSON output to the console. |
| `SUMMARIZE_ALWAYS_COMPRESS` | `Bool` | `false` | When `true`, the `-gz` flag will use gzip to compress the summary contents and appends `.gz` to the output. |


### \* Default `SUMMARIZE_IGNORE_CONTAINS` Value

```json
7z,gz,xz,zst,zstd,bz,bz2,bzip2,zip,tar,rar,lz4,lzma,cab,arj,crt,cert,cer,key,pub,asc,pem,p12,pfx,jks,keystore,id_rsa,id_dsa,id_ed25519,id_ecdsa,gpg,pgp,exe,dll,so,dylib,bin,out,o,obj,a,lib,dSYM,class,pyc,pyo,__pycache__,jar,war,ear,apk,ipa,dex,odex,wasm,node,beam,elc,iso,img,dmg,vhd,vdi,vmdk,qcow2,db,sqlite,sqlite3,db3,mdb,accdb,sdf,ldb,log,trace,dump,crash,jpg,jpeg,png,gif,bmp,tiff,tif,webp,ico,svg,heic,heif,raw,cr2,nef,dng,mp3,wav,flac,aac,ogg,wma,m4a,opus,aiff,mp4,avi,mov,mkv,webm,flv,wmv,m4v,3gp,ogv,ttf,otf,woff,woff2,eot,fon,pfb,pfm,pdf,doc,docx,xls,xlsx,ppt,pptx,odt,ods,odp,rtf,suo,sln,user,ncb,pdb,ipch,ilk,tlog,idb,aps,res,iml,idea,vscode,project,classpath,factorypath,prefs,vcxproj,vcproj,filters,xcworkspace,xcuserstate,xcscheme,pbxproj,DS_Store,Thumbs.db,desktop.ini,lock,sum,resolved,tmp,temp,swp,swo,bak,backup,orig,rej,patch,~,old,new,part,incomplete,map,min.js,min.css,bundle.js,bundle.css,chunk.js,dat,data,cache,pid,sock,pack,idx,rev,pickle,pkl,npy,npz,mat,rdata,rds
```

```go

// defaultExclude are the -exc list of extensions that will be skipped automatically
defaultExclude = []string{
// Compressed archives
"7z", "gz", "xz", "zst", "zstd", "bz", "bz2", "bzip2", "zip", "tar", "rar", "lz4", "lzma", "cab", "arj",

// Encryption, certificates, and sensitive keys
"crt", "cert", "cer", "key", "pub", "asc", "pem", "p12", "pfx", "jks", "keystore",
"id_rsa", "id_dsa", "id_ed25519", "id_ecdsa", "gpg", "pgp",

// Binary & executable artifacts
"exe", "dll", "so", "dylib", "bin", "out", "o", "obj", "a", "lib", "dSYM",
"class", "pyc", "pyo", "__pycache__",
"jar", "war", "ear", "apk", "ipa", "dex", "odex",
"wasm", "node", "beam", "elc",

// System and disk images
"iso", "img", "dmg", "vhd", "vdi", "vmdk", "qcow2",

// Database files
"db", "sqlite", "sqlite3", "db3", "mdb", "accdb", "sdf", "ldb",

// Log files
"log", "trace", "dump", "crash",

// Media files - Images
"jpg", "jpeg", "png", "gif", "bmp", "tiff", "tif", "webp", "ico", "svg", "heic", "heif", "raw", "cr2", "nef", "dng",

// Media files - Audio
"mp3", "wav", "flac", "aac", "ogg", "wma", "m4a", "opus", "aiff",

// Media files - Video
"mp4", "avi", "mov", "mkv", "webm", "flv", "wmv", "m4v", "3gp", "ogv",

// Font files
"ttf", "otf", "woff", "woff2", "eot", "fon", "pfb", "pfm",

// Document formats (typically not source code)
"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp", "rtf",

// IDE/Editor/Tooling artifacts
"suo", "sln", "user", "ncb", "pdb", "ipch", "ilk", "tlog", "idb", "aps", "res",
"iml", "idea", "vscode", "project", "classpath", "factorypath", "prefs",
"vcxproj", "vcproj", "filters", "xcworkspace", "xcuserstate", "xcscheme", "pbxproj",
"DS_Store", "Thumbs.db", "desktop.ini",

// Package manager and build artifacts
"lock", "sum", "resolved", // package-lock.json, go.sum, yarn.lock, etc.

// Temporary and backup files
"tmp", "temp", "swp", "swo", "bak", "backup", "orig", "rej", "patch",
"~", "old", "new", "part", "incomplete",

// Source maps and minified files (usually generated)
"map", "min.js", "min.css", "bundle.js", "bundle.css", "chunk.js",

// Configuration that's typically binary or generated
"dat", "data", "cache", "pid", "sock",

// Version control artifacts (though usually in ignored directories)
"pack", "idx", "rev",

// Other binary formats
"pickle", "pkl", "npy", "npz", "mat", "rdata", "rds",
}

```

### \* \* Default `SUMMARIZE_INCLUDE_EXT`

```json
go,ts,tf,sh,py,js,Makefile,mod,Dockerfile,dockerignore,gitignore,esconfigs,md
```

```go
// defaultInclude are the -inc list of extensions that will be included in the summary
defaultInclude = []string{
"go", "ts", "tf", "sh", "py", "js", "Makefile", "mod", "Dockerfile", "dockerignore", "gitignore", "esconfigs", "md",
}
```

### \* \* \* Default `SUMMARIZE_EXCLUDE_EXT`

```json
.min.js,.min.css,.git/,.svn/,.vscode/,.vs/,.idea/,logs/,secrets/,.venv/,/site-packages,.terraform/,summaries/,node_modules/,/tmp,tmp/,logs/
```

```go
// defaultAvoid are the -avoid list of substrings in file path names to avoid in the summary
defaultAvoid = []string{
".min.js", ".min.css", ".git/", ".svn/", ".vscode/", ".vs/", ".idea/", "logs/", "secrets/",
".venv/", "/site-packages", ".terraform/", "summaries/", "node_modules/", "/tmp", "tmp/", "logs/",
}
```

## Contribution

Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v1.0.0
v1.0.2
11 changes: 6 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
module github.com/andreimerlescu/summarize

go 1.23.7
go 1.24.5

require (
github.com/andreimerlescu/checkfs v1.0.2
github.com/andreimerlescu/figtree/v2 v2.0.3
github.com/andreimerlescu/checkfs v1.0.4
github.com/andreimerlescu/figtree/v2 v2.0.14
github.com/andreimerlescu/sema v1.0.0
)

require (
github.com/andreimerlescu/bump v1.0.3 // indirect
github.com/go-ini/ini v1.67.0 // indirect
golang.org/x/sys v0.31.0 // indirect
golang.org/x/term v0.30.0 // indirect
golang.org/x/sys v0.33.0 // indirect
golang.org/x/term v0.32.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
Loading
Loading