diff --git a/.github/workflows/test-summarize.yml b/.github/workflows/test-summarize.yml
index 5126bea..1c59996 100644
--- a/.github/workflows/test-summarize.yml
+++ b/.github/workflows/test-summarize.yml
@@ -16,7 +16,7 @@ jobs:
strategy:
matrix:
os: [Ubuntu-latest, macOS-latest]
- go-version: ['1.23.8', '1.24.2']
+ go-version: ['1.24.0']
fail-fast: false # Continue testing all combinations even if one fails
steps:
@@ -47,13 +47,12 @@ jobs:
run: |
mkdir -p ${{ github.workspace }}/anotherProject
echo -e "package main\n\nfunc main() {\n println(\"Hello, World!\")\n}" > ${{ github.workspace }}/anotherProject/hello.go
- mkdir -p ${{ github.workspace }}/summaries
shell: bash
- name: Test 1 Step 5 Run summarize with command-line arguments
run: |
- cd ${{ github.workspace }}/anotherProject
- ${{ github.workspace }}/summarize -d . -o ${{ github.workspace }}/summaries
+ cd ${{ github.workspace }}
+ ${{ github.workspace }}/summarize -d anotherProject -o ${{ github.workspace }}/summaries
ls -lh ${{ github.workspace }}/summaries/
shell: bash
@@ -90,49 +89,3 @@ jobs:
echo "Contents of $SUMMARY_FILE:"
cat "$SUMMARY_FILE"
shell: bash
-
- # Step 9: Verify the summary contains the hello.go source code
- - name: Test 1 Step 9 Verify summary contains hello.go source code (command-line usage)
- run: |
- SUMMARY_FILE="${{ steps.find-summary-cli.outputs.summary_file }}"
- RANDOM_FILE="hello.go"
- RANDOM_FILE_ABS="${{ github.workspace }}/anotherProject/hello.go"
- echo "Checking if $SUMMARY_FILE contains the source code of $RANDOM_FILE"
-
- SECTION_START=$(grep -n "^## $RANDOM_FILE$" "$SUMMARY_FILE" | cut -d: -f1)
- if [ -z "$SECTION_START" ]; then
- echo "Error: Could not find section for $RANDOM_FILE in $SUMMARY_FILE"
- echo "Listing all section headers in $SUMMARY_FILE:"
- grep "^## " "$SUMMARY_FILE"
- exit 1
- fi
-
- CODE_START=$((SECTION_START + 3))
- CODE_END=$(tail -n +$CODE_START "$SUMMARY_FILE" | grep -n "^\`\`\`$" | head -n 1 | cut -d: -f1 || true)
- if [ -z "$CODE_END" ] || [ "$CODE_END" -eq 0 ]; then
- echo "Error: Could not find code block end for $RANDOM_FILE in $SUMMARY_FILE"
- echo "Dumping lines after section start for debugging (up to 20 lines):"
- tail -n +$SECTION_START "$SUMMARY_FILE" | head -n 20
- exit 1
- fi
- CODE_LINES=$((CODE_END - 1))
- if [ $CODE_LINES -le 0 ]; then
- echo "Error: Invalid code block length ($CODE_LINES lines) for $RANDOM_FILE"
- echo "Dumping lines after section start for debugging (up to 20 lines):"
- tail -n +$SECTION_START "$SUMMARY_FILE" | head -n 20
- exit 1
- fi
- tail -n +$CODE_START "$SUMMARY_FILE" > temp_code_block.txt
- head -n $CODE_LINES temp_code_block.txt > extracted_code.txt
- rm temp_code_block.txt
-
- cat "$RANDOM_FILE_ABS" > original_code.txt
-
- diff -wB extracted_code.txt original_code.txt > diff_output.txt
- if [ $? -ne 0 ]; then
- echo "Error: The source code in the summary does not match the original file"
- cat diff_output.txt
- exit 1
- fi
- echo "Success: The source code of $RANDOM_FILE in $SUMMARY_FILE matches the original file"
- shell: bash
diff --git a/.gitignore b/.gitignore
index 15fe251..3b89247 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@ summarize
.idea
.DS_Store
*.log
-summaries/
\ No newline at end of file
+summaries/
+bin/
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..447549c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,70 @@
+# Generic Makefile for Any Go Project
+MAIN_PATH=.
+APP_NAME := $(shell basename "$(shell realpath $(MAIN_PATH))")
+BIN_DIR=bin
+
+# Go build flags
+# -s: Strip symbols (reduces binary size)
+# -w: Omit DWARF debugging information
+LDFLAGS=-ldflags "-s -w"
+
+.PHONY: all clean summary install darwin-amd64 darwin-arm64 linux-amd64 linux-arm64 windows-amd64
+
+# Create build directory if it doesn't exist
+$(BIN_DIR):
+ @mkdir -p $(BIN_DIR)
+
+# Build for all platforms
+all: darwin-amd64 darwin-arm64 linux-amd64 linux-arm64 windows-amd64 install
+
+summary:
+ @if ! command -v summarize > /dev/null; then \
+ go install github.com/andreimerlescu/summarize@latest; \
+ fi
+ @summarize -i "go,Makefile,mod" -debug -print
+
+install: $(BIN_DIR)
+	@if [ "$(shell go env GOOS)" = "windows" ]; then \
+ cp $(BIN_DIR)/$(APP_NAME)-$(shell go env GOOS)-$(shell go env GOARCH).exe "$(shell go env GOBIN)/$(APP_NAME).exe"; \
+ else \
+ cp $(BIN_DIR)/$(APP_NAME)-$(shell go env GOOS)-$(shell go env GOARCH) "$(shell go env GOBIN)/$(APP_NAME)"; \
+ fi
+ @echo "NEW: $(shell which $(APP_NAME))"
+
+# Build for macOS Intel (amd64)
+darwin-amd64: $(BIN_DIR)
+ @GOOS=darwin GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-darwin-amd64 $(MAIN_PATH)
+ @echo "NEW: $(BIN_DIR)/$(APP_NAME)-darwin-amd64"
+
+# Build for macOS Silicon (arm64)
+darwin-arm64: $(BIN_DIR)
+ @GOOS=darwin GOARCH=arm64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-darwin-arm64 $(MAIN_PATH)
+	@echo "NEW: $(BIN_DIR)/$(APP_NAME)-darwin-arm64"
+
+# Build for Linux ARM64
+linux-arm64: $(BIN_DIR)
+ @GOOS=linux GOARCH=arm64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-linux-arm64 $(MAIN_PATH)
+	@echo "NEW: $(BIN_DIR)/$(APP_NAME)-linux-arm64"
+
+# Build for Linux AMD64
+linux-amd64: $(BIN_DIR)
+ @GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-linux-amd64 $(MAIN_PATH)
+ @echo "NEW: $(BIN_DIR)/$(APP_NAME)-linux-amd64"
+
+# Build for Windows AMD64
+windows-amd64: $(BIN_DIR)
+	@GOOS=windows GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-windows-amd64.exe $(MAIN_PATH)
+	@echo "NEW: $(BIN_DIR)/$(APP_NAME)-windows-amd64.exe"
+
+# Clean build artifacts
+clean:
+ @rm -rf $(BIN_DIR)
+ @echo "REMOVED: $(BIN_DIR)"
+
+# Project Specific
+
+.PHONY: test
+
+# Run tests
+test:
+ ./test.sh
diff --git a/README.md b/README.md
index 1c63e46..fe6b66a 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,55 @@
# Summarize
-A go utility that will capture files with an extension pattern into a single markdown formatted
-file that looks like:
+The **Summarize** package was designed for developers who wish to leverage the use of Artificial Intelligence while
+working on a project. The `summarize` command gives you a powerful interface that is managed by arguments and environment
+variables that define include/exclude extensions, and avoid substrings list while parsing paths. The binary has
+concurrency built into it and has limits for the output file. It ignores its default output directory so it won't
+recursively build summaries upon itself. It defaults to writing to a new directory that it'll try to create in the
+current working directory called `summaries`, which I recommend you add to your `.gitignore` and `.dockerignore`.
+
+I've found it useful to leverage the `make summary` command in all of my projects. This way, if I need to ask an AI a
+question about a piece of code, I can capture the source code of the entire directory quickly and then just `cat` the
+output file path provided and _voila_! The `-print` argument allows you to display the summary contents in the STDOUT
+instead of the `Summary generated: summaries/summary.2025.07.29.08.59.03.UTC.md` that it would normally generate.
+
+The **Environment** can be used to control the native behavior of the `summarize` binary, such that you won't be required
+to type the arguments out each time. If you use _JSON_ all the time, you can enable its output format on every command
+by using the `SUMMARIZE_ALWAYS_JSON`. If you always want to write the summary, you can use the `SUMMARIZE_ALWAYS_WRITE`
+variable. If you want to always print the summary to STDOUT instead of the success message, you can use the variable
+`SUMMARIZE_ALWAYS_PRINT`. If you want to compress the rendered summary every time, you can use the variable
+`SUMMARIZE_ALWAYS_COMPRESS`. These `SUMMARIZE_ALWAYS_*` environment variables are responsible for customizing the
+runtime of the `summarize` application.
+
+When the `summarize` binary runs, it'll do its best to ignore files that it can't render to a text file. This includes
+images, videos, binary files, and text files that are commonly linked to secrets.
+
+The developer experience while using `summarize` is designed to enable quick use with just running `summarize` from
+wherever you wish to summarize. The `-d` for **source directory** defaults to `.` and the `-o`/`-f` for **output path**
+defaults to a new timestamped file (`-f`) in the (`-o`) `summaries/` directory from the `.` context. The `-i` and `-x` are used to
+define what to include and exclude various file extensions like `go,ts,py` etc.. The `-s` is used to
+**skip** over substrings within a scanned path. Dotfiles can completely be ignored by all paths by using `-ndf` as a flag.
+
+Performance of the application can be tuned using the `-mf=` to assign **Max Files** that will concurrently be
+processed. The default is 369. The `-max=` represents a limit on how large the rendered summary can become.
+
+Once the program finishes running, the rendered file will look similar to:
```md
# Project Summary
-### `filename.ext`
+
+
+### `filename.go`
+
+
-### `filename.ext`
+### `filename.cs`
+
+
+
+
... etc.
@@ -49,19 +88,139 @@ cd ~/work/anotherProject
summarize -d anotherProject -o /home/user/summaries/anotherProject
```
-Since `figtree` is designed to be very functional, its lightweight but feature
-intense design through simple biology memetics makes it well suited for this program.
-
## Options
-| Name | Argument | Type | Usage |
-|-----------------|----------|----------|--------------------------------------------------------|
-| `kSourceDir` | -d` | `string` | Source directory path. |
-| `kOutputDir` | -o` | `string` | Summary destination output directory path. |
-| `kExcludeExt` | `-x` | `list` | Comma separated string list of extensions to exclude. |
-| `kSkipContains` | `-s` | `list` | Comma separated string to filename substrings to skip. |
-| `kIncludeExt` | `-i` | `list` | Comma separated string of extensions to include. |
-| `kFilename` | `-f` | `string` | Summary filename (writes to `-o` dir). |
+| Name | Argument | Type | Usage |
+|------------------|----------|----------|-------------------------------------------------------------------|
+| `kSourceDir` | `-d` | `string` | Source directory path. |
+| `kOutputDir` | `-o` | `string` | Summary destination output directory path. |
+| `kExcludeExt` | `-x` | `list` | Comma separated string list of extensions to exclude. |
+| `kSkipContains` | `-s` | `list` | Comma separated string to filename substrings to skip. |
+| `kIncludeExt` | `-i` | `list` | Comma separated string of extensions to include. |
+| `kFilename` | `-f` | `string` | Summary filename (writes to `-o` dir). |
+| `kVersion` | `-v` | `bool` | When `true`, the binary version is shown |
+| `kCompress` | `-gz` | `bool` | When `true`, **gzip** is used on the contents of the summary |
+| `kMaxOutputSize` | `-max` | `int64` | Maximum size of the generated summary allowed |
+| `kPrint` | `-print` | `bool` | Uses STDOUT to write contents of summary |
+| `kWrite` | `-write` | `bool` | Uses the filesystem to save contents of summary |
+| `kDebug` | `-debug` | `bool` | When `true`, extra content is written to STDOUT aside from report |
+
+
+## Environment
+
+| Environment Variable | Type | Default Value | Usage |
+|-----------------------------|----------|------------------------|-------------------------------------------------------------------------------------------------------------|
+| `SUMMARIZE_CONFIG_FILE` | `String` | `./config.yaml` | Contents of the YAML Configuration to use for [figtree](https://github.com/andreimerlescu/figtree). |
+| `SUMMARIZE_IGNORE_CONTAINS` | `List` | \* see below | Add items to this default list by creating your own new list here, they get concatenated. |
+| `SUMMARIZE_INCLUDE_EXT` | `List` | \*\* see below \* | Add extensions to include in the summary in this environment variable, comma separated. |
+| `SUMMARIZE_EXCLUDE_EXT` | `List` | \*\*\* see below \* \* | Add exclusionary extensions to ignore to this environment variable, comma separated. |
+| `SUMMARIZE_ALWAYS_PRINT` | `Bool` | `false` | When `true`, the `-print` will write the summary to STDOUT. |
+| `SUMMARIZE_ALWAYS_WRITE` | `Bool` | `false` | When `true`, the `-write` will write to a new file on the disk. |
+| `SUMMARIZE_ALWAYS_JSON` | `Bool` | `false` | When `true`, the `-json` flag will render JSON output to the console. |
+| `SUMMARIZE_ALWAYS_COMPRESS` | `Bool` | `false` | When `true`, the `-gz` flag will use gzip to compress the summary contents and appends `.gz` to the output. |
+
+
+### \* Default `SUMMARIZE_IGNORE_CONTAINS` Value
+
+```json
+7z,gz,xz,zst,zstd,bz,bz2,bzip2,zip,tar,rar,lz4,lzma,cab,arj,crt,cert,cer,key,pub,asc,pem,p12,pfx,jks,keystore,id_rsa,id_dsa,id_ed25519,id_ecdsa,gpg,pgp,exe,dll,so,dylib,bin,out,o,obj,a,lib,dSYM,class,pyc,pyo,__pycache__,jar,war,ear,apk,ipa,dex,odex,wasm,node,beam,elc,iso,img,dmg,vhd,vdi,vmdk,qcow2,db,sqlite,sqlite3,db3,mdb,accdb,sdf,ldb,log,trace,dump,crash,jpg,jpeg,png,gif,bmp,tiff,tif,webp,ico,svg,heic,heif,raw,cr2,nef,dng,mp3,wav,flac,aac,ogg,wma,m4a,opus,aiff,mp4,avi,mov,mkv,webm,flv,wmv,m4v,3gp,ogv,ttf,otf,woff,woff2,eot,fon,pfb,pfm,pdf,doc,docx,xls,xlsx,ppt,pptx,odt,ods,odp,rtf,suo,sln,user,ncb,pdb,ipch,ilk,tlog,idb,aps,res,iml,idea,vscode,project,classpath,factorypath,prefs,vcxproj,vcproj,filters,xcworkspace,xcuserstate,xcscheme,pbxproj,DS_Store,Thumbs.db,desktop.ini,lock,sum,resolved,tmp,temp,swp,swo,bak,backup,orig,rej,patch,~,old,new,part,incomplete,map,min.js,min.css,bundle.js,bundle.css,chunk.js,dat,data,cache,pid,sock,pack,idx,rev,pickle,pkl,npy,npz,mat,rdata,rds
+```
+
+```go
+
+// defaultExclude are the -exc list of extensions that will be skipped automatically
+defaultExclude = []string{
+ // Compressed archives
+ "7z", "gz", "xz", "zst", "zstd", "bz", "bz2", "bzip2", "zip", "tar", "rar", "lz4", "lzma", "cab", "arj",
+
+ // Encryption, certificates, and sensitive keys
+ "crt", "cert", "cer", "key", "pub", "asc", "pem", "p12", "pfx", "jks", "keystore",
+ "id_rsa", "id_dsa", "id_ed25519", "id_ecdsa", "gpg", "pgp",
+
+ // Binary & executable artifacts
+ "exe", "dll", "so", "dylib", "bin", "out", "o", "obj", "a", "lib", "dSYM",
+ "class", "pyc", "pyo", "__pycache__",
+ "jar", "war", "ear", "apk", "ipa", "dex", "odex",
+ "wasm", "node", "beam", "elc",
+
+ // System and disk images
+ "iso", "img", "dmg", "vhd", "vdi", "vmdk", "qcow2",
+
+ // Database files
+ "db", "sqlite", "sqlite3", "db3", "mdb", "accdb", "sdf", "ldb",
+
+ // Log files
+ "log", "trace", "dump", "crash",
+
+ // Media files - Images
+ "jpg", "jpeg", "png", "gif", "bmp", "tiff", "tif", "webp", "ico", "svg", "heic", "heif", "raw", "cr2", "nef", "dng",
+
+ // Media files - Audio
+ "mp3", "wav", "flac", "aac", "ogg", "wma", "m4a", "opus", "aiff",
+
+ // Media files - Video
+ "mp4", "avi", "mov", "mkv", "webm", "flv", "wmv", "m4v", "3gp", "ogv",
+
+ // Font files
+ "ttf", "otf", "woff", "woff2", "eot", "fon", "pfb", "pfm",
+
+ // Document formats (typically not source code)
+ "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp", "rtf",
+
+ // IDE/Editor/Tooling artifacts
+ "suo", "sln", "user", "ncb", "pdb", "ipch", "ilk", "tlog", "idb", "aps", "res",
+ "iml", "idea", "vscode", "project", "classpath", "factorypath", "prefs",
+ "vcxproj", "vcproj", "filters", "xcworkspace", "xcuserstate", "xcscheme", "pbxproj",
+ "DS_Store", "Thumbs.db", "desktop.ini",
+
+ // Package manager and build artifacts
+ "lock", "sum", "resolved", // package-lock.json, go.sum, yarn.lock, etc.
+
+ // Temporary and backup files
+ "tmp", "temp", "swp", "swo", "bak", "backup", "orig", "rej", "patch",
+ "~", "old", "new", "part", "incomplete",
+
+ // Source maps and minified files (usually generated)
+ "map", "min.js", "min.css", "bundle.js", "bundle.css", "chunk.js",
+
+ // Configuration that's typically binary or generated
+ "dat", "data", "cache", "pid", "sock",
+
+ // Version control artifacts (though usually in ignored directories)
+ "pack", "idx", "rev",
+
+ // Other binary formats
+ "pickle", "pkl", "npy", "npz", "mat", "rdata", "rds",
+}
+
+```
+
+### \* \* Default `SUMMARIZE_INCLUDE_EXT`
+
+```json
+go,ts,tf,sh,py,js,Makefile,mod,Dockerfile,dockerignore,gitignore,esconfigs,md
+```
+
+```go
+// defaultInclude are the -inc list of extensions that will be included in the summary
+defaultInclude = []string{
+ "go", "ts", "tf", "sh", "py", "js", "Makefile", "mod", "Dockerfile", "dockerignore", "gitignore", "esconfigs", "md",
+}
+```
+
+### \* \* \* Default `SUMMARIZE_EXCLUDE_EXT`
+
+```json
+.min.js,.min.css,.git/,.svn/,.vscode/,.vs/,.idea/,logs/,secrets/,.venv/,/site-packages,.terraform/,summaries/,node_modules/,/tmp,tmp/
+```
+
+```go
+// defaultAvoid are the -avoid list of substrings in file path names to avoid in the summary
+defaultAvoid = []string{
+ ".min.js", ".min.css", ".git/", ".svn/", ".vscode/", ".vs/", ".idea/", "logs/", "secrets/",
+    ".venv/", "/site-packages", ".terraform/", "summaries/", "node_modules/", "/tmp", "tmp/",
+}
+```
## Contribution
diff --git a/VERSION b/VERSION
index 60453e6..b482243 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-v1.0.0
\ No newline at end of file
+v1.0.2
\ No newline at end of file
diff --git a/go.mod b/go.mod
index 53fe9d4..4eb1111 100644
--- a/go.mod
+++ b/go.mod
@@ -1,16 +1,17 @@
module github.com/andreimerlescu/summarize
-go 1.23.7
+go 1.24.5
require (
- github.com/andreimerlescu/checkfs v1.0.2
- github.com/andreimerlescu/figtree/v2 v2.0.3
+ github.com/andreimerlescu/checkfs v1.0.4
+ github.com/andreimerlescu/figtree/v2 v2.0.14
github.com/andreimerlescu/sema v1.0.0
)
require (
+ github.com/andreimerlescu/bump v1.0.3 // indirect
github.com/go-ini/ini v1.67.0 // indirect
- golang.org/x/sys v0.31.0 // indirect
- golang.org/x/term v0.30.0 // indirect
+ golang.org/x/sys v0.33.0 // indirect
+ golang.org/x/term v0.32.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
diff --git a/go.sum b/go.sum
index 6ac5645..b51abf5 100644
--- a/go.sum
+++ b/go.sum
@@ -1,7 +1,11 @@
-github.com/andreimerlescu/checkfs v1.0.2 h1:U7maY2jYqzb+ranBSWiZamBDapAWvBCeokOktn4gong=
-github.com/andreimerlescu/checkfs v1.0.2/go.mod h1:ADaqjiRJf3gmyENLS3v9bJIaEH00IOeM48cXxVwy1JY=
-github.com/andreimerlescu/figtree/v2 v2.0.3 h1:BfBGZ7729shM9jvl2nHnumQJjpP51C3MEGe6TXJQu0c=
-github.com/andreimerlescu/figtree/v2 v2.0.3/go.mod h1:cIwo9LqOWCjnB3354D34U7KH9D30PdVUqkdq4BncCzY=
+github.com/andreimerlescu/bump v1.0.3 h1:RAmNPjS8lGhgiBhiTMEaRl1ydex7Z3YYuyiQohC+ShY=
+github.com/andreimerlescu/bump v1.0.3/go.mod h1:ud9Sqvt+zM0sBDhK3Dghq2hGTWrlVIvMqLAzpWQjIy0=
+github.com/andreimerlescu/checkfs v1.0.4 h1:pRXZGW1sfe+yXyWNUxmPC2IiX5yT3vF1V5O8PXulnFc=
+github.com/andreimerlescu/checkfs v1.0.4/go.mod h1:ADaqjiRJf3gmyENLS3v9bJIaEH00IOeM48cXxVwy1JY=
+github.com/andreimerlescu/figtree/v2 v2.0.10 h1:UWKBVpwa4lI+mp3VxUy7MzkzaigROZd4zOGJrarNpv0=
+github.com/andreimerlescu/figtree/v2 v2.0.10/go.mod h1:PymPGUzzP/UuxZ4mqC5JIrDZJIVcjZ3GMc/MC2GB6Ek=
+github.com/andreimerlescu/figtree/v2 v2.0.14 h1:pwDbHpfiAdSnaNnxyV2GpG1rG9cmGiHhjXOvBEoVj2w=
+github.com/andreimerlescu/figtree/v2 v2.0.14/go.mod h1:PymPGUzzP/UuxZ4mqC5JIrDZJIVcjZ3GMc/MC2GB6Ek=
github.com/andreimerlescu/sema v1.0.0 h1:8ai/kqAci7QKUenAJWX13aYtWpjvD0CQW39CFzNIRQs=
github.com/andreimerlescu/sema v1.0.0/go.mod h1:VCRQkKVknOKKPtAqvrNHL7hxxfoX5O7it2lWBzVxUs0=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
@@ -12,10 +16,10 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
-golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
-golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
-golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
-golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
+golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
+golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg=
+golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
diff --git a/main.go b/main.go
index d37ae0b..c1f87ec 100644
--- a/main.go
+++ b/main.go
@@ -2,16 +2,20 @@ package main
import (
"bytes"
+ "compress/gzip"
"embed"
+ "encoding/json"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"runtime"
- "sort"
+ "slices"
+ "strconv"
"strings"
"sync"
+ "sync/atomic"
"time"
check "github.com/andreimerlescu/checkfs"
@@ -38,41 +42,115 @@ func Version() string {
}
const (
- projectName string = "github.com/andreimerlescu/summarize"
- tFormat string = "2006.01.02.15.04.05.UTC"
- eConfigFile string = "SUMMARIZE_CONFIG_FILE"
- kSourceDir string = "d"
- kOutputDir string = "o"
- kIncludeExt string = "i"
- kExcludeExt string = "x"
- kSkipContains string = "s"
- kFilename string = "f"
- kVersion string = "v"
- kDotFiles string = "ndf"
- kMaxFiles string = "mf"
+ projectName string = "github.com/andreimerlescu/summarize"
+ tFormat string = "2006.01.02.15.04.05.UTC"
+
+ eConfigFile string = "SUMMARIZE_CONFIG_FILE"
+ eAddIgnoreInPathList string = "SUMMARIZE_IGNORE_CONTAINS"
+ eAddIncludeExtList string = "SUMMARIZE_INCLUDE_EXT"
+ eAddExcludeExtList string = "SUMMARIZE_EXCLUDE_EXT"
+ eAlwaysWrite string = "SUMMARIZE_ALWAYS_WRITE"
+ eAlwaysPrint string = "SUMMARIZE_ALWAYS_PRINT"
+ eAlwaysJson string = "SUMMARIZE_ALWAYS_JSON"
+ eAlwaysCompress string = "SUMMARIZE_ALWAYS_COMPRESS"
+
+ kSourceDir string = "d"
+ kOutputDir string = "o"
+ kIncludeExt string = "i"
+ kExcludeExt string = "x"
+ kSkipContains string = "s"
+ kFilename string = "f"
+ kPrint string = "print"
+ kMaxOutputSize string = "max"
+ kWrite string = "write"
+ kVersion string = "v"
+ kDotFiles string = "ndf"
+ kMaxFiles string = "mf"
+ kDebug string = "debug"
+ kJson string = "json"
+ kCompress string = "gz"
)
var (
// figs is a figtree of fruit for configurable command line arguments that bear fruit
- figs figtree.Fruit
+ figs figtree.Plant
+
+ alwaysWrite = true
// defaultExclude are the -exc list of extensions that will be skipped automatically
defaultExclude = []string{
- "7z", "gz", "xz", "zstd", "bz", "bzip2", "zip", "part", // compressed files
- "crt", "key", "asc", "id_rsa", "id_dsa", "id_ed25519", // encryption files
- "log", "dll", "so", "bin", "exe", // executable binaries
- "jpg", "png", "mov", "mp3", "mp4", "heic", "avi", // media
- "ttf", "woff", "woff2", "otf", // fonts
+ // Compressed archives
+ "7z", "gz", "xz", "zst", "zstd", "bz", "bz2", "bzip2", "zip", "tar", "rar", "lz4", "lzma", "cab", "arj",
+
+ // Encryption, certificates, and sensitive keys
+ "crt", "cert", "cer", "key", "pub", "asc", "pem", "p12", "pfx", "jks", "keystore",
+ "id_rsa", "id_dsa", "id_ed25519", "id_ecdsa", "gpg", "pgp",
+
+ // Binary & executable artifacts
+ "exe", "dll", "so", "dylib", "bin", "out", "o", "obj", "a", "lib", "dSYM",
+ "class", "pyc", "pyo", "__pycache__",
+ "jar", "war", "ear", "apk", "ipa", "dex", "odex",
+ "wasm", "node", "beam", "elc",
+
+ // System and disk images
+ "iso", "img", "dmg", "vhd", "vdi", "vmdk", "qcow2",
+
+ // Database files
+ "db", "sqlite", "sqlite3", "db3", "mdb", "accdb", "sdf", "ldb",
+
+ // Log files
+ "log", "trace", "dump", "crash",
+
+ // Media files - Images
+ "jpg", "jpeg", "png", "gif", "bmp", "tiff", "tif", "webp", "ico", "svg", "heic", "heif", "raw", "cr2", "nef", "dng",
+
+ // Media files - Audio
+ "mp3", "wav", "flac", "aac", "ogg", "wma", "m4a", "opus", "aiff",
+
+ // Media files - Video
+ "mp4", "avi", "mov", "mkv", "webm", "flv", "wmv", "m4v", "3gp", "ogv",
+
+ // Font files
+ "ttf", "otf", "woff", "woff2", "eot", "fon", "pfb", "pfm",
+
+ // Document formats (typically not source code)
+ "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp", "rtf",
+
+ // IDE/Editor/Tooling artifacts
+ "suo", "sln", "user", "ncb", "pdb", "ipch", "ilk", "tlog", "idb", "aps", "res",
+ "iml", "idea", "vscode", "project", "classpath", "factorypath", "prefs",
+ "vcxproj", "vcproj", "filters", "xcworkspace", "xcuserstate", "xcscheme", "pbxproj",
+ "DS_Store", "Thumbs.db", "desktop.ini",
+
+ // Package manager and build artifacts
+ "lock", "sum", "resolved", // package-lock.json, go.sum, yarn.lock, etc.
+
+ // Temporary and backup files
+ "tmp", "temp", "swp", "swo", "bak", "backup", "orig", "rej", "patch",
+ "~", "old", "new", "part", "incomplete",
+
+ // Source maps and minified files (usually generated)
+ "map", "min.js", "min.css", "bundle.js", "bundle.css", "chunk.js",
+
+ // Configuration that's typically binary or generated
+ "dat", "data", "cache", "pid", "sock",
+
+ // Version control artifacts (though usually in ignored directories)
+ "pack", "idx", "rev",
+
+ // Other binary formats
+ "pickle", "pkl", "npy", "npz", "mat", "rdata", "rds",
}
// defaultInclude are the -inc list of extensions that will be included in the summary
defaultInclude = []string{
- "go", "ts", "tf", "sh", "py", "js",
+ "go", "ts", "tf", "sh", "py", "js", "Makefile", "mod", "Dockerfile", "dockerignore", "gitignore", "esconfigs", "md",
}
// defaultAvoid are the -avoid list of substrings in file path names to avoid in the summary
defaultAvoid = []string{
".min.js", ".min.css", ".git/", ".svn/", ".vscode/", ".vs/", ".idea/", "logs/", "secrets/",
+	".venv/", "/site-packages", ".terraform/", "summaries/", "node_modules/", "/tmp", "tmp/",
}
)
@@ -82,65 +160,105 @@ var newSummaryFilename = func() string {
}
// init creates a new figtree with options to use CONFIG_FILE as a way of reading a YAML file while ignoring the env
-func init() {
+func configure() {
figs = figtree.With(figtree.Options{
Harvest: 9,
IgnoreEnvironment: true,
- ConfigFile: os.Getenv(eConfigFile),
+ ConfigFile: envVal(eConfigFile, "./config.yaml"),
})
// properties
- figs.NewString(kSourceDir, ".", "Absolute path of directory you want to summarize.")
- figs.NewString(kOutputDir, filepath.Join(".", "summaries"), fmt.Sprintf("Path of the directory to write the %s file to", newSummaryFilename()))
- figs.NewString(kFilename, newSummaryFilename(), "Output file of summary.md")
- figs.NewList(kIncludeExt, defaultInclude, "List of extensions to include in summary.")
- figs.NewList(kExcludeExt, defaultExclude, "List of extensions to include in summary.")
- figs.NewList(kSkipContains, defaultAvoid, "List of extensions to avoid.")
- figs.NewInt(kMaxFiles, 20, "Maximum number of files to process concurrently")
- figs.NewBool(kDotFiles, false, "Include dot files by setting this true")
- figs.NewBool(kVersion, false, "Display current version of summarize")
+ figs = figs.NewString(kSourceDir, ".", "Absolute path of directory you want to summarize.")
+ figs = figs.NewString(kOutputDir, filepath.Join(".", "summaries"), fmt.Sprintf("Path of the directory to write the %s file to", newSummaryFilename()))
+ figs = figs.NewString(kFilename, newSummaryFilename(), "Output file of summary.md")
+ figs = figs.NewList(kIncludeExt, defaultInclude, "List of extensions to INCLUDE in summary.")
+ figs = figs.NewList(kExcludeExt, defaultExclude, "List of extensions to EXCLUDE in summary.")
+ figs = figs.NewList(kSkipContains, defaultAvoid, "List of path substrings if present to skip over full path.")
+ figs = figs.NewInt(kMaxFiles, 369, "Maximum number of files to process concurrently")
+ figs = figs.NewInt64(kMaxOutputSize, 1_776_369, "Maximum file size of output file")
+ figs = figs.NewBool(kDotFiles, false, "Any path that is considered a dotfile can be included by setting this to true")
+ figs = figs.NewBool(kPrint, envIs(eAlwaysPrint), "Print generated file contents to STDOUT")
+ figs = figs.NewBool(kWrite, envIs(eAlwaysWrite), "Write generated contents to file")
+ figs = figs.NewBool(kJson, envIs(eAlwaysJson), "Enable JSON formatting")
+ figs = figs.NewBool(kCompress, envIs(eAlwaysCompress), "Use gzip compression in output")
+ figs = figs.NewBool(kVersion, false, "Display current version of summarize")
+ figs = figs.NewBool(kDebug, false, "Enable debug mode")
// validators
- figs.WithValidator(kSourceDir, figtree.AssureStringNotEmpty)
- figs.WithValidator(kOutputDir, figtree.AssureStringNotEmpty)
- figs.WithValidator(kFilename, figtree.AssureStringNotEmpty)
- figs.WithValidator(kMaxFiles, figtree.AssureIntInRange(1, 63339))
+ figs = figs.WithValidator(kSourceDir, figtree.AssureStringNotEmpty)
+ figs = figs.WithValidator(kOutputDir, figtree.AssureStringNotEmpty)
+ figs = figs.WithValidator(kFilename, figtree.AssureStringNotEmpty)
+ figs = figs.WithValidator(kMaxFiles, figtree.AssureIntInRange(1, 17_369))
+ figs = figs.WithValidator(kMaxOutputSize, figtree.AssureInt64InRange(369, 369_369_369_369))
// callbacks
- figs.WithCallback(kSourceDir, figtree.CallbackAfterVerify, callbackVerifyReadableDirectory)
- figs.WithCallback(kFilename, figtree.CallbackAfterVerify, callbackVerifyFile)
- figs.WithCallback(kOutputDir, figtree.CallbackAfterVerify, func(value interface{}) error {
- return check.Directory(toString(value), directory.Options{
- WillCreate: true,
- Create: directory.Create{
- Kind: directory.IfNotExists,
- Path: toString(value),
- FileMode: 0755,
- },
- })
- })
- capture(figs.Load())
+ figs = figs.WithCallback(kSourceDir, figtree.CallbackAfterVerify, callbackVerifyReadableDirectory)
+ figs = figs.WithCallback(kFilename, figtree.CallbackAfterVerify, callbackVerifyFile)
+}
+
+type result struct {
+ Path string `yaml:"path" json:"path"`
+ Contents []byte `yaml:"contents" json:"contents"`
+ Size int64 `yaml:"size" json:"size"`
+}
+
+type final struct {
+ Path string `yaml:"path" json:"path"`
+ Contents string `yaml:"contents" json:"contents"`
+ Size int64 `yaml:"size" json:"size"`
}
func main() {
+ configure()
+ capture("figs loading environment", figs.Load())
+ isDebug := *figs.Bool(kDebug)
if *figs.Bool(kVersion) {
fmt.Println(Version())
os.Exit(0)
}
var (
- data map[string][]string // data is map[ext][]path of found files to summarize
- dataMutex = sync.RWMutex{} // adding concurrency
+ lIncludeExt = *figs.List(kIncludeExt)
+ lExcludeExt = *figs.List(kExcludeExt)
+ lSkipContains = *figs.List(kSkipContains)
+
+ sourceDir = *figs.String(kSourceDir)
+ outputDir = *figs.String(kOutputDir)
+ )
+
+ capture("checking output directory", check.Directory(outputDir, directory.Options{
+ WillCreate: true,
+ Create: directory.Create{
+ Kind: directory.IfNotExists,
+ Path: outputDir,
+ FileMode: 0755,
+ },
+ }))
+
+ addFromEnv(eAddIgnoreInPathList, &lSkipContains)
+ addFromEnv(eAddIncludeExtList, &lIncludeExt)
+ addFromEnv(eAddExcludeExtList, &lExcludeExt)
+
+ var (
wg = sync.WaitGroup{}
throttler = sema.New(runtime.GOMAXPROCS(0))
)
// initialize the data map with all -inc extensions
var errs []error
- data = make(map[string][]string)
- for _, inc := range *figs.List(kIncludeExt) {
- data[inc] = []string{}
+
+ type mapData struct {
+ Ext string
+ Paths []string
+ }
+
+ data := &sync.Map{}
+ for _, inc := range lIncludeExt {
+ data.Store(inc, mapData{
+ Ext: inc,
+ Paths: []string{},
+ })
}
// populate data with the kSourceDir files based on -inc -exc -avoid lists
- capture(filepath.Walk(*figs.String(kSourceDir), func(path string, info fs.FileInfo, err error) error {
+ capture("walking source directory", filepath.Walk(sourceDir, func(path string, info fs.FileInfo, err error) error {
if err != nil {
return err // return the error received
}
@@ -156,10 +274,35 @@ func main() {
}
// check the -avoid list
- for _, avoidThis := range *figs.List(kSkipContains) {
- if strings.Contains(filename, avoidThis) {
+ for _, avoidThis := range lSkipContains {
+ a := strings.Contains(filename, avoidThis) || strings.Contains(path, avoidThis)
+ b := strings.HasPrefix(filename, avoidThis) || strings.HasPrefix(path, avoidThis)
+ c := strings.HasSuffix(filename, avoidThis) || strings.HasSuffix(path, avoidThis)
+ if a || b || c {
+ if isDebug {
+ fmt.Printf("ignoring %s in %s\n", filename, path)
+ }
return nil // skip without error
}
+
+			parts, err := filepath.Glob(path) // NOTE(review): path is a literal walk path, not a pattern, so Glob generally returns just path itself — this re-check looks redundant with the Contains/HasPrefix/HasSuffix tests above; confirm intent
+ if err != nil {
+ errs = append(errs, err)
+ continue
+ }
+ for i := 0; i < len(parts); i++ {
+ part := parts[i]
+ if part == "/" {
+ continue
+ }
+ if strings.Contains(part, avoidThis) || strings.HasPrefix(part, avoidThis) || strings.HasSuffix(part, avoidThis) {
+ if isDebug {
+ fmt.Printf("skipping file %q\n", part)
+ }
+ return nil
+ }
+ }
+
}
// get the extension
@@ -167,16 +310,40 @@ func main() {
ext = strings.ToLower(ext)
ext = strings.TrimPrefix(ext, ".")
+ if isDebug {
+ fmt.Printf("ext: %s\n", ext)
+ }
+
// check the -exc list
- for _, excludeThis := range *figs.List(kExcludeExt) {
+ for _, excludeThis := range lExcludeExt {
if strings.EqualFold(excludeThis, ext) {
+ if isDebug {
+ fmt.Printf("ignoring %s\n", path)
+ }
return nil // skip without error
}
}
+ var toUpdate []mapData
// populate the -inc list in data
- if _, exists := data[ext]; exists {
- data[ext] = append(data[ext], path)
+		data.Range(func(e any, p any) bool {
+			key, ok := e.(string)
+			if !ok {
+				return true // continue
+			}
+			value, ok := p.(mapData)
+			if !ok {
+				return true
+			}
+			if !strings.EqualFold(key, ext) {
+				return true // not this file's extension; keep ranging
+			}
+			value.Paths = append(value.Paths, path)
+			toUpdate = append(toUpdate, value)
+			return false // matching include bucket found and updated; stop ranging
+		})
+ for _, value := range toUpdate {
+ data.Store(value.Ext, value)
}
}
@@ -184,47 +351,183 @@ func main() {
return nil
}))
+ if isDebug {
+ fmt.Println("data received: ")
+ data.Range(func(e any, p any) bool {
+ ext, ok := e.(string)
+ if !ok {
+ return true // continue
+ }
+ thisData, ok := p.(mapData)
+ if !ok {
+ return true // continue
+ }
+ fmt.Printf("%s: %s\n", ext, strings.Join(thisData.Paths, ", "))
+ return true // continue
+ })
+ }
+
maxFileSemaphore := sema.New(*figs.Int(kMaxFiles))
- writeChan := make(chan []byte, 10240)
+ resultsChan := make(chan result, *figs.Int(kMaxFiles))
writerWG := sync.WaitGroup{}
writerWG.Add(1)
go func() {
defer writerWG.Done()
// Create output file
+ srcDir := *figs.String(kSourceDir)
outputFileName := filepath.Join(*figs.String(kOutputDir), *figs.String(kFilename))
var buf bytes.Buffer
- buf.WriteString(fmt.Sprintf("# Project Summary - %s\nGenerated by %s %s\n\n## Workspace\n\n%s
\n\n\n",
- filepath.Base(*figs.String(kFilename)), projectName, Version(), *figs.String(kSourceDir)))
+ buf.WriteString("# Project Summary - " + filepath.Base(*figs.String(kFilename)) + "\n")
+ buf.WriteString("Generated by " + projectName + " " + Version() + "\n\n")
+ buf.WriteString("AI Instructions are the user requests that you analyze their project workspace ")
+ buf.WriteString("as provided here by filename followed by the contents. You are to answer their ")
+ buf.WriteString("question using the source code provided as the basis of your responses. You are to ")
+ buf.WriteString("completely modify each individual file as per-the request and provide the completely ")
+ buf.WriteString("updated form of the file. Do not abbreviate the file, and if the file is excessive in ")
+ buf.WriteString("length, then print the entire contents in your response with your updates to the ")
+ buf.WriteString("specific components while retaining all existing functionality and maintaining comments ")
+ buf.WriteString("within the code. \n\n")
+ buf.WriteString("### Workspace\n\n")
+	abs, err := filepath.Abs(srcDir)
+	if err == nil {
+		buf.WriteString(abs + "\n\n")
+	} else {
+		buf.WriteString(srcDir + "\n\n")
+	}
- for data := range writeChan {
- buf.Write(data)
+ renderMu := &sync.Mutex{}
+ renderedPaths := make(map[string]int64)
+ totalSize := int64(buf.Len())
+ for in := range resultsChan {
+ if _, exists := renderedPaths[in.Path]; exists {
+ continue
+ }
+ runningSize := atomic.AddInt64(&totalSize, in.Size)
+ if runningSize >= *figs.Int64(kMaxOutputSize) {
+ continue
+ }
+ renderMu.Lock()
+ renderedPaths[in.Path] = in.Size
+ buf.Write(in.Contents)
+ renderMu.Unlock()
+ }
+
+ shouldPrint := *figs.Bool(kPrint)
+ canWrite := *figs.Bool(kWrite)
+ showJson := *figs.Bool(kJson)
+ wrote := false
+
+ if *figs.Bool(kCompress) {
+ compressed, err := compress(bytes.Clone(buf.Bytes()))
+ capture("compressing bytes buffer", err)
+ buf.Reset()
+ buf.Write(compressed)
+ outputFileName += ".gz"
+ }
+
+ if !shouldPrint && !canWrite {
+ capture("saving output file during write", os.WriteFile(outputFileName, buf.Bytes(), 0644))
+ wrote = true
+ }
+
+ if canWrite && !wrote {
+ capture("saving output file during write", os.WriteFile(outputFileName, buf.Bytes(), 0644))
+ wrote = true
+ }
+
+ if shouldPrint {
+ if showJson {
+ r := final{
+ Path: outputFileName,
+ Size: int64(buf.Len()),
+ Contents: buf.String(),
+ }
+ jb, err := json.MarshalIndent(r, "", " ")
+ if err != nil {
+ _, _ = fmt.Fprintln(os.Stderr, err)
+ }
+ fmt.Println(string(jb))
+ } else {
+ fmt.Println(buf.String())
+ }
+ os.Exit(0)
}
- capture(os.WriteFile(outputFileName, buf.Bytes(), 0644))
}()
- for ext, paths := range data { // range over data to get ext and paths
- throttler.Acquire() // throttler is used to protect the runtime from excessive use
- wg.Add(1) // wg is used to prevent the runtime from exiting early
- go func(ext string, paths []string) { // run this extension in a goroutine
+	var toUpdate []mapData // FIXME(review): appended concurrently from the per-extension goroutines below without a lock — data race; guard with a mutex or collect before spawning
+
+ seen := seenStrings{m: make(map[string]bool)}
+
+ data.Range(func(e any, p any) bool {
+ ext, ok := e.(string)
+ if !ok {
+ return true // continue
+ }
+ thisData, ok := p.(mapData)
+ if !ok {
+ return true // continue
+ }
+ paths := slices.Clone(thisData.Paths)
+
+ throttler.Acquire() // throttler is used to protect the runtime from excessive use
+ wg.Add(1) // wg is used to prevent the runtime from exiting early
+ go func(innerData *mapData, toUpdate *[]mapData, ext string, paths []string) { // run this extension in a goroutine
defer throttler.Release() // when we're done, release the throttler
defer wg.Done() // then tell the sync.WaitGroup that we are done
- sort.Strings(paths) // sort the paths we receive
- dataMutex.Lock() // lock the data map
- data[ext] = paths // write the sorted paths
- dataMutex.Unlock() // unlock the map
+
+ paths = simplify(paths)
+
+ innerData.Paths = paths
+ *toUpdate = append(*toUpdate, *innerData)
// process each file in the ext list (one ext per throttle slot in the semaphore)
for _, filePath := range paths {
+ if seen.Exists(filePath) {
+ continue
+ }
maxFileSemaphore.Acquire()
wg.Add(1)
go func(ext, filePath string) {
- defer maxFileSemaphore.Release() // maxFileSemaphore prevents excessive files from being opened
- defer wg.Done() // keep the main thread running while this file is being processed
- var sb bytes.Buffer // capture what we write to file in a bytes buffer
- sb.WriteString(fmt.Sprintf("## %s\n\n```%s\n", filePath, ext)) // write the header of the summary for the file
- content, err := os.ReadFile(filePath) // open the file and get its contents
+ defer maxFileSemaphore.Release() // maxFileSemaphore prevents excessive files from being opened
+ defer wg.Done() // keep the main thread running while this file is being processed
+ if strings.HasSuffix(filePath, ".DS_Store") ||
+ strings.HasSuffix(filePath, ".exe") ||
+ strings.HasSuffix(filePath, "-amd64") ||
+ strings.HasSuffix(filePath, "-arm64") ||
+ strings.HasSuffix(filePath, "aarch64") {
+ return
+ }
+ type tFileInfo struct {
+ Name string `json:"name"`
+ Size int64 `json:"size"`
+ Mode os.FileMode `json:"mode"`
+ }
+ info, err := os.Stat(filePath)
+ if err != nil {
+						errs = append(errs, err) // FIXME(review): errs is appended from concurrent file goroutines without a lock — data race
+ return
+ }
+ fileInfo := &tFileInfo{
+ Name: filepath.Base(filePath),
+ Size: info.Size(),
+ Mode: info.Mode(),
+ }
+ infoJson, err := json.MarshalIndent(fileInfo, "", " ")
+ if err != nil {
+ errs = append(errs, err)
+ return
+ }
+ var sb bytes.Buffer // capture what we write to file in a bytes buffer
+ sb.WriteString("## " + filepath.Base(filePath) + "\n\n")
+ sb.WriteString("The `os.Stat` for the " + filePath + " is: \n\n")
+ sb.WriteString("```json\n")
+ sb.WriteString(string(infoJson) + "\n")
+ sb.WriteString("```\n\n")
+ sb.WriteString("Source Code:\n\n")
+ sb.WriteString("```" + ext + "\n")
+ content, err := os.ReadFile(filePath) // open the file and get its contents
if err != nil {
errs = append(errs, fmt.Errorf("Error reading file %s: %v\n", filePath, err))
return
@@ -233,26 +536,52 @@ func main() {
errs = append(errs, fmt.Errorf("Error writing file %s: %v\n", filePath, err))
return
}
- content = []byte{} // clear memory after its written
- sb.WriteString("\n```\n") // close out the file footer
- writeChan <- sb.Bytes()
+ content = []byte{} // clear memory after its written
+ sb.WriteString("\n```\n\n") // close out the file footer
+ seen.Add(filePath)
+ resultsChan <- result{
+ Path: filePath,
+ Contents: sb.Bytes(),
+ Size: int64(sb.Len()),
+ }
}(ext, filePath)
}
- }(ext, paths)
+ }(&thisData, &toUpdate, ext, paths)
+ return true
+ })
+
+ wg.Wait() // wait for all files to finish processing
+
+ for _, innerData := range toUpdate {
+ data.Store(innerData.Ext, innerData)
}
- wg.Wait() // wait for all files to finish processing
- close(writeChan) // Signal the writer goroutine to finish
- writerWG.Wait() // Wait for the writer to flush and close the file
+ close(resultsChan) // Signal the writer goroutine to finish
+ writerWG.Wait() // Wait for the writer to flush and close the file
if len(errs) > 0 {
terminate(os.Stderr, "Error writing to output file: %v\n", errs)
}
// Print completion message
- fmt.Printf("Summary generated: %s\n",
- filepath.Join(*figs.String(kOutputDir), *figs.String(kFilename)))
+ if *figs.Bool(kJson) {
+ r := m{
+ Message: fmt.Sprintf("Summary generated: %s\n",
+ filepath.Join(*figs.String(kOutputDir), *figs.String(kFilename)),
+ ),
+ }
+ jb, err := json.MarshalIndent(r, "", " ")
+ if err != nil {
+ terminate(os.Stderr, "Error marshalling results: %v\n", err)
+ } else {
+ fmt.Println(string(jb))
+ }
+ } else {
+ fmt.Printf("Summary generated: %s\n",
+ filepath.Join(*figs.String(kOutputDir), *figs.String(kFilename)),
+ )
+ }
}
var callbackVerifyFile = func(value interface{}) error {
@@ -270,18 +599,160 @@ var toString = func(value interface{}) string {
case *string:
return *v
default:
- return ""
+ flesh := figtree.NewFlesh(value)
+ f := fmt.Sprintf("%v", flesh.ToString())
+ return f
}
}
-var capture = func(d ...error) {
+var capture = func(msg string, d ...error) {
if len(d) == 0 || (len(d) == 1 && d[0] == nil) {
return
}
- terminate(os.Stderr, "captured error: %v\n", d)
+ terminate(os.Stderr, "[EXCUSE ME, BUT] %s\n\ncaptured error: %v\n", msg, d)
+}
+
+type m struct {
+ Message string `json:"message"`
}
var terminate = func(d io.Writer, i string, e ...interface{}) {
+ for _, f := range os.Args {
+ if strings.HasPrefix(f, "-json") {
+ mm := m{Message: fmt.Sprintf(i, e...)}
+ jb, err := json.MarshalIndent(mm, "", " ")
+ if err != nil {
+ _, _ = fmt.Fprintf(os.Stderr, "Error serializing json: %v\n", err)
+ _, _ = fmt.Fprintf(d, i, e...)
+ } else {
+ fmt.Println(string(jb))
+ }
+ os.Exit(1)
+ }
+ }
_, _ = fmt.Fprintf(d, i, e...)
os.Exit(1)
}
+
+// simplify removes duplicate entries from t, preserving first-seen order.
+func simplify(t []string) []string {
+	seen := make(map[string]bool)
+	results := make([]string, 0, len(t))
+	for _, v := range t {
+		if seen[v] {
+			continue
+		}
+		seen[v] = true
+		results = append(results, v)
+	}
+	return results
+}
+func addFromEnv(e string, l *[]string) {
+ v, ok := os.LookupEnv(e)
+ if ok {
+ flesh := figtree.NewFlesh(v)
+ maybeAdd := flesh.ToList()
+ for _, entry := range maybeAdd {
+ *l = append(*l, entry)
+ }
+ }
+ *l = simplify(*l)
+}
+
+type seenStrings struct {
+ mu sync.RWMutex
+ m map[string]bool
+}
+
+func (s *seenStrings) Add(entry string) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.m[entry] = true
+}
+func (s *seenStrings) Remove(entry string) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ delete(s.m, entry)
+}
+
+func (s *seenStrings) Len() int {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+ return len(s.m)
+}
+
+func (s *seenStrings) String() string {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+ return fmt.Sprint(s.m)
+}
+
+func (s *seenStrings) True(entry string) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.m[entry] = true
+}
+
+func (s *seenStrings) False(entry string) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ delete(s.m, entry)
+}
+
+func (s *seenStrings) Exists(entry string) bool {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+ return s.m[entry]
+}
+
+func envVal(name, fallback string) string {
+ v, ok := os.LookupEnv(name)
+ if !ok {
+ return fallback
+ }
+ return v
+}
+
+func envIs(name string) bool {
+ v, ok := os.LookupEnv(name)
+ if !ok {
+ return false
+ }
+ vb, err := strconv.ParseBool(v)
+ if err != nil {
+ return false
+ }
+ return vb
+}
+
+// compress compresses a string using gzip and returns the compressed bytes
+func compress(s []byte) ([]byte, error) {
+ var buf bytes.Buffer
+ gzWriter := gzip.NewWriter(&buf)
+ _, err := gzWriter.Write(s)
+ if err != nil {
+ return nil, fmt.Errorf("failed to write to gzip writer: %w", err)
+ }
+ err = gzWriter.Close()
+ if err != nil {
+ return nil, fmt.Errorf("failed to close gzip writer: %w", err)
+ }
+ return buf.Bytes(), nil
+}
+
+// decompress decompresses gzip compressed bytes back to a string
+func decompress(compressed []byte) (string, error) {
+ buf := bytes.NewReader(compressed)
+ gzReader, err := gzip.NewReader(buf)
+ if err != nil {
+ return "", fmt.Errorf("failed to create gzip reader: %w", err)
+ }
+ defer func() {
+ _ = gzReader.Close()
+ }()
+ decompressed, err := io.ReadAll(gzReader)
+ if err != nil {
+ return "", fmt.Errorf("failed to read from gzip reader: %w", err)
+ }
+ return string(decompressed), nil
+}