diff --git a/.github/workflows/test-summarize.yml b/.github/workflows/test-summarize.yml index 5126bea..1c59996 100644 --- a/.github/workflows/test-summarize.yml +++ b/.github/workflows/test-summarize.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: os: [Ubuntu-latest, macOS-latest] - go-version: ['1.23.8', '1.24.2'] + go-version: ['1.24.0'] fail-fast: false # Continue testing all combinations even if one fails steps: @@ -47,13 +47,12 @@ jobs: run: | mkdir -p ${{ github.workspace }}/anotherProject echo -e "package main\n\nfunc main() {\n println(\"Hello, World!\")\n}" > ${{ github.workspace }}/anotherProject/hello.go - mkdir -p ${{ github.workspace }}/summaries shell: bash - name: Test 1 Step 5 Run summarize with command-line arguments run: | - cd ${{ github.workspace }}/anotherProject - ${{ github.workspace }}/summarize -d . -o ${{ github.workspace }}/summaries + cd ${{ github.workspace }} + ${{ github.workspace }}/summarize -d anotherProject -o ${{ github.workspace }}/summaries ls -lh ${{ github.workspace }}/summaries/ shell: bash @@ -90,49 +89,3 @@ jobs: echo "Contents of $SUMMARY_FILE:" cat "$SUMMARY_FILE" shell: bash - - # Step 9: Verify the summary contains the hello.go source code - - name: Test 1 Step 9 Verify summary contains hello.go source code (command-line usage) - run: | - SUMMARY_FILE="${{ steps.find-summary-cli.outputs.summary_file }}" - RANDOM_FILE="hello.go" - RANDOM_FILE_ABS="${{ github.workspace }}/anotherProject/hello.go" - echo "Checking if $SUMMARY_FILE contains the source code of $RANDOM_FILE" - - SECTION_START=$(grep -n "^## $RANDOM_FILE$" "$SUMMARY_FILE" | cut -d: -f1) - if [ -z "$SECTION_START" ]; then - echo "Error: Could not find section for $RANDOM_FILE in $SUMMARY_FILE" - echo "Listing all section headers in $SUMMARY_FILE:" - grep "^## " "$SUMMARY_FILE" - exit 1 - fi - - CODE_START=$((SECTION_START + 3)) - CODE_END=$(tail -n +$CODE_START "$SUMMARY_FILE" | grep -n "^\`\`\`$" | head -n 1 | cut -d: -f1 || true) - if [ -z "$CODE_END" ] || [ 
"$CODE_END" -eq 0 ]; then - echo "Error: Could not find code block end for $RANDOM_FILE in $SUMMARY_FILE" - echo "Dumping lines after section start for debugging (up to 20 lines):" - tail -n +$SECTION_START "$SUMMARY_FILE" | head -n 20 - exit 1 - fi - CODE_LINES=$((CODE_END - 1)) - if [ $CODE_LINES -le 0 ]; then - echo "Error: Invalid code block length ($CODE_LINES lines) for $RANDOM_FILE" - echo "Dumping lines after section start for debugging (up to 20 lines):" - tail -n +$SECTION_START "$SUMMARY_FILE" | head -n 20 - exit 1 - fi - tail -n +$CODE_START "$SUMMARY_FILE" > temp_code_block.txt - head -n $CODE_LINES temp_code_block.txt > extracted_code.txt - rm temp_code_block.txt - - cat "$RANDOM_FILE_ABS" > original_code.txt - - diff -wB extracted_code.txt original_code.txt > diff_output.txt - if [ $? -ne 0 ]; then - echo "Error: The source code in the summary does not match the original file" - cat diff_output.txt - exit 1 - fi - echo "Success: The source code of $RANDOM_FILE in $SUMMARY_FILE matches the original file" - shell: bash diff --git a/.gitignore b/.gitignore index 15fe251..3b89247 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ summarize .idea .DS_Store *.log -summaries/ \ No newline at end of file +summaries/ +bin/ \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..447549c --- /dev/null +++ b/Makefile @@ -0,0 +1,70 @@ +# Generic Makefile for Any Go Project (Lines 1-65) +MAIN_PATH=. +APP_NAME := $(shell basename "$(shell realpath $(MAIN_PATH))") +BIN_DIR=bin + +# Go build flags +# -s: Strip symbols (reduces binary size) +# -w: Omit DWARF debugging information +LDFLAGS=-ldflags "-s -w" + +.PHONY: all clean summary install darwin-amd64 darwin-amd64 linux-amd64 linux-arm64 windows-amd64 + +# Create build directory if it doesn't exist +$(BIN_DIR): + @mkdir -p $(BIN_DIR) + +# Build for all platforms +all: darwin-amd64 darwin-arm64 linux-amd64 linux-arm64 windows-amd64 install + +summary: + @if ! 
command -v summarize > /dev/null; then \ + go install github.com/andreimerlescu/summarize@latest; \ + fi + @summarize -i "go,Makefile,mod" -debug -print + +install: $(BIN_DIR) + @if [[ "$(shell go env GOOS)" == "windows" ]]; then \ + cp $(BIN_DIR)/$(APP_NAME)-$(shell go env GOOS)-$(shell go env GOARCH).exe "$(shell go env GOBIN)/$(APP_NAME).exe"; \ + else \ + cp $(BIN_DIR)/$(APP_NAME)-$(shell go env GOOS)-$(shell go env GOARCH) "$(shell go env GOBIN)/$(APP_NAME)"; \ + fi + @echo "NEW: $(shell which $(APP_NAME))" + +# Build for macOS Intel (amd64) +darwin-amd64: $(BIN_DIR) + @GOOS=darwin GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-darwin-amd64 $(MAIN_PATH) + @echo "NEW: $(BIN_DIR)/$(APP_NAME)-darwin-amd64" + +# Build for macOS Silicon (arm64) +darwin-arm64: $(BIN_DIR) + @GOOS=darwin GOARCH=arm64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-darwin-arm64 $(MAIN_PATH) + @echo "NEW: $(BIN_DIR)/$(APP_NAME)-darwin-amd64" + +# Build for Linux ARM64 +linux-arm64: $(BIN_DIR) + @GOOS=linux GOARCH=arm64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-linux-arm64 $(MAIN_PATH) + @echo "NEW: $(BIN_DIR)/$(APP_NAME)-darwin-arm64" + +# Build for Linux AMD64 +linux-amd64: $(BIN_DIR) + @GOOS=linux GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME)-linux-amd64 $(MAIN_PATH) + @echo "NEW: $(BIN_DIR)/$(APP_NAME)-linux-amd64" + +# Build for Windows AMD64 +windows-amd64: $(BIN_DIR) + @GOOS=windows GOARCH=amd64 go build $(LDFLAGS) -o $(BIN_DIR)/$(APP_NAME).exe $(MAIN_PATH) + @echo "NEW: $(BIN_DIR)/$(APP_NAME).exe" + +# Clean build artifacts +clean: + @rm -rf $(BIN_DIR) + @echo "REMOVED: $(BIN_DIR)" + +# Project Specific + +.PHONY: test + +# Run tests +test: + ./test.sh diff --git a/README.md b/README.md index 1c63e46..fe6b66a 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,55 @@ # Summarize -A go utility that will capture files with an extension pattern into a single markdown formatted -file that looks like: +The **Summarize** package was designed for developers who 
wish to leverage the use of Artificial Intelligence while +working on a project. The `summarize` command gives you a powerful interface that is managed by arguments and environment +variables that define include/exclude extensions and a list of substrings to avoid while parsing paths. The binary has +concurrency built into it and has limits for the output file. It ignores its default output directory so it won't +recursively build summaries upon itself. It defaults to writing to a new directory that it'll try to create in the +current working directory called `summaries`, which I recommend you add to your `.gitignore` and `.dockerignore`. + +I've found it useful to leverage the `make summary` command in all of my projects. This way, if I need to ask an AI a +question about a piece of code, I can capture the source code of the entire directory quickly and then just `cat` the +output file path provided and _voila_! The `-print` argument allows you to display the summary contents on STDOUT +instead of the `Summary generated: summaries/summary.2025.07.29.08.59.03.UTC.md` message that it would normally print. + +The **Environment** can be used to control the default behavior of the `summarize` binary, such that you won't be required +to type the arguments out each time. If you use _JSON_ all the time, you can enable its output format on every command +by setting `SUMMARIZE_ALWAYS_JSON`. If you always want to write the summary, you can use the `SUMMARIZE_ALWAYS_WRITE` +variable. If you want to always print the summary to STDOUT instead of the success message, you can use the variable +`SUMMARIZE_ALWAYS_PRINT`. If you want to compress the rendered summary every time, you can use the variable +`SUMMARIZE_ALWAYS_COMPRESS`. These `SUMMARIZE_ALWAYS_*` environment variables are responsible for customizing the +runtime of the `summarize` application. + +When the `summarize` binary runs, it'll do its best to ignore files that it can't render to a text file. 
This includes +images, videos, binary files, and text files that are commonly linked to secrets. + +The developer experience while using `summarize` is designed to enable quick use by just running `summarize` from +wherever you wish to summarize. The `-d` for **source directory** defaults to `.` and the `-o`/`-f` for **output path** +defaults to a new timestamped file (`-f`) in the (`-o`) `summaries/` directory from the `.` context. The `-i` and `-x` flags +define which file extensions to include and exclude, such as `go,ts,py`. The `-s` is used to +**skip** over substrings within a scanned path. Dotfiles can be ignored entirely by passing the `-ndf` flag. + +Performance of the application can be tuned using the `-mf=` to assign **Max Files** that will concurrently be +processed. The default is 369. The `-max=` represents a limit on how large the rendered summary can become. + +Once the program finishes running, the rendered file will look similar to: ```md # Project Summary -### `filename.ext` + + +### `filename.go` + + -### `filename.ext` +### `filename.cs` + + + + ... etc. @@ -49,19 +88,139 @@ cd ~/work/anotherProject summarize -d anotherProject -o /home/user/summaries/anotherProject ``` -Since `figtree` is designed to be very functional, its lightweight but feature -intense design through simple biology memetics makes it well suited for this program. - ## Options -| Name | Argument | Type | Usage | -|-----------------|----------|----------|--------------------------------------------------------| -| `kSourceDir` | -d` | `string` | Source directory path. | -| `kOutputDir` | -o` | `string` | Summary destination output directory path. | -| `kExcludeExt` | `-x` | `list` | Comma separated string list of extensions to exclude. | -| `kSkipContains` | `-s` | `list` | Comma separated string to filename substrings to skip. | -| `kIncludeExt` | `-i` | `list` | Comma separated string of extensions to include. 
| -| `kFilename` | `-f` | `string` | Summary filename (writes to `-o` dir). | +| Name | Argument | Type | Usage | +|------------------|----------|----------|-------------------------------------------------------------------| +| `kSourceDir` | `-d` | `string` | Source directory path. | +| `kOutputDir` | `-o` | `string` | Summary destination output directory path. | +| `kExcludeExt` | `-x` | `list` | Comma separated list of extensions to exclude. | +| `kSkipContains` | `-s` | `list` | Comma separated list of path substrings to skip. | +| `kIncludeExt` | `-i` | `list` | Comma separated list of extensions to include. | +| `kFilename` | `-f` | `string` | Summary filename (writes to `-o` dir). | +| `kVersion` | `-v` | `bool` | When `true`, the binary version is shown. | +| `kCompress` | `-gz` | `bool` | When `true`, **gzip** is used on the contents of the summary. | +| `kMaxOutputSize` | `-max` | `int64` | Maximum allowed size of the generated summary. | +| `kPrint` | `-print` | `bool` | Uses STDOUT to write contents of summary. | +| `kWrite` | `-write` | `bool` | Uses the filesystem to save contents of summary. | +| `kDebug` | `-debug` | `bool` | When `true`, extra content is written to STDOUT aside from report. | + + +## Environment + +| Environment Variable | Type | Default Value | Usage | +|-----------------------------|----------|------------------------|-------------------------------------------------------------------------------------------------------------| +| `SUMMARIZE_CONFIG_FILE` | `String` | `./config.yaml` | Path to the YAML configuration file loaded by [figtree](https://github.com/andreimerlescu/figtree). | +| `SUMMARIZE_IGNORE_CONTAINS` | `List` | \* see below | Items listed here are appended to the default list (the two lists are concatenated). | +| `SUMMARIZE_INCLUDE_EXT` | `List` | \*\* see below \* | Add extensions to include in the summary in this environment variable, comma separated. 
| +| `SUMMARIZE_EXCLUDE_EXT` | `List` | \*\*\* see below \* \* | Add exclusionary extensions to ignore to this environment variable, comma separated. | +| `SUMMARIZE_ALWAYS_PRINT` | `Bool` | `false` | When `true`, the `-print` will write the summary to STDOUT. | +| `SUMMARIZE_ALWAYS_WRITE` | `Bool` | `false` | When `true`, the `-write` will write to a new file on the disk. | +| `SUMMARIZE_ALWAYS_JSON` | `Bool` | `false` | When `true`, the `-json` flag will render JSON output to the console. | +| `SUMMARIZE_ALWAYS_COMPRESS` | `Bool` | `false` | When `true`, the `-gz` flag will use gzip to compress the summary contents and appends `.gz` to the output. | + + +### \* Default `SUMMARIZE_IGNORE_CONTAINS` Value + +```json +7z,gz,xz,zst,zstd,bz,bz2,bzip2,zip,tar,rar,lz4,lzma,cab,arj,crt,cert,cer,key,pub,asc,pem,p12,pfx,jks,keystore,id_rsa,id_dsa,id_ed25519,id_ecdsa,gpg,pgp,exe,dll,so,dylib,bin,out,o,obj,a,lib,dSYM,class,pyc,pyo,__pycache__,jar,war,ear,apk,ipa,dex,odex,wasm,node,beam,elc,iso,img,dmg,vhd,vdi,vmdk,qcow2,db,sqlite,sqlite3,db3,mdb,accdb,sdf,ldb,log,trace,dump,crash,jpg,jpeg,png,gif,bmp,tiff,tif,webp,ico,svg,heic,heif,raw,cr2,nef,dng,mp3,wav,flac,aac,ogg,wma,m4a,opus,aiff,mp4,avi,mov,mkv,webm,flv,wmv,m4v,3gp,ogv,ttf,otf,woff,woff2,eot,fon,pfb,pfm,pdf,doc,docx,xls,xlsx,ppt,pptx,odt,ods,odp,rtf,suo,sln,user,ncb,pdb,ipch,ilk,tlog,idb,aps,res,iml,idea,vscode,project,classpath,factorypath,prefs,vcxproj,vcproj,filters,xcworkspace,xcuserstate,xcscheme,pbxproj,DS_Store,Thumbs.db,desktop.ini,lock,sum,resolved,tmp,temp,swp,swo,bak,backup,orig,rej,patch,~,old,new,part,incomplete,map,min.js,min.css,bundle.js,bundle.css,chunk.js,dat,data,cache,pid,sock,pack,idx,rev,pickle,pkl,npy,npz,mat,rdata,rds +``` + +```go + +// defaultExclude are the -exc list of extensions that will be skipped automatically +defaultExclude = []string{ + // Compressed archives + "7z", "gz", "xz", "zst", "zstd", "bz", "bz2", "bzip2", "zip", "tar", "rar", "lz4", "lzma", "cab", "arj", + + // Encryption, 
certificates, and sensitive keys + "crt", "cert", "cer", "key", "pub", "asc", "pem", "p12", "pfx", "jks", "keystore", + "id_rsa", "id_dsa", "id_ed25519", "id_ecdsa", "gpg", "pgp", + + // Binary & executable artifacts + "exe", "dll", "so", "dylib", "bin", "out", "o", "obj", "a", "lib", "dSYM", + "class", "pyc", "pyo", "__pycache__", + "jar", "war", "ear", "apk", "ipa", "dex", "odex", + "wasm", "node", "beam", "elc", + + // System and disk images + "iso", "img", "dmg", "vhd", "vdi", "vmdk", "qcow2", + + // Database files + "db", "sqlite", "sqlite3", "db3", "mdb", "accdb", "sdf", "ldb", + + // Log files + "log", "trace", "dump", "crash", + + // Media files - Images + "jpg", "jpeg", "png", "gif", "bmp", "tiff", "tif", "webp", "ico", "svg", "heic", "heif", "raw", "cr2", "nef", "dng", + + // Media files - Audio + "mp3", "wav", "flac", "aac", "ogg", "wma", "m4a", "opus", "aiff", + + // Media files - Video + "mp4", "avi", "mov", "mkv", "webm", "flv", "wmv", "m4v", "3gp", "ogv", + + // Font files + "ttf", "otf", "woff", "woff2", "eot", "fon", "pfb", "pfm", + + // Document formats (typically not source code) + "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp", "rtf", + + // IDE/Editor/Tooling artifacts + "suo", "sln", "user", "ncb", "pdb", "ipch", "ilk", "tlog", "idb", "aps", "res", + "iml", "idea", "vscode", "project", "classpath", "factorypath", "prefs", + "vcxproj", "vcproj", "filters", "xcworkspace", "xcuserstate", "xcscheme", "pbxproj", + "DS_Store", "Thumbs.db", "desktop.ini", + + // Package manager and build artifacts + "lock", "sum", "resolved", // package-lock.json, go.sum, yarn.lock, etc. 
+ + // Temporary and backup files + "tmp", "temp", "swp", "swo", "bak", "backup", "orig", "rej", "patch", + "~", "old", "new", "part", "incomplete", + + // Source maps and minified files (usually generated) + "map", "min.js", "min.css", "bundle.js", "bundle.css", "chunk.js", + + // Configuration that's typically binary or generated + "dat", "data", "cache", "pid", "sock", + + // Version control artifacts (though usually in ignored directories) + "pack", "idx", "rev", + + // Other binary formats + "pickle", "pkl", "npy", "npz", "mat", "rdata", "rds", +} + +``` + +### \* \* Default `SUMMARIZE_INCLUDE_EXT` + +```json +go,ts,tf,sh,py,js,Makefile,mod,Dockerfile,dockerignore,gitignore,esconfigs,md +``` + +```go +// defaultInclude are the -inc list of extensions that will be included in the summary +defaultInclude = []string{ + "go", "ts", "tf", "sh", "py", "js", "Makefile", "mod", "Dockerfile", "dockerignore", "gitignore", "esconfigs", "md", +} +``` + +### \* \* \* Default `SUMMARIZE_EXCLUDE_EXT` + +```json +.min.js,.min.css,.git/,.svn/,.vscode/,.vs/,.idea/,logs/,secrets/,.venv/,/site-packages,.terraform/,summaries/,node_modules/,/tmp,tmp/,logs/ +``` + +```go +// defaultAvoid are the -avoid list of substrings in file path names to avoid in the summary +defaultAvoid = []string{ + ".min.js", ".min.css", ".git/", ".svn/", ".vscode/", ".vs/", ".idea/", "logs/", "secrets/", + ".venv/", "/site-packages", ".terraform/", "summaries/", "node_modules/", "/tmp", "tmp/", "logs/", +} +``` ## Contribution diff --git a/VERSION b/VERSION index 60453e6..b482243 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -v1.0.0 \ No newline at end of file +v1.0.2 \ No newline at end of file diff --git a/go.mod b/go.mod index 53fe9d4..4eb1111 100644 --- a/go.mod +++ b/go.mod @@ -1,16 +1,17 @@ module github.com/andreimerlescu/summarize -go 1.23.7 +go 1.24.5 require ( - github.com/andreimerlescu/checkfs v1.0.2 - github.com/andreimerlescu/figtree/v2 v2.0.3 + github.com/andreimerlescu/checkfs v1.0.4 + 
github.com/andreimerlescu/figtree/v2 v2.0.14 github.com/andreimerlescu/sema v1.0.0 ) require ( + github.com/andreimerlescu/bump v1.0.3 // indirect github.com/go-ini/ini v1.67.0 // indirect - golang.org/x/sys v0.31.0 // indirect - golang.org/x/term v0.30.0 // indirect + golang.org/x/sys v0.33.0 // indirect + golang.org/x/term v0.32.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 6ac5645..b51abf5 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,11 @@ -github.com/andreimerlescu/checkfs v1.0.2 h1:U7maY2jYqzb+ranBSWiZamBDapAWvBCeokOktn4gong= -github.com/andreimerlescu/checkfs v1.0.2/go.mod h1:ADaqjiRJf3gmyENLS3v9bJIaEH00IOeM48cXxVwy1JY= -github.com/andreimerlescu/figtree/v2 v2.0.3 h1:BfBGZ7729shM9jvl2nHnumQJjpP51C3MEGe6TXJQu0c= -github.com/andreimerlescu/figtree/v2 v2.0.3/go.mod h1:cIwo9LqOWCjnB3354D34U7KH9D30PdVUqkdq4BncCzY= +github.com/andreimerlescu/bump v1.0.3 h1:RAmNPjS8lGhgiBhiTMEaRl1ydex7Z3YYuyiQohC+ShY= +github.com/andreimerlescu/bump v1.0.3/go.mod h1:ud9Sqvt+zM0sBDhK3Dghq2hGTWrlVIvMqLAzpWQjIy0= +github.com/andreimerlescu/checkfs v1.0.4 h1:pRXZGW1sfe+yXyWNUxmPC2IiX5yT3vF1V5O8PXulnFc= +github.com/andreimerlescu/checkfs v1.0.4/go.mod h1:ADaqjiRJf3gmyENLS3v9bJIaEH00IOeM48cXxVwy1JY= +github.com/andreimerlescu/figtree/v2 v2.0.10 h1:UWKBVpwa4lI+mp3VxUy7MzkzaigROZd4zOGJrarNpv0= +github.com/andreimerlescu/figtree/v2 v2.0.10/go.mod h1:PymPGUzzP/UuxZ4mqC5JIrDZJIVcjZ3GMc/MC2GB6Ek= +github.com/andreimerlescu/figtree/v2 v2.0.14 h1:pwDbHpfiAdSnaNnxyV2GpG1rG9cmGiHhjXOvBEoVj2w= +github.com/andreimerlescu/figtree/v2 v2.0.14/go.mod h1:PymPGUzzP/UuxZ4mqC5JIrDZJIVcjZ3GMc/MC2GB6Ek= github.com/andreimerlescu/sema v1.0.0 h1:8ai/kqAci7QKUenAJWX13aYtWpjvD0CQW39CFzNIRQs= github.com/andreimerlescu/sema v1.0.0/go.mod h1:VCRQkKVknOKKPtAqvrNHL7hxxfoX5O7it2lWBzVxUs0= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -12,10 +16,10 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb 
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= -golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= -golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= +golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/main.go b/main.go index d37ae0b..c1f87ec 100644 --- a/main.go +++ b/main.go @@ -2,16 +2,20 @@ package main import ( "bytes" + "compress/gzip" "embed" + "encoding/json" "fmt" "io" "io/fs" "os" "path/filepath" "runtime" - "sort" + "slices" + "strconv" "strings" "sync" + "sync/atomic" "time" check "github.com/andreimerlescu/checkfs" @@ -38,41 +42,115 @@ func Version() string { } const ( - projectName string = "github.com/andreimerlescu/summarize" - tFormat string = "2006.01.02.15.04.05.UTC" - eConfigFile string = "SUMMARIZE_CONFIG_FILE" - kSourceDir string = "d" - kOutputDir string = "o" - kIncludeExt string = "i" - kExcludeExt string = "x" - kSkipContains string = "s" - kFilename string = "f" - kVersion string = "v" - kDotFiles string = "ndf" - kMaxFiles string = "mf" + projectName string = 
"github.com/andreimerlescu/summarize" + tFormat string = "2006.01.02.15.04.05.UTC" + + eConfigFile string = "SUMMARIZE_CONFIG_FILE" + eAddIgnoreInPathList string = "SUMMARIZE_IGNORE_CONTAINS" + eAddIncludeExtList string = "SUMMARIZE_INCLUDE_EXT" + eAddExcludeExtList string = "SUMMARIZE_EXCLUDE_EXT" + eAlwaysWrite string = "SUMMARIZE_ALWAYS_WRITE" + eAlwaysPrint string = "SUMMARIZE_ALWAYS_PRINT" + eAlwaysJson string = "SUMMARIZE_ALWAYS_JSON" + eAlwaysCompress string = "SUMMARIZE_ALWAYS_COMPRESS" + + kSourceDir string = "d" + kOutputDir string = "o" + kIncludeExt string = "i" + kExcludeExt string = "x" + kSkipContains string = "s" + kFilename string = "f" + kPrint string = "print" + kMaxOutputSize string = "max" + kWrite string = "write" + kVersion string = "v" + kDotFiles string = "ndf" + kMaxFiles string = "mf" + kDebug string = "debug" + kJson string = "json" + kCompress string = "gz" ) var ( // figs is a figtree of fruit for configurable command line arguments that bear fruit - figs figtree.Fruit + figs figtree.Plant + + alwaysWrite = true // defaultExclude are the -exc list of extensions that will be skipped automatically defaultExclude = []string{ - "7z", "gz", "xz", "zstd", "bz", "bzip2", "zip", "part", // compressed files - "crt", "key", "asc", "id_rsa", "id_dsa", "id_ed25519", // encryption files - "log", "dll", "so", "bin", "exe", // executable binaries - "jpg", "png", "mov", "mp3", "mp4", "heic", "avi", // media - "ttf", "woff", "woff2", "otf", // fonts + // Compressed archives + "7z", "gz", "xz", "zst", "zstd", "bz", "bz2", "bzip2", "zip", "tar", "rar", "lz4", "lzma", "cab", "arj", + + // Encryption, certificates, and sensitive keys + "crt", "cert", "cer", "key", "pub", "asc", "pem", "p12", "pfx", "jks", "keystore", + "id_rsa", "id_dsa", "id_ed25519", "id_ecdsa", "gpg", "pgp", + + // Binary & executable artifacts + "exe", "dll", "so", "dylib", "bin", "out", "o", "obj", "a", "lib", "dSYM", + "class", "pyc", "pyo", "__pycache__", + "jar", "war", "ear", 
"apk", "ipa", "dex", "odex", + "wasm", "node", "beam", "elc", + + // System and disk images + "iso", "img", "dmg", "vhd", "vdi", "vmdk", "qcow2", + + // Database files + "db", "sqlite", "sqlite3", "db3", "mdb", "accdb", "sdf", "ldb", + + // Log files + "log", "trace", "dump", "crash", + + // Media files - Images + "jpg", "jpeg", "png", "gif", "bmp", "tiff", "tif", "webp", "ico", "svg", "heic", "heif", "raw", "cr2", "nef", "dng", + + // Media files - Audio + "mp3", "wav", "flac", "aac", "ogg", "wma", "m4a", "opus", "aiff", + + // Media files - Video + "mp4", "avi", "mov", "mkv", "webm", "flv", "wmv", "m4v", "3gp", "ogv", + + // Font files + "ttf", "otf", "woff", "woff2", "eot", "fon", "pfb", "pfm", + + // Document formats (typically not source code) + "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp", "rtf", + + // IDE/Editor/Tooling artifacts + "suo", "sln", "user", "ncb", "pdb", "ipch", "ilk", "tlog", "idb", "aps", "res", + "iml", "idea", "vscode", "project", "classpath", "factorypath", "prefs", + "vcxproj", "vcproj", "filters", "xcworkspace", "xcuserstate", "xcscheme", "pbxproj", + "DS_Store", "Thumbs.db", "desktop.ini", + + // Package manager and build artifacts + "lock", "sum", "resolved", // package-lock.json, go.sum, yarn.lock, etc. 
+ + // Temporary and backup files + "tmp", "temp", "swp", "swo", "bak", "backup", "orig", "rej", "patch", + "~", "old", "new", "part", "incomplete", + + // Source maps and minified files (usually generated) + "map", "min.js", "min.css", "bundle.js", "bundle.css", "chunk.js", + + // Configuration that's typically binary or generated + "dat", "data", "cache", "pid", "sock", + + // Version control artifacts (though usually in ignored directories) + "pack", "idx", "rev", + + // Other binary formats + "pickle", "pkl", "npy", "npz", "mat", "rdata", "rds", } // defaultInclude are the -inc list of extensions that will be included in the summary defaultInclude = []string{ - "go", "ts", "tf", "sh", "py", "js", + "go", "ts", "tf", "sh", "py", "js", "Makefile", "mod", "Dockerfile", "dockerignore", "gitignore", "esconfigs", "md", } // defaultAvoid are the -avoid list of substrings in file path names to avoid in the summary defaultAvoid = []string{ ".min.js", ".min.css", ".git/", ".svn/", ".vscode/", ".vs/", ".idea/", "logs/", "secrets/", + ".venv/", "/site-packages", ".terraform/", "summaries/", "node_modules/", "/tmp", "tmp/", "logs/", } ) @@ -82,65 +160,105 @@ var newSummaryFilename = func() string { } // init creates a new figtree with options to use CONFIG_FILE as a way of reading a YAML file while ignoring the env -func init() { +func configure() { figs = figtree.With(figtree.Options{ Harvest: 9, IgnoreEnvironment: true, - ConfigFile: os.Getenv(eConfigFile), + ConfigFile: envVal(eConfigFile, "./config.yaml"), }) // properties - figs.NewString(kSourceDir, ".", "Absolute path of directory you want to summarize.") - figs.NewString(kOutputDir, filepath.Join(".", "summaries"), fmt.Sprintf("Path of the directory to write the %s file to", newSummaryFilename())) - figs.NewString(kFilename, newSummaryFilename(), "Output file of summary.md") - figs.NewList(kIncludeExt, defaultInclude, "List of extensions to include in summary.") - figs.NewList(kExcludeExt, defaultExclude, "List of 
extensions to include in summary.") - figs.NewList(kSkipContains, defaultAvoid, "List of extensions to avoid.") - figs.NewInt(kMaxFiles, 20, "Maximum number of files to process concurrently") - figs.NewBool(kDotFiles, false, "Include dot files by setting this true") - figs.NewBool(kVersion, false, "Display current version of summarize") + figs = figs.NewString(kSourceDir, ".", "Absolute path of directory you want to summarize.") + figs = figs.NewString(kOutputDir, filepath.Join(".", "summaries"), fmt.Sprintf("Path of the directory to write the %s file to", newSummaryFilename())) + figs = figs.NewString(kFilename, newSummaryFilename(), "Output file of summary.md") + figs = figs.NewList(kIncludeExt, defaultInclude, "List of extensions to INCLUDE in summary.") + figs = figs.NewList(kExcludeExt, defaultExclude, "List of extensions to EXCLUDE in summary.") + figs = figs.NewList(kSkipContains, defaultAvoid, "List of path substrings if present to skip over full path.") + figs = figs.NewInt(kMaxFiles, 369, "Maximum number of files to process concurrently") + figs = figs.NewInt64(kMaxOutputSize, 1_776_369, "Maximum file size of output file") + figs = figs.NewBool(kDotFiles, false, "Any path that is considered a dotfile can be included by setting this to true") + figs = figs.NewBool(kPrint, envIs(eAlwaysPrint), "Print generated file contents to STDOUT") + figs = figs.NewBool(kWrite, envIs(eAlwaysWrite), "Write generated contents to file") + figs = figs.NewBool(kJson, envIs(eAlwaysJson), "Enable JSON formatting") + figs = figs.NewBool(kCompress, envIs(eAlwaysCompress), "Use gzip compression in output") + figs = figs.NewBool(kVersion, false, "Display current version of summarize") + figs = figs.NewBool(kDebug, false, "Enable debug mode") // validators - figs.WithValidator(kSourceDir, figtree.AssureStringNotEmpty) - figs.WithValidator(kOutputDir, figtree.AssureStringNotEmpty) - figs.WithValidator(kFilename, figtree.AssureStringNotEmpty) - figs.WithValidator(kMaxFiles, 
figtree.AssureIntInRange(1, 63339)) + figs = figs.WithValidator(kSourceDir, figtree.AssureStringNotEmpty) + figs = figs.WithValidator(kOutputDir, figtree.AssureStringNotEmpty) + figs = figs.WithValidator(kFilename, figtree.AssureStringNotEmpty) + figs = figs.WithValidator(kMaxFiles, figtree.AssureIntInRange(1, 17_369)) + figs = figs.WithValidator(kMaxOutputSize, figtree.AssureInt64InRange(369, 369_369_369_369)) // callbacks - figs.WithCallback(kSourceDir, figtree.CallbackAfterVerify, callbackVerifyReadableDirectory) - figs.WithCallback(kFilename, figtree.CallbackAfterVerify, callbackVerifyFile) - figs.WithCallback(kOutputDir, figtree.CallbackAfterVerify, func(value interface{}) error { - return check.Directory(toString(value), directory.Options{ - WillCreate: true, - Create: directory.Create{ - Kind: directory.IfNotExists, - Path: toString(value), - FileMode: 0755, - }, - }) - }) - capture(figs.Load()) + figs = figs.WithCallback(kSourceDir, figtree.CallbackAfterVerify, callbackVerifyReadableDirectory) + figs = figs.WithCallback(kFilename, figtree.CallbackAfterVerify, callbackVerifyFile) +} + +type result struct { + Path string `yaml:"path" json:"path"` + Contents []byte `yaml:"contents" json:"contents"` + Size int64 `yaml:"size" json:"size"` +} + +type final struct { + Path string `yaml:"path" json:"path"` + Contents string `yaml:"contents" json:"contents"` + Size int64 `yaml:"size" json:"size"` } func main() { + configure() + capture("figs loading environment", figs.Load()) + isDebug := *figs.Bool(kDebug) if *figs.Bool(kVersion) { fmt.Println(Version()) os.Exit(0) } var ( - data map[string][]string // data is map[ext][]path of found files to summarize - dataMutex = sync.RWMutex{} // adding concurrency + lIncludeExt = *figs.List(kIncludeExt) + lExcludeExt = *figs.List(kExcludeExt) + lSkipContains = *figs.List(kSkipContains) + + sourceDir = *figs.String(kSourceDir) + outputDir = *figs.String(kOutputDir) + ) + + capture("checking output directory", 
check.Directory(outputDir, directory.Options{ + WillCreate: true, + Create: directory.Create{ + Kind: directory.IfNotExists, + Path: outputDir, + FileMode: 0755, + }, + })) + + addFromEnv(eAddIgnoreInPathList, &lSkipContains) + addFromEnv(eAddIncludeExtList, &lIncludeExt) + addFromEnv(eAddExcludeExtList, &lExcludeExt) + + var ( wg = sync.WaitGroup{} throttler = sema.New(runtime.GOMAXPROCS(0)) ) // initialize the data map with all -inc extensions var errs []error - data = make(map[string][]string) - for _, inc := range *figs.List(kIncludeExt) { - data[inc] = []string{} + + type mapData struct { + Ext string + Paths []string + } + + data := &sync.Map{} + for _, inc := range lIncludeExt { + data.Store(inc, mapData{ + Ext: inc, + Paths: []string{}, + }) } // populate data with the kSourceDir files based on -inc -exc -avoid lists - capture(filepath.Walk(*figs.String(kSourceDir), func(path string, info fs.FileInfo, err error) error { + capture("walking source directory", filepath.Walk(sourceDir, func(path string, info fs.FileInfo, err error) error { if err != nil { return err // return the error received } @@ -156,10 +274,35 @@ func main() { } // check the -avoid list - for _, avoidThis := range *figs.List(kSkipContains) { - if strings.Contains(filename, avoidThis) { + for _, avoidThis := range lSkipContains { + a := strings.Contains(filename, avoidThis) || strings.Contains(path, avoidThis) + b := strings.HasPrefix(filename, avoidThis) || strings.HasPrefix(path, avoidThis) + c := strings.HasSuffix(filename, avoidThis) || strings.HasSuffix(path, avoidThis) + if a || b || c { + if isDebug { + fmt.Printf("ignoring %s in %s\n", filename, path) + } return nil // skip without error } + + parts, err := filepath.Glob(path) + if err != nil { + errs = append(errs, err) + continue + } + for i := 0; i < len(parts); i++ { + part := parts[i] + if part == "/" { + continue + } + if strings.Contains(part, avoidThis) || strings.HasPrefix(part, avoidThis) || strings.HasSuffix(part, 
avoidThis) { + if isDebug { + fmt.Printf("skipping file %q\n", part) + } + return nil + } + } + } // get the extension @@ -167,16 +310,40 @@ func main() { ext = strings.ToLower(ext) ext = strings.TrimPrefix(ext, ".") + if isDebug { + fmt.Printf("ext: %s\n", ext) + } + // check the -exc list - for _, excludeThis := range *figs.List(kExcludeExt) { + for _, excludeThis := range lExcludeExt { if strings.EqualFold(excludeThis, ext) { + if isDebug { + fmt.Printf("ignoring %s\n", path) + } return nil // skip without error } } + var toUpdate []mapData // populate the -inc list in data - if _, exists := data[ext]; exists { - data[ext] = append(data[ext], path) + data.Range(func(e any, p any) bool { + key, ok := e.(string) + if !ok { + return true // continue + } + value, ok := p.(mapData) + if !ok { + return true + } + if strings.EqualFold(key, ext) { + value.Ext = key + } + value.Paths = append(value.Paths, path) + toUpdate = append(toUpdate, value) + return true + }) + for _, value := range toUpdate { + data.Store(value.Ext, value) } } @@ -184,47 +351,183 @@ func main() { return nil })) + if isDebug { + fmt.Println("data received: ") + data.Range(func(e any, p any) bool { + ext, ok := e.(string) + if !ok { + return true // continue + } + thisData, ok := p.(mapData) + if !ok { + return true // continue + } + fmt.Printf("%s: %s\n", ext, strings.Join(thisData.Paths, ", ")) + return true // continue + }) + } + maxFileSemaphore := sema.New(*figs.Int(kMaxFiles)) - writeChan := make(chan []byte, 10240) + resultsChan := make(chan result, *figs.Int(kMaxFiles)) writerWG := sync.WaitGroup{} writerWG.Add(1) go func() { defer writerWG.Done() // Create output file + srcDir := *figs.String(kSourceDir) outputFileName := filepath.Join(*figs.String(kOutputDir), *figs.String(kFilename)) var buf bytes.Buffer - buf.WriteString(fmt.Sprintf("# Project Summary - %s\nGenerated by %s %s\n\n## Workspace\n\n
%s
\n\n\n", - filepath.Base(*figs.String(kFilename)), projectName, Version(), *figs.String(kSourceDir))) + buf.WriteString("# Project Summary - " + filepath.Base(*figs.String(kFilename)) + "\n") + buf.WriteString("Generated by " + projectName + " " + Version() + "\n\n") + buf.WriteString("AI Instructions are the user requests that you analyze their project workspace ") + buf.WriteString("as provided here by filename followed by the contents. You are to answer their ") + buf.WriteString("question using the source code provided as the basis of your responses. You are to ") + buf.WriteString("completely modify each individual file as per-the request and provide the completely ") + buf.WriteString("updated form of the file. Do not abbreviate the file, and if the file is excessive in ") + buf.WriteString("length, then print the entire contents in your response with your updates to the ") + buf.WriteString("specific components while retaining all existing functionality and maintaining comments ") + buf.WriteString("within the code. 
\n\n") + buf.WriteString("### Workspace\n\n") + abs, err := filepath.Abs(srcDir) + if err == nil { + buf.WriteString("" + abs + "\n\n") + } else { + buf.WriteString("" + srcDir + "\n\n") + } - for data := range writeChan { - buf.Write(data) + renderMu := &sync.Mutex{} + renderedPaths := make(map[string]int64) + totalSize := int64(buf.Len()) + for in := range resultsChan { + if _, exists := renderedPaths[in.Path]; exists { + continue + } + runningSize := atomic.AddInt64(&totalSize, in.Size) + if runningSize >= *figs.Int64(kMaxOutputSize) { + continue + } + renderMu.Lock() + renderedPaths[in.Path] = in.Size + buf.Write(in.Contents) + renderMu.Unlock() + } + + shouldPrint := *figs.Bool(kPrint) + canWrite := *figs.Bool(kWrite) + showJson := *figs.Bool(kJson) + wrote := false + + if *figs.Bool(kCompress) { + compressed, err := compress(bytes.Clone(buf.Bytes())) + capture("compressing bytes buffer", err) + buf.Reset() + buf.Write(compressed) + outputFileName += ".gz" + } + + if !shouldPrint && !canWrite { + capture("saving output file during write", os.WriteFile(outputFileName, buf.Bytes(), 0644)) + wrote = true + } + + if canWrite && !wrote { + capture("saving output file during write", os.WriteFile(outputFileName, buf.Bytes(), 0644)) + wrote = true + } + + if shouldPrint { + if showJson { + r := final{ + Path: outputFileName, + Size: int64(buf.Len()), + Contents: buf.String(), + } + jb, err := json.MarshalIndent(r, "", " ") + if err != nil { + _, _ = fmt.Fprintln(os.Stderr, err) + } + fmt.Println(string(jb)) + } else { + fmt.Println(buf.String()) + } + os.Exit(0) } - capture(os.WriteFile(outputFileName, buf.Bytes(), 0644)) }() - for ext, paths := range data { // range over data to get ext and paths - throttler.Acquire() // throttler is used to protect the runtime from excessive use - wg.Add(1) // wg is used to prevent the runtime from exiting early - go func(ext string, paths []string) { // run this extension in a goroutine + var toUpdate []mapData + + seen := 
seenStrings{m: make(map[string]bool)} + + data.Range(func(e any, p any) bool { + ext, ok := e.(string) + if !ok { + return true // continue + } + thisData, ok := p.(mapData) + if !ok { + return true // continue + } + paths := slices.Clone(thisData.Paths) + + throttler.Acquire() // throttler is used to protect the runtime from excessive use + wg.Add(1) // wg is used to prevent the runtime from exiting early + go func(innerData *mapData, toUpdate *[]mapData, ext string, paths []string) { // run this extension in a goroutine defer throttler.Release() // when we're done, release the throttler defer wg.Done() // then tell the sync.WaitGroup that we are done - sort.Strings(paths) // sort the paths we receive - dataMutex.Lock() // lock the data map - data[ext] = paths // write the sorted paths - dataMutex.Unlock() // unlock the map + + paths = simplify(paths) + + innerData.Paths = paths + *toUpdate = append(*toUpdate, *innerData) // process each file in the ext list (one ext per throttle slot in the semaphore) for _, filePath := range paths { + if seen.Exists(filePath) { + continue + } maxFileSemaphore.Acquire() wg.Add(1) go func(ext, filePath string) { - defer maxFileSemaphore.Release() // maxFileSemaphore prevents excessive files from being opened - defer wg.Done() // keep the main thread running while this file is being processed - var sb bytes.Buffer // capture what we write to file in a bytes buffer - sb.WriteString(fmt.Sprintf("## %s\n\n```%s\n", filePath, ext)) // write the header of the summary for the file - content, err := os.ReadFile(filePath) // open the file and get its contents + defer maxFileSemaphore.Release() // maxFileSemaphore prevents excessive files from being opened + defer wg.Done() // keep the main thread running while this file is being processed + if strings.HasSuffix(filePath, ".DS_Store") || + strings.HasSuffix(filePath, ".exe") || + strings.HasSuffix(filePath, "-amd64") || + strings.HasSuffix(filePath, "-arm64") || + 
strings.HasSuffix(filePath, "aarch64") { + return + } + type tFileInfo struct { + Name string `json:"name"` + Size int64 `json:"size"` + Mode os.FileMode `json:"mode"` + } + info, err := os.Stat(filePath) + if err != nil { + errs = append(errs, err) + return + } + fileInfo := &tFileInfo{ + Name: filepath.Base(filePath), + Size: info.Size(), + Mode: info.Mode(), + } + infoJson, err := json.MarshalIndent(fileInfo, "", " ") + if err != nil { + errs = append(errs, err) + return + } + var sb bytes.Buffer // capture what we write to file in a bytes buffer + sb.WriteString("## " + filepath.Base(filePath) + "\n\n") + sb.WriteString("The `os.Stat` for the " + filePath + " is: \n\n") + sb.WriteString("```json\n") + sb.WriteString(string(infoJson) + "\n") + sb.WriteString("```\n\n") + sb.WriteString("Source Code:\n\n") + sb.WriteString("```" + ext + "\n") + content, err := os.ReadFile(filePath) // open the file and get its contents if err != nil { errs = append(errs, fmt.Errorf("Error reading file %s: %v\n", filePath, err)) return @@ -233,26 +536,52 @@ func main() { errs = append(errs, fmt.Errorf("Error writing file %s: %v\n", filePath, err)) return } - content = []byte{} // clear memory after its written - sb.WriteString("\n```\n") // close out the file footer - writeChan <- sb.Bytes() + content = []byte{} // clear memory after its written + sb.WriteString("\n```\n\n") // close out the file footer + seen.Add(filePath) + resultsChan <- result{ + Path: filePath, + Contents: sb.Bytes(), + Size: int64(sb.Len()), + } }(ext, filePath) } - }(ext, paths) + }(&thisData, &toUpdate, ext, paths) + return true + }) + + wg.Wait() // wait for all files to finish processing + + for _, innerData := range toUpdate { + data.Store(innerData.Ext, innerData) } - wg.Wait() // wait for all files to finish processing - close(writeChan) // Signal the writer goroutine to finish - writerWG.Wait() // Wait for the writer to flush and close the file + close(resultsChan) // Signal the writer goroutine to 
finish + writerWG.Wait() // Wait for the writer to flush and close the file if len(errs) > 0 { terminate(os.Stderr, "Error writing to output file: %v\n", errs) } // Print completion message - fmt.Printf("Summary generated: %s\n", - filepath.Join(*figs.String(kOutputDir), *figs.String(kFilename))) + if *figs.Bool(kJson) { + r := m{ + Message: fmt.Sprintf("Summary generated: %s\n", + filepath.Join(*figs.String(kOutputDir), *figs.String(kFilename)), + ), + } + jb, err := json.MarshalIndent(r, "", " ") + if err != nil { + terminate(os.Stderr, "Error marshalling results: %v\n", err) + } else { + fmt.Println(string(jb)) + } + } else { + fmt.Printf("Summary generated: %s\n", + filepath.Join(*figs.String(kOutputDir), *figs.String(kFilename)), + ) + } } var callbackVerifyFile = func(value interface{}) error { @@ -270,18 +599,160 @@ var toString = func(value interface{}) string { case *string: return *v default: - return "" + flesh := figtree.NewFlesh(value) + f := fmt.Sprintf("%v", flesh.ToString()) + return f } } -var capture = func(d ...error) { +var capture = func(msg string, d ...error) { if len(d) == 0 || (len(d) == 1 && d[0] == nil) { return } - terminate(os.Stderr, "captured error: %v\n", d) + terminate(os.Stderr, "[EXCUSE ME, BUT] %s\n\ncaptured error: %v\n", msg, d) +} + +type m struct { + Message string `json:"message"` } var terminate = func(d io.Writer, i string, e ...interface{}) { + for _, f := range os.Args { + if strings.HasPrefix(f, "-json") { + mm := m{Message: fmt.Sprintf(i, e...)} + jb, err := json.MarshalIndent(mm, "", " ") + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "Error serializing json: %v\n", err) + _, _ = fmt.Fprintf(d, i, e...) + } else { + fmt.Println(string(jb)) + } + os.Exit(1) + } + } _, _ = fmt.Fprintf(d, i, e...) 
os.Exit(1) } + +func simplify(t []string) []string { + seen := make(map[string]bool) + for _, v := range t { + seen[v] = true + } + results := make([]string, len(t)) + for i, v := range t { + if seen[v] { + results[i] = v + } + } + return results +} +func addFromEnv(e string, l *[]string) { + v, ok := os.LookupEnv(e) + if ok { + flesh := figtree.NewFlesh(v) + maybeAdd := flesh.ToList() + for _, entry := range maybeAdd { + *l = append(*l, entry) + } + } + *l = simplify(*l) +} + +type seenStrings struct { + mu sync.RWMutex + m map[string]bool +} + +func (s *seenStrings) Add(entry string) { + s.mu.Lock() + defer s.mu.Unlock() + s.m[entry] = true +} +func (s *seenStrings) Remove(entry string) { + s.mu.Lock() + defer s.mu.Unlock() + delete(s.m, entry) +} + +func (s *seenStrings) Len() int { + s.mu.RLock() + defer s.mu.RUnlock() + return len(s.m) +} + +func (s *seenStrings) String() string { + s.mu.RLock() + defer s.mu.RUnlock() + return fmt.Sprint(s.m) +} + +func (s *seenStrings) True(entry string) { + s.mu.Lock() + defer s.mu.Unlock() + s.m[entry] = true +} + +func (s *seenStrings) False(entry string) { + s.mu.Lock() + defer s.mu.Unlock() + delete(s.m, entry) +} + +func (s *seenStrings) Exists(entry string) bool { + s.mu.RLock() + defer s.mu.RUnlock() + return s.m[entry] +} + +func envVal(name, fallback string) string { + v, ok := os.LookupEnv(name) + if !ok { + return fallback + } + return v +} + +func envIs(name string) bool { + v, ok := os.LookupEnv(name) + if !ok { + return false + } + vb, err := strconv.ParseBool(v) + if err != nil { + return false + } + return vb +} + +// compress compresses a string using gzip and returns the compressed bytes +func compress(s []byte) ([]byte, error) { + var buf bytes.Buffer + gzWriter := gzip.NewWriter(&buf) + _, err := gzWriter.Write(s) + if err != nil { + return nil, fmt.Errorf("failed to write to gzip writer: %w", err) + } + err = gzWriter.Close() + if err != nil { + return nil, fmt.Errorf("failed to close gzip writer: %w", 
err) + } + return buf.Bytes(), nil +} + +// decompress decompresses gzip compressed bytes back to a string +func decompress(compressed []byte) (string, error) { + buf := bytes.NewReader(compressed) + gzReader, err := gzip.NewReader(buf) + if err != nil { + return "", fmt.Errorf("failed to create gzip reader: %w", err) + } + defer func() { + _ = gzReader.Close() + }() + decompressed, err := io.ReadAll(gzReader) + if err != nil { + return "", fmt.Errorf("failed to read from gzip reader: %w", err) + } + return string(decompressed), nil +}