diff --git a/README.md b/README.md index f288531..17f0cb0 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,11 @@ This repo provides utilities for managing copyright headers and license files across many repos at scale. -You can use it to add or validate copyright headers on source code files, add a -LICENSE file to a repo, report on what licenses repos are using, and more. +Features: +- Add or validate copyright headers on source code files +- Add and/or manage LICENSE files with git-aware copyright year detection +- Report on licenses used across multiple repositories +- Automate compliance checks in CI/CD pipelines ## Getting Started @@ -33,7 +36,7 @@ Usage: copywrite [command] Common Commands: - headers Adds missing copyright headers to all source code files + headers Adds missing copyright headers and updates existing headers' year information. init Generates a .copywrite.hcl config for a new project license Validates that a LICENSE file is present and remediates any issues if found @@ -62,8 +65,18 @@ scan all files in your repo and copyright headers to any that are missing: copywrite headers --spdx "MPL-2.0" ``` -You may omit the `--spdx` flag if you add a `.copywrite.hcl` config, as outlined -[here](#config-structure). +The `copywrite license` command validates and manages LICENSE files with git-aware copyright years: + +```sh +copywrite license --spdx "MPL-2.0" +``` + +**Copyright Year Behavior:** +- **Start Year**: Auto-detected from config file and if not found defaults to repository's first commit +- **End Year**: Set to current year when an update is triggered (git history only determines if update is needed) +- **Update Trigger**: Git detects if file was modified since the copyright end year + +You may omit the `--spdx` flag if you add a `.copywrite.hcl` config, as outlined [here](#config-structure). ### `--plan` Flag @@ -72,6 +85,23 @@ performs a dry-run and will outline what changes would be made. This flag also returns a non-zero exit code if any changes are needed. As such, it can be used to validate if a repo is in compliance or not. +## Technical Details + +### Copyright Year Logic + +**Source File Headers:** +- End year: Set to current year when file is modified +- Git history determines if update is needed (compares file's last commit year to copyright end year) +- When triggered, end year updates to current year + +**LICENSE Files:** +- End year: Set to current year when any project file is modified +- Git history determines if update is needed (compares repo's last commit year to copyright end year) +- When triggered, end year updates to current year +- Preserves historical accuracy for archived projects (no forced updates) + +**Key Distinction:** Git history is used as a trigger to determine *whether* an update is needed, but the actual end year value is always set to the current year when an update occurs. + ## Config Structure > :bulb: You can automatically generate a new `.copywrite.hcl` config with the @@ -99,8 +129,8 @@ project { # (OPTIONAL) Represents the year that the project initially began # This is used as the starting year in copyright statements - # If set and different from current year, headers will show: "copyright_year, current_year" - # If set and same as current year, headers will show: "current_year" + # If set and different from current year, headers will show: "copyright_year, year-2" + # If set and same as year-2, headers will show: "copyright_year" # If not set (0), the tool will auto-detect from git history (first commit year) # If auto-detection fails, it will fallback to current year only # Default: 0 (auto-detect) diff --git a/addlicense/main.go b/addlicense/main.go index 6f311db..8c6f603 100644 --- a/addlicense/main.go +++ b/addlicense/main.go @@ -280,7 +280,7 @@ func walk(ch chan<- *file, start string, logger *log.Logger) error { if fi.IsDir() { return nil } - if fileMatches(path, ignorePatterns) { + if FileMatches(path, ignorePatterns) { // The [DEBUG] level is inferred by go-hclog as a debug statement logger.Printf("[DEBUG] skipping: %s", path) return nil @@ -290,9 +290,9 @@ func walk(ch chan<- *file, start string, logger *log.Logger) error { }) } -// fileMatches determines if path matches one of the provided file patterns. +// FileMatches determines if path matches one of the provided file patterns. // Patterns are assumed to be valid. -func fileMatches(path string, patterns []string) bool { +func FileMatches(path string, patterns []string) bool { for _, p := range patterns { if runtime.GOOS == "windows" { diff --git a/addlicense/main_test.go b/addlicense/main_test.go index 9c00803..ecd6560 100644 --- a/addlicense/main_test.go +++ b/addlicense/main_test.go @@ -471,7 +471,7 @@ func TestFileMatches(t *testing.T) { for _, tt := range tests { patterns := []string{tt.pattern} - if got := fileMatches(tt.path, patterns); got != tt.wantMatch { + if got := FileMatches(tt.path, patterns); got != tt.wantMatch { t.Errorf("fileMatches(%q, %q) returned %v, want %v", tt.path, patterns, got, tt.wantMatch) } } diff --git a/cmd/headers.go b/cmd/headers.go index 6902699..f776cae 100644 --- a/cmd/headers.go +++ b/cmd/headers.go @@ -6,8 +6,11 @@ package cmd import ( "fmt" "os" + "path/filepath" + "strings" "github.com/hashicorp/copywrite/addlicense" + "github.com/hashicorp/copywrite/licensecheck" "github.com/hashicorp/go-hclog" "github.com/jedib0t/go-pretty/v6/text" "github.com/samber/lo" @@ -87,10 +90,23 @@ config, see the "copywrite init" command.`, ".github/workflows/**", ".github/dependabot.yml", "**/node_modules/**", + ".copywrite.hcl", } ignoredPatterns := lo.Union(conf.Project.HeaderIgnore, autoSkippedPatterns) - // Construct the configuration addLicense needs to properly format headers + // STEP 1: Update existing copyright headers + gha.StartGroup("Updating existing copyright headers:") + updatedCount, anyFileUpdated, licensePath := updateExistingHeaders(cmd, ignoredPatterns, plan) + gha.EndGroup() + if updatedCount > 0 { + if plan { + cmd.Printf("\n%s\n\n", text.FgYellow.Sprintf("[DRY RUN] Would update %d file(s) with new copyright years", updatedCount)) + } else { + cmd.Printf("\n%s\n\n", text.FgGreen.Sprintf("Successfully updated %d file(s) with new copyright years", updatedCount)) + } + } + + // STEP 2: Construct the configuration addLicense needs to properly format headers licenseData := addlicense.LicenseData{ Year: conf.FormatCopyrightYears(), // Format year(s) for copyright statements Holder: conf.Project.CopyrightHolder, @@ -112,10 +128,21 @@ config, see the "copywrite init" command.`, // cobra.CheckErr on the return, which will indeed output to stderr and // return a non-zero error code. - gha.StartGroup("The following files are missing headers:") + // STEP 3: Add missing headers + gha.StartGroup("Adding missing copyright headers:") err := addlicense.Run(ignoredPatterns, "only", licenseData, "", verbose, plan, []string{"."}, stdcliLogger) gha.EndGroup() + // STEP 4: Update LICENSE file if any files were modified (either updated or added headers) + // In plan mode: if addlicense found missing headers (returns error), assume files would be modified + // In normal mode: if addlicense succeeded, assume files were modified + if err != nil || (!plan && err == nil) { + anyFileUpdated = true + } + + updateLicenseFile(cmd, licensePath, anyFileUpdated, plan) + + // Check for errors after LICENSE file update so we still show what would happen cobra.CheckErr(err) }, } @@ -131,3 +158,86 @@ func init() { headersCmd.Flags().StringP("spdx", "s", "", "SPDX-compliant license identifier (e.g., 'MPL-2.0')") headersCmd.Flags().StringP("copyright-holder", "c", "", "Copyright holder (default \"IBM Corp.\")") } + +// updateExistingHeaders walks through files and updates copyright headers based on config and git history +// Returns the count of updated files, a boolean indicating if any file was updated, and the LICENSE file path (if found) +func updateExistingHeaders(cmd *cobra.Command, ignoredPatterns []string, dryRun bool) (int, bool, string) { + targetHolder := conf.Project.CopyrightHolder + if targetHolder == "" { + targetHolder = "IBM Corp." + } + + configYear := conf.Project.CopyrightYear + updatedCount := 0 + anyFileUpdated := false + var licensePath string + + // Walk through all files in current directory + _ = filepath.Walk(".", func(path string, info os.FileInfo, err error) error { + if err != nil || info.IsDir() { + return nil + } + + // Check if file should be ignored + if addlicense.FileMatches(path, ignoredPatterns) { + return nil + } + + // Capture LICENSE file path but skip processing it here - it will be handled separately + base := filepath.Base(path) + if strings.EqualFold(base, "LICENSE") || strings.EqualFold(base, "LICENSE.TXT") || strings.EqualFold(base, "LICENSE.MD") { + licensePath = path + return nil + } + + // Try to update copyright in this file + if !dryRun { + updated, err := licensecheck.UpdateCopyrightHeader(path, targetHolder, configYear, false) + if err == nil && updated { + cmd.Printf(" %s\n", path) + updatedCount++ + anyFileUpdated = true + } + } else { + // In dry-run mode, check if update would happen + needsUpdate, err := licensecheck.NeedsUpdate(path, targetHolder, configYear, false) + if err == nil && needsUpdate { + cmd.Printf(" %s\n", path) + updatedCount++ + anyFileUpdated = true + } + } + + return nil + }) + + return updatedCount, anyFileUpdated, licensePath +} + +// updateLicenseFile updates the LICENSE file with current year if any files were modified +func updateLicenseFile(cmd *cobra.Command, licensePath string, anyFileUpdated bool, dryRun bool) { + // If no LICENSE file was found during the walk, nothing to do + if licensePath == "" { + return + } + + targetHolder := conf.Project.CopyrightHolder + if targetHolder == "" { + targetHolder = "IBM Corp." + } + + configYear := conf.Project.CopyrightYear + + // Update LICENSE file, forcing current year if any file was updated + if !dryRun { + updated, err := licensecheck.UpdateCopyrightHeader(licensePath, targetHolder, configYear, anyFileUpdated) + if err == nil && updated { + cmd.Printf("\nUpdated LICENSE file: %s\n", licensePath) + } + } else { + needsUpdate, err := licensecheck.NeedsUpdate(licensePath, targetHolder, configYear, anyFileUpdated) + if err == nil && needsUpdate { + cmd.Printf("\n[DRY RUN] Would update LICENSE file: %s\n", licensePath) + } + } +} diff --git a/cmd/license.go b/cmd/license.go index ebaad57..5e3f529 100644 --- a/cmd/license.go +++ b/cmd/license.go @@ -7,6 +7,8 @@ import ( "errors" "fmt" "path/filepath" + "strconv" + "time" "github.com/hashicorp/copywrite/github" "github.com/hashicorp/copywrite/licensecheck" @@ -63,10 +65,14 @@ var licenseCmd = &cobra.Command{ Run: func(cmd *cobra.Command, args []string) { cmd.Printf("Licensing under the following terms: %s\n", conf.Project.License) - cmd.Printf("Using copyright years: %v\n", conf.FormatCopyrightYears()) + + // Determine appropriate copyright years for LICENSE file + licenseYears := determineLicenseCopyrightYears(dirPath) + + cmd.Printf("Using copyright years: %v\n", licenseYears) cmd.Printf("Using copyright holder: %v\n\n", conf.Project.CopyrightHolder) - copyright := "Copyright " + conf.FormatCopyrightYears() + " " + conf.Project.CopyrightHolder + copyright := "Copyright " + conf.Project.CopyrightHolder + " " + licenseYears licenseFiles, err := licensecheck.FindLicenseFiles(dirPath) if err != nil { @@ -175,3 +181,34 @@ func init() { licenseCmd.Flags().StringP("spdx", "s", "", "SPDX License Identifier indicating what the LICENSE file should represent") licenseCmd.Flags().StringP("copyright-holder", "c", "", "Copyright holder (default \"IBM Corp.\")") } + +// determineLicenseCopyrightYears determines the appropriate copyright year range for LICENSE file +// Uses git history to get the start year (first commit) and end year (last commit) +func determineLicenseCopyrightYears(dirPath string) string { + currentYear := time.Now().Year() + startYear := conf.Project.CopyrightYear + + // If no start year configured, try to auto-detect from git + if startYear == 0 { + if detectedYear, err := licensecheck.GetRepoFirstCommitYear(dirPath); err == nil && detectedYear > 0 { + startYear = detectedYear + } else { + // Fallback to current year + return strconv.Itoa(currentYear) + } + } + + // Determine end year from repository's last commit year + endYear := currentYear // Default fallback + if lastRepoCommitYear, err := licensecheck.GetRepoLastCommitYear(dirPath); err == nil && lastRepoCommitYear > 0 && lastRepoCommitYear <= currentYear { + endYear = lastRepoCommitYear + } + + // If start year equals end year, return single year + if startYear == endYear { + return strconv.Itoa(endYear) + } + + // Return year range: "startYear, endYear" + return fmt.Sprintf("%d, %d", startYear, endYear) +} diff --git a/licensecheck/update.go b/licensecheck/update.go new file mode 100644 index 0000000..b5b0536 --- /dev/null +++ b/licensecheck/update.go @@ -0,0 +1,582 @@ +// Copyright IBM Corp. 2023, 2026 +// SPDX-License-Identifier: MPL-2.0 + +package licensecheck + +import ( + "bufio" + "fmt" + "os" + "os/exec" + "path/filepath" + "regexp" + "strconv" + "strings" + "time" +) + +// CopyrightInfo holds parsed copyright information from a file +type CopyrightInfo struct { + LineNumber int + OriginalLine string + Holder string + StartYear int + EndYear int + Prefix string // Comment prefix (e.g., "// ", "# ") + TrailingText string // Any text after the years +} + +// extractAllCopyrightInfo extracts all copyright information from a file +func extractAllCopyrightInfo(filePath string) ([]*CopyrightInfo, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer func() { _ = file.Close() }() + + scanner := bufio.NewScanner(file) + lineNum := 0 + var copyrights []*CopyrightInfo + + // Scan entire file for all copyright statements + for scanner.Scan() { + lineNum++ + line := scanner.Text() + + // Check if line contains "copyright" + if strings.Contains(strings.ToLower(line), "copyright") { + info := parseCopyrightLine(line, lineNum) + if info != nil { + copyrights = append(copyrights, info) + } + } + } + + return copyrights, scanner.Err() +} + +// extractCopyrightInfo extracts the first copyright information from a file (for compatibility) +func extractCopyrightInfo(filePath string) (*CopyrightInfo, error) { + copyrights, err := extractAllCopyrightInfo(filePath) + if err != nil { + return nil, err + } + if len(copyrights) == 0 { + return nil, nil + } + return copyrights[0], nil +} + +// parseCopyrightLine extracts copyright details from a line +func parseCopyrightLine(line string, lineNum int) *CopyrightInfo { + // Extract comment prefix + prefix := extractCommentPrefix(line) + + // Get the content after the prefix + contentStart := len(prefix) + if contentStart >= len(line) { + return nil + } + content := line[contentStart:] + + // Must start with "copyright" (case-insensitive) - not just contain it anywhere + // This ensures we only match actual copyright statements, not comments that mention copyright + content = strings.TrimSpace(content) + if !regexp.MustCompile(`(?i)^copyright\b`).MatchString(content) { + return nil + } + + info := &CopyrightInfo{ + LineNumber: lineNum, + OriginalLine: line, + Prefix: prefix, + } + + // Remove "Copyright" and optional (c) from the beginning + re := regexp.MustCompile(`(?i)^copyright\s*(?:\(c\))?\s*`) + afterCopyright := re.ReplaceAllString(content, "") + afterCopyright = strings.TrimSpace(afterCopyright) + + // Strategy: Find all 4-digit years in the line + yearPattern := regexp.MustCompile(`\b(\d{4})\b`) + yearMatches := yearPattern.FindAllStringIndex(afterCopyright, -1) + + if len(yearMatches) == 0 { + // No year found, everything is the holder + info.Holder = strings.TrimSpace(afterCopyright) + return info + } + + // Find the last occurrence of years (which should be the copyright years) + // Look for patterns like "YYYY" or "YYYY, YYYY" or "YYYY-YYYY" + lastYearIdx := yearMatches[len(yearMatches)-1] + + // Extract years - check if there's a year before the last one (start year) + if len(yearMatches) >= 2 { + // Check if the previous year is close to the last year (within 20 chars) + prevYearIdx := yearMatches[len(yearMatches)-2] + between := afterCopyright[prevYearIdx[1]:lastYearIdx[0]] + + // If only separators between them, treat as start and end year + if strings.TrimSpace(strings.Trim(between, "-, ")) == "" { + startYearStr := afterCopyright[prevYearIdx[0]:prevYearIdx[1]] + if year, err := strconv.Atoi(startYearStr); err == nil { + info.StartYear = year + } + } + } + + // Extract the last year (end year or only year) + endYearStr := afterCopyright[lastYearIdx[0]:lastYearIdx[1]] + if year, err := strconv.Atoi(endYearStr); err == nil { + info.EndYear = year + if info.StartYear == 0 { + info.StartYear = year + } + } + + // Everything before the first year (or before the pair of years) is the holder + holderEndIdx := yearMatches[0][0] + if len(yearMatches) >= 2 && info.StartYear != 0 { + holderEndIdx = yearMatches[len(yearMatches)-2][0] + } + + holder := strings.TrimSpace(afterCopyright[:holderEndIdx]) + info.Holder = holder + + // Everything after the last year is trailing text - preserve it exactly + if lastYearIdx[1] < len(afterCopyright) { + trailing := afterCopyright[lastYearIdx[1]:] + if trailing != "" { + info.TrailingText = trailing + } + } + + return info +} + +// extractCommentPrefix extracts comment markers from the beginning of a line +func extractCommentPrefix(line string) string { + trimmed := strings.TrimLeft(line, " \t") + leadingSpace := line[:len(line)-len(trimmed)] + + // Check for common comment prefixes (ordered by specificity - longer prefixes first) + commentPrefixes := []string{ + "<%/* ", "<%/*", // EJS templates + "(** ", "(**", // OCaml + "/** ", "/**", // JSDoc-style comments + "