diff --git a/lib/library_version_analysis/check_version_status.rb b/lib/library_version_analysis/check_version_status.rb index 6a79192..5207528 100755 --- a/lib/library_version_analysis/check_version_status.rb +++ b/lib/library_version_analysis/check_version_status.rb @@ -3,6 +3,7 @@ require "pry-byebug" require "library_version_analysis/library_tracking" require "library_version_analysis/configuration" +require "library_version_analysis/ownership" module LibraryVersionAnalysis Versionline = Struct.new( @@ -251,7 +252,11 @@ def server_data(results, repository, source) # rubocop:disable Metrics/AbcSize, results.each do |real_name, row| name = OBFUSCATE_WORDS ? obfuscate(real_name) : real_name - libraries.push({name: name, owner: row.owner, owner_reason: row.owner_reason, version: row.current_version}) + # Canonicalize owner on the outbound payload only. Do not mutate + # row.owner — downstream branching (e.g. line.owner == :attention_needed + # in #get_mode_summary) depends on the original symbol/string. See + # openspec change `emit-canonical-owner-names` (LIBTRACK-132). + libraries.push({name: name, owner: LibraryVersionAnalysis::Ownership.canonicalize(row.owner), owner_reason: row.owner_reason, version: row.current_version}) row.vulnerabilities&.each do |vuln| permalink = OBFUSCATE_WORDS ? "https://github.com/advisories" : vuln.permalink identifier = OBFUSCATE_WORDS ? "\"GHSA-XXX\", \"CVE-XXX\"" : vuln.identifier.join(", ") diff --git a/lib/library_version_analysis/ownership.rb b/lib/library_version_analysis/ownership.rb index 20c9d25..0275849 100644 --- a/lib/library_version_analysis/ownership.rb +++ b/lib/library_version_analysis/ownership.rb @@ -2,6 +2,25 @@ module LibraryVersionAnalysis module Ownership OWNER_REASON_ASSIGNED = "-assigned-".freeze + # Canonicalize an owner team name for use in the outbound upload payload. + # + # This rule MUST stay in lockstep with `library_tracking`'s + # `Owner.canonicalize_team`. If you change one, change the other in the same + # release. See openspec change `emit-canonical-owner-names` (LIBTRACK-132) + # for the shared canonicalization contract. + # + # Accepts strings or symbols. Returns a canonical lowercase string with + # surrounding whitespace, quotes (`"`/`'`), and colons stripped. Empty, + # nil, or punctuation-only inputs fall back to the literal `"unknown"`. + def self.canonicalize(name) + s = name.to_s.strip + s = s.gsub(/\A["']+|["']+\z/, "") + s = s.gsub(/\A:+|:+\z/, "") + s = s.strip + s = s.downcase + s.empty? ? "unknown" : s + end + def add_transitive_ownerships(parsed_results) parsed_results.select { |_, result_data| unknown_owner?(result_data.owner) }.each do |name, line_data| @current_library = name diff --git a/openspec/changes/emit-canonical-owner-names/.openspec.yaml b/openspec/changes/emit-canonical-owner-names/.openspec.yaml new file mode 100644 index 0000000..28882f7 --- /dev/null +++ b/openspec/changes/emit-canonical-owner-names/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-19 diff --git a/openspec/changes/emit-canonical-owner-names/README.md b/openspec/changes/emit-canonical-owner-names/README.md new file mode 100644 index 0000000..310c221 --- /dev/null +++ b/openspec/changes/emit-canonical-owner-names/README.md @@ -0,0 +1,3 @@ +# emit-canonical-owner-names + +Canonicalize owner team names in upload payloads so library_tracking does not have to absorb spelling variants like :bizops vs bizops vs :bizops: diff --git a/openspec/changes/emit-canonical-owner-names/design.md b/openspec/changes/emit-canonical-owner-names/design.md new file mode 100644 index 0000000..2c94fb4 --- /dev/null +++ b/openspec/changes/emit-canonical-owner-names/design.md @@ -0,0 +1,75 @@ +## Context + +This gem analyzes a repository's dependency tree and produces an upload payload that `library_tracking` ingests. Owner team names enter the in-memory data model from several parser paths: + +| Source | Example value emitted | Notes | +| ------------------------------------- | ------------------------ | --------------------------------------------------------------------- | +| `gemfile.rb#add_ownership_from_gemfile` | `":bizops"` | Regex `/\s*jgem\s*(\S*),\s*"(\S*)"/` captures the symbol verbatim | +| `gemfile.rb` with quoted owner | `"\"bizops\""` | Same regex captures quotes when style is `jgem "bizops", "..."` | +| `gemfile.rb#add_ownership_from_gemspecs` | `":bizops"` | Explicitly prefixes `:` if missing; no trailing-colon defence | +| `npm.rb`, `pnpm.rb` | varies by YAML / config | Whatever the workspace ownership config contains | +| `configuration.rb` special-case map | varies | Raw YAML values | +| `ownership.rb#add_attention_needed` | `:attention_needed` | Ruby symbol; serializes as `"attention_needed"` via `to_json` | + +Every one of these paths terminates at one place: `CheckVersionStatus#server_data`, which loops over `results` and calls `libraries.push({name: ..., owner: row.owner, ...})`. That is the single seam through which the value reaches the upload payload. + +## Goals / Non-Goals + +**Goals:** + +- One canonicalization rule, defined once, applied at one place. +- Identical rule to the server-side `Owner.canonicalize_team` rule so both repos agree on canonical form even when versions skew. +- Idempotent and deterministic: `canonicalize(canonicalize(x)) == canonicalize(x)`. +- Preserve the in-memory `Versionline.owner` value so parsers, logging, and unit tests that inspect intermediate state are unaffected. + +**Non-Goals:** + +- Normalizing at the parser level. Multiple call sites means duplication and drift risk. +- Touching the Ruby symbol vs string distinction in the data model. `Versionline.owner` may continue to be a `Symbol` (`:attention_needed`, `:unspecified`) or a `String`. The canonicalizer accepts both. +- Changing what `Ownership#unknown_owner?` considers unknown. That predicate operates on in-memory parser state, not on outbound payload state. +- Handling `frontend_foundations` typo cleanup — explicitly deferred. + +## Decisions + +### Decision: Canonicalization rule (mirrors server) + +```ruby +def self.canonicalize(name) + s = name.to_s.strip + s = s.gsub(/\A["']+|["']+\z/, "") + s = s.gsub(/\A:+|:+\z/, "") + s = s.strip + s = s.downcase + s.empty? ? "unknown" : s +end +``` + +This is byte-for-byte the same rule the server applies. Two repositories, one rule. If the rule ever needs to change, both repos update together — by convention captured in this design doc and in the server change's design doc. + +Alternative considered: weaker stripping (only `:`). Rejected for the same reason as the server: the Gemfile regex captures quote characters too, so quote-wrapped names are a real source of variation. + +### Decision: Apply at one seam — `#server_data` + +The function `CheckVersionStatus#server_data` is where in-memory parser results become an outbound payload. Apply `Ownership.canonicalize` to `row.owner` when constructing the `libraries.push({...})` hash. The same value is also referenced from `vulns.push(...)` and `new_versions.push(...)` but those don't include the owner field, so a single edit covers the payload's owner surface. + +Alternative considered: a `before_serialize` hook on `Versionline`. Rejected as overkill — there is no serialization framework here, just a manually-built hash. + +Alternative considered: apply at parser level (inside `gemfile.rb`, `pnpm.rb`, etc.). Rejected — invites drift between parsers, makes unit tests interpret raw parser output differently, and produces no behavioural benefit since nothing else consumes parser output before `#server_data` does. + +### Decision: Keep `Versionline.owner` untouched + +Some parser paths use `row.owner == :attention_needed` for control-flow decisions (e.g. counting `unowned_issues` in `mode_summary`). Canonicalizing in-place would convert those symbols to strings and break the equality check. Confining normalization to the outbound payload sidesteps the problem entirely. + +## Risks / Trade-offs + +- **Risk**: a downstream tool reads the `[upload]` debug log and is surprised that the rendered owner names look different. → Mitigation: this is a strictly improving change (names get cleaner), and the log output is for operator inspection. +- **Risk**: payload size increases because every owner is recomputed instead of passed by reference. → Negligible — same string length category, single `Library` row per library. +- **Trade-off**: we apply normalization only on the outbound side, so the gem's internal data model can still carry raw parser output. That is intentional (see decisions) but means a future contributor reading `row.owner` could be surprised to find it differs from what the server sees. The canonicalization function and its single call site should be commented to flag this. + +## Migration Plan + +No data migration. This is a code-only change that affects future uploads only. Ship via the usual release cycle; consumers do not need to coordinate. If the server change (`normalize-owner-team-names` in `library_tracking`) has not yet shipped, this gem change is still safe: it produces strictly cleaner inputs to an unchanged server. + +## Open Questions + +None. diff --git a/openspec/changes/emit-canonical-owner-names/proposal.md b/openspec/changes/emit-canonical-owner-names/proposal.md new file mode 100644 index 0000000..fe343bd --- /dev/null +++ b/openspec/changes/emit-canonical-owner-names/proposal.md @@ -0,0 +1,29 @@ +## Why + +LIBTRACK-132: `library_tracking`'s Owners tab shows duplicate rows for the same team (e.g. `:bizops`, `bizops`, `:bizops:`) because the upload payload produced by this gem contains owner names in whatever shape each parser happens to extract — colons leak in from Ruby symbol syntax, quotes leak in from Gemfile regex captures, and the `:attention_needed` symbol serializes inconsistently with other sources. While the server is being hardened in parallel to canonicalize on write, fixing the source means future uploads are clean regardless of which gem version is talking to which server, and operators reading the payload logs see canonical names instead of raw parser output. + +## What Changes + +- Introduce a single canonicalization helper, `LibraryVersionAnalysis::Ownership.canonicalize` (class-level), that returns the canonical form of any owner name. + - Canonical form: stringify, strip surrounding whitespace, strip surrounding `"` and `'` characters, strip leading and trailing `:` characters, strip whitespace again, downcase. An empty result becomes `unknown`. +- Apply the helper at the single seam where upload payloads are assembled: `LibraryVersionAnalysis::CheckVersionStatus#server_data`, immediately before each `libraries.push({...})` call. This ensures every parser path (Gemfile, gemspec, pnpm, npm, special-case YAML, transitive ownership, `:attention_needed` placeholder) goes through one normalization point. +- No changes to parser internals. The in-memory `Versionline.owner` field continues to carry whatever the parser produced; only the outbound payload is normalized. +- Out of scope: this change does not touch any `library_tracking` server code, does not migrate or rewrite any existing data, and does not address the `frontend_foundations` naming variants (those will be handled manually per ticket guidance). + +## Capabilities + +### New Capabilities + +- `owner-name-canonicalization`: canonical owner name handling for outbound upload payloads. + +### Modified Capabilities + +None. There are no existing `openspec/specs/` entries in this repository. + +## Impact + +- Code: `lib/library_version_analysis/ownership.rb` (new class method), `lib/library_version_analysis/check_version_status.rb` (single call site in `#server_data`). +- Tests: add unit specs for the helper; extend existing `check_version_analysis_spec.rb` (or add a focused spec) to confirm the payload is canonical for representative inputs. +- Consumers: `library_tracking`'s `/api/libraries/upload` endpoint will receive canonical names. Behaviour is unchanged because the server is being updated to normalize on write anyway — this change just means the server's normalization is a no-op for payloads from current versions of the gem. +- Logs: the `[upload]` log lines emitted by `log_server_payload` will show canonical names, making operator review easier. +- Risk: very low. The transformation is idempotent and deterministic; downstream code paths that store owner names see the same or stricter spelling, never a new variation. diff --git a/openspec/changes/emit-canonical-owner-names/specs/owner-name-canonicalization/spec.md b/openspec/changes/emit-canonical-owner-names/specs/owner-name-canonicalization/spec.md new file mode 100644 index 0000000..3947f06 --- /dev/null +++ b/openspec/changes/emit-canonical-owner-names/specs/owner-name-canonicalization/spec.md @@ -0,0 +1,49 @@ +## ADDED Requirements + +### Requirement: Canonical owner name function + +The gem SHALL expose `LibraryVersionAnalysis::Ownership.canonicalize(name)` that returns the canonical form of any input. The canonical form is produced by: stringifying the input, stripping outer whitespace, stripping any surrounding `"` or `'` characters, stripping any leading or trailing `:` characters, stripping outer whitespace again, downcasing, and replacing an empty result with the literal string `unknown`. The function MUST be idempotent: `canonicalize(canonicalize(x))` returns the same value as `canonicalize(x)` for every input. + +#### Scenario: Strips leading and trailing colons +- **WHEN** the input is `:bizops:` +- **THEN** the result is `bizops` + +#### Scenario: Strips wrapping quotes captured by the Gemfile regex +- **WHEN** the input is `"bizops"` or `'bizops'` +- **THEN** the result is `bizops` + +#### Scenario: Downcases mixed case +- **WHEN** the input is `:Bizops` +- **THEN** the result is `bizops` + +#### Scenario: Handles Ruby symbols +- **WHEN** the input is `:attention_needed` as a Ruby `Symbol` +- **THEN** the result is `attention_needed` + +#### Scenario: Empty or punctuation-only input falls back to unknown +- **WHEN** the input is the empty string, `nil`, only whitespace, or only colons/quotes +- **THEN** the result is `unknown` + +#### Scenario: Function is idempotent +- **WHEN** the input is any string `s` +- **THEN** `canonicalize(canonicalize(s))` equals `canonicalize(s)` + +### Requirement: Upload payload owner values are canonical + +The gem SHALL apply `Ownership.canonicalize` to every owner value placed into the upload payload by `CheckVersionStatus#server_data`. The in-memory `Versionline.owner` field MAY retain its pre-canonicalization value; only the outbound payload is required to be canonical. + +#### Scenario: Gemfile-sourced owners are stripped of colons in the payload +- **GIVEN** a parser populates `row.owner` as `":bizops"` +- **WHEN** `CheckVersionStatus#server_data` builds the upload payload +- **THEN** the corresponding entry in `payload[:libraries]` has `owner` equal to `bizops` + +#### Scenario: attention_needed symbol becomes the canonical string in the payload +- **GIVEN** a parser populates `row.owner` as the Ruby symbol `:attention_needed` +- **WHEN** `CheckVersionStatus#server_data` builds the upload payload +- **THEN** the corresponding entry in `payload[:libraries]` has `owner` equal to `attention_needed` + +#### Scenario: In-memory Versionline.owner is unchanged +- **GIVEN** a parser populates `row.owner` as `:attention_needed` +- **WHEN** `CheckVersionStatus#server_data` runs +- **THEN** `row.owner` still equals `:attention_needed` after the payload is built +- **AND** downstream logic that branches on `row.owner == :attention_needed` continues to work diff --git a/openspec/changes/emit-canonical-owner-names/tasks.md b/openspec/changes/emit-canonical-owner-names/tasks.md new file mode 100644 index 0000000..66b1619 --- /dev/null +++ b/openspec/changes/emit-canonical-owner-names/tasks.md @@ -0,0 +1,22 @@ +## 1. Canonicalization helper + +- [x] 1.1 Add `LibraryVersionAnalysis::Ownership.canonicalize(name)` as a class method that implements the spec rule (stringify, strip whitespace, strip surrounding quotes, strip leading/trailing colons, downcase, fallback to `unknown` on empty). +- [x] 1.2 Add a comment above the method noting that it must stay in lockstep with `library_tracking`'s `Owner.canonicalize_team`. +- [x] 1.3 Add unit specs covering: colon stripping, quote stripping, downcase, `:attention_needed` symbol, empty/`nil`/whitespace fallback, and the idempotency requirement. + +## 2. Apply at the payload seam + +- [x] 2.1 In `CheckVersionStatus#server_data`, wrap the `owner` value in `LibraryVersionAnalysis::Ownership.canonicalize(...)` at the `libraries.push({...})` call site. +- [x] 2.2 Verify no other `payload[...]` builder includes the owner field (sanity check `vulns.push` and `new_versions.push`). +- [x] 2.3 Confirm `row.owner` is not mutated; only the payload entry is normalized. + +## 3. Tests + +- [x] 3.1 Extend `spec/check_version_analysis_spec.rb` (or add a focused spec file) covering the three payload scenarios: Gemfile-style `:bizops` input, `:attention_needed` symbol input, and quote-wrapped `"bizops"` input. +- [x] 3.2 Add a regression assertion confirming `row.owner` retains its pre-payload value after `#server_data` runs. +- [x] 3.3 Run `bundle exec rspec`; all specs pass. + +## 4. Verification + +- [ ] 4.1 Manual run: execute the gem against a sample repo and inspect the `[upload]` log output; confirm owner names are canonical. +- [ ] 4.2 Confirm payload sent to `library_tracking` contains canonical owner names by hitting a staging server or by intercepting the serialized payload in a spec. diff --git a/spec/check_version_analysis_spec.rb b/spec/check_version_analysis_spec.rb index 20e7028..0355eb9 100644 --- a/spec/check_version_analysis_spec.rb +++ b/spec/check_version_analysis_spec.rb @@ -64,4 +64,43 @@ expect(mode.unowned_issues).to eq(3) end end + + describe "#server_data" do + let(:payload_results) do + { + gem_a: LibraryVersionAnalysis::Versionline.new(owner: ":bizops", owner_reason: nil, current_version: "1.0.0", latest_version: nil, major: 0, minor: 0, patch: 0, vulnerabilities: nil, dependency_graph: nil), + gem_b: LibraryVersionAnalysis::Versionline.new(owner: :attention_needed, owner_reason: nil, current_version: "2.0.0", latest_version: nil, major: 0, minor: 0, patch: 0, vulnerabilities: nil, dependency_graph: nil), + gem_c: LibraryVersionAnalysis::Versionline.new(owner: '"bizops"', owner_reason: nil, current_version: "3.0.0", latest_version: nil, major: 0, minor: 0, patch: 0, vulnerabilities: nil, dependency_graph: nil), + } + end + + it "canonicalizes Gemfile-style symbol owners like :bizops to bizops" do + payload = subject.server_data(payload_results, "jobber", "gemfile") + entry = payload[:libraries].find { |l| l[:name] == :gem_a } + expect(entry[:owner]).to eq("bizops") + end + + it "canonicalizes the :attention_needed symbol to the string attention_needed" do + payload = subject.server_data(payload_results, "jobber", "gemfile") + entry = payload[:libraries].find { |l| l[:name] == :gem_b } + expect(entry[:owner]).to eq("attention_needed") + end + + it "canonicalizes quote-wrapped owner names captured from the Gemfile regex" do + payload = subject.server_data(payload_results, "jobber", "gemfile") + entry = payload[:libraries].find { |l| l[:name] == :gem_c } + expect(entry[:owner]).to eq("bizops") + end + + it "does not mutate Versionline#owner on the in-memory rows" do + original_owners = payload_results.transform_values(&:owner) + + subject.server_data(payload_results, "jobber", "gemfile") + + payload_results.each do |name, row| + expect(row.owner).to eq(original_owners[name]), + "expected row.owner for #{name} to remain #{original_owners[name].inspect} after #server_data, got #{row.owner.inspect}" + end + end + end end diff --git a/spec/ownership_spec.rb b/spec/ownership_spec.rb new file mode 100644 index 0000000..a5cb173 --- /dev/null +++ b/spec/ownership_spec.rb @@ -0,0 +1,73 @@ +require "library_version_analysis/ownership" + +RSpec.describe LibraryVersionAnalysis::Ownership do + describe ".canonicalize" do + it "strips leading and trailing colons" do + expect(described_class.canonicalize(":bizops:")).to eq("bizops") + expect(described_class.canonicalize(":bizops")).to eq("bizops") + expect(described_class.canonicalize("bizops:")).to eq("bizops") + end + + it "strips surrounding double quotes" do + expect(described_class.canonicalize('"bizops"')).to eq("bizops") + end + + it "strips surrounding single quotes" do + expect(described_class.canonicalize("'bizops'")).to eq("bizops") + end + + it "downcases mixed case input" do + expect(described_class.canonicalize(":Bizops")).to eq("bizops") + expect(described_class.canonicalize("BIZOPS")).to eq("bizops") + end + + it "handles Ruby symbols" do + expect(described_class.canonicalize(:attention_needed)).to eq("attention_needed") + expect(described_class.canonicalize(:bizops)).to eq("bizops") + end + + it "falls back to 'unknown' for nil" do + expect(described_class.canonicalize(nil)).to eq("unknown") + end + + it "falls back to 'unknown' for empty string" do + expect(described_class.canonicalize("")).to eq("unknown") + end + + it "falls back to 'unknown' for whitespace-only input" do + expect(described_class.canonicalize(" ")).to eq("unknown") + end + + it "falls back to 'unknown' for punctuation-only input" do + expect(described_class.canonicalize(":")).to eq("unknown") + expect(described_class.canonicalize("::")).to eq("unknown") + expect(described_class.canonicalize('""')).to eq("unknown") + expect(described_class.canonicalize("''")).to eq("unknown") + end + + it "strips surrounding whitespace" do + expect(described_class.canonicalize(" bizops ")).to eq("bizops") + expect(described_class.canonicalize(" :bizops: ")).to eq("bizops") + end + + it "is idempotent for representative inputs" do + [ + ":bizops", + ":bizops:", + '"bizops"', + "'bizops'", + ":Bizops", + :attention_needed, + nil, + "", + " ", + "bizops", + "Frontend_Foundations", + ].each do |input| + once = described_class.canonicalize(input) + twice = described_class.canonicalize(once) + expect(twice).to eq(once), "expected canonicalize to be idempotent for #{input.inspect}, got #{once.inspect} -> #{twice.inspect}" + end + end + end +end