From 627aefc2dccfcafa0a5f565478e7764db9e5d61c Mon Sep 17 00:00:00 2001 From: Facundo Rodriguez Date: Wed, 24 Jun 2026 20:38:02 -0300 Subject: [PATCH 1/3] fix(purl): pin packageurl-js serializer dependency --- package-lock.json | 2 +- package.json | 2 +- src/eol/utils.test.ts | 8 ++++---- src/eol/utils.ts | 5 +++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/package-lock.json b/package-lock.json index 876f734..7e2b609 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,7 @@ "@cyclonedx/cyclonedx-library": "^9.4.1", "fast-xml-parser": "^5.3.3", "json-schema-to-typescript": "^15.0.4", - "packageurl-js": "^2.0.1" + "packageurl-js": "2.0.1" }, "devDependencies": { "@prettier/plugin-oxc": "^0.1.3", diff --git a/package.json b/package.json index 2c9b316..fe85397 100644 --- a/package.json +++ b/package.json @@ -42,7 +42,7 @@ "@cyclonedx/cyclonedx-library": "^9.4.1", "fast-xml-parser": "^5.3.3", "json-schema-to-typescript": "^15.0.4", - "packageurl-js": "^2.0.1" + "packageurl-js": "2.0.1" }, "devDependencies": { "@prettier/plugin-oxc": "^0.1.3", diff --git a/src/eol/utils.test.ts b/src/eol/utils.test.ts index 0539c16..3824fb5 100644 --- a/src/eol/utils.test.ts +++ b/src/eol/utils.test.ts @@ -194,17 +194,17 @@ describe('canonicalizePurl', () => { }); }); - // --- Version byte-preservation --- + // --- Version value preservation --- - describe('version byte-preservation', () => { - test('version with mixed case is not modified — exact canonical output', () => { + describe('version value preservation', () => { + test('version with mixed case is not case-folded — exact canonical output', () => { assert.equal( canonicalizePurl('pkg:nuget/serilog@2.0.0-Beta3'), 'pkg:nuget/serilog@2.0.0-Beta3', ); }); - test('version with +build metadata is not modified — exact canonical output', () => { + test('version with +build metadata preserves value — exact canonical output', () => { assert.equal( canonicalizePurl('pkg:maven/g/a@1.0.0+build5'), 'pkg:maven/g/a@1.0.0+build5', diff --git a/src/eol/utils.ts b/src/eol/utils.ts index 5d7a0ad..40f7777 100644 --- a/src/eol/utils.ts +++ b/src/eol/utils.ts @@ -28,8 +28,9 @@ const CASE_INSENSITIVE_TYPES = new Set(['nuget', 'composer', 'cargo', 'npm']); * 4. Reconstruct with new PackageURL(...).toString() — the library serializes * qualifiers and subpath with canonical percent-encoding. * - * Version is byte-preserved. Qualifiers and subpath are canonically re-encoded - * by the serializer (deterministic and idempotent). + * Version value is preserved; reserved-character encoding may be canonicalized. + * Qualifiers and subpath are canonically re-encoded by the serializer + * (deterministic and idempotent). * * Returns the input unchanged for unparseable strings; never throws. * From cc2fe60ca90deaf1da353fa2e96e7e8dafcb0e70 Mon Sep 17 00:00:00 2001 From: Facundo Rodriguez Date: Thu, 25 Jun 2026 11:37:39 -0300 Subject: [PATCH 2/3] feat(eol): add purl identity normalization --- src/eol/utils.test.ts | 528 ++++++++++++++++++++++++++++++------------ src/eol/utils.ts | 247 ++++++++++++++++---- src/index.ts | 10 +- 3 files changed, 593 insertions(+), 192 deletions(-) diff --git a/src/eol/utils.test.ts b/src/eol/utils.test.ts index 3824fb5..e6aad79 100644 --- a/src/eol/utils.test.ts +++ b/src/eol/utils.test.ts @@ -1,6 +1,11 @@ import { test, describe } from 'node:test'; import { strict as assert } from 'node:assert'; -import { deriveComponentStatus, canonicalizePurl } from './utils.ts'; +import { + deriveComponentStatus, + normalizePurlIdentity, + createPurlIdentity, + canonicalizeVersionFilter, +} from './utils.ts'; import type { EolScanComponentMetadata } from '../types/eol-scan.ts'; // These are required for the object but not used to derive the status @@ -81,188 +86,486 @@ describe('deriveComponentStatus', () => { }); }); -describe('canonicalizePurl', () => { - // --- Canonical output matrix: exact expected strings --- - // Each assertion pins the literal output produced by the library-based implementation. - // Each assertion pins the exact output; a change in alias, case, or encoding fails a concrete string comparison. +describe('createPurlIdentity', () => { + test('creates versioned identity with alias canonical component and version purls', () => { + assert.deepEqual(createPurlIdentity('pkg:go/github.com/foo/bar@v1.0.0'), { + rawPurl: 'pkg:go/github.com/foo/bar@v1.0.0', + canonicalComponentPurl: 'pkg:golang/github.com/foo/bar', + canonicalVersionPurl: 'pkg:golang/github.com/foo/bar@v1.0.0', + legacyComponentPurl: 'pkg:go/github.com/foo/bar', + }); + }); + + test('creates versioned identity with case-folded canonical and raw legacy component purls', () => { + assert.deepEqual( + createPurlIdentity('pkg:nuget/Serilog.Sinks.Console@2.1.0'), + { + rawPurl: 'pkg:nuget/Serilog.Sinks.Console@2.1.0', + canonicalComponentPurl: 'pkg:nuget/serilog.sinks.console', + canonicalVersionPurl: 'pkg:nuget/serilog.sinks.console@2.1.0', + legacyComponentPurl: 'pkg:nuget/Serilog.Sinks.Console', + }, + ); + }); + + test('creates versioned identity without legacy component when case-sensitive canonical output is unchanged', () => { + assert.deepEqual( + createPurlIdentity('pkg:maven/org.Apache.Commons/commons-Lang3@3.12.0'), + { + rawPurl: 'pkg:maven/org.Apache.Commons/commons-Lang3@3.12.0', + canonicalComponentPurl: 'pkg:maven/org.Apache.Commons/commons-Lang3', + canonicalVersionPurl: + 'pkg:maven/org.Apache.Commons/commons-Lang3@3.12.0', + }, + ); + }); + + test('creates versionless identity without adding a version purl', () => { + assert.deepEqual(createPurlIdentity('pkg:cargo/MyLib'), { + rawPurl: 'pkg:cargo/MyLib', + canonicalComponentPurl: 'pkg:cargo/mylib', + legacyComponentPurl: 'pkg:cargo/MyLib', + }); + }); + + test('passes through parse failures as component identity without throwing', () => { + assert.deepEqual(createPurlIdentity('not-a-purl'), { + rawPurl: 'not-a-purl', + canonicalComponentPurl: 'not-a-purl', + }); + }); + + test('creates a versionless Swift component identity from a versioned Swift PURL', () => { + const calls: Array<{ purl: string; error: unknown }> = []; + + assert.deepEqual( + createPurlIdentity( + 'pkg:swift/github.com/apple/swift-argument-parser@1.2.3', + (purl, error) => { + calls.push({ purl, error }); + }, + ), + { + rawPurl: 'pkg:swift/github.com/apple/swift-argument-parser@1.2.3', + canonicalComponentPurl: + 'pkg:swift/github.com/apple/swift-argument-parser', + canonicalVersionPurl: + 'pkg:swift/github.com/apple/swift-argument-parser@1.2.3', + }, + ); + assert.equal(calls.length, 0); + }); + + test('keeps Debian qualified component identities distinct when version is removed', () => { + const i386Identity = createPurlIdentity( + 'pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie', + ); + const amd64Identity = createPurlIdentity( + 'pkg:deb/debian/curl@7.50.3-1?arch=amd64&distro=jessie', + ); + + assert.deepEqual(i386Identity, { + rawPurl: 'pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie', + canonicalComponentPurl: 'pkg:deb/debian/curl?arch=i386&distro=jessie', + canonicalVersionPurl: + 'pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie', + }); + assert.deepEqual(amd64Identity, { + rawPurl: 'pkg:deb/debian/curl@7.50.3-1?arch=amd64&distro=jessie', + canonicalComponentPurl: 'pkg:deb/debian/curl?arch=amd64&distro=jessie', + canonicalVersionPurl: + 'pkg:deb/debian/curl@7.50.3-1?arch=amd64&distro=jessie', + }); + assert.notEqual( + i386Identity.canonicalComponentPurl, + amd64Identity.canonicalComponentPurl, + ); + }); + + test('preserves required Conan qualifiers in versionless component identity', () => { + assert.doesNotThrow(() => + createPurlIdentity( + 'pkg:conan/conan-center/openssl@1.1.1?user=conan&channel=stable', + () => { + throw new Error('boom'); + }, + ), + ); + + assert.deepEqual( + createPurlIdentity( + 'pkg:conan/conan-center/openssl@1.1.1?user=conan&channel=stable', + ), + { + rawPurl: + 'pkg:conan/conan-center/openssl@1.1.1?user=conan&channel=stable', + canonicalComponentPurl: + 'pkg:conan/conan-center/openssl?channel=stable&user=conan', + canonicalVersionPurl: + 'pkg:conan/conan-center/openssl@1.1.1?channel=stable&user=conan', + legacyComponentPurl: + 'pkg:conan/conan-center/openssl?user=conan&channel=stable', + }, + ); + }); + + test('preserves subpath when version is removed from component identity', () => { + assert.deepEqual( + createPurlIdentity( + 'pkg:npm/%40scope/pkg@1.0.0?repository_url=https://example.com/repo#/dist/file.js', + ), + { + rawPurl: + 'pkg:npm/%40scope/pkg@1.0.0?repository_url=https://example.com/repo#/dist/file.js', + canonicalComponentPurl: + 'pkg:npm/%40scope/pkg?repository_url=https%3A%2F%2Fexample.com%2Frepo#dist/file.js', + canonicalVersionPurl: + 'pkg:npm/%40scope/pkg@1.0.0?repository_url=https%3A%2F%2Fexample.com%2Frepo#dist/file.js', + legacyComponentPurl: + 'pkg:npm/%40scope/pkg?repository_url=https://example.com/repo#/dist/file.js', + }, + ); + }); + + test('preserves raw qualifiers for versionless component legacy identity', () => { + assert.deepEqual( + createPurlIdentity( + 'pkg:conan/conan-center/openssl?user=conan&channel=stable', + () => { + throw new Error('boom'); + }, + ), + { + rawPurl: 'pkg:conan/conan-center/openssl?user=conan&channel=stable', + canonicalComponentPurl: + 'pkg:conan/conan-center/openssl?channel=stable&user=conan', + legacyComponentPurl: + 'pkg:conan/conan-center/openssl?user=conan&channel=stable', + }, + ); + }); + + test('normalizes a bare-major Go version without invoking type validators', () => { + const calls: Array<{ purl: string; error: unknown }> = []; + assert.deepEqual( + createPurlIdentity('pkg:go/github.com/foo/bar@v1', (purl, error) => { + calls.push({ purl, error }); + }), + { + rawPurl: 'pkg:go/github.com/foo/bar@v1', + canonicalComponentPurl: 'pkg:golang/github.com/foo/bar', + canonicalVersionPurl: 'pkg:golang/github.com/foo/bar@v1', + legacyComponentPurl: 'pkg:go/github.com/foo/bar', + }, + ); + assert.equal(calls.length, 0); + }); - describe('type alias translation', () => { + test('swallows throwing callbacks without breaking identity creation', () => { + assert.doesNotThrow(() => + createPurlIdentity('pkg:go/github.com/foo/bar@v1', () => { + throw new Error('boom'); + }), + ); + }); + + test('keeps encoded npm canonical identity idempotent across raw and pre-encoded scoped inputs', () => { + const rawIdentity = createPurlIdentity('pkg:npm/@Angular/Core@15.0.0'); + const encodedIdentity = createPurlIdentity( + 'pkg:npm/%40angular/core@15.0.0', + ); + + assert.deepEqual(rawIdentity, { + rawPurl: 'pkg:npm/@Angular/Core@15.0.0', + canonicalComponentPurl: 'pkg:npm/%40angular/core', + canonicalVersionPurl: 'pkg:npm/%40angular/core@15.0.0', + legacyComponentPurl: 'pkg:npm/@Angular/Core', + }); + assert.deepEqual(encodedIdentity, { + rawPurl: 'pkg:npm/%40angular/core@15.0.0', + canonicalComponentPurl: 'pkg:npm/%40angular/core', + canonicalVersionPurl: 'pkg:npm/%40angular/core@15.0.0', + }); + assert.equal( + rawIdentity.canonicalComponentPurl, + encodedIdentity.canonicalComponentPurl, + ); + assert.equal( + rawIdentity.canonicalVersionPurl, + encodedIdentity.canonicalVersionPurl, + ); + }); + + test('does not apply extra pypi normalization beyond packageurl-js canonical output', () => { + assert.deepEqual(createPurlIdentity('pkg:pypi/my-package@1.0.0'), { + rawPurl: 'pkg:pypi/my-package@1.0.0', + canonicalComponentPurl: 'pkg:pypi/my-package', + canonicalVersionPurl: 'pkg:pypi/my-package@1.0.0', + }); + }); + + test('does not apply extra pub normalization beyond packageurl-js canonical output', () => { + assert.deepEqual(createPurlIdentity('pkg:pub/my_package@1.0.0'), { + rawPurl: 'pkg:pub/my_package@1.0.0', + canonicalComponentPurl: 'pkg:pub/my_package', + canonicalVersionPurl: 'pkg:pub/my_package@1.0.0', + }); + }); +}); + +describe('canonicalizeVersionFilter', () => { + test('returns undefined for an undefined filter', () => { + assert.equal(canonicalizeVersionFilter(undefined), undefined); + }); + + test('canonicalizes context and result purls with exact output', () => { + assert.deepEqual( + canonicalizeVersionFilter({ + contextPurls: [ + 'pkg:nuget/Serilog.Sinks.Console@2.1.0', + 'pkg:go/github.com/foo/bar@v1.0.0', + ], + resultPurls: [ + 'pkg:composer/Foo/Bar@1.0.0', + 'pkg:maven/org.Apache.Commons/commons-Lang3@3.12.0', + ], + }), + { + contextPurls: [ + 'pkg:nuget/serilog.sinks.console@2.1.0', + 'pkg:golang/github.com/foo/bar@v1.0.0', + ], + resultPurls: [ + 'pkg:composer/foo/bar@1.0.0', + 'pkg:maven/org.Apache.Commons/commons-Lang3@3.12.0', + ], + }, + ); + }); + + test('preserves additional filter fields while canonicalizing purl arrays', () => { + assert.deepEqual( + canonicalizeVersionFilter({ + contextPurls: ['pkg:cargo/MyLib@0.1.0'], + source: 'reconcile', + }), + { contextPurls: ['pkg:cargo/mylib@0.1.0'], source: 'reconcile' }, + ); + }); +}); + +describe('normalizePurlIdentity', () => { + // --- Preserve-version canonical output matrix: exact expected strings --- + + describe('preserve-version type alias translation', () => { test('go maps to golang — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:go/github.com/foo/bar@v1.0.0'), + normalizePurlIdentity('pkg:go/github.com/foo/bar@v1.0.0'), 'pkg:golang/github.com/foo/bar@v1.0.0', ); }); test('rubygems maps to gem — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:rubygems/rails@7.0.0'), + normalizePurlIdentity('pkg:rubygems/rails@7.0.0'), 'pkg:gem/rails@7.0.0', ); }); test('golang is not re-aliased — idempotent', () => { assert.equal( - canonicalizePurl('pkg:golang/github.com/foo/bar@v1.0.0'), + normalizePurlIdentity('pkg:golang/github.com/foo/bar@v1.0.0'), 'pkg:golang/github.com/foo/bar@v1.0.0', ); }); test('gem is not re-aliased — idempotent', () => { assert.equal( - canonicalizePurl('pkg:gem/rails@7.0.0'), + normalizePurlIdentity('pkg:gem/rails@7.0.0'), 'pkg:gem/rails@7.0.0', ); }); test('npm type is not aliased — passes through unchanged', () => { assert.equal( - canonicalizePurl('pkg:npm/lodash@4.17.21'), + normalizePurlIdentity('pkg:npm/lodash@4.17.21'), 'pkg:npm/lodash@4.17.21', ); }); }); - // --- Case matrix: lowercased types --- - - describe('ecosystem case normalization — lowercased allowlist', () => { + describe('preserve-version ecosystem case normalization', () => { test('nuget name is lowercased — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:nuget/Serilog.Sinks.Console@2.1.0'), + normalizePurlIdentity('pkg:nuget/Serilog.Sinks.Console@2.1.0'), 'pkg:nuget/serilog.sinks.console@2.1.0', ); }); test('composer namespace and name are lowercased — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:composer/Foo/Bar@1.0.0'), + normalizePurlIdentity('pkg:composer/Foo/Bar@1.0.0'), 'pkg:composer/foo/bar@1.0.0', ); }); test('cargo name is lowercased — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:cargo/MyLib@0.1.0'), + normalizePurlIdentity('pkg:cargo/MyLib@0.1.0'), 'pkg:cargo/mylib@0.1.0', ); }); test('npm percent-encoded scoped name is lowercased — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:npm/%40Angular/Core@15.0.0'), + normalizePurlIdentity('pkg:npm/%40Angular/Core@15.0.0'), 'pkg:npm/%40angular/core@15.0.0', ); }); test('npm raw scoped name is percent-encoded and lowercased — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:npm/@angular/core@15.0.0'), + normalizePurlIdentity('pkg:npm/@angular/core@15.0.0'), 'pkg:npm/%40angular/core@15.0.0', ); }); }); - // --- Case matrix: case-preserved types --- - - describe('ecosystem case normalization — case-sensitive types preserved', () => { + describe('preserve-version case-sensitive types preserved', () => { test('maven groupId and artifactId are byte-unchanged — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:maven/org.Apache.Commons/commons-Lang3@3.12.0'), + normalizePurlIdentity( + 'pkg:maven/org.Apache.Commons/commons-Lang3@3.12.0', + ), 'pkg:maven/org.Apache.Commons/commons-Lang3@3.12.0', ); }); test('golang namespace path is byte-unchanged — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:golang/github.com/BurntSushi/toml@v0.4.1'), + normalizePurlIdentity('pkg:golang/github.com/BurntSushi/toml@v0.4.1'), 'pkg:golang/github.com/BurntSushi/toml@v0.4.1', ); }); test('multi-segment golang path is byte-unchanged — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:golang/github.com/Foo/Bar/Baz@v1.0.0'), + normalizePurlIdentity('pkg:golang/github.com/Foo/Bar/Baz@v1.0.0'), 'pkg:golang/github.com/Foo/Bar/Baz@v1.0.0', ); }); test('rubygems alias applies but gem name is byte-unchanged — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:rubygems/ActiveSupport@7.0.0'), + normalizePurlIdentity('pkg:rubygems/ActiveSupport@7.0.0'), 'pkg:gem/ActiveSupport@7.0.0', ); }); }); - // --- Version value preservation --- - - describe('version value preservation', () => { + describe('preserve-version version value handling', () => { test('version with mixed case is not case-folded — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:nuget/serilog@2.0.0-Beta3'), + normalizePurlIdentity('pkg:nuget/serilog@2.0.0-Beta3'), 'pkg:nuget/serilog@2.0.0-Beta3', ); }); test('version with +build metadata preserves value — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:maven/g/a@1.0.0+build5'), + normalizePurlIdentity('pkg:maven/g/a@1.0.0+build5'), 'pkg:maven/g/a@1.0.0+build5', ); }); test('versionless PURL is accepted — exact canonical output', () => { - assert.equal(canonicalizePurl('pkg:nuget/Serilog'), 'pkg:nuget/serilog'); + assert.equal( + normalizePurlIdentity('pkg:nuget/Serilog'), + 'pkg:nuget/serilog', + ); }); - }); - // --- Qualifier and subpath parsing edges --- + test('go bare-major version is preserved without invoking type validators', () => { + assert.equal( + normalizePurlIdentity('pkg:go/github.com/foo/bar@v1'), + 'pkg:golang/github.com/foo/bar@v1', + ); + }); + }); - describe('qualifier and subpath parsing', () => { - test('@ inside a qualifier value is not treated as the version separator — version is 1.0.0, qualifier canonically re-encoded', () => { - // The version is isolated as "1.0.0"; the @ in git@github.com belongs to the - // qualifier value, which the serializer re-encodes: git@github.com → git%40github.com%2Fx%2Fy.git + describe('preserve-version qualifier and subpath parsing', () => { + test('@ inside a qualifier value is not treated as the version separator', () => { assert.equal( - canonicalizePurl('pkg:npm/foo@1.0.0?vcs_url=git@github.com/x/y.git'), + normalizePurlIdentity( + 'pkg:npm/foo@1.0.0?vcs_url=git@github.com/x/y.git', + ), 'pkg:npm/foo@1.0.0?vcs_url=git%40github.com%2Fx%2Fy.git', ); }); - test('@ inside qualifier value is idempotent — canonicalize twice returns equal', () => { + test('@ inside qualifier value is idempotent — normalize twice returns equal', () => { const input = 'pkg:npm/foo@1.0.0?vcs_url=git@github.com/x/y.git'; - const once = canonicalizePurl(input); - assert.equal(canonicalizePurl(once), once); + const once = normalizePurlIdentity(input); + assert.equal(normalizePurlIdentity(once), once); }); test('# in subpath is parsed correctly — exact canonical output', () => { - // The subpath @ is re-encoded by the serializer: path@x → path%40x assert.equal( - canonicalizePurl('pkg:npm/foo@1.0.0#path@x'), + normalizePurlIdentity('pkg:npm/foo@1.0.0#path@x'), 'pkg:npm/foo@1.0.0#path%40x', ); }); - test('# in subpath is idempotent — canonicalize twice returns equal', () => { + test('# in subpath is idempotent — normalize twice returns equal', () => { const input = 'pkg:npm/foo@1.0.0#path@x'; - const once = canonicalizePurl(input); - assert.equal(canonicalizePurl(once), once); + const once = normalizePurlIdentity(input); + assert.equal(normalizePurlIdentity(once), once); }); }); - // --- pypi: not double-applied --- + describe('omit-version canonical output', () => { + test('removes only version from Swift while keeping namespace and name', () => { + assert.equal( + normalizePurlIdentity( + 'pkg:swift/github.com/apple/swift-argument-parser@1.2.3', + { version: 'omit' }, + ), + 'pkg:swift/github.com/apple/swift-argument-parser', + ); + }); - describe('pypi passes through case normalization unchanged', () => { - test('pypi name is byte-identical to input — exact canonical output', () => { + test('preserves Conan qualifiers when version is omitted', () => { assert.equal( - canonicalizePurl('pkg:pypi/requests@2.28.0'), - 'pkg:pypi/requests@2.28.0', + normalizePurlIdentity( + 'pkg:conan/conan-center/openssl@1.1.1?user=conan&channel=stable', + { version: 'omit' }, + ), + 'pkg:conan/conan-center/openssl?channel=stable&user=conan', + ); + }); + + test('preserves Debian qualifiers when version is omitted', () => { + assert.equal( + normalizePurlIdentity( + 'pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie', + { version: 'omit' }, + ), + 'pkg:deb/debian/curl?arch=i386&distro=jessie', ); }); - }); - // --- Idempotency --- + test('preserves npm subpath and qualifiers when version is omitted', () => { + assert.equal( + normalizePurlIdentity( + 'pkg:npm/%40scope/pkg@1.0.0?repository_url=https://example.com/repo#/dist/file.js', + { version: 'omit' }, + ), + 'pkg:npm/%40scope/pkg?repository_url=https%3A%2F%2Fexample.com%2Frepo#dist/file.js', + ); + }); + }); - describe('idempotency — canonicalize(canonicalize(x)) === canonicalize(x)', () => { + describe('idempotency', () => { const idempotentCases: Array<[string, string]> = [ ['pkg:nuget/Serilog.Sinks.Console@2.1.0', 'nuget mixed case'], ['pkg:composer/Foo/Bar@1.0.0', 'composer mixed case'], @@ -279,139 +582,74 @@ describe('canonicalizePurl', () => { ]; for (const [input, label] of idempotentCases) { - test(`idempotent for ${label}`, () => { - const once = canonicalizePurl(input); - assert.equal(canonicalizePurl(once), once); + test(`preserve-version idempotent for ${label}`, () => { + const once = normalizePurlIdentity(input); + assert.equal(normalizePurlIdentity(once), once); }); } }); - // --- Unparseable passthrough --- - describe('unparseable PURL passthrough — returns input unchanged, never throws', () => { test('string not starting with pkg: is returned unchanged', () => { - assert.equal(canonicalizePurl('not-a-purl'), 'not-a-purl'); + assert.equal(normalizePurlIdentity('not-a-purl'), 'not-a-purl'); }); test('malformed pkg: input is returned unchanged', () => { - assert.equal(canonicalizePurl('pkg:'), 'pkg:'); + assert.equal(normalizePurlIdentity('pkg:'), 'pkg:'); }); test('no exception for non-PURL input', () => { - assert.doesNotThrow(() => canonicalizePurl('not-a-purl')); + assert.doesNotThrow(() => normalizePurlIdentity('not-a-purl')); }); test('no exception for malformed pkg: input', () => { - assert.doesNotThrow(() => canonicalizePurl('pkg:')); - }); - - // A bare-major Go version (@v1, not full semver) is not serializable by the - // pinned packageurl-js, so the input passes through unchanged and is not aliased. - test('go-typed PURL with a bare-major version (@v1) does not throw', () => { - assert.doesNotThrow(() => - canonicalizePurl('pkg:go/github.com/foo/bar@v1'), - ); - }); - - test('go-typed PURL with a bare-major version (@v1) returns input unchanged', () => { - assert.equal( - canonicalizePurl('pkg:go/github.com/foo/bar@v1'), - 'pkg:go/github.com/foo/bar@v1', - ); + assert.doesNotThrow(() => normalizePurlIdentity('pkg:')); }); }); - // --- Encoded separator inside a segment is preserved --- - describe('percent-encoded slash inside a segment is preserved', () => { test('nuget %2F inside name stays encoded and uppercase-hex — exact canonical output', () => { assert.equal( - canonicalizePurl('pkg:nuget/Foo%2FBar@1.0.0'), + normalizePurlIdentity('pkg:nuget/Foo%2FBar@1.0.0'), 'pkg:nuget/foo%2Fbar@1.0.0', ); }); }); - // --- onUncanonicalized observability hook --- - // The hook fires ONLY in the serialize-failure branch (parsed OK, reconstruct threw). - // It MUST NOT fire on parse failures or on success. The function never throws even - // when the callback itself throws. - describe('onUncanonicalized observability hook', () => { - // A golang bare-major version (@v1) parses OK but fails to serialize under - // the packageurl-js golang validator — the canonical serialize-failure scenario. - const serializeFailurePurl = 'pkg:go/github.com/foo/bar@v1'; - - test('hook fires exactly once on serialize failure, receives input purl and an error', () => { - let callCount = 0; - let receivedPurl: string | undefined; - let receivedError: unknown; - - canonicalizePurl(serializeFailurePurl, (purl, error) => { - callCount++; - receivedPurl = purl; - receivedError = error; - }); - - assert.equal(callCount, 1); - assert.equal(receivedPurl, serializeFailurePurl); - assert.ok(receivedError instanceof Error); - }); - - test('return value on serialize failure is the input unchanged', () => { - const result = canonicalizePurl(serializeFailurePurl, () => {}); - assert.equal(result, serializeFailurePurl); - }); - test('hook does NOT fire on parse failure — non-PURL input', () => { let callCount = 0; - canonicalizePurl('not-a-purl', () => { - callCount++; - }); - assert.equal(callCount, 0); - }); - - test('hook does NOT fire on parse failure — malformed pkg: input', () => { - let callCount = 0; - canonicalizePurl('pkg:', () => { - callCount++; + normalizePurlIdentity('not-a-purl', { + version: 'preserve', + onUncanonicalized: () => { + callCount++; + }, }); assert.equal(callCount, 0); }); test('hook does NOT fire on success — nuget canonical output', () => { let callCount = 0; - const result = canonicalizePurl('pkg:nuget/Serilog@1.0.0', () => { - callCount++; + const result = normalizePurlIdentity('pkg:nuget/Serilog@1.0.0', { + version: 'preserve', + onUncanonicalized: () => { + callCount++; + }, }); assert.equal(callCount, 0); assert.equal(result, 'pkg:nuget/serilog@1.0.0'); }); - test('backward compatible — no callback, serialize failure, does not throw', () => { - assert.doesNotThrow(() => canonicalizePurl(serializeFailurePurl)); - }); - - test('backward compatible — no callback, serialize failure, returns input unchanged', () => { - assert.equal( - canonicalizePurl(serializeFailurePurl), - serializeFailurePurl, - ); - }); - - test('a callback that itself throws does not propagate — function does not throw', () => { + test('a callback that itself throws never breaks the identity path', () => { assert.doesNotThrow(() => { - canonicalizePurl(serializeFailurePurl, () => { - throw new Error('boom'); + const result = normalizePurlIdentity('pkg:go/github.com/foo/bar@v1', { + version: 'preserve', + onUncanonicalized: () => { + throw new Error('boom'); + }, }); + assert.equal(result, 'pkg:golang/github.com/foo/bar@v1'); }); }); - - test('a callback that itself throws does not break identity — return value is input unchanged', () => { - const result = canonicalizePurl(serializeFailurePurl, () => { - throw new Error('boom'); - }); - assert.equal(result, serializeFailurePurl); - }); }); }); diff --git a/src/eol/utils.ts b/src/eol/utils.ts index 40f7777..21e43be 100644 --- a/src/eol/utils.ts +++ b/src/eol/utils.ts @@ -16,72 +16,227 @@ const PURL_TYPE_ALIASES: Record = { // golang, maven, gem, pypi, pub, and all others are EXCLUDED — their paths are case-sensitive. const CASE_INSENSITIVE_TYPES = new Set(['nuget', 'composer', 'cargo', 'npm']); +export type PurlIdentity = { + rawPurl: string; + canonicalComponentPurl: string; + canonicalVersionPurl?: string; + legacyComponentPurl?: string; +}; + +export type VersionedPurlFilter = { + contextPurls: string[]; + resultPurls?: string[]; +}; + +export type NormalizePurlIdentityVersionMode = 'preserve' | 'omit'; + +export type NormalizePurlIdentityOptions = { + version: NormalizePurlIdentityVersionMode; + onUncanonicalized?: ((purl: string, error: unknown) => void) | undefined; +}; + +type ParsedPurlIdentity = { + type: string; + namespace: string | undefined; + name: string; + version: string | undefined; + qualifiers: Record | undefined; + subpath: string | undefined; +}; + +function notifyUncanonicalized( + purl: string, + error: unknown, + onUncanonicalized?: (purl: string, error: unknown) => void, +): void { + try { + onUncanonicalized?.(purl, error); + } catch { + // A misbehaving callback must not break the identity path. + } +} + +function normalizeParsedPurlIdentity(purl: string): ParsedPurlIdentity | null { + const [rawType, rawNamespace, rawName, rawVersion, rawQualifiers, rawSubpath] = + PackageURL.parseString(purl); + + const type = PackageURL.Component.type.normalize(rawType); + const name = PackageURL.Component.name.normalize(rawName); + + if (!type || !name) { + return null; + } + + return { + type: PURL_TYPE_ALIASES[type] ?? type, + namespace: + PackageURL.Component.namespace.normalize(rawNamespace) ?? undefined, + name, + version: PackageURL.Component.version.normalize(rawVersion) ?? undefined, + qualifiers: + (PackageURL.Component.qualifiers.normalize(rawQualifiers) as + | Record + | undefined) ?? undefined, + subpath: PackageURL.Component.subpath.normalize(rawSubpath) ?? undefined, + }; +} + +function applyCaseNormalization(parts: ParsedPurlIdentity): ParsedPurlIdentity { + if (!CASE_INSENSITIVE_TYPES.has(parts.type)) { + return parts; + } + + return { + ...parts, + namespace: parts.namespace?.toLowerCase(), + name: parts.name.toLowerCase(), + }; +} + +function serializePurlIdentityParts(parts: ParsedPurlIdentity): string { + let purl = `pkg:${PackageURL.Component.type.encode(parts.type)}/`; + + if (parts.namespace) { + purl += `${PackageURL.Component.namespace.encode(parts.namespace)}/`; + } + + purl += PackageURL.Component.name.encode(parts.name); + + if (parts.version) { + purl += `@${PackageURL.Component.version.encode(parts.version)}`; + } + + if (parts.qualifiers) { + purl += `?${PackageURL.Component.qualifiers.encode(parts.qualifiers)}`; + } + + if (parts.subpath) { + purl += `#${PackageURL.Component.subpath.encode(parts.subpath)}`; + } + + return purl; +} + /** - * Normalizes a PURL string to its canonical identity form. - * - * Applies, in order: - * 1. Parse with PackageURL.fromString — the library handles version isolation, - * qualifier/subpath separation, and percent-encoding correctly. - * 2. Type-alias translation (go → golang, rubygems → gem). - * 3. Ecosystem-aware case normalization (namespace + name lowercased for - * nuget, composer, cargo, npm only). - * 4. Reconstruct with new PackageURL(...).toString() — the library serializes - * qualifiers and subpath with canonical percent-encoding. + * Normalizes a PURL string to this package's canonical identity form. * - * Version value is preserved; reserved-character encoding may be canonicalized. - * Qualifiers and subpath are canonically re-encoded by the serializer - * (deterministic and idempotent). + * The implementation parses with packageurl-js and uses its component-level + * normalizers/encoders, but intentionally avoids PackageURL construction for + * identity serialization. That keeps versionless component identities portable + * for ecosystems whose type validators require a version, such as Swift. * * Returns the input unchanged for unparseable strings; never throws. - * - * The optional `onUncanonicalized` callback is invoked when the PURL parses - * successfully but cannot be re-serialized into canonical form (e.g. the - * packageurl-js golang validator limitation on bare-major versions like `@v1`). - * It is NOT invoked on parse failures (non-PURL / malformed input — those - * are silent passthroughs) and NOT invoked on success. A callback that - * itself throws is silently ignored so it cannot break the identity path. */ -export function canonicalizePurl( +export function normalizePurlIdentity( purl: string, - onUncanonicalized?: (purl: string, error: unknown) => void, + options: NormalizePurlIdentityOptions = { version: 'preserve' }, ): string { - let parsed: PackageURL; + let parts: ParsedPurlIdentity | null; try { - parsed = PackageURL.fromString(purl); + parts = normalizeParsedPurlIdentity(purl); } catch { // Input is not a parseable PURL — return it unchanged. This is expected for // non-PURL input and is intentionally not reported via onUncanonicalized. return purl; } + if (!parts) { + return purl; + } + try { - const type = PURL_TYPE_ALIASES[parsed.type] ?? parsed.type; - let namespace = parsed.namespace; - let name = parsed.name; - if (CASE_INSENSITIVE_TYPES.has(type)) { - namespace = namespace ? namespace.toLowerCase() : namespace; - name = name.toLowerCase(); - } - return new PackageURL( - type, - namespace ?? undefined, - name, - parsed.version ?? undefined, - parsed.qualifiers ?? undefined, - parsed.subpath ?? undefined, - ).toString(); + const normalized = applyCaseNormalization({ + ...parts, + version: options.version === 'omit' ? undefined : parts.version, + }); + + return serializePurlIdentityParts(normalized); } catch (error) { - // The PURL parsed but could not be re-serialized into canonical form. - // Return it unchanged and notify the caller so the anomaly is observable. - try { - onUncanonicalized?.(purl, error); - } catch { - // A misbehaving callback must not break the identity path. - } + // The PURL parsed but could not be normalized or serialized into canonical + // identity form. Return it unchanged and notify the caller so the anomaly + // is observable. + notifyUncanonicalized(purl, error, options.onUncanonicalized); return purl; } } +function rawComponentPurlFromParsed( + purl: string, + parsed: ParsedPurlIdentity, +): string { + const identityEnd = purl.search(/[?#]/); + const identityPart = identityEnd === -1 ? purl : purl.slice(0, identityEnd); + const identityTail = identityEnd === -1 ? '' : purl.slice(identityEnd); + + if (!parsed.version) { + return purl; + } + + const versionSeparator = identityPart.lastIndexOf('@'); + return versionSeparator === -1 + ? purl + : `${identityPart.slice(0, versionSeparator)}${identityTail}`; +} + +export function createPurlIdentity( + purl: string, + onUncanonicalized?: (purl: string, error: unknown) => void, +): PurlIdentity { + let parsed: ParsedPurlIdentity | null; + try { + parsed = normalizeParsedPurlIdentity(purl); + } catch { + return { rawPurl: purl, canonicalComponentPurl: purl }; + } + + if (!parsed) { + return { rawPurl: purl, canonicalComponentPurl: purl }; + } + + const rawComponentPurl = rawComponentPurlFromParsed(purl, parsed); + const canonicalComponentPurl = normalizePurlIdentity(purl, { + version: 'omit', + onUncanonicalized, + }); + const canonicalVersionPurl = parsed.version + ? normalizePurlIdentity(purl, { version: 'preserve', onUncanonicalized }) + : undefined; + + const identity: PurlIdentity = { rawPurl: purl, canonicalComponentPurl }; + + if (canonicalVersionPurl) { + identity.canonicalVersionPurl = canonicalVersionPurl; + } + + if (rawComponentPurl !== canonicalComponentPurl) { + identity.legacyComponentPurl = rawComponentPurl; + } + + return identity; +} + +export function canonicalizeVersionFilter< + TFilter extends { contextPurls: string[]; resultPurls?: string[] }, +>(filter?: TFilter): TFilter | undefined { + if (!filter) { + return undefined; + } + + return { + ...filter, + contextPurls: filter.contextPurls.map((purl) => + normalizePurlIdentity(purl, { version: 'preserve' }), + ), + ...(filter.resultPurls + ? { + resultPurls: filter.resultPurls.map((purl) => + normalizePurlIdentity(purl, { version: 'preserve' }), + ), + } + : {}), + } as TFilter; +} + export function deriveComponentStatus( metadata: EolScanComponentMetadata | null, ): ComponentStatus { diff --git a/src/index.ts b/src/index.ts index f609715..a7f0557 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,7 +4,15 @@ export { spdxToCdxBom } from './spdx-to-cdx.js'; export { deriveComponentStatus, extractPurlsFromCdxBom, - canonicalizePurl, + normalizePurlIdentity, + createPurlIdentity, + canonicalizeVersionFilter, +} from './eol/utils.js'; +export type { + NormalizePurlIdentityOptions, + NormalizePurlIdentityVersionMode, + PurlIdentity, + VersionedPurlFilter, } from './eol/utils.js'; export type * from './types/eol-scan.js'; From e6d74daf2c251954f14b5d7673e6dbeaecf0daf6 Mon Sep 17 00:00:00 2001 From: Facundo Rodriguez Date: Thu, 25 Jun 2026 11:39:07 -0300 Subject: [PATCH 3/3] style(eol): format purl identity utils --- src/eol/utils.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/eol/utils.ts b/src/eol/utils.ts index 21e43be..aedd571 100644 --- a/src/eol/utils.ts +++ b/src/eol/utils.ts @@ -57,8 +57,14 @@ function notifyUncanonicalized( } function normalizeParsedPurlIdentity(purl: string): ParsedPurlIdentity | null { - const [rawType, rawNamespace, rawName, rawVersion, rawQualifiers, rawSubpath] = - PackageURL.parseString(purl); + const [ + rawType, + rawNamespace, + rawName, + rawVersion, + rawQualifiers, + rawSubpath, + ] = PackageURL.parseString(purl); const type = PackageURL.Component.type.normalize(rawType); const name = PackageURL.Component.name.normalize(rawName);