Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new summarizer for recent ScanCode versions #1056

Merged
merged 37 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
633a49e
Update debscr-license-expression fixture
lumaxis Feb 13, 2024
d760ab2
Add fixtures for new ScanCode version
lumaxis Feb 13, 2024
df88ded
Add initial new ScanCode processing logic
lumaxis Feb 13, 2024
4419d52
Conditionally call new ScanCode summarizer
lumaxis Feb 14, 2024
b6458f1
Update semver
lumaxis Feb 16, 2024
728ef6f
Update Summarizer test
lumaxis Feb 16, 2024
613a985
Use semver to compare scancodeVersion
lumaxis Feb 16, 2024
971483b
Move shared functions to utils
lumaxis Feb 21, 2024
80c2aa4
Ensure version checking code path is tested as well
lumaxis Feb 21, 2024
d4f6c89
Various small cleanups
lumaxis Feb 22, 2024
996473d
Update _getLicenseByFileName
lumaxis Feb 22, 2024
c9ce542
Update and extend tests
lumaxis Feb 22, 2024
f67a3af
Update tests and remove TODOs
lumaxis Apr 2, 2024
29e88f8
Rename variables for clarity
lumaxis Apr 4, 2024
2a20940
Updates for failing tests
lumaxis Apr 4, 2024
f9a61e4
Fix formatting issues
lumaxis Apr 5, 2024
8045dbb
Formatting updates
lumaxis Apr 5, 2024
18d4f8b
Unskip test
lumaxis Apr 5, 2024
3e24d62
Update logic to use license expressions from file detections
lumaxis Apr 5, 2024
1c510c1
Update getLicenseLocations to properly support debsrc
lumaxis Apr 5, 2024
9890edd
Use improved function to find debsrc root files
lumaxis Apr 5, 2024
6153d5a
Update test fixtures
lumaxis Apr 5, 2024
115cd53
Update license text detection and correct threshold
lumaxis Apr 17, 2024
15ed4d3
Add additional test case
lumaxis Apr 29, 2024
5e4f7e9
Use detected_license_expression_spdx consistently
lumaxis Apr 29, 2024
bd747ee
Update to latest ScanCode version
lumaxis May 13, 2024
6f2301f
Update summarizer test
lumaxis May 13, 2024
38b8b0e
Use existing SPDX expressions
lumaxis May 13, 2024
05e77a3
Rename functions
lumaxis May 13, 2024
cf45f89
Update test description
lumaxis May 14, 2024
b67935c
Minor codestyle improvement in test
lumaxis May 30, 2024
5f1d2ab
Merge branch 'master' into updates-new-scancode-version
lumaxis Jun 10, 2024
eb314d0
Restructure logic and add ScanCode delegator
lumaxis Jun 12, 2024
af03090
Update debian package location logic
lumaxis Jun 13, 2024
b5a0d38
Remove duplicated test
lumaxis Jun 14, 2024
202b854
Separate function to better replicate old logic
lumaxis Jun 24, 2024
dcf7963
Merge branch 'master' into updates-new-scancode-version
lumaxis Jun 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 66 additions & 6 deletions lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const { DateTime } = require('luxon')
const { set, unset, union, sortBy, trim, uniqBy } = require('lodash')
const extend = require('extend')
const SPDX = require('@clearlydefined/spdx')
const scancodeMap = require('./scancodeMap')
const coordinatesMapper = require('./coordinatesMapper')()

async function toResultCoordinatesFromRequest(request) {
Expand Down Expand Up @@ -330,16 +331,17 @@ function updateSourceLocation(spec) {
* Checks deeper than the root depending on coordinate type
*
* @param {string} filePath
* @param {EntityCoordinates} coordinates - optional to look deeper than the root based on coordinate type
* @param {EntityCoordinates} [coordinates] - optional to look deeper than the root based on coordinate type
* @param {object} [packages] - optional, to look at package directories
* @returns {boolean}
*/
function isLicenseFile(filePath, coordinates) {
function isLicenseFile(filePath, coordinates, packages) {
if (!filePath) return false
filePath = filePath.toLowerCase()
const basePath = filePath.split('/')[0]
if (_licenseFileNames.includes(basePath)) return true
if (!coordinates) return false
for (const prefix of getLicenseLocations(coordinates) || []) {
for (const prefix of getLicenseLocations(coordinates, packages) || []) {
const prefixLowered = prefix.toLowerCase()
if (_licenseFileNames.includes(filePath.replace(prefixLowered, ''))) return true
}
Expand All @@ -357,25 +359,81 @@ function isDeclaredLicense(identifier) {
return identifier && identifier !== 'NOASSERTION' && identifier !== 'NONE'
}

function getLicenseLocations(coordinates) {
function getLicenseLocations(coordinates, packages) {
const map = {
npm: ['package/'],
maven: ['META-INF/'],
pypi: [`${coordinates.name}-${coordinates.revision}/`],
go: [goLicenseLocations(coordinates)]
go: [goLicenseLocation(coordinates)],
debsrc: packages ? debsrcLicenseLocations(packages) : []
lumaxis marked this conversation as resolved.
Show resolved Hide resolved
}
map.sourcearchive = map.maven
return map[coordinates.type]
}

function goLicenseLocations(coordinates) {
/**
 * Builds the directory prefix under which license files are looked up for a Go component.
 * The prefix has the form `<namespace>/<name>@<revision>/`. A namespace containing an
 * encoded slash (`%2f`, matched case-insensitively) is passed through deCodeSlashes first.
 *
 * @param {EntityCoordinates} coordinates - read for `namespace`, `name` and `revision`
 * @returns {string} the license location prefix, always ending in `/`
 */
function goLicenseLocation(coordinates) {
  const { namespace, name, revision } = coordinates
  const hasEncodedSlash = Boolean(namespace) && namespace.toLowerCase().includes('%2f')
  // Only namespaces that carry an encoded slash are decoded; anything else is used verbatim
  const prefix = hasEncodedSlash ? deCodeSlashes(namespace) : namespace
  return `${prefix}/${name}@${revision}/`
}

/**
 * Computes the directory prefixes to search for license files in a debsrc component.
 *
 * - If at least one package has `type: 'deb'`, the default `debian/` location comes first.
 * - Every other package contributes `<name>-<version>/`, or `<name>/` when it has no
 *   version, in the order the packages were given.
 *
 * @param {object[]} packages - package entries; each is read for `type`, `name` and `version`
 * @returns {string[]} license location prefixes
 */
function debsrcLicenseLocations(packages) {
  const isDebPackage = pkg => pkg.type === 'deb'

  // A single shared default location covers all debian packages
  const defaultLocations = packages.some(isDebPackage) ? ['debian/'] : []

  // Remaining packages are located by a directory derived from their name and version
  const packageLocations = packages
    .filter(pkg => !isDebPackage(pkg))
    .map(({ name, version }) => (version ? `${name}-${version}/` : `${name}/`))

  return [...defaultLocations, ...packageLocations]
}

/**
 * Combines a collection of license expressions into a single expression in which every
 * entry is parenthesized and the entries are joined with `AND`. The combined string is
 * then handed to SPDX.normalize.
 *
 * @param {Iterable<string>} expressions - collection converted to a list via setToArray
 * @returns {string|null} the result of SPDX.normalize, or null when `expressions` is falsy
 * or setToArray yields a falsy value
 */
function joinExpressions(expressions) {
  const list = expressions ? setToArray(expressions) : null
  if (!list) return null
  // Parenthesize each entry so operators inside one expression cannot bind across entries
  const body = list.join(') AND (')
  return SPDX.normalize(`(${body})`)
}

/**
 * Normalizes a license expression reported by ScanCode.
 *
 * The expression is parsed with SPDX.parse using a visitor that replaces each license
 * with its scancodeMap entry when one exists, then runs it through SPDX.normalizeSingle.
 * The parsed result is turned back into a string with SPDX.stringify.
 *
 * @param {string} licenseExpression - raw license expression
 * @param {object} [logger] - optional logger; `info` is called when the result is NOASSERTION
 * @returns {string|null} the normalized expression, or null when no expression was given
 */
function normalizeLicenseExpression(licenseExpression, logger) {
  if (!licenseExpression) return null

  // Prefer the scancodeMap translation and fall back to the raw value when unmapped
  const licenseVisitor = rawLicenseExpression =>
    SPDX.normalizeSingle(scancodeMap.get(rawLicenseExpression) || rawLicenseExpression)

  const parsed = SPDX.parse(licenseExpression, licenseVisitor)
  const result = SPDX.stringify(parsed)

  // Logging is purely diagnostic, so a missing logger must not turn into a TypeError
  if (result === 'NOASSERTION' && logger) logger.info(`ScanCode NOASSERTION from ${licenseExpression}`)

  return result
}

function _normalizeVersion(version) {
if (version == '1') return '1.0.0' // version '1' is not semver valid see https://github.com/clearlydefined/crawler/issues/124
return semver.valid(version) ? version : null
Expand Down Expand Up @@ -552,6 +610,8 @@ module.exports = {
addArrayToSet,
extractLicenseFromLicenseUrl,
getLicenseLocations,
joinExpressions,
normalizeLicenseExpression,
mergeDefinitions,
buildSourceUrl,
deCodeSlashes,
Expand Down
Loading