Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle license variations, add more free licenses and disambiguate packages #24

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
266 changes: 147 additions & 119 deletions vrms_arch/license_finder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import pyalpm
import re
import sys

AMBIGUOUS_LICENSES = [
def clean_license_name(license):
    """Return a normalized form of a license name, suitable for comparison.

    The name is lower-cased, a leading ``custom:`` prefix is removed, and
    every comma, whitespace character, underscore, double quote and hyphen
    is stripped. Spelling variants such as ``custom:Public_Domain`` and
    ``Public Domain`` therefore normalize to the same string.

    Args:
        license: The raw license string to normalize.

    Returns:
        The normalized license string (possibly empty).
    """
    # Lower-case first so that an upper-case "CUSTOM:" prefix is matched by
    # the "^custom:" alternative as well.
    lowered = license.lower()
    # Raw string literal: "\s" inside a plain string is an invalid escape
    # sequence and triggers a warning on modern Python versions.
    return re.sub(r'(?:^custom:|[,\s_"-])', '', lowered)

AMBIGUOUS_LICENSES = [clean_license_name(license) for license in [
"custom",
"other",
"unknown",
Expand All @@ -10,163 +16,173 @@
# which are non-free
"CCPL", # ['claws-mail-themes', '0ad', '0ad-data', 'archlinux-lxdm-theme', 'mari0', 'performous-freesongs']
"CCPL:cc-by-sa-3.0",
]
]]

FREE_LICENSES = [
FREE_LICENSES = [clean_license_name(license) for license in [
'AFL-3.0',
'AGPL',
'AGPL3',
'APACHE',
'Apache',
'Apache 2.0',
'apache',
'Apache 2.0 with LLVM Exception',
'Apache 2.0 with LLVM Execption',
'Apache License (2.0)',
'Apache2',
'Arphic Public License',
'Artistic',
'Artistic2.0',
'boost',
'Artistic 2.0',
'Beerware',
'bitstream-vera',
'Boost',
'BSD',
'bsd',
'BSD2',
'BSD-2-clause',
'BSD3',
'CC-BY-SA 4.0',
'BSD-3-clause',
'BSD-like',
'BSD-style',
'BSL',
'bzip2',
'CC0',
'CC-BY-SA',
'CC-BY-SA-2.5',
'CC-BY-SA-3.0',
'CC BY-SA-4.0',
'CCPL:by-sa',
'CCPL:cc-by-sa',
'CDDL',
'CeCILL',
'CPL',
'CUSTOM',
'Creative Commons, Attribution 3.0 Unported',
'dumb',
'EDL',
'EPL',
'EPL/1.1',
'etpan',
'ex',
'Expat',
'FDL',
'FDL1.2',
'FFSL',
'FIPL',
'font embedding exception',
'GD',
'GFL',
'GPL',
'GPL-2',
'GPL-2.0',
'GPL-3',
'GPL-3.0',
'GPL2',
'GPL-2.0+',
'GPL-2.0',
'GPL3',
'GPL-3.0',
'GPL3+GPLv2',
'GPL3-only',
'GPL3 or any later version',
'GPL/BSD',
'GPL+FE',
'GPLv2',
'GPLv3',
'HPND',
'IBM Public Licence',
'icu',
'ImageMagick',
'Info-ZIP',
'INN',
'ISC',
'isc-dhcp',
'JasPer2.0',
'Khronos',
'LGPL',
'LGPL2',
'LGPL2.1',
'LGPL2.1+',
'LGPL3',
'LGPLv3+',
'libpng',
'libtiff',
'libxcomposite',
'LPPL',
'lsof',
'MirOS',
'MIT',
'MIT/X',
'MITX11',
'MIT-style',
'Modified BSD',
'MPL',
'MPL2',
'Modified BSD',
'MPLv2',
'NCSA',
'neovim',
'nfsidmap',
'NoCopyright',
'none',
'OASIS',
'OFL',
'OFL-1.1',
'PHP',
'PSF',
'OPEN DATA LICENSE',
'OpenLDAP',
'OpenMPI',
'OSGPL',
'perl',
'PerlArtistic',
'PerlArtistic2',
'PHP',
'pil',
'PostgreSQL',
'PSF',
'Public Domain',
'Python',
'RUBY',
'Qhull',
'QPL',
'QPL-1.0',
'qwt',
'Ruby',
'scite',
'scowl',
'Sendmail',
'Sendmail open source license',
'SGI',
'SIL',
'SIL Open Font License',
'SIL Open Font License 1.1 and Bitstream Vera License',
'SIL Open Font License, Version 1.0',
'SIL OPEN FONT LICENSE Version 1.1',
'sip',
'Sleepycat',
'tcl',
'TekHVC',
'TRADEMARKS',
'Ubuntu Font Licence 1.0',
'UCD',
'Unicode-DFS',
'University of Illinois/NCSA Open Source License',
'Unlicense',
'usermin',
'vim',
'voidspace',
'W3C',
'w3m',
'webmin',
'WTF',
'WTFPL',
'ZLIB',
'wxWindows',
'X11',
'X11-DEC',
'XFREE86',
'Xiph',
'zlib',
'zlib/libpng',
'ZPL',
'custom: Arphic Public_License',
'custom: BSD',
'custom: ISC',
'custom: MIT',
'custom: QPL-1.0',
'custom:"IBM Public Licence"',
'custom:"font embedding exception"',
'custom:"icu"',
'custom:"pil"',
'custom:"sip"',
'custom:Arphic Public License',
'custom:Arphic_Public_License',
'custom:Artistic',
'custom:Artistic 2.0',
'custom:Artistic-2.0',
'custom:BSD',
'custom:BSD-like',
'custom:BSD-style',
'custom:BSD3',
'custom:Boost',
'custom:CC0',
'custom:CCBYSA',
'custom:CCBYSA3.0',
'custom:CCPL:by-sa',
'custom:CeCILL',
'custom:Creative Commons, Attribution 3.0 Unported',
'custom:EPL',
'custom:Expat',
'custom:FFSL',
'custom:FIPL',
'custom:GPL',
'custom:GPL/BSD',
'custom:GPL+FE',
'custom:INN',
'custom:ISC',
'custom:JasPer2.0',
'custom:LGPL',
'custom:LGPL2',
'custom:MIT',
'custom:MIT/X',
'custom:MITX11',
'custom:MPL2',
'custom:MPLv2',
'custom:MirOS',
'custom:NoCopyright',
'custom:OASIS',
'custom:OFL',
'custom:OPEN DATA LICENSE',
'custom:OpenLDAP',
'custom:OSGPL',
'custom:PUEL',
'custom:PYTHON',
'custom:PostgreSQL',
'custom:PSF',
'custom:Public Domain',
'custom:Public_Domain',
'custom:PublicDomain',
'custom:QPL',
'custom:SIL',
'custom:Sendmail',
'custom:TRADEMARKS',
'custom:Ubuntu Font Licence 1.0',
'custom:University of Illinois/NCSA Open Source License',
'custom:WTFPL',
'custom:X11',
'custom:XFREE86',
'custom:Xiph',
'custom:ZLIB',
'custom:artistic',
'custom:cc-by-sa-2.5',
'custom:dumb',
'custom:etpan',
'custom:ex',
'custom:icu',
'custom:isc-dhcp',
'custom:nfsidmap',
'custom:none',
'custom:public domain',
'custom:publicdomain',
'custom:qwt',
'custom:scite',
'custom:scowl',
'custom:unknown',
'custom:usermin',
'custom:vim',
'custom:voidspace',
'custom:webmin',
'custom:wxWindows',
'custom:zlib',
'custom:zlib/libpng',
]
]]

class LicenseFinder(object):
def __init__(self):
# all of the seen license names with counts
# all of the seen (clean) license names with counts
self.by_license = {}

# all of the seen (clean) license names with their raw variants
self.license_names = {}

# packages with "custom" license
self.unknown_packages = set()

Expand All @@ -179,19 +195,29 @@ def visit_db(self, db):
free_pkgs = []

for pkg in pkgs:
licenses = []

# get a list of all licenses on the box
for license in pkg.licenses:
# get a list of all licenses on the box
if license not in self.by_license:
self.by_license[license] = [pkg]
clean_license = clean_license_name(license)
licenses.append(clean_license)

if clean_license not in self.by_license:
self.by_license[clean_license] = [pkg]
else:
self.by_license[license].append(pkg)
self.by_license[clean_license].append(pkg)

if clean_license not in self.license_names:
self.license_names[clean_license] = {}
if license not in self.license_names[clean_license]:
self.license_names[clean_license][license] = 0
self.license_names[clean_license][license] += 1

free_licenses = list(filter(lambda x: x in FREE_LICENSES, pkg.licenses))
amb_licenses = list(filter(lambda x: x in AMBIGUOUS_LICENSES, pkg.licenses))
free_licenses = list(filter(lambda x: x in FREE_LICENSES, licenses))
amb_licenses = list(filter(lambda x: x in AMBIGUOUS_LICENSES, licenses))

if len(free_licenses) > 0:
free_pkgs.append(pkg)
continue
elif len(amb_licenses) > 0:
self.unknown_packages.add(pkg)
else:
Expand All @@ -204,7 +230,9 @@ def list_all_licenses_as_python(self):
sorted_by_popularity.sort(key=lambda lic : len(self.by_license[lic]), reverse=True)
for lic in sorted_by_popularity:
pop = len(self.by_license[lic])
print(" \"%s\",%s" % (lic.replace("\"", "\\\""), " # %s" % [ p.name for p in self.by_license[lic] ] if pop < obscure_license_pop_cutoff else ""))
license_names = self.license_names[lic]
license_name = max(license_names, key=license_names.get)
print(" \"%s\",%s" % (license_name.replace("\"", "\\\""), " # %s" % [ p.name for p in self.by_license[lic] ] if pop < obscure_license_pop_cutoff else ""))

def list_all_licenses(self):
sorted_by_popularity = list(self.by_license.keys())
Expand Down
Loading