Skip to content

Commit

Permalink
Merge pull request #1 from gardenappl/expand-more
Browse files Browse the repository at this point in the history
Clean up license names + add more licenses and disambiguations
  • Loading branch information
psvenk authored Nov 15, 2021
2 parents eb23924 + 5a80473 commit 39b1041
Show file tree
Hide file tree
Showing 2 changed files with 250 additions and 165 deletions.
293 changes: 139 additions & 154 deletions vrms_arch/license_finder.py
Original file line number Diff line number Diff line change
@@ -1,215 +1,188 @@
import pyalpm
import re
import sys

AMBIGUOUS_LICENSES = [
def clean_license_name(license):
    """Return a normalized form of a license name for comparison.

    The name is lowercased, a leading ``custom:`` prefix is removed, and
    every comma, whitespace character, underscore, double quote and hyphen
    is stripped, so that spelling variants such as ``custom:Public_Domain``
    and ``Public Domain`` compare equal.

    Args:
        license: The raw license string to normalize.

    Returns:
        The normalized license string (may be empty).
    """
    license = license.lower()
    # Raw string: "\s" in a plain literal is an invalid string escape and
    # triggers a warning on modern Python; the compiled pattern is unchanged.
    license = re.sub(r'(?:^custom:|[,\s_"-])', '', license)
    return license

AMBIGUOUS_LICENSES = [clean_license_name(license) for license in [
"custom",
"other",
"unknown",
"CUSTOM",
# CCPL (Creative Commons) should be specified with one of the
# sublicenses (one of /usr/share/licenses/common/CCPL/*) , some of
# which are non-free
"CCPL", # ['claws-mail-themes', '0ad', '0ad-data', 'archlinux-lxdm-theme', 'mari0', 'performous-freesongs']
"CCPL:cc-by-sa-3.0",
]
]]

FREE_LICENSES = [
FREE_LICENSES = [clean_license_name(license) for license in [
'AFL-3.0',
'AGPL',
'AGPL3',
'APACHE',
'Apache',
'Apache 2.0',
'Apache 2.0 with LLVM Exception',
'Apache 2.0 with LLVM Execption',
'Apache License (2.0)',
'apache',
'Apache2',
'Arphic Public License',
'Artistic',
'Artistic2.0',
'Artistic 2.0',
'Beerware',
'bitstream-vera',
'Boost',
'boost',
'BSD',
'bsd',
'BSD2',
'BSD-2',
'BSD-2-clause',
'BSD3',
'BSD-3-clause',
'BSD-like',
'BSD-style',
'BSL',
'bzip2',
'CC0',
'CC BY-SA 4.0',
'CC-BY-SA 4.0',
'CC-BY-SA',
'CC-BY-SA-2.5',
'CC-BY-SA-3.0',
'CC BY-SA-4.0',
'CCPL:by-sa',
'CCPL:cc-by-sa',
'CDDL',
'CeCILL',
'CPL',
'Creative Commons, Attribution 3.0 Unported',
'dumb',
'EDL',
'EPL',
'EPL/1.1',
'etpan',
'ex',
'Expat',
'FDL',
'FDL1.2',
'FFSL',
'FIPL',
'font embedding exception',
'GD',
'GFL',
'GPL',
'GPL-2',
'GPL-2.0',
'GPL-2.0+',
'GPL-3',
'GPL-3.0',
'GPL2',
'GPL-2.0+',
'GPL-2.0',
'GPL3',
'GPL3-only',
'GPL-3.0',
'GPL3+GPLv2',
'GPL3-only',
'GPL3 or any later version',
'GPL/BSD',
'GPL+FE',
'GPLv2',
'GPLv3',
'GPL3 or any later version',
'HPND',
'IBM Public Licence',
'icu',
'ImageMagick',
'Info-ZIP',
'INN',
'ISC',
'isc-dhcp',
'JasPer2.0',
'Khronos',
'LGPL',
'LGPL2',
'LGPL2.1',
'LGPL2.1+',
'LGPL3',
'LGPLv3+',
'libpng',
'libtiff',
'libxcomposite',
'LPPL',
'lsof',
'MirOS',
'MIT',
'MIT/X',
'MITX11',
'MIT-style',
'Modified BSD',
'MPL',
'MPL2',
'Modified BSD',
'MPLv2',
'NCSA',
'neovim',
'nfsidmap',
'NoCopyright',
'none',
'OASIS',
'OFL',
'OFL-1.1',
'PHP',
'PSF',
'OPEN DATA LICENSE',
'OpenLDAP',
'OpenMPI',
'OSGPL',
'perl',
'PerlArtistic',
'PerlArtistic2',
'PHP',
'pil',
'PostgreSQL',
'PSF',
'Public Domain',
'Python',
'RUBY',
'Qhull',
'QPL',
'QPL-1.0',
'qwt',
'Ruby',
'scite',
'scowl',
'Sendmail',
'Sendmail open source license',
'SGI',
'SIL',
'SIL OPEN FONT LICENSE Version 1.1',
'SIL Open Font License',
'SIL Open Font License 1.1 and Bitstream Vera License',
'SIL Open Font License, Version 1.0',
'SIL OPEN FONT LICENSE Version 1.1',
'sip',
'Sleepycat',
'tcl',
'TekHVC',
'TRADEMARKS',
'Ubuntu Font Licence 1.0',
'UCD',
'Unicode-DFS',
'University of Illinois/NCSA Open Source License',
'Unlicense',
'usermin',
'vim',
'voidspace',
'W3C',
'w3m',
'webmin',
'WTF',
'WTFPL',
'wxWindows',
'X11',
'ZLIB',
'X11-DEC',
'XFREE86',
'Xiph',
'zlib',
'zlib/libpng',
'ZPL',
'custom: Arphic Public_License',
'custom: BSD',
'custom: ISC',
'custom: MIT',
'custom: OFL',
'custom: SIL Open Font License',
'custom: QPL-1.0',
'custom: public domain',
'custom:"IBM Public Licence"',
'custom:"font embedding exception"',
'custom:"icu"',
'custom:"pil"',
'custom:"sip"',
'custom:Arphic Public License',
'custom:Arphic_Public_License',
'custom:Artistic',
'custom:Artistic 2.0',
'custom:Artistic-2.0',
'custom:Apache 2.0 with LLVM Exception',
'custom:Apache 2.0 with LLVM Execption',
'custom:BSD',
'custom:BSD-like',
'custom:BSD-style',
'custom:BSD2',
'custom:BSD3',
'custom:Boost',
'custom:CC0',
'custom:CCBYSA',
'custom:CCBYSA3.0',
'custom:CCPL:by-sa',
'custom:CeCILL',
'custom:Creative Commons, Attribution 3.0 Unported',
'custom:EPL',
'custom:Expat',
'custom:FFSL',
'custom:FIPL',
'custom:GD',
'custom:GFL',
'custom:GPL',
'custom:GPL/BSD',
'custom:GPL+FE',
'custom:HPND',
'custom:INN',
'custom:Info-ZIP',
'custom:ISC',
'custom:JasPer2.0',
'custom:Khronos',
'custom:LGPL',
'custom:LGPL2',
'custom:MIT',
'custom:MIT/X',
'custom:MITX11',
'custom:MPL2',
'custom:MPLv2',
'custom:MirOS',
'custom:NoCopyright',
'custom:OASIS',
'custom:OFL',
'custom:OPEN DATA LICENSE',
'custom:OpenLDAP',
'custom:OpenMPI',
'custom:OSGPL',
'custom:PYTHON',
'custom:PostgreSQL',
'custom:PSF',
'custom:Public Domain',
'custom:Public_Domain',
'custom:PublicDomain',
'custom:QPL',
'custom:Qhull',
'custom:SGI',
'custom:SIL',
'custom:SIL Open Font License, Version 1.0',
'custom:Sendmail',
'custom:Sendmail open source license',
'custom:Sleepycat',
'custom:TekHVC',
'custom:TRADEMARKS',
'custom:Ubuntu Font Licence 1.0',
'custom:University of Illinois/NCSA Open Source License',
'custom:Unlicense',
'custom:WTFPL',
'custom:X11',
'custom:X11-DEC',
'custom:XFREE86',
'custom:Xiph',
'custom:ZLIB',
'custom:artistic',
'custom:bitstream-vera',
'custom:bzip2',
'custom:cc-by-sa-2.5',
'custom:dumb',
'custom:etpan',
'custom:ex',
'custom:icu',
'custom:isc-dhcp',
'custom:libpng',
'custom:libtiff',
'custom:libxcomposite',
'custom:lsof',
'custom:nfsidmap',
'custom:neovim',
'custom:none',
'custom:public domain',
'custom:publicdomain',
'custom:qwt',
'custom:scite',
'custom:scowl',
'custom:tcl',
'custom:unknown',
'custom:unlicense',
'custom:usermin',
'custom:vim',
'custom:voidspace',
'custom:w3m',
'custom:webmin',
'custom:wxWindows',
'custom:zlib',
'custom:zlib/libpng',
]
]]

class LicenseFinder(object):
def __init__(self):
# all of the seen license names with counts
# all of the seen (clean) license names with counts
self.by_license = {}

# all of the seen (clean) license names with their raw variants
self.license_names = {}

# packages with "custom" license
self.unknown_packages = set()

Expand All @@ -222,19 +195,29 @@ def visit_db(self, db):
free_pkgs = []

for pkg in pkgs:
licenses = []

# get a list of all licenses on the box
for license in pkg.licenses:
# get a list of all licenses on the box
if license not in self.by_license:
self.by_license[license] = [pkg]
clean_license = clean_license_name(license)
licenses.append(clean_license)

if clean_license not in self.by_license:
self.by_license[clean_license] = [pkg]
else:
self.by_license[license].append(pkg)
self.by_license[clean_license].append(pkg)

if clean_license not in self.license_names:
self.license_names[clean_license] = {}
if license not in self.license_names[clean_license]:
self.license_names[clean_license][license] = 0
self.license_names[clean_license][license] += 1

free_licenses = list(filter(lambda x: x in FREE_LICENSES, pkg.licenses))
amb_licenses = list(filter(lambda x: x in AMBIGUOUS_LICENSES, pkg.licenses))
free_licenses = list(filter(lambda x: x in FREE_LICENSES, licenses))
amb_licenses = list(filter(lambda x: x in AMBIGUOUS_LICENSES, licenses))

if len(free_licenses) > 0:
free_pkgs.append(pkg)
continue
elif len(amb_licenses) > 0:
self.unknown_packages.add(pkg)
else:
Expand All @@ -247,7 +230,9 @@ def list_all_licenses_as_python(self):
sorted_by_popularity.sort(key=lambda lic : len(self.by_license[lic]), reverse=True)
for lic in sorted_by_popularity:
pop = len(self.by_license[lic])
print(" \"%s\",%s" % (lic.replace("\"", "\\\""), " # %s" % [ p.name for p in self.by_license[lic] ] if pop < obscure_license_pop_cutoff else ""))
license_names = self.license_names[lic]
license_name = max(license_names, key=license_names.get)
print(" \"%s\",%s" % (license_name.replace("\"", "\\\""), " # %s" % [ p.name for p in self.by_license[lic] ] if pop < obscure_license_pop_cutoff else ""))

def list_all_licenses(self):
sorted_by_popularity = list(self.by_license.keys())
Expand Down
Loading

0 comments on commit 39b1041

Please sign in to comment.