Browse Source

scripts: west: spdx: extract copyright info

Use REUSE to extract copyright text from source files and include it in
SBOM documents.

Signed-off-by: Benjamin Cabé <benjamin@zephyrproject.org>
pull/87630/head
Benjamin Cabé 4 weeks ago committed by Benjamin Cabé
parent
commit
8031f338dd
  1. 7
      doc/develop/west/zephyr-cmds.rst
  2. 1
      scripts/requirements-actions.in
  3. 38
      scripts/requirements-actions.txt
  4. 1
      scripts/requirements-base.txt
  5. 30
      scripts/west_commands/zspdx/scanner.py

7
doc/develop/west/zephyr-cmds.rst

@ -136,6 +136,13 @@ Each file in the bill-of-materials is scanned, so that its hashes (SHA256 and @@ -136,6 +136,13 @@ Each file in the bill-of-materials is scanned, so that its hashes (SHA256 and
SHA1) can be recorded, along with any detected licenses if an
``SPDX-License-Identifier`` comment appears in the file.
Copyright notices are extracted using the third-party :command:`reuse` tool from the REUSE group.
When found, these notices are added to SPDX documents as ``FileCopyrightText`` fields.

.. note::

   Copyright extraction uses heuristics that may not capture complete notice text, so
   ``FileCopyrightText`` content is best-effort. This aligns with SPDX specification recommendations.

SPDX Relationships are created to indicate dependencies between
CMake build targets, build targets that are linked together, and
source files that are compiled to generate the built library files.

1
scripts/requirements-actions.in

@ -28,6 +28,7 @@ pytest @@ -28,6 +28,7 @@ pytest
python-magic-bin; sys_platform == "win32"
python-magic; sys_platform != "win32"
pyyaml
reuse
ruff==0.11.11
setuptools>=70.2.0
spdx-tools

38
scripts/requirements-actions.txt

@ -12,6 +12,10 @@ astroid==3.3.10 \ @@ -12,6 +12,10 @@ astroid==3.3.10 \
--hash=sha256:104fb9cb9b27ea95e847a94c003be03a9e039334a8ebca5ee27dafaf5c5711eb \
--hash=sha256:c332157953060c6deb9caa57303ae0d20b0fbdb2e59b4a4f2a6ba49d0a7961ce
# via pylint
attrs==25.3.0 \
--hash=sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3 \
--hash=sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b
# via reuse
awscli==1.40.38 \
--hash=sha256:3f90f2815e3b72691785d2bbd4f2c3dcbde311c55e1f5633a37cc3e514ba6fdb \
--hash=sha256:ec9b69ff4600ef151df3336f0e7af4cf30280d27f9d8e67e09c4b386ceb1f154
@ -20,10 +24,16 @@ beartype==0.21.0 \ @@ -20,10 +24,16 @@ beartype==0.21.0 \
--hash=sha256:b6a1bd56c72f31b0a496a36cc55df6e2f475db166ad07fa4acc7e74f4c7f34c0 \
--hash=sha256:f9a5078f5ce87261c2d22851d19b050b64f6a805439e8793aecf01ce660d3244
# via spdx-tools
binaryornot==0.4.4 \
--hash=sha256:359501dfc9d40632edc9fac890e19542db1a287bbcfa58175b66658392018061 \
--hash=sha256:b8b71173c917bddcd2c16070412e369c3ed7f0528926f70cac18a6c97fd563e4
# via reuse
boolean-py==5.0 \
--hash=sha256:60cbc4bad079753721d32649545505362c754e121570ada4658b852a3a318d95 \
--hash=sha256:ef28a70bd43115208441b53a045d1549e2f0ec6e3d08a9d142cbc41c1938e8d9
# via license-expression
# via
# license-expression
# reuse
botocore==1.38.39 \
--hash=sha256:2305f688e9328af473a504197584112f228513e06412038d83205ce8d1456f40 \
--hash=sha256:ee3aa03af1dabed4f3710cd64f6d9d488281eee720710bf1cf9f2b2fd30025ae
@ -118,7 +128,9 @@ cffi==1.17.1 \ @@ -118,7 +128,9 @@ cffi==1.17.1 \
chardet==5.2.0 \
--hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \
--hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970
# via tox
# via
# binaryornot
# tox
charset-normalizer==3.4.2 \
--hash=sha256:005fa3432484527f9732ebd315da8da8001593e2cf46a3d817669f062c3d9ed4 \
--hash=sha256:046595208aae0120559a67693ecc65dd75d46f7bf687f159127046628178dc45 \
@ -212,7 +224,9 @@ charset-normalizer==3.4.2 \ @@ -212,7 +224,9 @@ charset-normalizer==3.4.2 \
--hash=sha256:fb707f3e15060adf5b7ada797624a6c6e0138e2a26baa089df64c68ee98e040f \
--hash=sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a \
--hash=sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f
# via requests
# via
# python-debian
# requests
clang-format==20.1.6 \
--hash=sha256:0ea008a20951527d35a1e2b8febdca3c47c6f8e9a1bd174601c891e20053ef2e \
--hash=sha256:11530ff352c64176ba4297ad398452d9fcd442b4a8bb2a804cc7915bc94b96e1 \
@ -236,6 +250,7 @@ click==8.1.3 \ @@ -236,6 +250,7 @@ click==8.1.3 \
--hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48
# via
# gitlint-core
# reuse
# spdx-tools
colorama==0.4.6 \
--hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
@ -452,6 +467,7 @@ jinja2==3.1.6 \ @@ -452,6 +467,7 @@ jinja2==3.1.6 \
# via
# gcovr
# junit2html
# reuse
jmespath==1.0.1 \
--hash=sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980 \
--hash=sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe
@ -466,7 +482,9 @@ junitparser==3.2.0 \ @@ -466,7 +482,9 @@ junitparser==3.2.0 \
license-expression==30.4.1 \
--hash=sha256:679646bc3261a17690494a3e1cada446e5ee342dbd87dcfa4a0c24cc5dce13ee \
--hash=sha256:9f02105f9e0fcecba6a85dfbbed7d94ea1c3a70cf23ddbfb5adf3438a6f6fce0
# via spdx-tools
# via
# reuse
# spdx-tools
lxml==5.4.0 \
--hash=sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5 \
--hash=sha256:073eb6dcdf1f587d9b88c8c93528b57eccda40209cf9be549d469b942b41d70b \
@ -906,6 +924,10 @@ python-dateutil==2.9.0.post0 \ @@ -906,6 +924,10 @@ python-dateutil==2.9.0.post0 \
# botocore
# elasticsearch
# pykwalify
python-debian==1.0.1 \
--hash=sha256:3ada9b83a3d671b58081782c0969cffa0102f6ce433fbbc7cf21275b8b5cc771 \
--hash=sha256:8f137c230c1d9279c2ac892b35915068b2aca090c9fd3da5671ff87af32af12c
# via reuse
python-magic==0.4.27 ; sys_platform != 'win32' \
--hash=sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b \
--hash=sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3
@ -1079,6 +1101,10 @@ requests==2.32.4 \ @@ -1079,6 +1101,10 @@ requests==2.32.4 \
--hash=sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c \
--hash=sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422
# via pygithub
reuse==5.0.2 \
--hash=sha256:7a680f00324e87a72061677a892d8cbabfddf7adcf7a5376aeeed2d78995bbbb \
--hash=sha256:878016ae5dd29c10bad4606d6676c12a268c12aa9fcfea66403598e16eed085c
# via -r requirements-actions.in
rsa==4.7.2 \
--hash=sha256:78f9a9bf4e7be0c5ded4583326e7461e3a3c5aae24073648b4bdfa797d78c9d2 \
--hash=sha256:9d689e6ca1b3038bc82bf8d23e944b6b6037bc02301a574935b2dd946e0353b9
@ -1227,7 +1253,9 @@ tomli==2.2.1 \ @@ -1227,7 +1253,9 @@ tomli==2.2.1 \
tomlkit==0.13.3 \
--hash=sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1 \
--hash=sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0
# via pylint
# via
# pylint
# reuse
tox==4.27.0 \
--hash=sha256:2b8a7fb986b82aa2c830c0615082a490d134e0626dbc9189986da46a313c4f20 \
--hash=sha256:b97d5ecc0c0d5755bcc5348387fef793e1bfa68eb33746412f4c60881d7f5f57

1
scripts/requirements-base.txt

@ -24,6 +24,7 @@ pyserial @@ -24,6 +24,7 @@ pyserial
requests>=2.32.0
semver
tqdm>=4.67.1
reuse
# for ram/rom reports
anytree

30
scripts/west_commands/zspdx/scanner.py

@ -6,6 +6,7 @@ import hashlib @@ -6,6 +6,7 @@ import hashlib
import os
import re
from reuse.project import Project
from west import log
from zspdx.licenses import LICENSES
@ -177,6 +178,32 @@ def normalizeExpression(licsConcluded): @@ -177,6 +178,32 @@ def normalizeExpression(licsConcluded):
return " AND ".join(revised)
def getCopyrightInfo(filePath):
    """
    Scans the specified file for copyright information using REUSE tools.

    Extraction is best-effort: any error raised while querying REUSE is
    logged as a warning and reported as "no copyright information found",
    so a single problematic file does not abort the scan.

    Arguments:
        - filePath: path to file to scan
    Returns: sorted list of unique copyright statements if found; empty
             list if not found
    """
    log.dbg(f" - getting copyright info for {filePath}")

    try:
        # The REUSE project is rooted at the directory containing the file.
        project = Project(os.path.dirname(filePath))
        infos = project.reuse_info_of(filePath)

        # reuse 5.x documents ReuseInfo.copyright_lines as a set, so its
        # iteration order is not stable between runs. Collect into a set to
        # drop duplicates reported by several REUSE sources, then sort so
        # the generated SPDX output is deterministic.
        copyrights = set()
        for info in infos:
            if info.copyright_lines:
                copyrights.update(info.copyright_lines)

        return sorted(copyrights)
    except Exception as e:
        # Deliberately broad: copyright extraction must never fail the
        # whole SBOM generation, whatever REUSE raises for this file.
        log.wrn(f"Error getting copyright info for {filePath}: {e}")
        return []
def scanDocument(cfg, doc):
"""
Scan for licenses and calculate hashes for all Files and Packages
@ -213,6 +240,9 @@ def scanDocument(cfg, doc): @@ -213,6 +240,9 @@ def scanDocument(cfg, doc):
f.concludedLicense = expression
f.licenseInfoInFile = splitExpression(expression)
if copyrights := getCopyrightInfo(f.abspath):
f.copyrightText = f"<text>\n{'\n'.join(copyrights)}\n</text>"
# check if any custom license IDs should be flagged for document
for lic in f.licenseInfoInFile:
checkLicenseValid(lic, doc)

Loading…
Cancel
Save