diff --git a/component_catalog/admin.py b/component_catalog/admin.py
index e3fba09a..4dff4eb2 100644
--- a/component_catalog/admin.py
+++ b/component_catalog/admin.py
@@ -884,6 +884,7 @@ class PackageAdmin(
"parties",
"datasource_id",
"file_references",
+ "package_content",
)
},
),
diff --git a/component_catalog/api.py b/component_catalog/api.py
index b7054945..ad91d8dc 100644
--- a/component_catalog/api.py
+++ b/component_catalog/api.py
@@ -618,6 +618,7 @@ class PackageSerializer(
required=False,
scope_content_type=True,
)
+ package_content = serializers.ReadOnlyField(source="get_package_content_display")
collect_data = serializers.BooleanField(
write_only=True,
required=False,
@@ -687,6 +688,7 @@ class Meta:
"parties",
"datasource_id",
"file_references",
+ "package_content",
"external_references",
"created_date",
"last_modified_date",
diff --git a/component_catalog/forms.py b/component_catalog/forms.py
index a254fed9..abc22e9f 100644
--- a/component_catalog/forms.py
+++ b/component_catalog/forms.py
@@ -339,6 +339,7 @@ class Meta:
"version",
"qualifiers",
"subpath",
+ "package_content",
"collect_data",
]
widgets = {
@@ -407,7 +408,7 @@ def helper(self):
HTML("
"),
Group("description", "keywords"),
Group("primary_language", "cpe"),
- Group("size", "release_date"),
+ Group("package_content", "size", "release_date"),
Group("dependencies", "notes"),
HTML("
"),
Group("homepage_url", "code_view_url"),
@@ -1183,6 +1184,7 @@ class Meta:
"version",
"qualifiers",
"subpath",
+ "package_content",
]
diff --git a/component_catalog/migrations/0013_package_package_content.py b/component_catalog/migrations/0013_package_package_content.py
new file mode 100644
index 00000000..ccf667de
--- /dev/null
+++ b/component_catalog/migrations/0013_package_package_content.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.2.8 on 2025-11-24 12:00
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('component_catalog', '0012_alter_component_children'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='package',
+ name='package_content',
+ field=models.IntegerField(blank=True, choices=[(1, 'curation'), (2, 'patch'), (3, 'source_repo'), (4, 'source_archive'), (5, 'binary'), (6, 'test'), (7, 'doc')], help_text='Content of this Package as one of: curation, patch, source_repo, source_archive, binary, test, doc', null=True),
+ ),
+ ]
diff --git a/component_catalog/models.py b/component_catalog/models.py
index 0412e681..074858ac 100644
--- a/component_catalog/models.py
+++ b/component_catalog/models.py
@@ -58,6 +58,7 @@
from dejacode_toolkit import spdx
from dejacode_toolkit.purldb import PurlDB
from dejacode_toolkit.purldb import pick_purldb_entry
+from dejacode_toolkit.purldb import pick_source_package
from dejacode_toolkit.scancodeio import ScanCodeIO
from dje import urn
from dje.copier import post_copy
@@ -1652,6 +1653,42 @@ def __str__(self):
return self.label
+class PackageContentFieldMixin(models.Model):
+ """
+ Field extracted from the `purldb.packagedb.models.Package` model.
+ It need to stay aligned with its upstream PurlDB implementation.
+ """
+
+ class PackageContentType(models.IntegerChoices):
+ CURATION = 1, "curation"
+ PATCH = 2, "patch"
+ SOURCE_REPO = 3, "source_repo"
+ SOURCE_ARCHIVE = 4, "source_archive"
+ BINARY = 5, "binary"
+ TEST = 6, "test"
+ DOC = 7, "doc"
+
+ package_content = models.IntegerField(
+ null=True,
+ blank=True,
+ choices=PackageContentType.choices,
+ help_text=_(
+ "Content of this Package as one of: {}".format(", ".join(PackageContentType.labels))
+ ),
+ )
+
+ class Meta:
+ abstract = True
+
+ @classmethod
+ def get_package_content_value_from_label(cls, label):
+ """Convert a package_content string label to its integer value."""
+ try:
+ return cls.PackageContentType[label.upper()].value
+ except (KeyError, AttributeError):
+ return
+
+
PACKAGE_URL_FIELDS = ["type", "namespace", "name", "version", "qualifiers", "subpath"]
@@ -1795,6 +1832,7 @@ class Package(
URLFieldsMixin,
HashFieldsMixin,
PackageURLMixin,
+ PackageContentFieldMixin,
DataspacedModel,
):
filename = models.CharField(
@@ -2504,7 +2542,7 @@ def create_from_url(cls, url, user):
package_for_match = cls(download_url=download_url)
package_for_match.set_package_url(package_url)
purldb_entries = package_for_match.get_purldb_entries(user)
- # Look for one ith the same exact purl in that case
+ # Look for one with the same exact purl in that case
if purldb_data := pick_purldb_entry(purldb_entries, purl=url):
# The format from PurlDB is "2019-11-18T00:00:00Z" from DateTimeField
if release_date := purldb_data.get("release_date"):
@@ -2597,6 +2635,8 @@ def update_from_purldb(self, user):
- Retrieves matching entries from PurlDB using the given user.
- If exactly one match is found, its data is used directly.
+ - If multiple entries are found, leverage the package_content value when
+ available to select a "source" package.
- If multiple entries are found, only values that are non-empty and
common across all entries are merged and used to update the Package.
"""
@@ -2607,6 +2647,8 @@ def update_from_purldb(self, user):
purldb_entries_count = len(purldb_entries)
if purldb_entries_count == 1:
package_data = purldb_entries[0]
+ elif source_package := pick_source_package(purldb_entries):
+ package_data = source_package
else:
package_data = merge_common_non_empty_values(purldb_entries)
@@ -2615,6 +2657,10 @@ def update_from_purldb(self, user):
package_data["release_date"] = release_date.split("T")[0]
package_data["license_expression"] = package_data.get("declared_license_expression")
+ if package_content := package_data.get("package_content"):
+ package_content_value = Package.get_package_content_value_from_label(package_content)
+ package_data["package_content"] = package_content_value
+
# Avoid raising an IntegrityError when the values in `package_data` for the
# identifier fields already exist on another Package instance.
#
@@ -2647,6 +2693,12 @@ def update_from_purldb(self, user):
override=False,
override_unknown=True,
)
+
+ if updated_fields:
+ msg = f"Automatically updated {', '.join(updated_fields)} from PurlDB."
+ logger.debug(f"PurlDB: {msg}")
+ History.log_change(user, self, message=msg)
+
return updated_fields
def update_from_scan(self, user, update_products=False):
diff --git a/component_catalog/tests/test_models.py b/component_catalog/tests/test_models.py
index 788143cf..edb2ec2b 100644
--- a/component_catalog/tests/test_models.py
+++ b/component_catalog/tests/test_models.py
@@ -1366,6 +1366,7 @@ def test_component_catalog_models_get_exclude_candidates_fields(self):
"file_references",
"other_license_expression",
"parties",
+ "package_content",
],
),
)
@@ -2381,6 +2382,16 @@ def test_package_model_github_repo_url(self):
p.download_url = url
self.assertEqual(expected, p.github_repo_url)
+ def test_package_model_get_package_content_value_from_label(self):
+ get_label_func = Package.get_package_content_value_from_label
+ self.assertIsNone(get_label_func(None))
+ self.assertIsNone(get_label_func(100))
+ self.assertIsNone(get_label_func("wrong"))
+
+ self.assertEqual(2, get_label_func("patch"))
+ self.assertEqual(2, get_label_func("Patch"))
+ self.assertEqual(2, get_label_func("PATCH"))
+
@mock.patch("requests.get")
def test_collect_package_data(self, mock_get):
expected_message = (
@@ -2635,6 +2646,7 @@ def test_package_model_update_from_purldb(self, mock_get_purldb_entries):
"sha256": "0a1efde1b685a6c30999ba00902f23613cf5db864c5a1532d2edf3eda7896a37",
"copyright": "(c) Copyright",
"declared_license_expression": "(bsd-simplified AND bsd-new)",
+ "package_content": "source_archive",
}
mock_get_purldb_entries.return_value = [purldb_entry]
@@ -2656,12 +2668,13 @@ def test_package_model_update_from_purldb(self, mock_get_purldb_entries):
"sha256",
"copyright",
"declared_license_expression",
+ "package_content",
"license_expression",
]
self.assertEqual(expected, updated_fields)
package1.refresh_from_db()
- # Handle release_date separatly
+ # Handle release_date and package_content separatly
updated_fields.remove("release_date")
self.assertEqual(purldb_entry["release_date"], str(package1.release_date))
@@ -2700,6 +2713,42 @@ def test_package_model_update_from_purldb_multiple_entries(self, mock_get_purldb
self.assertEqual(["Keyword1", "Keyword2"], package1.keywords)
self.assertEqual("Python", package1.primary_language)
+ @mock.patch("component_catalog.models.Package.get_purldb_entries")
+ def test_package_model_update_from_purldb_multiple_entries_package_content(
+ self, mock_get_entries
+ ):
+ purldb_entry_binary = {
+ "uuid": "e133e70b-8dd3-4cf1-9711-72b1f57523a0",
+ "purl": "pkg:pypi/boto3@1.37.26?file_name=boto3-1.37.26-py3-none-any.whl",
+ "type": "pypi",
+ "name": "boto3",
+ "version": "1.37.26",
+ "filename": "boto3-1.37.26-py3-none-any.whl",
+ "download_url": "https://files.pythonhosted.org/packages/boto3-1.37.26-py3-none-any.whl",
+ "package_content": "binary",
+ }
+ purldb_entry_source = {
+ "uuid": "326aa7a8-4f28-406d-89f9-c1404916925b",
+ "purl": "pkg:pypi/boto3@1.37.26?file_name=boto3-1.37.26.tar.gz",
+ "type": "pypi",
+ "name": "boto3",
+ "version": "1.37.26",
+ "filename": "boto3-1.37.26.tar.gz",
+ "download_url": "https://files.pythonhosted.org/packages/boto3-1.37.26.tar.gz",
+ "package_content": "source_archive",
+ }
+
+ mock_get_entries.return_value = [purldb_entry_binary, purldb_entry_source]
+ package1 = make_package(self.dataspace, package_url="pkg:pypi/boto3@1.37.26")
+ updated_fields = package1.update_from_purldb(self.user)
+ expected = ["download_url", "filename", "package_content"]
+ self.assertEqual(expected, sorted(updated_fields))
+
+ package1.refresh_from_db()
+ self.assertEqual(purldb_entry_source["download_url"], package1.download_url)
+ self.assertEqual(purldb_entry_source["filename"], package1.filename)
+ self.assertEqual("source_archive", package1.get_package_content_display())
+
@mock.patch("component_catalog.models.Package.get_purldb_entries")
def test_package_model_update_from_purldb_duplicate_exception(self, mock_get_purldb_entries):
package_url = "pkg:pypi/django@3.0"
diff --git a/component_catalog/tests/test_views.py b/component_catalog/tests/test_views.py
index 715e53e6..4cfd9f18 100644
--- a/component_catalog/tests/test_views.py
+++ b/component_catalog/tests/test_views.py
@@ -1243,6 +1243,7 @@ def test_package_details_view_num_queries(self):
# Create a Package Set
package_url = "pkg:pypi/django@5.0"
self.package1.set_package_url(package_url)
+ self.package1.package_content = Package.PackageContentType.SOURCE_ARCHIVE
self.package1.save()
license_expression = "{} AND {}".format(self.license1.key, self.license2.key)
make_package(self.dataspace, package_url=package_url, license_expression=license_expression)
@@ -3389,6 +3390,7 @@ def test_component_catalog_package_add_view_initial_data(
"description": "Abbot Java GUI Test Library",
"declared_license_expression": "bsd-new OR eps-1.0 OR apache-2.0 OR mit",
"keywords": ["keyword1", "keyword2"],
+ "package_content": "binary",
}
mock_request_get.return_value = {
"count": 1,
@@ -3411,6 +3413,7 @@ def test_component_catalog_package_add_view_initial_data(
"description": "Abbot Java GUI Test Library",
"license_expression": "bsd-new OR eps-1.0 OR apache-2.0 OR mit",
"declared_license_expression": "bsd-new OR eps-1.0 OR apache-2.0 OR mit",
+ "package_content": Package.PackageContentType.BINARY,
}
self.assertEqual(expected, response.context["form"].initial)
diff --git a/component_catalog/views.py b/component_catalog/views.py
index 1ba19aef..f24d1a05 100644
--- a/component_catalog/views.py
+++ b/component_catalog/views.py
@@ -1140,6 +1140,7 @@ class PackageDetailsView(
"parties",
"datasource_id",
"file_references",
+ "package_content",
],
},
"components": {
@@ -1293,6 +1294,7 @@ def tab_others(self):
TabField("parties"),
TabField("datasource_id"),
TabField("file_references"),
+ TabField("package_content", source="get_package_content_display"),
]
fields = self.get_tab_fields(tab_fields)
@@ -1930,6 +1932,12 @@ def get_initial(self):
if purldb_entry := self.get_entry_from_purldb():
# Duplicate the declared_license_expression as the "concluded" license_expression
purldb_entry["license_expression"] = purldb_entry.get("declared_license_expression")
+
+ # Convert package_content string label to integer value
+ if content_label := purldb_entry.pop("package_content", None):
+ if content_value := Package.get_package_content_value_from_label(content_label):
+ purldb_entry["package_content"] = content_value
+
model_fields = [field.name for field in Package._meta.get_fields()]
initial_from_purldb_entry = {
field_name: value
diff --git a/dejacode_toolkit/purldb.py b/dejacode_toolkit/purldb.py
index 0f63d7a2..83db1ab4 100644
--- a/dejacode_toolkit/purldb.py
+++ b/dejacode_toolkit/purldb.py
@@ -61,6 +61,8 @@ def get_package_by_purl(self, package_url):
def find_packages(self, payload, timeout=None):
"""Get Packages details using provided `payload` filters on the PurlDB package list."""
+ payload.update({"sort": "package_content"})
+
response = self.request_get(self.package_api_url, params=payload, timeout=timeout)
if response and response.get("count") > 0:
return response.get("results")
@@ -88,3 +90,17 @@ def pick_purldb_entry(purldb_entries, purl=None):
matches = [entry for entry in purldb_entries if entry.get("purl") == purl]
if len(matches) == 1:
return matches[0]
+
+
+def pick_source_package(purldb_entries):
+ """Pick a source package from a list of PurlDB entries."""
+ if not purldb_entries:
+ return
+
+ if len(purldb_entries) == 1:
+ return purldb_entries[0]
+
+ for entry in purldb_entries:
+ package_content = entry.get("package_content")
+ if package_content and package_content.lower() == "source_archive":
+ return entry
diff --git a/dje/tests/testfiles/test_dataset_cc_only.json b/dje/tests/testfiles/test_dataset_cc_only.json
index b80388ec..cab86eff 100644
--- a/dje/tests/testfiles/test_dataset_cc_only.json
+++ b/dje/tests/testfiles/test_dataset_cc_only.json
@@ -292,12 +292,13 @@
"vcs_url": "",
"code_view_url": "",
"bug_tracking_url": "",
+ "md5": "",
+ "sha1": "",
"sha256": "",
"sha512": "",
+ "package_content": null,
"filename": "systemu-2.5.2.gem",
"download_url": "https://s3.amazonaws.com/production.s3.rubygems.org/gems/systemu-2.5.2.gem",
- "sha1": "",
- "md5": "",
"size": null,
"release_date": null,
"primary_language": "",
diff --git a/dje/tests/testfiles/test_dataset_pp_only.json b/dje/tests/testfiles/test_dataset_pp_only.json
index 03cb35a5..4b1bba90 100644
--- a/dje/tests/testfiles/test_dataset_pp_only.json
+++ b/dje/tests/testfiles/test_dataset_pp_only.json
@@ -30,6 +30,7 @@
"sha1": "",
"sha256": "",
"sha512": "",
+ "package_content": null,
"filename": "systemu-2.5.2.gem",
"download_url": "https://s3.amazonaws.com/production.s3.rubygems.org/gems/systemu-2.5.2.gem",
"size": null,
diff --git a/purldb/tests/test_purldb_toolkit.py b/purldb/tests/test_purldb_toolkit.py
index 17ad45b8..324ef9ba 100644
--- a/purldb/tests/test_purldb_toolkit.py
+++ b/purldb/tests/test_purldb_toolkit.py
@@ -12,6 +12,7 @@
from dejacode_toolkit.purldb import PurlDB
from dejacode_toolkit.purldb import pick_purldb_entry
+from dejacode_toolkit.purldb import pick_source_package
from dje.models import Dataspace
from dje.tests import create_user
@@ -75,3 +76,29 @@ def test_purldb_toolkit_pick_purldb_entry(self):
self.assertEqual(entry2, pick_purldb_entry([entry1, entry2], purl=purl2))
self.assertIsNone(pick_purldb_entry([entry1, entry1], purl=purl1))
self.assertIsNone(pick_purldb_entry([entry1, entry2], purl=purl3))
+
+ def test_purldb_toolkit_pick_source_package(self):
+ self.assertIsNone(pick_source_package(None))
+ self.assertIsNone(pick_source_package([]))
+
+ entry_binary = {
+ "purl": "pkg:pypi/boto3@1.37.26?file_name=boto3-1.37.26-py3-none-any.whl",
+ "filename": "boto3-1.37.26-py3-none-any.whl",
+ "download_url": "https://files.pythonhosted.org/boto3-1.37.26-py3-none-any.whl",
+ "package_content": "binary",
+ }
+ entry_source = {
+ "purl": "pkg:pypi/boto3@1.37.26?file_name=boto3-1.37.26.tar.gz",
+ "filename": "boto3-1.37.26.tar.gz",
+ "download_url": "https://files.pythonhosted.org/boto3-1.37.26.tar.gz",
+ "package_content": "source_archive",
+ }
+
+ self.assertEqual(entry_binary, pick_source_package([entry_binary]))
+ self.assertIsNone(pick_source_package([entry_binary, entry_binary]))
+
+ self.assertEqual(entry_source, pick_source_package([entry_source]))
+ self.assertEqual(entry_source, pick_source_package([entry_source, entry_source]))
+
+ self.assertEqual(entry_source, pick_source_package([entry_source, entry_binary]))
+ self.assertEqual(entry_source, pick_source_package([entry_binary, entry_source]))