Coverage for datacite/management/commands/populate.py: 97%
60 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-07-29 15:38 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2025-07-29 15:38 +0000
1from getpass import getpass
2from typing import Any
4from django.core.management.base import BaseCommand, CommandParser
6from datacite.datacite import DataciteRESTClient
7from datacite.fdsn import FdsnRESTClient
8from datacite.serializers import MetadataDeserializer
9from network.models import Network
class Command(BaseCommand):
    """Management command that populates the database from Datacite and FDSN.

    For every DOI returned by Datacite, the matching ``Network`` row is looked
    up (or created from the FDSN network list) and the Datacite attributes are
    fed to ``MetadataDeserializer``.
    """

    help = """
    Populate the database.
    Networks: uv run python manage.py populate networks -u INIST.RESIF -p
    """

    def add_arguments(self, parser: CommandParser) -> None:
        """Declare the command-line interface of the command.

        Args:
            parser: Parser supplied by Django on which arguments are registered.
        """
        # Positional arguments
        parser.add_argument("item", nargs="+", type=str, help="Item to populate.")

        # Named (optional) arguments
        parser.add_argument("-u", "--user", help="User of Datacite prod.", dest="user")
        # "-p" is only a flag: the password itself is prompted for interactively
        # in handle(), so it never appears on the command line.
        parser.add_argument(
            "-p",
            "--password",
            help="Password of Datacite prod.",
            action="store_true",
            dest="password",
        )

    def handle(self, *args: Any, **options: Any) -> None:  # noqa: ARG002
        """Dispatch to the import routine matching the requested item(s).

        Args:
            *args: Unused positional arguments.
            **options: Parsed options; reads ``item``, ``user`` and ``password``.
        """
        if "networks" in options["item"]:
            self.import_networks(
                options.get("user"),
                # Prompt for the password only when the -p flag was given.
                getpass() if options.get("password", "") else None,
            )
        else:
            self.stdout.write("Arg no recognized.")

    def import_networks(self, user: str | None, password: str | None) -> None:
        """Import Datacite metadata for every network DOI into the database.

        Failures for a single DOI are reported on stderr and do not stop the
        import; a summary and two data-quality warnings are printed at the end.

        Args:
            user: Datacite user name, or None.
            password: Datacite password, or None.
        """
        fdsn_data = FdsnRESTClient.get_all_networks()
        datacite_data = self.get_datacite_data(user, password)
        self.stdout.write(
            f"Fetched {len(datacite_data)} networks from Datacite, "
            f"inserting them in database..."
        )
        n = 0
        for doi in datacite_data:
            if "attributes" not in doi:
                self.stderr.write(
                    doi["id"] + " failed : missing attributes in Datacite response."
                )
                continue

            # Only the lookup can raise Network.DoesNotExist, so only the lookup
            # sits inside the try block.
            try:
                network = Network.objects.get(doi__iexact=doi["id"])
            except Network.DoesNotExist:
                # Unknown network: create it from the FDSN record with this DOI.
                fdsn_network = self.get_fdsn_network_from(fdsn_data, doi["id"])

                if not fdsn_network:
                    self.stderr.write(
                        doi["id"] + " failed : missing doi in the FDSN API."
                    )
                    continue

                network = self.save_network(fdsn_network)

                metadata = doi["attributes"]
                metadata["network"] = network.pk

                serializer = MetadataDeserializer(data=metadata)
            else:
                metadata_raw = doi["attributes"]
                metadata_raw["network"] = network.pk
                serializer = MetadataDeserializer(
                    instance=network.metadata, data=metadata_raw, partial=True
                )

            if serializer.is_valid():
                # NOTE(review): create() is called even when an existing
                # instance was passed above; confirm MetadataDeserializer.create
                # handles the update case (otherwise save() may be intended).
                serializer.create(serializer.validated_data)
                n += 1
            else:
                self.stderr.write(doi["id"] + " failed : " + str(serializer.errors))

        self.stdout.write(
            f"Successfully inserted {n}/{len(datacite_data)} networks from "
            f"Datacite into the database.\n"
        )
        self.stderr.write(
            "WARNING : 'sizes' field was rawly casted to fields : size_information, "
            "size_increment, size_total. Manual verification is recommended."
        )
        self.stderr.write(
            "WARNING : some contributor types are not present in the Epos-France "
            "recommendations: Project leader, Project member, Funder, Other."
        )

    @staticmethod
    def save_network(network: dict) -> Network:
        """Get or create the ``Network`` row described by an FDSN record.

        ``start_date`` / ``end_date`` are replaced by ``start_year`` /
        ``end_year`` (the first four characters converted to int, or None when
        the date is missing or empty); all other keys are passed through
        unchanged.

        Args:
            network: FDSN network record. It is not modified.

        Returns:
            The existing or newly created ``Network`` instance.
        """
        # Work on a copy: popping from the caller's dict would corrupt the FDSN
        # list if the same record were ever looked up again.
        fields = dict(network)
        start_date = fields.pop("start_date", "")
        end_date = fields.pop("end_date", "")
        # A missing date maps to None for both years; an empty string is not a
        # valid value for an integer lookup.
        fields["start_year"] = int(start_date[:4]) if start_date else None
        fields["end_year"] = int(end_date[:4]) if end_date else None
        return Network.objects.get_or_create(**fields)[0]

    @staticmethod
    def get_fdsn_network_from(fdsn_data: Any, doi: str) -> dict | None:
        """Return the first FDSN record whose DOI equals ``doi``.

        The comparison is case-insensitive. Records without a DOI (missing key,
        None or empty value) never match.

        Args:
            fdsn_data: Iterable of FDSN network dicts; None is treated as empty.
            doi: DOI to look for.

        Returns:
            The matching record, or None when there is no match.
        """
        wanted = doi.lower()
        for network in fdsn_data or ():
            candidate = network.get("doi")
            # A record with no DOI must not be mistaken for the requested one.
            if candidate and candidate.lower() == wanted:
                return network
        return None

    @staticmethod
    def get_datacite_data(user: str | None, password: str | None) -> list:
        """Fetch the DOI records of prefix 10.15778 from the Datacite REST API.

        Args:
            user: Datacite user name, or None.
            password: Datacite password, or None.

        Returns:
            The value returned by the client, or an empty list when that value
            is falsy.
        """
        datacite_client = DataciteRESTClient(
            base_url="https://api.datacite.org/", user=user, password=password
        )
        # NOTE(review): a single request with page[size]=500 is made; confirm
        # the client paginates if more than 500 DOIs can exist for this prefix.
        return (
            datacite_client.get_doi_metadata(
                params={
                    "prefix": "10.15778",
                    "publisher": "true",
                    "affiliation": "true",
                    "page[size]": 500,
                }
            )
            or []
        )