Coverage for datacite/management/commands/populate.py: 97%

60 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-07-29 15:38 +0000

1from getpass import getpass 

2from typing import Any 

3 

4from django.core.management.base import BaseCommand, CommandParser 

5 

6from datacite.datacite import DataciteRESTClient 

7from datacite.fdsn import FdsnRESTClient 

8from datacite.serializers import MetadataDeserializer 

9from network.models import Network 

10 

11 

class Command(BaseCommand):
    """Management command that populates the database from remote APIs.

    The only item handled is ``networks``: DOI records are fetched from
    Datacite, matched against existing ``Network`` rows (or created from
    the FDSN network list when missing), and their attributes are fed to
    ``MetadataDeserializer``.
    """

    help = """
    Populate the database.
    Networks: uv run python manage.py populate networks -u INIST.RESIF -p
    """

    def add_arguments(self, parser: CommandParser) -> None:
        """Declare the command-line arguments accepted by the command.

        Args:
            parser: Argument parser supplied by Django.
        """
        # Positional arguments
        parser.add_argument("item", nargs="+", type=str, help="Item to populate.")

        # Named (optional) arguments
        parser.add_argument("-u", "--user", help="User of Datacite prod.", dest="user")
        # ``-p`` is a flag only: the password itself is prompted interactively
        # in ``handle`` so it never appears on the command line.
        parser.add_argument(
            "-p",
            "--password",
            help="Password of Datacite prod.",
            action="store_true",
            dest="password",
        )

    def handle(self, *args: Any, **options: Any) -> None:  # noqa: ARG002
        """Dispatch to the import routine matching the requested item.

        Args:
            *args: Unused positional arguments.
            **options: Parsed command-line options (``item``, ``user``,
                ``password``).
        """
        if "networks" not in options["item"]:
            self.stdout.write("Arg no recognized.")
            return

        # Prompt for the password only when the ``-p`` flag was given.
        password = getpass() if options.get("password") else None
        self.import_networks(options.get("user"), password)

    def import_networks(self, user: str | None, password: str | None) -> None:
        """Import Datacite DOI metadata and attach it to networks.

        For each Datacite record, the matching ``Network`` is looked up by
        DOI (case-insensitive). When none exists, it is created from the
        FDSN network list; records with no FDSN counterpart are reported on
        stderr and skipped.

        Args:
            user: Datacite user name, or ``None``.
            password: Datacite password, or ``None``.
        """
        fdsn_data = FdsnRESTClient.get_all_networks()
        datacite_data = self.get_datacite_data(user, password)
        self.stdout.write(
            f"Fetched {len(datacite_data)} networks from Datacite, "
            f"inserting them in database..."
        )

        n = 0
        for doi in datacite_data:
            if "attributes" not in doi:
                self.stderr.write(
                    doi["id"] + " failed : missing attributes in Datacite response."
                )
                continue

            # Keep the ``try`` limited to the lookup that can actually raise
            # ``Network.DoesNotExist``.
            try:
                network = Network.objects.get(doi__iexact=doi["id"])
            except Network.DoesNotExist:
                network = None

            if network is None:
                fdsn_network = self.get_fdsn_network_from(fdsn_data, doi["id"])
                if not fdsn_network:
                    self.stderr.write(
                        doi["id"] + " failed : missing doi in the FDSN API."
                    )
                    continue
                network = self.save_network(fdsn_network)
                metadata = doi["attributes"]
                metadata["network"] = network.pk
                serializer = MetadataDeserializer(data=metadata)
            else:
                metadata = doi["attributes"]
                metadata["network"] = network.pk
                # NOTE(review): ``network.metadata`` is assumed to resolve for
                # every existing network - confirm against the model.
                serializer = MetadataDeserializer(
                    instance=network.metadata, data=metadata, partial=True
                )

            if serializer.is_valid():
                # NOTE(review): ``create`` is called even when an ``instance``
                # was passed above; presumably ``MetadataDeserializer.create``
                # copes with existing metadata - confirm.
                serializer.create(serializer.validated_data)
                n += 1
            else:
                self.stderr.write(doi["id"] + " failed : " + str(serializer.errors))

        self.stdout.write(
            f"Successfully inserted {n}/{len(datacite_data)} networks from "
            f"Datacite into the database.\n"
        )
        self.stderr.write(
            "WARNING : 'sizes' field was rawly casted to fields : size_information, "
            "size_increment, size_total. Manual verification is recommended."
        )
        self.stderr.write(
            "WARNING : some contributor types are not present in the Epos-France "
            "recommendations: Project leader, Project member, Funder, Other."
        )

    @staticmethod
    def save_network(network: dict) -> Network:
        """Return the ``Network`` row matching an FDSN record, creating it if needed.

        ``start_date`` and ``end_date`` are replaced by ``start_year`` and
        ``end_year`` (the first four characters parsed as an integer, or
        ``None`` when the date is missing or empty). The remaining keys are
        passed unchanged to ``get_or_create``.

        Args:
            network: FDSN network record; it is not modified.

        Returns:
            The fetched or newly created ``Network``.
        """
        # Work on a copy: the record belongs to the shared FDSN list and may
        # be matched again later, so its date keys must not be popped away.
        fields = dict(network)
        start_date = fields.pop("start_date", "")
        end_date = fields.pop("end_date", "")
        fields["start_year"] = int(start_date[:4]) if start_date else None
        fields["end_year"] = int(end_date[:4]) if end_date else None
        return Network.objects.get_or_create(**fields)[0]

    @staticmethod
    def get_fdsn_network_from(fdsn_data: Any, doi: str) -> dict | None:
        """Find the FDSN network record carrying the given DOI.

        The comparison is case-insensitive. Records without a DOI (missing
        key or empty value) never match.

        Args:
            fdsn_data: Iterable of FDSN network records (dicts).
            doi: DOI to look for.

        Returns:
            The first matching record, or ``None`` when there is none.
        """
        wanted = doi.lower()
        for network in fdsn_data:
            candidate = network.get("doi")
            if candidate and candidate.lower() == wanted:
                return network
        return None

    @staticmethod
    def get_datacite_data(user: str | None, password: str | None) -> list:
        """Fetch DOI records for prefix ``10.15778`` from the Datacite REST API.

        Args:
            user: Datacite user name, or ``None``.
            password: Datacite password, or ``None``.

        Returns:
            The value returned by the client, or an empty list when that
            value is falsy.
        """
        datacite_client = DataciteRESTClient(
            base_url="https://api.datacite.org/", user=user, password=password
        )
        params = {
            "prefix": "10.15778",
            "publisher": "true",
            "affiliation": "true",
            # Single request: no pagination is performed beyond this page.
            "page[size]": 500,
        }
        return datacite_client.get_doi_metadata(params=params) or []