properly integrating validation rules 0005A and 0005B, which consider the number of similar shipcall times (eta & etd)

This commit is contained in:
Max Metz 2024-07-31 16:59:30 +02:00
parent be14e3ee1a
commit 982420dd34
2 changed files with 9 additions and 1 deletions

View File

@ -33,7 +33,8 @@ def get_synchronous_shipcall_times_standalone(query_time:pd.Timestamp, all_df_ti
returns: counts
"""
assert isinstance(query_time,pd.Timestamp)
assert isinstance(query_time,pd.Timestamp) or pd.isnull(query_time), f"expected query_time to be a pd.Timestamp or pd.NaT. Found: {type(query_time)}"
assert isinstance(all_df_times,pd.DataFrame)
# get a timedelta for each valid (not Null) time entry
time_deltas_eta = [(query_time.to_pydatetime()-time_.to_pydatetime()) for time_ in all_df_times.loc[:,"eta_berth"] if not pd.isnull(time_)]
@ -439,4 +440,5 @@ class SQLHandler():
def count_synchronous_shipcall_times(self, query_time:pd.Timestamp, all_df_times:pd.DataFrame, delta_threshold=900)->int:
    """
    Count the times entries in all_df_times that lie too close to query_time.

    Thin wrapper: after checking that all_df_times is a DataFrame, the work is
    delegated to get_synchronous_shipcall_times_standalone, and delta_threshold
    is forwarded unchanged as the closeness threshold.
    NOTE(review): delta_threshold is presumably given in seconds (900 s = 15 min) -- confirm
    against the standalone function.

    returns: counts (int)
    """
    # fail early on a wrong container type, before delegating
    assert isinstance(all_df_times, pd.DataFrame)
    n_synchronous = get_synchronous_shipcall_times_standalone(
        query_time,
        all_df_times,
        delta_threshold,
    )
    return n_synchronous

View File

@ -929,6 +929,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
query_time = times_agency.iloc[0].eta_berth
# count the number of times, where a times entry is very close to the query time (uses an internal threshold, such as 15 minutes)
if all_times_agency is None:
all_times_agency = self.sql_handler.get_times_for_agency(non_null_column="eta_berth")
counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency)
violation_state = counts > maximum_threshold
@ -952,6 +955,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
query_time = times_agency.iloc[0].etd_berth
if all_times_agency is None:
all_times_agency = self.sql_handler.get_times_for_agency(non_null_column="etd_berth")
# count the number of times, where a times entry is very close to the query time (uses an internal threshold, such as 15 minutes)
counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency)
violation_state = counts > maximum_threshold