adaptation of traffic validation (resolving open issue)

This commit is contained in:
Max Metz 2024-08-14 19:09:35 +02:00
parent c4e5764601
commit fcc03b2ade
2 changed files with 12 additions and 1 deletion

View File

@@ -33,7 +33,9 @@ def get_synchronous_shipcall_times_standalone(query_time:pd.Timestamp, all_df_ti
returns: counts returns: counts
""" """
assert isinstance(query_time,pd.Timestamp) assert (isinstance(query_time,pd.Timestamp)) or (pd.isnull(query_time)), f"expected a timestamp. Found type: {type(query_time)} with value: {query_time}"
if pd.isnull(query_time):
return 0
# get a timedelta for each valid (not Null) time entry # get a timedelta for each valid (not Null) time entry
time_deltas_eta = [(query_time.to_pydatetime()-time_.to_pydatetime()) for time_ in all_df_times.loc[:,"eta_berth"] if not pd.isnull(time_)] time_deltas_eta = [(query_time.to_pydatetime()-time_.to_pydatetime()) for time_ in all_df_times.loc[:,"eta_berth"] if not pd.isnull(time_)]
@@ -442,4 +444,6 @@ class SQLHandler():
def count_synchronous_shipcall_times(self, query_time:pd.Timestamp, all_df_times:pd.DataFrame, delta_threshold=900)->int: def count_synchronous_shipcall_times(self, query_time:pd.Timestamp, all_df_times:pd.DataFrame, delta_threshold=900)->int:
"""count all times entries, which are too close to the query_time. The {delta_threshold} determines the threshold. returns counts (int)""" """count all times entries, which are too close to the query_time. The {delta_threshold} determines the threshold. returns counts (int)"""
if all_df_times is None:
all_df_times = self.df_dict.get("times")
return get_synchronous_shipcall_times_standalone(query_time, all_df_times, delta_threshold) return get_synchronous_shipcall_times_standalone(query_time, all_df_times, delta_threshold)

View File

@@ -920,6 +920,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
Type: Global Rule Type: Global Rule
Description: this validation rule checks, whether there are too many shipcalls with identical times to the query ETA. Description: this validation rule checks, whether there are too many shipcalls with identical times to the query ETA.
""" """
if all_times_agency is None:
all_times_agency = self.sql_handler.get_times_for_agency(non_null_column="eta_berth")
# check, if the header is filled in (agency) # check, if the header is filled in (agency)
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): # if len(times_agency) != 1: if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): # if len(times_agency) != 1:
return self.get_no_violation_default_output() return self.get_no_violation_default_output()
@@ -929,6 +932,7 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
query_time = times_agency.iloc[0].eta_berth query_time = times_agency.iloc[0].eta_berth
# count the number of times, where a times entry is very close to the query time (uses an internal threshold, such as 15 minutes) # count the number of times, where a times entry is very close to the query time (uses an internal threshold, such as 15 minutes)
counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency) counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency)
violation_state = counts > maximum_threshold violation_state = counts > maximum_threshold
@@ -944,6 +948,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
Type: Global Rule Type: Global Rule
Description: this validation rule checks, whether there are too many shipcalls with identical times to the query ETD. Description: this validation rule checks, whether there are too many shipcalls with identical times to the query ETD.
""" """
if all_times_agency is None:
all_times_agency = self.sql_handler.get_times_for_agency(non_null_column="etd_berth")
# check, if the header is filled in (agency) # check, if the header is filled in (agency)
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): #if len(times_agency) != 1: if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): #if len(times_agency) != 1:
return self.get_no_violation_default_output() return self.get_no_violation_default_output()