Adapt the validation rules for version 1.2. Rules 0002: the time-difference threshold for 'disagreement' is increased to 1 hour. The times are now compared pairwise (element by element), which circumvents the instabilities caused by rounding pd.Timestamp objects. Rules 0001 L&M and 0003: added a feature flag that skips the evaluation of Terminal times altogether. For version 1.2, this feature flag is enabled.

This commit is contained in:
Max Metz 2024-04-29 12:59:14 +02:00 committed by Daniel Schick
parent f690387be8
commit a92338c92e

View File

@ -57,6 +57,7 @@ class ValidationRuleBaseFunctions():
self.error_message_dict = error_message_dict
# as of 23 dec. 2023 port authority validation is temporarily disabled
self.ignore_port_administration_flag = True # flag to disable all port administration validation rules
self.ignore_terminal_flag = True # flag to disable Terminal validation rules 0001-L & 0001-M
def describe_error_message(self, key)->str:
"""
@ -106,7 +107,7 @@ class ValidationRuleBaseFunctions():
violation_state = (delta<=threshold)
return violation_state
def check_participants_agree_on_estimated_time(self, shipcall, query, df_times, applicable_shipcall_type)->bool:
def check_participants_agree_on_estimated_time(self, shipcall, query, df_times, applicable_shipcall_type, threshold:int=3600)->bool:
"""
# base function for all validation rules in the group {0002} A-C
@ -117,10 +118,12 @@ class ValidationRuleBaseFunctions():
- the shipcall belongs to a different type than the rule expects
- there are no matching times for the provided {query} (e.g., "eta_berth")
Instead of comparing each individual result, this function counts the amount of unique instances.
When there is not only one unique value, there are deviating time estimates, and a violation occurs
This method computes the absolute time difference between all pairs of time entries. A threshold (in seconds) is used
to identify when the time differences are so large that the participants essentially disagree on the times.
This circumvents previous instabilities, which stemmed from rounding the pd.Timestamp elements.
To reduce the potential of false violations, the agreement is rounded (e.g., by minute).
options:
threshold: integer. Determines the threshold in seconds, when two Timestamps differ 'too much'
returns: violation_state (bool)
"""
@ -136,14 +139,14 @@ class ValidationRuleBaseFunctions():
participant_types = [ParticipantType.AGENCY.value, ParticipantType.MOORING.value, ParticipantType.PILOT.value, ParticipantType.TUG.value]
agency_times = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value,:]
df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:]
agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
if not len(agency_time):
if len(agency_times)==0:
violation_state = False
return violation_state
df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:]
agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
# for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame
# exclude missing entries and consider only pd.Timestamp entries (which ignores pd.NaT/null entries)
estimated_times = [time_ for time_ in df_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] # df_times = df_times.loc[~df_times[query].isnull(),:]
@ -152,8 +155,24 @@ class ValidationRuleBaseFunctions():
violation_state = False
return violation_state
# this (current) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes
violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min"))
# for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame
estimated_times = [time_ for time_ in df_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] # consider only pandas Timestamp objects
# measure the time difference between all pairs.
# for each pair of times, the absolute timedifference in seconds (float) is measured
time_absolute_differences = [[abs(time_.to_pydatetime()-time__.to_pydatetime()).total_seconds() for j_, time__ in enumerate(estimated_times) if j_ != i_] for i_, time_ in enumerate(estimated_times)]
# list of lists: for each element in the list, create a boolean that indicates, whether the threshold is exceeded
time_difference_exceeds_threshold = [[time__ >= threshold for time__ in time_] for time_ in time_absolute_differences]
# list of booleans for each time entry separately
time_difference_exceeds_threshold = [any(time_) for time_ in time_difference_exceeds_threshold]
# if *any* of these entries exceeds the threshold, the times are too distinct. In that case, a rule violation occurs
violation_state = any(time_difference_exceeds_threshold)
# this (previous) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes
# violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min"))
# this solution to the rule compares all times to each other. When there is a total difference of more than 15 minutes, a violation occurs
# Consequently, it treats all times as equally important
@ -581,6 +600,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
- Checks, if times_terminal.operations_start is filled in.
- Measures the difference between 'now' and 'times_agency.eta_berth'.
"""
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
return self.get_no_violation_default_output()
if not shipcall.type in [ShipcallType.INCOMING.value]:
return self.get_no_violation_default_output()
@ -615,6 +637,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
- Checks, if times_terminal.operations_end is filled in.
- Measures the difference between 'now' and 'times_agency.etd_berth'.
"""
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
return self.get_no_violation_default_output()
if not shipcall.type in [ShipcallType.OUTGOING.value, ShipcallType.SHIFTING.value]:
return self.get_no_violation_default_output()
@ -730,6 +755,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
query time: eta_berth (times_agency)
start_time & end_time: operations_start & operations_end (times_terminal)
"""
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
return self.get_no_violation_default_output()
if not shipcall.type in [ShipcallType.INCOMING.value]:
return self.get_no_violation_default_output()
@ -770,6 +798,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
query time: eta_berth (times_agency)
start_time & end_time: operations_start & operations_end (times_terminal)
"""
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
return self.get_no_violation_default_output()
if not shipcall.type in [ShipcallType.OUTGOING.value, ShipcallType.SHIFTING.value]:
return self.get_no_violation_default_output()