From a92338c92e7f1428ff64400eba2bded720fc8683 Mon Sep 17 00:00:00 2001 From: Max Metz Date: Mon, 29 Apr 2024 12:59:14 +0200 Subject: [PATCH] adapting validation rules for version 1.2. Rules 0002: the time-difference threshold for 'disagreement' is expanded to 1 hour. All time entries are now compared pairwise to circumvent instabilities from rounding pd.Timestamp objects. Rules 0001 L&M and 0003: created a feature flag, which skips the evaluation of Terminal times altogether. For version 1.2, this feature flag is enabled. --- .../validators/validation_rule_functions.py | 53 +++++++++++++++---- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/src/server/BreCal/validators/validation_rule_functions.py b/src/server/BreCal/validators/validation_rule_functions.py index ef44f08..1c16ce4 100644 --- a/src/server/BreCal/validators/validation_rule_functions.py +++ b/src/server/BreCal/validators/validation_rule_functions.py @@ -57,6 +57,7 @@ class ValidationRuleBaseFunctions(): self.error_message_dict = error_message_dict # as of 23 dec.
2023 port authority validation is temporarily disabled self.ignore_port_administration_flag = True # flag to disable all port administration validation rules + self.ignore_terminal_flag = True # flag to disable Terminal validation rules 0001-L, 0001-M & 0003 def describe_error_message(self, key)->str: """ @@ -106,7 +107,7 @@ class ValidationRuleBaseFunctions(): violation_state = (delta<=threshold) return violation_state - def check_participants_agree_on_estimated_time(self, shipcall, query, df_times, applicable_shipcall_type)->bool: + def check_participants_agree_on_estimated_time(self, shipcall, query, df_times, applicable_shipcall_type, threshold:int=3600)->bool: """ # base function for all validation rules in the group {0002} A-C @@ -117,10 +118,12 @@ class ValidationRuleBaseFunctions(): - the shipcall belongs to a different type than the rule expects - there are no matching times for the provided {query} (e.g., "eta_berth") - Instead of comparing each individual result, this function counts the amount of unique instances. - When there is not only one unique value, there are deviating time estimates, and a violation occurs + This method computes the absolute time difference between all time entries. A threshold (in seconds) is used + to identify when the time differences are so large that participants essentially disagree on the times. + This circumvents previous instabilities, which stem from rounding the pd.Timestamp elements. - To reduce the potential of false violations, the agreement is rounded (e.g., by minute). + options: + threshold: integer.
Determines the threshold in seconds, when two Timestamps differ 'too much' returns: violation_state (bool) """ @@ -136,14 +139,14 @@ class ValidationRuleBaseFunctions(): participant_types = [ParticipantType.AGENCY.value, ParticipantType.MOORING.value, ParticipantType.PILOT.value, ParticipantType.TUG.value] agency_times = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value,:] - df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:] - - - agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] - if not len(agency_time): + if len(agency_times)==0: violation_state = False return violation_state + + df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:] + agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] + # for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame # exclude missing entries and consider only pd.Timestamp entries (which ignores pd.NaT/null entries) estimated_times = [time_ for time_ in df_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] # df_times = df_times.loc[~df_times[query].isnull(),:] @@ -152,8 +155,24 @@ class ValidationRuleBaseFunctions(): violation_state = False return violation_state - # this (current) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes - violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min")) + # for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame + estimated_times = [time_ for time_ in df_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] # consider only pandas Timestamp objects + + # measure the time difference between all pairs. 
+ # for each pair of times, the absolute time difference in seconds (float) is measured + time_absolute_differences = [[abs(time_.to_pydatetime()-time__.to_pydatetime()).total_seconds() for j_, time__ in enumerate(estimated_times) if j_ != i_] for i_, time_ in enumerate(estimated_times)] + + # list of lists: for each element in the list, create a boolean that indicates whether the threshold is exceeded + time_difference_exceeds_threshold = [[time__ >= threshold for time__ in time_] for time_ in time_absolute_differences] + + # list of booleans for each time entry separately + time_difference_exceeds_threshold = [any(time_) for time_ in time_difference_exceeds_threshold] + + # if *any* of these entries exceeds the threshold, the times are too distinct. In that case, a rule violation occurs + violation_state = any(time_difference_exceeds_threshold) + + # this (previous) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes + # violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min")) # this solution to the rule compares all times to each other. When there is a total difference of more than 15 minutes, a violation occurs # Consequently, it treats all times as equally important @@ -581,6 +600,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): - Checks, if times_terminal.operations_start is filled in. - Measures the difference between 'now' and 'times_agency.eta_berth'. """ + if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals + return self.get_no_violation_default_output() + if not shipcall.type in [ShipcallType.INCOMING.value]: return self.get_no_violation_default_output() @@ -615,6 +637,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): - Checks, if times_terminal.operations_end is filled in.
- Measures the difference between 'now' and 'times_agency.etd_berth'. """ + if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals + return self.get_no_violation_default_output() + if not shipcall.type in [ShipcallType.OUTGOING.value, ShipcallType.SHIFTING.value]: return self.get_no_violation_default_output() @@ -730,6 +755,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): query time: eta_berth (times_agency) start_time & end_time: operations_start & operations_end (times_terminal) """ + if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals + return self.get_no_violation_default_output() + if not shipcall.type in [ShipcallType.INCOMING.value]: return self.get_no_violation_default_output() @@ -770,6 +798,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): query time: eta_berth (times_agency) start_time & end_time: operations_start & operations_end (times_terminal) """ + if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals + return self.get_no_violation_default_output() + if not shipcall.type in [ShipcallType.OUTGOING.value, ShipcallType.SHIFTING.value]: return self.get_no_violation_default_output()