Merge pull request #28 from puls200/bugfix/fix_20240429

adapting validation rules for version 1.2. Rules 0002: the time-diffe…
This commit is contained in:
Daniel Schick 2024-04-29 13:43:46 +02:00 committed by GitHub
commit 01753540fe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -57,6 +57,7 @@ class ValidationRuleBaseFunctions():
self.error_message_dict = error_message_dict
# as of 23 dec. 2023 port authority validation is temporarily disabled
self.ignore_port_administration_flag = True # flag to disable all port administration validation rules
self.ignore_terminal_flag = True # flag to disable Terminal validation rules 0001-L & 0001-M
def describe_error_message(self, key)->str:
"""
@ -106,7 +107,7 @@ class ValidationRuleBaseFunctions():
violation_state = (delta<=threshold)
return violation_state
def check_participants_agree_on_estimated_time(self, shipcall, query, df_times, applicable_shipcall_type)->bool:
def check_participants_agree_on_estimated_time(self, shipcall, query, df_times, applicable_shipcall_type, threshold:int=3600)->bool:
"""
# base function for all validation rules in the group {0002} A-C
@ -117,10 +118,12 @@ class ValidationRuleBaseFunctions():
- the shipcall belongs to a different type than the rule expects
- there are no matching times for the provided {query} (e.g., "eta_berth")
Instead of comparing each individual result, this function counts the amount of unique instances.
When there is not only one unique value, there are deviating time estimates, and a violation occurs
This method computes the absolute time difference between all pairs of time entries. A threshold (in seconds) is used
to identify when the time differences are so large that participants essentially disagree on the times.
This circumvents previous instabilities, which stem from rounding the pd.Timestamp elements.
To reduce the potential of false violations, the agreement is rounded (e.g., by minute).
options:
threshold: integer. Time difference in seconds at or above which two Timestamps are considered to differ 'too much'
returns: violation_state (bool)
"""
@ -136,14 +139,14 @@ class ValidationRuleBaseFunctions():
participant_types = [ParticipantType.AGENCY.value, ParticipantType.MOORING.value, ParticipantType.PILOT.value, ParticipantType.TUG.value]
agency_times = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value,:]
df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:]
agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
if not len(agency_time):
if len(agency_times)==0:
violation_state = False
return violation_state
df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:]
agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
# for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame
# exclude missing entries and consider only pd.Timestamp entries (which ignores pd.NaT/null entries)
estimated_times = [time_ for time_ in df_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] # df_times = df_times.loc[~df_times[query].isnull(),:]
@ -152,8 +155,24 @@ class ValidationRuleBaseFunctions():
violation_state = False
return violation_state
# this (current) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes
violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min"))
# for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame
estimated_times = [time_ for time_ in df_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] # consider only pandas Timestamp objects
# measure the time difference between all pairs.
# for each pair of times, the absolute time difference in seconds (float) is measured
time_absolute_differences = [[abs(time_.to_pydatetime()-time__.to_pydatetime()).total_seconds() for j_, time__ in enumerate(estimated_times) if j_ != i_] for i_, time_ in enumerate(estimated_times)]
# list of lists: for each element in the list, create a boolean that indicates, whether the threshold is exceeded
time_difference_exceeds_threshold = [[time__ >= threshold for time__ in time_] for time_ in time_absolute_differences]
# list of booleans for each time entry separately
time_difference_exceeds_threshold = [any(time_) for time_ in time_difference_exceeds_threshold]
# if *any* of these entries reaches or exceeds the threshold, the times are too distinct. In that case, a rule violation occurs
violation_state = any(time_difference_exceeds_threshold)
# this (previous) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes
# violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min"))
# this solution to the rule compares all times to each other. When there is a total difference of more than 15 minutes, a violation occurs
# Consequently, it treats all times as equally important
@ -581,6 +600,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
- Checks, if times_terminal.operations_start is filled in.
- Measures the difference between 'now' and 'times_agency.eta_berth'.
"""
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
return self.get_no_violation_default_output()
if not shipcall.type in [ShipcallType.INCOMING.value]:
return self.get_no_violation_default_output()
@ -615,6 +637,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
- Checks, if times_terminal.operations_end is filled in.
- Measures the difference between 'now' and 'times_agency.etd_berth'.
"""
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
return self.get_no_violation_default_output()
if not shipcall.type in [ShipcallType.OUTGOING.value, ShipcallType.SHIFTING.value]:
return self.get_no_violation_default_output()
@ -730,6 +755,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
query time: eta_berth (times_agency)
start_time & end_time: operations_start & operations_end (times_terminal)
"""
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
return self.get_no_violation_default_output()
if not shipcall.type in [ShipcallType.INCOMING.value]:
return self.get_no_violation_default_output()
@ -770,6 +798,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
query time: eta_berth (times_agency)
start_time & end_time: operations_start & operations_end (times_terminal)
"""
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
return self.get_no_violation_default_output()
if not shipcall.type in [ShipcallType.OUTGOING.value, ShipcallType.SHIFTING.value]:
return self.get_no_violation_default_output()