Merge pull request #28 from puls200/bugfix/fix_20240429
adapting validation rules for version 1.2. Rules 0002: the time-diffe…
This commit is contained in:
commit
01753540fe
@ -57,6 +57,7 @@ class ValidationRuleBaseFunctions():
|
||||
self.error_message_dict = error_message_dict
|
||||
# as of 23 dec. 2023 port authority validation is temporarily disabled
|
||||
self.ignore_port_administration_flag = True # flag to disable all port administration validation rules
|
||||
self.ignore_terminal_flag = True # flag to disable Terminal validation rules 0001-L & 0001-M
|
||||
|
||||
def describe_error_message(self, key)->str:
|
||||
"""
|
||||
@ -106,7 +107,7 @@ class ValidationRuleBaseFunctions():
|
||||
violation_state = (delta<=threshold)
|
||||
return violation_state
|
||||
|
||||
def check_participants_agree_on_estimated_time(self, shipcall, query, df_times, applicable_shipcall_type)->bool:
|
||||
def check_participants_agree_on_estimated_time(self, shipcall, query, df_times, applicable_shipcall_type, threshold:int=3600)->bool:
|
||||
"""
|
||||
# base function for all validation rules in the group {0002} A-C
|
||||
|
||||
@ -117,10 +118,12 @@ class ValidationRuleBaseFunctions():
|
||||
- the shipcall belongs to a different type than the rule expects
|
||||
- there are no matching times for the provided {query} (e.g., "eta_berth")
|
||||
|
||||
Instead of comparing each individual result, this function counts the amount of unique instances.
|
||||
When there is not only one unique value, there are deviating time estimates, and a violation occurs
|
||||
This method computes the absolute time difference between all time entries. A threshold (in seconds) is used
|
||||
to identify when the time differences are so large that participants essentially disagree on the times.
|
||||
This circumvents previous instabilities, which stem from rounding the pd.Timestamp elements.
|
||||
|
||||
To reduce the potential of false violations, the agreement is rounded (e.g., by minute).
|
||||
options:
|
||||
threshold: integer. Time difference in seconds at or above which two Timestamps are considered to differ 'too much'
|
||||
|
||||
returns: violation_state (bool)
|
||||
"""
|
||||
@ -136,14 +139,14 @@ class ValidationRuleBaseFunctions():
|
||||
participant_types = [ParticipantType.AGENCY.value, ParticipantType.MOORING.value, ParticipantType.PILOT.value, ParticipantType.TUG.value]
|
||||
|
||||
agency_times = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value,:]
|
||||
df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:]
|
||||
|
||||
|
||||
agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
|
||||
if not len(agency_time):
|
||||
if len(agency_times)==0:
|
||||
violation_state = False
|
||||
return violation_state
|
||||
|
||||
df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:]
|
||||
agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
|
||||
|
||||
# for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame
|
||||
# exclude missing entries and consider only pd.Timestamp entries (which ignores pd.NaT/null entries)
|
||||
estimated_times = [time_ for time_ in df_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] # df_times = df_times.loc[~df_times[query].isnull(),:]
|
||||
|
||||
@ -152,8 +155,24 @@ class ValidationRuleBaseFunctions():
|
||||
violation_state = False
|
||||
return violation_state
|
||||
|
||||
# this (current) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes
|
||||
violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min"))
|
||||
# for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame
|
||||
estimated_times = [time_ for time_ in df_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] # consider only pandas Timestamp objects
|
||||
|
||||
# measure the time difference between all pairs.
|
||||
# for each pair of times, the absolute time difference in seconds (float) is measured
|
||||
time_absolute_differences = [[abs(time_.to_pydatetime()-time__.to_pydatetime()).total_seconds() for j_, time__ in enumerate(estimated_times) if j_ != i_] for i_, time_ in enumerate(estimated_times)]
|
||||
|
||||
# list of lists: for each element in the list, create a boolean that indicates, whether the threshold is exceeded
|
||||
time_difference_exceeds_threshold = [[time__ >= threshold for time__ in time_] for time_ in time_absolute_differences]
|
||||
|
||||
# list of booleans for each time entry separately
|
||||
time_difference_exceeds_threshold = [any(time_) for time_ in time_difference_exceeds_threshold]
|
||||
|
||||
# if *any* of these entries exceeds the threshold, the times are too distinct. In that case, a rule violation occurs
|
||||
violation_state = any(time_difference_exceeds_threshold)
|
||||
|
||||
# this (previous) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes
|
||||
# violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min"))
|
||||
|
||||
# this solution to the rule compares all times to each other. When there is a total difference of more than 15 minutes, a violation occurs
|
||||
# Consequently, it treats all times as equally important
|
||||
@ -581,6 +600,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
- Checks, if times_terminal.operations_start is filled in.
|
||||
- Measures the difference between 'now' and 'times_agency.eta_berth'.
|
||||
"""
|
||||
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
if not shipcall.type in [ShipcallType.INCOMING.value]:
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
@ -615,6 +637,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
- Checks, if times_terminal.operations_end is filled in.
|
||||
- Measures the difference between 'now' and 'times_agency.etd_berth'.
|
||||
"""
|
||||
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
if not shipcall.type in [ShipcallType.OUTGOING.value, ShipcallType.SHIFTING.value]:
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
@ -730,6 +755,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
query time: eta_berth (times_agency)
|
||||
start_time & end_time: operations_start & operations_end (times_terminal)
|
||||
"""
|
||||
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
if not shipcall.type in [ShipcallType.INCOMING.value]:
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
@ -770,6 +798,9 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
query time: eta_berth (times_agency)
|
||||
start_time & end_time: operations_start & operations_end (times_terminal)
|
||||
"""
|
||||
if self.ignore_terminal_flag: # this feature flag may disable the validation rule for Terminals
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
if not shipcall.type in [ShipcallType.OUTGOING.value, ShipcallType.SHIFTING.value]:
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user