adapting rule 0005A and refactoring header-checks. Solving a conflict between versions, where there was a premature exit for time-agreement-rules.

This commit is contained in:
Max Metz 2024-04-29 11:30:24 +02:00
parent b32b466f74
commit d0753f0b32
4 changed files with 104 additions and 28 deletions

View File

@ -56,6 +56,15 @@ def PostShipcalls():
logging.log(20, "metz development")
"""
# loadedModel ...
loadedModel.get("ship_id", 0)
2024-04-22 18:21:03,982 | root | INFO | {'ship_id': 1,
'type': 1, 'eta': datetime.datetime(2023, 7, 23, 7, 18, 19),
'voyage': '43B', 'tug_required': False, 'pilot_required': True,
'flags': 0, 'pier_side': False, 'bunkering': True, 'recommended_tugs': 2,
'type_value': 1, 'evaluation_value': 0}
valid_ship_id = check_if_user_data_has_valid_ship_id(ship_id)
valid_berth_id = check_if_user_data_has_valid_berth_id(berth_id)
valid_participant_id = check_if_user_data_has_valid_participant_id(participant_id)

View File

@ -19,7 +19,37 @@ def set_participant_type(x, participant_df)->int:
participant_type = participant_df.loc[participant_id, "type"]
return participant_type
def get_synchronous_shipcall_times_standalone(query_time:pd.Timestamp, all_df_times:pd.DataFrame, delta_threshold=900)->int:
    """
    Count all 'eta_berth' and 'etd_berth' entries in {all_df_times}, which are at most
    {delta_threshold} seconds away from {query_time}.

    It does so by:
        1.) selecting all eta_berth & etd_berth entries (a missing column contributes no entries)
        2.) skipping missing values (None / NaN / NaT)
        3.) measuring the timedelta towards {query_time}
        4.) converting the timedelta to total absolute seconds (positive or negative time differences do not matter)
        5.) counting the entries, where the absolute difference is <= {delta_threshold}

    Note: no entry is excluded from the count. If {all_df_times} contains the entry that
    {query_time} was taken from, that entry is counted as well (its difference is 0 seconds).

    args:
        query_time: reference time, must be a pd.Timestamp
        all_df_times: DataFrame holding the 'eta_berth' and/or 'etd_berth' columns
        delta_threshold: maximum absolute difference in seconds (inclusive), defaults to 900

    returns: counts (int)
    """
    assert isinstance(query_time, pd.Timestamp), f"query_time must be a pd.Timestamp, but is {type(query_time)}"

    counts = 0
    # consider both, eta and etd times
    for column in ("eta_berth", "etd_berth"):
        # e.g., an empty pd.DataFrame() does not provide the times columns
        if column not in all_df_times.columns:
            continue

        for time_ in all_df_times[column]:
            # ignore missing (Null) time entries
            if pd.isnull(time_):
                continue

            # pd.Timestamp(...) normalizes datetime-like entries (pd.Timestamp, datetime.datetime, ...),
            # so object-typed columns holding plain datetimes are supported, too
            delta_seconds = abs((query_time - pd.Timestamp(time_)).total_seconds())

            # count only those entries, which are within the determined threshold
            if delta_seconds <= delta_threshold:
                counts += 1
    return counts
class SQLHandler():
"""
@ -333,6 +363,10 @@ class SQLHandler():
def get_unique_ship_counts(self, all_df_times:pd.DataFrame, times_agency:pd.DataFrame, query:str, rounding:str="min", maximum_threshold=3):
"""given a dataframe of all agency times, get all unique ship counts, their values (datetime) and the string tags. returns a tuple (values,unique,counts)"""
# #deprecated!
import warnings
warnings.warn(f"SQLHandler.get_unique_ship_counts is deprecated. Instead, please use SQLHandler.count_synchronous_shipcall_times")
# optional: rounding
if rounding is not None:
all_df_times.loc[:, query] = pd.to_datetime(all_df_times.loc[:, query]).dt.round(rounding) # e.g., 'min' --- # correcting the error: 'AttributeError: Can only use .dt accessor with datetimelike values'
@ -348,3 +382,7 @@ class SQLHandler():
# get unique entries and counts
counts = len(values) # unique, counts = np.unique(values, return_counts=True)
return counts # (values, unique, counts)
def count_synchronous_shipcall_times(self, query_time:pd.Timestamp, all_df_times:pd.DataFrame, delta_threshold=900)->int:
    """
    Method wrapper around {get_synchronous_shipcall_times_standalone}.

    All arguments are forwarded unchanged; the {delta_threshold} determines, how close a times entry
    must be to {query_time} in order to be counted.

    returns: counts (int)
    """
    counts = get_synchronous_shipcall_times_standalone(
        query_time,
        all_df_times,
        delta_threshold,
    )
    return counts

View File

@ -38,14 +38,16 @@ error_message_dict = {
"validation_rule_fct_etd_time_not_in_tidal_window":"The tidal window does not fit to the agency's estimated time of departure (ETD) {Rule #0004B}",
# 0005 A+B
"validation_rule_fct_too_many_identical_eta_times":"There are more than three ships with the same planned time of arrival (ETA) {Rule #0005A}",
"validation_rule_fct_too_many_identical_etd_times":"There are more than three ships with the same planned time of departure (ETD) {Rule #0005B}",
"validation_rule_fct_too_many_identical_eta_times":"More than three shipcalls are planned at the same time as the defined ETA {Rule #0005A}",
"validation_rule_fct_too_many_identical_etd_times":"More than three shipcalls are planned at the same time as the defined ETD {Rule #0005B}",
# 0006 A+B
"validation_rule_fct_agency_and_terminal_berth_id_disagreement":"Agency and Terminal are planning with different berths (the berth_id deviates). {Rule #0006A}",
"validation_rule_fct_agency_and_terminal_pier_side_disagreement":"Agency and Terminal are planning with different pier sides (the pier_side deviates). {Rule #0006B}",
}
class ValidationRuleBaseFunctions():
"""
Base object with individual functions, which the {ValidationRuleFunctions}-child refers to.
@ -71,6 +73,18 @@ class ValidationRuleBaseFunctions():
def get_no_violation_default_output(self):
    """
    Provide the default result of a validation function, when no violation is observed.

    returns: tuple (StatusFlags.GREEN, None)
    """
    state, violation = StatusFlags.GREEN, None
    return (state, violation)
def check_if_header_exists(self, df_times:pd.DataFrame, participant_type:ParticipantType)->bool:
    """
    Given a pandas DataFrame, which contains times entries for a specific shipcall id,
    this function checks, whether at least one of the times entries belongs to the requested ParticipantType.

    args:
        df_times: DataFrame of times entries; the 'participant_type' column is read
        participant_type: the requested participant type. Enum members are compared by their
            {.value}; a raw value (without a {.value} attribute) is compared as it is.

    returns bool
    """
    # empty DataFrames form a special case, as they might miss the 'participant_type' column.
    # the same applies to any DataFrame, which does not provide that column at all.
    if len(df_times)==0 or "participant_type" not in df_times.columns:
        return False

    # compare against the underlying value of the enum member, which matches the other
    # header checks of this file (they compare the column to ParticipantType.<X>.value)
    requested_value = getattr(participant_type, "value", participant_type)
    return bool(requested_value in df_times.loc[:,"participant_type"].values)
def check_time_delta_violation_query_time_to_now(self, query_time:pd.Timestamp, key_time:pd.Timestamp, threshold:float)->bool:
"""
@ -144,7 +158,6 @@ class ValidationRuleBaseFunctions():
return violation_state
df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:]
agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
# for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame
# exclude missing entries and consider only pd.Timestamp entries (which ignores pd.NaT/null entries)
@ -172,6 +185,7 @@ class ValidationRuleBaseFunctions():
violation_state = any(time_difference_exceeds_threshold)
# this (previous) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes
# agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
# violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min"))
# this solution to the rule compares all times to each other. When there is a total difference of more than 15 minutes, a violation occurs
@ -762,10 +776,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
return self.get_no_violation_default_output()
# check, if the header is filled in (agency & terminal)
if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
return self.get_no_violation_default_output() # rule not applicable
if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
#if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
return self.get_no_violation_default_output() # rule not applicable
# get agency & terminal times
@ -805,10 +821,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
return self.get_no_violation_default_output()
# check, if the header is filled in (agency & terminal)
if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
return self.get_no_violation_default_output() # rule not applicable
if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
return self.get_no_violation_default_output() # rule not applicable
# get agency & terminal times
@ -845,7 +863,8 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
return self.get_no_violation_default_output()
# check, if the header is filled in (agency)
if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
# if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
return self.get_no_violation_default_output()
times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)
@ -876,7 +895,8 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
return self.get_no_violation_default_output()
# check, if the header is filled in (agency)
if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
# if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
return self.get_no_violation_default_output()
times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)
@ -898,16 +918,19 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
"""
Code: #0005-A
Type: Global Rule
Description: this validation rule checks, whether there are too many shipcalls with identical ETA times.
Description: this validation rule checks, whether there are too many shipcalls with identical times to the query ETA.
"""
times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
# check, if the header is filled in (agency)
if len(times_agency) != 1:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): # if len(times_agency) != 1:
return self.get_no_violation_default_output()
# when ANY of the unique values exceeds the threshold, a violation is observed
query = "eta_berth"
violation_state = self.check_unique_shipcall_counts(query, times_agency=times_agency, rounding=rounding, maximum_threshold=maximum_threshold, all_times_agency=all_times_agency)
# get the agency's query time
times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
query_time = times_agency.iloc[0].eta_berth
# count the number of times, where a times entry is very close to the query time (uses an internal threshold, such as 15 minutes)
counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency)
violation_state = counts > maximum_threshold
if violation_state:
validation_name = "validation_rule_fct_too_many_identical_eta_times"
@ -919,16 +942,19 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
"""
Code: #0005-B
Type: Global Rule
Description: this validation rule checks, whether there are too many shipcalls with identical ETD times.
Description: this validation rule checks, whether there are too many shipcalls with identical times to the query ETD.
"""
times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
# check, if the header is filled in (agency)
if len(times_agency) != 1:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): #if len(times_agency) != 1:
return self.get_no_violation_default_output()
# when ANY of the unique values exceeds the threshold, a violation is observed
query = "etd_berth"
violation_state = self.check_unique_shipcall_counts(query, times_agency=times_agency, rounding=rounding, maximum_threshold=maximum_threshold, all_times_agency=all_times_agency)
# get the agency's query time
times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
query_time = times_agency.iloc[0].etd_berth
# count the number of times, where a times entry is very close to the query time (uses an internal threshold, such as 15 minutes)
counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency)
violation_state = counts > maximum_threshold
if violation_state:
validation_name = "validation_rule_fct_too_many_identical_etd_times"
@ -943,10 +969,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
Description: This validation rule checks, whether agency and terminal agree with their designated berth place by checking berth_id.
"""
# check, if the header is filled in (agency & terminal)
if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
return self.get_no_violation_default_output() # rule not applicable
if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
return self.get_no_violation_default_output() # rule not applicable
times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)
@ -979,13 +1007,14 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
Description: This validation rule checks, whether agency and terminal agree with their designated pier side by checking pier_side.
"""
# check, if the header is filled in (agency & terminal)
if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
return self.get_no_violation_default_output() # rule not applicable
if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
return self.get_no_violation_default_output() # rule not applicable
times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)
times_terminal = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.TERMINAL.value)
# when one of the two values is null, the state is GREEN

View File

@ -30,9 +30,9 @@ class ValidationRules(ValidationRuleFunctions):
returns: (evaluation_state, violations)
"""
# prepare df_times, which every validation rule tends to use
df_times = self.sql_handler.df_dict.get('times', pd.DataFrame()) # -> pd.DataFrame
all_df_times = self.sql_handler.df_dict.get('times', pd.DataFrame()) # -> pd.DataFrame
if len(df_times)==0:
if len(all_df_times)==0:
return (StatusFlags.GREEN.value, [])
spm = self.sql_handler.df_dict["shipcall_participant_map"]