Adapt rule 0005A and refactor the header checks. Resolve a conflict between versions that caused a premature exit in the time-agreement rules.
This commit is contained in:
parent
b32b466f74
commit
d0753f0b32
@ -56,6 +56,15 @@ def PostShipcalls():
|
||||
logging.log(20, "metz development")
|
||||
"""
|
||||
# loadedModel ...
|
||||
loadedModel.get("ship_id", 0)
|
||||
|
||||
2024-04-22 18:21:03,982 | root | INFO | {'ship_id': 1,
|
||||
'type': 1, 'eta': datetime.datetime(2023, 7, 23, 7, 18, 19),
|
||||
'voyage': '43B', 'tug_required': False, 'pilot_required': True,
|
||||
'flags': 0, 'pier_side': False, 'bunkering': True, 'recommended_tugs': 2,
|
||||
'type_value': 1, 'evaluation_value': 0}
|
||||
|
||||
|
||||
valid_ship_id = check_if_user_data_has_valid_ship_id(ship_id)
|
||||
valid_berth_id = check_if_user_data_has_valid_berth_id(berth_id)
|
||||
valid_participant_id = check_if_user_data_has_valid_participant_id(participant_id)
|
||||
|
||||
@ -19,7 +19,37 @@ def set_participant_type(x, participant_df)->int:
|
||||
participant_type = participant_df.loc[participant_id, "type"]
|
||||
return participant_type
|
||||
|
||||
def get_synchronous_shipcall_times_standalone(query_time:pd.Timestamp, all_df_times:pd.DataFrame, delta_threshold=900)->int:
    """
    Count the eta_berth & etd_berth entries in {all_df_times} which lie close to {query_time}.
    It does so by:
        1.) selecting all eta_berth & etd_berth entries (null entries are skipped)
        2.) measuring the timedelta towards {query_time}
        3.) converting the timedelta to total absolute seconds (positive or negative time differences do not matter)
        4.) applying {delta_threshold} (in seconds, inclusive) to identify, whether two times are too closely together
        5.) counting the times, where the timedelta is at or below the threshold

    A missing {query_time} (None or pd.NaT) cannot be synchronous to any other time, so 0 is returned
    instead of failing. Any other non-Timestamp input is rejected explicitly.

    args:
        query_time: reference time to compare against
        all_df_times: DataFrame, which must provide the columns 'eta_berth' and 'etd_berth'
        delta_threshold: maximum absolute difference in seconds (default 900, i.e., 15 minutes)

    returns: counts (int)

    raises: TypeError, when {query_time} is neither a pd.Timestamp nor a null value
    """
    if not isinstance(query_time, pd.Timestamp):
        # pd.NaT is not a pd.Timestamp instance; a missing time simply has no synchronous entries
        if query_time is None or query_time is pd.NaT:
            return 0
        # explicit check instead of 'assert', which is stripped when Python runs with -O
        raise TypeError(f"query_time must be a pd.Timestamp. Found: {type(query_time).__name__}")

    reference_time = query_time.to_pydatetime()

    counts = 0
    # consider both, eta and etd times
    for column in ("eta_berth", "etd_berth"):
        for time_ in all_df_times.loc[:, column]:
            # only valid (not Null) time entries contribute
            if pd.isnull(time_):
                continue

            # absolute distance in seconds between the query time and the entry
            delta_seconds = abs((reference_time - time_.to_pydatetime()).total_seconds())
            if delta_seconds <= delta_threshold:
                counts += 1
    return counts
|
||||
|
||||
class SQLHandler():
|
||||
"""
|
||||
@ -333,6 +363,10 @@ class SQLHandler():
|
||||
|
||||
def get_unique_ship_counts(self, all_df_times:pd.DataFrame, times_agency:pd.DataFrame, query:str, rounding:str="min", maximum_threshold=3):
|
||||
"""given a dataframe of all agency times, get all unique ship counts, their values (datetime) and the string tags. returns a tuple (values,unique,counts)"""
|
||||
# #deprecated!
|
||||
import warnings
|
||||
warnings.warn(f"SQLHandler.get_unique_ship_counts is deprecated. Instead, please use SQLHandler.count_synchronous_shipcall_times")
|
||||
|
||||
# optional: rounding
|
||||
if rounding is not None:
|
||||
all_df_times.loc[:, query] = pd.to_datetime(all_df_times.loc[:, query]).dt.round(rounding) # e.g., 'min' --- # correcting the error: 'AttributeError: Can only use .dt accessor with datetimelike values'
|
||||
@ -348,3 +382,7 @@ class SQLHandler():
|
||||
# get unique entries and counts
|
||||
counts = len(values) # unique, counts = np.unique(values, return_counts=True)
|
||||
return counts # (values, unique, counts)
|
||||
|
||||
def count_synchronous_shipcall_times(self, query_time:pd.Timestamp, all_df_times:pd.DataFrame, delta_threshold=900)->int:
    """
    Count the times entries of {all_df_times}, which are too close to {query_time}.
    {delta_threshold} is handed over unchanged and determines, when two times count as too close.

    This method only delegates to the module-level function get_synchronous_shipcall_times_standalone.

    returns: counts (int)
    """
    counts = get_synchronous_shipcall_times_standalone(
        query_time=query_time,
        all_df_times=all_df_times,
        delta_threshold=delta_threshold,
    )
    return counts
|
||||
|
||||
@ -38,14 +38,16 @@ error_message_dict = {
|
||||
"validation_rule_fct_etd_time_not_in_tidal_window":"The tidal window does not fit to the agency's estimated time of departure (ETD) {Rule #0004B}",
|
||||
|
||||
# 0005 A+B
|
||||
"validation_rule_fct_too_many_identical_eta_times":"There are more than three ships with the same planned time of arrival (ETA) {Rule #0005A}",
|
||||
"validation_rule_fct_too_many_identical_etd_times":"There are more than three ships with the same planned time of departure (ETD) {Rule #0005B}",
|
||||
"validation_rule_fct_too_many_identical_eta_times":"More than three shipcalls are planned at the same time as the defined ETA {Rule #0005A}",
|
||||
"validation_rule_fct_too_many_identical_etd_times":"More than three shipcalls are planned at the same time as the defined ETD {Rule #0005B}",
|
||||
|
||||
# 0006 A+B
|
||||
"validation_rule_fct_agency_and_terminal_berth_id_disagreement":"Agency and Terminal are planning with different berths (the berth_id deviates). {Rule #0006A}",
|
||||
"validation_rule_fct_agency_and_terminal_pier_side_disagreement":"Agency and Terminal are planning with different pier sides (the pier_side deviates). {Rule #0006B}",
|
||||
}
|
||||
|
||||
|
||||
|
||||
class ValidationRuleBaseFunctions():
|
||||
"""
|
||||
Base object with individual functions, which the {ValidationRuleFunctions}-child refers to.
|
||||
@ -71,6 +73,18 @@ class ValidationRuleBaseFunctions():
|
||||
def get_no_violation_default_output(self):
|
||||
"""return the default output of a validation function with no validation: a tuple of (GREEN state, None)"""
|
||||
return (StatusFlags.GREEN, None)
|
||||
|
||||
def check_if_header_exists(self, df_times:pd.DataFrame, participant_type:ParticipantType)->bool:
    """
    Given a pandas DataFrame, which contains times entries for a specific shipcall id,
    this function checks, whether at least one of the times entries belongs to the requested ParticipantType.

    args:
        df_times: times entries; must provide the column 'participant_type', unless it is empty
        participant_type: the requested participant type. Enum members are unwrapped to their
            .value before the comparison; plain values are compared as they are.

    returns bool
    """
    # empty DataFrames form a special case, as they might miss the 'participant_type' column.
    if len(df_times)==0:
        return False

    # the other checks in this file compare the column against `ParticipantType.<X>.value`.
    # Unwrapping the enum member keeps this check consistent with them and independent of
    # whether the enum compares equal to plain integers.
    participant_value = getattr(participant_type, "value", participant_type)
    return bool(participant_value in df_times.loc[:,"participant_type"].values)
|
||||
|
||||
def check_time_delta_violation_query_time_to_now(self, query_time:pd.Timestamp, key_time:pd.Timestamp, threshold:float)->bool:
|
||||
"""
|
||||
@ -144,7 +158,6 @@ class ValidationRuleBaseFunctions():
|
||||
return violation_state
|
||||
|
||||
df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:]
|
||||
agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
|
||||
|
||||
# for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame
|
||||
# exclude missing entries and consider only pd.Timestamp entries (which ignores pd.NaT/null entries)
|
||||
@ -172,6 +185,7 @@ class ValidationRuleBaseFunctions():
|
||||
violation_state = any(time_difference_exceeds_threshold)
|
||||
|
||||
# this (previous) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes
|
||||
# agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
|
||||
# violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min"))
|
||||
|
||||
# this solution to the rule compares all times to each other. When there is a total difference of more than 15 minutes, a violation occurs
|
||||
@ -762,10 +776,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
# check, if the header is filled in (agency & terminal)
|
||||
if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
|
||||
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
|
||||
return self.get_no_violation_default_output() # rule not applicable
|
||||
|
||||
if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
|
||||
#if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
|
||||
return self.get_no_violation_default_output() # rule not applicable
|
||||
|
||||
# get agency & terminal times
|
||||
@ -805,10 +821,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
# check, if the header is filled in (agency & terminal)
|
||||
if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
|
||||
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
|
||||
return self.get_no_violation_default_output() # rule not applicable
|
||||
|
||||
if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
|
||||
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
|
||||
return self.get_no_violation_default_output() # rule not applicable
|
||||
|
||||
# get agency & terminal times
|
||||
@ -845,7 +863,8 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
# check, if the header is filled in (agency)
|
||||
if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
|
||||
# if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
|
||||
return self.get_no_violation_default_output()
|
||||
times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)
|
||||
|
||||
@ -876,7 +895,8 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
# check, if the header is filled in (agency)
|
||||
if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
|
||||
# if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
|
||||
return self.get_no_violation_default_output()
|
||||
times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)
|
||||
|
||||
@ -898,16 +918,19 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
"""
|
||||
Code: #0005-A
|
||||
Type: Global Rule
|
||||
Description: this validation rule checks, whether there are too many shipcalls with identical ETA times.
|
||||
Description: this validation rule checks, whether there are too many shipcalls with identical times to the query ETA.
|
||||
"""
|
||||
times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
|
||||
# check, if the header is filled in (agency)
|
||||
if len(times_agency) != 1:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): # if len(times_agency) != 1:
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
# when ANY of the unique values exceeds the threshold, a violation is observed
|
||||
query = "eta_berth"
|
||||
violation_state = self.check_unique_shipcall_counts(query, times_agency=times_agency, rounding=rounding, maximum_threshold=maximum_threshold, all_times_agency=all_times_agency)
|
||||
# get the agency's query time
|
||||
times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
|
||||
query_time = times_agency.iloc[0].eta_berth
|
||||
|
||||
# count the number of times, where a times entry is very close to the query time (uses an internal threshold, such as 15 minutes)
|
||||
counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency)
|
||||
violation_state = counts > maximum_threshold
|
||||
|
||||
if violation_state:
|
||||
validation_name = "validation_rule_fct_too_many_identical_eta_times"
|
||||
@ -919,16 +942,19 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
"""
|
||||
Code: #0005-B
|
||||
Type: Global Rule
|
||||
Description: this validation rule checks, whether there are too many shipcalls with identical ETD times.
|
||||
Description: this validation rule checks, whether there are too many shipcalls with identical times to the query ETD.
|
||||
"""
|
||||
times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
|
||||
# check, if the header is filled in (agency)
|
||||
if len(times_agency) != 1:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): #if len(times_agency) != 1:
|
||||
return self.get_no_violation_default_output()
|
||||
|
||||
# when ANY of the unique values exceeds the threshold, a violation is observed
|
||||
query = "etd_berth"
|
||||
violation_state = self.check_unique_shipcall_counts(query, times_agency=times_agency, rounding=rounding, maximum_threshold=maximum_threshold, all_times_agency=all_times_agency)
|
||||
# get the agency's query time
|
||||
times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
|
||||
query_time = times_agency.iloc[0].etd_berth
|
||||
|
||||
# count the number of times, where a times entry is very close to the query time (uses an internal threshold, such as 15 minutes)
|
||||
counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency)
|
||||
violation_state = counts > maximum_threshold
|
||||
|
||||
if violation_state:
|
||||
validation_name = "validation_rule_fct_too_many_identical_etd_times"
|
||||
@ -943,10 +969,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
Description: This validation rule checks, whether agency and terminal agree with their designated berth place by checking berth_id.
|
||||
"""
|
||||
# check, if the header is filled in (agency & terminal)
|
||||
if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
|
||||
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
|
||||
return self.get_no_violation_default_output() # rule not applicable
|
||||
|
||||
if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
|
||||
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
|
||||
return self.get_no_violation_default_output() # rule not applicable
|
||||
|
||||
times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)
|
||||
@ -979,13 +1007,14 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
|
||||
Description: This validation rule checks, whether agency and terminal agree with their designated pier side by checking pier_side.
|
||||
"""
|
||||
# check, if the header is filled in (agency & terminal)
|
||||
if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
|
||||
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
|
||||
return self.get_no_violation_default_output() # rule not applicable
|
||||
|
||||
if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
|
||||
# if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
|
||||
if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
|
||||
return self.get_no_violation_default_output() # rule not applicable
|
||||
|
||||
times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)
|
||||
times_terminal = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.TERMINAL.value)
|
||||
|
||||
# when one of the two values is null, the state is GREEN
|
||||
|
||||
@ -30,9 +30,9 @@ class ValidationRules(ValidationRuleFunctions):
|
||||
returns: (evaluation_state, violations)
|
||||
"""
|
||||
# prepare df_times, which every validation rule tends to use
|
||||
df_times = self.sql_handler.df_dict.get('times', pd.DataFrame()) # -> pd.DataFrame
|
||||
all_df_times = self.sql_handler.df_dict.get('times', pd.DataFrame()) # -> pd.DataFrame
|
||||
|
||||
if len(df_times)==0:
|
||||
if len(all_df_times)==0:
|
||||
return (StatusFlags.GREEN.value, [])
|
||||
|
||||
spm = self.sql_handler.df_dict["shipcall_participant_map"]
|
||||
|
||||
Reference in New Issue
Block a user