From d0753f0b32df81d58c7ba67ed62e288779df0203 Mon Sep 17 00:00:00 2001 From: Max Metz Date: Mon, 29 Apr 2024 11:30:24 +0200 Subject: [PATCH] adapting rule 0005A and refactoring header-checks. Solving a conflict between versions, where there was a premature exit for time-agreement-rules. --- src/server/BreCal/api/shipcalls.py | 9 +++ src/server/BreCal/database/sql_handler.py | 38 +++++++++ .../validators/validation_rule_functions.py | 81 +++++++++++++------ .../BreCal/validators/validation_rules.py | 4 +- 4 files changed, 104 insertions(+), 28 deletions(-) diff --git a/src/server/BreCal/api/shipcalls.py b/src/server/BreCal/api/shipcalls.py index 61a7a7c..8adde2f 100644 --- a/src/server/BreCal/api/shipcalls.py +++ b/src/server/BreCal/api/shipcalls.py @@ -56,6 +56,15 @@ def PostShipcalls(): logging.log(20, "metz development") """ # loadedModel ... + loadedModel.get("ship_id", 0) + + 2024-04-22 18:21:03,982 | root | INFO | {'ship_id': 1, + 'type': 1, 'eta': datetime.datetime(2023, 7, 23, 7, 18, 19), + 'voyage': '43B', 'tug_required': False, 'pilot_required': True, + 'flags': 0, 'pier_side': False, 'bunkering': True, 'recommended_tugs': 2, + 'type_value': 1, 'evaluation_value': 0} + + valid_ship_id = check_if_user_data_has_valid_ship_id(ship_id) valid_berth_id = check_if_user_data_has_valid_berth_id(berth_id) valid_participant_id = check_if_user_data_has_valid_participant_id(participant_id) diff --git a/src/server/BreCal/database/sql_handler.py b/src/server/BreCal/database/sql_handler.py index 929f558..741e631 100644 --- a/src/server/BreCal/database/sql_handler.py +++ b/src/server/BreCal/database/sql_handler.py @@ -19,7 +19,37 @@ def set_participant_type(x, participant_df)->int: participant_type = participant_df.loc[participant_id, "type"] return participant_type +def get_synchronous_shipcall_times_standalone(query_time:pd.Timestamp, all_df_times:pd.DataFrame, delta_threshold=900)->int: + """ + This function counts all entries in {all_df_times}, which have the same 
timestamp as {query_time}. + It does so by: + 1.) selecting all eta_berth & etd_berth entries + 2.) measuring the timedelta towards {query_time} + 3.) converting the timedelta to total absolute seconds (positive or negative time differences do not matter) + 4.) applying a {delta_threshold} (in seconds) to identify, whether two times are too close together + 5.) counting the times, where the timedelta does not exceed the threshold + returns: counts (0, when {query_time} is null, e.g., None or pd.NaT) + """ + # a missing query time (None/pd.NaT) is not a pd.Timestamp instance; no entry can coincide with it + if pd.isnull(query_time): + return 0 + if not isinstance(query_time, pd.Timestamp): + raise TypeError(f"query_time must be a pd.Timestamp, got {type(query_time).__name__}") + + # get a timedelta for each valid (not Null) time entry + time_deltas_eta = [(query_time.to_pydatetime()-time_.to_pydatetime()) for time_ in all_df_times.loc[:,"eta_berth"] if not pd.isnull(time_)] + time_deltas_etd = [(query_time.to_pydatetime()-time_.to_pydatetime()) for time_ in all_df_times.loc[:,"etd_berth"] if not pd.isnull(time_)] + + # consider both, eta and etd times + time_deltas = time_deltas_eta + time_deltas_etd + + # convert the timedelta to absolute total seconds + time_deltas = [abs(delta.total_seconds()) for delta in time_deltas] + + # count the time deltas, which are <= the determined threshold (booleans can be added/counted in Python by using sum()) + counts = sum(delta <= delta_threshold for delta in time_deltas) # int + return counts class SQLHandler(): """ @@ -333,6 +363,10 @@ class SQLHandler(): def get_unique_ship_counts(self, all_df_times:pd.DataFrame, times_agency:pd.DataFrame, query:str, rounding:str="min", maximum_threshold=3): """given a dataframe of all agency times, get all unique ship counts, their values (datetime) and the string tags. returns a tuple (values,unique,counts)""" + # #deprecated! + import warnings + warnings.warn(f"SQLHandler.get_unique_ship_counts is deprecated. 
Instead, please use SQLHandler.count_synchronous_shipcall_times", DeprecationWarning, stacklevel=2) + # optional: rounding if rounding is not None: all_df_times.loc[:, query] = pd.to_datetime(all_df_times.loc[:, query]).dt.round(rounding) # e.g., 'min' --- # correcting the error: 'AttributeError: Can only use .dt accessor with datetimelike values' @@ -348,3 +382,7 @@ class SQLHandler(): # get unique entries and counts counts = len(values) # unique, counts = np.unique(values, return_counts=True) return counts # (values, unique, counts) + + def count_synchronous_shipcall_times(self, query_time:pd.Timestamp, all_df_times:pd.DataFrame, delta_threshold=900)->int: + """count all times entries (eta_berth & etd_berth), which are too close to the query_time. The {delta_threshold} (in seconds) determines the threshold. returns counts (int)""" + return get_synchronous_shipcall_times_standalone(query_time, all_df_times, delta_threshold) diff --git a/src/server/BreCal/validators/validation_rule_functions.py b/src/server/BreCal/validators/validation_rule_functions.py index 98fff6e..24bbc62 100644 --- a/src/server/BreCal/validators/validation_rule_functions.py +++ b/src/server/BreCal/validators/validation_rule_functions.py @@ -38,14 +38,16 @@ error_message_dict = { "validation_rule_fct_etd_time_not_in_tidal_window":"The tidal window does not fit to the agency's estimated time of departure (ETD) {Rule #0004B}", # 0005 A+B - "validation_rule_fct_too_many_identical_eta_times":"There are more than three ships with the same planned time of arrival (ETA) {Rule #0005A}", - "validation_rule_fct_too_many_identical_etd_times":"There are more than three ships with the same planned time of departure (ETD) {Rule #0005B}", + "validation_rule_fct_too_many_identical_eta_times":"More than three shipcalls are planned at the same time as the defined ETA {Rule #0005A}", + "validation_rule_fct_too_many_identical_etd_times":"More than three shipcalls are planned at the same time as the defined ETD {Rule #0005B}", # 0006 A+B 
"validation_rule_fct_agency_and_terminal_berth_id_disagreement":"Agency and Terminal are planning with different berths (the berth_id deviates). {Rule #0006A}", "validation_rule_fct_agency_and_terminal_pier_side_disagreement":"Agency and Terminal are planning with different pier sides (the pier_side deviates). {Rule #0006B}", } + + class ValidationRuleBaseFunctions(): """ Base object with individual functions, which the {ValidationRuleFunctions}-child refers to. @@ -71,6 +73,18 @@ class ValidationRuleBaseFunctions(): def get_no_violation_default_output(self): """return the default output of a validation function with no validation: a tuple of (GREEN state, None)""" return (StatusFlags.GREEN, None) + + def check_if_header_exists(self, df_times:pd.DataFrame, participant_type:ParticipantType)->bool: + """ + Given a pandas DataFrame, which contains times entries for a specific shipcall id, + this function checks, whether one of the times entries belongs to the requested ParticipantType. + The {participant_type} may be a ParticipantType member or its raw value; the column is compared against the raw value. + returns bool (False, when the 'participant_type' column is missing or no entry matches) + """ + # DataFrames without a 'participant_type' column (e.g., empty ones) cannot contain the requested header. 
+ if "participant_type" not in df_times.columns: + return False + return getattr(participant_type, "value", participant_type) in df_times.loc[:,"participant_type"].values def check_time_delta_violation_query_time_to_now(self, query_time:pd.Timestamp, key_time:pd.Timestamp, threshold:float)->bool: """ @@ -144,7 +158,6 @@ class ValidationRuleBaseFunctions(): return violation_state df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:] - agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] # for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame # exclude missing entries and consider only pd.Timestamp entries (which ignores pd.NaT/null entries) @@ -172,6 +185,7 @@ class ValidationRuleBaseFunctions(): violation_state = any(time_difference_exceeds_threshold) # this (previous) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes + # agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] # violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min")) # this solution to the rule compares all times to each other. 
When there is a total difference of more than 15 minutes, a violation occurs @@ -762,10 +776,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): return self.get_no_violation_default_output() # check, if the header is filled in (agency & terminal) - if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): + # if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1: return self.get_no_violation_default_output() # rule not applicable - if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL): + #if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1: return self.get_no_violation_default_output() # rule not applicable # get agency & terminal times @@ -805,10 +821,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): return self.get_no_violation_default_output() # check, if the header is filled in (agency & terminal) - if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1: + # if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): return self.get_no_violation_default_output() # rule not applicable - if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1: + # if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL): return self.get_no_violation_default_output() # rule not applicable # get agency & terminal times @@ -845,7 +863,8 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): return self.get_no_violation_default_output() # check, 
if the header is filled in (agency) - if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1: + # if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): return self.get_no_violation_default_output() times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value) @@ -876,7 +895,8 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): return self.get_no_violation_default_output() # check, if the header is filled in (agency) - if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1: + # if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): return self.get_no_violation_default_output() times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value) @@ -898,16 +918,19 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): """ Code: #0005-A Type: Global Rule - Description: this validation rule checks, whether there are too many shipcalls with identical ETA times. + Description: this validation rule checks, whether there are too many shipcalls with identical times to the query ETA. 
""" - times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value] # check, if the header is filled in (agency) - if len(times_agency) != 1: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): # if len(times_agency) != 1: return self.get_no_violation_default_output() - # when ANY of the unique values exceeds the threshold, a violation is observed - query = "eta_berth" - violation_state = self.check_unique_shipcall_counts(query, times_agency=times_agency, rounding=rounding, maximum_threshold=maximum_threshold, all_times_agency=all_times_agency) + # get the agency's query time + times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value] + query_time = times_agency.iloc[0].eta_berth + + # count the number of times, where a times entry is very close to the query time (uses an internal threshold, such as 15 minutes) + counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency) + violation_state = counts > maximum_threshold if violation_state: validation_name = "validation_rule_fct_too_many_identical_eta_times" @@ -919,16 +942,19 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): """ Code: #0005-B Type: Global Rule - Description: this validation rule checks, whether there are too many shipcalls with identical ETD times. + Description: this validation rule checks, whether there are too many shipcalls with identical times to the query ETD. 
""" - times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value] # check, if the header is filled in (agency) - if len(times_agency) != 1: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): #if len(times_agency) != 1: return self.get_no_violation_default_output() - # when ANY of the unique values exceeds the threshold, a violation is observed - query = "etd_berth" - violation_state = self.check_unique_shipcall_counts(query, times_agency=times_agency, rounding=rounding, maximum_threshold=maximum_threshold, all_times_agency=all_times_agency) + # get the agency's query time + times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value] + query_time = times_agency.iloc[0].etd_berth + + # count the number of times, where a times entry is very close to the query time (uses an internal threshold, such as 15 minutes) + counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency) + violation_state = counts > maximum_threshold if violation_state: validation_name = "validation_rule_fct_too_many_identical_etd_times" @@ -943,10 +969,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): Description: This validation rule checks, whether agency and terminal agree with their designated berth place by checking berth_id. 
""" # check, if the header is filled in (agency & terminal) - if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0: + # if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): return self.get_no_violation_default_output() # rule not applicable - if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0: + # if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL): return self.get_no_violation_default_output() # rule not applicable times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value) @@ -979,13 +1007,14 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions): Description: This validation rule checks, whether agency and terminal agree with their designated pier side by checking pier_side. 
""" # check, if the header is filled in (agency & terminal) - if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0: + # if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): return self.get_no_violation_default_output() # rule not applicable - if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0: + # if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0: + if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL): return self.get_no_violation_default_output() # rule not applicable - times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value) times_terminal = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.TERMINAL.value) # when one of the two values is null, the state is GREEN diff --git a/src/server/BreCal/validators/validation_rules.py b/src/server/BreCal/validators/validation_rules.py index 4b56e9b..69cc5ee 100644 --- a/src/server/BreCal/validators/validation_rules.py +++ b/src/server/BreCal/validators/validation_rules.py @@ -30,9 +30,9 @@ class ValidationRules(ValidationRuleFunctions): returns: (evaluation_state, violations) """ # prepare df_times, which every validation rule tends to use - df_times = self.sql_handler.df_dict.get('times', pd.DataFrame()) # -> pd.DataFrame + all_df_times = self.sql_handler.df_dict.get('times', pd.DataFrame()) # -> pd.DataFrame - if len(df_times)==0: + if len(all_df_times)==0: return (StatusFlags.GREEN.value, []) spm = self.sql_handler.df_dict["shipcall_participant_map"]