Adapting rule 0005A and refactoring the header checks. Resolving a conflict between versions, where there was a premature exit for the time-agreement rules.

2024-04-29 11:30:24 +02:00 · 2024-04-29 11:30:24 +02:00 · d0753f0b32
commit d0753f0b32
parent b32b466f74
4 changed files with 104 additions and 28 deletions
--- a/src/server/BreCal/api/shipcalls.py
+++ b/src/server/BreCal/api/shipcalls.py
@ -56,6 +56,15 @@ def PostShipcalls():
        logging.log(20, "metz development")        
        """
        # loadedModel ...
+        loadedModel.get("ship_id", 0)
+
+        2024-04-22 18:21:03,982 | root | INFO | {'ship_id': 1, 
+        'type': 1, 'eta': datetime.datetime(2023, 7, 23, 7, 18, 19),
+        'voyage': '43B', 'tug_required': False, 'pilot_required': True, 
+        'flags': 0, 'pier_side': False, 'bunkering': True, 'recommended_tugs': 2,
+        'type_value': 1, 'evaluation_value': 0}
+
+
        valid_ship_id = check_if_user_data_has_valid_ship_id(ship_id)
        valid_berth_id = check_if_user_data_has_valid_berth_id(berth_id)
        valid_participant_id = check_if_user_data_has_valid_participant_id(participant_id)
--- a/src/server/BreCal/database/sql_handler.py
+++ b/src/server/BreCal/database/sql_handler.py
@ -19,7 +19,37 @@ def set_participant_type(x, participant_df)->int:
    participant_type = participant_df.loc[participant_id, "type"]
    return participant_type
    
+def get_synchronous_shipcall_times_standalone(query_time:pd.Timestamp, all_df_times:pd.DataFrame, delta_threshold=900)->int:
+    """
+    This function counts all entries in {all_df_times} that lie within {delta_threshold} seconds of {query_time}.
+    It does so by:
+    1.) selecting all eta_berth & etd_berth entries (null entries are skipped)
+    2.) measuring the timedelta towards {query_time}
+    3.) converting the timedelta to total absolute seconds (the sign of the time difference does not matter)
+    4.) applying the {delta_threshold} (in seconds) to identify whether two times are too close together
+    5.) counting the entries whose absolute timedelta is at or below the threshold

+    returns: counts
+    """
+    assert isinstance(query_time,pd.Timestamp)
+
+    # get a timedelta for each valid (not Null) time entry
+    time_deltas_eta = [(query_time.to_pydatetime()-time_.to_pydatetime()) for time_ in all_df_times.loc[:,"eta_berth"] if not pd.isnull(time_)]
+    time_deltas_etd = [(query_time.to_pydatetime()-time_.to_pydatetime()) for time_ in all_df_times.loc[:,"etd_berth"] if not pd.isnull(time_)]
+
+    # consider both, eta and etd times
+    time_deltas = time_deltas_eta + time_deltas_etd
+
+    # convert the timedelta to absolute total seconds
+    time_deltas = [abs(delta.total_seconds()) for delta in time_deltas]
+
+    # consider only those time deltas, which are <= the determined threshold
+    # create a list of booleans
+    time_deltas_filtered = [delta <= delta_threshold for delta in time_deltas]
+
+    # booleans can be added/counted in Python by using sum()
+    counts = sum(time_deltas_filtered) # int
+    return counts

 class SQLHandler():
    """
@ -333,6 +363,10 @@ class SQLHandler():
    
    def get_unique_ship_counts(self, all_df_times:pd.DataFrame, times_agency:pd.DataFrame, query:str, rounding:str="min", maximum_threshold=3):
        """given a dataframe of all agency times, get all unique ship counts, their values (datetime) and the string tags. returns a tuple (values,unique,counts)"""
+        # #deprecated!
+        import warnings
+        warnings.warn(f"SQLHandler.get_unique_ship_counts is deprecated. Instead, please use SQLHandler.count_synchronous_shipcall_times")
+
        # optional: rounding
        if rounding is not None:
            all_df_times.loc[:, query] = pd.to_datetime(all_df_times.loc[:, query]).dt.round(rounding) # e.g., 'min' ---  # correcting the error: 'AttributeError: Can only use .dt accessor with datetimelike values'
@ -348,3 +382,7 @@ class SQLHandler():
        # get unique entries and counts
        counts = len(values) # unique, counts = np.unique(values, return_counts=True)
        return counts # (values, unique, counts)
+
+    def count_synchronous_shipcall_times(self, query_time:pd.Timestamp, all_df_times:pd.DataFrame, delta_threshold=900)->int:
+        """Count all times entries that lie within {delta_threshold} seconds of {query_time}. Returns the count (int)."""
+        return get_synchronous_shipcall_times_standalone(query_time, all_df_times, delta_threshold)
--- a/src/server/BreCal/validators/validation_rule_functions.py
+++ b/src/server/BreCal/validators/validation_rule_functions.py
@ -38,14 +38,16 @@ error_message_dict = {
    "validation_rule_fct_etd_time_not_in_tidal_window":"The tidal window does not fit to the agency's estimated time of departure (ETD) {Rule #0004B}",

    # 0005 A+B
-    "validation_rule_fct_too_many_identical_eta_times":"There are more than three ships with the same planned time of arrival (ETA) {Rule #0005A}",
-    "validation_rule_fct_too_many_identical_etd_times":"There are more than three ships with the same planned time of departure (ETD) {Rule #0005B}",
+    "validation_rule_fct_too_many_identical_eta_times":"More than three shipcalls are planned at the same time as the defined ETA {Rule #0005A}",
+    "validation_rule_fct_too_many_identical_etd_times":"More than three shipcalls are planned at the same time as the defined ETD {Rule #0005B}",

    # 0006 A+B
    "validation_rule_fct_agency_and_terminal_berth_id_disagreement":"Agency and Terminal are planning with different berths (the berth_id deviates). {Rule #0006A}",
    "validation_rule_fct_agency_and_terminal_pier_side_disagreement":"Agency and Terminal are planning with different pier sides (the pier_side deviates).  {Rule #0006B}",
 }

+
+
 class ValidationRuleBaseFunctions():
    """
    Base object with individual functions, which the {ValidationRuleFunctions}-child refers to.
@ -71,6 +73,18 @@ class ValidationRuleBaseFunctions():
    def get_no_violation_default_output(self):
        """return the default output of a validation function with no validation: a tuple of (GREEN state, None)"""
        return (StatusFlags.GREEN, None)
+        
+    def check_if_header_exists(self, df_times:pd.DataFrame, participant_type:ParticipantType)->bool:
+        """
+        Given a pandas DataFrame that contains the times entries for a specific shipcall id,
+        this function checks whether at least one of the times entries belongs to the requested ParticipantType.
+
+        returns bool (False for an empty DataFrame)
+        """
+        # empty DataFrames form a special case, as they might miss the 'participant_type' column.
+        if len(df_times)==0:
+            return False
+        return participant_type in df_times.loc[:,"participant_type"].values

    def check_time_delta_violation_query_time_to_now(self, query_time:pd.Timestamp, key_time:pd.Timestamp, threshold:float)->bool:
        """
@ -144,7 +158,6 @@ class ValidationRuleBaseFunctions():
            return violation_state

        df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:]
-        agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]

        # for the given query, e.g., 'eta_berth', sample all times from the pandas DataFrame
        # exclude missing entries and consider only pd.Timestamp entries (which ignores pd.NaT/null entries)
@ -172,6 +185,7 @@ class ValidationRuleBaseFunctions():
        violation_state = any(time_difference_exceeds_threshold)

        # this (previous) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes
+        # agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
        # violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min"))

        # this solution to the rule compares all times to each other. When there is a total difference of more than 15 minutes, a violation occurs
@ -762,10 +776,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
            return self.get_no_violation_default_output()

        # check, if the header is filled in (agency & terminal)
-        if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
+        # if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
            return self.get_no_violation_default_output() # rule not applicable

-        if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
+        #if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
            return self.get_no_violation_default_output() # rule not applicable

        # get agency & terminal times
@ -805,10 +821,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
            return self.get_no_violation_default_output()

        # check, if the header is filled in (agency & terminal)
-        if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
+        # if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) != 1:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
            return self.get_no_violation_default_output() # rule not applicable

-        if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
+        # if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) != 1:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
            return self.get_no_violation_default_output() # rule not applicable

        # get agency & terminal times
@ -845,7 +863,8 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
            return self.get_no_violation_default_output()

        # check, if the header is filled in (agency)
-        if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
+        # if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
            return self.get_no_violation_default_output()
        times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)

@ -876,7 +895,8 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
            return self.get_no_violation_default_output()

        # check, if the header is filled in (agency)
-        if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
+        # if len(df_times.loc[df_times["participant_type"].isin([ParticipantType.AGENCY.value])]) != 1:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
            return self.get_no_violation_default_output()
        times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)

@ -898,16 +918,19 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
        """
        Code: #0005-A
        Type: Global Rule
-        Description: this validation rule checks, whether there are too many shipcalls with identical ETA times.
+        Description: this validation rule checks whether there are too many shipcalls whose times lie close to the query ETA.
        """
-        times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
        # check, if the header is filled in (agency)
-        if len(times_agency) != 1:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): # if len(times_agency) != 1:
            return self.get_no_violation_default_output()

-        # when ANY of the unique values exceeds the threshold, a violation is observed
-        query = "eta_berth"
-        violation_state = self.check_unique_shipcall_counts(query, times_agency=times_agency, rounding=rounding, maximum_threshold=maximum_threshold, all_times_agency=all_times_agency)
+        # get the agency's query time
+        times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
+        query_time = times_agency.iloc[0].eta_berth
+
+        # count the times entries that lie very close to the query time (uses the default delta_threshold of count_synchronous_shipcall_times: 900 seconds, i.e., 15 minutes)
+        counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency)
+        violation_state = counts > maximum_threshold

        if violation_state:
            validation_name = "validation_rule_fct_too_many_identical_eta_times"
@ -919,16 +942,19 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
        """
        Code: #0005-B
        Type: Global Rule
-        Description: this validation rule checks, whether there are too many shipcalls with identical ETD times.
+        Description: this validation rule checks whether there are too many shipcalls whose times lie close to the query ETD.
        """
-        times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
        # check, if the header is filled in (agency)
-        if len(times_agency) != 1:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY): #if len(times_agency) != 1:
            return self.get_no_violation_default_output()

-        # when ANY of the unique values exceeds the threshold, a violation is observed
-        query = "etd_berth"
-        violation_state = self.check_unique_shipcall_counts(query, times_agency=times_agency, rounding=rounding, maximum_threshold=maximum_threshold, all_times_agency=all_times_agency)
+        # get the agency's query time
+        times_agency = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]
+        query_time = times_agency.iloc[0].etd_berth
+
+        # count the times entries that lie very close to the query time (uses the default delta_threshold of count_synchronous_shipcall_times: 900 seconds, i.e., 15 minutes)
+        counts = self.sql_handler.count_synchronous_shipcall_times(query_time, all_df_times=all_times_agency)
+        violation_state = counts > maximum_threshold

        if violation_state:
            validation_name = "validation_rule_fct_too_many_identical_etd_times"
@ -943,10 +969,12 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
        Description: This validation rule checks, whether agency and terminal agree with their designated berth place by checking berth_id.
        """
        # check, if the header is filled in (agency & terminal)
-        if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
+        # if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
            return self.get_no_violation_default_output() # rule not applicable

-        if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
+        # if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
            return self.get_no_violation_default_output() # rule not applicable

        times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)
@ -979,13 +1007,14 @@ class ValidationRuleFunctions(ValidationRuleBaseFunctions):
        Description: This validation rule checks, whether agency and terminal agree with their designated pier side by checking pier_side.
        """
        # check, if the header is filled in (agency & terminal)
-        if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
+        # if len(df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value]) == 0:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.AGENCY):
            return self.get_no_violation_default_output() # rule not applicable

-        if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
+        # if len(df_times.loc[df_times["participant_type"]==ParticipantType.TERMINAL.value]) == 0:
+        if not self.check_if_header_exists(df_times, participant_type=ParticipantType.TERMINAL):
            return self.get_no_violation_default_output() # rule not applicable

-        times_agency = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.AGENCY.value)
        times_terminal = self.sql_handler.get_times_for_participant_type(df_times, participant_type=ParticipantType.TERMINAL.value)

        # when one of the two values is null, the state is GREEN
--- a/src/server/BreCal/validators/validation_rules.py
+++ b/src/server/BreCal/validators/validation_rules.py
@ -30,9 +30,9 @@ class ValidationRules(ValidationRuleFunctions):
        returns: (evaluation_state, violations)
        """
        # prepare df_times, which every validation rule tends to use
-        df_times = self.sql_handler.df_dict.get('times', pd.DataFrame()) # -> pd.DataFrame
+        all_df_times = self.sql_handler.df_dict.get('times', pd.DataFrame()) # -> pd.DataFrame

-        if len(df_times)==0:
+        if len(all_df_times)==0:
            return (StatusFlags.GREEN.value, [])

        spm = self.sql_handler.df_dict["shipcall_participant_map"]