Changed validation rule 002 A-C so that the agency time is used as the reference time, allowing a deviation of up to 15 minutes
This commit is contained in:
parent
df5f88d3cf
commit
126d74ec48
@ -134,8 +134,16 @@ class ValidationRuleBaseFunctions():
|
||||
participant_types = [ParticipantType.AGENCY.value, ParticipantType.MOORING.value, ParticipantType.PORT_ADMINISTRATION.value, ParticipantType.PILOT.value, ParticipantType.TUG.value]
|
||||
else:
|
||||
participant_types = [ParticipantType.AGENCY.value, ParticipantType.MOORING.value, ParticipantType.PILOT.value, ParticipantType.TUG.value]
|
||||
|
||||
agency_times = df_times.loc[df_times["participant_type"]==ParticipantType.AGENCY.value,:]
|
||||
df_times = df_times.loc[df_times["participant_type"].isin(participant_types),:]
|
||||
|
||||
|
||||
agency_time = [time_ for time_ in agency_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)]
|
||||
if not len(agency_time):
|
||||
violation_state = False
|
||||
return violation_state
|
||||
|
||||
# exclude missing entries and consider only pd.Timestamp entries (which ignores pd.NaT/null entries)
|
||||
estimated_times = [time_ for time_ in df_times.loc[:,query].tolist() if isinstance(time_, pd.Timestamp)] # df_times = df_times.loc[~df_times[query].isnull(),:]
|
||||
|
||||
@ -143,9 +151,18 @@ class ValidationRuleBaseFunctions():
|
||||
if len(estimated_times)==0:
|
||||
violation_state = False
|
||||
return violation_state
|
||||
|
||||
# this (current) solution compares times to the reference (agency) time and checks if the difference is greater than 15 minutes
|
||||
violation_state = ((np.max(estimated_times) - agency_time[0]) > pd.Timedelta("15min")) or ((agency_time[0] - np.min(estimated_times)) > pd.Timedelta("15min"))
|
||||
|
||||
difference = np.max(estimated_times) - np.min(estimated_times)
|
||||
violation_state = difference > pd.Timedelta("15min")
|
||||
# this solution to the rule compares all times to each other. When there is a total difference of more than 15 minutes, a violation occurs
|
||||
# Consequently, it treats all times as equally important
|
||||
# difference = np.max(estimated_times) - np.min(estimated_times)
|
||||
# violation_state = difference > pd.Timedelta("15min")
|
||||
|
||||
# this solution clamps the times to 15 minute intervals and compares these values. When there is a single time difference, a violation occurs
|
||||
# the drawback is that, in some cases, even a minimal difference of, say, 1 minute (:22 and :23 minutes after the hour) means the violation is
|
||||
# triggered even though the times are very close to each other
|
||||
|
||||
# apply rounding. For example, the agreement of different participants may be required to match minute-wise
|
||||
# '15min' rounds to 'every 15 minutes'. E.g., '2023-09-22 08:18:49' becomes '2023-09-22 08:15:00'
|
||||
@ -156,6 +173,8 @@ class ValidationRuleBaseFunctions():
|
||||
# times_agency.eta_berth==times_mooring.eta_berth==times_portadministration.eta_berth==times_pilot.eta_berth==times_tug.eta_berth
|
||||
# n_unique_times = len(np.unique(estimated_times))
|
||||
# violation_state = n_unique_times!=1
|
||||
|
||||
|
||||
return violation_state
|
||||
|
||||
def check_unique_shipcall_counts(self, query:str, times_agency:pd.DataFrame, rounding="min", maximum_threshold=3, all_times_agency=None)->bool:
|
||||
|
||||
Reference in New Issue
Block a user