Compare commits

...

7 Commits

Author SHA1 Message Date
24e7f0f6f4 WIP 2025-11-13 13:04:23 +01:00
5b61102356 Added a simple AI generated Python web client to run tools and scripts 2025-11-13 13:04:23 +01:00
e4d82835da WIP 2025-11-13 13:04:23 +01:00
b5dd7422f4 Initializing pool connection variable with None.
Release pool connection handle under all circumstances, especially when a query fails
before the call is finished. This should avoid connection starvation.

fix prod. link

production fix

Fixed application path
2025-11-12 15:06:54 +01:00
63a3ce2f6f Improved connection pool init 2025-11-12 13:54:26 +01:00
8cc3444626 Added default port to python run flask settings 2025-11-12 13:53:34 +01:00
14cfb41591 bugfix enum format
fixed required case

fixed more default occurrences

changed validates signature
2025-09-08 14:59:09 +02:00
26 changed files with 1066 additions and 124 deletions

3
.vscode/launch.json vendored
View File

@ -12,7 +12,8 @@
"env": {
"FLASK_APP": "src/server/BreCal",
"FLASK_DEBUG": "1",
"SECRET_KEY" : "zdiTz8P3jXOc7jztIQAoelK4zztyuCpJ" // https://randomkeygen.com/
"SECRET_KEY" : "zdiTz8P3jXOc7jztIQAoelK4zztyuCpJ", // https://randomkeygen.com/
"FLASK_RUN_PORT": "5000"
},
"args": [
"run",

View File

@ -0,0 +1,51 @@
# BreCal API Client
Minimal Python helper for `misc/BreCalApi.yaml`. It focuses on the login, shipcall, and times endpoints needed by CLI tools, but the helper method `BreCalClient.raw_request` makes it straightforward to call any other endpoint defined in the OpenAPI specification.
Dependencies: only the `requests` package in addition to the standard library.
## Endpoint selection
`BreCalClient` reads its default `base_url` from `~/.config/brecal/client.json`. The file lets you define multiple deployments and switch between them without modifying code:
```json
{
"environment": "devel",
"endpoints": {
"local": "http://localhost:5000",
"devel": "https://brecaldevel.bsmd-emswe.eu",
"test": "https://brecaltest.example.net",
"prod": "https://brecal.example.com"
}
}
```
Override the selection at runtime via `BreCalClient(base_url="...")` or the environment variable `BRECAL_BASE_URL`. If no config is present the client falls back to the development server URL.
## Credentials
Store credentials in `~/.config/brecal/credentials.json`:
```json
{
"username": "alfred",
"password": "123456"
}
```
You can override the location when calling `Credentials.load("/path/to/file.json")` or provide credentials from environment variables via `Credentials.from_env()`.
## Example
```python
from brecal_api_client import BreCalClient, Credentials
creds = Credentials.load()
with BreCalClient(credentials=creds) as client:
# list ship calls from the last week
shipcalls = client.get_shipcalls(past_days=7)
# create/update ship calls or times
shipcall_id = client.create_shipcall({...})
times = client.get_times(shipcall_id=shipcall_id)
```

View File

@ -0,0 +1,25 @@
"""Simple Python client for the BreCal REST API.

Re-exports the package's public names so callers can simply write
``from brecal_api_client import BreCalClient, Credentials``.
"""
from .client import BreCalClient, DEFAULT_BASE_URL
from .config import ClientConfig, get_default_base_url
from .credentials import Credentials
from .exceptions import (
    AuthenticationError,
    AuthorizationError,
    BreCalApiError,
    ClientConfigurationError,
)
from .types import LoginResult
# Names exported via ``from brecal_api_client import *``.
__all__ = [
    "BreCalClient",
    "Credentials",
    "ClientConfig",
    "get_default_base_url",
    "LoginResult",
    "DEFAULT_BASE_URL",
    "BreCalApiError",
    "AuthenticationError",
    "AuthorizationError",
    "ClientConfigurationError",
]

View File

@ -0,0 +1,248 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, Iterable, Mapping, Optional, Sequence
from urllib.parse import urljoin
import requests
import time
from .config import get_default_base_url
from .credentials import Credentials
from .exceptions import (
AuthenticationError,
AuthorizationError,
BreCalApiError,
ClientConfigurationError,
)
from .types import JsonDict, LoginResult, MutableJsonDict
# Resolved once at import time from BRECAL_BASE_URL / client.json (see config.py).
DEFAULT_BASE_URL = get_default_base_url()
@dataclass
class _RequestContext:
    """Describes one HTTP call: verb, relative path, accepted status codes,
    and whether a bearer token must be attached."""
    method: str
    path: str
    expected: Sequence[int]
    auth: bool
class BreCalClient:
    """Thin convenience wrapper around the BreCal REST API.

    Wraps a ``requests.Session``, manages the login token, and provides
    typed helpers for the shipcall and times endpoints. Use as a context
    manager to guarantee the HTTP session is closed.
    """
    def __init__(
        self,
        base_url: Optional[str] = None,
        *,
        credentials: Optional[Credentials] = None,
        timeout: float = 30.0,
        session: Optional[requests.Session] = None,
        auto_login: bool = True,
    ) -> None:
        """Create a client.

        Args:
            base_url: API root; defaults to the configured deployment URL.
            credentials: optional credentials; with ``auto_login`` they are
                used immediately to obtain a token.
            timeout: per-request timeout in seconds.
            session: optional pre-configured ``requests.Session``.
            auto_login: when True and credentials are given, log in now.

        Raises:
            ClientConfigurationError: when no base URL can be resolved.
        """
        resolved_base_url = base_url or get_default_base_url()
        if not resolved_base_url:
            raise ClientConfigurationError("base_url must be provided.")
        self.base_url = resolved_base_url.rstrip("/")
        self._timeout = timeout
        self._session = session or requests.Session()
        self._credentials = credentials
        self._login: Optional[LoginResult] = None
        if auto_login and credentials is not None:
            self.login(credentials)
    # -----------------------------------------------------
    # lifecycle helpers
    # -----------------------------------------------------
    def close(self) -> None:
        """Close the underlying HTTP session."""
        self._session.close()
    def __enter__(self) -> "BreCalClient":
        return self
    def __exit__(self, exc_type, exc, tb) -> None:
        # Context-manager exit: always release the HTTP session.
        self.close()
    # -----------------------------------------------------
    # authentication
    # -----------------------------------------------------
    @property
    def token(self) -> Optional[str]:
        """Current bearer token, or None when not logged in."""
        return self._login.token if self._login else None
    @property
    def login_info(self) -> Optional[LoginResult]:
        """Full /login payload from the last successful login, if any."""
        return self._login
    def ensure_authenticated(self) -> None:
        """Re-login with stored credentials unless the token is still valid.

        A 30-second safety margin is applied so a token about to expire is
        refreshed before it is used.

        Raises:
            AuthenticationError: when no credentials are stored.
        """
        if self._login and self._login.expires_at.timestamp() > _epoch_seconds() + 30:
            return
        if not self._credentials:
            raise AuthenticationError(
                "Client has no stored credentials. Call login() with credentials first."
            )
        self.login(self._credentials)
    def login(self, credentials: Credentials) -> LoginResult:
        """POST /login and remember both the result and the credentials.

        Raises:
            AuthenticationError: on an unexpected payload or missing token.
        """
        payload = {"username": credentials.username, "password": credentials.password}
        data = self._request_json(
            _RequestContext("POST", "/login", expected=(200,), auth=False),
            json=payload,
        )
        if not isinstance(data, Mapping):
            raise AuthenticationError("Login returned unexpected payload.")
        result = LoginResult.from_api(data)
        if not result.token:
            raise AuthenticationError("Login response did not include a token.")
        self._login = result
        self._credentials = credentials
        return result
    # -----------------------------------------------------
    # shipcalls
    # -----------------------------------------------------
    def get_shipcalls(self, *, past_days: Optional[int] = None) -> Sequence[JsonDict]:
        """GET /shipcalls, optionally limited to the last *past_days* days."""
        params: Dict[str, Any] = {}
        if past_days is not None:
            params["past_days"] = int(past_days)
        data = self._request_json(
            _RequestContext("GET", "/shipcalls", expected=(200,), auth=True),
            params=params or None,
        )
        return _as_sequence_of_dicts(data)
    def create_shipcall(self, shipcall: Mapping[str, Any]) -> int:
        """POST /shipcalls; any 'id' in the input is dropped. Returns the new id."""
        payload = _copy_without_keys(shipcall, drop_keys=("id",))
        data = self._request_json(
            _RequestContext("POST", "/shipcalls", expected=(201,), auth=True),
            json=payload,
        )
        return _extract_id(data)
    def update_shipcall(self, shipcall: Mapping[str, Any]) -> int:
        """PUT /shipcalls; requires an 'id' field. Returns the record id."""
        if "id" not in shipcall:
            raise ValueError("Shipcall update requires an 'id' field.")
        data = self._request_json(
            _RequestContext("PUT", "/shipcalls", expected=(200,), auth=True),
            json=dict(shipcall),
        )
        return _extract_id(data)
    # -----------------------------------------------------
    # times
    # -----------------------------------------------------
    def get_times(self, *, shipcall_id: Optional[int] = None) -> Sequence[JsonDict]:
        """GET /times, optionally filtered by shipcall id."""
        params = {"shipcall_id": int(shipcall_id)} if shipcall_id is not None else None
        data = self._request_json(
            _RequestContext("GET", "/times", expected=(200,), auth=True),
            params=params,
        )
        return _as_sequence_of_dicts(data)
    def create_times(self, entry: Mapping[str, Any]) -> int:
        """POST /times; any 'id' in the input is dropped. Returns the new id."""
        payload = _copy_without_keys(entry, drop_keys=("id",))
        data = self._request_json(
            _RequestContext("POST", "/times", expected=(201,), auth=True),
            json=payload,
        )
        return _extract_id(data)
    def update_times(self, entry: Mapping[str, Any]) -> int:
        """PUT /times; requires an 'id' field. Returns the record id."""
        if "id" not in entry:
            raise ValueError("Times update requires an 'id' field.")
        data = self._request_json(
            _RequestContext("PUT", "/times", expected=(200,), auth=True),
            json=dict(entry),
        )
        return _extract_id(data)
    def delete_times(self, times_id: int) -> int:
        """DELETE /times?id=...; returns the deleted record's id."""
        data = self._request_json(
            _RequestContext("DELETE", "/times", expected=(200,), auth=True),
            params={"id": int(times_id)},
        )
        return _extract_id(data)
    # -----------------------------------------------------
    # generic helpers
    # -----------------------------------------------------
    def raw_request(
        self,
        method: str,
        path: str,
        *,
        expected: Sequence[int] = (200,),
        auth: bool = True,
        **kwargs: Any,
    ) -> Any:
        """Expose the low-level request helper for endpoints not wrapped yet."""
        ctx = _RequestContext(method.upper(), path, expected, auth)
        return self._request_json(ctx, **kwargs)
    def _request_json(self, ctx: _RequestContext, **kwargs: Any) -> Any:
        """Send one HTTP request and decode the JSON response.

        Refreshes authentication first when ``ctx.auth`` is set, raises
        AuthorizationError on 401/403 and BreCalApiError on any other
        status not in ``ctx.expected``, and returns the decoded body
        (None when the response is empty).
        """
        url = urljoin(f"{self.base_url}/", ctx.path.lstrip("/"))
        headers: Dict[str, str] = kwargs.pop("headers", {})
        headers.setdefault("Accept", "application/json")
        if "json" in kwargs:
            headers.setdefault("Content-Type", "application/json")
        if ctx.auth:
            # Refresh the token (if needed) before attaching it.
            self.ensure_authenticated()
            headers.setdefault("Authorization", f"Bearer {self.token}")
        response = self._session.request(
            ctx.method,
            url,
            timeout=self._timeout,
            headers=headers,
            **kwargs,
        )
        if response.status_code == 401 or response.status_code == 403:
            raise AuthorizationError(
                f"{ctx.method} {ctx.path} returned {response.status_code}",
                status_code=response.status_code,
                payload=_safe_json(response),
            )
        if response.status_code not in ctx.expected:
            raise BreCalApiError(
                f"{ctx.method} {ctx.path} returned {response.status_code}",
                status_code=response.status_code,
                payload=_safe_json(response),
            )
        if response.content:
            return _safe_json(response)
        return None
def _copy_without_keys(
data: Mapping[str, Any], *, drop_keys: Iterable[str]
) -> MutableJsonDict:
payload: MutableJsonDict = dict(data)
for key in drop_keys:
payload.pop(key, None)
return payload
def _extract_id(payload: Any) -> int:
if isinstance(payload, Mapping) and "id" in payload:
return int(payload["id"])
raise BreCalApiError("API response did not include an 'id' field.", payload=payload)
def _as_sequence_of_dicts(data: Any) -> Sequence[JsonDict]:
if isinstance(data, list):
return data
raise BreCalApiError("Expected list response from API.", payload=data)
def _safe_json(response: requests.Response) -> Any:
content_type = response.headers.get("Content-Type", "")
if "application/json" in content_type:
try:
return response.json()
except ValueError:
pass
return response.text
def _epoch_seconds() -> int:
return int(time.time())

View File

@ -0,0 +1,74 @@
from __future__ import annotations
import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Mapping, Optional, Union
from .exceptions import ClientConfigurationError
# Type accepted anywhere a config-file path is expected.
ConfigPath = Union[str, Path]
# Used when no client.json exists and BRECAL_BASE_URL is unset.
DEFAULT_BASE_URL_FALLBACK = "https://brecaldevel.bsmd-emswe.eu"
# File name looked up below the ``brecal`` config directory.
CONFIG_FILENAME = "client.json"
def _default_config_path() -> Path:
    """Path of ``client.json`` in the user's config directory.

    Honours ``XDG_CONFIG_HOME`` and falls back to ``~/.config``.
    """
    fallback = Path.home() / ".config"
    config_home = Path(os.environ.get("XDG_CONFIG_HOME", fallback))
    return (config_home / "brecal" / CONFIG_FILENAME).expanduser()
@dataclass(frozen=True)
class ClientConfig:
    """Resolved client configuration: base URL plus optional environment name."""
    base_url: str
    environment: Optional[str] = None
    @classmethod
    def from_mapping(cls, data: Mapping[str, Any]) -> "ClientConfig":
        """Build a config from a parsed ``client.json`` mapping.

        ``endpoints[environment]`` wins over a plain ``base_url``; when
        neither matches, the first configured endpoint is the last resort.

        Raises:
            ClientConfigurationError: when no URL can be determined.
        """
        env_name = data.get("environment")
        url = data.get("base_url")
        endpoints = data.get("endpoints")
        if isinstance(endpoints, Mapping):
            if env_name and env_name in endpoints:
                url = endpoints[env_name]
            elif not url and endpoints:
                # No explicit base_url: fall back to the first configured endpoint.
                url = next(iter(endpoints.values()))
        if not url:
            raise ClientConfigurationError(
                "Client configuration requires either 'base_url' or an "
                "'endpoints' mapping."
            )
        return cls(
            base_url=str(url).rstrip("/"),
            environment=str(env_name) if env_name else None,
        )
    @classmethod
    def load(cls, path: Optional[ConfigPath] = None) -> "ClientConfig":
        """Read and parse a JSON config file (default: XDG config location)."""
        target = Path(path) if path else _default_config_path()
        raw = json.loads(target.read_text(encoding="utf-8"))
        return cls.from_mapping(raw)
def get_default_base_url(path: Optional[ConfigPath] = None) -> str:
    """Resolve the default base URL using env vars or ~/.config/brecal/client.json.

    Precedence: ``BRECAL_BASE_URL`` env var, then the config file, then the
    built-in development fallback when no config file exists.
    """
    override = os.getenv("BRECAL_BASE_URL")
    if override:
        return override.rstrip("/")
    try:
        return ClientConfig.load(path=path).base_url
    except FileNotFoundError:
        # No config file at all: use the development deployment.
        return DEFAULT_BASE_URL_FALLBACK
    except ClientConfigurationError:
        # Already a meaningful configuration error; let it propagate as-is.
        raise
    except Exception as exc:
        raise ClientConfigurationError(
            f"Failed to load BreCal client configuration: {exc}"
        ) from exc

View File

@ -0,0 +1,68 @@
from __future__ import annotations
import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Mapping, Optional, Union
ConfigPath = Union[str, Path]
def _default_credentials_path() -> Path:
"""Return the default path for the credential file."""
xdg = Path(os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config"))
return (xdg / "brecal" / "credentials.json").expanduser()
@dataclass(frozen=True)
class Credentials:
    """Holds username/password pairs for the BreCal API."""
    username: str
    password: str
    @classmethod
    def from_mapping(cls, data: Mapping[str, Any]) -> "Credentials":
        """Create credentials from a mapping (dict, TOML config, etc.).

        Accepts a few alternative key spellings per field and validates
        that both values are non-empty strings.
        """
        def first_present(candidates: tuple, canonical: str) -> Any:
            for name in candidates:
                if name in data:
                    return data[name]
            raise KeyError(f"Missing '{canonical}' in credentials mapping.")
        username = first_present(("username", "user_name", "user"), "username")
        password = first_present(("password", "pass", "secret"), "password")
        if not isinstance(username, str) or not username.strip():
            raise ValueError("BreCal credentials require a non-empty username.")
        if not isinstance(password, str) or not password:
            raise ValueError("BreCal credentials require a non-empty password.")
        return cls(username=username.strip(), password=password)
    @classmethod
    def load(cls, path: Optional[ConfigPath] = None) -> "Credentials":
        """Load credentials from a JSON file (default: XDG config location)."""
        source = Path(path) if path else _default_credentials_path()
        return cls.from_mapping(json.loads(source.read_text(encoding="utf-8")))
    @classmethod
    def from_env(
        cls, username_var: str = "BRECAL_USERNAME", password_var: str = "BRECAL_PASSWORD"
    ) -> "Credentials":
        """Load credentials from environment variables."""
        user = os.getenv(username_var)
        secret = os.getenv(password_var)
        if user and secret:
            return cls(username=user, password=secret)
        raise EnvironmentError(
            f"Missing credentials in env vars {username_var}/{password_var}"
        )
def _coalesce_key(
data: Mapping[str, Any], keys: tuple[str, ...], *, required: str
) -> Any:
for key in keys:
if key in data:
return data[key]
raise KeyError(f"Missing '{required}' in credentials mapping.")

View File

@ -0,0 +1,30 @@
from __future__ import annotations
from typing import Any, Optional
class BreCalApiError(RuntimeError):
    """Base exception for API client failures.

    Carries the HTTP ``status_code`` and decoded response ``payload``
    (when available) so callers can branch without parsing the message.
    """
    def __init__(
        self,
        message: str,
        *,
        status_code: Optional[int] = None,
        payload: Optional[Any] = None,
    ) -> None:
        super().__init__(message)
        self.status_code, self.payload = status_code, payload
class AuthenticationError(BreCalApiError):
    """Raised when login fails (missing credentials, malformed login payload,
    or a login response without a token)."""
class AuthorizationError(BreCalApiError):
    """Raised when the server answers any request with 401 or 403."""
class ClientConfigurationError(ValueError):
    """Raised for invalid client configuration or missing dependencies."""
    # NOTE(review): derives from ValueError rather than BreCalApiError, so
    # callers catching BreCalApiError will not see configuration problems.

View File

@ -0,0 +1,58 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Dict, Mapping, MutableMapping, Optional
# Convenience aliases for JSON-shaped payloads exchanged with the API.
JsonDict = Dict[str, Any]
MutableJsonDict = MutableMapping[str, Any]
@dataclass
class LoginResult:
    """Represents the payload returned by /login."""
    id: int
    participant_id: Optional[int]
    first_name: str
    last_name: str
    user_name: str
    user_email: Optional[str]
    user_phone: Optional[str]
    token: str
    exp: int
    @classmethod
    def from_api(cls, data: Mapping[str, Any]) -> "LoginResult":
        """Build a LoginResult from the raw /login JSON mapping."""
        def text(key: str) -> str:
            # Missing, None, and empty values all collapse to "".
            return str(data.get(key) or "")
        return cls(
            id=_coerce_int(data.get("id")),
            participant_id=_coerce_optional_int(data.get("participant_id")),
            first_name=text("first_name"),
            last_name=text("last_name"),
            user_name=text("user_name"),
            user_email=_coerce_optional_str(data.get("user_email")),
            user_phone=_coerce_optional_str(data.get("user_phone")),
            token=text("token"),
            exp=_coerce_int(data.get("exp")),
        )
    @property
    def expires_at(self) -> datetime:
        """Token expiry (``exp``) as a timezone-aware UTC datetime."""
        return datetime.fromtimestamp(self.exp, tz=timezone.utc)
def _coerce_int(value: Any) -> int:
if value is None:
raise ValueError("Expected integer value, got None")
return int(value)
def _coerce_optional_int(value: Any) -> Optional[int]:
return None if value is None else int(value)
def _coerce_optional_str(value: Any) -> Optional[str]:
if value is None:
return None
text = str(value)
return text if text else None

View File

@ -69,7 +69,7 @@ def create_app(test_config=None, instance_path=None):
app.register_blueprint(history.bp)
app.register_blueprint(ports.bp)
logging.basicConfig(filename='brecaldevel.log', level=logging.DEBUG, format='%(asctime)s | %(name)s | %(levelname)s | %(message)s')
logging.basicConfig(filename='brecal.log', level=logging.WARNING, format='%(asctime)s | %(name)s | %(levelname)s | %(message)s')
local_db.initPool(os.path.dirname(app.instance_path))
logging.info('App started')

View File

@ -10,6 +10,7 @@ def GetBerths(options):
No parameters, gets all entries
"""
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)

View File

@ -16,6 +16,8 @@ def GetHistory(options):
options["shipcall_id"]: **Id of shipcall**.
"""
pooledConnection = None
data = []
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
@ -26,10 +28,6 @@ def GetHistory(options):
data = commands.query("SELECT id, participant_id, shipcall_id, timestamp, eta, type, operation FROM history WHERE shipcall_id = ?shipcallid?",
model=History.from_query_row,
param={"shipcallid" : options["shipcall_id"]})
pooledConnection.close()
except Exception as ex:
pdb.pm()
logging.error(ex)
@ -37,6 +35,9 @@ def GetHistory(options):
result = {}
result["error_field"] = "call failed"
return json.dumps("call failed"), 500
finally:
if pooledConnection is not None:
pooledConnection.close()
return json.dumps(data, default=model.obj_dict), 200, {'Content-Type': 'application/json; charset=utf-8'}

View File

@ -6,14 +6,15 @@ import bcrypt
from ..schemas import model
from .. import local_db
from ..services import jwt_handler
from BreCal.database.sql_queries import SQLQuery
def GetUser(options):
pooledConnection = None
try:
if "password" in options and "username" in options:
hash = bcrypt.hashpw(options["password"].encode('utf-8'), bcrypt.gensalt( 12 )).decode('utf8')
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
# query = SQLQuery.get_user()
@ -63,7 +64,3 @@ def GetUser(options):
finally:
if pooledConnection is not None:
pooledConnection.close()
# $2b$12$uWLE0r32IrtCV30WkMbVwOdltgeibymZyYAf4ZnQb2Bip8hrkGGwG
# $2b$12$.vEapj9xU8z0RK0IpIGeYuRIl0ktdMt4XdJQBhVn.3K2hmvm7qD3y
# $2b$12$yL3PiseU70ciwEuMVM4OtuMwR6tNuIT9vvBiBG/uyMrPxa16E2Zqu

View File

@ -11,13 +11,12 @@ def GetNotifications(token):
No parameters, gets all entries
"""
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
data = commands.query("SELECT id, shipcall_id, participant_id, level, type, message, created, modified FROM notification " +
"WHERE level = 2", model=model.Notification.from_query_row)
pooledConnection.close()
except Exception as ex:
logging.error(ex)
@ -25,6 +24,9 @@ def GetNotifications(token):
result = {}
result["error_field"] = "call failed"
return json.dumps(result), 500, {'Content-Type': 'application/json; charset=utf-8'}
finally:
if pooledConnection is not None:
pooledConnection.close()
return json.dumps(data, default=model.obj_dict), 200, {'Content-Type': 'application/json; charset=utf-8'}

View File

@ -12,6 +12,7 @@ def GetParticipant(options):
options["user_id"]: **Id of user**. *Example: 2*. User id returned by login call.
"""
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)

View File

@ -11,6 +11,7 @@ def GetPorts(token):
No parameters, gets all entries
"""
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)

View File

@ -18,8 +18,8 @@ def GetShipcalls(options):
No parameters, gets all entries
"""
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
# query = SQLQuery.get_shipcalls(options)
@ -70,8 +70,8 @@ def PostShipcalls(schemaModel):
"""
# This creates a *new* entry
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
@ -192,8 +192,8 @@ def PutShipcalls(schemaModel):
"""
# This updates an *existing* entry
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
@ -205,7 +205,6 @@ def PutShipcalls(schemaModel):
theshipcall = commands.query_single_or_default("SELECT * FROM shipcall where id = ?id?", sentinel, param={"id" : schemaModel["id"]})
if theshipcall is sentinel:
pooledConnection.close()
return json.dumps("no such record"), 404, {'Content-Type': 'application/json; charset=utf-8'}
was_canceled = theshipcall["canceled"]

View File

@ -11,8 +11,8 @@ def GetShips(token):
No parameters, gets all entries
"""
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
# query = SQLQuery.get_ships()
@ -44,8 +44,8 @@ def PostShip(schemaModel):
# TODO: Validate the incoming data
# This creates a *new* entry
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
@ -83,8 +83,6 @@ def PostShip(schemaModel):
# new_id = commands.execute_scalar(nquery)
new_id = commands.execute_scalar("select last_insert_id()")
pooledConnection.close()
return json.dumps({"id" : new_id}), 201, {'Content-Type': 'application/json; charset=utf-8'}
except Exception as ex:
@ -93,6 +91,9 @@ def PostShip(schemaModel):
result = {}
result["error_field"] = "call failed"
return json.dumps(result), 500, {'Content-Type': 'application/json; charset=utf-8'}
finally:
if pooledConnection is not None:
pooledConnection.close()
def PutShip(schemaModel):
@ -101,8 +102,8 @@ def PutShip(schemaModel):
"""
# This updates an *existing* entry
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
@ -125,8 +126,6 @@ def PutShip(schemaModel):
affected_rows = commands.execute(query, param=schemaModel)
pooledConnection.close()
return json.dumps({"id" : schemaModel["id"]}), 200, {'Content-Type': 'application/json; charset=utf-8'}
except Exception as ex:
@ -135,6 +134,9 @@ def PutShip(schemaModel):
result = {}
result["error_field"] = "call failed"
return json.dumps(result), 500, {'Content-Type': 'application/json; charset=utf-8'}
finally:
if pooledConnection is not None:
pooledConnection.close()
def DeleteShip(options):
@ -143,16 +145,14 @@ def DeleteShip(options):
options["id"]
"""
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
# query = SQLQuery.get_ship_delete_by_id()
# affected_rows = commands.execute(query, param={"id" : options["id"]})
affected_rows = commands.execute("UPDATE ship SET deleted = 1 WHERE id = ?id?", param={"id" : options["id"]})
pooledConnection.close()
if affected_rows == 1:
return json.dumps({"id" : options["id"]}), 200, {'Content-Type': 'application/json; charset=utf-8'}
@ -166,3 +166,6 @@ def DeleteShip(options):
result = {}
result["error_field"] = "call failed"
return json.dumps(result), 500, {'Content-Type': 'application/json; charset=utf-8'}
finally:
if pooledConnection is not None:
pooledConnection.close()

View File

@ -18,8 +18,8 @@ def GetTimes(options):
"""
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
# query = SQLQuery.get_times()
@ -28,7 +28,6 @@ def GetTimes(options):
"zone_entry, zone_entry_fixed, operations_start, operations_end, remarks, shipcall_id, participant_id, " +
"berth_id, berth_info, pier_side, participant_type, created, modified, ata, atd, eta_interval_end, etd_interval_end FROM times " +
"WHERE times.shipcall_id = ?scid?", model=model.Times, param={"scid" : options["shipcall_id"]})
pooledConnection.close()
except Exception as ex:
logging.error(traceback.format_exc())
@ -38,6 +37,10 @@ def GetTimes(options):
result["error_field"] = "call failed"
return json.dumps(result), 500, {'Content-Type': 'application/json; charset=utf-8'}
finally:
if pooledConnection is not None:
pooledConnection.close()
return json.dumps(data, default=model.obj_dict), 200, {'Content-Type': 'application/json; charset=utf-8'}
@ -51,8 +54,8 @@ def PostTimes(schemaModel):
# TODO: Validate the upload data
# This creates a *new* entry
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
@ -119,8 +122,8 @@ def PutTimes(schemaModel):
"""
# This updates an *existing* entry
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
@ -177,8 +180,8 @@ def DeleteTimes(options):
options["id"]
"""
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
shipcall_id = commands.execute_scalar("SELECT shipcall_id FROM times WHERE id = ?id?", param={"id" : options["id"]})

View File

@ -14,8 +14,8 @@ def PutUser(schemaModel):
"""
# This updates an *existing* entry
pooledConnection = None
try:
pooledConnection = local_db.getPoolConnection()
commands = pydapper.using(pooledConnection)
@ -26,7 +26,6 @@ def PutUser(schemaModel):
# theuser = commands.query_single_or_default(query, sentinel, param={"id" : schemaModel["id"]}, model=model.User)
theuser = commands.query_single_or_default("SELECT * FROM user where id = ?id?", sentinel, param={"id" : schemaModel["id"]}, model=model.User)
if theuser is sentinel:
pooledConnection.close()
# #TODO: result = {"message":"no such record"} -> json.dumps
return json.dumps("no such record"), 404, {'Content-Type': 'application/json; charset=utf-8'}

View File

@ -1,58 +1,83 @@
import mysql.connector
from mysql.connector import pooling
import pydapper
import logging
import json
import os
import sys
from BreCal.schemas import defs
config_path = None
_connection_pool = None
def initPool(instancePath, connection_filename="connection_data_devel.json"):
def _load_json(path):
with open(path, encoding="utf-8") as fh:
return json.load(fh)
def _build_pool_config(connection_data, pool_name, pool_size):
pool_config = dict(connection_data)
pool_config.setdefault("pool_name", pool_name)
pool_config.setdefault("pool_size", pool_size)
return pool_config
def initPool(instancePath, connection_filename="connection_data_prod.json",
pool_name="brecal_pool", pool_size=10):
"""
Initialize the MySQL connection pool and load email credentials.
"""
global config_path, _connection_pool
try:
global config_path
if(config_path == None):
config_path = os.path.join(instancePath,f'../../../secure/{connection_filename}') #connection_data_devel.json');
if config_path is None:
config_path = os.path.join(instancePath, f'../../../secure/{connection_filename}')
# config_path = "E:/temp/connection_data.json"
print (config_path)
# config_path = 'C:\\temp\\connection_data_test.json'
print(config_path)
if not os.path.exists(config_path):
print ('cannot find ' + os.path.abspath(config_path))
print('cannot find ' + os.path.abspath(config_path))
print("instance path", instancePath)
sys.exit(1)
f = open(config_path);
connection_data = json.load(f)
f.close()
connection_data = _load_json(config_path)
if _connection_pool is None:
pool_config = _build_pool_config(connection_data, pool_name, pool_size)
_connection_pool = pooling.MySQLConnectionPool(**pool_config)
conn_from_pool = mysql.connector.connect(**connection_data)
conn_from_pool = _connection_pool.get_connection()
try:
commands = pydapper.using(conn_from_pool)
commands.query("SELECT id from `user` LIMIT 1")
print("DB connection successful")
finally:
conn_from_pool.close()
commands = pydapper.using(conn_from_pool)
data = commands.query("SELECT id from `user`")
print("DB connection successful")
conn_from_pool.close()
credentials_file = "email_credentials_test.json"
credentials_path = os.path.join(instancePath, f'../../../secure/{credentials_file}')
credentials_file = "email_credentials_devel.json"
credentials_path = os.path.join(instancePath,f'../../../secure/{credentials_file}')
# credentials_path = "E:/temp/email_credentials_devel.json"
# credentials_path = 'C:\\temp\\email_credentials_test.json'
if not os.path.exists(credentials_path):
print ('cannot find ' + os.path.abspath(credentials_path))
print('cannot find ' + os.path.abspath(credentials_path))
sys.exit(1)
f = open(credentials_path);
defs.email_credentials = json.load(f)
f.close()
defs.email_credentials = _load_json(credentials_path)
except mysql.connector.PoolError as e:
logging.error(f"Failed to create connection pool: {e}")
print(e)
except Exception as e:
logging.error("Failed to initialize DB pool: %s", e)
print(e)
def getPoolConnection():
global config_path
f = open(config_path);
connection_data = json.load(f)
return mysql.connector.connect(**connection_data)
if _connection_pool is None:
raise RuntimeError("Connection pool not initialized. Call initPool first.")
try:
return _connection_pool.get_connection()
except mysql.connector.PoolError as exc:
logging.error("Connection pool exhausted: %s", exc)
raise

View File

@ -201,7 +201,7 @@ class Participant(Schema):
ports: List[int] = field(default_factory=list)
@validates("type")
def validate_type(self, value):
def validate_type(self, value, **kwargs):
# e.g., when an IntFlag has the values 1,2,4; the maximum valid value is 7
max_int = sum([int(val) for val in list(ParticipantType._value2member_map_.values())])
min_int = 0
@ -212,7 +212,7 @@ class Participant(Schema):
@validates("flags")
def validate_flags(self, value):
def validate_flags(self, value, **kwargs):
# e.g., when an IntFlag has the values 1,2,4; the maximum valid value is 7
max_int = sum([int(val) for val in list(ParticipantFlag._value2member_map_.values())])
min_int = 0
@ -237,7 +237,7 @@ class ShipcallSchema(Schema):
id = fields.Integer(required=True)
ship_id = fields.Integer(required=True)
port_id = fields.Integer(required=True)
type = fields.Enum(ShipcallType, default=ShipcallType.undefined)
type = fields.Enum(ShipcallType, load_default=ShipcallType.undefined, dump_default=ShipcallType.undefined)
eta = fields.DateTime(required=False, allow_none=True)
voyage = fields.String(allow_none=True, required=False, validate=[validate.Length(max=16)])
etd = fields.DateTime(required=False, allow_none=True)
@ -258,7 +258,7 @@ class ShipcallSchema(Schema):
anchored = fields.Bool(required=False, allow_none=True)
moored_lock = fields.Bool(required=False, allow_none=True)
canceled = fields.Bool(required=False, allow_none=True)
evaluation = fields.Enum(EvaluationType, required=False, allow_none=True, default=EvaluationType.undefined)
evaluation = fields.Enum(EvaluationType, required=False, allow_none=True, load_default=EvaluationType.undefined, dump_default=ShipcallType.undefined)
evaluation_message = fields.Str(allow_none=True, required=False)
evaluation_time = fields.DateTime(required=False, allow_none=True)
evaluation_notifications_sent = fields.Bool(required=False, allow_none=True)
@ -281,7 +281,7 @@ class ShipcallSchema(Schema):
return data
@validates("type")
def validate_type(self, value):
def validate_type(self, value, **kwargs):
valid_shipcall_type = int(value) in [item.value for item in ShipcallType]
if not valid_shipcall_type:
@ -418,7 +418,7 @@ class TimesSchema(Schema):
berth_info = fields.String(required=False, allow_none=True, validate=[validate.Length(max=512)])
pier_side = fields.Bool(required=False, allow_none = True)
shipcall_id = fields.Integer(required=True)
participant_type = fields.Integer(Required = False, allow_none=True)# TODO: could become Enum. # participant_type = fields.Enum(ParticipantType, required=False, allow_none=True, default=ParticipantType.undefined) #fields.Integer(required=False, allow_none=True)
participant_type = fields.Integer(required = False, allow_none=True) # TODO: could become Enum
ata = fields.DateTime(required=False, allow_none=True)
atd = fields.DateTime(required=False, allow_none=True)
eta_interval_end = fields.DateTime(required=False, allow_none=True)
@ -427,7 +427,7 @@ class TimesSchema(Schema):
modified = fields.DateTime(required=False, allow_none=True)
@validates("participant_type")
def validate_participant_type(self, value):
def validate_participant_type(self, value, **kwargs):
# #TODO: it may also make sense to block multi-assignments, whereas a value could be BSMD+AGENCY
# while the validation fails when one of those multi-assignments is BSMD, it passes in cases,
# such as AGENCY+PILOT
@ -440,56 +440,56 @@ class TimesSchema(Schema):
raise ValidationError({"participant_type":f"the participant_type must not be .BSMD"})
@validates("eta_berth")
def validate_eta_berth(self, value):
def validate_eta_berth(self, value, **kwargs):
# violation when time is not in the future, but also does not exceed a threshold for the 'reasonable' future
# when 'value' is 'None', a ValidationError is not issued.
valid_time = validate_time_is_in_not_too_distant_future(raise_validation_error=True, value=value, months=12)
return
@validates("etd_berth")
def validate_etd_berth(self, value):
def validate_etd_berth(self, value, **kwargs):
# violation when time is not in the future, but also does not exceed a threshold for the 'reasonable' future
# when 'value' is 'None', a ValidationError is not issued.
valid_time = validate_time_is_in_not_too_distant_future(raise_validation_error=True, value=value, months=12)
return
@validates("lock_time")
def validate_lock_time(self, value):
def validate_lock_time(self, value, **kwargs):
# violation when time is not in the future, but also does not exceed a threshold for the 'reasonable' future
# when 'value' is 'None', a ValidationError is not issued.
valid_time = validate_time_is_in_not_too_distant_future(raise_validation_error=True, value=value, months=12)
return
@validates("zone_entry")
def validate_zone_entry(self, value):
def validate_zone_entry(self, value, **kwargs):
# violation when time is not in the future, but also does not exceed a threshold for the 'reasonable' future
# when 'value' is 'None', a ValidationError is not issued.
valid_time = validate_time_is_in_not_too_distant_future(raise_validation_error=True, value=value, months=12)
return
@validates("operations_start")
def validate_operations_start(self, value):
def validate_operations_start(self, value, **kwargs):
# violation when time is not in the future, but also does not exceed a threshold for the 'reasonable' future
# when 'value' is 'None', a ValidationError is not issued.
valid_time = validate_time_is_in_not_too_distant_future(raise_validation_error=True, value=value, months=12)
return
@validates("operations_end")
def validate_operations_end(self, value):
def validate_operations_end(self, value, **kwargs):
# violation when time is not in the future, but also does not exceed a threshold for the 'reasonable' future
# when 'value' is 'None', a ValidationError is not issued.
valid_time = validate_time_is_in_not_too_distant_future(raise_validation_error=True, value=value, months=12)
return
@validates("eta_interval_end")
def validate_eta_interval_end(self, value):
def validate_eta_interval_end(self, value, **kwargs):
# violation when time is not in the future, but also does not exceed a threshold for the 'reasonable' future
# when 'value' is 'None', a ValidationError is not issued.
valid_time = validate_time_is_in_not_too_distant_future(raise_validation_error=True, value=value, months=12)
return
@validates("etd_interval_end")
def validate_etd_interval_end(self, value):
def validate_etd_interval_end(self, value, **kwargs):
# violation when time is not in the future, but also does not exceed a threshold for the 'reasonable' future
# when 'value' is 'None', a ValidationError is not issued.
valid_time = validate_time_is_in_not_too_distant_future(raise_validation_error=True, value=value, months=12)
@ -516,14 +516,14 @@ class UserSchema(Schema):
notify_on = fields.List(fields.Enum(NotificationType), required=False, allow_none=True)
@validates("user_phone")
def validate_user_phone(self, value):
def validate_user_phone(self, value, **kwargs):
if value is not None:
valid_characters = list(map(str,range(0,10)))+["+", " "]
if not all([v in valid_characters for v in value]):
raise ValidationError({"user_phone":f"one of the phone number values is not valid."})
@validates("user_email")
def validate_user_email(self, value):
def validate_user_email(self, value, **kwargs):
if value and not re.match(r"[^@]+@[^@]+\.[^@]+", value):
raise ValidationError({"user_email":f"invalid email address"})
@ -612,15 +612,15 @@ class ShipSchema(Schema):
participant_id = fields.Int(allow_none=True, required=False)
length = fields.Float(allow_none=True, required=False, validate=[validate.Range(min=0, max=1000, min_inclusive=False, max_inclusive=False)])
width = fields.Float(allow_none=True, required=False, validate=[validate.Range(min=0, max=100, min_inclusive=False, max_inclusive=False)])
is_tug = fields.Bool(allow_none=True, required=False, default=False)
is_tug = fields.Bool(allow_none=True, required=False, load_default=False, dump_default=False)
bollard_pull = fields.Int(allow_none=True, required=False)
eni = fields.Int(allow_none=True, required=False)
created = fields.DateTime(allow_none=True, required=False)
modified = fields.DateTime(allow_none=True, required=False)
deleted = fields.Bool(allow_none=True, required=False, default=False)
deleted = fields.Bool(allow_none=True, required=False, load_default=False, dump_default=False)
@validates("name")
def validate_name(self, value):
def validate_name(self, value, **kwargs):
character_length = len(str(value))
if character_length<1:
raise ValidationError({"name":f"'name' argument should have at least one character"})
@ -632,7 +632,7 @@ class ShipSchema(Schema):
return
@validates("imo")
def validate_imo(self, value):
def validate_imo(self, value, **kwargs):
value = str(value).zfill(7) # 1 becomes '0000001' (7 characters). 12345678 becomes '12345678' (8 characters)
imo_length = len(value)
if imo_length != 7:
@ -640,7 +640,7 @@ class ShipSchema(Schema):
return
@validates("callsign")
def validate_callsign(self, value):
def validate_callsign(self, value, **kwargs):
if value is not None:
callsign_length = len(str(value))
if callsign_length>8:

View File

@ -31,6 +31,7 @@ def UpdateShipcalls(options:dict = {'past_days':2}):
options:
key: 'past_days'. Is used to execute a filtered query of all available shipcalls. Defaults to 2 (days)
"""
pooledConnection = None
try:
pooledConnection = getPoolConnection()
commands = pydapper.using(pooledConnection)
@ -49,10 +50,11 @@ def UpdateShipcalls(options:dict = {'past_days':2}):
# apply 'Traffic Light' evaluation to obtain 'GREEN', 'YELLOW' or 'RED' evaluation state. The function internally updates the mysql database
evaluate_shipcall_state(mysql_connector_instance=pooledConnection, shipcall_id=shipcall_id) # new_id (last insert id) refers to the shipcall id
pooledConnection.close()
except Exception as ex:
logging.error(ex)
finally:
if pooledConnection is not None:
pooledConnection.close()
return
def UpdateNotifications(cooldown_in_mins:int=10):
@ -61,6 +63,7 @@ def UpdateNotifications(cooldown_in_mins:int=10):
notification is updated to state 1 and a notification is received by the user
"""
pooledConnection = None
try:
pooledConnection = getPoolConnection()
commands = pydapper.using(pooledConnection)
@ -70,32 +73,39 @@ def UpdateNotifications(cooldown_in_mins:int=10):
for notification in data:
commands.execute("UPDATE notification SET level = 1 WHERE id = ?id?", param={"id":notification.id})
pooledConnection.close()
except Exception as ex:
logging.error(ex)
finally:
if pooledConnection is not None:
pooledConnection.close()
def ClearNotifications(max_age_in_days:int=3):
"""
This function clears all notifications in state ("level") 2 that are older than x days
"""
pooledConnection = None
try:
pooledConnection = getPoolConnection()
commands = pydapper.using(pooledConnection)
query = f"DELETE FROM notification WHERE level = 2 and created < TIMESTAMP(NOW() - INTERVAL {max_age_in_days} DAY)"
result = commands.execute(query)
pooledConnection.close()
if(result > 0):
logging.info(f"Deleted {result} notifications")
except Exception as ex:
logging.error(ex)
finally:
if pooledConnection is not None:
pooledConnection.close()
def SendEmails(email_dict):
"""
This function sends emails to all users in the emaildict
"""
pooledConnection = None
conn = None
try:
pooledConnection = getPoolConnection()
commands = pydapper.using(pooledConnection)
@ -177,10 +187,13 @@ def SendEmails(email_dict):
finally:
if conn is not None:
conn.quit()
if pooledConnection is not None:
pooledConnection.close()
def SendNotifications():
# perhaps this will be moved somewhere else later
pooledConnection = None
try:
# find all notifications in level 1
pooledConnection = getPoolConnection()
@ -271,6 +284,7 @@ def add_function_to_schedule_send_notifications(interval_in_minutes:int=1):
return
def eval_next_24_hrs():
pooledConnection = None
try:
pooledConnection = getPoolConnection()
commands = pydapper.using(pooledConnection)

View File

@ -90,40 +90,43 @@ class ValidationRules(ValidationRuleFunctions):
if evaluation_states_old is not None and evaluation_states_new is not None:
if len(evaluation_states_old) == 1 and len(evaluation_states_new) == 1:
if evaluation_states_old[0] != evaluation_states_new[0]:
pooledConnection = getPoolConnection()
commands = pydapper.using(pooledConnection)
notification_type = 3 # RED (mapped to time_conflict)
if evaluation_states_new[0] == 2:
match evaluation_states_old[0]:
case 0:
send_notification = True
case 1:
send_notification = True
notification_type = 6 # YELLOW (mapped to missing_data)
if evaluation_states_new[0] == 3:
match evaluation_states_old[0]:
case 0:
send_notification = True
case 1:
send_notification = True
case 2:
send_notification = True
pooledConnection = None
try:
pooledConnection = getPoolConnection()
commands = pydapper.using(pooledConnection)
notification_type = 3 # RED (mapped to time_conflict)
if evaluation_states_new[0] == 2:
match evaluation_states_old[0]:
case 0:
send_notification = True
case 1:
send_notification = True
notification_type = 6 # YELLOW (mapped to missing_data)
if evaluation_states_new[0] == 3:
match evaluation_states_old[0]:
case 0:
send_notification = True
case 1:
send_notification = True
case 2:
send_notification = True
if send_notification:
query = f"INSERT INTO notification (shipcall_id, type, level, message) VALUES (?shipcall_id?, {notification_type}, 0, ?message?)"
commands.execute(query, param={"shipcall_id" : int(shipcall_df.index[0]), "message" : violations[0]})
if send_notification:
query = f"INSERT INTO notification (shipcall_id, type, level, message) VALUES (?shipcall_id?, {notification_type}, 0, ?message?)"
commands.execute(query, param={"shipcall_id" : int(shipcall_df.index[0]), "message" : violations[0]})
if evaluation_states_new[0] == 1 and evaluation_states_old[0] != 0: # this resolves the conflict
query = f"SELECT * from notification where shipcall_id = ?shipcall_id? and type = {notification_type} and level = 0"
existing_notification = commands.query(query, param={"shipcall_id" : int(shipcall_df.index[0])})
if len(existing_notification) > 0:
query = "DELETE from notification where id = ?id?"
commands.execute(query, param={"id" : existing_notification[0]["id"]})
else:
query = "INSERT INTO notification (shipcall_id, type, level) VALUES (?shipcall_id?, 4, 0)"
commands.execute(query, param={"shipcall_id" : int(shipcall_df.index[0])})
pooledConnection.close()
if evaluation_states_new[0] == 1 and evaluation_states_old[0] != 0: # this resolves the conflict
query = f"SELECT * from notification where shipcall_id = ?shipcall_id? and type = {notification_type} and level = 0"
existing_notification = commands.query(query, param={"shipcall_id" : int(shipcall_df.index[0])})
if len(existing_notification) > 0:
query = "DELETE from notification where id = ?id?"
commands.execute(query, param={"id" : existing_notification[0]["id"]})
else:
query = "INSERT INTO notification (shipcall_id, type, level) VALUES (?shipcall_id?, 4, 0)"
commands.execute(query, param={"shipcall_id" : int(shipcall_df.index[0])})
finally:
if pooledConnection is not None:
pooledConnection.close()
# build the list of 'evaluation_notifications_sent'. The value is 'False', when a notification should be created

View File

@ -2,7 +2,7 @@ import os
import sys
import logging
sys.path.insert(0, '/var/www/brecal_devel/src/server')
sys.path.insert(0, '/var/www/brecal/src/server')
sys.path.insert(0, '/var/www/venv/lib/python3.12/site-packages/')
import schedule

View File

@ -0,0 +1,166 @@
# parser.py
# Utilities to extract text from the PDF and parse rows into records.
# Assumes two tables: first is Harbor A, second is Harbor B.
from __future__ import annotations
from csv import reader
import re
from typing import List, Dict, Any, Tuple
# Optional: If you want OCR fallback later, wire in pdf2image + pytesseract here.
def extract_text_lines(pdf_path: str) -> List[str]:
    """Extract text lines from a PDF using pdfplumber (preferred) with a
    light fallback to PyPDF2. Returns a list of raw, stripped lines.

    Raises:
        RuntimeError: when no text could be extracted (e.g. a scanned PDF,
            or neither backend is installed/usable).
    """
    text = ""
    try:
        import pdfplumber
        with pdfplumber.open(pdf_path) as pdf:
            # extract_text() may return None for empty pages; substitute "".
            text = "\n".join((p.extract_text() or "") for p in pdf.pages)
    except Exception:
        try:
            from PyPDF2 import PdfReader
            # Renamed from 'reader': the original local shadowed the
            # module-level 'from csv import reader' import.
            pdf_reader = PdfReader(pdf_path)
            text = "\n".join((page.extract_text() or "") for page in pdf_reader.pages)
        except Exception:
            text = ""
    if not text.strip():
        raise RuntimeError("No text extracted. If the PDF is scanned, add OCR fallback (pytesseract).")
    # Normalize to individual lines
    return [ln.strip() for ln in text.splitlines()]
# Lines matching any of these patterns are table headers and are dropped
# during cleansing (German column labels: ship / ETA / ETS, next port, berth,
# cargo, pilot).
HEADER_PATTERNS = [
    re.compile(r"\bSchiff\b.*\bETA\b.*\bETS\b", re.IGNORECASE),
    re.compile(r"Nächster Hafen|Liegeplatz|Ladung|Lotse", re.IGNORECASE),
]
# A bare date such as "3.7." or "3.7.2024" (year optional).
DATE_TOKEN = re.compile(r"\b\d{1,2}\.\d{1,2}\.(?:\d{4})?")
# Optional trailing time fragment, e.g. " / 14.30 Uhr" (optionally starred).
TIME_FRAGMENT = r"(?:\s*/\s*\d{1,2}\.\d{2}\s*Uhr\s*\*?)?"
# Date token optionally followed by a time fragment.
DT_TOKEN_WITH_TIME = re.compile(r"\d{1,2}\.\d{1,2}\.(?:\d{4})?" + TIME_FRAGMENT)
def cleanse_lines(lines: List[str]) -> List[str]:
    """Drop known table-header lines; keep data lines and blank lines
    (blanks later serve as table boundaries)."""
    def _is_header(candidate: str) -> bool:
        return bool(candidate) and any(pat.search(candidate) for pat in HEADER_PATTERNS)

    return [candidate for candidate in lines if not _is_header(candidate)]
def split_into_tables(lines: List[str]) -> List[List[str]]:
    """Collect candidate data lines (those containing a date token) and group
    them into at most two blocks separated by blank lines. Any blocks beyond
    the second are merged into the second. Returns a list of 1 or 2 blocks."""
    blocks: List[List[str]] = []
    bucket: List[str] = []
    for ln in lines:
        if ln != "" and not DATE_TOKEN.search(ln):
            continue  # neither blank nor a data line: ignore
        if ln == "":
            if bucket:
                blocks.append(bucket)
                bucket = []
        else:
            bucket.append(ln)
    if bucket:
        blocks.append(bucket)
    if len(blocks) > 2:
        # Fold every extra block into the second one.
        merged: List[str] = []
        for extra in blocks[1:]:
            merged.extend(extra)
        blocks = [blocks[0], merged]
    return blocks
def parse_line_to_record(ln: str) -> Dict[str, Any]:
    """Parse a single table line into a minimal record.

    Output fields:
      - ship: text before the first date token
      - eta_raw: first date(+optional time) token as a raw string, or None
      - ets_raw: second date(+optional time) token as a raw string, or None
      - notes: remainder of the line after the last extracted date token
      - raw_line: the full original line
    """
    first_date = DATE_TOKEN.search(ln)
    ship_name = ln[: first_date.start()].strip() if first_date else ln.strip()

    token_matches = list(DT_TOKEN_WITH_TIME.finditer(ln))
    tokens = [m.group().strip() for m in token_matches]

    return {
        "ship": ship_name,
        "eta_raw": tokens[0] if tokens else None,
        "ets_raw": tokens[1] if len(tokens) > 1 else None,
        "notes": ln[token_matches[-1].end():].strip() if token_matches else "",
        "raw_line": ln,
    }
def parse_pdf_to_records(pdf_path: str) -> List[Dict[str, Any]]:
    """High-level driver: extract lines, strip headers, split into up to two
    tables (tagged harbor "A"/"B" by order), and parse each row into a record."""
    blocks = split_into_tables(cleanse_lines(extract_text_lines(pdf_path)))
    records: List[Dict[str, Any]] = []
    for idx, block in enumerate(blocks):
        harbor_tag = "A" if idx == 0 else "B"
        for row in block:
            if not row.strip():
                continue
            record = parse_line_to_record(row)
            record["harbor"] = harbor_tag
            records.append(record)
    return records

View File

@ -0,0 +1,172 @@
# pdf_to_records.py
# CLI: parse a PDF and write JSONL (default) or CSV with one record per row.
from __future__ import annotations
import argparse, json, csv, re
from pathlib import Path
from typing import List, Dict, Any
# -----------------------------
# PDF text extraction helpers
# -----------------------------
# Lines matching any of these patterns are table headers and are dropped
# during cleansing (German column labels: ship / ETA / ETS, next port, berth,
# cargo, pilot).
HEADER_PATTERNS = [
    re.compile(r"\bSchiff\b.*\bETA\b.*\bETS\b", re.IGNORECASE),
    re.compile(r"Nächster Hafen|Liegeplatz|Ladung|Lotse", re.IGNORECASE),
]
# A bare date such as "3.7." or "3.7.2024" (year optional).
DATE_TOKEN = re.compile(r"\b\d{1,2}\.\d{1,2}\.(?:\d{4})?")
# Optional trailing time fragment, e.g. " / 14.30 Uhr" (optionally starred).
TIME_FRAGMENT = r"(?:\s*/\s*\d{1,2}\.\d{2}\s*Uhr\s*\*?)?"
# Date token optionally followed by a time fragment.
DT_TOKEN_WITH_TIME = re.compile(r"\d{1,2}\.\d{1,2}\.(?:\d{4})?" + TIME_FRAGMENT)
def extract_text_lines(pdf_path: str) -> List[str]:
    """Extract raw text lines from the PDF. Prefers pdfplumber with PyPDF2 fallback.

    Raises RuntimeError when no text could be extracted at all.
    """
    text = ""
    try:
        import pdfplumber
        with pdfplumber.open(pdf_path) as pdf:
            # extract_text() may yield None for empty pages; substitute "".
            text = "\n".join(page.extract_text() or "" for page in pdf.pages)
    except Exception:
        try:
            from PyPDF2 import PdfReader
            document = PdfReader(pdf_path)
            text = "\n".join(page.extract_text() or "" for page in document.pages)
        except Exception:
            text = ""
    if not text.strip():
        raise RuntimeError(
            "No text extracted. If the PDF is scanned, consider adding OCR fallback."
        )
    return [line.strip() for line in text.splitlines()]
def cleanse_lines(lines: List[str]) -> List[str]:
    """Remove header lines; keep data lines and the blanks that mark table
    boundaries."""
    kept: List[str] = []
    for raw in lines:
        header_hit = bool(raw) and any(p.search(raw) for p in HEADER_PATTERNS)
        if not header_hit:
            kept.append(raw)
    return kept
def split_into_tables(lines: List[str]) -> List[List[str]]:
    """Split lines into up to two tables, separated by blank lines; any blocks
    beyond the second are merged into the second."""
    relevant = [ln for ln in lines if ln == "" or DATE_TOKEN.search(ln)]
    blocks: List[List[str]] = []
    bucket: List[str] = []
    for ln in relevant:
        if ln:
            bucket.append(ln)
        elif bucket:
            blocks.append(bucket)
            bucket = []
    if bucket:
        blocks.append(bucket)
    if len(blocks) > 2:
        # Everything beyond the first block belongs to the second table.
        blocks = [blocks[0], [row for blk in blocks[1:] for row in blk]]
    return blocks
def parse_line_to_record(ln: str) -> Dict[str, Any]:
    """Parse a table line into structured fields: ship name, raw ETA/ETS
    tokens (if present), trailing notes, and the original line."""
    head_match = DATE_TOKEN.search(ln)
    if head_match:
        ship = ln[: head_match.start()].strip()
    else:
        ship = ln.strip()

    token_matches = list(DT_TOKEN_WITH_TIME.finditer(ln))
    tokens = [m.group().strip() for m in token_matches]
    trailing = ln[token_matches[-1].end():].strip() if token_matches else ""

    return {
        "ship": ship,
        "eta_raw": tokens[0] if tokens else None,
        "ets_raw": tokens[1] if len(tokens) > 1 else None,
        "notes": trailing,
        "raw_line": ln,
    }
def parse_pdf_to_records(pdf_path: str) -> List[Dict[str, Any]]:
    """High-level parser: extract text, sanitize, split into up to two harbor
    tables ("A" first, "B" second), and parse each non-blank row."""
    table_blocks = split_into_tables(cleanse_lines(extract_text_lines(pdf_path)))
    parsed: List[Dict[str, Any]] = []
    for block_index, block in enumerate(table_blocks):
        tag = "A" if block_index == 0 else "B"
        for row in block:
            if not row.strip():
                continue
            entry = parse_line_to_record(row)
            entry["harbor"] = tag
            parsed.append(entry)
    return parsed
def write_jsonl(path: Path, rows: List[Dict[str, Any]]):
    """Write one JSON object per line (UTF-8, non-ASCII characters preserved)."""
    serialized = [json.dumps(row, ensure_ascii=False) for row in rows]
    with path.open("w", encoding="utf-8") as handle:
        handle.writelines(line + "\n" for line in serialized)
def write_csv(path: Path, rows: List[Dict[str, Any]]):
    """Write rows to CSV with a fixed column set.

    The header row is always written — previously an empty *rows* list
    produced a zero-byte file with no header, so downstream CSV readers
    could not detect the schema.
    """
    fieldnames = ["harbor", "ship", "eta_raw", "ets_raw", "notes"]
    with path.open("w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()
        for r in rows:
            # Project each record onto the fixed columns; missing keys -> empty.
            w.writerow({k: r.get(k) for k in fieldnames})
def main():
    """CLI entry point: parse the PDF and emit JSONL (default) or CSV."""
    arg_parser = argparse.ArgumentParser(description="Parse ship tables PDF → records (A/B)")
    arg_parser.add_argument("pdf", help="Path to partner PDF")
    arg_parser.add_argument("--out", help="Output file path (default: <pdf>.jsonl)")
    arg_parser.add_argument("--format", choices=["jsonl", "csv"], default="jsonl")
    opts = arg_parser.parse_args()

    records = parse_pdf_to_records(opts.pdf)
    target = Path(opts.out) if opts.out else Path(opts.pdf).with_suffix(".jsonl")
    # Dispatch on the requested output format.
    emit = write_jsonl if opts.format == "jsonl" else write_csv
    emit(target, records)
    print(f"Wrote {len(records)} records -> {target}")
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()