# Post-patch view of src/server/BreCal/database/sql_handler.py
# (git index 3ae0e27..457b950), limited to the SQLHandler methods whose full
# body is visible in the patch.  The remaining hunks only show fragments:
#   * get_all_schemas(): `all_schemas = [schem[0] for schem in schema]` is
#     re-emitted with a whitespace-only change.
#   * read_all(): the per-table call becomes
#     `self.mysql_to_df(query, table_name=schem)`.
class SQLHandler():
    """Partial view of SQLHandler: loaders that turn MySQL result sets into DataFrames.

    The methods below read two instance attributes:

    * ``self.sql_connection`` -- must offer ``cursor(buffered=True)`` usable
      as a context manager.
    * ``self.str_to_model_dict`` -- looked up with ``.get(table_name)``; a hit
      is called as ``model(**row)`` with the column names as keywords.
    """

    def _fetch_dataframe(self, query, table_name=None):
        """Execute *query* and return its complete result set as a DataFrame.

        Args:
            query: SQL text handed to ``cursor.execute`` unchanged.
            table_name: optional key into ``self.str_to_model_dict``. When a
                model is registered under it, every row is passed through
                that model before the DataFrame is built.

        Returns:
            A pandas DataFrame with one row per fetched tuple. An empty
            result set still yields the query's columns.
        """
        with self.sql_connection.cursor(buffered=True) as cursor:
            cursor.execute(query)
            rows = cursor.fetchall()
            # Take the names from the executed statement itself (DB-API
            # ``description``: one sequence per column, name first).  A
            # separate DESCRIBE of some table only matches `SELECT *` on
            # exactly that table and mislabels projections, aliases or joins.
            column_names = [column[0] for column in (cursor.description or ())]

        data_model = None
        if table_name is not None:
            data_model = self.str_to_model_dict.get(table_name)

        if not rows:
            # Keep the column layout so callers (e.g. the 'id' index step in
            # mysql_to_df) behave the same for empty and non-empty tables.
            return pd.DataFrame(columns=column_names)

        if data_model is None:
            return pd.DataFrame(rows, columns=column_names)

        # Route every row through its data model first; per the original
        # author's note this is what ensures the correct data types.
        records = [data_model(**dict(zip(column_names, row))) for row in rows]
        return pd.DataFrame(records)

    def read_mysql_table_to_df(self, table_name: str):
        """Read the whole table *table_name* from the MySQL server.

        Args:
            table_name: name of the table to read; also used to look up the
                row model in ``self.str_to_model_dict``.

        Returns:
            A pandas DataFrame holding every row of the table. Unlike
            ``mysql_to_df`` the ``id`` column is NOT moved into the index.
        """
        # NOTE(review): identifiers cannot be bound as query parameters, so
        # the name is interpolated -- callers must only pass trusted table
        # names (e.g. the ones reported by SHOW TABLES).
        return self._fetch_dataframe(f"SELECT * FROM {table_name}", table_name)

    def mysql_to_df(self, query, table_name=None):
        """Run an arbitrary SQL *query* against ``self.sql_connection``.

        Args:
            query: SQL text to execute.
            table_name: optional table name used to pick the row model from
                ``self.str_to_model_dict``. It defaults to ``None`` so that
                callers written for the former one-argument signature keep
                working.

        Returns:
            A pandas DataFrame with the obtained data. If the result has an
            ``id`` column, it becomes the index.
        """
        df = self._fetch_dataframe(query, table_name)

        if 'id' in df.columns:
            # avoid inplace updates, so the raw sql remains unchanged
            df = df.set_index('id', inplace=False)
        return df