Source code for webapp.pgconfig

"""Parses PostgreSQL config data from pickled files"""
import os
import logging
import pickle

import pandas as pd


LOGGER = logging.getLogger(__name__)
VERSIONS = ['10', '11', '12', '13', '14', '15', '16']
"""list : Versions with data included.

Add new versions to this list. When including pre-production versions
include a non-production designation, e.g. 16beta1
"""
VERSION_REDIRECTS = [{'version': '12beta4', 'redirect': '12'},
                     {'version': '16beta1', 'redirect': '16'}]
"""list : List of dictionaries. Dict keys are 'version' and 'redirect'."""
NEW_STRING = 'Configuration parameter added'
REMOVED_STRING = 'Configuration parameter removed'

def check_redirect(version):
    """Checks version for defined redirects.

    e.g. 12beta4 redirects to 12 once the production-ready version is released.

    Parameters
    ---------------
    version : str
        Version to check for redirects for.

    Returns
    ---------------
    version : str
        Redirected if necessary, original version if not.
    """
    for redirect in VERSION_REDIRECTS:
        if version == redirect['version']:
            LOGGER.info('Redirecting version %s to %s',
                        version, redirect['redirect'])
            return redirect['redirect']
    return version
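
# Illustrative usage sketch (not part of the original module): versions with
# a redirect defined in VERSION_REDIRECTS collapse to their production
# release; any other version string passes through unchanged.
print(check_redirect('16beta1'))  # '16'
print(check_redirect('15'))       # '15', no redirect defined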

def config_changes(vers1: int, vers2: int) -> pd.DataFrame:
    """Find changes between `vers1` and `vers2`.

    Parameters
    ----------------
    vers1 : int
        Version number, e.g. 11 or 16
    vers2 : int
        Version number, e.g. 11 or 16

    Returns
    -----------------
    changed : pd.DataFrame
    """
    if vers2 <= vers1:
        raise ValueError('Version 1 must be lower than (before) version 2.')

    # Have to drop enumvals in comparison to avoid errors comparing using pandas
    data1 = load_config_data(pg_version=vers1)
    data2 = load_config_data(pg_version=vers2)
    data2 = data2.add_suffix('2')
    combined = pd.concat([data1, data2], axis=1)
    combined['summary'] = combined.apply(classify_changes, axis=1)
    combined['change_display'] = combined.apply(calculate_change_display, axis=1)

    # Create combined columns. Passing in 2nd version first displays latest
    # version if there are any differences.
    squash_column_names = ['short_desc2', 'short_desc']
    new_column = 'short_desc'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)

    squash_column_names = ['frequent_override2', 'frequent_override']
    new_column = 'frequent_override'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)

    squash_column_names = ['category2', 'category']
    new_column = 'category'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)

    squash_column_names = ['history_url2', 'history_url']
    new_column = 'history_url'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)

    # Limit the columns
    columns = ['summary', 'frequent_override', 'category', 'short_desc',
               'vartype', 'vartype2',
               'boot_val_display', 'boot_val_display2',
               'enumvals', 'enumvals2',
               'change_display', 'history_url']
    changed = combined[combined['summary'] != ''][columns]
    return changed

def squash_columns(data: pd.DataFrame, original_columns: list, new_column: str):
    """Coalesces the values from DataFrame columns in `original_columns` list
    into the `new_column` name.

    Drops `original_columns`, so can reuse one of the column names if desired.
    e.g. `short_desc` and `short_desc2` combined into the `short_desc` column.

    Note: This is useful for added and removed items, NOT changed items.

    Parameters
    ---------------------
    data : pd.DataFrame
    original_columns : list
    new_column : str

    Returns
    ---------------------
    data_new : pd.DataFrame
    """
    data['tmp'] = data[original_columns].bfill(axis=1).iloc[:, 0]
    data_mid = data.drop(columns=original_columns)
    data_new = data_mid.rename(columns={'tmp': new_column})
    return data_new
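
# Minimal sketch of the coalescing behavior using a toy DataFrame (not part
# of the original module): per row, the first non-null value across
# `original_columns` wins and the source columns are dropped.
_toy = pd.DataFrame({'short_desc2': ['new text', None],
                     'short_desc': [None, 'old text']})
_squashed = squash_columns(data=_toy,
                           original_columns=['short_desc2', 'short_desc'],
                           new_column='short_desc')
print(_squashed['short_desc'].tolist())  # ['new text', 'old text']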

def load_config_data(pg_version: int) -> pd.DataFrame:
    """Loads the pickled config data for `pg_version` into DataFrame with the
    config name as the index.

    Returns empty DataFrame on file read error.

    Parameters
    ----------------------
    pg_version : int

    Returns
    ----------------------
    df : pd.DataFrame
    """
    base_path = os.path.dirname(os.path.realpath(__file__))

    # Checking user input against configured versions to avoid security concerns
    if str(pg_version) not in VERSIONS:
        raise ValueError(f'Invalid Postgres version. Options are {VERSIONS}')

    filename = os.path.join(base_path, 'config', f'pg{pg_version}.pkl')

    try:
        with open(filename, 'rb') as data_file:
            config_data = pickle.load(data_file)
        df = pd.DataFrame(config_data)
    except FileNotFoundError:
        msg = f'File not found for Postgres version {pg_version}'
        print(msg)
        LOGGER.error(msg)
        df = pd.DataFrame()
        return df

    # Add hyperlink to the parameter history page
    html_part1 = '<a href="/param/'
    html_part2 = '" target="blank"><i class="fa fa-external-link" aria-hidden="true"></i></a>'
    df['history_url'] = html_part1 + df['name'] + html_part2
    df.set_index('name', inplace=True)
    return df
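
# Illustrative usage sketch (not part of the original module); assumes
# config/pg16.pkl ships alongside this module and that 'shared_buffers'
# is present, with the column names used elsewhere in this file.
_pg16 = load_config_data(pg_version=16)
print(_pg16.loc['shared_buffers', ['boot_val', 'vartype']])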

def is_NaN(input: str) -> bool:
    """Checks string values for NaN, aka it isn't equal to itself.

    Parameters
    ------------------------
    input : str

    Returns
    ------------------------
    is_nan : bool
    """
    return input != input
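
# Minimal sketch (not part of the original module): NaN is the one common
# value that does not compare equal to itself, which is what this check
# relies on; ordinary strings compare equal to themselves.
print(is_NaN(float('nan')))      # True
print(is_NaN('shared_buffers'))  # False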

def classify_changes(row: pd.Series) -> str:
    """Used by DataFrame.apply on the combined DataFrame to check version1
    and version2 values, types, etc. for differences.

    Parameters
    --------------------------
    row : pd.Series
        Row from combined DataFrame to check details.

    Returns
    -------------------------
    changes : str
        Changes are built as a list internally and returned as a string
        with a comma separated list of changes.
    """
    changes = []
    delim = ', '

    # When old is empty and new is not, it's a new parameter
    if is_NaN(row['default_config_line']) and not is_NaN(row['default_config_line2']):
        changes.append('Configuration parameter added')
        return delim.join(changes)

    # When new is empty and old is not, it was removed
    if is_NaN(row['default_config_line2']) and not is_NaN(row['default_config_line']):
        changes.append('Configuration parameter removed')
        return delim.join(changes)

    if row['boot_val'] != row['boot_val2']:
        changes.append('Changed default value')
    if row['vartype'] != row['vartype2']:
        changes.append('Changed variable type')
    return delim.join(changes)

def calculate_change_display(row: pd.Series) -> str:
    """Used by DataFrame.apply on the combined DataFrame to create the columns
    to display for changes.

    Parameters
    --------------------------
    row : pd.Series
        Row from combined DataFrame to check details.

    Returns
    -------------------------
    changes : str
        Changes are built as a list internally and returned as a string
        with a comma separated list of changes.
    """
    changes = []
    delim = ', '

    # If either is NaN, don't calculate
    if is_NaN(row['default_config_line']) or is_NaN(row['default_config_line2']):
        return None

    if row.boot_val != row.boot_val2:
        changes.append(f'Default value: {row.boot_val} -> {row.boot_val2}')
    if row['vartype'] != row['vartype2']:
        changes.append(f'Variable type: <code>{row.vartype}</code> -> <code>{row.vartype2}</code>')
    return delim.join(changes)
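
# Minimal sketch of how the two row-level helpers behave (not part of the
# original module). The row values below are made up for illustration; the
# column names match those produced by config_changes() above.
_row = pd.Series({'default_config_line': 'work_mem = 4MB',
                  'default_config_line2': 'work_mem = 4MB',
                  'boot_val': '4096', 'boot_val2': '8192',
                  'vartype': 'integer', 'vartype2': 'integer'})
print(classify_changes(_row))          # 'Changed default value'
print(calculate_change_display(_row))  # 'Default value: 4096 -> 8192'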

def config_changes_html(changes: pd.DataFrame) -> dict:
    """Splits `changes` data into new, removed, and changed.

    Parameters
    -------------------
    changes : pd.DataFrame

    Returns
    -------------------
    changes_html : dict
        Dictionary with keys:

        * new
        * removed
        * changed

        Each item holds the string HTML for the table of the data from
        input DataFrame
    """
    # New Section
    columns_new = ['category', 'short_desc', 'boot_val_display2', 'vartype2',
                   'enumvals2', 'history_url']
    rename_columns_new = {'vartype2': 'Var Type',
                          'boot_val_display2': 'Default Value',
                          'enumvals2': 'Enum Values'}
    new = changes[changes.summary == NEW_STRING][columns_new].rename(columns=rename_columns_new)
    new_html = _df_to_html(new)

    # Removed Section
    columns_removed = ['category', 'short_desc', 'boot_val_display', 'vartype',
                       'enumvals', 'history_url']
    rename_columns_removed = {'vartype': 'Var Type',
                              'boot_val_display': 'Default Value',
                              'enumvals': 'Enum Values'}
    removed = changes[changes.summary == REMOVED_STRING][columns_removed].rename(columns=rename_columns_removed)
    removed_html = _df_to_html(removed)

    # Changed section
    columns_changed = ['category', 'short_desc', 'change_display', 'history_url']
    rename_columns_changed = {'vartype': 'Var Type',
                              'change_display': 'Changed:',
                              'enumvals': 'Enum Values'}
    changed = changes[~changes.summary.isin([NEW_STRING, REMOVED_STRING])][columns_changed].rename(columns=rename_columns_changed)
    changed_html = _df_to_html(changed)

    return {'new': new_html, 'removed': removed_html, 'changed': changed_html}

def config_changes_stats(changes: pd.DataFrame) -> dict:
    """Provides counts of changes by type (new, removed, updated) to display.

    Parameters
    ---------------------
    changes : pd.DataFrame

    Returns
    ---------------------
    stats : dict
    """
    new = changes[changes.summary == NEW_STRING].count().max()
    removed = changes[changes.summary == REMOVED_STRING].count().max()
    total = changes.count().max()
    updated = total - new - removed
    stats = {'new': new, 'updated': updated, 'removed': removed}
    return stats

def _df_to_html(df):
    """Converts DataFrame to HTML table with classes for formatting.

    Parameters
    ---------------
    df : pandas.DataFrame

    Returns
    ---------------
    str
        HTML table for display.
    """
    classes = ['table', 'table-hover']
    html_raw = '<div id="config_table">{src}</div>'
    default_renames = {'category': 'Category', 'short_desc': 'Description'}
    df.rename(columns=default_renames, inplace=True)
    src = df.to_html(index=True, classes=classes, justify='center', escape=False)
    html = html_raw.format(src=src)
    return html
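
# Illustrative sketch tying the pieces together (not part of the original
# module); assumes the pickled config files for versions 15 and 16 ship
# with the package. In practice this would run from the web layer or a
# REPL rather than at import time.
_changes = config_changes(vers1=15, vers2=16)
print(config_changes_stats(changes=_changes))  # e.g. {'new': ..., 'updated': ..., 'removed': ...}
_tables = config_changes_html(changes=_changes)
print(sorted(_tables.keys()))                  # ['changed', 'new', 'removed']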

def get_all_postgres_parameters() -> list:
    """Returns sorted list of Postgres parameter names from all versions.

    Returns
    ----------------------
    all_parameters : list
    """
    all_version_data = {}
    all_parameters = set()
    for version in VERSIONS:
        all_version_data[version] = load_config_data(version)
        version_parameters = all_version_data[version].index.values.tolist()
        all_parameters.update(version_parameters)
    return sorted(all_parameters)
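
# Illustrative sketch (not part of the original module): the union of
# parameter names across every version in VERSIONS, sorted alphabetically.
_all_params = get_all_postgres_parameters()
print(len(_all_params), _all_params[:3])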

def get_pg_param_over_versions(pg_param: str) -> dict:
    """Return details about pg_param over Postgres major versions.

    Parameters
    -------------------------
    pg_param : str
        Name of the Postgres parameter

    Returns
    -------------------------
    param_details : dict
    """
    param_details = {'name': pg_param}
    max_version = max(VERSIONS)
    value_history = {}
    vartype_history = {}

    for version in VERSIONS:
        config_data = load_config_data(version)
        filtered_data = config_data[config_data.index == pg_param]

        try:
            boot_val = filtered_data['boot_val_display'].iloc[0]
        except IndexError:
            boot_val = ''
        try:
            vartype = filtered_data['vartype'].iloc[0]
        except IndexError:
            vartype = ''
        try:
            category = filtered_data['category'].iloc[0]
        except IndexError:
            category = ''
        try:
            short_desc = filtered_data['short_desc'].iloc[0]
        except IndexError:
            short_desc = ''
        try:
            frequent_override = filtered_data['frequent_override'].iloc[0]
        except IndexError:
            frequent_override = ''

        if version == max_version:
            details = {'vartype': vartype,
                       'boot_val': boot_val,
                       'category': category,
                       'short_desc': short_desc,
                       'frequent_override': frequent_override}
            param_details['details'] = details

        value_history[version] = boot_val
        vartype_history[version] = vartype

    value_history_html = history_df_to_html(history=value_history,
                                            pg_param=pg_param)
    vartype_history_html = history_df_to_html(history=vartype_history,
                                              pg_param=pg_param)
    param_details['value_history'] = value_history_html
    param_details['vartype_history'] = vartype_history_html
    return param_details

def history_df_to_html(history: dict, pg_param: str) -> str:
    """Converts a dictionary of details across versions to HTML for display
    on frontend.

    Parameters
    ------------------------
    history : dict
    pg_param : str

    Returns
    ------------------------
    history_html : str
    """
    history_df = pd.DataFrame([history])
    history_df['name'] = pg_param
    history_df.set_index('name', inplace=True)
    history_html = _df_to_html(history_df)
    return history_html
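
# Illustrative usage sketch (not part of the original module); 'work_mem'
# is just an example parameter name and assumes the pickled data includes
# it. Returns the newest version's details plus HTML tables of the value
# and vartype history.
_param = get_pg_param_over_versions(pg_param='work_mem')
print(_param['details']['vartype'], _param['details']['boot_val'])
print(_param['value_history'][:80])  # leading HTML of the value-history table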