Source code for webapp.pgconfig

"""Parses PostgreSQL config data from pickled files"""
import os
import logging
import pandas as pd
import pickle



[docs]
LOGGER = logging.getLogger(__name__)


[docs]
VERSIONS = ['10', '11', '12', '13', '14', '15', '16']

"""list : Versions with data included. Add new versions to this list.
When including pre-production versions include a non-production designation,
e.g. 16beta1
"""


[docs]
VERSION_REDIRECTS = [{'version': '12beta4', 'redirect': '12'},
                     {'version': '16beta1', 'redirect': '16'}
                     ]

"""list : List of dictionaries. Dict keys are 'version' and 'redirect'."""


[docs]
NEW_STRING = 'Configuration parameter added'


[docs]
REMOVED_STRING = 'Configuration parameter removed'




[docs]
def check_redirect(version):
    """Checks version for defined redirects.

    e.g. 12beta4 redirects to 12 once the production-ready version
    is released.

    Parameters
    ---------------
    version : str
        Version to check for redirects for.

    Returns
    ---------------
    version : str
        Redirected if necessary, original version if not.
    """
    for redirect in VERSION_REDIRECTS:
        if version == redirect['version']:
            LOGGER.info('Redirecting version %s to %s', version,
                                                        redirect['redirect'])
            return redirect['redirect']
    return version




[docs]
def config_changes(vers1: int, vers2: int) -> pd.DataFrame:
    """Find changes between `vers1` and `vers2`.

    Parameters
    ----------------
    vers1 : int
        Version number, e.g. 11 or 16

    vers2 : int
        Version number, e.g. 11 or 16

    Returns
    -----------------
    changed : pd.DataFrame
    """
    if vers2 <= vers1:
        raise ValueError('Version 1 must be lower (before) version 2.')

    # Have to drop enumvals in comparison to avoid errors comparing using pandas
    data1 = load_config_data(pg_version=vers1)
    data2 = load_config_data(pg_version=vers2)

    data2 = data2.add_suffix('2')

    combined = pd.concat([data1, data2], axis=1)

    combined['summary'] = combined.apply(classify_changes, axis=1)
    combined['change_display'] = combined.apply(calculate_change_display, axis=1)

    # Create combined columns. Passing in 2nd version first displays latest
    # version if there are any differences.
    squash_column_names = ['short_desc2', 'short_desc']
    new_column = 'short_desc'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)

    squash_column_names = ['frequent_override2', 'frequent_override']
    new_column = 'frequent_override'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)

    squash_column_names = ['category2', 'category']
    new_column = 'category'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)

    squash_column_names = ['history_url2', 'history_url']
    new_column = 'history_url'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)

    # Limit the columns
    columns = ['summary', 'frequent_override',
               'category', 'short_desc',
               'vartype', 'vartype2',
               'boot_val_display', 'boot_val_display2',
               'enumvals', 'enumvals2', 'change_display',
               'history_url'
    ]
    changed = combined[combined['summary'] != ''][columns]

    return changed




[docs]
def squash_columns(data: pd.DataFrame, original_columns: list, new_column: str):
    """Coalesces the values from DataFrame columns in `original_columns` list
    into the `new_column` name.  Drops `original_columns`, so can reuse one of
    the column names if desired. e.g `short_desc` and `short_desc2` combined
    into the `short_desc` column.

    Note: This is useful for added and removed items, NOT changed items.

    Parameters
    ---------------------
    data : pd.DataFrame
    original_columns : list
    new_column : str

    Returns
    ---------------------
    data_new : pd.DataFrame
    """
    data['tmp'] = data[original_columns].bfill(axis=1).iloc[:, 0]
    data_mid = data.drop(columns=original_columns)
    data_new = data_mid.rename(columns={'tmp': new_column})
    return data_new




[docs]
def load_config_data(pg_version: int) -> pd.DataFrame:
    """Loads the pickled config data for `pg_version` into DataFrame with the
    config name as the index.

    Returns empty DataFrame on file read error.

    Parameters
    ----------------------
    pg_version : int

    Returns
    ----------------------
    df : pd.DataFrame
    """
    base_path = os.path.dirname(os.path.realpath(__file__))
    # Checking user input against configured versions to avoid security concerns
    if str(pg_version) not in VERSIONS:
        raise ValueError(f'Invalid Postgres version.  Options are {VERSIONS}')

    filename = os.path.join(base_path, 'config', f'pg{pg_version}.pkl')

    try:
        with open(filename, 'rb') as data_file:
            config_data = pickle.load(data_file)

        df = pd.DataFrame(config_data)
    except FileNotFoundError:
        msg = f'File not found for Postgres version {pg_version}'
        print(msg)
        LOGGER.error(msg)
        df = pd.DataFrame()
        return df

    # Add hyperlink to the parameter history page
    html_part1 = '<a href="/param/'
    html_part2 = '" target="blank"><i class="fa fa-external-link" aria-hidden="true"></i></a>'
    df['history_url'] = html_part1 + df['name'] + html_part2

    df.set_index('name', inplace=True)
    return df




[docs]
def is_NaN(input: str) -> bool:
    """Checks string values for NaN, aka it isn't equal to itself.

    Parameters
    ------------------------
    input : str

    Returns
    ------------------------
    is_nan : bool
    """
    return input != input




[docs]
def classify_changes(row: pd.Series) -> str:
    """Used by dataFrame.apply on the combined DataFrame to check version1 and
    version2 values, types, etc. for differences.

    Parameters
    --------------------------
    row : pd.Series
        Row from combined DataFrame to check details.

    Returns
    -------------------------
    changes : str
        Changes are built as a list internally and returned as a string
        with a comma separated list of changes.
    """
    changes = []
    delim = ', '

    # When old is empty, and new is not, it's a new parameters
    if is_NaN(row['default_config_line']) and not is_NaN(row['default_config_line2']):
        changes.append('Configuration parameter added')
        return delim.join(changes)

    # When new is empty and old is not, it was removed
    if is_NaN(row['default_config_line2']) and not is_NaN(row['default_config_line']):
        changes.append('Configuration parameter removed')
        return delim.join(changes)

    if row['boot_val'] != row['boot_val2']:
        changes.append('Changed default value')
    if row['vartype'] != row['vartype2']:
        changes.append('Changed variable type')
    return delim.join(changes)




[docs]
def calculate_change_display(row: pd.Series) -> str:
    """Used by dataFrame.apply on the combined DataFrame to create the columns
    to display for changes.

    Parameters
    --------------------------
    row : pd.Series
        Row from combined DataFrame to check details.

    Returns
    -------------------------
    changes : str
        Changes are built as a list internally and returned as a string
        with a comma separated list of changes.
    """
    changes = []
    delim = ', '

    # If either is Nan, don't calculate
    if is_NaN(row['default_config_line']) or is_NaN(row['default_config_line2']):
        return None

    if row.boot_val != row.boot_val2:
        changes.append(f'Default value: {row.boot_val} -> {row.boot_val2}')
    if row['vartype'] != row['vartype2']:
        changes.append(f'Variable type: <code>{row.vartype}</code> -> <code>{row.vartype2}</code>')
    return delim.join(changes)




[docs]
def config_changes_html(changes: pd.DataFrame) -> dict:
    """Splits `changes` data into new, removed, and changed.

    Parameters
    -------------------
    changes : pd.DataFrame

    Returns
    -------------------
    changes_html : dict
        Dictionary with keys:
            * new
            * removed
            * changed

        Each item holds the string HTML for the table of the data from input
        DataFrame 
    """
    # New Section
    columns_new = ['category', 'short_desc', 'boot_val_display2',
                   'vartype2', 'enumvals2', 'history_url']
    rename_columns_new = {'vartype2': 'Var Type',
                          'boot_val_display2': 'Default Value',
                          'enumvals2': 'Enum Values'
                          }
    new = changes[changes.summary == NEW_STRING][columns_new].rename(columns=rename_columns_new)

    new_html = _df_to_html(new)

    # Removed Section
    columns_removed = ['category', 'short_desc', 'boot_val_display',
                       'vartype', 'enumvals', 'history_url']
    rename_columns_removed = {'vartype': 'Var Type',
                          'boot_val_display': 'Default Value',
                          'enumvals': 'Enum Values'
                          }
    removed = changes[changes.summary == REMOVED_STRING][columns_removed].rename(columns=rename_columns_removed)
    removed_html = _df_to_html(removed)

    # Changed section
    columns_changed = ['category', 'short_desc', 'change_display',
                       'history_url']
    rename_columns_changed = {'vartype': 'Var Type',
                          'change_display': 'Changed:',
                          'enumvals': 'Enum Values'
                          }
    changed = changes[~changes.summary.isin([NEW_STRING, REMOVED_STRING])][columns_changed].rename(columns=rename_columns_changed)
    changed_html = _df_to_html(changed)

    return {'new': new_html, 'removed': removed_html, 'changed': changed_html}




[docs]
def config_changes_stats(changes: dict) -> dict:
    """Provides counts of changes by type (new, removed, updated) to display.

    Parameters
    ---------------------
    changes : dict

    Returns
    ---------------------
    stats : dict
    """
    new = changes[changes.summary == NEW_STRING].count().max()
    removed = changes[changes.summary == REMOVED_STRING].count().max()
    total = changes.count().max()
    updated = total - new - removed
    stats = {'new': new,
             'updated': updated,
             'removed': removed}
    return stats




[docs]
def _df_to_html(df):
    """Converts DataFrame to HTML table with classes for formatting.

    Parameters
    ---------------
    df : pandas.DataFrame

    Returns
    ---------------
    str
        HTML table for display.
    """
    classes = ['table', 'table-hover']
    html_raw = '<div id="config_table">{src}</div>'
    default_renames = {'category': 'Category', 'short_desc': 'Description'}
    df.rename(columns=default_renames, inplace=True)
    src = df.to_html(index=True,
                     classes=classes,
                     justify='center',
                     escape=False)
    html = html_raw.format(src=src)
    return html




[docs]
def get_all_postgres_parameters() -> set:
    """Returns sorted set of Postgres parameters from all versions.

    Returns
    ----------------------
    all_parameters : set
    """
    all_version_data = {}
    all_parameters = set()

    for version in VERSIONS:
        all_version_data[version] = load_config_data(version)
        version_parameters = all_version_data[version].index.values.tolist()
        all_parameters.update(version_parameters)

    return sorted(all_parameters)




[docs]
def get_pg_param_over_versions(pg_param: str) -> dict:
    """Return details about pg_param over Postgres major versions.

    Parameters
    -------------------------
    pg_param : str
        Name of the Postgres parameter

    Returns
    -------------------------
    param_details : dict
    """
    param_details = {'name': pg_param}
    max_version = max(VERSIONS)
    value_history = {}
    vartype_history = {}

    for version in VERSIONS:
        config_data = load_config_data(version)
        filtered_data = config_data[config_data.index == pg_param]

        try:
            boot_val = filtered_data['boot_val_display'].iloc[0]
        except IndexError:
            boot_val = ''

        try:
            vartype = filtered_data['vartype'].iloc[0]
        except IndexError:
            vartype = ''

        try:
            category = filtered_data['category'].iloc[0]
        except IndexError:
            category = ''

        try:
            short_desc = filtered_data['short_desc'].iloc[0]
        except IndexError:
            short_desc = ''

        try:
            frequent_override = filtered_data['frequent_override'].iloc[0]
        except IndexError:
            frequent_override = ''

        if version == max_version:
            details = {'vartype': vartype,
                       'boot_val': boot_val,
                       'category': category,
                       'short_desc': short_desc,
                       'frequent_override': frequent_override
                    }
            param_details['details'] = details

        value_history[version] = boot_val
        vartype_history[version] = vartype

    value_history_html = history_df_to_html(history=value_history,
                                            pg_param=pg_param)
    vartype_history_html = history_df_to_html(history=vartype_history,
                                            pg_param=pg_param)

    param_details['value_history'] = value_history_html
    param_details['vartype_history'] = vartype_history_html

    return param_details




[docs]
def history_df_to_html(history: dict, pg_param: str) -> str:
    """Converts a dictionary of details across versions to HTML for display on
    frontend.

    Parameters
    ------------------------
    history : dict
    pg_param : str

    Returns
    ------------------------
    history_html : str
    """
    history_df = pd.DataFrame([history])
    history_df['name'] = pg_param
    history_df.set_index('name', inplace=True)
    history_html = _df_to_html(history_df)
    return history_html