"""Parses PostgreSQL config data from pickled files"""
import os
import logging
import pandas as pd
import pickle
[docs]
LOGGER = logging.getLogger(__name__) 
[docs]
VERSIONS = ['10', '11', '12', '13', '14', '15', '16'] 
"""list : Versions with data included. Add new versions to this list.
When including pre-production versions include a non-production designation,
e.g. 16beta1
"""
[docs]
VERSION_REDIRECTS = [{'version': '12beta4', 'redirect': '12'},
                     {'version': '16beta1', 'redirect': '16'}
                     ] 
"""list : List of dictionaries. Dict keys are 'version' and 'redirect'."""
[docs]
NEW_STRING = 'Configuration parameter added' 
[docs]
REMOVED_STRING = 'Configuration parameter removed' 
[docs]
def check_redirect(version):
    """Checks version for defined redirects.
    e.g. 12beta4 redirects to 12 once the production-ready version
    is released.
    Parameters
    ---------------
    version : str
        Version to check for redirects for.
    Returns
    ---------------
    version : str
        Redirected if necessary, original version if not.
    """
    for redirect in VERSION_REDIRECTS:
        if version == redirect['version']:
            LOGGER.info('Redirecting version %s to %s', version,
                                                        redirect['redirect'])
            return redirect['redirect']
    return version 
[docs]
def config_changes(vers1: int, vers2: int) -> pd.DataFrame:
    """Find changes between `vers1` and `vers2`.
    Parameters
    ----------------
    vers1 : int
        Version number, e.g. 11 or 16
    vers2 : int
        Version number, e.g. 11 or 16
    Returns
    -----------------
    changed : pd.DataFrame
    """
    if vers2 <= vers1:
        raise ValueError('Version 1 must be lower (before) version 2.')
    # Have to drop enumvals in comparison to avoid errors comparing using pandas
    data1 = load_config_data(pg_version=vers1)
    data2 = load_config_data(pg_version=vers2)
    data2 = data2.add_suffix('2')
    combined = pd.concat([data1, data2], axis=1)
    combined['summary'] = combined.apply(classify_changes, axis=1)
    combined['change_display'] = combined.apply(calculate_change_display, axis=1)
    # Create combined columns. Passing in 2nd version first displays latest
    # version if there are any differences.
    squash_column_names = ['short_desc2', 'short_desc']
    new_column = 'short_desc'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)
    squash_column_names = ['frequent_override2', 'frequent_override']
    new_column = 'frequent_override'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)
    squash_column_names = ['category2', 'category']
    new_column = 'category'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)
    squash_column_names = ['history_url2', 'history_url']
    new_column = 'history_url'
    combined = squash_columns(data=combined,
                              original_columns=squash_column_names,
                              new_column=new_column)
    # Limit the columns
    columns = ['summary', 'frequent_override',
               'category', 'short_desc',
               'vartype', 'vartype2',
               'boot_val_display', 'boot_val_display2',
               'enumvals', 'enumvals2', 'change_display',
               'history_url'
    ]
    changed = combined[combined['summary'] != ''][columns]
    return changed 
[docs]
def squash_columns(data: pd.DataFrame, original_columns: list, new_column: str):
    """Coalesces the values from DataFrame columns in `original_columns` list
    into the `new_column` name.  Drops `original_columns`, so can reuse one of
    the column names if desired. e.g `short_desc` and `short_desc2` combined
    into the `short_desc` column.
    Note: This is useful for added and removed items, NOT changed items.
    Parameters
    ---------------------
    data : pd.DataFrame
    original_columns : list
    new_column : str
    Returns
    ---------------------
    data_new : pd.DataFrame
    """
    data['tmp'] = data[original_columns].bfill(axis=1).iloc[:, 0]
    data_mid = data.drop(columns=original_columns)
    data_new = data_mid.rename(columns={'tmp': new_column})
    return data_new 
[docs]
def load_config_data(pg_version: int) -> pd.DataFrame:
    """Loads the pickled config data for `pg_version` into DataFrame with the
    config name as the index.
    Returns empty DataFrame on file read error.
    Parameters
    ----------------------
    pg_version : int
    Returns
    ----------------------
    df : pd.DataFrame
    """
    base_path = os.path.dirname(os.path.realpath(__file__))
    # Checking user input against configured versions to avoid security concerns
    if str(pg_version) not in VERSIONS:
        raise ValueError(f'Invalid Postgres version.  Options are {VERSIONS}')
    filename = os.path.join(base_path, 'config', f'pg{pg_version}.pkl')
    try:
        with open(filename, 'rb') as data_file:
            config_data = pickle.load(data_file)
        df = pd.DataFrame(config_data)
    except FileNotFoundError:
        msg = f'File not found for Postgres version {pg_version}'
        print(msg)
        LOGGER.error(msg)
        df = pd.DataFrame()
        return df
    # Add hyperlink to the parameter history page
    html_part1 = '<a href="/param/'
    html_part2 = '" target="blank"><i class="fa fa-external-link" aria-hidden="true"></i></a>'
    df['history_url'] = html_part1 + df['name'] + html_part2
    df.set_index('name', inplace=True)
    return df 
[docs]
def is_NaN(input: str) -> bool:
    """Checks string values for NaN, aka it isn't equal to itself.
    Parameters
    ------------------------
    input : str
    Returns
    ------------------------
    is_nan : bool
    """
    return input != input 
[docs]
def classify_changes(row: pd.Series) -> str:
    """Used by dataFrame.apply on the combined DataFrame to check version1 and
    version2 values, types, etc. for differences.
    Parameters
    --------------------------
    row : pd.Series
        Row from combined DataFrame to check details.
    Returns
    -------------------------
    changes : str
        Changes are built as a list internally and returned as a string
        with a comma separated list of changes.
    """
    changes = []
    delim = ', '
    # When old is empty, and new is not, it's a new parameters
    if is_NaN(row['default_config_line']) and not is_NaN(row['default_config_line2']):
        changes.append('Configuration parameter added')
        return delim.join(changes)
    # When new is empty and old is not, it was removed
    if is_NaN(row['default_config_line2']) and not is_NaN(row['default_config_line']):
        changes.append('Configuration parameter removed')
        return delim.join(changes)
    if row['boot_val'] != row['boot_val2']:
        changes.append('Changed default value')
    if row['vartype'] != row['vartype2']:
        changes.append('Changed variable type')
    return delim.join(changes) 
[docs]
def calculate_change_display(row: pd.Series) -> str:
    """Used by dataFrame.apply on the combined DataFrame to create the columns
    to display for changes.
    Parameters
    --------------------------
    row : pd.Series
        Row from combined DataFrame to check details.
    Returns
    -------------------------
    changes : str
        Changes are built as a list internally and returned as a string
        with a comma separated list of changes.
    """
    changes = []
    delim = ', '
    # If either is Nan, don't calculate
    if is_NaN(row['default_config_line']) or is_NaN(row['default_config_line2']):
        return None
    if row.boot_val != row.boot_val2:
        changes.append(f'Default value: {row.boot_val} -> {row.boot_val2}')
    if row['vartype'] != row['vartype2']:
        changes.append(f'Variable type: <code>{row.vartype}</code> -> <code>{row.vartype2}</code>')
    return delim.join(changes) 
[docs]
def config_changes_html(changes: pd.DataFrame) -> dict:
    """Splits `changes` data into new, removed, and changed.
    Parameters
    -------------------
    changes : pd.DataFrame
    Returns
    -------------------
    changes_html : dict
        Dictionary with keys:
            * new
            * removed
            * changed
        Each item holds the string HTML for the table of the data from input
        DataFrame 
    """
    # New Section
    columns_new = ['category', 'short_desc', 'boot_val_display2',
                   'vartype2', 'enumvals2', 'history_url']
    rename_columns_new = {'vartype2': 'Var Type',
                          'boot_val_display2': 'Default Value',
                          'enumvals2': 'Enum Values'
                          }
    new = changes[changes.summary == NEW_STRING][columns_new].rename(columns=rename_columns_new)
    new_html = _df_to_html(new)
    # Removed Section
    columns_removed = ['category', 'short_desc', 'boot_val_display',
                       'vartype', 'enumvals', 'history_url']
    rename_columns_removed = {'vartype': 'Var Type',
                          'boot_val_display': 'Default Value',
                          'enumvals': 'Enum Values'
                          }
    removed = changes[changes.summary == REMOVED_STRING][columns_removed].rename(columns=rename_columns_removed)
    removed_html = _df_to_html(removed)
    # Changed section
    columns_changed = ['category', 'short_desc', 'change_display',
                       'history_url']
    rename_columns_changed = {'vartype': 'Var Type',
                          'change_display': 'Changed:',
                          'enumvals': 'Enum Values'
                          }
    changed = changes[~changes.summary.isin([NEW_STRING, REMOVED_STRING])][columns_changed].rename(columns=rename_columns_changed)
    changed_html = _df_to_html(changed)
    return {'new': new_html, 'removed': removed_html, 'changed': changed_html} 
[docs]
def config_changes_stats(changes: dict) -> dict:
    """Provides counts of changes by type (new, removed, updated) to display.
    Parameters
    ---------------------
    changes : dict
    Returns
    ---------------------
    stats : dict
    """
    new = changes[changes.summary == NEW_STRING].count().max()
    removed = changes[changes.summary == REMOVED_STRING].count().max()
    total = changes.count().max()
    updated = total - new - removed
    stats = {'new': new,
             'updated': updated,
             'removed': removed}
    return stats 
[docs]
def _df_to_html(df):
    """Converts DataFrame to HTML table with classes for formatting.
    Parameters
    ---------------
    df : pandas.DataFrame
    Returns
    ---------------
    str
        HTML table for display.
    """
    classes = ['table', 'table-hover']
    html_raw = '<div id="config_table">{src}</div>'
    default_renames = {'category': 'Category', 'short_desc': 'Description'}
    df.rename(columns=default_renames, inplace=True)
    src = df.to_html(index=True,
                     classes=classes,
                     justify='center',
                     escape=False)
    html = html_raw.format(src=src)
    return html 
[docs]
def get_all_postgres_parameters() -> set:
    """Returns sorted set of Postgres parameters from all versions.
    Returns
    ----------------------
    all_parameters : set
    """
    all_version_data = {}
    all_parameters = set()
    for version in VERSIONS:
        all_version_data[version] = load_config_data(version)
        version_parameters = all_version_data[version].index.values.tolist()
        all_parameters.update(version_parameters)
    return sorted(all_parameters) 
[docs]
def get_pg_param_over_versions(pg_param: str) -> dict:
    """Return details about pg_param over Postgres major versions.
    Parameters
    -------------------------
    pg_param : str
        Name of the Postgres parameter
    Returns
    -------------------------
    param_details : dict
    """
    param_details = {'name': pg_param}
    max_version = max(VERSIONS)
    value_history = {}
    vartype_history = {}
    for version in VERSIONS:
        config_data = load_config_data(version)
        filtered_data = config_data[config_data.index == pg_param]
        try:
            boot_val = filtered_data['boot_val_display'].iloc[0]
        except IndexError:
            boot_val = ''
        try:
            vartype = filtered_data['vartype'].iloc[0]
        except IndexError:
            vartype = ''
        try:
            category = filtered_data['category'].iloc[0]
        except IndexError:
            category = ''
        try:
            short_desc = filtered_data['short_desc'].iloc[0]
        except IndexError:
            short_desc = ''
        try:
            frequent_override = filtered_data['frequent_override'].iloc[0]
        except IndexError:
            frequent_override = ''
        if version == max_version:
            details = {'vartype': vartype,
                       'boot_val': boot_val,
                       'category': category,
                       'short_desc': short_desc,
                       'frequent_override': frequent_override
                    }
            param_details['details'] = details
        value_history[version] = boot_val
        vartype_history[version] = vartype
    value_history_html = history_df_to_html(history=value_history,
                                            pg_param=pg_param)
    vartype_history_html = history_df_to_html(history=vartype_history,
                                            pg_param=pg_param)
    param_details['value_history'] = value_history_html
    param_details['vartype_history'] = vartype_history_html
    return param_details 
[docs]
def history_df_to_html(history: dict, pg_param: str) -> str:
    """Converts a dictionary of details across versions to HTML for display on
    frontend.
    Parameters
    ------------------------
    history : dict
    pg_param : str
    Returns
    ------------------------
    history_html : str
    """
    history_df = pd.DataFrame([history])
    history_df['name'] = pg_param
    history_df.set_index('name', inplace=True)
    history_html = _df_to_html(history_df)
    return history_html