Source code for copro.selection

import pandas as pd
import geopandas as gpd
import os
from copro import utils

def filter_conflict_properties(gdf, config):
    """Filters conflict database on the number of casualties and the type of violence.

    Both criteria are read from section 'conflict' of the configuration
    ('min_nr_casualties' and 'type_of_violence', the latter a comma-separated list).
    A criterion whose setting is left empty is skipped.

    Args:
        gdf (geo-dataframe): geo-dataframe containing entries with conflicts.
        config (ConfigParser-object): object containing the parsed configuration-settings of the model.

    Returns:
        geo-dataframe: geo-dataframe containing filtered entries.

    Raises:
        ValueError: if 'min_nr_casualties' is set but is not an integer, or if
            'type_of_violence' holds non-numeric values while the column is numeric.
    """
    verbose = config.getboolean('general', 'verbose')

    # read the raw strings so that an empty setting can be told apart from a value;
    # getint() would raise on an empty string before any check could be made
    min_casualties = config.get('conflict', 'min_nr_casualties').strip()
    raw_types = config.get('conflict', 'type_of_violence').split(',')
    violence_types = [entry.strip() for entry in raw_types if entry.strip()]

    print('INFO: filtering based on conflict properties.')

    # for criterion 'best' (i.e. best estimate of fatalities), select all entries at or above threshold
    if min_casualties:
        threshold = int(min_casualties)
        if verbose: print('DEBUG: filtering key', 'best', 'with lower value', threshold)
        gdf = gdf[gdf['best'] >= threshold]

    # for the type of violence, select all entries matching one of the specified values
    if violence_types:
        if verbose: print('DEBUG: filtering key', 'type_of_violence', 'with value(s)', violence_types)
        wanted = violence_types
        # config values are always strings; compare as numbers when the column is numeric,
        # since isin() does not match e.g. '1' against 1
        if pd.api.types.is_numeric_dtype(gdf['type_of_violence']):
            wanted = pd.to_numeric(violence_types)
        gdf = gdf[gdf['type_of_violence'].isin(wanted)]

    return gdf

def select_period(gdf, config):
    """Keeps only those entries of the geo-dataframe whose year lies within the model period.

    The period is bounded by 'y_start' and 'y_end' from section 'settings';
    both bounds are inclusive.

    Args:
        gdf (geo-dataframe): geo-dataframe containing entries with conflicts.
        config (ConfigParser-object): object containing the parsed configuration-settings of the model.

    Returns:
        geo-dataframe: geo-dataframe containing filtered entries.
    """
    # first and last year of the model period
    first_year = config.getint('settings', 'y_start')
    last_year = config.getint('settings', 'y_end')

    if config.getboolean('general', 'verbose'):
        print('DEBUG: focussing on period between {} and {}'.format(first_year, last_year))

    # boolean masks for both bounds, combined to select the rows inside the period
    not_too_early = gdf['year'] >= first_year
    not_too_late = gdf['year'] <= last_year

    return gdf.loc[not_too_early & not_too_late]

def clip_to_extent(gdf, config, root_dir):
    """Clips the conflict database to the polygons of the study area.

    The polygons are read from the shp-file specified under section 'extent' of the
    configuration, located in the input directory relative to root_dir.

    Args:
        gdf (geo-dataframe): geo-dataframe containing entries with conflicts.
        config (ConfigParser-object): object containing the parsed configuration-settings of the model.
        root_dir (str): path to location of cfg-file.

    Returns:
        geo-dataframe: geo-dataframe containing the conflicts located within the polygons.
        geo-dataframe: geo-dataframe containing the polygons read from the shp-file.
    """
    verbose = config.getboolean('general', 'verbose')

    # get path to file with polygons for which analysis is carried out
    shp_fo = os.path.join(root_dir, config.get('general', 'input_dir'), config.get('extent', 'shp'))

    # read file
    if verbose: print('DEBUG: reading extent and spatial aggregation level from file {}'.format(shp_fo))
    extent_gdf = gpd.read_file(shp_fo)

    # fixing invalid geometries; a zero-width buffer is applied to every polygon for this purpose
    if verbose: print('DEBUG: fixing invalid geometries')
    extent_gdf.geometry = extent_gdf.buffer(0)

    # clip the conflict dataframe to the specified polygons
    if verbose: print('DEBUG: clipping conflict dataset to extent')
    gdf = gpd.clip(gdf, extent_gdf)

    return gdf, extent_gdf

def climate_zoning(gdf, extent_gdf, config, root_dir):
    """Selects only those conflicts and polygons falling in the specified climate zones.

    The zones are given as comma-separated class names under 'zones' in section 'climate'.
    If no zones are specified, the conflicts are returned unchanged together with a copy
    of the polygons, and none of the climate files is read.

    Args:
        gdf (geo-dataframe): geo-dataframe containing conflict data.
        extent_gdf (geo-dataframe): all polygons of study area.
        config (ConfigParser-object): object containing the parsed configuration-settings of the model.
        root_dir (str): path to location of cfg-file.

    Returns:
        geo-dataframe: conflict data clipped to climate zones.
        geo-dataframe: polygons of study area clipped to climate zones.

    Raises:
        ValueError: if a specified climate zone has no unique code in the look-up file.
    """
    zones = config.get('climate', 'zones')

    # if no climate zones are specified, nothing needs to be done besides aligning names
    if zones.strip() == '':
        return gdf, extent_gdf.copy()

    verbose = config.getboolean('general', 'verbose')

    # load file with extents of climate zones
    Koeppen_Geiger_fo = os.path.join(root_dir, config.get('general', 'input_dir'), config.get('climate', 'shp'))
    KG_gdf = gpd.read_file(Koeppen_Geiger_fo)
    # load file to look-up climate zone names with codes in shp-file
    code2class_fo = os.path.join(root_dir, config.get('general', 'input_dir'), config.get('climate', 'code2class'))
    code2class = pd.read_csv(code2class_fo, sep='\t')

    # get all classes specified, tolerating whitespace around the separating commas
    look_up_classes = [entry.strip() for entry in zones.split(',') if entry.strip()]

    # get the corresponding code per class
    code_nrs = []
    for entry in look_up_classes:
        matches = code2class.loc[code2class['class'] == entry, 'code']
        # each class name must map to exactly one code
        if len(matches) != 1:
            raise ValueError('climate zone {} has {} matching codes in {}, expected exactly one'.format(entry, len(matches), code2class_fo))
        code_nrs.append(int(matches.iloc[0]))

    # get only those entries with retrieved codes
    KG_gdf = KG_gdf.loc[KG_gdf['GRIDCODE'].isin(code_nrs)]

    # make sure EPSG:4326 is used
    if KG_gdf.crs != 'EPSG:4326':
        KG_gdf = KG_gdf.to_crs('EPSG:4326')

    # zero-width buffer of the zone geometries, computed once and used for both clips
    zone_geometries = KG_gdf.buffer(0)

    # clip the conflict dataframe to the specified climate zones
    if verbose: print('DEBUG: clipping conflicts to climate zones {}'.format(look_up_classes))
    gdf = gpd.clip(gdf, zone_geometries)

    # clip the studied polygons to the specified climate zones
    if verbose: print('DEBUG: clipping polygons to climate zones {}'.format(look_up_classes))
    polygon_gdf = gpd.clip(extent_gdf, zone_geometries)

    return gdf, polygon_gdf

def select(config, out_dir, root_dir):
    """Runs the full selection procedure on the conflict data.

    The conflicts are filtered by their properties and by the model period, then clipped
    to the study area and to the climate zones. The selected conflicts and polygons are
    written to the output directory as shp-files.

    Args:
        config (ConfigParser-object): object containing the parsed configuration-settings of the model.
        out_dir (str): path to output folder.
        root_dir (str): path to location of cfg-file.

    Returns:
        geo-dataframe: remaining conflict data after selection process.
        geo-dataframe: remaining polygons after selection process.
        dataframe: look-up dataframe as returned by utils.global_ID_geom_info for the remaining polygons.
    """
    # load the conflict data and apply the non-spatial filters: properties first, then period
    conflicts = utils.get_geodataframe(config, root_dir)
    conflicts = filter_conflict_properties(conflicts, config)
    conflicts = select_period(conflicts, config)

    # spatial selection: first the polygons of the study area, then the climate zones
    conflicts, study_area = clip_to_extent(conflicts, config, root_dir)
    conflicts, polygons = climate_zoning(conflicts, study_area, config, root_dir)

    # look-up dataframe derived from the polygons left after the climate zone selection
    global_df = utils.global_ID_geom_info(polygons)

    # write conflicts and polygons to the output directory
    # TODO: save as geoJSON rather than shp
    conflicts_fo = os.path.join(out_dir, 'selected_conflicts.shp')
    polygons_fo = os.path.join(out_dir, 'selected_polygons.shp')
    conflicts.to_file(conflicts_fo, crs='EPSG:4326')
    polygons.to_file(polygons_fo, crs='EPSG:4326')

    return conflicts, polygons, global_df