Source code for mcot.core._scripts.cifti.correlate

#!/usr/bin/env python
"""Correlates two CIFTI files"""
from nibabel import cifti2
import numpy as np
from scipy import stats
from loguru import logger
import os.path as op
import time


[docs]def correlate_arr(arr1, arr2, collapse=False): """ Computes the Pearson r correlation for two arrays :param arr1: (M, N) array for N observations of M variables :param arr2: (M, K) array for K obsevations of M variables :param collapse: assume N==K and only return (N, ) array with the r correlation :return: (N, K) array with the r correlation (or (N, ) array if collapse is set) """ arr1 = np.atleast_2d(arr1) arr2 = np.atleast_2d(arr2) if collapse: assert arr1.shape == arr2.shape demeaned1 = arr1 - arr1.mean(0) demeaned2 = arr2 - arr2.mean(0) if collapse: r_num = np.einsum('ij,ij->j', demeaned1, demeaned2) r_den = np.sqrt(np.sum(demeaned1 ** 2, 0) * np.sum(demeaned2 ** 2, 0)) else: r_num = demeaned1.T.dot(demeaned2) r_den = np.sqrt( np.sum(demeaned1 ** 2, 0)[:, None] * np.sum(demeaned2 ** 2, 0)[None, :] ) return r_num / r_den
[docs]class FakeDConn(object): """ Pretends to be a nibabel image of the dconn file """
[docs] def __init__(self, dtseries): self.dataobj = FakeDConnDataObject(dtseries)
[docs]class FakeDConnDataObject(object): """ Pretends to be a nibabel dataobject of a dconn file """
[docs] def __init__(self, dtseries): self.dtseries = dtseries
def __getitem__(self, item): slc1, slc2 = item return correlate_arr( self.dtseries.dataobj[:, slc1], self.dtseries.dataobj[:, slc2] )
[docs]def correlate(dconn1, dconn2, select_features=slice(None)): """ Correlates two memory mapped arrays :param dconn1: (M, N) nibabel Cifti2Image (M features for N greyordinates) :param dconn2: (M, N) nibabel Cifti2Image (M features for N greyordinates) :param select_features: which features to select :return: (N, ) in-memory array """ logger.info('Correlating {}'.format(select_features)) ngrey = dconn1.shape[1] step = int(np.ceil(1e8 / ngrey)) res = np.zeros(ngrey) loading_time = 0 correlate_time = 0 for idx in range(0, ngrey, step): logger.debug('Calculating correlation for grayordinates between {} and {}'.format(idx, idx + step)) start_time = time.time() if isinstance(select_features, list) or isinstance(select_features, tuple): local1 = np.concatenate([dconn1.dataobj[sf, idx:idx + step] for sf in select_features], 0) local2 = np.concatenate([dconn2.dataobj[sf, idx:idx + step] for sf in select_features], 0) else: local1 = np.array(dconn1.dataobj[select_features, idx:idx + step]) local2 = np.array(dconn2.dataobj[select_features, idx:idx + step]) mid_time = time.time() loading_time += mid_time - start_time for sub_idx in range(local1.shape[1]): res[idx + sub_idx] = stats.spearmanr(local1[:, sub_idx], local2[:, sub_idx])[0] correlate_time += time.time() - mid_time logger.info('Loading data took {} minutes'.format(loading_time / 60)) logger.info('Correlating the data took {} minutes'.format(correlate_time / 60)) return res
[docs]def run(dconn1, dconn2, split_greyordinates: cifti2.BrainModelAxis=None): """ Computes the correlation between dense connectomes :param dconn1: (M, N) nibabel Cifti2Image :param dconn2: (M, N) nibabel Cifti2Image :param split_greyordinates: defines M voxels/vertices to select sub-set of features the features :return: dictionary mapping names of target regions to (N, ) arrays """ logger.info('Computing full correlation') res = {'full': correlate(dconn1, dconn2)} if split_greyordinates is None: return res cortex = [] sub_cortical = [] left = [] right = [] for struc_name, struc_slice, struc_bm in split_greyordinates.iter_structures(): short_name = struc_name.split('_', maxsplit=2)[2] if 'CORTEX' in short_name: logger.info('Computing correlation relative to %s', short_name) res[short_name] = correlate(dconn1, dconn2, struc_slice) if struc_bm.is_surface.all(): cortex.append(struc_slice) else: sub_cortical.append(struc_slice) if 'LEFT' in short_name: left.append(struc_slice) if 'RIGHT' in short_name: right.append(struc_slice) for name in ('cortex', 'sub_cortical', 'left', 'right'): logger.info('Computing correlation relative to all %s brain structures', name) res[name] = correlate(dconn1, dconn2, locals()[name]) return res
[docs]def run_from_args(args): """ Runs the script based on a Namespace containing the command line arguments """ logger.info('starting %s', op.basename(__file__)) dconn1 = cifti2.load(args.input) axes1 = dconn1.header.get_axis(dconn1) dconn2 = cifti2.load(args.reference) axes2 = dconn2.header.get_axis(dconn2) if not (isinstance(axes1[1], cifti2.BrainModelAxis) or isinstance(axes1[1], cifti2.ParcelsAxis)): raise ValueError("Columns should have greyordinates or parcels") if axes1[1] != axes2[1]: raise ValueError("Compared CIFTI files should have the same greyordinates/parcels along the columns") if args.as_dconn: if isinstance(axes1[0], cifti2.SeriesAxis): axes1 = (axes1[1], ) * 2 dconn1 = FakeDConn(dconn1) elif isinstance(axes2[0], cifti2.SeriesAxis): axes2 = (axes2[1], ) * 2 dconn2 = FakeDConn(dconn2) if args.split_dconn and not isinstance(axes1[0], cifti2.BrainModelAxis): raise ValueError("Rows should be greyordinates when using the option --split_dconn") if axes1[0] != axes2[0]: raise ValueError("Compared CIFTI files should have the same features along the rows") as_dict = run( dconn1=dconn1, dconn2=dconn2, split_greyordinates=axes1[0] if args.split_dconn else None, ) scalar = cifti2.ScalarAxis(list(as_dict.keys())) arr = np.stack(list(as_dict.values()), -1) cifti2.Cifti2Image(arr.T, header=(scalar, axes1[1])).to_filename(args.output) logger.info('ending %s', op.basename(__file__))
[docs]def add_to_parser(parser): """ Creates the parser of the command line arguments """ parser.add_argument("-i", "--input", required=True, help="input parcellated or dense CIFTI file") parser.add_argument("-r", "--reference", required=True, help="reference dense connectome (should have the same greyordinates or parcels)") parser.add_argument("-o", "--output", required=True, help="output dscalar or pscalar file with the correlations") parser.add_argument("--split_dconn", action='store_true', help='If set splits the feature greyordinates into different groups (works for dconn or pdconn)') parser.add_argument("--as_dconn", action='store_true', help="If set treats dense timeseries files and dense connectome files " + "using the Pearson r correlation")