In the generalized model, the relative occurrence of bidirectional connections \(\varrho\) depends on the width \(\sigma\) of the modulating normal distribution.

It is

\begin{align} \varrho = \frac{\operatorname{\mathbf{E}}(P_{ij} P_{ji})}{\mu^2}, \end{align}

where \(\operatorname{\mathbf{E}}(P_{ij} P_{ji})\) is computed as the integral

\begin{align} \operatorname{\mathbf{E}}(P_{ij}P_{ji}) = \int_0^1 \int_0^1 xy\, f_{P_{ji} | P_{ij}}(y \mid x) f_{P_{ij}}(x) \, dx\, dy, \label{eq:dbint} \end{align}

in which the conditional density function is defined as

\begin{align} f_{P_{ji} | P_{ij}} (y \mid x) = \frac{1}{N_{\sigma}(x)} f_{P_{ji}}(y)\, \frac{1}{\sigma \sqrt{2 \pi}} \,e^{\frac{(y-x)^2}{2 \sigma^2}} \label{eq:fpijpji}, \end{align}

with normalizing factor \(N_{\sigma}(x)^{-1}\) that makes sure that \(f_{P_{ji}|P_{ij}} (y \mid x)\) integrates to one,

\begin{align} N_{\sigma}(x) = \int_0^1 f_{P_{ji}}(z)\, \frac{1}{\sigma \sqrt{2 \pi}}\, e^{\frac{(z-x)^2}{2 \sigma^2}} \,dz. \end{align}

The overall connection probability \(\mu\) is computed as

\begin{align} \mu = \frac{1}{2} \int_0^1 x f_{P_{ij}}(x)\,dx + \frac{1}{2} \int_0^1 f_{P_{ij}}(x) \int_0^1 y \,f_{P_{ji}\vert P_{ij}}(y \mid x) \,dy \, dx. \end{align}



Generating the data

We generate the data to show this relationship at the example of the gamma distribution (Figure 2) in three different ways:

Numerical integration

In the numerical integration, using scipy.integrate.quad and scipy.integrate.dblquad, the above equations are solved for the given probability density function \(f_{P_{ij}}(y)\) and with \(\sigma\).

import numpy as np

from scipy import integrate
from scipy.optimize import brentq
from scipy.stats import norm

from gamma_functions import K, fT



class Gamma_network(object):

    def __init__(self, a, mu):

        self.a = a
        self.mu = mu

        self.b = self.get_b()

        self.sigma = 1.


    def get_b(self):

        root_f = lambda b: integrate.quad(
            lambda x: x*fT(x,self.a,b),0,1)[0] - self.mu

        return brentq(root_f, 0.5*self.mu/self.a, 5*self.mu/self.a)


    def xyfxfye2(self,x,y):
        numer =  x*y*fT(x,self.a,self.b)*fT(y,self.a,self.b)\
                 *norm.pdf(y,loc=x, scale=self.sigma) 
        denom =  integrate.quad(lambda y: fT(y,self.a,self.b)\
                                * norm.pdf(y,loc=x, scale=self.sigma),
                                0,1)[0]
        return numer/denom


    def yfxfye2(self,x,y):
        numer =  y*fT(x,self.a,self.b)*fT(y,self.a,self.b)\
                 *norm.pdf(y,loc=x, scale=self.sigma) 
        denom =  integrate.quad(lambda y: fT(y,self.a,self.b)\
                                *norm.pdf(y,loc=x, scale=self.sigma)
                                ,0,1)[0]
        return numer/denom


    def sim(self,sigma):

        self.sigma = sigma

        numer = integrate.dblquad(self.xyfxfye2, 0, 1,
                                  lambda x: 0, lambda x: 1)[0]
        denom_x = integrate.quad(lambda x: x*fT(x,self.a,self.b), 0,1)[0]
        denom_y = integrate.dblquad(self.yfxfye2, 0, 1,
                                    lambda x: 0, lambda x: 1)[0]
        return numer, denom_x, denom_y

Then to get the data, for example for \(\alpha=1\), the following code is executed saving every data point as a dictionary item for full transparency and reproducibility.

if __name__=="__main__":

    import os, pickle

    label = "th_a1_mu01.p"

    gn = Gamma_network(1, 0.1)
    data_frame = []

    with open("data/" + label, "wb") as pfile:
        pickle.dump(data_frame, pfile)

    for sigma in np.arange(0.005,0.75005,0.005):

        numer, denom_x, denom_y  = gn.sim(sigma)
        print( sigma )
        print( "\t numer       : ", numer )
        print( "\t den_x, den_y: ", denom_x, ", ", denom_y )
        print( "\t rho         : ", numer/(denom_x*denom_y) )
        print( "\t alph, bet   : ", gn.a, ", ", gn.b )  

        data = {}
        data["alpha"] = gn.a
        data["beta"] = gn.b
        data["mu"] = gn.mu
        data["sigma"] = sigma
        data["numer"] = numer
        data["den_x"] = denom_x
        data["den_y"] = denom_y

        data_frame.append(data)

        os.rename("data/" + label, "tmp/" + label)
        with open("data/" + label, "wb") as pfile:
            pickle.dump(data_frame, pfile)

Sampling of connection probabilities

The numerical integration becomes very resource demanding for specific distributions. In these situations we connection probabilities from the random variables and compute the mean overrepresentation \(\varrho\). To define random variables with the desired distributions scipy.stats.rv_continuous is used.

import numpy as np
from scipy.stats import norm
from scipy.stats import rv_continuous
from scipy.special import gamma
import scipy.integrate as integrate
from scipy.optimize import brentq

from gamma_functions import fT,K


class Rv_Mult_Norm(rv_continuous):

    def __init__(self, rv, x, sigma):

        self.rv = rv
        self.x = x
        self.sigma = sigma

        self.norm_f = self.compute_norm_f()

        rv_continuous.__init__(self, a=0, b=1)

    def compute_norm_f(self):
        return integrate.quad(lambda y: self.rv.pdf(y)\
                              *norm.pdf(y, loc=self.x, scale=self.sigma), 
                              0, 1)[0]

    def _pdf(self, y):
        return self.rv.pdf(y)*norm.pdf(y, loc=self.x, scale=self.sigma)/self.norm_f


def sample_rv_mult_norm(x, rv, sigma):

    rv_mult_norm = Rv_Mult_Norm(rv, x, sigma)

    return rv_mult_norm.rvs()


class Trunc_Gamma_Rv(rv_continuous):

    def __init__(self, alph, bet, mu):

        self.alph = alph
        self.bet = bet
        self.mu = mu

        assert(abs(bet-self.get_b())<0.01)
        self.K_ab = self.get_K()

        rv_continuous.__init__(self, a=0, b=1)

    def get_b(self):
        root_f = lambda bet: integrate.quad(
            lambda x: x*fT(x,self.alph,bet),0,1)[0] - self.mu
        return brentq(root_f, 0.5*self.mu/self.alph, 5*self.mu/self.alph)

    def get_K(self):
        a,b = self.alph, self.bet
        int_f_0_1 = integrate.quad(
            lambda x: 1/(b**a*gamma(a))*x**(a-1)*np.exp(-x/b),0,1)
        K_ab = 1./(int_f_0_1[0])
        return K_ab

    def _pdf(self, x):
        if x < 0:
            fT_ab = 0
        elif x>1:
            fT_ab = 0
        else:
            fT_ab = self.K_ab * 1/(self.bet**self.alph*gamma(self.alph))\
                    *x**(self.alph-1)*np.exp(-x/self.bet)

        return fT_ab

Then to get the data, for example for \(\alpha=0.248\), the following code is executed saving every data point as a dictionary item for full transparency and reproducibility.

if __name__ == '__main__':

    from params.sigma_dep_network_params import *
    import os, pickle

    data_frame = []
    label = "4_alpha_0-05_sigma.p"

    with open("data/" + label, "wb") as pfile:
        pickle.dump(data_frame, pfile)

    for alph, bet in zip(alphas,betas):

        rv = Trunc_Gamma_Rv(alph, bet, mu)

        for sigma in np.arange(sig_low, sig_high, sig_step):

            for k in range(n_trials):

                print("a: ", alph, "\t sigma: ", sigma, "\t No.: ", k+1)

                data = {"alpha": alph, "beta": bet, "mu": mu, "sigma": sigma}

                xs = rv.rvs(size=n_pairs)
                ys = []
                for x in xs:
                    y = sample_rv_mult_norm(x, rv, sigma)
                    ys.append(y)

                data["xs"]=xs
                data["ys"]=np.array(ys)

                data_frame.append(data)

                os.rename("data/" + label, "tmp/" + label)
                with open("data/" + label, "wb") as pfile:
                    pickle.dump(data_frame, pfile)

Computation in generated networks

Finally we also tested this method by generating networks with such defined connection probabilities and extracted the relative overrepresentation \(\varrho\).

from __future__ import print_function

import numpy as np
from scipy.stats import norm
from scipy.stats import rv_continuous

import resource

def populate_triu(P, c_rv):
    '''
    Populates the upper triangle of P with connection probabilities
    sampled from c_rv. P_ij is the probability for a connection from
    node i to node j.  
    ----------
    P     :  matrix of connection probabiities
    c_rv  :  randomly returns connection probability
    '''

    rows, cols = np.triu_indices_from(P, k=1)

    for i,j in zip(rows,cols):
        P[i][j] = c_rv.rvs()

    assert(np.max(P)<=1)
    assert(np.min(P)>=0)

    return P


def populate_tril(P, c_rvy, params):
    '''
    Populates the lower triangle of P with connection probabilities
    sampled from c_rv, given the corr_method.
    ----------
    P           :  matrix of connection probabiities
    c_rvy      :  class for sampling correlated connection probabilities
    '''

    rows, cols = np.triu_indices_from(P, k=1)

    for i,j in zip(rows,cols):
        c_rvy.x = P[i][j]
        c_rvy.sigma = params["sigma"]
        c_rvy.norm_f = c_rvy.compute_norm_f()
        P[j][i] = c_rvy.rvs()

    assert(np.max(P)<=1)
    assert(np.min(P)>=0)

    return P


class C_rv_mult_norm(rv_continuous):
    '''
    normal modulated random variable for P_ji given P_ij=x
    and width sigma
    '''

    def __init__(self, c_rv, x, sigma):

        self.c_rv = c_rv
        self.x = x
        self.sigma = sigma

        self.norm_f = self.compute_norm_f()

        rv_continuous.__init__(self, a=0, b=1)

    def compute_norm_f(self):
        return integrate.quad(lambda y: self.c_rv.pdf(y)\
                              *norm.pdf(y, loc=self.x, scale=self.sigma),
                              0, 1)[0]

    def _pdf(self, y):
        return self.c_rv.pdf(y)*1/self.norm_f\
               * norm.pdf(y, loc=self.x, scale=self.sigma)



def connect_network(P):

    rel = np.random.uniform(size=np.shape(P))

    return (P>rel).astype(int)


def generate_network(N, c_rv, params):

    P = np.zeros((N,N))
    P = populate_triu(P, c_rv)
    c_rvy = C_rv_mult_norm(c_rv, 1, params["sigma"])
    P = populate_tril(P, c_rvy, params)
    G = connect_network(P)

    return G

def compute_overrep(G):

    N = len(G)

    U = (G+G.T)[np.triu_indices(N,1)]

    n_recip  = float(len(U[np.where(U==2)]))

    p_bar = np.sum(G)/float((N*(N-1)))                
    n_recip_bar = p_bar**2*N*(N-1)/2.

    return n_recip/n_recip_bar

For example, for a=1, we collect the data as follows

if __name__ == '__main__':

    import os, pickle

    from gamma_functions import *

    a = 1.
    mu = 0.1

    c_rv = Trunc_gamma(a, mu)

    N = 250
    trials = 5

    df = []

    label = "a_1_sig_05_set5.p"
    with open("data/" + label, "wb") as pfile:
        pickle.dump(df, pfile)

    for sig in np.arange(0.05, 0.45, 0.05):
        params = {"sigma": sig}
        for i in range(trials):
            data = {}
            G = generate_network(N, c_rv, params)
            rho = compute_overrep(G)
            data["rho"] = rho
            data["sig"] = params["sigma"]
            data["mu"]  = float(np.sum(G))/(N*(N-1))
            data["N"] = len(G)
            data["alpha"] = c_rv.alph
            data["beta"] = c_rv.bet

            df.append(data)

            print( sig )
            print( "\t rho         : ", rho )
            print( "\t mu          : ", float(np.sum(G))/(N*(N-1)))

            os.rename("data/" + label, "tmp/" + label)
            with open("data/" + label, "wb") as pfile:
                pickle.dump(df, pfile)

Plotting the figure

With the above methods we generated data and plotted the figure

where the red data points were obtained from the generated networks and were not shown in the article. The graphic was created with the following code

import matplotlib as mpl
mpl.use('Agg')
import pylab as pl

import numpy as np
from scipy.stats import sem
import pickle

from matplotlib import rc

rc('text', usetex=True)
pl.rcParams['text.latex.preamble'] = [
    r'\usepackage{tgheros}',    # helvetica font
    r'\usepackage{sansmath}',   # math-font matching helvetica
    r'\sansmath'                # actually tell tex to use it!
    r'\usepackage{siunitx}',    # micro symbols
    r'\sisetup{detect-all}',    # force siunitx to use the fonts
]  


# -----  data from numerical integration -----

# a=1
with open("data/th_a1_mu01.p", "rb") as pfile:
    df_a1 = pickle.load(pfile)

# data for sigma=0 from Figure 2    
t_sigs_a1 = [0]
t_rhos_a1 = [1.996]

t_sigs_a1 += [d["sigma"] for d in df_a1]
t_rhos_a1 += [d["numer"]/(1./4*(d["den_x"]+d["den_y"])**2) for d in df_a1]

# a=2
with open("data/th_a2_mu01.p", "rb") as pfile:
    df_a2 = pickle.load(pfile)

# data for sigma=0 from Figure 
t_sigs_a2 = [0]
t_rhos_a2 = [1.500]

t_sigs_a2 = [d["sigma"] for d in df_a2]
t_rhos_a2 = [d["numer"]/(1./4*(d["den_x"]+d["den_y"])**2) for d in df_a2]


# -----  data from sampling connection probabilities -----

with open("data/4_alpha_0-05_sigma.p", "rb") as pfile:
    df = pickle.load(pfile)

alph = 0.248
x = [d for d in df if d["alpha"]==alph]

sample_sigs = list(set([d["sigma"] for d in x]))
rhos_a0248 = []
rhos_a0248_sem = []

for sigma in sample_sigs:    
    df_sig = [d for d in x if d["sigma"]==sigma]
    rhos_sig = [np.mean(d["xs"]*d["ys"])\
                * 1/(np.mean(np.concatenate((d["xs"],d["ys"])))**2)\
                for d in df_sig]
    rhos_a0248.append(np.mean(rhos_sig))
    rhos_a0248_sem.append(sem(rhos_sig))


alph = 1.
x = [d for d in df if d["alpha"]==alph]

sample_sigs_a1 = list(set([d["sigma"] for d in x]))
rhos_a1 = []
rhos_a1_sem = []

for sigma in sample_sigs_a1:
    df_sig = [d for d in x if d["sigma"]==sigma]
    rhos_sig = [np.mean(d["xs"]*d["ys"])\
                * 1/(np.mean(np.concatenate((d["xs"],d["ys"])))**2)\
                for d in df_sig]
    rhos_a1.append(np.mean(rhos_sig))
    rhos_a1_sem.append(sem(rhos_sig))


# ----- fit for alpha=0.248 sampled data -----

xs = np.arange(0.,0.7,0.001)
ys = 1.086317 + (4.043159 - 1.086317)/(1 + (xs/0.2587529)**3.275628)


# ----- data from generated networks -----

# a = 1
with open("data/gn_a1_sig05.p", "rb") as pfile:
    df = pickle.load(pfile, encoding='latin1')

gn_a1_sample_sigs = list(set([d["sig"] for d in df]))

gn_a1_rhos = []
gn_a1_rho_sems = []
for sig in gn_a1_sample_sigs:
    df_sig  = [d for d in df if d["sig"]==sig]
    gn_a1_rhos.append(np.mean([d["rho"] for d in df_sig]))
    gn_a1_rho_sems.append(sem([d["rho"] for d in df_sig]))


# a = 2
with open("data/gn_a2_sig05.p", "rb") as pfile:
    df = pickle.load(pfile, encoding='latin1')

gn_a2_sample_sigs = list(set([d["sig"] for d in df]))

gn_a2_rhos = []
gn_a2_rho_sems = []
for sig in gn_a2_sample_sigs:
    df_sig  = [d for d in df if d["sig"]==sig]
    gn_a2_rhos.append(np.mean([d["rho"] for d in df_sig]))
    gn_a2_rho_sems.append(sem([d["rho"] for d in df_sig]))




fig, ax = pl.subplots(1,1)
fig.set_size_inches(7.5*0.5,2.3)

pl.plot(xs,ys, 'k', label=r'$\alpha=0.248$')
(_, caps, _) = pl.errorbar(sample_sigs, rhos_a0248, yerr=rhos_a0248_sem,
                           fmt="None", ecolor='k', elinewidth=1.5,)
for cap in caps:
    cap.set_markeredgewidth(0.8)

pl.plot(t_sigs_a1, t_rhos_a1, 'k', linestyle=':', label=r'$\alpha=1$')
pl.plot(t_sigs_a2, t_rhos_a2, 'k', linestyle='--', label=r'$\alpha=2$')

(_, caps, _) = pl.errorbar(gn_a1_sample_sigs, gn_a1_rhos, yerr=gn_a1_rho_sems,
                           fmt="None", ecolor = 'r', elinewidth=1.5,)
for cap in caps:
    cap.set_markeredgewidth(0.8)

(_, caps, _) = pl.errorbar(gn_a2_sample_sigs, gn_a2_rhos, yerr=gn_a2_rho_sems,
                           fmt="None", ecolor = 'r', elinewidth=1.5,)
for cap in caps:
    cap.set_markeredgewidth(0.8)

pl.xticks([0.,0.1,0.2,0.3,0.4,0.5,0.6])

ax.set_title(r'$f_{P_{ij}}(y) = f_{\alpha,\beta}^T(y)$', size=13.)
pl.xlim(0,0.6)
pl.ylim(1.,4.05)

pl.legend(prop={'size':12})

pl.xlabel(r'width $\sigma$')
pl.ylabel(r'relative occurrence $\varrho$')

pl.savefig('fig3B.pdf', dpi=600, bbox_inches='tight')