Source code for snipar.ld

from numba import njit, prange
import numpy as np
from snipar.utilities import make_id_dict
from snipar.map import map_from_bed
from pysnptools.snpreader import Bed

#### Compute LD-scores ####
[docs]@njit(parallel=True) def compute_ld_scores(gts,map,max_dist = 1): ldscores = np.zeros((gts.shape[1]),dtype=np.float64) for i in prange(gts.shape[1]): ldscore_i = 1 if i>0: j = i-1 dist = map[i]-map[j] while dist < max_dist and j>=0: ldscore_i += r2_est(gts[...,i],gts[...,j]) j -= 1 if j>=0: dist = map[i]-map[j] if i<(gts.shape[1]-1): j = i + 1 dist = map[j] - map[i] while dist < max_dist and j < gts.shape[1]: ldscore_i += r2_est(gts[..., i], gts[..., j]) j += 1 if j < gts.shape[1]: dist = map[j] - map[i] ldscores[i] = ldscore_i return ldscores
## Unbiased estimator of R^2 between SNPs
[docs]@njit def r2_est(g1,g2): not_nan = np.logical_not(np.logical_or(np.isnan(g1), np.isnan(g2))) r2 = np.power(np.corrcoef(g1[not_nan],g2[not_nan])[0,1],2) return r2-(1-r2)/(np.sum(not_nan)-2)
[docs]def ldscores_from_bed(bedfile, chrom, ld_wind, ld_out = None): # Get map map_snps, map = map_from_bed(bedfile, chrom) not_na = ~np.isnan(map) map = map[not_na] map_snps = map_snps[not_na] map_snp_dict = make_id_dict(map_snps) # Read genotypes print('Reading genotypes') bed = Bed(bedfile, count_A1 = True) bed_in_map = np.array([x in map_snp_dict for x in bed.sid]) gts = bed[:,bed_in_map].read().val sid = bed.sid[bed_in_map] bim = bed.pos[bed_in_map,:] chrom = np.array(bim[:,0],dtype=int).reshape((sid.shape[0],1)) pos = np.array(bim[:,2],dtype=int).reshape((sid.shape[0],1)) map = map[np.array([map_snp_dict[x] for x in sid])] print('Computing LD scores') ldscores = compute_ld_scores(gts,map,ld_wind) if ld_out is not None: ld_outfile = ld_out+str(chrom[0])+'.l2.ldscore.gz' print('Writing LD-scores to '+ld_outfile) ld_outarray = np.hstack((chrom, sid.reshape((sid.shape[0],1)),pos,ldscores.reshape((sid.shape[0],1)))) ld_outarray = np.vstack((np.array(['CHR', 'SNP', 'BP', 'L2']).reshape((1,4)), ld_outarray)) np.savetxt(ld_outfile, ld_outarray, fmt='%s') return ldscores, sid