-
Notifications
You must be signed in to change notification settings - Fork 4
/
haplotype_info_indv.py
58 lines (40 loc) · 1.46 KB
/
haplotype_info_indv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import pickle
import os
import pandas as pd
from multiprocessing import Pool
import time
def _codes_agree(left, right):
    """Return True when two scalar codes are equal or are both missing."""
    return bool(left == right) or bool(pd.isnull(left) and pd.isnull(right))


def hlg(gol, pp, scode, max_gap=50000):
    """Collapse consecutive rows that carry the same pair of codes into runs.

    For every value of the first index level of ``gol`` the first two columns
    are each multiplied element-wise by ``scode`` and summed, giving two codes
    per group.  Those codes are placed next to the group id and the columns of
    ``pp`` in a five-column table (id, ``pp`` column 0, ``pp`` column 1,
    code 0, code 1).  Neighbouring rows are then merged while both codes agree
    (equal, or both missing) and the gap between the next row's column 1 and
    the current run's column 2 stays below ``max_gap``.

    Args:
        gol: DataFrame indexed by a MultiIndex; only its first level and its
            first two columns are used.
        pp: Frame concatenated column-wise between the ids and the codes.  It
            has to contribute exactly two columns, otherwise relabelling the
            table to five columns fails.
            NOTE(review): presumably start/end coordinates per group, indexed
            by the same ids as ``gol``'s first level -- confirm with caller.
        scode: Multiplier applied to the rows of each group before summing.
        max_gap: Exclusive upper bound on (next column 1 - current column 2)
            for two rows to be merged.  Defaults to 50000.

    Returns:
        A DataFrame with one row per run and six columns: id of the first
        row, id of the last row, column 1 of the first row, column 2 of the
        last row, code 0, code 1.  A table with a single row yields a one-row
        DataFrame of the same layout.  An empty table yields a list of
        ``None`` placeholders instead of a DataFrame.
    """
    # NOTE(review): ``levels[0]`` holds every value stored in the level, even
    # ones no row uses; the code relies on it matching the groups actually
    # present in ``gol`` -- confirm callers never pass a row-filtered frame.
    level_ids = gol.index.levels[0]
    scg = gol.groupby(level=0).apply(
        lambda x: [(x.iloc[:, 0] * scode).sum(), (x.iloc[:, 1] * scode).sum()]
    )
    ngt = pd.DataFrame(scg.tolist(), index=level_ids)
    hbs = pd.concat([pd.DataFrame(level_ids, index=level_ids), pp, ngt], axis=1)
    hbs.columns = range(0, 5)

    if len(hbs) == 0:
        # ``nfounder`` is not defined anywhere in this module, so reading it
        # directly raised NameError.  Use it when something has injected it
        # as a module global; otherwise fall back to 2 so the placeholder has
        # six entries, the same width as the rows built below.
        return [None] * (4 + globals().get("nfounder", 2))

    if len(hbs) == 1:
        # Nothing to merge: the single row is both first and last of its run.
        return pd.DataFrame([hbs.iloc[0, 0]] + hbs.iloc[0, :].tolist()).transpose()

    runs = []
    current = hbs.iloc[0, :].copy()  # row describing the run being extended
    last_id = current.iloc[0]        # id of the most recent row in that run
    for pos in range(1, len(hbs)):
        row = hbs.iloc[pos, :]
        same_codes = (
            _codes_agree(row.iloc[3], current.iloc[3])
            and _codes_agree(row.iloc[4], current.iloc[4])
        )
        if same_codes and (row.iloc[1] - current.iloc[2]) < max_gap:
            # Extend the run: only its column 2 moves forward.
            current.iloc[2] = row.iloc[2]
        else:
            # Close the finished run and start a new one at this row.
            runs.append([current.iloc[0], last_id] + current.iloc[1:].tolist())
            current = row.copy()
        last_id = row.iloc[0]

    # Flush the run that was still open when the rows ran out.
    runs.append([current.iloc[0], last_id] + current.iloc[1:].tolist())
    return pd.DataFrame(runs)
def dilp(key, sht, cfn, lnm, scode):
    """Run ``hlg`` once per name in ``lnm`` and return the results with ``key``.

    Args:
        key: Label for this batch; concatenated onto the progress message and
            returned unchanged as the first element of the result.
        sht: Object indexed with ``sht.loc[:, name]`` for every name in
            ``lnm``; each selection is handed to ``hlg`` as its first argument.
        cfn: Passed through to ``hlg`` as its second argument.
        lnm: Iterable of column selectors to process, in order.
        scode: Passed through to ``hlg`` as the ``scode`` keyword.

    Returns:
        A ``(key, results)`` tuple where ``results`` lists the ``hlg`` outputs
        in the same order as ``lnm``.
    """
    per_name = [hlg(sht.loc[:, name], cfn, scode=scode) for name in lnm]
    # Progress marker printed once, after every name has been processed.
    print("finshed " + key)
    return (key, per_name)