-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathskimFile.py
121 lines (106 loc) · 3.6 KB
/
skimFile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
###Tony: split a single tree into n paterners
import ROOT, rootlogon, helpers
import argparse, copy, glob, os, sys, time
#for parallel processing!
import multiprocessing as mp
import config as CONF
#import tree configuration
# Switch ROOT to batch mode as soon as the module is loaded (no interactive
# graphics are needed for skimming).
ROOT.gROOT.SetBatch(True)
'''This file is used to skim the large MiniNtuple
down to events that pass the boosted selection only!!!
The output will not be suitable even for semi-leptonic ttbar studies.
Be careful.
Also, the current version supports only one tree, not the others,
so it may have some problems for systematics.
Be extra careful there as well.
'''
#define functions
def options():
    """Parse the command-line arguments of the skimming script.

    Returns:
        The ``argparse`` namespace. Its ``file`` attribute holds the value
        given with ``--file`` (default ``"16_13TeV"``), which ``skim`` uses
        as a substring filter on the input file paths.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--file", default="16_13TeV")
    parsed = cli.parse_args()
    return parsed
def _get_required(tfile, name):
    """Return the object stored under key *name* in *tfile*.

    Raises:
        IOError: if the file has no key with that name, so a missing tree or
            histogram is reported clearly instead of as a null-pointer error.
    """
    if not tfile.GetListOfKeys().Contains(name):
        raise IOError("%s: missing object '%s'" % (tfile.GetName(), name))
    return tfile.Get(name)


def selection(config):
    """Skim one MiniNtuple file, keeping entries with ``hcand_boosted_n >= 2``.

    Only the ``XhhMiniNtuple`` tree is skimmed. The output is written next to
    the input as ``config["file"] + "_skim"`` and contains the skimmed tree
    plus copies of ``cutflow_weighted_XhhMiniNtuple``,
    ``cutflow_XhhMiniNtuple`` and ``MetaData_EventCount_XhhMiniNtuple``.

    Args:
        config: dict with key ``"file"``, the path of the input ROOT file.

    Raises:
        IOError: if the input file cannot be opened, a required object is
            missing, or the output file cannot be created.

    If the job fails after the output file was created, the partial output
    is removed again: ``skim`` skips inputs whose ``_skim`` file already
    exists, so a leftover partial file would never be redone.
    """
    in_path = config["file"]
    out_path = in_path + "_skim"

    infile = ROOT.TFile(in_path, "read")
    if infile.IsZombie():
        raise IOError("cannot open input file: %s" % in_path)

    outfile = None
    tree = outtree = cutflow_weight = cutflow = metadata = None
    finished = False
    try:
        ##load the target tree
        ##only skim the MiniNtuple for now!!!
        tree = _get_required(infile, "XhhMiniNtuple")
        # The bookkeeping objects are cloned before the output file is
        # opened, exactly as before, so the current ROOT directory at clone
        # time is unchanged.
        cutflow_weight = _get_required(
            infile, "cutflow_weighted_XhhMiniNtuple").Clone()
        cutflow = _get_required(infile, "cutflow_XhhMiniNtuple").Clone()
        metadata = _get_required(
            infile, "MetaData_EventCount_XhhMiniNtuple").Clone()

        outfile = ROOT.TFile(out_path, "recreate")
        if outfile.IsZombie():
            raise IOError("cannot create output file: %s" % out_path)
        # Single-argument print works identically in Python 2 and 3; the
        # emitted text is the same as the old two-argument print statement.
        print("skimming:  %s" % in_path)

        # Empty clone: same branch structure, filled entry by entry below.
        outtree = tree.CloneTree(0)
        nentries = tree.GetEntries()
        for n in range(nentries):
            tree.GetEntry(n)
            if n % 20000 == 0:
                helpers.drawProgressBar(n / (nentries * 1.0))
            # Skimming cut. Original author's note: implicitly this is a
            # 250, 350 cut.
            if tree.hcand_boosted_n >= 2:
                outtree.Fill()
        print("skimming done!  %s" % in_path)

        #save the output
        outfile.cd()
        outtree.Write()
        cutflow_weight.Write()
        metadata.Write()
        cutflow.Write()
        finished = True
    finally:
        # Drop the Python references before the files are closed (as the
        # original code did), then close input and output in the same order.
        del tree, outtree, cutflow_weight, cutflow, metadata
        infile.Close()
        if outfile is not None:
            created = not outfile.IsZombie()
            outfile.Close()
            if created and not finished and os.path.isfile(out_path):
                os.remove(out_path)
    return
def skim(targetpath=""):
    """Skim every not-yet-skimmed ``*.MiniNTuple.root`` file under *targetpath*.

    Files are selected with the glob ``targetpath + "*.MiniNTuple.root"``,
    kept only if their path contains the ``--file`` command-line value, and
    skipped if a matching ``<file>_skim`` output already exists. The
    remaining files are processed in parallel by ``selection``.

    Args:
        targetpath: path prefix of the input files; outputs are written
            alongside the inputs.
    """
    start_time = time.time()
    ops = options()
    helpers.checkpath(targetpath)
    print(targetpath)

    #setup the job list: one {"file": path} dictionary per input to skim
    config = []
    for path in glob.glob(targetpath + "*.MiniNTuple.root"):
        if ops.file not in path:
            continue
        #only do skimming once for now!
        if os.path.isfile(path + "_skim"):
            continue
        config.append({"file": path})
    print(config)

    # Leave one core free, but never ask for fewer than one worker:
    # mp.Pool(0) raises ValueError (single-core machine or empty job list).
    ncores = max(1, mp.cpu_count() - 1)
    print(" Running %s jobs on %s cores" % (len(config), ncores))
    if config:
        pool = mp.Pool(min(len(config), ncores))
        try:
            pool.map(selection, config)
        finally:
            # Let already-started jobs finish and reap the worker processes.
            pool.close()
            pool.join()
    ##for debugging, run serially instead:
    # for conf in config:
    #     print(conf)
    #     selection(conf)
    print("--- %s seconds ---" % (time.time() - start_time))
    print("Finish!")
def main():
    """Script entry point: print the EOS reminder and set up the base path.

    Every ``skim`` call below is currently commented out, so running the
    script only prints the reminder and builds ``eospath``. Uncomment the
    line for the sample that should be skimmed.
    """
    print("make sure you mount eos!")
    eospath = CONF.toppath + "/eos/atlas/user/b/btong/bb/"
    #skim(targetpath=eospath + "data/vBT-01-00/gridOutput/MiniNTuple/")
    #eospath = CONF.toppath + "/eos/atlas/user/g/gputnam/bb/"
    #skim(targetpath=eospath + "data/v02-00-00/gridOutput/MiniNTuple/")
    #skim(targetpath=eospath + "mc/v02-00-00/gridOutput/MiniNTuple/")
    #skim(targetpath=eospath + "mc/v02-02-03/gridOutput/rawDownload/user.btong.mc15_13TeV.410000.PowhegPythiaEvtGen_P2012_ttbar_hdamp172p5_nonallhad.hh4b-02-02-03_MiniNTuple.root/")
    #skim(targetpath="../test_mini/data-MiniNTuple/")
    #split(targetpath="signal_QCD")
#def clearbranches():
# Run the skim only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()