[go: nahoru, domu]

Skip to content

Commit

Permalink
Add code for splitting training and test files
Browse files Browse the repository at this point in the history
  • Loading branch information
slowmoyang authored and watson-ij committed Dec 11, 2017
1 parent 6553154 commit 37941c9
Show file tree
Hide file tree
Showing 4 changed files with 212 additions and 0 deletions.
89 changes: 89 additions & 0 deletions SJ-JetImage/step1_selection_and_labeling.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/// Copy the entries of one "jetAnalyser" tree that pass its selection flag
/// into a new file, adding a two-element integer "label" branch.
///
/// The selection flag is read from the branch "balanced" when the input file
/// name contains "qq" or "gg", and from "passed" otherwise. The label is
/// {1, 0} when the file name contains "qq" or "zq" and {0, 1} otherwise.
/// The output file name is the input base name with "default" replaced by the
/// name of the selection branch.
///
/// @param input_path  Path of the ROOT file holding the "jetAnalyser" tree.
/// @param output_dir  Directory the output file is written to.
/// @return Path of the written file, or an empty string when the input file,
///         its tree, the selection branch or the output file is unusable.
TString AttachLabel(TString input_path,
                    TString output_dir){
    // Input. Stack-allocated so the file is released on every return path.
    TFile input_file(input_path, "READ");
    if(input_file.IsZombie()){
        std::cerr << "AttachLabel: cannot open " << input_path.Data() << std::endl;
        return TString();
    }

    TTree* input_tree = dynamic_cast<TTree*>(input_file.Get("jetAnalyser"));
    if(input_tree == nullptr){
        std::cerr << "AttachLabel: no 'jetAnalyser' tree in "
                  << input_path.Data() << std::endl;
        return TString();
    }

    const TString kInputName = gSystem->BaseName(input_path);
    // GetEntries() returns a 64-bit count; do not truncate it to int.
    const long long input_entries = input_tree->GetEntries();

    const bool kIsQQ = kInputName.Contains("qq");
    const bool kIsGG = kInputName.Contains("gg");
    const bool kIsZQ = kInputName.Contains("zq");

    const bool kIsQuarkJets = kIsQQ or kIsZQ;
    const bool kIsDijet = kIsQQ or kIsGG;

    const TString criteria_name = kIsDijet ? "balanced" : "passed";
    bool criteria = false;
    // A negative status means the branch is missing or has an incompatible
    // type; without this check every entry would silently be rejected.
    if(input_tree->SetBranchAddress(criteria_name, &criteria) < 0){
        std::cerr << "AttachLabel: cannot read branch '" << criteria_name.Data()
                  << "' in " << input_path.Data() << std::endl;
        return TString();
    }

    // Output
    TString output_name(kInputName);
    output_name.ReplaceAll("default", criteria_name);

    // ConcatFileName() hands back a heap buffer that the caller must free.
    char* joined_path = gSystem->ConcatFileName(output_dir, output_name);
    TString output_path(joined_path);
    delete [] joined_path;

    TFile output_file(output_path, "RECREATE");
    if(output_file.IsZombie()){
        std::cerr << "AttachLabel: cannot create " << output_path.Data() << std::endl;
        return TString();
    }

    // CloneTree(0) copies the branch structure only; entries are added below.
    TTree* output_tree = input_tree->CloneTree(0);
    output_tree->SetDirectory(&output_file);

    int label[2] = {0, 1};
    if(kIsQuarkJets){
        label[0] = 1;
        label[1] = 0;
    }

    output_tree->Branch("label", label, "label[2]/I");

    for(long long i = 0; i < input_entries; i++){
        input_tree->GetEntry(i);
        if(not criteria) continue;

        output_tree->Fill();
    }

    output_file.Write();
    output_file.Close();

    input_file.Close();

    return output_path;
}



/// Build the list of input file paths for all four samples.
///
/// For every index from 1 to num_files, in that order, one path is produced
/// for each of "qq", "gg", "zq" and "zg", following the pattern
/// "./<data_dir>/mg5_pp_<sample>_default_pt_50_100_<index>.root".
///
/// NOTE(review): step2_first_merge.cc reads files named "..._pt_100_500_...",
/// which does not match the "pt_50_100" used here; confirm which is intended.
///
/// @param data_dir   Directory, relative to the working directory, that holds
///                   the input files.
/// @param num_files  Number of files per sample.
/// @return The generated paths; empty when num_files is less than 1.
std::vector<TString> GetPaths(TString data_dir,
                              int num_files=20){
    std::vector< TString > paths;
    for(int i=1; i <= num_files; i++){
        for(auto partons : {"qq", "gg", "zq", "zg"}){
            // A TString cannot be passed through C varargs; hand over its
            // character buffer instead.
            paths.push_back(TString::Format(
                "./%s/mg5_pp_%s_default_pt_50_100_%d.root",
                data_dir.Data(), partons, i));
        }
    }

    return paths;
}



void macro(){

TString output_dir = "";

if(gSystem->AccessPathName(output_dir)){
gSystem->mkdir(output_dir);
}

std::vector<TString> paths = GetPaths(20);
for(auto input_path : paths){
AttachLabel(input_path, output_dir);
}
}
46 changes: 46 additions & 0 deletions SJ-JetImage/step2_first_merge.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
void merge_dijet(){
TString fmt = "./step1_labeling/mg5_pp_%s_balanced_pt_100_500_%d.root";
TString out_fmt = "./step2_first_merge/dijet_%d.root";

for(int i=1; i<=20; i++){
TChain mychain("jetAnalyser");

TString qq_path = TString::Format(fmt, "qq", i);
TString gg_path = TString::Format(fmt, "gg", i);
TString out_path = TString::Format(out_fmt, i);

mychain.Add(qq_path);
mychain.Add(gg_path);
mychain.Merge(out_path);

cout << out_path << endl;
}
}


void merge_z_jet(){
TString fmt = "./step1_labeling/mg5_pp_%s_passed_pt_100_500_%d.root";
TString out_fmt = "./step2_first_merge/z_jet_%d.root";

for(int i=1; i<=50; i++){
TChain mychain("jetAnalyser");

TString qq_path = TString::Format(fmt, "zq", i);
TString gg_path = TString::Format(fmt, "zg", i);
TString out_path = TString::Format(out_fmt, i);

mychain.Add(qq_path);
mychain.Add(gg_path);
mychain.Merge(out_path);

cout << out_path << endl;
}
}


void macro(){

merge("dijet", 20);
merge("zjet", 20);

}
56 changes: 56 additions & 0 deletions SJ-JetImage/step3_random_shuffle.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#include <algorithm>
#include <numeric>
#include <random>
#include <vector>

/// Write a copy of a "jetAnalyser" tree with its entries in random order.
///
/// The output file keeps the base name of the input file and is created in
/// output_dir. The running entry count is printed every 100 entries.
///
/// @param input_path  Path of the ROOT file holding the "jetAnalyser" tree.
/// @param output_dir  Directory the shuffled file is written to.
/// @return Path of the written file, or an empty string when the input file,
///         its tree or the output file is unusable.
TString shuffle_tree(TString input_path, TString output_dir){
    // Stack-allocated so the file is released on every return path.
    TFile input_file(input_path, "READ");
    if(input_file.IsZombie()){
        std::cerr << "shuffle_tree: cannot open " << input_path.Data() << std::endl;
        return TString();
    }

    TTree* input_tree = dynamic_cast<TTree*>(input_file.Get("jetAnalyser"));
    if(input_tree == nullptr){
        std::cerr << "shuffle_tree: no 'jetAnalyser' tree in "
                  << input_path.Data() << std::endl;
        return TString();
    }
    // GetEntries() returns a 64-bit count; do not truncate it to int.
    const long long input_entries = input_tree->GetEntries();

    const TString output_filename = gSystem->BaseName(input_path);
    // ConcatFileName() hands back a heap buffer that the caller must free.
    char* joined_path = gSystem->ConcatFileName(output_dir, output_filename);
    TString output_path(joined_path);
    delete [] joined_path;

    TFile output_file(output_path, "RECREATE");
    if(output_file.IsZombie()){
        std::cerr << "shuffle_tree: cannot create " << output_path.Data() << std::endl;
        return TString();
    }

    // CloneTree(0) copies the branch structure only; entries are added below.
    TTree* output_tree = input_tree->CloneTree(0);
    output_tree->SetDirectory(&output_file);

    // Heap storage: a stack array sized by the entry count is not standard
    // C++ and can overflow the stack for large trees.
    std::vector<long long> order(static_cast<std::size_t>(input_entries));
    std::iota(order.begin(), order.end(), 0LL);

    // std::random_shuffle was removed in C++17. The engine is default-seeded
    // so runs are reproducible, and static so that successive calls continue
    // the sequence instead of repeating the same permutation.
    static std::mt19937 engine;
    std::shuffle(order.begin(), order.end(), engine);

    for(long long i = 0; i < input_entries; i++){
        if(i % 100 == 0){
            std::cout << i << std::endl;
        }
        input_tree->GetEntry(order[i]);
        output_tree->Fill();
    }
    output_file.Write();
    output_file.Close();

    input_file.Close();

    return output_path;
}


/// Shuffle every step-2 file of one sample and print the paths involved.
///
/// Files "./step2_first_merge/<which>_<index>.root" for index 1..num_files
/// are passed to shuffle_tree with "./step3_shuffle" as the output directory.
///
/// @param which      Sample name used as the file-name prefix.
/// @param num_files  Number of files to process.
void shuffle_dijet(TString which, int num_files){
    // Two-stage format: the sample name is filled in now, the "%d" survives
    // as a placeholder for the file index.
    const TString input_pattern =
        TString::Format("./step2_first_merge/%s_%s.root", which.Data(), "%d");

    for(int file_index = 1; file_index <= num_files; ++file_index){
        const TString source_path = TString::Format(input_pattern, file_index);
        cout << "In[" << file_index << "]: " << source_path << endl;

        const TString shuffled_path = shuffle_tree(source_path, "./step3_shuffle");
        cout << "Out[" << file_index << "]: " << shuffled_path << endl << endl;
    }
}


void macro(){
if(gSystem->AccessPathName(output_dir)){
gSystem->mkdir(output_dir);
}
shuffle("dijet", 20);
shuffle("zjet", 20);
}
21 changes: 21 additions & 0 deletions SJ-JetImage/step4_second_merge.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/// Merge all shuffled files of one sample into a single file.
///
/// Chains the "jetAnalyser" trees of
/// "./step3_shuffle/<which>_<index>.root" for index 1..num_files and writes
/// the result to "./step4_second_merge/<which>.root".
///
/// @param which      Sample name used as the file-name prefix.
/// @param num_files  Number of shuffled files to merge.
void merge(TString which, int num_files){
    // Two-stage format: the sample name is filled in now, the "%d" survives
    // as a placeholder for the file index.
    const TString input_pattern =
        TString::Format("./step3_shuffle/%s_%s.root", which.Data(), "%d");
    const TString merged_path =
        TString::Format("./step4_second_merge/%s.root", which.Data());

    TChain shuffled_chain("jetAnalyser");
    for(int index = 1; index <= num_files; ++index){
        shuffled_chain.Add(TString::Format(input_pattern, index));
    }
    shuffled_chain.Merge(merged_path);
}

void macro(){

if(gSystem->AccessPathName(output_dir)){
gSystem->mkdir(output_dir);
}

merge("dijet", 20);
merge("zjet", 20);
}

0 comments on commit 37941c9

Please sign in to comment.