Skip to content
This repository has been archived by the owner on Jan 7, 2023. It is now read-only.

Commit

Permalink
Merge pull request #216 from ndawe/master
Browse files Browse the repository at this point in the history
[MRG] tree2array: enable tree caching and add cache_size argument
  • Loading branch information
ndawe committed Sep 15, 2015
2 parents c0dbd3b + 74b8887 commit 4bcc47f
Show file tree
Hide file tree
Showing 6 changed files with 2,330 additions and 2,230 deletions.
36 changes: 26 additions & 10 deletions root_numpy/_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ def root2array(filenames,
stop=None,
step=None,
include_weight=False,
weight_name='weight'):
weight_name='weight',
cache_size=-1):
"""Convert trees in ROOT files into a numpy structured array.
Refer to the type conversion table :ref:`here <conversion_table>`.
Expand Down Expand Up @@ -130,6 +131,10 @@ def root2array(filenames,
Include a column containing the tree weight.
weight_name : str, optional (default='weight')
The field name for the weight column if ``include_weight=True``.
cache_size : int, optional (default=-1)
Set the size (in bytes) of the TTreeCache used while reading a TTree. A
value of -1 uses ROOT's default cache size. A value of 0 disables the
cache.
Examples
--------
Expand Down Expand Up @@ -197,12 +202,13 @@ def root2array(filenames,
else:
flatten = False

arr = _librootnumpy.root2array_fromFname(
arr = _librootnumpy.root2array_fromfile(
filenames, treename, branches,
selection,
start, stop, step,
include_weight,
weight_name)
weight_name,
cache_size)

if flatten:
# select single column
Expand All @@ -218,7 +224,8 @@ def root2rec(filenames,
stop=None,
step=None,
include_weight=False,
weight_name='weight'):
weight_name='weight',
cache_size=-1):
"""View the result of :func:`root2array` as a record array.
Notes
Expand All @@ -236,7 +243,8 @@ def root2rec(filenames,
branches, selection,
start, stop, step,
include_weight,
weight_name).view(np.recarray)
weight_name,
cache_size).view(np.recarray)


def tree2array(tree,
Expand All @@ -246,7 +254,8 @@ def tree2array(tree,
stop=None,
step=None,
include_weight=False,
weight_name='weight'):
weight_name='weight',
cache_size=-1):
"""Convert a tree into a numpy structured array.
Refer to the type conversion table :ref:`here <conversion_table>`.
Expand All @@ -271,6 +280,10 @@ def tree2array(tree,
Include a column containing the tree weight.
weight_name : str, optional (default='weight')
The field name for the weight column if ``include_weight=True``.
cache_size : int, optional (default=-1)
Set the size (in bytes) of the TTreeCache used while reading a TTree. A
value of -1 uses ROOT's default cache size. A value of 0 disables the
cache.
See Also
--------
Expand All @@ -289,11 +302,12 @@ def tree2array(tree,
else:
flatten = False

arr = _librootnumpy.root2array_fromCObj(
arr = _librootnumpy.root2array_fromtree(
cobj, branches, selection,
start, stop, step,
include_weight,
weight_name)
weight_name,
cache_size)

if flatten:
# select single column
Expand All @@ -308,7 +322,8 @@ def tree2rec(tree,
stop=None,
step=None,
include_weight=False,
weight_name='weight'):
weight_name='weight',
cache_size=-1):
"""View the result of :func:`tree2array` as a record array.
Notes
Expand All @@ -329,7 +344,8 @@ def tree2rec(tree,
stop=stop,
step=step,
include_weight=include_weight,
weight_name=weight_name).view(np.recarray)
weight_name=weight_name,
cache_size=cache_size).view(np.recarray)


def array2tree(arr, name='tree', tree=None):
Expand Down
9 changes: 4 additions & 5 deletions root_numpy/src/Column.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ class FormulaColumn: public Column

// Evaluate the formula for the current entry and return a pointer to the
// buffer holding the result (stored in value[0]).
void* GetValuePointer()
{
    // GetNdata() must be called before EvalInstance(), as in TTreePlayer.
    // A single call is sufficient.
    formula->GetNdata();
    value[0] = formula->EvalInstance(0);
    return value;
}
Expand Down Expand Up @@ -97,13 +96,13 @@ class BranchColumn: public Column

// Get the length of this block (number of elements), as reported by
// the underlying leaf.
int GetLen()
{
return leaf->GetLen();
}

int GetCountLen()
{
// get count leaf value
// Get count leaf value
TLeaf* count_leaf = leaf->GetLeafCount();
if (count_leaf != NULL)
{
Expand All @@ -114,7 +113,7 @@ class BranchColumn: public Column

// Get the size of this block in bytes: the per-element size reported by
// the leaf (GetLenType) multiplied by the number of elements (GetLen).
int GetSize()
{
return leaf->GetLenType() * leaf->GetLen();
}

Expand Down
108 changes: 67 additions & 41 deletions root_numpy/src/TreeChain.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,13 @@
#include <TBranch.h>
#include <TLeaf.h>
#include <TTreeFormula.h>
#include <TTreeCache.h>

#include <iostream>
#include <iomanip>
#include <fstream>
#include <cstdlib>
#include <cstdio>
#include <cassert>
#include <string>
#include <map>
#include <vector>
#include <set>
#include <utility>

#include "Column.h"
#include "util.h"
Expand Down Expand Up @@ -47,20 +43,21 @@ class TreeChain
{
public:

TreeChain(TTree* fChain):
fChain(fChain),
ientry(0)
TreeChain(TTree* tree, long long cache_size):
tree(tree),
itree(-1),
ientry(0),
cache_size(cache_size)
{
fCurrent = -1;
notifier = new MiniNotify(fChain->GetNotify());
fChain->SetNotify(notifier);
notifier = new MiniNotify(tree->GetNotify());
tree->SetNotify(notifier);
}

~TreeChain()
{
fChain->SetNotify(notifier->oldnotify);
tree->SetNotify(notifier->oldnotify);

// Delete TTreeFormula
// Delete all TTreeFormula
std::vector<TTreeFormula*>::iterator fit;
for (fit = formulae.begin(); fit != formulae.end(); ++fit)
{
Expand All @@ -78,22 +75,21 @@ class TreeChain
// Enable all branches since we don't know yet which branches are
// required by the formulae. The branches must be activated when a
// TTreeFormula is initially created.
fChain->SetBranchStatus("*", true);
//fChain->SetCacheSize(10000000);
tree->SetBranchStatus("*", true);
}
return (int)load;
}

long long LoadTree(long long entry)
{
long long load = fChain->LoadTree(entry);
long long load = tree->LoadTree(entry);
if (load < 0)
{
return load;
}
if (fChain->GetTreeNumber() != fCurrent)
if (tree->GetTreeNumber() != itree)
{
fCurrent = fChain->GetTreeNumber();
itree = tree->GetTreeNumber();
}
if (notifier->notified)
{
Expand All @@ -111,53 +107,81 @@ class TreeChain

void InitBranches()
{
// The branches must be activated when a TTreeFormula is initially created.
// Prepare() must be called before InitBranches()
TFile* file = NULL;
TTreeCache* cache = NULL;
TBranch* branch;
TLeaf* leaf;
std::string bname, lname;
LeafCache::iterator it;

// Only the required branches will be added to the cache below
fChain->DropBranchFromCache("*", true);
// Set the cache size. This should create a new cache if one does not
// already exist.
tree->SetCacheSize(cache_size);

// Get and set up the cache
file = tree->GetCurrentFile();
if (file)
{
cache = dynamic_cast<TTreeCache*>(file->GetCacheRead(tree));
}
if (cache)
{
cache->ResetCache();
cache->StartLearningPhase();
}

for (it = leafcache.begin(); it != leafcache.end(); ++it)
{
bname = it->first.first;
lname = it->first.second;
branch = fChain->GetBranch(bname.c_str());
leaf = branch->FindLeaf(lname.c_str());
leaf = it->second->leaf;
branch = leaf->GetBranch();

// Make the branch active and cache it
branch->SetStatus(true);
fChain->AddBranchToCache(branch, true);
// and the length leaf as well
if (cache)
{
tree->AddBranchToCache(branch, true);
}

// ... and the length leaf as well
// TODO: Does this work if the user doesn't want the length column
// in the output structure?
TLeaf* leafCount = leaf->GetLeafCount();
if (leafCount != NULL)
{
branch = leafCount->GetBranch();
branch->SetStatus(true);
fChain->AddBranchToCache(branch, true);
if (cache)
{
tree->AddBranchToCache(branch, true);
}
}
}

// Activate all branches used by the formulae
int ncodes, n;
int ncodes, icode;
std::vector<TTreeFormula*>::iterator fit;
for (fit = formulae.begin(); fit != formulae.end(); ++fit)
{
ncodes = (*fit)->GetNcodes();
for (n = 0; n < ncodes; ++n)
for (icode = 0; icode < ncodes; ++icode)
{
branch = (*fit)->GetLeaf(n)->GetBranch();
branch = (*fit)->GetLeaf(icode)->GetBranch();
// Branch may be a TObject split across multiple
// subbranches. These must be activated recursively.
activate_branch_recursive(branch);
fChain->AddBranchToCache(branch, true);
if (cache)
{
tree->AddBranchToCache(branch, true);
}
}
}
if (cache)
{
// Stop the cache learning phase since we have included a fixed set
// of branches
cache->StopLearningPhase();
}
}

int GetEntry(long long entry)
Expand Down Expand Up @@ -234,18 +258,19 @@ class TreeChain
{
bname = it->first.first;
lname = it->first.second;
branch = fChain->FindBranch(bname.c_str());
branch = tree->FindBranch(bname.c_str());
if (branch == NULL)
{
std::cerr << "WARNING: cannot find branch " << bname
<< std::endl;
<< std::endl;
continue;
}
leaf = branch->FindLeaf(lname.c_str());
if (leaf == NULL)
{
std::cerr << "WARNING: cannot find leaf " << lname
<< " for branch " << bname << std::endl;
<< " for branch " << bname
<< std::endl;
continue;
}
it->second->SetLeaf(leaf, true);
Expand Down Expand Up @@ -273,7 +298,7 @@ class TreeChain
const std::string& leaf_name,
BranchColumn* column)
{
BL bl = make_pair(branch_name, leaf_name);
BranchLeaf bl = make_pair(branch_name, leaf_name);
leafcache.insert(make_pair(bl, column));
}

Expand All @@ -299,15 +324,16 @@ class TreeChain
TObject* oldnotify;
};

TTree* fChain;
int fCurrent;
TTree* tree;
int itree;
long long ientry;
long long cache_size;
MiniNotify* notifier;
std::vector<TTreeFormula*> formulae;

// Branch name to leaf name association
typedef std::pair<std::string, std::string> BL;
typedef std::map<BL, BranchColumn*> LeafCache;
typedef std::pair<std::string, std::string> BranchLeaf;
typedef std::map<BranchLeaf, BranchColumn*> LeafCache;

// Column pointer cache to update leaves
// when new file is loaded in the chain
Expand Down
Loading

0 comments on commit 4bcc47f

Please sign in to comment.