Skip to content

Commit

Permalink
Merge pull request #8 from ponyisi/defer-speedup
Browse files Browse the repository at this point in the history
Speed up + improvements
  • Loading branch information
ponyisi authored Feb 7, 2022
2 parents b9a78bf + 68b8f6e commit 988dfd1
Show file tree
Hide file tree
Showing 16 changed files with 460 additions and 54 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
python-version: [3.7, 3.8, 3.9, "3.10"]

steps:
- uses: actions/checkout@v2
Expand All @@ -41,12 +41,17 @@ jobs:
uses: conda-incubator/setup-miniconda@v2
with:
auto-update-conda: true
channels: conda-forge
python-version: ${{ matrix.python-version }}
- name: Install conda dependencies
shell: bash -l {0}
run: |
conda install -c conda-forge root
conda install pytest pyyaml pytest-cov
conda install -c conda-forge root
- name: Build
shell: bash -l {0}
run: |
pip install -e .
- name: Test with pytest
shell: bash -l {0}
run: |
Expand Down
12 changes: 6 additions & 6 deletions histgrinder/config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Configuration utilities
from typing import Union, IO, List, Any
from typing import Union, IO, List, Any, Mapping
import logging


class TransformationConfiguration(object):
def __init__(self, Input, Output, Function, Description, Parameters={}):
def __init__(self, Input: List[str], Output: List[str], Function: str, Description: str, Parameters: Mapping = {}):
self.input = Input
self.output = Output
self.function = Function
Expand All @@ -20,7 +20,7 @@ def __repr__(self):
def read_configuration(f: Union[str, IO]) -> List[TransformationConfiguration]:
import yaml
if isinstance(f, str):
fobj = open(f)
fobj = open(f, 'r')
else:
fobj = f

Expand All @@ -38,11 +38,11 @@ def read_configuration(f: Union[str, IO]) -> List[TransformationConfiguration]:

def lookup_name(name: str) -> Any:
import importlib
name = name.rsplit('.', 1)
return getattr(importlib.import_module(name[0]), name[1])
spname = name.rsplit('.', 1)
return getattr(importlib.import_module(spname[0]), spname[1])


if __name__ == "__main__":
if __name__ == "__main__": # pragma: no cover
import sys
for _ in read_configuration(sys.argv[1]):
print(_)
27 changes: 21 additions & 6 deletions histgrinder/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ def go():
choices=['DEBUG', 'INFO', 'WARNING',
'ERROR', 'CRITICAL'],
default='INFO')
parser.add_argument('--defer', action='store_true', help='Defer processing of histograms until end of input loop')
parser.add_argument('--delaywrite', action='store_true', help='Write histograms at once at end of job')
args = parser.parse_args()

logging.basicConfig(level=args.loglevel,
Expand All @@ -52,7 +54,7 @@ def go():

# Configure output
om = lookup_name(args.outmodule)()
out_configuration = {'target': args.target}
out_configuration = {'target': args.target, 'delay': args.delaywrite}
if args.prefix:
out_configuration['prefix'] = args.prefix
om.configure(out_configuration)
Expand All @@ -61,18 +63,31 @@ def go():
log.info("Warmup")
for obj in im.warmup():
for _ in transformers:
v = _.consider(obj)
_.consider(obj)

# Event loop
log.info("Beginning loop")
eventloop(im, om, transformers, args, log)
log.info("Complete")


def eventloop(im, om, transformers, args, log):
# Event loop
for obj in im:
for _ in transformers:
v = _.consider(obj)
v = _.consider(obj, defer=args.defer)
if v:
om.publish(v)
if args.defer:
log.info("Processing deferred results")
for _ in transformers:
lv = _.transform()
for v in lv:
om.publish(v)

log.info("Complete")
if args.delaywrite:
log.info("Finalizing output")
om.finalize()


if __name__ == '__main__':
if __name__ == '__main__': # pragma: no cover
go()
8 changes: 3 additions & 5 deletions histgrinder/io/root.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ def iterate(self, dryrun) -> Generator[HistObject, None, None]:
import os.path
from collections import deque
log = logging.getLogger(__name__)
infile = ROOT.TFile.Open(self.source)
if not infile:
raise ValueError(f"Unable to open input file {self.source}")
infile = ROOT.TFile.Open(self.source) # failure to open will raise OSError
dirqueue = deque([''])
while dirqueue:
dirname = dirqueue.popleft()
Expand Down Expand Up @@ -111,7 +109,7 @@ def configure(self, options: Mapping[str, Any]) -> None:
"""
if 'target' not in options:
raise ValueError("Must specify 'target' as an option "
"to ROOTInputModule")
"to ROOTOutputModule")
self.target = options['target']
self.overwrite = bool(options.get('overwrite', True))
self.prefix = options.get('prefix', '/')
Expand Down Expand Up @@ -158,7 +156,7 @@ def finalize(self) -> None:
self._write()


if __name__ == '__main__': # pragma: no test
if __name__ == '__main__': # pragma: no cover
import sys
if len(sys.argv) != 3:
print("Provide two arguments (input and output files)")
Expand Down
15 changes: 14 additions & 1 deletion histgrinder/make_sample_file.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
if __name__ == '__main__': # pragma: no test
if __name__ == '__main__': # pragma: no cover
import ROOT
import random
import array
random.seed(42)

f = ROOT.TFile.Open('example.root', 'RECREATE')
Expand All @@ -11,4 +12,16 @@
for j in range(1000):
h.Fill(random.gauss(0, 1))
h.Write()

# make a graph
x = array.array('f', [1])
y = array.array('f', [1])
g = ROOT.TGraph(1, x, y)
g.Write('graph')

# make a tree
t = ROOT.TTree('tree', 'tree')
t.Branch('x', x, 'x/F')
t.Fill()
t.Write()
f.Close()
57 changes: 36 additions & 21 deletions histgrinder/transform.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from .config import TransformationConfiguration, lookup_name
from .HistObject import HistObject
from typing import Optional, List
from typing import DefaultDict, Optional, List, Tuple, Match, Dict
import re


class Transformer(object):
def __init__(self, tc: TransformationConfiguration):
import re
import string
self.tc = tc
self.matchqueue = set()
# the number of histograms needed for a match
self.inlength = len(tc.input)
# regexes of input
Expand All @@ -26,7 +27,7 @@ def __init__(self, tc: TransformationConfiguration):
for output in self.tc.output])

# one dictionary for each input slot
self.hits = [{} for _ in range(len(self.inregexes))]
self.hits: List[Dict[Tuple[str], HistObject]] = [{} for _ in range(len(self.inregexes))]
try:
self.transform_function = lookup_name(tc.function)
if not callable(self.transform_function):
Expand All @@ -35,7 +36,7 @@ def __init__(self, tc: TransformationConfiguration):
except Exception as e:
raise ValueError(f"Unable to instantiate transformer because: {e}")

def consider(self, obj: HistObject, dryrun: bool = False) -> Optional[List[HistObject]]:
def consider(self, obj: HistObject, defer: bool = False) -> Optional[List[HistObject]]:
""" Emit a new plot if we get a full match, otherwise None """
import logging
log = logging.getLogger(__name__)
Expand All @@ -48,27 +49,41 @@ def consider(self, obj: HistObject, dryrun: bool = False) -> Optional[List[HistO
match = imatch
if match is None:
return None
# Return value list
rv = []

# Given a match, what first position histograms are relevant?
firstmatches = self._getMatchingFirstHists(match)

# Group the first position matches by integration variables
groupedfirstmatches = self._groupMatches(firstmatches)
self.matchqueue.add(match)
if defer:
return None
return self.transform()

# construct iterables & call functions
for tuplist in groupedfirstmatches.values():
hci = HistCombinationIterable(self, tuplist)
if _fullyvalid(hci):
olist = self.transform_function(hci, **self.tc.parameters)
for i, ohist in enumerate(olist):
oname = self.tc.output[i].format(**dict(zip(self.regextupnames[0], tuplist[0])))
rv.append(HistObject(oname, ohist))
def transform(self) -> List[HistObject]:
# Return value list
rv = []

firstmatchset = set()
for match in self.matchqueue:
# Given a match, what first position histograms are relevant?
firstmatches = self._getMatchingFirstHists(match)
firstmatchset.add(tuple(firstmatches))

for firstmatches in firstmatchset:
# Group the first position matches by integration variables
groupedfirstmatches = self._groupMatches(firstmatches)

# construct iterables & call functions
for tuplist in groupedfirstmatches.values():
hci = HistCombinationIterable(self, tuplist)
if _fullyvalid(hci):
olist = self.transform_function(hci, **self.tc.parameters)
if len(olist) != len(self.tc.output):
raise ValueError(f'Function {self.tc.function} gave {len(olist)} return values '
f'but the YAML configuration specifies {len(self.tc.output)}.')
for i, ohist in enumerate(olist):
oname = self.tc.output[i].format(**dict(zip(self.regextupnames[0], tuplist[0])))
rv.append(HistObject(oname, ohist))
self.matchqueue.clear()
return rv

def _getMatchingFirstHists(self, match):
def _getMatchingFirstHists(self, match: Match) -> List[Tuple[str]]:
firstmatches = []
for tup in self.hits[0]:
# does the tuple match in all spots where the pattern name matches, and
Expand All @@ -80,7 +95,7 @@ def _getMatchingFirstHists(self, match):
firstmatches.append(tup)
return firstmatches

def _groupMatches(self, firstmatches):
def _groupMatches(self, firstmatches: List[Tuple[str]]) -> DefaultDict[Tuple[str], List[Tuple[str]]]:
# group matches by self.outputnames
import collections
rv = collections.defaultdict(list)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@
],
python_requires='>=3.6',
install_requires=['PyYAML>=5'],
tests_requires=['pytest','pytest-cov'],
tests_requires=['pytest', 'pytest-cov'],
scripts=['bin/histgrinder']
)
6 changes: 6 additions & 0 deletions tests/test_badconfig.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Configuration has a typo: the key 'Inputq' below should be 'Input'
Inputq: [ 'gaussians/gaus_(?P<id0>[23])(?P<id>\d)', 'gaussians/gaus_5(?P<id>\d)' ]
Output: [ 'gauDiv_{id0}{id}' ]
Function: math.pi
Description: Testing1
6 changes: 6 additions & 0 deletions tests/test_badfunction.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Function is not a function
Input: [ 'gaussians/gaus_(?P<id0>[23])(?P<id>\d)', 'gaussians/gaus_5(?P<id>\d)' ]
Output: [ 'gauDiv_{id0}{id}' ]
Function: math.pi
Description: Testing1
6 changes: 6 additions & 0 deletions tests/test_badpattern.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Bad order: the first input pattern lacks the 'id0' group that the output name requires
Input: [ 'gaussians/gaus_5(?P<id>\d)', 'gaussians/gaus_(?P<id0>[23])(?P<id>\d)' ]
Output: [ 'gauDiv_{id0}{id}' ]
Function: histgrinder.example.transform_function_divide_ROOT
Description: Testing1
6 changes: 6 additions & 0 deletions tests/test_badreturn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Function returns a different number of values than the number of configured outputs
Input: [ 'gaussians/gaus_(?P<id0>[23])(?P<id>\d)', 'gaussians/gaus_5(?P<id>\d)' ]
Output: [ 'gauDiv_{id0}{id}' ]
Function: histgrinder.example.nop
Description: Testing1
6 changes: 6 additions & 0 deletions tests/test_badtype.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Function returns a type not handled by histgrinder
Input: [ 'gaussians/gaus_(?P<id0>[23])(?P<id>\d)', 'gaussians/gaus_5(?P<id>\d)' ]
Output: [ 'gauDiv_{id0}{id}' ]
Function: tests.test_run.returncppstr
Description: Testing1
49 changes: 49 additions & 0 deletions tests/test_functional.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Separate blocks with ---
---
# This function takes the ratio of some histograms
Input: [ 'gaussians/gaus_(?P<id0>[23])(?P<id>\d)', 'gaussians/gaus_5(?P<id>\d)' ]
Output: [ 'gauDiv_{id0}{id}' ]
Function: histgrinder.example.transform_function_divide_ROOT
#Here is how you would pass parameters
#Parameters: { variable: eta, binedges: [-2.5,-2.0,-1.5,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5] }
Description: Testing1
---
# This function sums the ratio of some histograms
Input: [ 'gaussians/gaus_(?P<id0>\d)(?P<id>\d)', 'gaussians/gaus_(?P<id0>\d)(?P<id>\d)' ]
Output: [ 'gauDiv2_{id0}' ]
Function: histgrinder.example.transform_function_divide2_ROOT
#Here is how you would pass parameters
#Parameters: { variable: eta, binedges: [-2.5,-2.0,-1.5,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5] }
Description: Testing2
---
# This function combines multiple histograms into one
Input: [ 'gaussians/gaus_(?P<id>\d+)' ]
Output: [ 'gauRMS' ]
Function: histgrinder.example.transform_function_rms_ROOT
#Here is how you would pass parameters
#Parameters: { variable: eta, binedges: [-2.5,-2.0,-1.5,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5] }
Description: Testing3
---
# This function takes the ratio of one pair of histograms
Input: [ 'gaussians/gaus_72', 'gaussians/gaus_18' ]
Output: [ 'gauDivSpecial' ]
Function: histgrinder.example.transform_function_divide_ROOT
#Here is how you would pass parameters
#Parameters: { variable: eta, binedges: [-2.5,-2.0,-1.5,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5] }
Description: Testing4
---
# This function takes the ratio of one pair of histograms, but reversed from above
Input: [ 'gaussians/gaus_18', 'gaussians/gaus_72' ]
Output: [ 'gauDivSpecialInv' ]
Function: histgrinder.example.transform_function_divide_ROOT
#Here is how you would pass parameters
#Parameters: { variable: eta, binedges: [-2.5,-2.0,-1.5,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5] }
Description: Testing5
---
# This function does nothing
Input: [ 'gaussians/graph' ]
Output: [ ]
Function: histgrinder.example.nop
#Here is how you would pass parameters
#Parameters: { variable: eta, binedges: [-2.5,-2.0,-1.5,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5] }
Description: Testing5
Loading

0 comments on commit 988dfd1

Please sign in to comment.