Package madgraph :: Package madevent :: Module gen_ximprove
[hide private]
[frames | no frames]

Source Code for Module madgraph.madevent.gen_ximprove

   1  ################################################################################ 
   2  # 
   3  # Copyright (c) 2014 The MadGraph5_aMC@NLO Development team and Contributors 
   4  # 
   5  # This file is a part of the MadGraph5_aMC@NLO project, an application which  
   6  # automatically generates Feynman diagrams and matrix elements for arbitrary 
   7  # high-energy processes in the Standard Model and beyond. 
   8  # 
   9  # It is subject to the MadGraph5_aMC@NLO license which should accompany this  
  10  # distribution. 
  11  # 
  12  # For more information, visit madgraph.phys.ucl.ac.be and amcatnlo.web.cern.ch 
  13  # 
  14  ################################################################################ 
  15  """ A python file to replace the fortran script gen_ximprove. 
  16      This script analyses the result of the survey/ previous refine and  
  17      creates the jobs for the following script. 
  18  """ 
  19  from __future__ import division 
  20   
  21  from __future__ import absolute_import 
  22  import collections 
  23  import os 
  24  import glob 
  25  import logging 
  26  import math 
  27  import re 
  28  import subprocess 
  29  import shutil 
  30  import stat 
  31  import sys 
  32  from six.moves import range 
  33  from six.moves import zip 
  34   
  35  try: 
  36      import madgraph 
  37  except ImportError: 
  38      MADEVENT = True 
  39      import internal.sum_html as sum_html 
  40      import internal.banner as bannermod 
  41      import internal.misc as misc 
  42      import internal.files as files 
  43      import internal.cluster as cluster 
  44      import internal.combine_grid as combine_grid 
  45      import internal.combine_runs as combine_runs 
  46      import internal.lhe_parser as lhe_parser 
  47  else: 
  48      MADEVENT= False 
  49      import madgraph.madevent.sum_html as sum_html 
  50      import madgraph.various.banner as bannermod 
  51      import madgraph.various.misc as misc 
  52      import madgraph.iolibs.files as files 
  53      import madgraph.various.cluster as cluster 
  54      import madgraph.madevent.combine_grid as combine_grid 
  55      import madgraph.madevent.combine_runs as combine_runs 
  56      import madgraph.various.lhe_parser as lhe_parser 
  57   
  58  logger = logging.getLogger('madgraph.madevent.gen_ximprove') 
  59  pjoin = os.path.join 
class gensym(object):
    """a class to call the fortran gensym executable and handle it's output
    in order to create the various job that are needed for the survey"""

    #convenient shortcut for the formatting of variable
    @ staticmethod
    def format_variable(*args):
        # Delegate to the banner module's ConfigFile formatter (type coercion
        # of run_card style entries).
        return bannermod.ConfigFile.format_variable(*args)

    # Class-level defaults; several are overridden per-run in __init__
    # from the run_card (survey_splitting / survey_nchannel_per_job).
    combining_job = 2 # number of channel by ajob
    splitted_grid = False   # False, or an int = nb of sub-grids per channel
    min_iterations = 3      # minimum number of survey iterations per channel
    mode= "survey"          # passed down to write_grid_for_submission
    def __init__(self, cmd, opt=None):
        """Initialise the survey handler.

        cmd: the madevent interface object; provides run_card, me_dir,
             cluster, opts and proc_characteristics.
        opt: optional options forwarded to a cooperative super().__init__.
        """

        # cooperative __init__ for possible multiple inheritance; a plain
        # object base raises TypeError on the extra arguments -> ignore.
        try:
            super(gensym, self).__init__(cmd, opt)
        except TypeError:
            pass

        # Run statistics, a dictionary of RunStatistics(), with
        self.run_statistics = {}

        self.cmd = cmd
        self.run_card = cmd.run_card
        self.me_dir = cmd.me_dir

        # dictionary to keep track of the precision when combining iteration
        # keyed by (Pdir, G); int default gives a 0 starting accumulator.
        self.cross = collections.defaultdict(int)
        self.abscross = collections.defaultdict(int)
        self.sigma = collections.defaultdict(int)
        self.chi2 = collections.defaultdict(int)

        self.splitted_grid = False
        if self.cmd.proc_characteristics['loop_induced']:
            # loop-induced processes are expensive: split the grid creation
            # and tighten the default accuracy goal.
            nexternal = self.cmd.proc_characteristics['nexternal']
            self.splitted_grid = max(2, (nexternal-2)**2)
            if hasattr(self.cmd, "opts") and self.cmd.opts['accuracy'] == 0.1:
                self.cmd.opts['accuracy'] = 0.02

        # on multicore, size the splitting to the number of cores
        if isinstance(cmd.cluster, cluster.MultiCore) and self.splitted_grid > 1:
            self.splitted_grid = int(cmd.cluster.nb_core**0.5)
            if self.splitted_grid == 1 and cmd.cluster.nb_core > 1:
                self.splitted_grid = 2

        #if the user defines it in the run_card:
        if self.run_card['survey_splitting'] != -1:
            self.splitted_grid = self.run_card['survey_splitting']
        if self.run_card['survey_nchannel_per_job'] != -1:
            self.combining_job = self.run_card['survey_nchannel_per_job']
        elif self.run_card['hard_survey'] > 1:
            self.combining_job = 1

        # per-directory splitting bookkeeping; the lambdas are default
        # accessors that submit_to_cluster may replace by dict lookups.
        self.splitted_Pdir = {}
        self.splitted_for_dir = lambda x,y: self.splitted_grid
        self.combining_job_for_Pdir = lambda x: self.combining_job
        self.lastoffset = {}
    def launch(self, to_submit=True, clean=True):
        """Compile and run ./gensym in each subprocess directory, parse its
        output into the list of channels to survey, and (optionally) submit
        the corresponding survey jobs.

        Returns (job_list, P_zero_result) where job_list maps Pdir ->
        channel names (empty dict if everything was already submitted) and
        P_zero_result lists subprocess dirs with no phase-space.
        """

        self.subproc = [l.strip() for l in open(pjoin(self.me_dir,'SubProcesses',
                                                      'subproc.mg'))]
        subproc = self.subproc

        P_zero_result = [] # check the number of times where they are no phase-space

        nb_tot_proc = len(subproc)
        job_list = {}
        for nb_proc,subdir in enumerate(subproc):
            self.cmd.update_status('Compiling for process %s/%s. <br> (previous processes already running)' % \
                                   (nb_proc+1,nb_tot_proc), level=None)

            subdir = subdir.strip()
            Pdir = pjoin(self.me_dir, 'SubProcesses',subdir)
            logger.info(' %s ' % subdir)

            # clean previous run
            if clean:
                for match in misc.glob('*ajob*', Pdir):
                    if os.path.basename(match)[:4] in ['ajob', 'wait', 'run.', 'done']:
                        os.remove(match)
                for match in misc.glob('G*', Pdir):
                    if os.path.exists(pjoin(match,'results.dat')):
                        os.remove(pjoin(match, 'results.dat'))
                    if os.path.exists(pjoin(match, 'ftn25')):
                        os.remove(pjoin(match, 'ftn25'))

            #compile gensym
            self.cmd.compile(['gensym'], cwd=Pdir)
            if not os.path.exists(pjoin(Pdir, 'gensym')):
                raise Exception('Error make gensym not successful')

            # Launch gensym
            p = misc.Popen(['./gensym'], stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT, cwd=Pdir)
            #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts
            (stdout, _) = p.communicate(''.encode())
            stdout = stdout.decode('ascii')
            # gensym signals "no phase-space" by writing an 'error' file at
            # the top level: record the dir and move on.
            if os.path.exists(pjoin(self.me_dir,'error')):
                files.mv(pjoin(self.me_dir,'error'), pjoin(Pdir,'ajob.no_ps.log'))
                P_zero_result.append(subdir)
                continue

            jobs = stdout.split()
            job_list[Pdir] = jobs
            try:
                # check that all input are valid
                [float(s) for s in jobs]
            except Exception:
                # gensym mixed diagnostic text with the channel list: keep
                # the single line made only of numbers (there must be
                # exactly one such line, otherwise the output is ambiguous).
                logger.debug("unformated string found in gensym. Please check:\n %s" % stdout)
                done = False
                job_list[Pdir] = []
                lines = stdout.split('\n')
                for l in lines:
                    try:
                        [float(s) for s in l.split()]
                    except:
                        continue
                    else:
                        if done:
                            raise Exception('Parsing error in gensym: %s' % stdout)
                        job_list[Pdir] = l.split()
                        done = True
                if not done:
                    raise Exception('Parsing error in gensym: %s' % stdout)

            self.cmd.compile(['madevent'], cwd=Pdir)
            if to_submit:
                # submit immediately so compilation of the next Pdir overlaps
                # with running jobs; job_list is then reset.
                self.submit_to_cluster(job_list)
                job_list = {}

        return job_list, P_zero_result
200 - def resubmit(self, min_precision=1.0, resubmit_zero=False):
201 """collect the result of the current run and relaunch each channel 202 not completed or optionally a completed one with a precision worse than 203 a threshold (and/or the zero result channel)""" 204 205 job_list, P_zero_result = self.launch(to_submit=False, clean=False) 206 207 for P , jobs in dict(job_list).items(): 208 misc.sprint(jobs) 209 to_resub = [] 210 for job in jobs: 211 if os.path.exists(pjoin(P, 'G%s' % job)) and os.path.exists(pjoin(P, 'G%s' % job, 'results.dat')): 212 one_result = sum_html.OneResult(job) 213 try: 214 one_result.read_results(pjoin(P, 'G%s' % job, 'results.dat')) 215 except: 216 to_resub.append(job) 217 if one_result.xsec == 0: 218 if resubmit_zero: 219 to_resub.append(job) 220 elif max(one_result.xerru, one_result.xerrc)/one_result.xsec > min_precision: 221 to_resub.append(job) 222 else: 223 to_resub.append(job) 224 if to_resub: 225 for G in to_resub: 226 try: 227 shutil.rmtree(pjoin(P, 'G%s' % G)) 228 except Exception as error: 229 misc.sprint(error) 230 pass 231 misc.sprint(to_resub) 232 self.submit_to_cluster({P: to_resub})
233 234 235 236 237 238 239 240 241 242 243
    def submit_to_cluster(self, job_list):
        """Dispatch the survey jobs for job_list ({Pdir: [channel, ...]}),
        applying the run_card 'job_strategy' tuning, then delegating to the
        splitted or non-splitted submission routine."""

        if self.run_card['job_strategy'] > 0:
            # strategies are applied one Pdir at a time: recurse on singletons
            if len(job_list) > 1:
                for path, dirs in job_list.items():
                    self.submit_to_cluster({path:dirs})
                return
            path, value = list(job_list.items())[0]
            nexternal = self.cmd.proc_characteristics['nexternal']
            # read the per-Pdir NEXTERNAL from the fortran include file;
            # it differs from the global one for decay-chain topologies.
            current = open(pjoin(path, "nexternal.inc")).read()
            ext = re.search(r"PARAMETER \(NEXTERNAL=(\d+)\)", current).group(1)

            if self.run_card['job_strategy'] == 2:
                # strategy 2: split only the "biggest" Pdirs (those at the
                # full multiplicity)
                self.splitted_grid = 2
                if nexternal == int(ext):
                    to_split = 2
                else:
                    to_split = 0
                if hasattr(self, 'splitted_Pdir'):
                    self.splitted_Pdir[path] = to_split
                else:
                    self.splitted_Pdir = {path: to_split}
                # NOTE(review): indentation of this line is ambiguous in the
                # rendered source; it must run unconditionally for the per-dir
                # value to take effect (hasattr is always True after __init__).
                self.splitted_for_dir = lambda x,y : self.splitted_Pdir[x]
            elif self.run_card['job_strategy'] == 1:
                # strategy 1: do not combine channels of the biggest Pdirs
                if nexternal == int(ext):
                    combine = 1
                else:
                    combine = self.combining_job
                if hasattr(self, 'splitted_Pdir'):
                    self.splitted_Pdir[path] = combine
                else:
                    self.splitted_Pdir = {path: combine}
                # NOTE(review): same indentation ambiguity as above.
                self.combining_job_for_Pdir = lambda x : self.splitted_Pdir[x]

        # choose the submission mode: no splitting when disabled, when running
        # single-machine (cluster_mode 0) or multicore with a single core.
        if not self.splitted_grid:
            return self.submit_to_cluster_no_splitting(job_list)
        elif self.cmd.cluster_mode == 0:
            return self.submit_to_cluster_no_splitting(job_list)
        elif self.cmd.cluster_mode == 2 and self.cmd.options['nb_core'] == 1:
            return self.submit_to_cluster_no_splitting(job_list)
        else:
            return self.submit_to_cluster_splitted(job_list)
    def submit_to_cluster_no_splitting(self, job_list):
        """submit the survey without the parralelization.
        This is the old mode which is still usefull in single core"""

        # write the template file for the parameter file
        self.write_parameter(parralelization=False, Pdirs=list(job_list.keys()))

        # launch the job with the appropriate grouping: each survey.sh call
        # receives an offset ('0' here) followed by up to
        # combining_job_for_Pdir(Pdir) channel names.
        for Pdir, jobs in job_list.items():
            jobs = list(jobs)
            i = 0
            while jobs:
                i += 1
                to_submit = ['0'] # the first entry is actually the offset
                for _ in range(self.combining_job_for_Pdir(Pdir)):
                    if jobs:
                        to_submit.append(jobs.pop(0))

                self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'survey.sh'),
                                    argument=to_submit,
                                    cwd=pjoin(self.me_dir,'SubProcesses' , Pdir))
    def create_resubmit_one_iter(self, Pdir, G, submit_ps, nb_job, step=0):
        """prepare the input_file for submitting the channel

        Pdir: subprocess directory (relative names are resolved under
              me_dir/SubProcesses).
        G: channel identifier.
        submit_ps: number of phase-space points per job.
        nb_job: number of parallel jobs to submit for this channel.
        step: iteration counter (0 resets the per-channel offset).
        """

        if 'SubProcesses' not in Pdir:
            Pdir = pjoin(self.me_dir, 'SubProcesses', Pdir)

        #keep track of how many job are sended
        self.splitted_Pdir[(Pdir, G)] = int(nb_job)

        # 1. write the new input_app.txt
        run_card = self.cmd.run_card
        options = {'event' : submit_ps,
                   'maxiter': 1,
                   'miniter': 1,
                   'accuracy': self.cmd.opts['accuracy'],
                   'helicity': run_card['nhel_survey'] if 'nhel_survey' in run_card \
                                                       else run_card['nhel'],
                   'gridmode': -2,   # reuse the existing grid (ftn25)
                   'channel' : G
                   }

        Gdir = pjoin(Pdir, 'G%s' % G)
        self.write_parameter_file(pjoin(Gdir, 'input_app.txt'), options)

        # 2. check that ftn25 exists.
        assert os.path.exists(pjoin(Gdir, "ftn25"))

        # 3. Submit the new jobs
        #call back function: combine_iteration is fired when the whole packet
        #(all nb_job jobs) is done.
        packet = cluster.Packet((Pdir, G, step+1),
                                self.combine_iteration,
                                (Pdir, G, step+1))

        if step == 0:
            self.lastoffset[(Pdir, G)] = 0

        # resubmit the new jobs; each job gets a unique, monotonically
        # increasing offset so random seeds do not overlap between steps.
        for i in range(int(nb_job)):
            name = "G%s_%s" % (G,i+1)
            self.lastoffset[(Pdir, G)] += 1
            offset = self.lastoffset[(Pdir, G)]
            self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'refine_splitted.sh'),
                                argument=[name, 'G%s'%G, offset],
                                cwd= Pdir,
                                packet_member=packet)
    def submit_to_cluster_splitted(self, job_list):
        """ submit the version of the survey with splitted grid creation
        """

        #if self.splitted_grid <= 1:
        #    return self.submit_to_cluster_no_splitting(job_list)

        for Pdir, jobs in job_list.items():
            if not jobs:
                continue
            # NOTE(review): this *returns* from inside the loop, so if
            # job_list ever contains several Pdirs and the first one is not
            # splitted, the remaining Pdirs are never submitted — confirm
            # callers always pass a single-Pdir dict here.
            if self.splitted_for_dir(Pdir, jobs[0]) <= 1:
                return self.submit_to_cluster_no_splitting({Pdir:jobs})

            self.write_parameter(parralelization=True, Pdirs=[Pdir])
            # launch the job with the appropriate grouping: one packet per
            # channel, whose callback (combine_iteration) merges the
            # splitted sub-grids when all sub-jobs are done.
            for job in jobs:
                packet = cluster.Packet((Pdir, job, 1), self.combine_iteration, (Pdir, job, 1))
                for i in range(self.splitted_for_dir(Pdir, job)):
                    self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'survey.sh'),
                                        argument=[i+1, job],
                                        cwd=pjoin(self.me_dir,'SubProcesses' , Pdir),
                                        packet_member=packet)
    def combine_iteration(self, Pdir, G, step):
        """Cluster callback: merge the sub-grids of channel G at iteration
        *step*, decide whether one more iteration is needed and either
        resubmit or finalise (grid/events/results.dat) the channel.

        Always returns 0 (cluster callback convention).
        """

        grid_calculator, cross, error = self.combine_grid(Pdir, G, step)

        # Compute the number of events used for this run.
        nb_events = grid_calculator.target_evt

        Gdirs = [] #build the the list of directory
        for i in range(self.splitted_for_dir(Pdir, G)):
            path = pjoin(Pdir, "G%s_%s" % (G, i+1))
            Gdirs.append(path)

        # 4. make the submission of the next iteration
        #    Three cases - less than 3 iteration -> continue
        #                - more than 3 and less than 5 -> check error
        #                - more than 5 -> prepare info for refine
        need_submit = False
        if step < self.min_iterations and cross != 0:
            if step == 1:
                need_submit = True
            else:
                # skip channels whose relative contribution is negligible or
                # already far more precise than required
                across = self.abscross[(Pdir,G)]/(self.sigma[(Pdir,G)]+1e-99)
                tot_across = self.get_current_axsec()
                if across / tot_across < 1e-6:
                    need_submit = False
                elif error < self.cmd.opts['accuracy'] / 100:
                    need_submit = False
                else:
                    need_submit = True
        elif step >= self.cmd.opts['iterations']:
            need_submit = False
        elif self.cmd.opts['accuracy'] < 0:
            #check for luminosity
            raise Exception("Not Implemented")
        elif self.abscross[(Pdir,G)] == 0:
            need_submit = False
        else:
            across = self.abscross[(Pdir,G)]/(self.sigma[(Pdir,G)]+1e-99)
            tot_across = self.get_current_axsec()
            if across == 0:
                need_submit = False
            elif across / tot_across < 1e-5:
                need_submit = False
            elif error > self.cmd.opts['accuracy']:
                need_submit = True
            else:
                need_submit = False

        if cross:
            # write the combined grid so the next iteration / refine can use it
            grid_calculator.write_grid_for_submission(Pdir,G,
                self.splitted_for_dir(Pdir, G),
                nb_events,mode=self.mode,
                conservative_factor=5.0)

        # number of significant digits matched to the current error
        xsec_format = '.%ig'%(max(3,int(math.log10(1.0/float(error)))+2)
                      if float(cross)!=0.0 and float(error)!=0.0 else 8)
        if need_submit:
            message = "%%s/G%%s is at %%%s +- %%.3g pb. Now submitting iteration #%s."%(xsec_format, step+1)
            logger.info(message%\
                        (os.path.basename(Pdir), G, float(cross),
                         float(error)*float(cross)))
            self.resubmit_survey(Pdir,G, Gdirs, step)
        elif cross:
            logger.info("Survey finished for %s/G%s at %s"%(
                os.path.basename(Pdir),G,('%%%s +- %%.3g pb'%xsec_format))%
                (float(cross), float(error)*float(cross)))
            # prepare information for refine
            newGpath = pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G)
            if not os.path.exists(newGpath):
                os.mkdir(newGpath)

            # copy the new grid:
            files.cp(pjoin(Gdirs[0], 'ftn25'),
                     pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G, 'ftn26'))

            # copy the events
            # NOTE(review): fsock is never explicitly closed; relies on GC.
            fsock = open(pjoin(newGpath, 'events.lhe'), 'w')
            for Gdir in Gdirs:
                fsock.write(open(pjoin(Gdir, 'events.lhe')).read())

            # copy one log
            files.cp(pjoin(Gdirs[0], 'log.txt'),
                     pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G))

            # create the appropriate results.dat
            self.write_results(grid_calculator, cross, error, Pdir, G, step)
        else:
            logger.info("Survey finished for %s/G%s [0 cross]", os.path.basename(Pdir),G)

            Gdir = pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G)
            if not os.path.exists(Gdir):
                os.mkdir(Gdir)
            # copy one log
            files.cp(pjoin(Gdirs[0], 'log.txt'), Gdir)
            # create the appropriate results.dat
            self.write_results(grid_calculator, cross, error, Pdir, G, step)

        return 0
489 - def combine_grid(self, Pdir, G, step, exclude_sub_jobs=[]):
490 """ exclude_sub_jobs is to remove some of the subjobs if a numerical 491 issue is detected in one of them. Warning is issue when this occurs. 492 """ 493 494 # 1. create an object to combine the grid information and fill it 495 grid_calculator = combine_grid.grid_information(self.run_card['nhel']) 496 497 for i in range(self.splitted_for_dir(Pdir, G)): 498 if i in exclude_sub_jobs: 499 continue 500 path = pjoin(Pdir, "G%s_%s" % (G, i+1)) 501 fsock = misc.mult_try_open(pjoin(path, 'results.dat')) 502 one_result = grid_calculator.add_results_information(fsock) 503 fsock.close() 504 if one_result.axsec == 0: 505 grid_calculator.onefail = True 506 continue # grid_information might not exists 507 fsock = misc.mult_try_open(pjoin(path, 'grid_information')) 508 grid_calculator.add_one_grid_information(fsock) 509 fsock.close() 510 os.remove(pjoin(path, 'results.dat')) 511 #os.remove(pjoin(path, 'grid_information')) 512 513 514 515 #2. combine the information about the total crossection / error 516 # start by keep the interation in memory 517 cross, across, sigma = grid_calculator.get_cross_section() 518 519 #3. Try to avoid one single PS point which ruins the integration 520 # Should be related to loop evaluation instability. 521 maxwgt = grid_calculator.get_max_wgt(0.01) 522 if maxwgt: 523 nunwgt = grid_calculator.get_nunwgt(maxwgt) 524 # Make sure not to apply the security below during the first step of the 525 # survey. Also, disregard channels with a contribution relative to the 526 # total cross-section smaller than 1e-8 since in this case it is unlikely 527 # that this channel will need more than 1 event anyway. 
528 apply_instability_security = False 529 rel_contrib = 0.0 530 if (self.__class__ != gensym or step > 1): 531 Pdir_across = 0.0 532 Gdir_across = 0.0 533 for (mPdir,mG) in self.abscross.keys(): 534 if mPdir == Pdir: 535 Pdir_across += (self.abscross[(mPdir,mG)]/ 536 (self.sigma[(mPdir,mG)]+1e-99)) 537 if mG == G: 538 Gdir_across += (self.abscross[(mPdir,mG)]/ 539 (self.sigma[(mPdir,mG)]+1e-99)) 540 rel_contrib = abs(Gdir_across/(Pdir_across+1e-99)) 541 if rel_contrib > (1.0e-8) and \ 542 nunwgt < 2 and len(grid_calculator.results) > 1: 543 apply_instability_security = True 544 545 if apply_instability_security: 546 # check the ratio between the different submit 547 th_maxwgt = [(r.th_maxwgt,i) for i,r in enumerate(grid_calculator.results)] 548 th_maxwgt.sort() 549 ratio = th_maxwgt[-1][0]/th_maxwgt[-2][0] 550 if ratio > 1e4: 551 logger.warning( 552 """"One Event with large weight have been found (ratio = %.3g) in channel G%s (with rel.contrib=%.3g). 553 This is likely due to numerical instabilities. The associated job is discarded to recover. 554 For offline investigation, the problematic discarded events are stored in: 555 %s"""%(ratio,G,rel_contrib,pjoin(Pdir,'DiscardedUnstableEvents'))) 556 exclude_sub_jobs = list(exclude_sub_jobs) 557 exclude_sub_jobs.append(th_maxwgt[-1][1]) 558 grid_calculator.results.run_statistics['skipped_subchannel'] += 1 559 560 # Add some monitoring of the problematic events 561 gPath = pjoin(Pdir, "G%s_%s" % (G, th_maxwgt[-1][1]+1)) 562 if os.path.isfile(pjoin(gPath,'events.lhe')): 563 lhe_file = lhe_parser.EventFile(pjoin(gPath,'events.lhe')) 564 discardedPath = pjoin(Pdir,'DiscardedUnstableEvents') 565 if not os.path.exists(discardedPath): 566 os.mkdir(discardedPath) 567 if os.path.isdir(discardedPath): 568 # Keep only the event with a maximum weight, as it surely 569 # is the problematic one. 
570 evtRecord = open(pjoin(discardedPath,'discarded_G%s.dat'%G),'a') 571 lhe_file.seek(0) #rewind the file 572 try: 573 evtRecord.write('\n'+str(max(lhe_file,key=lambda evt:abs(evt.wgt)))) 574 except Exception: 575 #something wrong write the full file. 576 lhe_file.close() 577 evtRecord.write(pjoin(gPath,'events.lhe').read()) 578 evtRecord.close() 579 580 return self.combine_grid(Pdir, G, step, exclude_sub_jobs) 581 582 583 if across !=0: 584 if sigma != 0: 585 self.cross[(Pdir,G)] += cross**3/sigma**2 586 self.abscross[(Pdir,G)] += across * cross**2/sigma**2 587 self.sigma[(Pdir,G)] += cross**2/ sigma**2 588 self.chi2[(Pdir,G)] += cross**4/sigma**2 589 # and use those iteration to get the current estimator 590 cross = self.cross[(Pdir,G)]/self.sigma[(Pdir,G)] 591 if step > 1: 592 error = math.sqrt(abs((self.chi2[(Pdir,G)]/cross**2 - \ 593 self.sigma[(Pdir,G)])/(step-1))/self.sigma[(Pdir,G)]) 594 else: 595 error = sigma/cross 596 else: 597 self.cross[(Pdir,G)] = cross 598 self.abscross[(Pdir,G)] = across 599 self.sigma[(Pdir,G)] = 0 600 self.chi2[(Pdir,G)] = 0 601 cross = self.cross[(Pdir,G)] 602 error = 0 603 604 else: 605 error = 0 606 607 grid_calculator.results.compute_values(update_statistics=True) 608 if (str(os.path.basename(Pdir)), G) in self.run_statistics: 609 self.run_statistics[(str(os.path.basename(Pdir)), G)]\ 610 .aggregate_statistics(grid_calculator.results.run_statistics) 611 else: 612 self.run_statistics[(str(os.path.basename(Pdir)), G)] = \ 613 grid_calculator.results.run_statistics 614 615 self.warnings_from_statistics(G, grid_calculator.results.run_statistics) 616 stats_msg = grid_calculator.results.run_statistics.nice_output( 617 '/'.join([os.path.basename(Pdir),'G%s'%G])) 618 619 if stats_msg: 620 logger.log(5, stats_msg) 621 622 # Clean up grid_information to avoid border effects in case of a crash 623 for i in range(self.splitted_for_dir(Pdir, G)): 624 path = pjoin(Pdir, "G%s_%s" % (G, i+1)) 625 try: 626 os.remove(pjoin(path, 
'grid_information')) 627 except OSError as oneerror: 628 if oneerror.errno != 2: 629 raise 630 return grid_calculator, cross, error
631
632 - def warnings_from_statistics(self,G,stats):
633 """Possible warn user for worrying MadLoop stats for this channel.""" 634 635 if stats['n_madloop_calls']==0: 636 return 637 638 EPS_fraction = float(stats['exceptional_points'])/stats['n_madloop_calls'] 639 640 msg = "Channel %s has encountered a fraction of %.3g\n"+ \ 641 "of numerically unstable loop matrix element computations\n"+\ 642 "(which could not be rescued using quadruple precision).\n"+\ 643 "The results might not be trusted." 644 645 if 0.01 > EPS_fraction > 0.001: 646 logger.warning(msg%(G,EPS_fraction)) 647 elif EPS_fraction > 0.01: 648 logger.critical((msg%(G,EPS_fraction)).replace('might', 'can')) 649 raise Exception((msg%(G,EPS_fraction)).replace('might', 'can'))
650
651 - def get_current_axsec(self):
652 653 across = 0 654 for (Pdir,G) in self.abscross: 655 across += self.abscross[(Pdir,G)]/(self.sigma[(Pdir,G)]+1e-99) 656 return across
657
658 - def write_results(self, grid_calculator, cross, error, Pdir, G, step):
659 660 #compute the value 661 if cross == 0: 662 abscross,nw, luminosity = 0, 0, 0 663 wgt, maxit,nunwgt, wgt, nevents = 0,0,0,0,0 664 maxwgt = 0 665 error = 0 666 else: 667 grid_calculator.results.compute_values() 668 abscross = self.abscross[(Pdir,G)]/self.sigma[(Pdir,G)] 669 nw = grid_calculator.results.nw 670 wgt = grid_calculator.results.wgt 671 maxit = step 672 wgt = 0 673 nevents = grid_calculator.results.nevents 674 maxwgt = grid_calculator.get_max_wgt() 675 nunwgt = grid_calculator.get_nunwgt() 676 luminosity = nunwgt/cross 677 678 #format the results.dat 679 def fstr(nb): 680 data = '%E' % nb 681 nb, power = data.split('E') 682 nb = float(nb) /10 683 power = int(power) + 1 684 return '%.5fE%+03i' %(nb,power)
685 line = '%s %s %s %i %i %i %i %s %s %s %s 0.0 0\n' % \ 686 (fstr(cross), fstr(error*cross), fstr(error*cross), 687 nevents, nw, maxit,nunwgt, 688 fstr(luminosity), fstr(wgt), fstr(abscross), fstr(maxwgt)) 689 690 fsock = open(pjoin(self.me_dir,'SubProcesses' , Pdir, 'G%s' % G, 691 'results.dat'),'w') 692 fsock.writelines(line) 693 fsock.close()
694
    def resubmit_survey(self, Pdir, G, Gdirs, step):
        """submit the next iteration of the survey"""

        # 1. write the new input_app.txt to double the number of points
        #    (2**step growth, shared among the splitted sub-grids)
        run_card = self.cmd.run_card
        options = {'event' : 2**(step) * self.cmd.opts['points'] / self.splitted_grid,
                   'maxiter': 1,
                   'miniter': 1,
                   'accuracy': self.cmd.opts['accuracy'],
                   'helicity': run_card['nhel_survey'] if 'nhel_survey' in run_card \
                                                       else run_card['nhel'],
                   'gridmode': -2,
                   'channel' : ''
                   }

        # helicity summed per event -> compensate with more points
        if int(options['helicity']) == 1:
            options['event'] = options['event'] * 2**(self.cmd.proc_characteristics['nexternal']//3)

        for Gdir in Gdirs:
            self.write_parameter_file(pjoin(Gdir, 'input_app.txt'), options)

        #2. resubmit the new jobs; the packet callback combines the next
        #   iteration once every sub-job is finished.
        packet = cluster.Packet((Pdir, G, step+1), self.combine_iteration, \
                                (Pdir, G, step+1))
        nb_step = len(Gdirs) * (step+1)
        for i,subdir in enumerate(Gdirs):
            subdir = subdir.rsplit('_',1)[1]
            subdir = int(subdir)
            # unique offset per sub-job and per step (seed de-correlation)
            offset = nb_step+i+1
            offset = str(offset)
            tag = "%s.%s" % (subdir, offset)

            self.cmd.launch_job(pjoin(self.me_dir, 'SubProcesses', 'survey.sh'),
                                argument=[tag, G],
                                cwd=pjoin(self.me_dir,'SubProcesses' , Pdir),
                                packet_member=packet)
736 - def write_parameter_file(self, path, options):
737 """ """ 738 739 template =""" %(event)s %(maxiter)s %(miniter)s !Number of events and max and min iterations 740 %(accuracy)s !Accuracy 741 %(gridmode)s !Grid Adjustment 0=none, 2=adjust 742 1 !Suppress Amplitude 1=yes 743 %(helicity)s !Helicity Sum/event 0=exact 744 %(channel)s """ 745 options['event'] = int(options['event']) 746 open(path, 'w').write(template % options)
747 748 749
750 - def write_parameter(self, parralelization, Pdirs=None):
751 """Write the parameter of the survey run""" 752 753 run_card = self.cmd.run_card 754 755 options = {'event' : self.cmd.opts['points'], 756 'maxiter': self.cmd.opts['iterations'], 757 'miniter': self.min_iterations, 758 'accuracy': self.cmd.opts['accuracy'], 759 'helicity': run_card['nhel_survey'] if 'nhel_survey' in run_card \ 760 else run_card['nhel'], 761 'gridmode': 2, 762 'channel': '' 763 } 764 765 if int(options['helicity'])== 1: 766 options['event'] = options['event'] * 2**(self.cmd.proc_characteristics['nexternal']//3) 767 768 if parralelization: 769 options['gridmode'] = -2 770 options['maxiter'] = 1 #this is automatic in dsample anyway 771 options['miniter'] = 1 #this is automatic in dsample anyway 772 options['event'] /= self.splitted_grid 773 774 if not Pdirs: 775 Pdirs = self.subproc 776 777 for Pdir in Pdirs: 778 path =pjoin(Pdir, 'input_app.txt') 779 self.write_parameter_file(path, options)
780
class gen_ximprove(object):
    """Base class replacing the fortran gen_ximprove step: analyse the survey
    results and create the refine jobs. __new__ dispatches to the concrete
    subclass suited to the run."""

    # some hardcoded value which impact the generation
    gen_events_security = 1.2 # multiply the number of requested event by this number for security
    combining_job = 0 # allow to run multiple channel in sequence
    max_request_event = 1000 # split jobs if a channel if it needs more than that
    max_event_in_iter = 5000
    min_event_in_iter = 1000
    max_splitting = 130 # maximum duplication of a given channel
    min_iter = 3
    max_iter = 9
    keep_grid_for_refine = False # only apply if needed to split the job

    #convenient shortcut for the formatting of variable
    @ staticmethod
    def format_variable(*args):
        # Delegate to the banner module's ConfigFile formatter.
        return bannermod.ConfigFile.format_variable(*args)
801 802
803 - def __new__(cls, cmd, opt):
804 """Choose in which type of refine we want to be""" 805 806 if cmd.proc_characteristics['loop_induced']: 807 return super(gen_ximprove, cls).__new__(gen_ximprove_share) 808 elif gen_ximprove.format_variable(cmd.run_card['gridpack'], bool): 809 return super(gen_ximprove, cls).__new__(gen_ximprove_gridpack) 810 elif cmd.run_card["job_strategy"] == 2: 811 return super(gen_ximprove, cls).__new__(gen_ximprove_share) 812 else: 813 return super(gen_ximprove, cls).__new__(gen_ximprove_v4)
814 815
    def __init__(self, cmd, opt=None):
        """Initialise the refine handler from the madevent interface *cmd*.
        *opt* may be a dict (forwarded to configure) or a GridpackCard
        (forwarded to configure_gridpack)."""

        # cooperative __init__; a plain object base rejects the extra args
        try:
            super(gen_ximprove, self).__init__(cmd, opt)
        except TypeError:
            pass

        self.run_statistics = {}
        self.cmd = cmd
        self.run_card = cmd.run_card
        run_card = self.run_card
        self.me_dir = cmd.me_dir

        #extract from the run_card the information that we need.
        self.gridpack = run_card['gridpack']
        self.nhel = run_card['nhel']
        if "nhel_refine" in run_card:
            self.nhel = run_card["nhel_refine"]

        if self.run_card['refine_evt_by_job'] != -1:
            self.max_request_event = run_card['refine_evt_by_job']

        # Default option for the run
        self.gen_events = True
        self.parralel = False
        # parameter which was input for the normal gen_ximprove run
        self.err_goal = 0.01
        self.max_np = 9
        self.split_channels = False
        # parameter for the gridpack run
        self.nreq = 2000
        self.iseed = 4321

        # placeholder for information
        self.results = 0 #updated in launch/update_html

        if isinstance(opt, dict):
            self.configure(opt)
        elif isinstance(opt, bannermod.GridpackCard):
            self.configure_gridpack(opt)
858 - def __call__(self):
859 return self.launch()
860
    def launch(self):
        """running

        Collect the survey results, then create either the event-generation
        jobs (gen_events mode) or the precision-refinement jobs.
        get_job_for_event / get_job_for_precision are provided by the
        concrete subclasses.
        """

        #start the run
        self.handle_seed()
        self.results = sum_html.collect_result(self.cmd,
                          main_dir=pjoin(self.cmd.me_dir,'SubProcesses')) #main_dir is for gridpack readonly mode
        if self.gen_events:
            # We run to provide a given number of events
            self.get_job_for_event()
        else:
            # We run to achieve a given precision
            self.get_job_for_precision()
876 - def configure(self, opt):
877 """Defines some parameter of the run""" 878 879 for key, value in opt.items(): 880 if key in self.__dict__: 881 targettype = type(getattr(self, key)) 882 setattr(self, key, self.format_variable(value, targettype, key)) 883 else: 884 raise Exception('%s not define' % key) 885 886 887 # special treatment always do outside the loop to avoid side effect 888 if 'err_goal' in opt: 889 if self.err_goal < 1: 890 logger.info("running for accuracy %s%%" % (self.err_goal*100)) 891 self.gen_events = False 892 elif self.err_goal >= 1: 893 logger.info("Generating %s unweighted events." % self.err_goal) 894 self.gen_events = True 895 self.err_goal = self.err_goal * self.gen_events_security # security
896
897 - def handle_seed(self):
898 """not needed but for gridpack --which is not handle here for the moment""" 899 return
900 901
    def find_job_for_event(self):
        """return the list of channel that need to be improved

        Returns (goal_lum, to_refine): the target luminosity in pb^-1 and
        the channel result objects that must be refined to reach it.
        """

        assert self.err_goal >= 1     # here err_goal is a number of events
        self.err_goal = int(self.err_goal)

        goal_lum = self.err_goal/(self.results.axsec+1e-99)    #pb^-1
        logger.info('Effective Luminosity %s pb^-1', goal_lum)

        # flatten the per-Pdir results into a single channel list,
        # largest luminosity first
        all_channels = sum([list(P) for P in self.results],[])
        all_channels.sort(key= lambda x:x.get('luminosity'), reverse=True)

        to_refine = []
        for C in all_channels:
            if C.get('axsec') == 0:
                continue
            # refine if the channel is below the luminosity goal (with half
            # of the security margin as tolerance) ...
            if goal_lum/(C.get('luminosity')+1e-99) >= 1 + (self.gen_events_security-1)/2:
                logger.debug("channel %s is at lum=%s (need to improve by %s) (xsec=%s pb)", C.name, C.get('luminosity'), goal_lum/(C.get('luminosity')+1e-99), C.get('xsec'))
                to_refine.append(C)
            # ... or if its error is anomalously large
            elif C.get('xerr') > max(C.get('axsec'),
                        (1/(100*math.sqrt(self.err_goal)))*all_channels[-1].get('axsec')):
                to_refine.append(C)

        logger.info('need to improve %s channels' % len(to_refine))
        return goal_lum, to_refine
928 - def update_html(self):
929 """update the html from this object since it contains all the information""" 930 931 932 run = self.cmd.results.current['run_name'] 933 if not os.path.exists(pjoin(self.cmd.me_dir, 'HTML', run)): 934 os.mkdir(pjoin(self.cmd.me_dir, 'HTML', run)) 935 936 unit = self.cmd.results.unit 937 P_text = "" 938 if self.results: 939 Presults = self.results 940 else: 941 self.results = sum_html.collect_result(self.cmd, None) 942 Presults = self.results 943 944 for P_comb in Presults: 945 P_text += P_comb.get_html(run, unit, self.cmd.me_dir) 946 947 Presults.write_results_dat(pjoin(self.cmd.me_dir,'SubProcesses', 'results.dat')) 948 949 fsock = open(pjoin(self.cmd.me_dir, 'HTML', run, 'results.html'),'w') 950 fsock.write(sum_html.results_header) 951 fsock.write('%s <dl>' % Presults.get_html(run, unit, self.cmd.me_dir)) 952 fsock.write('%s </dl></body>' % P_text) 953 954 self.cmd.results.add_detail('cross', Presults.xsec) 955 self.cmd.results.add_detail('error', Presults.xerru) 956 957 return Presults.xsec, Presults.xerru
958
class gen_ximprove_v4(gen_ximprove):
    """Refinement job creator writing 'ajob' scripts, one (possibly split)
    job per channel; mirrors the behaviour of the old fortran gen_ximprove."""

    # some hardcoded value which impact the generation
    gen_events_security = 1.2    # multiply the number of requested event by this number for security
    combining_job = 0            # allow to run multiple channel in sequence
    max_request_event = 1000     # split jobs if a channel if it needs more than that
    max_event_in_iter = 5000     # upper bound on points per iteration
    min_event_in_iter = 1000     # lower bound on points per iteration
    max_splitting = 130          # maximum duplication of a given channel
    min_iter = 3                 # minimal number of iterations per job
    max_iter = 9                 # maximal number of iterations per job
    keep_grid_for_refine = False # only apply if needed to split the job
def __init__(self, cmd, opt=None):
    """Standard initialisation, then tighten the per-iteration statistics
    when the user requests a better accuracy than the survey default."""

    super(gen_ximprove_v4, self).__init__(cmd, opt)

    survey_accuracy = cmd._survey_options['accuracy'][1]
    requested_accuracy = cmd.opts['accuracy']
    if requested_accuracy < survey_accuracy:
        self.increase_precision(survey_accuracy / requested_accuracy)
def reset_multijob(self):
    """Reset every SubProcesses/*/*/multijob.dat to '0' (i.e. no channel
    splitting) before a new refine pass."""

    for path in misc.glob(pjoin('*', '*', 'multijob.dat'),
                          pjoin(self.me_dir, 'SubProcesses')):
        # FIX: use a context manager so each handle is closed deterministically
        # (the previous open(path,'w').write(...) relied on refcounting)
        with open(path, 'w') as fsock:
            fsock.write('0\n')
def write_multijob(self, Channel, nb_split):
    """Write the number of splitted jobs in the channel's multijob.dat.

    Nothing is written when the channel is not splitted (nb_split <= 1).
    """
    if nb_split <= 1:
        return
    # context manager guarantees the file is closed even if the write fails
    with open(pjoin(self.me_dir, 'SubProcesses', Channel.get('name'),
                    'multijob.dat'), 'w') as fsock:
        fsock.write('%i\n' % nb_split)
def increase_precision(self, rate=3):
    """Scale up the per-iteration statistics and the event-generation
    security factor when a better-than-default accuracy is requested.

    :param rate: ratio survey_accuracy/requested_accuracy (>= 1); larger
                 values request proportionally more points per iteration.
    """
    misc.sprint(rate)  # NOTE(review): looks like a leftover debug print -- confirm before removing
    if rate < 3:
        self.max_event_in_iter = 20000
        # NOTE(review): this sets 'min_events' while the class attribute used
        # elsewhere in this class is 'min_event_in_iter' -- confirm which
        # attribute downstream code actually reads.
        self.min_events = 7500
        self.gen_events_security = 1.3
    else:
        rate = rate -2
        self.max_event_in_iter = int((rate+1) * 10000)
        self.min_events = int(rate+2) * 2500
        self.gen_events_security = 1 + 0.1 * (rate+2)

    if int(self.nhel) == 1:
        # helicity summation mode: boost statistics with the number of
        # external legs of the process
        self.min_event_in_iter *= 2**(self.cmd.proc_characteristics['nexternal']//3)
        self.max_event_in_iter *= 2**(self.cmd.proc_characteristics['nexternal']//2)
alphabet = "abcdefghijklmnopqrstuvwxyz"

def get_job_for_event(self):
    """generate the script in order to generate a given number of event

    Correspond to write_gen in the fortran version: for each channel to
    refine, compute the required splitting/points and write the associated
    ajob scripts (plus multijob.dat and a combine packet).
    """

    goal_lum, to_refine = self.find_job_for_event()

    # reset the potential multijob of previous run
    self.reset_multijob()

    jobs = []  # list of the refine; if some job are split this is a list of
               # dict with the parameter of the run.

    # try to have a smart load on the cluster (not really important actually)
    if self.combining_job > 1:
        # add a nice ordering for the jobs: interleave expensive (front of the
        # sorted list) and cheap (back of the list) channels.
        new_order = []
        if self.combining_job % 2 == 0:
            half = len(to_refine) // 2
            for i in range(half):
                new_order.append(to_refine[i])
                new_order.append(to_refine[-i-1])
            # FIX: the leftover middle element was taken from the loop
            # variable (NameError for a single channel); index it directly.
            if len(to_refine) % 2:
                new_order.append(to_refine[half])
        else:
            third = len(to_refine) // 3
            for i in range(third):
                new_order.append(to_refine[i])
                new_order.append(to_refine[-2*i-1])
                new_order.append(to_refine[-2*i-2])
            # FIX: when len % 3 == 2 the old code appended a single channel
            # and dropped one (tripping the assert below); append every
            # channel not yet covered, preserving their order.
            new_order.extend(to_refine[third: len(to_refine) - 2*third])
        # ensure that the reordering is done nicely
        assert set([id(C) for C in to_refine]) == set([id(C) for C in new_order])
        to_refine = new_order

    # loop over the channel to refine
    for C in to_refine:
        # 1. Compute the number of points needed to reach the target
        needed_event = goal_lum*C.get('axsec')
        nb_split = int(max(1, ((needed_event-1) // self.max_request_event) + 1))
        if not self.split_channels:
            nb_split = 1
        if nb_split > self.max_splitting:
            nb_split = self.max_splitting
        nb_split = max(1, nb_split)

        # 2. estimate how many points we need in each iteration
        if C.get('nunwgt') > 0:
            nevents = needed_event / nb_split * (C.get('nevents') / C.get('nunwgt'))
            # split by iter
            nevents = int(nevents / (2**self.min_iter-1))
        else:
            nevents = self.max_event_in_iter

        if nevents < self.min_event_in_iter:
            nb_split = int(nb_split * nevents / self.min_event_in_iter) + 1
            nevents = self.min_event_in_iter
        # forbid too low/too large value
        nevents = max(self.min_event_in_iter, min(self.max_event_in_iter, nevents))
        logger.debug("%s : need %s event. Need %s split job of %s points", C.name, needed_event, nb_split, nevents)

        # write the multi-job information
        self.write_multijob(C, nb_split)

        # packet combining the splitted results once all jobs are done
        packet = cluster.Packet((C.parent_name, C.name),
                                combine_runs.CombineRuns,
                                (pjoin(self.me_dir, 'SubProcesses', C.parent_name)),
                                {"subproc": C.name, "nb_split": nb_split})

        # create the info dict, assume no splitting for the default
        info = {'name': self.cmd.results.current['run_name'],
                'script_name': 'unknown',
                'directory': C.name,    # need to be change for splitted job
                'P_dir': C.parent_name,
                'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name),
                'offset': 1,            # need to be change for splitted job
                'nevents': nevents,
                'maxiter': self.max_iter,
                'miniter': self.min_iter,
                'precision': -goal_lum/nb_split,
                'nhel': self.run_card['nhel'],
                'channel': C.name.replace('G', ''),
                'grid_refinment': 0,    # no refinment of the grid
                'base_directory': '',   # should be change in splitted job if want to keep the grid
                'packet': packet,
                }

        if nb_split == 1:
            jobs.append(info)
        else:
            for i in range(nb_split):
                new_info = dict(info)
                new_info['offset'] = i+1
                new_info['directory'] += self.alphabet[i % 26] + str((i+1)//26)
                if self.keep_grid_for_refine:
                    new_info['base_directory'] = info['directory']
                jobs.append(new_info)

    self.create_ajob(pjoin(self.me_dir, 'SubProcesses', 'refine.sh'), jobs)
def create_ajob(self, template, jobs, write_dir=None):
    """Write the ajob* submission scripts for *jobs* in their SubProcess
    directory, filling *template* with each job's info dict.

    Jobs from different P directories are dispatched recursively (no mixed
    submission). Returns the number of the last script written, or None
    when there is nothing to do / the work was fully delegated.
    """

    if not jobs:
        return

    if not write_dir:
        write_dir = pjoin(self.me_dir, 'SubProcesses')

    # filter the job according to their SubProcess directory # no mix submition
    P2job = collections.defaultdict(list)
    for j in jobs:
        P2job[j['P_dir']].append(j)
    if len(P2job) > 1:
        for P in P2job.values():
            self.create_ajob(template, P, write_dir)
        return

    # Here we can assume that all job are for the same directory.
    path = pjoin(write_dir, jobs[0]['P_dir'])

    # FIX: close the template handle deterministically
    with open(template, 'r') as ftemplate:
        template_text = ftemplate.read()

    # special treatment if needed to combine the script
    # computes how many submission miss one job
    if self.combining_job > 1:
        skip1 = 0
        n_channels = len(jobs)
        nb_sub = n_channels // self.combining_job
        nb_job_in_last = n_channels % self.combining_job
        if nb_sub == 0:
            nb_sub = 1
            nb_job_in_last = 0
        if nb_job_in_last:
            nb_sub += 1
            skip1 = self.combining_job - nb_job_in_last
            if skip1 > nb_sub:
                # cannot spread the deficit over the scripts: retry with
                # one fewer channel per script
                self.combining_job -= 1
                return self.create_ajob(template, jobs, write_dir)
        combining_job = self.combining_job
    else:
        # define the variable for combining jobs even in not combine mode
        # such that we can use the same routine
        skip1 = 0
        combining_job = 1
        nb_sub = len(jobs)

    nb_use = 0
    for i in range(nb_sub):
        script_number = i + 1
        # the first skip1 scripts take one job fewer to absorb the remainder
        if i < skip1:
            nb_job = combining_job - 1
        else:
            nb_job = min(combining_job, len(jobs))
        # FIX: each script's handle is now closed per iteration (previously
        # only the last one was closed explicitly)
        with open(pjoin(path, 'ajob%i' % script_number), 'w') as fsock:
            for j in range(nb_use, nb_use + nb_job):
                if j >= len(jobs):  # FIX: was 'j > len(jobs)' (off-by-one)
                    break
                info = jobs[j]
                info['script_name'] = 'ajob%i' % script_number
                info['keeplog'] = 'false'
                if "base_directory" not in info:
                    info["base_directory"] = "./"
                fsock.write(template_text % info)
        nb_use += nb_job

    return script_number
def get_job_for_precision(self):
    """create the ajob to achieve a give precision on the total cross-section

    Select the channels whose error dominates the total uncertainty and
    write refine jobs (with grid refinement enabled) for them.
    """

    assert self.err_goal <= 1
    xtot = abs(self.results.xsec)
    logger.info("Working on precision: %s %%" % (100*self.err_goal))
    all_channels = sum([list(P) for P in self.results if P.mfactor], [])
    limit = self.err_goal * xtot / len(all_channels)
    to_refine = []
    rerr = 0  # error of the job not directly selected
    for C in all_channels:
        cerr = C.mfactor*(C.xerru + len(all_channels)*C.xerrc)
        if cerr > abs(limit):
            to_refine.append(C)
        else:
            rerr += cerr
    rerr *= rerr
    if not len(to_refine):
        return

    # change limit since most don't contribute
    limit = math.sqrt((self.err_goal * xtot)**2 - rerr/math.sqrt(len(to_refine)))
    for C in to_refine[:]:
        cerr = C.mfactor*(C.xerru + len(to_refine)*C.xerrc)
        if cerr < limit:
            to_refine.remove(C)

    # all the channel are now selected. create the channel information
    logger.info('need to improve %s channels' % len(to_refine))

    jobs = []  # list of the refine; if some job are split this is a list of
               # dict with the parameter of the run.

    # loop over the channel to refine
    for C in to_refine:
        # 1. Determine how many events we need in each iteration
        yerr = C.mfactor*(C.xerru+len(to_refine)*C.xerrc)
        nevents = 0.2*C.nevents*(yerr/limit)**2

        nb_split = int((nevents*(C.nunwgt/C.nevents)/self.max_request_event/ (2**self.min_iter-1))**(2/3))
        nb_split = max(nb_split, 1)
        # **(2/3) to slow down the increase in number of jobs
        if nb_split > self.max_splitting:
            nb_split = self.max_splitting

        if nb_split > 1:
            nevents = nevents / nb_split
            self.write_multijob(C, nb_split)
        # forbid too low/too large value
        # BUG FIX: min/max were swapped -- max(max_event_in_iter, nevents)
        # followed by min(min_event_in_iter, ...) always returned
        # min_event_in_iter. Clamp as in get_job_for_event.
        nevents = max(self.min_event_in_iter, min(self.max_event_in_iter, nevents))

        # create the info dict, assume no splitting for the default
        info = {'name': self.cmd.results.current['run_name'],
                'script_name': 'unknown',
                'directory': C.name,    # need to be change for splitted job
                'P_dir': C.parent_name,
                'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name),
                'offset': 1,            # need to be change for splitted job
                'nevents': nevents,
                'maxiter': self.max_iter,
                'miniter': self.min_iter,
                'precision': yerr/math.sqrt(nb_split)/(C.get('xsec') + yerr),
                'nhel': self.run_card['nhel'],
                'channel': C.name.replace('G', ''),
                'grid_refinment': 1
                }

        if nb_split == 1:
            jobs.append(info)
        else:
            for i in range(nb_split):
                new_info = dict(info)
                new_info['offset'] = i+1
                new_info['directory'] += self.alphabet[i % 26] + str((i+1)//26)
                jobs.append(new_info)
    self.create_ajob(pjoin(self.me_dir, 'SubProcesses', 'refine.sh'), jobs)
def update_html(self):
    """update the html from this object since it contains all the information"""
    # This override duplicated gen_ximprove.update_html line for line;
    # delegate to the parent implementation instead of keeping a copy.
    return super(gen_ximprove_v4, self).update_html()
class gen_ximprove_v4_nogridupdate(gen_ximprove_v4):
    """gen_ximprove_v4 variant using many small jobs (lower per-job event
    caps, more splitting) and reusing the survey grid when splitting."""

    # some hardcoded value which impact the generation
    gen_events_security = 1.1  # multiply the number of requested event by this number for security
    combining_job = 0          # allow to run multiple channel in sequence
    max_request_event = 400    # split jobs if a channel if it needs more than that
    max_event_in_iter = 500    # upper bound on points per iteration
    min_event_in_iter = 250    # lower bound on points per iteration
    max_splitting = 260        # maximum duplication of a given channel
    min_iter = 2               # minimal number of iterations per job
    max_iter = 6               # maximal number of iterations per job
    keep_grid_for_refine = True  # splitted jobs start from the existing grid
def __init__(self, cmd, opt=None):
    """Initialise (bypassing gen_ximprove_v4.__init__ on purpose) and widen
    the parallelization for multi-leg loop-induced processes."""

    # BUG FIX: the call was 'gen_ximprove.__init__(cmd, opt)' -- without
    # 'self', so 'cmd' was passed as self and the instance was never
    # initialised.
    gen_ximprove.__init__(self, cmd, opt)

    if cmd.proc_characteristics['loopinduced'] and \
       cmd.proc_characteristics['nexternal'] > 2:
        self.increase_parralelization(cmd.proc_characteristics['nexternal'])
def increase_parralelization(self, nexternal):
    """Allow much more channel splitting and shorter jobs.

    The per-job event cap shrinks with the number of external legs, unless
    the user fixed 'refine_evt_by_job' in the run_card (then the cap is
    left untouched).
    """
    self.max_splitting = 1000

    # only auto-tune the job size when the run_card does not force it
    if self.run_card['refine_evt_by_job'] == -1:
        if nexternal == 3:
            self.max_request_event = 200
        elif nexternal == 4:
            self.max_request_event = 100
        elif nexternal >= 5:
            self.max_request_event = 50

    self.min_event_in_iter = 125
    self.max_iter = 5
class gen_ximprove_share(gen_ximprove, gensym):
    """Doing the refine in multicore. Each core handle a couple of PS point."""

    # default number of phase-space points handled by a single job
    nb_ps_by_job = 2000
    mode = "refine"
    gen_events_security = 1.15
    # Note the real security is lower since we stop the jobs if they are at 96%
    # of this target.
def __init__(self, *args, **opts):
    """Standard initialisation plus the shared-refine bookkeeping."""

    super(gen_ximprove_share, self).__init__(*args, **opts)
    # (Pdir, G) -> (nb of unweighted events, max weight) accumulated so far
    self.generated_events = {}

    def splitted_for_dir(pdir, channel):
        # number of splitted jobs registered for that (Pdir, G) pair
        return self.splitted_Pdir[(pdir, channel)]
    self.splitted_for_dir = splitted_for_dir
def get_job_for_event(self):
    """generate the script in order to generate a given number of event

    Correspond to write_gen in the fortran version: estimate the number of
    phase-space points needed per channel, derive a common job size and
    submit the first iteration of each channel via create_resubmit_one_iter.
    """

    goal_lum, to_refine = self.find_job_for_event()
    self.goal_lum = goal_lum  # kept for combine_iteration

    # loop over the channel to refine to find the number of PS point to launch
    total_ps_points = 0
    channel_to_ps_point = []
    for C in to_refine:
        # 0. remove previous events files
        try:
            os.remove(pjoin(self.me_dir, "SubProcesses", C.parent_name, C.name, "events.lhe"))
        except:  # NOTE(review): bare except; presumably only FileNotFoundError is expected here
            pass

        # 1. Compute the number of points are needed to reach target
        needed_event = goal_lum*C.get('axsec')
        if needed_event == 0:
            continue
        # 2. estimate how many points we need in each iteration
        if C.get('nunwgt') > 0:
            # scale by the observed unweighting efficiency of the survey
            nevents = needed_event * (C.get('nevents') / C.get('nunwgt'))
            # split by iter
            nevents = int(nevents / (2**self.min_iter-1))
        else:
            nb_split = int(max(1, ((needed_event-1)// self.max_request_event) +1))
            if not self.split_channels:
                nb_split = 1
            if nb_split > self.max_splitting:
                nb_split = self.max_splitting
                nevents = self.max_event_in_iter * self.max_splitting
            else:
                nevents = self.max_event_in_iter * nb_split

        # cap channels with a very poor unweighting efficiency
        if nevents > self.max_splitting*self.max_event_in_iter:
            logger.warning("Channel %s/%s has a very low efficiency of unweighting. Might not be possible to reach target" % \
                           (C.name, C.parent_name))
            nevents = self.max_event_in_iter * self.max_splitting

        total_ps_points += nevents
        channel_to_ps_point.append((C, nevents))

    # derive the per-job size from the running mode
    if self.cmd.options["run_mode"] == 1:
        # cluster mode: spread the work over the cluster size if known
        if self.cmd.options["cluster_size"]:
            nb_ps_by_job = total_ps_points /int(self.cmd.options["cluster_size"])
        else:
            nb_ps_by_job = self.nb_ps_by_job
    elif self.cmd.options["run_mode"] == 2:
        # multicore mode: one equal share per core
        # (true division here; the result is truncated below)
        remain = total_ps_points % self.cmd.options["nb_core"]
        if remain:
            nb_ps_by_job = 1 + (total_ps_points - remain) / self.cmd.options["nb_core"]
        else:
            nb_ps_by_job = total_ps_points / self.cmd.options["nb_core"]
    else:
        nb_ps_by_job = self.nb_ps_by_job

    # never go below 500 points per job
    nb_ps_by_job = int(max(nb_ps_by_job, 500))

    for C, nevents in channel_to_ps_point:
        if nevents % nb_ps_by_job:
            nb_job = 1 + int(nevents // nb_ps_by_job)
        else:
            nb_job = int(nevents // nb_ps_by_job)
        submit_ps = min(nevents, nb_ps_by_job)
        if nb_job == 1:
            submit_ps = max(submit_ps, self.min_event_in_iter)
        # C.name[1:] strips the leading 'G' of the channel directory name
        self.create_resubmit_one_iter(C.parent_name, C.name[1:], submit_ps, nb_job, step=0)
        needed_event = goal_lum*C.get('xsec')
        logger.debug("%s/%s : need %s event. Need %s split job of %s points", C.parent_name, C.name, needed_event, nb_job, submit_ps)
def combine_iteration(self, Pdir, G, step):
    """Combine the results of iteration *step* for channel G of Pdir,
    decide whether enough unweighted events were produced, and resubmit
    additional jobs if not. Always returns 0."""

    grid_calculator, cross, error = self.combine_grid(Pdir, G, step)

    # collect all the generated_event
    Gdirs = []  # build the list of splitted run directories G<G>_<i>
    for i in range(self.splitted_for_dir(Pdir, G)):
        path = pjoin(Pdir, "G%s_%s" % (G, i+1))
        Gdirs.append(path)
    assert len(grid_calculator.results) == len(Gdirs) == self.splitted_for_dir(Pdir, G)

    # Check how many events are going to be kept after un-weighting.
    needed_event = cross * self.goal_lum
    if needed_event == 0:
        return 0
    # check that the number of events requested is not higher than the actual
    # total number of events to generate.
    if self.err_goal >= 1:
        if needed_event > self.gen_events_security * self.err_goal:
            needed_event = int(self.gen_events_security * self.err_goal)

    # events already accumulated in previous iterations for this channel
    if (Pdir, G) in self.generated_events:
        old_nunwgt, old_maxwgt = self.generated_events[(Pdir, G)]
    else:
        old_nunwgt, old_maxwgt = 0, 0

    if old_nunwgt == 0 and os.path.exists(pjoin(Pdir, "G%s" % G, "events.lhe")):
        # possible for second refine: recover the statistics from the
        # existing events file.
        lhe = lhe_parser.EventFile(pjoin(Pdir, "G%s" % G, "events.lhe"))
        old_nunwgt = lhe.unweight(None, trunc_error=0.005, log_level=0)
        old_maxwgt = lhe.max_wgt

    # rescale the old sample to the new maximum weight and add this iteration
    maxwgt = max(grid_calculator.get_max_wgt(), old_maxwgt)
    new_evt = grid_calculator.get_nunwgt(maxwgt)
    efficiency = new_evt / sum([R.nevents for R in grid_calculator.results])
    nunwgt = old_nunwgt * old_maxwgt / maxwgt
    nunwgt += new_evt

    # check the number of event for this iteration alone
    one_iter_nb_event = max(grid_calculator.get_nunwgt(), 1)
    drop_previous_iteration = False
    # compare the number of events to generate if we discard the previous iteration
    n_target_one_iter = (needed_event-one_iter_nb_event) / (one_iter_nb_event/ sum([R.nevents for R in grid_calculator.results]))
    n_target_combined = (needed_event-nunwgt) / efficiency
    if n_target_one_iter < n_target_combined:
        # the last iteration alone has more event that the combine iteration.
        # it is therefore interesting to drop previous iteration.
        drop_previous_iteration = True
        nunwgt = one_iter_nb_event
        maxwgt = grid_calculator.get_max_wgt()
        new_evt = nunwgt
        efficiency = (one_iter_nb_event/ sum([R.nevents for R in grid_calculator.results]))

    # open in append mode unless we drop the previous sample (or no file yet)
    try:
        if drop_previous_iteration:
            raise IOError
        output_file = open(pjoin(Pdir, "G%s" % G, "events.lhe"), 'a')
    except IOError:
        output_file = open(pjoin(Pdir, "G%s" % G, "events.lhe"), 'w')

    misc.call(["cat"] + [pjoin(d, "events.lhe") for d in Gdirs],
              stdout=output_file)
    output_file.close()
    # For large number of iteration, check the number of event by doing the
    # real unweighting.
    if nunwgt < 0.6 * needed_event and step > self.min_iter:
        lhe = lhe_parser.EventFile(output_file.name)
        old_nunwgt = nunwgt
        nunwgt = lhe.unweight(None, trunc_error=0.01, log_level=0)

    self.generated_events[(Pdir, G)] = (nunwgt, maxwgt)

    # misc.sprint("Adding %s event to %s. Currently at %s" % (new_evt, G, nunwgt))
    # check what to do
    if nunwgt >= int(0.96*needed_event)+1:  # 0.96*1.15=1.10 =real security
        # We did it.
        logger.info("found enough event for %s/G%s" % (os.path.basename(Pdir), G))
        self.write_results(grid_calculator, cross, error, Pdir, G, step, efficiency)
        return 0
    elif step >= self.max_iter:
        logger.debug("fail to find enough event")
        self.write_results(grid_calculator, cross, error, Pdir, G, step, efficiency)
        return 0

    # not enough events and iterations remain: size the next submission
    nb_split_before = len(grid_calculator.results)
    nevents = grid_calculator.results[0].nevents
    if nevents == 0:  # possible if some integral returns 0
        nevents = max(g.nevents for g in grid_calculator.results)

    need_ps_point = (needed_event - nunwgt)/(efficiency+1e-99)
    need_job = need_ps_point // nevents + 1

    if step < self.min_iter:
        # This is normal but check if we are on the good track
        job_at_first_iter = nb_split_before/2**(step-1)
        expected_total_job = job_at_first_iter * (2**self.min_iter-1)
        done_job = job_at_first_iter * (2**step-1)
        expected_remaining_job = expected_total_job - done_job

        logger.debug("efficiency status (smaller is better): %s", need_job/expected_remaining_job)
        # increase if needed but not too much
        need_job = min(need_job, expected_remaining_job*1.25)

        nb_job = (need_job-0.5)//(2**(self.min_iter-step)-1) + 1
        nb_job = max(1, nb_job)
        grid_calculator.write_grid_for_submission(Pdir, G,
            self.splitted_for_dir(Pdir, G), nb_job*nevents, mode=self.mode,
            conservative_factor=self.max_iter)
        logger.info("%s/G%s is at %i/%i (%.2g%%) event. Resubmit %i job at iteration %i." \
                    % (os.path.basename(Pdir), G, int(nunwgt), int(needed_event)+1,
                       (float(nunwgt)/needed_event)*100.0 if needed_event > 0.0 else 0.0,
                       nb_job, step))
        self.create_resubmit_one_iter(Pdir, G, nevents, nb_job, step)
        #self.create_job(Pdir, G, nb_job, nevents, step)

    elif step < self.max_iter:
        if step + 1 == self.max_iter:
            need_job = 1.20 * need_job  # avoid to have just too few event.

        nb_job = int(min(need_job, nb_split_before*1.5))
        grid_calculator.write_grid_for_submission(Pdir, G,
            self.splitted_for_dir(Pdir, G), nb_job*nevents, mode=self.mode,
            conservative_factor=self.max_iter)

        logger.info("%s/G%s is at %i/%i ('%.2g%%') event. Resubmit %i job at iteration %i." \
                    % (os.path.basename(Pdir), G, int(nunwgt), int(needed_event)+1,
                       (float(nunwgt)/needed_event)*100.0 if needed_event > 0.0 else 0.0,
                       nb_job, step))
        self.create_resubmit_one_iter(Pdir, G, nevents, nb_job, step)

    return 0
def write_results(self, grid_calculator, cross, error, Pdir, G, step, efficiency):
    """Write the final results.dat for channel G of Pdir in the
    fortran-compatible fixed format."""

    # compute the value
    if cross == 0:
        abscross, nw, luminosity = 0, 0, 0
        # NOTE(review): 'wgt' appears twice in this tuple -- one of them is
        # probably meant to be another variable; harmless as all are zero.
        wgt, maxit, nunwgt, wgt, nevents = 0, 0, 0, 0, 0
        error = 0
    else:
        grid_calculator.results.compute_values()
        abscross = self.abscross[(Pdir, G)]/self.sigma[(Pdir, G)]
        nunwgt, wgt = self.generated_events[(Pdir, G)]
        nw = int(nunwgt / efficiency)
        nunwgt = int(nunwgt)
        maxit = step
        nevents = nunwgt
        # make the unweighting to compute the number of events:
        luminosity = nunwgt/cross

    # format the results.dat
    def fstr(nb):
        # fortran-like scientific notation: mantissa in [0,1), e.g. 0.12345E+03
        data = '%E' % nb
        nb, power = data.split('E')
        nb = float(nb) /10
        power = int(power) + 1
        return '%.5fE%+03i' % (nb, power)
    line = '%s %s %s %i %i %i %i %s %s %s 0.0 0.0 0\n' % \
           (fstr(cross), fstr(error*cross), fstr(error*cross),
            nevents, nw, maxit, nunwgt,
            fstr(luminosity), fstr(wgt), fstr(abscross))

    fsock = open(pjoin(self.me_dir, 'SubProcesses', Pdir, 'G%s' % G,
                       'results.dat'), 'w')
    fsock.writelines(line)
    fsock.close()
class gen_ximprove_gridpack(gen_ximprove_v4):
    """gen_ximprove_v4 variant used for gridpack running: jobs are executed
    serially on one core and channels are drawn with a granularity 'ngran'."""

    min_iter = 1
    max_iter = 13
    max_request_event = 1e12   # split jobs if a channel if it needs more than that (value so large that no splitting occurs)
    max_event_in_iter = 4000
    min_event_in_iter = 500
    combining_job = sys.maxsize  # all channels of a SubProcess go into a single ajob
    gen_events_security = 1.00
def __new__(cls, *args, **opts):
    """Instantiate directly, skipping gen_ximprove.__new__ in the MRO."""
    # NOTE(review): super(gen_ximprove, cls) deliberately skips
    # gen_ximprove's __new__ (presumably a factory selecting a subclass).
    # Forwarding *args/**opts assumes the next __new__ in the MRO accepts
    # them -- confirm (object.__new__ would reject extra arguments).
    return super(gen_ximprove, cls).__new__(cls, *args, **opts)
def __init__(self, *args, **opts):
    """Initialise the gridpack refiner.

    Supported extra options:
      - ngran: granularity (minimal number of events per selected channel);
        defaults to 1.
      - readonly: work in the current directory instead of me_dir.
    """

    self.ngran = -1
    self.gscalefact = {}
    self.readonly = False
    if 'ngran' in opts:
        # BUG FIX: this was stored in 'self.gran' while every other access
        # (below and in find_job_for_event) reads 'self.ngran', so the
        # option was silently ignored.
        self.ngran = opts['ngran']
        # del opts['ngran']
    if 'readonly' in opts:
        self.readonly = opts['readonly']
    super(gen_ximprove_gridpack, self).__init__(*args, **opts)
    if self.ngran == -1:
        self.ngran = 1
def find_job_for_event(self):
    """return the list of channel that need to be improved

    A channel is selected with probability proportional to the number of
    events it must contribute (granularity self.ngran); gscalefact keeps
    the compensating weight for the selected channels.
    Returns (goal_lum, to_refine).
    """
    import random

    assert self.err_goal >= 1
    self.err_goal = int(self.err_goal)
    self.gscalefact = {}

    xtot = self.results.axsec
    goal_lum = self.err_goal/(xtot+1e-99)  # pb^-1
    # logger.info('Effective Luminosity %s pb^-1', goal_lum)

    all_channels = sum([list(P) for P in self.results], [])
    # BUG FIX: the 'cmp' keyword of list.sort() does not exist in python3;
    # sort by decreasing luminosity with key/reverse (same ordering as the
    # removed comparator and as the other find_job_for_event).
    all_channels.sort(key=lambda x: x.get('luminosity'), reverse=True)

    to_refine = []
    for C in all_channels:
        tag = C.get('name')
        self.gscalefact[tag] = 0
        R = random.random()
        if C.get('axsec') == 0:
            continue
        if goal_lum * C.get('axsec') < R*self.ngran:
            continue  # no event to generate
        self.gscalefact[tag] = max(1, 1/(goal_lum * C.get('axsec')/ self.ngran))
        # need to generate events
        # BUG FIX: logger.debug was called print-style with extra positional
        # strings; use printf-style placeholders as logging expects.
        logger.debug('request events for %s cross=%s needed events=%s',
                     C.get('name'), C.get('axsec'), goal_lum * C.get('axsec'))
        to_refine.append(C)

    logger.info('need to improve %s channels' % len(to_refine))
    return goal_lum, to_refine
def get_job_for_event(self):
    """generate the script in order to generate a given number of event

    Correspond to write_gen in the fortran version. Gridpack mode: no
    channel splitting, the ajob scripts are executed serially on the local
    core, then check_events resubmits until the target is met.
    """

    goal_lum, to_refine = self.find_job_for_event()

    jobs = []  # list of the refine; if some job are split this is a list of
               # dict with the parameter of the run.

    # loop over the channel to refine
    for C in to_refine:
        # 1. Compute the number of points are needed to reach target
        # (at least ngran events per selected channel)
        needed_event = max(goal_lum*C.get('axsec'), self.ngran)
        nb_split = 1  # gridpack mode never splits a channel

        # 2. estimate how many points we need in each iteration
        if C.get('nunwgt') > 0:
            nevents = needed_event / nb_split * (C.get('nevents') / C.get('nunwgt'))
            # split by iter
            nevents = int(nevents / (2**self.min_iter-1))
        else:
            nevents = self.max_event_in_iter

        if nevents < self.min_event_in_iter:
            nevents = self.min_event_in_iter
        #
        # forbid too low/too large value
        nevents = max(self.min_event_in_iter, min(self.max_event_in_iter, nevents))
        logger.debug("%s : need %s event. Need %s split job of %s points", C.name, needed_event, nb_split, nevents)

        # create the info dict, assume no splitting for the default
        info = {'name': self.cmd.results.current['run_name'],
                'script_name': 'unknown',
                'directory': C.name,    # need to be change for splitted job
                'P_dir': os.path.basename(C.parent_name),
                'offset': 1,            # need to be change for splitted job
                'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name),
                'nevents': nevents,     #int(nevents*self.gen_events_security)+1,
                'maxiter': self.max_iter,
                'miniter': self.min_iter,
                'precision': -1*int(needed_event)/C.get('axsec'),
                'requested_event': needed_event,
                'nhel': self.run_card['nhel'],
                'channel': C.name.replace('G', ''),
                'grid_refinment': 0,    # no refinment of the grid
                'base_directory': '',   # should be change in splitted job if want to keep the grid
                'packet': None,
                }

        jobs.append(info)

    # in readonly mode the scripts are written in the current directory
    write_dir = '.' if self.readonly else None
    self.create_ajob(pjoin(self.me_dir, 'SubProcesses', 'refine.sh'), jobs, write_dir)

    done = []
    for j in jobs:
        # one ajob1 per SubProcess directory (combining_job = sys.maxsize)
        if j['P_dir'] in done:
            continue
        done.append(j['P_dir'])
        # set the working directory path.
        pwd = pjoin(os.getcwd(), j['P_dir']) if self.readonly else pjoin(self.me_dir, 'SubProcesses', j['P_dir'])
        exe = pjoin(pwd, 'ajob1')
        # make the script executable
        st = os.stat(exe)
        os.chmod(exe, st.st_mode | stat.S_IEXEC)

        # run the code (serially, on the local core)
        cluster.onecore.launch_and_wait(exe, cwd=pwd, packet_member=j['packet'])
    write_dir = '.' if self.readonly else pjoin(self.me_dir, 'SubProcesses')

    self.check_events(goal_lum, to_refine, jobs, write_dir)
def check_events(self, goal_lum, to_refine, jobs, Sdir):
    """check that we get the number of requested events if not resubmit.

    Reads each channel's results.dat; channels short of their requested
    number of unweighted events are resubmitted (recursively) with the
    remaining target, concatenating the successive events.lhe files.
    """

    new_jobs = []
    new_to_refine = []  # channels matching new_jobs one-to-one

    for C, job_info in zip(to_refine, jobs):
        P = job_info['P_dir']
        G = job_info['channel']
        axsec = C.get('axsec')
        requested_events = job_info['requested_event']

        new_results = sum_html.OneResult((P, G))
        new_results.read_results(pjoin(Sdir, P, 'G%s' % G, 'results.dat'))

        # need to resubmit?
        if new_results.get('nunwgt') < requested_events:
            pwd = pjoin(os.getcwd(), job_info['P_dir'], 'G%s' % G) if self.readonly else \
                  pjoin(self.me_dir, 'SubProcesses', job_info['P_dir'], 'G%s' % G)
            job_info['requested_event'] -= new_results.get('nunwgt')
            job_info['precision'] -= -1*job_info['requested_event']/axsec
            job_info['offset'] += 1
            new_jobs.append(job_info)
            new_to_refine.append(C)
            # keep the already generated events aside
            files.mv(pjoin(pwd, 'events.lhe'), pjoin(pwd, 'events.lhe.previous'))

    if new_jobs:
        self.create_ajob(pjoin(self.me_dir, 'SubProcesses', 'refine.sh'), new_jobs, Sdir)

        done = []
        for j in new_jobs:
            if j['P_dir'] in done:
                continue
            G = j['channel']
            # set the working directory path.
            pwd = pjoin(os.getcwd(), j['P_dir']) if self.readonly \
                  else pjoin(self.me_dir, 'SubProcesses', j['P_dir'])
            exe = pjoin(pwd, 'ajob1')
            st = os.stat(exe)
            os.chmod(exe, st.st_mode | stat.S_IEXEC)

            # run the code
            cluster.onecore.launch_and_wait(exe, cwd=pwd, packet_member=j['packet'])
            pwd = pjoin(pwd, 'G%s' % G)
            # concatanate with old events file
            files.put_at_end(pjoin(pwd, 'events.lhe'), pjoin(pwd, 'events.lhe.previous'))

        # BUG FIX: the recursion previously passed the full 'to_refine' list
        # together with the subset 'new_jobs', so zip() paired jobs with the
        # wrong channels on resubmission; recurse with the matching subset.
        return self.check_events(goal_lum, new_to_refine, new_jobs, Sdir)