11#!/usr/bin/env python3
22
3- import os ,sys ,getpass ,warnings ,glob ,shlex ,subprocess ,argparse
3+ """Returns a list of files from a dataset including only files that are hosted on disk."""
4+
5+ import os ,sys ,getpass ,warnings ,glob ,shlex ,subprocess ,argparse # pylint: disable=multiple-imports
46from collections import defaultdict
57
def getOS():
    """Return the OS release number read from /etc/redhat-release.

    The value is obtained from the shell (``sed`` over /etc/redhat-release)
    because other methods return the host OS when running in a container.

    Returns:
        str: The digits extracted by the ``sed`` expression, with trailing
        whitespace removed.

    Raises:
        subprocess.CalledProcessError: If the ``sed`` command exits with a
            non-zero status.
    """
    # Strip any leading non-digit text and keep only the first run of digits.
    cmd = r"sed -nr 's/[^0-9]*([0-9]+).*/\1/p' /etc/redhat-release"
    return subprocess.check_output(shlex.split(cmd), encoding="utf-8").rstrip()
1013
14+ """Gets list of files on disk for a dataset, and list of sites along with how many files each site has"""
1115def getHosted (dataset ):
1216 osv = getOS ()
1317 rucio_path = f'/cvmfs/cms.cern.ch/rucio/x86_64/rhel{ osv } /py3/current'
@@ -17,7 +21,7 @@ def getHosted(dataset):
1721 sys .path .insert (0 ,full_rucio_path + '/site-packages/' )
1822
1923 warnings .filterwarnings ("ignore" , message = ".*cryptography.*" )
20- from rucio .client .client import Client
24+ from rucio .client .client import Client # pylint: disable=import-error,import-outside-toplevel
2125 client = Client ()
2226
2327 # loop over blocks to avoid timeout error from too-large response
@@ -27,12 +31,13 @@ def getHosted(dataset):
2731 nblocks = 10
2832 block_groups = [all_blocks [i :i + nblocks ] for i in range (0 , len (all_blocks ), nblocks )]
2933
30- from rucio .client .replicaclient import ReplicaClient
34+ from rucio .client .replicaclient import ReplicaClient # pylint: disable=import-error,import-outside-toplevel
3135 rep_client = ReplicaClient ()
3236
3337 filelist = set ()
3438 sitelist = defaultdict (int )
35- sitecond = lambda site : "_Tape" not in site
39+ def sitecond (site ):
40+ return "_Tape" not in site
3641 for block_group in block_groups :
3742 reps = list (rep_client .list_replicas ([{'scope' : 'cms' , 'name' : block ['name' ]} for block in block_group ]))
3843 for rep in reps :
@@ -44,16 +49,17 @@ def getHosted(dataset):
4449 sys .path .pop (0 )
4550 return filelist , sitelist
4651
def main(dataset, outfile=None, verbose=False):
    """Print the file list (and optionally the site list) for a dataset.

    Args:
        dataset: Dataset identifier, passed unchanged to getHosted().
        outfile: Path of a file to write the file list to. When None (the
            default) the file list is printed to standard output instead.
        verbose: When true, print the site list returned by getHosted() to
            standard output before the file list.
    """
    filelist, sitelist = getHosted(dataset)

    if verbose:
        print("Site list:")
        print("\n ".join(f'{ k } : { v } ' for k, v in sitelist.items()))

    # NOTE(review): the separators above and below contain a space after the
    # newline, exactly as found in the source -- confirm this is intended.
    listing = "\n ".join(filelist)
    if outfile is None:
        print(listing, file=sys.stdout)
    else:
        # The context manager closes the file even if the write raises.
        with open(outfile, 'w') as file:  # pylint: disable=unspecified-encoding
            print(listing, file=file)
5763
5864if __name__ == "__main__" :
5965 parser = argparse .ArgumentParser (
0 commit comments