From 32b28e934ea04514167ee2168431b67ee341df51 Mon Sep 17 00:00:00 2001 From: Dario Berzano Date: Tue, 17 Sep 2013 17:15:58 +0200 Subject: [PATCH 1/4] PROOF: check if single ds before tokenizing name The specified datasets string is checked to see if it corresponds to one dataset only. If not, it is broken into pieces (with "|," as separators) and each token is considered a separated dataset name. This should fix an issue where a single dataset name might contain commas. --- proof/proof/src/TProof.cxx | 121 +++++++++++++++++++++++++------------ 1 file changed, 83 insertions(+), 38 deletions(-) diff --git a/proof/proof/src/TProof.cxx b/proof/proof/src/TProof.cxx index 76163d2a9cbec..dc1fa3c874a33 100644 --- a/proof/proof/src/TProof.cxx +++ b/proof/proof/src/TProof.cxx @@ -5568,7 +5568,7 @@ Long64_t TProof::Process(const char *dsetname, const char *selector, } else if (fSelector) { retval = Process(dset, fSelector, option, nentries, first); } else { - Error("Process", "neither a selecrot file nor a selector object have" + Error("Process", "neither a selector file nor a selector object have" " been specified: cannot process!"); } // Cleanup @@ -12446,49 +12446,94 @@ Int_t TProof::AssertDataSet(TDSet *dset, TList *input, // defined: assume that a dataset, stored on the PROOF master by that // name, should be processed. if (!dataset) { - TString dsns(dsname.Data()), dsn1; - Int_t from1 = 0; - while (dsns.Tokenize(dsn1, from1, "[, ]")) { - TString dsn2, enl; - Int_t from2 = 0; - TFileCollection *fc = 0; - while (dsn1.Tokenize(dsn2, from2, "|")) { - enl = ""; - Int_t ienl = dsn2.Index("?enl="); - if (ienl != kNPOS) { - enl = dsn2(ienl + 5, dsn2.Length()); - dsn2.Remove(ienl); + + // First of all check if the full string (except the "entry list" part) + // is the name of a single existing dataset: if it is, don't break it + // into parts + TString dsns( dsname.Data() ), enl; + Ssiz_t eli = dsns.Index("?enl="); + TFileCollection *fc = 0; + if (eli != kNPOS) { + dsns.Remove(eli, dsns.Length()-eli); + enl = dsns(eli+5, dsns.Length()); + } + + if (( fc = mgr->GetDataSet(dsns) )) { + + // + // String corresponds to ONE dataset only + // + + TIter nxfi(fc->GetList()); + TFileInfo *fi; + while (( fi = (TFileInfo *)nxfi() )) + fi->SetTitle(dsns.Data()); + dataset = fc; + dsnparse = dsns; // without entry list + + // Adds the entry list (or empty string if not specified) + datasets->Add( new TPair(dataset, new TObjString( enl.Data() )) ); + + } + else { + + // + // String does NOT correspond to one dataset: check if many datasets + // were specified instead + // + + dsns = dsname.Data(); + TString dsn1; + Int_t from1 = 0; + while (dsns.Tokenize(dsn1, from1, "[, ]")) { + TString dsn2; + Int_t from2 = 0; + fc = 0; + while (dsn1.Tokenize(dsn2, from2, "|")) { + enl = ""; + Int_t ienl = dsn2.Index("?enl="); + if (ienl != kNPOS) { + enl = dsn2(ienl + 5, dsn2.Length()); + dsn2.Remove(ienl); + } + if ((fc = mgr->GetDataSet(dsn2.Data()))) { + // Save dataset name in TFileInfo's title to use it in TDset + TIter nxfi(fc->GetList()); + TFileInfo *fi; + while ((fi = (TFileInfo *) nxfi())) { fi->SetTitle(dsn2.Data()); } + dsnparse = dsn2; + if (!dataset) { + // This is our dataset + dataset = fc; + } else { + // Add it to the dataset + dataset->Add(fc); + SafeDelete(fc); + } + } } - if ((fc = mgr->GetDataSet(dsn2.Data()))) { - // Save dataset name in TFileInfo's title to use it in TDset - TIter nxfi(fc->GetList()); - TFileInfo *fi = 0; - while ((fi = (TFileInfo *) nxfi())) { fi->SetTitle(dsn2.Data()); } - dsnparse = dsn2; - if (!dataset) { - // This is our dataset - dataset = fc; + // The dataset name(s) in the first element + if (dataset) { + if (dataset->GetList()->First()) + ((TFileInfo *)(dataset->GetList()->First()))->SetTitle(dsn1.Data()); + // Add it to the local list + if (enl.IsNull()) { + datasets->Add(new TPair(dataset, new TObjString(""))); } else { - // Add it to the dataset - dataset->Add(fc); - SafeDelete(fc); + datasets->Add(new TPair(dataset, new TObjString(enl.Data()))); } } + // Reset the pointer + dataset = 0; } - // The dataset name(s) in the first element - if (dataset) { - if (dataset->GetList()->First()) - ((TFileInfo *)(dataset->GetList()->First()))->SetTitle(dsn1.Data()); - // Add it to the local list - if (enl.IsNull()) { - datasets->Add(new TPair(dataset, new TObjString(""))); - } else { - datasets->Add(new TPair(dataset, new TObjString(enl.Data()))); - } - } - // Reset the pointer - dataset = 0; + } + + // + // At this point the dataset(s) to be processed, if any, are found in the + // "datasets" variable + // + if (!datasets || datasets->GetSize() <= 0) { emsg.Form("no dataset(s) found on the master corresponding to: %s", dsname.Data()); return -1; From e4daba23b0d69f2ba0e231b5be9aa3ba34bccbaa Mon Sep 17 00:00:00 2001 From: Dario Berzano Date: Thu, 19 Sep 2013 16:03:07 +0200 Subject: [PATCH 2/4] Minor PROOF cleanups --- proof/proof/src/TProof.cxx | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/proof/proof/src/TProof.cxx b/proof/proof/src/TProof.cxx index dc1fa3c874a33..d55ca60eb9b55 100644 --- a/proof/proof/src/TProof.cxx +++ b/proof/proof/src/TProof.cxx @@ -12452,7 +12452,7 @@ Int_t TProof::AssertDataSet(TDSet *dset, TList *input, // into parts TString dsns( dsname.Data() ), enl; Ssiz_t eli = dsns.Index("?enl="); - TFileCollection *fc = 0; + TFileCollection *fc; if (eli != kNPOS) { dsns.Remove(eli, dsns.Length()-eli); enl = dsns(eli+5, dsns.Length()); @@ -12488,7 +12488,6 @@ Int_t TProof::AssertDataSet(TDSet *dset, TList *input, while (dsns.Tokenize(dsn1, from1, "[, ]")) { TString dsn2; Int_t from2 = 0; - fc = 0; while (dsn1.Tokenize(dsn2, from2, "|")) { enl = ""; Int_t ienl = dsn2.Index("?enl="); @@ -12517,11 +12516,7 @@ Int_t TProof::AssertDataSet(TDSet *dset, TList *input, if (dataset->GetList()->First()) ((TFileInfo *)(dataset->GetList()->First()))->SetTitle(dsn1.Data()); // Add it to the local list - if (enl.IsNull()) { - datasets->Add(new TPair(dataset, new TObjString(""))); - } else { - datasets->Add(new TPair(dataset, new TObjString(enl.Data()))); - } + datasets->Add(new TPair(dataset, new TObjString(enl.Data()))); } // Reset the pointer dataset = 0; From 1f5f75a607c9cd06e93bf0d4212e80b8933fbca0 Mon Sep 17 00:00:00 2001 From: Dario Berzano Date: Thu, 19 Sep 2013 17:18:06 +0200 Subject: [PATCH 3/4] PROOF: check for unparsed AliEn dataset names Unparsed portions in data specification trigger an error. Also, some generic functions have been declared static. --- proof/proof/inc/TDataSetManagerAliEn.h | 6 +- proof/proof/src/TDataSetManagerAliEn.cxx | 121 ++++++++++++++++------- 2 files changed, 86 insertions(+), 41 deletions(-) diff --git a/proof/proof/inc/TDataSetManagerAliEn.h b/proof/proof/inc/TDataSetManagerAliEn.h index 17f4c4d2b19ba..45a117f4b2ce0 100644 --- a/proof/proof/inc/TDataSetManagerAliEn.h +++ b/proof/proof/inc/TDataSetManagerAliEn.h @@ -103,13 +103,13 @@ class TDataSetManagerAliEn : public TDataSetManager { TDataSetManagerFile *fCache; Long_t fCacheExpire_s; - std::vector *ExpandRunSpec(TString &runSpec); + static std::vector *ExpandRunSpec(TString &runSpec); - virtual Bool_t ParseCustomFindUri(TString &uri, TString &basePath, + static Bool_t ParseCustomFindUri(TString &uri, TString &basePath, TString &fileName, TString &anchor, TString &treeName, TString ®exp); - virtual Bool_t ParseOfficialDataUri(TString &uri, Bool_t sim, + static Bool_t ParseOfficialDataUri(TString &uri, Bool_t sim, TString &period, Int_t &year, std::vector *&runList, Bool_t &esd, Int_t &aodNum, TString &pass); diff --git a/proof/proof/src/TDataSetManagerAliEn.cxx b/proof/proof/src/TDataSetManagerAliEn.cxx index bb66409cb0a02..97dd37febd959 100644 --- a/proof/proof/src/TDataSetManagerAliEn.cxx +++ b/proof/proof/src/TDataSetManagerAliEn.cxx @@ -19,6 +19,7 @@ ////////////////////////////////////////////////////////////////////////// #include "TDataSetManagerAliEn.h" +#include "TError.h" ClassImp(TAliEnFind); @@ -541,43 +542,65 @@ Bool_t TDataSetManagerAliEn::ParseCustomFindUri(TString &uri, TString ®exp) { + // Copy URI to a dummy URI parsed to look for unrecognized stuff; initial + // part is known ("Find;") and stripped + TString checkUri = uri(5, uri.Length()); + // Base path - TPMERegexp reBasePath("(^|;)BasePath=([^; ]+)(;|$)"); - if (reBasePath.Match(uri) != 4) { - Error("ParseCustomFindUri", "Base path not specified"); + TPMERegexp reBasePath("(^|;)(BasePath=([^; ]+))(;|$)"); + if (reBasePath.Match(uri) != 5) { + ::Error("TDataSetManagerAliEn::ParseCustomFindUri", + "Base path not specified"); return kFALSE; } - basePath = reBasePath[2]; + checkUri.ReplaceAll(reBasePath[2], ""); + basePath = reBasePath[3]; // File name - TPMERegexp reFileName("(^|;)FileName=([^; ]+)(;|$)"); - if (reFileName.Match(uri) != 4) { - Error("ParseCustomFindUri", "File name not specified"); + TPMERegexp reFileName("(^|;)(FileName=([^; ]+))(;|$)"); + if (reFileName.Match(uri) != 5) { + ::Error("TDataSetManagerAliEn::ParseCustomFindUri", + "File name not specified"); return kFALSE; } - fileName = reFileName[2]; + checkUri.ReplaceAll(reFileName[2], ""); + fileName = reFileName[3]; // Anchor (optional) - TPMERegexp reAnchor("(^|;)Anchor=([^; ]+)(;|$)"); - if (reAnchor.Match(uri) != 4) + TPMERegexp reAnchor("(^|;)(Anchor=([^; ]+))(;|$)"); + if (reAnchor.Match(uri) != 5) anchor = ""; - else - anchor = reAnchor[2]; + else { + checkUri.ReplaceAll(reAnchor[2], ""); + anchor = reAnchor[3]; + } // Tree name (optional) - TPMERegexp reTreeName("(^|;)Tree=(/[^; ]+)(;|$)"); - if (reTreeName.Match(uri) != 4) + TPMERegexp reTreeName("(^|;)(Tree=(/[^; ]+))(;|$)"); + if (reTreeName.Match(uri) != 5) treeName = ""; - else - treeName = reTreeName[2]; + else { + checkUri.ReplaceAll(reTreeName[2], ""); + treeName = reTreeName[3]; + } // Regexp (optional) - TPMERegexp reRegexp("(^|;)Regexp=([^; ]+)(;|$)"); - if (reRegexp.Match(uri) != 4) + TPMERegexp reRegexp("(^|;)(Regexp=([^; ]+))(;|$)"); + if (reRegexp.Match(uri) != 5) regexp = ""; - else - regexp = reRegexp[2]; + else { + checkUri.ReplaceAll(reRegexp[2], ""); + regexp = reRegexp[3]; + } + // Check for unparsed stuff; parsed stuff has been stripped from checkUri + checkUri.ReplaceAll(";", ""); + checkUri.ReplaceAll(" ", ""); + if (!checkUri.IsNull()) { + ::Error("TDataSetManagerAliEn::ParseCustomFindUri", + "There are unrecognized parameters in the dataset find string"); + return kFALSE; + } return kTRUE; } @@ -587,62 +610,84 @@ Bool_t TDataSetManagerAliEn::ParseOfficialDataUri(TString &uri, Bool_t sim, Int_t &aodNum, TString &pass) { + // Copy URI to a dummy URI parsed to look for unrecognized stuff + TString checkUri; + + // Strip the initial part (either "Data;" or "Sim;") + { + Ssiz_t idx = uri.Index(";"); + checkUri = uri(idx, uri.Length()); + } + // // Parse LHC period // - TPMERegexp rePeriod("(^|;)Period=(LHC([0-9]{2})[^;]*)(;|$)"); - if (rePeriod.Match(uri) != 5) { - Error("ParseOfficialDataUri", + TPMERegexp rePeriod("(^|;)(Period=(LHC([0-9]{2})[^;]*))(;|$)"); + if (rePeriod.Match(uri) != 6) { + ::Error("TDataSetManagerAliEn::ParseOfficialDataUri", "LHC period not specified (e.g. Period=LHC10h)"); return kFALSE; } - period = rePeriod[2]; - year = rePeriod[3].Atoi() + 2000; + checkUri.ReplaceAll(rePeriod[2], ""); + period = rePeriod[3]; + year = rePeriod[4].Atoi() + 2000; // // Parse data format (ESDs or AODXXX) // - TPMERegexp reFormat("(^|;)Variant=(ESDs?|AOD([0-9]{3}))(;|$)"); - if (reFormat.Match(uri) != 5) { - Error("ParseOfficialDataUri", + TPMERegexp reFormat("(^|;)(Variant=(ESDs?|AOD([0-9]{3})))(;|$)"); + if (reFormat.Match(uri) != 6) { + ::Error("TDataSetManagerAliEn::ParseOfficialDataUri", "Data variant (e.g., Variant=ESD or AOD079) not specified"); return kFALSE; } - if (reFormat[2].BeginsWith("ESD")) esd = kTRUE; + checkUri.ReplaceAll(reFormat[2], ""); + if (reFormat[3].BeginsWith("ESD")) esd = kTRUE; else { esd = kFALSE; - aodNum = reFormat[3].Atoi(); + aodNum = reFormat[4].Atoi(); } // // Parse pass: mandatory on Data, useless on Sim // - TPMERegexp rePass("(^|;)Pass=([a-zA-Z_0-9-]+)(;|$)"); - if ((rePass.Match(uri) != 4) && (!sim)) { - Error("ParseOfficialDataUri", + TPMERegexp rePass("(^|;)(Pass=([a-zA-Z_0-9-]+))(;|$)"); + if ((!sim) && (rePass.Match(uri) != 5)) { + ::Error("TDataSetManagerAliEn::ParseOfficialDataUri", "Pass (e.g., Pass=cpass1_muon) is mandatory on real data"); return kFALSE; } - pass = rePass[2]; + checkUri.ReplaceAll(rePass[2], ""); + pass = rePass[3]; // // Parse run list // - TPMERegexp reRun("(^|;)Run=([0-9,-]+)(;|$)"); - if (reRun.Match(uri) != 4) { - Error("ParseOfficialDataUri", + TPMERegexp reRun("(^|;)(Run=([0-9,-]+))(;|$)"); + if (reRun.Match(uri) != 5) { + ::Error("TDataSetManagerAliEn::ParseOfficialDataUri", "Run or run range not specified (e.g., Run=139104-139107,139306)"); return kFALSE; } - TString runListStr = reRun[2]; + checkUri.ReplaceAll(reRun[2], ""); + TString runListStr = reRun[3]; runList = ExpandRunSpec(runListStr); // must be freed by caller + // Check for unparsed stuff; parsed stuff has been stripped from checkUri + checkUri.ReplaceAll(";", ""); + checkUri.ReplaceAll(" ", ""); + if (!checkUri.IsNull()) { + ::Error("TDataSetManagerAliEn::ParseOfficialDataUri", + "There are unrecognized parameters in dataset string"); + return kFALSE; + } + return kTRUE; } From 11a5b03023943b032c96863d9547166bc713f84d Mon Sep 17 00:00:00 2001 From: Dario Berzano Date: Thu, 19 Sep 2013 19:18:04 +0200 Subject: [PATCH 4/4] PROOF: fixed issue with multi-ds and entry list --- proof/proof/src/TProof.cxx | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/proof/proof/src/TProof.cxx b/proof/proof/src/TProof.cxx index d55ca60eb9b55..3b5513ad53e63 100644 --- a/proof/proof/src/TProof.cxx +++ b/proof/proof/src/TProof.cxx @@ -12454,11 +12454,17 @@ Int_t TProof::AssertDataSet(TDSet *dset, TList *input, Ssiz_t eli = dsns.Index("?enl="); TFileCollection *fc; if (eli != kNPOS) { - dsns.Remove(eli, dsns.Length()-eli); enl = dsns(eli+5, dsns.Length()); + dsns.Remove(eli, dsns.Length()-eli); } - if (( fc = mgr->GetDataSet(dsns) )) { + // Check if the entry list is valid. If it has spaces, commas, or pipes, + // it is not considered as valid and we revert to the "multiple datasets" + // case + Bool_t validEnl = ((enl.Index("|") == kNPOS) && + (enl.Index(",") == kNPOS) && (enl.Index(" ") == kNPOS)); + + if (validEnl && (( fc = mgr->GetDataSet(dsns) ))) { // // String corresponds to ONE dataset only