This repository has been archived by the owner on Dec 19, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #22 from databio/dev_newplots
Some changes
- Loading branch information
Showing
7 changed files
with
167 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
bedfile statistics generating pipeline | ||
""" | ||
|
||
__author__ = ["Michal Stolarczyk", "Ognen Duzlevski"] | ||
__author__ = ["Michal Stolarczyk", "Ognen Duzlevski", "Jose Verdezoto"] | ||
__email__ = "[email protected]" | ||
__version__ = "0.0.1" | ||
|
||
|
@@ -12,41 +12,64 @@ | |
import json | ||
import yaml | ||
import os | ||
import warnings | ||
|
||
import pypiper | ||
from bbconf.const import * | ||
import pypiper | ||
import bbconf | ||
|
||
parser = ArgumentParser(description="A pipeline to read a file in BED format and produce metadata in JSON format.") | ||
parser = ArgumentParser( | ||
description="A pipeline to read a file in BED format and produce metadata " | ||
"in JSON format.") | ||
|
||
parser.add_argument('--bedfile', help='a full path to bed file to process', required=True) | ||
parser.add_argument("--bedbase-config", dest="bedbase_config", type=str, required=False, default=None, | ||
help="a path to the bedbase configuration file") | ||
parser.add_argument("-y", "--sample-yaml", dest="sample_yaml", type=str, required=False, | ||
help="a yaml config file with sample attributes to pass on more metadata into the database") | ||
parser.add_argument( | ||
'--bedfile', help='a full path to bed file to process', required=True) | ||
parser.add_argument( | ||
'--open-signal-matrix', type=str, required=False, default=None, | ||
help='a full path to the openSignalMatrix required for the tissue ' | ||
'specificity plots') | ||
parser.add_argument( | ||
"--bedbase-config", dest="bedbase_config", type=str, default=None, | ||
help="a path to the bedbase configuration file") | ||
parser.add_argument( | ||
"-y", "--sample-yaml", dest="sample_yaml", type=str, required=False, | ||
help="a yaml config file with sample attributes to pass on more metadata " | ||
"into the database") | ||
exclusive_group = parser.add_mutually_exclusive_group() | ||
exclusive_group.add_argument('--no-db-commit', action='store_true', | ||
help='whether the JSON commit to the database should be skipped') | ||
exclusive_group.add_argument('--just-db-commit', action='store_true', | ||
help='whether just to commit the JSON to the database') | ||
parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "common", "looper", "ngs"]) | ||
exclusive_group.add_argument( | ||
'--no-db-commit', action='store_true', | ||
help='whether the JSON commit to the database should be skipped') | ||
exclusive_group.add_argument( | ||
'--just-db-commit', action='store_true', | ||
help='whether just to commit the JSON to the database') | ||
parser = pypiper.add_pypiper_args(parser, | ||
groups=["pypiper", "common", "looper", "ngs"]) | ||
|
||
args = parser.parse_args() | ||
|
||
bbc = bbconf.BedBaseConf(filepath=bbconf.get_bedbase_cfg(args.bedbase_config)) | ||
|
||
bed_digest = md5(open(args.bedfile, 'rb').read()).hexdigest() | ||
bedfile_name = os.path.split(args.bedfile)[1] | ||
fileid = os.path.splitext(os.path.splitext(bedfile_name)[0])[0] # twice since there are 2 exts | ||
outfolder = os.path.abspath(os.path.join(bbc[CFG_PATH_KEY][CFG_BEDSTAT_OUTPUT_KEY], bed_digest)) | ||
# need to split twice since there are 2 exts | ||
fileid = os.path.splitext(os.path.splitext(bedfile_name)[0])[0] | ||
outfolder = os.path.abspath(os.path.join( | ||
bbc[CFG_PATH_KEY][CFG_BEDSTAT_OUTPUT_KEY], bed_digest)) | ||
json_file_path = os.path.abspath(os.path.join(outfolder, fileid + ".json")) | ||
|
||
if not args.just_db_commit: | ||
pm = pypiper.PipelineManager(name="bedstat-pipeline", outfolder=outfolder, args=args) | ||
rscript_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "tools", "regionstat.R") | ||
assert os.path.exists(rscript_path), FileNotFoundError("'{}' script not found".format(rscript_path)) | ||
cmd_vars = dict(rscript=rscript_path, bed=args.bedfile, id=fileid, out=outfolder, genome=args.genome_assembly, digest=bed_digest) | ||
command = "Rscript {rscript} --bedfile={bed} --fileId={id} --outputfolder={out} --genome={genome} --digest={digest}".format(**cmd_vars) | ||
pm = pypiper.PipelineManager(name="bedstat-pipeline", outfolder=outfolder, | ||
args=args) | ||
rscript_path = os.path.join(os.path.dirname( | ||
os.path.dirname(os.path.abspath(__file__))), "tools", "regionstat.R") | ||
assert os.path.exists(rscript_path), \ | ||
FileNotFoundError("'{}' script not found".format(rscript_path)) | ||
cmd_vars = dict(rscript=rscript_path, bed=args.bedfile, id=fileid, | ||
matrix=args.open_signal_matrix, out=outfolder, | ||
genome=args.genome_assembly, digest=bed_digest) | ||
command = "Rscript {rscript} --bedfile={bed} --fileId={id} " \ | ||
"--openSignalMatrix={matrix} --outputfolder={out} " \ | ||
"--genome={genome} --digest={digest}".format(**cmd_vars) | ||
pm.run(cmd=command, target=json_file_path) | ||
pm.stop_pipeline() | ||
|
||
|
@@ -68,13 +91,15 @@ | |
except KeyError: | ||
print("'{}' metadata not available".format(key)) | ||
else: | ||
warnings.warn("Specified sample_yaml path does not exist: {}".format(args.sample_yaml)) | ||
warnings.warn("Specified sample_yaml path does not exist: {}". | ||
format(args.sample_yaml)) | ||
# enrich the data from R with the data from the sample line itself | ||
# the bedfile_path below needs to be overwritten in Elastic in case the pipeline run was split | ||
# into two computing environments. Currently used for the development. | ||
# This concept leverages the portability introduced by environment variable in the | ||
# bedfile path in the PEP. Locally the environment variable points to a different path than on | ||
# the compute cluster where the heavy calculations happen. | ||
# the bedfile_path below needs to be overwritten in Elastic in case the | ||
# pipeline run was split into two computing environments. Currently used | ||
# for the development. This concept leverages the portability introduced by | ||
# environment variable in the bedfile path in the PEP. Locally the | ||
# environment variable points to a different path than on the compute | ||
# cluster where the heavy calculations happen. | ||
data[BEDFILE_PATH_KEY] = [args.bedfile] | ||
print("Data: {}".format(data)) | ||
bbc.insert_bedfiles_data(data=data, doc_id=bed_digest) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,11 @@ | ||
protocol_mapping: | ||
bedstat: bedstat | ||
|
||
pipelines: | ||
bedstat: | ||
name: BEDSTAT | ||
path: pipeline/bedstat.py | ||
schema: pep_schema.yaml | ||
looper_args: True | ||
command_template: > | ||
{ pipeline.path } | ||
--bedfile { sample.output_file_path } | ||
--genome { sample.genome } | ||
pipeline_name: BEDSTAT | ||
pipeline_type: sample | ||
path: pipeline/bedstat.py | ||
input_schema: http://schema.databio.org/pipelines/bedstat.yaml | ||
command_template: > | ||
{pipeline.path} | ||
--bedfile {sample.output_file_path} | ||
--genome {sample.genome} | ||
--sample-yaml {sample.yaml_file} | ||
{% if sample.bedbase_config is defined %} --bedbase-config {sample.bedbase_config} {% endif %} | ||
{% if sample.open_signal_matrix is defined %} --open-signal-matrix {sample.open_signal_matrix} {% endif %} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
pep_version: 2.0.0 | ||
sample_table: path | ||
looper: | ||
output_dir: output | ||
sample_modifiers: | ||
append: | ||
pipeline_interfaces: ../pipeline_interface_new.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters