Skip to content

Commit 9fcef44

Browse files
committed
New instructions and own conda environment
1 parent 263a9be commit 9fcef44

File tree

5 files changed

+124
-15
lines changed

5 files changed

+124
-15
lines changed

README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
# Python scripts for running GraphChainer experiments
22

3-
- Use same conda enviroment as GraphChainer's
4-
- Modify `.env` file with the following variables `GRAPHALIGNER`, `GRAPHCHAINER`, `MINIGRAPH`, `MINICHAIN` (paths to the corresponding binaries)
3+
- `conda env create -f environment.yml`
4+
- `conda activate GraphChainerScripts`
5+
- Compile and place the aligners' binaries in the `bin/` folder (they are assumed to be called `GraphAligner, GraphChainer, minigraph, minichain` respectively)
56
- Place Badread's repo in this directory `git clone https://github.com/rrwick/Badread.git`
67

7-
## Some instructions
8+
## Instructions
89

910
- Every python script has its own instructions/helper
10-
- The scripts should be run in the order `[generate_sim_reads.py], run_experiment.py, compute_summary.py, compute_metrics.py`
11+
- The scripts should be run in the order `[generate_sim_reads.py], run_experiment.py, compute_summary.py, compute_metrics.py` (if real reads are used the first script is skipped)
12+
-- `generate_sim_reads.py` : Takes a graph as input and generates simulated reads from a random path of the graph using the `GraphChainer` random path generator and the `Badread` simulator.
13+
-- `run_experiment.py` : Runs the four tools on the specified graph and fastq files.
14+
-- `compute_summary.py` : Computes csv tables with the metrics used in the paper. It receives as input the graph and the fastq file or, in the case of simulated reads, the files output by `generate_sim_reads.py`.
15+
-- `compute_metrics.py` : Plots a graph with the summary (csv) files output by `compute_summary.py`

compute_summary.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ def compute_overlap(read, node):
229229

230230
def compute_summary(args):
231231

232-
vertex_labels, edges = load_graph(args.gfa_graph)
232+
vertex_labels, edges = load_graph(args.graph)
233233
reads, ref_seq, ref_seq_rev_comp, ref_path = load_reads_and_ref(args.fastq, args.fasta, args.path)
234234
node_limits = list(cumsum([len(vertex_labels[v]) for v in ref_path]))
235235

@@ -340,7 +340,7 @@ def compute_edit_kernel(tid):
340340
)
341341

342342
requiredNamed = parser.add_argument_group('required arguments')
343-
requiredNamed.add_argument('-gfa', '--gfa-graph', type=str, help='Input gfa file', required=True)
343+
requiredNamed.add_argument('-g', '--graph', type=str, help='Input gfa file', required=True)
344344
requiredNamed.add_argument('-fq', '--fastq', type=str, help='Input fastq file', required=True)
345345
requiredNamed.add_argument(
346346
'-als', '--alignments', type=str, help='Output gam/gaf files (with extension, each)', required=True, nargs='+'

environment.yml

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
name: GraphChainerScripts
2+
channels:
3+
- anaconda
4+
- conda-forge
5+
- https://conda.anaconda.org/gurobi
6+
- defaults
7+
dependencies:
8+
- _libgcc_mutex=0.1=main
9+
- _openmp_mutex=5.1=1_gnu
10+
- brotli=1.0.9=h166bdaf_7
11+
- brotli-bin=1.0.9=h166bdaf_7
12+
- bzip2=1.0.8=h7b6447c_0
13+
- ca-certificates=2022.12.7=ha878542_0
14+
- certifi=2022.12.7=pyhd8ed1ab_0
15+
- click=8.1.3=unix_pyhd8ed1ab_2
16+
- contourpy=1.0.5=py310hdb19cb5_0
17+
- cycler=0.11.0=pyhd8ed1ab_0
18+
- dbus=1.13.18=hb2f20db_0
19+
- expat=2.2.10=h9c3ff4c_0
20+
- fontconfig=2.14.1=hef1e5e3_0
21+
- fonttools=4.25.0=pyhd3eb1b0_0
22+
- freetype=2.10.4=h0708190_1
23+
- giflib=5.2.1=h36c2ea0_2
24+
- glib=2.69.1=h4ff587b_1
25+
- gst-plugins-base=1.14.1=h6a678d5_1
26+
- gstreamer=1.14.1=h5eee18b_1
27+
- icu=58.2=hf484d3e_1000
28+
- joblib=1.1.1=py310h06a4308_0
29+
- jpeg=9e=h166bdaf_1
30+
- keyutils=1.6.1=h166bdaf_0
31+
- kiwisolver=1.4.4=py310h6a678d5_0
32+
- krb5=1.19.3=h3790be6_0
33+
- lcms2=2.12=h3be6417_0
34+
- ld_impl_linux-64=2.38=h1181459_1
35+
- lerc=3.0=h295c915_0
36+
- libblas=3.9.0=15_linux64_openblas
37+
- libbrotlicommon=1.0.9=h166bdaf_7
38+
- libbrotlidec=1.0.9=h166bdaf_7
39+
- libbrotlienc=1.0.9=h166bdaf_7
40+
- libcblas=3.9.0=15_linux64_openblas
41+
- libclang=10.0.1=default_hb85057a_2
42+
- libdeflate=1.17=h5eee18b_0
43+
- libedit=3.1.20191231=he28a2e2_2
44+
- libevent=2.1.12=h8f2d780_0
45+
- libffi=3.3=he6710b0_2
46+
- libgcc-ng=11.2.0=h1234567_1
47+
- libgfortran-ng=12.2.0=h69a702a_19
48+
- libgfortran5=12.2.0=h337968e_19
49+
- libgomp=11.2.0=h1234567_1
50+
- liblapack=3.9.0=15_linux64_openblas
51+
- libllvm10=10.0.1=he513fc3_3
52+
- libopenblas=0.3.20=pthreads_h78a6416_0
53+
- libpng=1.6.39=h5eee18b_0
54+
- libpq=12.9=h16c4e8d_3
55+
- libprotobuf=3.20.3=he621ea3_0
56+
- libstdcxx-ng=11.2.0=h1234567_1
57+
- libtiff=4.5.0=h6a678d5_2
58+
- libuuid=1.41.5=h5eee18b_0
59+
- libwebp=1.2.4=h11a3e52_1
60+
- libwebp-base=1.2.4=h5eee18b_1
61+
- libxcb=1.15=h7f8727e_0
62+
- libxkbcommon=1.0.1=hfa300c1_0
63+
- libxml2=2.9.14=h74e7548_0
64+
- libxslt=1.1.35=h4e12654_0
65+
- lz4-c=1.9.3=h9c3ff4c_1
66+
- matplotlib=3.7.0=py310h06a4308_0
67+
- matplotlib-base=3.7.0=py310h1128e8f_0
68+
- munkres=1.1.4=pyh9f0ad1d_0
69+
- ncurses=6.4=h6a678d5_0
70+
- nspr=4.33=h295c915_0
71+
- nss=3.74=h0370c37_0
72+
- openssl=1.1.1s=h7f8727e_0
73+
- packaging=23.0=pyhd8ed1ab_0
74+
- pcre=8.45=h9c3ff4c_0
75+
- pillow=9.4.0=py310h6a678d5_0
76+
- pip=23.0.1=py310h06a4308_0
77+
- ply=3.11=py_1
78+
- protobuf=3.20.3=py310h6a678d5_0
79+
- pyparsing=3.0.9=pyhd8ed1ab_0
80+
- pyqt=5.15.7=py310h6a678d5_1
81+
- python=3.10.0=h12debd9_5
82+
- python-dateutil=2.8.2=pyhd8ed1ab_0
83+
- python-dotenv=1.0.0=pyhd8ed1ab_0
84+
- python_abi=3.10=2_cp310
85+
- qt-main=5.15.2=h327a75a_7
86+
- qt-webengine=5.15.9=hd2b0992_4
87+
- qtwebkit=5.212=h4eab89a_4
88+
- readline=8.2=h5eee18b_0
89+
- setuptools=65.6.3=py310h06a4308_0
90+
- sip=6.6.2=py310h6a678d5_0
91+
- six=1.16.0=pyh6c4a22f_0
92+
- sqlite=3.40.1=h5082296_0
93+
- tk=8.6.12=h1ccaba5_0
94+
- toml=0.10.2=pyhd8ed1ab_0
95+
- tornado=6.1=py310h5764c6d_3
96+
- tzdata=2022g=h04d1e81_0
97+
- wheel=0.38.4=py310h06a4308_0
98+
- xz=5.2.10=h5eee18b_1
99+
- zlib=1.2.13=h5eee18b_0
100+
- zstd=1.5.2=ha4553b6_0
101+
- pip:
102+
- edlib==1.3.9
103+
- numpy==1.24.2
104+
- pyqt5-sip==12.11.0
105+
- scipy==1.10.1

generate_sim_reads.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def generate_sim_reads(args):
1111
graphchainer = getenv('GRAPHCHAINER')
1212

1313
run(
14-
f'{graphchainer} --generate-path --generate-path-seed {args.seed} -g {args.vg_graph} -f {args.fasta} -a tmp.gam'
14+
f'{graphchainer} --generate-path --generate-path-seed {args.seed} -g {args.graph} -f {args.fasta} -a tmp.gam'
1515
.split()
1616
)
1717

@@ -29,15 +29,15 @@ def generate_sim_reads(args):
2929

3030
parser = ArgumentParser(
3131
description='''
32-
Generates simulated reads from a random path of an input vg file using the Badread simulator.
32+
Generates simulated reads from a random path of an input vg/gfa file using the Badread simulator.
3333
Badread parameters are fixed to --identity 85,95,5 --length 15000,10000 --error_model pacbio2016
3434
--junk_reads 0 --random_reads 0 --chimeras 0.
3535
''',
3636
formatter_class=RawTextHelpFormatter
3737
)
3838

3939
requiredNamed = parser.add_argument_group('required arguments')
40-
requiredNamed.add_argument('-vg', '--vg-graph', type=str, help='Input vg file', required=True)
40+
requiredNamed.add_argument('-g', '--graph', type=str, help='Input vg/gfa file', required=True)
4141
requiredNamed.add_argument('-fq', '--fastq', type=str, help='Output fastq file', required=True)
4242

4343
parser.add_argument('-s', '--seed', type=int, help='Seed for random path generator and Badread', default=0)

run_experiment.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,25 @@ def run_experiment(args):
99

1010
graphaligner = getenv('GRAPHALIGNER')
1111
run(
12-
f'{graphaligner} -t {args.threads} -x vg -f {args.fastq} -g {args.vg_graph} '
12+
f'{graphaligner} -t {args.threads} -x vg -f {args.fastq} -g {args.graph} '
1313
f'-a {args.alignment}_graphaligner.gam'.split()
1414
)
1515

1616
graphchainer = getenv('GRAPHCHAINER')
1717
run(
18-
f'{graphchainer} -t {args.threads} -f {args.fastq} -g {args.vg_graph} '
18+
f'{graphchainer} -t {args.threads} -f {args.fastq} -g {args.graph} '
1919
f'-a {args.alignment}_graphchainer.gam '.split()
2020
)
2121

2222
minigraph = getenv('MINIGRAPH')
2323
run(
24-
f'{minigraph} -t {args.threads} -c {args.gfa_graph} {args.fastq}'.split(),
24+
f'{minigraph} -t {args.threads} -c {args.graph} {args.fastq}'.split(),
2525
stdout=open(f'{args.alignment}_minigraph.gaf', 'wb')
2626
)
2727

2828
minichain = getenv('MINICHAIN')
2929
run(
30-
f'{minichain} -t {args.threads} -c {args.gfa_graph} {args.fastq}'.split(),
30+
f'{minichain} -t {args.threads} -c {args.graph} {args.fastq}'.split(),
3131
stdout=open(f'{args.alignment}_minichain.gaf', 'wb')
3232
)
3333

@@ -42,8 +42,7 @@ def run_experiment(args):
4242
)
4343

4444
requiredNamed = parser.add_argument_group('required arguments')
45-
requiredNamed.add_argument('-vg', '--vg-graph', type=str, help='Input vg file', required=True)
46-
requiredNamed.add_argument('-gfa', '--gfa-graph', type=str, help='Input gfa file', required=True)
45+
requiredNamed.add_argument('-g', '--graph', type=str, help='Input vg/gfa file', required=True)
4746
requiredNamed.add_argument('-fq', '--fastq', type=str, help='Input fastq file', required=True)
4847
requiredNamed.add_argument(
4948
'-a', '--alignment', type=str, help='Output gam/gaf files (without extension)', required=True

0 commit comments

Comments
 (0)