|
24 | 24 | from data_pipeline.datasets.exac.exac_regional_missense_constraint import prepare_exac_regional_missense_constraint |
25 | 25 | from data_pipeline.datasets.gnomad_v2.gnomad_v2_constraint import prepare_gnomad_v2_constraint |
26 | 26 | from data_pipeline.datasets.gnomad_v2.gnomad_v2_regional_missense_constraint import ( |
27 | | - prepare_gnomad_v2_regional_missense_constraint, |
| 27 | + prepare_gnomad_regional_missense_constraint, |
28 | 28 | ) |
29 | 29 |
|
30 | 30 | from data_pipeline.pipelines.variant_cooccurrence_counts import ( |
|
256 | 256 | { |
257 | 257 | "gtex_struct_path": pipeline.get_task("prepare_gtex_v10_expression_data"), |
258 | 258 | }, |
| 259 | + { |
| 260 | + "exclude_v10_tissues": True, |
| 261 | + }, |
259 | 262 | ) |
260 | 263 |
|
261 | 264 | pipeline.add_task( |
|
315 | 318 |
|
316 | 319 | pipeline.add_task( |
317 | 320 | "prepare_gnomad_v2_regional_missense_constraint", |
318 | | - prepare_gnomad_v2_regional_missense_constraint, |
| 321 | + prepare_gnomad_regional_missense_constraint, |
319 | 322 | f"/{constraint_subdir}/gnomad_v2_regional_missense_constraint.ht", |
320 | | - {"path": "gs://gcp-public-data--gnomad/release/2.1.1/regional_missense_constraint/gnomad_v2.1.1_rmc.ht"}, |
| 323 | + { |
| 324 | + "path": "gs://gcp-public-data--gnomad/release/2.1.1/regional_missense_constraint/gnomad_v2.1.1_rmc.ht", |
| 325 | + }, |
| 326 | +) |
| 327 | + |
| 328 | +pipeline.add_task( |
| 329 | + "prepare_gnomad_v4_regional_missense_constraint", |
| 330 | + prepare_gnomad_regional_missense_constraint, |
| 331 | + f"/{constraint_subdir}/gnomad_v4_regional_missense_constraint.ht", |
| 332 | + { |
| 333 | + "path": "gs://gnomad-v4-data-pipeline/inputs/regional_missense_constraint/rmc_browser.ht", |
| 334 | + }, |
321 | 335 | ) |
322 | 336 |
|
323 | 337 | ############################################### |
@@ -377,7 +391,7 @@ def annotate_with_preferred_transcript(table_path): |
377 | 391 | "exac_constraint": pipeline.get_task("prepare_exac_constraint"), |
378 | 392 | "exac_regional_missense_constraint": pipeline.get_task("prepare_exac_regional_missense_constraint"), |
379 | 393 | "gnomad_constraint": pipeline.get_task("prepare_gnomad_v2_constraint"), |
380 | | - "gnomad_v2_regional_missense_constraint": pipeline.get_task("prepare_gnomad_v2_regional_missense_constraint"), |
| 394 | + "gnomad_regional_missense_constraint": pipeline.get_task("prepare_gnomad_v2_regional_missense_constraint"), |
381 | 395 | }, |
382 | 396 | {"join_on": "preferred_transcript_id"}, |
383 | 397 | ) |
@@ -469,19 +483,30 @@ def annotate_with_constraint(genes_path, constraint_path): |
469 | 483 |
|
470 | 484 | pipeline.add_task( |
471 | 485 | "annotate_grch38_genes_step_6", |
472 | | - reject_par_y_genes, |
| 486 | + annotate_table, |
473 | 487 | f"/{genes_subdir}/genes_grch38_annotated_6.ht", |
474 | 488 | { |
475 | | - "genes_path": pipeline.get_task("annotate_grch38_genes_step_5"), |
| 489 | + "table_path": pipeline.get_task("annotate_grch38_genes_step_5"), |
| 490 | + "gnomad_regional_missense_constraint": pipeline.get_task("prepare_gnomad_v4_regional_missense_constraint"), |
| 491 | + }, |
| 492 | + {"join_on": "preferred_transcript_id"}, |
| 493 | +) |
| 494 | + |
| 495 | +pipeline.add_task( |
| 496 | + "annotate_grch38_genes_step_7", |
| 497 | + reject_par_y_genes, |
| 498 | + f"/{genes_subdir}/genes_grch38_annotated_7.ht", |
| 499 | + { |
| 500 | + "genes_path": pipeline.get_task("annotate_grch38_genes_step_6"), |
476 | 501 | }, |
477 | 502 | ) |
478 | 503 |
|
479 | 504 | pipeline.add_task( |
480 | 505 | "remove_grch38_genes_constraint_for_release", |
481 | 506 | remove_gnomad_v4_constraint, |
482 | | - f"/{genes_subdir}/genes_grch38_annotate_5_removed_constraint", |
| 507 | + f"/{genes_subdir}/genes_grch38_annotate_7_removed_constraint", |
483 | 508 | { |
484 | | - "genes_path": pipeline.get_task("annotate_grch38_genes_step_5"), |
| 509 | + "genes_path": pipeline.get_task("annotate_grch38_genes_step_7"), |
485 | 510 | }, |
486 | 511 | ) |
487 | 512 |
|
@@ -548,7 +573,7 @@ def annotate_with_constraint(genes_path, constraint_path): |
548 | 573 | pipeline.set_outputs( |
549 | 574 | { |
550 | 575 | "genes_grch37": "annotate_grch37_genes_step_5", |
551 | | - "genes_grch38": "annotate_grch38_genes_step_6", |
| 576 | + "genes_grch38": "annotate_grch38_genes_step_7", |
552 | 577 | "base_transcripts_grch37": "extract_grch37_transcripts", |
553 | 578 | "base_transcripts_grch38": "extract_grch38_transcripts", |
554 | 579 | "transcripts_grch37": "annotate_grch37_transcripts", |
|
0 commit comments