Skip to content

Data Sources

Federico López Gómez edited this page Mar 9, 2020 · 17 revisions

IMPC ETL Data Sources

DCC XML Files

Experiment files

Expand dataframe Schema

root
 |-- _VALUE: string (nullable = true)
 |-- _procedureID: string (nullable = true)
 |-- mediaParameter: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _URI: string (nullable = true)
 |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- _fileType: string (nullable = true)
 |    |    |-- _parameterID: string (nullable = true)
 |    |    |-- parameterStatus: string (nullable = true)
 |-- ontologyParameter: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _parameterID: string (nullable = true)
 |    |    |-- _sequenceID: long (nullable = true)
 |    |    |-- parameterStatus: string (nullable = true)
 |    |    |-- term: array (nullable = true)
 |    |    |    |-- element: string (containsNull = true)
 |-- procedureMetadata: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- _parameterID: string (nullable = true)
 |    |    |-- parameterStatus: string (nullable = true)
 |    |    |-- value: string (nullable = true)
 |-- seriesMediaParameter: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- _parameterID: string (nullable = true)
 |    |    |-- parameterStatus: string (nullable = true)
 |    |    |-- value: array (nullable = true)
 |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |-- _URI: string (nullable = true)
 |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |-- _fileType: string (nullable = true)
 |    |    |    |    |-- _incrementValue: string (nullable = true)
 |    |    |    |    |-- _link: string (nullable = true)
 |    |    |    |    |-- parameterAssociation: array (nullable = true)
 |    |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |    |-- _Hjid: long (nullable = true)
 |    |    |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |    |    |-- _parameterID: string (nullable = true)
 |    |    |    |    |    |    |-- _sequenceID: long (nullable = true)
 |    |    |    |    |-- procedureMetadata: struct (nullable = true)
 |    |    |    |    |    |-- _parameterID: string (nullable = true)
 |    |    |    |    |    |-- value: double (nullable = true)
 |-- seriesParameter: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- _parameterID: string (nullable = true)
 |    |    |-- _unit: string (nullable = true)
 |    |    |-- parameterStatus: string (nullable = true)
 |    |    |-- value: array (nullable = true)
 |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |-- _VALUE: string (nullable = true)
 |    |    |    |    |-- _incrementStatus: string (nullable = true)
 |    |    |    |    |-- _incrementValue: string (nullable = true)
 |-- simpleParameter: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _parameterID: string (nullable = true)
 |    |    |-- _sequenceID: long (nullable = true)
 |    |    |-- _unit: string (nullable = true)
 |    |    |-- parameterStatus: string (nullable = true)
 |    |    |-- value: string (nullable = true)
 |-- _dateOfExperiment: string (nullable = true)
 |-- _experimentID: string (nullable = true)
 |-- _sequenceID: string (nullable = true)
 |-- specimenID: string (nullable = true)
 |-- statusCode: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- _date: string (nullable = true)
 |-- _centreID: string (nullable = true)
 |-- _pipeline: string (nullable = true)
 |-- _project: string (nullable = true)
 |-- _sourceFile: string (nullable = true)
 |-- _dataSource: string (nullable = true)
 |-- _type: string (nullable = true)

Specimen files

Expand dataframe Schema

root
 |-- _DOB: string (nullable = true)
 |-- _colonyID: string (nullable = true)
 |-- _corrupt_record: string (nullable = true)
 |-- _gender: string (nullable = true)
 |-- _isBaseline: boolean (nullable = true)
 |-- _litterId: string (nullable = true)
 |-- _phenotypingCentre: string (nullable = true)
 |-- _pipeline: string (nullable = true)
 |-- _productionCentre: string (nullable = true)
 |-- _project: string (nullable = true)
 |-- _specimenID: string (nullable = true)
 |-- _strainID: string (nullable = true)
 |-- _zygosity: string (nullable = true)
 |-- genotype: struct (nullable = true)
 |    |-- _MGIAlleleId: string (nullable = true)
 |    |-- _MGIGeneId: string (nullable = true)
 |    |-- _VALUE: string (nullable = true)
 |    |-- _fatherZygosity: string (nullable = true)
 |    |-- _geneSymbol: string (nullable = true)
 |    |-- _motherZygosity: string (nullable = true)
 |-- relatedSpecimen: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _VALUE: string (nullable = true)
 |    |    |-- _relationship: string (nullable = true)
 |    |    |-- _specimenID: string (nullable = true)
 |-- type: string (nullable = false)
 |-- _stage: string (nullable = true)
 |-- _stageUnit: string (nullable = true)

IMPRESS API

Expand dataframe Schema

root
 |-- centreName: string (nullable = true)
 |-- description: string (nullable = true)
 |-- impc: long (nullable = true)
 |-- isActive: boolean (nullable = true)
 |-- isDeleted: boolean (nullable = true)
 |-- isDeprecated: boolean (nullable = true)
 |-- isInternal: boolean (nullable = true)
 |-- isVisible: boolean (nullable = true)
 |-- majorVersion: long (nullable = true)
 |-- minorVersion: long (nullable = true)
 |-- name: string (nullable = true)
 |-- pipelineId: long (nullable = true)
 |-- pipelineKey: string (nullable = true)
 |-- pipelineType: string (nullable = true)
 |-- scheduleCollection: array (nullable = true)
 |    |-- element: long (containsNull = true)
 |-- weight: long (nullable = true)
 |-- scheduleId: long (nullable = true)
 |-- schedule: struct (nullable = true)
 |    |-- isActive: boolean (nullable = true)
 |    |-- isDeprecated: boolean (nullable = true)
 |    |-- pipelineId: long (nullable = true)
 |    |-- procedureCollection: array (nullable = true)
 |    |    |-- element: long (containsNull = true)
 |    |-- scheduleId: long (nullable = true)
 |    |-- stage: string (nullable = true)
 |    |-- time: string (nullable = true)
 |    |-- timeLabel: string (nullable = true)
 |    |-- timeUnit: string (nullable = true)
 |-- procedureId: long (nullable = true)
 |-- procedure: struct (nullable = true)
 |    |-- description: string (nullable = true)
 |    |-- isInternal: boolean (nullable = true)
 |    |-- isMandatory: boolean (nullable = true)
 |    |-- isVisible: boolean (nullable = true)
 |    |-- level: string (nullable = true)
 |    |-- majorVersion: long (nullable = true)
 |    |-- minAnimals: long (nullable = true)
 |    |-- minFemales: long (nullable = true)
 |    |-- minMales: long (nullable = true)
 |    |-- minorVersion: long (nullable = true)
 |    |-- name: string (nullable = true)
 |    |-- oldProcedureKey: string (nullable = true)
 |    |-- parameterCollection: array (nullable = true)
 |    |    |-- element: long (containsNull = true)
 |    |-- procedureId: long (nullable = true)
 |    |-- procedureKey: string (nullable = true)
 |    |-- scheduleId: long (nullable = true)
 |    |-- type: long (nullable = true)
 |-- parameterId: long (nullable = true)
 |-- parameter: struct (nullable = true)
 |    |-- dataAnalysisNotes: string (nullable = true)
 |    |-- derivation: string (nullable = true)
 |    |-- description: string (nullable = true)
 |    |-- graphType: string (nullable = true)
 |    |-- incrementCollection: array (nullable = true)
 |    |    |-- element: string (containsNull = true)
 |    |-- isActive: boolean (nullable = true)
 |    |-- isAnnotation: boolean (nullable = true)
 |    |-- isDeleted: boolean (nullable = true)
 |    |-- isDeprecated: boolean (nullable = true)
 |    |-- isDerived: boolean (nullable = true)
 |    |-- isImportant: boolean (nullable = true)
 |    |-- isIncrement: boolean (nullable = true)
 |    |-- isInternal: boolean (nullable = true)
 |    |-- isMedia: boolean (nullable = true)
 |    |-- isOption: boolean (nullable = true)
 |    |-- isRequired: boolean (nullable = true)
 |    |-- isVisible: boolean (nullable = true)
 |    |-- majorVersion: long (nullable = true)
 |    |-- minorVersion: long (nullable = true)
 |    |-- name: string (nullable = true)
 |    |-- oldParameterKey: string (nullable = true)
 |    |-- ontologyGroupId: string (nullable = true)
 |    |-- optionCollection: array (nullable = true)
 |    |    |-- element: string (containsNull = true)
 |    |-- originalParamId: long (nullable = true)
 |    |-- parameterId: long (nullable = true)
 |    |-- parameterKey: string (nullable = true)
 |    |-- parammptermCollection: array (nullable = true)
 |    |    |-- element: long (containsNull = true)
 |    |-- procedureId: long (nullable = true)
 |    |-- qcCheck: boolean (nullable = true)
 |    |-- qcMax: string (nullable = true)
 |    |-- qcMin: string (nullable = true)
 |    |-- qcNotes: string (nullable = true)
 |    |-- type: string (nullable = true)
 |    |-- unit: long (nullable = true)
 |    |-- valueType: string (nullable = true)
 |    |-- weight: long (nullable = true)
 |-- incrementId: string (nullable = true)
 |-- increment: struct (nullable = true)
 |    |-- incrementId: string (nullable = true)
 |    |-- incrementMin: string (nullable = true)
 |    |-- incrementString: string (nullable = true)
 |    |-- incrementType: string (nullable = true)
 |    |-- incrementUnit: string (nullable = true)
 |    |-- isActive: boolean (nullable = true)
 |    |-- isDeleted: boolean (nullable = true)
 |    |-- originalId: string (nullable = true)
 |    |-- parameterId: string (nullable = true)
 |    |-- weight: long (nullable = true)
 |-- optionId: string (nullable = true)
 |-- option: struct (nullable = true)
 |    |-- description: string (nullable = true)
 |    |-- isActive: boolean (nullable = true)
 |    |-- isDefault: boolean (nullable = true)
 |    |-- isDeleted: boolean (nullable = true)
 |    |-- name: string (nullable = true)
 |    |-- optionId: string (nullable = true)
 |    |-- parameterId: string (nullable = true)
 |    |-- parentId: string (nullable = true)
 |    |-- phoweight: long (nullable = true)
 |    |-- poweight: long (nullable = true)
 |-- parammptermId: long (nullable = true)
 |-- parammpterm: struct (nullable = true)
 |    |-- incrementId: string (nullable = true)
 |    |-- isDeleted: boolean (nullable = true)
 |    |-- ontologyTermId: string (nullable = true)
 |    |-- optionText: string (nullable = true)
 |    |-- paramMptermId: string (nullable = true)
 |    |-- parameterId: string (nullable = true)
 |    |-- selectionOutcome: string (nullable = true)
 |    |-- sex: string (nullable = true)
 |    |-- weight: long (nullable = true)

IMITS TSV Files

Allele2 file

IMITS alleles report. (The download location is not given on this page — TODO: add the link.)

Expand dataframe Schema

root
 |-- type: string (nullable = true)
 |-- allele_design_project: string (nullable = true)
 |-- marker_symbol: string (nullable = true)
 |-- marker_mgi_accession_id: string (nullable = true)
 |-- marker_type: string (nullable = true)
 |-- marker_name: string (nullable = true)
 |-- marker_synonym: string (nullable = true)
 |-- human_gene_symbol: string (nullable = true)
 |-- human_entrez_gene_id: string (nullable = true)
 |-- human_homolo_gene_id: string (nullable = true)
 |-- feature_type: string (nullable = true)
 |-- feature_chromosome: string (nullable = true)
 |-- feature_strand: string (nullable = true)
 |-- feature_coord_start: string (nullable = true)
 |-- feature_coord_end: string (nullable = true)
 |-- genetic_map_links: string (nullable = true)
 |-- sequence_map_links: string (nullable = true)
 |-- gene_model_ids: string (nullable = true)
 |-- allele_symbol: string (nullable = true)
 |-- allele_name: string (nullable = true)
 |-- allele_type: string (nullable = true)
 |-- allele_subtype: string (nullable = true)
 |-- allele_mgi_accession_id: string (nullable = true)
 |-- mgi_accession_id: string (nullable = true)
 |-- synonym: string (nullable = true)
 |-- allele_symbol_search_variants: string (nullable = true)
 |-- allele_description: string (nullable = true)
 |-- full_allele_description: string (nullable = true)
 |-- design_id: string (nullable = true)
 |-- cassette: string (nullable = true)
 |-- ikmc_project: string (nullable = true)
 |-- pipeline: string (nullable = true)
 |-- allele_features: string (nullable = true)
 |-- without_allele_features: string (nullable = true)
 |-- mutation_type: string (nullable = true)
 |-- allele_category: string (nullable = true)
 |-- targeting_vector_available: string (nullable = true)
 |-- es_cell_available: string (nullable = true)
 |-- mouse_available: string (nullable = true)
 |-- genbank_file: string (nullable = true)
 |-- allele_image: string (nullable = true)
 |-- allele_simple_image: string (nullable = true)
 |-- vector_genbank_file: string (nullable = true)
 |-- vector_allele_image: string (nullable = true)
 |-- sequence_files_urls: string (nullable = true)
 |-- vcf_file_urls: string (nullable = true)
 |-- bam_file_urls: string (nullable = true)
 |-- genoverse_design_track_url: string (nullable = true)
 |-- genoverse_mut_allele_track_url: string (nullable = true)
 |-- genoverse_predicted_allele_track_url: string (nullable = true)
 |-- links: string (nullable = true)
 |-- es_cell_status: string (nullable = true)
 |-- mouse_status: string (nullable = true)
 |-- phenotype_status: string (nullable = true)
 |-- late_adult_phenotype_status: string (nullable = true)
 |-- conditional_mouse_status: string (nullable = true)
 |-- deletion_mouse_status: string (nullable = true)
 |-- disease_model_status: string (nullable = true)
 |-- latest_es_cell_status: string (nullable = true)
 |-- latest_mouse_status: string (nullable = true)
 |-- latest_project_status_legacy: string (nullable = true)
 |-- latest_project_status: string (nullable = true)
 |-- latest_phenotype_status: string (nullable = true)
 |-- production_centre: string (nullable = true)
 |-- phenotyping_centre: string (nullable = true)
 |-- production_centres: string (nullable = true)
 |-- phenotyping_centres: string (nullable = true)
 |-- late_adult_phenotyping_centre: string (nullable = true)
 |-- late_adult_phenotyping_centres: string (nullable = true)
 |-- latest_production_centre: string (nullable = true)
 |-- latest_phenotyping_centre: string (nullable = true)
 |-- latest_phenotype_started: string (nullable = true)
 |-- latest_phenotype_complete: string (nullable = true)
 |-- late_adult_phenotype_started: string (nullable = true)
 |-- late_adult_phenotype_complete: string (nullable = true)
 |-- tissues_available: string (nullable = true)
 |-- tissue_types: string (nullable = true)
 |-- tissue_enquiry_links: string (nullable = true)
 |-- tissue_distribution_centres: string (nullable = true)
 |-- notes: string (nullable = true)

Product entries file

Expand dataframe Schema

root
 |-- allele_design_project: string (nullable = true)
 |-- product_id: string (nullable = true)
 |-- marker_symbol: string (nullable = true)
 |-- marker_mgi_accession_id: string (nullable = true)
 |-- mgi_accession_id: string (nullable = true)
 |-- marker_type: string (nullable = true)
 |-- marker_name: string (nullable = true)
 |-- marker_synonym: string (nullable = true)
 |-- allele_mgi_accession_id: string (nullable = true)
 |-- allele_symbol: string (nullable = true)
 |-- name: string (nullable = true)
 |-- allele_type: string (nullable = true)
 |-- allele_name: string (nullable = true)
 |-- allele_has_issue: string (nullable = true)
 |-- allele_id: string (nullable = true)
 |-- type: string (nullable = true)
 |-- allele_synonym: string (nullable = true)
 |-- genetic_info: string (nullable = true)
 |-- production_centre: string (nullable = true)
 |-- production_pipeline: string (nullable = true)
 |-- production_completed: string (nullable = true)
 |-- production_info: string (nullable = true)
 |-- status: string (nullable = true)
 |-- status_date: string (nullable = true)
 |-- qc_data: string (nullable = true)
 |-- associated_product_colony_name: string (nullable = true)
 |-- associated_product_es_cell_name: string (nullable = true)
 |-- associated_product_vector_name: string (nullable = true)
 |-- associated_products_colony_names: string (nullable = true)
 |-- associated_products_es_cell_names: string (nullable = true)
 |-- associated_products_vector_names: string (nullable = true)
 |-- order_names: string (nullable = true)
 |-- order_links: string (nullable = true)
 |-- contact_names: string (nullable = true)
 |-- contact_links: string (nullable = true)
 |-- other_links: string (nullable = true)
 |-- loa_assays: string (nullable = true)
 |-- ikmc_project_id: string (nullable = true)
 |-- design_id: string (nullable = true)
 |-- cassette: string (nullable = true)

QMUL

Static Files

Ontologies

IMPC Centres

Human-Gene orthologues

Clone this wiki locally