This is the command line to create the Dataproc cluster with the gcloud CLI:
gcloud dataproc clusters create project-dezoomcamp --region asia-southeast2 --zone asia-southeast2-c --single-node --master-machine-type n1-standard-4 --master-boot-disk-size 500 --image-version 2.0-debian10 --optional-components JUPYTER,DOCKER --max-idle 604800s --project applied-mystery-341809
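The same single-node cluster can also be created programmatically. Below is a minimal sketch using the google-cloud-dataproc Python client; the client library, endpoint string, and code structure are assumptions for illustration and are not part of the original gcloud command above.

from google.cloud import dataproc_v1

# Regional endpoint for asia-southeast2, matching the --region flag above.
cluster_client = dataproc_v1.ClusterControllerClient(
    client_options={"api_endpoint": "asia-southeast2-dataproc.googleapis.com:443"}
)

cluster = {
    "project_id": "applied-mystery-341809",
    "cluster_name": "project-dezoomcamp",
    "config": {
        "gce_cluster_config": {"zone_uri": "asia-southeast2-c"},
        "master_config": {
            "num_instances": 1,
            "machine_type_uri": "n1-standard-4",
            "disk_config": {"boot_disk_size_gb": 500},
        },
        "software_config": {
            "image_version": "2.0-debian10",
            "optional_components": ["JUPYTER", "DOCKER"],
            # Single-node cluster: allow zero workers, as --single-node does.
            "properties": {"dataproc:dataproc.allow.zero.workers": "true"},
        },
        # Delete the cluster after 7 days idle, matching --max-idle 604800s.
        "lifecycle_config": {"idle_delete_ttl": {"seconds": 604800}},
    },
}

operation = cluster_client.create_cluster(
    request={
        "project_id": "applied-mystery-341809",
        "region": "asia-southeast2",
        "cluster": cluster,
    }
)
print(operation.result().cluster_name)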
This is the equivalent request if using the REST API:
POST /v1/projects/applied-mystery-341809/regions/asia-southeast2/clusters/
{
  "projectId": "applied-mystery-341809",
  "clusterName": "project-dezoomcamp",
  "config": {
    "configBucket": "",
    "gceClusterConfig": {
      "networkUri": "default",
      "subnetworkUri": "",
      "internalIpOnly": false,
      "zoneUri": "asia-southeast2-c",
      "metadata": {},
      "tags": [],
      "shieldedInstanceConfig": {
        "enableSecureBoot": false,
        "enableVtpm": false,
        "enableIntegrityMonitoring": false
      }
    },
    "masterConfig": {
      "numInstances": 1,
      "machineTypeUri": "n1-standard-4",
      "diskConfig": {
        "bootDiskType": "pd-standard",
        "bootDiskSizeGb": 500,
        "numLocalSsds": 0
      },
      "minCpuPlatform": "",
      "imageUri": ""
    },
    "softwareConfig": {
      "imageVersion": "2.0-debian10",
      "properties": {
        "dataproc:dataproc.allow.zero.workers": "true"
      },
      "optionalComponents": [
        "JUPYTER",
        "DOCKER"
      ]
    },
    "lifecycleConfig": {
      "idleDeleteTtl": "604800s"
    },
    "initializationActions": [],
    "encryptionConfig": {
      "gcePdKmsKeyName": ""
    },
    "autoscalingConfig": {
      "policyUri": ""
    },
    "endpointConfig": {
      "enableHttpPortAccess": false
    },
    "securityConfig": {
      "kerberosConfig": {}
    }
  },
  "labels": {},
  "status": {},
  "statusHistory": [
    {}
  ],
  "metrics": {}
}
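To send this request outside the Cloud Console, something like the following Python sketch works. It assumes the request body above is saved as cluster.json and that Application Default Credentials are configured; both are assumptions for illustration, not part of the original setup.

import json
import google.auth
from google.auth.transport.requests import AuthorizedSession

# Authenticate with Application Default Credentials.
credentials, _ = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
session = AuthorizedSession(credentials)

url = (
    "https://dataproc.googleapis.com/v1/projects/"
    "applied-mystery-341809/regions/asia-southeast2/clusters"
)

with open("cluster.json") as f:  # the request body shown above
    body = json.load(f)

response = session.post(url, json=body)
response.raise_for_status()
print(response.json())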
Copy the PySpark files and JARs to Google Cloud Storage so they can be run on Dataproc:
gsutil cp data-ingestion/dags/bq_dim_artists.py gs://dtc_data_lake_applied-mystery-341809/code/bq_dim_artists.py
gsutil cp data-ingestion/dags/bq_fact_tracks.py gs://dtc_data_lake_applied-mystery-341809/code/bq_fact_tracks.py
gsutil cp data-ingestion/spark/resources/jars/*.jar gs://dtc_data_lake_applied-mystery-341809/code/jars/
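For reference, the same copies can be done from Python with the google-cloud-storage client. This is only a sketch of equivalent uploads; the client library is an assumption and the original project uses the gsutil commands above.

from pathlib import Path
from google.cloud import storage

# Equivalent uploads with the google-cloud-storage client instead of gsutil.
client = storage.Client(project="applied-mystery-341809")
bucket = client.bucket("dtc_data_lake_applied-mystery-341809")

bucket.blob("code/bq_dim_artists.py").upload_from_filename(
    "data-ingestion/dags/bq_dim_artists.py"
)
bucket.blob("code/bq_fact_tracks.py").upload_from_filename(
    "data-ingestion/dags/bq_fact_tracks.py"
)

# Upload every jar under resources/jars to code/jars/.
for jar in Path("data-ingestion/spark/resources/jars").glob("*.jar"):
    bucket.blob(f"code/jars/{jar.name}").upload_from_filename(str(jar))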