forked from microsoft/FHIR-Analytics-Pipelines
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSet-SynapseEnvironment.ps1
More file actions
339 lines (287 loc) · 11.7 KB
/
Set-SynapseEnvironment.ps1
File metadata and controls
339 lines (287 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
<#
.SYNOPSIS
Create database on Synapse SQL pool, and create default EXTERNAL TABLEs and VIEWs for customers.
.DESCRIPTION
Create database on Synapse SQL pool, and create default EXTERNAL TABLEs and VIEWs for customers.
Customers can query data on created EXTERNAL TABLEs and VIEWs on Synapse.
.PARAMETER SynapseWorkspaceName
Name of Synapse workspace instance, will create tag on it and create EXTERNAL TABLEs and VIEWs in its serverless SQL pool.
.PARAMETER Database
Default: fhirdb
Name of database to be created on Synapse serverless SQL server pool.
.PARAMETER StorageName
Name of storage where parquet FHIR data be exported to.
.PARAMETER Container
Default: fhir
Name of container on storage where parquet FHIR data be exported to.
.PARAMETER ResultPath
Default: result
Path to the parquet FHIR data.
.PARAMETER SqlScriptCollectionPath
Default: sql/Resources
Path to the sql scripts directory to create EXTERNAL TABLEs and VIEWs.
.PARAMETER MasterKey
Default: "FhirSynapseLink0!"
Master key that will be set in created database. Database need to have master key then we can create EXTERNAL TABLEs and VIEWs on it.
.PARAMETER Concurrent
Default: 30
Max concurrent tasks number that will be used to upload place holder files and execute SQL scripts.
#>
[cmdletbinding()]
Param(
[parameter(Mandatory=$true)]
[string]$SynapseWorkspaceName,
[string]$Database = "fhirdb",
[parameter(Mandatory=$true)]
[string]$StorageName,
[string]$Container = "fhir",
[string]$ResultPath = "result",
[string]$SqlScriptCollectionPath = "sql/Resources",
[string]$MasterKey = "FhirSynapseLink0!",
[int]$Concurrent = 30
)
$jobName = "FhirSynapseJob"
$readmePath = ".readme.txt"
# TODO: Align tags here and ARM template, maybe save schemas in Storage/ACR and run remotely.
$tags = @{
"FhirAnalyticsPipeline" = "FhirToDataLake"
"FhirSchemaVersion" = "v0.3.0"
}
function New-CustomDatabase
{
param([string]$serviceEndpoint, [string]$databaseName)
$sqlAccessToken = (Get-AzAccessToken -ResourceUrl https://database.windows.net).Token
try {
Invoke-Sqlcmd -ServerInstance $serviceEndpoint -Database "master" -AccessToken $sqlAccessToken `
-Query "CREATE DATABASE $($databaseName)" -ErrorAction Stop
}
catch {
Write-Host "Create database '$databaseName' on '$serviceEndpoint' failed: $($_.ToString())"
throw
}
}
function Remove-CustomDatabase
{
param([string]$serviceEndpoint, [string]$databaseName)
[System.Data.SqlClient.SqlConnection]::ClearAllPools()
$sqlAccessToken = (Get-AzAccessToken -ResourceUrl https://database.windows.net).Token
try {
Invoke-Sqlcmd -ServerInstance $serviceEndpoint -Database "master" -AccessToken $sqlAccessToken `
-Query "DROP DATABASE $($databaseName)" -ErrorAction Stop
}
catch {
Write-Host "Remove database '$databaseName' on '$serviceEndpoint' failed: $($_.ToString())"
throw
}
}
function Set-InitializeEnvironment
{
param([string]$serviceEndpoint, [string]$databaseName, [string]$masterKey, [string]$storageName, [string]$container, [string]$resultPath)
$locationPath = "https://$storageName.blob.core.windows.net/$container/$resultPath"
$initializeEnvironmentSql = "
CREATE MASTER KEY ENCRYPTION BY PASSWORD = '$masterKey';
CREATE DATABASE SCOPED CREDENTIAL SynapseIdentity
WITH IDENTITY = 'Managed Identity';
CREATE EXTERNAL DATA SOURCE ParquetSource WITH (
LOCATION = '$locationPath',
CREDENTIAL = SynapseIdentity
);
CREATE EXTERNAL FILE FORMAT ParquetFormat WITH ( FORMAT_TYPE = PARQUET );
GO
CREATE SCHEMA fhir;
GO
USE [master]"
$sqlAccessToken = (Get-AzAccessToken -ResourceUrl https://database.windows.net).Token
try {
Invoke-Sqlcmd -ServerInstance $serviceEndpoint -Database $databaseName -AccessToken $sqlAccessToken `
-Query $initializeEnvironmentSql -ErrorAction Stop
}
catch {
Write-Host "Initialize environment on '$databaseName' of '$serviceEndpoint' failed: $($_.ToString())"
throw
}
}
function New-ContainerIfNotExists
{
param([string]$storageName, [string]$containerName)
$storageContext = New-AzStorageContext -StorageAccountName $storageName -UseConnectedAccount
if(Get-AzStorageContainer -Name $containerName -Context $storageContext -ErrorAction SilentlyContinue) {
Write-Host " -> Container '$containerName' already exists." -ForegroundColor Green
}
else{
Write-Host " -> Create container '$containerName'." -ForegroundColor Green
New-AzStorageContainer -Name $containerName -Context $storageContext -ErrorAction Stop
}
}
function New-PlaceHolderBlobs
{
param([string]$storageName, [string]$container, [string]$resultPath)
$files = Get-ChildItem $SqlScriptCollectionPath -Filter "*.sql" -Name
foreach ($f in $files) {
# Upload placeholder files.
$resourceType = $($f -split "\.")[0]
$blobName = "$resultPath/$resourceType/.readme.txt"
$jobs = @(Get-Job -Name $jobName -ErrorAction Ignore)
if ($jobs.Count -ge $Concurrent) {
$finishedJob = (Get-Job -Name $jobName | Wait-Job -Any)
Remove-Job -Job $finishedJob
if ($finishedJob.State -eq 'Failed') {
Write-Host " -> $($finishedJob.ChildJobs[0].JobStateInfo.Reason.Message)" -ForegroundColor Red
Get-Job -Name $jobName | Wait-Job | Remove-Job | Out-Null
throw " -> Uploading readme files to storage failed: $($finishedJob.ChildJobs[0].JobStateInfo.Reason.Message)"
}
}
# Create TABLES and VIEWs for resouces
Write-Host " -> Upload blob '$blobName'."
Start-Job -Name $jobName -ScriptBlock{
$storageContext = New-AzStorageContext -StorageAccountName $args[3] -UseConnectedAccount
Set-AzStorageBlobContent `
-File $args[0]`
-Container $args[1] `
-Blob $args[2] `
-Context $storageContext `
-Force `
-ErrorAction stop
} -ArgumentList "$(Get-Location)/$readmePath", $container, $blobName, $storageName | Out-Null
}
foreach ($finishedJob in (Get-Job -Name $jobName | Wait-Job)) {
Remove-Job -Job $finishedJob
if ($finishedJob.State -eq 'Failed') {
Write-Host " -> $($finishedJob.ChildJobs[0].JobStateInfo.Reason.Message)" -ForegroundColor Red
Get-Job -Name $jobName | Wait-Job | Remove-Job | Out-Null
throw " -> Uploading readme files to storage failed: $($finishedJob.ChildJobs[0].JobStateInfo.Reason.Message)"
}
}
}
function New-TableAndViewsForResources
{
param([string]$serviceEndpoint, [string]$databaseName, [string]$masterKey, [string]$storageName, [string]$container)
$files = Get-ChildItem $SqlScriptCollectionPath -Filter "*.sql"
$sqlAccessToken = (Get-AzAccessToken -ResourceUrl https://database.windows.net).Token
Write-Host " -> Start creating TABLEs and VIEWs on '$databaseName' of '$serviceEndpoint'" -ForegroundColor Green
foreach ($file in $files) {
$jobs = @(Get-Job -Name $jobName -ErrorAction Ignore)
if ($jobs.Count -ge $Concurrent) {
$finishedJob = (Get-Job -Name $jobName | Wait-Job -Any)
Remove-Job -Job $finishedJob
if ($finishedJob.State -eq 'Failed') {
Write-Host " -> $($finishedJob.ChildJobs[0].JobStateInfo.Reason.Message)" -ForegroundColor Red
Get-Job -Name $jobName | Wait-Job | Remove-Job | Out-Null
throw "Creating Table and Views job failed: $($finishedJob.ChildJobs[0].JobStateInfo.Reason.Message)"
}
}
$filePath = $file.FullName
# Create TABLES and VIEWs for resouces
Write-Host " -> Executing script $filePath"
Start-Job -Name $jobName -ScriptBlock{
Invoke-Sqlcmd `
-ServerInstance $args[0] `
-Database $args[1] `
-AccessToken $args[2] `
-InputFile $args[3] `
-ConnectionTimeout 120 `
-ErrorAction Stop
} -ArgumentList $serviceEndpoint, $databaseName, $sqlAccessToken, $filePath | Out-Null
}
foreach ($finishedJob in (Get-Job -Name $jobName | Wait-Job)) {
Remove-Job -Job $finishedJob
if ($finishedJob.State -eq 'Failed') {
Write-Host " -> $($finishedJob.ChildJobs[0].JobStateInfo.Reason.Message)" -ForegroundColor Red
Get-Job -Name $jobName | Wait-Job | Remove-Job | Out-Null
throw "Creating Table and Views job failed: $($finishedJob.ChildJobs[0].JobStateInfo.Reason.Message)"
}
}
}
$stopwatch = [system.diagnostics.stopwatch]::StartNew()
Get-Job -Name $jobName -ErrorAction Ignore | Remove-Job | Out-Null
###
# Try get Synapse instance.
###
try {
$synapse = Get-AzSynapseWorkspace -Name $SynapseWorkspaceName -ErrorAction Stop
}
catch {
Write-Host "Get Synapse instance '$synapseWorkspaceName' failed: $($_.ToString())."
throw
}
# Get synapse serverless SQL server endpoint
$synapseSqlServerEndpoint = $synapse.ConnectivityEndpoints.sqlOnDemand
# Test connection to Synapse SQL server.
try {
$sqlAccessToken = (Get-AzAccessToken -ResourceUrl https://database.windows.net).Token
$dbId = Invoke-Sqlcmd -ServerInstance $synapseSqlServerEndpoint -Database "master" -AccessToken $sqlAccessToken `
-Query "SELECT DB_ID('$Database')" -ErrorAction Stop
}
catch {
Write-Host "Failed to connect to '$synapseSqlServerEndpoint': $($_.ToString())."
throw
}
# Throw exception if database already exists.
if ([string]$dbId.Column1)
{
throw "Database '$Database' already exist, please use another database name or drop it."
}
# Create tag on Synapse
Update-AzTag -ResourceId $synapse.Id -Tag $tags -Operation "Merge" | Out-Null
Write-Host " -> Created Tags on Synapse '$synapseWorkspaceName'." -ForegroundColor Green
try {
###
# 1.Create container on Storage if not exists.
###
New-ContainerIfNotExists `
-storageName $StorageName `
-containerName $Container
}
catch {
Write-Host "Create container '$Container' on '$StorageName' failed: $($_.ToString())."
throw
}
try{
###
# 2. Place holder blobs
###
New-PlaceHolderBlobs -storage $StorageName -container $Container -resultPath $ResultPath
}
catch
{
Write-Host "Create place holder blobs failed: $($_.ToString())."
throw
}
###
# 1. Create database.
###
New-CustomDatabase `
-databaseName $Database `
-serviceEndpoint $synapseSqlServerEndpoint
# Try to create TABLEs and VIEWs for all resource types.
# And will try to drop database if failed to create TABLEs and VIEWs.
try{
###
# 2. Initialize database environment.
###
Set-InitializeEnvironment `
-serviceEndpoint $synapseSqlServerEndpoint `
-databaseName $Database `
-masterKey $MasterKey `
-storage $StorageName `
-container $Container `
-resultPath $ResultPath
###
# 3. Create TABLEs and VIEWs on Synapse.
###
New-TableAndViewsForResources `
-serviceEndpoint $synapseSqlServerEndpoint `
-databaseName $Database `
-masterKey $MasterKey `
-storage $StorageName `
-container $Container
}
catch{
Write-Host " -> Create TABLEs and VIEWs Failed, will try to drop database '$Database'." -ForegroundColor Red
Remove-CustomDatabase -serviceEndpoint $synapseSqlServerEndpoint -databaseName $Database
throw
}
$stopwatch.Stop()
$time = $stopwatch.Elapsed.TotalSeconds
Write-Host " -> Create Synapse environment finished with $time seconds." -ForegroundColor Green
[System.Data.SqlClient.SqlConnection]::ClearAllPools()