Skip to content

Commit 5156c66

Browse files
Merge pull request #162 from TIDES-transit/sample-data-doc
Sample Data: document, update scripts to validate, update tests
2 parents 314f4aa + 4806405 commit 5156c66

39 files changed

Lines changed: 2038 additions & 2443 deletions

.github/workflows/validate_package_schema.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
uses: TIDES-transit/json-schema-validator@master
1818
with:
1919
token: ${{ secrets.GITHUB_TOKEN }}
20-
json_schema: spec/tides-data-package.json
20+
json_schema: spec/tides-datapackage-profile.json
2121
json_path_pattern: ^spec/tides\.spec\.json$
2222
send_comment: false
2323
clear_comments: false

.github/workflows/validate_samples.yml

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,31 +29,47 @@ jobs:
2929
run: |
3030
pwd
3131
samples=$(find ./samples -type d -name 'TIDES')
32-
echo "{name}={value}" >> $GITHUB_OUTPUT
32+
echo $samples
33+
echo "samples=${samples}" >> $GITHUB_OUTPUT
3334
- name: Validate Sample Data
3435
id: validation
3536
run: |
3637
samples=(${{ steps.sample_folders.outputs.samples }})
37-
results=""
38+
results="{\"values\": ["
39+
result=""
3840
for sample in "${samples[@]}"; do
39-
result=$(frictionless validate --schema-sync "$sample/datapackage.json" --json)
40-
results+="$result\n"
41+
results+="$result"
42+
result=$(frictionless validate --schema-sync "${sample}/datapackage.json" --json) || true
43+
result="${result//\'s/}"
44+
result="${result//\\\"/\\\\\"}"
45+
result="$result,"
4146
done
42-
echo "{name}=${results}" >> $GITHUB_ENV
47+
48+
result="${result%?}"
49+
results+="$result"
50+
results+="]}"
51+
52+
echo "RESULTS<<EOF" >> $GITHUB_OUTPUT
53+
echo ${results} >> $GITHUB_OUTPUT
54+
echo "EOF" >> $GITHUB_OUTPUT
4355
- name: Comment on PR
4456
if: github.event_name == 'pull_request'
4557
uses: actions/github-script@v4
4658
with:
4759
github-token: ${{ secrets.GITHUB_TOKEN }}
4860
script: |
49-
const samples = '${{ env.samples }}'.trim().split('\n');
50-
const results = '${{ env.results }}'.trim().split('\n');
61+
console.log("Begin sample construction")
62+
const samples = '${{ steps.sample_folders.outputs.samples }}'.split('\n');
63+
const results = JSON.parse('${{ steps.validation.outputs.results }}');
5164
let comment = `**Data Validation Report**\n\n`;
5265
comment += '| Sample | Status |\n';
5366
comment += '| ------ | ------ |\n';
67+
console.log("Constructed header")
5468
for (let i = 0; i < samples.length; i++) {
69+
console.log(samples[i])
70+
console.log(results[i])
5571
const sample = samples[i];
56-
const result = JSON.parse(results[i]);
72+
const result = results.values[i];
5773
let status = '';
5874
if (result.valid) {
5975
status = ':heavy_check_mark:';

.github/workflows/validate_table_schemas.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@ jobs:
2121
json_schema: spec/table-schema.json
2222
json_path_pattern: ^spec/.*\.schema\.json$
2323
send_comment: false
24-
clear_comments: false
24+
clear_comments: false

CONTRIBUTING.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,10 @@ By making any contribution to the projects, contributors self-certify to the [Co
3434

3535
1. [Create a branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-and-deleting-branches-within-your-repository) to work on a new issue (or checkout an existing one where the issue is being worked on).
3636
2. Make your changes.
37-
3. Run `/tests/test_all` script to check and fix formatting, validate schemas, and build documentation locally to preview
38-
4. [Commit](#commits) your work in `git`
39-
5. `push` your changes to Github and submit a [`pull request`](#pull-requests)
37+
3. Run `tests/test_local_spec` script to check and fix formatting, validate profile and schemas with frictionless and with each other, and confirm that documentation can be built locally.
38+
4. Run `tests/test_samples_to_local` script to check if samples conform to any changes to the spec.
39+
5. [Commit](#commits) your work in `git`
40+
6. `push` your changes to Github and submit a [`pull request`](#pull-requests)
4041

4142
### Issues
4243

@@ -106,7 +107,7 @@ When a change is pushed to the TIDES specification repository, Github Actions de
106107
| **Name** | **What it does** |
107108
| -------- | ----------------- |
108109
| GitHub Actions | Runs following workflow on each push to the TIDES github repository: /.github/workflows/docs.yml |
109-
| mike | runs mkdocs and puts output in a folder in gh_pages branch which corresponds to the name of the branch (i.e. main, develop, pr-163, etc) <br> For new branches with documentation, adds an entry in `versions.json` |
110+
| mike | runs mkdocs and puts output in a folder in gh_pages branch which corresponds to the name of the branch (i.e. main, develop, pr-163, etc) <br> For new branches with documentation, adds an entry in `versions.json` |
110111
| `mkdocs` | Package which generates documentation from markdown and code |
111112

112113
??? info "Overview of Documentation Building Process"
@@ -117,7 +118,7 @@ When a change is pushed to the TIDES specification repository, Github Actions de
117118
subgraph mkdocs["<b>mkdocs:</b> run on execution of mike"]
118119
md_mike["mike"] -->|runs for current branch| md_mkdocs["mkdocs"]
119120
md_mkdocs.yml["mkdocs.yml"] -->|specifies parameters| md_mkdocs["mkdocs"]
120-
md_mkdocs_macros["mkdocs-macros"] -->|"plugin for"| md_mkdocs["mkdocs"]
121+
md_mkdocs_macros["mkdocs-macros"] -->|"plugin for"| md_mkdocs["mkdocs"]
121122
main.py[/"main.py"/] -->|defines macros in code available for| md_mkdocs_macros["mkdocs-macros"]
122123
end
123124

README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,35 @@ Human-friendlier documentation is auto-generated and available at:
1212
- [Architecture](https://tides-transit.github.io/TIDES/main/architecture)
1313
- [Table Schemas](https://tides-transit.github.io/TIDES/main/tables)
1414

15+
### Data Package
16+
17+
Directories with TIDES data must contain metadata in a [`datapackage.json`](https://tides-transit.github.io/TIDES/main/datapackage) file in a format compliant with the [`tides-datapackage-profile`](https://tides-transit.github.io/TIDES/main/datapackage) of a [`frictionless data package`](https://specs.frictionlessdata.io/data-package/).
18+
19+
[`/samples/template/datapackage.json`](https://raw.githubusercontent.com/TIDES-transit/TIDES/main/samples/template/datapackage.json) has a template datapackage which can be used.
20+
21+
## Sample Data
22+
23+
[Sample data](https://tides-transit.github.io/TIDES/main/samples) can be found in the `/samples` directory, with one directory for each sample.
24+
25+
### Template
26+
27+
Templates of `datapackage.json` and each TIDES file type are located in the `/samples/template` directory.
28+
29+
## Validating TIDES data
30+
31+
TIDES data with a valid [`datapackage.json`](#data-package) can be easily validated using the [frictionless framework](https://framework.frictionlessdata.io/), which can be installed and invoked as follows:
32+
33+
```bash
34+
pip install frictionless
35+
frictionless validate --schema-sync path/to/your/datapackage.json
36+
```
37+
38+
Several other validation scripts and tools that offer more flexibility, such as validating against a canonical, named version of the spec or against a local spec, can be found in the `/bin` directory. Run any of them with the `--help` flag to see its usage.
39+
40+
```bash
41+
bin/validate-datapackage [-v remote_spec_ref | -l local_spec_path] [-d dataset_path]
42+
```
43+
1544
## Contributing to TIDES
1645

1746
Those who want to help with the development of the TIDES specification should review the guidance in [CONTRIBUTING.md](CONTRIBUTING.md).

bin/replace-spec-in-datapackage

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/usr/bin/env bash
2+
3+
description="Create a temporary data package if a spec_path_prefix is provided."
4+
5+
usage="
6+
Usage: replace-spec-in-datapackage <dataset_path> [spec_path_prefix] [output_file]
7+
8+
Arguments:
9+
<dataset_path> The path to the dataset directory containing the 'datapackage.json' file.
10+
<spec_path_prefix> The path or URL to the spec to be referenced in the updated data package.
11+
[output_file] (Optional) The path to save the temporary data package. If not provided, the temporary data package will be saved as 'datapackage.tmp.json' in the dataset directory.
12+
13+
"
14+
15+
example_usage="
16+
Example Usage:
17+
bin/replace-spec-in-datapackage samples/template/TIDES spec samples/template/TIDES/datapackage.tmp.json
18+
"
19+
20+
################################################################################
21+
# Help #
22+
################################################################################
23+
24+
# Print the help text to stdout.
# Globals:   description, usage, example_usage (read)
# Arguments: none
# Outputs:   the three help sections, one after another, each followed by a
#            newline
display_help() {
  local section
  for section in "$description" "$usage" "$example_usage"; do
    echo "$section"
  done
}
30+
31+
# Check for help flag
32+
if [ "$1" == "--help" ]; then
33+
display_help
34+
exit 0
35+
fi
36+
37+
################################################################################
38+
# SET DEFAULTS #
39+
################################################################################
40+
DEFAULT_TMP_DATAPACKAGE="datapackage.tmp.json"
41+
PROFILE_FILE="tides-datapackage-profile.json"
42+
43+
################################################################################
44+
# MAIN #
45+
################################################################################
46+
47+
echo "$description"
48+
49+
################################################################################
50+
# Check Requirements #
51+
################################################################################
52+
source "$(dirname "${BASH_SOURCE[0]}")/utils"
53+
check_jq
54+
55+
################################################################################
56+
# Process the input options. #
57+
################################################################################
58+
dataset_path=$1
59+
spec_path_prefix=$2
60+
output_file=${3:-"$dataset_path/$DEFAULT_TMP_DATAPACKAGE"}
61+
62+
echo "Parameters:
63+
dataset_path: $dataset_path
64+
spec_path_prefix: $spec_path_prefix
65+
output_file: $output_file
66+
"
67+
68+
# Both positional arguments are required: dataset_path locates the
# datapackage.json to read, and spec_path_prefix is what the rewritten package
# will reference. Fail early with the help text when either one is missing.
if [ -z "$dataset_path" ]; then
  echo "Error: Missing dataset_path argument." >&2
  display_help
  exit 1
fi

# The variable has to be expanded here: a bare word inside the quotes is a
# non-empty literal, which would make this test always false.
if [ -z "$spec_path_prefix" ]; then
  echo "Error: Missing spec_path_prefix argument." >&2
  display_help
  exit 1
fi
81+
82+
datapackage_file="$dataset_path/datapackage.json"
83+
84+
check_valid_path "$datapackage_file"
85+
86+
profile_file="$spec_path_prefix/$PROFILE_FILE"
87+
check_valid_path "$profile_file"
88+
89+
################################################################################
# Create updated datapackage                                                   #
################################################################################
# Rewrite the data package so that every resource schema (keeping only its file
# name) and the package profile point at spec_path_prefix.
#
# The result is built in a sibling ".tmp" file and moved into place only when
# jq succeeds, so a failed rewrite never leaves an untransformed or truncated
# file at output_file for later validation steps to pick up by mistake.
tmp_output_file="$output_file.tmp"
if ! jq --arg spec_path_prefix "$spec_path_prefix" --arg profile_file "$profile_file" '
    .resources |= map(.schema |= ($spec_path_prefix + "/\(. | split("/") | last)"))
    | .profile = ($profile_file)
' "$datapackage_file" > "$tmp_output_file"; then
  rm -f -- "$tmp_output_file"
  echo "Error: could not rewrite spec references from $datapackage_file." >&2
  exit 1
fi
mv -- "$tmp_output_file" "$output_file" || exit 1
97+
98+
echo "$output_file"

bin/utils

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/usr/bin/env bash
2+
3+
# Verify that the jsonschema-cli executable can be found.
# Globals:   none
# Arguments: none
# Outputs:   when missing, a red error line and an install hint on stderr
# Returns:   0 when found; otherwise exits the shell with status 1
check_jsonschema-cli() {
  if ! command -v jsonschema-cli >/dev/null 2>&1; then
    # %b makes printf interpret the \033 colour escapes; bash's builtin echo
    # prints them literally unless it is given -e.
    printf >&2 '%b\n\n%s\n' \
      '\033[31m!!! jsonschema-cli is required but not found.\033[0m' \
      "You can install it using 'pip install jsonschema-cli'. Aborting."
    exit 1
  fi
}
12+
13+
# Verify that the frictionless executable can be found.
# Globals:   none
# Arguments: none
# Outputs:   when missing, a red error line and an install hint on stderr
# Returns:   0 when found; otherwise exits the shell with status 1
check_frictionless() {
  if ! command -v frictionless >/dev/null 2>&1; then
    # %b makes printf interpret the \033 colour escapes; bash's builtin echo
    # prints them literally unless it is given -e.
    printf >&2 '%b\n\n%s\n' \
      '\033[31m!!! frictionless is required but not found.\033[0m' \
      "You can install it using 'pip install frictionless'. Aborting."
    exit 1
  fi
}
22+
23+
# Verify that jq can be found; when it cannot, print installation
# instructions chosen from the output of `uname -s`.
# Globals:   none
# Arguments: none
# Outputs:   when missing, a red error line followed by install hints on stderr
# Returns:   0 when found; otherwise exits the shell with status 1
check_jq() {
  if ! command -v jq >/dev/null 2>&1; then
    # %b makes printf interpret the \033 colour escapes; bash's builtin echo
    # prints them literally unless it is given -e.
    printf >&2 '%b\n' '\033[31m!!! jq is required but not found.\033[0m'

    # Determine the operating system. Kept local so that sourcing this file
    # and calling the function does not leak a variable into the caller.
    local os_type
    os_type=$(uname -s)

    case "$os_type" in
      Linux*)
        echo >&2 "Please install jq using your package manager."
        echo >&2 "For example, on Debian/Ubuntu-based systems, you can use:"
        echo >&2 "sudo apt-get install jq"
        ;;
      Darwin*)
        echo >&2 "Please install jq using Homebrew."
        echo >&2 "If you don't have Homebrew installed, you can install it from https://brew.sh/."
        echo >&2 "Once Homebrew is installed, run the following command:"
        echo >&2 "brew install jq"
        ;;
      CYGWIN*|MINGW32*|MSYS*|MINGW*)
        echo >&2 "Please download the jq binary for Windows from https://stedolan.github.io/jq/download/."
        echo >&2 "Extract the downloaded ZIP file and add the 'jq.exe' binary to your PATH..."
        echo >&2 "Or run the following command: "
        echo >&2 "curl -L -o /usr/bin/jq.exe https://github.com/stedolan/jq/releases/latest/download/jq-win64.exe"
        ;;
      *)
        echo >&2 "Unknown operating system. Please install jq from https://stedolan.github.io/jq/download/."
        ;;
    esac

    exit 1
  fi
}
57+
58+
# Validate a spec/data path given as a local path or an http(s) URL.
# Globals:   display_help (called on an invalid local path, if the sourcing
#            script defines it)
# Arguments: $1 - file path, directory path, or URL; empty is accepted
# Outputs:   a red error message on stderr when the path is rejected
# Returns:   0 when the path is empty, exists locally, or is a URL that answers
#            a HEAD request; otherwise exits the shell with status 1
check_valid_path() {
  local path=$1

  # Nothing to validate.
  if [[ -z "$path" ]]; then
    return 0
  fi

  # Anything that exists on disk is accepted as-is, even when its name happens
  # to begin with "http"; only non-existent paths are considered as URLs.
  if [[ -e "$path" ]]; then
    return 0
  fi

  if [[ "$path" =~ ^https?:// ]]; then
    # Check if the URL is valid
    if ! curl --output /dev/null --silent --head --fail "$path"; then
      # The escapes live in the printf format so they are interpreted, while
      # the path is passed as data and printed verbatim.
      printf >&2 '\033[31mError: Invalid URL. The %s URL is not reachable or does not exist.\033[0m\n' "$path"
      exit 1
    fi
    return 0
  fi

  printf >&2 '\033[31mError: %s is an invalid path. It must be a valid file path, directory path, or a URL starting with '\''http'\''.\033[0m\n' "$path"
  # display_help belongs to the script that sources this file; skip it when
  # that script does not define one instead of failing with "command not found".
  if declare -F display_help >/dev/null; then
    display_help
  fi
  exit 1
}

0 commit comments

Comments
 (0)