databrickslabs · ronanstokes-db · Dec 6, 2024 · Dec 9, 2024 · Dec 9, 2024 · Dec 9, 2024
@@ -45,6 +45,9 @@ jobs:
       - name: Install dependencies
         run: pipenv install --dev
 
+      - name: Install Coverage
+        run: pip install coverage
+
       - name: Lint
         run: |
           pipenv run prospector --profile prospector.yaml
@@ -53,9 +56,12 @@ jobs:
         run: make test
 
       - name: Publish test coverage to coverage site
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           name: dbldatagen
+          verbose: true
           files: ./coverage.xml
-
+          env_vars: OS,PYTHON
+          fail_ci_if_error: true
+          flags: unittests
@@ -19,6 +19,8 @@
 [![downloads](https://img.shields.io/github/downloads/databrickslabs/dbldatagen/total.svg)](https://hanadigital.github.io/grev/?user=databrickslabs&repo=dbldatagen)
 -->
 
+TEST PR - Not meant for commit
+
 ## Project Description
 The `dbldatagen` Databricks Labs project is a Python library for generating synthetic data within the Databricks 
 environment using Spark. The generated data may be used for testing, benchmarking, demos, and many 
@@ -65,7 +67,7 @@ details of use and many examples.
 
 Release notes and details of the latest changes for this specific release
 can be found in the GitHub repository
-[here](https://github.com/databrickslabs/dbldatagen/blob/release/v0.4.0post2/CHANGELOG.md)
+[here](https://github.com/databrickslabs/dbldatagen/blob/release/v0.4.001/CHANGELOG.md)
 
 # Installation
 

@@ -34,7 +34,7 @@ def get_version(version):
     return version_info
 
 
-__version__ = "0.4.0post2"  # DO NOT EDIT THIS DIRECTLY!  It is managed by bumpversion
+__version__ = "0.4.001"  # DO NOT EDIT THIS DIRECTLY!  It is managed by bumpversion
 __version_info__ = get_version(__version__)
 
 

@@ -36,13 +36,25 @@ class ColumnSpecOptions(object):
 
     :param step: Step to use for range of generated value. As an alternative, you may use the `dataRange` parameter
 
-    :param numColumns: generate `n` columns numbered from 1 .. n-1 with same definition
+    :param numColumns: generate `n` columns numbered from 0 .. n-1 with same definition. If generating random column
+                       values, it is recommended to use the `hash_fieldname` mechanism to generate random values
+                       to avoid all columns having the same value sequence.
 
     :param numFeatures: generate `n` columns numbered from 0 .. n-1 with same definition. Alias for `numColumns`
 
     :param structType: If specified as "array" and used with numColumns / numFeatures, will combine columns as array
 
-    :param random: If True, will generate random values for column value. Defaults to `False`
+    :param random: If True, will generate random values for column value. Defaults to `False`. When set to true,
+                      `randomSeed` and `randomSeedMethod` govern how the random values are generated.
+
+    :param randomSeed: If set, sets a value for the randomSeed. This will override the setting for the data generator
+                         object for this column. If set to `-1`, generates a true pseudo-random number (as opposed to
+                         one based on the randomSeed value)
+
+    :param randomSeedMethod: Controls how the random values are generated from the random seed.
+                             This may have the values `fixed`, `hash_fieldname` or None.
+                             If set to `hash_fieldname`, the `randomSeed` value is ignored and a hash of the field name
+                             is used as the seed.
 
     :param baseColumn: Either the string name of the base column, or a list of columns to use to
                         control data generation. The option ``baseColumns`` is an alias for ``baseColumn``.