diff --git a/docs/source/intro.rst b/docs/source/intro.rst index 0833406..1060314 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -168,6 +168,92 @@ Here is an example of using the ``json_extract`` function to extract a field fro res = stackql.execute(query) print(res) +Overriding Parameters per Query +================================ + +The :meth:`pystackql.StackQL.execute` and :meth:`pystackql.StackQL.executeStmt` methods support keyword arguments that can override parameters set in the constructor for individual query executions. This is useful when you need to: + +- Change the output format for specific queries +- Adjust CSV formatting (separator, headers) for specific exports +- Override authentication for specific providers +- Change other execution parameters on a per-query basis + +**Example: Overriding Output Format** + +You can create a StackQL instance with a default output format, then override it for specific queries: + +.. code-block:: python + + from pystackql import StackQL + + # Create instance with CSV output by default + provider_auth = { + "github": { + "credentialsenvvar": "GITHUBCREDS", + "type": "basic" + } + } + stackql = StackQL(auth=provider_auth, output="csv") + + # This returns CSV format (default) + csv_result = stackql.execute("select id, name from github.repos.repos where org = 'stackql'") + print(csv_result) + # Output: + # id,name + # 443987542,stackql + # 441087132,stackql-provider-registry + # ... + + # This overrides to dict format for this query only + dict_result = stackql.execute("select id, name from github.repos.repos where org = 'stackql'", output="dict") + print(dict_result) + # Output: + # [{"id":"443987542","name":"stackql"},{"id":"441087132","name":"stackql-provider-registry"},...] 
+ + # Subsequent calls without override use the original CSV format + csv_result2 = stackql.execute("select id, name from github.repos.repos where org = 'stackql' limit 1") + +**Example: Overriding CSV Formatting** + +You can also override CSV-specific parameters like separator and headers: + +.. code-block:: python + + from pystackql import StackQL + + # Create instance with default CSV settings + stackql = StackQL(output="csv", sep=",", header=False) + + # Override to use pipe separator and include headers for this query + result = stackql.execute( + "select id, name from github.repos.repos where org = 'stackql' limit 3", + sep="|", + header=True + ) + +**Supported Override Parameters** + +The following parameters can be overridden in :meth:`pystackql.StackQL.execute` and :meth:`pystackql.StackQL.executeStmt`: + +- ``output``: Output format ('dict', 'pandas', or 'csv') +- ``sep``: CSV delimiter/separator (when output='csv') +- ``header``: Include headers in CSV output (when output='csv') +- ``auth``: Custom authentication for providers (takes precedence over the ``custom_auth`` argument when both are passed) +- ``custom_registry``: Custom StackQL provider registry URL +- ``max_results``: Maximum results per HTTP request +- ``page_limit``: Maximum pages per resource +- ``max_depth``: Maximum depth for indirect queries +- ``api_timeout``: API request timeout +- ``http_debug``: Enable HTTP debug logging +- Proxy settings: ``proxy_host``, ``proxy_port``, ``proxy_user``, ``proxy_password``, ``proxy_scheme`` +- Backend settings: ``backend_storage_mode``, ``backend_file_storage_location``, ``app_root`` +- Execution settings: ``execution_concurrency_limit``, ``dataflow_dependency_max``, ``dataflow_components_max`` + +.. note:: + + Parameter overrides only affect the specific query execution and do not modify the StackQL instance's configuration. Subsequent queries will use the original constructor parameters unless overridden again. In ``server_mode`` only the ``output`` override is applied (and ``execute`` still rejects ``output='csv'``); all other overrides are silently ignored because they are command-line parameters of the local ``stackql`` binary. 
+ + Using the Jupyter Magic Extension ================================= diff --git a/pystackql/core/query.py b/pystackql/core/query.py index 96a0238..427ced0 100644 --- a/pystackql/core/query.py +++ b/pystackql/core/query.py @@ -50,18 +50,19 @@ def _debug_log(self, message): with open(self.debug_log_file, "a") as log_file: log_file.write(message + "\n") - def execute(self, query, custom_auth=None, env_vars=None): + def execute(self, query, custom_auth=None, env_vars=None, override_params=None): """Execute a StackQL query. Args: query (str): The query to execute custom_auth (dict, optional): Custom authentication dictionary. Defaults to None. env_vars (dict, optional): Environment variables for the subprocess. Defaults to None. + override_params (list, optional): Override parameters for this execution. Defaults to None. Returns: dict: The query results """ - local_params = self.params.copy() + local_params = (override_params if override_params is not None else self.params).copy() script_path = None # Format query for platform diff --git a/pystackql/core/stackql.py b/pystackql/core/stackql.py index ab6c479..2606c3c 100644 --- a/pystackql/core/stackql.py +++ b/pystackql/core/stackql.py @@ -263,7 +263,7 @@ def upgrade(self, showprogress=True): return message - def executeStmt(self, query, custom_auth=None, env_vars=None): + def executeStmt(self, query, custom_auth=None, env_vars=None, **kwargs): """Executes a query using the StackQL instance and returns the output as a string. This is intended for operations which do not return a result set, for example a mutation operation such as an `INSERT` or a `DELETE` or life cycle method such as an `EXEC` operation @@ -279,6 +279,12 @@ def executeStmt(self, query, custom_auth=None, env_vars=None): :type custom_auth: dict, optional :param env_vars: Command-specific environment variables for this execution. 
:type env_vars: dict, optional + :param kwargs: Additional keyword arguments that override constructor parameters for this execution. + Supported overrides: output, sep, header, auth, custom_registry, max_results, page_limit, + max_depth, api_timeout, http_debug, proxy_host, proxy_port, proxy_user, proxy_password, + proxy_scheme, backend_storage_mode, backend_file_storage_location, app_root, + execution_concurrency_limit, dataflow_dependency_max, dataflow_components_max + :type kwargs: optional :return: The output result of the query in string format. If in `server_mode`, it returns a JSON string representation of the result. @@ -292,25 +298,47 @@ def executeStmt(self, query, custom_auth=None, env_vars=None): >>> result """ if self.server_mode: + # Server mode: handle output override + output_format = kwargs.get('output', self.output) + result = self.server_connection.execute_query(query, is_statement=True) # Format result based on output type - if self.output == 'pandas': + if output_format == 'pandas': import pandas as pd return pd.DataFrame(result) - elif self.output == 'csv': + elif output_format == 'csv': # Return the string representation of the result return result[0]['message'] else: return result else: + # Local mode: handle parameter overrides + override_params = None + output_format = kwargs.get('output', self.output) + + # If custom_auth is provided as kwarg, use it + if 'auth' in kwargs: + custom_auth = kwargs['auth'] + + # Generate override params if kwargs provided + if kwargs: + from ..utils import generate_params_for_execution + override_params = generate_params_for_execution(self._base_kwargs, kwargs) + # Execute the query - result = self.local_query_executor.execute(query, custom_auth=custom_auth, env_vars=env_vars) + result = self.local_query_executor.execute(query, custom_auth=custom_auth, env_vars=env_vars, override_params=override_params) - # Format the result - return self.local_output_formatter.format_statement_result(result) + # Format the 
result with appropriate output formatter + if output_format != self.output: + # Create a temporary formatter for this execution + from .output import OutputFormatter + temp_formatter = OutputFormatter(output_format) + return temp_formatter.format_statement_result(result) + else: + return self.local_output_formatter.format_statement_result(result) - def execute(self, query, suppress_errors=True, custom_auth=None, env_vars=None): + def execute(self, query, suppress_errors=True, custom_auth=None, env_vars=None, **kwargs): """ Executes a StackQL query and returns the output based on the specified output format. @@ -325,6 +353,12 @@ def execute(self, query, suppress_errors=True, custom_auth=None, env_vars=None): :type custom_auth: dict, optional :param env_vars: Command-specific environment variables for this execution. :type env_vars: dict, optional + :param kwargs: Additional keyword arguments that override constructor parameters for this execution. + Supported overrides: output, sep, header, auth, custom_registry, max_results, page_limit, + max_depth, api_timeout, http_debug, proxy_host, proxy_port, proxy_user, proxy_password, + proxy_scheme, backend_storage_mode, backend_file_storage_location, app_root, + execution_concurrency_limit, dataflow_dependency_max, dataflow_components_max + :type kwargs: optional :return: The output of the query, which can be a list of dictionary objects, a Pandas DataFrame, or a raw CSV string, depending on the configured output format. 
@@ -344,29 +378,52 @@ def execute(self, query, suppress_errors=True, custom_auth=None, env_vars=None): >>> result = stackql.execute(query) """ if self.server_mode: + # Server mode: handle output override + output_format = kwargs.get('output', self.output) + result = self.server_connection.execute_query(query) # Format result based on output type - if self.output == 'pandas': + if output_format == 'pandas': import pandas as pd import json from io import StringIO json_str = json.dumps(result) return pd.read_json(StringIO(json_str)) - elif self.output == 'csv': + elif output_format == 'csv': raise ValueError("CSV output is not supported in server_mode.") else: # Assume 'dict' output return result else: + # Local mode: handle parameter overrides + override_params = None + output_format = kwargs.get('output', self.output) + http_debug = kwargs.get('http_debug', self.http_debug) + + # If custom_auth is provided as kwarg, use it + if 'auth' in kwargs: + custom_auth = kwargs['auth'] + + # Generate override params if kwargs provided + if kwargs: + from ..utils import generate_params_for_execution + override_params = generate_params_for_execution(self._base_kwargs, kwargs) + # Apply HTTP debug setting - if self.http_debug: + if http_debug: suppress_errors = False # Execute the query - output = self.local_query_executor.execute(query, custom_auth=custom_auth, env_vars=env_vars) + output = self.local_query_executor.execute(query, custom_auth=custom_auth, env_vars=env_vars, override_params=override_params) - # Format the result - return self.local_output_formatter.format_query_result(output, suppress_errors) + # Format the result with appropriate output formatter + if output_format != self.output: + # Create a temporary formatter for this execution + from .output import OutputFormatter + temp_formatter = OutputFormatter(output_format) + return temp_formatter.format_query_result(output, suppress_errors) + else: + return self.local_output_formatter.format_query_result(output, 
suppress_errors) async def executeQueriesAsync(self, queries): """Executes multiple StackQL queries asynchronously using the current StackQL instance. diff --git a/pystackql/utils/__init__.py b/pystackql/utils/__init__.py index 27f6e7c..c927f48 100644 --- a/pystackql/utils/__init__.py +++ b/pystackql/utils/__init__.py @@ -23,7 +23,7 @@ ) from .auth import format_auth -from .params import setup_local_mode +from .params import setup_local_mode, generate_params_for_execution __all__ = [ # Platform utilities @@ -45,5 +45,6 @@ 'format_auth', # Parameter utilities - 'setup_local_mode' + 'setup_local_mode', + 'generate_params_for_execution' ] \ No newline at end of file diff --git a/pystackql/utils/params.py b/pystackql/utils/params.py index 1b039b5..b4d8eec 100644 --- a/pystackql/utils/params.py +++ b/pystackql/utils/params.py @@ -22,6 +22,113 @@ def _set_param(params, param_name, value): params.append(str(value)) return params +def generate_params_for_execution(base_kwargs, override_kwargs=None): + """Generate parameters for a single execution with optional overrides. + + This function generates command-line parameters for executing a query, + optionally overriding base parameters with execution-specific ones. 
+ + :param base_kwargs: Base keyword arguments (from constructor) + :param override_kwargs: Keyword arguments to override (from execute/executeStmt) + :return: List of parameters for StackQL binary + """ + # Merge kwargs, with override_kwargs taking precedence + merged_kwargs = base_kwargs.copy() + if override_kwargs: + merged_kwargs.update(override_kwargs) + + # Initialize parameter list + params = ["exec"] + + # Extract parameters from merged_kwargs + output = merged_kwargs.get('output', 'dict') + backend_storage_mode = merged_kwargs.get('backend_storage_mode', 'memory') + backend_file_storage_location = merged_kwargs.get('backend_file_storage_location', 'stackql.db') + app_root = merged_kwargs.get('app_root', None) + execution_concurrency_limit = merged_kwargs.get('execution_concurrency_limit', -1) + dataflow_dependency_max = merged_kwargs.get('dataflow_dependency_max', 50) + dataflow_components_max = merged_kwargs.get('dataflow_components_max', 50) + custom_registry = merged_kwargs.get('custom_registry', None) + custom_auth = merged_kwargs.get('custom_auth', None) + sep = merged_kwargs.get('sep', ',') + header = merged_kwargs.get('header', False) + max_results = merged_kwargs.get('max_results', -1) + page_limit = merged_kwargs.get('page_limit', 20) + max_depth = merged_kwargs.get('max_depth', 5) + api_timeout = merged_kwargs.get('api_timeout', 45) + http_debug = merged_kwargs.get('http_debug', False) + proxy_host = merged_kwargs.get('proxy_host', None) + proxy_port = merged_kwargs.get('proxy_port', -1) + proxy_user = merged_kwargs.get('proxy_user', None) + proxy_password = merged_kwargs.get('proxy_password', None) + proxy_scheme = merged_kwargs.get('proxy_scheme', 'http') + + # Set output format + params.append("--output") + if output.lower() == "csv": + params.append("csv") + else: + params.append("json") + + # Backend storage settings + if backend_storage_mode == 'file': + params.append("--sqlBackend") + params.append(json.dumps({ "dsn": 
f"file:{backend_file_storage_location}" })) + + # If app_root is set, use it + if app_root is not None: + _set_param(params, 'approot', app_root) + + # Set execution parameters + _set_param(params, 'execution.concurrency.limit', execution_concurrency_limit) + _set_param(params, 'dataflow.dependency.max', dataflow_dependency_max) + _set_param(params, 'dataflow.components.max', dataflow_components_max) + + # If custom_auth is set, use it + if custom_auth is not None: + authobj, authstr = format_auth(custom_auth) + params.append("--auth") + params.append(authstr) + + # If custom_registry is set, use it + if custom_registry is not None: + params.append("--registry") + params.append(json.dumps({ "url": custom_registry })) + + # CSV output settings + if output.lower() == "csv": + _set_param(params, 'delimiter', sep) + + if not header: + params.append("--hideheaders") + + # App behavioral properties + _set_param(params, 'http.response.maxResults', max_results) + _set_param(params, 'http.response.pageLimit', page_limit) + _set_param(params, 'indirect.depth.max', max_depth) + _set_param(params, 'apirequesttimeout', api_timeout) + + if http_debug: + params.append("--http.log.enabled") + + # Proxy settings + if proxy_host is not None: + # Set basic proxy parameters + _set_param(params, 'http.proxy.host', proxy_host) + _set_param(params, 'http.proxy.port', proxy_port) + _set_param(params, 'http.proxy.user', proxy_user) + _set_param(params, 'http.proxy.password', proxy_password) + + # Validate and set proxy scheme + ALLOWED_PROXY_SCHEMES = {'http', 'https'} + if proxy_scheme.lower() not in ALLOWED_PROXY_SCHEMES: + raise ValueError(f"Invalid proxy_scheme. Expected one of {ALLOWED_PROXY_SCHEMES}, got {proxy_scheme}.") + + _set_param(params, 'http.proxy.scheme', proxy_scheme.lower()) + + # Return the params list + return params + def setup_local_mode(instance, **kwargs): """Set up local mode for a StackQL instance. 
@@ -32,6 +139,9 @@ def setup_local_mode(instance, **kwargs): :param kwargs: Keyword arguments from the constructor :return: List of parameters for StackQL binary """ + # Store base kwargs for later use + instance._base_kwargs = kwargs.copy() + # Initialize parameter list params = ["exec"] diff --git a/tests/test_kwargs_override.py b/tests/test_kwargs_override.py new file mode 100644 index 0000000..d8fab98 --- /dev/null +++ b/tests/test_kwargs_override.py @@ -0,0 +1,160 @@ +# tests/test_kwargs_override.py + +""" +Tests for kwargs override functionality in execute and executeStmt methods. + +This module tests the ability to override constructor parameters via kwargs +passed to execute() and executeStmt() methods. +""" + +import os +import sys +import pytest +import pandas as pd + +# Add the parent directory to the path so we can import from pystackql +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +# Add the current directory to the path so we can import test_constants +sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) + +from pystackql import StackQL +from tests.test_constants import ( + LITERAL_INT_QUERY, + LITERAL_STRING_QUERY, + print_test_result, + pystackql_test_setup +) + +class TestKwargsOverride: + """Tests for kwargs override in execute and executeStmt methods.""" + + StackQL = StackQL # For use with pystackql_test_setup decorator + + @pystackql_test_setup(output='csv') + def test_execute_output_override_csv_to_dict(self): + """Test that output format can be overridden from csv to dict in execute().""" + # Instance is configured with CSV output + assert self.stackql.output == 'csv', "Instance should be configured with CSV output" + + # Execute with dict output override + result = self.stackql.execute(LITERAL_INT_QUERY, output='dict') + + # Check result structure - should be dict format, not csv + assert isinstance(result, list), "Result should be a list (dict format)" + assert len(result) > 0, "Result 
should not be empty" + assert isinstance(result[0], dict), "Result items should be dicts" + + print_test_result(f"Execute output override csv to dict test\nRESULT TYPE: {type(result)}", + isinstance(result, list) and isinstance(result[0], dict)) + + @pystackql_test_setup(output='dict') + def test_execute_output_override_dict_to_pandas(self): + """Test that output format can be overridden from dict to pandas in execute().""" + # Instance is configured with dict output + assert self.stackql.output == 'dict', "Instance should be configured with dict output" + + # Execute with pandas output override + result = self.stackql.execute(LITERAL_STRING_QUERY, output='pandas') + + # Check result structure - should be pandas DataFrame, not dict + assert isinstance(result, pd.DataFrame), "Result should be a pandas DataFrame" + assert not result.empty, "DataFrame should not be empty" + + print_test_result(f"Execute output override dict to pandas test\nRESULT TYPE: {type(result)}", + isinstance(result, pd.DataFrame)) + + @pystackql_test_setup(output='pandas') + def test_execute_output_override_pandas_to_csv(self): + """Test that output format can be overridden from pandas to csv in execute().""" + # Instance is configured with pandas output + assert self.stackql.output == 'pandas', "Instance should be configured with pandas output" + + # Execute with csv output override + result = self.stackql.execute(LITERAL_INT_QUERY, output='csv') + + # Check result structure - should be csv string, not pandas + assert isinstance(result, str), "Result should be a string (csv format)" + assert "1" in result, "Result should contain the value '1'" + + print_test_result(f"Execute output override pandas to csv test\nRESULT: {result}", + isinstance(result, str)) + + @pystackql_test_setup(output='dict') + def test_execute_multiple_overrides_in_sequence(self): + """Test that multiple execute calls with different overrides work correctly.""" + # Instance is configured with dict output + assert 
self.stackql.output == 'dict', "Instance should be configured with dict output" + + # First execution with dict (default) + result1 = self.stackql.execute(LITERAL_INT_QUERY) + assert isinstance(result1, list), "First result should be dict format" + + # Second execution with pandas override + result2 = self.stackql.execute(LITERAL_STRING_QUERY, output='pandas') + assert isinstance(result2, pd.DataFrame), "Second result should be pandas format" + + # Third execution with csv override + result3 = self.stackql.execute(LITERAL_INT_QUERY, output='csv') + assert isinstance(result3, str), "Third result should be csv format" + + # Fourth execution should still use dict (instance default) + result4 = self.stackql.execute(LITERAL_INT_QUERY) + assert isinstance(result4, list), "Fourth result should be dict format again" + + print_test_result(f"Multiple overrides in sequence test\nTypes: {[type(r).__name__ for r in [result1, result2, result3, result4]]}", + isinstance(result1, list) and + isinstance(result2, pd.DataFrame) and + isinstance(result3, str) and + isinstance(result4, list)) + + @pystackql_test_setup(output='csv', header=False) + def test_execute_csv_header_override(self): + """Test that CSV header setting can be overridden in execute().""" + # Instance is configured with CSV output and no header + assert self.stackql.output == 'csv', "Instance should be configured with CSV output" + assert self.stackql.header is False, "Instance should be configured with header=False" + + # Execute with header override + result = self.stackql.execute(LITERAL_INT_QUERY, header=True) + + # Check result structure - should be csv string + assert isinstance(result, str), "Result should be a string (csv format)" + + print_test_result(f"CSV header override test\nRESULT: {result}", + isinstance(result, str)) + + @pystackql_test_setup(output='csv', sep=',') + def test_execute_csv_separator_override(self): + """Test that CSV separator can be overridden in execute().""" + # Instance is 
configured with CSV output and comma separator + assert self.stackql.output == 'csv', "Instance should be configured with CSV output" + assert self.stackql.sep == ',', "Instance should be configured with comma separator" + + # Execute with pipe separator override + result = self.stackql.execute(LITERAL_INT_QUERY, sep='|') + + # Check result structure - should be csv string + assert isinstance(result, str), "Result should be a string (csv format)" + + print_test_result(f"CSV separator override test\nRESULT: {result}", + isinstance(result, str)) + + @pystackql_test_setup(output='dict') + def test_executeStmt_output_override(self): + """Test that output format can be overridden in executeStmt().""" + # Instance is configured with dict output + assert self.stackql.output == 'dict', "Instance should be configured with dict output" + + # Execute a statement with pandas override + # Using a simple SELECT that works as a statement + result = self.stackql.executeStmt(LITERAL_INT_QUERY, output='pandas') + + # Check result structure - should be pandas DataFrame + assert isinstance(result, pd.DataFrame), "Result should be a pandas DataFrame" + + print_test_result(f"ExecuteStmt output override test\nRESULT TYPE: {type(result)}", + isinstance(result, pd.DataFrame)) + +if __name__ == "__main__": + pytest.main(["-v", __file__])