Skip to content

Commit

Permalink
Add 'gdal vector sql', as standalone or part of 'gdal vector pipeline'
Browse files Browse the repository at this point in the history
  • Loading branch information
rouault committed Feb 9, 2025
1 parent 6332083 commit 90d9637
Show file tree
Hide file tree
Showing 16 changed files with 778 additions and 26 deletions.
1 change: 1 addition & 0 deletions apps/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ add_library(
gdalalg_vector_read.cpp
gdalalg_vector_filter.cpp
gdalalg_vector_reproject.cpp
gdalalg_vector_sql.cpp
gdalalg_vector_write.cpp
gdalinfo_lib.cpp
gdalbuildvrt_lib.cpp
Expand Down
15 changes: 15 additions & 0 deletions apps/gdalalg_abstract_pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "cpl_json.h"
#include "gdalalgorithm.h"

#include <algorithm>

template <class StepAlgorithm>
class GDALAbstractPipelineAlgorithm CPL_NON_FINAL : public StepAlgorithm
{
Expand All @@ -44,6 +46,19 @@ class GDALAbstractPipelineAlgorithm CPL_NON_FINAL : public StepAlgorithm
{
}

~GDALAbstractPipelineAlgorithm() override
{
    // Steps are torn down last-to-first. A later step may have created
    // objects that rely on objects owned by an earlier step, and
    // cleaning up those dependent objects requires the earlier ones to
    // still be alive.
    // Typical case: "gdal vector pipeline read ... ! sql ..."
    auto oIter = std::rbegin(m_steps);
    const auto oIterEnd = std::rend(m_steps);
    while (oIter != oIterEnd)
    {
        oIter->reset();
        ++oIter;
    }
}

virtual GDALArgDatasetValue &GetOutputDataset() = 0;

std::string m_pipeline{};
Expand Down
2 changes: 2 additions & 0 deletions apps/gdalalg_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "gdalalg_vector_pipeline.h"
#include "gdalalg_vector_filter.h"
#include "gdalalg_vector_reproject.h"
#include "gdalalg_vector_sql.h"

/************************************************************************/
/* GDALVectorAlgorithm */
Expand All @@ -43,6 +44,7 @@ class GDALVectorAlgorithm final : public GDALAlgorithm
RegisterSubAlgorithm<GDALVectorPipelineAlgorithm>();
RegisterSubAlgorithm<GDALVectorFilterAlgorithmStandalone>();
RegisterSubAlgorithm<GDALVectorReprojectAlgorithmStandalone>();
RegisterSubAlgorithm<GDALVectorSQLAlgorithmStandalone>();
}

private:
Expand Down
13 changes: 9 additions & 4 deletions apps/gdalalg_vector_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "gdalalg_vector_clip.h"
#include "gdalalg_vector_filter.h"
#include "gdalalg_vector_reproject.h"
#include "gdalalg_vector_sql.h"
#include "gdalalg_vector_write.h"

#include "cpl_conv.h"
Expand Down Expand Up @@ -94,10 +95,13 @@ void GDALVectorPipelineStepAlgorithm::AddOutputArgs(
&m_appendLayer)
.SetDefault(false)
.SetHiddenForCLI(hiddenForCLI);
AddArg("output-layer", shortNameOutputLayerAllowed ? 'l' : 0,
_("Output layer name"), &m_outputLayerName)
.AddHiddenAlias("nln") // For ogr2ogr nostalgic people
.SetHiddenForCLI(hiddenForCLI);
if (GetName() != "sql")
{
AddArg("output-layer", shortNameOutputLayerAllowed ? 'l' : 0,
_("Output layer name"), &m_outputLayerName)
.AddHiddenAlias("nln") // For ogr2ogr nostalgic people
.SetHiddenForCLI(hiddenForCLI);
}
}

/************************************************************************/
Expand Down Expand Up @@ -178,6 +182,7 @@ GDALVectorPipelineAlgorithm::GDALVectorPipelineAlgorithm()
m_stepRegistry.Register<GDALVectorClipAlgorithm>();
m_stepRegistry.Register<GDALVectorReprojectAlgorithm>();
m_stepRegistry.Register<GDALVectorFilterAlgorithm>();
m_stepRegistry.Register<GDALVectorSQLAlgorithm>();
}

/************************************************************************/
Expand Down
306 changes: 306 additions & 0 deletions apps/gdalalg_vector_sql.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,306 @@
/******************************************************************************
*
* Project: GDAL
* Purpose: "sql" step of "vector pipeline"
* Author: Even Rouault <even dot rouault at spatialys.com>
*
******************************************************************************
* Copyright (c) 2025, Even Rouault <even dot rouault at spatialys.com>
*
* SPDX-License-Identifier: MIT
****************************************************************************/

#include "gdalalg_vector_sql.h"

#include "gdal_priv.h"
#include "ogrsf_frmts.h"
#include "ogrlayerpool.h"

#include <set>

//! @cond Doxygen_Suppress

// Fallback definition of the _() string-marker macro: when no definition is
// already in effect, _(x) simply expands to its argument unchanged.
#ifndef _
#define _(x) (x)
#endif

/************************************************************************/
/* GDALVectorSQLAlgorithm::GDALVectorSQLAlgorithm() */
/************************************************************************/

GDALVectorSQLAlgorithm::GDALVectorSQLAlgorithm(bool standaloneStep)
    : GDALVectorPipelineStepAlgorithm(NAME, DESCRIPTION, HELP_URL,
                                      standaloneStep)
{
    // Required positional argument holding the SQL statement(s). Values may
    // also be read from a file through the @<filename> syntax.
    AddArg("sql", 0, _("SQL statement(s)"), &m_sql)
        .SetPositional()
        .SetRequired()
        .SetPackedValuesAllowed(false)
        .SetReadFromFileAtSyntaxAllowed()
        .SetMetaVar("<statement>|@<filename>")
        .SetRemoveSQLCommentsEnabled();

    // The 'l' short name is only registered when the algorithm is not run
    // as a standalone step.
    char chOutputLayerShortName = 0;
    if (!standaloneStep)
        chOutputLayerShortName = 'l';
    AddArg("output-layer", chOutputLayerShortName, _("Output layer name(s)"),
           &m_outputLayer);

    AddArg("dialect", 0, _("SQL dialect (e.g. OGRSQL, SQLITE)"), &m_dialect);
}

/************************************************************************/
/* GDALVectorSQLAlgorithmDataset */
/************************************************************************/

namespace
{
class GDALVectorSQLAlgorithmDataset final : public GDALDataset
{
GDALDataset *m_poSrcDS = nullptr;
std::vector<OGRLayer *> m_layers{};

CPL_DISALLOW_COPY_ASSIGN(GDALVectorSQLAlgorithmDataset)

public:
explicit GDALVectorSQLAlgorithmDataset(GDALDataset *poSrcDS)
: m_poSrcDS(poSrcDS)
{
}

~GDALVectorSQLAlgorithmDataset() override
{
for (OGRLayer *poLayer : m_layers)
m_poSrcDS->ReleaseResultSet(poLayer);
}

void AddLayer(OGRLayer *poLayer)
{
m_layers.push_back(poLayer);
}

int GetLayerCount() override
{
return static_cast<int>(m_layers.size());
}

OGRLayer *GetLayer(int idx) override
{
return idx >= 0 && idx < GetLayerCount() ? m_layers[idx] : nullptr;
}
};
} // namespace

/************************************************************************/
/* GDALVectorSQLAlgorithmDatasetMultiLayer */
/************************************************************************/

namespace
{

class ProxiedSQLLayer final : public OGRProxiedLayer
{
OGRFeatureDefn *m_poLayerDefn = nullptr;

CPL_DISALLOW_COPY_ASSIGN(ProxiedSQLLayer)

public:
ProxiedSQLLayer(const std::string &osName, OGRLayerPool *poPoolIn,
OpenLayerFunc pfnOpenLayerIn,
ReleaseLayerFunc pfnReleaseLayerIn,
FreeUserDataFunc pfnFreeUserDataIn, void *pUserDataIn)
: OGRProxiedLayer(poPoolIn, pfnOpenLayerIn, pfnReleaseLayerIn,
pfnFreeUserDataIn, pUserDataIn)
{
SetDescription(osName.c_str());
}

~ProxiedSQLLayer()
{
if (m_poLayerDefn)
m_poLayerDefn->Release();
}

const char *GetName() override
{
return GetDescription();
}

OGRFeatureDefn *GetLayerDefn() override
{
if (!m_poLayerDefn)
{
m_poLayerDefn = OGRProxiedLayer::GetLayerDefn()->Clone();
m_poLayerDefn->SetName(GetDescription());
}
return m_poLayerDefn;
}
};

/** Dataset exposing several SQL statements of a source dataset as layers.
 *
 * Each layer is a ProxiedSQLLayer whose open callback runs ExecuteSQL() on
 * the source dataset and whose release callback calls ReleaseResultSet().
 */
class GDALVectorSQLAlgorithmDatasetMultiLayer final : public GDALDataset
{
    // Two SQL layers cannot safely be active at the same time on the same
    // source dataset. Hence a pool of size 1: accessing one layer implies
    // closing the previously active one.
    OGRLayerPool m_oPool{1};
    //! Source dataset; only referenced here, never deleted by this class.
    GDALDataset *m_poSrcDS = nullptr;
    //! Proxied layers owned by this dataset.
    std::vector<std::unique_ptr<OGRLayer>> m_layers{};

    CPL_DISALLOW_COPY_ASSIGN(GDALVectorSQLAlgorithmDatasetMultiLayer)

  public:
    explicit GDALVectorSQLAlgorithmDatasetMultiLayer(GDALDataset *poSrcDS)
        : m_poSrcDS(poSrcDS)
    {
    }

    /** Add a proxied layer for a SQL statement.
     *
     * @param osSQL SQL statement passed to ExecuteSQL() when the layer is
     *              opened.
     * @param osDialect SQL dialect; an empty string is passed to
     *                  ExecuteSQL() as a null dialect.
     * @param osLayerName Name under which the layer is exposed.
     */
    void AddLayer(const std::string &osSQL, const std::string &osDialect,
                  const std::string &osLayerName)
    {
        // Context handed to the callbacks below through a void* pointer.
        struct UserData
        {
            GDALDataset *poSrcDS = nullptr;
            std::string osDialect{};
            std::string osSQL{};
            std::string osLayerName{};

            UserData() = default;
            CPL_DISALLOW_COPY_ASSIGN(UserData)
        };

        // Freed by the pfnFreeContext callback given to the proxied layer.
        auto poContext = new UserData;
        poContext->poSrcDS = m_poSrcDS;
        poContext->osDialect = osDialect;
        poContext->osSQL = osSQL;
        poContext->osLayerName = osLayerName;

        // Runs the SQL statement on the source dataset.
        const auto pfnOpenResultSet = [](void *pRawContext)
        {
            auto *poCtx = static_cast<UserData *>(pRawContext);
            const char *pszDialect =
                poCtx->osDialect.empty() ? nullptr : poCtx->osDialect.c_str();
            return poCtx->poSrcDS->ExecuteSQL(poCtx->osSQL.c_str(), nullptr,
                                              pszDialect);
        };

        // Gives the result set back to the source dataset.
        const auto pfnReleaseResultSet =
            [](OGRLayer *poResultLayer, void *pRawContext)
        {
            auto *poCtx = static_cast<UserData *>(pRawContext);
            poCtx->poSrcDS->ReleaseResultSet(poResultLayer);
        };

        // Destroys the context allocated above.
        const auto pfnFreeContext = [](void *pRawContext)
        { delete static_cast<UserData *>(pRawContext); };

        m_layers.push_back(std::make_unique<ProxiedSQLLayer>(
            osLayerName, &m_oPool, pfnOpenResultSet, pfnReleaseResultSet,
            pfnFreeContext, poContext));
    }

    int GetLayerCount() override
    {
        return static_cast<int>(m_layers.size());
    }

    /** Return the layer at index idx, or nullptr if idx is out of range. */
    OGRLayer *GetLayer(int idx) override
    {
        if (idx < 0 || idx >= GetLayerCount())
            return nullptr;
        return m_layers[idx].get();
    }
};
} // namespace

/************************************************************************/
/* GDALVectorSQLAlgorithm::RunStep() */
/************************************************************************/

/** Execute the SQL statement(s) on the input dataset and set the output
 * dataset accordingly.
 *
 * With a single statement, the statement is executed immediately and its
 * result set becomes the only layer of the output dataset. With several
 * statements, each one is first executed once to validate it and determine
 * its layer name, then released; the output dataset exposes them as proxied
 * layers that re-execute the statement on access.
 *
 * @return false if the number of output layer names does not match the
 *         number of statements, or if a statement fails to execute;
 *         true otherwise.
 */
bool GDALVectorSQLAlgorithm::RunStep(GDALProgressFunc, void *)
{
    CPLAssert(m_inputDataset.GetDatasetRef());
    CPLAssert(m_outputDataset.GetName().empty());
    CPLAssert(!m_outputDataset.GetDatasetRef());

    if (!m_outputLayer.empty() && m_outputLayer.size() != m_sql.size())
    {
        // The statements are given through the "sql" argument: there is no
        // "--statement" option to refer to.
        ReportError(CE_Failure, CPLE_AppDefined,
                    "There should be as many layer names in --output-layer as "
                    "in --sql");
        return false;
    }

    auto poSrcDS = m_inputDataset.GetDatasetRef();

    // An empty dialect is passed to ExecuteSQL() as a null pointer.
    const char *pszDialect = m_dialect.empty() ? nullptr : m_dialect.c_str();

    if (m_sql.size() == 1)
    {
        auto outDS = std::make_unique<GDALVectorSQLAlgorithmDataset>(poSrcDS);
        outDS->SetDescription(poSrcDS->GetDescription());

        OGRLayer *poLayer =
            poSrcDS->ExecuteSQL(m_sql[0].c_str(), nullptr, pszDialect);
        if (!poLayer)
            return false;

        if (!m_outputLayer.empty())
        {
            const std::string &osLayerName = m_outputLayer[0];
            poLayer->GetLayerDefn()->SetName(osLayerName.c_str());
            poLayer->SetDescription(osLayerName.c_str());
        }
        // From now on, outDS is in charge of releasing the result set.
        outDS->AddLayer(poLayer);
        m_outputDataset.Set(std::move(outDS));
    }
    else
    {
        // First pass to check all statements are valid and figure out layer
        // names
        std::set<std::string> setOutputLayerNames;
        std::vector<std::string> aosLayerNames;
        aosLayerNames.reserve(m_sql.size());
        for (const std::string &sql : m_sql)
        {
            auto poLayer = poSrcDS->ExecuteSQL(sql.c_str(), nullptr, pszDialect);
            if (!poLayer)
                return false;

            std::string osLayerName;

            if (!m_outputLayer.empty())
            {
                // aosLayerNames.size() is the index of the current statement.
                osLayerName = m_outputLayer[aosLayerNames.size()];
            }
            else if (cpl::contains(setOutputLayerNames,
                                   poLayer->GetDescription()))
            {
                // Name already used by a previous statement: append an
                // increasing number (starting at 2) until it is unique.
                int num = 1;
                do
                {
                    osLayerName = poLayer->GetDescription();
                    ++num;
                    osLayerName += std::to_string(num);
                } while (cpl::contains(setOutputLayerNames, osLayerName));
            }

            if (!osLayerName.empty())
            {
                poLayer->GetLayerDefn()->SetName(osLayerName.c_str());
                poLayer->SetDescription(osLayerName.c_str());
            }
            setOutputLayerNames.insert(poLayer->GetDescription());
            aosLayerNames.push_back(poLayer->GetDescription());

            // Only the name was needed at this stage.
            poSrcDS->ReleaseResultSet(poLayer);
        }

        auto outDS =
            std::make_unique<GDALVectorSQLAlgorithmDatasetMultiLayer>(poSrcDS);
        outDS->SetDescription(poSrcDS->GetDescription());

        for (size_t i = 0; i < aosLayerNames.size(); ++i)
        {
            outDS->AddLayer(m_sql[i], m_dialect, aosLayerNames[i]);
        }

        m_outputDataset.Set(std::move(outDS));
    }

    return true;
}

//! @endcond
Loading

0 comments on commit 90d9637

Please sign in to comment.