Patch summary (free text ahead of the first file header; patch tools skip it):
  * .travis.yml: pin minimum versions of the Python build dependencies.
    The requirement specifiers are quoted so the shell does not treat '>'
    as an output redirection.
  * GDALRasterSource: also treat NoClassDefFoundError as "GDAL not available".
  * RasterSource: add apply(String); keep the s3a/s3n/wasbs schemes away from
    the GDAL reader; make the GeoTiff extractors reject GDAL-only extensions.
    In IsGDAL.unapply the hasGDAL check guards both preferGdal alternatives,
    so without GDAL bindings sources fall through to the JVM readers.
  * RasterSourceSpec: add a delegate-selection test.
  * Whitespace-only cleanups in the Scala and Python tests.
NOTE(review): leading indentation of the hunk lines was reconstructed from a
whitespace-collapsed copy of this patch; confirm against the target files or
apply with whitespace tolerance. The "index" blob ids predate the two fixes
described above.

diff --git a/.travis.yml b/.travis.yml
index 12cad75b7..9fa8c6ee6 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -25,7 +25,7 @@ addons:
       - pandoc
 
 install:
-  - pip install rasterio shapely pandas numpy pweave
+  - pip install "shapely>=1.6.0" "pandas>=0.25.0" "numpy>=1.7" pweave "rasterio>=1.0.0"
   - wget -O - https://piccolo.link/sbt-1.2.8.tgz | tar xzf -
 
 script:
diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala
index 481155f24..62e55313c 100644
--- a/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala
@@ -78,7 +78,7 @@ object GDALRasterSource extends LazyLogging {
       val _ = new GDALWarp()
       true
     } catch {
-      case _: UnsatisfiedLinkError =>
+      case _:UnsatisfiedLinkError | _:NoClassDefFoundError =>
         logger.warn("GDAL native bindings are not available. Falling back to JVM-based reader for GeoTIFF format.")
         false
     }
diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala
index 2d8f1bac8..0fc0691ce 100644
--- a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala
@@ -101,18 +101,21 @@ object RasterSource extends LazyLogging {
     ExpressionEncoder()
   }
 
+  def apply(source: String): RasterSource = apply(new URI(source))
+
   def apply(source: URI): RasterSource =
     rsCache.get(
-      source.toASCIIString, _ => source match {
-        case IsGDAL() => GDALRasterSource(source)
-        case IsHadoopGeoTiff() =>
-          // TODO: How can we get the active hadoop configuration
-          // TODO: without having to pass it through?
-          val config = () => new Configuration()
-          HadoopGeoTiffRasterSource(source, config)
-        case IsDefaultGeoTiff() => JVMGeoTiffRasterSource(source)
-        case s => throw new UnsupportedOperationException(s"Reading '$s' not supported")
-      }
+      source.toASCIIString, _ =>
+        source match {
+          case IsGDAL() => GDALRasterSource(source)
+          case IsHadoopGeoTiff() =>
+            // TODO: How can we get the active hadoop configuration
+            // TODO: without having to pass it through?
+            val config = () => new Configuration()
+            HadoopGeoTiffRasterSource(source, config)
+          case IsDefaultGeoTiff() => JVMGeoTiffRasterSource(source)
+          case s => throw new UnsupportedOperationException(s"Reading '$s' not supported")
+        }
     )
 
   object IsGDAL {
@@ -122,6 +125,8 @@ object RasterSource extends LazyLogging {
 
     val gdalOnlyExtensions = Seq(".jp2", ".mrf", ".hdf", ".vrt")
 
+    val blacklistedSchemes = Seq("s3a", "s3n", "wasbs")
+
     def gdalOnly(source: URI): Boolean =
       if (gdalOnlyExtensions.exists(source.getPath.toLowerCase.endsWith)) {
         require(GDALRasterSource.hasGDAL, s"Can only read $source if GDAL is available")
@@ -130,26 +135,43 @@ object RasterSource extends LazyLogging {
 
     /** Extractor for determining if a scheme indicates GDAL preference.  */
     def unapply(source: URI): Boolean = {
+
+      lazy val schemeIsNotHadoop = Option(source.getScheme())
+        .filter(blacklistedSchemes.contains)
+        .isEmpty
+
       lazy val schemeIsGdal = Option(source.getScheme())
-        .exists(_.startsWith("gdal"))
+        .exists(_ == "gdal") && schemeIsNotHadoop
+
+      (gdalOnly(source) && schemeIsNotHadoop) ||
+        (GDALRasterSource.hasGDAL && (
+          (preferGdal && schemeIsGdal) ||
+          (preferGdal && schemeIsNotHadoop)
+        ))
 
-      gdalOnly(source) || ((preferGdal || schemeIsGdal) && GDALRasterSource.hasGDAL)
     }
   }
 
   object IsDefaultGeoTiff {
-    def unapply(source: URI): Boolean = source.getScheme match {
-      case "file" | "http" | "https" | "s3" => true
-      case null | "" ⇒ true
-      case _ => false
+    import IsGDAL.gdalOnly
+    def unapply(source: URI): Boolean = {
+      if (gdalOnly(source)) false
+      else source.getScheme match {
+        case "file" | "http" | "https" | "s3" => true
+        case null | "" ⇒ true
+        case _ => false
+      }
     }
   }
 
   object IsHadoopGeoTiff {
-    def unapply(source: URI): Boolean = source.getScheme match {
-      case "hdfs" | "s3n" | "s3a" | "wasb" | "wasbs" => true
-      case _ => false
-    }
+    import IsGDAL.gdalOnly
+    def unapply(source: URI): Boolean =
+      if (gdalOnly(source)) false
+      else source.getScheme match {
+        case "hdfs" | "s3n" | "s3a" | "wasb" | "wasbs" => true
+        case _ => false
+      }
   }
 
   trait URIRasterSource { _: RasterSource =>
diff --git a/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala
index 6b3371ea3..de0aa1cb3 100644
--- a/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala
@@ -156,7 +156,6 @@ class RasterSourceSpec extends TestEnvironment with TestData {
         gdal.layoutExtents(dims) should contain allElementsOf jvm.layoutExtents(dims)
       }
 
-
       it("should support vsi file paths") {
         val archivePath = geotiffDir.resolve("L8-archive.zip")
         val archiveURI = URI.create("gdal://vsizip/" + archivePath.toString + "/L8-RGB-VA.tiff")
@@ -173,6 +172,16 @@ class RasterSourceSpec extends TestEnvironment with TestData {
         gdal.extent should be (jvm.extent)
         gdal.cellSize should be(jvm.cellSize)
       }
+
+      it("should choose correct delegate for scheme and file"){
+        val hdfsSchemeTif = RasterSource("s3n://bucket/prefix/raster.tif")
+        val easySchemeTif = RasterSource("s3://bucket/prefix/raster.tif") // should interpret as /vsis3/
+        lazy val hdfsSchemeJp2 = RasterSource("s3n://s22s-test-geotiffs/luray_snp/B04.jp2") // can't read with hadoop reader
+
+        hdfsSchemeTif should matchPattern {case HadoopGeoTiffRasterSource(_, _) ⇒}
+        easySchemeTif should matchPattern {case GDALRasterSource(_) ⇒}
+        assertThrows[UnsupportedOperationException](hdfsSchemeJp2.bandCount)
+      }
     }
   }
 
diff --git a/pyrasterframes/src/main/python/tests/RasterFunctionsTests.py b/pyrasterframes/src/main/python/tests/RasterFunctionsTests.py
index 2a57cf356..fe2909a32 100644
--- a/pyrasterframes/src/main/python/tests/RasterFunctionsTests.py
+++ b/pyrasterframes/src/main/python/tests/RasterFunctionsTests.py
@@ -300,9 +300,6 @@ def test_render_composite(self):
         # Look for the PNG magic cookie
         self.assertEqual(png_bytes[0:8], bytearray([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]))
 
-
-
-
     def test_rf_interpret_cell_type_as(self):
         from pyspark.sql import Row
         from pyrasterframes.rf_types import Tile