diff -Nru python-geopandas-0.7.0/appveyor.yml python-geopandas-0.8.1/appveyor.yml --- python-geopandas-0.7.0/appveyor.yml 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/appveyor.yml 2020-07-15 17:54:36.000000000 +0000 @@ -35,9 +35,8 @@ # This PATH modification only works with conda 4.6+, but it won't hurt other versions. - set "PATH=%MINICONDA%\condabin:%PATH%" - conda info -a - # free channel needed for older envs (< py37), see - # https://github.com/conda/conda/issues/8849 - - conda config --set restore_free_channel true + - conda config --add channels conda-forge + - conda config --set channel_priority strict - conda env create --file="${ENV_FILE}" test_script: diff -Nru python-geopandas-0.7.0/asv.conf.json python-geopandas-0.8.1/asv.conf.json --- python-geopandas-0.7.0/asv.conf.json 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/asv.conf.json 2020-07-15 17:54:36.000000000 +0000 @@ -30,6 +30,7 @@ // determined by looking for tools on the PATH environment // variable. 
"environment_type": "conda", + "conda_channels": ["conda-forge", "defaults"], // timeout in seconds for installing any dependencies in environment // defaults to 10 min @@ -55,7 +56,7 @@ "matrix": { "pandas": [], "shapely": [], - "cython": [], + "pygeos": [], "fiona": [], "pyproj": [], "rtree": [], diff -Nru python-geopandas-0.7.0/benchmarks/clip.py python-geopandas-0.8.1/benchmarks/clip.py --- python-geopandas-0.7.0/benchmarks/clip.py 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/benchmarks/clip.py 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,14 @@ +from geopandas import read_file, datasets, clip +from shapely.geometry import box + + +class Bench: + def setup(self, *args): + world = read_file(datasets.get_path("naturalearth_lowres")) + capitals = read_file(datasets.get_path("naturalearth_cities")) + self.bounds = [box(*geom.bounds) for geom in world.geometry] + self.points = capitals + + def time_clip(self): + for bound in self.bounds: + clip(self.points, bound) diff -Nru python-geopandas-0.7.0/benchmarks/io.py python-geopandas-0.8.1/benchmarks/io.py --- python-geopandas-0.7.0/benchmarks/io.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/benchmarks/io.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,59 +1,109 @@ import os import shutil import tempfile +import warnings -from geopandas import GeoDataFrame, GeoSeries import numpy as np + from shapely.geometry import Point +from geopandas import GeoDataFrame, GeoSeries, read_file, read_parquet, read_feather + + +# TEMP: hide warning from to_parquet +warnings.filterwarnings("ignore", message=".*initial implementation of Parquet.*") + + +format_dict = { + "ESRI Shapefile": ( + ".shp", + lambda gdf, filename: gdf.to_file(filename, driver="ESRI Shapefile"), + lambda filename: read_file(filename, driver="ESRI Shapefile"), + ), + "GeoJSON": ( + ".json", + lambda gdf, filename: gdf.to_file(filename, driver="GeoJSON"), + lambda filename: read_file(filename, driver="GeoJSON"), + ), + "GPKG": ( 
+ ".gpkg", + lambda gdf, filename: gdf.to_file(filename, driver="GPKG"), + lambda filename: read_file(filename, driver="GPKG"), + ), + "Parquet": ( + ".parquet", + lambda gdf, filename: gdf.to_parquet(filename), + lambda filename: read_parquet(filename), + ), + "Feather": ( + ".feather", + lambda gdf, filename: gdf.to_feather(filename), + lambda filename: read_feather(filename), + ), +} + class Bench: - # extensions for different file types to test - params = [".shp", ".json", ".gpkg"] - param_names = ["ext"] - - def setup(self, ext): - - self.driver_dict = {".shp": "ESRI Shapefile", - ".json": "GeoJSON", - ".gpkg": "GPKG"} - driver = self.driver_dict[ext] + params = ["ESRI Shapefile", "GeoJSON", "GPKG", "Parquet", "Feather"] + param_names = ["file_format"] + + def setup(self, file_format): + self.ext, self.writer, self.reader = format_dict[file_format] num_points = 20000 xs = np.random.rand(num_points) ys = np.random.rand(num_points) self.points = GeoSeries([Point(x, y) for (x, y) in zip(xs, ys)]) - self.df = GeoDataFrame({"geometry": self.points, "x": xs, "y": ys, - "s": np.zeros(num_points, dtype="object")}) + self.df = GeoDataFrame( + { + "geometry": self.points, + "x": xs, + "y": ys, + "s": np.zeros(num_points, dtype="object"), + } + ) self.tmpdir = tempfile.mkdtemp() - self.series_filename = os.path.join(self.tmpdir, "series" + ext) - self.frame_filename = os.path.join(self.tmpdir, "frame" + ext) - self.points.to_file(self.series_filename, driver=driver) - self.df.to_file(self.frame_filename, driver=driver) + self.filename = os.path.join(self.tmpdir, "frame" + self.ext) + self.writer(self.df, self.filename) - def teardown(self, ext): + def teardown(self, file_format): shutil.rmtree(self.tmpdir) - def time_write_frame(self, ext): - driver = self.driver_dict[ext] - with tempfile.TemporaryDirectory() as tmpdir: - out_filename = os.path.join(tmpdir, "frame" + ext) - self.df.to_file(out_filename, driver=driver) - def time_write_series(self, ext): - driver = 
self.driver_dict[ext] +class BenchFrame(Bench): + + params = ["ESRI Shapefile", "GeoJSON", "GPKG", "Parquet", "Feather"] + param_names = ["file_format"] + + def time_write(self, file_format): with tempfile.TemporaryDirectory() as tmpdir: - out_filename = os.path.join(tmpdir, "series" + ext) - self.points.to_file(out_filename, driver=driver) + out_filename = os.path.join(tmpdir, "frame" + self.ext) + self.writer(self.df, out_filename) + + def time_read(self, file_format): + self.reader(self.filename) + - def time_read_frame(self, ext): - frame = GeoDataFrame.from_file(self.frame_filename) +class BenchSeries(Bench): + + params = ["ESRI Shapefile", "GeoJSON", "GPKG"] + param_names = ["file_format"] + + def setup(self, file_format): + super().setup(file_format) + self.filename_series = os.path.join(self.tmpdir, "series" + self.ext) + self.writer(self.points, self.filename_series) + + def time_write_series(self, file_format): + with tempfile.TemporaryDirectory() as tmpdir: + out_filename = os.path.join(tmpdir, "series" + self.ext) + self.writer(self.points, out_filename) - def time_read_series(self, ext): - points = GeoSeries.from_file(self.series_filename) + def time_read_series(self, file_format): + GeoSeries.from_file(self.filename_series) - def time_read_series_from_frame(self, ext): - points = GeoSeries.from_file(self.frame_filename) + def time_read_series_from_frame(self, file_format): + GeoSeries.from_file(self.filename) diff -Nru python-geopandas-0.7.0/benchmarks/sindex.py python-geopandas-0.8.1/benchmarks/sindex.py --- python-geopandas-0.7.0/benchmarks/sindex.py 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/benchmarks/sindex.py 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,69 @@ +from geopandas import read_file, datasets +from geopandas.sindex import VALID_QUERY_PREDICATES + + +def generate_test_df(): + world = read_file(datasets.get_path("naturalearth_lowres")) + capitals = read_file(datasets.get_path("naturalearth_cities")) + countries = 
world.to_crs("epsg:3395")[["geometry"]] + capitals = capitals.to_crs("epsg:3395")[["geometry"]] + mixed = capitals.append(countries) # get a mix of geometries + points = capitals + polygons = countries + # filter out invalid geometries + data = { + "mixed": mixed[mixed.is_valid], + "points": points[points.is_valid], + "polygons": polygons[polygons.is_valid], + } + return data + + +class Bench: + + param_names = ["tree_geom_type"] + params = [["mixed", "points", "polygons"]] + + def setup(self, *args): + self.data = generate_test_df() + # cache bounds so that bound creation is not counted in benchmarks + self.bounds = [g.bounds for g in self.data["mixed"].geometry] + + def time_index_creation(self, tree_geom_type): + """Time creation of spatial index. + + Note: pygeos will only create the index once; this benchmark + is not intended to be used to compare rtree and pygeos. + """ + self.data[tree_geom_type]._invalidate_sindex() + self.data[tree_geom_type]._generate_sindex() + + def time_intersects(self, tree_geom_type): + for bounds in self.bounds: + self.data[tree_geom_type].sindex.intersection(bounds) + + def time_intersects_objects(self, tree_geom_type): + for bounds in self.bounds: + self.data[tree_geom_type].sindex.intersection(bounds, objects=True) + + +class BenchQuery: + + param_names = ["predicate", "input_geom_type", "tree_geom_type"] + params = [ + [*VALID_QUERY_PREDICATES], + ["mixed", "points", "polygons"], + ["mixed", "points", "polygons"], + ] + + def setup(self, *args): + self.data = generate_test_df() + + def time_query_bulk(self, predicate, input_geom_type, tree_geom_type): + self.data[tree_geom_type].sindex.query_bulk( + self.data[input_geom_type].geometry, predicate=predicate + ) + + def time_query(self, predicate, input_geom_type, tree_geom_type): + for geo in self.data[input_geom_type].geometry.sample(10, random_state=0): + self.data[tree_geom_type].sindex.query(geo, predicate=predicate) diff -Nru python-geopandas-0.7.0/CHANGELOG.md 
python-geopandas-0.8.1/CHANGELOG.md --- python-geopandas-0.7.0/CHANGELOG.md 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/CHANGELOG.md 2020-07-15 17:54:36.000000000 +0000 @@ -1,5 +1,111 @@ -Changes -======= +Changelog +========= + + +Version 0.8.1 (July 15, 2020) +----------------------------- + +Small bug-fix release: + +- Fix a regression in the `plot()` method when visualizing with a + JenksCaspallSampled or FisherJenksSampled scheme (#1486). +- Fix spurious warning in `GeoDataFrame.to_postgis` (#1497). +- Fix the un-pickling with `pd.read_pickle` of files written with older + GeoPandas versions (#1511). + + +Version 0.8.0 (June 24, 2020) +----------------------------- + +**Experimental**: optional use of PyGEOS to speed up spatial operations (#1155). +PyGEOS is a faster alternative for Shapely (being contributed back to a future +version of Shapely), and is used in element-wise spatial operations and for +spatial index in e.g. `sjoin` (#1343, #1401, #1421, #1427, #1428). See the +[installation docs](https://geopandas.readthedocs.io/en/latest/install.html#using-the-optional-pygeos-dependency) +for more info and how to enable it. + +New features and improvements: + +- IO enhancements: + - New `GeoDataFrame.to_postgis()` method to write to PostGIS database (#1248). + - New Apache Parquet and Feather file format support (#1180, #1435) + - Allow appending to files with `GeoDataFrame.to_file` (#1229). + - Add support for the `ignore_geometry` keyword in `read_file` to only read + the attribute data. If set to True, a pandas DataFrame without geometry is + returned (#1383). + - `geopandas.read_file` now supports reading from file-like objects (#1329). + - `GeoDataFrame.to_file` now supports specifying the CRS to write to the file + (#802). By default it still uses the CRS of the GeoDataFrame. + - New `chunksize` keyword in `geopandas.read_postgis` to read a query in + chunks (#1123). 
+- Improvements related to geometry columns and CRS: + - Any column of the GeoDataFrame that has a "geometry" dtype is now returned + as a GeoSeries. This means that when having multiple geometry columns, not + only the "active" geometry column is returned as a GeoSeries, but also + accessing another geometry column (`gdf["other_geom_column"]`) gives a + GeoSeries (#1336). + - Multiple geometry columns in a GeoDataFrame can now each have a different + CRS. The global `gdf.crs` attribute continues to return the CRS of the + "active" geometry column. The CRS of other geometry columns can be accessed + from the column itself (e.g. `gdf["other_geom_column"].crs`) (#1339). + - New `set_crs()` method on GeoDataFrame/GeoSeries to set the CRS of naive + geometries (#747). +- Improvements related to plotting: + - The y-axis is now scaled depending on the center of the plot when using a + geographic CRS, instead of using an equal aspect ratio (#1290). + - When passing a column of categorical dtype to the `column=` keyword of the + GeoDataFrame `plot()`, we now honor all categories and their order (#1483). + In addition, a new `categories` keyword allows specifying all categories + and their order otherwise (#1173). + - For choropleths using a classification scheme (using `scheme=`), the + `legend_kwds` accept two new keywords to control the formatting of the + legend: `fmt` with a format string for the bin edges (#1253), and `labels` + to pass fully custom class labels (#1302). +- New `covers()` and `covered_by()` methods on GeoSeries/GeoDataFrame for the + equivalent spatial predicates (#1460, #1462). +- GeoPandas now warns when using distance-based methods with data in a + geographic projection (#1378). + +Deprecations: + +- When constructing a GeoSeries or GeoDataFrame from data that already has a + CRS, a deprecation warning is raised when both CRS don't match, and in the + future an error will be raised in such a case. 
You can use the new `set_crs` + method to override an existing CRS. See + [the docs](https://geopandas.readthedocs.io/en/latest/projections.html#projection-for-multiple-geometry-columns). +- The helper functions in the `geopandas.plotting` module are deprecated for + public usage (#656). +- The `geopandas.io` functions are deprecated, use the top-level `read_file` and + `to_file` instead (#1407). +- The set operators (`&`, `|`, `^`, `-`) are deprecated, use the + `intersection()`, `union()`, `symmetric_difference()`, `difference()` methods + instead (#1255). +- The `sindex` for an empty dataframe will in the future return an empty spatial + index instead of `None` (#1438). +- The `objects` keyword in the `intersection` method of the spatial index + returned by the `sindex` attribute is deprecated and will be removed in the + future (#1440). + +Bug fixes: + +- Fix the `total_bounds()` method to ignore missing and empty geometries (#1312). +- Fix `geopandas.clip` when masking with a non-overlapping area resulting in an + empty GeoDataFrame (#1309, #1365). +- Fix error in `geopandas.sjoin` when joining on an empty geometry column (#1318). +- CRS related fixes: `pandas.concat` preserves CRS when concatenating GeoSeries + objects (#1340), preserve the CRS in `geopandas.clip` (#1362) and in + `GeoDataFrame.astype` (#1366). +- Fix bug in `GeoDataFrame.explode()` when 'level_1' is one of the column names + (#1445). +- Better error message when rtree is not installed (#1425). +- Fix bug in `GeoSeries.equals()` (#1451). +- Fix plotting of multi-part geometries with additional style keywords (#1385). + +And we now have a [Code of Conduct](https://github.com/geopandas/geopandas/blob/master/CODE_OF_CONDUCT.md)! + +GeoPandas 0.8.0 is the last release to support Python 3.5. The next release +will require Python 3.6, pandas 0.24, numpy 1.15 and shapely 1.6 or higher. 
+ Version 0.7.0 (February 16, 2020) --------------------------------- diff -Nru python-geopandas-0.7.0/ci/travis/35-minimal.yaml python-geopandas-0.8.1/ci/travis/35-minimal.yaml --- python-geopandas-0.7.0/ci/travis/35-minimal.yaml 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/ci/travis/35-minimal.yaml 2020-07-15 17:54:36.000000000 +0000 @@ -19,10 +19,10 @@ - matplotlib - descartes - matplotlib=2.0 - - mapclassify + - mapclassify>=2.2.0 - geopy - SQLalchemy - - psycopg2 - libspatialite + - pyarrow - pip: - pyproj==2.2.2 diff -Nru python-geopandas-0.7.0/ci/travis/36-pd023.yaml python-geopandas-0.8.1/ci/travis/36-pd023.yaml --- python-geopandas-0.7.0/ci/travis/36-pd023.yaml 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/ci/travis/36-pd023.yaml 2020-07-15 17:54:36.000000000 +0000 @@ -11,6 +11,7 @@ - gdal=2.3 - fiona #- pyproj + - geos # testing - pytest - pytest-cov @@ -19,12 +20,12 @@ - rtree - matplotlib=2 - descartes - - mapclassify #- geopy - SQLalchemy - - psycopg2 - libspatialite - pip: - pyproj==2.3.1 - geopy - codecov + - mapclassify>=2.2.0 + - git+https://github.com/pygeos/pygeos.git diff -Nru python-geopandas-0.7.0/ci/travis/36-pd024.yaml python-geopandas-0.8.1/ci/travis/36-pd024.yaml --- python-geopandas-0.7.0/ci/travis/36-pd024.yaml 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/ci/travis/36-pd024.yaml 2020-07-15 17:54:36.000000000 +0000 @@ -8,6 +8,7 @@ - shapely - fiona=1.7 #- pyproj + - geos # testing - pytest - pytest-cov @@ -18,10 +19,11 @@ - descartes #- geopy - SQLalchemy - - psycopg2 - libspatialite + - pyarrow - pip: - pyproj - codecov - geopy - - mapclassify + - mapclassify>=2.2.0 + - git+https://github.com/pygeos/pygeos.git diff -Nru python-geopandas-0.7.0/ci/travis/37-dev.yaml python-geopandas-0.8.1/ci/travis/37-dev.yaml --- python-geopandas-0.7.0/ci/travis/37-dev.yaml 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/ci/travis/37-dev.yaml 2020-07-15 17:54:36.000000000 +0000 @@ -9,6 
+9,7 @@ - shapely - fiona - pyproj + - geos # testing - pytest - pytest-cov @@ -19,9 +20,10 @@ - descartes #- geopy - SQLalchemy - - psycopg2 - libspatialite + - pyarrow - pip: - codecov - geopy - - mapclassify + - mapclassify>=2.2.0 + - git+https://github.com/pygeos/pygeos.git diff -Nru python-geopandas-0.7.0/ci/travis/37-latest-conda-forge.yaml python-geopandas-0.8.1/ci/travis/37-latest-conda-forge.yaml --- python-geopandas-0.7.0/ci/travis/37-latest-conda-forge.yaml 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/ci/travis/37-latest-conda-forge.yaml 2020-07-15 17:54:36.000000000 +0000 @@ -8,6 +8,7 @@ - shapely - fiona - pyproj + - pygeos # testing - pytest - pytest-cov @@ -16,8 +17,8 @@ - rtree - matplotlib - descartes - - pysal + - mapclassify - geopy - SQLalchemy - - psycopg2 - libspatialite + - pyarrow diff -Nru python-geopandas-0.7.0/ci/travis/37-latest-defaults.yaml python-geopandas-0.8.1/ci/travis/37-latest-defaults.yaml --- python-geopandas-0.7.0/ci/travis/37-latest-defaults.yaml 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/ci/travis/37-latest-defaults.yaml 2020-07-15 17:54:36.000000000 +0000 @@ -8,6 +8,7 @@ - shapely - fiona - pyproj + - geos # testing - pytest - pytest-cov @@ -18,9 +19,10 @@ - descartes #- geopy - SQLalchemy - - psycopg2 - libspatialite + - pyarrow - pip: - codecov - geopy - mapclassify + - git+https://github.com/pygeos/pygeos.git diff -Nru python-geopandas-0.7.0/ci/travis/38-latest-conda-forge.yaml python-geopandas-0.8.1/ci/travis/38-latest-conda-forge.yaml --- python-geopandas-0.7.0/ci/travis/38-latest-conda-forge.yaml 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/ci/travis/38-latest-conda-forge.yaml 2020-07-15 17:54:36.000000000 +0000 @@ -8,6 +8,7 @@ - shapely - fiona - pyproj + - pygeos # testing - pytest - pytest-cov @@ -16,8 +17,12 @@ - rtree - matplotlib - descartes - - pysal + - mapclassify - geopy + # installed in travis.yml, because not available on windows + # - postgis - 
SQLalchemy - psycopg2 - libspatialite + - geoalchemy2 + - pyarrow diff -Nru python-geopandas-0.7.0/ci/travis/38-no-optional-deps.yaml python-geopandas-0.8.1/ci/travis/38-no-optional-deps.yaml --- python-geopandas-0.7.0/ci/travis/38-no-optional-deps.yaml 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/ci/travis/38-no-optional-deps.yaml 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,14 @@ +name: test +channels: + - conda-forge +dependencies: + - python=3.8 + # required + - pandas + - shapely + - fiona + - pyproj + # testing + - pytest + - pytest-cov + - codecov diff -Nru python-geopandas-0.7.0/ci/travis/setup_postgres.sh python-geopandas-0.8.1/ci/travis/setup_postgres.sh --- python-geopandas-0.7.0/ci/travis/setup_postgres.sh 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/ci/travis/setup_postgres.sh 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,21 @@ +#!/bin/bash -e + +echo "Setting up Postgresql" + +mkdir -p ${HOME}/var +rm -rf ${HOME}/var/db + +pg_ctl initdb -D ${HOME}/var/db +pg_ctl start -D ${HOME}/var/db + +echo -n 'waiting for postgres' +while [ ! -e /tmp/.s.PGSQL.5432 ]; do + sleep 1 + echo -n '.' +done + +createuser -U travis -s postgres +createdb --owner=postgres test_geopandas +psql -d test_geopandas -q -c "CREATE EXTENSION postgis" + +echo "Done setting up Postgresql" diff -Nru python-geopandas-0.7.0/CODE_OF_CONDUCT.md python-geopandas-0.8.1/CODE_OF_CONDUCT.md --- python-geopandas-0.7.0/CODE_OF_CONDUCT.md 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/CODE_OF_CONDUCT.md 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,133 @@ +# GeoPandas Project Code of Conduct + +Behind the GeoPandas Project is an engaged and respectful community made up of people +from all over the world and with a wide range of backgrounds. +Naturally, this implies diversity of ideas and perspectives on often complex +problems. 
Disagreement and healthy discussion of conflicting viewpoints is +welcome: the best solutions to hard problems rarely come from a single angle. +But disagreement is not an excuse for aggression: humans tend to take +disagreement personally and easily drift into behavior that ultimately degrades +a community. This is particularly acute with online communication across +language and cultural gaps, where many cues of human behavior are unavailable. +We are outlining here a set of principles and processes to support a +healthy community in the face of these challenges. + +Fundamentally, we are committed to fostering a productive, harassment-free +environment for everyone. Rather than considering this code an exhaustive list +of things that you can’t do, take it in the spirit it is intended - a guide to +make it easier to enrich all of us and the communities in which we participate. + +Importantly: as a member of our community, *you are also a steward of these +values*. Not all problems need to be resolved via formal processes, and often +a quick, friendly but clear word on an online forum or in person can help +resolve a misunderstanding and de-escalate things. + +However, sometimes these informal processes may be inadequate: they fail to +work, there is urgency or risk to someone, nobody is intervening publicly and +you don't feel comfortable speaking in public, etc. For these or other +reasons, structured follow-up may be necessary and here we provide the means +for that: we welcome reports by emailing +[*geopandas-conduct@googlegroups.com*](mailto:geopandas-conduct@googlegroups.com) +or by filling out +[this form](https://docs.google.com/forms/d/e/1FAIpQLSd8Tbi2zNl1i2N9COX0yavHEqTGFIPQ1_cLcy1A3JgVc1OrAQ/viewform). + +This code applies equally to founders, developers, mentors and new community +members, in all spaces managed by the GeoPandas Project. 
This +includes the mailing lists, our GitHub organization, our chat room, in-person +events, and any other forums created by the project team. In addition, +violations of this code outside these spaces may affect a person's ability to +participate within them. + +By embracing the following principles, guidelines and actions to follow or +avoid, you will help us make the GeoPandas Project a welcoming and productive community. Feel +free to contact the Code of Conduct Committee at +[*geopandas-conduct@googlegroups.com*](mailto:geopandas-conduct@googlegroups.com) with any questions. + + +1. **Be friendly and patient**. + +2. **Be welcoming**. We strive to be a community that welcomes and supports + people of all backgrounds and identities. This includes, but is not limited + to, members of any race, ethnicity, culture, national origin, color, + immigration status, social and economic class, educational level, sex, sexual + orientation, gender identity and expression, age, physical appearance, family + status, technological or professional choices, academic + discipline, religion, mental ability, and physical ability. + +3. **Be considerate**. Your work will be used by other people, and you in turn + will depend on the work of others. Any decision you take will affect users + and colleagues, and you should take those consequences into account when + making decisions. Remember that we're a world-wide community. You may be + communicating with someone with a different primary language or cultural + background. + +4. **Be respectful**. Not all of us will agree all the time, but disagreement is + no excuse for poor behavior or poor manners. We might all experience some + frustration now and then, but we cannot allow that frustration to turn into a + personal attack. It’s important to remember that a community where people + feel uncomfortable or threatened is not a productive one. + +5. **Be careful in the words that you choose**. Be kind to others. 
Do not insult + or put down other community members. Harassment and other exclusionary + behavior are not acceptable. This includes, but is not limited to: + * Violent threats or violent language directed against another person + * Discriminatory jokes and language + * Posting sexually explicit or violent material + * Posting (or threatening to post) other people's personally identifying + information ("doxing") + * Personal insults, especially those using racist, sexist, and xenophobic terms + * Unwelcome sexual attention + * Advocating for, or encouraging, any of the above behavior + * Repeated harassment of others. In general, if someone asks you to stop, + then stop + +6. **Moderate your expectations**. Please respect that community members choose + how they spend their time in the project. A thoughtful question about your + expectations is preferable to demands for another person's time. + +7. **When we disagree, try to understand why**. Disagreements, both social and + technical, happen all the time and the GeoPandas Project is no exception. Try to + understand where others are coming from, as seeing a question from their + viewpoint may help find a new path forward. And don’t forget that it is + human to err: blaming each other doesn’t get us anywhere, while we can learn + from mistakes to find better solutions. + +8. **A simple apology can go a long way**. It can often de-escalate a situation, + and telling someone that you are sorry is an act of empathy that doesn’t + automatically imply an admission of guilt. + + +## Reporting + +If you believe someone is violating the code of conduct, please report this in +a timely manner. Code of conduct violations reduce the value of the community +for everyone and we take them seriously. 
+ +You can file a report by emailing +[*geopandas-conduct@googlegroups.com*](mailto:geopandas-conduct@googlegroups.com) or by filling out +[this form](https://docs.google.com/forms/d/e/1FAIpQLSd8Tbi2zNl1i2N9COX0yavHEqTGFIPQ1_cLcy1A3JgVc1OrAQ/viewform). + +The online form gives you the option to keep your report anonymous or request +that we follow up with you directly. While we cannot follow up on an anonymous +report, we will take appropriate action. + +Messages sent to the e-mail address or through the form will be sent +only to the Code of Conduct Committee, which currently consists of: + +* Hannah Aizenman +* Joris Van den Bossche +* Martin Fleischmann + + +## Enforcement + +Enforcement procedures within the GeoPandas Project follow Project Jupyter's +[*Enforcement Manual*](https://github.com/jupyter/governance/blob/master/conduct/enforcement.md). For information on enforcement, please view the [original manual](https://github.com/jupyter/governance/blob/master/conduct/enforcement.md). + +Original text courtesy of the [*Speak +Up!*](http://web.archive.org/web/20141109123859/http://speakup.io/coc.html), +[*Django*](https://www.djangoproject.com/conduct) and [*Jupyter*](https://github.com/jupyter/governance/blob/master/conduct/code_of_conduct.md) Projects, +modified by the GeoPandas Project. We are grateful to those projects for contributing these materials under open licensing terms for us to easily reuse. + +All content on this page is licensed under a [*Creative Commons +Attribution*](http://creativecommons.org/licenses/by/3.0/) license. diff -Nru python-geopandas-0.7.0/CONTRIBUTING.md python-geopandas-0.8.1/CONTRIBUTING.md --- python-geopandas-0.7.0/CONTRIBUTING.md 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/CONTRIBUTING.md 2020-07-15 17:54:36.000000000 +0000 @@ -13,8 +13,13 @@ where applicable. Please read the [pandas contributing guidelines](http://pandas.pydata.org/pandas-docs/stable/contributing.html). 
+ In particular, when submitting a pull request: +- Install the requirements for the development environment (one can do this + with either conda, and the environment.yml file, or pip, and the + requirements-dev.txt file, and can use the pandas contributing guidelines + as a guide). - All existing tests should pass. Please make sure that the test suite passes, both locally and on [Travis CI](https://travis-ci.org/geopandas/geopandas). Status on @@ -46,7 +51,7 @@ code format throughout the project. - Imports should be grouped with standard library imports first, - 3rd-party libraries next, and GeoPandas imports third. Within each + third-party libraries next, and GeoPandas imports third. Within each grouping, imports should be alphabetized. Always use absolute imports when possible, and explicit relative imports for local imports when necessary in tests. diff -Nru python-geopandas-0.7.0/debian/changelog python-geopandas-0.8.1/debian/changelog --- python-geopandas-0.7.0/debian/changelog 2020-02-17 14:17:10.000000000 +0000 +++ python-geopandas-0.8.1/debian/changelog 2020-12-19 10:00:00.000000000 +0000 @@ -1,3 +1,39 @@ +python-geopandas (0.8.1-1~focal2) focal; urgency=medium + + * No change rebuild for GDAL 3.2.0 transition. + + -- Angelos Tzotsos Sat, 19 Dec 2020 12:00:00 +0200 + +python-geopandas (0.8.1-1~focal1) focal; urgency=medium + + * No change rebuild for GDAL 3.1.3 transition. + + -- Angelos Tzotsos Fri, 02 Oct 2020 14:00:00 +0300 + +python-geopandas (0.8.1-1~focal0) focal; urgency=medium + + * No change rebuild for Focal. + + -- Angelos Tzotsos Sat, 08 Aug 2020 20:00:00 +0300 + +python-geopandas (0.8.1-1) unstable; urgency=medium + + * Team upload. + * New upstream release. + + -- Bas Couwenberg Thu, 16 Jul 2020 05:39:56 +0200 + +python-geopandas (0.8.0-1) unstable; urgency=medium + + * Team upload. + * New upstream release. + * Bump debhelper compat to 10, changes: + - Drop --parallel option, enabled by default + * Refresh patches. 
+ * Add lintian override for broken-zip. + + -- Bas Couwenberg Wed, 24 Jun 2020 15:50:39 +0200 + python-geopandas (0.7.0-1) unstable; urgency=medium * Team upload. diff -Nru python-geopandas-0.7.0/debian/compat python-geopandas-0.8.1/debian/compat --- python-geopandas-0.7.0/debian/compat 2014-12-30 07:22:01.000000000 +0000 +++ python-geopandas-0.8.1/debian/compat 2020-03-19 19:28:16.000000000 +0000 @@ -1 +1 @@ -9 +10 diff -Nru python-geopandas-0.7.0/debian/control python-geopandas-0.8.1/debian/control --- python-geopandas-0.7.0/debian/control 2020-02-07 05:45:38.000000000 +0000 +++ python-geopandas-0.8.1/debian/control 2020-03-19 19:28:23.000000000 +0000 @@ -3,7 +3,7 @@ Priority: optional Maintainer: Debian GIS Project Uploaders: Johan Van de Wauw -Build-Depends: debhelper (>= 9), +Build-Depends: debhelper (>= 10~), dh-python, gdal-bin, cython3, diff -Nru python-geopandas-0.7.0/debian/patches/no-gallery.patch python-geopandas-0.8.1/debian/patches/no-gallery.patch --- python-geopandas-0.7.0/debian/patches/no-gallery.patch 2020-02-17 14:16:32.000000000 +0000 +++ python-geopandas-0.8.1/debian/patches/no-gallery.patch 2020-06-24 13:50:11.000000000 +0000 @@ -12,7 +12,7 @@ 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', 'sphinx.ext.autodoc', -@@ -56,20 +55,6 @@ templates_path = ['_templates'] +@@ -57,18 +56,6 @@ templates_path = ['_templates'] autosummary_generate = True @@ -25,11 +25,9 @@ - 'reference_url': {'matplotlib': 'http://matplotlib.org', - 'numpy': 'http://docs.scipy.org/doc/numpy', - 'scipy': 'http://docs.scipy.org/doc/scipy/reference', -- 'pyproj': 'http://pyproj4.github.io/pyproj/stable/', - 'geopandas': None}, - 'backreferences_dir': 'reference' -} -- + # connect docs in other projects + intersphinx_mapping = {'pyproj': ('http://pyproj4.github.io/pyproj/stable/', None)} # suppress matplotlib warning in examples - warnings.filterwarnings( - "ignore", diff -Nru python-geopandas-0.7.0/debian/python3-geopandas.lintian-overrides 
python-geopandas-0.8.1/debian/python3-geopandas.lintian-overrides --- python-geopandas-0.7.0/debian/python3-geopandas.lintian-overrides 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/debian/python3-geopandas.lintian-overrides 2020-06-24 14:01:23.000000000 +0000 @@ -0,0 +1,3 @@ +# Zip archive data, at least v2.0 to extract +broken-zip usr/lib/python*/dist-packages/geopandas/datasets/nybb_16a.zip + diff -Nru python-geopandas-0.7.0/debian/rules python-geopandas-0.8.1/debian/rules --- python-geopandas-0.7.0/debian/rules 2019-12-24 04:44:02.000000000 +0000 +++ python-geopandas-0.8.1/debian/rules 2020-06-24 13:59:09.000000000 +0000 @@ -5,7 +5,7 @@ export PYBUILD_NAME=geopandas %: - dh $@ --with python3 --buildsystem pybuild --parallel + dh $@ --with python3 --buildsystem pybuild override_dh_clean: dh_clean diff -Nru python-geopandas-0.7.0/doc/environment.yml python-geopandas-0.8.1/doc/environment.yml --- python-geopandas-0.7.0/doc/environment.yml 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/doc/environment.yml 2020-07-15 17:54:36.000000000 +0000 @@ -15,6 +15,7 @@ - sphinx=2.4.1 - sphinx_rtd_theme=0.4.3 - numpydoc=0.9.2 +- recommonmark==0.6.0 - ipython=7.12.0 - pillow=7.0.0 - mock=3.0.5 diff -Nru python-geopandas-0.7.0/doc/source/changelog.rst python-geopandas-0.8.1/doc/source/changelog.rst --- python-geopandas-0.7.0/doc/source/changelog.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/doc/source/changelog.rst 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1 @@ +.. 
include:: ../../CHANGELOG.md diff -Nru python-geopandas-0.7.0/doc/source/code_of_conduct.rst python-geopandas-0.8.1/doc/source/code_of_conduct.rst --- python-geopandas-0.7.0/doc/source/code_of_conduct.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/doc/source/code_of_conduct.rst 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,135 @@ +GeoPandas Project Code of Conduct +================================= + +Behind the GeoPandas Project is an engaged and respectful community made up of +people from all over the world and with a wide range of backgrounds. +Naturally, this implies diversity of ideas and perspectives on often +complex problems. Disagreement and healthy discussion of conflicting +viewpoints is welcome: the best solutions to hard problems rarely come from a single +angle. But disagreement is not an excuse for aggression: humans tend to take +disagreement personally and easily drift into behavior that ultimately +degrades a community. This is particularly acute with online communication +across language and cultural gaps, where many cues of human behavior are +unavailable. We are outlining here a set of principles and processes to support a +healthy community in the face of these challenges. + +Fundamentally, we are committed to fostering a productive, harassment-free +environment for everyone. Rather than considering this code an exhaustive list +of things that you can’t do, take it in the spirit it is intended - a guide to +make it easier to enrich all of us and the communities in which we participate. + +Importantly: as a member of our community, *you are also a steward of these +values*. Not all problems need to be resolved via formal processes, and often +a quick, friendly but clear word on an online forum or in person can help +resolve a misunderstanding and de-escalate things. 
+ +However, sometimes these informal processes may be inadequate: they fail to +work, there is urgency or risk to someone, nobody is intervening publicly and +you don't feel comfortable speaking in public, etc. For these or other +reasons, structured follow-up may be necessary and here we provide the means +for that: we welcome reports by emailing +`geopandas-conduct@googlegroups.com `__ or by filling out `this +form `__. + +This code applies equally to founders, developers, mentors and new community +members, in all spaces managed by the GeoPandas Project. This +includes the mailing lists, our GitHub organization, our chat room, in-person +events, and any other forums created by the project team. In addition, +violations of this code outside these spaces may affect a person's ability to +participate within them. + +By embracing the following principles, guidelines and actions to follow or +avoid, you will help us make GeoPandas a welcoming and productive community. Feel +free to contact the Code of Conduct Committee at +`geopandas-conduct@googlegroups.com `__ with any questions. + +1. **Be friendly and patient**. + +2. **Be welcoming**. We strive to be a community that welcomes and supports + people of all backgrounds and identities. This includes, but is not limited + to, members of any race, ethnicity, culture, national origin, color, + immigration status, social and economic class, educational level, sex, sexual + orientation, gender identity and expression, age, physical appearance, family + status, technological or professional choices, academic + discipline, religion, mental ability, and physical ability. + +3. **Be considerate**. Your work will be used by other people, and you in turn + will depend on the work of others. Any decision you take will affect users + and colleagues, and you should take those consequences into account when + making decisions. Remember that we're a world-wide community.
You may be + communicating with someone with a different primary language or cultural + background. + +4. **Be respectful**. Not all of us will agree all the time, but disagreement is + no excuse for poor behavior or poor manners. We might all experience some + frustration now and then, but we cannot allow that frustration to turn into a + personal attack. It’s important to remember that a community where people + feel uncomfortable or threatened is not a productive one. + +5. **Be careful in the words that you choose**. Be kind to others. Do not insult + or put down other community members. Harassment and other exclusionary + behavior are not acceptable. This includes, but is not limited to: + + - Violent threats or violent language directed against another person + - Discriminatory jokes and language + - Posting sexually explicit or violent material + - Posting (or threatening to post) other people's personally identifying information ("doxing") + - Personal insults, especially those using racist, sexist, and xenophobic terms + - Unwelcome sexual attention + - Advocating for, or encouraging, any of the above behavior + - Repeated harassment of others. In general, if someone asks you to stop, then stop + +6. **Moderate your expectations**. Please respect that community members choose + how they spend their time in the project. A thoughtful question about your + expectations is preferable to demands for another person's time. + +7. **When we disagree, try to understand why**. Disagreements, both social and + technical, happen all the time and the GeoPandas Project is no exception. Try to + understand where others are coming from, as seeing a question from their + viewpoint may help find a new path forward. And don’t forget that it is + human to err: blaming each other doesn’t get us anywhere, while we can learn + from mistakes to find better solutions. + +8. **A simple apology can go a long way**. 
It can often de-escalate a situation, + and telling someone that you are sorry is an act of empathy that doesn’t + automatically imply an admission of guilt. + +Reporting +--------- + +If you believe someone is violating the code of conduct, please report this in +a timely manner. Code of conduct violations reduce the value of the community +for everyone and we take them seriously. + +You can file a report by emailing +`geopandas-conduct@googlegroups.com `__ or by filling out +`this form `__. + +The online form gives you the option to keep your report anonymous or request +that we follow up with you directly. While we cannot follow up on an anonymous +report, we will take appropriate action. + +Messages sent to the e-mail address or through the form will be sent +only to the Code of Conduct Committee, which currently consists of: + +- Hannah Aizenman +- Joris Van den Bossche +- Martin Fleischmann + +Enforcement +----------- + +Enforcement procedures within the GeoPandas Project follow Project Jupyter's `Enforcement +Manual `__. +For information on enforcement, please view the `original +manual `__. + +Original text courtesy of the `Speak +Up! `__, +`Django `__ and +`Jupyter `__ +Projects, modified by the GeoPandas Project. We are grateful to those projects for +contributing these materials under open licensing terms for us to easily reuse. + +All content on this page is licensed under a `Creative Commons +Attribution `__ license.
diff -Nru python-geopandas-0.7.0/doc/source/conf.py python-geopandas-0.8.1/doc/source/conf.py --- python-geopandas-0.7.0/doc/source/conf.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/doc/source/conf.py 2020-07-15 17:54:36.000000000 +0000 @@ -32,6 +32,7 @@ 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', 'sphinx.ext.autodoc', + 'recommonmark', 'numpydoc', ] @@ -65,11 +66,11 @@ 'reference_url': {'matplotlib': 'http://matplotlib.org', 'numpy': 'http://docs.scipy.org/doc/numpy', 'scipy': 'http://docs.scipy.org/doc/scipy/reference', - 'pyproj': 'http://pyproj4.github.io/pyproj/stable/', 'geopandas': None}, 'backreferences_dir': 'reference' } - +# connect docs in other projects +intersphinx_mapping = {'pyproj': ('http://pyproj4.github.io/pyproj/stable/', None)} # suppress matplotlib warning in examples warnings.filterwarnings( "ignore", @@ -79,7 +80,7 @@ ) # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ['.rst', '.md'] # The encoding of source files. #source_encoding = 'utf-8-sig' diff -Nru python-geopandas-0.7.0/doc/source/index.rst python-geopandas-0.8.1/doc/source/index.rst --- python-geopandas-0.7.0/doc/source/index.rst 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/doc/source/index.rst 2020-07-15 17:54:36.000000000 +0000 @@ -51,6 +51,7 @@ :caption: Reference Guide Reference to All Attributes and Methods + Changelog .. 
toctree:: @@ -58,6 +59,7 @@ :caption: Developer Contributing to GeoPandas + Code of Conduct Get in touch diff -Nru python-geopandas-0.7.0/doc/source/install.rst python-geopandas-0.8.1/doc/source/install.rst --- python-geopandas-0.7.0/doc/source/install.rst 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/doc/source/install.rst 2020-07-15 17:54:36.000000000 +0000 @@ -62,7 +62,7 @@ Creating a new environment is not strictly necessary, but given that installing other geospatial packages from different channels may cause dependency conflicts (as mentioned in the note above), it can be good practice to install the geospatial -stack in a clean environment starting fresh. +stack in a clean environment starting fresh. The following commands create a new environment with the name ``geo_env``, configures it to install packages always from conda-forge, and installs @@ -92,13 +92,13 @@ When using pip to install GeoPandas, you need to make sure that all dependencies are installed correctly. - - `shapely`_ and `fiona`_ provide binary wheels with the - dependencies included for Mac and Linux, but not for Windows. - - `pyproj`_ provides binary wheels with depencies included + - `fiona`_ provides binary wheels with the dependencies included for Mac and Linux, + but not for Windows. + - `pyproj`_ and `shapely`_ provide binary wheels with dependencies included for Mac, Linux, and Windows. - `rtree`_ does not provide wheels. - Windows wheels for `shapely`, `fiona`, `pyproj` and `rtree` - can be found at `Christopher Gohlke's website + can be found at `Christopher Gohlke's website `_. 
So depending on your platform, you might need to compile and install their @@ -143,15 +143,16 @@ - `shapely`_ (interface to `GEOS`_) - `fiona`_ (interface to `GDAL`_) - `pyproj`_ (interface to `PROJ`_; version 2.2.0 or later) -- `six`_ Further, optional dependencies are: - `rtree`_ (optional; spatial index to improve performance and required for overlay operations; interface to `libspatialindex`_) - `psycopg2`_ (optional; for PostGIS connection) +- `GeoAlchemy2`_ (optional; for writing to PostGIS) - `geopy`_ (optional; for geocoding) + For plotting, these additional packages may be used: - `matplotlib`_ (>= 2.0.1) @@ -159,6 +160,47 @@ - `mapclassify`_ +Using the optional PyGEOS dependency +------------------------------------ + +Work is ongoing to improve the performance of GeoPandas. Currently, the +fast implementations of basic spatial operations live in the `PyGEOS`_ +package (but work is under way to contribute those improvements to Shapely). +Starting with GeoPandas 0.8, it is possible to optionally use those +experimental speedups by installing PyGEOS. This can be done with conda +(using the conda-forge channel) or pip:: + + # conda + conda install pygeos --channel conda-forge + # pip + pip install pygeos + +More specifically, whether the speedups are used or not is determined by: + +- If PyGEOS is installed, it will be used by default (but installing GeoPandas + will not yet automatically install PyGEOS as dependency, you need to do this + manually). + +- You can still toggle the use of PyGEOS when it is available, by: + + - Setting an environment variable (``USE_PYGEOS=0/1``). Note this variable + is only checked at first import of GeoPandas. + - Setting an option: ``geopandas.options.use_pygeos = True/False``. Note, + although this variable can be set during an interactive session, it will + only work if the GeoDataFrames you use are created (e.g. reading a file + with ``read_file``) after changing this value. + +.. 
warning:: + + The use of PyGEOS is experimental! Although it is passing all tests, + there might still be issues and not all functions of GeoPandas will + already benefit from speedups (one known issue: the `to_crs` coordinate + transformations lose the z coordinate). But trying this out is very welcome! + Any issues you encounter (but also reports of successful usage are + interesting!) can be reported at https://gitter.im/geopandas/geopandas + or https://github.com/geopandas/geopandas/issues + + .. _PyPI: https://pypi.python.org/pypi/geopandas .. _GitHub: https://github.com/geopandas/geopandas @@ -177,10 +219,10 @@ .. _geopy: https://github.com/geopy/geopy -.. _six: https://pythonhosted.org/six - .. _psycopg2: https://pypi.python.org/pypi/psycopg2 +.. _GeoAlchemy2: https://geoalchemy-2.readthedocs.io/ + .. _mapclassify: http://pysal.org/mapclassify .. _pyproj: https://github.com/pyproj4/pyproj @@ -204,3 +246,5 @@ .. _GEOS: https://geos.osgeo.org .. _PROJ: https://proj.org/ + +.. _PyGEOS: https://github.com/pygeos/pygeos/ diff -Nru python-geopandas-0.7.0/doc/source/io.rst python-geopandas-0.8.1/doc/source/io.rst --- python-geopandas-0.7.0/doc/source/io.rst 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/doc/source/io.rst 2020-07-15 17:54:36.000000000 +0000 @@ -41,6 +41,17 @@ zipfile = "zip:///Users/name/Downloads/gadm36_AFG_shp.zip!data/gadm36_AFG_1.shp" +It is also possible to read any file-like objects with a ``read()`` method, such as a file handler (e.g. via built-in ``open`` function) or ``StringIO``:: + + filename = "test.geojson" + file = open(filename) + df = geopandas.read_file(file) + +You can also read path objects:: + + import pathlib + path_object = pathlib.Path(filename) + df = geopandas.read_file(path_object) *geopandas* can also get data from a PostGIS database using the :func:`geopandas.read_postgis` command.
@@ -105,6 +116,31 @@ rows=slice(10, 20), ) +Field/Column Filters +^^^^^^^^^^^^^^^^^^^^ + +Load in a subset of fields from the file: + +.. note:: Requires Fiona 1.8+ + +.. code-block:: python + + gdf = geopandas.read_file( + geopandas.datasets.get_path("naturalearth_lowres"), + ignore_fields=["iso_a3", "gdp_md_est"], + ) + +Skip loading geometry from the file: + +.. note:: Requires Fiona 1.8+ +.. note:: Returns :obj:`pandas.DataFrame` + +.. code-block:: python + + pdf = geopandas.read_file( + geopandas.datasets.get_path("naturalearth_lowres"), + ignore_geometry=True, + ) Writing Spatial Data @@ -114,6 +150,15 @@ :meth:`geopandas.GeoDataFrame.to_file` method. For a full list of supported formats, type ``import fiona; fiona.supported_drivers``. +In addition, GeoDataFrames can be uploaded to `PostGIS `__ database (starting with GeoPandas 0.8) +by using the :meth:`geopandas.GeoDataFrame.to_postgis` method. + +.. note:: + + GeoDataFrame can contain more field types than supported by most of the file formats. For example tuples or lists + can be easily stored in the GeoDataFrame, but saving them to e.g. GeoPackage or Shapefile will raise a ValueError. + Before saving to a file, they need to be converted to a format supported by a selected driver. 
+ **Writing to Shapefile**:: countries_gdf.to_file("countries.shp") @@ -126,3 +171,10 @@ countries_gdf.to_file("package.gpkg", layer='countries', driver="GPKG") cities_gdf.to_file("package.gpkg", layer='cities', driver="GPKG") + +**Writing to PostGIS**:: + + from sqlalchemy import create_engine + db_connection_url = "postgres://myusername:mypassword@myhost:5432/mydatabase" + engine = create_engine(db_connection_url) + countries_gdf.to_postgis(name="countries_table", con=engine) diff -Nru python-geopandas-0.7.0/doc/source/mergingdata.rst python-geopandas-0.8.1/doc/source/mergingdata.rst --- python-geopandas-0.7.0/doc/source/mergingdata.rst 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/doc/source/mergingdata.rst 2020-07-15 17:54:36.000000000 +0000 @@ -13,7 +13,7 @@ In an attribute join, a ``GeoSeries`` or ``GeoDataFrame`` is combined with a regular *pandas* ``Series`` or ``DataFrame`` based on a common variable. This is analogous to normal merging or joining in *pandas*. -In a Spatial Join, observations from to ``GeoSeries`` or ``GeoDataFrames`` are combined based on their spatial relationship to one another. +In a Spatial Join, observations from two ``GeoSeries`` or ``GeoDataFrames`` are combined based on their spatial relationship to one another. In the following examples, we use these datasets: @@ -31,6 +31,22 @@ countries = countries.rename(columns={'name':'country'}) +Appending +--------- + +Appending GeoDataFrames and GeoSeries uses pandas ``append`` methods. Keep in mind that appended geometry columns need to have the same CRS. + +.. ipython:: python + + # Appending GeoSeries + joined = world.geometry.append(cities.geometry) + + # Appending GeoDataFrames + europe = world[world.continent == 'Europe'] + asia = world[world.continent == 'Asia'] + eurasia = europe.append(asia) + + Attribute Joins ---------------- @@ -75,11 +91,11 @@ Sjoin Arguments ~~~~~~~~~~~~~~~~ -``sjoin.()`` has two core arguments: ``how`` and ``op``.
+``sjoin()`` has two core arguments: ``how`` and ``op``. **op** -The ```op`` argument specifies how ``geopandas`` decides whether or not to join the attributes of one object to another. There are three different join options as follows: +The ``op`` argument specifies how ``geopandas`` decides whether or not to join the attributes of one object to another. There are three different join options as follows: * `intersects`: The attributes will be joined if the boundary and interior of the object intersect in any way with the boundary and/or interior of the other object. * `within`: The attributes will be joined if the object’s boundary and interior intersect *only* with the interior of the other object (not its boundary or exterior). @@ -96,25 +112,3 @@ * ``inner``: use intersection of index values from both geodataframes; retain only the `left_df` geometry column Note more complicated spatial relationships can be studied by combining geometric operations with spatial join. To find all polygons within a given distance of a point, for example, one can first use the ``buffer`` method to expand each point into a circle of appropriate radius, then intersect those buffered circles with the polygons in question. - - -Sjoin Performance -~~~~~~~~~~~~~~~~~~ - -Existing spatial indexes on either `left_df` or `right_df` will be reused when performing an ``sjoin``. If neither df has a spatial index, a spatial index will be generated for the longer df. If both have a spatial index, the `right_df`'s index will be used preferentially. Performance of multiple sjoins in a row involving a common GeoDataFrame may be improved by pre-generating the spatial index of the common GeoDataFrame prior to performing sjoins using ``df1.sindex``. - -.. code-block:: python - - df1 = # a GeoDataFrame with data - df2 = # a second GeoDataFrame - df3 = # a third GeoDataFrame - - # pre-generate sindex on df1 if it doesn't already exist - df1.sindex - - sjoin(df1, df2, ...) 
- # sindex for df1 is reused - sjoin(df1, df3, ...) - # sindex for df1 is reused again - - diff -Nru python-geopandas-0.7.0/doc/source/projections.rst python-geopandas-0.8.1/doc/source/projections.rst --- python-geopandas-0.7.0/doc/source/projections.rst 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/doc/source/projections.rst 2020-07-15 17:54:36.000000000 +0000 @@ -25,7 +25,7 @@ commonly used CRS is the WGS84 latitude-longitude projection. This can be referred to using the authority code ``"EPSG:4326"``. -*geopandas* can accept anything accepted by `pyproj.CRS.from_user_input() `_: +*geopandas* can accept anything accepted by :meth:`pyproj.CRS.from_user_input() `: - CRS WKT string - An authority string (i.e. "epsg:4326") @@ -64,11 +64,15 @@ From time to time, however, you may get data that does not include a projection. In this situation, you have to set the CRS so *geopandas* knows how to interpret the coordinates. -For example, if you convert a spreadsheet of latitudes and longitudes into a GeoSeries by hand, you would set the projection by assigning the WGS84 latitude-longitude CRS to the :attr:`GeoSeries.crs` attribute: +For example, if you convert a spreadsheet of latitudes and longitudes into a +GeoSeries by hand, you would set the projection by passing the WGS84 +latitude-longitude CRS to the :meth:`GeoSeries.set_crs` method (or by setting +the :attr:`GeoSeries.crs` attribute): .. sourcecode:: python - my_geoseries.crs = "EPSG:4326" + my_geoseries = my_geoseries.set_crs("EPSG:4326") + my_geoseries = my_geoseries.set_crs(epsg=4326) Re-Projecting @@ -98,6 +102,45 @@ ax.set_title("Mercator"); +Projection for multiple geometry columns +---------------------------------------- + +GeoPandas 0.8 implements support for different projections assigned to different geometry +columns of the same GeoDataFrame. The projection is now stored together with geometries per column (directly +on the GeometryArray level).
+ +Note that if GeometryArray has assigned projection, it is preferred over the +projection passed to GeoSeries or GeoDataFrame during the creation: + +.. code-block:: python + + >>> array.crs + + Name: WGS 84 + Axis Info [ellipsoidal]: + - Lat[north]: Geodetic latitude (degree) + - Lon[east]: Geodetic longitude (degree) + ... + >>> GeoSeries(array, crs=3395).crs # crs=3395 is ignored as array already has CRS + FutureWarning: CRS mismatch between CRS of the passed geometries and 'crs'. Use 'GeoDataFrame.set_crs(crs, allow_override=True)' to overwrite CRS or 'GeoDataFrame.to_crs(crs)' to reproject geometries. CRS mismatch will raise an error in the future versions of GeoPandas. + GeoSeries(array, crs=3395).crs + + + Name: WGS 84 + Axis Info [ellipsoidal]: + - Lat[north]: Geodetic latitude (degree) + - Lon[east]: Geodetic longitude (degree) + ... + +If you want to overwrite projection, you can then assign it to the GeoSeries +manually or re-project geometries to the target projection using either +``GeoSeries.set_crs(epsg=3395, allow_override=True)`` or +``GeoSeries.to_crs(epsg=3395)``. + +All GeometryArray-based operations preserve projection; however, if you loop over a column +containing geometry, this information might be lost. + + Upgrading to GeoPandas 0.7 with pyproj > 2.2 and PROJ > 6 --------------------------------------------------------- @@ -384,7 +427,7 @@ You can see that the CRS object constructed from the WKT string has a "Easting, Northing" (i.e. x, y) axis order, while the CRS object constructed from the EPSG -code has a (Northing, Easting) axis order. +code has a (Northing, Easting) axis order. 
Only having this difference in axis order is no problem when using the CRS in GeoPandas, since GeoPandas always uses a (x, y) order to store the data diff -Nru python-geopandas-0.7.0/doc/source/reference.rst python-geopandas-0.8.1/doc/source/reference.rst --- python-geopandas-0.7.0/doc/source/reference.rst 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/doc/source/reference.rst 2020-07-15 17:54:36.000000000 +0000 @@ -25,6 +25,10 @@ .. autoattribute:: geopandas.GeoSeries.interiors +.. autoattribute:: geopandas.GeoSeries.x + +.. autoattribute:: geopandas.GeoSeries.y + `Unary Predicates` .. autoattribute:: geopandas.GeoSeries.is_empty @@ -49,10 +53,14 @@ .. automethod:: geopandas.GeoSeries.intersects +.. automethod:: geopandas.GeoSeries.overlaps + .. automethod:: geopandas.GeoSeries.touches .. automethod:: geopandas.GeoSeries.within +.. automethod:: geopandas.GeoSeries.covers + `Set-theoretic Methods` .. automethod:: geopandas.GeoSeries.difference @@ -95,18 +103,29 @@ Additionally, the following attributes and methods are implemented: +.. automethod:: geopandas.GeoSeries.from_file + +.. automethod:: geopandas.GeoSeries.to_file + +.. automethod:: geopandas.GeoSeries.to_json + .. autoattribute:: geopandas.GeoSeries.crs .. automethod:: geopandas.GeoSeries.to_crs -.. automethod:: geopandas.GeoSeries.from_file - .. automethod:: geopandas.GeoSeries.plot .. autoattribute:: geopandas.GeoSeries.total_bounds .. autoattribute:: geopandas.GeoSeries.__geo_interface__ +.. automethod:: geopandas.GeoSeries.isna + +.. automethod:: geopandas.GeoSeries.notna + +.. automethod:: geopandas.GeoSeries.fillna + + Methods of pandas ``Series`` objects are also available, although not all are applicable to geometric objects and some may return a ``Series`` rather than a ``GeoSeries`` result. 
The methods @@ -117,7 +136,7 @@ GeoDataFrame ------------ -A ``GeoDataFrame`` is a tablular data structure that contains a column +A ``GeoDataFrame`` is a tabular data structure that contains a column called ``geometry`` which contains a `GeoSeries``. Currently, the following methods/attributes are implemented for a ``GeoDataFrame``: @@ -128,18 +147,32 @@ .. automethod:: geopandas.GeoDataFrame.from_file +.. automethod:: geopandas.GeoDataFrame.from_features + .. automethod:: geopandas.GeoDataFrame.from_postgis +.. automethod:: geopandas.GeoDataFrame.to_crs + .. automethod:: geopandas.GeoDataFrame.to_file .. automethod:: geopandas.GeoDataFrame.to_json +.. automethod:: geopandas.GeoDataFrame.to_parquet + +.. automethod:: geopandas.GeoDataFrame.to_feather + +.. automethod:: geopandas.GeoDataFrame.to_postgis + .. automethod:: geopandas.GeoDataFrame.plot .. automethod:: geopandas.GeoDataFrame.rename_geometry .. automethod:: geopandas.GeoDataFrame.set_geometry +.. automethod:: geopandas.GeoDataFrame.explode + +.. automethod:: geopandas.GeoDataFrame.dissolve + .. autoattribute:: geopandas.GeoDataFrame.__geo_interface__ All pandas ``DataFrame`` methods are also available, although they may @@ -147,8 +180,22 @@ return a ``GeoDataFrame`` result even when it would be appropriate to do so. -API Pages ---------- +Testing +------- + +GeoPandas includes specific functions to test its objects. + +.. autofunction:: geopandas.testing.geom_equals + +.. autofunction:: geopandas.testing.geom_almost_equals + +.. autofunction:: geopandas.testing.assert_geoseries_equal + +.. autofunction:: geopandas.testing.assert_geodataframe_equal + + +Top-level Functions +------------------- .. currentmodule:: geopandas .. 
autosummary:: @@ -158,8 +205,13 @@ GeoDataFrame GeoSeries read_file + read_parquet + read_feather + read_postgis sjoin overlay clip tools.geocode + tools.collect + points_from_xy datasets.get_path diff -Nru python-geopandas-0.7.0/environment.yml python-geopandas-0.8.1/environment.yml --- python-geopandas-0.7.0/environment.yml 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/environment.yml 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,33 @@ +name: geopandas-dev +channels: + - conda-forge +dependencies: + # required + - fiona>=1.7 + - pandas>=0.23.4 + - pyproj>=2.2.0 + - shapely>=1.5 + + # geodatabase access + - psycopg2>=2.5.1 + - SQLAlchemy>=0.8.3 + + # geocoding + - geopy + + # plotting + - descartes>=1.0 + - matplotlib>=2.0 + + # testing + - mock>=1.0.1 # technically not need for python >= 3.3 + - pytest>=3.1.0 + - pytest-cov + - codecov + + # spatial access methods + - rtree>=0.8 + + # styling + - black + - pre-commit diff -Nru python-geopandas-0.7.0/examples/choro_legends.ipynb python-geopandas-0.8.1/examples/choro_legends.ipynb --- python-geopandas-0.7.0/examples/choro_legends.ipynb 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/examples/choro_legends.ipynb 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,542 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas\n", + "from geopandas import read_file" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2.2.0'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import mapclassify\n", + "mapclassify.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'4.2.0'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"import libpysal\n", + "libpysal.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Name Description Installed\n", + "0 10740 Albuquerque, New Mexico, Census 2000 Tract Data True\n", + "1 AirBnB Airbnb rentals, socioeconomics, and crime in C... False\n", + "2 Atlanta Atlanta, GA region homicide counts and rates False\n", + "3 Baltimore Baltimore house sales prices and hedonics False\n", + "4 Bostonhsg Boston housing and neighborhood data False\n", + "5 Buenosaires Electoral Data for 1999 Argentinean Elections False\n", + "6 Charleston1 2000 Census Tract Data for Charleston, SC MSA... False\n", + "7 Charleston2 1998 and 2001 Zip Code Business Patterns (Cen... False\n", + "8 Chicago Health Chicago Health + Socio-Economics False\n", + "9 Chile Labor Labor Markets in Chile (1982-2002) False\n", + "10 Chile Migration Internal Migration in Chile (1977-2002) False\n", + "11 Cincinnati 2008 Cincinnati Crime + Socio-Demographics False\n", + "12 Cleveland 2015 sales prices of homes in Cleveland, OH. False\n", + "13 Columbus Columbus neighborhood crime False\n", + "14 Denver Demographics and housing in Denver neighborho... False\n", + "15 Elections 2012 and 2016 Presidential Elections False\n", + "16 Grid100 Grid with simulated variables False\n", + "17 Groceries 2015 Chicago supermarkets False\n", + "18 Guerry Moral statistics of France (Guerry, 1833) False\n", + "19 Health Indicators Chicago Health Indicators (2005-11) False\n", + "20 Health+ 2000 Health, Income + Diversity False\n", + "21 Hickory1 2000 Census Tract Data for Hickory, NC MSA an... False\n", + "22 Hickory2 1998 and 2001 Zip Code Business Patterns (Cen... 
False\n", + "23 Home Sales 2014-15 Home Sales in King County, WA False\n", + "24 Houston Houston, TX region homicide counts and rates False\n", + "25 Juvenile Cardiff juvenile delinquent residences False\n", + "26 Lansing1 2000 Census Tract Data for Lansing, MI MSA an... False\n", + "27 Lansing2 1998 and 2001 Zip Code Business Patterns (Cen... False\n", + "28 Laozone Ozone measures at monitoring stations in Los ... False\n", + "29 LasRosas Corn yield, fertilizer and field data for pre... False\n", + "30 Line Line Shapefile True\n", + "31 Liquor Stores 2015 Chicago Liquor Stores False\n", + "32 Malaria Malaria incidence and population (1973, 95, 9... False\n", + "33 Milwaukee1 2000 Census Tract Data for Milwaukee, WI MSA False\n", + "34 Milwaukee2 1998 and 2001 Zip Code Business Patterns (Cen... False\n", + "35 NCOVR US county homicides 1960-1990 False\n", + "36 NDVI Normalized Difference Vegetation Index grid False\n", + "37 NYC Demographic and housing data for New York Cit... False\n", + "38 NYC Earnings Block-level Earnings in NYC (2002-14) False\n", + "39 NYC Education NYC Education (2000) False\n", + "40 NYC Neighborhoods Demographics for New York City neighborhoods False\n", + "41 NYC Socio-Demographics NYC Education + Socio-Demographics False\n", + "42 Natregimes NCOVR with regimes (book/PySAL) False\n", + "43 Nepal Health, poverty and education indicators for ... False\n", + "44 Ohiolung Ohio lung cancer data, 1968, 1978, 1988 False\n", + "45 Orlando1 2000 Census Tract Data for Orlando, FL MSA an... False\n", + "46 Orlando2 1998 and 2001 Zip Code Business Patterns (Cen... False\n", + "47 Oz9799 Monthly ozone data, 1997-99 False\n", + "48 Phoenix ACS Phoenix American Community Survey Data (2010,... 
False\n", + "49 Pittsburgh Pittsburgh homicide locations False\n", + "50 Point Point Shapefile True\n", + "51 Police Police expenditures Mississippi counties False\n", + "52 Polygon Polygon Shapefile True\n", + "53 Polygon_Holes Example to test treatment of holes True\n", + "54 Rio Grande do Sul Cities of the Brazilian State of Rio Grande do... False\n", + "55 SIDS North Carolina county SIDS death counts False\n", + "56 SIDS2 North Carolina county SIDS death counts and r... False\n", + "57 Sacramento1 2000 Census Tract Data for Sacramento MSA False\n", + "58 Sacramento2 1998 and 2001 Zip Code Business Patterns (Cen... False\n", + "59 SanFran Crime July-Dec 2012 crime incidents in San Francisc... False\n", + "60 Savannah1 2000 Census Tract Data for Savannah, GA MSA a... False\n", + "61 Savannah2 1998 and 2001 Zip Code Business Patterns (Cen... False\n", + "62 Scotlip Male lip cancer in Scotland, 1975-80 False\n", + "63 Seattle1 2000 Census Tract Data for Seattle, WA MSA an... False\n", + "64 Seattle2 1998 and 2001 Zip Code Business Patterns (Cen... False\n", + "65 South US Southern county homicides 1960-1990 False\n", + "66 StLouis St Louis region county homicide counts and rates False\n", + "67 Tampa1 2000 Census Tract Data for Tampa, FL MSA and ... False\n", + "68 arcgis arcgis testing files True\n", + "69 baltim Baltimore house sales prices and hedonics 1978 True\n", + "70 berlin Prenzlauer Berg neighborhood AirBnB data from ... True\n", + "71 book Synthetic data to illustrate spatial weights True\n", + "72 burkitt Burkitt's lymphoma in the Western Nile distric... 
True\n", + "73 calemp Employment density for California counties True\n", + "74 chicago Chicago neighborhoods True\n", + "75 clearwater mgwr testing dataset False\n", + "76 columbus Columbus neighborhood crime data 1980 True\n", + "77 desmith Small dataset to illustrate Moran's I statistic True\n", + "78 geodanet Datasets from geodanet for network analysis True\n", + "79 georgia Various socio-economic variables for counties ... True\n", + "80 juvenile Residences of juvenile offenders in Cardiff, UK True\n", + "81 mexico Decennial per capita incomes of Mexican states... True\n", + "82 networks Datasets used for network testing True\n", + "83 newHaven Network testing dataset False\n", + "84 nyc_bikes New York City Bike Trips False\n", + "85 sids2 North Carolina county SIDS death counts and rates True\n", + "86 snow_maps Public water pumps and Cholera deaths in Londo... True\n", + "87 stl Homicides and selected socio-economic characte... True\n", + "88 street_net_pts Street network points True\n", + "89 taz Traffic Analysis Zones in So. California False\n", + "90 tokyo Tokyo Mortality data True\n", + "91 us_income Per-capita income for the lower 48 US states 1... 
True\n", + "92 virginia Virginia counties shapefile True\n", + "93 wmat Datasets used for spatial weights testing True\n" + ] + } + ], + "source": [ + "libpysal.examples.available()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading South to /home/jovyan/.local/pysal_data/South\n" + ] + } + ], + "source": [ + "_ = libpysal.examples.load_example('South')\n", + "pth = libpysal.examples.get_path('south.shp')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "df = read_file(pth)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## New default legend formatting" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "ax = df.plot(column='HR60', scheme='QUANTILES', k=4, \\\n", + " cmap='BuPu', legend=True,\n", + " legend_kwds={'loc': 'center left', 'bbox_to_anchor':(1,0.5)})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['[ 0.00, 3.21]', '( 3.21, 6.25]', '( 6.25, 9.96]', '( 9.96, 92.94]']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels = [t.get_text() for t in ax.get_legend().get_texts()]\n", + "labels" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Quantiles \n", + "\n", + " Interval Count\n", + "----------------------\n", + "[ 0.00, 3.21] | 353\n", + "( 3.21, 6.25] | 353\n", + "( 6.25, 9.96] | 353\n", + "( 9.96, 92.94] | 353" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q4 = mapclassify.Quantiles(df.HR60, k=4)\n", + "q4" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels == q4.get_legend_classes()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that in this case, the first interval is closed on the minimum value in the dataset. The other intervals have an open lower bound. This is now displayed in the legend." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overriding numerical format" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax = df.plot(column='HR60', scheme='QUANTILES', k=4, \\\n", + " cmap='BuPu', legend=True,\n", + " legend_kwds={'loc': 'center left', 'bbox_to_anchor':(1,0.5)},\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax = df.plot(column='HR60', scheme='QUANTILES', k=4, \\\n", + " cmap='BuPu', legend=True,\n", + " legend_kwds={'loc': 'center left', 'bbox_to_anchor':(1,0.5), 'fmt':\"{:.4f}\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax = df.plot(column='HR60', scheme='QUANTILES', k=4, \\\n", + " cmap='BuPu', legend=True,\n", + " legend_kwds={'loc': 'center left', 'bbox_to_anchor':(1,0.5), 'fmt':\"{:.0f}\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The new legend_kwds arg `fmt` takes a string to set the numerical formatting." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## When first class lower bound < y.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax = df.plot(column='HR60', scheme='BoxPlot', \\\n", + " cmap='BuPu', legend=True,\n", + " legend_kwds={'loc': 'center left', 'bbox_to_anchor':(1,0.5),\n", + " 'fmt': \"{:.0f}\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BoxPlot \n", + "\n", + " Interval Count\n", + "----------------------\n", + "( -inf, -6.90] | 0\n", + "(-6.90, 3.21] | 353\n", + "( 3.21, 6.25] | 353\n", + "( 6.25, 9.96] | 353\n", + "( 9.96, 20.07] | 311\n", + "(20.07, 92.94] | 42" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bp = mapclassify.BoxPlot(df.HR60)\n", + "bp\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['(-inf, -7]',\n", + " '( -7, 3]',\n", + " '( 3, 6]',\n", + " '( 6, 10]',\n", + " '( 10, 20]',\n", + " '( 20, 93]']" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bp.get_legend_classes(fmt=\"{:.0f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In some classifiers the user should be aware that the lower (upper) bound of the first (last) interval is not equal to the minimum (maximum) of the attribute values. This is useful to detect extreme values and highly skewed distributions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Categorical Data" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax = df.plot(column='STATE_NAME', categorical=True, legend=True, \\\n", + " legend_kwds={'loc': 'center left', 'bbox_to_anchor':(1,0.5),\n", + " 'fmt': \"{:.0f}\"}) # fmt is ignored for categorical data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff -Nru python-geopandas-0.7.0/examples/plotting_basemap_background.py python-geopandas-0.8.1/examples/plotting_basemap_background.py --- python-geopandas-0.7.0/examples/plotting_basemap_background.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/examples/plotting_basemap_background.py 2020-07-15 17:54:36.000000000 +0000 @@ -4,7 +4,7 @@ This example shows how you can add a background basemap to plots created with the geopandas ``.plot()`` method. This makes use of the -`contextily `__ package to retrieve +`contextily `__ package to retrieve web map tiles from several sources (OpenStreetMap, Stamen). 
""" diff -Nru python-geopandas-0.7.0/geopandas/array.py python-geopandas-0.8.1/geopandas/array.py --- python-geopandas-0.7.0/geopandas/array.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/array.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,6 +1,8 @@ +from collections.abc import Iterable import numbers import operator import warnings +import inspect import numpy as np import pandas as pd @@ -12,10 +14,15 @@ from shapely.geometry.base import BaseGeometry import shapely.ops import shapely.wkt +from pyproj import CRS -from collections.abc import Iterable +try: + import pygeos +except ImportError: + geos = None -from ._compat import PANDAS_GE_024, PANDAS_GE_10 +from . import _compat as compat +from . import _vectorized as vectorized class GeometryDtype(ExtensionDtype): @@ -41,7 +48,7 @@ return GeometryArray -if PANDAS_GE_024: +if compat.PANDAS_GE_024: from pandas.api.extensions import register_extension_dtype register_extension_dtype(GeometryDtype) @@ -49,7 +56,7 @@ def _isna(value): """ - Check if scalar value is NA-like (None or np.nan). + Check if scalar value is NA-like (None, np.nan or pd.NA). Custom version that only works for scalars (returning True or False), as `pd.isna` also works for array-like input returning a boolean array. @@ -58,40 +65,105 @@ return True elif isinstance(value, float) and np.isnan(value): return True + elif compat.PANDAS_GE_10 and value is pd.NA: + return True else: return False +def _check_crs(left, right, allow_none=False): + """ + Check if the projection of both arrays is the same. + + If allow_none is True, empty CRS is treated as the same. + """ + if allow_none: + if not left.crs or not right.crs: + return True + if not left.crs == right.crs: + return False + return True + + +def _crs_mismatch_warn(left, right, stacklevel=3): + """ + Raise a CRS mismatch warning with the information on the assigned CRS. 
+ """ + if left.crs: + left_srs = left.crs.to_string() + left_srs = left_srs if len(left_srs) <= 50 else " ".join([left_srs[:50], "..."]) + else: + left_srs = None + + if right.crs: + right_srs = right.crs.to_string() + right_srs = ( + right_srs if len(right_srs) <= 50 else " ".join([right_srs[:50], "..."]) + ) + else: + right_srs = None + + warnings.warn( + "CRS mismatch between the CRS of left geometries " + "and the CRS of right geometries.\n" + "Use `to_crs()` to reproject one of " + "the input geometries to match the CRS of the other.\n\n" + "Left CRS: {0}\n" + "Right CRS: {1}\n".format(left_srs, right_srs), + UserWarning, + stacklevel=stacklevel, + ) + + # ----------------------------------------------------------------------------- # Constructors / converters to other formats # ----------------------------------------------------------------------------- -def from_shapely(data): +def _geom_to_shapely(geom): """ - Convert a list or array of shapely objects to a GeometryArray. + Convert internal representation (PyGEOS or Shapely) to external Shapely object. + """ + if not compat.USE_PYGEOS: + return geom + else: + return vectorized._pygeos_to_shapely(geom) - Validates the elements. + +def _shapely_to_geom(geom): """ - n = len(data) + Convert external Shapely object to internal representation (PyGEOS or Shapely). 
+ """ + if not compat.USE_PYGEOS: + return geom + else: + return vectorized._shapely_to_pygeos(geom) - out = [] - for idx in range(n): - geom = data[idx] - if isinstance(geom, BaseGeometry): - out.append(geom) - elif hasattr(geom, "__geo_interface__"): - geom = shapely.geometry.asShape(geom) - out.append(geom) - elif _isna(geom): - out.append(None) - else: - raise TypeError("Input must be valid geometry objects: {0}".format(geom)) +def _is_scalar_geometry(geom): + if compat.USE_PYGEOS: + return isinstance(geom, pygeos.Geometry) + else: + return isinstance(geom, BaseGeometry) + + +def from_shapely(data, crs=None): + """ + Convert a list or array of shapely objects to a GeometryArray. + + Validates the elements. - aout = np.empty(n, dtype=object) - aout[:] = out - return GeometryArray(aout) + Parameters + ---------- + data : array-like + list or array of shapely objects + crs : value, optional + Coordinate Reference System of the geometry objects. Can be anything accepted by + :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. + + """ + return GeometryArray(vectorized.from_shapely(data), crs=crs) def to_shapely(geoms): @@ -100,84 +172,72 @@ """ if not isinstance(geoms, GeometryArray): raise ValueError("'geoms' must be a GeometryArray") - return geoms.data + return vectorized.to_shapely(geoms.data) -def from_wkb(data): +def from_wkb(data, crs=None): """ Convert a list or array of WKB objects to a GeometryArray. - """ - import shapely.wkb - n = len(data) - - out = [] - - for idx in range(n): - geom = data[idx] - if geom is not None and len(geom): - geom = shapely.wkb.loads(geom) - else: - geom = None - out.append(geom) + Parameters + ---------- + data : array-like + list or array of WKB objects + crs : value, optional + Coordinate Reference System of the geometry objects. Can be anything accepted by + :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. 
- aout = np.empty(n, dtype=object) - aout[:] = out - return GeometryArray(aout) + """ + return GeometryArray(vectorized.from_wkb(data), crs=crs) -def to_wkb(geoms): +def to_wkb(geoms, hex=False): """ Convert GeometryArray to a numpy object array of WKB objects. """ if not isinstance(geoms, GeometryArray): raise ValueError("'geoms' must be a GeometryArray") - out = [geom.wkb if geom is not None else None for geom in geoms] - return np.array(out, dtype=object) + return vectorized.to_wkb(geoms.data, hex=hex) -def from_wkt(data): +def from_wkt(data, crs=None): """ Convert a list or array of WKT objects to a GeometryArray. - """ - import shapely.wkt - - n = len(data) - out = [] - - for idx in range(n): - geom = data[idx] - if geom is not None and len(geom): - if isinstance(geom, bytes): - geom = geom.decode("utf-8") - geom = shapely.wkt.loads(geom) - else: - geom = None - out.append(geom) + Parameters + ---------- + data : array-like + list or array of WKT objects + crs : value, optional + Coordinate Reference System of the geometry objects. Can be anything accepted by + :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. - aout = np.empty(n, dtype=object) - aout[:] = out - return GeometryArray(aout) + """ + return GeometryArray(vectorized.from_wkt(data), crs=crs) -def to_wkt(geoms): +def to_wkt(geoms, **kwargs): """ Convert GeometryArray to a numpy object array of WKT objects. """ if not isinstance(geoms, GeometryArray): raise ValueError("'geoms' must be a GeometryArray") - out = [geom.wkt if geom is not None else None for geom in geoms] - return np.array(out, dtype=object) + return vectorized.to_wkt(geoms.data, **kwargs) -def _points_from_xy(x, y, z=None): +def points_from_xy(x, y, z=None, crs=None): """ - Generate list of shapely Point geometries from x, y(, z) coordinates. + Generate GeometryArray of shapely Point geometries from x, y(, z) coordinates. 
Parameters ---------- - x, y, z : array + x, y, z : iterable + crs : value, optional + Coordinate Reference System of the geometry objects. Can be anything accepted by + :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. Examples -------- @@ -188,229 +248,9 @@ Returns ------- - list : list - """ - if not len(x) == len(y): - raise ValueError("x and y arrays must be equal length.") - if z is not None: - if not len(z) == len(x): - raise ValueError("z array must be same length as x and y.") - geom = [shapely.geometry.Point(i, j, k) for i, j, k in zip(x, y, z)] - else: - geom = [shapely.geometry.Point(i, j) for i, j in zip(x, y)] - return geom - - -def points_from_xy(x, y, z=None): - """Convert arrays of x and y values to a GeometryArray of points.""" - x = np.asarray(x, dtype="float64") - y = np.asarray(y, dtype="float64") - if z is not None: - z = np.asarray(z, dtype="float64") - out = _points_from_xy(x, y, z) - aout = np.empty(len(x), dtype=object) - aout[:] = out - return GeometryArray(aout) - - -# ----------------------------------------------------------------------------- -# Helper methods for the vectorized operations -# ----------------------------------------------------------------------------- - - -def _binary_geo(op, left, right): - # type: (str, GeometryArray, [GeometryArray/BaseGeometry]) -> GeometryArray - """ Apply geometry-valued operation - - Supports: - - - difference - - symmetric_difference - - intersection - - union - - Parameters - ---------- - op: string - right: GeometryArray or single shapely BaseGeoemtry + output : GeometryArray """ - if isinstance(right, BaseGeometry): - # intersection can return empty GeometryCollections, and if the - # result are only those, numpy will coerce it to empty 2D array - data = np.empty(len(left), dtype=object) - data[:] = [ - getattr(s, op)(right) if s is not None and right is not None else None - for s in left.data - ] - return GeometryArray(data) - elif 
isinstance(right, GeometryArray): - if len(left) != len(right): - msg = "Lengths of inputs do not match. Left: {0}, Right: {1}".format( - len(left), len(right) - ) - raise ValueError(msg) - data = np.empty(len(left), dtype=object) - data[:] = [ - getattr(this_elem, op)(other_elem) - if this_elem is not None and other_elem is not None - else None - for this_elem, other_elem in zip(left.data, right.data) - ] - return GeometryArray(data) - else: - raise TypeError("Type not known: {0} vs {1}".format(type(left), type(right))) - - -def _binary_predicate(op, left, right, *args, **kwargs): - # type: (str, GeometryArray, GeometryArray/BaseGeometry, args/kwargs) - # -> array[bool] - """Binary operation on GeometryArray that returns a boolean ndarray - - Supports: - - - contains - - disjoint - - intersects - - touches - - crosses - - within - - overlaps - - covers - - covered_by - - equals - - Parameters - ---------- - op: string - right: GeometryArray or single shapely BaseGeoemtry - """ - # empty geometries are handled by shapely (all give False except disjoint) - if isinstance(right, BaseGeometry): - data = [ - getattr(s, op)(right, *args, **kwargs) if s is not None else False - for s in left.data - ] - return np.array(data, dtype=bool) - elif isinstance(right, GeometryArray): - if len(left) != len(right): - msg = "Lengths of inputs do not match. 
Left: {0}, Right: {1}".format( - len(left), len(right) - ) - raise ValueError(msg) - data = [ - getattr(this_elem, op)(other_elem, *args, **kwargs) - if not (this_elem is None or other_elem is None) - else False - for this_elem, other_elem in zip(left.data, right.data) - ] - return np.array(data, dtype=bool) - else: - raise TypeError("Type not known: {0} vs {1}".format(type(left), type(right))) - - -def _binary_op_float(op, left, right, *args, **kwargs): - # type: (str, GeometryArray, GeometryArray/BaseGeometry, args/kwargs) - # -> array - """Binary operation on GeometryArray that returns a ndarray""" - # used for distance -> check for empty as we want to return np.nan instead 0.0 - # as shapely does currently (https://github.com/Toblerity/Shapely/issues/498) - if isinstance(right, BaseGeometry): - data = [ - getattr(s, op)(right, *args, **kwargs) - if not (s is None or s.is_empty or right.is_empty) - else np.nan - for s in left.data - ] - return np.array(data, dtype=float) - elif isinstance(right, GeometryArray): - if len(left) != len(right): - msg = "Lengths of inputs do not match. 
Left: {0}, Right: {1}".format( - len(left), len(right) - ) - raise ValueError(msg) - data = [ - getattr(this_elem, op)(other_elem, *args, **kwargs) - if not (this_elem is None or this_elem.is_empty) - | (other_elem is None or other_elem.is_empty) - else np.nan - for this_elem, other_elem in zip(left.data, right.data) - ] - return np.array(data, dtype=float) - else: - raise TypeError("Type not known: {0} vs {1}".format(type(left), type(right))) - - -def _binary_op(op, left, right, *args, **kwargs): - # type: (str, GeometryArray, GeometryArray/BaseGeometry, args/kwargs) - # -> array - """Binary operation on GeometryArray that returns a ndarray""" - # pass empty to shapely (relate handles this correctly, project only - # for linestrings and points) - if op == "project": - null_value = np.nan - dtype = float - elif op == "relate": - null_value = None - dtype = object - else: - raise AssertionError("wrong op") - - if isinstance(right, BaseGeometry): - data = [ - getattr(s, op)(right, *args, **kwargs) if s is not None else null_value - for s in left.data - ] - return np.array(data, dtype=dtype) - elif isinstance(right, GeometryArray): - if len(left) != len(right): - msg = "Lengths of inputs do not match. 
Left: {0}, Right: {1}".format( - len(left), len(right) - ) - raise ValueError(msg) - data = [ - getattr(this_elem, op)(other_elem, *args, **kwargs) - if not (this_elem is None or other_elem is None) - else null_value - for this_elem, other_elem in zip(left.data, right.data) - ] - return np.array(data, dtype=dtype) - else: - raise TypeError("Type not known: {0} vs {1}".format(type(left), type(right))) - - -def _unary_geo(op, left, *args, **kwargs): - # type: (str, GeometryArray) -> GeometryArray - """Unary operation that returns new geometries""" - # ensure 1D output, see note above - data = np.empty(len(left), dtype=object) - data[:] = [getattr(geom, op, None) for geom in left.data] - return GeometryArray(data) - - -def _unary_op(op, left, null_value=False): - # type: (str, GeometryArray, Any) -> np.array - """Unary operation that returns a Series""" - data = [getattr(geom, op, null_value) for geom in left.data] - return np.array(data, dtype=np.dtype(type(null_value))) - - -def _affinity_method(op, left, *args, **kwargs): - # type: (str, GeometryArray, ...) -> GeometryArray - - # not all shapely.affinity methods can handle empty geometries: - # affine_transform itself works (as well as translate), but rotate, scale - # and skew fail (they try to unpack the bounds). 
- # Here: consistently returning empty geom for input empty geom - out = [] - for geom in left.data: - if geom is None or geom.is_empty: - res = geom - else: - res = getattr(shapely.affinity, op)(geom, *args, **kwargs) - out.append(res) - data = np.empty(len(left), dtype=object) - data[:] = out - return GeometryArray(data) + return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs) class GeometryArray(ExtensionArray): @@ -421,8 +261,10 @@ _dtype = GeometryDtype() - def __init__(self, data): + def __init__(self, data, crs=None): if isinstance(data, self.__class__): + if not crs: + crs = data.crs data = data.data elif not isinstance(data, np.ndarray): raise TypeError( @@ -435,6 +277,41 @@ ) self.data = data + self._crs = None + self.crs = crs + + @property + def crs(self): + """ + The Coordinate Reference System (CRS) represented as a ``pyproj.CRS`` + object. + + Returns None if the CRS is not set. + :getter: Returns a ``pyproj.CRS`` or None. When setting, the value is the + Coordinate Reference System of the geometry objects. Can be anything accepted by + :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. + """ + return self._crs + + @crs.setter + def crs(self, value): + """Sets the value of the crs""" + self._crs = None if not value else CRS.from_user_input(value) + + def check_geographic_crs(self, stacklevel): + """Check CRS and warn if the planar operation is done in a geographic CRS""" + if self.crs and self.crs.is_geographic: + warnings.warn( + "Geometry is in a geographic CRS. Results from '{}' are likely " + "incorrect.
Use 'GeoSeries.to_crs()' to re-project geometries to a " + "projected CRS before this operation.\n".format( + inspect.stack()[1].function + ), + UserWarning, + stacklevel=stacklevel, + ) + @property def dtype(self): return self._dtype @@ -444,19 +321,19 @@ def __getitem__(self, idx): if isinstance(idx, numbers.Integral): - return self.data[idx] + return _geom_to_shapely(self.data[idx]) # array-like, slice - if PANDAS_GE_10: + if compat.PANDAS_GE_10: # for pandas >= 1.0, validate and convert IntegerArray/BooleanArray # to numpy array, pass-through non-array-like indexers idx = pd.api.indexers.check_array_indexer(self, idx) if isinstance(idx, (Iterable, slice)): - return GeometryArray(self.data[idx]) + return GeometryArray(self.data[idx], crs=self.crs) else: raise TypeError("Index type not supported", idx) def __setitem__(self, key, value): - if PANDAS_GE_10: + if compat.PANDAS_GE_10: # for pandas >= 1.0, validate and convert IntegerArray/BooleanArray # keys to numpy array, pass-through non-array-like indexers key = pd.api.indexers.check_array_indexer(self, key) @@ -473,6 +350,10 @@ # internally only use None as missing value indicator # but accept others value = None + elif isinstance(value, BaseGeometry): + value = from_shapely([value]).data[0] + else: + raise TypeError("should be valid geometry") if isinstance(key, (slice, list, np.ndarray)): value_array = np.empty(1, dtype=object) value_array[:] = [value] @@ -484,54 +365,73 @@ "Value should be either a BaseGeometry or None, got %s" % str(value) ) + # TODO: use this once pandas-dev/pandas#33457 is fixed + # if hasattr(value, "crs"): + # if value.crs and (value.crs != self.crs): + # raise ValueError( + # "CRS mismatch between CRS of the passed geometries " + # "and CRS of existing geometries." 
+ # ) + + if compat.USE_PYGEOS: + + def __getstate__(self): + return (pygeos.to_wkb(self.data), self._crs) + + def __setstate__(self, state): + geoms = pygeos.from_wkb(state[0]) + self._crs = state[1] + self.data = geoms + self.base = None + + else: + + def __setstate__(self, state): + if "_crs" not in state: + state["_crs"] = None + self.__dict__.update(state) + # ------------------------------------------------------------------------- # Geometry related methods # ------------------------------------------------------------------------- @property def is_valid(self): - return _unary_op("is_valid", self, null_value=False) + return vectorized.is_valid(self.data) @property def is_empty(self): - return _unary_op("is_empty", self, null_value=False) + return vectorized.is_empty(self.data) @property def is_simple(self): - return _unary_op("is_simple", self, null_value=False) + return vectorized.is_simple(self.data) @property def is_ring(self): - # operates on the exterior, so can't use _unary_op() - return np.array( - [ - geom.exterior.is_ring - if geom is not None and geom.exterior is not None - else False - for geom in self.data - ], - dtype=bool, - ) + return vectorized.is_ring(self.data) @property def is_closed(self): - return _unary_op("is_closed", self, null_value=False) + return vectorized.is_closed(self.data) @property def has_z(self): - return _unary_op("has_z", self, null_value=False) + return vectorized.has_z(self.data) @property def geom_type(self): - return _unary_op("geom_type", self, null_value=None) + return vectorized.geom_type(self.data) @property def area(self): - return _unary_op("area", self, null_value=np.nan) + self.check_geographic_crs(stacklevel=5) + return vectorized.area(self.data) @property def length(self): - return _unary_op("length", self, null_value=np.nan) + self.check_geographic_crs(stacklevel=5) + return vectorized.length(self.data) # # Unary operations that return new geometries @@ -539,195 +439,216 @@ @property def boundary(self): - 
return _unary_geo("boundary", self) + return GeometryArray(vectorized.boundary(self.data), crs=self.crs) @property def centroid(self): - return _unary_geo("centroid", self) + self.check_geographic_crs(stacklevel=5) + return GeometryArray(vectorized.centroid(self.data), crs=self.crs) @property def convex_hull(self): - return _unary_geo("convex_hull", self) + return GeometryArray(vectorized.convex_hull(self.data), crs=self.crs) @property def envelope(self): - return _unary_geo("envelope", self) + return GeometryArray(vectorized.envelope(self.data), crs=self.crs) @property def exterior(self): - return _unary_geo("exterior", self) + return GeometryArray(vectorized.exterior(self.data), crs=self.crs) @property def interiors(self): - has_non_poly = False - inner_rings = [] - for geom in self.data: - interior_ring_seq = getattr(geom, "interiors", None) - # polygon case - if interior_ring_seq is not None: - inner_rings.append(list(interior_ring_seq)) - # non-polygon case - else: - has_non_poly = True - inner_rings.append(None) - if has_non_poly: - warnings.warn( - "Only Polygon objects have interior rings. For other " - "geometry types, None is returned." - ) - data = np.empty(len(self), dtype=object) - data[:] = inner_rings - return data + # no GeometryArray as result + return vectorized.interiors(self.data) def representative_point(self): - # method and not a property -> can't use _unary_geo - data = np.empty(len(self), dtype=object) - data[:] = [ - geom.representative_point() if geom is not None else None - for geom in self.data - ] - return GeometryArray(data) + return GeometryArray(vectorized.representative_point(self.data), crs=self.crs) # # Binary predicates # + @staticmethod + def _binary_method(op, left, right, **kwargs): + if isinstance(right, GeometryArray): + if len(left) != len(right): + msg = "Lengths of inputs do not match. 
Left: {0}, Right: {1}".format( + len(left), len(right) + ) + raise ValueError(msg) + if not _check_crs(left, right): + _crs_mismatch_warn(left, right, stacklevel=7) + right = right.data + + return getattr(vectorized, op)(left.data, right, **kwargs) + def covers(self, other): - return _binary_predicate("covers", self, other) + return self._binary_method("covers", self, other) + + def covered_by(self, other): + return self._binary_method("covered_by", self, other) def contains(self, other): - return _binary_predicate("contains", self, other) + return self._binary_method("contains", self, other) def crosses(self, other): - return _binary_predicate("crosses", self, other) + return self._binary_method("crosses", self, other) def disjoint(self, other): - return _binary_predicate("disjoint", self, other) + return self._binary_method("disjoint", self, other) - def equals(self, other): - return _binary_predicate("equals", self, other) + def geom_equals(self, other): + return self._binary_method("equals", self, other) def intersects(self, other): - return _binary_predicate("intersects", self, other) + return self._binary_method("intersects", self, other) def overlaps(self, other): - return _binary_predicate("overlaps", self, other) + return self._binary_method("overlaps", self, other) def touches(self, other): - return _binary_predicate("touches", self, other) + return self._binary_method("touches", self, other) def within(self, other): - return _binary_predicate("within", self, other) + return self._binary_method("within", self, other) + + def geom_equals_exact(self, other, tolerance): + return self._binary_method("equals_exact", self, other, tolerance=tolerance) + + def geom_almost_equals(self, other, decimal): + return self.geom_equals_exact(other, 0.5 * 10 ** (-decimal)) + # return _binary_predicate("almost_equals", self, other, decimal=decimal) def equals_exact(self, other, tolerance): - return _binary_predicate("equals_exact", self, other, tolerance=tolerance) + 
warnings.warn( + "GeometryArray.equals_exact() is now GeometryArray.geom_equals_exact(). " + "GeometryArray.equals_exact() will be deprecated in the future.", + FutureWarning, + stacklevel=2, + ) + return self._binary_method("equals_exact", self, other, tolerance=tolerance) def almost_equals(self, other, decimal): - return _binary_predicate("almost_equals", self, other, decimal=decimal) + warnings.warn( + "GeometryArray.almost_equals() is now GeometryArray.geom_almost_equals(). " + "GeometryArray.almost_equals() will be deprecated in the future.", + FutureWarning, + stacklevel=2, + ) + return self.geom_equals_exact(other, 0.5 * 10 ** (-decimal)) # # Binary operations that return new geometries # def difference(self, other): - return _binary_geo("difference", self, other) + return GeometryArray( + self._binary_method("difference", self, other), crs=self.crs + ) def intersection(self, other): - return _binary_geo("intersection", self, other) + return GeometryArray( + self._binary_method("intersection", self, other), crs=self.crs + ) def symmetric_difference(self, other): - return _binary_geo("symmetric_difference", self, other) + return GeometryArray( + self._binary_method("symmetric_difference", self, other), crs=self.crs + ) def union(self, other): - return _binary_geo("union", self, other) + return GeometryArray(self._binary_method("union", self, other), crs=self.crs) # # Other operations # def distance(self, other): - return _binary_op_float("distance", self, other) + self.check_geographic_crs(stacklevel=6) + return self._binary_method("distance", self, other) def buffer(self, distance, resolution=16, **kwargs): - data = data = np.empty(len(self), dtype=object) - if isinstance(distance, np.ndarray): - if len(distance) != len(self): - raise ValueError( - "Length of distance sequence does not match " - "length of the GeoSeries" - ) - - data[:] = [ - geom.buffer(dist, resolution, **kwargs) if geom is not None else None - for geom, dist in zip(self.data, distance) - 
] - return GeometryArray(data) - - data[:] = [ - geom.buffer(distance, resolution, **kwargs) if geom is not None else None - for geom in self.data - ] - return GeometryArray(data) + if not (isinstance(distance, (int, float)) and distance == 0): + self.check_geographic_crs(stacklevel=5) + return GeometryArray( + vectorized.buffer(self.data, distance, resolution=resolution, **kwargs), + crs=self.crs, + ) def interpolate(self, distance, normalized=False): - data = data = np.empty(len(self), dtype=object) - if isinstance(distance, np.ndarray): - if len(distance) != len(self): - raise ValueError( - "Length of distance sequence does not match " - "length of the GeoSeries" - ) - data[:] = [ - geom.interpolate(dist, normalized=normalized) - for geom, dist in zip(self.data, distance) - ] - return GeometryArray(data) - - data[:] = [ - geom.interpolate(distance, normalized=normalized) for geom in self.data - ] - return GeometryArray(data) - - def simplify(self, *args, **kwargs): - # method and not a property -> can't use _unary_geo - data = np.empty(len(self), dtype=object) - data[:] = [geom.simplify(*args, **kwargs) for geom in self.data] - return GeometryArray(data) + self.check_geographic_crs(stacklevel=5) + return GeometryArray( + vectorized.interpolate(self.data, distance, normalized=normalized), + crs=self.crs, + ) + + def simplify(self, tolerance, preserve_topology=True): + return GeometryArray( + vectorized.simplify( + self.data, tolerance, preserve_topology=preserve_topology + ), + crs=self.crs, + ) def project(self, other, normalized=False): - return _binary_op("project", self, other, normalized=normalized) + if isinstance(other, BaseGeometry): + other = _shapely_to_geom(other) + elif isinstance(other, GeometryArray): + other = other.data + return vectorized.project(self.data, other, normalized=normalized) def relate(self, other): - return _binary_op("relate", self, other) + if isinstance(other, GeometryArray): + other = other.data + return 
vectorized.relate(self.data, other) # # Reduction operations that return a Shapely geometry # def unary_union(self): - return shapely.ops.unary_union(self.data) + return vectorized.unary_union(self.data) # # Affinity operations # def affine_transform(self, matrix): - return _affinity_method("affine_transform", self, matrix) + return GeometryArray( + vectorized._affinity_method("affine_transform", self.data, matrix), + crs=self.crs, + ) def translate(self, xoff=0.0, yoff=0.0, zoff=0.0): - return _affinity_method("translate", self, xoff, yoff, zoff) + return GeometryArray( + vectorized._affinity_method("translate", self.data, xoff, yoff, zoff), + crs=self.crs, + ) def rotate(self, angle, origin="center", use_radians=False): - return _affinity_method( - "rotate", self, angle, origin=origin, use_radians=use_radians + return GeometryArray( + vectorized._affinity_method( + "rotate", self.data, angle, origin=origin, use_radians=use_radians + ), + crs=self.crs, ) def scale(self, xfact=1.0, yfact=1.0, zfact=1.0, origin="center"): - return _affinity_method("scale", self, xfact, yfact, zfact, origin=origin) + return GeometryArray( + vectorized._affinity_method( + "scale", self.data, xfact, yfact, zfact, origin=origin + ), + crs=self.crs, + ) def skew(self, xs=0.0, ys=0.0, origin="center", use_radians=False): - return _affinity_method( - "skew", self, xs, ys, origin=origin, use_radians=use_radians + return GeometryArray( + vectorized._affinity_method( + "skew", self.data, xs, ys, origin=origin, use_radians=use_radians + ), + crs=self.crs, ) # @@ -738,7 +659,7 @@ def x(self): """Return the x location of point geometries in a GeoSeries""" if (self.geom_type[~self.isna()] == "Point").all(): - return _unary_op("x", self, null_value=np.nan) + return vectorized.get_x(self.data) else: message = "x attribute access only provided for Point geometries" raise ValueError(message) @@ -747,27 +668,14 @@ def y(self): """Return the y location of point geometries in a GeoSeries""" if 
(self.geom_type[~self.isna()] == "Point").all(): - return _unary_op("y", self, null_value=np.nan) + return vectorized.get_y(self.data) else: message = "y attribute access only provided for Point geometries" raise ValueError(message) @property def bounds(self): - # ensure that for empty arrays, the result has the correct shape - if len(self) == 0: - return np.empty((0, 4), dtype="float64") - # need to explicitly check for empty (in addition to missing) geometries, - # as those return an empty tuple, not resulting in a 2D array - bounds = np.array( - [ - geom.bounds - if not (geom is None or geom.is_empty) - else (np.nan, np.nan, np.nan, np.nan) - for geom in self.data - ] - ) - return bounds + return vectorized.bounds(self.data) @property def total_bounds(self): @@ -778,12 +686,12 @@ b = self.bounds return np.array( ( - b[:, 0].min(), # minx - b[:, 1].min(), # miny - b[:, 2].max(), # maxx - b[:, 3].max(), + np.nanmin(b[:, 0]), # minx + np.nanmin(b[:, 1]), # miny + np.nanmax(b[:, 2]), # maxx + np.nanmax(b[:, 3]), # maxy ) - ) # maxy + ) # ------------------------------------------------------------------------- # general array like compat @@ -803,26 +711,30 @@ def copy(self, *args, **kwargs): # still taking args/kwargs for compat with pandas 0.24 - return GeometryArray(self.data.copy()) + return GeometryArray(self.data.copy(), crs=self._crs) def take(self, indices, allow_fill=False, fill_value=None): from pandas.api.extensions import take if allow_fill: if fill_value is None or pd.isna(fill_value): - fill_value = 0 + fill_value = None + elif isinstance(fill_value, BaseGeometry): + fill_value = _shapely_to_geom(fill_value) + elif not _is_scalar_geometry(fill_value): + raise TypeError("provide geometry or None as fill value") result = take(self.data, indices, allow_fill=allow_fill, fill_value=fill_value) - if fill_value == 0: - result[result == 0] = None - return GeometryArray(result) + if allow_fill and fill_value is None: + result[pd.isna(result)] = None + return 
GeometryArray(result, crs=self.crs) def _fill(self, idx, value): """ Fill index locations with value Value should be a BaseGeometry """ - if not (isinstance(value, BaseGeometry) or value is None): + if not (_is_scalar_geometry(value) or value is None): raise TypeError( "Value should be either a BaseGeometry or None, got %s" % str(value) ) @@ -869,6 +781,7 @@ raise NotImplementedError( "fillna currently only supports filling with a scalar geometry" ) + value = _shapely_to_geom(value) new_values = new_values._fill(mask, value) return new_values @@ -899,7 +812,13 @@ elif pd.api.types.is_string_dtype(dtype) and not pd.api.types.is_object_dtype( dtype ): - return to_wkt(self).astype(dtype, copy=False) + string_values = to_wkt(self) + if compat.PANDAS_GE_10: + pd_dtype = pd.api.types.pandas_dtype(dtype) + if isinstance(pd_dtype, pd.StringDtype): + # ensure to return a pandas string array instead of numpy array + return pd.array(string_values, dtype="string") + return string_values.astype(dtype, copy=False) else: return np.array(self, dtype=dtype, copy=copy) @@ -907,7 +826,10 @@ """ Boolean NumPy array indicating if each value is missing """ - return np.array([g is None for g in self.data], dtype="bool") + if compat.USE_PYGEOS: + return pygeos.is_missing(self.data) + else: + return np.array([g is None for g in self.data], dtype="bool") def unique(self): """Compute the ExtensionArray of unique values. 
@@ -949,6 +871,9 @@ ------- ExtensionArray """ + # GH 1413 + if isinstance(scalars, BaseGeometry): + scalars = [scalars] return from_shapely(scalars) def _values_for_factorize(self): @@ -1066,13 +991,14 @@ ExtensionArray """ data = np.concatenate([ga.data for ga in to_concat]) - return GeometryArray(data) + return GeometryArray(data, crs=to_concat[0].crs) def _reduce(self, name, skipna=True, **kwargs): # including the base class version here (that raises by default) # because this was not yet defined in pandas 0.23 if name == "any" or name == "all": - return getattr(self.data, name)() + # TODO(pygeos) + return getattr(to_shapely(self), name)() raise TypeError( "cannot perform {name} with type {dtype}".format( name=name, dtype=self.dtype @@ -1087,7 +1013,7 @@ ------- values : numpy array """ - return self.data + return to_shapely(self) def _binop(self, other, op): def convert_values(param): @@ -1111,7 +1037,7 @@ # a TypeError should be raised res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] - res = np.asarray(res) + res = np.asarray(res, dtype=bool) return res def __eq__(self, other): diff -Nru python-geopandas-0.7.0/geopandas/base.py python-geopandas-0.8.1/geopandas/base.py --- python-geopandas-0.7.0/geopandas/base.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/base.py 2020-07-15 17:54:36.000000000 +0000 @@ -4,7 +4,6 @@ import pandas as pd from pandas import DataFrame, MultiIndex, Series -from pyproj import CRS from shapely.geometry import box from shapely.geometry.base import BaseGeometry from shapely.ops import cascaded_union @@ -12,17 +11,11 @@ import geopandas as gpd from .array import GeometryArray, GeometryDtype +from .sindex import get_sindex_class, has_sindex -try: - from rtree.core import RTreeError - - HAS_SINDEX = True -except ImportError: - - class RTreeError(Exception): - pass - - HAS_SINDEX = False +# for backwards compat +# this will be static (will NOT follow USE_PYGEOS changes) +HAS_SINDEX = has_sindex() def 
is_geometry_type(data): @@ -42,10 +35,12 @@ # type: (str, GeoSeries, GeoSeries) -> GeoSeries/Series this = this.geometry if isinstance(other, GeoPandasBase): - this, other = this.align(other.geometry) + if not this.index.equals(other.index): + warn("The indices of the two GeoSeries are different.") + this, other = this.align(other.geometry) + else: + other = other.geometry - if this.crs != other.crs: - warn("GeoSeries crs mismatch: {0} and {1}".format(this.crs, other.crs)) a_this = GeometryArray(this.values) other = GeometryArray(other.values) elif isinstance(other, BaseGeometry): @@ -100,23 +95,22 @@ _sindex_generated = False def _generate_sindex(self): - if not HAS_SINDEX: - warn("Cannot generate spatial index: Missing package `rtree`.") - else: - from geopandas.sindex import SpatialIndex - - stream = ( - (i, item.bounds, idx) - for i, (idx, item) in enumerate(self.geometry.iteritems()) - if pd.notnull(item) and not item.is_empty - ) - try: - self._sindex = SpatialIndex(stream) - # What we really want here is an empty generator error, or - # for the bulk loader to log that the generator was empty - # and move on. See https://github.com/Toblerity/rtree/issues/20. - except RTreeError: - pass + sindex_cls = get_sindex_class() + if sindex_cls is not None: + _sindex = sindex_cls(self.geometry) + if not _sindex.is_empty: + self._sindex = _sindex + else: + warn( + "Generated spatial index is empty and returned `None`. " + "Future versions of GeoPandas will return zero-length spatial " + "index instead of `None`. Use `len(gdf.sindex) > 0` " + "or `if gdf.sindex` instead of `if gd.sindex is not None` " + "to check for empty spatial indexes.", + FutureWarning, + stacklevel=3, + ) + self._sindex = None self._sindex_generated = True def _invalidate_sindex(self): @@ -142,15 +136,16 @@ Returns None if the CRS is not set, and to set the value it :getter: Returns a ``pyproj.CRS`` or None. 
When setting, the value - can be anything accepted by :meth:`pyproj.CRS.from_user_input`, + can be anything accepted by + :meth:`pyproj.CRS.from_user_input() `, such as an authority string (eg "EPSG:4326") or a WKT string. """ - return self._crs + return self.geometry.values.crs @crs.setter def crs(self, value): """Sets the value of the crs""" - self._crs = None if not value else CRS.from_user_input(value) + self.geometry.values.crs = value @property def geom_type(self): @@ -176,8 +171,33 @@ @property def is_empty(self): - """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for - empty geometries.""" + """ + Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for + empty geometries. + + Examples + -------- + An example of a GeoDataFrame with one empty point, one point and one missing + value: + + >>> from shapely.geometry import Point + >>> d = {'geometry': [Point(), Point(2,1), None]} + >>> gdf = gpd.GeoDataFrame(d, crs="EPSG:4326") + >>> gdf + geometry + 0 GEOMETRYCOLLECTION EMPTY + 1 POINT (2.00000 1.00000) + 2 None + >>> gdf.is_empty + 0 True + 1 False + 2 False + dtype: bool + + See Also + -------- + GeoSeries.isna : detect missing values + """ return _delegate_property("is_empty", self) @property @@ -320,14 +340,14 @@ The GeoSeries (elementwise) or geometric object to test for equality. """ - return _binary_op("equals", self, other) + return _binary_op("geom_equals", self, other) def geom_almost_equals(self, other, decimal=6): """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` if each geometry is approximately equal to `other`. Approximate equality is tested at all points to the specified `decimal` - place precision. See also :meth:`equals`. + place precision. See also :meth:`geom_equals`. Parameters ---------- @@ -336,12 +356,12 @@ decimal : int Decimal place presion used when testing for approximate equality. 
""" - return _binary_op("almost_equals", self, other, decimal=decimal) + return _binary_op("geom_almost_equals", self, other, decimal=decimal) def geom_equals_exact(self, other, tolerance): """Return True for all geometries that equal *other* to a given tolerance, else False""" - return _binary_op("equals_exact", self, other, tolerance=tolerance) + return _binary_op("geom_equals_exact", self, other, tolerance=tolerance) def crosses(self, other): """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for @@ -390,7 +410,14 @@ return _binary_op("intersects", self, other) def overlaps(self, other): - """Return True for all geometries that overlap *other*, else False""" + """Returns True for all geometries that overlap *other*, else False. + + Parameters + ---------- + other : GeoSeries or geometric object + The GeoSeries (elementwise) or geometric object to test if + overlaps. + """ return _binary_op("overlaps", self, other) def touches(self, other): @@ -430,6 +457,44 @@ """ return _binary_op("within", self, other) + def covers(self, other): + """ + Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for + each geometry that is entirely covering `other`. + + An object A is said to cover another object B if no points of B lie + in the exterior of A. + + See + https://lin-ear-th-inking.blogspot.com/2007/06/subtleties-of-ogc-covers-spatial.html + for reference. + + Parameters + ---------- + other : Geoseries or geometric object + The Geoseries (elementwise) or geometric object to check is being covered. + """ + return _binary_geo("covers", self, other) + + def covered_by(self, other): + """ + Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for + each geometry that is entirely covered by `other`. + + An object A is said to cover another object B if no points of B lie + in the exterior of A. + + See + https://lin-ear-th-inking.blogspot.com/2007/06/subtleties-of-ogc-covers-spatial.html + for reference. 
+ + Parameters + ---------- + other : Geoseries or geometric object + The Geoseries (elementwise) or geometric object to check is being covered. + """ + return _binary_geo("covered_by", self, other) + def distance(self, other): """Returns a ``Series`` containing the distance to `other`. @@ -746,8 +811,8 @@ original index and a zero-based integer index that counts the number of single geometries within a multi-part geometry. - Example - ------- + Examples + -------- >>> gdf # gdf is GeoSeries of MultiPoints 0 MULTIPOINT (0 0, 1 1) 1 MULTIPOINT (2 2, 3 3, 4 4) @@ -788,6 +853,34 @@ """ return _CoordinateIndexer(self) + def equals(self, other): + """ + Test whether two objects contain the same elements. + + This function allows two GeoSeries or GeoDataFrames to be compared + against each other to see if they have the same shape and elements. + Missing values in the same location are considered equal. The + row/column index do not need to have the same type (as long as the + values are still considered equal), but the dtypes of the respective + columns must be the same. + + Parameters + ---------- + other : GeoSeries or GeoDataFrame + The other GeoSeries or GeoDataFrame to be compared with the first. + + Returns + ------- + bool + True if all elements are the same in both objects, False + otherwise. 
+ """ + # we override this because pandas is using `self._constructor` in the + # isinstance check (https://github.com/geopandas/geopandas/issues/1420) + if not isinstance(other, type(self)): + return False + return self._data.equals(other._data) + class _CoordinateIndexer(object): # see docstring GeoPandasBase.cx property above diff -Nru python-geopandas-0.7.0/geopandas/_compat.py python-geopandas-0.8.1/geopandas/_compat.py --- python-geopandas-0.7.0/geopandas/_compat.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/_compat.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,6 +1,10 @@ from distutils.version import LooseVersion +import importlib +import os +import warnings import pandas as pd +import shapely # ----------------------------------------------------------------------------- # pandas compat @@ -9,3 +13,141 @@ PANDAS_GE_024 = str(pd.__version__) >= LooseVersion("0.24.0") PANDAS_GE_025 = str(pd.__version__) >= LooseVersion("0.25.0") PANDAS_GE_10 = str(pd.__version__) >= LooseVersion("0.26.0.dev") +PANDAS_GE_11 = str(pd.__version__) >= LooseVersion("1.1.0.dev") + + +# ----------------------------------------------------------------------------- +# Shapely / PyGEOS compat +# ----------------------------------------------------------------------------- + + +SHAPELY_GE_17 = str(shapely.__version__) >= LooseVersion("1.7.0") + +HAS_PYGEOS = None +USE_PYGEOS = None +PYGEOS_SHAPELY_COMPAT = None + +try: + import pygeos # noqa + + HAS_PYGEOS = True +except ImportError: + HAS_PYGEOS = False + + +def set_use_pygeos(val=None): + """ + Set the global configuration on whether to use PyGEOS or not. + + The default is use PyGEOS if it is installed. This can be overridden + with an environment variable USE_PYGEOS (this is only checked at + first import, cannot be changed during interactive session). + + Alternatively, pass a value here to force a True/False value. 
+ """ + global USE_PYGEOS + global PYGEOS_SHAPELY_COMPAT + + if val is not None: + USE_PYGEOS = bool(val) + else: + if USE_PYGEOS is None: + + USE_PYGEOS = HAS_PYGEOS + + env_use_pygeos = os.getenv("USE_PYGEOS", None) + if env_use_pygeos is not None: + USE_PYGEOS = bool(int(env_use_pygeos)) + + # validate the pygeos version + if USE_PYGEOS: + try: + import pygeos # noqa + + # validate the pygeos version + if not str(pygeos.__version__) >= LooseVersion("0.6"): + raise ImportError( + "PyGEOS >= 0.6 is required, version {0} is installed".format( + pygeos.__version__ + ) + ) + + # Check whether Shapely and PyGEOS use the same GEOS version. + # Based on PyGEOS from_shapely implementation. + + from shapely.geos import geos_version_string as shapely_geos_version + from pygeos import geos_capi_version_string + + # shapely has something like: "3.6.2-CAPI-1.10.2 4d2925d6" + # pygeos has something like: "3.6.2-CAPI-1.10.2" + if not shapely_geos_version.startswith(geos_capi_version_string): + warnings.warn( + "The Shapely GEOS version ({}) is incompatible with the GEOS " + "version PyGEOS was compiled with ({}). Conversions between both " + "will be slow.".format( + shapely_geos_version, geos_capi_version_string + ) + ) + PYGEOS_SHAPELY_COMPAT = False + else: + PYGEOS_SHAPELY_COMPAT = True + + except ImportError: + raise ImportError( + "To use the PyGEOS speed-ups within GeoPandas, you need to install " + "PyGEOS: 'conda install pygeos' or 'pip install pygeos'" + ) + + +set_use_pygeos() + + +def import_optional_dependency(name: str, extra: str = ""): + """ + Import an optional dependency. + + Adapted from pandas.compat._optional::import_optional_dependency + + Raises a formatted ImportError if the module is not present. + + Parameters + ---------- + name : str + The module name. + extra : str + Additional text to include in the ImportError message. + Returns + ------- + module + """ + msg = """Missing optional dependency '{name}'. 
{extra} " + "Use pip or conda to install {name}.""".format( + name=name, extra=extra + ) + + if not isinstance(name, str): + raise ValueError( + "Invalid module name: '{name}'; must be a string".format(name=name) + ) + + try: + module = importlib.import_module(name) + + except ImportError: + raise ImportError(msg) from None + + return module + + +# ----------------------------------------------------------------------------- +# RTree compat +# ----------------------------------------------------------------------------- + +HAS_RTREE = None +RTREE_GE_094 = False +try: + import rtree # noqa + + HAS_RTREE = True +except ImportError: + HAS_RTREE = False diff -Nru python-geopandas-0.7.0/geopandas/_config.py python-geopandas-0.8.1/geopandas/_config.py --- python-geopandas-0.7.0/geopandas/_config.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/_config.py 2020-07-15 17:54:36.000000000 +0000 @@ -31,6 +31,8 @@ if option.validator: option.validator(value) self._config[key] = value + if option.callback: + option.callback(key, value) else: msg = "You can only set the value of existing options" raise AttributeError(msg) @@ -58,7 +60,7 @@ else: doc_text = u"No description available." 
doc_text = indent(doc_text, prefix=" ") - description += doc_text + description += doc_text + "\n" space = "\n " description = description.replace("\n", space) return "{}({}{})".format(cls, space, description) @@ -100,4 +102,36 @@ callback=None, ) -options = Options({"display_precision": display_precision}) + +def _validate_bool(value): + if not isinstance(value, bool): + raise TypeError("Expected bool value, got {0}".format(type(value))) + + +def _default_use_pygeos(): + import geopandas._compat as compat + + return compat.USE_PYGEOS + + +def _callback_use_pygeos(key, value): + assert key == "use_pygeos" + import geopandas._compat as compat + + compat.set_use_pygeos(value) + + +use_pygeos = Option( + key="use_pygeos", + default_value=_default_use_pygeos(), + doc=( + "Whether to use PyGEOS to speed up spatial operations. The default is True " + "if PyGEOS is installed, and follows the USE_PYGEOS environment variable " + "if set." + ), + validator=_validate_bool, + callback=_callback_use_pygeos, +) + + +options = Options({"display_precision": display_precision, "use_pygeos": use_pygeos}) diff -Nru python-geopandas-0.7.0/geopandas/geodataframe.py python-geopandas-0.8.1/geopandas/geodataframe.py --- python-geopandas-0.7.0/geopandas/geodataframe.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/geodataframe.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,26 +1,34 @@ import json +import warnings import numpy as np import pandas as pd from pandas import DataFrame, Series from shapely.geometry import mapping, shape +from shapely.geometry.base import BaseGeometry -from geopandas.array import GeometryArray, from_shapely + +from pyproj import CRS + +from geopandas.array import GeometryArray, from_shapely, GeometryDtype from geopandas.base import GeoPandasBase, is_geometry_type from geopandas.geoseries import GeoSeries import geopandas.io from geopandas.plotting import plot_dataframe + DEFAULT_GEO_COLUMN_NAME = "geometry" -def _ensure_geometry(data): 
+def _ensure_geometry(data, crs=None): """ Ensure the data is of geometry dtype or converted to it. If input is a (Geo)Series, output is a GeoSeries, otherwise output is GeometryArray. + + If the input is a GeometryDtype with a set CRS, `crs` is ignored. """ if is_geometry_type(data): if isinstance(data, Series): @@ -28,10 +36,10 @@ return data else: if isinstance(data, Series): - out = from_shapely(np.asarray(data)) + out = from_shapely(np.asarray(data), crs=crs) return GeoSeries(out, index=data.index, name=data.name) else: - out = from_shapely(data) + out = from_shapely(data, crs=crs) return out @@ -43,11 +51,32 @@ Parameters ---------- - crs : str (optional) - Coordinate system + crs : value (optional) + Coordinate Reference System of the geometry objects. Can be anything accepted by + :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. geometry : str or array (optional) If str, column to use as geometry. If array, will be set as 'geometry' column on GeoDataFrame. + + Examples + -------- + Constructing GeoDataFrame from a dictionary. + + >>> from shapely.geometry import Point + >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1,2), Point(2,1)]} + >>> gdf = gpd.GeoDataFrame(d, crs="EPSG:4326") + >>> gdf + col1 geometry + 0 name1 POINT (1.00000 2.00000) + 1 name2 POINT (2.00000 1.00000) + + Notice that the inferred dtype of 'geometry' columns is geometry. 
+ + >>> gdf.dtypes + col1 object + geometry geometry + dtype: object """ _metadata = ["_crs", "_geometry_column_name"] @@ -61,8 +90,7 @@ # need to set this before calling self['geometry'], because # getitem accesses crs - self._crs = None - self.crs = crs + self._crs = crs if crs is not None else None # set_geometry ensures the geometry data have the proper dtype, # but is not called if `geometry=None` ('geometry' column present @@ -74,7 +102,24 @@ # only if we have actual geometry values -> call set_geometry index = self.index try: - self["geometry"] = _ensure_geometry(self["geometry"].values) + if ( + hasattr(self["geometry"].values, "crs") + and self["geometry"].values.crs + and crs + and not self["geometry"].values.crs == crs + ): + warnings.warn( + "CRS mismatch between CRS of the passed geometries " + "and 'crs'. Use 'GeoDataFrame.set_crs(crs, " + "allow_override=True)' to overwrite CRS or " + "'GeoDataFrame.to_crs(crs)' to reproject geometries. " + "CRS mismatch will raise an error in the future versions " + "of GeoPandas.", + FutureWarning, + stacklevel=2, + ) + # TODO: raise error in 0.9 or 0.10. + self["geometry"] = _ensure_geometry(self["geometry"].values, crs) except TypeError: pass else: @@ -86,9 +131,34 @@ geometry = "geometry" if geometry is not None: + if ( + hasattr(geometry, "crs") + and geometry.crs + and crs + and not geometry.crs == crs + ): + warnings.warn( + "CRS mismatch between CRS of the passed geometries " + "and 'crs'. Use 'GeoDataFrame.set_crs(crs, " + "allow_override=True)' to overwrite CRS or " + "'GeoDataFrame.to_crs(crs)' to reproject geometries. " + "CRS mismatch will raise an error in the future versions " + "of GeoPandas.", + FutureWarning, + stacklevel=2, + ) + # TODO: raise error in 0.9 or 0.10. 
self.set_geometry(geometry, inplace=True) self._invalidate_sindex() + if geometry is None and crs: + warnings.warn( + "Assigning CRS to a GeoDataFrame without a geometry column is now " + "deprecated and will not be supported in the future.", + FutureWarning, + stacklevel=2, + ) + def __setattr__(self, attr, val): # have to special case geometry b/c pandas tries to use as column... if attr == "geometry": @@ -100,7 +170,7 @@ if self._geometry_column_name not in self: raise AttributeError( "No geometry data set yet (expected in" - " column '%s'." % self._geometry_column_name + " column '%s'.)" % self._geometry_column_name ) return self[self._geometry_column_name] @@ -127,10 +197,12 @@ Delete column to be used as the new geometry inplace : boolean, default False Modify the GeoDataFrame in place (do not create a new object) - crs : str/result of fion.get_crs (optional) - Coordinate system to use. If passed, overrides both DataFrame and - col's crs. Otherwise, tries to get crs from passed col values or - DataFrame. + crs : pyproj.CRS, optional + Coordinate system to use. The value can be anything accepted + by :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. + If passed, overrides both DataFrame and col's crs. + Otherwise, tries to get crs from passed col values or DataFrame. 
Examples -------- @@ -139,7 +211,7 @@ Returns ------- - geodataframe : GeoDataFrame + GeoDataFrame """ # Most of the code here is taken from DataFrame.set_index() if inplace: @@ -147,9 +219,6 @@ else: frame = self.copy() - if not crs: - crs = getattr(col, "crs", self.crs) - to_remove = None geo_column_name = self._geometry_column_name if isinstance(col, (Series, list, np.ndarray, GeometryArray)): @@ -172,13 +241,17 @@ if to_remove: del frame[to_remove] - if isinstance(level, GeoSeries) and level.crs != crs: + if not crs: + level_crs = getattr(level, "crs", None) + crs = level_crs if level_crs is not None else self._crs + + if isinstance(level, (GeoSeries, GeometryArray)) and level.crs != crs: # Avoids caching issues/crs sharing issues level = level.copy() level.crs = crs # Check that we are using a listlike of geometries - level = _ensure_geometry(level) + level = _ensure_geometry(level, crs=crs) index = frame.index frame[geo_column_name] = level if frame.index is not index and len(frame.index) == len(index): @@ -223,6 +296,61 @@ self.rename(columns={geometry_col: col}, inplace=inplace) self.set_geometry(col, inplace=inplace) + @property + def crs(self): + """ + The Coordinate Reference System (CRS) represented as a ``pyproj.CRS`` + object. + + Returns None if the CRS is not set, and to set the value it + :getter: Returns a ``pyproj.CRS`` or None. When setting, the value + can be anything accepted by + :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. 
+ """ + return self._crs + + @crs.setter + def crs(self, value): + """Sets the value of the crs""" + if self._geometry_column_name not in self: + warnings.warn( + "Assigning CRS to a GeoDataFrame without a geometry column is now " + "deprecated and will not be supported in the future.", + FutureWarning, + stacklevel=4, + ) + self._crs = None if not value else CRS.from_user_input(value) + else: + if hasattr(self.geometry.values, "crs"): + self.geometry.values.crs = value + self._crs = self.geometry.values.crs + else: + # column called 'geometry' without geometry + self._crs = None if not value else CRS.from_user_input(value) + + def __setstate__(self, state): + # overriding DataFrame method for compat with older pickles (CRS handling) + if isinstance(state, dict): + if "_metadata" in state and "crs" in state["_metadata"]: + metadata = state["_metadata"] + metadata[metadata.index("crs")] = "_crs" + if "crs" in state and "_crs" not in state: + crs = state.pop("crs") + state["_crs"] = CRS.from_user_input(crs) if crs is not None else crs + + super().__setstate__(state) + + # for some versions that didn't yet have CRS at array level -> crs is set + # at GeoDataFrame level with '_crs' (and not 'crs'), so without propagating + # to the GeoSeries/GeometryArray + try: + if self.crs is not None: + if self.geometry.values.crs is None: + self.crs = self.crs + except Exception: + pass + @classmethod def from_file(cls, filename, **kwargs): """Alternate constructor to create a ``GeoDataFrame`` from a file. 
@@ -245,7 +373,7 @@ -------- >>> df = geopandas.GeoDataFrame.from_file('nybb.shp') """ - return geopandas.io.file.read_file(filename, **kwargs) + return geopandas.io.file._read_file(filename, **kwargs) @classmethod def from_features(cls, features, crs=None, columns=None): @@ -291,18 +419,17 @@ features_lst = features rows = [] - for f in features_lst: - if hasattr(f, "__geo_interface__"): - f = f.__geo_interface__ - else: - f = f - - d = {"geometry": shape(f["geometry"]) if f["geometry"] else None} - d.update(f["properties"]) - rows.append(d) - df = GeoDataFrame(rows, columns=columns) - df.crs = crs - return df + for feature in features_lst: + # load geometry + if hasattr(feature, "__geo_interface__"): + feature = feature.__geo_interface__ + row = { + "geometry": shape(feature["geometry"]) if feature["geometry"] else None + } + # load properties + row.update(feature["properties"]) + rows.append(row) + return GeoDataFrame(rows, columns=columns, crs=crs) @classmethod def from_postgis( @@ -315,6 +442,7 @@ coerce_float=True, parse_dates=None, params=None, + chunksize=None, ): """ Alternate constructor to create a ``GeoDataFrame`` from a sql query @@ -344,6 +472,9 @@ without native Datetime support, such as SQLite. params : list, tuple or dict, optional, default None List of parameters to pass to execute method. + chunksize : int, default None + If specified, return an iterator where chunksize is the number + of rows to include in each chunk. 
Examples -------- @@ -353,7 +484,7 @@ >>> df = geopandas.GeoDataFrame.from_postgis(sql, con) """ - df = geopandas.io.sql.read_postgis( + df = geopandas.io.sql._read_postgis( sql, con, geom_col=geom_col, @@ -362,6 +493,7 @@ coerce_float=coerce_float, parse_dates=parse_dates, params=params, + chunksize=chunksize, ) return df @@ -424,6 +556,12 @@ if na not in ["null", "drop", "keep"]: raise ValueError("Unknown na method {0}".format(na)) + if self._geometry_column_name not in self: + raise AttributeError( + "No geometry data set (expected in" + " column '%s')." % self._geometry_column_name + ) + ids = np.array(self.index, copy=False) geometries = np.array(self[self._geometry_column_name], copy=False) @@ -484,6 +622,81 @@ return geo + def to_parquet(self, path, index=None, compression="snappy", **kwargs): + """Write a GeoDataFrame to the Parquet format. + + Any geometry columns present are serialized to WKB format in the file. + + Requires 'pyarrow'. + + WARNING: this is an initial implementation of Parquet file support and + associated metadata. This is tracking version 0.1.0 of the metadata + specification at: + https://github.com/geopandas/geo-arrow-spec + + This metadata specification does not yet make stability promises. As such, + we do not yet recommend using this in a production setting unless you are + able to rewrite your Parquet files. + + .. versionadded:: 0.8 + + Parameters + ---------- + path : str, path object + index : bool, default None + If ``True``, always include the dataframe's index(es) as columns + in the file output. + If ``False``, the index(es) will not be written to the file. + If ``None``, the index(ex) will be included as columns in the file + output except `RangeIndex` which is stored as metadata only. + compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' + Name of the compression to use. Use ``None`` for no compression. + kwargs + Additional keyword arguments passed to to pyarrow.parquet.write_table(). 
+ """ + + from geopandas.io.arrow import _to_parquet + + _to_parquet(self, path, compression=compression, index=index, **kwargs) + + def to_feather(self, path, index=None, compression=None, **kwargs): + """Write a GeoDataFrame to the Feather format. + + Any geometry columns present are serialized to WKB format in the file. + + Requires 'pyarrow' >= 0.17. + + WARNING: this is an initial implementation of Feather file support and + associated metadata. This is tracking version 0.1.0 of the metadata + specification at: + https://github.com/geopandas/geo-arrow-spec + + This metadata specification does not yet make stability promises. As such, + we do not yet recommend using this in a production setting unless you are + able to rewrite your Feather files. + + .. versionadded:: 0.8 + + Parameters + ---------- + path : str, path object + index : bool, default None + If ``True``, always include the dataframe's index(es) as columns + in the file output. + If ``False``, the index(es) will not be written to the file. + If ``None``, the index(ex) will be included as columns in the file + output except `RangeIndex` which is stored as metadata only. + compression : {'zstd', 'lz4', 'uncompressed'}, optional + Name of the compression to use. Use ``"uncompressed"`` for no + compression. By default uses LZ4 if available, otherwise uncompressed. + kwargs + Additional keyword arguments passed to to pyarrow.feather.write_feather(). 
+ """ + + from geopandas.io.arrow import _to_feather + + _to_feather(self, path, index=index, compression=compression, **kwargs) + def to_file( self, filename, driver="ESRI Shapefile", schema=None, index=None, **kwargs ): @@ -528,34 +741,73 @@ -------- GeoSeries.to_file """ - from geopandas.io.file import to_file + from geopandas.io.file import _to_file - to_file(self, filename, driver, schema, index, **kwargs) + _to_file(self, filename, driver, schema, index, **kwargs) + + def set_crs(self, crs=None, epsg=None, inplace=False, allow_override=False): + """ + Set the Coordinate Reference System (CRS) of the ``GeoDataFrame``. + + If there are multiple geometry columns within the GeoDataFrame, only + the CRS of the active geometry column is set. + + NOTE: The underlying geometries are not transformed to this CRS. To + transform the geometries to a new CRS, use the ``to_crs`` method. + + Parameters + ---------- + crs : pyproj.CRS, optional if `epsg` is specified + The value can be anything accepted + by :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. + epsg : int, optional if `crs` is specified + EPSG code specifying the projection. + inplace : bool, default False + If True, the CRS of the GeoDataFrame will be changed in place + (while still returning the result) instead of making a copy of + the GeoDataFrame. + allow_override : bool, default False + If the the GeoDataFrame already has a CRS, allow to replace the + existing CRS, even when both are not equal. + """ + if not inplace: + df = self.copy() + else: + df = self + df.geometry = df.geometry.set_crs( + crs=crs, epsg=epsg, allow_override=allow_override, inplace=True + ) + return df def to_crs(self, crs=None, epsg=None, inplace=False): """Transform geometries to a new coordinate reference system. - Transform all geometries in a GeoSeries to a different coordinate + Transform all geometries in an active geometry column to a different coordinate reference system. 
The ``crs`` attribute on the current GeoSeries must be set. Either ``crs`` or ``epsg`` may be specified for output. - This method will transform all points in all objects. It has no notion + This method will transform all points in all objects. It has no notion or projecting entire geometries. All segments joining points are - assumed to be lines in the current projection, not geodesics. Objects + assumed to be lines in the current projection, not geodesics. Objects crossing the dateline (or other projection boundary) will have undesirable behavior. Parameters ---------- crs : pyproj.CRS, optional if `epsg` is specified - The value can be anything accepted - by :meth:`pyproj.CRS.from_user_input`, such as an authority - string (eg "EPSG:4326") or a WKT string. + The value can be anything accepted by + :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. epsg : int, optional if `crs` is specified EPSG code specifying output projection. inplace : bool, optional, default: False Whether to return a new GeoDataFrame or do the transformation in place. + + Returns + ------- + GeoDataFrame """ if inplace: df = self @@ -575,19 +827,31 @@ """ result = super(GeoDataFrame, self).__getitem__(key) geo_col = self._geometry_column_name - if isinstance(key, str) and key == geo_col: + if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype): result.__class__ = GeoSeries - result.crs = self.crs result._invalidate_sindex() elif isinstance(result, DataFrame) and geo_col in result: result.__class__ = GeoDataFrame - result.crs = self.crs result._geometry_column_name = geo_col result._invalidate_sindex() elif isinstance(result, DataFrame) and geo_col not in result: result.__class__ = DataFrame return result + def __setitem__(self, key, value): + """ + Overwritten to preserve CRS of GeometryArray in cases like + df['geometry'] = [geom... 
for geom in df.geometry] + """ + if not pd.api.types.is_list_like(key) and key == self._geometry_column_name: + if pd.api.types.is_scalar(value) or isinstance(value, BaseGeometry): + value = [value] * self.shape[0] + try: + value = _ensure_geometry(value, crs=self.crs) + except TypeError: + warnings.warn("Geometry column does not contain geometry.") + super(GeoDataFrame, self).__setitem__(key, value) + # # Implement pandas methods # @@ -641,6 +905,7 @@ else: for name in self._metadata: object.__setattr__(self, name, getattr(other, name, None)) + return self def plot(self, *args, **kwargs): @@ -728,6 +993,9 @@ """ df_copy = self.copy() + if "level_1" in df_copy.columns: # GH1393 + df_copy = df_copy.rename(columns={"level_1": "__level_1"}) + exploded_geom = df_copy.geometry.explode().reset_index(level=-1) exploded_index = exploded_geom.columns[0] @@ -738,6 +1006,10 @@ # exploded GeoSeries index. df.set_index(exploded_index, append=True, inplace=True) df.index.names = list(self.index.names) + [None] + + if "__level_1" in df.columns: + df = df.rename(columns={"__level_1": "level_1"}) + geo_df = df.set_geometry(self._geometry_column_name) return geo_df @@ -767,6 +1039,106 @@ # do not return a GeoDataFrame return pd.DataFrame(df) + def to_postgis( + self, + name, + con, + schema=None, + if_exists="fail", + index=False, + index_label=None, + chunksize=None, + dtype=None, + ): + + """ + Upload GeoDataFrame into PostGIS database. + + This method requires SQLAlchemy and GeoAlchemy2, and a PostgreSQL + Python driver (e.g. psycopg2) to be installed. + + Parameters + ---------- + name : str + Name of the target table. + con : sqlalchemy.engine.Engine + Active connection to the PostGIS database. + if_exists : {'fail', 'replace', 'append'}, default 'fail' + How to behave if the table already exists: + + - fail: Raise a ValueError. + - replace: Drop the table before inserting new values. + - append: Insert new values to the existing table. 
+ schema : string, optional + Specify the schema. If None, use default schema: 'public'. + index : bool, default True + Write DataFrame index as a column. + Uses *index_label* as the column name in the table. + index_label : string or sequence, default None + Column label for index column(s). + If None is given (default) and index is True, + then the index names are used. + chunksize : int, optional + Rows will be written in batches of this size at a time. + By default, all rows will be written at once. + dtype : dict of column name to SQL type, default None + Specifying the datatype for columns. + The keys should be the column names and the values + should be the SQLAlchemy types. + + Examples + -------- + + >>> from sqlalchemy import create_engine + >>> engine = create_engine("postgres://myusername:mypassword@myhost:5432\ +/mydatabase";) + >>> gdf.to_postgis("my_table", engine) + """ + geopandas.io.sql._write_postgis( + self, name, con, schema, if_exists, index, index_label, chunksize, dtype + ) + + # + # Implement standard operators for GeoSeries + # + + def __xor__(self, other): + """Implement ^ operator as for builtin set type""" + warnings.warn( + "'^' operator will be deprecated. Use the 'symmetric_difference' " + "method instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.geometry.symmetric_difference(other) + + def __or__(self, other): + """Implement | operator as for builtin set type""" + warnings.warn( + "'|' operator will be deprecated. Use the 'union' method instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.geometry.union(other) + + def __and__(self, other): + """Implement & operator as for builtin set type""" + warnings.warn( + "'&' operator will be deprecated. Use the 'intersection' method instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.geometry.intersection(other) + + def __sub__(self, other): + """Implement - operator as for builtin set type""" + warnings.warn( + "'-' operator will be deprecated. 
Use the 'difference' method instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.geometry.difference(other) + def _dataframe_set_geometry(self, col, drop=False, inplace=False, crs=None): if inplace: diff -Nru python-geopandas-0.7.0/geopandas/geoseries.py python-geopandas-0.8.1/geopandas/geoseries.py --- python-geopandas-0.7.0/geopandas/geoseries.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/geoseries.py 2020-07-15 17:54:36.000000000 +0000 @@ -8,13 +8,13 @@ from pyproj import CRS, Transformer from shapely.geometry.base import BaseGeometry -from shapely.ops import transform from geopandas.base import GeoPandasBase, _delegate_property from geopandas.plotting import plot_series -from .array import GeometryDtype, from_shapely +from .array import GeometryArray, GeometryDtype, from_shapely from .base import is_geometry_type +from . import _vectorized as vectorized _SERIES_WARNING_MSG = """\ @@ -42,6 +42,24 @@ return Series(data=data, index=index, **kwargs) +def inherit_doc(cls): + """ + A decorator adding a docstring from an existing method. + """ + + def decorator(decorated): + original_method = getattr(cls, decorated.__name__, None) + if original_method: + doc = original_method.__doc__ or "" + else: + doc = "" + + decorated.__doc__ = doc + return decorated + + return decorator + + class GeoSeries(GeoPandasBase, Series): """ A Series object designed to store shapely geometry objects. @@ -52,8 +70,11 @@ The geometries to store in the GeoSeries. index : array-like or Index The index for the GeoSeries. - crs : str, dict (optional) - Coordinate Reference System of the geometry objects. + crs : value (optional) + Coordinate Reference System of the geometry objects. Can be anything accepted by + :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. + kwargs Additional arguments passed to the Series constructor, e.g. ``name``. 
@@ -76,11 +97,30 @@ """ - _metadata = ["name", "_crs"] + _metadata = ["name"] def __new__(cls, data=None, index=None, crs=None, **kwargs): # we need to use __new__ because we want to return Series instance # instead of GeoSeries instance in case of non-geometry data + + if hasattr(data, "crs") and crs: + if not data.crs: + # make a copy to avoid setting CRS to passed GeometryArray + data = data.copy() + else: + if not data.crs == crs: + warnings.warn( + "CRS mismatch between CRS of the passed geometries " + "and 'crs'. Use 'GeoDataFrame.set_crs(crs, " + "allow_override=True)' to overwrite CRS or " + "'GeoSeries.to_crs(crs)' to reproject geometries. " + "CRS mismatch will raise an error in the future versions " + "of GeoPandas.", + FutureWarning, + stacklevel=2, + ) + # TODO: raise error in 0.9 or 0.10. + if isinstance(data, SingleBlockManager): if isinstance(data.blocks[0].dtype, GeometryDtype): if data.blocks[0].ndim == 2: @@ -96,7 +136,7 @@ data = SingleBlockManager([block], data.axes[0], fastpath=True) self = super(GeoSeries, cls).__new__(cls) super(GeoSeries, self).__init__(data, index=index, **kwargs) - self.crs = crs + self.crs = getattr(self.values, "crs", crs) return self warnings.warn(_SERIES_WARNING_MSG, FutureWarning, stacklevel=2) return Series(data, index=index, **kwargs) @@ -125,7 +165,7 @@ return s # try to convert to GeometryArray, if fails return plain Series try: - data = from_shapely(s.values) + data = from_shapely(s.values, crs) except TypeError: warnings.warn(_SERIES_WARNING_MSG, FutureWarning, stacklevel=2) return s @@ -134,7 +174,9 @@ self = super(GeoSeries, cls).__new__(cls) super(GeoSeries, self).__init__(data, index=index, name=name, **kwargs) - self.crs = crs + + if not self.crs: + self.crs = crs self._invalidate_sindex() return self @@ -260,15 +302,26 @@ def __getitem__(self, key): return self._wrapped_pandas_method("__getitem__", key) + @inherit_doc(pd.Series) def sort_index(self, *args, **kwargs): return 
self._wrapped_pandas_method("sort_index", *args, **kwargs) + @inherit_doc(pd.Series) def take(self, *args, **kwargs): return self._wrapped_pandas_method("take", *args, **kwargs) + @inherit_doc(pd.Series) def select(self, *args, **kwargs): return self._wrapped_pandas_method("select", *args, **kwargs) + @inherit_doc(pd.Series) + def apply(self, func, args=(), **kwargs): + result = super().apply(func, args=args, **kwargs) + if isinstance(result, GeoSeries): + if self.crs is not None: + result.set_crs(self.crs, inplace=True) + return result + def __finalize__(self, other, method=None, **kwargs): """ propagate metadata from other to self """ # NOTE: backported from pandas master (upcoming v0.13) @@ -394,6 +447,54 @@ # Additional methods # + def set_crs(self, crs=None, epsg=None, inplace=False, allow_override=False): + """ + Set the Coordinate Reference System (CRS) of a ``GeoSeries``. + + NOTE: The underlying geometries are not transformed to this CRS. To + transform the geometries to a new CRS, use the ``to_crs`` method. + + Parameters + ---------- + crs : pyproj.CRS, optional if `epsg` is specified + The value can be anything accepted + by :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. + epsg : int, optional if `crs` is specified + EPSG code specifying the projection. + inplace : bool, default False + If True, the CRS of the GeoSeries will be changed in place + (while still returning the result) instead of making a copy of + the GeoSeries. + allow_override : bool, default False + If the the GeoSeries already has a CRS, allow to replace the + existing CRS, even when both are not equal. 
+ + Returns + ------- + GeoSeries + """ + if crs is not None: + crs = CRS.from_user_input(crs) + elif epsg is not None: + crs = CRS.from_epsg(epsg) + else: + raise ValueError("Must pass either crs or epsg.") + + if not allow_override and self.crs is not None and not self.crs == crs: + raise ValueError( + "The GeoSeries already has a CRS which is not equal to the passed " + "CRS. Specify 'allow_override=True' to allow replacing the existing " + "CRS without doing any transformation. If you actually want to " + "transform the geometries, use 'GeoSeries.to_crs' instead." + ) + if not inplace: + result = self.copy() + else: + result = self + result.crs = crs + return result + def to_crs(self, crs=None, epsg=None): """Returns a ``GeoSeries`` with all geometries transformed to a new coordinate reference system. @@ -412,10 +513,14 @@ ---------- crs : pyproj.CRS, optional if `epsg` is specified The value can be anything accepted - by :meth:`pyproj.CRS.from_user_input`, such as an authority - string (eg "EPSG:4326") or a WKT string. + by :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. epsg : int, optional if `crs` is specified EPSG code specifying output projection. + + Returns + ------- + GeoSeries """ if self.crs is None: raise ValueError( @@ -434,11 +539,11 @@ return self transformer = Transformer.from_crs(self.crs, crs, always_xy=True) - result = self.apply(lambda geom: transform(transformer.transform, geom)) - result.__class__ = GeoSeries - result.crs = crs - result._invalidate_sindex() - return result + + new_data = vectorized.transform(self.values.data, transformer.transform) + return GeoSeries( + GeometryArray(new_data), crs=crs, index=self.index, name=self.name + ) def to_json(self, **kwargs): """ @@ -456,16 +561,37 @@ def __xor__(self, other): """Implement ^ operator as for builtin set type""" + warnings.warn( + "'^' operator will be deprecated. 
Use the 'symmetric_difference' " + "method instead.", + DeprecationWarning, + stacklevel=2, + ) return self.symmetric_difference(other) def __or__(self, other): """Implement | operator as for builtin set type""" + warnings.warn( + "'|' operator will be deprecated. Use the 'union' method instead.", + DeprecationWarning, + stacklevel=2, + ) return self.union(other) def __and__(self, other): """Implement & operator as for builtin set type""" + warnings.warn( + "'&' operator will be deprecated. Use the 'intersection' method instead.", + DeprecationWarning, + stacklevel=2, + ) return self.intersection(other) def __sub__(self, other): """Implement - operator as for builtin set type""" + warnings.warn( + "'-' operator will be deprecated. Use the 'difference' method instead.", + DeprecationWarning, + stacklevel=2, + ) return self.difference(other) diff -Nru python-geopandas-0.7.0/geopandas/__init__.py python-geopandas-0.8.1/geopandas/__init__.py --- python-geopandas-0.7.0/geopandas/__init__.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/__init__.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,9 +1,13 @@ +from geopandas._config import options # noqa + from geopandas.geoseries import GeoSeries # noqa from geopandas.geodataframe import GeoDataFrame # noqa -from geopandas.array import _points_from_xy as points_from_xy # noqa +from geopandas.array import points_from_xy # noqa -from geopandas.io.file import read_file # noqa -from geopandas.io.sql import read_postgis # noqa +from geopandas.io.file import _read_file as read_file # noqa +from geopandas.io.arrow import _read_parquet as read_parquet # noqa +from geopandas.io.arrow import _read_feather as read_feather # noqa +from geopandas.io.sql import _read_postgis as read_postgis # noqa from geopandas.tools import sjoin # noqa from geopandas.tools import overlay # noqa from geopandas.tools._show_versions import show_versions # noqa @@ -12,7 +16,6 @@ import geopandas.datasets # noqa -from geopandas._config 
import options # noqa # make the interactive namespace easier to use # for `from geopandas import *` demos. diff -Nru python-geopandas-0.7.0/geopandas/io/arrow.py python-geopandas-0.8.1/geopandas/io/arrow.py --- python-geopandas-0.7.0/geopandas/io/arrow.py 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/io/arrow.py 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,454 @@ +from distutils.version import LooseVersion +import json +import warnings + +from pandas import DataFrame + +from geopandas._compat import import_optional_dependency +from geopandas.array import from_wkb, to_wkb +from geopandas import GeoDataFrame +import geopandas + + +METADATA_VERSION = "0.1.0" +# reference: https://github.com/geopandas/geo-arrow-spec + +# Metadata structure: +# { +# "geo": { +# "columns": { +# "": { +# "crs": "", +# "encoding": "WKB" +# } +# }, +# "creator": { +# "library": "geopandas", +# "version": "" +# } +# "primary_column": "", +# "schema_version": "" +# } +# } + + +def _create_metadata(df): + """Create and encode geo metadata dict. + + Parameters + ---------- + df : GeoDataFrame + + Returns + ------- + dict + """ + + # Construct metadata for each geometry + column_metadata = {} + for col in df.columns[df.dtypes == "geometry"]: + series = df[col] + column_metadata[col] = { + "crs": series.crs.to_wkt() if series.crs else None, + "encoding": "WKB", + "bbox": series.total_bounds.tolist(), + } + + return { + "primary_column": df._geometry_column_name, + "columns": column_metadata, + "schema_version": METADATA_VERSION, + "creator": {"library": "geopandas", "version": geopandas.__version__}, + } + + +def _encode_metadata(metadata): + """Encode metadata dict to UTF-8 JSON string + + Parameters + ---------- + metadata : dict + + Returns + ------- + UTF-8 encoded JSON string + """ + return json.dumps(metadata).encode("utf-8") + + +def _encode_wkb(df): + """Encode all geometry columns in the GeoDataFrame to WKB. 
+ + Parameters + ---------- + df : GeoDataFrame + + Returns + ------- + DataFrame + geometry columns are encoded to WKB + """ + + df = DataFrame(df.copy()) + + # Encode all geometry columns to WKB + for col in df.columns[df.dtypes == "geometry"]: + df[col] = to_wkb(df[col].values) + + return df + + +def _decode_metadata(metadata_str): + """Decode a UTF-8 encoded JSON string to dict + + Parameters + ---------- + metadata_str : string (UTF-8 encoded) + + Returns + ------- + dict + """ + if metadata_str is None: + return None + + return json.loads(metadata_str.decode("utf-8")) + + +def _validate_dataframe(df): + """Validate that the GeoDataFrame conforms to requirements for writing + to Parquet format. + + Raises `ValueError` if the GeoDataFrame is not valid. + + copied from `pandas.io.parquet` + + Parameters + ---------- + df : GeoDataFrame + """ + + if not isinstance(df, DataFrame): + raise ValueError("Writing to Parquet/Feather only supports IO with DataFrames") + + # must have value column names (strings only) + if df.columns.inferred_type not in {"string", "unicode", "empty"}: + raise ValueError("Writing to Parquet/Feather requires string column names") + + # index level names must be strings + valid_names = all( + isinstance(name, str) for name in df.index.names if name is not None + ) + if not valid_names: + raise ValueError("Index level names must be strings") + + +def _validate_metadata(metadata): + """Validate geo metadata. + Must not be empty, and must contain the structure specified above. + + Raises ValueError if metadata is not valid. 
+ + Parameters + ---------- + metadata : dict + """ + + if not metadata: + raise ValueError("Missing or malformed geo metadata in Parquet/Feather file") + + required_keys = ("primary_column", "columns") + for key in required_keys: + if metadata.get(key, None) is None: + raise ValueError( + "'geo' metadata in Parquet/Feather file is missing required key: " + "'{key}'".format(key=key) + ) + + if not isinstance(metadata["columns"], dict): + raise ValueError("'columns' in 'geo' metadata must be a dict") + + # Validate that geometry columns have required metadata and values + required_col_keys = ("crs", "encoding") + for col, column_metadata in metadata["columns"].items(): + for key in required_col_keys: + if key not in column_metadata: + raise ValueError( + "'geo' metadata in Parquet/Feather file is missing required key " + "'{key}' for column '{col}'".format(key=key, col=col) + ) + + if column_metadata["encoding"] != "WKB": + raise ValueError("Only WKB geometry encoding is supported") + + +def _geopandas_to_arrow(df, index=None): + """ + Helper function with main, shared logic for to_parquet/to_feather. + """ + from pyarrow import Table + + warnings.warn( + "this is an initial implementation of Parquet/Feather file support and " + "associated metadata. This is tracking version 0.1.0 of the metadata " + "specification at " + "https://github.com/geopandas/geo-arrow-spec\n\n" + "This metadata specification does not yet make stability promises. 
" + "We do not yet recommend using this in a production setting unless you " + "are able to rewrite your Parquet/Feather files.\n\n" + "To further ignore this warning, you can do: \n" + "import warnings; warnings.filterwarnings('ignore', " + "message='.*initial implementation of Parquet.*')", + UserWarning, + stacklevel=4, + ) + + _validate_dataframe(df) + + # create geo metadata before altering incoming data frame + geo_metadata = _create_metadata(df) + + df = _encode_wkb(df) + + table = Table.from_pandas(df, preserve_index=index) + + # Store geopandas specific file-level metadata + # This must be done AFTER creating the table or it is not persisted + metadata = table.schema.metadata + metadata.update({b"geo": _encode_metadata(geo_metadata)}) + return table.replace_schema_metadata(metadata) + + +def _to_parquet(df, path, index=None, compression="snappy", **kwargs): + """ + Write a GeoDataFrame to the Parquet format. + + Any geometry columns present are serialized to WKB format in the file. + + Requires 'pyarrow'. + + WARNING: this is an initial implementation of Parquet file support and + associated metadata. This is tracking version 0.1.0 of the metadata + specification at: + https://github.com/geopandas/geo-arrow-spec + + This metadata specification does not yet make stability promises. As such, + we do not yet recommend using this in a production setting unless you are + able to rewrite your Parquet files. + + + .. versionadded:: 0.8 + + Parameters + ---------- + path : str, path object + index : bool, default None + If ``True``, always include the dataframe's index(es) as columns + in the file output. + If ``False``, the index(es) will not be written to the file. + If ``None``, the index(ex) will be included as columns in the file + output except `RangeIndex` which is stored as metadata only. + compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' + Name of the compression to use. Use ``None`` for no compression. 
+ kwargs + Additional keyword arguments passed to pyarrow.parquet.write_table(). + """ + parquet = import_optional_dependency( + "pyarrow.parquet", extra="pyarrow is required for Parquet support." + ) + + table = _geopandas_to_arrow(df, index=index) + parquet.write_table(table, path, compression=compression, **kwargs) + + +def _to_feather(df, path, index=None, compression=None, **kwargs): + """ + Write a GeoDataFrame to the Feather format. + + Any geometry columns present are serialized to WKB format in the file. + + Requires 'pyarrow' >= 0.17. + + WARNING: this is an initial implementation of Feather file support and + associated metadata. This is tracking version 0.1.0 of the metadata + specification at: + https://github.com/geopandas/geo-arrow-spec + + This metadata specification does not yet make stability promises. As such, + we do not yet recommend using this in a production setting unless you are + able to rewrite your Feather files. + + .. versionadded:: 0.8 + + Parameters + ---------- + path : str, path object + index : bool, default None + If ``True``, always include the dataframe's index(es) as columns + in the file output. + If ``False``, the index(es) will not be written to the file. + If ``None``, the index(ex) will be included as columns in the file + output except `RangeIndex` which is stored as metadata only. + compression : {'zstd', 'lz4', 'uncompressed'}, optional + Name of the compression to use. Use ``"uncompressed"`` for no + compression. By default uses LZ4 if available, otherwise uncompressed. + kwargs + Additional keyword arguments passed to pyarrow.feather.write_feather(). + """ + feather = import_optional_dependency( + "pyarrow.feather", extra="pyarrow is required for Feather support." 
+ ) + # TODO move this into `import_optional_dependency` + import pyarrow + + if pyarrow.__version__ < LooseVersion("0.17.0"): + raise ImportError("pyarrow >= 0.17 required for Feather support") + + table = _geopandas_to_arrow(df, index=index) + feather.write_feather(table, path, compression=compression, **kwargs) + + +def _arrow_to_geopandas(table): + """ + Helper function with main, shared logic for read_parquet/read_feather. + """ + df = table.to_pandas() + + metadata = table.schema.metadata + if b"geo" not in metadata: + raise ValueError( + """Missing geo metadata in Parquet/Feather file. + Use pandas.read_parquet/read_feather() instead.""" + ) + + try: + metadata = _decode_metadata(metadata.get(b"geo", b"")) + + except (TypeError, json.decoder.JSONDecodeError): + raise ValueError("Missing or malformed geo metadata in Parquet/Feather file") + + _validate_metadata(metadata) + + # Find all geometry columns that were read from the file. May + # be a subset if 'columns' parameter is used. + geometry_columns = df.columns.intersection(metadata["columns"]) + + if not len(geometry_columns): + raise ValueError( + """No geometry columns are included in the columns read from + the Parquet/Feather file. To read this file without geometry columns, + use pandas.read_parquet/read_feather() instead.""" + ) + + geometry = metadata["primary_column"] + + # Missing geometry likely indicates a subset of columns was read; + # promote the first available geometry to the primary geometry. + if len(geometry_columns) and geometry not in geometry_columns: + geometry = geometry_columns[0] + + # if there are multiple non-primary geometry columns, raise a warning + if len(geometry_columns) > 1: + warnings.warn( + "Multiple non-primary geometry columns read from Parquet/Feather " + "file. The first column read was promoted to the primary geometry." + ) + + # Convert the WKB columns that are present back to geometry. 
+ for col in geometry_columns: + df[col] = from_wkb(df[col].values, crs=metadata["columns"][col]["crs"]) + + return GeoDataFrame(df, geometry=geometry) + + +def _read_parquet(path, columns=None, **kwargs): + """ + Load a Parquet object from the file path, returning a GeoDataFrame. + + You can read a subset of columns in the file using the ``columns`` parameter. + However, the structure of the returned GeoDataFrame will depend on which + columns you read: + + * if no geometry columns are read, this will raise a ``ValueError`` - you + should use the pandas `read_parquet` method instead. + * if the primary geometry column saved to this file is not included in + columns, the first available geometry column will be set as the geometry + column of the returned GeoDataFrame. + + Requires 'pyarrow'. + + .. versionadded:: 0.8 + + Parameters + ---------- + path : str, path object + columns : list-like of strings, default=None + If not None, only these columns will be read from the file. If + the primary geometry column is not included, the first secondary + geometry read from the file will be set as the geometry column + of the returned GeoDataFrame. If no geometry columns are present, + a ``ValueError`` will be raised. + **kwargs + Any additional kwargs passed to pyarrow.parquet.read_table(). + + Returns + ------- + GeoDataFrame + """ + + parquet = import_optional_dependency( + "pyarrow.parquet", extra="pyarrow is required for Parquet support." + ) + + kwargs["use_pandas_metadata"] = True + table = parquet.read_table(path, columns=columns, **kwargs) + + return _arrow_to_geopandas(table) + + +def _read_feather(path, columns=None, **kwargs): + """ + Load a Feather object from the file path, returning a GeoDataFrame. + + You can read a subset of columns in the file using the ``columns`` parameter. 
+ However, the structure of the returned GeoDataFrame will depend on which + columns you read: + + * if no geometry columns are read, this will raise a ``ValueError`` - you + should use the pandas `read_feather` method instead. + * if the primary geometry column saved to this file is not included in + columns, the first available geometry column will be set as the geometry + column of the returned GeoDataFrame. + + Requires 'pyarrow' >= 0.17. + + .. versionadded:: 0.8 + + Parameters + ---------- + path : str, path object + columns : list-like of strings, default=None + If not None, only these columns will be read from the file. If + the primary geometry column is not included, the first secondary + geometry read from the file will be set as the geometry column + of the returned GeoDataFrame. If no geometry columns are present, + a ``ValueError`` will be raised. + **kwargs + Any additional kwargs passed to pyarrow.feather.read_table(). + + Returns + ------- + GeoDataFrame + """ + + feather = import_optional_dependency( + "pyarrow.feather", extra="pyarrow is required for Feather support." 
+ ) + # TODO move this into `import_optional_dependency` + import pyarrow + + if pyarrow.__version__ < LooseVersion("0.17.0"): + raise ImportError("pyarrow >= 0.17 required for Feather support") + + table = feather.read_table(path, columns=columns, **kwargs) + return _arrow_to_geopandas(table) diff -Nru python-geopandas-0.7.0/geopandas/io/file.py python-geopandas-0.8.1/geopandas/io/file.py --- python-geopandas-0.7.0/geopandas/io/file.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/io/file.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,9 +1,11 @@ from distutils.version import LooseVersion +import io import numpy as np import pandas as pd import fiona +import pyproj from shapely.geometry import mapping from shapely.geometry.base import BaseGeometry @@ -34,7 +36,7 @@ return False -def read_file(filename, bbox=None, mask=None, rows=None, **kwargs): +def _read_file(filename, bbox=None, mask=None, rows=None, **kwargs): """ Returns a GeoDataFrame from a file or URL. @@ -42,22 +44,23 @@ Parameters ---------- - filename: str + filename : str, path object or file-like object Either the absolute or relative path to the file or URL to - be opened. - bbox: tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None + be opened, or any object with a read() method (such as an open file + or StringIO) + bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None Filter features by given bounding box, GeoSeries, GeoDataFrame or a shapely geometry. CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame. Cannot be used with mask. - mask: dict | GeoDataFrame or GeoSeries | shapely Geometry, default None + mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None Filter for features that intersect with the given dict-like geojson geometry, GeoSeries, GeoDataFrame or shapely geometry. CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame. Cannot be used with bbox. 
- rows: int or slice, default None + rows : int or slice, default None Load in specific rows by passing an integer (first `n` rows) or a slice() object. - **kwargs: + **kwargs : Keyword args to be passed to the `open` or `BytesCollection` method in the fiona library when opening the file. For more information on possible keywords, type: @@ -69,7 +72,8 @@ Returns ------- - :obj:`geopandas.GeoDataFrame` + :obj:`geopandas.GeoDataFrame` or :obj:`pandas.DataFrame` : + If `ignore_geometry=True` a :obj:`pandas.DataFrame` will be returned. Notes ----- @@ -81,6 +85,9 @@ req = _urlopen(filename) path_or_bytes = req.read() reader = fiona.BytesCollection + elif isinstance(filename, io.TextIOBase): + path_or_bytes = filename.read() + reader = fiona.open else: path_or_bytes = filename reader = fiona.open @@ -122,14 +129,55 @@ f_filt = features.filter(bbox=bbox, mask=mask) else: f_filt = features + # get list of columns + columns = list(features.schema["properties"]) + if kwargs.get("ignore_geometry", False): + return pd.DataFrame( + [record["properties"] for record in f_filt], columns=columns + ) - columns = list(features.meta["schema"]["properties"]) + ["geometry"] - gdf = GeoDataFrame.from_features(f_filt, crs=crs, columns=columns) + return GeoDataFrame.from_features( + f_filt, crs=crs, columns=columns + ["geometry"] + ) - return gdf +def read_file(*args, **kwargs): + import warnings -def to_file(df, filename, driver="ESRI Shapefile", schema=None, index=None, **kwargs): + warnings.warn( + "geopandas.io.file.read_file() is intended for internal " + "use only, and will be deprecated. Use geopandas.read_file() instead.", + DeprecationWarning, + stacklevel=2, + ) + + return _read_file(*args, **kwargs) + + +def to_file(*args, **kwargs): + import warnings + + warnings.warn( + "geopandas.io.file.to_file() is intended for internal " + "use only, and will be deprecated. 
Use GeoDataFrame.to_file() " + "or GeoSeries.to_file() instead.", + DeprecationWarning, + stacklevel=2, + ) + + return _to_file(*args, **kwargs) + + +def _to_file( + df, + filename, + driver="ESRI Shapefile", + schema=None, + index=None, + mode="w", + crs=None, + **kwargs +): """ Write this GeoDataFrame to an OGR data source @@ -156,6 +204,18 @@ .. versionadded:: 0.7 Previously the index was not written. + mode : string, default 'w' + The write mode, 'w' to overwrite the existing file and 'a' to append. + Not all drivers support appending. The drivers that support appending + are listed in fiona.supported_drivers or + https://github.com/Toblerity/Fiona/blob/master/fiona/drvsupport.py + crs : pyproj.CRS, default None + If specified, the CRS is passed to Fiona to + better control how the file is written. If None, GeoPandas + will determine the crs based on crs df attribute. + The value can be anything accepted + by :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. The *kwargs* are passed to fiona.open and can be used to write to multi-layer data, store data within archives (zip files), etc. 
@@ -177,18 +237,22 @@ df = df.reset_index(drop=False) if schema is None: schema = infer_schema(df) + if crs: + crs = pyproj.CRS.from_user_input(crs) + else: + crs = df.crs with fiona_env(): crs_wkt = None try: gdal_version = fiona.env.get_gdal_release_name() except AttributeError: gdal_version = "2.0.0" # just assume it is not the latest - if LooseVersion(gdal_version) >= LooseVersion("3.0.0") and df.crs: - crs_wkt = df.crs.to_wkt() - elif df.crs: - crs_wkt = df.crs.to_wkt("WKT1_GDAL") + if LooseVersion(gdal_version) >= LooseVersion("3.0.0") and crs: + crs_wkt = crs.to_wkt() + elif crs: + crs_wkt = crs.to_wkt("WKT1_GDAL") with fiona.open( - filename, "w", driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs + filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs ) as colxn: colxn.writerecords(df.iterfeatures()) diff -Nru python-geopandas-0.7.0/geopandas/io/sql.py python-geopandas-0.8.1/geopandas/io/sql.py --- python-geopandas-0.7.0/geopandas/io/sql.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/io/sql.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,4 +1,4 @@ -import sys +import warnings import pandas as pd @@ -6,8 +6,64 @@ from geopandas import GeoDataFrame +from .. import _compat as compat -def read_postgis( + +def _df_to_geodf(df, geom_col="geom", crs=None): + """ + Transforms a pandas DataFrame into a GeoDataFrame. + The column 'geom_col' must be a geometry column in WKB representation. + To be used to convert df based on pd.read_sql to gdf. + Parameters + ---------- + df : DataFrame + pandas DataFrame with geometry column in WKB representation. + geom_col : string, default 'geom' + column name to convert to shapely geometries + crs : pyproj.CRS, optional + CRS to use for the returned GeoDataFrame. The value can be anything accepted + by :meth:`pyproj.CRS.from_user_input() `, + such as an authority string (eg "EPSG:4326") or a WKT string. 
+ If not set, tries to determine CRS from the SRID associated with the + first geometry in the database, and assigns that to all geometries. + Returns + ------- + GeoDataFrame + """ + + if geom_col not in df: + raise ValueError("Query missing geometry column '{}'".format(geom_col)) + + geoms = df[geom_col].dropna() + + if not geoms.empty: + load_geom_bytes = shapely.wkb.loads + """Load from Python 3 binary.""" + + def load_geom_buffer(x): + """Load from Python 2 binary.""" + return shapely.wkb.loads(str(x)) + + def load_geom_text(x): + """Load from binary encoded as text.""" + return shapely.wkb.loads(str(x), hex=True) + + if isinstance(geoms.iat[0], bytes): + load_geom = load_geom_bytes + else: + load_geom = load_geom_text + + df[geom_col] = geoms = geoms.apply(load_geom) + if crs is None: + srid = shapely.geos.lgeos.GEOSGetSRID(geoms.iat[0]._geom) + # if no defined SRID in geodatabase, returns SRID of 0 + if srid != 0: + crs = "epsg:{}".format(srid) + + return GeoDataFrame(df, crs=crs, geometry=geom_col) + + +def _read_postgis( sql, con, geom_col="geom", @@ -16,6 +72,7 @@ coerce_float=True, parse_dates=None, params=None, + chunksize=None, ): """ Returns a GeoDataFrame corresponding to the result of the query @@ -34,17 +91,20 @@ CRS to use for the returned GeoDataFrame; if not set, tries to determine CRS from the SRID associated with the first geometry in the database, and assigns that to all geometries. + chunksize : int, default None + If specified, return an iterator where chunksize is the number of rows to + include in each chunk. 
See the documentation for pandas.read_sql for further explanation of the following parameters: - index_col, coerce_float, parse_dates, params + index_col, coerce_float, parse_dates, params, chunksize Returns ------- GeoDataFrame - Example - ------- + Examples + -------- PostGIS >>> sql = "SELECT geom, kind FROM polygons" SpatiaLite @@ -52,47 +112,293 @@ >>> df = geopandas.read_postgis(sql, con) """ - df = pd.read_sql( - sql, - con, - index_col=index_col, - coerce_float=coerce_float, - parse_dates=parse_dates, - params=params, + if chunksize is None: + # read all in one chunk and return a single GeoDataFrame + df = pd.read_sql( + sql, + con, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + params=params, + chunksize=chunksize, + ) + return _df_to_geodf(df, geom_col=geom_col, crs=crs) + + else: + # read data in chunks and return a generator + df_generator = pd.read_sql( + sql, + con, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + params=params, + chunksize=chunksize, + ) + return (_df_to_geodf(df, geom_col=geom_col, crs=crs) for df in df_generator) + + +def read_postgis(*args, **kwargs): + import warnings + + warnings.warn( + "geopandas.io.sql.read_postgis() is intended for internal " + "use only, and will be deprecated. Use geopandas.read_postgis() instead.", + DeprecationWarning, + stacklevel=2, ) - if geom_col not in df: - raise ValueError("Query missing geometry column '{}'".format(geom_col)) + return _read_postgis(*args, **kwargs) - geoms = df[geom_col].dropna() - if not geoms.empty: - load_geom_bytes = shapely.wkb.loads - """Load from Python 3 binary.""" +def _get_geometry_type(gdf): + """ + Get basic geometry type of a GeoDataFrame. 
See more info from: + https://geoalchemy-2.readthedocs.io/en/latest/types.html#geoalchemy2.types._GISType - def load_geom_buffer(x): - """Load from Python 2 binary.""" - return shapely.wkb.loads(str(x)) + Following rules apply: + - if geometries all share the same geometry-type, + geometries are inserted with the given GeometryType with following types: + - Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, + GeometryCollection. + - LinearRing geometries will be converted into LineString -objects. + - in all other cases, geometries will be inserted with type GEOMETRY: + - a mix of Polygons and MultiPolygons in GeoSeries + - a mix of Points and LineStrings in GeoSeries + - geometry is of type GeometryCollection, + such as GeometryCollection([Point, LineStrings]) + - if any of the geometries has Z-coordinate, all records will + be written with 3D. + """ + geom_types = list(gdf.geometry.geom_type.unique()) + has_curve = False + + for gt in geom_types: + if gt is None: + continue + elif "LinearRing" in gt: + has_curve = True + + if len(geom_types) == 1: + if has_curve: + target_geom_type = "LINESTRING" + else: + if geom_types[0] is None: + raise ValueError("No valid geometries in the data.") + else: + target_geom_type = geom_types[0].upper() + else: + target_geom_type = "GEOMETRY" - def load_geom_text(x): - """Load from binary encoded as text.""" - return shapely.wkb.loads(str(x), hex=True) + # Check for 3D-coordinates + if any(gdf.geometry.has_z): + target_geom_type = target_geom_type + "Z" + + return target_geom_type, has_curve + + +def _get_srid_from_crs(gdf): + """ + Get EPSG code from CRS if available. If not, return -1. + """ + + # Use geoalchemy2 default for srid + # Note: undefined srid in PostGIS is 0 + srid = -1 + warning_msg = ( + "Could not parse CRS from the GeoDataFrame. 
" + + "Inserting data without defined CRS.", + ) + if gdf.crs is not None: + try: + srid = gdf.crs.to_epsg(min_confidence=25) + if srid is None: + srid = -1 + warnings.warn(warning_msg, UserWarning, stacklevel=2) + except Exception: + warnings.warn(warning_msg, UserWarning, stacklevel=2) + return srid + + +def _convert_linearring_to_linestring(gdf, geom_name): + from shapely.geometry import LineString + + # Todo: Use Pygeos function once it's implemented: + # https://github.com/pygeos/pygeos/issues/76 + + mask = gdf.geom_type == "LinearRing" + gdf.loc[mask, geom_name] = gdf.loc[mask, geom_name].apply( + lambda geom: LineString(geom) + ) + return gdf + + +def _convert_to_ewkb(gdf, geom_name, srid): + """Convert geometries to ewkb. """ + if compat.USE_PYGEOS: + from pygeos import set_srid, to_wkb + + geoms = to_wkb( + set_srid(gdf[geom_name].values.data, srid=srid), hex=True, include_srid=True + ) + + else: + from shapely.wkb import dumps + + geoms = [dumps(geom, srid=srid, hex=True) for geom in gdf[geom_name]] + + # The gdf will warn that the geometry column doesn't hold in-memory geometries + # now that they are EWKB, so convert back to a regular dataframe to avoid warning + # the user that the dtypes are unexpected. 
+ df = pd.DataFrame(gdf, copy=False) + df[geom_name] = geoms + return df - if sys.version_info.major < 3: - if isinstance(geoms.iat[0], buffer): - load_geom = load_geom_buffer + +def _psql_insert_copy(tbl, conn, keys, data_iter): + import io + import csv + + s_buf = io.StringIO() + writer = csv.writer(s_buf) + writer.writerows(data_iter) + s_buf.seek(0) + + columns = ", ".join('"{}"'.format(k) for k in keys) + + dbapi_conn = conn.connection + with dbapi_conn.cursor() as cur: + sql = "COPY {} ({}) FROM STDIN WITH CSV".format(tbl.table.fullname, columns) + cur.copy_expert(sql=sql, file=s_buf) + + +def _write_postgis( + gdf, + name, + con, + schema=None, + if_exists="fail", + index=False, + index_label=None, + chunksize=None, + dtype=None, +): + """ + Upload GeoDataFrame into PostGIS database. + + This method requires SQLAlchemy and GeoAlchemy2, and a PostgreSQL + Python driver (e.g. psycopg2) to be installed. + + Parameters + ---------- + name : str + Name of the target table. + con : sqlalchemy.engine.Engine + Active connection to the PostGIS database. + if_exists : {'fail', 'replace', 'append'}, default 'fail' + How to behave if the table already exists: + + - fail: Raise a ValueError. + - replace: Drop the table before inserting new values. + - append: Insert new values to the existing table. + schema : string, optional + Specify the schema. If None, use default schema: 'public'. + index : bool, default True + Write DataFrame index as a column. + Uses *index_label* as the column name in the table. + index_label : string or sequence, default None + Column label for index column(s). + If None is given (default) and index is True, + then the index names are used. + chunksize : int, optional + Rows will be written in batches of this size at a time. + By default, all rows will be written at once. + dtype : dict of column name to SQL type, default None + Specifying the datatype for columns. 
+ The keys should be the column names and the values + should be the SQLAlchemy types. + + Examples + -------- + + >>> from sqlalchemy import create_engine + >>> engine = create_engine("postgres://myusername:mypassword@myhost:5432\ +/mydatabase";) + >>> gdf.to_postgis("my_table", engine) + """ + try: + from geoalchemy2 import Geometry + except ImportError: + raise ImportError("'to_postgis()' requires geoalchemy2 package. ") + + if not compat.SHAPELY_GE_17: + raise ImportError( + "'to_postgis()' requires newer version of Shapely " + "(>= '1.7.0').\nYou can update the library using " + "'pip install shapely --upgrade' or using " + "'conda update shapely' if using conda package manager." + ) + + gdf = gdf.copy() + geom_name = gdf.geometry.name + + # Get srid + srid = _get_srid_from_crs(gdf) + + # Get geometry type and info whether data contains LinearRing. + geometry_type, has_curve = _get_geometry_type(gdf) + + # Build dtype with Geometry + if dtype is not None: + dtype[geom_name] = Geometry(geometry_type=geometry_type, srid=srid) + else: + dtype = {geom_name: Geometry(geometry_type=geometry_type, srid=srid)} + + # Convert LinearRing geometries to LineString + if has_curve: + gdf = _convert_linearring_to_linestring(gdf, geom_name) + + # Convert geometries to EWKB + gdf = _convert_to_ewkb(gdf, geom_name, srid) + + if if_exists == "append": + # Check that the geometry srid matches with the current GeoDataFrame + with con.begin() as connection: + if schema is not None: + schema_name = schema else: - load_geom = load_geom_text - elif isinstance(geoms.iat[0], bytes): - load_geom = load_geom_bytes - else: - load_geom = load_geom_text + schema_name = "public" - df[geom_col] = geoms = geoms.apply(load_geom) - if crs is None: - srid = shapely.geos.lgeos.GEOSGetSRID(geoms.iat[0]._geom) - # if no defined SRID in geodatabase, returns SRID of 0 - if srid != 0: - crs = "epsg:{}".format(srid) + # Only check SRID if table exists + if 
connection.run_callable(connection.dialect.has_table, name, schema): + target_srid = connection.execute( + "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format( + schema=schema_name, table=name, geom_col=geom_name + ) + ).fetchone()[0] + + if target_srid != srid: + msg = ( + "The CRS of the target table (EPSG:{epsg_t}) differs from the " + "CRS of current GeoDataFrame (EPSG:{epsg_src}).".format( + epsg_t=target_srid, epsg_src=srid + ) + ) + raise ValueError(msg) + + with con.begin() as connection: + + gdf.to_sql( + name, + connection, + schema=schema, + if_exists=if_exists, + index=index, + index_label=index_label, + chunksize=chunksize, + dtype=dtype, + method=_psql_insert_copy, + ) - return GeoDataFrame(df, crs=crs, geometry=geom_col) + return Binary files /tmp/tmptG7gKQ/wfN1ju2z2a/python-geopandas-0.7.0/geopandas/io/tests/data/pickle/0.5.1_pd-0.25.3_py-3.7.3_x86_64_linux.pickle and /tmp/tmptG7gKQ/pTopuwFY4B/python-geopandas-0.8.1/geopandas/io/tests/data/pickle/0.5.1_pd-0.25.3_py-3.7.3_x86_64_linux.pickle differ Binary files /tmp/tmptG7gKQ/wfN1ju2z2a/python-geopandas-0.7.0/geopandas/io/tests/data/pickle/0.6.3_pd-0.25.3_py-3.8.0_x86_64_linux.pickle and /tmp/tmptG7gKQ/pTopuwFY4B/python-geopandas-0.8.1/geopandas/io/tests/data/pickle/0.6.3_pd-0.25.3_py-3.8.0_x86_64_linux.pickle differ Binary files /tmp/tmptG7gKQ/wfN1ju2z2a/python-geopandas-0.7.0/geopandas/io/tests/data/pickle/0.7.0_pd-1.0.4_py-3.7.6_x86_64_linux.pickle and /tmp/tmptG7gKQ/pTopuwFY4B/python-geopandas-0.8.1/geopandas/io/tests/data/pickle/0.7.0_pd-1.0.4_py-3.7.6_x86_64_linux.pickle differ Binary files /tmp/tmptG7gKQ/wfN1ju2z2a/python-geopandas-0.7.0/geopandas/io/tests/data/pickle/0.8.0_pd-1.0.5_py-3.8.3_x86_64_linux.pickle and /tmp/tmptG7gKQ/pTopuwFY4B/python-geopandas-0.8.1/geopandas/io/tests/data/pickle/0.8.0_pd-1.0.5_py-3.8.3_x86_64_linux.pickle differ diff -Nru python-geopandas-0.7.0/geopandas/io/tests/generate_legacy_storage_files.py 
python-geopandas-0.8.1/geopandas/io/tests/generate_legacy_storage_files.py --- python-geopandas-0.7.0/geopandas/io/tests/generate_legacy_storage_files.py 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/io/tests/generate_legacy_storage_files.py 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,98 @@ +""" +Script to create the data and write legacy storage (pickle) files. + +Based on pandas' generate_legacy_storage_files.py script. + +To use this script, create an environment for which you want to +generate pickles, activate the environment, and run this script as: + +$ python geopandas/geopandas/io/tests/generate_legacy_storage_files.py \ + geopandas/geopandas/io/tests/data/pickle/ pickle + +This script generates a storage file for the current arch, system, + +The idea here is you are using the *current* version of the +generate_legacy_storage_files with an *older* version of geopandas to +generate a pickle file. We will then check this file into a current +branch, and test using test_pickle.py. This will load the *older* +pickles and test versus the current data that is generated +(with master). These are then compared. 
+ +""" +import os +import pickle +import platform +import sys + +import pandas as pd + +import geopandas +from shapely.geometry import Point + + +def create_pickle_data(): + """ create the pickle data """ + + # custom geometry column name + gdf_the_geom = geopandas.GeoDataFrame( + {"a": [1, 2, 3], "the_geom": [Point(1, 1), Point(2, 2), Point(3, 3)]}, + geometry="the_geom", + ) + + # with crs + gdf_crs = geopandas.GeoDataFrame( + {"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]}, + crs="EPSG:4326", + ) + + return dict(gdf_the_geom=gdf_the_geom, gdf_crs=gdf_crs) + + +def platform_name(): + return "_".join( + [ + str(geopandas.__version__), + "pd-" + str(pd.__version__), + "py-" + str(platform.python_version()), + str(platform.machine()), + str(platform.system().lower()), + ] + ) + + +def write_legacy_pickles(output_dir): + print( + "This script generates a storage file for the current arch, system, " + "and python version" + ) + print("geopandas version: {}").format(geopandas.__version__) + print(" output dir : {}".format(output_dir)) + print(" storage format: pickle") + + pth = "{}.pickle".format(platform_name()) + + fh = open(os.path.join(output_dir, pth), "wb") + pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL) + fh.close() + + print("created pickle file: {}".format(pth)) + + +def main(): + if len(sys.argv) != 3: + exit( + "Specify output directory and storage type: generate_legacy_" + "storage_files.py " + ) + + output_dir = str(sys.argv[1]) + storage_type = str(sys.argv[2]) + + if storage_type == "pickle": + write_legacy_pickles(output_dir=output_dir) + else: + exit("storage_type must be one of {'pickle'}") + + +if __name__ == "__main__": + main() diff -Nru python-geopandas-0.7.0/geopandas/io/tests/test_arrow.py python-geopandas-0.8.1/geopandas/io/tests/test_arrow.py --- python-geopandas-0.7.0/geopandas/io/tests/test_arrow.py 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/io/tests/test_arrow.py 
2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,511 @@ +from __future__ import absolute_import + +from distutils.version import LooseVersion +import os + +import pytest +from pandas import DataFrame, read_parquet as pd_read_parquet +from pandas.testing import assert_frame_equal +import numpy as np + +import geopandas +from geopandas import GeoDataFrame, read_file, read_parquet, read_feather +from geopandas.array import to_wkb +from geopandas.datasets import get_path +from geopandas.io.arrow import ( + _create_metadata, + _decode_metadata, + _encode_metadata, + _encode_wkb, + _validate_dataframe, + _validate_metadata, + METADATA_VERSION, +) +from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal + + +# Skip all tests in this module if pyarrow is not available +pyarrow = pytest.importorskip("pyarrow") + +# TEMPORARY: hide warning from to_parquet +pytestmark = pytest.mark.filterwarnings("ignore:.*initial implementation of Parquet.*") + + +@pytest.fixture( + params=[ + "parquet", + pytest.param( + "feather", + marks=pytest.mark.skipif( + pyarrow.__version__ < LooseVersion("0.17.0"), + reason="needs pyarrow >= 0.17", + ), + ), + ] +) +def file_format(request): + if request.param == "parquet": + return read_parquet, GeoDataFrame.to_parquet + elif request.param == "feather": + return read_feather, GeoDataFrame.to_feather + + +def test_create_metadata(): + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + metadata = _create_metadata(df) + + assert isinstance(metadata, dict) + assert metadata["schema_version"] == METADATA_VERSION + assert metadata["creator"]["library"] == "geopandas" + assert metadata["creator"]["version"] == geopandas.__version__ + assert metadata["primary_column"] == "geometry" + assert "geometry" in metadata["columns"] + assert metadata["columns"]["geometry"]["crs"] == df.geometry.crs.to_wkt() + assert metadata["columns"]["geometry"]["encoding"] == "WKB" + + assert np.array_equal( + 
metadata["columns"]["geometry"]["bbox"], df.geometry.total_bounds + ) + + +def test_encode_metadata(): + metadata = {"a": "b"} + + expected = b'{"a": "b"}' + assert _encode_metadata(metadata) == expected + + +def test_decode_metadata(): + metadata_str = b'{"a": "b"}' + + expected = {"a": "b"} + assert _decode_metadata(metadata_str) == expected + + +def test_validate_dataframe(): + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + + # valid: should not raise ValueError + _validate_dataframe(df) + _validate_dataframe(df.set_index("iso_a3")) + + # add column with non-string type + df[0] = 1 + + # invalid: should raise ValueError + with pytest.raises(ValueError): + _validate_dataframe(df) + + with pytest.raises(ValueError): + _validate_dataframe(df.set_index(0)) + + # not a DataFrame: should raise ValueError + with pytest.raises(ValueError): + _validate_dataframe("not a dataframe") + + +def test_validate_metadata_valid(): + _validate_metadata( + { + "primary_column": "geometry", + "columns": {"geometry": {"crs": None, "encoding": "WKB"}}, + } + ) + + _validate_metadata( + { + "primary_column": "geometry", + "columns": {"geometry": {"crs": "WKT goes here", "encoding": "WKB"}}, + } + ) + + +@pytest.mark.parametrize( + "metadata,error", + [ + ({}, "Missing or malformed geo metadata in Parquet/Feather file"), + ( + {"primary_column": "foo"}, + "'geo' metadata in Parquet/Feather file is missing required key:", + ), + ( + {"primary_column": "foo", "columns": None}, + "'geo' metadata in Parquet/Feather file is missing required key", + ), + ( + {"primary_column": "foo", "columns": []}, + "'columns' in 'geo' metadata must be a dict", + ), + ( + {"primary_column": "foo", "columns": {"foo": {}}}, + ( + "'geo' metadata in Parquet/Feather file is missing required key 'crs' " + "for column 'foo'" + ), + ), + ( + {"primary_column": "foo", "columns": {"foo": {"crs": None}}}, + "'geo' metadata in Parquet/Feather file is missing required key", + ), + ( + 
{"primary_column": "foo", "columns": {"foo": {"encoding": None}}}, + "'geo' metadata in Parquet/Feather file is missing required key", + ), + ( + { + "primary_column": "foo", + "columns": {"foo": {"crs": None, "encoding": None}}, + }, + "Only WKB geometry encoding is supported", + ), + ( + { + "primary_column": "foo", + "columns": {"foo": {"crs": None, "encoding": "BKW"}}, + }, + "Only WKB geometry encoding is supported", + ), + ], +) +def test_validate_metadata_invalid(metadata, error): + with pytest.raises(ValueError, match=error): + _validate_metadata(metadata) + + +def test_encode_wkb(): + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + + encoded = _encode_wkb(df) + + # make sure original is not modified + assert isinstance(df, GeoDataFrame) + assert ( + encoded.geometry.iloc[0][:16] + == b"\x01\x06\x00\x00\x00\x03\x00\x00\x00\x01\x03\x00\x00\x00\x01\x00" + ) + + +# TEMPORARY: used to determine if pyarrow fails for roundtripping pandas data +# without geometries +def test_pandas_parquet_roundtrip1(tmpdir): + df = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) + + filename = os.path.join(str(tmpdir), "test.pq") + df.to_parquet(filename) + + pq_df = pd_read_parquet(filename) + + assert_frame_equal(df, pq_df) + + +@pytest.mark.parametrize( + "test_dataset", ["naturalearth_lowres", "naturalearth_cities", "nybb"] +) +def test_pandas_parquet_roundtrip2(test_dataset, tmpdir): + test_dataset = "naturalearth_lowres" + df = DataFrame(read_file(get_path(test_dataset)).drop(columns=["geometry"])) + + filename = os.path.join(str(tmpdir), "test.pq") + df.to_parquet(filename) + + pq_df = pd_read_parquet(filename) + + assert_frame_equal(df, pq_df) + + +@pytest.mark.parametrize( + "test_dataset", ["naturalearth_lowres", "naturalearth_cities", "nybb"] +) +def test_roundtrip(tmpdir, file_format, test_dataset): + """Writing to parquet should not raise errors, and should not alter original + GeoDataFrame + """ + reader, writer = file_format + + df 
= read_file(get_path(test_dataset)) + orig = df.copy() + + filename = os.path.join(str(tmpdir), "test.pq") + + # TEMP: Initial implementation should raise a UserWarning + with pytest.warns(UserWarning, match="initial implementation"): + writer(df, filename) + + assert os.path.exists(filename) + + # make sure that the original data frame is unaltered + assert_geodataframe_equal(df, orig) + + # make sure that we can roundtrip the data frame + pq_df = reader(filename) + + assert isinstance(pq_df, GeoDataFrame) + assert_geodataframe_equal(df, pq_df) + + +def test_index(tmpdir, file_format): + """Setting index=`True` should preserve index in output, and + setting index=`False` should drop index from output. + """ + reader, writer = file_format + + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)).set_index("iso_a3") + + filename = os.path.join(str(tmpdir), "test_with_index.pq") + writer(df, filename, index=True) + pq_df = reader(filename) + assert_geodataframe_equal(df, pq_df) + + filename = os.path.join(str(tmpdir), "drop_index.pq") + writer(df, filename, index=False) + pq_df = reader(filename) + assert_geodataframe_equal(df.reset_index(drop=True), pq_df) + + +@pytest.mark.parametrize("compression", ["snappy", "gzip", "brotli", None]) +def test_parquet_compression(compression, tmpdir): + """Using compression options should not raise errors, and should + return identical GeoDataFrame. 
+ """ + + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + + filename = os.path.join(str(tmpdir), "test.pq") + df.to_parquet(filename, compression=compression) + pq_df = read_parquet(filename) + + assert isinstance(pq_df, GeoDataFrame) + assert_geodataframe_equal(df, pq_df) + + +@pytest.mark.skipif( + pyarrow.__version__ < LooseVersion("0.17.0"), + reason="Feather only supported for pyarrow >= 0.17", +) +@pytest.mark.parametrize("compression", ["uncompressed", "lz4", "zstd"]) +def test_feather_compression(compression, tmpdir): + """Using compression options should not raise errors, and should + return identical GeoDataFrame. + """ + + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + + filename = os.path.join(str(tmpdir), "test.feather") + df.to_feather(filename, compression=compression) + pq_df = read_feather(filename) + + assert isinstance(pq_df, GeoDataFrame) + assert_geodataframe_equal(df, pq_df) + + +def test_parquet_multiple_geom_cols(tmpdir, file_format): + """If multiple geometry columns are present when written to parquet, + they should all be returned as such when read from parquet. + """ + reader, writer = file_format + + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + df["geom2"] = df.geometry.copy() + + filename = os.path.join(str(tmpdir), "test.pq") + writer(df, filename) + + assert os.path.exists(filename) + + pq_df = reader(filename) + + assert isinstance(pq_df, GeoDataFrame) + assert_geodataframe_equal(df, pq_df) + + assert_geoseries_equal(df.geom2, pq_df.geom2, check_geom_type=True) + + +def test_parquet_missing_metadata(tmpdir): + """Missing geo metadata, such as from a parquet file created + from a pandas DataFrame, will raise a ValueError. 
+ """ + + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + + # convert to DataFrame + df = DataFrame(df) + + # convert the geometry column so we can extract later + df["geometry"] = to_wkb(df["geometry"].values) + + filename = os.path.join(str(tmpdir), "test.pq") + + # use pandas to_parquet (no geo metadata) + df.to_parquet(filename) + + # missing metadata will raise ValueError + with pytest.raises( + ValueError, match="Missing geo metadata in Parquet/Feather file." + ): + read_parquet(filename) + + +@pytest.mark.parametrize( + "geo_meta,error", + [ + ({"geo": b""}, "Missing or malformed geo metadata in Parquet/Feather file"), + ( + {"geo": _encode_metadata({})}, + "Missing or malformed geo metadata in Parquet/Feather file", + ), + ( + {"geo": _encode_metadata({"foo": "bar"})}, + "'geo' metadata in Parquet/Feather file is missing required key", + ), + ], +) +def test_parquet_invalid_metadata(tmpdir, geo_meta, error): + """Has geo metadata with missing required fields will raise a ValueError. + + This requires writing the parquet file directly below, so that we can + control the metadata that is written for this test. + """ + + from pyarrow import parquet, Table + + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + + # convert to DataFrame and encode geometry to WKB + df = DataFrame(df) + df["geometry"] = to_wkb(df["geometry"].values) + + table = Table.from_pandas(df) + metadata = table.schema.metadata + metadata.update(geo_meta) + table = table.replace_schema_metadata(metadata) + + filename = os.path.join(str(tmpdir), "test.pq") + parquet.write_table(table, filename) + + with pytest.raises(ValueError, match=error): + read_parquet(filename) + + +def test_subset_columns(tmpdir, file_format): + """Reading a subset of columns should correctly decode selected geometry + columns. 
+ """ + reader, writer = file_format + + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + + filename = os.path.join(str(tmpdir), "test.pq") + writer(df, filename) + pq_df = reader(filename, columns=["name", "geometry"]) + + assert_geodataframe_equal(df[["name", "geometry"]], pq_df) + + with pytest.raises( + ValueError, match="No geometry columns are included in the columns read" + ): + reader(filename, columns=["name"]) + + +def test_parquet_repeat_columns(tmpdir): + """Reading repeated columns should return first value of each repeated column + """ + + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + + filename = os.path.join(str(tmpdir), "test.pq") + df.to_parquet(filename) + + columns = ["name", "name", "iso_a3", "name", "geometry"] + pq_df = read_parquet(filename, columns=columns) + + assert pq_df.columns.tolist() == ["name", "iso_a3", "geometry"] + + +def test_promote_secondary_geometry(tmpdir, file_format): + """Reading a subset of columns that does not include the primary geometry + column should promote the first geometry column present. 
+ """ + reader, writer = file_format + + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + df["geom2"] = df.geometry.copy() + + filename = os.path.join(str(tmpdir), "test.pq") + writer(df, filename) + pq_df = reader(filename, columns=["name", "geom2"]) + + assert_geodataframe_equal(df.set_geometry("geom2")[["name", "geom2"]], pq_df) + + df["geom3"] = df.geometry.copy() + + writer(df, filename) + with pytest.warns( + UserWarning, + match="Multiple non-primary geometry columns read from Parquet/Feather file.", + ): + pq_df = reader(filename, columns=["name", "geom2", "geom3"]) + + assert_geodataframe_equal( + df.set_geometry("geom2")[["name", "geom2", "geom3"]], pq_df + ) + + +def test_columns_no_geometry(tmpdir, file_format): + """Reading a parquet file that is missing all of the geometry columns + should raise a ValueError""" + reader, writer = file_format + + test_dataset = "naturalearth_lowres" + df = read_file(get_path(test_dataset)) + + filename = os.path.join(str(tmpdir), "test.pq") + writer(df, filename) + + with pytest.raises(ValueError): + reader(filename, columns=["name"]) + + +def test_missing_crs(tmpdir, file_format): + """If CRS is `None`, it should be properly handled + and remain `None` when read from parquet`. 
+ """ + reader, writer = file_format + + test_dataset = "naturalearth_lowres" + + df = read_file(get_path(test_dataset)) + df.crs = None + + filename = os.path.join(str(tmpdir), "test.pq") + writer(df, filename) + pq_df = reader(filename) + + assert pq_df.crs is None + + assert_geodataframe_equal(df, pq_df, check_crs=True) + + +@pytest.mark.skipif( + pyarrow.__version__ >= LooseVersion("0.17.0"), + reason="Feather only supported for pyarrow >= 0.17", +) +def test_feather_arrow_version(tmpdir): + df = read_file(get_path("naturalearth_lowres")) + filename = os.path.join(str(tmpdir), "test.feather") + + with pytest.raises( + ImportError, match="pyarrow >= 0.17 required for Feather support" + ): + df.to_feather(filename) diff -Nru python-geopandas-0.7.0/geopandas/io/tests/test_file.py python-geopandas-0.8.1/geopandas/io/tests/test_file.py --- python-geopandas-0.7.0/geopandas/io/tests/test_file.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/io/tests/test_file.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,8 +1,10 @@ from collections import OrderedDict import datetime from distutils.version import LooseVersion +import io import os import pathlib +import tempfile import sys import numpy as np @@ -38,6 +40,11 @@ @pytest.fixture +def file_path(): + return os.path.join(PACKAGE_DIR, "examples", "null_geom.geojson") + + +@pytest.fixture def df_points(): N = 10 crs = _CRS @@ -204,6 +211,12 @@ input_empty_df.to_file(tempfilename) +def test_to_file_privacy(tmpdir, df_nybb): + tempfilename = os.path.join(str(tmpdir), "test.shp") + with pytest.warns(DeprecationWarning): + geopandas.io.file.to_file(df_nybb, tempfilename) + + def test_to_file_schema(tmpdir, df_nybb): """ Ensure that the file is written according to the schema @@ -230,6 +243,36 @@ assert result_schema == schema +@pytest.mark.parametrize("driver,ext", driver_ext_pairs) +def test_append_file(tmpdir, df_nybb, df_null, driver, ext): + """ Test to_file with append mode and from_file """ + from 
fiona import supported_drivers + + if "a" not in supported_drivers[driver]: + return None + + tempfilename = os.path.join(str(tmpdir), "boros." + ext) + df_nybb.to_file(tempfilename, driver=driver) + df_nybb.to_file(tempfilename, mode="a", driver=driver) + # Read layer back in + df = GeoDataFrame.from_file(tempfilename) + assert "geometry" in df + assert len(df) == (5 * 2) + expected = pd.concat([df_nybb] * 2, ignore_index=True) + assert_geodataframe_equal(df, expected) + + # Write layer with null geometry out to file + tempfilename = os.path.join(str(tmpdir), "null_geom." + ext) + df_null.to_file(tempfilename, driver=driver) + df_null.to_file(tempfilename, mode="a", driver=driver) + # Read layer back in + df = GeoDataFrame.from_file(tempfilename) + assert "geometry" in df + assert len(df) == (2 * 2) + expected = pd.concat([df_null] * 2, ignore_index=True) + assert_geodataframe_equal(df, expected) + + # ----------------------------------------------------------------------------- # read_file tests # ----------------------------------------------------------------------------- @@ -259,6 +302,73 @@ assert isinstance(gdf, geopandas.GeoDataFrame) +@pytest.mark.skipif( + not _FIONA18, reason="support for file-like objects in fiona.open() added in 1.8" +) +def test_read_file_textio(file_path): + file_text_stream = open(file_path) + file_stringio = io.StringIO(open(file_path).read()) + gdf_text_stream = read_file(file_text_stream) + gdf_stringio = read_file(file_stringio) + assert isinstance(gdf_text_stream, geopandas.GeoDataFrame) + assert isinstance(gdf_stringio, geopandas.GeoDataFrame) + + +@pytest.mark.skipif( + not _FIONA18, reason="support for file-like objects in fiona.open() added in 1.8" +) +def test_read_file_bytesio(file_path): + file_binary_stream = open(file_path, "rb") + file_bytesio = io.BytesIO(open(file_path, "rb").read()) + gdf_binary_stream = read_file(file_binary_stream) + gdf_bytesio = read_file(file_bytesio) + assert isinstance(gdf_binary_stream, 
geopandas.GeoDataFrame) + assert isinstance(gdf_bytesio, geopandas.GeoDataFrame) + + +@pytest.mark.skipif( + not _FIONA18, reason="support for file-like objects in fiona.open() added in 1.8" +) +def test_read_file_raw_stream(file_path): + file_raw_stream = open(file_path, "rb", buffering=0) + gdf_raw_stream = read_file(file_raw_stream) + assert isinstance(gdf_raw_stream, geopandas.GeoDataFrame) + + +@pytest.mark.skipif( + not _FIONA18, reason="support for file-like objects in fiona.open() added in 1.8" +) +def test_read_file_pathlib(file_path): + path_object = pathlib.Path(file_path) + gdf_path_object = read_file(path_object) + assert isinstance(gdf_path_object, geopandas.GeoDataFrame) + + +@pytest.mark.skipif( + not _FIONA18, reason="support for file-like objects in fiona.open() added in 1.8" +) +def test_read_file_tempfile(): + temp = tempfile.TemporaryFile() + temp.write( + b""" + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [0, 0] + }, + "properties": { + "name": "Null Island" + } + } + """ + ) + temp.seek(0) + gdf_tempfile = geopandas.read_file(temp) + assert isinstance(gdf_tempfile, geopandas.GeoDataFrame) + temp.close() + + def test_read_file_filtered(df_nybb): full_df_shape = df_nybb.shape nybb_filename = geopandas.datasets.get_path("nybb") @@ -310,11 +420,35 @@ assert filtered_df_shape == (1, 5) -def read_file_filtered_rows_invalid(): +def test_read_file_filtered_rows_invalid(): with pytest.raises(TypeError): read_file(geopandas.datasets.get_path("nybb"), rows="not_a_slice") +@pytest.mark.skipif( + LooseVersion(fiona.__version__) < LooseVersion("1.8"), + reason="Ignore geometry only available in Fiona 1.8", +) +def test_read_file__ignore_geometry(): + pdf = geopandas.read_file( + geopandas.datasets.get_path("naturalearth_lowres"), ignore_geometry=True, + ) + assert "geometry" not in pdf.columns + assert isinstance(pdf, pd.DataFrame) and not isinstance(pdf, geopandas.GeoDataFrame) + + +@pytest.mark.skipif( + 
LooseVersion(fiona.__version__) < LooseVersion("1.8"), + reason="Ignore fields only available in Fiona 1.8", +) +def test_read_file__ignore_all_fields(): + gdf = geopandas.read_file( + geopandas.datasets.get_path("naturalearth_lowres"), + ignore_fields=["pop_est", "continent", "name", "iso_a3", "gdp_md_est"], + ) + assert gdf.columns.tolist() == ["geometry"] + + def test_read_file_filtered_with_gdf_boundary(df_nybb): full_df_shape = df_nybb.shape nybb_filename = geopandas.datasets.get_path("nybb") @@ -423,6 +557,11 @@ assert all(empty.columns == ["A", "Z", "geometry"]) +def test_read_file_privacy(tmpdir, df_nybb): + with pytest.warns(DeprecationWarning): + geopandas.io.file.read_file(geopandas.datasets.get_path("nybb")) + + class FileNumber(object): def __init__(self, tmpdir, base, ext): self.tmpdir = str(tmpdir) diff -Nru python-geopandas-0.7.0/geopandas/io/tests/test_pickle.py python-geopandas-0.8.1/geopandas/io/tests/test_pickle.py --- python-geopandas-0.7.0/geopandas/io/tests/test_pickle.py 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/io/tests/test_pickle.py 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,60 @@ +""" +See generate_legacy_storage_files.py for the creation of the legacy files. 
+ +""" +from distutils.version import LooseVersion +import glob +import os +import pathlib + +import pandas as pd + +import pyproj + +import pytest +from geopandas.testing import assert_geodataframe_equal +from geopandas import _compat as compat + + +DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data" + + +@pytest.fixture(scope="module") +def current_pickle_data(): + # our current version pickle data + from .generate_legacy_storage_files import create_pickle_data + + return create_pickle_data() + + +files = glob.glob(str(DATA_PATH / "pickle" / "*.pickle")) + + +@pytest.fixture(params=files, ids=[p.split("/")[-1] for p in files]) +def legacy_pickle(request): + return request.param + + +@pytest.mark.skipif( + compat.USE_PYGEOS or (str(pyproj.__version__) < LooseVersion("2.4")), + reason=( + "pygeos-based unpickling currently only works for pygeos-written files; " + "old pyproj versions can't read pickles from newer pyproj versions" + ), +) +def test_legacy_pickles(current_pickle_data, legacy_pickle): + result = pd.read_pickle(legacy_pickle) + + for name, value in result.items(): + expected = current_pickle_data[name] + assert_geodataframe_equal(value, expected) + + +def test_round_trip_current(tmpdir, current_pickle_data): + data = current_pickle_data + + for name, value in data.items(): + path = str(tmpdir / "{}.pickle".format(name)) + value.to_pickle(path) + result = pd.read_pickle(path) + assert_geodataframe_equal(result, value) diff -Nru python-geopandas-0.7.0/geopandas/io/tests/test_sql.py python-geopandas-0.8.1/geopandas/io/tests/test_sql.py --- python-geopandas-0.7.0/geopandas/io/tests/test_sql.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/io/tests/test_sql.py 2020-07-15 17:54:36.000000000 +0000 @@ -4,17 +4,15 @@ configuration. postGIS tests require a test database to have been setup; see geopandas.tests.util for more information. 
""" +import os + +import pandas as pd import geopandas -from geopandas import read_file, read_postgis +from geopandas import GeoDataFrame, read_file, read_postgis -from geopandas.tests.util import ( - connect, - connect_spatialite, - create_postgis, - create_spatialite, - validate_boro_df, -) +from geopandas.io.sql import _write_postgis as write_postgis +from geopandas.tests.util import create_postgis, create_spatialite, validate_boro_df import pytest @@ -25,126 +23,614 @@ return df +@pytest.fixture() +def connection_postgis(): + """ + Initiaties a connection to a postGIS database that must already exist. + See create_postgis for more information. + """ + psycopg2 = pytest.importorskip("psycopg2") + from psycopg2 import OperationalError + + dbname = "test_geopandas" + user = os.environ.get("PGUSER") + password = os.environ.get("PGPASSWORD") + host = os.environ.get("PGHOST") + port = os.environ.get("PGPORT") + try: + con = psycopg2.connect( + dbname=dbname, user=user, password=password, host=host, port=port + ) + except OperationalError: + pytest.skip("Cannot connect with postgresql database") + + yield con + con.close() + + +@pytest.fixture() +def engine_postgis(): + """ + Initiaties a connection engine to a postGIS database that must already exist. + """ + sqlalchemy = pytest.importorskip("sqlalchemy") + from sqlalchemy.engine.url import URL + + user = os.environ.get("PGUSER") + password = os.environ.get("PGPASSWORD") + host = os.environ.get("PGHOST") + port = os.environ.get("PGPORT") + dbname = "test_geopandas" + + try: + con = sqlalchemy.create_engine( + URL( + drivername="postgresql+psycopg2", + username=user, + database=dbname, + password=password, + host=host, + port=port, + ) + ) + con.begin() + except Exception: + pytest.skip("Cannot connect with postgresql database") + + yield con + con.dispose() + + +@pytest.fixture() +def connection_spatialite(): + """ + Return a memory-based SQLite3 connection with SpatiaLite enabled & initialized. 
+ + `The sqlite3 module must be built with loadable extension support + `_ and + `SpatiaLite `_ + must be available on the system as a SQLite module. + Packages available on Anaconda meet requirements. + + Exceptions + ---------- + ``AttributeError`` on missing support for loadable SQLite extensions + ``sqlite3.OperationalError`` on missing SpatiaLite + """ + sqlite3 = pytest.importorskip("sqlite3") + try: + with sqlite3.connect(":memory:") as con: + con.enable_load_extension(True) + con.load_extension("mod_spatialite") + con.execute("SELECT InitSpatialMetaData(TRUE)") + except Exception: + con.close() + pytest.skip("Cannot setup spatialite database") + + yield con + con.close() + + +def drop_table_if_exists(engine, table): + sqlalchemy = pytest.importorskip("sqlalchemy") + + if engine.has_table(table): + metadata = sqlalchemy.MetaData(engine) + metadata.reflect() + table = metadata.tables.get(table) + if table is not None: + table.drop(checkfirst=True) + + +@pytest.fixture +def df_mixed_single_and_multi(): + from shapely.geometry import Point, LineString, MultiLineString + + df = geopandas.GeoDataFrame( + { + "geometry": [ + LineString([(0, 0), (1, 1)]), + MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]), + Point(0, 1), + ] + }, + crs="epsg:4326", + ) + return df + + +@pytest.fixture +def df_geom_collection(): + from shapely.geometry import Point, LineString, Polygon, GeometryCollection + + df = geopandas.GeoDataFrame( + { + "geometry": [ + GeometryCollection( + [ + Polygon([(0, 0), (1, 1), (0, 1)]), + LineString([(0, 0), (1, 1)]), + Point(0, 0), + ] + ) + ] + }, + crs="epsg:4326", + ) + return df + + +@pytest.fixture +def df_linear_ring(): + from shapely.geometry import LinearRing + + df = geopandas.GeoDataFrame( + {"geometry": [LinearRing(((0, 0), (0, 1), (1, 1), (1, 0)))]}, crs="epsg:4326" + ) + return df + + +@pytest.fixture +def df_3D_geoms(): + from shapely.geometry import Point, LineString, Polygon + + df = geopandas.GeoDataFrame( + { + "geometry": [ + 
LineString([(0, 0, 0), (1, 1, 1)]), + Polygon([(0, 0, 0), (1, 1, 1), (0, 1, 1)]), + Point(0, 1, 2), + ] + }, + crs="epsg:4326", + ) + return df + + class TestIO: - def test_read_postgis_default(self, df_nybb): - con = connect("test_geopandas") - if con is None or not create_postgis(df_nybb): - raise pytest.skip() + def test_read_postgis_default(self, connection_postgis, df_nybb): + con = connection_postgis + create_postgis(con, df_nybb) - try: - sql = "SELECT * FROM nybb;" - df = read_postgis(sql, con) - finally: - con.close() + sql = "SELECT * FROM nybb;" + df = read_postgis(sql, con) validate_boro_df(df) # no crs defined on the created geodatabase, and none specified # by user; should not be set to 0, as from get_srid failure assert df.crs is None - def test_read_postgis_custom_geom_col(self, df_nybb): - con = connect("test_geopandas") + def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb): + con = connection_postgis geom_col = "the_geom" - if con is None or not create_postgis(df_nybb, geom_col=geom_col): - raise pytest.skip() + create_postgis(con, df_nybb, geom_col=geom_col) - try: - sql = "SELECT * FROM nybb;" - df = read_postgis(sql, con, geom_col=geom_col) - finally: - con.close() + sql = "SELECT * FROM nybb;" + df = read_postgis(sql, con, geom_col=geom_col) validate_boro_df(df) - def test_read_postgis_select_geom_as(self, df_nybb): + def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb): """Tests that a SELECT {geom} AS {some_other_geom} works.""" - con = connect("test_geopandas") + con = connection_postgis orig_geom = "geom" out_geom = "the_geom" - if con is None or not create_postgis(df_nybb, geom_col=orig_geom): - raise pytest.skip() + create_postgis(con, df_nybb, geom_col=orig_geom) - try: - sql = """SELECT borocode, boroname, shape_leng, shape_area, - {} as {} FROM nybb;""".format( - orig_geom, out_geom - ) - df = read_postgis(sql, con, geom_col=out_geom) - finally: - con.close() + sql = """SELECT borocode, 
boroname, shape_leng, shape_area, + {} as {} FROM nybb;""".format( + orig_geom, out_geom + ) + df = read_postgis(sql, con, geom_col=out_geom) validate_boro_df(df) - def test_read_postgis_get_srid(self, df_nybb): + def test_read_postgis_get_srid(self, connection_postgis, df_nybb): """Tests that an SRID can be read from a geodatabase (GH #451).""" + con = connection_postgis crs = "epsg:4269" df_reproj = df_nybb.to_crs(crs) - created = create_postgis(df_reproj, srid=4269) - con = connect("test_geopandas") - if con is None or not created: - raise pytest.skip() + create_postgis(con, df_reproj, srid=4269) - try: - sql = "SELECT * FROM nybb;" - df = read_postgis(sql, con) - finally: - con.close() + sql = "SELECT * FROM nybb;" + df = read_postgis(sql, con) validate_boro_df(df) assert df.crs == crs - def test_read_postgis_override_srid(self, df_nybb): + def test_read_postgis_override_srid(self, connection_postgis, df_nybb): """Tests that a user specified CRS overrides the geodatabase SRID.""" + con = connection_postgis orig_crs = df_nybb.crs - created = create_postgis(df_nybb, srid=4269) - con = connect("test_geopandas") - if con is None or not created: - raise pytest.skip() + create_postgis(con, df_nybb, srid=4269) - try: - sql = "SELECT * FROM nybb;" - df = read_postgis(sql, con, crs=orig_crs) - finally: - con.close() + sql = "SELECT * FROM nybb;" + df = read_postgis(sql, con, crs=orig_crs) validate_boro_df(df) assert df.crs == orig_crs - def test_read_postgis_null_geom(self, df_nybb): + def test_from_postgis_default(self, connection_postgis, df_nybb): + con = connection_postgis + create_postgis(con, df_nybb) + + sql = "SELECT * FROM nybb;" + df = GeoDataFrame.from_postgis(sql, con) + + validate_boro_df(df, case_sensitive=False) + + def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb): + con = connection_postgis + geom_col = "the_geom" + create_postgis(con, df_nybb, geom_col=geom_col) + + sql = "SELECT * FROM nybb;" + df = 
GeoDataFrame.from_postgis(sql, con, geom_col=geom_col) + + validate_boro_df(df, case_sensitive=False) + + def test_read_postgis_null_geom(self, connection_spatialite, df_nybb): """Tests that geometry with NULL is accepted.""" + con = connection_spatialite + geom_col = df_nybb.geometry.name + df_nybb.geometry.iat[0] = None + create_spatialite(con, df_nybb) + sql = ( + "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, " + 'AsEWKB("{0}") AS "{0}" FROM nybb'.format(geom_col) + ) + df = read_postgis(sql, con, geom_col=geom_col) + validate_boro_df(df) + + def test_read_postgis_binary(self, connection_spatialite, df_nybb): + """Tests that geometry read as binary is accepted.""" + con = connection_spatialite + geom_col = df_nybb.geometry.name + create_spatialite(con, df_nybb) + sql = ( + "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, " + 'ST_AsBinary("{0}") AS "{0}" FROM nybb'.format(geom_col) + ) + df = read_postgis(sql, con, geom_col=geom_col) + validate_boro_df(df) + + def test_read_postgis_chunksize(self, connection_postgis, df_nybb): + """Test chunksize argument""" + chunksize = 2 + con = connection_postgis + create_postgis(con, df_nybb) + + sql = "SELECT * FROM nybb;" + df = pd.concat(read_postgis(sql, con, chunksize=chunksize)) + + validate_boro_df(df) + # no crs defined on the created geodatabase, and none specified + # by user; should not be set to 0, as from get_srid failure + assert df.crs is None + + def test_read_postgis_privacy(self, connection_postgis, df_nybb): + con = connection_postgis + create_postgis(con, df_nybb) + + sql = "SELECT * FROM nybb;" + with pytest.warns(DeprecationWarning): + geopandas.io.sql.read_postgis(sql, con) + + def test_write_postgis_default(self, engine_postgis, df_nybb): + """Tests that GeoDataFrame can be written to PostGIS with defaults.""" + engine = engine_postgis + table = "nybb" + + # If table exists, delete it before trying to write with defaults + drop_table_if_exists(engine, table) + + # Write to 
db + write_postgis(df_nybb, con=engine, name=table, if_exists="fail") + # Validate + sql = "SELECT * FROM {table};".format(table=table) + df = read_postgis(sql, engine, geom_col="geometry") + validate_boro_df(df) + + def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb): + """ + Tests that uploading the same table raises error when: if_replace='fail'. + """ + engine = engine_postgis + + table = "nybb" + + # Ensure table exists + write_postgis(df_nybb, con=engine, name=table, if_exists="replace") + try: - con = connect_spatialite() - except Exception: - raise pytest.skip() - else: - geom_col = df_nybb.geometry.name - df_nybb.geometry.iat[0] = None - create_spatialite(con, df_nybb) - sql = ( - "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, " - 'AsEWKB("{0}") AS "{0}" FROM nybb'.format(geom_col) + write_postgis(df_nybb, con=engine, name=table, if_exists="fail") + except ValueError as e: + if "already exists" in str(e): + pass + else: + raise e + + def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb): + """ + Tests that replacing a table is possible when: if_replace='replace'. + """ + engine = engine_postgis + + table = "nybb" + + # Ensure table exists + write_postgis(df_nybb, con=engine, name=table, if_exists="replace") + # Overwrite + write_postgis(df_nybb, con=engine, name=table, if_exists="replace") + # Validate + sql = "SELECT * FROM {table};".format(table=table) + df = read_postgis(sql, engine, geom_col="geometry") + validate_boro_df(df) + + def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb): + """ + Tests that appending to existing table produces correct results when: + if_replace='append'. 
+ """ + engine = engine_postgis + + table = "nybb" + + orig_rows, orig_cols = df_nybb.shape + write_postgis(df_nybb, con=engine, name=table, if_exists="replace") + write_postgis(df_nybb, con=engine, name=table, if_exists="append") + # Validate + sql = "SELECT * FROM {table};".format(table=table) + df = read_postgis(sql, engine, geom_col="geometry") + new_rows, new_cols = df.shape + + # There should be twice as many rows in the new table + assert new_rows == orig_rows * 2, ( + "There should be {target} rows," + "found: {current}".format(target=orig_rows * 2, current=new_rows), + ) + # Number of columns should stay the same + assert new_cols == orig_cols, ( + "There should be {target} columns," + "found: {current}".format(target=orig_cols, current=new_cols), + ) + + def test_write_postgis_without_crs(self, engine_postgis, df_nybb): + """ + Tests that GeoDataFrame can be written to PostGIS without CRS information. + """ + engine = engine_postgis + + table = "nybb" + + # Write to db + df_nybb = df_nybb + df_nybb.crs = None + write_postgis(df_nybb, con=engine, name=table, if_exists="replace") + # Validate that srid is -1 + target_srid = engine.execute( + "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format( + schema="public", table=table, geom_col="geometry" ) - df = read_postgis(sql, con, geom_col=geom_col) - validate_boro_df(df) - finally: - if "con" in locals(): - con.close() + ).fetchone()[0] + assert target_srid == 0, "SRID should be 0, found %s" % target_srid + + def test_write_postgis_geometry_collection( + self, engine_postgis, df_geom_collection + ): + """ + Tests that writing a mix of different geometry types is possible. 
+ """ + engine = engine_postgis + + table = "geomtype_tests" + + write_postgis(df_geom_collection, con=engine, name=table, if_exists="replace") + + # Validate geometry type + sql = "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format( + table=table + ) + geom_type = engine.execute(sql).fetchone()[0] + sql = "SELECT * FROM {table};".format(table=table) + df = read_postgis(sql, engine, geom_col="geometry") + + assert geom_type.upper() == "GEOMETRYCOLLECTION" + assert df.geom_type.unique()[0] == "GeometryCollection" + + def test_write_postgis_mixed_geometry_types( + self, engine_postgis, df_mixed_single_and_multi + ): + """ + Tests that writing a mix of single and MultiGeometries is possible. + """ + engine = engine_postgis + + table = "geomtype_tests" + + write_postgis( + df_mixed_single_and_multi, con=engine, name=table, if_exists="replace" + ) + + # Validate geometry type + sql = "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format( + table=table + ) + res = engine.execute(sql).fetchall() + assert res[0][0].upper() == "LINESTRING" + assert res[1][0].upper() == "MULTILINESTRING" + assert res[2][0].upper() == "POINT" + + def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring): + """ + Tests that writing a LinearRing. + """ + engine = engine_postgis + + table = "geomtype_tests" + + write_postgis(df_linear_ring, con=engine, name=table, if_exists="replace") + + # Validate geometry type + sql = "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format( + table=table + ) + geom_type = engine.execute(sql).fetchone()[0] + + assert geom_type.upper() == "LINESTRING" + + def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi): + """ + Tests writing a LinearRing works. 
+ """ + engine = engine_postgis + + table = "geomtype_tests" + + write_postgis( + df_mixed_single_and_multi, + con=engine, + name=table, + if_exists="replace", + chunksize=1, + ) + # Validate row count + sql = "SELECT COUNT(geometry) FROM {table};".format(table=table) + row_cnt = engine.execute(sql).fetchone()[0] + assert row_cnt == 3 + + # Validate geometry type + sql = "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format( + table=table + ) + res = engine.execute(sql).fetchall() + assert res[0][0].upper() == "LINESTRING" + assert res[1][0].upper() == "MULTILINESTRING" + assert res[2][0].upper() == "POINT" + + def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb): + """ + Tests writing data to alternative schema. + """ + engine = engine_postgis + + table = "nybb" + schema_to_use = "test" + sql = "CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use) + engine.execute(sql) + + write_postgis( + df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use + ) + # Validate + sql = "SELECT * FROM {schema}.{table};".format( + schema=schema_to_use, table=table + ) + + df = read_postgis(sql, engine, geom_col="geometry") + validate_boro_df(df) + + def test_write_postgis_to_different_schema_when_table_exists( + self, engine_postgis, df_nybb + ): + """ + Tests writing data to alternative schema. 
+ """ + engine = engine_postgis + + table = "nybb" + schema_to_use = "test" + sql = "CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use) + engine.execute(sql) - def test_read_postgis_binary(self, df_nybb): - """Tests that geometry read as binary is accepted.""" try: - con = connect_spatialite() - except Exception: - raise pytest.skip() - else: - geom_col = df_nybb.geometry.name - create_spatialite(con, df_nybb) - sql = ( - "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, " - 'ST_AsBinary("{0}") AS "{0}" FROM nybb'.format(geom_col) + write_postgis( + df_nybb, con=engine, name=table, if_exists="fail", schema=schema_to_use ) - df = read_postgis(sql, con, geom_col=geom_col) + # Validate + sql = "SELECT * FROM {schema}.{table};".format( + schema=schema_to_use, table=table + ) + + df = read_postgis(sql, engine, geom_col="geometry") validate_boro_df(df) - finally: - if "con" in locals(): - con.close() + + # Should raise a ValueError when table exists + except ValueError: + pass + + # Try with replace flag on + write_postgis( + df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use + ) + # Validate + sql = "SELECT * FROM {schema}.{table};".format( + schema=schema_to_use, table=table + ) + + df = read_postgis(sql, engine, geom_col="geometry") + validate_boro_df(df) + + def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms): + """ + Tests writing a geometries with 3 dimensions works. + """ + engine = engine_postgis + + table = "geomtype_tests" + + write_postgis(df_3D_geoms, con=engine, name=table, if_exists="replace") + + # Check that all geometries have 3 dimensions + sql = "SELECT * FROM {table};".format(table=table) + df = read_postgis(sql, engine, geom_col="geometry") + assert list(df.geometry.has_z) == [True, True, True] + + def test_row_order(self, engine_postgis, df_nybb): + """ + Tests that the row order in db table follows the order of the original frame. 
+ """ + engine = engine_postgis + + table = "row_order_test" + correct_order = df_nybb["BoroCode"].tolist() + + write_postgis(df_nybb, con=engine, name=table, if_exists="replace") + + # Check that the row order matches + sql = "SELECT * FROM {table};".format(table=table) + df = read_postgis(sql, engine, geom_col="geometry") + assert df["BoroCode"].tolist() == correct_order + + def test_append_before_table_exists(self, engine_postgis, df_nybb): + """ + Tests that insert works with if_exists='append' when table does not exist yet. + """ + engine = engine_postgis + + table = "nybb" + # If table exists, delete it before trying to write with defaults + drop_table_if_exists(engine, table) + + write_postgis(df_nybb, con=engine, name=table, if_exists="append") + + # Check that the row order matches + sql = "SELECT * FROM {table};".format(table=table) + df = read_postgis(sql, engine, geom_col="geometry") + validate_boro_df(df) + + def test_append_with_different_crs(self, engine_postgis, df_nybb): + """ + Tests that the warning is raised if table CRS differs from frame. 
+ """ + engine = engine_postgis + + table = "nybb" + write_postgis(df_nybb, con=engine, name=table, if_exists="replace") + + # Reproject + df_nybb2 = df_nybb.to_crs(epsg=4326) + + # Should raise error when appending + with pytest.raises(ValueError, match="CRS of the target table"): + write_postgis(df_nybb2, con=engine, name=table, if_exists="append") diff -Nru python-geopandas-0.7.0/geopandas/plotting.py python-geopandas-0.8.1/geopandas/plotting.py --- python-geopandas-0.7.0/geopandas/plotting.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/plotting.py 2020-07-15 17:54:36.000000000 +0000 @@ -5,6 +5,23 @@ import geopandas +from distutils.version import LooseVersion + + +def deprecated(new): + """Helper to provide deprecation warning.""" + + def old(*args, **kwargs): + warnings.warn( + "{} is intended for internal ".format(new.__name__[1:]) + + "use only, and will be deprecated.", + DeprecationWarning, + stacklevel=2, + ) + new(*args, **kwargs) + + return old + def _flatten_multi_geoms(geoms, prefix="Multi"): """ @@ -17,7 +34,6 @@ Returns ------- - components : list of geometry component_index : index array @@ -40,7 +56,37 @@ return components, np.array(component_index) -def plot_polygon_collection( +def _expand_kwargs(kwargs, multiindex): + """ + Most arguments to the plot functions must be a (single) value, or a sequence + of values. This function checks each key-value pair in 'kwargs' and expands + it (in place) to the correct length/formats with help of 'multiindex', unless + the value appears to already be a valid (single) value for the key. + """ + from matplotlib.colors import is_color_like + from typing import Iterable + + for att, value in kwargs.items(): + if "color" in att: # color(s), edgecolor(s), facecolor(s) + if is_color_like(value): + continue + elif "linestyle" in att: # linestyle(s) + # A single linestyle can be 2-tuple of a number and an iterable. 
+ if ( + isinstance(value, tuple) + and len(value) == 2 + and isinstance(value[1], Iterable) + ): + continue + elif att in ["marker", "alpha"]: + # For these attributes, only a single value is allowed, so never expand. + continue + + if pd.api.types.is_list_like(value): + kwargs[att] = np.take(value, multiindex, axis=0) + + +def _plot_polygon_collection( ax, geoms, values=None, color=None, cmap=None, vmin=None, vmax=None, **kwargs ): """ @@ -48,32 +94,25 @@ Parameters ---------- - ax : matplotlib.axes.Axes where shapes will be plotted - geoms : a sequence of `N` Polygons and/or MultiPolygons (can be mixed) values : a sequence of `N` values, optional Values will be mapped to colors using vmin/vmax/cmap. They should have 1:1 correspondence with the geometries (not their components). Otherwise follows `color` / `facecolor` kwargs. - edgecolor : single color or sequence of `N` colors Color for the edge of the polygons - facecolor : single color or sequence of `N` colors Color to fill the polygons. Cannot be used together with `values`. - color : single color or sequence of `N` colors Sets both `edgecolor` and `facecolor` - **kwargs Additional keyword arguments passed to the collection Returns ------- - collection : matplotlib.collections.Collection that was plotted """ @@ -86,36 +125,23 @@ "'pip install descartes'." ) from matplotlib.collections import PatchCollection - from matplotlib.colors import is_color_like geoms, multiindex = _flatten_multi_geoms(geoms) if values is not None: values = np.take(values, multiindex, axis=0) # PatchCollection does not accept some kwargs. - if "markersize" in kwargs: - del kwargs["markersize"] + kwargs = { + att: value + for att, value in kwargs.items() + if att not in ["markersize", "marker"] + } + + # Add to kwargs for easier checking below. 
if color is not None: - if is_color_like(color): - kwargs["color"] = color - elif pd.api.types.is_list_like(color): - kwargs["color"] = np.take(color, multiindex, axis=0) - else: - raise TypeError( - "Color attribute has to be a single color or sequence of colors." - ) + kwargs["color"] = color - else: - for att in ["facecolor", "edgecolor"]: - if att in kwargs: - if not is_color_like(kwargs[att]): - if pd.api.types.is_list_like(kwargs[att]): - kwargs[att] = np.take(kwargs[att], multiindex, axis=0) - elif kwargs[att] is not None: - raise TypeError( - "Color attribute has to be a single color or sequence " - "of colors." - ) + _expand_kwargs(kwargs, multiindex) collection = PatchCollection([PolygonPatch(poly) for poly in geoms], **kwargs) @@ -130,7 +156,10 @@ return collection -def plot_linestring_collection( +plot_polygon_collection = deprecated(_plot_polygon_collection) + + +def _plot_linestring_collection( ax, geoms, values=None, color=None, cmap=None, vmin=None, vmax=None, **kwargs ): """ @@ -138,47 +167,38 @@ Parameters ---------- - ax : matplotlib.axes.Axes where shapes will be plotted - geoms : a sequence of `N` LineStrings and/or MultiLineStrings (can be mixed) - values : a sequence of `N` values, optional Values will be mapped to colors using vmin/vmax/cmap. They should have 1:1 correspondence with the geometries (not their components). - color : single color or sequence of `N` colors Cannot be used together with `values`. Returns ------- - collection : matplotlib.collections.Collection that was plotted - """ from matplotlib.collections import LineCollection - from matplotlib.colors import is_color_like geoms, multiindex = _flatten_multi_geoms(geoms) if values is not None: values = np.take(values, multiindex, axis=0) # LineCollection does not accept some kwargs. 
- if "markersize" in kwargs: - del kwargs["markersize"] + kwargs = { + att: value + for att, value in kwargs.items() + if att not in ["markersize", "marker"] + } - # color=None gives black instead of default color cycle + # Add to kwargs for easier checking below. if color is not None: - if is_color_like(color): - kwargs["color"] = color - elif pd.api.types.is_list_like(color): - kwargs["color"] = np.take(color, multiindex, axis=0) - else: - raise TypeError( - "Color attribute has to be a single color or sequence of colors." - ) + kwargs["color"] = color + + _expand_kwargs(kwargs, multiindex) segments = [np.array(linestring)[:, :2] for linestring in geoms] collection = LineCollection(segments, **kwargs) @@ -194,7 +214,10 @@ return collection -def plot_point_collection( +plot_linestring_collection = deprecated(_plot_linestring_collection) + + +def _plot_point_collection( ax, geoms, values=None, @@ -227,8 +250,6 @@ ------- collection : matplotlib.collections.Collection that was plotted """ - from matplotlib.colors import is_color_like - if values is not None and color is not None: raise ValueError("Can only specify one of 'values' and 'color' kwargs") @@ -245,26 +266,27 @@ if markersize is not None: kwargs["s"] = markersize + # Add to kwargs for easier checking below. if color is not None: - if not is_color_like(color): - if pd.api.types.is_list_like(color): - color = np.take(color, multiindex, axis=0) - else: - raise TypeError( - "Color attribute has to be a single color or sequence of colors." 
- ) + kwargs["color"] = color + if marker is not None: + kwargs["marker"] = marker + _expand_kwargs(kwargs, multiindex) if "norm" not in kwargs: - collection = ax.scatter( - x, y, color=color, vmin=vmin, vmax=vmax, cmap=cmap, marker=marker, **kwargs - ) + collection = ax.scatter(x, y, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs) else: - collection = ax.scatter(x, y, color=color, cmap=cmap, marker=marker, **kwargs) + collection = ax.scatter(x, y, cmap=cmap, **kwargs) return collection -def plot_series(s, cmap=None, color=None, ax=None, figsize=None, **style_kwds): +plot_point_collection = deprecated(_plot_point_collection) + + +def plot_series( + s, cmap=None, color=None, ax=None, figsize=None, aspect="auto", **style_kwds +): """ Plot a GeoSeries. @@ -291,6 +313,14 @@ figsize : pair of floats (default None) Size of the resulting matplotlib.figure.Figure. If the argument ax is given explicitly, figsize is ignored. + aspect : 'auto', 'equal' or float (default 'auto') + Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if + however data are not projected (coordinates are long/lat), the aspect is by + default set to 1/cos(s_y * pi/180) with s_y the y coordinate of the middle of + the GeoSeries (the mean of the y range of bounding box) so that a long/lat + square appears square in the middle of the plot. This implies an + Equirectangular projection. It can also be set manually (float) as the ratio + of y-unit to x-unit. 
**style_kwds : dict Color options to be passed on to the actual plot function, such as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``, @@ -326,7 +356,18 @@ if ax is None: fig, ax = plt.subplots(figsize=figsize) - ax.set_aspect("equal") + + if aspect == "auto": + if s.crs and s.crs.is_geographic: + bounds = s.total_bounds + y_coord = np.mean([bounds[1], bounds[3]]) + ax.set_aspect(1 / np.cos(y_coord * np.pi / 180)) + # formula ported from R package sp + # https://github.com/edzer/sp/blob/master/R/mapasp.R + else: + ax.set_aspect("equal") + else: + ax.set_aspect(aspect) if s.empty: warnings.warn( @@ -369,7 +410,7 @@ facecolor = color values_ = values[poly_idx] if cmap else None - plot_polygon_collection( + _plot_polygon_collection( ax, polys, values_, facecolor=facecolor, cmap=cmap, **style_kwds ) @@ -377,7 +418,7 @@ lines = expl_series[line_idx] if not lines.empty: values_ = values[line_idx] if cmap else None - plot_linestring_collection( + _plot_linestring_collection( ax, lines, values_, color=color, cmap=cmap, **style_kwds ) @@ -385,7 +426,9 @@ points = expl_series[point_idx] if not points.empty: values_ = values[point_idx] if cmap else None - plot_point_collection(ax, points, values_, color=color, cmap=cmap, **style_kwds) + _plot_point_collection( + ax, points, values_, color=color, cmap=cmap, **style_kwds + ) plt.draw() return ax @@ -407,8 +450,10 @@ markersize=None, figsize=None, legend_kwds=None, + categories=None, classification_kwds=None, missing_kwds=None, + aspect="auto", **style_kwds ): """ @@ -472,6 +517,17 @@ legend_kwds : dict (default None) Keyword arguments to pass to matplotlib.pyplot.legend() or matplotlib.pyplot.colorbar(). + Additional accepted keywords when `scheme` is specified: + + fmt : string + A formatting specification for the bin edges of the classes in the + legend. For example, to have no decimals: ``{"fmt": "{:.0f}"}``. + labels : list-like + A list of legend labels to override the auto-generated labels. 
+ Needs to have the same number of elements as the number of + classes (`k`). + categories : list-like + Ordered list-like object of categories to be used for categorical plot. classification_kwds : dict (default None) Keyword arguments to pass to mapclassify missing_kwds : dict (default None) @@ -479,9 +535,17 @@ to be passed on to geometries with missing values in addition to or overwriting other style kwds. If None, geometries with missing values are not plotted. + aspect : 'auto', 'equal' or float (default 'auto') + Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if + however data are not projected (coordinates are long/lat), the aspect is by + default set to 1/cos(df_y * pi/180) with df_y the y coordinate of the middle of + the GeoDataFrame (the mean of the y range of bounding box) so that a long/lat + square appears square in the middle of the plot. This implies an + Equirectangular projection. It can also be set manually (float) as the ratio + of y-unit to x-unit. **style_kwds : dict - Color options to be passed on to the actual plot function, such + Style options to be passed on to the actual plot function, such as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``, ``alpha``. @@ -523,7 +587,18 @@ if cax is not None: raise ValueError("'ax' can not be None if 'cax' is not.") fig, ax = plt.subplots(figsize=figsize) - ax.set_aspect("equal") + + if aspect == "auto": + if df.crs and df.crs.is_geographic: + bounds = df.total_bounds + y_coord = np.mean([bounds[1], bounds[3]]) + ax.set_aspect(1 / np.cos(y_coord * np.pi / 180)) + # formula ported from R package sp + # https://github.com/edzer/sp/blob/master/R/mapasp.R + else: + ax.set_aspect("equal") + else: + ax.set_aspect(aspect) if df.empty: warnings.warn( @@ -544,6 +619,7 @@ ax=ax, figsize=figsize, markersize=markersize, + aspect=aspect, **style_kwds ) @@ -554,23 +630,40 @@ "The dataframe and given column have different number of rows." 
) else: - values = np.asarray(column) + values = column else: - values = np.asarray(df[column]) + values = df[column] - if values.dtype is np.dtype("O"): + if pd.api.types.is_categorical_dtype(values.dtype): + if categories is not None: + raise ValueError( + "Cannot specify 'categories' when column has categorical dtype" + ) + categorical = True + elif values.dtype is np.dtype("O") or categories: categorical = True - nan_idx = pd.isna(values) + nan_idx = np.asarray(pd.isna(values), dtype="bool") # Define `values` as a Series if categorical: if cmap is None: cmap = "tab10" - categories = list(set(values[~nan_idx])) - categories.sort() - valuemap = dict((k, v) for (v, k) in enumerate(categories)) - values = np.array([valuemap[k] for k in values[~nan_idx]]) + + cat = pd.Categorical(values, categories=categories) + categories = list(cat.categories) + + # values missing in the Categorical but not in original values + missing = list(np.unique(values[~nan_idx & cat.isna()])) + if missing: + raise ValueError( + "Column contains values not listed in categories. 
" + "Missing categories: {}.".format(missing) + ) + + values = cat.codes[~nan_idx] + vmin = 0 if vmin is None else vmin + vmax = len(categories) - 1 if vmax is None else vmax if scheme is not None: if classification_kwds is None: @@ -581,11 +674,21 @@ binning = _mapclassify_choro(values[~nan_idx], scheme, **classification_kwds) # set categorical to True for creating the legend categorical = True - binedges = [values[~nan_idx].min()] + binning.bins.tolist() - categories = [ - "{0:.2f} - {1:.2f}".format(binedges[i], binedges[i + 1]) - for i in range(len(binedges) - 1) - ] + if legend_kwds is not None and "labels" in legend_kwds: + if len(legend_kwds["labels"]) != binning.k: + raise ValueError( + "Number of labels must match number of bins, " + "received {} labels for {} bins".format( + len(legend_kwds["labels"]), binning.k + ) + ) + else: + categories = list(legend_kwds.pop("labels")) + else: + fmt = "{:.2f}" + if legend_kwds is not None and "fmt" in legend_kwds: + fmt = legend_kwds.pop("fmt") + categories = binning.get_legend_classes(fmt) values = np.array(binning.yb) # fill values with placeholder where were NaNs originally to map them properly @@ -616,7 +719,7 @@ polys = expl_series[poly_idx & np.invert(nan_idx)] subset = values[poly_idx & np.invert(nan_idx)] if not polys.empty: - plot_polygon_collection( + _plot_polygon_collection( ax, polys, subset, vmin=mn, vmax=mx, cmap=cmap, **style_kwds ) @@ -624,7 +727,7 @@ lines = expl_series[line_idx & np.invert(nan_idx)] subset = values[line_idx & np.invert(nan_idx)] if not lines.empty: - plot_linestring_collection( + _plot_linestring_collection( ax, lines, subset, vmin=mn, vmax=mx, cmap=cmap, **style_kwds ) @@ -635,7 +738,7 @@ if isinstance(markersize, np.ndarray): markersize = np.take(markersize, multiindex, axis=0) markersize = markersize[point_idx & np.invert(nan_idx)] - plot_point_collection( + _plot_point_collection( ax, points, subset, @@ -660,6 +763,8 @@ if legend_kwds is None: legend_kwds = {} + if "fmt" in 
legend_kwds: + legend_kwds.pop("fmt") from matplotlib.lines import Line2D from matplotlib.colors import Normalize @@ -746,19 +851,21 @@ binning Binning objects that holds the Series with values replaced with class identifier and the bins. - """ try: import mapclassify.classifiers as classifiers + except ImportError: - try: - import pysal.viz.mapclassify.classifiers as classifiers - except ImportError: - raise ImportError( - "The 'mapclassify' or 'pysal' package is required to use the" - " 'scheme' keyword" - ) + raise ImportError( + "The 'mapclassify' >= 2.2.0 package is required to use the 'scheme' keyword" + ) + from mapclassify import __version__ as mc_version + if mc_version < LooseVersion("2.2.0"): + raise ImportError( + "The 'mapclassify' >= 2.2.0 package is required to " + "use the 'scheme' keyword" + ) schemes = {} for classifier in classifiers.CLASSIFIERS: schemes[classifier.lower()] = getattr(classifiers, classifier) @@ -803,15 +910,13 @@ ) if classification_kwds["k"] is not None: - try: - from inspect import getfullargspec as getspec - except ImportError: - from inspect import getargspec as getspec + from inspect import getfullargspec as getspec + spec = getspec(scheme_class.__init__) if "k" not in spec.args: del classification_kwds["k"] try: - binning = scheme_class(values, **classification_kwds) + binning = scheme_class(np.asarray(values), **classification_kwds) except TypeError: raise TypeError("Invalid keyword argument for %r " % scheme) return binning diff -Nru python-geopandas-0.7.0/geopandas/sindex.py python-geopandas-0.8.1/geopandas/sindex.py --- python-geopandas-0.7.0/geopandas/sindex.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/sindex.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,25 +1,481 @@ -from geopandas import base +from collections import namedtuple +from warnings import warn -if base.HAS_SINDEX: - from rtree.index import Index as RTreeIndex +from shapely.geometry.base import BaseGeometry +import pandas as pd 
+import numpy as np +from . import _compat as compat -class SpatialIndex(RTreeIndex): + +VALID_QUERY_PREDICATES = { + None, + "intersects", + "within", + "contains", + "overlaps", + "crosses", + "touches", +} + + +def has_sindex(): + """Dynamically checks for ability to generate spatial index. """ - A simple wrapper around rtree's RTree Index + try: + get_sindex_class() + return True + except ImportError: + return False + + +def get_sindex_class(): + """Dynamically chooses a spatial indexing backend. + + Required to comply with _compat.USE_PYGEOS. + The selection order goes PyGEOS > RTree > Error. """ + if compat.USE_PYGEOS: + return PyGEOSSTRTreeIndex + if compat.HAS_RTREE: + return RTreeIndex + raise ImportError( + "Spatial indexes require either `rtree` or `pygeos`. " + "See installation instructions at https://geopandas.org/install.html" + ) + + +if compat.HAS_RTREE: + + import rtree.index # noqa + from rtree.core import RTreeError # noqa + from shapely.prepared import prep # noqa + + class SpatialIndex(rtree.index.Index): + """Original rtree wrapper, kept for backwards compatibility. + """ + + def __init__(self, *args): + super().__init__(self, *args) + + @property + def size(self): + return len(self.leaves()[0][1]) + + @property + def is_empty(self): + if len(self.leaves()) > 1: + return False + return self.size < 1 + + class RTreeIndex(rtree.index.Index): + """A simple wrapper around rtree's RTree Index + + Parameters + ---------- + geometry : GeoSeries + GeoSeries from which to build the spatial index. 
+ """ + + # set of valid predicates for this spatial index + # by default, the global set + valid_query_predicates = VALID_QUERY_PREDICATES + + def __init__(self, geometry): + stream = ( + (i, item.bounds, idx) + for i, (idx, item) in enumerate(geometry.iteritems()) + if pd.notnull(item) and not item.is_empty + ) + try: + super().__init__(stream) + except RTreeError: + # What we really want here is an empty generator error, or + # for the bulk loader to log that the generator was empty + # and move on. + # See https://github.com/Toblerity/rtree/issues/20. + super().__init__() + + # store reference to geometries for predicate queries + self.geometries = geometry.geometry.values + # create a prepared geometry cache + self._prepared_geometries = np.array( + [None] * self.geometries.size, dtype=object + ) + + def query(self, geometry, predicate=None, sort=False): + """Compatibility layer for pygeos.query. + + This is not a vectorized function, if speed is important, + please use PyGEOS. + + Parameters + ---------- + geometry : shapely geometry + A single shapely geometry to query against the spatial index. + predicate : {None, 'intersects', 'within', 'contains', \ +'overlaps', 'crosses', 'touches'}, optional + If predicate is provided, the input geometry is + tested using the predicate function against each item + in the tree whose extent intersects the envelope of the + input geometry: predicate(input_geometry, tree_geometry). + If possible, prepared geometries are used to help + speed up the predicate operation. + sort : bool, default False + If True, the results will be sorted in ascending order. + If False, results are often sorted but there is no guarantee. + + Returns + ------- + matches : ndarray of shape (n_results, ) + Integer indices for matching geometries from the spatial index. 
+ """ + + # handle invalid predicates + if predicate not in self.valid_query_predicates: + raise ValueError( + "Got `predicate` = `{}`, `predicate` must be one of {}".format( + predicate, self.valid_query_predicates + ) + ) + + # handle empty / invalid geometries + if geometry is None: + # return an empty integer array, similar to pygeys.STRtree.query. + return np.array([], dtype=np.intp) + + if not isinstance(geometry, BaseGeometry): + raise TypeError( + "Got `geometry` of type `{}`, `geometry` must be ".format( + type(geometry) + ) + + "a shapely geometry." + ) + + if geometry.is_empty: + return np.array([], dtype=np.intp) + + # query tree + bounds = geometry.bounds # rtree operates on bounds + tree_idx = list(self.intersection(bounds, objects=False)) + + if not tree_idx: + return np.array([], dtype=np.intp) + + # Check predicate + # This is checked as input_geometry.predicate(tree_geometry) + # When possible, we use prepared geometries. + # Prepared geometries only support "intersects" and "contains" + # For the special case of "within", we are able to flip the + # comparison and check if tree_geometry.contains(input_geometry) + # to still take advantage of prepared geometries. + if predicate == "within": + # To use prepared geometries for within, + # we compare tree_geom.contains(input_geom) + # Since we are preparing the tree geometries, + # we cache them for multiple comparisons. 
+ res = [] + for index_in_tree in tree_idx: + if self._prepared_geometries[index_in_tree] is None: + # if not already prepared, prepare and cache + self._prepared_geometries[index_in_tree] = prep( + self.geometries[index_in_tree] + ) + if self._prepared_geometries[index_in_tree].contains(geometry): + res.append(index_in_tree) + tree_idx = res + elif predicate is not None: + # For the remaining predicates, + # we compare input_geom.predicate(tree_geom) + if predicate in ("contains", "intersects"): + # prepare this input geometry + geometry = prep(geometry) + tree_idx = [ + index_in_tree + for index_in_tree in tree_idx + if getattr(geometry, predicate)(self.geometries[index_in_tree]) + ] + + # sort if requested + if sort: + # sorted + return np.sort(np.array(tree_idx, dtype=np.intp)) + + # unsorted + return np.array(tree_idx, dtype=np.intp) + + def query_bulk(self, geometry, predicate=None, sort=False): + """Compatibility layer for pygeos.query_bulk. + + Iterates over `geometry` and queries index. + This operation is not vectorized and may be slow. + Use PyGEOS with `query_bulk` for speed. + + Parameters + ---------- + geometry : {GeoSeries, GeometryArray, numpy.array of PyGEOS geometries} + Accepts GeoPandas geometry iterables (GeoSeries, GeometryArray) + or a numpy array of PyGEOS geometries. + predicate : {None, 'intersects', 'within', 'contains', 'overlaps', \ +'crosses', 'touches'}, optional + If predicate is provided, the input geometries are tested using + the predicate function against each item in the tree whose extent + intersects the envelope of the each input geometry: + predicate(input_geometry, tree_geometry). If possible, prepared + geometries are used to help speed up the predicate operation. + sort : bool, default False + If True, results sorted lexicographically using + geometry's indexes as the primary key and the sindex's indexes as the + secondary key. If False, no additional sorting is applied. 
+ + Returns + ------- + ndarray with shape (2, n) + The first subarray contains input geometry integer indexes. + The second subarray contains tree geometry integer indexes. + """ + # Iterates over geometry, applying func. + tree_index = [] + input_geometry_index = [] + + for i, geo in enumerate(geometry): + res = self.query(geo, predicate=predicate, sort=sort) + tree_index.extend(res) + input_geometry_index.extend([i] * len(res)) + return np.vstack([input_geometry_index, tree_index]) + + def intersection(self, coordinates, objects=False): + """Find tree geometries that intersect the input coordinates. + + Parameters + ---------- + coordinates : sequence or array + Sequence of the form (min_x, min_y, max_x, max_y) + to query a rectangle or (x, y) to query a point. + objects : boolean, default False + If True, return the label based indexes. If False, integer indexes + are returned. + """ + if objects: + warn( + "`objects` is deprecated and will be removed in a future version. " + "Instead, use `iloc` to index your GeoSeries/GeoDataFrame using " + "integer indexes returned by `intersection`.", + FutureWarning, + ) + return super().intersection(coordinates, objects) + + @property + def size(self): + return len(self.leaves()[0][1]) + + @property + def is_empty(self): + return self.size == 0 + + def __len__(self): + return self.size + + +if compat.HAS_PYGEOS: + + from . import geoseries # noqa + from .array import GeometryArray, _shapely_to_geom # noqa + import pygeos # noqa + + class PyGEOSSTRTreeIndex(pygeos.STRtree): + """A simple wrapper around pygeos's STRTree. + + + Parameters + ---------- + geometry : GeoSeries + GeoSeries from which to build the spatial index. 
+ """ + + # helper for loc/label based indexing in `intersection` method + with_objects = namedtuple("with_objects", "object id") + + # set of valid predicates for this spatial index + # by default, the global set + valid_query_predicates = VALID_QUERY_PREDICATES + + def __init__(self, geometry): + # for compatibility with old RTree implementation, store ids/indexes + original_indexes = geometry.index + # set empty geometries to None to avoid segfault on GEOS <= 3.6 + # see: + # https://github.com/pygeos/pygeos/issues/146 + # https://github.com/pygeos/pygeos/issues/147 + non_empty = geometry.values.data.copy() + non_empty[pygeos.is_empty(non_empty)] = None + # set empty geometries to None to mantain indexing + self.objects = self.ids = original_indexes + super().__init__(non_empty) + # store geometries, including empty geometries for user access + self.geometries = geometry.values.data.copy() + + def query(self, geometry, predicate=None, sort=False): + """Wrapper for pygeos.query. + + This also ensures a deterministic (sorted) order for the results. + + Parameters + ---------- + geometry : single PyGEOS geometry + predicate : {None, 'intersects', 'within', 'contains', \ +'overlaps', 'crosses', 'touches'}, optional + If predicate is provided, the input geometry is tested + using the predicate function against each item in the + tree whose extent intersects the envelope of the input + geometry: predicate(input_geometry, tree_geometry). + sort : bool, default False + If True, the results will be sorted in ascending order. + If False, results are often sorted but there is no guarantee. + + Returns + ------- + matches : ndarray of shape (n_results, ) + Integer indices for matching geometries from the spatial index. + + See also + -------- + See PyGEOS.strtree documentation for more information. 
+ """ + + if predicate not in self.valid_query_predicates: + raise ValueError( + "Got `predicate` = `{}`; ".format(predicate) + + "`predicate` must be one of {}".format( + self.valid_query_predicates + ) + ) + + if isinstance(geometry, BaseGeometry): + geometry = _shapely_to_geom(geometry) + + matches = super().query(geometry=geometry, predicate=predicate) + + if sort: + return np.sort(matches) + + return matches + + def query_bulk(self, geometry, predicate=None, sort=False): + """Wrapper to expose underlaying pygeos objects to pygeos.query_bulk. + + This also allows a deterministic (sorted) order for the results. + + + Parameters + ---------- + geometry : {GeoSeries, GeometryArray, numpy.array of PyGEOS geometries} + Accepts GeoPandas geometry iterables (GeoSeries, GeometryArray) + or a numpy array of PyGEOS geometries. + predicate : {None, 'intersects', 'within', 'contains', \ +'overlaps', 'crosses', 'touches'}, optional + If predicate is provided, the input geometry is tested + using the predicate function against each item in the + index whose extent intersects the envelope of the input geometry: + predicate(input_geometry, tree_geometry). + sort : bool, default False + If True, results sorted lexicographically using + geometry's indexes as the primary key and the sindex's indexes as the + secondary key. If False, no additional sorting is applied. + + Returns + ------- + ndarray with shape (2, n) + The first subarray contains input geometry integer indexes. + The second subarray contains tree geometry integer indexes. + + See also + -------- + See PyGEOS.strtree documentation for more information. 
+ """ + + if predicate not in self.valid_query_predicates: + raise ValueError( + "Got `predicate` = `{}`, `predicate` must be one of {}".format( + predicate, self.valid_query_predicates + ) + ) + if isinstance(geometry, geoseries.GeoSeries): + geometry = geometry.values.data + elif isinstance(geometry, GeometryArray): + geometry = geometry.data + elif not isinstance(geometry, np.ndarray): + geometry = np.asarray(geometry) + + res = super().query_bulk(geometry, predicate) + + if sort: + # sort by first array (geometry) and then second (tree) + geo_res, tree_res = res + indexing = np.lexsort((tree_res, geo_res)) + return np.vstack((geo_res[indexing], tree_res[indexing])) + + return res + + def intersection(self, coordinates, objects=False): + """Wrapper for pygeos.query that uses the RTree API. + + Parameters + ---------- + coordinates : sequence or array + Sequence of the form (min_x, min_y, max_x, max_y) + to query a rectangle or (x, y) to query a point. + objects : boolean, default False + If True, return the label based indexes. If False, integer indexes + are returned. + """ + if objects: + warn( + "`objects` is deprecated and will be removed in a future version. " + "Instead, use `iloc` to index your GeoSeries/GeoDataFrame using " + "integer indexes returned by `intersection`.", + FutureWarning, + ) + + # convert bounds to geometry + # the old API uses tuples of bound, but pygeos uses geometries + try: + iter(coordinates) + except TypeError: + # likely not an iterable + # this is a check that rtree does, we mimic it + # to ensure a useful failure message + raise TypeError( + "Invalid coordinates, must be iterable in format " + "(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points). 
" + "Got `coordinates` = {}.".format(coordinates) + ) + + # need to convert tuple of bounds to a geometry object + if len(coordinates) == 4: + indexes = super().query(pygeos.box(*coordinates)) + elif len(coordinates) == 2: + indexes = super().query(pygeos.points(*coordinates)) + else: + raise TypeError( + "Invalid coordinates, must be iterable in format " + "(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points). " + "Got `coordinates` = {}.".format(coordinates) + ) + + if objects: + objs = self.objects[indexes].values + ids = self.ids[indexes] + return [ + self.with_objects(id=id, object=obj) for id, obj in zip(ids, objs) + ] + else: + return indexes + + @property + def size(self): + return len(self) - def __init__(self, *args): - if not base.HAS_SINDEX: - raise ImportError("SpatialIndex needs `rtree`") - RTreeIndex.__init__(self, *args) - - @property - def size(self): - return len(self.leaves()[0][1]) - - @property - def is_empty(self): - if len(self.leaves()) > 1: - return False - return self.size < 1 + @property + def is_empty(self): + return len(self) == 0 diff -Nru python-geopandas-0.7.0/geopandas/testing.py python-geopandas-0.8.1/geopandas/testing.py --- python-geopandas-0.7.0/geopandas/testing.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/testing.py 2020-07-15 17:54:36.000000000 +0000 @@ -61,7 +61,7 @@ def assert_geoseries_equal( left, right, - check_dtype=False, + check_dtype=True, check_index_type=False, check_series_type=True, check_less_precise=False, @@ -92,16 +92,14 @@ """ assert len(left) == len(right), "%d != %d" % (len(left), len(right)) - msg = "dtype should be a GeometryDtype, got {0}" - assert isinstance(left.dtype, GeometryDtype), msg.format(left.dtype) - assert isinstance(right.dtype, GeometryDtype), msg.format(left.dtype) + if check_dtype: + msg = "dtype should be a GeometryDtype, got {0}" + assert isinstance(left.dtype, GeometryDtype), msg.format(left.dtype) + assert isinstance(right.dtype, GeometryDtype), 
msg.format(left.dtype) if check_index_type: assert isinstance(left.index, type(right.index)) - if check_dtype: - assert left.dtype == right.dtype, "dtype: %s != %s" % (left.dtype, right.dtype) - if check_series_type: assert isinstance(left, GeoSeries) assert isinstance(left, type(right)) @@ -124,7 +122,7 @@ if not check_crs: with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "GeoSeries crs mismatch", UserWarning) + warnings.filterwarnings("ignore", "CRS mismatch", UserWarning) if check_less_precise: assert geom_almost_equals(left, right) else: @@ -214,14 +212,16 @@ ) # geometry comparison - assert_geoseries_equal( - left.geometry, - right.geometry, - check_dtype=check_dtype, - check_less_precise=check_less_precise, - check_geom_type=check_geom_type, - check_crs=False, - ) + for col, dtype in left.dtypes.iteritems(): + if isinstance(dtype, GeometryDtype): + assert_geoseries_equal( + left[col], + right[col], + check_dtype=check_dtype, + check_less_precise=check_less_precise, + check_geom_type=check_geom_type, + check_crs=check_crs, + ) # drop geometries and check remaining columns left2 = left.drop([left._geometry_column_name], axis=1) diff -Nru python-geopandas-0.7.0/geopandas/tests/test_api.py python-geopandas-0.8.1/geopandas/tests/test_api.py --- python-geopandas-0.7.0/geopandas/tests/test_api.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_api.py 2020-07-15 17:54:36.000000000 +0000 @@ -18,6 +18,7 @@ "sqlalchemy", "psycopg2", "geopy", + "geoalchemy2", } if PANDAS_GE_10: # pandas > 0.25 stopped importing matplotlib by default diff -Nru python-geopandas-0.7.0/geopandas/tests/test_array.py python-geopandas-0.8.1/geopandas/tests/test_array.py --- python-geopandas-0.7.0/geopandas/tests/test_array.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_array.py 2020-07-15 17:54:36.000000000 +0000 @@ -20,7 +20,10 @@ points_from_xy, to_wkb, to_wkt, + _check_crs, + _crs_mismatch_warn, ) 
+import geopandas._compat as compat import pytest @@ -70,15 +73,19 @@ gsz = [shapely.geometry.Point(x, x, x) for x in range(10)] geometry1 = geopandas.points_from_xy(df["x"], df["y"]) geometry2 = geopandas.points_from_xy(df["x"], df["y"], df["z"]) - assert geometry1 == gs - assert geometry2 == gsz + assert isinstance(geometry1, GeometryArray) + assert isinstance(geometry2, GeometryArray) + assert list(geometry1) == gs + assert list(geometry2) == gsz # using Series or numpy arrays or lists for s in [pd.Series(range(10)), np.arange(10), list(range(10))]: geometry1 = geopandas.points_from_xy(s, s) geometry2 = geopandas.points_from_xy(s, s, s) - assert geometry1 == gs - assert geometry2 == gsz + assert isinstance(geometry1, GeometryArray) + assert isinstance(geometry2, GeometryArray) + assert list(geometry1) == gs + assert list(geometry2) == gsz # using different lengths should throw error arr_10 = np.arange(10) @@ -129,10 +136,15 @@ assert all(v.equals(t) for v, t in zip(res, points_no_missing)) # missing values - L_wkb.extend([b"", None]) + # TODO(pygeos) does not support empty strings + if compat.USE_PYGEOS: + L_wkb.extend([None]) + else: + L_wkb.extend([b"", None]) res = from_wkb(L_wkb) assert res[-1] is None - assert res[-2] is None + if not compat.USE_PYGEOS: + assert res[-2] is None # single MultiPolygon multi_poly = shapely.geometry.MultiPolygon( @@ -149,6 +161,11 @@ assert isinstance(res, np.ndarray) np.testing.assert_array_equal(res, exp) + res = to_wkb(P, hex=True) + exp = np.array([p.wkb_hex for p in points_no_missing], dtype=object) + assert isinstance(res, np.ndarray) + np.testing.assert_array_equal(res, exp) + # missing values a = from_shapely([None, points_no_missing[0]]) res = to_wkb(a) @@ -182,10 +199,15 @@ assert all(v.almost_equals(t) for v, t in zip(res, points_no_missing)) # missing values - L_wkt.extend([f(""), None]) + # TODO(pygeos) does not support empty strings + if compat.USE_PYGEOS: + L_wkt.extend([None]) + else: + L_wkt.extend([f(""), 
None]) res = from_wkt(L_wkt) assert res[-1] is None - assert res[-2] is None + if not compat.USE_PYGEOS: + assert res[-2] is None # single MultiPolygon multi_poly = shapely.geometry.MultiPolygon( @@ -197,7 +219,7 @@ def test_to_wkt(): P = from_shapely(points_no_missing) - res = to_wkt(P) + res = to_wkt(P, rounding_precision=-1) exp = np.array([p.wkt for p in points_no_missing], dtype=object) assert isinstance(res, np.ndarray) np.testing.assert_array_equal(res, exp) @@ -215,13 +237,13 @@ ("covers", ()), ("crosses", ()), ("disjoint", ()), - ("equals", ()), + ("geom_equals", ()), ("intersects", ()), ("overlaps", ()), ("touches", ()), ("within", ()), - ("equals_exact", (0.1,)), - ("almost_equals", (3,)), + ("geom_equals_exact", (0.1,)), + ("geom_almost_equals", (3,)), ], ) def test_predicates_vector_scalar(attr, args): @@ -236,7 +258,9 @@ assert result.dtype == bool expected = [ - getattr(tri, attr)(other, *args) if tri is not None else na_value + getattr(tri, attr if "geom" not in attr else attr[5:])(other, *args) + if tri is not None + else na_value for tri in triangles ] @@ -252,13 +276,13 @@ ("covers", ()), ("crosses", ()), ("disjoint", ()), - ("equals", ()), + ("geom_equals", ()), ("intersects", ()), ("overlaps", ()), ("touches", ()), ("within", ()), - ("equals_exact", (0.1,)), - ("almost_equals", (3,)), + ("geom_equals_exact", (0.1,)), + ("geom_almost_equals", (3,)), ], ) def test_predicates_vector_vector(attr, args): @@ -294,12 +318,27 @@ elif a.is_empty or b.is_empty: expected.append(empty_value) else: - expected.append(getattr(a, attr)(b, *args)) + expected.append( + getattr(a, attr if "geom" not in attr else attr[5:])(b, *args) + ) assert result.tolist() == expected @pytest.mark.parametrize( + "attr,args", [("equals_exact", (0.1,)), ("almost_equals", (3,))], +) +def test_equals_deprecation(attr, args): + point = points[0] + tri = triangles[0] + + for other in [point, tri, shapely.geometry.Polygon()]: + with pytest.warns(FutureWarning): + result = getattr(T, 
attr)(other, *args) + assert result.tolist() == getattr(T, "geom_" + attr)(other, *args).tolist() + + +@pytest.mark.parametrize( "attr", [ "boundary", @@ -314,9 +353,11 @@ na_value = None if attr == "boundary": - # boundary raises for empty geometry - with pytest.raises(Exception): - T.boundary + # pygeos returns None for empty geometries + if not compat.USE_PYGEOS: + # boundary raises for empty geometry + with pytest.raises(Exception): + T.boundary values = triangle_no_missing + [None] A = from_shapely(values) @@ -325,7 +366,17 @@ A = T result = getattr(A, attr) - expected = [getattr(t, attr) if t is not None else na_value for t in values] + if attr == "exterior" and compat.USE_PYGEOS: + # TODO(pygeos) + # empty Polygon() has an exterior with shapely > 1.7, which gives + # empty LinearRing instead of None, + # but conversion to pygeos still results in empty GeometryCollection + expected = [ + getattr(t, attr) if t is not None and not t.is_empty else na_value + for t in values + ] + else: + expected = [getattr(t, attr) if t is not None else na_value for t in values] assert equal_geometries(result, expected) @@ -395,7 +446,7 @@ ) def test_unary_predicates(attr): na_value = False - if attr == "is_simple" and geos_version < (3, 8): + if attr == "is_simple" and geos_version < (3, 8) and not compat.USE_PYGEOS: # poly.is_simple raises an error for empty polygon for GEOS < 3.8 with pytest.raises(Exception): T.is_simple @@ -407,7 +458,16 @@ result = getattr(V, attr) - if attr == "is_ring": + if attr == "is_simple" and (geos_version < (3, 8) or compat.USE_PYGEOS): + # poly.is_simple raises an error for empty polygon for GEOS < 3.8 + # with shapely, pygeos always returns False for all GEOS versions + # But even for Shapely with GEOS >= 3.8, empty GeometryCollection + # returns True instead of False + expected = [ + getattr(t, attr) if t is not None and not t.is_empty else na_value + for t in vals + ] + elif attr == "is_ring": expected = [ getattr(t.exterior, attr) if t is 
not None and t.exterior is not None @@ -527,6 +587,11 @@ @pytest.mark.parametrize("join_style", [JOIN_STYLE.round, JOIN_STYLE.bevel]) @pytest.mark.parametrize("resolution", [16, 25]) def test_buffer(resolution, cap_style, join_style): + if compat.USE_PYGEOS: + # TODO(pygeos) need to further investigate why this test fails + if cap_style == 1 and join_style == 3: + pytest.skip("failing TODO") + na_value = None expected = [ p.buffer(0.1, resolution=resolution, cap_style=cap_style, join_style=join_style) @@ -537,7 +602,12 @@ result = P.buffer( 0.1, resolution=resolution, cap_style=cap_style, join_style=join_style ) + assert equal_geometries(expected, result) + dist = np.array([0.1] * len(P)) + result = P.buffer( + dist, resolution=resolution, cap_style=cap_style, join_style=join_style + ) assert equal_geometries(expected, result) @@ -631,10 +701,10 @@ ) expected = np.array( [ - bounds[:, 0].min(), # minx - bounds[:, 1].min(), # miny - bounds[:, 2].max(), # maxx - bounds[:, 3].max(), # maxy + np.nanmin(bounds[:, 0]), # minx + np.nanmin(bounds[:, 1]), # miny + np.nanmax(bounds[:, 2]), # maxx + np.nanmax(bounds[:, 3]), # maxy ] ) np.testing.assert_allclose(result, expected) @@ -702,7 +772,7 @@ # assert (T.data != T2.data).all() assert T2[-1] is None assert T2[-2].is_empty - assert T[:-2].equals(T2[:-2]).all() + assert T[:-2].geom_equals(T2[:-2]).all() def test_raise_on_bad_sizes(): @@ -745,3 +815,44 @@ result = arr.astype(np.dtype("U10")) assert result.dtype == np.dtype("U10") assert result[0] == multi_poly.wkt[:10] + + +def test_check_crs(): + t1 = T.copy() + t1.crs = 4326 + assert _check_crs(t1, T) is False + assert _check_crs(t1, t1) is True + assert _check_crs(t1, T, allow_none=True) is True + + +def test_crs_mismatch_warn(): + t1 = T.copy() + t2 = T.copy() + t1.crs = 4326 + t2.crs = 3857 + + # two different CRS + with pytest.warns(UserWarning, match="CRS mismatch between the CRS"): + _crs_mismatch_warn(t1, t2) + + # left None + with pytest.warns(UserWarning, 
match="CRS mismatch between the CRS"): + _crs_mismatch_warn(T, t2) + + # right None + with pytest.warns(UserWarning, match="CRS mismatch between the CRS"): + _crs_mismatch_warn(t1, T) + + +@pytest.mark.parametrize("NA", [None, np.nan]) +def test_isna(NA): + t1 = T.copy() + t1[0] = NA + assert t1[0] is None + + +@pytest.mark.skipif(not compat.PANDAS_GE_10, reason="pd.NA introduced in pandas 1.0") +def test_isna_pdNA(): + t1 = T.copy() + t1[0] = pd.NA + assert t1[0] is None diff -Nru python-geopandas-0.7.0/geopandas/tests/test_compat.py python-geopandas-0.8.1/geopandas/tests/test_compat.py --- python-geopandas-0.7.0/geopandas/tests/test_compat.py 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_compat.py 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,30 @@ +import pytest + +from geopandas._compat import import_optional_dependency + + +def test_import_optional_dependency_present(): + # pandas is not optional, but we know it is present + pandas = import_optional_dependency("pandas") + assert pandas is not None + + # module imported normally must be same + import pandas as pd + + assert pandas == pd + + +def test_import_optional_dependency_absent(): + with pytest.raises(ImportError, match="Missing optional dependency 'foo'"): + import_optional_dependency("foo") + + with pytest.raises(ImportError, match="foo is required"): + import_optional_dependency("foo", extra="foo is required") + + +@pytest.mark.parametrize( + "bad_import", [["foo"], 0, False, True, {}, {"foo"}, {"foo": "bar"}] +) +def test_import_optional_dependency_invalid(bad_import): + with pytest.raises(ValueError, match="Invalid module name"): + import_optional_dependency(bad_import) diff -Nru python-geopandas-0.7.0/geopandas/tests/test_config.py python-geopandas-0.8.1/geopandas/tests/test_config.py --- python-geopandas-0.7.0/geopandas/tests/test_config.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_config.py 2020-07-15 
17:54:36.000000000 +0000 @@ -6,7 +6,7 @@ def test_options(): assert "display_precision: " in repr(geopandas.options) - assert dir(geopandas.options) == ["display_precision"] + assert dir(geopandas.options) == ["display_precision", "use_pygeos"] with pytest.raises(AttributeError): geopandas.options.non_existing_option diff -Nru python-geopandas-0.7.0/geopandas/tests/test_crs.py python-geopandas-0.8.1/geopandas/tests/test_crs.py --- python-geopandas-0.7.0/geopandas/tests/test_crs.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_crs.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,11 +1,16 @@ from distutils.version import LooseVersion +import os + +import random import numpy as np +import pandas as pd -from shapely.geometry import Point +from shapely.geometry import Point, Polygon, LineString import pyproj -from geopandas import GeoDataFrame, points_from_xy +from geopandas import GeoSeries, GeoDataFrame, points_from_xy, datasets, read_file +from geopandas.array import from_shapely, from_wkb, from_wkt, GeometryArray from geopandas.testing import assert_geodataframe_equal import pytest @@ -106,7 +111,12 @@ @pytest.mark.filterwarnings("ignore:'\\+init:DeprecationWarning") +@pytest.mark.filterwarnings("ignore:'\\+init:FutureWarning") def test_transform2(epsg4326, epsg26918): + # with PROJ >= 7, the transformation using EPSG code vs proj4 string is + # slightly different due to use of grid files or not -> turn off network + # to not use grid files at all for this test + os.environ["PROJ_NETWORK"] = "OFF" df = df_epsg26918() lonlat = df.to_crs(**epsg4326) utm = lonlat.to_crs(**epsg26918) @@ -128,3 +138,476 @@ df = df_epsg26918() utm = df.to_crs(df.crs) assert_geodataframe_equal(df, utm, check_less_precise=True) + + +# Test CRS on GeometryArray level +class TestGeometryArrayCRS: + def setup_method(self): + self.osgb = pyproj.CRS(27700) + self.wgs = pyproj.CRS(4326) + + self.geoms = [Point(0, 0), Point(1, 1)] + self.polys = [ + 
Polygon([(random.random(), random.random()) for i in range(3)]) + for _ in range(10) + ] + self.arr = from_shapely(self.polys, crs=27700) + + def test_array(self): + arr = from_shapely(self.geoms) + arr.crs = 27700 + assert arr.crs == self.osgb + + arr = from_shapely(self.geoms, crs=27700) + assert arr.crs == self.osgb + + arr = GeometryArray(arr) + assert arr.crs == self.osgb + + arr = GeometryArray(arr, crs=4326) + assert arr.crs == self.wgs + + def test_series(self): + s = GeoSeries(crs=27700) + assert s.crs == self.osgb + assert s.values.crs == self.osgb + + arr = from_shapely(self.geoms) + s = GeoSeries(arr, crs=27700) + assert s.crs == self.osgb + assert s.values.crs == self.osgb + + # manually change CRS + s.crs = 4326 + assert s.crs == self.wgs + assert s.values.crs == self.wgs + + s = GeoSeries(self.geoms, crs=27700) + assert s.crs == self.osgb + assert s.values.crs == self.osgb + + arr = from_shapely(self.geoms, crs=27700) + s = GeoSeries(arr) + assert s.crs == self.osgb + assert s.values.crs == self.osgb + + with pytest.warns(FutureWarning): + s = GeoSeries(arr, crs=4326) + assert s.crs == self.osgb + + @pytest.mark.filterwarnings("ignore:Assigning CRS") + def test_dataframe(self): + arr = from_shapely(self.geoms, crs=27700) + df = GeoDataFrame(geometry=arr) + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + + arr = from_shapely(self.geoms) + s = GeoSeries(arr, crs=27700) + df = GeoDataFrame(geometry=s) + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + + # different passed CRS than array CRS is ignored + with pytest.warns(FutureWarning): + df = GeoDataFrame(geometry=s, crs=4326) + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + with pytest.warns(FutureWarning): + GeoDataFrame(geometry=s, crs=4326) + with pytest.warns(FutureWarning): + GeoDataFrame({"data": [1, 2], 
"geometry": s}, crs=4326) + with pytest.warns(FutureWarning): + GeoDataFrame(df, crs=4326).crs + + # manually change CRS + arr = from_shapely(self.geoms) + s = GeoSeries(arr, crs=27700) + df = GeoDataFrame(geometry=s) + df.crs = 4326 + assert df.crs == self.wgs + assert df.geometry.crs == self.wgs + assert df.geometry.values.crs == self.wgs + + df = GeoDataFrame(self.geoms, columns=["geom"], crs=27700) + assert df.crs == self.osgb + df = df.set_geometry("geom") + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + assert df.geom.crs == self.osgb + assert df.geom.values.crs == self.osgb + + df = GeoDataFrame(geometry=self.geoms, crs=27700) + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + + df = GeoDataFrame(crs=27700) + df = df.set_geometry(self.geoms) + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + + # new geometry with set CRS has priority over GDF CRS + df = GeoDataFrame(crs=27700) + df = df.set_geometry(self.geoms, crs=4326) + assert df.crs == self.wgs + assert df.geometry.crs == self.wgs + assert df.geometry.values.crs == self.wgs + + df = GeoDataFrame() + df = df.set_geometry(s) + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + + arr = from_shapely(self.geoms, crs=27700) + df = GeoDataFrame() + df = df.set_geometry(arr) + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + + arr = from_shapely(self.geoms) + df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326) + assert df.crs == self.wgs + assert df.geometry.crs == self.wgs + assert df.geometry.values.crs == self.wgs + + arr = from_shapely(self.geoms, crs=4326) + df = GeoDataFrame({"col1": [1, 2], "geometry": arr}) + assert df.crs == self.wgs + assert df.geometry.crs == self.wgs + assert 
df.geometry.values.crs == self.wgs + + # geometry column without geometry + df = GeoDataFrame({"geometry": [0, 1]}) + df.crs = 27700 + assert df.crs == self.osgb + + @pytest.mark.parametrize( + "scalar", [None, Point(0, 0), LineString([(0, 0), (1, 1)])] + ) + def test_scalar(self, scalar): + with pytest.warns(FutureWarning): + df = GeoDataFrame() + df.crs = 4326 + df["geometry"] = scalar + assert df.crs == self.wgs + assert df.geometry.crs == self.wgs + assert df.geometry.values.crs == self.wgs + + def test_read_file(self): + nybb_filename = datasets.get_path("nybb") + df = read_file(nybb_filename) + assert df.crs == pyproj.CRS(2263) + assert df.geometry.crs == pyproj.CRS(2263) + assert df.geometry.values.crs == pyproj.CRS(2263) + + def test_multiple_geoms(self): + arr = from_shapely(self.geoms, crs=27700) + s = GeoSeries(self.geoms, crs=4326) + df = GeoDataFrame(s, geometry=arr, columns=["col1"]) + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + assert df.col1.crs == self.wgs + assert df.col1.values.crs == self.wgs + + def test_multiple_geoms_set_geom(self): + arr = from_shapely(self.geoms, crs=27700) + s = GeoSeries(self.geoms, crs=4326) + df = GeoDataFrame(s, geometry=arr, columns=["col1"]) + df = df.set_geometry("col1") + assert df.crs == self.wgs + assert df.geometry.crs == self.wgs + assert df.geometry.values.crs == self.wgs + assert df["geometry"].crs == self.osgb + assert df["geometry"].values.crs == self.osgb + + def test_assign_cols(self): + arr = from_shapely(self.geoms, crs=27700) + s = GeoSeries(self.geoms, crs=4326) + df = GeoDataFrame(s, geometry=arr, columns=["col1"]) + df["geom2"] = s + df["geom3"] = s.values + df["geom4"] = from_shapely(self.geoms) + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + assert df.geom2.crs == self.wgs + assert df.geom2.values.crs == self.wgs + assert df.geom3.crs == self.wgs + assert 
df.geom3.values.crs == self.wgs + assert df.geom4.crs is None + assert df.geom4.values.crs is None + + def test_copy(self): + arr = from_shapely(self.geoms, crs=27700) + s = GeoSeries(self.geoms, crs=4326) + df = GeoDataFrame(s, geometry=arr, columns=["col1"]) + + arr_copy = arr.copy() + assert arr_copy.crs == arr.crs + + s_copy = s.copy() + assert s_copy.crs == s.crs + assert s_copy.values.crs == s.values.crs + + df_copy = df.copy() + assert df_copy.crs == df.crs + assert df_copy.geometry.crs == df.geometry.crs + assert df_copy.geometry.values.crs == df.geometry.values.crs + assert df_copy.col1.crs == df.col1.crs + assert df_copy.col1.values.crs == df.col1.values.crs + + def test_rename(self): + arr = from_shapely(self.geoms, crs=27700) + s = GeoSeries(self.geoms, crs=4326) + df = GeoDataFrame(s, geometry=arr, columns=["col1"]) + df = df.rename(columns={"geometry": "geom"}).set_geometry("geom") + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + + df = df.rename_geometry("geom2") + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + + df = df.rename(columns={"col1": "column1"}) + assert df.column1.crs == self.wgs + assert df.column1.values.crs == self.wgs + + def test_to_crs(self): + s = GeoSeries(self.geoms, crs=27700) + s = s.to_crs(4326) + assert s.crs == self.wgs + assert s.values.crs == self.wgs + + df = GeoDataFrame(geometry=s) + assert df.crs == self.wgs + df = df.to_crs(27700) + assert df.crs == self.osgb + assert df.geometry.crs == self.osgb + assert df.geometry.values.crs == self.osgb + + # make sure that only active geometry is transformed + arr = from_shapely(self.geoms, crs=4326) + df["col1"] = arr + df = df.to_crs(3857) + assert df.col1.crs == self.wgs + assert df.col1.values.crs == self.wgs + + def test_from_shapely(self): + arr = from_shapely(self.geoms, crs=27700) + assert arr.crs == self.osgb + + def test_from_wkb(self): + 
L_wkb = [p.wkb for p in self.geoms] + arr = from_wkb(L_wkb, crs=27700) + assert arr.crs == self.osgb + + def test_from_wkt(self): + L_wkt = [p.wkt for p in self.geoms] + arr = from_wkt(L_wkt, crs=27700) + assert arr.crs == self.osgb + + def test_points_from_xy(self): + df = pd.DataFrame([{"x": x, "y": x, "z": x} for x in range(10)]) + arr = points_from_xy(df["x"], df["y"], crs=27700) + assert arr.crs == self.osgb + + # setting CRS in GeoSeries should not set it in passed array without CRS + def test_original(self): + arr = from_shapely(self.geoms) + s = GeoSeries(arr, crs=27700) + assert arr.crs is None + assert s.crs == self.osgb + + def test_ops(self): + arr = self.arr + bound = arr.boundary + assert bound.crs == self.osgb + + cent = arr.centroid + assert cent.crs == self.osgb + + hull = arr.convex_hull + assert hull.crs == self.osgb + + envelope = arr.envelope + assert envelope.crs == self.osgb + + exterior = arr.exterior + assert exterior.crs == self.osgb + + representative_point = arr.representative_point() + assert representative_point.crs == self.osgb + + def test_binary_ops(self): + arr = self.arr + quads = [] + while len(quads) < 10: + geom = Polygon([(random.random(), random.random()) for i in range(4)]) + if geom.is_valid: + quads.append(geom) + + arr2 = from_shapely(quads, crs=27700) + + difference = arr.difference(arr2) + assert difference.crs == self.osgb + + intersection = arr.intersection(arr2) + assert intersection.crs == self.osgb + + symmetric_difference = arr.symmetric_difference(arr2) + assert symmetric_difference.crs == self.osgb + + union = arr.union(arr2) + assert union.crs == self.osgb + + def test_other(self): + arr = self.arr + + buffer = arr.buffer(5) + assert buffer.crs == self.osgb + + interpolate = arr.exterior.interpolate(0.1) + assert interpolate.crs == self.osgb + + simplify = arr.simplify(5) + assert simplify.crs == self.osgb + + @pytest.mark.parametrize( + "attr, arg", + [ + ("affine_transform", ([0, 1, 1, 0, 0, 0],)), + 
("translate", ()), + ("rotate", (10,)), + ("scale", ()), + ("skew", ()), + ], + ) + def test_affinity_methods(self, attr, arg): + result = getattr(self.arr, attr)(*arg) + + assert result.crs == self.osgb + + def test_slice(self): + s = GeoSeries(self.arr, crs=27700) + assert s.iloc[1:].values.crs == self.osgb + + df = GeoDataFrame({"col1": self.arr}, geometry=s) + assert df.iloc[1:].geometry.values.crs == self.osgb + assert df.iloc[1:].col1.values.crs == self.osgb + + def test_concat(self): + s = GeoSeries(self.arr, crs=27700) + assert pd.concat([s, s]).values.crs == self.osgb + + df = GeoDataFrame({"col1": from_shapely(self.geoms, crs=4326)}, geometry=s) + assert pd.concat([df, df]).geometry.values.crs == self.osgb + assert pd.concat([df, df]).col1.values.crs == self.wgs + + def test_merge(self): + arr = from_shapely(self.geoms, crs=27700) + s = GeoSeries(self.geoms, crs=4326) + df = GeoDataFrame({"col1": s}, geometry=arr) + df2 = GeoDataFrame({"col2": s}, geometry=arr).rename_geometry("geom") + merged = df.merge(df2, left_index=True, right_index=True) + assert merged.col1.values.crs == self.wgs + assert merged.geometry.values.crs == self.osgb + assert merged.col2.values.crs == self.wgs + assert merged.geom.values.crs == self.osgb + assert merged.crs == self.osgb + + # CRS should be assigned to geometry + def test_deprecation(self): + with pytest.warns(FutureWarning): + GeoDataFrame([], crs=27700) + + with pytest.warns(FutureWarning): + df = GeoDataFrame([]) + df.crs = 27700 + + # make sure that geometry column from list has CRS (__setitem__) + def test_setitem_geometry(self): + arr = from_shapely(self.geoms, crs=27700) + df = GeoDataFrame({"col1": [0, 1]}, geometry=arr) + + df["geometry"] = [g for g in df.geometry] + assert df.geometry.values.crs == self.osgb + + df2 = GeoDataFrame({"col1": [0, 1]}, geometry=arr) + df2["geometry"] = from_shapely(self.geoms, crs=4326) + assert df2.geometry.values.crs == self.wgs + + def test_astype(self): + arr = 
from_shapely(self.geoms, crs=27700) + df = GeoDataFrame({"col1": [0, 1]}, geometry=arr) + df2 = df.astype({"col1": str}) + assert df2.crs == self.osgb + + def test_apply(self): + s = GeoSeries(self.arr) + assert s.crs == 27700 + + # apply preserves the CRS if the result is a GeoSeries + result = s.apply(lambda x: x.centroid) + assert result.crs == 27700 + + +class TestSetCRS: + @pytest.mark.parametrize( + "constructor", + [ + lambda geoms, crs: GeoSeries(geoms, crs=crs), + lambda geoms, crs: GeoDataFrame(geometry=geoms, crs=crs), + ], + ids=["geoseries", "geodataframe"], + ) + def test_set_crs(self, constructor): + naive = constructor([Point(0, 0), Point(1, 1)], crs=None) + assert naive.crs is None + + # by default returns a copy + result = naive.set_crs(crs="EPSG:4326") + assert result.crs == "EPSG:4326" + assert naive.crs is None + + result = naive.set_crs(epsg=4326) + assert result.crs == "EPSG:4326" + assert naive.crs is None + + # with inplace=True + result = naive.set_crs(crs="EPSG:4326", inplace=True) + assert result is naive + assert result.crs == naive.crs == "EPSG:4326" + + # raise for non-naive when crs would be overridden + non_naive = constructor([Point(0, 0), Point(1, 1)], crs="EPSG:4326") + assert non_naive.crs == "EPSG:4326" + with pytest.raises(ValueError, match="already has a CRS"): + non_naive.set_crs("EPSG:3857") + + # allow for equal crs + result = non_naive.set_crs("EPSG:4326") + assert result.crs == "EPSG:4326" + + # replace with allow_override=True + result = non_naive.set_crs("EPSG:3857", allow_override=True) + assert non_naive.crs == "EPSG:4326" + assert result.crs == "EPSG:3857" + + result = non_naive.set_crs("EPSG:3857", allow_override=True, inplace=True) + assert non_naive.crs == "EPSG:3857" + assert result.crs == "EPSG:3857" + + # raise error when no crs is passed + with pytest.raises(ValueError): + naive.set_crs(crs=None, epsg=None) diff -Nru python-geopandas-0.7.0/geopandas/tests/test_extension_array.py 
python-geopandas-0.8.1/geopandas/tests/test_extension_array.py --- python-geopandas-0.7.0/geopandas/tests/test_extension_array.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_extension_array.py 2020-07-15 17:54:36.000000000 +0000 @@ -418,11 +418,15 @@ class TestMethods(extension_tests.BaseMethodsTests): - @no_sorting + @not_yet_implemented @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna): pass + @not_yet_implemented + def test_value_counts_with_normalize(self, data): + pass + @no_sorting def test_argsort(self, data_for_sorting): result = pd.Series(data_for_sorting).argsort() @@ -493,6 +497,18 @@ def test_argsort_missing_array(self): pass + @no_sorting + def test_argmin_argmax(self): + pass + + @no_sorting + def test_argmin_argmax_empty_array(self): + pass + + @no_sorting + def test_argmin_argmax_all_na(self): + pass + class TestCasting(extension_tests.BaseCastingTests): pass diff -Nru python-geopandas-0.7.0/geopandas/tests/test_geocode.py python-geopandas-0.8.1/geopandas/tests/test_geocode.py --- python-geopandas-0.7.0/geopandas/tests/test_geocode.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_geocode.py 2020-07-15 17:54:36.000000000 +0000 @@ -98,7 +98,14 @@ assert "address" in df row = df.loc["b"] - assert len(row["geometry"].coords) == 0 + # The shapely.geometry.Point() is actually a GeometryCollection, and thus + # gets converted to that in conversion to pygeos. When converting back + # on access, you now get a GeometryCollection object instead of Point, + # which has no coords + # see https://github.com/Toblerity/Shapely/issues/742/#issuecomment-545296708 + # TODO we should probably replace this with a missing value instead of point? 
+ # assert len(row["geometry"].coords) == 0 + assert row["geometry"].is_empty assert np.isnan(row["address"]) diff -Nru python-geopandas-0.7.0/geopandas/tests/test_geodataframe.py python-geopandas-0.8.1/geopandas/tests/test_geodataframe.py --- python-geopandas-0.7.0/geopandas/tests/test_geodataframe.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_geodataframe.py 2020-07-15 17:54:36.000000000 +0000 @@ -12,10 +12,10 @@ import geopandas from geopandas import GeoDataFrame, GeoSeries, read_file -from geopandas.array import GeometryArray, GeometryDtype +from geopandas.array import GeometryArray, GeometryDtype, from_shapely from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal -from geopandas.tests.util import PACKAGE_DIR, connect, create_postgis, validate_boro_df +from geopandas.tests.util import PACKAGE_DIR, validate_boro_df from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal import pytest @@ -62,6 +62,7 @@ with pytest.raises(CRSError): df.set_geometry(geom2, crs="dummy_crs") + @pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS") def test_geo_getitem(self): data = { "A": range(5), @@ -77,6 +78,12 @@ assert not isinstance(df["geometry"], GeoSeries) assert isinstance(df["location"], GeoSeries) + df["buff"] = df.buffer(1) + assert isinstance(df["buff"], GeoSeries) + + df["array"] = from_shapely([Point(x, y) for x, y in zip(range(5), range(5))]) + assert isinstance(df["array"], GeoSeries) + data["geometry"] = [Point(x + 1, y - 1) for x, y in zip(range(5), range(5))] df = GeoDataFrame(data, crs=self.crs) assert isinstance(df.geometry, GeoSeries) @@ -159,7 +166,7 @@ assert_geoseries_equal(df["geometry"], new_geom) # new crs - gs = GeoSeries(new_geom, crs="epsg:3857") + gs = new_geom.to_crs(crs="epsg:3857") df.geometry = gs assert df.crs == "epsg:3857" @@ -208,7 +215,7 @@ df2 = self.df.set_geometry(geom) assert self.df is not df2 - 
assert_geoseries_equal(df2.geometry, geom) + assert_geoseries_equal(df2.geometry, geom, check_crs=False) assert_geoseries_equal(self.df.geometry, original_geom) assert_geoseries_equal(self.df["geometry"], self.df.geometry) # unknown column @@ -420,6 +427,29 @@ assert type(df2) is GeoDataFrame assert self.df.crs == df2.crs + def test_to_file_crs(self): + """ + Ensure that the file is written according to the crs + if it is specified + + """ + tempfilename = os.path.join(self.tempdir, "crs.shp") + # save correct CRS + self.df.to_file(tempfilename) + df = GeoDataFrame.from_file(tempfilename) + assert df.crs == self.df.crs + # overwrite CRS + self.df.to_file(tempfilename, crs=3857) + df = GeoDataFrame.from_file(tempfilename) + assert df.crs == "epsg:3857" + + # specify CRS for gdf without one + df2 = self.df.copy() + df2.crs = None + df2.to_file(tempfilename, crs=2263) + df = GeoDataFrame.from_file(tempfilename) + assert df.crs == "epsg:2263" + def test_bool_index(self): # Find boros with 'B' in their name df = self.df[self.df["BoroName"].str.contains("B")] @@ -476,6 +506,23 @@ ) assert_frame_equal(expected, result) + def test_from_features_geom_interface_feature(self): + class Placemark(object): + def __init__(self, geom, val): + self.__geo_interface__ = { + "type": "Feature", + "properties": {"a": val}, + "geometry": geom.__geo_interface__, + } + + p1 = Point(1, 1) + f1 = Placemark(p1, 0) + p2 = Point(3, 3) + f2 = Placemark(p2, 0) + df = GeoDataFrame.from_features([f1, f2]) + assert sorted(df.columns) == ["a", "geometry"] + assert df.geometry.tolist() == [p1, p2] + def test_from_feature_collection(self): data = { "name": ["a", "b", "c"], @@ -501,33 +548,6 @@ res = GeoDataFrame.from_features(gdf) assert_frame_equal(res, expected) - def test_from_postgis_default(self): - con = connect("test_geopandas") - if con is None or not create_postgis(self.df): - raise pytest.skip() - - try: - sql = "SELECT * FROM nybb;" - df = GeoDataFrame.from_postgis(sql, con) - finally: - 
con.close() - - validate_boro_df(df, case_sensitive=False) - - def test_from_postgis_custom_geom_col(self): - con = connect("test_geopandas") - geom_col = "the_geom" - if con is None or not create_postgis(self.df, geom_col=geom_col): - raise pytest.skip() - - try: - sql = "SELECT * FROM nybb;" - df = GeoDataFrame.from_postgis(sql, con, geom_col=geom_col) - finally: - con.close() - - validate_boro_df(df, case_sensitive=False) - def test_dataframe_to_geodataframe(self): df = pd.DataFrame( {"A": range(len(self.df)), "location": list(self.df.geometry)}, @@ -600,6 +620,11 @@ result = list(df_only_numerical_cols.iterfeatures(na="keep"))[0] assert type(result["properties"]["Shape_Leng"]) is float + # geometry not set + df = GeoDataFrame({"values": [0, 1], "geom": [Point(0, 1), Point(1, 0)]}) + with pytest.raises(AttributeError): + list(df.iterfeatures()) + def test_geodataframe_geojson_no_bbox(self): geo = self.df._to_geo(na="null", show_bbox=False) assert "bbox" not in geo.keys() diff -Nru python-geopandas-0.7.0/geopandas/tests/test_geom_methods.py python-geopandas-0.8.1/geopandas/tests/test_geom_methods.py --- python-geopandas-0.7.0/geopandas/tests/test_geom_methods.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_geom_methods.py 2020-07-15 17:54:36.000000000 +0000 @@ -12,6 +12,7 @@ from geopandas.base import GeoPandasBase from geopandas.tests.util import assert_geoseries_equal, geom_almost_equals, geom_equals +from geopandas import _compat as compat from pandas.testing import assert_frame_equal, assert_series_equal import pytest @@ -29,6 +30,8 @@ self.t2 = Polygon([(0, 0), (1, 1), (0, 1)]) self.t3 = Polygon([(2, 0), (3, 0), (3, 1)]) self.sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) + self.t4 = Polygon([(0, 0), (3, 0), (3, 3), (0, 2)]) + self.t5 = Polygon([(2, 0), (3, 0), (3, 3), (2, 3)]) self.inner_sq = Polygon( [(0.25, 0.25), (0.75, 0.25), (0.75, 0.75), (0.25, 0.75)] ) @@ -66,6 +69,8 @@ self.l2 = LineString([(0, 0), (1, 0), (1, 
1), (0, 1)]) self.g5 = GeoSeries([self.l1, self.l2]) self.g6 = GeoSeries([self.p0, self.t3]) + self.g7 = GeoSeries([self.sq, self.t4]) + self.g8 = GeoSeries([self.t1, self.t5]) self.empty = GeoSeries([]) self.all_none = GeoSeries([None, None]) self.empty_poly = Polygon() @@ -83,6 +88,9 @@ self.gdf2 = GeoDataFrame( {"geometry": self.g1, "col3": [4, 5], "col4": ["rand", "string"]} ) + self.gdf3 = GeoDataFrame( + {"geometry": self.g3, "col3": [4, 5], "col4": ["rand", "string"]} + ) def _test_unary_real(self, op, expected, a): """ Tests for 'area', 'length', 'is_valid', etc. """ @@ -123,6 +131,7 @@ """ The operators only have GeoSeries on the left, but can have GeoSeries or GeoDataFrame on the right. + If GeoDataFrame is on the left, geometry column is used. """ if isinstance(expected, GeoPandasBase): @@ -220,9 +229,10 @@ def test_intersection(self): self._test_binary_topological("intersection", self.t1, self.g1, self.g2) - self._test_binary_topological( - "intersection", self.all_none, self.g1, self.empty - ) + with pytest.warns(UserWarning, match="The indices .+ different"): + self._test_binary_topological( + "intersection", self.all_none, self.g1, self.empty + ) def test_union_series(self): self._test_binary_topological("union", self.sq, self.g1, self.g2) @@ -275,6 +285,10 @@ expected = Series(np.array([0.5, np.nan]), index=self.na_none.index) self._test_unary_real("area", expected, self.na_none) + def test_area_crs_warn(self): + with pytest.warns(UserWarning, match="Geometry is in a geographic CRS"): + self.g4.area + def test_bounds(self): # Set columns to get the order right expected = DataFrame( @@ -324,6 +338,10 @@ expected = Series(np.array([2 + np.sqrt(2), np.nan]), index=self.na_none.index) self._test_unary_real("length", expected, self.na_none) + def test_length_crs_warn(self): + with pytest.warns(UserWarning, match="Geometry is in a geographic CRS"): + self.g4.length + def test_crosses(self): expected = [False, False, False, False, False, False, False] 
assert_array_dtype_equal(expected, self.g0.crosses(self.t1)) @@ -362,6 +380,10 @@ expected = Series(np.array([np.sqrt(4 ** 2 + 4 ** 2), np.nan]), self.g6.index) assert_array_dtype_equal(expected, self.g6.distance(self.na_none)) + def test_distance_crs_warning(self): + with pytest.warns(UserWarning, match="Geometry is in a geographic CRS"): + self.g4.distance(self.p0) + def test_intersects(self): expected = [True, True, True, True, True, False, False] assert_array_dtype_equal(expected, self.g0.intersects(self.t1)) @@ -396,6 +418,31 @@ expected = [True, True, True, True, True, False, False] assert_array_dtype_equal(expected, self.g0.within(self.sq)) + def test_covers_itself(self): + # Each polygon in a Series covers itself + res = self.g1.covers(self.g1) + exp = Series([True, True]) + assert_series_equal(res, exp) + + def test_covers(self): + res = self.g7.covers(self.g8) + exp = Series([True, False]) + assert_series_equal(res, exp) + + def test_covers_inverse(self): + res = self.g8.covers(self.g7) + exp = Series([False, False]) + assert_series_equal(res, exp) + + @pytest.mark.skipif( + not compat.USE_PYGEOS, + reason="covered_by is only implemented for pygeos, not shapely", + ) + def test_covered_by(self): + res = self.g1.covered_by(self.g1) + exp = Series([True, True]) + assert_series_equal(res, exp) + def test_is_valid(self): expected = Series(np.array([True] * len(self.g1)), self.g1.index) self._test_unary_real("is_valid", expected, self.g1) @@ -438,6 +485,10 @@ points = GeoSeries([point for i in range(3)]) assert_geoseries_equal(polygons.centroid, points) + def test_centroid_crs_warn(self): + with pytest.warns(UserWarning, match="Geometry is in a geographic CRS"): + self.g4.centroid + def test_convex_hull(self): # the convex hull of a square should be the same as the square squares = GeoSeries([self.sq for i in range(3)]) @@ -488,6 +539,12 @@ with pytest.raises(ValueError): self.g5.interpolate(distances) + def test_interpolate_crs_warning(self): + g5_crs = 
self.g5.copy() + g5_crs.crs = 4326 + with pytest.warns(UserWarning, match="Geometry is in a geographic CRS"): + g5_crs.interpolate(1) + def test_project(self): expected = Series([2.0, 1.5], index=self.g5.index) p = Point(1.0, 0.5) @@ -606,6 +663,16 @@ result = s.buffer(np.array([0, 0, 0])) assert_geoseries_equal(result, s) + def test_buffer_crs_warn(self): + with pytest.warns(UserWarning, match="Geometry is in a geographic CRS"): + self.g4.buffer(1) + + with pytest.warns(None) as record: + # do not warn for 0 + self.g4.buffer(0) + + assert len(record) == 0 + def test_envelope(self): e = self.g3.envelope assert np.all(e.geom_equals(self.sq)) @@ -653,27 +720,64 @@ expected_df = expected_df.set_index(expected_index) assert_frame_equal(test_df, expected_df) + @pytest.mark.parametrize("index_name", [None, "test"]) + def test_explode_geodataframe_level_1(self, index_name): + # GH1393 + s = GeoSeries([MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)]) + df = GeoDataFrame({"level_1": [1, 2], "geometry": s}) + df.index.name = index_name + + test_df = df.explode() + + expected_s = GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)]) + expected_df = GeoDataFrame({"level_1": [1, 1, 2], "geometry": expected_s}) + expected_index = MultiIndex( + [[0, 1], [0, 1]], # levels + [[0, 0, 1], [0, 1, 0]], # labels/codes + names=[index_name, None], + ) + expected_df = expected_df.set_index(expected_index) + if not compat.PANDAS_GE_024: + expected_df = expected_df[["level_1", "geometry"]] + assert_frame_equal(test_df, expected_df) + # # Test '&', '|', '^', and '-' - # The left can only be a GeoSeries. 
The right hand side can be a - # GeoSeries, GeoDataFrame or Shapely geometry # def test_intersection_operator(self): - self._test_binary_operator("__and__", self.t1, self.g1, self.g2) + with pytest.warns(DeprecationWarning): + self._test_binary_operator("__and__", self.t1, self.g1, self.g2) + with pytest.warns(DeprecationWarning): + self._test_binary_operator("__and__", self.t1, self.gdf1, self.g2) def test_union_operator(self): - self._test_binary_operator("__or__", self.sq, self.g1, self.g2) + with pytest.warns(DeprecationWarning): + self._test_binary_operator("__or__", self.sq, self.g1, self.g2) + with pytest.warns(DeprecationWarning): + self._test_binary_operator("__or__", self.sq, self.gdf1, self.g2) def test_union_operator_polygon(self): - self._test_binary_operator("__or__", self.sq, self.g1, self.t2) + with pytest.warns(DeprecationWarning): + self._test_binary_operator("__or__", self.sq, self.g1, self.t2) + with pytest.warns(DeprecationWarning): + self._test_binary_operator("__or__", self.sq, self.gdf1, self.t2) def test_symmetric_difference_operator(self): - self._test_binary_operator("__xor__", self.sq, self.g3, self.g4) + with pytest.warns(DeprecationWarning): + self._test_binary_operator("__xor__", self.sq, self.g3, self.g4) + with pytest.warns(DeprecationWarning): + self._test_binary_operator("__xor__", self.sq, self.gdf3, self.g4) def test_difference_series2(self): expected = GeoSeries([GeometryCollection(), self.t2]) - self._test_binary_operator("__sub__", expected, self.g1, self.g2) + with pytest.warns(DeprecationWarning): + self._test_binary_operator("__sub__", expected, self.g1, self.g2) + with pytest.warns(DeprecationWarning): + self._test_binary_operator("__sub__", expected, self.gdf1, self.g2) def test_difference_poly2(self): expected = GeoSeries([self.t1, self.t1]) - self._test_binary_operator("__sub__", expected, self.g1, self.t2) + with pytest.warns(DeprecationWarning): + self._test_binary_operator("__sub__", expected, self.g1, self.t2) + 
with pytest.warns(DeprecationWarning): + self._test_binary_operator("__sub__", expected, self.gdf1, self.t2) diff -Nru python-geopandas-0.7.0/geopandas/tests/test_geoseries.py python-geopandas-0.8.1/geopandas/tests/test_geoseries.py --- python-geopandas-0.7.0/geopandas/tests/test_geoseries.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_geoseries.py 2020-07-15 17:54:36.000000000 +0000 @@ -98,12 +98,39 @@ exp2 = pd.Series([np.nan, 1, 2], index=["A", "B", "C"]) assert_series_equal(res2, exp2) + def test_warning_if_not_aligned(self): + # GH-816 + # Test that warning is issued when operating on non-aligned series + + # _series_op + with pytest.warns(UserWarning, match="The indices .+ different"): + self.a1.contains(self.a2) + + # _geo_op + with pytest.warns(UserWarning, match="The indices .+ different"): + self.a1.union(self.a2) + + def test_no_warning_if_aligned(self): + # GH-816 + # Test that warning is not issued when operating on aligned series + a1, a2 = self.a1.align(self.a2) + + with pytest.warns(None) as warnings: + a1.contains(a2) # _series_op, explicitly aligned + self.g1.intersects(self.g2) # _series_op, implicitly aligned + a2.union(a1) # _geo_op, explicitly aligned + self.g2.intersection(self.g1) # _geo_op, implicitly aligned + + user_warnings = [w for w in warnings if w.category is UserWarning] + assert not user_warnings, user_warnings[0].message + def test_geom_equals(self): assert np.all(self.g1.geom_equals(self.g1)) assert_array_equal(self.g1.geom_equals(self.sq), [False, True]) def test_geom_equals_align(self): - a = self.a1.geom_equals(self.a2) + with pytest.warns(UserWarning, match="The indices .+ different"): + a = self.a1.geom_equals(self.a2) exp = pd.Series([False, True, False], index=["A", "B", "C"]) assert_series_equal(a, exp) @@ -286,6 +313,7 @@ for g in geoms: gs = GeoSeries(g) assert len(gs) == 1 + # accessing elements no longer give identical objects assert gs.iloc[0].equals(g) gs = GeoSeries(g, 
index=index) diff -Nru python-geopandas-0.7.0/geopandas/tests/test_overlay.py python-geopandas-0.8.1/geopandas/tests/test_overlay.py --- python-geopandas-0.7.0/geopandas/tests/test_overlay.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_overlay.py 2020-07-15 17:54:36.000000000 +0000 @@ -14,6 +14,11 @@ DATA = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data", "overlay") +pytestmark = pytest.mark.skipif( + not geopandas.sindex.has_sindex(), reason="overlay requires spatial index" +) + + @pytest.fixture def dfs(request): s1 = GeoSeries( @@ -160,6 +165,33 @@ assert len(result.columns) == len(expected.columns) result = result.reindex(columns=expected.columns) + # the ordering of the spatial index results causes slight deviations + # in the resultant geometries for multipolygons + # for more details on the discussion, see: + # https://github.com/geopandas/geopandas/pull/1338 + # https://github.com/geopandas/geopandas/issues/1337 + + # Temporary workaround below: + + # simplify multipolygon geometry comparison + # since the order of the constituent polygons depends on + # the ordering of spatial indexing results, we cannot + # compare symmetric_difference results directly when the + # resultant geometry is a multipolygon + + # first, check that all bounds and areas are approx equal + # this is a very rough check for multipolygon equality + pd.testing.assert_series_equal( + result.geometry.area, expected.geometry.area, check_less_precise=True + ) + pd.testing.assert_frame_equal( + result.geometry.bounds, expected.geometry.bounds, check_less_precise=True + ) + + # now drop multipolygons + result.geometry[result.geometry.geom_type == "MultiPolygon"] = None + expected.geometry[expected.geometry.geom_type == "MultiPolygon"] = None + assert_geodataframe_equal( result, expected, check_crs=False, check_column_type=False ) @@ -295,6 +327,14 @@ assert result.crs == crs +def test_crs_mismatch(dfs, how): + df1, df2 = dfs + df1.crs 
= 4326 + df2.crs = 3857 + with pytest.warns(UserWarning, match="CRS mismatch between the CRS"): + overlay(df1, df2, how=how) + + def test_empty_intersection(dfs): df1, df2 = dfs polys3 = GeoSeries( @@ -419,6 +459,15 @@ "{t}_{h}_{s}.geojson".format(t=geom_types, h=how, s=keep_geom_type), ) ) + + # the order depends on the spatial index used + # so we sort the resultant dataframes to get a consistent order + # independently of the spatial index implementation + assert all(expected.columns == result.columns), "Column name mismatch" + cols = list(set(result.columns) - set(["geometry"])) + expected = expected.sort_values(cols, axis=0).reset_index(drop=True) + result = result.sort_values(cols, axis=0).reset_index(drop=True) + assert_geodataframe_equal( result, expected, diff -Nru python-geopandas-0.7.0/geopandas/tests/test_pandas_methods.py python-geopandas-0.8.1/geopandas/tests/test_pandas_methods.py --- python-geopandas-0.7.0/geopandas/tests/test_pandas_methods.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_pandas_methods.py 2020-07-15 17:54:36.000000000 +0000 @@ -9,7 +9,7 @@ import geopandas from geopandas import GeoDataFrame, GeoSeries -from geopandas._compat import PANDAS_GE_024, PANDAS_GE_025 +from geopandas._compat import PANDAS_GE_024, PANDAS_GE_025, PANDAS_GE_11 from geopandas.array import from_shapely from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal @@ -314,6 +314,22 @@ assert_frame_equal(res, exp) +def test_equals(s, df): + # https://github.com/geopandas/geopandas/issues/1420 + s2 = s.copy() + assert s.equals(s2) is True + s2.iloc[0] = None + assert s.equals(s2) is False + + df2 = df.copy() + assert df.equals(df2) is True + df2.loc[0, "geometry"] = Point(10, 10) + assert df.equals(df2) is False + df2 = df.copy() + df2.loc[0, "value1"] = 10 + assert df.equals(df2) is False + + # Missing values @@ -444,11 +460,24 @@ # applying on the geometry column res = 
df.groupby("value2")["geometry"].apply(lambda x: x.cascaded_union) - exp = pd.Series( - [shapely.geometry.MultiPoint([(0, 0), (2, 2)]), Point(1, 1)], - index=pd.Index([1, 2], name="value2"), - name="geometry", - ) + if PANDAS_GE_11: + exp = GeoSeries( + [shapely.geometry.MultiPoint([(0, 0), (2, 2)]), Point(1, 1)], + index=pd.Index([1, 2], name="value2"), + name="geometry", + ) + else: + exp = pd.Series( + [shapely.geometry.MultiPoint([(0, 0), (2, 2)]), Point(1, 1)], + index=pd.Index([1, 2], name="value2"), + name="geometry", + ) + assert_series_equal(res, exp) + + # apply on geometry column not resulting in new geometry + res = df.groupby("value2")["geometry"].apply(lambda x: x.unary_union.area) + exp = pd.Series([0.0, 0.0], index=pd.Index([1, 2], name="value2"), name="geometry") + assert_series_equal(res, exp) @@ -460,6 +489,26 @@ assert_frame_equal(res, exp) +def test_apply(s): + # function that returns geometry preserves GeoSeries class + def geom_func(geom): + assert isinstance(geom, Point) + return geom + + result = s.apply(geom_func) + assert isinstance(result, GeoSeries) + assert_geoseries_equal(result, s) + + # function that returns non-geometry results in Series + def numeric_func(geom): + assert isinstance(geom, Point) + return geom.x + + result = s.apply(numeric_func) + assert not isinstance(result, GeoSeries) + assert_series_equal(result, pd.Series([0.0, 1.0, 2.0])) + + def test_apply_loc_len1(df): # subset of len 1 with loc -> bug in pandas with inconsistent Block ndim # resulting in bug in apply diff -Nru python-geopandas-0.7.0/geopandas/tests/test_plotting.py python-geopandas-0.8.1/geopandas/tests/test_plotting.py --- python-geopandas-0.7.0/geopandas/tests/test_plotting.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_plotting.py 2020-07-15 17:54:36.000000000 +0000 @@ -2,6 +2,7 @@ import warnings import numpy as np +import pandas as pd from shapely.affinity import rotate from shapely.geometry import ( @@ -128,7 
+129,7 @@ _check_colors( self.N, ax.collections[0].get_facecolors(), [(0.5, 0.5, 0.5, 0.5)] * self.N ) - with pytest.raises(TypeError): + with pytest.raises((ValueError, TypeError)): self.df.plot(color="not color") with warnings.catch_warnings(record=True) as _: # don't print warning @@ -155,11 +156,24 @@ ax = self.df.plot(column="values", markersize="values") assert (ax.collections[0].get_sizes() == self.df["values"]).all() + def test_markerstyle(self): + ax = self.df2.plot(marker="+") + expected = _style_to_vertices("+") + np.testing.assert_array_equal( + expected, ax.collections[0].get_paths()[0].vertices + ) + def test_style_kwargs(self): ax = self.points.plot(edgecolors="k") assert (ax.collections[0].get_edgecolor() == [0, 0, 0, 1]).all() + def test_style_kwargs_alpha(self): + ax = self.df.plot(alpha=0.7) + np.testing.assert_array_equal([0.7], ax.collections[0].get_alpha()) + with pytest.raises(TypeError): # no list allowed for alpha + ax = self.df.plot(alpha=[0.7, 0.2]) + def test_legend(self): with warnings.catch_warnings(record=True) as _: # don't print warning # legend ignored if color is given. 
@@ -243,6 +257,54 @@ # colors are repeated for all components within a MultiPolygon _check_colors(2, ax.collections[0].get_facecolors(), ["r"] * 10 + ["b"] * 10) + def test_multipoints_alpha(self): + ax = self.df2.plot(alpha=0.7) + np.testing.assert_array_equal([0.7], ax.collections[0].get_alpha()) + with pytest.raises(TypeError): # no list allowed for alpha + ax = self.df2.plot(alpha=[0.7, 0.2]) + + def test_categories(self): + self.df["cats_object"] = ["cat1", "cat2"] * 5 + self.df["nums"] = [1, 2] * 5 + self.df["singlecat_object"] = ["cat2"] * 10 + self.df["cats"] = pd.Categorical(["cat1", "cat2"] * 5) + self.df["singlecat"] = pd.Categorical( + ["cat2"] * 10, categories=["cat1", "cat2"] + ) + self.df["cats_ordered"] = pd.Categorical( + ["cat2", "cat1"] * 5, categories=["cat2", "cat1"] + ) + + ax1 = self.df.plot("cats_object", legend=True) + ax2 = self.df.plot("cats", legend=True) + ax3 = self.df.plot("singlecat_object", categories=["cat1", "cat2"], legend=True) + ax4 = self.df.plot("singlecat", legend=True) + ax5 = self.df.plot("cats_ordered", legend=True) + ax6 = self.df.plot("nums", categories=[1, 2], legend=True) + + point_colors1 = ax1.collections[0].get_facecolors() + for ax in [ax2, ax3, ax4, ax5, ax6]: + point_colors2 = ax.collections[0].get_facecolors() + np.testing.assert_array_equal(point_colors1[1], point_colors2[1]) + + legend1 = [x.get_markerfacecolor() for x in ax1.get_legend().get_lines()] + for ax in [ax2, ax3, ax4, ax5, ax6]: + legend2 = [x.get_markerfacecolor() for x in ax.get_legend().get_lines()] + np.testing.assert_array_equal(legend1, legend2) + + with pytest.raises(TypeError): + self.df.plot(column="cats_object", categories="non_list") + + with pytest.raises( + ValueError, match="Column contains values not listed in categories." 
+ ): + self.df.plot(column="cats_object", categories=["cat1"]) + + with pytest.raises( + ValueError, match="Cannot specify 'categories' when column has" + ): + self.df.plot(column="cats", categories=["cat1"]) + def test_misssing(self): self.df.loc[0, "values"] = np.nan ax = self.df.plot("values") @@ -325,7 +387,7 @@ _check_colors( self.N, ax.collections[0].get_colors(), [(0.5, 0.5, 0.5, 0.5)] * self.N ) - with pytest.raises(TypeError): + with pytest.raises((TypeError, ValueError)): self.df.plot(color="not color") with warnings.catch_warnings(record=True) as _: # don't print warning @@ -333,26 +395,54 @@ ax = self.df.plot(column="values", color="green") _check_colors(self.N, ax.collections[0].get_colors(), ["green"] * self.N) - def test_style_kwargs(self): - # linestyle (style patterns depend on linewidth, therefore pin to 1) - linestyle = "dashed" - linewidth = 1 - - ax = self.lines.plot(linestyle=linestyle, linewidth=linewidth) - exp_ls = _style_to_linestring_onoffseq(linestyle, linewidth) - for ls in ax.collections[0].get_linestyles(): - assert ls[0] == exp_ls[0] - assert ls[1] == exp_ls[1] - - ax = self.df.plot(linestyle=linestyle, linewidth=linewidth) - for ls in ax.collections[0].get_linestyles(): - assert ls[0] == exp_ls[0] - assert ls[1] == exp_ls[1] - - ax = self.df.plot(column="values", linestyle=linestyle, linewidth=linewidth) - for ls in ax.collections[0].get_linestyles(): - assert ls[0] == exp_ls[0] - assert ls[1] == exp_ls[1] + def test_style_kwargs_linestyle(self): + # single + for ax in [ + self.lines.plot(linestyle=":", linewidth=1), + self.df.plot(linestyle=":", linewidth=1), + self.df.plot(column="values", linestyle=":", linewidth=1), + ]: + assert [(0.0, [1.0, 1.65])] == ax.collections[0].get_linestyle() + + # tuple + ax = self.lines.plot(linestyle=(0, (3, 10, 1, 15)), linewidth=1) + assert [(0, [3, 10, 1, 15])] == ax.collections[0].get_linestyle() + + # multiple + ls = [("dashed", "dotted", "dashdot", "solid")[k % 4] for k in range(self.N)] + 
exp_ls = [_style_to_linestring_onoffseq(st, 1) for st in ls] + for ax in [ + self.lines.plot(linestyle=ls, linewidth=1), + self.lines.plot(linestyles=ls, linewidth=1), + self.df.plot(linestyle=ls, linewidth=1), + self.df.plot(column="values", linestyle=ls, linewidth=1), + ]: + np.testing.assert_array_equal(exp_ls, ax.collections[0].get_linestyle()) + + def test_style_kwargs_linewidth(self): + # single + for ax in [ + self.lines.plot(linewidth=2), + self.df.plot(linewidth=2), + self.df.plot(column="values", linewidth=2), + ]: + np.testing.assert_array_equal([2], ax.collections[0].get_linewidths()) + + # multiple + lw = [(0, 1, 2, 5.5, 10)[k % 5] for k in range(self.N)] + for ax in [ + self.lines.plot(linewidth=lw), + self.lines.plot(linewidths=lw), + self.df.plot(linewidth=lw), + self.df.plot(column="values", linewidth=lw), + ]: + np.testing.assert_array_equal(lw, ax.collections[0].get_linewidths()) + + def test_style_kwargs_alpha(self): + ax = self.df.plot(alpha=0.7) + np.testing.assert_array_equal([0.7], ax.collections[0].get_alpha()) + with pytest.raises(TypeError): # no list allowed for alpha + ax = self.df.plot(alpha=[0.7, 0.2]) def test_subplots_norm(self): # colors of subplots are the same as for plot (norm is applied) @@ -419,7 +509,7 @@ _check_colors(2, ax.collections[0].get_facecolors(), [(0.5, 0.5, 0.5)] * 2) ax = self.df.plot(color=(0.5, 0.5, 0.5, 0.5)) _check_colors(2, ax.collections[0].get_facecolors(), [(0.5, 0.5, 0.5, 0.5)] * 2) - with pytest.raises(TypeError): + with pytest.raises((TypeError, ValueError)): self.df.plot(color="not color") with warnings.catch_warnings(record=True) as _: # don't print warning @@ -445,7 +535,7 @@ actual_colors = ax.collections[0].get_facecolors() assert np.any(np.not_equal(actual_colors[0], actual_colors[1])) - def test_style_kwargs(self): + def test_style_kwargs_color(self): # facecolor overrides default cmap when color is not set ax = self.polys.plot(facecolor="k") @@ -483,6 +573,38 @@ _check_colors(2, 
ax.collections[0].get_facecolors(), [(0.5, 0.5, 0.5, 0.5)] * 2) _check_colors(2, ax.collections[0].get_edgecolors(), [(0.4, 0.5, 0.6, 0.5)] * 2) + def test_style_kwargs_linestyle(self): + # single + ax = self.df.plot(linestyle=":", linewidth=1) + assert [(0.0, [1.0, 1.65])] == ax.collections[0].get_linestyle() + + # tuple + ax = self.df.plot(linestyle=(0, (3, 10, 1, 15)), linewidth=1) + assert [(0, [3, 10, 1, 15])] == ax.collections[0].get_linestyle() + + # multiple + ls = ["dashed", "dotted"] + exp_ls = [_style_to_linestring_onoffseq(st, 1) for st in ls] + for ax in [ + self.df.plot(linestyle=ls, linewidth=1), + self.df.plot(linestyles=ls, linewidth=1), + ]: + assert exp_ls == ax.collections[0].get_linestyle() + + def test_style_kwargs_linewidth(self): + # single + ax = self.df.plot(linewidth=2) + np.testing.assert_array_equal([2], ax.collections[0].get_linewidths()) + # multiple + for ax in [self.df.plot(linewidth=[2, 4]), self.df.plot(linewidths=[2, 4])]: + np.testing.assert_array_equal([2, 4], ax.collections[0].get_linewidths()) + + # alpha + ax = self.df.plot(alpha=0.7) + np.testing.assert_array_equal([0.7], ax.collections[0].get_alpha()) + with pytest.raises(TypeError): # no list allowed for alpha + ax = self.df.plot(alpha=[0.7, 0.2]) + def test_legend_kwargs(self): ax = self.df.plot( @@ -516,7 +638,20 @@ assert ax.get_figure().axes[1].get_xlabel() == label_txt - def test_multipolygons(self): + def test_fmt_ignore(self): + # test if fmt is removed if scheme is not passed (it would raise Error) + # GH #1253 + + self.df.plot( + column="values", + categorical=True, + legend=True, + legend_kwds={"fmt": "{:.0f}"}, + ) + + self.df.plot(column="values", legend=True, legend_kwds={"fmt": "{:.0f}"}) + + def test_multipolygons_color(self): # MultiPolygons ax = self.df2.plot() @@ -533,6 +668,41 @@ # colors are repeated for all components within a MultiPolygon _check_colors(4, ax.collections[0].get_facecolors(), ["r", "r", "b", "b"]) + def 
test_multipolygons_linestyle(self): + # single + ax = self.df2.plot(linestyle=":", linewidth=1) + assert [(0.0, [1.0, 1.65])] == ax.collections[0].get_linestyle() + + # tuple + ax = self.df2.plot(linestyle=(0, (3, 10, 1, 15)), linewidth=1) + assert [(0, [3, 10, 1, 15])] == ax.collections[0].get_linestyle() + + # multiple + ls = ["dashed", "dotted"] + exp_ls = [_style_to_linestring_onoffseq(st, 1) for st in ls for i in range(2)] + for ax in [ + self.df2.plot(linestyle=ls, linewidth=1), + self.df2.plot(linestyles=ls, linewidth=1), + ]: + assert exp_ls == ax.collections[0].get_linestyle() + + def test_multipolygons_linewidth(self): + # single + ax = self.df2.plot(linewidth=2) + np.testing.assert_array_equal([2], ax.collections[0].get_linewidths()) + + # multiple + for ax in [self.df2.plot(linewidth=[2, 4]), self.df2.plot(linewidths=[2, 4])]: + np.testing.assert_array_equal( + [2, 2, 4, 4], ax.collections[0].get_linewidths() + ) + + def test_multipolygons_alpha(self): + ax = self.df2.plot(alpha=0.7) + np.testing.assert_array_equal([0.7], ax.collections[0].get_alpha()) + with pytest.raises(TypeError): # no list allowed for alpha + ax = self.df2.plot(alpha=[0.7, 0.2]) + def test_subplots_norm(self): # colors of subplots are the same as for plot (norm is applied) cmap = matplotlib.cm.viridis_r @@ -628,6 +798,102 @@ ax = self.df.plot(markersize=10) assert ax.collections[2].get_sizes() == [10] + def test_style_kwargs_linestyle(self): + # single + for ax in [ + self.series.plot(linestyle=":", linewidth=1), + self.df.plot(linestyle=":", linewidth=1), + ]: + assert [(0.0, [1.0, 1.65])] == ax.collections[0].get_linestyle() + + # tuple + ax = self.series.plot(linestyle=(0, (3, 10, 1, 15)), linewidth=1) + assert [(0, [3, 10, 1, 15])] == ax.collections[0].get_linestyle() + + @pytest.mark.skip( + reason="array-like style_kwds not supported for mixed geometry types (#1379)" + ) + def test_style_kwargs_linestyle_listlike(self): + # multiple + ls = ["solid", "dotted", "dashdot"] + 
exp_ls = [_style_to_linestring_onoffseq(style, 1) for style in ls] + for ax in [ + self.series.plot(linestyle=ls, linewidth=1), + self.series.plot(linestyles=ls, linewidth=1), + self.df.plot(linestyles=ls, linewidth=1), + ]: + np.testing.assert_array_equal(exp_ls, ax.collections[0].get_linestyle()) + + def test_style_kwargs_linewidth(self): + # single + ax = self.df.plot(linewidth=2) + np.testing.assert_array_equal([2], ax.collections[0].get_linewidths()) + + @pytest.mark.skip( + reason="array-like style_kwds not supported for mixed geometry types (#1379)" + ) + def test_style_kwargs_linewidth_listlike(self): + # multiple + for ax in [ + self.series.plot(linewidths=[2, 4, 5.5]), + self.series.plot(linewidths=[2, 4, 5.5]), + self.df.plot(linewidths=[2, 4, 5.5]), + ]: + np.testing.assert_array_equal( + [2, 4, 5.5], ax.collections[0].get_linewidths() + ) + + def test_style_kwargs_alpha(self): + ax = self.df.plot(alpha=0.7) + np.testing.assert_array_equal([0.7], ax.collections[0].get_alpha()) + with pytest.raises(TypeError): # no list allowed for alpha + ax = self.df.plot(alpha=[0.7, 0.2, 0.9]) + + +class TestGeographicAspect: + def setup_class(self): + pth = get_path("naturalearth_lowres") + df = read_file(pth) + self.north = df.loc[df.continent == "North America"] + self.north_proj = self.north.to_crs("ESRI:102008") + bounds = self.north.total_bounds + y_coord = np.mean([bounds[1], bounds[3]]) + self.exp = 1 / np.cos(y_coord * np.pi / 180) + + def test_auto(self): + ax = self.north.geometry.plot() + assert ax.get_aspect() == self.exp + ax2 = self.north_proj.geometry.plot() + assert ax2.get_aspect() in ["equal", 1.0] + ax = self.north.plot() + assert ax.get_aspect() == self.exp + ax2 = self.north_proj.plot() + assert ax2.get_aspect() in ["equal", 1.0] + ax3 = self.north.plot("pop_est") + assert ax3.get_aspect() == self.exp + ax4 = self.north_proj.plot("pop_est") + assert ax4.get_aspect() in ["equal", 1.0] + + def test_manual(self): + ax = 
self.north.geometry.plot(aspect="equal") + assert ax.get_aspect() in ["equal", 1.0] + ax2 = self.north.geometry.plot(aspect=0.5) + assert ax2.get_aspect() == 0.5 + ax3 = self.north_proj.geometry.plot(aspect=0.5) + assert ax3.get_aspect() == 0.5 + ax = self.north.plot(aspect="equal") + assert ax.get_aspect() in ["equal", 1.0] + ax2 = self.north.plot(aspect=0.5) + assert ax2.get_aspect() == 0.5 + ax3 = self.north_proj.plot(aspect=0.5) + assert ax3.get_aspect() == 0.5 + ax = self.north.plot("pop_est", aspect="equal") + assert ax.get_aspect() in ["equal", 1.0] + ax2 = self.north.plot("pop_est", aspect=0.5) + assert ax2.get_aspect() == 0.5 + ax3 = self.north_proj.plot("pop_est", aspect=0.5) + assert ax3.get_aspect() == 0.5 + class TestMapclassifyPlotting: @classmethod @@ -635,10 +901,9 @@ try: import mapclassify # noqa except ImportError: - try: - import pysal # noqa - except ImportError: - pytest.importorskip("mapclassify") + pytest.importorskip("mapclassify") + cls.classifiers = list(mapclassify.classifiers.CLASSIFIERS) + cls.classifiers.remove("UserDefined") pth = get_path("naturalearth_lowres") cls.df = read_file(pth) cls.df["NEGATIVES"] = np.linspace(-10, 10, len(cls.df.index)) @@ -651,18 +916,55 @@ ) labels = [t.get_text() for t in ax.get_legend().get_texts()] expected = [ - u"140.00 - 5217064.00", - u"5217064.00 - 19532732.33", - u"19532732.33 - 1379302771.00", + u"[ 140.00, 5217064.00]", + u"( 5217064.00, 19532732.33]", + u"( 19532732.33, 1379302771.00]", ] assert labels == expected + def test_bin_labels(self): + ax = self.df.plot( + column="pop_est", + scheme="QUANTILES", + k=3, + cmap="OrRd", + legend=True, + legend_kwds={"labels": ["foo", "bar", "baz"]}, + ) + labels = [t.get_text() for t in ax.get_legend().get_texts()] + expected = ["foo", "bar", "baz"] + assert labels == expected + + def test_invalid_labels_length(self): + with pytest.raises(ValueError): + self.df.plot( + column="pop_est", + scheme="QUANTILES", + k=3, + cmap="OrRd", + legend=True, + 
legend_kwds={"labels": ["foo", "bar"]}, + ) + def test_negative_legend(self): ax = self.df.plot( column="NEGATIVES", scheme="FISHER_JENKS", k=3, cmap="OrRd", legend=True ) labels = [t.get_text() for t in ax.get_legend().get_texts()] - expected = [u"-10.00 - -3.41", u"-3.41 - 3.30", u"3.30 - 10.00"] + expected = [u"[-10.00, -3.41]", u"( -3.41, 3.30]", u"( 3.30, 10.00]"] + assert labels == expected + + def test_fmt(self): + ax = self.df.plot( + column="NEGATIVES", + scheme="FISHER_JENKS", + k=3, + cmap="OrRd", + legend=True, + legend_kwds={"fmt": "{:.0f}"}, + ) + labels = [t.get_text() for t in ax.get_legend().get_texts()] + expected = [u"[-10, -3]", u"( -3, 3]", u"( 3, 10]"] assert labels == expected @pytest.mark.parametrize("scheme", ["FISHER_JENKS", "FISHERJENKS"]) @@ -670,6 +972,11 @@ ax = self.df.plot(column="NEGATIVES", scheme=scheme, k=3, legend=True) assert len(ax.get_legend().get_texts()) == 3 + def test_schemes(self): + # test if all available classifiers pass + for scheme in self.classifiers: + self.df.plot(column="pop_est", scheme=scheme, legend=True) + def test_classification_kwds(self): ax = self.df.plot( column="pop_est", @@ -680,7 +987,7 @@ legend=True, ) labels = [t.get_text() for t in ax.get_legend().get_texts()] - expected = ["140.00 - 9961396.00", "9961396.00 - 1379302771.00"] + expected = ["[ 140.00, 9961396.00]", "( 9961396.00, 1379302771.00]"] assert labels == expected def test_invalid_scheme(self): @@ -744,40 +1051,40 @@ # failing with matplotlib 1.4.3 (edge stays black even when specified) pytest.importorskip("matplotlib", "1.5.0") - from geopandas.plotting import plot_point_collection + from geopandas.plotting import _plot_point_collection, plot_point_collection from matplotlib.collections import PathCollection fig, ax = plt.subplots() - coll = plot_point_collection(ax, self.points) + coll = _plot_point_collection(ax, self.points) assert isinstance(coll, PathCollection) ax.cla() # default: single default matplotlib color - coll = 
plot_point_collection(ax, self.points) + coll = _plot_point_collection(ax, self.points) _check_colors(self.N, coll.get_facecolors(), [MPL_DFT_COLOR] * self.N) # edgecolor depends on matplotlib version # _check_colors(self.N, coll.get_edgecolors(), [MPL_DFT_COLOR]*self.N) ax.cla() # specify single other color - coll = plot_point_collection(ax, self.points, color="g") + coll = _plot_point_collection(ax, self.points, color="g") _check_colors(self.N, coll.get_facecolors(), ["g"] * self.N) _check_colors(self.N, coll.get_edgecolors(), ["g"] * self.N) ax.cla() # specify edgecolor/facecolor - coll = plot_point_collection(ax, self.points, facecolor="g", edgecolor="r") + coll = _plot_point_collection(ax, self.points, facecolor="g", edgecolor="r") _check_colors(self.N, coll.get_facecolors(), ["g"] * self.N) _check_colors(self.N, coll.get_edgecolors(), ["r"] * self.N) ax.cla() # list of colors - coll = plot_point_collection(ax, self.points, color=["r", "g", "b"]) + coll = _plot_point_collection(ax, self.points, color=["r", "g", "b"]) _check_colors(self.N, coll.get_facecolors(), ["r", "g", "b"]) _check_colors(self.N, coll.get_edgecolors(), ["r", "g", "b"]) ax.cla() - coll = plot_point_collection( + coll = _plot_point_collection( ax, self.points, color=[(0.5, 0.5, 0.5, 0.5), (0.1, 0.2, 0.3, 0.5), (0.4, 0.5, 0.6, 0.5)], @@ -795,15 +1102,19 @@ ax.cla() # not a color - with pytest.raises(TypeError): - plot_point_collection(ax, self.points, color="not color") + with pytest.raises((TypeError, ValueError)): + _plot_point_collection(ax, self.points, color="not color") + + # check DeprecationWarning + with pytest.warns(DeprecationWarning): + plot_point_collection(ax, self.points) def test_points_values(self): - from geopandas.plotting import plot_point_collection + from geopandas.plotting import _plot_point_collection # default colormap fig, ax = plt.subplots() - coll = plot_point_collection(ax, self.points, self.values) + coll = _plot_point_collection(ax, self.points, self.values) 
fig.canvas.draw_idle() cmap = plt.get_cmap() expected_colors = cmap(np.arange(self.N) / (self.N - 1)) @@ -812,36 +1123,39 @@ # _check_colors(self.N, coll.get_edgecolors(), expected_colors) def test_linestrings(self): - from geopandas.plotting import plot_linestring_collection + from geopandas.plotting import ( + _plot_linestring_collection, + plot_linestring_collection, + ) from matplotlib.collections import LineCollection fig, ax = plt.subplots() - coll = plot_linestring_collection(ax, self.lines) + coll = _plot_linestring_collection(ax, self.lines) assert isinstance(coll, LineCollection) ax.cla() # default: single default matplotlib color - coll = plot_linestring_collection(ax, self.lines) + coll = _plot_linestring_collection(ax, self.lines) _check_colors(self.N, coll.get_color(), [MPL_DFT_COLOR] * self.N) ax.cla() # specify single other color - coll = plot_linestring_collection(ax, self.lines, color="g") + coll = _plot_linestring_collection(ax, self.lines, color="g") _check_colors(self.N, coll.get_colors(), ["g"] * self.N) ax.cla() # specify edgecolor / facecolor - coll = plot_linestring_collection(ax, self.lines, facecolor="g", edgecolor="r") + coll = _plot_linestring_collection(ax, self.lines, facecolor="g", edgecolor="r") _check_colors(self.N, coll.get_facecolors(), ["g"] * self.N) _check_colors(self.N, coll.get_edgecolors(), ["r"] * self.N) ax.cla() # list of colors - coll = plot_linestring_collection(ax, self.lines, color=["r", "g", "b"]) + coll = _plot_linestring_collection(ax, self.lines, color=["r", "g", "b"]) _check_colors(self.N, coll.get_colors(), ["r", "g", "b"]) ax.cla() - coll = plot_linestring_collection( + coll = _plot_linestring_collection( ax, self.lines, color=[(0.5, 0.5, 0.5, 0.5), (0.1, 0.2, 0.3, 0.5), (0.4, 0.5, 0.6, 0.5)], @@ -854,7 +1168,7 @@ ax.cla() # pass through of kwargs - coll = plot_linestring_collection(ax, self.lines, linestyle="--", linewidth=1) + coll = _plot_linestring_collection(ax, self.lines, linestyle="--", linewidth=1) 
exp_ls = _style_to_linestring_onoffseq("dashed", 1) res_ls = coll.get_linestyle()[0] assert res_ls[0] == exp_ls[0] @@ -862,16 +1176,19 @@ ax.cla() # not a color - with pytest.raises(TypeError): - plot_linestring_collection(ax, self.lines, color="not color") + with pytest.raises((TypeError, ValueError)): + _plot_linestring_collection(ax, self.lines, color="not color") + # check DeprecationWarning + with pytest.warns(DeprecationWarning): + plot_linestring_collection(ax, self.lines) def test_linestrings_values(self): - from geopandas.plotting import plot_linestring_collection + from geopandas.plotting import _plot_linestring_collection fig, ax = plt.subplots() # default colormap - coll = plot_linestring_collection(ax, self.lines, self.values) + coll = _plot_linestring_collection(ax, self.lines, self.values) fig.canvas.draw_idle() cmap = plt.get_cmap() expected_colors = cmap(np.arange(self.N) / (self.N - 1)) @@ -879,7 +1196,7 @@ ax.cla() # specify colormap - coll = plot_linestring_collection(ax, self.lines, self.values, cmap="RdBu") + coll = _plot_linestring_collection(ax, self.lines, self.values, cmap="RdBu") fig.canvas.draw_idle() cmap = plt.get_cmap("RdBu") expected_colors = cmap(np.arange(self.N) / (self.N - 1)) @@ -887,7 +1204,7 @@ ax.cla() # specify vmin/vmax - coll = plot_linestring_collection(ax, self.lines, self.values, vmin=3, vmax=5) + coll = _plot_linestring_collection(ax, self.lines, self.values, vmin=3, vmax=5) fig.canvas.draw_idle() cmap = plt.get_cmap() expected_colors = cmap([0]) @@ -895,33 +1212,33 @@ ax.cla() def test_polygons(self): - from geopandas.plotting import plot_polygon_collection + from geopandas.plotting import _plot_polygon_collection, plot_polygon_collection from matplotlib.collections import PatchCollection fig, ax = plt.subplots() - coll = plot_polygon_collection(ax, self.polygons) + coll = _plot_polygon_collection(ax, self.polygons) assert isinstance(coll, PatchCollection) ax.cla() # default: single default matplotlib color - coll = 
plot_polygon_collection(ax, self.polygons) + coll = _plot_polygon_collection(ax, self.polygons) _check_colors(self.N, coll.get_facecolor(), [MPL_DFT_COLOR] * self.N) _check_colors(self.N, coll.get_edgecolor(), ["k"] * self.N) ax.cla() # default: color sets both facecolor and edgecolor - coll = plot_polygon_collection(ax, self.polygons, color="g") + coll = _plot_polygon_collection(ax, self.polygons, color="g") _check_colors(self.N, coll.get_facecolor(), ["g"] * self.N) _check_colors(self.N, coll.get_edgecolor(), ["g"] * self.N) ax.cla() # default: color can be passed as a list - coll = plot_polygon_collection(ax, self.polygons, color=["g", "b", "r"]) + coll = _plot_polygon_collection(ax, self.polygons, color=["g", "b", "r"]) _check_colors(self.N, coll.get_facecolor(), ["g", "b", "r"]) _check_colors(self.N, coll.get_edgecolor(), ["g", "b", "r"]) ax.cla() - coll = plot_polygon_collection( + coll = _plot_polygon_collection( ax, self.polygons, color=[(0.5, 0.5, 0.5, 0.5), (0.1, 0.2, 0.3, 0.5), (0.4, 0.5, 0.6, 0.5)], @@ -939,28 +1256,31 @@ ax.cla() # only setting facecolor keeps default for edgecolor - coll = plot_polygon_collection(ax, self.polygons, facecolor="g") + coll = _plot_polygon_collection(ax, self.polygons, facecolor="g") _check_colors(self.N, coll.get_facecolor(), ["g"] * self.N) _check_colors(self.N, coll.get_edgecolor(), ["k"] * self.N) ax.cla() # custom facecolor and edgecolor - coll = plot_polygon_collection(ax, self.polygons, facecolor="g", edgecolor="r") + coll = _plot_polygon_collection(ax, self.polygons, facecolor="g", edgecolor="r") _check_colors(self.N, coll.get_facecolor(), ["g"] * self.N) _check_colors(self.N, coll.get_edgecolor(), ["r"] * self.N) ax.cla() # not a color - with pytest.raises(TypeError): - plot_polygon_collection(ax, self.polygons, color="not color") + with pytest.raises((TypeError, ValueError)): + _plot_polygon_collection(ax, self.polygons, color="not color") + # check DeprecationWarning + with pytest.warns(DeprecationWarning): + 
plot_polygon_collection(ax, self.polygons) def test_polygons_values(self): - from geopandas.plotting import plot_polygon_collection + from geopandas.plotting import _plot_polygon_collection fig, ax = plt.subplots() # default colormap, edge is still black by default - coll = plot_polygon_collection(ax, self.polygons, self.values) + coll = _plot_polygon_collection(ax, self.polygons, self.values) fig.canvas.draw_idle() cmap = plt.get_cmap() exp_colors = cmap(np.arange(self.N) / (self.N - 1)) @@ -970,7 +1290,7 @@ ax.cla() # specify colormap - coll = plot_polygon_collection(ax, self.polygons, self.values, cmap="RdBu") + coll = _plot_polygon_collection(ax, self.polygons, self.values, cmap="RdBu") fig.canvas.draw_idle() cmap = plt.get_cmap("RdBu") exp_colors = cmap(np.arange(self.N) / (self.N - 1)) @@ -978,7 +1298,7 @@ ax.cla() # specify vmin/vmax - coll = plot_polygon_collection(ax, self.polygons, self.values, vmin=3, vmax=5) + coll = _plot_polygon_collection(ax, self.polygons, self.values, vmin=3, vmax=5) fig.canvas.draw_idle() cmap = plt.get_cmap() exp_colors = cmap([0]) @@ -986,7 +1306,7 @@ ax.cla() # override edgecolor - coll = plot_polygon_collection(ax, self.polygons, self.values, edgecolor="g") + coll = _plot_polygon_collection(ax, self.polygons, self.values, edgecolor="g") fig.canvas.draw_idle() cmap = plt.get_cmap() exp_colors = cmap(np.arange(self.N) / (self.N - 1)) @@ -1075,3 +1395,10 @@ """ offset, dashes = matplotlib.lines._get_dash_pattern(linestyle) return matplotlib.lines._scale_dashes(offset, dashes, linewidth) + + +def _style_to_vertices(markerstyle): + """ Converts a markerstyle string to a path. """ + # TODO: Vertices values are twice the actual path; unclear, why. 
+ path = matplotlib.markers.MarkerStyle(markerstyle).get_path() + return path.vertices / 2 diff -Nru python-geopandas-0.7.0/geopandas/tests/test_show_versions.py python-geopandas-0.8.1/geopandas/tests/test_show_versions.py --- python-geopandas-0.7.0/geopandas/tests/test_show_versions.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_show_versions.py 2020-07-15 17:54:36.000000000 +0000 @@ -37,9 +37,9 @@ assert "pyproj" in deps_info assert "matplotlib" in deps_info assert "mapclassify" in deps_info - assert "pysal" in deps_info assert "geopy" in deps_info assert "psycopg2" in deps_info + assert "geoalchemy2" in deps_info def test_show_versions(capsys): diff -Nru python-geopandas-0.7.0/geopandas/tests/test_sindex.py python-geopandas-0.8.1/geopandas/tests/test_sindex.py --- python-geopandas-0.7.0/geopandas/tests/test_sindex.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_sindex.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,19 +1,53 @@ import sys -from shapely.geometry import Point, Polygon +from shapely.geometry import ( + Point, + Polygon, + MultiPolygon, + box, + GeometryCollection, + LineString, +) +from numpy.testing import assert_array_equal import geopandas -from geopandas import GeoDataFrame, GeoSeries, base, read_file +from geopandas import _compat as compat +from geopandas import GeoDataFrame, GeoSeries, read_file, sindex, datasets import pytest +import numpy as np + + +class TestNoSindex: + @pytest.mark.skipif(sindex.has_sindex(), reason="Spatial index present, skipping") + def test_no_sindex_installed(self): + """Checks that an error is raised when no spatial index is present.""" + with pytest.raises(ImportError): + sindex.get_sindex_class() + + @pytest.mark.skipif( + compat.HAS_RTREE or not compat.HAS_PYGEOS, + reason="rtree cannot be disabled via flags", + ) + def test_no_sindex_active(self): + """Checks that an error is given when rtree is not installed + and compat.USE_PYGEOS is 
False. + """ + state = compat.USE_PYGEOS # try to save state + compat.set_use_pygeos(False) + with pytest.raises(ImportError): + sindex.get_sindex_class() + compat.set_use_pygeos(state) # try to restore state @pytest.mark.skipif(sys.platform.startswith("win"), reason="fails on AppVeyor") -@pytest.mark.skipif(not base.HAS_SINDEX, reason="Rtree absent, skipping") +@pytest.mark.skipif(not sindex.has_sindex(), reason="Spatial index absent, skipping") class TestSeriesSindex: def test_empty_geoseries(self): - - assert GeoSeries().sindex is None + """Tests creating a spatial index from an empty GeoSeries.""" + with pytest.warns(FutureWarning, match="Generated spatial index is empty"): + # TODO: add checking len(GeoSeries().sindex) == 0 once deprecated + assert not GeoSeries(dtype=object).sindex def test_point(self): s = GeoSeries([Point(0, 0)]) @@ -24,9 +58,13 @@ assert len(list(hits)) == 0 def test_empty_point(self): + """Tests that a single empty Point results in an empty tree.""" s = GeoSeries([Point()]) - assert s.sindex is None + with pytest.warns(FutureWarning, match="Generated spatial index is empty"): + # TODO: add checking len(s) == 0 once deprecated + assert not s.sindex + assert s._sindex_generated is True def test_polygons(self): @@ -54,7 +92,7 @@ @pytest.mark.skipif(sys.platform.startswith("win"), reason="fails on AppVeyor") -@pytest.mark.skipif(not base.HAS_SINDEX, reason="Rtree absent, skipping") +@pytest.mark.skipif(not sindex.has_sindex(), reason="Spatial index absent, skipping") class TestFrameSindex: def setup_method(self): data = { @@ -67,7 +105,9 @@ def test_sindex(self): self.df.crs = "epsg:4326" assert self.df.sindex.size == 5 - hits = list(self.df.sindex.intersection((2.5, 2.5, 4, 4), objects=True)) + with pytest.warns(FutureWarning, match="`objects` is deprecated"): + # TODO: remove warning check once deprecated + hits = list(self.df.sindex.intersection((2.5, 2.5, 4, 4), objects=True)) assert len(hits) == 2 assert hits[0].object == 3 @@ -95,21 
+135,27 @@ def test_merge_geo(self): # First check that we gets hits from the boros frame. tree = self.boros.sindex - hits = tree.intersection((1012821.80, 229228.26), objects=True) + with pytest.warns(FutureWarning, match="`objects` is deprecated"): + # TODO: remove warning check once deprecated + hits = tree.intersection((1012821.80, 229228.26), objects=True) res = [self.boros.loc[hit.object]["BoroName"] for hit in hits] assert res == ["Bronx", "Queens"] # Check that we only get the Bronx from this view. first = self.boros[self.boros["BoroCode"] < 3] tree = first.sindex - hits = tree.intersection((1012821.80, 229228.26), objects=True) + with pytest.warns(FutureWarning, match="`objects` is deprecated"): + # TODO: remove warning check once deprecated + hits = tree.intersection((1012821.80, 229228.26), objects=True) res = [first.loc[hit.object]["BoroName"] for hit in hits] assert res == ["Bronx"] # Check that we only get Queens from this view. second = self.boros[self.boros["BoroCode"] >= 3] tree = second.sindex - hits = tree.intersection((1012821.80, 229228.26), objects=True) + with pytest.warns(FutureWarning, match="`objects` is deprecated"): + # TODO: remove warning check once deprecated + hits = tree.intersection((1012821.80, 229228.26), objects=True) res = ([second.loc[hit.object]["BoroName"] for hit in hits],) assert res == ["Queens"] @@ -118,6 +164,402 @@ assert len(merged) == 5 assert merged.sindex.size == 5 tree = merged.sindex - hits = tree.intersection((1012821.80, 229228.26), objects=True) + with pytest.warns(FutureWarning, match="`objects` is deprecated"): + # TODO: remove warning check once deprecated + hits = tree.intersection((1012821.80, 229228.26), objects=True) res = [merged.loc[hit.object]["BoroName"] for hit in hits] assert res == ["Bronx", "Queens"] + + +@pytest.mark.skipif(not sindex.has_sindex(), reason="Spatial index absent, skipping") +class TestPygeosInterface: + def setup_method(self): + data = { + "location": [Point(x, y) for x, y in 
zip(range(5), range(5))] + + [box(10, 10, 20, 20)] # include a box geometry + } + self.df = GeoDataFrame(data, geometry="location") + self.expected_size = len(data["location"]) + + # --------------------------- `intersection` tests -------------------------- # + @pytest.mark.parametrize( + "test_geom, expected", + ( + ((-1, -1, -0.5, -0.5), []), + ((-0.5, -0.5, 0.5, 0.5), [0]), + ((0, 0, 1, 1), [0, 1]), + ((0, 0), [0]), + ), + ) + def test_intersection_bounds_tuple(self, test_geom, expected): + """Tests the `intersection` method with valid inputs.""" + res = list(self.df.sindex.intersection(test_geom)) + assert_array_equal(res, expected) + + @pytest.mark.parametrize("test_geom", ((-1, -1, -0.5), -0.5, None, Point(0, 0))) + def test_intersection_invalid_bounds_tuple(self, test_geom): + """Tests the `intersection` method with invalid inputs.""" + if compat.USE_PYGEOS: + with pytest.raises(TypeError): + # we raise a useful TypeError + self.df.sindex.intersection(test_geom) + else: + with pytest.raises((TypeError, Exception)): + # catch a general exception + # rtree raises an RTreeError which we need to catch + self.df.sindex.intersection(test_geom) + + # ------------------------------ `query` tests ------------------------------ # + @pytest.mark.parametrize( + "predicate, test_geom, expected", + ( + (None, box(-1, -1, -0.5, -0.5), []), # bbox does not intersect + (None, box(-0.5, -0.5, 0.5, 0.5), [0]), # bbox intersects + (None, box(0, 0, 1, 1), [0, 1]), # bbox intersects multiple + ( + None, + LineString([(0, 1), (1, 0)]), + [0, 1], + ), # bbox intersects but not geometry + ("intersects", box(-1, -1, -0.5, -0.5), []), # bbox does not intersect + ( + "intersects", + box(-0.5, -0.5, 0.5, 0.5), + [0], + ), # bbox and geometry intersect + ( + "intersects", + box(0, 0, 1, 1), + [0, 1], + ), # bbox and geometry intersect multiple + ( + "intersects", + LineString([(0, 1), (1, 0)]), + [], + ), # bbox intersects but not geometry + ("within", box(0.25, 0.28, 0.75, 0.75), []), 
# does not intersect + ("within", box(0, 0, 10, 10), []), # intersects but is not within + ("within", box(11, 11, 12, 12), [5]), # intersects and is within + ("within", LineString([(0, 1), (1, 0)]), []), # intersects but not within + ("contains", box(0, 0, 1, 1), []), # intersects but does not contain + ("contains", box(0, 0, 1.001, 1.001), [1]), # intersects and contains + ("contains", box(0.5, 0.5, 1.5, 1.5), [1]), # intersects and contains + ("contains", box(-1, -1, 2, 2), [0, 1]), # intersects and contains multiple + ( + "contains", + LineString([(0, 1), (1, 0)]), + [], + ), # intersects but not contains + ("touches", box(-1, -1, 0, 0), [0]), # bbox intersects and touches + ( + "touches", + box(-0.5, -0.5, 1.5, 1.5), + [], + ), # bbox intersects but geom does not touch + ), + ) + def test_query(self, predicate, test_geom, expected): + """Tests the `query` method with valid inputs and valid predicates.""" + res = self.df.sindex.query(test_geom, predicate=predicate) + assert_array_equal(res, expected) + + def test_query_invalid_geometry(self): + """Tests the `query` method with invalid geometry. + """ + with pytest.raises(TypeError): + self.df.sindex.query("notavalidgeom") + + @pytest.mark.parametrize( + "test_geom, expected_value", + [ + (None, []), + (GeometryCollection(), []), + (Point(), []), + (MultiPolygon(), []), + (Polygon(), []), + ], + ) + def test_query_empty_geometry(self, test_geom, expected_value): + """Tests the `query` method with empty geometry. + """ + res = self.df.sindex.query(test_geom) + assert_array_equal(res, expected_value) + + def test_query_invalid_predicate(self): + """Tests the `query` method with invalid predicates. 
+ """ + test_geom = box(-1, -1, -0.5, -0.5) + with pytest.raises(ValueError): + self.df.sindex.query(test_geom, predicate="test") + + @pytest.mark.parametrize( + "sort, expected", + ( + (True, [[0, 0, 0], [0, 1, 2]]), + # False could be anything, at least we'll know if it changes + (False, [[0, 0, 0], [0, 1, 2]]), + ), + ) + def test_query_sorting(self, sort, expected): + """Check that results from `query` don't depend on the + order of geometries. + """ + # these geometries come from a reported issue: + # https://github.com/geopandas/geopandas/issues/1337 + # there is no theoretical reason they were chosen + test_polys = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])]) + tree_polys = GeoSeries( + [ + Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]), + Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]), + Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]), + ] + ) + expected = [0, 1, 2] + + # pass through GeoSeries to have GeoPandas + # determine if it should use shapely or pygeos geometry objects + tree_df = geopandas.GeoDataFrame(geometry=tree_polys) + test_df = geopandas.GeoDataFrame(geometry=test_polys) + + test_geo = test_df.geometry.values.data[0] + res = tree_df.sindex.query(test_geo, sort=sort) + try: + assert_array_equal(res, expected) + except AssertionError as e: + if not compat.USE_PYGEOS and sort is False: + pytest.xfail( + "rtree results are known to be unordered, see " + "https://github.com/geopandas/geopandas/issues/1337\n" + "Expected:\n {}\n".format(expected) + + "Got:\n {}\n".format(res.tolist()) + ) + raise e + + # ------------------------- `query_bulk` tests -------------------------- # + @pytest.mark.parametrize( + "predicate, test_geom, expected", + ( + (None, [(-1, -1, -0.5, -0.5)], [[], []]), + (None, [(-0.5, -0.5, 0.5, 0.5)], [[0], [0]]), + (None, [(0, 0, 1, 1)], [[0, 0], [0, 1]]), + ("intersects", [(-1, -1, -0.5, -0.5)], [[], []]), + ("intersects", [(-0.5, -0.5, 0.5, 0.5)], [[0], [0]]), + ("intersects", [(0, 0, 1, 1)], [[0, 0], [0, 1]]), + # only second 
geom intersects + ("intersects", [(-1, -1, -0.5, -0.5), (-0.5, -0.5, 0.5, 0.5)], [[1], [0]]), + # both geoms intersect + ( + "intersects", + [(-1, -1, 1, 1), (-0.5, -0.5, 0.5, 0.5)], + [[0, 0, 1], [0, 1, 0]], + ), + ("within", [(0.25, 0.28, 0.75, 0.75)], [[], []]), # does not intersect + ("within", [(0, 0, 10, 10)], [[], []]), # intersects but is not within + ("within", [(11, 11, 12, 12)], [[0], [5]]), # intersects and is within + ("contains", [(0, 0, 1, 1)], [[], []]), # intersects but does not contain + ( + "contains", + [(0, 0, 1.001, 1.001)], + [[0], [1]], + ), # intersects 2 and contains 1 + ( + "contains", + [(0.5, 0.5, 1.001, 1.001)], + [[0], [1]], + ), # intersects 1 and contains 1 + ("contains", [(0.5, 0.5, 1.5, 1.5)], [[0], [1]]), # intersects and contains + ( + "contains", + [(-1, -1, 2, 2)], + [[0, 0], [0, 1]], + ), # intersects and contains multiple + ), + ) + def test_query_bulk(self, predicate, test_geom, expected): + """Tests the `query_bulk` method with valid + inputs and valid predicates. + """ + # pass through GeoSeries to have GeoPandas + # determine if it should use shapely or pygeos geometry objects + test_geom = geopandas.GeoSeries( + [box(*geom) for geom in test_geom], index=range(len(test_geom)) + ) + res = self.df.sindex.query_bulk(test_geom, predicate=predicate) + assert_array_equal(res, expected) + + @pytest.mark.parametrize( + "test_geoms, expected_value", + [ + # single empty geometry + ([GeometryCollection()], [[], []]), + # None should be skipped + ([GeometryCollection(), None], [[], []]), + ([None], [[], []]), + ([None, box(-0.5, -0.5, 0.5, 0.5), None], [[1], [0]]), + ], + ) + def test_query_bulk_empty_geometry(self, test_geoms, expected_value): + """Tests the `query_bulk` method with an empty geometry. 
+ """ + # pass through GeoSeries to have GeoPandas + # determine if it should use shapely or pygeos geometry objects + # note: for this test, test_geoms (note plural) is a list already + test_geoms = geopandas.GeoSeries(test_geoms, index=range(len(test_geoms))) + res = self.df.sindex.query_bulk(test_geoms) + assert_array_equal(res, expected_value) + + def test_query_bulk_empty_input_array(self): + """Tests the `query_bulk` method with an empty input array. + """ + test_array = np.array([], dtype=object) + expected_value = [[], []] + res = self.df.sindex.query_bulk(test_array) + assert_array_equal(res, expected_value) + + def test_query_bulk_invalid_input_geometry(self): + """Tests the `query_bulk` method with invalid input for the `geometry` parameter. + """ + test_array = "notanarray" + with pytest.raises(TypeError): + self.df.sindex.query_bulk(test_array) + + def test_query_bulk_invalid_predicate(self): + """Tests the `query_bulk` method with invalid predicates. + """ + test_geom_bounds = (-1, -1, -0.5, -0.5) + test_predicate = "test" + + # pass through GeoSeries to have GeoPandas + # determine if it should use shapely or pygeos geometry objects + test_geom = geopandas.GeoSeries([box(*test_geom_bounds)], index=["0"]) + + with pytest.raises(ValueError): + self.df.sindex.query_bulk(test_geom.geometry, predicate=test_predicate) + + @pytest.mark.parametrize( + "predicate, test_geom, expected", + ( + (None, (-1, -1, -0.5, -0.5), [[], []]), + ("intersects", (-1, -1, -0.5, -0.5), [[], []]), + ("contains", (-1, -1, 1, 1), [[0], [0]]), + ), + ) + def test_query_bulk_input_type(self, predicate, test_geom, expected): + """Tests that query_bulk can accept a GeoSeries, GeometryArray or + numpy array. 
+ """ + # pass through GeoSeries to have GeoPandas + # determine if it should use shapely or pygeos geometry objects + test_geom = geopandas.GeoSeries([box(*test_geom)], index=["0"]) + + # test GeoSeries + res = self.df.sindex.query_bulk(test_geom, predicate=predicate) + assert_array_equal(res, expected) + + # test GeometryArray + res = self.df.sindex.query_bulk(test_geom.geometry, predicate=predicate) + assert_array_equal(res, expected) + res = self.df.sindex.query_bulk(test_geom.geometry.values, predicate=predicate) + assert_array_equal(res, expected) + + # test numpy array + res = self.df.sindex.query_bulk( + test_geom.geometry.values.data, predicate=predicate + ) + assert_array_equal(res, expected) + res = self.df.sindex.query_bulk( + test_geom.geometry.values.data, predicate=predicate + ) + assert_array_equal(res, expected) + + @pytest.mark.parametrize( + "sort, expected", + ( + (True, [[0, 0, 0], [0, 1, 2]]), + # False could be anything, at least we'll know if it changes + (False, [[0, 0, 0], [0, 1, 2]]), + ), + ) + def test_query_bulk_sorting(self, sort, expected): + """Check that results from `query_bulk` don't depend + on the order of geometries. 
+ """ + # these geometries come from a reported issue: + # https://github.com/geopandas/geopandas/issues/1337 + # there is no theoretical reason they were chosen + test_polys = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])]) + tree_polys = GeoSeries( + [ + Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]), + Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]), + Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]), + ] + ) + + # pass through GeoSeries to have GeoPandas + # determine if it should use shapely or pygeos geometry objects + tree_df = geopandas.GeoDataFrame(geometry=tree_polys) + test_df = geopandas.GeoDataFrame(geometry=test_polys) + + res = tree_df.sindex.query_bulk(test_df.geometry, sort=sort) + try: + assert_array_equal(res, expected) + except AssertionError as e: + if not compat.USE_PYGEOS and sort is False: + pytest.xfail( + "rtree results are known to be unordered, see " + "https://github.com/geopandas/geopandas/issues/1337\n" + "Expected:\n {}\n".format(expected) + + "Got:\n {}\n".format(res.tolist()) + ) + raise e + + # --------------------------- misc tests ---------------------------- # + + def test_empty_tree_geometries(self): + """Tests building sindex with interleaved empty geometries. 
+ """ + geoms = [Point(0, 0), None, Point(), Point(1, 1), Point()] + df = geopandas.GeoDataFrame(geometry=geoms) + assert df.sindex.query(Point(1, 1))[0] == 3 + + def test_size(self): + """Tests the `size` property.""" + assert self.df.sindex.size == self.expected_size + + def test_len(self): + """Tests the `__len__` method of spatial indexes.""" + assert len(self.df.sindex) == self.expected_size + + def test_is_empty(self): + """Tests the `is_empty` property.""" + # create empty tree + cls_ = sindex.get_sindex_class() + empty = geopandas.GeoSeries(dtype=object) + tree = cls_(empty) + assert tree.is_empty + # create a non-empty tree + non_empty = geopandas.GeoSeries([Point(0, 0)]) + tree = cls_(non_empty) + assert not tree.is_empty + + @pytest.mark.parametrize( + "predicate, expected_shape", + [ + (None, (2, 396)), + ("intersects", (2, 172)), + ("within", (2, 172)), + ("contains", (2, 0)), + ("overlaps", (2, 0)), + ("crosses", (2, 0)), + ("touches", (2, 0)), + ], + ) + def test_integration_natural_earth(self, predicate, expected_shape): + """Tests output sizes for the naturalearth datasets.""" + world = read_file(datasets.get_path("naturalearth_lowres")) + capitals = read_file(datasets.get_path("naturalearth_cities")) + + res = world.sindex.query_bulk(capitals.geometry, predicate) + assert res.shape == expected_shape diff -Nru python-geopandas-0.7.0/geopandas/tests/test_testing.py python-geopandas-0.8.1/geopandas/tests/test_testing.py --- python-geopandas-0.7.0/geopandas/tests/test_testing.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/test_testing.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,8 +1,10 @@ import numpy as np from shapely.geometry import Point, Polygon +from pandas import Series from geopandas import GeoDataFrame, GeoSeries +from geopandas.array import from_shapely from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal import pytest @@ -20,12 +22,46 @@ ] ) + +s3 = Series( + [ + Polygon([(0, 
2), (0, 0), (2, 0), (2, 2)]), + Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]), + ] +) + +a = from_shapely( + [ + Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]), + Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]), + ] +) + +s4 = Series(a) + df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1}) df2 = GeoDataFrame({"col1": [1, 2], "geometry": s2}) +s4 = s1.copy() +s4.crs = 4326 +s5 = s2.copy() +s5.crs = 27700 +df4 = GeoDataFrame( + {"col1": [1, 2], "geometry": s1.copy(), "geom2": s4.copy(), "geom3": s5.copy()}, + crs=3857, +) +df5 = GeoDataFrame( + {"col1": [1, 2], "geometry": s1.copy(), "geom3": s5.copy(), "geom2": s4.copy()}, + crs=3857, +) + +@pytest.mark.filterwarnings("ignore::UserWarning") def test_geoseries(): assert_geoseries_equal(s1, s2) + assert_geoseries_equal(s1, s3, check_series_type=False, check_dtype=False) + assert_geoseries_equal(s3, s2, check_series_type=False, check_dtype=False) + assert_geoseries_equal(s1, s4, check_series_type=False) with pytest.raises(AssertionError): assert_geoseries_equal(s1, s2, check_less_precise=True) @@ -47,6 +83,11 @@ with pytest.raises(AssertionError): assert_geodataframe_equal(df1, df3) + assert_geodataframe_equal(df5, df4, check_like=True) + df5.geom2.crs = 3857 + with pytest.raises(AssertionError): + assert_geodataframe_equal(df5, df4, check_like=True) + def test_equal_nans(): s = GeoSeries([Point(0, 0), np.nan]) @@ -61,7 +102,7 @@ def test_ignore_crs_mismatch(): - df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs="EPSG:4326") + df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1.copy()}, crs="EPSG:4326") df2 = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs="EPSG:31370") with pytest.raises(AssertionError): diff -Nru python-geopandas-0.7.0/geopandas/tests/util.py python-geopandas-0.8.1/geopandas/tests/util.py --- python-geopandas-0.7.0/geopandas/tests/util.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tests/util.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,5 +1,4 @@ import os.path -import sqlite3 
from pandas import Series @@ -15,15 +14,6 @@ PACKAGE_DIR = os.path.dirname(os.path.dirname(HERE)) -try: - import psycopg2 - from psycopg2 import OperationalError -except ImportError: - - class OperationalError(Exception): - pass - - # mock not used here, but the import from here is used in other modules try: import unittest.mock as mock # noqa @@ -47,26 +37,6 @@ assert Series(df.geometry.type).dropna().eq("MultiPolygon").all() -def connect(dbname, user=None, password=None, host=None, port=None): - """ - Initiaties a connection to a postGIS database that must already exist. - See create_postgis for more information. - """ - - user = user or os.environ.get("PGUSER") - password = password or os.environ.get("PGPASSWORD") - host = host or os.environ.get("PGHOST") - port = port or os.environ.get("PGPORT") - try: - con = psycopg2.connect( - dbname=dbname, user=user, password=password, host=host, port=port - ) - except (NameError, OperationalError): - return None - - return con - - def get_srid(df): """Return srid from `df.crs`.""" if df.crs is not None: @@ -74,32 +44,6 @@ return 0 -def connect_spatialite(): - """ - Return a memory-based SQLite3 connection with SpatiaLite enabled & initialized. - - `The sqlite3 module must be built with loadable extension support - `_ and - `SpatiaLite `_ - must be available on the system as a SQLite module. - Packages available on Anaconda meet requirements. - - Exceptions - ---------- - ``AttributeError`` on missing support for loadable SQLite extensions - ``sqlite3.OperationalError`` on missing SpatiaLite - """ - try: - with sqlite3.connect(":memory:") as con: - con.enable_load_extension(True) - con.load_extension("mod_spatialite") - con.execute("SELECT InitSpatialMetaData(TRUE)") - except Exception: - con.close() - raise - return con - - def create_spatialite(con, df): """ Return a SpatiaLite connection containing the nybb table. 
@@ -143,10 +87,9 @@ for row in df.itertuples(index=False) ), ) - return con -def create_postgis(df, srid=None, geom_col="geom"): +def create_postgis(con, df, srid=None, geom_col="geom"): """ Create a nybb table in the test_geopandas PostGIS database. Returns a boolean indicating whether the database table was successfully @@ -158,10 +101,6 @@ # 'test_geopandas' and enable postgis in it: # > createdb test_geopandas # > psql -c "CREATE EXTENSION postgis" -d test_geopandas - con = connect("test_geopandas") - if con is None: - return False - if srid is not None: geom_schema = "geometry(MULTIPOLYGON, {})".format(srid) geom_insert = "ST_SetSRID(ST_GeometryFromText(%s), {})".format(srid) @@ -201,6 +140,3 @@ finally: cursor.close() con.commit() - con.close() - - return True diff -Nru python-geopandas-0.7.0/geopandas/tools/clip.py python-geopandas-0.8.1/geopandas/tools/clip.py --- python-geopandas-0.7.0/geopandas/tools/clip.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tools/clip.py 2020-07-15 17:54:36.000000000 +0000 @@ -13,6 +13,7 @@ from shapely.geometry import Polygon, MultiPolygon from geopandas import GeoDataFrame, GeoSeries +from geopandas.array import _check_crs, _crs_mismatch_warn def _clip_points(gdf, poly): @@ -36,12 +37,7 @@ The returned GeoDataFrame is a subset of gdf that intersects with poly. """ - spatial_index = gdf.sindex - bbox = poly.bounds - sidx = list(spatial_index.intersection(bbox)) - gdf_sub = gdf.iloc[sidx] - - return gdf_sub[gdf_sub.geometry.intersects(poly)] + return gdf.iloc[gdf.sindex.query(poly, predicate="intersects")] def _clip_line_poly(gdf, poly): @@ -65,26 +61,17 @@ The returned GeoDataFrame is a clipped subset of gdf that intersects with poly. 
""" - spatial_index = gdf.sindex - - # Create a box for the initial intersection - bbox = poly.bounds - # Get a list of id's for each object that overlaps the bounding box and - # subset the data to just those lines - sidx = list(spatial_index.intersection(bbox)) - gdf_sub = gdf.iloc[sidx] + gdf_sub = gdf.iloc[gdf.sindex.query(poly, predicate="intersects")] # Clip the data with the polygon if isinstance(gdf_sub, GeoDataFrame): clipped = gdf_sub.copy() clipped["geometry"] = gdf_sub.intersection(poly) - - # Return the clipped layer with no null geometry values or empty geometries - return clipped[~clipped.geometry.is_empty & clipped.geometry.notnull()] else: # GeoSeries clipped = gdf_sub.intersection(poly) - return clipped[~clipped.is_empty & clipped.notnull()] + + return clipped def clip(gdf, mask, keep_geom_type=False): @@ -144,6 +131,10 @@ ) if isinstance(mask, (GeoDataFrame, GeoSeries)): + if not _check_crs(gdf, mask): + _crs_mismatch_warn(gdf, mask, stacklevel=3) + + if isinstance(mask, (GeoDataFrame, GeoSeries)): box_mask = mask.total_bounds else: box_mask = mask.bounds @@ -152,7 +143,7 @@ ((box_mask[0] <= box_gdf[2]) and (box_gdf[0] <= box_mask[2])) and ((box_mask[1] <= box_gdf[3]) and (box_gdf[1] <= box_mask[3])) ): - return GeoDataFrame(columns=gdf.columns, crs=gdf.crs) + return gdf.iloc[:0] if isinstance(mask, (GeoDataFrame, GeoSeries)): poly = mask.geometry.unary_union @@ -242,7 +233,11 @@ elif orig_type in lines: concat = concat.loc[concat.geom_type.isin(lines)] - # preserve the original order of the input + # Return empty GeoDataFrame or GeoSeries if no shapes remain + if len(concat) == 0: + return gdf.iloc[:0] + + # Preserve the original order of the input if isinstance(concat, GeoDataFrame): concat["_order"] = order return concat.sort_values(by="_order").drop(columns="_order") diff -Nru python-geopandas-0.7.0/geopandas/tools/overlay.py python-geopandas-0.8.1/geopandas/tools/overlay.py --- python-geopandas-0.7.0/geopandas/tools/overlay.py 2020-02-17 
07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tools/overlay.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,9 +1,11 @@ +import warnings from functools import reduce import numpy as np import pandas as pd from geopandas import GeoDataFrame, GeoSeries +from geopandas.array import _check_crs, _crs_mismatch_warn def _ensure_geometry_column(df): @@ -25,27 +27,20 @@ Overlay Intersection operation used in overlay function """ # Spatial Index to create intersections - spatial_index = df2.sindex - bbox = df1.geometry.apply(lambda x: x.bounds) - sidx = bbox.apply(lambda x: list(spatial_index.intersection(x))) + idx1, idx2 = df2.sindex.query_bulk(df1.geometry, predicate="intersects", sort=True) # Create pairs of geometries in both dataframes to be intersected - nei = [] - for i, j in enumerate(sidx): - for k in j: - nei.append([i, k]) - if nei != []: - pairs = pd.DataFrame(nei, columns=["__idx1", "__idx2"]) - left = df1.geometry.take(pairs["__idx1"].values) + if idx1.size > 0 and idx2.size > 0: + left = df1.geometry.take(idx1) left.reset_index(drop=True, inplace=True) - right = df2.geometry.take(pairs["__idx2"].values) + right = df2.geometry.take(idx2) right.reset_index(drop=True, inplace=True) intersections = left.intersection(right) poly_ix = intersections.type.isin(["Polygon", "MultiPolygon"]) intersections.loc[poly_ix] = intersections[poly_ix].buffer(0) # only keep actual intersecting geometries - pairs_intersect = pairs[~intersections.is_empty] - geom_intersect = intersections[~intersections.is_empty] + pairs_intersect = pd.DataFrame({"__idx1": idx1, "__idx2": idx2}) + geom_intersect = intersections # merge data for intersecting geometries df1 = df1.reset_index(drop=True) @@ -59,7 +54,7 @@ df2.drop(df2._geometry_column_name, axis=1), left_on="__idx2", right_index=True, - suffixes=["_1", "_2"], + suffixes=("_1", "_2"), ) return GeoDataFrame(dfinter, geometry=geom_intersect, crs=df1.crs) @@ -75,10 +70,14 @@ """ Overlay Difference operation used in overlay 
function """ - # Spatial Index to create intersections - spatial_index = df2.sindex - bbox = df1.geometry.apply(lambda x: x.bounds) - sidx = bbox.apply(lambda x: list(spatial_index.intersection(x))) + # spatial index query to find intersections + idx1, idx2 = df2.sindex.query_bulk(df1.geometry, predicate="intersects", sort=True) + idx1_unique, idx1_unique_indices = np.unique(idx1, return_index=True) + idx2_split = np.split(idx2, idx1_unique_indices[1:]) + sidx = [ + idx2_split.pop(0) if idx in idx1_unique else [] + for idx in range(df1.geometry.size) + ] # Create differences new_g = [] for geom, neighbours in zip(df1.geometry, sidx): @@ -86,7 +85,7 @@ lambda x, y: x.difference(y), [geom] + list(df2.geometry.iloc[neighbours]) ) new_g.append(new) - differences = GeoSeries(new_g, index=df1.index) + differences = GeoSeries(new_g, index=df1.index, crs=df1.crs) poly_ix = differences.type.isin(["Polygon", "MultiPolygon"]) differences.loc[poly_ix] = differences[poly_ix].buffer(0) geom_diff = differences[~differences.is_empty].copy() @@ -110,7 +109,7 @@ _ensure_geometry_column(dfdiff2) # combine both 'difference' dataframes dfsym = dfdiff1.merge( - dfdiff2, on=["__idx1", "__idx2"], how="outer", suffixes=["_1", "_2"] + dfdiff2, on=["__idx1", "__idx2"], how="outer", suffixes=("_1", "_2") ) geometry = dfsym.geometry_1.copy() geometry.name = "geometry" @@ -139,16 +138,17 @@ def overlay(df1, df2, how="intersection", make_valid=True, keep_geom_type=True): - """Perform spatial overlay between two polygons. + """Perform spatial overlay between two GeoDataFrames. - Currently only supports data GeoDataFrames with polygons. - Implements several methods that are all effectively subsets of - the union. + Currently only supports data GeoDataFrames with uniform geometry types, + i.e. containing only (Multi)Polygons, or only (Multi)Points, or a + combination of (Multi)LineString and LinearRing shapes. + Implements several methods that are all effectively subsets of the union. 
Parameters ---------- - df1 : GeoDataFrame with MultiPolygon or Polygon geometry column - df2 : GeoDataFrame with MultiPolygon or Polygon geometry column + df1 : GeoDataFrame + df2 : GeoDataFrame how : string Method of spatial overlay: 'intersection', 'union', 'identity', 'symmetric_difference' or 'difference'. @@ -174,13 +174,17 @@ # Error Messages if how not in allowed_hows: raise ValueError( - "`how` was '{0}' but is expected to be in %s".format(how, allowed_hows) + "`how` was '{0}' but is expected to be in {1}".format(how, allowed_hows) ) if isinstance(df1, GeoSeries) or isinstance(df2, GeoSeries): raise NotImplementedError( "overlay currently only implemented for " "GeoDataFrames" ) + + if not _check_crs(df1, df2): + _crs_mismatch_warn(df1, df2, stacklevel=3) + polys = ["Polygon", "MultiPolygon"] lines = ["LineString", "MultiLineString", "LinearRing"] points = ["Point", "MultiPoint"] @@ -201,17 +205,19 @@ if df2.geom_type.isin(polys).all(): df2[df2._geometry_column_name] = df2.geometry.buffer(0) - if how == "difference": - return _overlay_difference(df1, df2) - elif how == "intersection": - result = _overlay_intersection(df1, df2) - elif how == "symmetric_difference": - result = _overlay_symmetric_diff(df1, df2) - elif how == "union": - result = _overlay_union(df1, df2) - elif how == "identity": - dfunion = _overlay_union(df1, df2) - result = dfunion[dfunion["__idx1"].notnull()].copy() + with warnings.catch_warnings(): # CRS checked above, supress array-level warning + warnings.filterwarnings("ignore", message="CRS mismatch between the CRS") + if how == "difference": + return _overlay_difference(df1, df2) + elif how == "intersection": + result = _overlay_intersection(df1, df2) + elif how == "symmetric_difference": + result = _overlay_symmetric_diff(df1, df2) + elif how == "union": + result = _overlay_union(df1, df2) + elif how == "identity": + dfunion = _overlay_union(df1, df2) + result = dfunion[dfunion["__idx1"].notnull()].copy() if keep_geom_type: type = 
df1.geom_type.iloc[0] diff -Nru python-geopandas-0.7.0/geopandas/tools/_show_versions.py python-geopandas-0.8.1/geopandas/tools/_show_versions.py --- python-geopandas-0.7.0/geopandas/tools/_show_versions.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tools/_show_versions.py 2020-07-15 17:54:36.000000000 +0000 @@ -94,9 +94,10 @@ "pyproj", "matplotlib", "mapclassify", - "pysal", "geopy", "psycopg2", + "geoalchemy2", + "pyarrow", ] def get_version(module): @@ -122,9 +123,12 @@ """ Print system information and installed module versions. - Example - ------- - > python -c "import geopandas; geopandas.show_versions()" + Examples + -------- + + :: + + $ python -c "import geopandas; geopandas.show_versions()" """ sys_info = _get_sys_info() deps_info = _get_deps_info() diff -Nru python-geopandas-0.7.0/geopandas/tools/sjoin.py python-geopandas-0.8.1/geopandas/tools/sjoin.py --- python-geopandas-0.7.0/geopandas/tools/sjoin.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tools/sjoin.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,11 +1,9 @@ -from warnings import warn +import warnings -import numpy as np import pandas as pd -from shapely import prepared - from geopandas import GeoDataFrame +from geopandas.array import _check_crs, _crs_mismatch_warn def sjoin( @@ -23,7 +21,7 @@ * 'right': use keys from right_df; retain only right_df geometry column * 'inner': use intersection of keys from both dfs; retain only left_df geometry column - op : string, default 'intersection' + op : string, default 'intersects' Binary predicate, one of {'intersects', 'contains', 'within'}. See http://shapely.readthedocs.io/en/latest/manual.html#binary-predicates. lsuffix : string, default 'left' @@ -54,13 +52,8 @@ '`op` was "%s" but is expected to be in %s' % (op, allowed_ops) ) - if left_df.crs != right_df.crs: - warn( - ( - "CRS of frames being joined does not match!" 
- "(%s != %s)" % (left_df.crs, right_df.crs) - ) - ) + if not _check_crs(left_df, right_df): + _crs_mismatch_warn(left_df, right_df, stacklevel=3) index_left = "index_%s" % lsuffix index_right = "index_%s" % rsuffix @@ -74,18 +67,41 @@ " joined".format(index_left, index_right) ) - # Attempt to re-use spatial indexes, otherwise generate the spatial index - # for the longer dataframe - if right_df._sindex_generated or ( - not left_df._sindex_generated and right_df.shape[0] > left_df.shape[0] - ): - tree_idx = right_df.sindex - tree_idx_right = True + # query index + with warnings.catch_warnings(): + # We don't need to show our own warning here + # TODO remove this once the deprecation has been enforced + warnings.filterwarnings( + "ignore", "Generated spatial index is empty", FutureWarning + ) + if op == "within": + # within is implemented as the inverse of contains + # contains is a faster predicate + # see discussion at https://github.com/geopandas/geopandas/pull/1421 + predicate = "contains" + sindex = left_df.sindex + input_geoms = right_df.geometry + else: + # all other predicates are symmetric + # keep them the same + predicate = op + sindex = right_df.sindex + input_geoms = left_df.geometry + + if sindex: + l_idx, r_idx = sindex.query_bulk(input_geoms, predicate=predicate, sort=False) + result = pd.DataFrame({"_key_left": l_idx, "_key_right": r_idx}) else: - tree_idx = left_df.sindex - tree_idx_right = False + # when sindex is empty / has no valid geometries + result = pd.DataFrame(columns=["_key_left", "_key_right"], dtype=float) + if op == "within": + # within is implemented as the inverse of contains + # flip back the results + result = result.rename( + columns={"_key_left": "_key_right", "_key_right": "_key_left"} + ) - # the rtree spatial index only allows limited (numeric) index types, but an + # the spatial index only allows limited (numeric) index types, but an # index in geopandas may be any arbitrary dtype. 
so reset both indices now # and store references to the original indices, to be reaffixed later. # GH 352 @@ -95,7 +111,8 @@ left_df.index = left_df.index.rename(index_left) except TypeError: index_left = [ - "index_%s" % lsuffix + str(l) for l, ix in enumerate(left_df.index.names) + "index_%s" % lsuffix + str(pos) + for pos, ix in enumerate(left_df.index.names) ] left_index_name = left_df.index.names left_df.index = left_df.index.rename(index_left) @@ -107,85 +124,14 @@ right_df.index = right_df.index.rename(index_right) except TypeError: index_right = [ - "index_%s" % rsuffix + str(l) for l, ix in enumerate(right_df.index.names) + "index_%s" % rsuffix + str(pos) + for pos, ix in enumerate(right_df.index.names) ] right_index_name = right_df.index.names right_df.index = right_df.index.rename(index_right) right_df = right_df.reset_index() - if op == "within": - # within implemented as the inverse of contains; swap names - left_df, right_df = right_df, left_df - tree_idx_right = not tree_idx_right - - r_idx = np.empty((0, 0)) - l_idx = np.empty((0, 0)) - # get rtree spatial index - if tree_idx_right: - idxmatch = left_df.geometry.apply(lambda x: x.bounds).apply( - lambda x: list(tree_idx.intersection(x)) if not x == () else [] - ) - idxmatch = idxmatch[idxmatch.apply(len) > 0] - # indexes of overlapping boundaries - if idxmatch.shape[0] > 0: - r_idx = np.concatenate(idxmatch.values) - l_idx = np.concatenate([[i] * len(v) for i, v in idxmatch.iteritems()]) - else: - # tree_idx_df == 'left' - idxmatch = right_df.geometry.apply(lambda x: x.bounds).apply( - lambda x: list(tree_idx.intersection(x)) if not x == () else [] - ) - idxmatch = idxmatch[idxmatch.apply(len) > 0] - if idxmatch.shape[0] > 0: - # indexes of overlapping boundaries - l_idx = np.concatenate(idxmatch.values) - r_idx = np.concatenate([[i] * len(v) for i, v in idxmatch.iteritems()]) - - if len(r_idx) > 0 and len(l_idx) > 0: - # Vectorize predicate operations - def find_intersects(a1, a2): - return 
a1.intersects(a2) - - def find_contains(a1, a2): - return a1.contains(a2) - - predicate_d = { - "intersects": find_intersects, - "contains": find_contains, - "within": find_contains, - } - - check_predicates = np.vectorize(predicate_d[op]) - - result = pd.DataFrame( - np.column_stack( - [ - l_idx, - r_idx, - check_predicates( - left_df.geometry.apply(lambda x: prepared.prep(x))[l_idx], - right_df[right_df.geometry.name][r_idx], - ), - ] - ) - ) - - result.columns = ["_key_left", "_key_right", "match_bool"] - result = pd.DataFrame(result[result["match_bool"] == 1]).drop( - "match_bool", axis=1 - ) - - else: - # when output from the join has no overlapping geometries - result = pd.DataFrame(columns=["_key_left", "_key_right"], dtype=float) - - if op == "within": - # within implemented as the inverse of contains; swap names - left_df, right_df = right_df, left_df - result = result.rename( - columns={"_key_left": "_key_right", "_key_right": "_key_left"} - ) - + # perform join on the dataframes if how == "inner": result = result.set_index("_key_left") joined = ( diff -Nru python-geopandas-0.7.0/geopandas/tools/tests/test_clip.py python-geopandas-0.8.1/geopandas/tools/tests/test_clip.py --- python-geopandas-0.7.0/geopandas/tools/tests/test_clip.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tools/tests/test_clip.py 2020-07-15 17:54:36.000000000 +0000 @@ -9,11 +9,16 @@ import geopandas from geopandas import GeoDataFrame, GeoSeries, clip -from geopandas.testing import assert_geodataframe_equal +from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal import pytest +pytestmark = pytest.mark.skipif( + not geopandas.sindex.has_sindex(), reason="clip requires spatial index" +) + + @pytest.fixture def point_gdf(): """Create a point GeoDataFrame.""" @@ -23,6 +28,24 @@ @pytest.fixture +def pointsoutside_nooverlap_gdf(): + """Create a point GeoDataFrame. 
Its points are all outside the single + rectangle, and its bounds are outside the single rectangle's.""" + pts = np.array([[5, 15], [15, 15], [15, 20]]) + gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:4326") + return gdf + + +@pytest.fixture +def pointsoutside_overlap_gdf(): + """Create a point GeoDataFrame. Its points are all outside the single + rectangle, and its bounds are overlapping the single rectangle's.""" + pts = np.array([[5, 15], [15, 15], [15, 5]]) + gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:4326") + return gdf + + +@pytest.fixture def single_rectangle_gdf(): """Create a single rectangle for clipping.""" poly_inters = Polygon([(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)]) @@ -159,7 +182,7 @@ def test_non_overlapping_geoms(): - """Test that a bounding box returns error if the extents don't overlap""" + """Test that a bounding box returns empty if the extents don't overlap""" unit_box = Polygon([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)]) unit_gdf = GeoDataFrame([1], geometry=[unit_box], crs="EPSG:4326") non_overlapping_gdf = unit_gdf.copy() @@ -167,9 +190,9 @@ lambda x: shapely.affinity.translate(x, xoff=20) ) out = clip(unit_gdf, non_overlapping_gdf) - assert_geodataframe_equal( - out, GeoDataFrame(columns=unit_gdf.columns, crs=unit_gdf.crs) - ) + assert_geodataframe_equal(out, unit_gdf.iloc[:0]) + out2 = clip(unit_gdf.geometry, non_overlapping_gdf) + assert_geoseries_equal(out2, GeoSeries(crs=unit_gdf.crs)) def test_clip_points(point_gdf, single_rectangle_gdf): @@ -313,6 +336,18 @@ assert "LineString" == clipped.geom_type[1] +def test_clip_no_box_overlap(pointsoutside_nooverlap_gdf, single_rectangle_gdf): + """Test clip when intersection is empty and boxes do not overlap.""" + clipped = clip(pointsoutside_nooverlap_gdf, single_rectangle_gdf) + assert len(clipped) == 0 + + +def test_clip_box_overlap(pointsoutside_overlap_gdf, single_rectangle_gdf): + """Test clip when intersection is emtpy 
and boxes do overlap.""" + clipped = clip(pointsoutside_overlap_gdf, single_rectangle_gdf) + assert len(clipped) == 0 + + def test_warning_extra_geoms_mixed(single_rectangle_gdf, mixed_gdf): """Test the correct warnings are raised if keep_geom_type is called on a mixed GDF""" @@ -325,3 +360,8 @@ called on a GDF with GeometryCollection""" with pytest.warns(UserWarning): clip(geomcol_gdf, single_rectangle_gdf, keep_geom_type=True) + + +def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf): + with pytest.warns(UserWarning, match="CRS mismatch between the CRS"): + clip(point_gdf, single_rectangle_gdf.to_crs(3857)) diff -Nru python-geopandas-0.7.0/geopandas/tools/tests/test_sjoin.py python-geopandas-0.8.1/geopandas/tools/tests/test_sjoin.py --- python-geopandas-0.7.0/geopandas/tools/tests/test_sjoin.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/tools/tests/test_sjoin.py 2020-07-15 17:54:36.000000000 +0000 @@ -1,15 +1,22 @@ +from distutils.version import LooseVersion + import numpy as np import pandas as pd from shapely.geometry import Point, Polygon, GeometryCollection import geopandas -from geopandas import GeoDataFrame, GeoSeries, base, read_file, sjoin +from geopandas import GeoDataFrame, GeoSeries, read_file, sindex, sjoin from pandas.testing import assert_frame_equal import pytest +pytestmark = pytest.mark.skipif( + not sindex.has_sindex(), reason="sjoin requires spatial index" +) + + @pytest.fixture() def dfs(request): polys1 = GeoSeries( @@ -83,13 +90,12 @@ return [request.param, df1, df2, expected] -@pytest.mark.skipif(not base.HAS_SINDEX, reason="Rtree absent, skipping") class TestSpatialJoin: @pytest.mark.parametrize("dfs", ["default-index", "string-index"], indirect=True) def test_crs_mismatch(self, dfs): index, df1, df2, expected = dfs df1.crs = "epsg:4326" - with pytest.warns(UserWarning): + with pytest.warns(UserWarning, match="CRS mismatch between the CRS"): sjoin(df1, df2) @pytest.mark.parametrize( @@ -186,7 +192,7 
@@ assert_frame_equal(res, exp) def test_empty_join(self): - # Check empty joins + # Check joins resulting in empty gdfs. polygons = geopandas.GeoDataFrame( { "col2": [1, 2], @@ -204,6 +210,32 @@ empty = sjoin(not_in, polygons, how="inner", op="intersects") assert empty.empty + @pytest.mark.parametrize("op", ["intersects", "contains", "within"]) + @pytest.mark.parametrize( + "empty", + [ + GeoDataFrame(geometry=[GeometryCollection(), GeometryCollection()]), + GeoDataFrame(geometry=GeoSeries()), + ], + ) + def test_join_with_empty(self, op, empty): + # Check joins with empty geometry columns/dataframes. + polygons = geopandas.GeoDataFrame( + { + "col2": [1, 2], + "geometry": [ + Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), + Polygon([(1, 0), (2, 0), (2, 1), (1, 1)]), + ], + } + ) + result = sjoin(empty, polygons, how="left", op=op) + assert result.index_right.isnull().all() + result = sjoin(empty, polygons, how="right", op=op) + assert result.index_left.isnull().all() + result = sjoin(empty, polygons, how="inner", op=op) + assert result.empty + @pytest.mark.parametrize("dfs", ["default-index", "string-index"], indirect=True) def test_sjoin_invalid_args(self, dfs): index, df1, df2, expected = dfs @@ -262,10 +294,13 @@ ) exp.index.names = df2.index.names + # GH 1364 fix of behaviour was done in pandas 1.1.0 + if op == "within" and str(pd.__version__) >= LooseVersion("1.1.0"): + exp = exp.sort_index() + assert_frame_equal(res, exp, check_index_type=False) -@pytest.mark.skipif(not base.HAS_SINDEX, reason="Rtree absent, skipping") class TestSpatialJoinNYBB: def setup_method(self): nybb_filename = geopandas.datasets.get_path("nybb") @@ -383,7 +418,7 @@ axis=1, ) - expected_inner = GeoDataFrame(expected_inner_df, crs="epsg:4326") + expected_inner = GeoDataFrame(expected_inner_df) expected_right_df = pd.concat( [ @@ -400,9 +435,7 @@ axis=1, ) - expected_right = GeoDataFrame(expected_right_df, crs="epsg:4326").set_index( - "index_right" - ) + expected_right = 
GeoDataFrame(expected_right_df).set_index("index_right") expected_left_df = pd.concat( [ @@ -413,7 +446,7 @@ axis=1, ) - expected_left = GeoDataFrame(expected_left_df, crs="epsg:4326") + expected_left = GeoDataFrame(expected_left_df) assert expected_inner.equals(df_inner) assert expected_right.equals(df_right) @@ -432,8 +465,16 @@ df2 = sjoin(self.pointdf, self.polydf.append(empty), how="left") assert df2.shape == (21, 8) + @pytest.mark.parametrize("op", ["intersects", "within", "contains"]) + def test_sjoin_no_valid_geoms(self, op): + """Tests a completely empty GeoDataFrame.""" + empty = GeoDataFrame(geometry=[], crs=self.pointdf.crs) + assert sjoin(self.pointdf, empty, how="inner", op=op).empty + assert sjoin(self.pointdf, empty, how="right", op=op).empty + assert sjoin(empty, self.pointdf, how="inner", op=op).empty + assert sjoin(empty, self.pointdf, how="left", op=op).empty + -@pytest.mark.skipif(not base.HAS_SINDEX, reason="Rtree absent, skipping") class TestSpatialJoinNaturalEarth: def setup_method(self): world_path = geopandas.datasets.get_path("naturalearth_lowres") diff -Nru python-geopandas-0.7.0/geopandas/_vectorized.py python-geopandas-0.8.1/geopandas/_vectorized.py --- python-geopandas-0.7.0/geopandas/_vectorized.py 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/_vectorized.py 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,891 @@ +""" +Compatibility shim for the vectorized geometry operations. + +Uses PyGEOS if available/set, otherwise loops through Shapely geometries. + +""" +import warnings + +import numpy as np + +import shapely.geometry +import shapely.geos +import shapely.wkb +import shapely.wkt + +from shapely.geometry.base import BaseGeometry + +from . 
import _compat as compat + +try: + import pygeos +except ImportError: + geos = None + + +_names = { + "NAG": None, + "POINT": "Point", + "LINESTRING": "LineString", + "LINEARRING": "LinearRing", + "POLYGON": "Polygon", + "MULTIPOINT": "MultiPoint", + "MULTILINESTRING": "MultiLineString", + "MULTIPOLYGON": "MultiPolygon", + "GEOMETRYCOLLECTION": "GeometryCollection", +} + +if compat.USE_PYGEOS: + type_mapping = {p.value: _names[p.name] for p in pygeos.GeometryType} + geometry_type_ids = list(type_mapping.keys()) + geometry_type_values = np.array(list(type_mapping.values()), dtype=object) +else: + type_mapping, geometry_type_ids, geometry_type_values = None, None, None + + +def _isna(value): + """ + Check if scalar value is NA-like (None or np.nan). + + Custom version that only works for scalars (returning True or False), + as `pd.isna` also works for array-like input returning a boolean array. + """ + if value is None: + return True + elif isinstance(value, float) and np.isnan(value): + return True + else: + return False + + +def _pygeos_to_shapely(geom): + if geom is None: + return None + + if compat.PYGEOS_SHAPELY_COMPAT: + geom = shapely.geos.lgeos.GEOSGeom_clone(geom._ptr) + return shapely.geometry.base.geom_factory(geom) + + # fallback going through WKB + if pygeos.is_empty(geom) and pygeos.get_type_id(geom) == 0: + # empty point does not roundtrip through WKB + return shapely.wkt.loads("POINT EMPTY") + else: + return shapely.wkb.loads(pygeos.to_wkb(geom)) + + +def _shapely_to_pygeos(geom): + if geom is None: + return None + + if compat.PYGEOS_SHAPELY_COMPAT: + return pygeos.from_shapely(geom) + + # fallback going through WKB + if geom.is_empty and geom.geom_type == "Point": + # empty point does not roundtrip through WKB + return pygeos.from_wkt("POINT EMPTY") + else: + return pygeos.from_wkb(geom.wkb) + + +def from_shapely(data): + """ + Convert a list or array of shapely objects to an object-dtype numpy + array of validated geometry elements. 
+ + """ + # First try a fast path for pygeos if possible, but do this in a try-except + # block because pygeos.from_shapely only handles Shapely objects, while + # the rest of this function is more forgiving (also __geo_interface__). + if compat.USE_PYGEOS and compat.PYGEOS_SHAPELY_COMPAT: + if not isinstance(data, np.ndarray): + arr = np.empty(len(data), dtype=object) + arr[:] = data + else: + arr = data + try: + return pygeos.from_shapely(arr) + except TypeError: + pass + + out = [] + + for geom in data: + if compat.USE_PYGEOS and isinstance(geom, pygeos.Geometry): + out.append(geom) + elif isinstance(geom, BaseGeometry): + if compat.USE_PYGEOS: + out.append(_shapely_to_pygeos(geom)) + else: + out.append(geom) + elif hasattr(geom, "__geo_interface__"): + geom = shapely.geometry.asShape(geom) + # asShape returns GeometryProxy -> trigger actual materialization + # with one of its methods + geom.wkb + if compat.USE_PYGEOS: + out.append(_shapely_to_pygeos(geom)) + else: + out.append(geom) + elif _isna(geom): + out.append(None) + else: + raise TypeError("Input must be valid geometry objects: {0}".format(geom)) + + if compat.USE_PYGEOS: + return np.array(out, dtype=object) + else: + # numpy can expand geometry collections into 2D arrays, use this + # two-step construction to avoid this + aout = np.empty(len(data), dtype=object) + aout[:] = out + return aout + + +def to_shapely(data): + if compat.USE_PYGEOS: + out = np.empty(len(data), dtype=object) + out[:] = [_pygeos_to_shapely(geom) for geom in data] + return out + else: + return data + + +def from_wkb(data): + """ + Convert a list or array of WKB objects to a np.ndarray[geoms]. 
+ """ + if compat.USE_PYGEOS: + return pygeos.from_wkb(data) + + import shapely.wkb + + out = [] + + for geom in data: + if geom is not None and len(geom): + geom = shapely.wkb.loads(geom) + else: + geom = None + out.append(geom) + + aout = np.empty(len(data), dtype=object) + aout[:] = out + return aout + + +def to_wkb(data, hex=False): + if compat.USE_PYGEOS: + return pygeos.to_wkb(data, hex=hex) + else: + if hex: + out = [geom.wkb_hex if geom is not None else None for geom in data] + else: + out = [geom.wkb if geom is not None else None for geom in data] + return np.array(out, dtype=object) + + +def from_wkt(data): + """ + Convert a list or array of WKT objects to a np.ndarray[geoms]. + """ + if compat.USE_PYGEOS: + return pygeos.from_wkt(data) + + import shapely.wkt + + out = [] + + for geom in data: + if geom is not None and len(geom): + if isinstance(geom, bytes): + geom = geom.decode("utf-8") + geom = shapely.wkt.loads(geom) + else: + geom = None + out.append(geom) + + aout = np.empty(len(data), dtype=object) + aout[:] = out + return aout + + +def to_wkt(data, **kwargs): + if compat.USE_PYGEOS: + return pygeos.to_wkt(data, **kwargs) + else: + out = [geom.wkt if geom is not None else None for geom in data] + return np.array(out, dtype=object) + + +def _points_from_xy(x, y, z=None): + # helper method for shapely-based function + if not len(x) == len(y): + raise ValueError("x and y arrays must be equal length.") + if z is not None: + if not len(z) == len(x): + raise ValueError("z array must be same length as x and y.") + geom = [shapely.geometry.Point(i, j, k) for i, j, k in zip(x, y, z)] + else: + geom = [shapely.geometry.Point(i, j) for i, j in zip(x, y)] + return geom + + +def points_from_xy(x, y, z=None): + + x = np.asarray(x, dtype="float64") + y = np.asarray(y, dtype="float64") + if z is not None: + z = np.asarray(z, dtype="float64") + + if compat.USE_PYGEOS: + return pygeos.points(x, y, z) + else: + out = _points_from_xy(x, y, z) + aout = np.empty(len(x), 
dtype=object) + aout[:] = out + return aout + + +# ----------------------------------------------------------------------------- +# Helper methods for the vectorized operations +# ----------------------------------------------------------------------------- + + +def _binary_method(op, left, right, **kwargs): + # type: (str, np.array[geoms], [np.array[geoms]/BaseGeometry]) -> array-like + if isinstance(right, BaseGeometry): + right = from_shapely([right])[0] + return getattr(pygeos, op)(left, right, **kwargs) + + +def _binary_geo(op, left, right): + # type: (str, np.array[geoms], [np.array[geoms]/BaseGeometry]) -> np.array[geoms] + """ Apply geometry-valued operation + + Supports: + + - difference + - symmetric_difference + - intersection + - union + + Parameters + ---------- + op: string + right: np.array[geoms] or single shapely BaseGeoemtry + """ + if isinstance(right, BaseGeometry): + # intersection can return empty GeometryCollections, and if the + # result are only those, numpy will coerce it to empty 2D array + data = np.empty(len(left), dtype=object) + data[:] = [ + getattr(s, op)(right) if s is not None and right is not None else None + for s in left + ] + return data + elif isinstance(right, np.ndarray): + if len(left) != len(right): + msg = "Lengths of inputs do not match. 
Left: {0}, Right: {1}".format( + len(left), len(right) + ) + raise ValueError(msg) + data = np.empty(len(left), dtype=object) + data[:] = [ + getattr(this_elem, op)(other_elem) + if this_elem is not None and other_elem is not None + else None + for this_elem, other_elem in zip(left, right) + ] + return data + else: + raise TypeError("Type not known: {0} vs {1}".format(type(left), type(right))) + + +def _binary_predicate(op, left, right, *args, **kwargs): + # type: (str, np.array[geoms], np.array[geoms]/BaseGeometry, args/kwargs) + # -> array[bool] + """Binary operation on np.array[geoms] that returns a boolean ndarray + + Supports: + + - contains + - disjoint + - intersects + - touches + - crosses + - within + - overlaps + - covers + - covered_by + - equals + + Parameters + ---------- + op: string + right: np.array[geoms] or single shapely BaseGeoemtry + """ + # empty geometries are handled by shapely (all give False except disjoint) + if isinstance(right, BaseGeometry): + data = [ + getattr(s, op)(right, *args, **kwargs) if s is not None else False + for s in left + ] + return np.array(data, dtype=bool) + elif isinstance(right, np.ndarray): + data = [ + getattr(this_elem, op)(other_elem, *args, **kwargs) + if not (this_elem is None or other_elem is None) + else False + for this_elem, other_elem in zip(left, right) + ] + return np.array(data, dtype=bool) + else: + raise TypeError("Type not known: {0} vs {1}".format(type(left), type(right))) + + +def _binary_op_float(op, left, right, *args, **kwargs): + # type: (str, np.array[geoms], np.array[geoms]/BaseGeometry, args/kwargs) + # -> array + """Binary operation on np.array[geoms] that returns a ndarray""" + # used for distance -> check for empty as we want to return np.nan instead 0.0 + # as shapely does currently (https://github.com/Toblerity/Shapely/issues/498) + if isinstance(right, BaseGeometry): + data = [ + getattr(s, op)(right, *args, **kwargs) + if not (s is None or s.is_empty or right.is_empty) + else np.nan 
+ for s in left + ] + return np.array(data, dtype=float) + elif isinstance(right, np.ndarray): + if len(left) != len(right): + msg = "Lengths of inputs do not match. Left: {0}, Right: {1}".format( + len(left), len(right) + ) + raise ValueError(msg) + data = [ + getattr(this_elem, op)(other_elem, *args, **kwargs) + if not (this_elem is None or this_elem.is_empty) + | (other_elem is None or other_elem.is_empty) + else np.nan + for this_elem, other_elem in zip(left, right) + ] + return np.array(data, dtype=float) + else: + raise TypeError("Type not known: {0} vs {1}".format(type(left), type(right))) + + +def _binary_op(op, left, right, *args, **kwargs): + # type: (str, np.array[geoms], np.array[geoms]/BaseGeometry, args/kwargs) + # -> array + """Binary operation on np.array[geoms] that returns a ndarray""" + # pass empty to shapely (relate handles this correctly, project only + # for linestrings and points) + if op == "project": + null_value = np.nan + dtype = float + elif op == "relate": + null_value = None + dtype = object + else: + raise AssertionError("wrong op") + + if isinstance(right, BaseGeometry): + data = [ + getattr(s, op)(right, *args, **kwargs) if s is not None else null_value + for s in left + ] + return np.array(data, dtype=dtype) + elif isinstance(right, np.ndarray): + if len(left) != len(right): + msg = "Lengths of inputs do not match. Left: {0}, Right: {1}".format( + len(left), len(right) + ) + raise ValueError(msg) + data = [ + getattr(this_elem, op)(other_elem, *args, **kwargs) + if not (this_elem is None or other_elem is None) + else null_value + for this_elem, other_elem in zip(left, right) + ] + return np.array(data, dtype=dtype) + else: + raise TypeError("Type not known: {0} vs {1}".format(type(left), type(right))) + + +def _affinity_method(op, left, *args, **kwargs): + # type: (str, np.array[geoms], ...) 
-> np.array[geoms] + + # not all shapely.affinity methods can handle empty geometries: + # affine_transform itself works (as well as translate), but rotate, scale + # and skew fail (they try to unpack the bounds). + # Here: consistently returning empty geom for input empty geom + left = to_shapely(left) + out = [] + for geom in left: + if geom is None or geom.is_empty: + res = geom + else: + res = getattr(shapely.affinity, op)(geom, *args, **kwargs) + out.append(res) + data = np.empty(len(left), dtype=object) + data[:] = out + return from_shapely(data) + + +# ----------------------------------------------------------------------------- +# Vectorized operations +# ----------------------------------------------------------------------------- + + +# +# Unary operations that return non-geometry (bool or float) +# + + +def _unary_op(op, left, null_value=False): + # type: (str, np.array[geoms], Any) -> np.array + """Unary operation that returns a Series""" + data = [getattr(geom, op, null_value) for geom in left] + return np.array(data, dtype=np.dtype(type(null_value))) + + +def is_valid(data): + if compat.USE_PYGEOS: + return pygeos.is_valid(data) + else: + return _unary_op("is_valid", data, null_value=False) + + +def is_empty(data): + if compat.USE_PYGEOS: + return pygeos.is_empty(data) + else: + return _unary_op("is_empty", data, null_value=False) + + +def is_simple(data): + if compat.USE_PYGEOS: + return pygeos.is_simple(data) + else: + return _unary_op("is_simple", data, null_value=False) + + +def is_ring(data): + if compat.USE_PYGEOS: + return pygeos.is_ring(pygeos.get_exterior_ring(data)) + else: + # operates on the exterior, so can't use _unary_op() + # XXX needed to change this because there is now a geometry collection + # in the shapely ones that was something else before? 
+ return np.array( + [ + geom.exterior.is_ring + if geom is not None + and hasattr(geom, "exterior") + and geom.exterior is not None + else False + for geom in data + ], + dtype=bool, + ) + + +def is_closed(data): + if compat.USE_PYGEOS: + return pygeos.is_closed(data) + else: + return _unary_op("is_closed", data, null_value=False) + + +def has_z(data): + if compat.USE_PYGEOS: + return pygeos.has_z(data) + else: + return _unary_op("has_z", data, null_value=False) + + +def geom_type(data): + if compat.USE_PYGEOS: + res = pygeos.get_type_id(data) + return geometry_type_values[np.searchsorted(geometry_type_ids, res)] + else: + return _unary_op("geom_type", data, null_value=None) + + +def area(data): + if compat.USE_PYGEOS: + return pygeos.area(data) + else: + return _unary_op("area", data, null_value=np.nan) + + +def length(data): + if compat.USE_PYGEOS: + return pygeos.length(data) + else: + return _unary_op("length", data, null_value=np.nan) + + +# +# Unary operations that return new geometries +# + + +def _unary_geo(op, left, *args, **kwargs): + # type: (str, np.array[geoms]) -> np.array[geoms] + """Unary operation that returns new geometries""" + # ensure 1D output, see note above + data = np.empty(len(left), dtype=object) + data[:] = [getattr(geom, op, None) for geom in left] + return data + + +def boundary(data): + if compat.USE_PYGEOS: + return pygeos.boundary(data) + else: + return _unary_geo("boundary", data) + + +def centroid(data): + if compat.USE_PYGEOS: + return pygeos.centroid(data) + else: + return _unary_geo("centroid", data) + + +def convex_hull(data): + if compat.USE_PYGEOS: + return pygeos.convex_hull(data) + else: + return _unary_geo("convex_hull", data) + + +def envelope(data): + if compat.USE_PYGEOS: + return pygeos.envelope(data) + else: + return _unary_geo("envelope", data) + + +def exterior(data): + if compat.USE_PYGEOS: + return pygeos.get_exterior_ring(data) + else: + return _unary_geo("exterior", data) + + +def interiors(data): + data = 
to_shapely(data) + has_non_poly = False + inner_rings = [] + for geom in data: + interior_ring_seq = getattr(geom, "interiors", None) + # polygon case + if interior_ring_seq is not None: + inner_rings.append(list(interior_ring_seq)) + # non-polygon case + else: + has_non_poly = True + inner_rings.append(None) + if has_non_poly: + warnings.warn( + "Only Polygon objects have interior rings. For other " + "geometry types, None is returned." + ) + data = np.empty(len(data), dtype=object) + data[:] = inner_rings + return data + + +def representative_point(data): + if compat.USE_PYGEOS: + return pygeos.point_on_surface(data) + else: + # method and not a property -> can't use _unary_geo + out = np.empty(len(data), dtype=object) + out[:] = [ + geom.representative_point() if geom is not None else None for geom in data + ] + return out + + +# +# Binary predicates +# + + +def covers(data, other): + if compat.USE_PYGEOS: + return _binary_method("covers", data, other) + else: + return _binary_predicate("covers", data, other) + + +def covered_by(data, other): + if compat.USE_PYGEOS: + return _binary_method("covered_by", data, other) + else: + raise NotImplementedError( + "covered_by is only implemented for pygeos, not shapely" + ) + + +def contains(data, other): + if compat.USE_PYGEOS: + return _binary_method("contains", data, other) + else: + return _binary_predicate("contains", data, other) + + +def crosses(data, other): + if compat.USE_PYGEOS: + return _binary_method("crosses", data, other) + else: + return _binary_predicate("crosses", data, other) + + +def disjoint(data, other): + if compat.USE_PYGEOS: + return _binary_method("disjoint", data, other) + else: + return _binary_predicate("disjoint", data, other) + + +def equals(data, other): + if compat.USE_PYGEOS: + return _binary_method("equals", data, other) + else: + return _binary_predicate("equals", data, other) + + +def intersects(data, other): + if compat.USE_PYGEOS: + return _binary_method("intersects", data, other) + 
else: + return _binary_predicate("intersects", data, other) + + +def overlaps(data, other): + if compat.USE_PYGEOS: + return _binary_method("overlaps", data, other) + else: + return _binary_predicate("overlaps", data, other) + + +def touches(data, other): + if compat.USE_PYGEOS: + return _binary_method("touches", data, other) + else: + return _binary_predicate("touches", data, other) + + +def within(data, other): + if compat.USE_PYGEOS: + return _binary_method("within", data, other) + else: + return _binary_predicate("within", data, other) + + +def equals_exact(data, other, tolerance): + if compat.USE_PYGEOS: + return _binary_method("equals_exact", data, other, tolerance=tolerance) + else: + return _binary_predicate("equals_exact", data, other, tolerance=tolerance) + + +def almost_equals(self, other, decimal): + if compat.USE_PYGEOS: + return self.equals_exact(other, 0.5 * 10 ** (-decimal)) + else: + return _binary_predicate("almost_equals", self, other, decimal=decimal) + + +# +# Binary operations that return new geometries +# + + +def difference(data, other): + if compat.USE_PYGEOS: + return _binary_method("difference", data, other) + else: + return _binary_geo("difference", data, other) + + +def intersection(data, other): + if compat.USE_PYGEOS: + return _binary_method("intersection", data, other) + else: + return _binary_geo("intersection", data, other) + + +def symmetric_difference(data, other): + if compat.USE_PYGEOS: + return _binary_method("symmetric_difference", data, other) + else: + return _binary_geo("symmetric_difference", data, other) + + +def union(data, other): + if compat.USE_PYGEOS: + return _binary_method("union", data, other) + else: + return _binary_geo("union", data, other) + + +# +# Other operations +# + + +def distance(data, other): + if compat.USE_PYGEOS: + return _binary_method("distance", data, other) + else: + return _binary_op_float("distance", data, other) + + +def buffer(data, distance, resolution=16, **kwargs): + if 
compat.USE_PYGEOS: + return pygeos.buffer(data, distance, quadsegs=resolution, **kwargs) + else: + out = np.empty(len(data), dtype=object) + if isinstance(distance, np.ndarray): + if len(distance) != len(data): + raise ValueError( + "Length of distance sequence does not match " + "length of the GeoSeries" + ) + + out[:] = [ + geom.buffer(dist, resolution, **kwargs) if geom is not None else None + for geom, dist in zip(data, distance) + ] + return out + + out[:] = [ + geom.buffer(distance, resolution, **kwargs) if geom is not None else None + for geom in data + ] + return out + + +def interpolate(data, distance, normalized=False): + if compat.USE_PYGEOS: + return pygeos.line_interpolate_point(data, distance, normalize=normalized) + else: + out = np.empty(len(data), dtype=object) + if isinstance(distance, np.ndarray): + if len(distance) != len(data): + raise ValueError( + "Length of distance sequence does not match " + "length of the GeoSeries" + ) + out[:] = [ + geom.interpolate(dist, normalized=normalized) + for geom, dist in zip(data, distance) + ] + return out + + out[:] = [geom.interpolate(distance, normalized=normalized) for geom in data] + return out + + +def simplify(data, tolerance, preserve_topology=True): + if compat.USE_PYGEOS: + # preserve_topology has different default as pygeos! 
+ return pygeos.simplify(data, tolerance, preserve_topology=preserve_topology) + else: + # method and not a property -> can't use _unary_geo + out = np.empty(len(data), dtype=object) + out[:] = [ + geom.simplify(tolerance, preserve_topology=preserve_topology) + for geom in data + ] + return out + + +def project(data, other, normalized=False): + if compat.USE_PYGEOS: + return pygeos.line_locate_point(data, other, normalize=normalized) + else: + return _binary_op("project", data, other, normalized=normalized) + + +def relate(data, other): + data = to_shapely(data) + if isinstance(other, np.ndarray): + other = to_shapely(other) + return _binary_op("relate", data, other) + + +def unary_union(data): + if compat.USE_PYGEOS: + return _pygeos_to_shapely(pygeos.union_all(data)) + else: + return shapely.ops.unary_union(data) + + +# +# Coordinate related properties +# + + +def get_x(data): + if compat.USE_PYGEOS: + return pygeos.get_x(data) + else: + return _unary_op("x", data, null_value=np.nan) + + +def get_y(data): + if compat.USE_PYGEOS: + return pygeos.get_y(data) + else: + return _unary_op("y", data, null_value=np.nan) + + +def bounds(data): + if compat.USE_PYGEOS: + return pygeos.bounds(data) + # ensure that for empty arrays, the result has the correct shape + if len(data) == 0: + return np.empty((0, 4), dtype="float64") + # need to explicitly check for empty (in addition to missing) geometries, + # as those return an empty tuple, not resulting in a 2D array + bounds = np.array( + [ + geom.bounds + if not (geom is None or geom.is_empty) + else (np.nan, np.nan, np.nan, np.nan) + for geom in data + ] + ) + return bounds + + +# +# Coordinate transformation +# + + +def transform(data, func): + if compat.USE_PYGEOS: + coords = pygeos.get_coordinates(data) + new_coords = func(coords[:, 0], coords[:, 1]) + result = pygeos.set_coordinates(data.copy(), np.array(new_coords).T) + return result + else: + from shapely.ops import transform + + n = len(data) + result = np.empty(n, 
dtype=object) + for i in range(n): + geom = data[i] + result[i] = transform(func, geom) + + return result diff -Nru python-geopandas-0.7.0/geopandas/_version.py python-geopandas-0.8.1/geopandas/_version.py --- python-geopandas-0.7.0/geopandas/_version.py 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/geopandas/_version.py 2020-07-15 17:54:36.000000000 +0000 @@ -22,8 +22,8 @@ # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). - git_refnames = " (HEAD -> master, tag: v0.7.0)" - git_full = "032bcbebf86fd8cae2870097e850e8427c861644" + git_refnames = " (tag: v0.8.1, 0.8.x)" + git_full = "03546f483e358b6565b11333f576e1bc68df1b57" keywords = {"refnames": git_refnames, "full": git_full} return keywords diff -Nru python-geopandas-0.7.0/requirements-dev.txt python-geopandas-0.8.1/requirements-dev.txt --- python-geopandas-0.7.0/requirements-dev.txt 1970-01-01 00:00:00.000000000 +0000 +++ python-geopandas-0.8.1/requirements-dev.txt 2020-07-15 17:54:36.000000000 +0000 @@ -0,0 +1,33 @@ +# required +fiona>=1.7 +pandas>=0.23.4 +pyproj>=2.2.0 +shapely>=1.5 + +# geodatabase access +psycopg2>=2.5.1 +SQLAlchemy>=0.8.3 + +# geocoding +geopy + +# plotting +descartes>=1.0 +matplotlib>=2.0 +mapclassify + +# testing +mock>=1.0.1 # technically not need for python >= 3.3 +pytest>=3.1.0 +pytest-cov +codecov + +# spatial access methods +rtree>=0.8 + +# styling +black +pre-commit + +# PostGIS writing +GeoAlchemy2 diff -Nru python-geopandas-0.7.0/requirements.test.txt python-geopandas-0.8.1/requirements.test.txt --- python-geopandas-0.7.0/requirements.test.txt 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/requirements.test.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -psycopg2>=2.5.1 -SQLAlchemy>=0.8.3 -geopy -matplotlib>=1.2.1 -descartes>=1.0 -mock>=1.0.1 # technically not need for python >= 3.3 -pytest>=3.1.0 -pytest-cov -codecov -rtree>=0.8 
-mapclassify diff -Nru python-geopandas-0.7.0/requirements.txt python-geopandas-0.8.1/requirements.txt --- python-geopandas-0.7.0/requirements.txt 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/requirements.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -Cython>=0.16 -shapely>=1.2.18 -fiona>=1.0.1 -pyproj>=2.2.0 diff -Nru python-geopandas-0.7.0/.travis.yml python-geopandas-0.8.1/.travis.yml --- python-geopandas-0.7.0/.travis.yml 2020-02-17 07:39:12.000000000 +0000 +++ python-geopandas-0.8.1/.travis.yml 2020-07-15 17:54:36.000000000 +0000 @@ -4,20 +4,28 @@ matrix: include: - # Only one test for these Python versions + # One build with minimum versions of dependencies - env: ENV_FILE="ci/travis/35-minimal.yaml" + # one build with no optional dependencies + - env: ENV_FILE="ci/travis/38-no-optional-deps.yaml" + # Python 3.6 test all supported Pandas versions - - env: ENV_FILE="ci/travis/36-pd023.yaml" - - env: ENV_FILE="ci/travis/36-pd024.yaml" + - env: ENV_FILE="ci/travis/36-pd023.yaml" PYGEOS=true + - env: ENV_FILE="ci/travis/36-pd024.yaml" PYGEOS=true + + - env: ENV_FILE="ci/travis/37-latest-defaults.yaml" STYLE=true PYGEOS=true + - env: ENV_FILE="ci/travis/37-latest-conda-forge.yaml" PYGEOS=true - - env: ENV_FILE="ci/travis/37-latest-defaults.yaml" STYLE=true - - env: ENV_FILE="ci/travis/37-latest-conda-forge.yaml" + - env: ENV_FILE="ci/travis/38-latest-conda-forge.yaml" PYGEOS=true POSTGIS=true PGUSER=postgres PGPASSWORD=postgres - - env: ENV_FILE="ci/travis/38-latest-conda-forge.yaml" + - env: ENV_FILE="ci/travis/37-dev.yaml" DEV=true PYGEOS=true - - env: ENV_FILE="ci/travis/37-dev.yaml" DEV=true + allow_failures: + - env: ENV_FILE="ci/travis/37-dev.yaml" DEV=true PYGEOS=true +before_install: + - chmod +x ci/travis/setup_postgres.sh install: # Install conda @@ -39,12 +47,21 @@ - if [ "$DEV" ]; then pip install git+https://github.com/matplotlib/matplotlib.git; fi - if [ "$DEV" ]; then pip install 
git+https://github.com/Toblerity/Shapely.git; fi - if [ "$STYLE" ]; then pip install black flake8; fi + - if [ "$POSTGIS" ]; then conda install postgis -c conda-forge; fi - pip install -e . + + # List environment - conda list - python -c "import geopandas; geopandas.show_versions();" + # Set-up database + - if [ "$POSTGIS" ]; then ci/travis/setup_postgres.sh; fi + script: - - py.test geopandas --cov geopandas -v --cov-report term-missing + - echo "Testing without PyGEOS" + - USE_PYGEOS=0 pytest geopandas -v -r s --cov geopandas --cov-report term-missing + - if [ "$PYGEOS" ]; then echo "Testing with PyGEOS"; fi + - if [ "$PYGEOS" ]; then USE_PYGEOS=1 pytest geopandas -v -r s --cov-append --cov geopandas --cov-report term-missing; fi - if [ "$STYLE" ]; then black --check geopandas; fi - if [ "$STYLE" ]; then flake8 geopandas; fi