diff -Nru pyshp-2.1.3+ds/changelog.txt pyshp-2.2.0/changelog.txt --- pyshp-2.1.3+ds/changelog.txt 2021-01-14 14:12:34.000000000 +0000 +++ pyshp-2.2.0/changelog.txt 2022-02-02 17:07:33.000000000 +0000 @@ -1,4 +1,26 @@ +VERSION 2.2.0 + +2022-02-02 + New Features: + * Read shapefiles directly from zipfiles. + * Read shapefiles directly from urls. + * Allow fast extraction of only a subset of dbf fields through a `fields` arg. + * Allow fast filtering which shapes to read from the file through a `bbox` arg. + + Improvements: + * More examples and restructuring of README. + * More informative Shape to geojson warnings (see #219). + * Shape object information when calling repr(). + * Faster ring orientation checks, enforce geojson output ring orientation. + + Bug fixes: + * Remove null-padding at end of some record character fields. + * Fix dbf writing error when the number of record list or dict entries didn't match the number of fields. + * Handle rare garbage collection issue after deepcopy (https://github.com/mattijn/topojson/issues/120) + * Fix bug where records and shapes would be assigned incorrect record number (@karanrn) + * Fix typos in docs (@timgates) + VERSION 2.1.3 2021-01-14 diff -Nru pyshp-2.1.3+ds/debian/changelog pyshp-2.2.0/debian/changelog --- pyshp-2.1.3+ds/debian/changelog 2021-01-14 15:49:08.000000000 +0000 +++ pyshp-2.2.0/debian/changelog 2022-02-02 19:20:28.000000000 +0000 @@ -1,3 +1,14 @@ +pyshp (2.2.0-1) unstable; urgency=medium + + * Team upload. + * New upstream release. + * Bump Standards-Version to 4.6.0, no changes. + * Bump debhelper compat to 12, no changes. + * Update watch file to use GitHub tags. + * Use pytest instead of doctests. + + -- Bas Couwenberg Wed, 02 Feb 2022 20:20:28 +0100 + pyshp (2.1.3+ds-1) unstable; urgency=medium * Team upload. 
diff -Nru pyshp-2.1.3+ds/debian/compat pyshp-2.2.0/debian/compat --- pyshp-2.1.3+ds/debian/compat 2021-01-14 15:46:21.000000000 +0000 +++ pyshp-2.2.0/debian/compat 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -10 diff -Nru pyshp-2.1.3+ds/debian/control pyshp-2.2.0/debian/control --- pyshp-2.1.3+ds/debian/control 2021-01-14 15:46:21.000000000 +0000 +++ pyshp-2.2.0/debian/control 2022-02-02 19:19:40.000000000 +0000 @@ -4,11 +4,12 @@ Ross Gammon Section: python Priority: optional -Build-Depends: debhelper (>= 10~), +Build-Depends: debhelper-compat (= 12), dh-python, python3-all, - python3-setuptools -Standards-Version: 4.5.1 + python3-setuptools, + python3-pytest +Standards-Version: 4.6.0 Vcs-Browser: https://salsa.debian.org/debian-gis-team/pyshp Vcs-Git: https://salsa.debian.org/debian-gis-team/pyshp.git Homepage: https://github.com/GeospatialPython/pyshp diff -Nru pyshp-2.1.3+ds/debian/copyright pyshp-2.2.0/debian/copyright --- pyshp-2.1.3+ds/debian/copyright 2021-01-14 15:47:00.000000000 +0000 +++ pyshp-2.2.0/debian/copyright 2022-02-02 19:19:40.000000000 +0000 @@ -1,10 +1,7 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: PyShp Upstream-Contact: Joel Lawhead -Source: https://pypi.debian.net/pyshp/ -Comment: The upstream sources are repacked to excluded the pyshp.egg-info - directory that is automatically removed by dh_clean. -Files-Excluded: pyshp.egg-info/* +Source: https://github.com/GeospatialPython/pyshp Files: * Copyright: Joel Lawhead diff -Nru pyshp-2.1.3+ds/debian/patches/no-network.patch pyshp-2.2.0/debian/patches/no-network.patch --- pyshp-2.1.3+ds/debian/patches/no-network.patch 1970-01-01 00:00:00.000000000 +0000 +++ pyshp-2.2.0/debian/patches/no-network.patch 2022-02-02 19:19:50.000000000 +0000 @@ -0,0 +1,15 @@ +Description: Mark tests requiring network. + Allows skipping tests in environments without network. 
+Author: Bas Couwenberg +Forwarded: https://github.com/GeospatialPython/pyshp/pull/230 + +--- a/test_shapefile.py ++++ b/test_shapefile.py +@@ -260,6 +260,7 @@ def test_reader_context_manager(): + assert sf.shx.closed is True + + ++@pytest.mark.network + def test_reader_url(): + """ + Assert that Reader can open shapefiles from a url. diff -Nru pyshp-2.1.3+ds/debian/patches/series pyshp-2.2.0/debian/patches/series --- pyshp-2.1.3+ds/debian/patches/series 1970-01-01 00:00:00.000000000 +0000 +++ pyshp-2.2.0/debian/patches/series 2022-02-02 19:19:40.000000000 +0000 @@ -0,0 +1 @@ +no-network.patch diff -Nru pyshp-2.1.3+ds/debian/rules pyshp-2.2.0/debian/rules --- pyshp-2.1.3+ds/debian/rules 2021-01-14 15:46:21.000000000 +0000 +++ pyshp-2.2.0/debian/rules 2022-02-02 19:19:40.000000000 +0000 @@ -5,6 +5,10 @@ #export DH_VERBOSE=1 export PYBUILD_NAME=pyshp +export PYBUILD_TEST_PYTEST=1 +export PYBUILD_TEST_ARGS=-vv -m "not network" +export PYBUILD_BEFORE_TEST=cp -r {dir}/README.md {dir}/shapefiles {dir}/test_*py {build_dir} +export PYBUILD_AFTER_TEST=rm -rf {build_dir}/README.md {build_dir}/shapefiles {build_dir}/test_*py {build_dir}/*.pyc {build_dir}/*.dbf {build_dir}/*.sbn {build_dir}/*.sbx {build_dir}/*.shp {build_dir}/*.shx %: dh $@ \ @@ -12,8 +16,4 @@ --with python3 override_dh_auto_test: - PYBUILD_SYSTEM=custom \ - PYBUILD_BEFORE_TEST="cp -r {dir}/README.md {dir}/shapefiles {build_dir}" \ - PYBUILD_TEST_ARGS="cd {build_dir} && {interpreter} shapefile.py" \ - PYBUILD_AFTER_TEST="rm -rf {build_dir}/README.md {build_dir}/shapefiles {build_dir}/*.pyc {build_dir}/*.dbf {build_dir}/*.sbn {build_dir}/*.sbx {build_dir}/*.shp {build_dir}/*.shx" \ dh_auto_test diff -Nru pyshp-2.1.3+ds/debian/watch pyshp-2.2.0/debian/watch --- pyshp-2.1.3+ds/debian/watch 2021-01-14 15:46:21.000000000 +0000 +++ pyshp-2.2.0/debian/watch 2022-02-02 19:19:40.000000000 +0000 @@ -1,6 +1,7 @@ version=4 opts=\ dversionmangle=s/\+(debian|dfsg|ds|deb)\d*$//,\ 
-uversionmangle=s/(\d)[_\.\-\+]?((RC|rc|pre|dev|beta|alpha)\d*)$/$1~$2/,\ -repacksuffix=+ds \ -https://pypi.debian.net/pyshp/pyshp-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz))) +uversionmangle=s/(\d)[_\.\-\+]?((RC|rc|pre|dev|beta|alpha)\d*)$/$1~$2/;s/RC/rc/,\ +filenamemangle=s/(?:.*\/)?(?:rel|v|pyshp)?[\-\_]?(\d\S+)\.(tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))/pyshp-$1.$2/ \ +https://github.com/GeospatialPython/pyshp/tags \ +(?:.*?/archive/(?:.*?/)?)?(?:rel|v|pyshp)?[\-\_]?(\d\S+)\.(?:tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz))) diff -Nru pyshp-2.1.3+ds/.github/workflows/build.yml pyshp-2.2.0/.github/workflows/build.yml --- pyshp-2.1.3+ds/.github/workflows/build.yml 1970-01-01 00:00:00.000000000 +0000 +++ pyshp-2.2.0/.github/workflows/build.yml 2022-02-02 17:07:33.000000000 +0000 @@ -0,0 +1,37 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: build + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["2.7", "3.5", "3.6", "3.7", "3.8", "3.9"] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install pytest + if [ -f requirements.test.txt ]; then pip install -r requirements.test.txt; fi + - name: Test with doctest + run: | + python shapefile.py + - name: Test with pytest + run: | + pytest diff -Nru pyshp-2.1.3+ds/.github/workflows/deploy.yml pyshp-2.2.0/.github/workflows/deploy.yml --- pyshp-2.1.3+ds/.github/workflows/deploy.yml 1970-01-01 00:00:00.000000000 +0000 +++ pyshp-2.2.0/.github/workflows/deploy.yml 2022-02-02 17:07:33.000000000 +0000 @@ 
-0,0 +1,36 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: deploy + +on: + release: + types: [published] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build --sdist --wheel --outdir dist/ + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_INTEGRATION }} diff -Nru pyshp-2.1.3+ds/.gitignore pyshp-2.2.0/.gitignore --- pyshp-2.1.3+ds/.gitignore 1970-01-01 00:00:00.000000000 +0000 +++ pyshp-2.2.0/.gitignore 2022-02-02 17:07:33.000000000 +0000 @@ -0,0 +1,21 @@ +shapefile_[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9].dbf +shapefile_[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9].shp +shapefile_[a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9][a-zA-Z0-9].shx +shapefiles/test/copy.dbf +shapefiles/test/copy.shp +shapefiles/test/copy.shx +shapefiles/test/geojson.dbf +shapefiles/test/geojson.shp +shapefiles/test/geojson.shx +shapefiles/test/latin_as_utf8.dbf +shapefiles/test/latin_as_utf8.shp +shapefiles/test/latin_as_utf8.shx +shapefiles/test/null.dbf +shapefiles/test/null.shp +shapefiles/test/null.shx +__pycache__/ +__cache__/ +build/ +dist/ +*.egg-info/ +*.py[cod] diff -Nru pyshp-2.1.3+ds/PKG-INFO pyshp-2.2.0/PKG-INFO --- pyshp-2.1.3+ds/PKG-INFO 2021-01-14 
14:12:50.000000000 +0000 +++ pyshp-2.2.0/PKG-INFO 1970-01-01 00:00:00.000000000 +0000 @@ -1,1195 +0,0 @@ -Metadata-Version: 2.1 -Name: pyshp -Version: 2.1.3 -Summary: Pure Python read/write support for ESRI Shapefile format -Home-page: https://github.com/GeospatialPython/pyshp -Author: Joel Lawhead -Author-email: jlawhead@geospatialpython.com -License: MIT -Download-URL: https://github.com/GeospatialPython/pyshp/archive/2.1.1.tar.gz -Description: # PyShp - - The Python Shapefile Library (PyShp) reads and writes ESRI Shapefiles in pure Python. - - ![pyshp logo](http://4.bp.blogspot.com/_SBi37QEsCvg/TPQuOhlHQxI/AAAAAAAAAE0/QjFlWfMx0tQ/S350/GSP_Logo.png "PyShp") - - [![Build Status](https://travis-ci.org/GeospatialPython/pyshp.svg?branch=master)](https://travis-ci.org/GeospatialPython/pyshp) - - ## Contents - - [Overview](#overview) - - [Version Changes](#version-changes) - - [Examples](#examples) - - [Reading Shapefiles](#reading-shapefiles) - - [The Reader Class](#the-reader-class) - - [Reading Geometry](#reading-geometry) - - [Reading Records](#reading-records) - - [Reading Geometry and Records Simultaneously](#reading-geometry-and-records-simultaneously) - - [Writing Shapefiles](#writing-shapefiles) - - [The Writer Class](#the-writer-class) - - [Adding Records](#adding-records) - - [Adding Geometry](#adding-geometry) - - [Geometry and Record Balancing](#geometry-and-record-balancing) - - [How To's](#how-tos) - - [3D and Other Geometry Types](#3d-and-other-geometry-types) - - [Working with Large Shapefiles](#working-with-large-shapefiles) - - [Unicode and Shapefile Encodings](#unicode-and-shapefile-encodings) - - [Testing](#testing) - - - # Overview - - The Python Shapefile Library (PyShp) provides read and write support for the - Esri Shapefile format. The Shapefile format is a popular Geographic - Information System vector data format created by Esri. 
For more information - about this format please read the well-written "ESRI Shapefile Technical - Description - July 1998" located at [http://www.esri.com/library/whitepapers/p - dfs/shapefile.pdf](http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf) - . The Esri document describes the shp and shx file formats. However a third - file format called dbf is also required. This format is documented on the web - as the "XBase File Format Description" and is a simple file-based database - format created in the 1960's. For more on this specification see: [http://www.clicketyclick.dk/databases/xbase/format/index.html](http://www.clicketyclick.dk/databases/xbase/format/index.html) - - Both the Esri and XBase file-formats are very simple in design and memory - efficient which is part of the reason the shapefile format remains popular - despite the numerous ways to store and exchange GIS data available today. - - Pyshp is compatible with Python 2.7-3.x. - - This document provides examples for using PyShp to read and write shapefiles. However - many more examples are continually added to the blog [http://GeospatialPython.com](http://GeospatialPython.com), - and by searching for PyShp on [https://gis.stackexchange.com](https://gis.stackexchange.com). - - Currently the sample census blockgroup shapefile referenced in the examples is available on the GitHub project site at - [https://github.com/GeospatialPython/pyshp](https://github.com/GeospatialPython/pyshp). These - examples are straight-forward and you can also easily run them against your - own shapefiles with minimal modification. - - Important: If you are new to GIS you should read about map projections. 
- Please visit: [https://github.com/GeospatialPython/pyshp/wiki/Map-Projections](https://github.com/GeospatialPython/pyshp/wiki/Map-Projections) - - I sincerely hope this library eliminates the mundane distraction of simply - reading and writing data, and allows you to focus on the challenging and FUN - part of your geospatial project. - - - # Version Changes - - ## 2.1.3 - - ### Bug fixes: - - - Fix recent bug in geojson hole-in-polygon checking (see #205) - - Misc fixes to allow geo interface dump to json (eg dates as strings) - - Handle additional dbf date null values, and return faulty dates as unicode (see #187) - - Add writer target typecheck - - Fix bugs to allow reading shp/shx/dbf separately - - Allow delayed shapefile loading by passing no args - - Fix error with writing empty z/m shapefile (@mcuprjak) - - Fix signed_area() so ignores z/m coords - - Enforce writing the 11th field name character as null-terminator (only first 10 are used) - - Minor README fixes - - Added more tests - - ## 2.1.2 - - ### Bug fixes: - - - Fix issue where warnings.simplefilter('always') changes global warning behavior [see #203] - - ## 2.1.1 - - ### Improvements: - - - Handle shapes with no coords and represent as geojson with no coords (GeoJSON null-equivalent) - - Expand testing to Python 3.6, 3.7, 3.8 and PyPy; drop 3.3 and 3.4 [@mwtoews] - - Added pytest testing [@jmoujaes] - - ### Bug fixes: - - - Fix incorrect geo interface handling of multipolygons with complex exterior-hole relations [see #202] - - Enforce shapefile requirement of at least one field, to avoid writing invalid shapefiles [@Jonty] - - Fix Reader geo interface including DeletionFlag field in feature properties [@nnseva] - - Fix polygons not being auto closed, which was accidentally dropped - - Fix error for null geometries in feature geojson - - Misc docstring cleanup [@fiveham] - - ## 2.1.0 - - ### New Features: - - - Added back read/write support for unicode field names. 
- - Improved Record representation - - More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() - - ### Bug fixes: - - - Fixed error when reading optional m-values - - Fixed Record attribute autocomplete in Python 3 - - Misc readme cleanup - - ## 2.0.0 - - The newest version of PyShp, version 2.0 introduced some major new improvements. - A great thanks to all who have contributed code and raised issues, and for everyone's - patience and understanding during the transition period. - Some of the new changes are incompatible with previous versions. - Users of the previous version 1.x should therefore take note of the following changes - (Note: Some contributor attributions may be missing): - - ### Major Changes: - - - Full support for unicode text, with custom encoding, and exception handling. - - Means that the Reader returns unicode, and the Writer accepts unicode. - - PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. - - Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: - - Specify filepath/destination and text encoding when creating the Writer. - - The file is written incrementally with each call to shape/record. - - Adding shapes is now done using dedicated methods for each shapetype. - - Reading shapefiles is now more convenient: - - Shapefiles can be opened using the context manager, and files are properly closed. - - Shapefiles can be iterated, have a length, and supports the geo interface. - - New ways of inspecing shapefile metadata by printing. [@megies] - - More convenient accessing of Record values as attributes. [@philippkraft] - - More convenient shape type name checking. [@megies] - - Add more support and documentation for MultiPatch 3D shapes. - - The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. 
- - Better documentation of previously unclear aspects, such as field types. - - ### Important Fixes: - - - More reliable/robust: - - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] - - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. [@ShinNoNoir] - - Improved parsing of field value types, fixed errors and made more flexible. - - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] - - Fix some geo interface errors, including checking polygon directions. - - Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] - - Enforce maximum field limit. [@mwtoews] - - - # Examples - - Before doing anything you must import the library. - - - >>> import shapefile - - The examples below will use a shapefile created from the U.S. Census Bureau - Blockgroups data set near San Francisco, CA and available in the git - repository of the PyShp GitHub site. - - ## Reading Shapefiles - - ### The Reader Class - - To read a shapefile create a new "Reader" object and pass it the name of an - existing shapefile. The shapefile format is actually a collection of three - files. You specify the base filename of the shapefile or the complete filename - of any of the shapefile component files. - - - >>> sf = shapefile.Reader("shapefiles/blockgroups") - - OR - - - >>> sf = shapefile.Reader("shapefiles/blockgroups.shp") - - OR - - - >>> sf = shapefile.Reader("shapefiles/blockgroups.dbf") - - OR any of the other 5+ formats which are potentially part of a shapefile. The - library does not care about file extensions. 
- - #### Reading Shapefiles Using the Context Manager - - The "Reader" class can be used as a context manager, to ensure open file - objects are properly closed when done reading the data: - - >>> with shapefile.Reader("shapefiles/blockgroups.shp") as shp: - ... print(shp) - shapefile Reader - 663 shapes (type 'POLYGON') - 663 records (44 fields) - - #### Reading Shapefiles from File-Like Objects - - You can also load shapefiles from any Python file-like object using keyword - arguments to specify any of the three files. This feature is very powerful and - allows you to load shapefiles from a url, a zip file, a serialized object, - or in some cases a database. - - - >>> myshp = open("shapefiles/blockgroups.shp", "rb") - >>> mydbf = open("shapefiles/blockgroups.dbf", "rb") - >>> r = shapefile.Reader(shp=myshp, dbf=mydbf) - - Notice in the examples above the shx file is never used. The shx file is a - very simple fixed-record index for the variable-length records in the shp - file. This file is optional for reading. If it's available PyShp will use the - shx file to access shape records a little faster but will do just fine without - it. - - #### Reading Shapefile Meta-Data - - Shapefiles have a number of attributes for inspecting the file contents. - A shapefile is a container for a specific type of geometry, and this can be checked using the - shapeType attribute. - - - >>> sf.shapeType - 5 - - Shape types are represented by numbers between 0 and 31 as defined by the - shapefile specification and listed below. 
It is important to note that the numbering system has - several reserved numbers that have not been used yet, therefore the numbers of - the existing shape types are not sequential: - - - NULL = 0 - - POINT = 1 - - POLYLINE = 3 - - POLYGON = 5 - - MULTIPOINT = 8 - - POINTZ = 11 - - POLYLINEZ = 13 - - POLYGONZ = 15 - - MULTIPOINTZ = 18 - - POINTM = 21 - - POLYLINEM = 23 - - POLYGONM = 25 - - MULTIPOINTM = 28 - - MULTIPATCH = 31 - - Based on this we can see that our blockgroups shapefile contains - Polygon type shapes. The shape types are also defined as constants in - the shapefile module, so that we can compare types more intuitively: - - - >>> sf.shapeType == shapefile.POLYGON - True - - For convenience, you can also get the name of the shape type as a string: - - - >>> sf.shapeTypeName == 'POLYGON' - True - - Other pieces of meta-data that we can check include the number of features - and the bounding box area the shapefile covers: - - - >>> len(sf) - 663 - >>> sf.bbox - [-122.515048, 37.652916, -122.327622, 37.863433] - - Finally, if you would prefer to work with the entire shapefile in a different - format, you can convert all of it to a GeoJSON dictionary, although you may lose - some information in the process, such as z- and m-values: - - - >>> sf.__geo_interface__['type'] - 'FeatureCollection' - - ### Reading Geometry - - A shapefile's geometry is the collection of points or shapes made from - vertices and implied arcs representing physical locations. All types of - shapefiles just store points. The metadata about the points determine how they - are handled by software. - - You can get a list of the shapefile's geometry by calling the shapes() - method. - - - >>> shapes = sf.shapes() - - The shapes method returns a list of Shape objects describing the geometry of - each shape record. - - - >>> len(shapes) - 663 - - To read a single shape by calling its index use the shape() method. The index - is the shape's count from 0. 
So to read the 8th shape record you would use its - index which is 7. - - - >>> s = sf.shape(7) - - >>> # Read the bbox of the 8th shape to verify - >>> # Round coordinates to 3 decimal places - >>> ['%.3f' % coord for coord in s.bbox] - ['-122.450', '37.801', '-122.442', '37.808'] - - Each shape record (except Points) contains the following attributes. Records of - shapeType Point do not have a bounding box 'bbox'. - - - >>> for name in dir(shapes[3]): - ... if not name.startswith('_'): - ... name - 'bbox' - 'parts' - 'points' - 'shapeType' - 'shapeTypeName' - - * shapeType: an integer representing the type of shape as defined by the - shapefile specification. - - - >>> shapes[3].shapeType - 5 - - * shapeTypeName: a string representation of the type of shape as defined by shapeType. Read-only. - - - >>> shapes[3].shapeTypeName - 'POLYGON' - - * bbox: If the shape type contains multiple points this tuple describes the - lower left (x,y) coordinate and upper right corner coordinate creating a - complete box around the points. If the shapeType is a - Null (shapeType == 0) then an AttributeError is raised. - - - >>> # Get the bounding box of the 4th shape. - >>> # Round coordinates to 3 decimal places - >>> bbox = shapes[3].bbox - >>> ['%.3f' % coord for coord in bbox] - ['-122.486', '37.787', '-122.446', '37.811'] - - * parts: Parts simply group collections of points into shapes. If the shape - record has multiple parts this attribute contains the index of the first - point of each part. If there is only one part then a list containing 0 is - returned. - - - >>> shapes[3].parts - [0] - - * points: The points attribute contains a list of tuples containing an - (x,y) coordinate for each point in the shape. 
- - - >>> len(shapes[3].points) - 173 - >>> # Get the 8th point of the fourth shape - >>> # Truncate coordinates to 3 decimal places - >>> shape = shapes[3].points[7] - >>> ['%.3f' % coord for coord in shape] - ['-122.471', '37.787'] - - In most cases, however, if you need to do more than just type or bounds checking, you may want - to convert the geometry to the more human-readable [GeoJSON format](http://geojson.org), - where lines and polygons are grouped for you: - - - >>> s = sf.shape(0) - >>> geoj = s.__geo_interface__ - >>> geoj["type"] - 'MultiPolygon' - - The results from the shapes() method similiarly supports converting to GeoJSON: - - - >>> shapes.__geo_interface__['type'] - 'GeometryCollection' - - - ### Reading Records - - A record in a shapefile contains the attributes for each shape in the - collection of geometries. Records are stored in the dbf file. The link between - geometry and attributes is the foundation of all geographic information systems. - This critical link is implied by the order of shapes and corresponding records - in the shp geometry file and the dbf attribute file. - - The field names of a shapefile are available as soon as you read a shapefile. - You can call the "fields" attribute of the shapefile as a Python list. Each - field is a Python list with the following information: - - * Field name: the name describing the data at this column index. - * Field type: the type of data at this column index. Types can be: - * "C": Characters, text. - * "N": Numbers, with or without decimals. - * "F": Floats (same as "N"). - * "L": Logical, for boolean True/False values. - * "D": Dates. - * "M": Memo, has no meaning within a GIS and is part of the xbase spec instead. - * Field length: the length of the data found at this column index. Older GIS - software may truncate this length to 8 or 11 characters for "Character" - fields. - * Decimal length: the number of decimal places found in "Number" fields. 
- - To see the fields for the Reader object above (sf) call the "fields" - attribute: - - - >>> fields = sf.fields - - >>> assert fields == [("DeletionFlag", "C", 1, 0), ["AREA", "N", 18, 5], - ... ["BKG_KEY", "C", 12, 0], ["POP1990", "N", 9, 0], ["POP90_SQMI", "N", 10, 1], - ... ["HOUSEHOLDS", "N", 9, 0], - ... ["MALES", "N", 9, 0], ["FEMALES", "N", 9, 0], ["WHITE", "N", 9, 0], - ... ["BLACK", "N", 8, 0], ["AMERI_ES", "N", 7, 0], ["ASIAN_PI", "N", 8, 0], - ... ["OTHER", "N", 8, 0], ["HISPANIC", "N", 8, 0], ["AGE_UNDER5", "N", 8, 0], - ... ["AGE_5_17", "N", 8, 0], ["AGE_18_29", "N", 8, 0], ["AGE_30_49", "N", 8, 0], - ... ["AGE_50_64", "N", 8, 0], ["AGE_65_UP", "N", 8, 0], - ... ["NEVERMARRY", "N", 8, 0], ["MARRIED", "N", 9, 0], ["SEPARATED", "N", 7, 0], - ... ["WIDOWED", "N", 8, 0], ["DIVORCED", "N", 8, 0], ["HSEHLD_1_M", "N", 8, 0], - ... ["HSEHLD_1_F", "N", 8, 0], ["MARHH_CHD", "N", 8, 0], - ... ["MARHH_NO_C", "N", 8, 0], ["MHH_CHILD", "N", 7, 0], - ... ["FHH_CHILD", "N", 7, 0], ["HSE_UNITS", "N", 9, 0], ["VACANT", "N", 7, 0], - ... ["OWNER_OCC", "N", 8, 0], ["RENTER_OCC", "N", 8, 0], - ... ["MEDIAN_VAL", "N", 7, 0], ["MEDIANRENT", "N", 4, 0], - ... ["UNITS_1DET", "N", 8, 0], ["UNITS_1ATT", "N", 7, 0], ["UNITS2", "N", 7, 0], - ... ["UNITS3_9", "N", 8, 0], ["UNITS10_49", "N", 8, 0], - ... ["UNITS50_UP", "N", 8, 0], ["MOBILEHOME", "N", 7, 0]] - - You can get a list of the shapefile's records by calling the records() method: - - - >>> records = sf.records() - - >>> len(records) - 663 - - To read a single record call the record() method with the record's index: - - - >>> rec = sf.record(3) - - Each record is a list-like Record object containing the values corresponding to each field in - the field list. A record's values can be accessed by positional indexing or slicing. 
- For example in the blockgroups shapefile the 2nd and 3rd fields are the blockgroup id - and the 1990 population count of that San Francisco blockgroup: - - - >>> rec[1:3] - ['060750601001', 4715] - - For simpler access, the fields of a record can also accessed via the name of the field, - either as a key or as an attribute name. The blockgroup id (BKG_KEY) of the blockgroups shapefile - can also be retrieved as: - - - >>> rec['BKG_KEY'] - '060750601001' - - >>> rec.BKG_KEY - '060750601001' - - The record values can be easily integrated with other programs by converting it to a field-value dictionary: - - - >>> dct = rec.as_dict() - >>> sorted(dct.items()) - [('AGE_18_29', 1467), ('AGE_30_49', 1681), ('AGE_50_64', 92), ('AGE_5_17', 848), ('AGE_65_UP', 30), ('AGE_UNDER5', 597), ('AMERI_ES', 6), ('AREA', 2.34385), ('ASIAN_PI', 452), ('BKG_KEY', '060750601001'), ('BLACK', 1007), ('DIVORCED', 149), ('FEMALES', 2095), ('FHH_CHILD', 16), ('HISPANIC', 416), ('HOUSEHOLDS', 1195), ('HSEHLD_1_F', 40), ('HSEHLD_1_M', 22), ('HSE_UNITS', 1258), ('MALES', 2620), ('MARHH_CHD', 79), ('MARHH_NO_C', 958), ('MARRIED', 2021), ('MEDIANRENT', 739), ('MEDIAN_VAL', 337500), ('MHH_CHILD', 0), ('MOBILEHOME', 0), ('NEVERMARRY', 703), ('OTHER', 288), ('OWNER_OCC', 66), ('POP1990', 4715), ('POP90_SQMI', 2011.6), ('RENTER_OCC', 3733), ('SEPARATED', 49), ('UNITS10_49', 49), ('UNITS2', 160), ('UNITS3_9', 672), ('UNITS50_UP', 0), ('UNITS_1ATT', 302), ('UNITS_1DET', 43), ('VACANT', 93), ('WHITE', 2962), ('WIDOWED', 37)] - - If at a later point you need to check the record's index position in the original - shapefile, you can do this through the "oid" attribute: - - - >>> rec.oid - 3 - - ### Reading Geometry and Records Simultaneously - - You may want to examine both the geometry and the attributes for a record at - the same time. The shapeRecord() and shapeRecords() method let you do just - that. 
- - Calling the shapeRecords() method will return the geometry and attributes for - all shapes as a list of ShapeRecord objects. Each ShapeRecord instance has a - "shape" and "record" attribute. The shape attribute is a Shape object as - discussed in the first section "Reading Geometry". The record attribute is a - list-like object containing field values as demonstrated in the "Reading Records" section. - - - >>> shapeRecs = sf.shapeRecords() - - Let's read the blockgroup key and the population for the 4th blockgroup: - - - >>> shapeRecs[3].record[1:3] - ['060750601001', 4715] - - The results from the shapeRecords() method is a list-like object that can be easily converted - to GeoJSON through the _\_geo_interface\_\_: - - - >>> shapeRecs.__geo_interface__['type'] - 'FeatureCollection' - - The shapeRecord() method reads a single shape/record pair at the specified index. - To get the 4th shape record from the blockgroups shapefile use the third index: - - - >>> shapeRec = sf.shapeRecord(3) - - Each individual shape record also supports the _\_geo_interface\_\_ to convert it to a GeoJSON: - - - >>> shapeRec.__geo_interface__['type'] - 'Feature' - - The blockgroup key and population count: - - - >>> shapeRec.record[1:3] - ['060750601001', 4715] - - - ## Writing Shapefiles - - ### The Writer Class - - PyShp tries to be as flexible as possible when writing shapefiles while - maintaining some degree of automatic validation to make sure you don't - accidentally write an invalid file. - - PyShp can write just one of the component files such as the shp or dbf file - without writing the others. So in addition to being a complete shapefile - library, it can also be used as a basic dbf (xbase) library. Dbf files are a - common database format which are often useful as a standalone simple database - format. And even shp files occasionally have uses as a standalone format. Some - web-based GIS systems use an user-uploaded shp file to specify an area of - interest. 
Many precision agriculture chemical field sprayers also use the shp - format as a control file for the sprayer system (usually in combination with - custom database file formats). - - To create a shapefile you begin by initiating a new Writer instance, passing it - the file path and name to save to: - - - >>> w = shapefile.Writer('shapefiles/test/testfile') - >>> w.field('field1', 'C') - - File extensions are optional when reading or writing shapefiles. If you specify - them PyShp ignores them anyway. When you save files you can specify a base - file name that is used for all three file types. Or you can specify a name for - one or more file types: - - - >>> w = shapefile.Writer(dbf='shapefiles/test/onlydbf.dbf') - >>> w.field('field1', 'C') - - In that case, any file types not assigned will not - save and only file types with file names will be saved. - - #### Writing Shapefiles Using the Context Manager - - The "Writer" class automatically closes the open files and writes the final headers once it is garbage collected. - In case of a crash and to make the code more readable, it is nevertheless recommended - you do this manually by calling the "close()" method: - - - >>> w.close() - - Alternatively, you can also use the "Writer" class as a context manager, to ensure open file - objects are properly closed and final headers written once you exit the with-clause: - - - >>> with shapefile.Writer("shapefiles/test/contextwriter") as w: - ... w.field('field1', 'C') - ... pass - - #### Writing Shapefiles to File-Like Objects - - Just as you can read shapefiles from python file-like objects you can also - write to them: - - - >>> try: - ... from StringIO import StringIO - ... except ImportError: - ... 
from io import BytesIO as StringIO - >>> shp = StringIO() - >>> shx = StringIO() - >>> dbf = StringIO() - >>> w = shapefile.Writer(shp=shp, shx=shx, dbf=dbf) - >>> w.field('field1', 'C') - >>> w.record() - >>> w.null() - >>> w.close() - >>> # To read back the files you could call the "StringIO.getvalue()" method later. - - #### Setting the Shape Type - - The shape type defines the type of geometry contained in the shapefile. All of - the shapes must match the shape type setting. - - There are three ways to set the shape type: - * Set it when creating the class instance. - * Set it by assigning a value to an existing class instance. - * Set it automatically to the type of the first non-null shape by saving the shapefile. - - To manually set the shape type for a Writer object when creating the Writer: - - - >>> w = shapefile.Writer('shapefiles/test/shapetype', shapeType=3) - >>> w.field('field1', 'C') - - >>> w.shapeType - 3 - - OR you can set it after the Writer is created: - - - >>> w.shapeType = 1 - - >>> w.shapeType - 1 - - - ### Adding Records - - Before you can add records you must first create the fields that define what types of - values will go into each attribute. - - There are several different field types, all of which support storing None values as NULL. - - Text fields are created using the 'C' type, and the third 'size' argument can be customized to the expected - length of text values to save space: - - - >>> w = shapefile.Writer('shapefiles/test/dtype') - >>> w.field('TEXT', 'C') - >>> w.field('SHORT_TEXT', 'C', size=5) - >>> w.field('LONG_TEXT', 'C', size=250) - >>> w.null() - >>> w.record('Hello', 'World', 'World'*50) - >>> w.close() - - >>> r = shapefile.Reader('shapefiles/test/dtype') - >>> assert r.record(0) == ['Hello', 'World', 'World'*50] - - Date fields are created using the 'D' type, and can be created using either - date objects, lists, or a YYYYMMDD formatted string. 
- Field length or decimal have no impact on this type: - - - >>> from datetime import date - >>> w = shapefile.Writer('shapefiles/test/dtype') - >>> w.field('DATE', 'D') - >>> w.null() - >>> w.null() - >>> w.null() - >>> w.null() - >>> w.record(date(1898,1,30)) - >>> w.record([1998,1,30]) - >>> w.record('19980130') - >>> w.record(None) - >>> w.close() - - >>> r = shapefile.Reader('shapefiles/test/dtype') - >>> assert r.record(0) == [date(1898,1,30)] - >>> assert r.record(1) == [date(1998,1,30)] - >>> assert r.record(2) == [date(1998,1,30)] - >>> assert r.record(3) == [None] - - Numeric fields are created using the 'N' type (or the 'F' type, which is exactly the same). - By default the fourth decimal argument is set to zero, essentially creating an integer field. - To store floats you must set the decimal argument to the precision of your choice. - To store very large numbers you must increase the field length size to the total number of digits - (including comma and minus). - - - >>> w = shapefile.Writer('shapefiles/test/dtype') - >>> w.field('INT', 'N') - >>> w.field('LOWPREC', 'N', decimal=2) - >>> w.field('MEDPREC', 'N', decimal=10) - >>> w.field('HIGHPREC', 'N', decimal=30) - >>> w.field('FTYPE', 'F', decimal=10) - >>> w.field('LARGENR', 'N', 101) - >>> nr = 1.3217328 - >>> w.null() - >>> w.null() - >>> w.record(INT=nr, LOWPREC=nr, MEDPREC=nr, HIGHPREC=-3.2302e-25, FTYPE=nr, LARGENR=int(nr)*10**100) - >>> w.record(None, None, None, None, None, None) - >>> w.close() - - >>> r = shapefile.Reader('shapefiles/test/dtype') - >>> assert r.record(0) == [1, 1.32, 1.3217328, -3.2302e-25, 1.3217328, 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000] - >>> assert r.record(1) == [None, None, None, None, None, None] - - - Finally, we can create boolean fields by setting the type to 'L'. - This field can take True or False values, or 1 (True) or 0 (False). - None is interpreted as missing. 
- - - >>> w = shapefile.Writer('shapefiles/test/dtype') - >>> w.field('BOOLEAN', 'L') - >>> w.null() - >>> w.null() - >>> w.null() - >>> w.null() - >>> w.null() - >>> w.null() - >>> w.record(True) - >>> w.record(1) - >>> w.record(False) - >>> w.record(0) - >>> w.record(None) - >>> w.record("Nonesense") - >>> w.close() - - >>> r = shapefile.Reader('shapefiles/test/dtype') - >>> r.record(0) - Record #0: [True] - >>> r.record(1) - Record #1: [True] - >>> r.record(2) - Record #2: [False] - >>> r.record(3) - Record #3: [False] - >>> r.record(4) - Record #4: [None] - >>> r.record(5) - Record #5: [None] - - You can also add attributes using keyword arguments where the keys are field names. - - - >>> w = shapefile.Writer('shapefiles/test/dtype') - >>> w.field('FIRST_FLD','C','40') - >>> w.field('SECOND_FLD','C','40') - >>> w.null() - >>> w.null() - >>> w.record('First', 'Line') - >>> w.record(FIRST_FLD='First', SECOND_FLD='Line') - >>> w.close() - - ### Adding Geometry - - Geometry is added using one of several convenience methods. The "null" method is used - for null shapes, "point" is used for point shapes, "multipoint" is used for multipoint shapes, "line" for lines, - "poly" for polygons. - - **Adding a Null shape** - - A shapefile may contain some records for which geometry is not available, and may be set using the "null" method. - Because Null shape types (shape type 0) have no geometry the "null" method is called without any arguments. - - - >>> w = shapefile.Writer('shapefiles/test/null') - >>> w.field('name', 'C') - - >>> w.null() - >>> w.record('nullgeom') - - >>> w.close() - - **Adding a Point shape** - - Point shapes are added using the "point" method. A point is specified by an x and - y value. 
- - - >>> w = shapefile.Writer('shapefiles/test/point') - >>> w.field('name', 'C') - - >>> w.point(122, 37) - >>> w.record('point1') - - >>> w.close() - - **Adding a MultiPoint shape** - - If your point data allows for the possibility of multiple points per feature, use "multipoint" instead. - These are specified as a list of xy point coordinates. - - - >>> w = shapefile.Writer('shapefiles/test/multipoint') - >>> w.field('name', 'C') - - >>> w.multipoint([[122,37], [124,32]]) - >>> w.record('multipoint1') - - >>> w.close() - - **Adding a LineString shape** - - For LineString shapefiles, each shape is given as a list of one or more linear features. - Each of the linear features must have at least two points. - - - >>> w = shapefile.Writer('shapefiles/test/line') - >>> w.field('name', 'C') - - >>> w.line([ - ... [[1,5],[5,5],[5,1],[3,3],[1,1]], # line 1 - ... [[3,2],[2,6]] # line 2 - ... ]) - - >>> w.record('linestring1') - - >>> w.close() - - **Adding a Polygon shape** - - Similarly to LineString, Polygon shapes consist of multiple polygons, and must be given as a list of polygons. - The main difference is that polygons must have at least 4 points and the last point must be the same as the first. - It's also okay if you forget to repeat the first point at the end; PyShp automatically checks and closes the polygons - if you don't. - - It's important to note that for Polygon shapefiles, your polygon coordinates must be ordered in a clockwise direction. - If any of the polygons have holes, then the hole polygon coordinates must be ordered in a counterclockwise direction. - The direction of your polygons determines how shapefile readers will distinguish between polygon outlines and holes. - - - >>> w = shapefile.Writer('shapefiles/test/polygon') - >>> w.field('name', 'C') - - >>> w.poly([ - ... [[113,24], [112,32], [117,36], [122,37], [118,20]], # poly 1 - ... [[116,29],[116,26],[119,29],[119,32]], # hole 1 - ... [[15,2], [17,6], [22,7]] # poly 2 - ... 
]) - >>> w.record('polygon1') - - >>> w.close() - - **Adding from an existing Shape object** - - Finally, geometry can be added by passing an existing "Shape" object to the "shape" method. - You can also pass it any GeoJSON dictionary or _\_geo_interface\_\_ compatible object. - This can be particularly useful for copying from one file to another: - - - >>> r = shapefile.Reader('shapefiles/test/polygon') - - >>> w = shapefile.Writer('shapefiles/test/copy') - >>> w.fields = r.fields[1:] # skip first deletion field - - >>> # adding existing Shape objects - >>> for shaperec in r.iterShapeRecords(): - ... w.record(*shaperec.record) - ... w.shape(shaperec.shape) - - >>> # or GeoJSON dicts - >>> for shaperec in r.iterShapeRecords(): - ... w.record(*shaperec.record) - ... w.shape(shaperec.shape.__geo_interface__) - - >>> w.close() - - - ### Geometry and Record Balancing - - Because every shape must have a corresponding record it is critical that the - number of records equals the number of shapes to create a valid shapefile. You - must take care to add records and shapes in the same order so that the record - data lines up with the geometry data. For example: - - - >>> w = shapefile.Writer('shapefiles/test/balancing', shapeType=shapefile.POINT) - >>> w.field("field1", "C") - >>> w.field("field2", "C") - - >>> w.record("row", "one") - >>> w.point(1, 1) - - >>> w.record("row", "two") - >>> w.point(2, 2) - - To help prevent accidental misalignment PyShp has an "auto balance" feature to - make sure when you add either a shape or a record the two sides of the - equation line up. This way if you forget to update an entry the - shapefile will still be valid and handled correctly by most shapefile - software. Autobalancing is NOT turned on by default. 
To activate it set - the attribute autoBalance to 1 or True: - - - >>> w.autoBalance = 1 - >>> w.record("row", "three") - >>> w.record("row", "four") - >>> w.point(4, 4) - - >>> w.recNum == w.shpNum - True - - You also have the option of manually calling the balance() method at any time - to ensure the other side is up to date. When balancing is used - null shapes are created on the geometry side or records - with a value of "NULL" for each field is created on the attribute side. - This gives you flexibility in how you build the shapefile. - You can create all of the shapes and then create all of the records or vice versa. - - - >>> w.autoBalance = 0 - >>> w.record("row", "five") - >>> w.record("row", "six") - >>> w.record("row", "seven") - >>> w.point(5, 5) - >>> w.point(6, 6) - >>> w.balance() - - >>> w.recNum == w.shpNum - True - - If you do not use the autoBalance() or balance() method and forget to manually - balance the geometry and attributes the shapefile will be viewed as corrupt by - most shapefile software. - - - - # How To's - - ## 3D and Other Geometry Types - - Most shapefiles store conventional 2D points, lines, or polygons. But the shapefile format is also capable - of storing various other types of geometries as well, including complex 3D surfaces and objects. - - **Shapefiles with measurement (M) values** - - Measured shape types are shapes that include a measurement value at each vertex, for instance - speed measurements from a GPS device. Shapes with measurement (M) values are added with the following - methods: "pointm", "multipointm", "linem", and "polygonm". The M-values are specified by adding a - third M value to each XY coordinate. Missing or unobserved M-values are specified with a None value, - or by simply omitting the third M-coordinate. - - - >>> w = shapefile.Writer('shapefiles/test/linem') - >>> w.field('name', 'C') - - >>> w.linem([ - ... 
[[1,5,0],[5,5],[5,1,3],[3,3,None],[1,1,0]], # line with one omitted and one missing M-value - ... [[3,2],[2,6]] # line without any M-values - ... ]) - - >>> w.record('linem1') - - >>> w.close() - - Shapefiles containing M-values can be examined in several ways: - - >>> r = shapefile.Reader('shapefiles/test/linem') - - >>> r.mbox # the lower and upper bound of M-values in the shapefile - [0.0, 3.0] - - >>> r.shape(0).m # flat list of M-values - [0.0, None, 3.0, None, 0.0, None, None] - - - **Shapefiles with elevation (Z) values** - - Elevation shape types are shapes that include an elevation value at each vertex, for instance elevation from a GPS device. - Shapes with elevation (Z) values are added with the following methods: "pointz", "multipointz", "linez", and "polyz". - The Z-values are specified by adding a third Z value to each XY coordinate. Z-values do not support the concept of missing data, - but if you omit the third Z-coordinate it will default to 0. Note that Z-type shapes also support measurement (M) values added - as a fourth M-coordinate. This too is optional. - - - >>> w = shapefile.Writer('shapefiles/test/linez') - >>> w.field('name', 'C') - - >>> w.linez([ - ... [[1,5,18],[5,5,20],[5,1,22],[3,3],[1,1]], # line with some omitted Z-values - ... [[3,2],[2,6]], # line without any Z-values - ... [[3,2,15,0],[2,6,13,3],[1,9,14,2]] # line with both Z- and M-values - ... ]) - - >>> w.record('linez1') - - >>> w.close() - - To examine a Z-type shapefile you can do: - - >>> r = shapefile.Reader('shapefiles/test/linez') - - >>> r.zbox # the lower and upper bound of Z-values in the shapefile - [0.0, 22.0] - - >>> r.shape(0).z # flat list of Z-values - [18.0, 20.0, 22.0, 0.0, 0.0, 0.0, 0.0, 15.0, 13.0, 14.0] - - **3D MultiPatch Shapefiles** - - Multipatch shapes are useful for storing composite 3-Dimensional objects. - A MultiPatch shape represents a 3D object made up of one or more surface parts. 
- Each surface in "parts" is defined by a list of XYZM values (Z and M values optional), and its corresponding type is - given in the "partTypes" argument. The part type decides how the coordinate sequence is to be interpreted, and can be one - of the following module constants: TRIANGLE_STRIP, TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. - For instance, a TRIANGLE_STRIP may be used to represent the walls of a building, combined with a TRIANGLE_FAN to represent - its roof: - - >>> from shapefile import TRIANGLE_STRIP, TRIANGLE_FAN - - >>> w = shapefile.Writer('shapefiles/test/multipatch') - >>> w.field('name', 'C') - - >>> w.multipatch([ - ... [[0,0,0],[0,0,3],[5,0,0],[5,0,3],[5,5,0],[5,5,3],[0,5,0],[0,5,3],[0,0,0],[0,0,3]], # TRIANGLE_STRIP for house walls - ... [[2.5,2.5,5],[0,0,3],[5,0,3],[5,5,3],[0,5,3],[0,0,3]], # TRIANGLE_FAN for pointed house roof - ... ], - ... partTypes=[TRIANGLE_STRIP, TRIANGLE_FAN]) # one type for each part - - >>> w.record('house1') - - >>> w.close() - - For an introduction to the various multipatch part types and examples of how to create 3D MultiPatch objects see [this - ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiPatch_Geometry_Type.pdf). - - ## Working with Large Shapefiles - - Despite being a lightweight library, PyShp is designed to be able to read and write - shapefiles of any size, allowing you to work with hundreds of thousands or even millions - of records and complex geometries. - - When first creating the Reader class, the library only reads the header information - and leaves the rest of the file contents alone. Once you call the records() and shapes() - methods however, it will attempt to read the entire file into memory at once. - For very large files this can result in MemoryError. So when working with large files - it is recommended to use instead the iterShapes(), iterRecords(), or iterShapeRecords() - methods instead. 
These iterate through the file contents one at a time, enabling you to loop - through them while keeping memory usage at a minimum. - - - >>> for shape in sf.iterShapes(): - ... # do something here - ... pass - - >>> for rec in sf.iterRecords(): - ... # do something here - ... pass - - >>> for shapeRec in sf.iterShapeRecords(): - ... # do something here - ... pass - - >>> for shapeRec in sf: # same as iterShapeRecords() - ... # do something here - ... pass - - The shapefile Writer class uses a similar streaming approach to keep memory - usage at a minimum. The library takes care of this under-the-hood by immediately - writing each geometry and record to disk the moment they - are added using shape() or record(). Once the writer is closed, exited, or garbage - collected, the final header information is calculated and written to the beginning of - the file. - - This means that as long as you are able to iterate through a source file without having - to load everything into memory, such as a large CSV table or a large shapefile, you can - process and write any number of items, and even merge many different source files into a single - large shapefile. If you need to edit or undo any of your writing you would have to read the - file back in, one record at a time, make your changes, and write it back out. - - ## Unicode and Shapefile Encodings - - PyShp has full support for unicode and shapefile encodings, so you can always expect to be working - with unicode strings in shapefiles that have text fields. - Most shapefiles are written in UTF-8 encoding, PyShp's default encoding, so in most cases you don't - have to specify the encoding. For reading shapefiles in any other encoding, such as Latin-1, just - supply the encoding option when creating the Reader class. 
- - - >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="latin1") - >>> r.record(0) == [2, u'Ñandú'] - True - - Once you have loaded the shapefile, you may choose to save it using another more supportive encoding such - as UTF-8. Provided the new encoding supports the characters you are trying to write, reading it back in - should give you the same unicode string you started with. - - - >>> w = shapefile.Writer("shapefiles/test/latin_as_utf8.shp", encoding="utf8") - >>> w.fields = r.fields[1:] - >>> w.record(*r.record(0)) - >>> w.null() - >>> w.close() - - >>> r = shapefile.Reader("shapefiles/test/latin_as_utf8.shp", encoding="utf8") - >>> r.record(0) == [2, u'Ñandú'] - True - - If you supply the wrong encoding and the string is unable to be decoded, PyShp will by default raise an - exception. If however, on rare occasion, you are unable to find the correct encoding and want to ignore - or replace encoding errors, you can specify the "encodingErrors" to be used by the decode method. This - applies to both reading and writing. - - - >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="ascii", encodingErrors="replace") - >>> r.record(0) == [2, u'�and�'] - True - - - # Testing - - The testing framework is doctest, which are located in this file README.md. - In the same folder as README.md and shapefile.py, from the command line run - ``` - $ python shapefile.py - ``` - - Linux/Mac and similar platforms will need to run `$ dos2unix README.md` in order - correct line endings in README.md. - - # Contributors - - ``` - Atle Frenvik Sveen - Bas Couwenberg - Casey Meisenzahl - Charles Arnold - David A. 
Riggs - davidh-ssec - Evan Heidtmann - ezcitron - fiveham - geospatialpython - Hannes - Ignacio Martinez Vazquez - Jason Moujaes - Jonty Wareing - Karim Bahgat - Kyle Kelley - Louis Tiao - Marcin Cuprjak - mcuprjak - Micah Cochran - Michael Davis - Michal Čihař - Mike Toews - Nilo - pakoun - Paulo Ernesto - Raynor Vliegendhart - Razzi Abuissa - RosBer97 - Ross Rogers - Ryan Brideau - Tobias Megies - Tommi Penttinen - Uli Köhler - Vsevolod Novikov - Zac Miller - ``` - -Keywords: gis geospatial geographic shapefile shapefiles -Platform: UNKNOWN -Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.5 -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Classifier: Topic :: Scientific/Engineering :: GIS -Classifier: Topic :: Software Development :: Libraries -Classifier: Topic :: Software Development :: Libraries :: Python Modules -Requires-Python: >= 2.7 -Description-Content-Type: text/markdown diff -Nru pyshp-2.1.3+ds/README.md pyshp-2.2.0/README.md --- pyshp-2.1.3+ds/README.md 2021-01-14 14:12:34.000000000 +0000 +++ pyshp-2.2.0/README.md 2022-02-02 17:07:33.000000000 +0000 @@ -4,32 +4,52 @@ ![pyshp logo](http://4.bp.blogspot.com/_SBi37QEsCvg/TPQuOhlHQxI/AAAAAAAAAE0/QjFlWfMx0tQ/S350/GSP_Logo.png "PyShp") -[![Build Status](https://travis-ci.org/GeospatialPython/pyshp.svg?branch=master)](https://travis-ci.org/GeospatialPython/pyshp) +![build status](https://github.com/GeospatialPython/pyshp/actions/workflows/build.yml/badge.svg) -## Contents - -[Overview](#overview) +- **Author**: [Joel Lawhead](https://github.com/GeospatialPython) +- **Maintainers**: [Karim Bahgat](https://github.com/karimbahgat) +- **Version**: 2.2.0 +- **Date**: 2 February, 2022 +- **License**: 
[MIT](https://github.com/GeospatialPython/pyshp/blob/master/LICENSE.TXT) -[Version Changes](#version-changes) - -[Examples](#examples) -- [Reading Shapefiles](#reading-shapefiles) - - [The Reader Class](#the-reader-class) - - [Reading Geometry](#reading-geometry) - - [Reading Records](#reading-records) - - [Reading Geometry and Records Simultaneously](#reading-geometry-and-records-simultaneously) -- [Writing Shapefiles](#writing-shapefiles) - - [The Writer Class](#the-writer-class) - - [Adding Records](#adding-records) - - [Adding Geometry](#adding-geometry) - - [Geometry and Record Balancing](#geometry-and-record-balancing) - -[How To's](#how-tos) -- [3D and Other Geometry Types](#3d-and-other-geometry-types) -- [Working with Large Shapefiles](#working-with-large-shapefiles) -- [Unicode and Shapefile Encodings](#unicode-and-shapefile-encodings) +## Contents -[Testing](#testing) +- [Overview](#overview) +- [Version Changes](#version-changes) +- [The Basics](#the-basics) + - [Reading Shapefiles](#reading-shapefiles) + - [The Reader Class](#the-reader-class) + - [Reading Shapefiles from Local Files](#reading-shapefiles-from-local-files) + - [Reading Shapefiles from Zip Files](#reading-shapefiles-from-zip-files) + - [Reading Shapefiles from URLs](#reading-shapefiles-from-urls) + - [Reading Shapefiles from File-Like Objects](#reading-shapefiles-from-file-like-objects) + - [Reading Shapefiles Using the Context Manager](#reading-shapefiles-using-the-context-manager) + - [Reading Shapefile Meta-Data](#reading-shapefile-meta-data) + - [Reading Geometry](#reading-geometry) + - [Reading Records](#reading-records) + - [Reading Geometry and Records Simultaneously](#reading-geometry-and-records-simultaneously) + - [Writing Shapefiles](#writing-shapefiles) + - [The Writer Class](#the-writer-class) + - [Writing Shapefiles to Local Files](#writing-shapefiles-to-local-files) + - [Writing Shapefiles to File-Like Objects](#writing-shapefiles-to-file-like-objects) + - [Writing 
Shapefiles Using the Context Manager](#writing-shapefiles-using-the-context-manager) + - [Setting the Shape Type](#setting-the-shape-type) + - [Adding Records](#adding-records) + - [Adding Geometry](#adding-geometry) + - [Geometry and Record Balancing](#geometry-and-record-balancing) +- [Advanced Use](#advanced-use) + - [Shapefile Language and Character Encoding](#shapefile-language-and-character-encoding) + - [Reading Large Shapefiles](#reading-large-shapefiles) + - [Iterating through a shapefile](#iterating-through-a-shapefile) + - [Limiting which fields to read](#limiting-which-fields-to-read) + - [Attribute filtering](#attribute-filtering) + - [Spatial filtering](#spatial-filtering) + - [Writing large shapefiles](#writing-large-shapefiles) + - [Merging multiple shapefiles](#merging-multiple-shapefiles) + - [Editing shapefiles](#editing-shapefiles) + - [3D and Other Geometry Types](#3d-and-other-geometry-types) +- [Testing](#testing) +- [Contributors](#contributors) # Overview @@ -70,6 +90,30 @@ # Version Changes +## 2.2.0 + +### New Features: + +- Read shapefiles directly from zipfiles. +- Read shapefiles directly from urls. +- Allow fast extraction of only a subset of dbf fields through a `fields` arg. +- Allow fast filtering which shapes to read from the file through a `bbox` arg. + +### Improvements: + +- More examples and restructuring of README. +- More informative Shape to geojson warnings (see #219). +- Shape object information when calling repr(). +- Faster ring orientation checks, enforce geojson output ring orientation. + +### Bug fixes: + +- Remove null-padding at end of some record character fields. +- Fix dbf writing error when the number of record list or dict entries didn't match the number of fields. 
+- Handle rare garbage collection issue after deepcopy (https://github.com/mattijn/topojson/issues/120) +- Fix bug where records and shapes would be assigned incorrect record number (@karanrn) +- Fix typos in docs (@timgates) + ## 2.1.3 ### Bug fixes: @@ -144,7 +188,7 @@ - Reading shapefiles is now more convenient: - Shapefiles can be opened using the context manager, and files are properly closed. - Shapefiles can be iterated, have a length, and supports the geo interface. - - New ways of inspecing shapefile metadata by printing. [@megies] + - New ways of inspecting shapefile metadata by printing. [@megies] - More convenient accessing of Record values as attributes. [@philippkraft] - More convenient shape type name checking. [@megies] - Add more support and documentation for MultiPatch 3D shapes. @@ -163,7 +207,7 @@ - Enforce maximum field limit. [@mwtoews] -# Examples +# The Basics Before doing anything you must import the library. @@ -178,6 +222,8 @@ ### The Reader Class +#### Reading Shapefiles from Local Files + To read a shapefile create a new "Reader" object and pass it the name of an existing shapefile. The shapefile format is actually a collection of three files. You specify the base filename of the shapefile or the complete filename @@ -199,23 +245,36 @@ OR any of the other 5+ formats which are potentially part of a shapefile. The library does not care about file extensions. -#### Reading Shapefiles Using the Context Manager +#### Reading Shapefiles from Zip Files -The "Reader" class can be used as a context manager, to ensure open file -objects are properly closed when done reading the data: +If your shapefile is wrapped inside a zip file, the library is able to handle that too, meaning you don't have to worry about unzipping the contents: - >>> with shapefile.Reader("shapefiles/blockgroups.shp") as shp: - ... 
print(shp) - shapefile Reader - 663 shapes (type 'POLYGON') - 663 records (44 fields) + + >>> sf = shapefile.Reader("shapefiles/blockgroups.zip") + +If the zip file contains multiple shapefiles, just specify which shapefile to read by additionally specifying the relative path after the ".zip" part: + + + >>> sf = shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp") + +#### Reading Shapefiles from URLs + +Finally, you can use all of the above methods to read shapefiles directly from the internet, by giving a url instead of a local path, e.g.: + + + >>> # from a zipped shapefile on website + >>> sf = shapefile.Reader("https://biogeo.ucdavis.edu/data/diva/rrd/NIC_rrd.zip") + + >>> # from a shapefile collection of files in a github repository + >>> sf = shapefile.Reader("https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries.shp?raw=true") + +This will automatically download the file(s) to a temporary location before reading, saving you a lot of time and repetitive boilerplate code when you just want quick access to some external data. #### Reading Shapefiles from File-Like Objects You can also load shapefiles from any Python file-like object using keyword arguments to specify any of the three files. This feature is very powerful and -allows you to load shapefiles from a url, a zip file, a serialized object, -or in some cases a database. +allows you to custom load shapefiles from arbitrary storage formats, such as a protected url or zip file, a serialized object, or in some cases a database. >>> myshp = open("shapefiles/blockgroups.shp", "rb") @@ -228,6 +287,17 @@ shx file to access shape records a little faster but will do just fine without it. 
+#### Reading Shapefiles Using the Context Manager + +The "Reader" class can be used as a context manager, to ensure open file +objects are properly closed when done reading the data: + + >>> with shapefile.Reader("shapefiles/blockgroups.shp") as shp: + ... print(shp) + shapefile Reader + 663 shapes (type 'POLYGON') + 663 records (44 fields) + #### Reading Shapefile Meta-Data Shapefiles have a number of attributes for inspecting the file contents. @@ -235,6 +305,7 @@ shapeType attribute. + >>> sf = shapefile.Reader("shapefiles/blockgroups.dbf") >>> sf.shapeType 5 @@ -315,6 +386,8 @@ >>> s = sf.shape(7) + >>> s + Shape #7: POLYGON >>> # Read the bbox of the 8th shape to verify >>> # Round coordinates to 3 decimal places @@ -329,25 +402,32 @@ ... if not name.startswith('_'): ... name 'bbox' + 'oid' 'parts' 'points' 'shapeType' 'shapeTypeName' - * shapeType: an integer representing the type of shape as defined by the + * `oid`: The shape's index position in the original shapefile. + + + >>> shapes[3].oid + 3 + + * `shapeType`: an integer representing the type of shape as defined by the shapefile specification. >>> shapes[3].shapeType 5 - * shapeTypeName: a string representation of the type of shape as defined by shapeType. Read-only. + * `shapeTypeName`: a string representation of the type of shape as defined by shapeType. Read-only. >>> shapes[3].shapeTypeName 'POLYGON' - * bbox: If the shape type contains multiple points this tuple describes the + * `bbox`: If the shape type contains multiple points this tuple describes the lower left (x,y) coordinate and upper right corner coordinate creating a complete box around the points. If the shapeType is a Null (shapeType == 0) then an AttributeError is raised. @@ -359,7 +439,7 @@ >>> ['%.3f' % coord for coord in bbox] ['-122.486', '37.787', '-122.446', '37.811'] - * parts: Parts simply group collections of points into shapes. If the shape + * `parts`: Parts simply group collections of points into shapes. 
If the shape record has multiple parts this attribute contains the index of the first point of each part. If there is only one part then a list containing 0 is returned. @@ -368,7 +448,7 @@ >>> shapes[3].parts [0] - * points: The points attribute contains a list of tuples containing an + * `points`: The points attribute contains a list of tuples containing an (x,y) coordinate for each point in the shape. @@ -390,11 +470,19 @@ >>> geoj["type"] 'MultiPolygon' -The results from the shapes() method similiarly supports converting to GeoJSON: +The results from the shapes() method similarly supports converting to GeoJSON: >>> shapes.__geo_interface__['type'] 'GeometryCollection' + +Note: In some cases, if the conversion from shapefile geometry to GeoJSON encountered any problems +or potential issues, a warning message will be displayed with information about the affected +geometry. To ignore or suppress these warnings, you can disable this behavior by setting the +module constant VERBOSE to False: + + + >>> shapefile.VERBOSE = False ### Reading Records @@ -559,6 +647,8 @@ format as a control file for the sprayer system (usually in combination with custom database file formats). +#### Writing Shapefiles to Local Files + To create a shapefile you begin by initiating a new Writer instance, passing it the file path and name to save to: @@ -578,23 +668,6 @@ In that case, any file types not assigned will not save and only file types with file names will be saved. -#### Writing Shapefiles Using the Context Manager - -The "Writer" class automatically closes the open files and writes the final headers once it is garbage collected. 
-In case of a crash and to make the code more readable, it is nevertheless recommended -you do this manually by calling the "close()" method: - - - >>> w.close() - -Alternatively, you can also use the "Writer" class as a context manager, to ensure open file -objects are properly closed and final headers written once you exit the with-clause: - - - >>> with shapefile.Writer("shapefiles/test/contextwriter") as w: - ... w.field('field1', 'C') - ... pass - #### Writing Shapefiles to File-Like Objects Just as you can read shapefiles from python file-like objects you can also @@ -614,6 +687,23 @@ >>> w.null() >>> w.close() >>> # To read back the files you could call the "StringIO.getvalue()" method later. + +#### Writing Shapefiles Using the Context Manager + +The "Writer" class automatically closes the open files and writes the final headers once it is garbage collected. +In case of a crash and to make the code more readable, it is nevertheless recommended +you do this manually by calling the "close()" method: + + + >>> w.close() + +Alternatively, you can also use the "Writer" class as a context manager, to ensure open file +objects are properly closed and final headers written once you exit the with-clause: + + + >>> with shapefile.Writer("shapefiles/test/contextwriter") as w: + ... w.field('field1', 'C') + ... pass #### Setting the Shape Type @@ -936,7 +1026,214 @@ -# How To's +# Advanced Use + +## Shapefile Language and Character Encoding + +PyShp supports reading and writing shapefiles in any language or character encoding, and provides several options for decoding and encoding text. +Most shapefiles are written in UTF-8 encoding, PyShp's default encoding, so in most cases you don't +have to specify the encoding. For reading shapefiles in any other encoding, such as Latin-1, just +supply the encoding option when creating the Reader class. 
+ + + >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="latin1") + >>> r.record(0) == [2, u'Ñandú'] + True + +Once you have loaded the shapefile, you may choose to save it using another more supportive encoding such +as UTF-8. Assuming the new encoding supports the characters you are trying to write, reading it back in +should give you the same unicode string you started with. + + + >>> w = shapefile.Writer("shapefiles/test/latin_as_utf8.shp", encoding="utf8") + >>> w.fields = r.fields[1:] + >>> w.record(*r.record(0)) + >>> w.null() + >>> w.close() + + >>> r = shapefile.Reader("shapefiles/test/latin_as_utf8.shp", encoding="utf8") + >>> r.record(0) == [2, u'Ñandú'] + True + +If you supply the wrong encoding and the string is unable to be decoded, PyShp will by default raise an +exception. If however, on rare occasion, you are unable to find the correct encoding and want to ignore +or replace encoding errors, you can specify the "encodingErrors" to be used by the decode method. This +applies to both reading and writing. + + + >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="ascii", encodingErrors="replace") + >>> r.record(0) == [2, u'�and�'] + True + + + +## Reading Large Shapefiles + +Despite being a lightweight library, PyShp is designed to be able to read shapefiles of any size, allowing you to work with hundreds of thousands or even millions +of records and complex geometries. + +### Iterating through a shapefile + +As an example, let's load this Natural Earth shapefile of more than 4000 global administrative boundary polygons: + + + >>> sf = shapefile.Reader("https://github.com/nvkelso/natural-earth-vector/blob/master/10m_cultural/ne_10m_admin_1_states_provinces?raw=true") + +When first creating the Reader class, the library only reads the header information +and leaves the rest of the file contents alone. Once you call the records() and shapes() +methods however, it will attempt to read the entire file into memory at once. 
+For very large files this can result in MemoryError. So when working with large files
+it is recommended to use the iterShapes(), iterRecords(), or iterShapeRecords()
+methods instead. These iterate through the file contents one at a time, enabling you to loop
+through them while keeping memory usage at a minimum.
+
+
+    >>> for shape in sf.iterShapes():
+    ...     # do something here
+    ...     pass
+
+    >>> for rec in sf.iterRecords():
+    ...     # do something here
+    ...     pass
+
+    >>> for shapeRec in sf.iterShapeRecords():
+    ...     # do something here
+    ...     pass
+
+    >>> for shapeRec in sf: # same as iterShapeRecords()
+    ...     # do something here
+    ...     pass
+
+### Limiting which fields to read
+
+By default when reading the attribute records of a shapefile, pyshp unpacks and returns the data for all of the dbf fields, regardless of whether you actually need that data or not. To limit which field data is unpacked when reading each record and speed up processing time, you can specify the `fields` argument to any of the methods involving record data. Note that the order of the specified fields does not matter, the resulting records will list the specified field values in the order that they appear in the original dbf file. For instance, if we are only interested in the country and name of each admin unit, the following is a more efficient way of iterating through the file:
+
+
+    >>> fields = ["geonunit", "name"]
+    >>> for rec in sf.iterRecords(fields=fields):
+    ...     # do something
+    ...     pass
+    >>> rec
+    Record #4595: ['Birgu', 'Malta']
+
+### Attribute filtering
+
+In many cases, we aren't interested in all entries of a shapefile, but rather only want to retrieve a small subset of records by filtering on some attribute. 
To avoid wasting time reading records and shapes that we don't need, we can start by iterating only the records and fields of interest, check if the record matches some condition as a way to filter the data, and finally load the full record and shape geometry for those that meet the condition: + + + >>> filter_field = "geonunit" + >>> filter_value = "Eritrea" + >>> for rec in sf.iterRecords(fields=[filter_field]): + ... if rec[filter_field] == filter_value: + ... # load full record and shape + ... shapeRec = sf.shapeRecord(rec.oid) + ... shapeRec.record["name"] + 'Debubawi Keyih Bahri' + 'Debub' + 'Semenawi Keyih Bahri' + 'Gash Barka' + 'Maekel' + 'Anseba' + +Selectively reading only the necessary data in this way is particularly useful for efficiently processing a limited subset of data from very large files or when looping through a large number of files, especially if they contain large attribute tables or complex shape geometries. + +### Spatial filtering + +Another common use-case is that we only want to read those records that are located in some region of interest. Because the shapefile stores the bounding box of each shape separately from the geometry data, it's possible to quickly retrieve all shapes that might overlap a given bounding box region without having to load the full shape geometry data for every shape. This can be done by specifying the `bbox` argument to any of the record or shape methods: + + + >>> bbox = [36.423, 12.360, 43.123, 18.004] # ca bbox of Eritrea + >>> fields = ["geonunit","name"] + >>> for shapeRec in sf.iterShapeRecords(bbox=bbox, fields=fields): + ... 
shapeRec.record + Record #368: ['Afar', 'Ethiopia'] + Record #369: ['Tadjourah', 'Djibouti'] + Record #375: ['Obock', 'Djibouti'] + Record #376: ['Debubawi Keyih Bahri', 'Eritrea'] + Record #1106: ['Amhara', 'Ethiopia'] + Record #1107: ['Gedarif', 'Sudan'] + Record #1108: ['Tigray', 'Ethiopia'] + Record #1414: ['Sa`dah', 'Yemen'] + Record #1415: ['`Asir', 'Saudi Arabia'] + Record #1416: ['Hajjah', 'Yemen'] + Record #1417: ['Jizan', 'Saudi Arabia'] + Record #1598: ['Debub', 'Eritrea'] + Record #1599: ['Red Sea', 'Sudan'] + Record #1600: ['Semenawi Keyih Bahri', 'Eritrea'] + Record #1601: ['Gash Barka', 'Eritrea'] + Record #1602: ['Kassala', 'Sudan'] + Record #1603: ['Maekel', 'Eritrea'] + Record #2037: ['Al Hudaydah', 'Yemen'] + Record #3741: ['Anseba', 'Eritrea'] + +This functionality means that shapefiles can be used as a bare-bones spatially indexed database, with very fast bounding box queries for even the largest of shapefiles. Note that, as with all spatial indexing, this method does not guarantee that the *geometries* of the resulting matches overlap the queried region, only that their *bounding boxes* overlap. + + + +## Writing large shapefiles + +Similar to the Reader class, the shapefile Writer class uses a streaming approach to keep memory +usage at a minimum and allow writing shapefiles of arbitrarily large sizes. The library takes care of this under-the-hood by immediately +writing each geometry and record to disk the moment they +are added using shape() or record(). Once the writer is closed, exited, or garbage +collected, the final header information is calculated and written to the beginning of +the file. + +### Merging multiple shapefiles + +This means that it's possible to merge hundreds or thousands of shapefiles, as +long as you iterate through the source files to avoid loading everything into +memory. 
The following example copies the contents of a shapefile to a new file 10 times: + + >>> # create writer + >>> w = shapefile.Writer('shapefiles/test/merge') + + >>> # copy over fields from the reader + >>> r = shapefile.Reader("shapefiles/blockgroups") + >>> for field in r.fields[1:]: + ... w.field(*field) + + >>> # copy the shapefile to writer 10 times + >>> repeat = 10 + >>> for i in range(repeat): + ... r = shapefile.Reader("shapefiles/blockgroups") + ... for shapeRec in r.iterShapeRecords(): + ... w.record(*shapeRec.record) + ... w.shape(shapeRec.shape) + + >>> # check that the written file is 10 times longer + >>> len(w) == len(r) * 10 + True + + >>> # close the writer + >>> w.close() + +In this trivial example, we knew that all files had the exact same field names, ordering, and types. In other scenarios, you will have to additionally make sure that all shapefiles have the exact same fields in the same order, and that they all contain the same geometry type. + +### Editing shapefiles + +If you need to edit a shapefile you would have to read the +file one record at a time, modify or filter the contents, and write it back out. For instance, to create a copy of a shapefile that only keeps a subset of relevant fields: + + >>> # create writer + >>> w = shapefile.Writer('shapefiles/test/edit') + + >>> # define which fields to keep + >>> keep_fields = ['BKG_KEY', 'MEDIANRENT'] + + >>> # copy over the relevant fields from the reader + >>> r = shapefile.Reader("shapefiles/blockgroups") + >>> for field in r.fields[1:]: + ... if field[0] in keep_fields: + ... w.field(*field) + + >>> # write only the relevant attribute values + >>> for shapeRec in r.iterShapeRecords(fields=keep_fields): + ... w.record(*shapeRec.record) + ... 
w.shape(shapeRec.shape) + + >>> # close writer + >>> w.close() ## 3D and Other Geometry Types @@ -1035,87 +1332,6 @@ For an introduction to the various multipatch part types and examples of how to create 3D MultiPatch objects see [this ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiPatch_Geometry_Type.pdf). -## Working with Large Shapefiles - -Despite being a lightweight library, PyShp is designed to be able to read and write -shapefiles of any size, allowing you to work with hundreds of thousands or even millions -of records and complex geometries. - -When first creating the Reader class, the library only reads the header information -and leaves the rest of the file contents alone. Once you call the records() and shapes() -methods however, it will attempt to read the entire file into memory at once. -For very large files this can result in MemoryError. So when working with large files -it is recommended to use instead the iterShapes(), iterRecords(), or iterShapeRecords() -methods instead. These iterate through the file contents one at a time, enabling you to loop -through them while keeping memory usage at a minimum. - - - >>> for shape in sf.iterShapes(): - ... # do something here - ... pass - - >>> for rec in sf.iterRecords(): - ... # do something here - ... pass - - >>> for shapeRec in sf.iterShapeRecords(): - ... # do something here - ... pass - - >>> for shapeRec in sf: # same as iterShapeRecords() - ... # do something here - ... pass - -The shapefile Writer class uses a similar streaming approach to keep memory -usage at a minimum. The library takes care of this under-the-hood by immediately -writing each geometry and record to disk the moment they -are added using shape() or record(). Once the writer is closed, exited, or garbage -collected, the final header information is calculated and written to the beginning of -the file. 
- -This means that as long as you are able to iterate through a source file without having -to load everything into memory, such as a large CSV table or a large shapefile, you can -process and write any number of items, and even merge many different source files into a single -large shapefile. If you need to edit or undo any of your writing you would have to read the -file back in, one record at a time, make your changes, and write it back out. - -## Unicode and Shapefile Encodings - -PyShp has full support for unicode and shapefile encodings, so you can always expect to be working -with unicode strings in shapefiles that have text fields. -Most shapefiles are written in UTF-8 encoding, PyShp's default encoding, so in most cases you don't -have to specify the encoding. For reading shapefiles in any other encoding, such as Latin-1, just -supply the encoding option when creating the Reader class. - - - >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="latin1") - >>> r.record(0) == [2, u'Ñandú'] - True - -Once you have loaded the shapefile, you may choose to save it using another more supportive encoding such -as UTF-8. Provided the new encoding supports the characters you are trying to write, reading it back in -should give you the same unicode string you started with. - - - >>> w = shapefile.Writer("shapefiles/test/latin_as_utf8.shp", encoding="utf8") - >>> w.fields = r.fields[1:] - >>> w.record(*r.record(0)) - >>> w.null() - >>> w.close() - - >>> r = shapefile.Reader("shapefiles/test/latin_as_utf8.shp", encoding="utf8") - >>> r.record(0) == [2, u'Ñandú'] - True - -If you supply the wrong encoding and the string is unable to be decoded, PyShp will by default raise an -exception. If however, on rare occasion, you are unable to find the correct encoding and want to ignore -or replace encoding errors, you can specify the "encodingErrors" to be used by the decode method. This -applies to both reading and writing. 
- - - >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="ascii", encodingErrors="replace") - >>> r.record(0) == [2, u'�and�'] - True # Testing @@ -1147,6 +1363,7 @@ Jason Moujaes Jonty Wareing Karim Bahgat +karanrn Kyle Kelley Louis Tiao Marcin Cuprjak @@ -1155,6 +1372,7 @@ Michael Davis Michal Čihař Mike Toews +Miroslav Šedivý Nilo pakoun Paulo Ernesto @@ -1163,6 +1381,7 @@ RosBer97 Ross Rogers Ryan Brideau +Tim Gates Tobias Megies Tommi Penttinen Uli Köhler diff -Nru pyshp-2.1.3+ds/setup.cfg pyshp-2.2.0/setup.cfg --- pyshp-2.1.3+ds/setup.cfg 2021-01-14 14:12:50.000000000 +0000 +++ pyshp-2.2.0/setup.cfg 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -[egg_info] -tag_build = -tag_date = 0 - diff -Nru pyshp-2.1.3+ds/setup.py pyshp-2.2.0/setup.py --- pyshp-2.1.3+ds/setup.py 2021-01-14 14:12:34.000000000 +0000 +++ pyshp-2.2.0/setup.py 2022-02-02 17:07:33.000000000 +0000 @@ -7,14 +7,13 @@ return data.decode('utf-8') setup(name='pyshp', - version='2.1.3', + version='2.2.0', description='Pure Python read/write support for ESRI Shapefile format', long_description=read_file('README.md'), long_description_content_type='text/markdown', - author='Joel Lawhead', + author='Joel Lawhead, Karim Bahgat', author_email='jlawhead@geospatialpython.com', url='https://github.com/GeospatialPython/pyshp', - download_url='https://github.com/GeospatialPython/pyshp/archive/2.1.1.tar.gz', py_modules=['shapefile'], license='MIT', zip_safe=False, @@ -27,6 +26,7 @@ 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Topic :: Scientific/Engineering :: GIS', 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Libraries :: Python Modules']) diff -Nru pyshp-2.1.3+ds/shapefile.py pyshp-2.2.0/shapefile.py --- pyshp-2.1.3+ds/shapefile.py 2021-01-14 14:12:34.000000000 +0000 +++ pyshp-2.2.0/shapefile.py 2022-02-02 17:07:33.000000000 +0000 @@ 
-1,12 +1,13 @@
 """
 shapefile.py
 Provides read and write support for ESRI Shapefiles.
-author: jlawhead<at>geospatialpython.com
-version: 2.1.3
+authors: jlawhead<at>geospatialpython.com
+maintainer: karim.bahgat.norway<at>gmail.com
+version: 2.2.0
 Compatible with Python versions 2.7-3.x
 """
-__version__ = "2.1.3"
+__version__ = "2.2.0"

 from struct import pack, unpack, calcsize, error, Struct
 import os
@@ -14,11 +15,15 @@
 import time
 import array
 import tempfile
-import warnings
+import logging
 import io
 from datetime import date
+import zipfile

+# Module settings
+VERBOSE = True
+
 # Constants for shape types
 NULL = 0
 POINT = 1
@@ -74,9 +79,18 @@
 if PYTHON3:
     xrange = range
     izip = zip
+
+    from urllib.parse import urlparse, urlunparse
+    from urllib.error import HTTPError
+    from urllib.request import urlopen, Request
+
 else:
     from itertools import izip
+
+    from urlparse import urlparse, urlunparse
+    from urllib2 import HTTPError
+    from urllib2 import urlopen, Request
+
 # Helpers

@@ -151,19 +165,37 @@
 # Begin

 class _Array(array.array):
-    """Converts python tuples to lists of the appropritate type.
+    """Converts python tuples to lists of the appropriate type.
     Used to unpack different shapefile header parts."""
     def __repr__(self):
         return str(self.tolist())

-def signed_area(coords):
+def signed_area(coords, fast=False):
     """Return the signed area enclosed by a ring using the linear time
     algorithm. A value >= 0 indicates a counter-clockwise oriented ring.
+    A faster version is possible by setting 'fast' to True, which returns
+    2x the area, e.g. if you're only interested in the sign of the area. 
""" xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values xs.append(xs[1]) ys.append(ys[1]) - return sum(xs[i]*(ys[i+1]-ys[i-1]) for i in range(1, len(coords)))/2.0 + area2 = sum(xs[i]*(ys[i+1]-ys[i-1]) for i in range(1, len(coords))) + if fast: + return area2 + else: + return area2 / 2.0 + +def is_cw(coords): + """Returns True if a polygon ring has clockwise orientation, determined + by a negatively signed area. + """ + area2 = signed_area(coords, fast=True) + return area2 < 0 + +def rewind(coords): + """Returns the input coords in reversed order. + """ + return list(reversed(coords)) def ring_bbox(coords): """Calculates and returns the bounding box of a ring. @@ -257,7 +289,7 @@ if not is_straight_line: # get triplet orientation closed_triplet = triplet + [triplet[0]] - triplet_ccw = signed_area(closed_triplet) >= 0 + triplet_ccw = not is_cw(closed_triplet) # check that triplet has the same orientation as the ring (means triangle is inside the ring) if ccw == triplet_ccw: # get triplet centroid @@ -279,10 +311,11 @@ ''' return all((ring_contains_point(coords1, p2) for p2 in coords2)) -def organize_polygon_rings(rings): +def organize_polygon_rings(rings, return_errors=None): '''Organize a list of coordinate rings into one or more polygons with holes. Returns a list of polygons, where each polygon is composed of a single exterior - ring, and one or more interior holes. + ring, and one or more interior holes. If a return_errors dict is provided (optional), + any errors encountered will be added to it. Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). 
Rings are determined as exteriors if they run in clockwise direction, or interior @@ -296,11 +329,13 @@ for ring in rings: # shapefile format defines a polygon as a sequence of rings # where exterior rings are clockwise, and holes counterclockwise - if signed_area(ring) < 0: + if is_cw(ring): # ring is exterior + ring = rewind(ring) # GeoJSON and Shapefile exteriors have opposite orientation exteriors.append(ring) else: # ring is a hole + ring = rewind(ring) # GeoJSON and Shapefile holes have opposite orientation holes.append(ring) # if only one exterior, then all holes belong to that exterior @@ -336,7 +371,8 @@ if len(exterior_candidates) > 1: # get hole sample point - hole_sample = ring_sample(holes[hole_i], ccw=True) + # Note: all rings now follow GeoJSON orientation, i.e. holes are clockwise + hole_sample = ring_sample(holes[hole_i], ccw=False) # collect new exterior candidates new_exterior_candidates = [] for ext_i in exterior_candidates: @@ -353,14 +389,13 @@ if len(exterior_candidates) > 1: # exterior candidate with the smallest area is the hole's most immediate parent - ext_i = sorted(exterior_candidates, key=lambda x: abs(signed_area(exteriors[x])))[0] + ext_i = sorted(exterior_candidates, key=lambda x: abs(signed_area(exteriors[x], fast=True)))[0] hole_exteriors[hole_i] = [ext_i] # separate out holes that are orphaned (not contained by any exterior) orphan_holes = [] for hole_i,exterior_candidates in list(hole_exteriors.items()): if not exterior_candidates: - warnings.warn('Shapefile shape has invalid polygon: found orphan hole (not contained by any of the exteriors); interpreting as exterior.') orphan_holes.append( hole_i ) del hole_exteriors[hole_i] continue @@ -380,21 +415,31 @@ # add orphan holes as exteriors for hole_i in orphan_holes: - ext = holes[hole_i] # could potentially reverse their order, but in geojson winding order doesn't matter + ext = holes[hole_i] + # since this was previously a clockwise ordered hole, inverse the winding order + ext 
= rewind(ext) + # add as single exterior without any holes poly = [ext] polys.append(poly) + if orphan_holes and return_errors is not None: + return_errors['polygon_orphaned_holes'] = len(orphan_holes) + return polys # no exteriors, be nice and assume due to incorrect winding order else: - warnings.warn('Shapefile shape has invalid polygon: no exterior rings found (must have clockwise orientation); interpreting holes as exteriors.') - exteriors = holes # could potentially reverse their order, but in geojson winding order doesn't matter + if return_errors is not None: + return_errors['polygon_only_holes'] = len(holes) + exteriors = holes + # since these were previously clockwise ordered holes, inverse the winding order + exteriors = [rewind(ext) for ext in exteriors] + # add as single exterior without any holes polys = [[ext] for ext in exteriors] return polys class Shape(object): - def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None): + def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None): """Stores the geometry of the different shape types specified in the Shapefile spec. Shape types are usually point, polyline, or polygons. Every shape type @@ -411,6 +456,15 @@ self.parts = parts or [] if partTypes: self.partTypes = partTypes + + # and a dict to silently record any errors encountered + self._errors = {} + + # add oid + if oid is not None: + self.__oid = oid + else: + self.__oid = -1 @property def __geo_interface__(self): @@ -490,7 +544,25 @@ # organize rings into list of polygons, where each polygon is defined as list of rings. # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). 
- polys = organize_polygon_rings(rings) + polys = organize_polygon_rings(rings, self._errors) + + # if VERBOSE is True, issue detailed warning about any shape errors + # encountered during the Shapefile to GeoJSON conversion + if VERBOSE and self._errors: + header = 'Possible issue encountered when converting Shape #{} to GeoJSON: '.format(self.oid) + orphans = self._errors.get('polygon_orphaned_holes', None) + if orphans: + msg = header + 'Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ +but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ +orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ +encoded as GeoJSON exterior rings instead of holes.' + logging.warning(msg) + only_holes = self._errors.get('polygon_only_holes', None) + if only_holes: + msg = header + 'Shapefile format requires that polygons contain at least one exterior ring, \ +but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ +still included but were encoded as GeoJSON exterior rings instead of holes.' + logging.warning(msg) # return as geojson if len(polys) == 1: @@ -543,12 +615,15 @@ parts = [] index = 0 for i,ext_or_hole in enumerate(geoj["coordinates"]): - if i == 0 and not signed_area(ext_or_hole) < 0: + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. 
+ if i == 0 and not is_cw(ext_or_hole): # flip exterior direction - ext_or_hole = list(reversed(ext_or_hole)) - elif i > 0 and not signed_area(ext_or_hole) >= 0: + ext_or_hole = rewind(ext_or_hole) + elif i > 0 and is_cw(ext_or_hole): # flip hole direction - ext_or_hole = list(reversed(ext_or_hole)) + ext_or_hole = rewind(ext_or_hole) points.extend(ext_or_hole) parts.append(index) index += len(ext_or_hole) @@ -570,12 +645,15 @@ index = 0 for polygon in geoj["coordinates"]: for i,ext_or_hole in enumerate(polygon): - if i == 0 and not signed_area(ext_or_hole) < 0: + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. + if i == 0 and not is_cw(ext_or_hole): # flip exterior direction - ext_or_hole = list(reversed(ext_or_hole)) - elif i > 0 and not signed_area(ext_or_hole) >= 0: + ext_or_hole = rewind(ext_or_hole) + elif i > 0 and is_cw(ext_or_hole): # flip hole direction - ext_or_hole = list(reversed(ext_or_hole)) + ext_or_hole = rewind(ext_or_hole) points.extend(ext_or_hole) parts.append(index) index += len(ext_or_hole) @@ -584,9 +662,17 @@ return shape @property + def oid(self): + """The index position of the shape in the original shapefile""" + return self.__oid + + @property def shapeTypeName(self): return SHAPETYPE_LOOKUP[self.shapeType] + def __repr__(self): + return 'Shape #{}: {}'.format(self.__oid, self.shapeTypeName) + class _Record(list): """ A class to hold a record. 
Subclasses list to ensure compatibility with @@ -749,8 +835,9 @@ def __geo_interface__(self): # Note: currently this will fail if any of the shapes are null-geometries # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords - return {'type': 'GeometryCollection', - 'geometries': [shape.__geo_interface__ for shape in self]} + collection = {'type': 'GeometryCollection', + 'geometries': [shape.__geo_interface__ for shape in self]} + return collection class ShapeRecords(list): """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with @@ -763,13 +850,50 @@ @property def __geo_interface__(self): - return {'type': 'FeatureCollection', - 'features': [shaperec.__geo_interface__ for shaperec in self]} + collection = {'type': 'FeatureCollection', + 'features': [shaperec.__geo_interface__ for shaperec in self]} + return collection class ShapefileException(Exception): """An exception to handle shapefile specific problems.""" pass +# def warn_geojson_collection(shapes): +# # collect information about any potential errors with the GeoJSON +# errors = {} +# for i,shape in enumerate(shapes): +# shape_errors = shape._errors +# if shape_errors: +# for error in shape_errors.keys(): +# errors[error] = errors[error] + [i] if error in errors else [] + +# # warn if any errors were found +# if errors: +# messages = ['Summary of possibles issues encountered during shapefile to GeoJSON conversion:'] + +# # polygon orphan holes +# orphans = errors.get('polygon_orphaned_holes', None) +# if orphans: +# msg = 'GeoJSON format requires that all interior holes be contained by an exterior ring, \ +# but the Shapefile contained {} records of polygons where some of its interior holes were \ +# orphaned (not contained by any other rings). The rings were still included but were \ +# encoded as GeoJSON exterior rings instead of holes. 
Shape ids: {}'.format(len(orphans), orphans) +# messages.append(msg) + +# # polygon only holes/wrong orientation +# only_holes = errors.get('polygon_only_holes', None) +# if only_holes: +# msg = 'GeoJSON format requires that polygons contain at least one exterior ring, but \ +# the Shapefile contained {} records of polygons where all of its component rings were stored as interior \ +# holes. The rings were still included but were encoded as GeoJSON exterior rings instead of holes. \ +# Shape ids: {}'.format(len(only_holes), only_holes) +# messages.append(msg) + +# if len(messages) > 1: +# # more than just the "Summary of..." header +# msg = '\n'.join(messages) +# logging.warning(msg) + class Reader(object): """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, @@ -778,7 +902,9 @@ The .shx index file is used if available for efficiency but is not required to read the geometry from the .shp file. The "shapefile" argument in the constructor is the - name of the file you want to open. + name of the file you want to open, and can be the path + to a shapefile on a local filesystem, inside a zipfile, + or a url. You can instantiate a Reader without specifying a shapefile and then specify one later with the load() method. 
@@ -799,14 +925,112 @@ self.numShapes = None self.fields = [] self.__dbfHdrLength = 0 - self.__fieldposition_lookup = {} + self.__fieldLookup = {} self.encoding = kwargs.pop('encoding', 'utf-8') self.encodingErrors = kwargs.pop('encodingErrors', 'strict') # See if a shapefile name was passed as the first argument if len(args) > 0: if is_string(args[0]): - self.load(args[0]) - return + path = args[0] + + if '.zip' in path: + # Shapefile is inside a zipfile + if path.count('.zip') > 1: + # Multiple nested zipfiles + raise ShapefileException('Reading from multiple nested zipfiles is not supported: %s' % args[0]) + # Split into zipfile and shapefile paths + if path.endswith('.zip'): + zpath = path + shapefile = None + else: + zpath = path[:path.find('.zip')+4] + shapefile = path[path.find('.zip')+4+1:] + # Create a zip file handle + if zpath.startswith('http'): + # Zipfile is from a url + # Download to a temporary url and treat as normal zipfile + req = Request(zpath, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) + resp = urlopen(req) + # write zipfile data to a read+write tempfile and use as source, gets deleted when garbage collected + zipfileobj = tempfile.NamedTemporaryFile(mode='w+b', suffix='.zip', delete=True) + zipfileobj.write(resp.read()) + zipfileobj.seek(0) + else: + # Zipfile is from a file + zipfileobj = open(zpath, mode='rb') + # Open the zipfile archive + with zipfile.ZipFile(zipfileobj, 'r') as archive: + if not shapefile: + # Only the zipfile path is given + # Inspect zipfile contents to find the full shapefile path + shapefiles = [name + for name in archive.namelist() + if name.endswith('.shp')] + # The zipfile must contain exactly one shapefile + if len(shapefiles) == 0: + raise ShapefileException('Zipfile does not contain any shapefiles') + elif len(shapefiles) == 1: + shapefile = shapefiles[0] + else: + raise ShapefileException('Zipfile contains 
more than one shapefile: %s. Please specify the full \ + path to the shapefile you would like to open.' % shapefiles ) + # Try to extract file-like objects from zipfile + shapefile = os.path.splitext(shapefile)[0] # root shapefile name + for ext in ['shp','shx','dbf']: + try: + member = archive.open(shapefile+'.'+ext) + # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() + fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) + fileobj.write(member.read()) + fileobj.seek(0) + setattr(self, ext, fileobj) + except: + pass + # Close and delete the temporary zipfile + try: zipfileobj.close() + except: pass + # Try to load shapefile + if (self.shp or self.dbf): + # Load and exit early + self.load() + return + else: + raise ShapefileException("No shp or dbf file found in zipfile: %s" % path) + + elif path.startswith('http'): + # Shapefile is from a url + # Download each file to temporary path and treat as normal shapefile path + urlinfo = urlparse(path) + urlpath = urlinfo[2] + urlpath,_ = os.path.splitext(urlpath) + shapefile = os.path.basename(urlpath) + for ext in ['shp','shx','dbf']: + try: + _urlinfo = list(urlinfo) + _urlinfo[2] = urlpath + '.' 
+ ext + _path = urlunparse(_urlinfo) + req = Request(_path, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) + resp = urlopen(req) + # write url data to a read+write tempfile and use as source, gets deleted on close() + fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) + fileobj.write(resp.read()) + fileobj.seek(0) + setattr(self, ext, fileobj) + except HTTPError: + pass + if (self.shp or self.dbf): + # Load and exit early + self.load() + return + else: + raise ShapefileException("No shp or dbf file found at url: %s" % path) + + else: + # Local file path to a shapefile + # Load and exit early + self.load(path) + return + # Otherwise, load from separate shp/shx/dbf args (must be file-like) if "shp" in kwargs.keys(): if hasattr(kwargs["shp"], "read"): @@ -894,10 +1118,9 @@ else: # Index file not available, iterate all shapes to get total count if self.numShapes is None: - i = 0 for i,shape in enumerate(self.iterShapes()): - i += 1 - self.numShapes = i + pass + self.numShapes = i + 1 return self.numShapes @@ -981,10 +1204,16 @@ self.close() def close(self): - for attribute in (self.shp, self.shx, self.dbf): - if hasattr(attribute, 'close'): + for attribute in ('shp','shx','dbf'): + try: + obj = getattr(self, attribute) + except AttributeError: + # deepcopies fail to copy these attributes and raises exception during + # garbage collection - https://github.com/mattijn/topojson/issues/120 + obj = None + if obj and hasattr(obj, 'close'): try: - attribute.close() + obj.close() except IOError: pass @@ -1033,10 +1262,10 @@ else: self.mbox.append(None) - def __shape(self): + def __shape(self, oid=None, bbox=None): """Returns the header info and geometry for a single shape.""" f = self.__getFileObj(self.shp) - record = Shape() + record = Shape(oid=oid) nParts = nPoints = zmin = zmax = mmin = mmax = None (recNum, recLength) = unpack(">2i", f.read(8)) # Determine the 
start of the next record @@ -1049,6 +1278,12 @@ # All shape types capable of having a bounding box elif shapeType in (3,5,8,13,15,18,23,25,28,31): record.bbox = _Array('d', unpack("<4d", f.read(32))) + # if bbox specified and no overlap, skip this shape + if bbox is not None and not bbox_overlap(bbox, record.bbox): + # because we stop parsing this shape, skip to beginning of + # next shape before we return + f.seek(next) + return None # Shape types with parts if shapeType in (3,5,13,15,23,25,31): nParts = unpack(" self.shpNum: self.balance() + fieldCount = sum((1 for field in self.fields if field[0] != 'DeletionFlag')) if recordList: record = list(recordList) + while len(record) < fieldCount: + record.append("") elif recordDict: record = [] for field in self.fields: @@ -1831,9 +2195,11 @@ record.append("") else: record.append(val) + else: + record.append("") # need empty value for missing dict entries else: # Blank fields for empty record - record = ["" for field in self.fields if field[0] != 'DeletionFlag'] + record = ["" for _ in range(fieldCount)] self.__dbfRecord(record) def __dbfRecord(self, record): @@ -2026,7 +2392,7 @@ PartTypes is a list of types that define each of the surface patches. The types can be any of the following module constants: TRIANGLE_STRIP, TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. - If the z (elavation) value is not included, it defaults to 0. + If the z (elevation) value is not included, it defaults to 0. 
If the m (measure) value is not included, it defaults to None (NoData).""" shapeType = MULTIPATCH polyShape = Shape(shapeType) Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/blockgroups_multishapefile.zip and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/blockgroups_multishapefile.zip differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/blockgroups.zip and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/blockgroups.zip differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/empty_zipfile.zip and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/empty_zipfile.zip differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/balancing.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/balancing.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/contextwriter.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/contextwriter.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/dtype.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/dtype.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/edit.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/edit.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/edit.shp and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/edit.shp differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/edit.shx and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/edit.shx differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/line.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/line.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/linem.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/linem.dbf differ Binary files 
/tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/linez.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/linez.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/merge.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/merge.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/merge.shp and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/merge.shp differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/merge.shx and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/merge.shx differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/multipatch.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/multipatch.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/multipoint.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/multipoint.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/onlydbf.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/onlydbf.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/point.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/point.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/polygon.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/polygon.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/shapetype.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/shapetype.dbf differ Binary files /tmp/tmpaiexcbc2/r8T3AvjcQW/pyshp-2.1.3+ds/shapefiles/test/testfile.dbf and /tmp/tmpaiexcbc2/Lpxu8rJ2Hv/pyshp-2.2.0/shapefiles/test/testfile.dbf differ diff -Nru pyshp-2.1.3+ds/test_shapefile.py pyshp-2.2.0/test_shapefile.py --- pyshp-2.1.3+ds/test_shapefile.py 1970-01-01 00:00:00.000000000 +0000 +++ pyshp-2.2.0/test_shapefile.py 2022-02-02 17:07:33.000000000 +0000 @@ 
-0,0 +1,1007 @@ +""" +This module tests the functionality of shapefile.py. +""" +# std lib imports +import os.path + +# third party imports +import pytest +import json +import datetime + +# our imports +import shapefile + +# define various test shape tuples of (type, points, parts indexes, and expected geo interface output) +geo_interface_tests = [ (shapefile.POINT, # point + [(1,1)], + [], + {'type':'Point','coordinates':(1,1)} + ), + (shapefile.MULTIPOINT, # multipoint + [(1,1),(2,1),(2,2)], + [], + {'type':'MultiPoint','coordinates':[(1,1),(2,1),(2,2)]} + ), + (shapefile.POLYLINE, # single linestring + [(1,1),(2,1)], + [0], + {'type':'LineString','coordinates':[(1,1),(2,1)]} + ), + (shapefile.POLYLINE, # multi linestring + [(1,1),(2,1), # line 1 + (10,10),(20,10)], # line 2 + [0,2], + {'type':'MultiLineString','coordinates':[ + [(1,1),(2,1)], # line 1 + [(10,10),(20,10)] # line 2 + ]} + ), + (shapefile.POLYGON, # single polygon, no holes + [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior + ], + [0], + {'type':'Polygon','coordinates':[ + shapefile.rewind([(1,1),(1,9),(9,9),(9,1),(1,1)]), + ]} + ), + (shapefile.POLYGON, # single polygon, holes (ordered) + [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior + (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1 + (5,5),(7,5),(7,7),(5,7),(5,5), # hole 2 + ], + [0,5,5+5], + {'type':'Polygon','coordinates':[ + shapefile.rewind([(1,1),(1,9),(9,9),(9,1),(1,1)]), # exterior + shapefile.rewind([(2,2),(4,2),(4,4),(2,4),(2,2)]), # hole 1 + shapefile.rewind([(5,5),(7,5),(7,7),(5,7),(5,5)]), # hole 2 + ]} + ), + (shapefile.POLYGON, # single polygon, holes (unordered) + [ + (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1 + (1,1),(1,9),(9,9),(9,1),(1,1), # exterior + (5,5),(7,5),(7,7),(5,7),(5,5), # hole 2 + ], + [0,5,5+5], + {'type':'Polygon','coordinates':[ + shapefile.rewind([(1,1),(1,9),(9,9),(9,1),(1,1)]), # exterior + shapefile.rewind([(2,2),(4,2),(4,4),(2,4),(2,2)]), # hole 1 + shapefile.rewind([(5,5),(7,5),(7,7),(5,7),(5,5)]), # hole 2 + ]} + ), + 
(shapefile.POLYGON, # multi polygon, no holes + [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior + (11,11),(11,19),(19,19),(19,11),(11,11), # exterior + ], + [0,5], + {'type':'MultiPolygon','coordinates':[ + [ # poly 1 + shapefile.rewind([(1,1),(1,9),(9,9),(9,1),(1,1)]), + ], + [ # poly 2 + shapefile.rewind([(11,11),(11,19),(19,19),(19,11),(11,11)]), + ], + ]} + ), + (shapefile.POLYGON, # multi polygon, holes (unordered) + [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 + (11,11),(11,19),(19,19),(19,11),(11,11), # exterior 2 + (12,12),(14,12),(14,14),(12,14),(12,12), # hole 2.1 + (15,15),(17,15),(17,17),(15,17),(15,15), # hole 2.2 + (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1.1 + (5,5),(7,5),(7,7),(5,7),(5,5), # hole 1.2 + ], + [0,5,10,15,20,25], + {'type':'MultiPolygon','coordinates':[ + [ # poly 1 + shapefile.rewind([(1,1),(1,9),(9,9),(9,1),(1,1)]), # exterior + shapefile.rewind([(2,2),(4,2),(4,4),(2,4),(2,2)]), # hole 1 + shapefile.rewind([(5,5),(7,5),(7,7),(5,7),(5,5)]), # hole 2 + ], + [ # poly 2 + shapefile.rewind([(11,11),(11,19),(19,19),(19,11),(11,11)]), # exterior + shapefile.rewind([(12,12),(14,12),(14,14),(12,14),(12,12)]), # hole 1 + shapefile.rewind([(15,15),(17,15),(17,17),(15,17),(15,15)]), # hole 2 + ], + ]} + ), + (shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered) + [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 + (3,3),(3,7),(7,7),(7,3),(3,3), # exterior 2 + (4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5), # exterior 3 + (4,4),(6,4),(6,6),(4,6),(4,4), # hole 2.1 + (2,2),(8,2),(8,8),(2,8),(2,2), # hole 1.1 + ], + [0,5,10,15,20], + {'type':'MultiPolygon','coordinates':[ + [ # poly 1 + shapefile.rewind([(1,1),(1,9),(9,9),(9,1),(1,1)]), # exterior 1 + shapefile.rewind([(2,2),(8,2),(8,8),(2,8),(2,2)]), # hole 1.1 + ], + [ # poly 2 + shapefile.rewind([(3,3),(3,7),(7,7),(7,3),(3,3)]), # exterior 2 + shapefile.rewind([(4,4),(6,4),(6,6),(4,6),(4,4)]), # hole 2.1 + ], + [ # poly 3 + 
shapefile.rewind([(4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5)]), # exterior 3 + ], + ]} + ), + (shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered and tricky holes designed to throw off ring_sample() test) + [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 + (3,3),(3,7),(7,7),(7,3),(3,3), # exterior 2 + (4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5), # exterior 3 + (4,4),(4,4),(6,4),(6,4),(6,4),(6,6),(4,6),(4,4), # hole 2.1 (hole has duplicate coords) + (2,2),(3,3),(4,2),(8,2),(8,8),(4,8),(2,8),(2,4),(2,2), # hole 1.1 (hole coords form straight line and starts in concave orientation) + ], + [0,5,10,15,20+3], + {'type':'MultiPolygon','coordinates':[ + [ # poly 1 + shapefile.rewind([(1,1),(1,9),(9,9),(9,1),(1,1)]), # exterior 1 + shapefile.rewind([(2,2),(3,3),(4,2),(8,2),(8,8),(4,8),(2,8),(2,4),(2,2)]), # hole 1.1 + ], + [ # poly 2 + shapefile.rewind([(3,3),(3,7),(7,7),(7,3),(3,3)]), # exterior 2 + shapefile.rewind([(4,4),(4,4),(6,4),(6,4),(6,4),(6,6),(4,6),(4,4)]), # hole 2.1 + ], + [ # poly 3 + shapefile.rewind([(4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5)]), # exterior 3 + ], + ]} + ), + (shapefile.POLYGON, # multi polygon, holes incl orphaned holes (unordered), should raise warning + [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 + (11,11),(11,19),(19,19),(19,11),(11,11), # exterior 2 + (12,12),(14,12),(14,14),(12,14),(12,12), # hole 2.1 + (15,15),(17,15),(17,17),(15,17),(15,15), # hole 2.2 + (95,95),(97,95),(97,97),(95,97),(95,95), # hole x.1 (orphaned hole, should be interpreted as exterior) + (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1.1 + (5,5),(7,5),(7,7),(5,7),(5,5), # hole 1.2 + ], + [0,5,10,15,20,25,30], + {'type':'MultiPolygon','coordinates':[ + [ # poly 1 + shapefile.rewind([(1,1),(1,9),(9,9),(9,1),(1,1)]), # exterior + shapefile.rewind([(2,2),(4,2),(4,4),(2,4),(2,2)]), # hole 1 + shapefile.rewind([(5,5),(7,5),(7,7),(5,7),(5,5)]), # hole 2 + ], + [ # poly 2 + shapefile.rewind([(11,11),(11,19),(19,19),(19,11),(11,11)]), # 
exterior + shapefile.rewind([(12,12),(14,12),(14,14),(12,14),(12,12)]), # hole 1 + shapefile.rewind([(15,15),(17,15),(17,17),(15,17),(15,15)]), # hole 2 + ], + [ # poly 3 (orphaned hole) + # Note: due to the hole-to-exterior conversion, should return the same ring orientation + [(95,95),(97,95),(97,97),(95,97),(95,95)], # exterior + ], + ]} + ), + (shapefile.POLYGON, # multi polygon, exteriors with wrong orientation (be nice and interpret as such), should raise warning + [(1,1),(9,1),(9,9),(1,9),(1,1), # exterior with hole-orientation + (11,11),(19,11),(19,19),(11,19),(11,11), # exterior with hole-orientation + ], + [0,5], + {'type':'MultiPolygon','coordinates':[ + [ # poly 1 + # Note: due to the hole-to-exterior conversion, should return the same ring orientation + [(1,1),(9,1),(9,9),(1,9),(1,1)], + ], + [ # poly 2 + # Note: due to the hole-to-exterior conversion, should return the same ring orientation + [(11,11),(19,11),(19,19),(11,19),(11,11)], + ], + ]} + ), + ] + +def test_empty_shape_geo_interface(): + """ + Assert that calling __geo_interface__ + on a Shape with no points or parts + raises an Exception. + """ + shape = shapefile.Shape() + with pytest.raises(Exception): + shape.__geo_interface__ + +@pytest.mark.parametrize("typ,points,parts,expected", geo_interface_tests) +def test_expected_shape_geo_interface(typ, points, parts, expected): + """ + Assert that calling __geo_interface__ + on arbitrary input Shape works as expected. 
+ """ + shape = shapefile.Shape(typ, points, parts) + geoj = shape.__geo_interface__ + assert geoj == expected + + +def test_reader_geo_interface(): + with shapefile.Reader("shapefiles/blockgroups") as r: + geoj = r.__geo_interface__ + assert geoj['type'] == 'FeatureCollection' + assert 'bbox' in geoj + assert json.dumps(geoj) + + +def test_shapes_geo_interface(): + with shapefile.Reader("shapefiles/blockgroups") as r: + geoj = r.shapes().__geo_interface__ + assert geoj['type'] == 'GeometryCollection' + assert json.dumps(geoj) + + +def test_shaperecords_geo_interface(): + with shapefile.Reader("shapefiles/blockgroups") as r: + geoj = r.shapeRecords().__geo_interface__ + assert geoj['type'] == 'FeatureCollection' + assert json.dumps(geoj) + + +def test_shaperecord_geo_interface(): + with shapefile.Reader("shapefiles/blockgroups") as r: + for shaperec in r: + assert json.dumps(shaperec.__geo_interface__) + + +def test_reader_context_manager(): + """ + Assert that the Reader context manager + closes the shp, shx, and dbf files + on exit. + """ + # note uses an actual shapefile from + # the projects "shapefiles" directory + with shapefile.Reader("shapefiles/blockgroups") as sf: + pass + + assert sf.shp.closed is True + assert sf.dbf.closed is True + assert sf.shx.closed is True + + +def test_reader_url(): + """ + Assert that Reader can open shapefiles from a url. 
+ """ + # test with extension + url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries.shp?raw=true" + with shapefile.Reader(url) as sf: + for recShape in sf.iterShapeRecords(): + pass + + # test without extension + url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries?raw=true" + with shapefile.Reader(url) as sf: + for recShape in sf.iterShapeRecords(): + pass + assert len(sf) > 0 + + # test no files found + url = "https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/README.md" + with pytest.raises(shapefile.ShapefileException): + with shapefile.Reader(url) as sf: + pass + + # test reading zipfile from url + url = "https://biogeo.ucdavis.edu/data/diva/rrd/NIC_rrd.zip" + with shapefile.Reader(url) as sf: + for recShape in sf.iterShapeRecords(): + pass + assert len(sf) > 0 + + +def test_reader_zip(): + """ + Assert that Reader can open shapefiles inside a zipfile. 
+ """ + # test reading zipfile only + with shapefile.Reader("shapefiles/blockgroups.zip") as sf: + for recShape in sf.iterShapeRecords(): + pass + assert len(sf) > 0 + + # test require specific path when reading multi-shapefile zipfile + with pytest.raises(shapefile.ShapefileException): + with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip") as sf: + pass + + # test specifying the path when reading multi-shapefile zipfile (with extension) + with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp") as sf: + for recShape in sf.iterShapeRecords(): + pass + assert len(sf) > 0 + + # test specifying the path when reading multi-shapefile zipfile (without extension) + with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2") as sf: + for recShape in sf.iterShapeRecords(): + pass + assert len(sf) > 0 + + # test raising error when can't find shapefile inside zipfile + with pytest.raises(shapefile.ShapefileException): + with shapefile.Reader("shapefiles/empty_zipfile.zip") as sf: + pass + + +def test_reader_close(): + """ + Assert that manually calling Reader.close() + closes the shp, shx, and dbf files + on exit. + """ + # note uses an actual shapefile from + # the projects "shapefiles" directory + with shapefile.Reader("shapefiles/blockgroups") as sf: + pass + + assert sf.shp.closed is True + assert sf.dbf.closed is True + assert sf.shx.closed is True + + +def test_reader_shapefile_type(): + """ + Assert that the type of the shapefile + is returned correctly. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + assert sf.shapeType is 5 # 5 means Polygon + assert sf.shapeType is shapefile.POLYGON + assert sf.shapeTypeName is "POLYGON" + + +def test_reader_shapefile_length(): + """ + Assert that the length the reader gives us + matches up with the number of records + in the file. 
+ """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + assert len(sf) == len(sf.shapes()) + + +def test_shape_metadata(): + with shapefile.Reader("shapefiles/blockgroups") as sf: + shape = sf.shape(0) + assert shape.shapeType is 5 # Polygon + assert shape.shapeType is shapefile.POLYGON + assert sf.shapeTypeName is "POLYGON" + + +def test_reader_fields(): + """ + Assert that the reader's fields attribute + gives the shapefile's fields as a list. + Assert that each field has a name, + type, field length, and decimal length. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + fields = sf.fields + assert isinstance(fields, list) + + field = fields[0] + assert isinstance(field[0], str) # field name + assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type + assert isinstance(field[2], int) # field length + assert isinstance(field[3], int) # decimal length + + +def test_reader_shapefile_extension_ignored(): + """ + Assert that the filename's extension is + ignored when reading a shapefile. + """ + base = "shapefiles/blockgroups" + ext = ".abc" + filename = base + ext + with shapefile.Reader(filename) as sf: + assert len(sf) == 663 + + # assert test.abc does not exist + assert not os.path.exists(filename) + + +def test_reader_filelike_dbf_only(): + """ + Assert that specifying just the + dbf argument to the shapefile reader + reads just the dbf file. + """ + with shapefile.Reader(dbf=open("shapefiles/blockgroups.dbf", "rb")) as sf: + assert len(sf) == 663 + record = sf.record(3) + assert record[1:3] == ['060750601001', 4715] + + +def test_reader_filelike_shp_shx_only(): + """ + Assert that specifying just the + shp and shx argument to the shapefile reader + reads just the shp and shx file. 
+ """ + with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb"), shx=open("shapefiles/blockgroups.shx", "rb")) as sf: + assert len(sf) == 663 + shape = sf.shape(3) + assert len(shape.points) is 173 + + +def test_reader_filelike_shx_optional(): + """ + Assert that specifying just the + shp argument to the shapefile reader + reads just the shp file (shx optional). + """ + with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb")) as sf: + assert len(sf) == 663 + shape = sf.shape(3) + assert len(shape.points) is 173 + + +def test_reader_shapefile_delayed_load(): + """ + Assert that the filename's extension is + ignored when reading a shapefile. + """ + with shapefile.Reader() as sf: + # assert that data request raises exception, since no file has been provided yet + with pytest.raises(shapefile.ShapefileException): + sf.shape(0) + # assert that works after loading file manually + sf.load("shapefiles/blockgroups") + assert len(sf) == 663 + + +def test_records_match_shapes(): + """ + Assert that the number of records matches + the number of shapes in the shapefile. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + records = sf.records() + shapes = sf.shapes() + assert len(records) == len(shapes) + + +def test_record_attributes(fields=None): + """ + Assert that record retrieves all relevant values and can + be accessed as attributes and dictionary items. 
+ """ + # note + # second element in fields matches first element + # in record because records dont have DeletionFlag + with shapefile.Reader("shapefiles/blockgroups") as sf: + for i in range(len(sf)): + # full record + full_record = sf.record(i) + # user-fetched record + if fields is not None: + # only a subset of fields + record = sf.record(i, fields=fields) + else: + # default all fields + record = full_record + fields = [field[0] for field in sf.fields[1:]] # fieldnames, sans del flag + # check correct length + assert len(record) == len(set(fields)) + # check record values (should be in same order as shapefile fields) + i = 0 + for field in sf.fields: + field_name = field[0] + if field_name in fields: + assert record[i] == record[field_name] == getattr(record, field_name) + i += 1 + + +def test_record_subfields(): + """ + Assert that reader correctly retrieves only a subset + of fields when specified. + """ + fields = ["AREA","POP1990","MALES","FEMALES","MOBILEHOME"] + test_record_attributes(fields=fields) + + +def test_record_subfields_unordered(): + """ + Assert that reader correctly retrieves only a subset + of fields when specified, given in random order but + retrieved in the order of the shapefile fields. + """ + fields = sorted(["AREA","POP1990","MALES","FEMALES","MOBILEHOME"]) + test_record_attributes(fields=fields) + + +def test_record_subfields_delflag_notvalid(): + """ + Assert that reader does not consider DeletionFlag as a valid field name. + """ + fields = ["DeletionFlag","AREA","POP1990","MALES","FEMALES","MOBILEHOME"] + with pytest.raises(ValueError): + test_record_attributes(fields=fields) + + +def test_record_subfields_duplicates(): + """ + Assert that reader correctly retrieves only a subset + of fields when specified, handling duplicate input fields. 
+ """ + fields = ["AREA","AREA","AREA","MALES","MALES","MOBILEHOME"] + test_record_attributes(fields=fields) + # check that only 3 values + with shapefile.Reader("shapefiles/blockgroups") as sf: + rec = sf.record(0, fields=fields) + assert len(rec) == len(set(fields)) + + +def test_record_subfields_empty(): + """ + Assert that reader does not retrieve any fields when given + an empty list. + """ + fields = [] + test_record_attributes(fields=fields) + # check that only 0 values + with shapefile.Reader("shapefiles/blockgroups") as sf: + rec = sf.record(0, fields=fields) + assert len(rec) == 0 + + +def test_record_as_dict(): + """ + Assert that a record object can be converted + into a dictionary and data remains correct. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + record = sf.record(0) + as_dict = record.as_dict() + + assert len(record) == len(as_dict) + for key, value in as_dict.items(): + assert record[key] == value + + +def test_record_oid(): + """ + Assert that the record's oid attribute returns + its index in the shapefile. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + for i in range(len(sf)): + record = sf.record(i) + assert record.oid == i + + for i,record in enumerate(sf.records()): + assert record.oid == i + + for i,record in enumerate(sf.iterRecords()): + assert record.oid == i + + for i,shaperec in enumerate(sf.iterShapeRecords()): + assert shaperec.record.oid == i + + +def test_shape_oid(): + """ + Assert that the shape's oid attribute returns + its index in the shapefile. 
+ """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + for i in range(len(sf)): + shape = sf.shape(i) + assert shape.oid == i + + for i,shape in enumerate(sf.shapes()): + assert shape.oid == i + + for i,shape in enumerate(sf.iterShapes()): + assert shape.oid == i + + for i,shaperec in enumerate(sf.iterShapeRecords()): + assert shaperec.shape.oid == i + + +def test_bboxfilter_shape(): + """ + Assert that applying the bbox filter to shape() correctly ignores the shape + if it falls outside, and returns it if inside. + """ + inside = [-122.4, 37.8, -122.35, 37.82] + outside = list(inside) + outside[0] *= 10 + outside[2] *= 10 + with shapefile.Reader("shapefiles/blockgroups") as sf: + assert sf.shape(0, bbox=inside) is not None + assert sf.shape(0, bbox=outside) is None + + +def test_bboxfilter_shapes(): + """ + Assert that applying the bbox filter to shapes() correctly ignores shapes + that fall outside, and returns those that fall inside. + """ + bbox = [-122.4, 37.8, -122.35, 37.82] + with shapefile.Reader("shapefiles/blockgroups") as sf: + # apply bbox filter + shapes = sf.shapes(bbox=bbox) + # manually check bboxes + manual = shapefile.Shapes() + for shape in sf.iterShapes(): + if shapefile.bbox_overlap(shape.bbox, bbox): + manual.append(shape) + # compare + assert len(shapes) == len(manual) + # check that they line up + for shape,man in zip(shapes,manual): + assert shape.oid == man.oid + assert shape.__geo_interface__ == man.__geo_interface__ + + +def test_bboxfilter_shapes_outside(): + """ + Assert that applying the bbox filter to shapes() correctly returns + no shapes when the bbox is outside the entire shapefile. + """ + bbox = [-180, 89, -179, 90] + with shapefile.Reader("shapefiles/blockgroups") as sf: + shapes = sf.shapes(bbox=bbox) + assert len(shapes) == 0 + + +def test_bboxfilter_itershapes(): + """ + Assert that applying the bbox filter to iterShapes() correctly ignores shapes + that fall outside, and returns those that fall inside. 
+ """ + bbox = [-122.4, 37.8, -122.35, 37.82] + with shapefile.Reader("shapefiles/blockgroups") as sf: + # apply bbox filter + shapes = list(sf.iterShapes(bbox=bbox)) + # manually check bboxes + manual = shapefile.Shapes() + for shape in sf.iterShapes(): + if shapefile.bbox_overlap(shape.bbox, bbox): + manual.append(shape) + # compare + assert len(shapes) == len(manual) + # check that they line up + for shape,man in zip(shapes,manual): + assert shape.oid == man.oid + assert shape.__geo_interface__ == man.__geo_interface__ + + +def test_bboxfilter_shaperecord(): + """ + Assert that applying the bbox filter to shapeRecord() correctly ignores the shape + if it falls outside, and returns it if inside. + """ + inside = [-122.4, 37.8, -122.35, 37.82] + outside = list(inside) + outside[0] *= 10 + outside[2] *= 10 + with shapefile.Reader("shapefiles/blockgroups") as sf: + # inside + shaperec = sf.shapeRecord(0, bbox=inside) + assert shaperec is not None + assert shaperec.shape.oid == shaperec.record.oid + # outside + assert sf.shapeRecord(0, bbox=outside) is None + + +def test_bboxfilter_shaperecords(): + """ + Assert that applying the bbox filter to shapeRecords() correctly ignores shapes + that fall outside, and returns those that fall inside. 
+ """ + bbox = [-122.4, 37.8, -122.35, 37.82] + with shapefile.Reader("shapefiles/blockgroups") as sf: + # apply bbox filter + shaperecs = sf.shapeRecords(bbox=bbox) + # manually check bboxes + manual = shapefile.ShapeRecords() + for shaperec in sf.iterShapeRecords(): + if shapefile.bbox_overlap(shaperec.shape.bbox, bbox): + manual.append(shaperec) + # compare + assert len(shaperecs) == len(manual) + # check that they line up + for shaperec,man in zip(shaperecs,manual): + # oids + assert shaperec.shape.oid == shaperec.record.oid + # same shape as manual + assert shaperec.shape.oid == man.shape.oid + assert shaperec.shape.__geo_interface__ == man.shape.__geo_interface__ + # same record as manual + assert shaperec.record.oid == man.record.oid + assert shaperec.record == man.record + + +def test_bboxfilter_itershaperecords(): + """ + Assert that applying the bbox filter to iterShapeRecords() correctly ignores shapes + that fall outside, and returns those that fall inside. + """ + bbox = [-122.4, 37.8, -122.35, 37.82] + with shapefile.Reader("shapefiles/blockgroups") as sf: + # apply bbox filter + shaperecs = list(sf.iterShapeRecords(bbox=bbox)) + # manually check bboxes + manual = shapefile.ShapeRecords() + for shaperec in sf.iterShapeRecords(): + if shapefile.bbox_overlap(shaperec.shape.bbox, bbox): + manual.append(shaperec) + # compare + assert len(shaperecs) == len(manual) + # check that they line up + for shaperec,man in zip(shaperecs,manual): + # oids + assert shaperec.shape.oid == shaperec.record.oid + # same shape as manual + assert shaperec.shape.oid == man.shape.oid + assert shaperec.shape.__geo_interface__ == man.shape.__geo_interface__ + # same record as manual + assert shaperec.record.oid == man.record.oid + assert shaperec.record == man.record + + +def test_shaperecords_shaperecord(): + """ + Assert that shapeRecords returns a list of + ShapeRecord objects. + Assert that shapeRecord returns a single + ShapeRecord at the given index. 
+ """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + shaperecs = sf.shapeRecords() + shaperec = sf.shapeRecord(0) + should_match = shaperecs[0] + + # assert record is equal + assert shaperec.record == should_match.record + + # assert shape is equal + shaperec_json = shaperec.shape.__geo_interface__ + should_match_json = should_match.shape.__geo_interface__ + assert shaperec_json == should_match_json + + +def test_shaperecord_shape(): + """ + Assert that a ShapeRecord object has a shape + attribute that contains shape data. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + shaperec = sf.shapeRecord(3) + shape = shaperec.shape + point = shape.points[0] + assert len(point) is 2 + + +def test_shaperecord_record(): + """ + Assert that a ShapeRecord object has a record + attribute that contains record data. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + shaperec = sf.shapeRecord(3) + record = shaperec.record + + assert record[1:3] == ['060750601001', 4715] + + +def test_write_field_name_limit(tmpdir): + """ + Abc... + """ + filename = tmpdir.join("test.shp").strpath + with shapefile.Writer(filename) as writer: + writer.field('a'*5, 'C') # many under length limit + writer.field('a'*9, 'C') # 1 under length limit + writer.field('a'*10, 'C') # at length limit + writer.field('a'*11, 'C') # 1 over length limit + writer.field('a'*20, 'C') # many over limit + + with shapefile.Reader(filename) as reader: + fields = reader.fields[1:] + assert len(fields[0][0]) == 5 + assert len(fields[1][0]) == 9 + assert len(fields[2][0]) == 10 + assert len(fields[3][0]) == 10 + assert len(fields[4][0]) == 10 + + +def test_write_shp_only(tmpdir): + """ + Assert that specifying just the + shp argument to the shapefile writer + creates just a shp file. 
+ """ + filename = tmpdir.join("test.shp").strpath + with shapefile.Writer(shp=filename) as writer: + pass + + # assert test.shp exists + assert os.path.exists(filename) + + # assert test.shx does not exist + assert not os.path.exists(tmpdir.join("test.shx").strpath) + + # assert test.dbf does not exist + assert not os.path.exists(tmpdir.join("test.dbf").strpath) + + +def test_write_shx_only(tmpdir): + """ + Assert that specifying just the + shx argument to the shapefile writer + creates just a shx file. + """ + filename = tmpdir.join("test.shx").strpath + with shapefile.Writer(shx=filename) as writer: + pass + + # assert test.shx exists + assert os.path.exists(filename) + + # assert test.shp does not exist + assert not os.path.exists(tmpdir.join("test.shp").strpath) + + # assert test.dbf does not exist + assert not os.path.exists(tmpdir.join("test.dbf").strpath) + + +def test_write_dbf_only(tmpdir): + """ + Assert that specifying just the + dbf argument to the shapefile writer + creates just a dbf file. + """ + filename = tmpdir.join("test.dbf").strpath + with shapefile.Writer(dbf=filename) as writer: + writer.field('field1', 'C') # required to create a valid dbf file + + # assert test.dbf exists + assert os.path.exists(filename) + + # assert test.shp does not exist + assert not os.path.exists(tmpdir.join("test.shp").strpath) + + # assert test.shx does not exist + assert not os.path.exists(tmpdir.join("test.shx").strpath) + + +def test_write_default_shp_shx_dbf(tmpdir): + """ + Assert that creating the shapefile writer without + specifying the shp, shx, or dbf arguments + creates a set of shp, shx, and dbf files. 
+ """ + filename = tmpdir.join("test").strpath + with shapefile.Writer(filename) as writer: + writer.field('field1', 'C') # required to create a valid dbf file + + # assert shp, shx, dbf files exist + assert os.path.exists(filename + ".shp") + assert os.path.exists(filename + ".shx") + assert os.path.exists(filename + ".dbf") + + +def test_write_shapefile_extension_ignored(tmpdir): + """ + Assert that the filename's extension is + ignored when creating a shapefile. + """ + base = "test" + ext = ".abc" + filename = tmpdir.join(base + ext).strpath + with shapefile.Writer(filename) as writer: + writer.field('field1', 'C') # required to create a valid dbf file + + # assert shp, shx, dbf files exist + basepath = tmpdir.join(base).strpath + assert os.path.exists(basepath + ".shp") + assert os.path.exists(basepath + ".shx") + assert os.path.exists(basepath + ".dbf") + + # assert test.abc does not exist + assert not os.path.exists(basepath + ext) + + +def test_write_record(tmpdir): + """ + Test that .record() correctly writes a record using either a list of *args + or a dict of **kwargs. + """ + filename = tmpdir.join("test.shp").strpath + with shapefile.Writer(filename) as writer: + writer.autoBalance = True + + writer.field('one', 'C') # many under length limit + writer.field('two', 'C') # 1 under length limit + writer.field('three', 'C') # at length limit + writer.field('four', 'C') # 1 over length limit + + values = ['one','two','three','four'] + writer.record(*values) + writer.record(*values) + + valuedict = dict(zip(values, values)) + writer.record(**valuedict) + writer.record(**valuedict) + + with shapefile.Reader(filename) as reader: + for record in reader.iterRecords(): + assert record == values + + +def test_write_partial_record(tmpdir): + """ + Test that .record() correctly writes a partial record (given only some of the values) + using either a list of *args or a dict of **kwargs. Should fill in the gaps. 
+ """ + filename = tmpdir.join("test.shp").strpath + with shapefile.Writer(filename) as writer: + writer.autoBalance = True + + writer.field('one', 'C') # many under length limit + writer.field('two', 'C') # 1 under length limit + writer.field('three', 'C') # at length limit + writer.field('four', 'C') # 1 over length limit + + values = ['one','two'] + writer.record(*values) + writer.record(*values) + + valuedict = dict(zip(values, values)) + writer.record(**valuedict) + writer.record(**valuedict) + + with shapefile.Reader(filename) as reader: + expected = list(values) + expected.extend(['','']) + for record in reader.iterRecords(): + assert record == expected + + assert len(reader.records()) == 4 + + +def test_write_geojson(tmpdir): + """ + Assert that the output of geo interface can be written to json. + """ + filename = tmpdir.join("test").strpath + with shapefile.Writer(filename) as w: + w.field('TEXT', 'C') + w.field('NUMBER', 'N') + w.field('DATE', 'D') + w.record('text', 123, datetime.date(1898,1,30)) + w.record('text', 123, [1998,1,30]) + w.record('text', 123, '19980130') + w.record('text', 123, '-9999999') # faulty date + w.record(None, None, None) + w.null() + w.null() + w.null() + w.null() + w.null() + + with shapefile.Reader(filename) as r: + for feat in r: + assert json.dumps(feat.__geo_interface__) + assert json.dumps(r.shapeRecords().__geo_interface__) + assert json.dumps(r.__geo_interface__) + +shape_types = [k for k in shapefile.SHAPETYPE_LOOKUP.keys() if k != 31] # exclude multipatch + +@pytest.mark.parametrize("shape_type", shape_types) +def test_write_empty_shapefile(tmpdir, shape_type): + """ + Assert that can write an empty shapefile, for all different shape types. + """ + filename = tmpdir.join("test").strpath + with shapefile.Writer(filename, shapeType=shape_type) as w: + w.field('field1', 'C') # required to create a valid dbf file + + with shapefile.Reader(filename) as r: + assert r.shapeType == shape_type