diff -Nru python-fastparquet-2023.10.1/.github/workflows/main.yaml python-fastparquet-2024.2.0/.github/workflows/main.yaml
--- python-fastparquet-2023.10.1/.github/workflows/main.yaml	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/.github/workflows/main.yaml	2024-02-07 18:38:43.000000000 +0000
@@ -120,7 +120,7 @@
       - name: pip-install
         shell: bash -l {0}
         run: |
-          pip install 'Cython<3'
+          pip install Cython
           pip install hypothesis
           pip install pytest-localserver pytest-xdist pytest-asyncio
           pip install -e . --no-deps  # Install fastparquet
diff -Nru python-fastparquet-2023.10.1/.github/workflows/test_wheel.yaml python-fastparquet-2024.2.0/.github/workflows/test_wheel.yaml
--- python-fastparquet-2023.10.1/.github/workflows/test_wheel.yaml	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/.github/workflows/test_wheel.yaml	2024-02-07 18:38:43.000000000 +0000
@@ -38,7 +38,7 @@
           fetch-depth: 0

       - name: Setup Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"

@@ -50,7 +50,7 @@

       - name: Add msbuild to PATH
         if: runner.os == 'Windows'
-        uses: microsoft/setup-msbuild@v1.3
+        uses: microsoft/setup-msbuild@v2

       - name: delvewheel install
         if: runner.os == 'Windows'
@@ -58,7 +58,7 @@
           python -m pip install delvewheel cython

       - name: Build wheels
-        uses: joerick/cibuildwheel@v2.16.2
+        uses: joerick/cibuildwheel@v2.16.5

       - name: Install wheels
         shell: bash -l {0}
diff -Nru python-fastparquet-2023.10.1/.github/workflows/wheel.yml python-fastparquet-2024.2.0/.github/workflows/wheel.yml
--- python-fastparquet-2023.10.1/.github/workflows/wheel.yml	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/.github/workflows/wheel.yml	2024-02-07 18:38:43.000000000 +0000
@@ -32,9 +32,9 @@
           fetch-depth: 0

       - name: Setup Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.12"

       - name: Set up QEMU
         if: runner.os == 'Linux'
@@ -44,7 +44,7 @@

       - name: Add msbuild to PATH
         if: runner.os == 'Windows'
-        uses: microsoft/setup-msbuild@v1
+        uses: microsoft/setup-msbuild@v2

       - name: delvewheel install
         if: runner.os == 'Windows'
@@ -52,7 +52,7 @@
           python -m pip install delvewheel cython

       - name: Build wheels
-        uses: joerick/cibuildwheel@v2.16.2
+        uses: joerick/cibuildwheel@v2.16.5

       - uses: actions/upload-artifact@v3
         with:
@@ -87,9 +87,9 @@
           fetch-depth: 0

       - name: Setup Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.12"

       - name: Set up QEMU
         if: runner.os == 'Linux'
@@ -99,7 +99,7 @@

       - name: Add msbuild to PATH
         if: runner.os == 'Windows'
-        uses: microsoft/setup-msbuild@v1
+        uses: microsoft/setup-msbuild@v2

       - name: delvewheel install
         if: runner.os == 'Windows'
@@ -107,7 +107,7 @@
           python -m pip install delvewheel cython

       - name: Build wheels
-        uses: joerick/cibuildwheel@v2.16.2
+        uses: joerick/cibuildwheel@v2.16.5

       - uses: actions/upload-artifact@v3
         with:
@@ -142,9 +142,9 @@
           fetch-depth: 0

       - name: Setup Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.12"

       - name: Set up QEMU
         if: runner.os == 'Linux'
@@ -154,7 +154,7 @@

       - name: Add msbuild to PATH
         if: runner.os == 'Windows'
-        uses: microsoft/setup-msbuild@v1
+        uses: microsoft/setup-msbuild@v2

       - name: delvewheel install
         if: runner.os == 'Windows'
@@ -162,7 +162,7 @@
           python -m pip install delvewheel cython

       - name: Build wheels
-        uses: joerick/cibuildwheel@v2.16.2
+        uses: joerick/cibuildwheel@v2.16.5

       - uses: actions/upload-artifact@v3
         with:
@@ -197,9 +197,9 @@
           fetch-depth: 0

       - name: Setup Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.12"

       - name: Set up QEMU
         if: runner.os == 'Linux'
@@ -209,7 +209,7 @@

       - name: Add msbuild to PATH
         if: runner.os == 'Windows'
-        uses: microsoft/setup-msbuild@v1
+        uses: microsoft/setup-msbuild@v2

       - name: delvewheel install
         if: runner.os == 'Windows'
@@ -217,7 +217,7 @@
           python -m pip install delvewheel cython

       - name: Build wheels
-        uses: joerick/cibuildwheel@v2.16.2
+        uses: joerick/cibuildwheel@v2.16.5

       - uses: actions/upload-artifact@v3
         with:
@@ -246,12 +246,12 @@
           fetch-depth: 0

       - name: Setup Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.12"

       - name: Build wheels
-        uses: joerick/cibuildwheel@v2.16.2
+        uses: joerick/cibuildwheel@v2.16.5

       - uses: actions/upload-artifact@v3
         with:
diff -Nru python-fastparquet-2023.10.1/debian/changelog python-fastparquet-2024.2.0/debian/changelog
--- python-fastparquet-2023.10.1/debian/changelog	2023-11-28 11:09:24.000000000 +0000
+++ python-fastparquet-2024.2.0/debian/changelog	2024-02-28 18:14:19.000000000 +0000
@@ -1,3 +1,12 @@
+python-fastparquet (2024.2.0-1) unstable; urgency=medium
+
+  * New upstream release.
+  * Bug fix: "FTBFS: Python.h: No such file or directory", thanks to
+    Sebastian Ramacher (Closes: #1063598).
+  * Use --with=numpy3 to fix missing dependency.
+
+ -- Roland Mas  Wed, 28 Feb 2024 19:14:19 +0100
+
 python-fastparquet (2023.10.1-2) unstable; urgency=medium

   * Source-only upload for migration to testing.
diff -Nru python-fastparquet-2023.10.1/debian/control python-fastparquet-2024.2.0/debian/control
--- python-fastparquet-2023.10.1/debian/control	2023-11-15 15:41:45.000000000 +0000
+++ python-fastparquet-2024.2.0/debian/control	2024-02-28 18:06:49.000000000 +0000
@@ -21,6 +21,7 @@
  python3-numpy,
  cython3,
  python3-dev,
+ libpython3-all-dev,
  python3-pandas,
  python3-fsspec,
  python3-cramjam,
diff -Nru python-fastparquet-2023.10.1/debian/rules python-fastparquet-2024.2.0/debian/rules
--- python-fastparquet-2023.10.1/debian/rules	2023-11-15 15:38:06.000000000 +0000
+++ python-fastparquet-2024.2.0/debian/rules	2024-02-28 18:13:31.000000000 +0000
@@ -2,6 +2,6 @@
 export PYBUILD_NAME=fastparquet

 %:
-	dh $@ --with python3 --buildsystem=pybuild
+	dh $@ --with python3,numpy3 --buildsystem=pybuild

 override_dh_auto_test:
diff -Nru python-fastparquet-2023.10.1/fastparquet/__init__.py python-fastparquet-2024.2.0/fastparquet/__init__.py
--- python-fastparquet-2023.10.1/fastparquet/__init__.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/__init__.py	2024-02-07 18:38:43.000000000 +0000
@@ -1,8 +1,8 @@
 """parquet - read parquet files."""
-from ._version import __version__
-from .writer import write, update_file_custom_metadata
-from . import core, schema, converted_types, api
-from .api import ParquetFile
-from .util import ParquetException
+from fastparquet._version import __version__
+from fastparquet.writer import write, update_file_custom_metadata
+from fastparquet import core, schema, converted_types, api
+from fastparquet.api import ParquetFile
+from fastparquet.util import ParquetException
diff -Nru python-fastparquet-2023.10.1/fastparquet/api.py python-fastparquet-2024.2.0/fastparquet/api.py
--- python-fastparquet-2023.10.1/fastparquet/api.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/api.py	2024-02-07 18:38:43.000000000 +0000
@@ -6,16 +6,15 @@
 import numpy as np
 import fsspec
-from fastparquet.util import join_path

 import pandas as pd

-from . import core, schema, converted_types, encoding, dataframe, writer
-from . import parquet_thrift
-from .cencoding import ThriftObject, from_buffer
-from .json import json_decoder
-from .util import (default_open, default_remove, ParquetException, val_to_num,
+from fastparquet import core, schema, converted_types, encoding, dataframe, writer
+from fastparquet import parquet_thrift
+from fastparquet.cencoding import ThriftObject, from_buffer
+from fastparquet.json import json_decoder
+from fastparquet.util import (default_open, default_remove, ParquetException, val_to_num,
                    ops, ensure_bytes, ensure_str, check_column_names, metadata_from_many,
-                   ex_from_sep, _strip_path_tail, get_fs, PANDAS_VERSION)
+                   ex_from_sep, _strip_path_tail, get_fs, PANDAS_VERSION, join_path)


 # Find in names of partition files the integer matching "**part.*.parquet",
@@ -380,6 +379,9 @@
             size = rg.num_rows
         df, assign = self.pre_allocate(
                 size, columns, categories, index)
+        if "PANDAS_ATTRS" in self.key_value_metadata:
+            import json
+            df.attrs = json.loads(self.key_value_metadata["PANDAS_ATTRS"])
         ret = True

         f = infile or self.open(fn, mode='rb')
@@ -765,6 +767,10 @@
         size = sum(rg.num_rows for rg in rgs)
         selected = [None] * len(rgs)  # just to fill zip, below
         df, views = self.pre_allocate(size, columns, categories, index, dtypes=dtypes)
+        if "PANDAS_ATTRS" in self.key_value_metadata:
+            import json
+            df.attrs = json.loads(self.key_value_metadata["PANDAS_ATTRS"])
+
         start = 0
         if self.file_scheme == 'simple':
             infile = self.open(self.fn, 'rb')
@@ -959,10 +965,11 @@
                 dt = md[col]["numpy_type"]
             if tz is not None and tz.get(col, False):
                 z = dataframe.tz_to_dt_tz(tz[col])
-                if PANDAS_VERSION.major >= 2:
-                    dt = pd.Series([], dtype=dt).dt.tz_convert(z).dtype
+                dt_series = pd.Series([], dtype=dt)
+                if PANDAS_VERSION.major >= 2 and dt_series.dt.tz is not None:
+                    dt = dt_series.dt.tz_convert(z).dtype
                 else:
-                    dt = pd.Series([], dtype=dt).dt.tz_localize(z).dtype
+                    dt = dt_series.dt.tz_localize(z).dtype
             dtype[col] = dt
         elif dt in converted_types.nullable:
             if self.pandas_metadata:
diff -Nru python-fastparquet-2023.10.1/fastparquet/cencoding.pyx python-fastparquet-2024.2.0/fastparquet/cencoding.pyx
--- python-fastparquet-2023.10.1/fastparquet/cencoding.pyx	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/cencoding.pyx	2024-02-07 18:38:43.000000000 +0000
@@ -214,22 +214,30 @@


 cdef void delta_read_bitpacked(NumpyIO file_obj, uint8_t bitwidth,
-                               NumpyIO o, uint64_t count, uint8_t itemsize=4):
+                               NumpyIO o, uint64_t count, uint8_t longval=0):
     cdef:
         uint64_t data = 0
-        int8_t stop = -bitwidth
+        int8_t left = 0
+        int8_t right = 0
         uint64_t mask = 0XFFFFFFFFFFFFFFFF >> (64 - bitwidth)
     while count > 0:
-        if stop < 0:
-            data = ((data & 0X00FFFFFFFFFFFFFF) << 8) | file_obj.read_byte()
-            stop += 8
+        if (left - right) < bitwidth:
+            data = data | (file_obj.read_byte() << left)
+            left += 8
+        elif right > 8:
+            data >>= 8
+            left -= 8
+            right -= 8
         else:
-            o.write_int((data >> stop) & mask)
-            stop -= bitwidth
+            if longval:
+                o.write_long((data >> right) & mask)
+            else:
+                o.write_int((data >> right) & mask)
+            right += bitwidth
             count -= 1


-cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o):
+cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o, uint8_t longval=0):
     cdef:
         uint64_t block_size = read_unsigned_var_int(file_obj)
         uint64_t miniblock_per_block = read_unsigned_var_int(file_obj)
@@ -248,19 +256,27 @@
                 temp = o.loc
                 if count > 1:
                     # no more diffs if on last value
-                    delta_read_bitpacked(file_obj, bitwidth, o, values_per_miniblock, count)
+                    delta_read_bitpacked(file_obj, bitwidth, o, values_per_miniblock, longval)
                 o.loc = temp
                 for j in range(values_per_miniblock):
-                    temp = o.read_int()
-                    o.loc -= 4
-                    o.write_int(value)
+                    if longval:
+                        temp = o.read_long()
+                        o.loc -= 8
+                        o.write_long(value)
+                    else:
+                        temp = o.read_int()
+                        o.loc -= 4
+                        o.write_int(value)
                     value += min_delta + temp
                     count -= 1
                     if count <= 0:
                         return
             else:
                 for j in range(values_per_miniblock):
-                    o.write_int(value)
+                    if longval:
+                        o.write_long(value)
+                    else:
+                        o.write_int(value)
                     value += min_delta
                     count -= 1
                     if count <= 0:
@@ -372,6 +388,20 @@
         (<int32_t*> self.get_pointer())[0] = i
         self.loc += 4

+    cdef void write_long(self, int64_t i):
+        if self.nbytes - self.loc < 8:
+            return
+        (<int64_t*> self.get_pointer())[0] = i
+        self.loc += 8
+
+    cdef int64_t read_long(self):
+        cdef int64_t i
+        if self.nbytes - self.loc < 8:
+            return 0
+        i = (<int64_t*> self.get_pointer())[0]
+        self.loc += 8
+        return i
+
     cdef void write_many(self, char b, int32_t count):
         cdef int32_t i
         for i in range(count):
diff -Nru python-fastparquet-2023.10.1/fastparquet/compression.py python-fastparquet-2024.2.0/fastparquet/compression.py
--- python-fastparquet-2023.10.1/fastparquet/compression.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/compression.py	2024-02-07 18:38:43.000000000 +0000
@@ -1,7 +1,7 @@
 import cramjam
 import numpy as np

-from . import parquet_thrift
+from fastparquet import parquet_thrift


 # TODO: use stream/direct-to-buffer conversions instead of memcopy
diff -Nru python-fastparquet-2023.10.1/fastparquet/converted_types.py python-fastparquet-2024.2.0/fastparquet/converted_types.py
--- python-fastparquet-2023.10.1/fastparquet/converted_types.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/converted_types.py	2024-02-07 18:38:43.000000000 +0000
@@ -10,9 +10,9 @@
 import numpy as np
 import pandas as pd

-from . import parquet_thrift
-from .cencoding import time_shift
-from .json import json_decoder
+from fastparquet import parquet_thrift
+from fastparquet.cencoding import time_shift
+from fastparquet.json import json_decoder

 logger = logging.getLogger('parquet')  # pylint: disable=invalid-name
diff -Nru python-fastparquet-2023.10.1/fastparquet/core.py python-fastparquet-2024.2.0/fastparquet/core.py
--- python-fastparquet-2023.10.1/fastparquet/core.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/core.py	2024-02-07 18:38:43.000000000 +0000
@@ -1,17 +1,16 @@
-import warnings
 import numpy as np
 import pandas as pd

-from . import encoding
-from .encoding import read_plain
+from fastparquet import encoding
+from fastparquet.encoding import read_plain
 import fastparquet.cencoding as encoding
-from .compression import decompress_data, rev_map, decom_into
-from .converted_types import convert, simple, converts_inplace
-from .schema import _is_list_like, _is_map_like
-from .speedups import unpack_byte_array
-from . import parquet_thrift
-from .cencoding import ThriftObject, read_thrift
-from .util import val_to_num, ex_from_sep
+from fastparquet.compression import decompress_data, rev_map, decom_into
+from fastparquet.converted_types import convert, simple, converts_inplace
+from fastparquet.schema import _is_list_like, _is_map_like
+from fastparquet.speedups import unpack_byte_array
+from fastparquet import parquet_thrift
+from fastparquet.cencoding import ThriftObject
+from fastparquet.util import val_to_num


 def _read_page(file_obj, page_header, column_metadata):
@@ -157,13 +156,17 @@
             o = encoding.NumpyIO(values)
             encoding.read_rle_bit_packed_hybrid(
                     io_obj, bit_width, io_obj.len-io_obj.tell(), o=o, itemsize=1)
-            values = values.data[:nval]
+            if isinstance(values, np.ndarray):
+                values = values[:nval]
+            else:
+                values = values.data[:nval]
         else:
             values = np.zeros(nval, dtype=np.int8)
     elif daph.encoding == parquet_thrift.Encoding.DELTA_BINARY_PACKED:
-        values = np.empty(daph.num_values - num_nulls, dtype=np.int32)
+        values = np.empty(daph.num_values - num_nulls,
+                          dtype=np.int64 if metadata.type == 2 else np.int32)
         o = encoding.NumpyIO(values.view('uint8'))
-        encoding.delta_binary_unpack(io_obj, o)
+        encoding.delta_binary_unpack(io_obj, o, longval=metadata.type == 2)
     else:
         raise NotImplementedError('Encoding %s' % daph.encoding)
     return definition_levels, repetition_levels, values[:nval]
diff -Nru python-fastparquet-2023.10.1/fastparquet/dataframe.py python-fastparquet-2024.2.0/fastparquet/dataframe.py
--- python-fastparquet-2023.10.1/fastparquet/dataframe.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/dataframe.py	2024-02-07 18:38:43.000000000 +0000
@@ -11,7 +11,7 @@
 from pandas.core.arrays.masked import BaseMaskedDtype
 import warnings

-from .util import PANDAS_VERSION
+from fastparquet.util import PANDAS_VERSION


 class Dummy(object):
@@ -107,7 +107,7 @@
                 # funky pandas not-dtype
                 t = t.base
             if ("M" in str(t) or "time" in str(t)) and "[" not in str(t):
-                t = t + "[ns]"
+                t = str(t) + "[ns]"
             d = np.empty(0, dtype=t)
             if d.dtype.kind == "M" and str(col) in timezones:
                 try:
diff -Nru python-fastparquet-2023.10.1/fastparquet/encoding.py python-fastparquet-2024.2.0/fastparquet/encoding.py
--- python-fastparquet-2023.10.1/fastparquet/encoding.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/encoding.py	2024-02-07 18:38:43.000000000 +0000
@@ -1,8 +1,8 @@
 """encoding.py - methods for reading parquet encoded data blocks."""
 import numpy as np

-from .cencoding import read_bitpacked1, NumpyIO
-from .speedups import unpack_byte_array
-from . import parquet_thrift
+from fastparquet.cencoding import read_bitpacked1, NumpyIO
+from fastparquet.speedups import unpack_byte_array
+from fastparquet import parquet_thrift


 def read_plain_boolean(raw_bytes, count, out=None):
diff -Nru python-fastparquet-2023.10.1/fastparquet/parquet_thrift/__init__.py python-fastparquet-2024.2.0/fastparquet/parquet_thrift/__init__.py
--- python-fastparquet-2023.10.1/fastparquet/parquet_thrift/__init__.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/parquet_thrift/__init__.py	2024-02-07 18:38:43.000000000 +0000
@@ -4,5 +4,7 @@

 def __getattr__(name):
     # for compatability with coe that calls, e.g., parquet_thrift.RowGroup(...)
-    from ..cencoding import ThriftObject
-    return partial(ThriftObject.from_fields, thrift_name=name)
+    from fastparquet.cencoding import ThriftObject
+    if name[0].isupper():
+        return partial(ThriftObject.from_fields, thrift_name=name)
+    raise AttributeError(name)
diff -Nru python-fastparquet-2023.10.1/fastparquet/schema.py python-fastparquet-2024.2.0/fastparquet/schema.py
--- python-fastparquet-2023.10.1/fastparquet/schema.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/schema.py	2024-02-07 18:38:43.000000000 +0000
@@ -1,7 +1,7 @@
 """Utils for working with the parquet thrift models."""
 from collections import OrderedDict

-from . import parquet_thrift
+from fastparquet import parquet_thrift


 def schema_tree(schema, i=0):
diff -Nru python-fastparquet-2023.10.1/fastparquet/test/test_api.py python-fastparquet-2024.2.0/fastparquet/test/test_api.py
--- python-fastparquet-2023.10.1/fastparquet/test/test_api.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/test/test_api.py	2024-02-07 18:38:43.000000000 +0000
@@ -9,7 +9,7 @@
 import fsspec
 import numpy as np
 import pandas as pd
-from pandas._testing import makeMixedDataFrame
+from .util import makeMixedDataFrame
 try:
     from pandas.tslib import Timestamp
 except ImportError:
diff -Nru python-fastparquet-2023.10.1/fastparquet/test/test_encoding.py python-fastparquet-2024.2.0/fastparquet/test/test_encoding.py
--- python-fastparquet-2023.10.1/fastparquet/test/test_encoding.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/test/test_encoding.py	2024-02-07 18:38:43.000000000 +0000
@@ -165,8 +165,8 @@
     # one and only miniblock
     cencoding.encode_unsigned_varint(zigzag(-2), o)  # minimum delta (zigzag)
     o.write_byte(2)  # bit-width list (only one)
-    o.write_byte(0b00000011)  # [0, 0, 0, 3]
-    o.write_byte(0b11111100)  # [3, 3, 3, pad]
+    o.write_byte(0b11000000)  # rev([0, 0, 0, 3])
+    o.write_byte(0b00111111)  # rev([3, 3, 3, pad])

     o.seek(0)

diff -Nru python-fastparquet-2023.10.1/fastparquet/test/test_output.py python-fastparquet-2024.2.0/fastparquet/test/test_output.py
--- python-fastparquet-2023.10.1/fastparquet/test/test_output.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/test/test_output.py	2024-02-07 18:38:43.000000000 +0000
@@ -8,7 +8,7 @@
 from fastparquet import ParquetFile
 from fastparquet import write, parquet_thrift, update_file_custom_metadata
 from fastparquet import writer, encoding
-from pandas._testing import makeMixedDataFrame
+from .util import makeMixedDataFrame
 from pandas.testing import assert_frame_equal
 from pandas.api.types import CategoricalDtype
 import pytest
@@ -1206,3 +1206,13 @@
     df.to_parquet(path=fn, engine="fastparquet")
     df2 = pd.read_parquet(fn, engine="fastparquet")
     assert df.to_dict() == df2.to_dict()
+
+
+def test_attrs_roundtrip(tempdir):
+    fn = os.path.join(tempdir, "out.parq")
+    attrs = {"oi": 5}
+    df = pd.DataFrame({"A": np.array([[1.1, 1.2], [], None], dtype=object)})
+    df.attrs = attrs
+    df.to_parquet(path=fn, engine="fastparquet")
+    df2 = pd.read_parquet(fn, engine="fastparquet")
+    assert df2.attrs == attrs
diff -Nru python-fastparquet-2023.10.1/fastparquet/test/test_pd_optional_types.py python-fastparquet-2024.2.0/fastparquet/test/test_pd_optional_types.py
--- python-fastparquet-2023.10.1/fastparquet/test/test_pd_optional_types.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/test/test_pd_optional_types.py	2024-02-07 18:38:43.000000000 +0000
@@ -3,6 +3,7 @@
 import numpy as np
 import pandas as pd
 from pandas.testing import assert_frame_equal
+from pandas.core.arrays import IntegerArray
 import fastparquet as fp
 from .util import tempdir
 from fastparquet import write, parquet_thrift
@@ -10,61 +11,51 @@
 import numpy.random as random


-EXPECTED_SERIES_INT8 = pd.Series(random.uniform(low=-128, high=127,size=100)).round()
-EXPECTED_SERIES_INT16 = pd.Series(random.uniform(low=-32768, high=32767,size=100)).round()
-EXPECTED_SERIES_INT32 = pd.Series(random.uniform(low=-2147483648, high=2147483647,size=100)).round()
-EXPECTED_SERIES_INT64 = pd.Series(random.uniform(low=-9223372036854775808, high=9223372036854775807,size=100)).round()
-EXPECTED_SERIES_UINT8 = pd.Series(random.uniform(low=0, high=255,size=100)).round()
-EXPECTED_SERIES_UINT16 = pd.Series(random.uniform(low=0, high=65535,size=100)).round()
-EXPECTED_SERIES_UINT32 = pd.Series(random.uniform(low=0, high=4294967295,size=100)).round()
-EXPECTED_SERIES_UINT64 = pd.Series(random.uniform(low=0, high=18446744073709551615,size=100)).round()
-EXPECTED_SERIES_BOOL = pd.Series(random.choice([False, True], 100))
-EXPECTED_SERIES_STRING = pd.Series(random.choice([
+EXPECTED_SERIES_INT8 = random.uniform(low=-128, high=127, size=100).round()
+EXPECTED_SERIES_INT16 = random.uniform(low=-32768, high=32767, size=100).round()
+EXPECTED_SERIES_INT32 = random.uniform(low=-2147483648, high=2147483647, size=100).round()
+EXPECTED_SERIES_INT64 = random.uniform(low=-9223372036854775808, high=9223372036854775807, size=100).round()
+EXPECTED_SERIES_UINT8 = random.uniform(low=0, high=255, size=100).round()
+EXPECTED_SERIES_UINT16 = random.uniform(low=0, high=65535, size=100).round()
+EXPECTED_SERIES_UINT32 = random.uniform(low=0, high=4294967295, size=100).round()
+EXPECTED_SERIES_UINT64 = random.uniform(low=0, high=18446744073709551615, size=100).round()
+EXPECTED_SERIES_BOOL = random.choice([False, True], 100)
+EXPECTED_SERIES_STRING = random.choice([
     'You', 'are', 'my', 'fire',
     'The', 'one', 'desire',
     'Believe', 'when', 'I', 'say',
     'I', 'want', 'it', 'that', 'way'
-    ], 100))
+    ], 100)

-EXPECTED_SERIES_INT8.loc[20:30] = np.nan
-EXPECTED_SERIES_INT16.loc[20:30] = np.nan
-EXPECTED_SERIES_INT32.loc[20:30] = np.nan
-EXPECTED_SERIES_INT64.loc[20:30] = np.nan
-EXPECTED_SERIES_UINT8.loc[20:30] = np.nan
-EXPECTED_SERIES_UINT16.loc[20:30] = np.nan
-EXPECTED_SERIES_UINT32.loc[20:30] = np.nan
-EXPECTED_SERIES_UINT64.loc[20:30] = np.nan
-EXPECTED_SERIES_BOOL.loc[20:30] = np.nan
-EXPECTED_SERIES_STRING.loc[20:30] = np.nan
+EXPECTED_SERIES_INT8[20:30] = np.nan
+EXPECTED_SERIES_INT16[20:30] = np.nan
+EXPECTED_SERIES_INT32[20:30] = np.nan
+EXPECTED_SERIES_INT64[20:30] = np.nan
+EXPECTED_SERIES_UINT8[20:30] = np.nan
+EXPECTED_SERIES_UINT16[20:30] = np.nan
+EXPECTED_SERIES_UINT32[20:30] = np.nan
+EXPECTED_SERIES_UINT64[20:30] = np.nan
+EXPECTED_SERIES_BOOL[20:30] = np.nan
+EXPECTED_SERIES_STRING[20:30] = np.nan
+mask = EXPECTED_SERIES_UINT64 > -1

 TEST = pd.DataFrame({
-    'int8': EXPECTED_SERIES_INT8.astype('Int8'),
-    'int16': EXPECTED_SERIES_INT16.astype('Int16'),
-    'int32': EXPECTED_SERIES_INT32.astype('Int32'),
-    'int64': EXPECTED_SERIES_INT64.astype('Int64'),
-    'uint8': EXPECTED_SERIES_UINT8.astype('UInt8'),
-    'uint16': EXPECTED_SERIES_UINT16.astype('UInt16'),
-    'uint32': EXPECTED_SERIES_UINT32.astype('UInt32'),
-    'uint64': EXPECTED_SERIES_UINT64.astype('UInt64'),
-    'bool': EXPECTED_SERIES_BOOL.astype('boolean'),
-    'string': EXPECTED_SERIES_STRING.astype('string')
+    'int8': pd.Series(pd.array(EXPECTED_SERIES_INT8, dtype='Int8')),
+    'int16': pd.Series(pd.array(EXPECTED_SERIES_INT16, dtype='Int16')),
+    'int32': pd.Series(pd.array(EXPECTED_SERIES_INT32, dtype='Int32')),
+    'int64': pd.Series(pd.array(EXPECTED_SERIES_INT64, dtype='Int64')),
+    'uint8': pd.Series(pd.array(EXPECTED_SERIES_UINT8, dtype='UInt8')),
+    'uint16': pd.Series(pd.array(EXPECTED_SERIES_UINT16, dtype='UInt16')),
+    'uint32': pd.Series(pd.array(EXPECTED_SERIES_UINT32, dtype='UInt32')),
+    'uint64': pd.Series(pd.array(EXPECTED_SERIES_UINT64, dtype='UInt64')),
+    'bool': pd.Series(pd.array(EXPECTED_SERIES_BOOL, dtype='boolean')),
+    'string': pd.Series(EXPECTED_SERIES_STRING, dtype='string')
 })

-EXPECTED = pd.DataFrame({
-    'int8': EXPECTED_SERIES_INT8.astype('float16'),
-    'int16': EXPECTED_SERIES_INT16.astype('float32'),
-    'int32': EXPECTED_SERIES_INT32.astype('float64'),
-    'int64': EXPECTED_SERIES_INT64.astype('float64'),
-    'uint8': EXPECTED_SERIES_UINT8.astype('float16'),
-    'uint16': EXPECTED_SERIES_UINT16.astype('float32'),
-    'uint32': EXPECTED_SERIES_UINT32.astype('float64'),
-    'uint64': EXPECTED_SERIES_UINT64.astype('float64'),
-    'bool': EXPECTED_SERIES_BOOL.astype('float16'),
-    'string': EXPECTED_SERIES_STRING
-})
+EXPECTED = TEST


 EXPECTED_PARQUET_TYPES = {
@@ -80,7 +71,8 @@
     'string': 'BYTE_ARRAY'
 }

-@pytest.mark.parametrize('comp', (None,'snappy', 'gzip'))
+
+@pytest.mark.parametrize('comp', (None, 'snappy', 'gzip'))
 @pytest.mark.parametrize('scheme', ('simple', 'hive'))
 def test_write_nullable_columns(tempdir, scheme, comp):
     fname = os.path.join(tempdir, 'test_write_nullable_columns.parquet')
diff -Nru python-fastparquet-2023.10.1/fastparquet/test/test_read.py python-fastparquet-2024.2.0/fastparquet/test/test_read.py
--- python-fastparquet-2023.10.1/fastparquet/test/test_read.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/test/test_read.py	2024-02-07 18:38:43.000000000 +0000
@@ -581,3 +581,8 @@
         "k_int": 1,
         "k_bool": True,
     }
+
+def test_reading_timezone():
+    fn = os.path.join(TEST_DATA, "test-timezone.parquet")
+    pf = fastparquet.ParquetFile(fn)
+    assert pf.dtypes['date'] == 'datetime64[ns, UTC]'
\ No newline at end of file
diff -Nru python-fastparquet-2023.10.1/fastparquet/test/util.py python-fastparquet-2024.2.0/fastparquet/test/util.py
--- python-fastparquet-2023.10.1/fastparquet/test/util.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/test/util.py	2024-02-07 18:38:43.000000000 +0000
@@ -4,6 +4,8 @@
 import tempfile
 import shutil

+import pandas as pd
+
 TEST_DATA = "test-data"

 port = 5555
@@ -76,3 +78,18 @@
     yield d
     if os.path.exists(d):
         shutil.rmtree(d, ignore_errors=True)
+
+
+
+def makeMixedDataFrame():
+    index = pd.Index(["a", "b", "c", "d", "e"], name="index")
+
+    data = {
+        "A": pd.Series([0.0, 1.0, 2.0, 3.0, 4.0], dtype="float64"),
+        "B": pd.Series([0.0, 1.0, 0.0, 1.0, 0.0], dtype="float64"),
+        "C": pd.Series(["foo1", "foo2", "foo3", "foo4", "foo5"], dtype='object'),
+        "D": pd.bdate_range("1/1/2009", periods=5),
+    }
+    return pd.DataFrame(data=data)
+
+
diff -Nru python-fastparquet-2023.10.1/fastparquet/thrift_structures.py python-fastparquet-2024.2.0/fastparquet/thrift_structures.py
--- python-fastparquet-2023.10.1/fastparquet/thrift_structures.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/thrift_structures.py	2024-02-07 18:38:43.000000000 +0000
@@ -1,2 +1,5 @@
-from . import parquet_thrift
-from .cencoding import ThriftObject
+from fastparquet import parquet_thrift
+from fastparquet.cencoding import ThriftObject
+
+
+__all__ = ["ThriftObject", "parquet_thrift"]
diff -Nru python-fastparquet-2023.10.1/fastparquet/util.py python-fastparquet-2024.2.0/fastparquet/util.py
--- python-fastparquet-2023.10.1/fastparquet/util.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/util.py	2024-02-07 18:38:43.000000000 +0000
@@ -14,8 +14,8 @@

 import fsspec

-from . import parquet_thrift
-from .cencoding import ThriftObject
+from fastparquet import parquet_thrift
+from fastparquet.cencoding import ThriftObject
 from fastparquet import __version__

 PANDAS_VERSION = Version(pd.__version__)
@@ -297,7 +297,7 @@
         Thrift object or parquet file which metadata is to update.
     custom_metadata : dict
         Key-value metadata to update in thrift object.
-
+        The values must be strings or binary. To pass a dictionary, serialize it as json string then encode it in binary.
     Notes
     -----
     Key-value metadata are expected binary encoded. This function ensures it
@@ -305,6 +305,10 @@
     """
     kvm = (obj.key_value_metadata if isinstance(obj, ThriftObject)
            else obj.fmd.key_value_metadata)
+
+    if kvm is None:
+        kvm = []
+
     # Spare list of keys.
     kvm_keys = [item.key for item in kvm]
     for key, value in custom_metadata.items():
diff -Nru python-fastparquet-2023.10.1/fastparquet/writer.py python-fastparquet-2024.2.0/fastparquet/writer.py
--- python-fastparquet-2023.10.1/fastparquet/writer.py	2023-10-26 18:42:23.000000000 +0000
+++ python-fastparquet-2024.2.0/fastparquet/writer.py	2024-02-07 18:38:43.000000000 +0000
@@ -11,19 +11,17 @@

 from fastparquet.util import join_path

-from . import parquet_thrift
-from .api import ParquetFile, partitions, part_ids
-from .compression import compress_data
-from .converted_types import tobson
-from .json import json_encoder
-from .util import (default_open, default_mkdirs, check_column_names,
+from fastparquet import parquet_thrift, __version__, cencoding
+from fastparquet.api import ParquetFile, partitions, part_ids
+from fastparquet.compression import compress_data
+from fastparquet.converted_types import tobson
+from fastparquet.json import json_encoder
+from fastparquet.util import (default_open, default_mkdirs, check_column_names,
                    created_by, get_column_metadata, norm_col_name,
                    path_string, reset_row_idx, get_fs, update_custom_metadata)
-from . import __version__
-from .speedups import array_encode_utf8, pack_byte_array
-from . import cencoding
-from .cencoding import NumpyIO, ThriftObject, from_buffer
+from fastparquet.speedups import array_encode_utf8, pack_byte_array
+from fastparquet.cencoding import NumpyIO, ThriftObject, from_buffer
 from decimal import Decimal

 MARKER = b'PAR1'
@@ -1243,6 +1241,9 @@
     --------
     >>> fastparquet.write('myfile.parquet', df)  # doctest: +SKIP
     """
+    custom_metadata = custom_metadata or {}
+    if getattr(data, "attrs", None):
+        custom_metadata["PANDAS_ATTRS"] = json.dumps(data.attrs)
     if file_scheme not in ('simple', 'hive', 'drill'):
         raise ValueError('File scheme should be simple|hive|drill, not '
                          f'{file_scheme}.')
@@ -1305,7 +1306,7 @@
             object_encoding=object_encoding, times=times,
             index_cols=index_cols, partition_cols=partition_on,
             cols_dtype=cols_dtype)
-    if custom_metadata is not None:
+    if custom_metadata:
         kvm = fmd.key_value_metadata or []
         kvm.extend(
             [
@@ -1608,6 +1609,7 @@
         Local path to file.
     custom_metadata : dict
         Key-value metadata to update in thrift object.
+        The values must be strings or binary. To pass a dictionary, serialize it as json string then encode it in binary.
     is_metadata_file : bool, default None
         Define if target file is a pure metadata file, or is a parquet data
         file. If `None`, is set depending file name.
Binary files /tmp/tmp3gswtb4_/QHrXAuLxR9/python-fastparquet-2023.10.1/test-data/test-timezone.parquet and /tmp/tmp3gswtb4_/qCypudxk85/python-fastparquet-2024.2.0/test-data/test-timezone.parquet differ
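The writer.py and api.py hunks above make pandas DataFrame .attrs round-trip through the parquet key-value metadata under a "PANDAS_ATTRS" key, as exercised by the new test_attrs_roundtrip. A minimal sketch of the behaviour this release enables (the file name is illustrative):

# Sketch of the .attrs round-trip added by the writer.py/api.py hunks above;
# the file name is illustrative.
import pandas as pd

df = pd.DataFrame({"A": [1.0, 2.0, 3.0]})
df.attrs = {"source": "sensor-7", "units": "kPa"}

# write() stores df.attrs as JSON under the "PANDAS_ATTRS" key-value entry
df.to_parquet("out.parq", engine="fastparquet")

# reading restores attrs from the stored key-value metadata
df2 = pd.read_parquet("out.parq", engine="fastparquet")
assert df2.attrs == {"source": "sensor-7", "units": "kPa"}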
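The cencoding.pyx and core.py hunks add a longval path so that DELTA_BINARY_PACKED pages with physical type INT64 (metadata.type == 2) are decoded into 64-bit buffers rather than truncated into int32. A sketch of a file that exercises this path, assuming a pyarrow version recent enough to support the column_encoding option:

# Sketch: write a DELTA_BINARY_PACKED INT64 column with pyarrow, then read it
# back with fastparquet, exercising the new longval decode path. Assumes a
# pyarrow with per-column encoding support; the file name is illustrative.
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
from fastparquet import ParquetFile

table = pa.table({"x": np.arange(10_000, dtype="int64") * 123456789})
pq.write_table(
    table,
    "delta64.parq",
    use_dictionary=False,  # DELTA encoding cannot be combined with dictionary
    column_encoding={"x": "DELTA_BINARY_PACKED"},
)

df = ParquetFile("delta64.parq").to_pandas()
assert (df["x"].to_numpy() == table["x"].to_numpy()).all()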
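The test_encoding.py hunk flips the hand-packed test bytes because the rewritten delta_read_bitpacked consumes bits least-significant-bit first, the bit order Parquet specifies for bit-packed miniblocks. A pure-Python illustration of that bit order (not the Cython implementation itself), unpacking the test's two bytes at bit width 2:

# Illustration only: DELTA_BINARY_PACKED miniblock values are bit-packed
# LSB-first, which is why the test bytes above are bit-reversed.
def unpack_lsb_first(buf: bytes, bitwidth: int, count: int):
    data, left, out = 0, 0, []
    mask = (1 << bitwidth) - 1
    it = iter(buf)
    for _ in range(count):
        while left < bitwidth:          # refill from the next byte
            data |= next(it) << left
            left += 8
        out.append(data & mask)         # low bits come out first
        data >>= bitwidth
        left -= bitwidth
    return out

# bytes from the test: rev([0, 0, 0, 3]) and rev([3, 3, 3, pad])
assert unpack_lsb_first(bytes([0b11000000, 0b00111111]), 2, 8) == [0, 0, 0, 3, 3, 3, 3, 0]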
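The docstring additions in util.py and writer.py spell out that custom key-value metadata values must be strings or binary, so a dict has to be JSON-encoded first. A short sketch against the file written above (the key name is illustrative):

# Sketch: metadata values must be str or bytes, so a dict is JSON-encoded
# before being stored; the key name is illustrative.
import json
from fastparquet import update_file_custom_metadata

pipeline_info = {"stage": "clean", "version": 3}
update_file_custom_metadata(
    "out.parq",
    {"pipeline": json.dumps(pipeline_info).encode()},
)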