diff -Nru pgloader-3.6.7/bootstrap-debian.sh pgloader-3.6.9/bootstrap-debian.sh --- pgloader-3.6.7/bootstrap-debian.sh 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/bootstrap-debian.sh 2022-10-24 11:12:05.000000000 +0000 @@ -27,8 +27,8 @@ sudo add-apt-repository 'deb http://mirrors.linsrv.net/mariadb/repo/10.0/debian wheezy main' sudo apt-get update -sudo apt-get install -y postgresql-9.3 postgresql-contrib-9.3 \ - postgresql-9.3-ip4r \ +sudo apt-get install -y postgresql-15 \ + postgresql-15-ip4r \ sbcl \ git patch unzip \ devscripts pandoc \ diff -Nru pgloader-3.6.7/debian/changelog pgloader-3.6.9/debian/changelog --- pgloader-3.6.7/debian/changelog 2022-08-13 08:32:41.000000000 +0000 +++ pgloader-3.6.9/debian/changelog 2022-10-24 10:58:09.000000000 +0000 @@ -1,3 +1,17 @@ +pgloader (3.6.9-1) unstable; urgency=medium + + * New upstream version. + * Bump ip4r dependencies to 15. (Closes: #1022296) + + -- Christoph Berg Mon, 24 Oct 2022 12:58:09 +0200 + +pgloader (3.6.8-1) unstable; urgency=medium + + * New upstream version. + * Depend on libsqlite3-0. + + -- Christoph Berg Mon, 26 Sep 2022 14:24:02 +0200 + pgloader (3.6.7-1) unstable; urgency=medium * New upstream version: diff -Nru pgloader-3.6.7/debian/control pgloader-3.6.9/debian/control --- pgloader-3.6.7/debian/control 2022-06-26 21:24:58.000000000 +0000 +++ pgloader-3.6.9/debian/control 2022-10-24 10:56:13.000000000 +0000 @@ -2,7 +2,8 @@ Section: database Priority: optional Maintainer: Dimitri Fontaine -Uploaders: Christoph Berg +Uploaders: + Christoph Berg , Build-Depends: buildapp (>= 1.5), cl-abnf, @@ -49,19 +50,25 @@ debhelper-compat (= 13), gawk, help2man, - postgresql-14-ip4r | postgresql-ip4r , - python3-sphinx-rtd-theme, + libsqlite3-dev, + postgresql-15-ip4r | postgresql-ip4r , python3-sphinx, + python3-sphinx-rtd-theme, sbcl (>= 1.1.13), tzdata, -Standards-Version: 4.5.0 +Standards-Version: 4.6.0 Homepage: https://github.com/dimitri/pgloader Vcs-Git: https://github.com/dimitri/pgloader.git Vcs-Browser: https://github.com/dimitri/pgloader Package: pgloader Architecture: any -Depends: freetds-dev, ${misc:Depends}, ${shlibs:Depends}, ${ssl:Depends} +Depends: + freetds-dev, + ${misc:Depends}, + ${shlibs:Depends}, + ${sqlite:Depends}, + ${ssl:Depends}, Description: extract, transform and load data into PostgreSQL pgloader imports data from different kind of sources and COPY it into PostgreSQL. diff -Nru pgloader-3.6.7/debian/rules pgloader-3.6.9/debian/rules --- pgloader-3.6.7/debian/rules 2022-08-12 20:57:51.000000000 +0000 +++ pgloader-3.6.9/debian/rules 2022-10-24 10:58:09.000000000 +0000 @@ -2,6 +2,8 @@ include /usr/share/dpkg/pkg-info.mk +# get libsqlite3 package name from libsqlite3-dev +LIBSQLITE := $(shell dpkg-query --showformat='$${Depends}' --show libsqlite3-dev | grep -o 'libsqlite[^ ]*') # make pgloader depend on the libssl package cl-plus-ssl depends on LIBSSL := $(shell dpkg-query --showformat='$${Depends}' --show cl-plus-ssl | grep -o 'libssl[^ ]*') @@ -12,6 +14,11 @@ SIZE=16384 endif +MAKEFILE_VERSION = $(shell awk '/^VERSION/ { print $$3 }' Makefile) +DOC_VERSION = $(shell awk '/^release/ { print $$3 }' docs/conf.py | tr -d "'") +SPECFILE_VERSION = $(shell awk '/^Version/ { print $$2 }' pgloader.spec) +DEBIAN_VERSION = $(shell dpkg-parsechangelog -SVersion | cut -d- -f 1) + # buildd provides a build environment where $HOME is not writable, but the # CL compilers here will need to fill-in a per-user cache export HOME = $(CURDIR)/debian/home @@ -19,6 +26,10 @@ override_dh_auto_clean: dh_auto_clean rm -rf debian/home + # sanity checks on version number + [ "$(MAKEFILE_VERSION)" = "$(DOC_VERSION)" ] # Makefile = docs/conf.py version + [ "$(MAKEFILE_VERSION)" = "$(SPECFILE_VERSION)" ] # Makefile = pgloader.spec version + [ "$(MAKEFILE_VERSION)" = "$(DEBIAN_VERSION)" ] # Makefile = debian/changelog version override_dh_auto_build-indep: # do nothing @@ -46,6 +57,7 @@ || echo $$? > buildapp.fail cat buildapp.log test ! -f buildapp.fail + ls -l build/bin/pgloader $(MAKE) -C docs html override_dh_auto_test: @@ -64,7 +76,9 @@ debian/pgloader/usr/share/man/man1/pgloader.1 override_dh_gencontrol: - dh_gencontrol -- -V"ssl:Depends=$(LIBSSL)" + dh_gencontrol -- \ + -V"sqlite:Depends=$(LIBSQLITE)" \ + -V"ssl:Depends=$(LIBSSL)" %: dh $@ diff -Nru pgloader-3.6.7/debian/tests/control pgloader-3.6.9/debian/tests/control --- pgloader-3.6.7/debian/tests/control 2022-06-26 10:20:58.000000000 +0000 +++ pgloader-3.6.9/debian/tests/control 2022-10-24 10:56:13.000000000 +0000 @@ -1,7 +1,13 @@ -Depends: pgloader, ca-certificates, cl-postmodern, postgresql +Depends: + ca-certificates, + cl-postmodern, + pgloader, + postgresql, Tests: ssl Restrictions: allow-stderr, needs-root -Depends: pgloader, postgresql-14-ip4r | postgresql-ip4r +Depends: + pgloader, + postgresql-15-ip4r | postgresql-ip4r, Tests: testsuite Restrictions: allow-stderr diff -Nru pgloader-3.6.7/Dockerfile.ccl pgloader-3.6.9/Dockerfile.ccl --- pgloader-3.6.7/Dockerfile.ccl 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/Dockerfile.ccl 2022-10-24 11:12:05.000000000 +0000 @@ -21,7 +21,7 @@ cl-babel \ && rm -rf /var/lib/apt/lists/* - RUN curl -SL https://github.com/Clozure/ccl/releases/download/v1.11.5/ccl-1.11.5-linuxx86.tar.gz \ + RUN curl -SL https://github.com/Clozure/ccl/releases/download/v1.12/ccl-1.12-linuxx86.tar.gz \ | tar xz -C /usr/local/src/ \ && mv /usr/local/src/ccl/scripts/ccl64 /usr/local/bin/ccl diff -Nru pgloader-3.6.7/docs/batches.rst pgloader-3.6.9/docs/batches.rst --- pgloader-3.6.7/docs/batches.rst 1970-01-01 00:00:00.000000000 +0000 +++ pgloader-3.6.9/docs/batches.rst 2022-10-24 11:12:05.000000000 +0000 @@ -0,0 +1,123 @@ +Batch Processing +================ + +To load data to PostgreSQL, pgloader uses the `COPY` streaming protocol. +While this is the faster way to load data, `COPY` has an important drawback: +as soon as PostgreSQL emits an error with any bit of data sent to it, +whatever the problem is, the whole data set is rejected by PostgreSQL. + +To work around that, pgloader cuts the data into *batches* of 25000 rows +each, so that when a problem occurs it's only impacting that many rows of +data. Each batch is kept in memory while the `COPY` streaming happens, in +order to be able to handle errors should some happen. + +When PostgreSQL rejects the whole batch, pgloader logs the error message +then isolates the bad row(s) from the accepted ones by retrying the batched +rows in smaller batches. To do that, pgloader parses the *CONTEXT* error +message from the failed COPY, as the message contains the line number where +the error was found in the batch, as in the following example:: + + CONTEXT: COPY errors, line 3, column b: "2006-13-11" + +Using that information, pgloader will reload all rows in the batch before +the erroneous one, log the erroneous one as rejected, then try loading the +remaining of the batch in a single attempt, which may or may not contain +other erroneous data. + +At the end of a load containing rejected rows, you will find two files in +the *root-dir* location, under a directory named the same as the target +database of your setup. The filenames are the target table, and their +extensions are `.dat` for the rejected data and `.log` for the file +containing the full PostgreSQL client side logs about the rejected data. + +The `.dat` file is formatted in PostgreSQL the text COPY format as documented +in `http://www.postgresql.org/docs/9.2/static/sql-copy.html#AEN66609`. + +It is possible to use the following WITH options to control pgloader batch +behavior: + + - *on error stop*, *on error resume next* + + This option controls if pgloader is using building batches of data at + all. The batch implementation allows pgloader to recover errors by + sending the data that PostgreSQL accepts again, and by keeping away the + data that PostgreSQL rejects. + + To enable retrying the data and loading the good parts, use the option + *on error resume next*, which is the default to file based data loads + (such as CSV, IXF or DBF). + + When migrating from another RDMBS technology, it's best to have a + reproducible loading process. In that case it's possible to use *on + error stop* and fix either the casting rules, the data transformation + functions or in cases the input data until your migration runs through + completion. That's why *on error resume next* is the default for SQLite, + MySQL and MS SQL source kinds. + +A Note About Performance +------------------------ + +pgloader has been developed with performance in mind, to be able to cope +with ever growing needs in loading large amounts of data into PostgreSQL. + +The basic architecture it uses is the old Unix pipe model, where a thread is +responsible for loading the data (reading a CSV file, querying MySQL, etc) +and fills pre-processed data into a queue. Another threads feeds from the +queue, apply some more *transformations* to the input data and stream the +end result to PostgreSQL using the COPY protocol. + +When given a file that the PostgreSQL `COPY` command knows how to parse, and +if the file contains no erroneous data, then pgloader will never be as fast +as just using the PostgreSQL `COPY` command. + +Note that while the `COPY` command is restricted to read either from its +standard input or from a local file on the server's file system, the command +line tool `psql` implements a `\copy` command that knows how to stream a +file local to the client over the network and into the PostgreSQL server, +using the same protocol as pgloader uses. + +A Note About Parallelism +------------------------ + +pgloader uses several concurrent tasks to process the data being loaded: + + - a reader task reads the data in and pushes it to a queue, + + - at last one write task feeds from the queue and formats the raw into the + PostgreSQL COPY format in batches (so that it's possible to then retry a + failed batch without reading the data from source again), and then sends + the data to PostgreSQL using the COPY protocol. + +The parameter *workers* allows to control how many worker threads are +allowed to be active at any time (that's the parallelism level); and the +parameter *concurrency* allows to control how many tasks are started to +handle the data (they may not all run at the same time, depending on the +*workers* setting). + +We allow *workers* simultaneous workers to be active at the same time in the +context of a single table. A single unit of work consist of several kinds of +workers: + + - a reader getting raw data from the source, + - N writers preparing and sending the data down to PostgreSQL. + +The N here is setup to the *concurrency* parameter: with a *CONCURRENCY* of +2, we start (+ 1 2) = 3 concurrent tasks, with a *concurrency* of 4 we start +(+ 1 4) = 5 concurrent tasks, of which only *workers* may be active +simultaneously. + +The defaults are `workers = 4, concurrency = 1` when loading from a database +source, and `workers = 8, concurrency = 2` when loading from something else +(currently, a file). Those defaults are arbitrary and waiting for feedback +from users, so please consider providing feedback if you play with the +settings. + +As the `CREATE INDEX` threads started by pgloader are only waiting until +PostgreSQL is done with the real work, those threads are *NOT* counted into +the concurrency levels as detailed here. + +By default, as many `CREATE INDEX` threads as the maximum number of indexes +per table are found in your source schema. It is possible to set the `max +parallel create index` *WITH* option to another number in case there's just +too many of them to create. + diff -Nru pgloader-3.6.7/docs/command.rst pgloader-3.6.9/docs/command.rst --- pgloader-3.6.7/docs/command.rst 1970-01-01 00:00:00.000000000 +0000 +++ pgloader-3.6.9/docs/command.rst 2022-10-24 11:12:05.000000000 +0000 @@ -0,0 +1,380 @@ +Command Syntax +============== + +pgloader implements a Domain Specific Language allowing to setup complex +data loading scripts handling computed columns and on-the-fly sanitization +of the input data. For more complex data loading scenarios, you will be +required to learn that DSL's syntax. It's meant to look familiar to DBA by +being inspired by SQL where it makes sense, which is not that much after +all. + +The pgloader commands follow the same global grammar rules. Each of them +might support only a subset of the general options and provide specific +options. + +:: + + LOAD + FROM + [ HAVING FIELDS ] + INTO + [ TARGET TABLE [ "" ]."" ] + [ TARGET COLUMNS ] + + [ WITH ] + + [ SET ] + + [ BEFORE LOAD [ DO | EXECUTE ] ... ] + [ AFTER LOAD [ DO | EXECUTE ] ... ] + ; + +The main clauses are the `LOAD`, `FROM`, `INTO` and `WITH` clauses that each +command implements. Some command then implement the `SET` command, or some +specific clauses such as the `CAST` clause. + +.. _common_clauses: + +Command Clauses +--------------- + +The pgloader command syntax allows composing CLAUSEs together. Some clauses +are specific to the FROM source-type, most clauses are always available. + +FROM +---- + +The *FROM* clause specifies where to read the data from, and each command +introduces its own variant of sources. For instance, the *CSV* source +supports `inline`, `stdin`, a filename, a quoted filename, and a *FILENAME +MATCHING* clause (see above); whereas the *MySQL* source only supports a +MySQL database URI specification. + +INTO +---- + +The PostgreSQL connection URI must contains the name of the target table +where to load the data into. That table must have already been created in +PostgreSQL, and the name might be schema qualified. + +Then *INTO* option also supports an optional comma separated list of target +columns, which are either the name of an input *field* or the white space +separated list of the target column name, its PostgreSQL data type and a +*USING* expression. + +The *USING* expression can be any valid Common Lisp form and will be read +with the current package set to `pgloader.transforms`, so that you can use +functions defined in that package, such as functions loaded dynamically with +the `--load` command line parameter. + +Each *USING* expression is compiled at runtime to native code. + +This feature allows pgloader to load any number of fields in a CSV file into +a possibly different number of columns in the database, using custom code +for that projection. + +WITH +---- + +Set of options to apply to the command, using a global syntax of either: + + - *key = value* + - *use option* + - *do not use option* + +See each specific command for details. + +All data sources specific commands support the following options: + + - *on error stop*, *on error resume next* + - *batch rows = R* + - *batch size = ... MB* + - *prefetch rows = ...* + +See the section BATCH BEHAVIOUR OPTIONS for more details. + +In addition, the following settings are available: + + - *workers = W* + - *concurrency = C* + - *max parallel create index = I* + +See section A NOTE ABOUT PARALLELISM for more details. + +SET +--- + +This clause allows to specify session parameters to be set for all the +sessions opened by pgloader. It expects a list of parameter name, the equal +sign, then the single-quoted value as a comma separated list. + +The names and values of the parameters are not validated by pgloader, they +are given as-is to PostgreSQL. + +BEFORE LOAD DO +-------------- + +You can run SQL queries against the database before loading the data from +the `CSV` file. Most common SQL queries are `CREATE TABLE IF NOT EXISTS` so +that the data can be loaded. + +Each command must be *dollar-quoted*: it must begin and end with a double +dollar sign, `$$`. Dollar-quoted queries are then comma separated. No extra +punctuation is expected after the last SQL query. + +BEFORE LOAD EXECUTE +------------------- + +Same behaviour as in the *BEFORE LOAD DO* clause. Allows you to read the SQL +queries from a SQL file. Implements support for PostgreSQL dollar-quoting +and the `\i` and `\ir` include facilities as in `psql` batch mode (where +they are the same thing). + +AFTER LOAD DO +------------- + +Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that +section are executed once the load is done. That's the right time to create +indexes and constraints, or re-enable triggers. + +AFTER LOAD EXECUTE +------------------ + +Same behaviour as in the *AFTER LOAD DO* clause. Allows you to read the SQL +queries from a SQL file. Implements support for PostgreSQL dollar-quoting +and the `\i` and `\ir` include facilities as in `psql` batch mode (where +they are the same thing). + +AFTER CREATE SCHEMA DO +---------------------- + +Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that +section are executed once the schema has been created by pgloader, and +before the data is loaded. It's the right time to ALTER TABLE or do some +custom implementation on-top of what pgloader does, like maybe partitioning. + +AFTER CREATE SCHEMA EXECUTE +--------------------------- + +Same behaviour as in the *AFTER CREATE SCHEMA DO* clause. Allows you to read +the SQL queries from a SQL file. Implements support for PostgreSQL +dollar-quoting and the `\i` and `\ir` include facilities as in `psql` batch +mode (where they are the same thing). + +Connection String +----------------- + +The `` parameter is expected to be given as a *Connection URI* +as documented in the PostgreSQL documentation at +http://www.postgresql.org/docs/9.3/static/libpq-connect.html#LIBPQ-CONNSTRING. + +:: + + postgresql://[user[:password]@][netloc][:port][/dbname][?option=value&...] + +Where: + + - *user* + + Can contain any character, including colon (`:`) which must then be + doubled (`::`) and at-sign (`@`) which must then be doubled (`@@`). + + When omitted, the *user* name defaults to the value of the `PGUSER` + environment variable, and if it is unset, the value of the `USER` + environment variable. + + - *password* + + Can contain any character, including the at sign (`@`) which must then + be doubled (`@@`). To leave the password empty, when the *user* name + ends with at at sign, you then have to use the syntax user:@. + + When omitted, the *password* defaults to the value of the `PGPASSWORD` + environment variable if it is set, otherwise the password is left + unset. + + When no *password* is found either in the connection URI nor in the + environment, then pgloader looks for a `.pgpass` file as documented at + https://www.postgresql.org/docs/current/static/libpq-pgpass.html. The + implementation is not that of `libpq` though. As with `libpq` you can + set the environment variable `PGPASSFILE` to point to a `.pgpass` file, + and pgloader defaults to `~/.pgpass` on unix like systems and + `%APPDATA%\postgresql\pgpass.conf` on windows. Matching rules and syntax + are the same as with `libpq`, refer to its documentation. + + - *netloc* + + Can be either a hostname in dotted notation, or an ipv4, or an Unix + domain socket path. Empty is the default network location, under a + system providing *unix domain socket* that method is preferred, otherwise + the *netloc* default to `localhost`. + + It's possible to force the *unix domain socket* path by using the syntax + `unix:/path/to/where/the/socket/file/is`, so to force a non default + socket path and a non default port, you would have: + + postgresql://unix:/tmp:54321/dbname + + The *netloc* defaults to the value of the `PGHOST` environment + variable, and if it is unset, to either the default `unix` socket path + when running on a Unix system, and `localhost` otherwise. + + Socket path containing colons are supported by doubling the colons + within the path, as in the following example: + + postgresql://unix:/tmp/project::region::instance:5432/dbname + + - *dbname* + + Should be a proper identifier (letter followed by a mix of letters, + digits and the punctuation signs comma (`,`), dash (`-`) and underscore + (`_`). + + When omitted, the *dbname* defaults to the value of the environment + variable `PGDATABASE`, and if that is unset, to the *user* value as + determined above. + + - *options* + + The optional parameters must be supplied with the form `name=value`, and + you may use several parameters by separating them away using an + ampersand (`&`) character. + + Only some options are supported here, *tablename* (which might be + qualified with a schema name) *sslmode*, *host*, *port*, *dbname*, + *user* and *password*. + + The *sslmode* parameter values can be one of `disable`, `allow`, + `prefer` or `require`. + + For backward compatibility reasons, it's possible to specify the + *tablename* option directly, without spelling out the `tablename=` + parts. + + The options override the main URI components when both are given, and + using the percent-encoded option parameters allow using passwords + starting with a colon and bypassing other URI components parsing + limitations. + +Regular Expressions +------------------- + +Several clauses listed in the following accept *regular expressions* with +the following input rules: + + - A regular expression begins with a tilde sign (`~`), + + - is then followed with an opening sign, + + - then any character is allowed and considered part of the regular + expression, except for the closing sign, + + - then a closing sign is expected. + +The opening and closing sign are allowed by pair, here's the complete list +of allowed delimiters:: + + ~// + ~[] + ~{} + ~() + ~<> + ~"" + ~'' + ~|| + ~## + +Pick the set of delimiters that don't collide with the *regular expression* +you're trying to input. If your expression is such that none of the +solutions allow you to enter it, the places where such expressions are +allowed should allow for a list of expressions. + +Comments +-------- + +Any command may contain comments, following those input rules: + + - the `--` delimiter begins a comment that ends with the end of the + current line, + + - the delimiters `/*` and `*/` respectively start and end a comment, which + can be found in the middle of a command or span several lines. + +Any place where you could enter a *whitespace* will accept a comment too. + +Batch behaviour options +----------------------- + +All pgloader commands have support for a *WITH* clause that allows for +specifying options. Some options are generic and accepted by all commands, +such as the *batch behaviour options*, and some options are specific to a +data source kind, such as the CSV *skip header* option. + +The global batch behaviour options are: + + - *batch rows* + + Takes a numeric value as argument, used as the maximum number of rows + allowed in a batch. The default is `25 000` and can be changed to try + having better performance characteristics or to control pgloader memory + usage; + + - *batch size* + + Takes a memory unit as argument, such as *20 MB*, its default value. + Accepted multipliers are *kB*, *MB*, *GB*, *TB* and *PB*. The case is + important so as not to be confused about bits versus bytes, we're only + talking bytes here. + + - *prefetch rows* + + Takes a numeric value as argument, defaults to `100000`. That's the + number of rows that pgloader is allowed to read in memory in each reader + thread. See the *workers* setting for how many reader threads are + allowed to run at the same time. + +Other options are specific to each input source, please refer to specific +parts of the documentation for their listing and covering. + +A batch is then closed as soon as either the *batch rows* or the *batch +size* threshold is crossed, whichever comes first. In cases when a batch has +to be closed because of the *batch size* setting, a *debug* level log +message is printed with how many rows did fit in the *oversized* batch. + +Templating with Mustache +------------------------ + +pgloader implements the https://mustache.github.io/ templating system so +that you may have dynamic parts of your commands. See the documentation for +this template system online. + +A specific feature of pgloader is the ability to fetch a variable from the +OS environment of the pgloader process, making it possible to run pgloader +as in the following example:: + + $ DBPATH=sqlite/sqlite.db pgloader ./test/sqlite-env.load + +or in several steps:: + + $ export DBPATH=sqlite/sqlite.db + $ pgloader ./test/sqlite-env.load + +The variable can then be used in a typical mustache fashion:: + + load database + from '{{DBPATH}}' + into postgresql:///pgloader; + +It's also possible to prepare a INI file such as the following:: + + [pgloader] + + DBPATH = sqlite/sqlite.db + +And run the following command, feeding the INI values as a *context* for +pgloader templating system:: + + $ pgloader --context ./test/sqlite.ini ./test/sqlite-ini.load + +The mustache templates implementation with OS environment support replaces +former `GETENV` implementation, which didn't work anyway. diff -Nru pgloader-3.6.7/docs/conf.py pgloader-3.6.9/docs/conf.py --- pgloader-3.6.7/docs/conf.py 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/conf.py 2022-10-24 11:12:05.000000000 +0000 @@ -21,62 +21,34 @@ # import sys # sys.path.insert(0, os.path.abspath('.')) +# -- Project information ----------------------------------------------------- + +project = 'pgloader' +copyright = '2005-2022, Dimitri Fontaine' +author = 'Dimitri Fontaine' + +version = '3.6' +release = '3.6.9' # -- General configuration ------------------------------------------------ -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' +# The master toctree document. +master_doc = 'index' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ['sphinx.ext.githubpages'] +extensions = [ +] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = '.rst' -source_suffix = ['.rst', '.md'] - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = 'pgloader' -copyright = '2005-2022, Dimitri Fontaine' -author = 'Dimitri Fontaine' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '3.6' -# The full version, including alpha/beta/rc tags. -release = '3.6.7' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path +# This pattern also affects html_static_path and html_extra_path. exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - # -- Options for HTML output ---------------------------------------------- @@ -86,42 +58,10 @@ #html_theme = 'alabaster' html_theme = 'sphinx_rtd_theme' -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} -html_theme_options = { - #'github_user': 'dimitri', - #'github_repo': 'pgloader', - #'description': 'your migration companion', - #'travis_button': True, - #'show_related': True, - #'sidebar_collapse': False, -} - # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# This is required for the alabaster theme -# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars -html_sidebars = { - '**': [ - 'relations.html', # needs 'show_related': True theme option to display - 'searchbox.html', - ] -} - - -# -- Options for HTMLHelp output ------------------------------------------ - -# Output file base name for HTML help builder. -htmlhelp_basename = 'pgloaderdoc' +#html_static_path = ['_static'] # -- Options for LaTeX output --------------------------------------------- diff -Nru pgloader-3.6.7/docs/index.rst pgloader-3.6.9/docs/index.rst --- pgloader-3.6.7/docs/index.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/index.rst 2022-10-24 11:12:05.000000000 +0000 @@ -6,6 +6,14 @@ Welcome to pgloader's documentation! ==================================== +The `pgloader`__ project is an Open Source Software project. The development +happens at `https://github.com/dimitri/pgloader`__ and is public: everyone +is welcome to participate by opening issues, pull requests, giving feedback, +etc. + +__ https://github.com/dimitri/pgloader +__ https://github.com/dimitri/pgloader + pgloader loads data from various sources into PostgreSQL. It can transform the data it reads on the fly and submit raw SQL before and after the loading. It uses the `COPY` PostgreSQL protocol to stream the data into the @@ -238,28 +246,47 @@ implement the third step in a fully automated way. That's pgloader. .. toctree:: - :maxdepth: 2 - :caption: Table Of Contents: + :hidden: + :caption: Getting Started intro quickstart tutorial/tutorial install + bugreport + +.. toctree:: + :hidden: + :caption: Reference Manual + pgloader + command + batches + ref/transforms + +.. toctree:: + :hidden: + :caption: Manual for file formats + ref/csv ref/fixed ref/copy ref/dbf ref/ixf ref/archive + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Manual for Database Servers + ref/mysql ref/sqlite ref/mssql ref/pgsql ref/pgsql-citus-target ref/pgsql-redshift - ref/transforms - bugreport + Indices and tables ================== diff -Nru pgloader-3.6.7/docs/intro.rst pgloader-3.6.9/docs/intro.rst --- pgloader-3.6.7/docs/intro.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/intro.rst 2022-10-24 11:12:05.000000000 +0000 @@ -13,7 +13,9 @@ * CSV * Fixed Format + * Postgres COPY text format * DBF + * IXF * Databases diff -Nru pgloader-3.6.7/docs/pgloader.rst pgloader-3.6.9/docs/pgloader.rst --- pgloader-3.6.7/docs/pgloader.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/pgloader.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -PgLoader Reference Manual -========================= +Command Line +============ pgloader loads data from various sources into PostgreSQL. It can transform the data it reads on the fly and submit raw SQL before and @@ -59,87 +59,88 @@ Inquiry Options ^^^^^^^^^^^^^^^ -Use these options when you want to know more about how to use `pgloader`, as -those options will cause `pgloader` not to load any data. +Use these options when you want to know more about how to use pgloader, as +those options will cause pgloader not to load any data. - * `-h`, `--help` +--help Show command usage summary and exit. - * `-V`, `--version` +--version Show pgloader version string and exit. - * `-E`, `--list-encodings` +--with-encodings List known encodings in this version of pgloader. - * `-U`, `--upgrade-config` +--upgrade-config - Parse given files in the command line as `pgloader.conf` files with the - `INI` syntax that was in use in pgloader versions 2.x, and output the + Parse given files in the command line as ``pgloader.conf`` files with + the INI syntax that was in use in pgloader versions 2.x, and output the new command syntax for pgloader on standard output. General Options ^^^^^^^^^^^^^^^ -Those options are meant to tweak `pgloader` behavior when loading data. +Those options are meant to tweak pgloader behavior when loading data. - * `-v`, `--verbose` +--verbose Be verbose. - * `-q`, `--quiet` +--quiet Be quiet. - * `-d`, `--debug` +--debug Show debug level information messages. - * `-D`, `--root-dir` +--root-dir - Set the root working directory (default to "/tmp/pgloader"). + Set the root working directory (defaults to ``/tmp/pgloader``). - * `-L`, `--logfile` +--logfile - Set the pgloader log file (default to "/tmp/pgloader/pgloader.log"). + Set the pgloader log file (defaults to ``/tmp/pgloader/pgloader.log``). - * `--log-min-messages` +--log-min-messages Minimum level of verbosity needed for log message to make it to the logfile. One of critical, log, error, warning, notice, info or debug. - * `--client-min-messages` +--client-min-messages Minimum level of verbosity needed for log message to make it to the console. One of critical, log, error, warning, notice, info or debug. - * `-S`, `--summary` +--summary A filename where to copy the summary output. When relative, the filename - is expanded into `*root-dir*`. + is expanded into ``*root-dir*``. The format of the filename defaults to being *human readable*. It is + possible to have the output in machine friendly formats such as *CSV*, *COPY* (PostgreSQL's own COPY format) or *JSON* by specifying a filename - with the extension resp. `.csv`, `.copy` or `.json`. + with the extension resp. ``.csv``, ``.copy`` or ``.json``. - * `-l `, `--load-lisp-file ` +--load-lisp-file Specify a lisp to compile and load into the pgloader image before reading the commands, allowing to define extra transformation function. - Those functions should be defined in the `pgloader.transforms` package. - This option can appear more than once in the command line. + Those functions should be defined in the ``pgloader.transforms`` + package. This option can appear more than once in the command line. - * `--dry-run` +--dry-run - Allow testing a `.load` file without actually trying to load any data. + Allow testing a ``.load`` file without actually trying to load any data. It's useful to debug it until it's ok, in particular to fix connection strings. - * `--on-error-stop` +--on-error-stop Alter pgloader behavior: rather than trying to be smart about error handling and continue loading good data, separating away the bad one, @@ -147,14 +148,14 @@ debug data processing, transformation function and specific type casting. - * `--self-upgrade ` +--self-upgrade Specify a where to find pgloader sources so that one of the very first things it does is dynamically loading-in (and compiling to machine code) another version of itself, usually a newer one like a very recent git checkout. - * `--no-ssl-cert-verification` +--no-ssl-cert-verification Uses the OpenSSL option to accept a locally issued server-side certificate, avoiding the following error message:: @@ -169,53 +170,55 @@ Command Line Only Operations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Those options are meant to be used when using `pgloader` from the command -line only, rather than using a command file and the rich command clauses and +Those options are meant to be used when using pgloader from the command line +only, rather than using a command file and the rich command clauses and parser. In simple cases, it can be much easier to use the *SOURCE* and *TARGET* directly on the command line, then tweak the loading with those options: - * `--with "option"` +--with
" ] - [ TARGET COLUMNS ] - - [ WITH ] - - [ SET ] - - [ BEFORE LOAD [ DO | EXECUTE ] ... ] - [ AFTER LOAD [ DO | EXECUTE ] ... ] - ; - -The main clauses are the `LOAD`, `FROM`, `INTO` and `WITH` clauses that each -command implements. Some command then implement the `SET` command, or some -specific clauses such as the `CAST` clause. - -Templating with Mustache ------------------------- - -pgloader implements the https://mustache.github.io/ templating system so -that you may have dynamic parts of your commands. See the documentation for -this template system online. - -A specific feature of pgloader is the ability to fetch a variable from the -OS environment of the pgloader process, making it possible to run pgloader -as in the following example:: - - $ DBPATH=sqlite/sqlite.db pgloader ./test/sqlite-env.load - -or in several steps:: - - $ export DBPATH=sqlite/sqlite.db - $ pgloader ./test/sqlite-env.load - -The variable can then be used in a typical mustache fashion:: - - load database - from '{{DBPATH}}' - into postgresql:///pgloader; - -It's also possible to prepare a INI file such as the following:: - - [pgloader] - - DBPATH = sqlite/sqlite.db - -And run the following command, feeding the INI values as a *context* for -pgloader templating system:: - - $ pgloader --context ./test/sqlite.ini ./test/sqlite-ini.load - -The mustache templates implementation with OS environment support replaces -former `GETENV` implementation, which didn't work anyway. - -.. _common_clauses: - -Common Clauses --------------- - -Some clauses are common to all commands: - -FROM -^^^^ - -The *FROM* clause specifies where to read the data from, and each command -introduces its own variant of sources. For instance, the *CSV* source -supports `inline`, `stdin`, a filename, a quoted filename, and a *FILENAME -MATCHING* clause (see above); whereas the *MySQL* source only supports a -MySQL database URI specification. - -INTO -^^^^ - -The PostgreSQL connection URI must contains the name of the target table -where to load the data into. That table must have already been created in -PostgreSQL, and the name might be schema qualified. - -Then *INTO* option also supports an optional comma separated list of target -columns, which are either the name of an input *field* or the white space -separated list of the target column name, its PostgreSQL data type and a -*USING* expression. - -The *USING* expression can be any valid Common Lisp form and will be read -with the current package set to `pgloader.transforms`, so that you can use -functions defined in that package, such as functions loaded dynamically with -the `--load` command line parameter. - -Each *USING* expression is compiled at runtime to native code. - -This feature allows pgloader to load any number of fields in a CSV file into -a possibly different number of columns in the database, using custom code -for that projection. - -WITH -^^^^ - -Set of options to apply to the command, using a global syntax of either: - - - *key = value* - - *use option* - - *do not use option* - -See each specific command for details. - -All data sources specific commands support the following options: - - - *on error stop*, *on error resume next* - - *batch rows = R* - - *batch size = ... MB* - - *prefetch rows = ...* - -See the section BATCH BEHAVIOUR OPTIONS for more details. - -In addition, the following settings are available: - - - *workers = W* - - *concurrency = C* - - *max parallel create index = I* - -See section A NOTE ABOUT PARALLELISM for more details. - -SET -^^^ - -This clause allows to specify session parameters to be set for all the -sessions opened by pgloader. It expects a list of parameter name, the equal -sign, then the single-quoted value as a comma separated list. - -The names and values of the parameters are not validated by pgloader, they -are given as-is to PostgreSQL. - -BEFORE LOAD DO -^^^^^^^^^^^^^^ - -You can run SQL queries against the database before loading the data from -the `CSV` file. Most common SQL queries are `CREATE TABLE IF NOT EXISTS` so -that the data can be loaded. - -Each command must be *dollar-quoted*: it must begin and end with a double -dollar sign, `$$`. Dollar-quoted queries are then comma separated. No extra -punctuation is expected after the last SQL query. - -BEFORE LOAD EXECUTE -^^^^^^^^^^^^^^^^^^^ - -Same behaviour as in the *BEFORE LOAD DO* clause. Allows you to read the SQL -queries from a SQL file. Implements support for PostgreSQL dollar-quoting -and the `\i` and `\ir` include facilities as in `psql` batch mode (where -they are the same thing). - -AFTER LOAD DO -^^^^^^^^^^^^^ - -Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that -section are executed once the load is done. That's the right time to create -indexes and constraints, or re-enable triggers. - -AFTER LOAD EXECUTE -^^^^^^^^^^^^^^^^^^ - -Same behaviour as in the *AFTER LOAD DO* clause. Allows you to read the SQL -queries from a SQL file. Implements support for PostgreSQL dollar-quoting -and the `\i` and `\ir` include facilities as in `psql` batch mode (where -they are the same thing). - -AFTER CREATE SCHEMA DO -^^^^^^^^^^^^^^^^^^^^^^ - -Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that -section are executed once the schema has been created by pgloader, and -before the data is loaded. It's the right time to ALTER TABLE or do some -custom implementation on-top of what pgloader does, like maybe partitioning. - -AFTER CREATE SCHEMA EXECUTE -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Same behaviour as in the *AFTER CREATE SCHEMA DO* clause. Allows you to read -the SQL queries from a SQL file. Implements support for PostgreSQL -dollar-quoting and the `\i` and `\ir` include facilities as in `psql` batch -mode (where they are the same thing). - -Connection String -^^^^^^^^^^^^^^^^^ - -The `` parameter is expected to be given as a *Connection URI* -as documented in the PostgreSQL documentation at -http://www.postgresql.org/docs/9.3/static/libpq-connect.html#LIBPQ-CONNSTRING. - -:: - - postgresql://[user[:password]@][netloc][:port][/dbname][?option=value&...] - -Where: - - - *user* - - Can contain any character, including colon (`:`) which must then be - doubled (`::`) and at-sign (`@`) which must then be doubled (`@@`). - - When omitted, the *user* name defaults to the value of the `PGUSER` - environment variable, and if it is unset, the value of the `USER` - environment variable. - - - *password* - - Can contain any character, including the at sign (`@`) which must then - be doubled (`@@`). To leave the password empty, when the *user* name - ends with at at sign, you then have to use the syntax user:@. - - When omitted, the *password* defaults to the value of the `PGPASSWORD` - environment variable if it is set, otherwise the password is left - unset. - - When no *password* is found either in the connection URI nor in the - environment, then pgloader looks for a `.pgpass` file as documented at - https://www.postgresql.org/docs/current/static/libpq-pgpass.html. The - implementation is not that of `libpq` though. As with `libpq` you can - set the environment variable `PGPASSFILE` to point to a `.pgpass` file, - and pgloader defaults to `~/.pgpass` on unix like systems and - `%APPDATA%\postgresql\pgpass.conf` on windows. Matching rules and syntax - are the same as with `libpq`, refer to its documentation. - - - *netloc* - - Can be either a hostname in dotted notation, or an ipv4, or an Unix - domain socket path. Empty is the default network location, under a - system providing *unix domain socket* that method is preferred, otherwise - the *netloc* default to `localhost`. - - It's possible to force the *unix domain socket* path by using the syntax - `unix:/path/to/where/the/socket/file/is`, so to force a non default - socket path and a non default port, you would have: - - postgresql://unix:/tmp:54321/dbname - - The *netloc* defaults to the value of the `PGHOST` environment - variable, and if it is unset, to either the default `unix` socket path - when running on a Unix system, and `localhost` otherwise. - - Socket path containing colons are supported by doubling the colons - within the path, as in the following example: - - postgresql://unix:/tmp/project::region::instance:5432/dbname - - - *dbname* - - Should be a proper identifier (letter followed by a mix of letters, - digits and the punctuation signs comma (`,`), dash (`-`) and underscore - (`_`). - - When omitted, the *dbname* defaults to the value of the environment - variable `PGDATABASE`, and if that is unset, to the *user* value as - determined above. - - - *options* - - The optional parameters must be supplied with the form `name=value`, and - you may use several parameters by separating them away using an - ampersand (`&`) character. - - Only some options are supported here, *tablename* (which might be - qualified with a schema name) *sslmode*, *host*, *port*, *dbname*, - *user* and *password*. - - The *sslmode* parameter values can be one of `disable`, `allow`, - `prefer` or `require`. - - For backward compatibility reasons, it's possible to specify the - *tablename* option directly, without spelling out the `tablename=` - parts. - - The options override the main URI components when both are given, and - using the percent-encoded option parameters allow using passwords - starting with a colon and bypassing other URI components parsing - limitations. - -Regular Expressions -^^^^^^^^^^^^^^^^^^^ - -Several clauses listed in the following accept *regular expressions* with -the following input rules: - - - A regular expression begins with a tilde sign (`~`), - - - is then followed with an opening sign, - - - then any character is allowed and considered part of the regular - expression, except for the closing sign, - - - then a closing sign is expected. - -The opening and closing sign are allowed by pair, here's the complete list -of allowed delimiters:: - - ~// - ~[] - ~{} - ~() - ~<> - ~"" - ~'' - ~|| - ~## - -Pick the set of delimiters that don't collide with the *regular expression* -you're trying to input. If your expression is such that none of the -solutions allow you to enter it, the places where such expressions are -allowed should allow for a list of expressions. - -Comments -^^^^^^^^ - -Any command may contain comments, following those input rules: - - - the `--` delimiter begins a comment that ends with the end of the - current line, - - - the delimiters `/*` and `*/` respectively start and end a comment, which - can be found in the middle of a command or span several lines. - -Any place where you could enter a *whitespace* will accept a comment too. - -Batch behaviour options -^^^^^^^^^^^^^^^^^^^^^^^ - -All pgloader commands have support for a *WITH* clause that allows for -specifying options. Some options are generic and accepted by all commands, -such as the *batch behaviour options*, and some options are specific to a -data source kind, such as the CSV *skip header* option. - -The global batch behaviour options are: - - - *batch rows* - - Takes a numeric value as argument, used as the maximum number of rows - allowed in a batch. The default is `25 000` and can be changed to try - having better performance characteristics or to control pgloader memory - usage; - - - *batch size* - - Takes a memory unit as argument, such as *20 MB*, its default value. - Accepted multipliers are *kB*, *MB*, *GB*, *TB* and *PB*. The case is - important so as not to be confused about bits versus bytes, we're only - talking bytes here. - - - *prefetch rows* - - Takes a numeric value as argument, defaults to `100000`. That's the - number of rows that pgloader is allowed to read in memory in each reader - thread. See the *workers* setting for how many reader threads are - allowed to run at the same time. - -Other options are specific to each input source, please refer to specific -parts of the documentation for their listing and covering. - -A batch is then closed as soon as either the *batch rows* or the *batch -size* threshold is crossed, whichever comes first. In cases when a batch has -to be closed because of the *batch size* setting, a *debug* level log -message is printed with how many rows did fit in the *oversized* batch. - diff -Nru pgloader-3.6.7/docs/ref/archive.rst pgloader-3.6.9/docs/ref/archive.rst --- pgloader-3.6.7/docs/ref/archive.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/archive.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -Loading From an Archive -======================= +Archive (http, zip) +=================== This command instructs pgloader to load data from one or more files contained in an archive. Currently the only supported archive format is *ZIP*, and the diff -Nru pgloader-3.6.7/docs/ref/copy.rst pgloader-3.6.9/docs/ref/copy.rst --- pgloader-3.6.7/docs/ref/copy.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/copy.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -Loading COPY Formatted Files -============================ +COPY +==== This commands instructs pgloader to load from a file containing COPY TEXT data as described in the PostgreSQL documentation. diff -Nru pgloader-3.6.7/docs/ref/csv.rst pgloader-3.6.9/docs/ref/csv.rst --- pgloader-3.6.7/docs/ref/csv.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/csv.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -Loading CSV data -================ +CSV +=== This command instructs pgloader to load data from a `CSV` file. Because of the complexity of guessing the parameters of a CSV file, it's simpler to diff -Nru pgloader-3.6.7/docs/ref/dbf.rst pgloader-3.6.9/docs/ref/dbf.rst --- pgloader-3.6.7/docs/ref/dbf.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/dbf.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -Loading DBF data -================= +DBF +=== This command instructs pgloader to load data from a `DBF` file. A default set of casting rules are provided and might be overloaded and appended to by diff -Nru pgloader-3.6.7/docs/ref/fixed.rst pgloader-3.6.9/docs/ref/fixed.rst --- pgloader-3.6.7/docs/ref/fixed.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/fixed.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -Loading Fixed Cols File Formats -=============================== +Fixed Columns +============= This command instructs pgloader to load data from a text file containing columns arranged in a *fixed size* manner. diff -Nru pgloader-3.6.7/docs/ref/ixf.rst pgloader-3.6.9/docs/ref/ixf.rst --- pgloader-3.6.7/docs/ref/ixf.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/ixf.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -Loading IXF Data -================ +IXF +=== This command instructs pgloader to load data from an IBM `IXF` file. diff -Nru pgloader-3.6.7/docs/ref/mssql.rst pgloader-3.6.9/docs/ref/mssql.rst --- pgloader-3.6.7/docs/ref/mssql.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/mssql.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -Migrating a MS SQL Database to PostgreSQL -========================================= +MS SQL to Postgres +================== This command instructs pgloader to load data from a MS SQL database. Automatic discovery of the schema is supported, including build of the diff -Nru pgloader-3.6.7/docs/ref/mysql.rst pgloader-3.6.9/docs/ref/mysql.rst --- pgloader-3.6.7/docs/ref/mysql.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/mysql.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -Migrating a MySQL Database to PostgreSQL -======================================== +MySQL to Postgres +================= This command instructs pgloader to load data from a database connection. pgloader supports dynamically converting the schema of the source database diff -Nru pgloader-3.6.7/docs/ref/pgsql-citus-target.rst pgloader-3.6.9/docs/ref/pgsql-citus-target.rst --- pgloader-3.6.7/docs/ref/pgsql-citus-target.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/pgsql-citus-target.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -Migrating a PostgreSQL Database to Citus -======================================== +PostgreSQL to Citus +=================== This command instructs pgloader to load data from a database connection. Automatic discovery of the schema is supported, including build of the diff -Nru pgloader-3.6.7/docs/ref/pgsql-redshift.rst pgloader-3.6.9/docs/ref/pgsql-redshift.rst --- pgloader-3.6.7/docs/ref/pgsql-redshift.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/pgsql-redshift.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -Support for Redshift in pgloader -================================ +Redshift to Postgres +==================== The command and behavior are the same as when migration from a PostgreSQL database source, see :ref:`migrating_to_pgsql`. pgloader automatically diff -Nru pgloader-3.6.7/docs/ref/pgsql.rst pgloader-3.6.9/docs/ref/pgsql.rst --- pgloader-3.6.7/docs/ref/pgsql.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/pgsql.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,13 +1,18 @@ .. _migrating_to_pgsql: -Migrating a PostgreSQL Database to PostgreSQL -============================================= +Postgres to Postgres +==================== This command instructs pgloader to load data from a database connection. Automatic discovery of the schema is supported, including build of the indexes, primary and foreign keys constraints. A default set of casting rules are provided and might be overloaded and appended to by the command. +For a complete Postgres to Postgres solution including Change Data Capture +support with Logical Decoding, see `pgcopydb`__. + +__ https://pgcopydb.readthedocs.io/ + Using default settings ---------------------- diff -Nru pgloader-3.6.7/docs/ref/sqlite.rst pgloader-3.6.9/docs/ref/sqlite.rst --- pgloader-3.6.7/docs/ref/sqlite.rst 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/docs/ref/sqlite.rst 2022-10-24 11:12:05.000000000 +0000 @@ -1,5 +1,5 @@ -Migrating a SQLite database to PostgreSQL -========================================= +SQLite to Postgres +================== This command instructs pgloader to load data from a SQLite file. Automatic discovery of the schema is supported, including build of the indexes. diff -Nru pgloader-3.6.7/docs/requirements.txt pgloader-3.6.9/docs/requirements.txt --- pgloader-3.6.7/docs/requirements.txt 1970-01-01 00:00:00.000000000 +0000 +++ pgloader-3.6.9/docs/requirements.txt 2022-10-24 11:12:05.000000000 +0000 @@ -0,0 +1,4 @@ +Sphinx==4.2.0 +sphinx_rtd_theme==1.0.0 +docutils==0.16 +readthedocs-sphinx-search==0.1.0 diff -Nru pgloader-3.6.7/.github/workflows/debian-ci.yml pgloader-3.6.9/.github/workflows/debian-ci.yml --- pgloader-3.6.7/.github/workflows/debian-ci.yml 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/.github/workflows/debian-ci.yml 2022-10-24 11:12:05.000000000 +0000 @@ -14,6 +14,12 @@ - name: Checkout uses: actions/checkout@v2 + - name: Install postgresql-common + run: sudo apt-get install -y postgresql-common + + - name: Install pgapt repository + run: sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y + - name: Install build-dependencies run: sudo apt-get build-dep -y . diff -Nru pgloader-3.6.7/Makefile pgloader-3.6.9/Makefile --- pgloader-3.6.7/Makefile 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/Makefile 2022-10-24 11:12:05.000000000 +0000 @@ -1,6 +1,6 @@ # pgloader build tool APP_NAME = pgloader -VERSION = 3.6.7 +VERSION = 3.6.9 # use either sbcl or ccl CL = sbcl diff -Nru pgloader-3.6.7/pgloader.spec pgloader-3.6.9/pgloader.spec --- pgloader-3.6.7/pgloader.spec 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/pgloader.spec 2022-10-24 11:12:05.000000000 +0000 @@ -1,6 +1,6 @@ Summary: extract, transform and load data into PostgreSQL Name: pgloader -Version: 3.6.7 +Version: 3.6.9 Release: 22%{?dist} License: The PostgreSQL Licence Group: System Environment/Base diff -Nru pgloader-3.6.7/README.md pgloader-3.6.9/README.md --- pgloader-3.6.7/README.md 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/README.md 2022-10-24 11:12:05.000000000 +0000 @@ -23,8 +23,8 @@ ## Documentation Full documentation is available online, including manual pages of all the -pgcopydb sub-commands. Check out -[https://pgcopydb.readthedocs.io/](https://pgcopydb.readthedocs.io/en/latest/). +pgloader sub-commands. Check out +[https://pgloader.readthedocs.io/](https://pgloader.readthedocs.io/en/latest/). ``` $ pgloader --help diff -Nru pgloader-3.6.7/.readthedocs.yaml pgloader-3.6.9/.readthedocs.yaml --- pgloader-3.6.7/.readthedocs.yaml 1970-01-01 00:00:00.000000000 +0000 +++ pgloader-3.6.9/.readthedocs.yaml 2022-10-24 11:12:05.000000000 +0000 @@ -0,0 +1,11 @@ +version: 2 + +# Build from the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Explicitly set the version of Python and its requirements +python: + version: 3.7 + install: + - requirements: docs/requirements.txt \ No newline at end of file diff -Nru pgloader-3.6.7/src/pgsql/pgsql-create-schema.lisp pgloader-3.6.9/src/pgsql/pgsql-create-schema.lisp --- pgloader-3.6.7/src/pgsql/pgsql-create-schema.lisp 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/src/pgsql/pgsql-create-schema.lisp 2022-10-24 11:12:05.000000000 +0000 @@ -408,7 +408,8 @@ || trim(trailing ')' from replace(pg_get_expr(d.adbin, d.adrelid), 'nextval', 'setval')) - || ', (select greatest(max(' || quote_ident(a.attname) || '), 1) from only ' + || ', (select greatest(max(' || quote_ident(a.attname) || '), (select seqmin from pg_sequence where seqrelid = (''' + || pg_get_serial_sequence(quote_ident(nspname) || '.' || quote_ident(relname), quote_ident(a.attname)) || ''')::regclass limit 1), 1) from only ' || quote_ident(nspname) || '.' || quote_ident(relname) || '));' as sql FROM pg_class c JOIN pg_namespace n on n.oid = c.relnamespace diff -Nru pgloader-3.6.7/src/sources/mssql/mssql-cast-rules.lisp pgloader-3.6.9/src/sources/mssql/mssql-cast-rules.lisp --- pgloader-3.6.7/src/sources/mssql/mssql-cast-rules.lisp 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/src/sources/mssql/mssql-cast-rules.lisp 2022-10-24 11:12:05.000000000 +0000 @@ -14,6 +14,12 @@ (:source (:type "int" :auto-increment t) :target (:type "bigserial" :drop-default t)) + + (:source (:type "bigint" :auto-increment t) + :target (:type "bigserial")) + + (:source (:type "smallint" :auto-increment t) + :target (:type "smallserial")) (:source (:type "tinyint") :target (:type "smallint")) diff -Nru pgloader-3.6.7/src/sources/mssql/sql/list-all-columns.sql pgloader-3.6.9/src/sources/mssql/sql/list-all-columns.sql --- pgloader-3.6.7/src/sources/mssql/sql/list-all-columns.sql 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/src/sources/mssql/sql/list-all-columns.sql 2022-10-24 11:12:05.000000000 +0000 @@ -14,6 +14,7 @@ WHEN SUBSTRING(c.COLUMN_DEFAULT,3,len(c.COLUMN_DEFAULT)-4) = 'newid()' THEN 'GENERATE_UUID' WHEN SUBSTRING(c.COLUMN_DEFAULT,3,len(c.COLUMN_DEFAULT)-4) LIKE 'convert(%varchar%,getdate(),%)' THEN 'today' WHEN SUBSTRING(c.COLUMN_DEFAULT,3,len(c.COLUMN_DEFAULT)-4) = 'getdate()' THEN 'CURRENT_TIMESTAMP' + WHEN SUBSTRING(c.COLUMN_DEFAULT,3,len(c.COLUMN_DEFAULT)-4) = 'sysdatetimeoffset()' THEN 'CURRENT_TIMESTAMP' WHEN SUBSTRING(c.COLUMN_DEFAULT,3,len(c.COLUMN_DEFAULT)-4) LIKE '''%''' THEN SUBSTRING(c.COLUMN_DEFAULT,4,len(c.COLUMN_DEFAULT)-6) ELSE SUBSTRING(c.COLUMN_DEFAULT,3,len(c.COLUMN_DEFAULT)-4) END @@ -22,6 +23,7 @@ WHEN SUBSTRING(c.COLUMN_DEFAULT,2,len(c.COLUMN_DEFAULT)-2) = 'newid()' THEN 'GENERATE_UUID' WHEN SUBSTRING(c.COLUMN_DEFAULT,2,len(c.COLUMN_DEFAULT)-2) LIKE 'convert(%varchar%,getdate(),%)' THEN 'today' WHEN SUBSTRING(c.COLUMN_DEFAULT,2,len(c.COLUMN_DEFAULT)-2) = 'getdate()' THEN 'CURRENT_TIMESTAMP' + WHEN SUBSTRING(c.COLUMN_DEFAULT,2,len(c.COLUMN_DEFAULT)-2) = 'sysdatetimeoffset()' THEN 'CURRENT_TIMESTAMP' WHEN SUBSTRING(c.COLUMN_DEFAULT,2,len(c.COLUMN_DEFAULT)-2) LIKE '''%''' THEN SUBSTRING(c.COLUMN_DEFAULT,3,len(c.COLUMN_DEFAULT)-4) ELSE SUBSTRING(c.COLUMN_DEFAULT,2,len(c.COLUMN_DEFAULT)-2) END diff -Nru pgloader-3.6.7/src/sources/mysql/mysql-cast-rules.lisp pgloader-3.6.9/src/sources/mysql/mysql-cast-rules.lisp --- pgloader-3.6.7/src/sources/mysql/mysql-cast-rules.lisp 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/src/sources/mysql/mysql-cast-rules.lisp 2022-10-24 11:12:05.000000000 +0000 @@ -66,10 +66,12 @@ :target (:type "bigint" :drop-typemod t)) (:source (:type "int" :unsigned t) :target (:type "bigint" :drop-typemod t)) - + (:source (:type "int" :unsigned t :auto-increment t) :target (:type "bigserial" :drop-typemod t)) - + (:source (:type "int" :signed t :auto-increment t) + :target (:type "serial" :drop-typemod t)) + ;; we need the following to benefit from :drop-typemod (:source (:type "tinyint") :target (:type "smallint" :drop-typemod t)) (:source (:type "smallint") :target (:type "smallint" :drop-typemod t)) diff -Nru pgloader-3.6.7/src/utils/queries.lisp pgloader-3.6.9/src/utils/queries.lisp --- pgloader-3.6.7/src/utils/queries.lisp 2022-08-13 08:35:39.000000000 +0000 +++ pgloader-3.6.9/src/utils/queries.lisp 2022-10-24 11:12:05.000000000 +0000 @@ -20,7 +20,7 @@ "Transform given PATHNAME into an URL at which to serve it within URL-PATH." (multiple-value-bind (flag path-list last-component file-namestring-p) (uiop:split-unix-namestring-directory-components - (uiop:native-namestring + (uiop:unix-namestring (uiop:enough-pathname pathname root))) (declare (ignore flag file-namestring-p)) ;;