diff -Nru r-cran-readstata13-0.9.2/debian/changelog r-cran-readstata13-0.10.0/debian/changelog --- r-cran-readstata13-0.9.2/debian/changelog 2020-05-30 16:45:25.000000000 +0000 +++ r-cran-readstata13-0.10.0/debian/changelog 2021-08-16 22:10:31.000000000 +0000 @@ -1,20 +1,19 @@ -r-cran-readstata13 (0.9.2-1build3) groovy; urgency=medium +r-cran-readstata13 (0.10.0-2) unstable; urgency=medium - * No-change rebuild against r-api-4.0 + * Simple rebuild for unstable following Debian 11 release - -- Graham Inggs Sat, 30 May 2020 16:45:25 +0000 + * debian/control: Set Build-Depends: to current R version -r-cran-readstata13 (0.9.2-1build2) focal; urgency=medium + -- Dirk Eddelbuettel Mon, 16 Aug 2021 17:10:31 -0500 - * No-change rebuild for libgcc-s1 package name change. +r-cran-readstata13 (0.10.0-1) experimental; urgency=medium - -- Matthias Klose Mon, 23 Mar 2020 07:24:37 +0100 + * New upstream release (to experimental during freeze) -r-cran-readstata13 (0.9.2-1build1) cosmic; urgency=medium - - * No-change rebuild against r-api-3.5 - - -- Graham Inggs Fri, 01 Jun 2018 15:00:58 +0000 + * debian/control: Set Build-Depends: to recent R version + * debian/control: Set Standards-Version: to current version + + -- Dirk Eddelbuettel Sat, 05 Jun 2021 12:52:03 -0500 r-cran-readstata13 (0.9.2-1) unstable; urgency=medium diff -Nru r-cran-readstata13-0.9.2/debian/compat r-cran-readstata13-0.10.0/debian/compat --- r-cran-readstata13-0.9.2/debian/compat 2018-05-27 15:06:55.000000000 +0000 +++ r-cran-readstata13-0.10.0/debian/compat 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -9 diff -Nru r-cran-readstata13-0.9.2/debian/control r-cran-readstata13-0.10.0/debian/control --- r-cran-readstata13-0.9.2/debian/control 2018-05-27 15:09:56.000000000 +0000 +++ r-cran-readstata13-0.10.0/debian/control 2021-08-16 22:10:21.000000000 +0000 @@ -2,8 +2,8 @@ Section: gnu-r Priority: optional Maintainer: Dirk Eddelbuettel -Build-Depends: debhelper (>= 7.0.0), r-base-dev (>= 3.4.4), dh-r, r-cran-rcpp -Standards-Version: 4.1.4 +Build-Depends: debhelper-compat (= 11), r-base-dev (>= 4.1.1), dh-r, r-cran-rcpp +Standards-Version: 4.5.1 Vcs-Browser: https://salsa.debian.org/edd/r-cran-readstata13 Vcs-Git: https://salsa.debian.org/edd/r-cran-readstata13.git Homepage: https://cran.r-project.org/package=readstata13 diff -Nru r-cran-readstata13-0.9.2/DESCRIPTION r-cran-readstata13-0.10.0/DESCRIPTION --- r-cran-readstata13-0.9.2/DESCRIPTION 2018-05-26 22:01:16.000000000 +0000 +++ r-cran-readstata13-0.10.0/DESCRIPTION 2021-05-25 11:50:02.000000000 +0000 @@ -1,7 +1,7 @@ Package: readstata13 Type: Package Title: Import 'Stata' Data Files -Version: 0.9.2 +Version: 0.10.0 Authors@R: c( person("Jan Marvin", "Garbuszus", email = "jan.garbuszus@ruhr-uni-bochum.de", role = c("aut")), @@ -21,9 +21,10 @@ LinkingTo: Rcpp ByteCompile: yes Suggests: testthat -RoxygenNote: 6.0.1 +Encoding: UTF-8 +RoxygenNote: 7.1.1 NeedsCompilation: yes -Packaged: 2018-05-26 10:41:40 UTC; sj +Packaged: 2021-05-25 10:51:41 UTC; arbeit Author: Jan Marvin Garbuszus [aut], Sebastian Jeworutzki [aut, cre] (), @@ -34,4 +35,4 @@ Kevin Jin [ctb] Maintainer: Sebastian Jeworutzki Repository: CRAN -Date/Publication: 2018-05-26 22:01:16 UTC +Date/Publication: 2021-05-25 11:50:02 UTC diff -Nru r-cran-readstata13-0.9.2/inst/extdata/datetime.do r-cran-readstata13-0.10.0/inst/extdata/datetime.do --- r-cran-readstata13-0.9.2/inst/extdata/datetime.do 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-readstata13-0.10.0/inst/extdata/datetime.do 2021-05-25 09:58:32.000000000 +0000 @@ -0,0 +1,54 @@ + +// do file used to create stata datetimes +// commands used: https://www.stata.com/manuals/ddrop.pdf +. use "https://www.stata-press.com/data/r17/visits", replace + +. generate admit = date(admit_d, "YMD") +. generate dob = date(dateofbirth, "MDY") +. list admit_d admit dateofbirth dob + +. format admit dob %td +. list admit dob + +. generate double admit_time = clock(admit_t, "YMDhms") +. generate double disch_time = clock(discharge_t, "YMDhm") +. format admit_time disch_time %tc +. list admit_time disch_time + +. format disch_time %tcHH:MM +. list discharge_t disch_time + +. generate double admit_Time = Clock(admit_t, "YMDhms") +. format admit_Time %tC + +. generate admonth = month(admit) +. generate adyear = year(admit) +. format adyear %ty // inserted by me +. list admit admonth adyear + +. generate monthly = ym(adyear,admonth) +. format monthly %tm +. list admit monthly + +. generate monthly2 = ym(year(admit), month(admit)) +. format monthly2 %tm + +. generate dateoftime = dofc(admit_time) +. format dateoftime %td +. list admit_time dateoftime + +. generate monthofdate = mofd(admit) +. format monthofdate %tm +. list admit monthofdate + +. generate quarterly = qofd(dofm(monthofdate)) +. format quarterly %tq +. list monthofdate quarterly + + +// trim down +. keep dob adyear disch_time admit_time monthly quarterly +// rename +. rename (dob admit_time disch_time monthly quarterly adyear) (td tc tc_hh_mm tm tq ty) +// save +save "readstata13/inst/extdata/datetime.dta", replace Binary files /tmp/tmprd2373xi/l1bn0K5xnS/r-cran-readstata13-0.9.2/inst/extdata/datetime.dta and /tmp/tmprd2373xi/4BDsR70hSp/r-cran-readstata13-0.10.0/inst/extdata/datetime.dta differ Binary files /tmp/tmprd2373xi/l1bn0K5xnS/r-cran-readstata13-0.9.2/inst/extdata/test.zip and /tmp/tmprd2373xi/4BDsR70hSp/r-cran-readstata13-0.10.0/inst/extdata/test.zip differ diff -Nru r-cran-readstata13-0.9.2/inst/include/readstata.h r-cran-readstata13-0.10.0/inst/include/readstata.h --- r-cran-readstata13-0.9.2/inst/include/readstata.h 2018-05-26 07:44:32.000000000 +0000 +++ r-cran-readstata13-0.10.0/inst/include/readstata.h 2021-05-25 09:58:32.000000000 +0000 @@ -40,12 +40,10 @@ #include #endif -#ifdef __APPLE__ -# define off64_t off_t +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__OpenBSD__) || defined(__APPLE__) || defined(__ANDROID__) # define fseeko64 fseeko #endif - #include "read_dta.h" #include "read_pre13_dta.h" diff -Nru r-cran-readstata13-0.9.2/man/read.dta13.Rd r-cran-readstata13-0.10.0/man/read.dta13.Rd --- r-cran-readstata13-0.9.2/man/read.dta13.Rd 2018-05-26 06:28:32.000000000 +0000 +++ r-cran-readstata13-0.10.0/man/read.dta13.Rd 2021-05-25 10:08:08.000000000 +0000 @@ -4,11 +4,24 @@ \alias{read.dta13} \title{Read Stata Binary Files} \usage{ -read.dta13(file, convert.factors = TRUE, generate.factors = FALSE, - encoding = "UTF-8", fromEncoding = NULL, convert.underscore = FALSE, - missing.type = FALSE, convert.dates = TRUE, replace.strl = TRUE, - add.rownames = FALSE, nonint.factors = FALSE, select.rows = NULL, - select.cols = NULL, strlexport = FALSE, strlpath = ".") +read.dta13( + file, + convert.factors = TRUE, + generate.factors = FALSE, + encoding = "UTF-8", + fromEncoding = NULL, + convert.underscore = FALSE, + missing.type = FALSE, + convert.dates = TRUE, + replace.strl = TRUE, + add.rownames = FALSE, + nonint.factors = FALSE, + select.rows = NULL, + select.cols = NULL, + strlexport = FALSE, + strlpath = ".", + tz = "GMT" +) } \arguments{ \item{file}{\emph{character.} Path to the dta file you want to import.} @@ -23,8 +36,8 @@ \item{encoding}{\emph{character.} Strings can be converted from Windows-1252 or UTF-8 to system encoding. Options are "latin1" or "UTF-8" to specify -target encoding explicitly. Stata 14 and 15 files are UTF-8 encoded and may contain -strings which can't be displayed in the current locale. +target encoding explicitly. Stata 14, 15 and 16 files are UTF-8 encoded and +may contain strings which can't be displayed in the current locale. Set encoding=NULL to stop reencoding.} \item{fromEncoding}{\emph{character.} We expect strings to be encoded as @@ -56,12 +69,16 @@ value rows from 1:val are selected. If two values of a range are selected the rows in range will be selected.} -\item{select.cols}{\emph{character:} Vector of variables to select.} +\item{select.cols}{\emph{character.} Vector of variables to select.} -\item{strlexport}{\emph{logical:} Should strl content be exported as binary +\item{strlexport}{\emph{logical.} Should strl content be exported as binary files?} -\item{strlpath}{\emph{cahracter:} Path for strl export.} +\item{strlpath}{\emph{character.} Path for strl export.} + +\item{tz}{\emph{character.} time zone specification to be used for +POSIXct values. ‘""’ is the current time zone, and ‘"GMT"’ is UTC +(Universal Time, Coordinated).} } \value{ The function returns a data.frame with attributes. The attributes @@ -128,9 +145,15 @@ read.dta13 uses GPL 2 licensed code by Thomas Lumley and R-core members from foreign::read.dta(). } +\examples{ +\dontrun{ + library(readstata13) + r13 <- read.dta13("https://www.stata-press.com/data/r13/auto.dta") +} +} \references{ Stata Corp (2014): Description of .dta file format - \url{http://www.stata.com/help.cgi?dta} + \url{https://www.stata.com/help.cgi?dta} } \seealso{ \code{\link[foreign]{read.dta}} in package \code{foreign} and diff -Nru r-cran-readstata13-0.9.2/man/readstata13.Rd r-cran-readstata13-0.10.0/man/readstata13.Rd --- r-cran-readstata13-0.9.2/man/readstata13.Rd 2018-05-26 06:28:32.000000000 +0000 +++ r-cran-readstata13-0.10.0/man/readstata13.Rd 2021-05-25 09:58:32.000000000 +0000 @@ -3,7 +3,6 @@ \docType{package} \name{readstata13} \alias{readstata13} -\alias{readstata13-package} \title{Import Stata Data Files} \description{ Function to read the Stata file format into a data.frame. diff -Nru r-cran-readstata13-0.9.2/man/save.dta13.Rd r-cran-readstata13-0.10.0/man/save.dta13.Rd --- r-cran-readstata13-0.9.2/man/save.dta13.Rd 2018-05-26 06:28:32.000000000 +0000 +++ r-cran-readstata13-0.10.0/man/save.dta13.Rd 2021-05-25 10:02:45.000000000 +0000 @@ -4,10 +4,19 @@ \alias{save.dta13} \title{Write Stata Binary Files} \usage{ -save.dta13(data, file, data.label = NULL, time.stamp = TRUE, - convert.factors = TRUE, convert.dates = TRUE, tz = "GMT", - add.rownames = FALSE, compress = FALSE, version = 117, - convert.underscore = FALSE) +save.dta13( + data, + file, + data.label = NULL, + time.stamp = TRUE, + convert.factors = TRUE, + convert.dates = TRUE, + tz = "GMT", + add.rownames = FALSE, + compress = FALSE, + version = 117, + convert.underscore = FALSE +) } \arguments{ \item{data}{\emph{data.frame.} A data.frame Object.} @@ -28,7 +37,9 @@ \item{convert.dates}{\emph{logical.} If \code{TRUE}, dates will be converted to Stata date time format. Code from \code{foreign::write.dta}} -\item{tz}{\emph{character.} The name of the timezone convert.dates will use.} +\item{tz}{\emph{character.} time zone specification to be used for +POSIXct values and dates (if convert.dates is TRUE). ‘""’ is the current +time zone, and ‘"GMT"’ is UTC (Universal Time, Coordinated).} \item{add.rownames}{\emph{logical.} If \code{TRUE}, a new variable rownames will be added to the dta-file.} @@ -37,9 +48,10 @@ use all of Statas numeric-vartypes.} \item{version}{\emph{numeric.} Stata format for the resulting dta-file either -Stata version number (6 - 15) or the internal Stata dta-format (e.g. 117 for Stata 13). -Experimental support for large datasets: Use version="15mp" to save the dataset -in the new Stata 15/MP file format. This feature is not thoroughly tested yet.} +Stata version number (6 - 16) or the internal Stata dta-format (e.g. 117 for +Stata 13). Experimental support for large datasets: Use version="15mp" to +save the dataset in the new Stata 15/16 MP file format. This feature is not +thoroughly tested yet.} \item{convert.underscore}{\emph{logical.} If \code{TRUE}, all non numerics or non alphabet characters will be converted to underscores.} @@ -64,9 +76,15 @@ \code{save.dta13} writes a Stata dta-file bytewise and saves the data into a dta-file. } +\examples{ +\dontrun{ + library(readstata13) + save.dta13(cars, file="cars.dta") +} +} \references{ Stata Corp (2014): Description of .dta file format - \url{http://www.stata.com/help.cgi?dta} + \url{https://www.stata.com/help.cgi?dta} } \seealso{ \code{\link[foreign]{read.dta}} in package \code{foreign} and diff -Nru r-cran-readstata13-0.9.2/man/saveToExport.Rd r-cran-readstata13-0.10.0/man/saveToExport.Rd --- r-cran-readstata13-0.9.2/man/saveToExport.Rd 2017-04-23 09:59:48.000000000 +0000 +++ r-cran-readstata13-0.10.0/man/saveToExport.Rd 2021-05-25 09:58:32.000000000 +0000 @@ -2,7 +2,7 @@ % Please edit documentation in R/tools.R \name{saveToExport} \alias{saveToExport} -\title{Check if numeric vector can be expressed as interger vector} +\title{Check if numeric vector can be expressed as integer vector} \usage{ saveToExport(x) } diff -Nru r-cran-readstata13-0.9.2/man/stbcal.Rd r-cran-readstata13-0.10.0/man/stbcal.Rd --- r-cran-readstata13-0.9.2/man/stbcal.Rd 2017-04-23 09:59:48.000000000 +0000 +++ r-cran-readstata13-0.10.0/man/stbcal.Rd 2021-05-25 09:58:32.000000000 +0000 @@ -7,13 +7,13 @@ stbcal(stbcalfile) } \arguments{ -\item{stbcalfile}{\emph{stbcal-file} Stata buisness calendar file created by +\item{stbcalfile}{\emph{stbcal-file} Stata business calendar file created by Stata.} } \value{ Returns a data.frame with two cols: \describe{ -\item{range:}{The date matching the buisnesdate. Date format.} +\item{range:}{The date matching the businessdate. Date format.} \item{buisdays:}{The Stata business calendar day. Integer format.} } } @@ -21,7 +21,7 @@ Create conversion table for business calendar dates. } \details{ -Stata 12 introduced business calender format. Business dates are +Stata 12 introduced business calendar format. Business dates are integer numbers in a certain range of days, weeks, months or years. In this range some days are omitted (e.g. weekends or holidays). If a business calendar was created, a stbcal file matching this calendar was created. This diff -Nru r-cran-readstata13-0.9.2/man/varlabel.Rd r-cran-readstata13-0.10.0/man/varlabel.Rd --- r-cran-readstata13-0.9.2/man/varlabel.Rd 2018-05-26 06:28:32.000000000 +0000 +++ r-cran-readstata13-0.10.0/man/varlabel.Rd 2021-05-25 09:58:32.000000000 +0000 @@ -2,7 +2,7 @@ % Please edit documentation in R/tools.R \name{varlabel} \alias{varlabel} -\alias{varlabel} +\alias{'varlabel<-'} \alias{varlabel<-} \title{Get and assign Stata Variable Labels} \usage{ @@ -19,7 +19,7 @@ \item{lang}{\emph{character.} Label language. Default language defined by \code{\link{get.lang}} is used if NA} -\item{value}{\emph{character vector.} Vector of variable names.} +\item{value}{\emph{character vector.} Character vector of size ncol(data) with variable names.} } \value{ Returns an named vector of variable labels @@ -27,6 +27,25 @@ \description{ Retrieve or set variable labels for a dataset. } +\examples{ +dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"), + convert.factors=FALSE) + +# display variable labels +varlabel(dat) + +# display german variable labels +varlabel(dat, lang="de") + +# display german variable label for brand +varlabel(dat, var.name = "brand", lang="de") + +# define new variable labels +varlabel(dat) <- letters[1:ncol(dat)] + +# display new variable labels +varlabel(dat) +} \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} diff -Nru r-cran-readstata13-0.9.2/MD5 r-cran-readstata13-0.10.0/MD5 --- r-cran-readstata13-0.9.2/MD5 2018-05-26 22:01:16.000000000 +0000 +++ r-cran-readstata13-0.10.0/MD5 2021-05-25 11:50:02.000000000 +0000 @@ -1,14 +1,17 @@ -937bcbd9a32c2dbb8ab22fde4102ddf6 *DESCRIPTION +9b35cf345fd743ba509e9ecbc90467b4 *DESCRIPTION e8c1458438ead3c34974bc0be3a03ed6 *LICENSE -a12456c41e1ef128bcbbf3a2177ed986 *NAMESPACE -07789d5571c706690651f1e17339f835 *NEWS +89f7f4f07a2f632c76823d08c035ddd4 *NAMESPACE +4a03bc5a223f02097fc02a1ab8957b49 *NEWS 8aa1ee878b92e4641e471dc32ee3a0d2 *R/RcppExports.R -bc600119e8b1dbcd7a8715aa40d959cf *R/dbcal.R -8f44ee6ee622eab57ad533ad22974cfe *R/read.R +ae5dbe5847caa402b29ecb324c3b212d *R/convert.R +1d7c7f4e7ffba449e34c3f5e853a66ed *R/dbcal.R +706b347813ce92b53de069167a33b119 *R/read.R 29ce11e9849d698bed87f5fc9b1b4c72 *R/readstata13.R -3511e8b87b1bc0327b854f254f734b6a *R/save.R -3138b670a9496c66258c107d7c652e3a *R/tools.R -c2fcfbe243a55982b4ad6e281574bc44 *README.md +1939ad94a02afe9ef6f706a884204720 *R/save.R +f128a2293b2cd4f45d6d9cde52cc2945 *R/tools.R +57e73a50cda951c773acd7943f4b4e07 *README.md +df1d0cf8d5ec6e6a2c6ace8114d7544c *inst/extdata/datetime.do +f8f52bd111449bb5310fb0cbd728926e *inst/extdata/datetime.dta a885e4f610350825892c92d3ca858889 *inst/extdata/encode.do 1165031bfee6c9e6ce501baa24e3a7f1 *inst/extdata/encode.dta 23c478f4b7d45b7aabcc48a0f5795480 *inst/extdata/encodecp.dta @@ -23,13 +26,12 @@ 295396a1a55b4326d89d2c2a86e90441 *inst/extdata/sp500.stbcal 389e33d907d10ec8efe41250f99221ab *inst/extdata/statacar.do f899f302225e099f83de7ac42f0623f2 *inst/extdata/statacar.dta -a4248360860c7223c04f2bda517994fd *inst/extdata/test.zip 1e29776eed16f780a9beee2d11ada4d4 *inst/extdata/underscore.do 18d63a094394dd93f3b4363fcd09f322 *inst/extdata/underscore.dta be3bdd7d0414f9b7b9770645b944320a *inst/include/read_data.h 0a650c8fbc76b901c624289b0676e825 *inst/include/read_dta.h a04dcc41e345cae0fa9351ce678c27e6 *inst/include/read_pre13_dta.h -3ffe5e453924a7c642a6bc5c1086ccc3 *inst/include/readstata.h +287d81c8b2bb45fa66be3e04a5b4fa41 *inst/include/readstata.h 36c0ee1660a90fb2d8b961c558c3d145 *inst/include/statadefines.h fc806a4ead84a5b3c6bb4f00af91ebf3 *inst/include/swap_endian.h 3e936e81cffb62a119785e96d210b1e9 *man/as.caldays.Rd @@ -39,23 +41,23 @@ 5a4700ab8b6e29b9ad1fd134a6c62977 *man/get.lang.Rd 3b2bb969adb3f8a26d5741cf467d470b *man/get.origin.codes.Rd f3c2ac88ad9ea19659f1d7c35f3d0ac9 *man/maxchar.Rd -f78b325f1795fd27452a08b237029e04 *man/read.dta13.Rd -71f1e3ccae8375b9365488ba436eb934 *man/readstata13.Rd -628c37cd5fa01a2bd1c4b96de503fe0e *man/save.dta13.Rd -bc27b06c1c1e566f8c3bcb49eadd61b6 *man/saveToExport.Rd +1334f44f140c9a8736ab822dd9f825b1 *man/read.dta13.Rd +d85b47ecfc22e6d4dc8816e1129359af *man/readstata13.Rd +3bb7150ea0902a72d51c1c89b455fe8e *man/save.dta13.Rd +f403ecad1a2ea32a3ffd1af54e026cb4 *man/saveToExport.Rd 9dd790746cc83f755b65139c745e9c93 *man/set.label.Rd 67e025e2c70d6e96d54703a7b6654663 *man/set.lang.Rd -ec29e8c38f6333f0f2ce706a95acff83 *man/stbcal.Rd -12fa4a2647f8413110876599ffee13dc *man/varlabel.Rd +086d928578359d5c3b6fb0495451eb16 *man/stbcal.Rd +69dd3e9f18ec9f2187d2152c8b830d86 *man/varlabel.Rd 5a37728c526310cfca2804ea6c29fb51 *src/Makevars 5a37728c526310cfca2804ea6c29fb51 *src/Makevars.win e18c84894d06a1255a18152473cb82f1 *src/RcppExports.cpp fd42fb972ec16877b4aa18510233c8db *src/read.cpp -eb3d0bf081e4bcf4908d00bc75d61f51 *src/read_data.cpp -fb7b806ab067a7ca7f2c51d36596a558 *src/read_dta.cpp -1f235376919f05ec7cb19f91597efd77 *src/read_pre13_dta.cpp -fe0af5c98deb865283da1f241f6e0d8e *src/save_dta.cpp +5e0a6cf796fa828ed3a85484b8df88bc *src/read_data.cpp +20e7642cf7d997b02d12590493bee053 *src/read_dta.cpp +980f45a30c16aff1bfc07baa30c545da *src/read_pre13_dta.cpp +a0c0acd8b360c5be2563ec3376b1d2a5 *src/save_dta.cpp a9bcdd02d2270aaea3c54797bc5ce735 *src/save_pre13_dta.cpp 4dd91c288ce11a342d68442481e65e8b *tests/testthat.R -ef487b80f9222b1adb3c3ee8dfd8ddde *tests/testthat/test_read.R -b6696e529eead060c3c2475213a7c002 *tests/testthat/test_save.R +3ce92e2f9379e6baf4b02b4d556d7ce1 *tests/testthat/test_read.R +7175fa0cc47b1006e17ed6a7969f982b *tests/testthat/test_save.R diff -Nru r-cran-readstata13-0.9.2/NAMESPACE r-cran-readstata13-0.10.0/NAMESPACE --- r-cran-readstata13-0.9.2/NAMESPACE 2018-05-26 06:28:32.000000000 +0000 +++ r-cran-readstata13-0.10.0/NAMESPACE 2021-05-25 10:50:40.000000000 +0000 @@ -18,7 +18,6 @@ importFrom(stats,na.omit) importFrom(stats,setNames) importFrom(utils,download.file) -importFrom(utils,localeToCharset) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) useDynLib(readstata13) diff -Nru r-cran-readstata13-0.9.2/NEWS r-cran-readstata13-0.10.0/NEWS --- r-cran-readstata13-0.9.2/NEWS 2018-05-26 08:32:28.000000000 +0000 +++ r-cran-readstata13-0.10.0/NEWS 2021-05-25 10:49:54.000000000 +0000 @@ -1,3 +1,12 @@ +[0.10.0] +- fix sortlist attribute for dta format 119 +- fix compress option. In the past, unwanted conversions to integer type could occur. +- fix encoding issues in variable and data labels +- fix for reading/writing of format 119 +- fix build on FreeBSD +- new feature: improved handling of time and date formats +- new feature: collect warnings from read.dta13 + [0.9.2] - fix build on OSX diff -Nru r-cran-readstata13-0.9.2/R/convert.R r-cran-readstata13-0.10.0/R/convert.R --- r-cran-readstata13-0.9.2/R/convert.R 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-readstata13-0.10.0/R/convert.R 2021-05-25 09:58:32.000000000 +0000 @@ -0,0 +1,77 @@ +# +# Copyright (C) 2014-2021 Jan Marvin Garbuszus and Sebastian Jeworutzki +# Copyright (C) of 'convert_dt_c' and 'convert_dt_C' Thomas Lumley +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . + +convert_dt_c <- function(x, tz) { + as.POSIXct((x + 0.1) / 1000, # avoid rounding down + origin = "1960-01-01", + tz = tz) +} + +convert_dt_C <- function(x, tz) { + ls <- .leap.seconds + seq_along(.leap.seconds) + 315619200 + z <- (x + 0.1) / 1000 # avoid rounding down + z <- z - rowSums(outer(z, ls, ">=")) + as.POSIXct(z, origin = "1960-01-01", tz = tz) +} + +# Convert Stata format %tm integer to R date. +# Uses the first day of month. +# +# @param x element to be converted +# @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} +# @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} +convert_dt_m <- function(x) { + z <- x / 12 # divide by 12 to create years + mth <- x %% 12 + 1 + yr <- 1960 + floor(z) + + z <- paste0(yr, "-", mth, "-1") + z <- as.Date(z, "%Y-%m-%d") + if (any(is.na(z))) warning("conversion of %tm failed") + z +} + +# Convert Stata format %tq integer to R date. +# Uses the first month and day of quarter. +# +# @param x element to be converted +# @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} +# @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} +convert_dt_q <- function(x) { + z <- x / 4 + yr <- 1960 + floor(z) + + qrt <- x %% 4 + 1 + qrt_month <- c(1, 4, 7, 10) + + z <- paste0(yr, "-", qrt_month[qrt], "-1") + z <- as.Date(z, "%Y-%m-%d") + if (any(is.na(z))) warning("conversion of %tq failed") + z +} + +# Convert Stata format %ty integer to R date +# Uses the first month and day of year. +# +# @param x element to be converted +# @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} +# @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} +convert_dt_y <- function(x) { + z <- as.Date(paste0(x, "-1-1"), "%Y-%m-%d") + if (any(is.na(z))) warning("conversion of %ty failed") + z +} diff -Nru r-cran-readstata13-0.9.2/R/dbcal.R r-cran-readstata13-0.10.0/R/dbcal.R --- r-cran-readstata13-0.9.2/R/dbcal.R 2018-05-26 06:28:32.000000000 +0000 +++ r-cran-readstata13-0.10.0/R/dbcal.R 2021-05-25 09:58:32.000000000 +0000 @@ -19,14 +19,14 @@ #' #' Create conversion table for business calendar dates. #' -#' @param stbcalfile \emph{stbcal-file} Stata buisness calendar file created by +#' @param stbcalfile \emph{stbcal-file} Stata business calendar file created by #' Stata. #' @return Returns a data.frame with two cols: #' \describe{ -#' \item{range:}{The date matching the buisnesdate. Date format.} +#' \item{range:}{The date matching the businessdate. Date format.} #' \item{buisdays:}{The Stata business calendar day. Integer format.} #' } -#' @details Stata 12 introduced business calender format. Business dates are +#' @details Stata 12 introduced business calendar format. Business dates are #' integer numbers in a certain range of days, weeks, months or years. In this #' range some days are omitted (e.g. weekends or holidays). If a business #' calendar was created, a stbcal file matching this calendar was created. This diff -Nru r-cran-readstata13-0.9.2/R/read.R r-cran-readstata13-0.10.0/R/read.R --- r-cran-readstata13-0.9.2/R/read.R 2018-05-26 06:28:32.000000000 +0000 +++ r-cran-readstata13-0.10.0/R/read.R 2021-05-25 10:07:05.000000000 +0000 @@ -1,4 +1,5 @@ -# Copyright (C) 2014-2017 Jan Marvin Garbuszus and Sebastian Jeworutzki +# +# Copyright (C) 2014-2021 Jan Marvin Garbuszus and Sebastian Jeworutzki # Copyright (C) of 'convert.dates' and 'missing.types' Thomas Lumley # # This program is free software; you can redistribute it and/or modify it @@ -28,8 +29,8 @@ #' "label_(integer code)". #' @param encoding \emph{character.} Strings can be converted from Windows-1252 #' or UTF-8 to system encoding. Options are "latin1" or "UTF-8" to specify -#' target encoding explicitly. Stata 14 and 15 files are UTF-8 encoded and may contain -#' strings which can't be displayed in the current locale. +#' target encoding explicitly. Stata 14, 15 and 16 files are UTF-8 encoded and +#' may contain strings which can't be displayed in the current locale. #' Set encoding=NULL to stop reencoding. #' @param fromEncoding \emph{character.} We expect strings to be encoded as #' "CP1252" for Stata Versions 13 and older. For dta files saved with Stata 14 @@ -52,10 +53,13 @@ #' @param select.rows \emph{integer.} Vector of one or two numbers. If single #' value rows from 1:val are selected. If two values of a range are selected #' the rows in range will be selected. -#' @param select.cols \emph{character:} Vector of variables to select. -#' @param strlexport \emph{logical:} Should strl content be exported as binary +#' @param select.cols \emph{character.} Vector of variables to select. +#' @param strlexport \emph{logical.} Should strl content be exported as binary #' files? -#' @param strlpath \emph{cahracter:} Path for strl export. +#' @param strlpath \emph{character.} Path for strl export. +#' @param tz \emph{character.} time zone specification to be used for +#' POSIXct values. ‘""’ is the current time zone, and ‘"GMT"’ is UTC +#' (Universal Time, Coordinated). #' #' @details If the filename is a url, the file will be downloaded as a temporary #' file and read afterwards. @@ -117,7 +121,12 @@ #' versions < 13 and \code{read_dta} in package \code{haven} for Stata version #' >= 13. #' @references Stata Corp (2014): Description of .dta file format -#' \url{http://www.stata.com/help.cgi?dta} +#' \url{https://www.stata.com/help.cgi?dta} +#' @examples +#' \dontrun{ +#' library(readstata13) +#' r13 <- read.dta13("https://www.stata-press.com/data/r13/auto.dta") +#' } #' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} #' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} #' @useDynLib readstata13 @@ -128,9 +137,12 @@ encoding = "UTF-8", fromEncoding=NULL, convert.underscore = FALSE, missing.type = FALSE, convert.dates = TRUE, replace.strl = TRUE, - add.rownames = FALSE, nonint.factors=FALSE, + add.rownames = FALSE, nonint.factors = FALSE, select.rows = NULL, select.cols = NULL, - strlexport = FALSE, strlpath = ".") { + strlexport = FALSE, strlpath = ".", tz = "GMT") { + + # List to collect all warnings from factor conversion + collected_warnings <- list(misslab = NULL, floatfact = NULL) # Check if path is a url if (length(grep("^(http|ftp|https)://", file))) { @@ -143,7 +155,7 @@ filepath <- get.filepath(file) } if (!file.exists(filepath)) - return(message("File not found.")) + stop("File not found.") @@ -252,6 +264,7 @@ } var.labels <- attr(data, "var.labels") + datalabel <- attr(data, "data.label") ## Encoding if(!is.null(encoding)) { @@ -262,6 +275,9 @@ if(attr(data, "version") >= 118L) fromEncoding <- "UTF-8" } + + attr(data, "data.label") <- read.encoding(datalabel, fromEncoding, + encoding) # varnames names(data) <- read.encoding(names(data), fromEncoding, encoding) @@ -327,16 +343,6 @@ if (convert.dates) { - convert_dt_c <- function(x) - as.POSIXct((x + 0.1) / 1000, origin = "1960-01-01") # avoid rounding down - - convert_dt_C <- function(x) { - ls <- .leap.seconds + seq_along(.leap.seconds) + 315619200 - z <- (x + 0.1) / 1000 # avoid rounding down - z <- z - rowSums(outer(z, ls, ">=")) - as.POSIXct(z, origin = "1960-01-01") - } - ff <- attr(data, "formats") ## dates <- grep("%-*d", ff) ## Stata 12 introduced 'business dates' @@ -352,8 +358,11 @@ base <- structure(-3653L, class = "Date") # Stata dates are integer vars for (v in dates) data[[v]] <- structure(base + data[[v]], class = "Date") - for (v in grep("%tc", ff)) data[[v]] <- convert_dt_c(data[[v]]) - for (v in grep("%tC", ff)) data[[v]] <- convert_dt_C(data[[v]]) + for (v in grep("%tc", ff)) data[[v]] <- convert_dt_c(data[[v]], tz) + for (v in grep("%tC", ff)) data[[v]] <- convert_dt_C(data[[v]], tz) + for (v in grep("%tm", ff)) data[[v]] <- convert_dt_m(data[[v]]) + for (v in grep("%tq", ff)) data[[v]] <- convert_dt_q(data[[v]]) + for (v in grep("%ty", ff)) data[[v]] <- convert_dt_y(data[[v]]) } if (convert.factors) { @@ -366,22 +375,24 @@ if (labname %in% names(label)) { if((vartype == sdouble | vartype == sfloat)) { if(!nonint.factors) { - warning(paste0("\n ",vnames[i], ":\n Factor codes of type double ", - "or float detected - no labels assigned.\n Set ", - "option nonint.factors to TRUE to assign labels ", - "anyway.\n")) + + # collect variables which need a warning + collected_warnings[["floatfact"]] <- c(collected_warnings[["floatfact"]], vnames[i]) next } } # get unique values / omit NA - varunique <- na.omit(unique(data[, i])) + varunique <- unique(as.character(na.omit(data[, i]))) #check for duplicated labels labcount <- table(names(labtable)) if(any(labcount > 1)) { - warning(paste0("\n ",vnames[i], ":\n Duplicated factor levels ", - "detected - generating unique labels.\n")) + + # collect variables which need a warning + collected_warnings[["dublifact"]] <- c(collected_warnings[["dublifact"]], vnames[i]) + labdups <- names(labtable) %in% names(labcount[labcount > 1]) + # generate unique labels from assigned label and code number names(labtable)[labdups] <- paste0(names(labtable)[labdups], "_(", labtable[labdups], ")") @@ -393,21 +404,22 @@ labels=names(labtable)) # else generate labels from codes } else if (generate.factors) { - names(varunique) <- as.character(varunique) + + names(varunique) <- varunique gen.lab <- sort(c(varunique[!varunique %in% labtable], labtable)) data[, i] <- factor(data[, i], levels=gen.lab, labels=names(gen.lab)) - + # add generated labels to label.table gen.lab.name <- paste0("gen_",vnames[i]) - attr(data, "label.table")[[gen.lab.name]] <- gen.lab + attr(data, "label.table")[[gen.lab.name]] <- gen.lab attr(data, "val.labels")[i] <- gen.lab.name } else { - warning(paste0("\n ",vnames[i], ":\n Missing factor labels - no ", - "labels assigned.\n Set option generate.factors=T to ", - "generate labels.")) + # collect variables which need a warning + collected_warnings[["misslab"]] <- c(collected_warnings[["mislab"]], + vnames[i]) } } } @@ -418,5 +430,46 @@ data[[1]] <- NULL } + ## issue warnings + #dublifact + if(length(collected_warnings[["dublifact"]]) > 0) { + dublifactvars <- paste(collected_warnings[["dublifact"]], collapse = ", ") + + warning(paste0("\n Duplicated factor levels for variables\n\n", + paste(strwrap(dublifactvars, + width = 0.6 * getOption("width"), + prefix = " "), + collapse = "\n"), + "\n\n Unique labels for these variables have been generated.\n")) + } + + # floatfact + if(length(collected_warnings[["floatfact"]]) > 0) { + + floatfactvars <- paste(collected_warnings[["floatfact"]], collapse = ", ") + + warning(paste0("\n Factor codes of type double or float detected in variables\n\n", + paste(strwrap(floatfactvars, + width = 0.6 * getOption("width"), + prefix = " "), + collapse = "\n"), + "\n\n No labels have been assigned.", + "\n Set option 'nonint.factors = TRUE' to assign labels anyway.\n")) + } + # misslab + if(length(collected_warnings[["misslab"]]) > 0) { + + misslabvars <- paste(collected_warnings[["misslab"]], collapse = ", ") + + warning(paste0("\n Missing factor labels for variables\n\n", + paste(strwrap(misslabvars, + width = 0.6 * getOption("width"), + prefix = " "), + collapse = "\n"), + "\n\n No labels have beend assigned.", + "\n Set option 'generate.factors=TRUE' to generate labels.")) + } + + # return data.frame return(data) } diff -Nru r-cran-readstata13-0.9.2/R/save.R r-cran-readstata13-0.10.0/R/save.R --- r-cran-readstata13-0.9.2/R/save.R 2018-05-26 06:28:32.000000000 +0000 +++ r-cran-readstata13-0.10.0/R/save.R 2021-05-25 10:01:49.000000000 +0000 @@ -1,5 +1,5 @@ # -# Copyright (C) 2014-2017 Jan Marvin Garbuszus and Sebastian Jeworutzki +# Copyright (C) 2014-2021 Jan Marvin Garbuszus and Sebastian Jeworutzki # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the @@ -33,15 +33,18 @@ #' to Stata date time format. Code from \code{foreign::write.dta} #' @param convert.underscore \emph{logical.} If \code{TRUE}, all non numerics or #' non alphabet characters will be converted to underscores. -#' @param tz \emph{character.} The name of the timezone convert.dates will use. +#' @param tz \emph{character.} time zone specification to be used for +#' POSIXct values and dates (if convert.dates is TRUE). ‘""’ is the current +#' time zone, and ‘"GMT"’ is UTC (Universal Time, Coordinated). #' @param add.rownames \emph{logical.} If \code{TRUE}, a new variable rownames #' will be added to the dta-file. #' @param compress \emph{logical.} If \code{TRUE}, the resulting dta-file will #' use all of Statas numeric-vartypes. #' @param version \emph{numeric.} Stata format for the resulting dta-file either -#' Stata version number (6 - 15) or the internal Stata dta-format (e.g. 117 for Stata 13). -#' Experimental support for large datasets: Use version="15mp" to save the dataset -#' in the new Stata 15/MP file format. This feature is not thoroughly tested yet. +#' Stata version number (6 - 16) or the internal Stata dta-format (e.g. 117 for +#' Stata 13). Experimental support for large datasets: Use version="15mp" to +#' save the dataset in the new Stata 15/16 MP file format. This feature is not +#' thoroughly tested yet. #' @return The function writes a dta-file to disk. The following features of the #' dta file format are supported: #' \describe{ @@ -60,11 +63,15 @@ #' \code{memisc} for dta files from Stata versions < 13 and \code{read_dta} in #' package \code{haven} for Stata version >= 13. #' @references Stata Corp (2014): Description of .dta file format -#' \url{http://www.stata.com/help.cgi?dta} +#' \url{https://www.stata.com/help.cgi?dta} +#' @examples +#' \dontrun{ +#' library(readstata13) +#' save.dta13(cars, file="cars.dta") +#' } #' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} #' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} #' @useDynLib readstata13 -#' @importFrom utils localeToCharset #' @export save.dta13 <- function(data, file, data.label=NULL, time.stamp=TRUE, convert.factors=TRUE, convert.dates=TRUE, tz="GMT", @@ -78,9 +85,9 @@ stop("Path is invalid. Possibly a non-existing directory.") # Allow writing version as Stata version not Stata format - if (version=="15mp") + if (version=="15mp" | version=="16mp") version <- 119 - if (version==15L) + if (version==15L | version==16L) version <- 118 if (version==14L) version <- 118 @@ -98,7 +105,7 @@ version <- 108 if (version == 119) - message("Support for Stata 15/MP (119) format is experimental and not thoroughly tested.") + message("Support for Stata 15/16 MP (119) format is experimental and not thoroughly tested.") if (version<102 | version == 109 | version == 116 | version>119) stop("Version mismatch abort execution. No Data was saved.") @@ -127,13 +134,15 @@ if(!is.data.frame(data)) { stop("Object is not of class data.frame.") } + + is_utf8 <- l10n_info()[["UTF-8"]] # Is recoding necessary? if (version<=117) { # Reencoding is always needed doRecode <- TRUE toEncoding <- "CP1252" - } else if (toupper(localeToCharset()[1])!="UTF-8") { + } else if (!is_utf8) { # If R runs in a non UTF-8 locale and Stata > 13 doRecode <- TRUE toEncoding <- "UTF-8" @@ -172,6 +181,17 @@ data[[v]] <- as.integer(data[[v]]) vartypen <- vtyp <- sapply(data, class) + # Identify POSIXt + posix_datetime <- which(sapply(data, + function(x) inherits(x, "POSIXt"))) + vartypen[posix_datetime] <- vtyp[posix_datetime] <- "POSIXt" + + # Change origin to 1960-01-01 + # times: seconds from 1970-01-01 + 10 years (new origin 1960-01-01) * 1000 = miliseconds + # go back 1h + for (v in names(vartypen[vartypen == "POSIXt"])) + data[[v]] <- (as.double(data[[v]]) + 315622800 - 60*60)*1000 + if (convert.factors){ if (version < 106) { @@ -208,7 +228,7 @@ } attr(data, "label.table") <- rev(label.table) if (doRecode) { - valLabel <- save.encoding(valLabel, toEncoding) + valLabel <- sapply(valLabel, save.encoding, toEncoding) } attr(data, "vallabels") <- valLabel } else { @@ -224,13 +244,6 @@ data[[v]] <- as.vector( julian(data[[v]],as.Date("1960-1-1", tz = "GMT")) ) - dates <- which( - sapply(data, function(x) inherits(x,"POSIXt")) - ) - for (v in dates) - data[[v]] <- as.vector( - round(julian(data[[v]], ISOdate(1960, 1, 1, tz = tz))) - ) } # is.numeric is TRUE for integers @@ -254,10 +267,10 @@ # check if numerics can be stored as integers numToCompress <- sapply(data[ff], saveToExport) - + if (any(numToCompress)) { - saveToConvert <- names(ff[numToCompress]) - # replace numerics as intergers + saveToConvert <- names(data[ff])[numToCompress] + # replace numerics as integers data[saveToConvert] <- sapply(data[saveToConvert], as.integer) # recheck after update @@ -301,6 +314,7 @@ str.length <- sapply(data[vartypen == "character"], FUN=maxchar) str.length[str.length > sstr] <- sstrl + # vartypen for character for (v in names(vartypen[vartypen == "character"])) { # str.length[str.length > sstr] <- sstrl # no loop necessary! @@ -325,14 +339,21 @@ varnames <- names(data) lenvarnames <- sapply(varnames, nchar) - if (any (lenvarnames > 32) & version >= 117) { - message ("Varname to long. Resizing. Max size is 32.") - names(data) <- sapply(varnames, strtrim, width = 32) + maxlen <- 32 + if (version <= 108) + maxlen <- 8 + if (version >= 118) + maxlen <- 128 + + if (any (lenvarnames > maxlen)) { + message ("Varname to long. Resizing. Max size is ", maxlen, ".") + names(data) <- sapply(varnames, strtrim, width = maxlen) } # Stata format "%9,0g" means european format formats <- vartypen formats[vtyp == "Date"] <- "%td" + formats[vtyp == "POSIXt"] <- "%tc" formats[formats == sdouble] <- "%9.0g" formats[formats == sfloat] <- "%9.0g" formats[formats == slong] <- "%9.0g" @@ -340,6 +361,7 @@ formats[formats == sbyte] <- "%9.0g" formats[vartypen >= 0 & vartypen <= sstr] <- paste0("%", formats[vartypen >= 0 & vartypen <= sstr], "s") + formats[formats == sstrl] <- "%9s" attr(data, "formats") <- formats @@ -382,7 +404,12 @@ # attached. In this case the last variable label has a non existing variable # label which will crash our Rcpp code. Since varlabels do not respect the # ordering inside the data frame, we simply drop them. + varlabels <- attr(data, "var.labels") + + if (doRecode) { + attr(data, "var.labels") <- save.encoding(varlabels, toEncoding) + } if (!is.null(varlabels) & (length(varlabels)!=ncol(data))) { attr(data, "var.labels") <- NULL warning("Number of variable labels does not match number of variables. diff -Nru r-cran-readstata13-0.9.2/R/tools.R r-cran-readstata13-0.10.0/R/tools.R --- r-cran-readstata13-0.9.2/R/tools.R 2018-05-26 06:28:32.000000000 +0000 +++ r-cran-readstata13-0.10.0/R/tools.R 2021-05-25 09:58:32.000000000 +0000 @@ -1,5 +1,5 @@ # -# Copyright (C) 2014-2015 Jan Marvin Garbuszus and Sebastian Jeworutzki +# Copyright (C) 2014-2021 Jan Marvin Garbuszus and Sebastian Jeworutzki # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the @@ -28,9 +28,16 @@ } save.encoding <- function(x, encoding) { - iconv(x, - to=encoding, - sub="byte") + sapply(x, function(s) + ifelse(Encoding(s) == "unknown", + iconv(s, + to=encoding, + sub="byte"), + iconv(s, from=Encoding(s), + to=encoding, + sub="byte") + ) + ) } # Function to check if directory exists @@ -284,12 +291,30 @@ #' for all variables. #' @param lang \emph{character.} Label language. Default language defined by #' \code{\link{get.lang}} is used if NA -#' @param value \emph{character vector.} Vector of variable names. +#' @param value \emph{character vector.} Character vector of size ncol(data) with variable names. #' @return Returns an named vector of variable labels #' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} #' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} #' @aliases varlabel #' @aliases 'varlabel<-' +#' @examples +#' dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"), +#' convert.factors=FALSE) +#' +#' # display variable labels +#' varlabel(dat) +#' +#' # display german variable labels +#' varlabel(dat, lang="de") +#' +#' # display german variable label for brand +#' varlabel(dat, var.name = "brand", lang="de") +#' +#' # define new variable labels +#' varlabel(dat) <- letters[1:ncol(dat)] +#' +#' # display new variable labels +#' varlabel(dat) NULL #' @rdname varlabel @@ -316,11 +341,11 @@ #' @rdname varlabel #' @export 'varlabel<-' <- function(dat, value) { - nlabs <- length(attr(dat, "var.labels")) + nlabs <- ncol(dat) if (length(value)==nlabs) { - attr(x, "var.labels") <- value + attr(dat, "var.labels") <- value } else { - warning(paste("Vector of new labels must have",nlabs,"entries.")) + warning(paste("Vector of new labels must have", nlabs, "entries.")) } dat } @@ -448,14 +473,16 @@ } } -#' Check if numeric vector can be expressed as interger vector +#' Check if numeric vector can be expressed as integer vector #' #' Compression can reduce numeric vectors as integers if the vector does only #' contain integer type data. #' #' @param x vector of data frame saveToExport <- function(x) { - isTRUE(all.equal(x, as.integer(x))) + ifelse(any(is.infinite(x)), FALSE, + ifelse(any(!is.na(x) & (x > .Machine$integer.max | x < -.Machine$integer.max)), FALSE, + isTRUE(all.equal(x, as.integer(x))))) } diff -Nru r-cran-readstata13-0.9.2/README.md r-cran-readstata13-0.10.0/README.md --- r-cran-readstata13-0.9.2/README.md 2018-05-26 08:43:15.000000000 +0000 +++ r-cran-readstata13-0.10.0/README.md 2021-05-25 10:49:43.000000000 +0000 @@ -1,7 +1,11 @@ # readstata13 -Package to read and write all Stata file formats (version 15 and older) into a -R data.frame. The dta file format versions 102 to 118 are supported. +[![CRAN status](http://www.r-pkg.org/badges/version/readstata13)](https://cran.r-project.org/package=readstata13) +[![Build status](https://github.com/sjewo/readstata13/workflows/R-CMD-check/badge.svg)](https://github.com/sjewo/readstata13/actions?workflow=R-CMD-check) +[![CRAN Downloads](http://cranlogs.r-pkg.org/badges/readstata13)](https://cran.r-project.org/package=readstata13) + +Package to read and write all Stata file formats (version 17 and older) into a +R data.frame. The dta file format versions 102 to 119 are supported. The function ```read.dta``` from the foreign package imports only dta files from Stata versions <= 12. Due to the different structure and features of dta 117 @@ -14,7 +18,7 @@ ## Installation -The package is now hosted on CRAN. +The package is hosted on CRAN. ```R install.packages("readstata13") ``` @@ -27,20 +31,20 @@ ``` ## Development Version -To install the current release from github you need the plattform specific build +To install the current release from github you need the platform specific build tools. On Windows a current installation of [Rtools](https://cran.r-project.org/bin/windows/Rtools/) is necessary, while OS X users need to install -[Xcode](https://itunes.apple.com/us/app/xcode/id497799835). +[Xcode](https://apps.apple.com/us/app/xcode/id497799835). ```R # install.packages("devtools") -devtools::install_github("sjewo/readstata13", ref="0.9.2") +devtools::install_github("sjewo/readstata13", ref="0.10.0") ``` Older Versions of devtools require a username option: ```R -install_github("readstata13", username="sjewo", ref="0.9.2") +install_github("readstata13", username="sjewo", ref="0.10.0") ``` To install the current development version from github: @@ -50,84 +54,91 @@ ``` -## Current Status - -[![Build Status](https://travis-ci.org/sjewo/readstata13.svg?branch=master)](https://travis-ci.org/sjewo/readstata13) -[![CRAN Downloads](http://cranlogs.r-pkg.org/badges/readstata13)](https://cran.r-project.org/package=readstata13) - -### Changelog and Features +## Changelog and Features | Version | Changes | | ------ | ---------------------------------------------------- | + | 0.10.0 | fix for reading/writing of format 119 | + | | fix sortlist attribute for dta format 119 | + | | fix compress option. In the past, unwanted conversions to integer type could occur.| + | | fix encoding issues in variable and data labels | + | | fix build on FreeBSD | + | | new feature: improved handling of time and date formats | + | | new feature: collect warnings from read.dta13 | + | | | | 0.9.2 | Fix Build on MacOS X | | | | | 0.9.1 | Allow reading only pre-selected variables | - | 0.9.1 | Experimental support for format 119 | - | 0.9.1 | Improvements to partial reading. Idea by Kevin Jin | - | 0.9.1 | Export of binary data from dta-files | - | 0.9.1 | new function get.label.tables() to show all Stata label sets | - | 0.9.1 | Fix check for duplicate labels and in set.lang() | + | | Experimental support for format 119 | + | | Improvements to partial reading. Idea by Kevin Jin | + | | Export of binary data from dta-files | + | | new function get.label.tables() to show all Stata label sets | + | | Fix check for duplicate labels and in set.lang() | | | | 0.9.0 | Generate unique factor labels to prevent errors in factor definition | - | 0.9.0 | check interrupt for long read. Patch by Giovanni Righi | - | 0.9.0 | Updates to notes, roxygen and register | - | 0.9.0 | Fixed size of character length. Bug reported by Yiming (Paul) Li | - | 0.9.0 | Fix saving characters containing missings. Bug reported by Eivind H. Olsen | - | 0.9.0 | Adjustments to convert.underscore. Patch by luke-m-olson | - | 0.9.0 | Allow partial reading of selected rows | + | | check interrupt for long read. Patch by Giovanni Righi | + | | Updates to notes, roxygen and register | + | | Fixed size of character length. Bug reported by Yiming (Paul) Li | + | | Fix saving characters containing missings. Bug reported by Eivind H. Olsen | + | | Adjustments to convert.underscore. Patch by luke-m-olson | + | | Allow partial reading of selected rows | | | | 0.8.5 | Fix errors on big-endians systems | | | | 0.8.4 | Fix valgrind errors. converting from dta.write to writestr | - | 0.8.4 | Fix for empty data label | - | 0.8.4 | Make replace.strl default | + | | Fix for empty data label | + | | Make replace.strl default | | | | 0.8.3 | Restrict length of varnames to 32 chars for compatibility with Stata 14 | - | 0.8.3 | Add many function tests | - | 0.8.3 | Avoid converting of double to floats while writing compressed files | + | | Add many function tests | + | | Avoid converting of double to floats while writing compressed files | | | | 0.8.2 | Save NA values in character vector as empty string | - | 0.8.2 | Convert.underscore=T will convert all non-literal characters to underscores | - | 0.8.2 | Fix saving of Dates | - | 0.8.2 | Save with convert.factors by default | - | 0.8.2 | Test for NaN and inf values while writing missing values and replace with NA | - | 0.8.2 | Remove message about saving factors | + | | Convert.underscore=T will convert all non-literal characters to underscores | + | | Fix saving of Dates | + | | Save with convert.factors by default | + | | Test for NaN and inf values while writing missing values and replace with NA | + | | Remove message about saving factors | | | | 0.8.1 | Convert non-integer variables to factors (```nonint.factors=T```) | - | 0.8.1 | Handle large datasets | - | 0.8.1 | Working with strL variables is now a lot faster | + | | Handle large datasets | + | | Working with strL variables is now a lot faster | | | | | <0.8.1 | Reading data files from disk or url and create a data.frame | - | <0.8.1 | Saving dta files to disk - most features of the dta file format are supported | - | <0.8.1 | Assign variable names | - | <0.8.1 | Read the new strL strings and save them as attribute | - | <0.8.1 | Convert stata label to factors and save them as attribute | - | <0.8.1 | Read some meta data (timestamp, dataset label, formats,...) | - | <0.8.1 | Convert strings to system encoding | - | <0.8.1 | Handle different NA values | - | <0.8.1 | Handle multiple label languages | - | <0.8.1 | Convert dates | - | <0.8.1 | Reading business calendar files | - -### Test -Since our attributes differ from foreign::read.dta all.equal and identical -report false. If you check the values, everything is identical. + | | Saving dta files to disk - most features of the dta file format are supported | + | | Assign variable names | + | | Read the new strL strings and save them as attribute | + | | Convert stata label to factors and save them as attribute | + | | Read some meta data (timestamp, dataset label, formats,...) | + | | Convert strings to system encoding | + | | Handle different NA values | + | | Handle multiple label languages | + | | Convert dates | + | | Reading business calendar files | + +## readstata13 and foreign + +Most attributes of the resulting data.frame are largely similar to the data.frames produced by `foreign`. +Since newer Stata files require some additional attributes, the results of `all.equal()` and `identical()` will be `FALSE` for data.frames read by `foreign::read.dta` and `read.dta13()`. +Otherwise, the data.frames produced by both functions are identical. ```R -library("foreign") +library(foreign) +library(readstata13) + +# with factors r12 <- read.dta("http://www.stata-press.com/data/r12/auto.dta") r13 <- read.dta13("http://www.stata-press.com/data/r13/auto.dta") -Map(identical,r12,r13) - -att <- names(attributes(r12)) -for (i in seq(att)) - cat(att[i],":", all.equal(attr(r12,att[i]),attr(r13,att[i])),"\n") +all.equal(r12, r13, check.attributes = FALSE) -r12 <- read.dta("http://www.stata-press.com/data/r12/auto.dta",convert.factors=F) -r13 <- read.dta13("http://www.stata-press.com/data/r13/auto.dta",convert.factors=F) +# without factors +r12 <- read.dta("http://www.stata-press.com/data/r12/auto.dta", + convert.factors = FALSE) +r13 <- read.dta13("http://www.stata-press.com/data/r13/auto.dta", + convert.factors = FALSE) -Map(identical,r12,r13) +all.equal(r12, r13, check.attributes = FALSE) ``` ## Authors diff -Nru r-cran-readstata13-0.9.2/src/read_data.cpp r-cran-readstata13-0.10.0/src/read_data.cpp --- r-cran-readstata13-0.9.2/src/read_data.cpp 2018-05-26 10:41:40.000000000 +0000 +++ r-cran-readstata13-0.10.0/src/read_data.cpp 2021-05-25 09:58:32.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2017 Jan Marvin Garbuszus and Sebastian Jeworutzki + * Copyright (C) 2014-2018 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff -Nru r-cran-readstata13-0.9.2/src/read_dta.cpp r-cran-readstata13-0.10.0/src/read_dta.cpp --- r-cran-readstata13-0.9.2/src/read_dta.cpp 2018-05-26 10:41:40.000000000 +0000 +++ r-cran-readstata13-0.10.0/src/read_dta.cpp 2021-05-25 09:58:32.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2017 Jan Marvin Garbuszus and Sebastian Jeworutzki + * Copyright (C) 2014-2019 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -268,8 +268,13 @@ IntegerVector sortlist(big_k); for (uint64_t i=0; i