fork download
  1. help(file)
Success #stdin #stdout 0.57s 25624KB
stdin
Standard input is empty
stdout
connections                package:base                R Documentation

_F_u_n_c_t_i_o_n_s _t_o _M_a_n_i_p_u_l_a_t_e _C_o_n_n_e_c_t_i_o_n_s

_D_e_s_c_r_i_p_t_i_o_n:

     Functions to create, open and close connections.

_U_s_a_g_e:

     file(description = "", open = "", blocking = TRUE,
          encoding = getOption("encoding"), raw = FALSE)
     
     url(description, open = "", blocking = TRUE,
         encoding = getOption("encoding"))
     
     gzfile(description, open = "", encoding = getOption("encoding"),
            compression = 6)
     
     bzfile(description, open = "", encoding = getOption("encoding"),
            compression = 9)
     
     xzfile(description, open = "", encoding = getOption("encoding"),
            compression = 6)
     
     unz(description, filename, open = "",
         encoding = getOption("encoding"))
     
     pipe(description, open = "", encoding = getOption("encoding"))
     
     fifo(description, open = "", blocking = FALSE,
          encoding = getOption("encoding"))
     
     socketConnection(host = "localhost", port, server = FALSE,
                      blocking = FALSE, open = "a+",
                      encoding = getOption("encoding"))
     
     open(con, ...)
     ## S3 method for class 'connection':
     open(con, open = "r", blocking = TRUE, ...)
     
     close(con, ...)
     ## S3 method for class 'connection':
     close(con, type = "rw", ...)
     
     flush(con)
     
     isOpen(con, rw = "")
     isIncomplete(con)
     
_A_r_g_u_m_e_n_t_s:

description: character string. A description of the connection: see
          ‘Details’.

    open: character.  A description of how to open the connection (if
          it should be opened initially).  See section ‘Modes’ for
          possible values.

blocking: logical.  See the ‘Blocking’ section.

encoding: The name of the encoding to be used.  See the ‘Encoding’
          section.

     raw: logical.  If true, a ‘raw’ interface is used which will be
          more suitable for arguments which are not regular files, e.g.
          character devices.  This suppresses the check for a
          compressed file when opening for text-mode reading, and
          asserts that the ‘file’ may not be seekable.

compression: integer in 0-9.  The amount of compression to be applied
          when writing, from none to maximal available.  For ‘xzfile’
          can also be negative: see the ‘Compression’ section.

filename: a filename within a zip file.

    host: character.  Host name for port.

    port: integer.  The TCP port number.

  server: logical.  Should the socket be a client or a server?

     con: a connection.

    type: character. Currently ignored.

      rw: character.  Empty or ‘"read"’ or ‘"write"’, partial matches
          allowed.

     ...: arguments passed to or from other methods.

_D_e_t_a_i_l_s:

     The first nine functions create connections.  By default the
     connection is not opened (except for ‘socketConnection’), but may
     be opened by setting a non-empty value of argument ‘open’.

     For ‘file’ the description is a path to the file to be opened or a
     complete URL (when it is the same as calling ‘url’), or ‘""’ (the
     default) or ‘"clipboard"’ (see the ‘Clipboard’ section).  Use
     ‘"stdin"’ to refer to the C-level ‘standard input’ of the process
     (which need not be connected to anything in a console or embedded
     version of R), provided the C99 function ‘fdopen’ is supported on
     the platform.  (See also ‘stdin()’ for the subtly different
     R-level concept of ‘stdin’.)

     For ‘url’ the description is a complete URL, including scheme
     (such as ‘http://’, ‘ftp://’ or ‘file://’).  Proxies can be
     specified for HTTP and FTP ‘url’ connections: see ‘download.file’.

     For ‘gzfile’ the description is the path to a file compressed by
     ‘gzip’: it can also open for reading uncompressed files and (as
     from R 2.10.0) those compressed by ‘bzip2’, ‘xz’ or ‘lzma’.

     For ‘bzfile’ the description is the path to a file compressed by
     ‘bzip2’.

     For ‘xzfile’ the description is the path to a file compressed by
     ‘xz’ (<URL: http://en.wikipedia.org/wiki/Xz>) or (for reading
     only) ‘lzma’ (<URL: http://en.wikipedia.org/wiki/LZMA>).

     ‘unz’ reads (only) single files within zip files, in binary mode.
     The description is the full path to the zip file, with ‘.zip’
     extension if required.

     For ‘pipe’ the description is the command line to be piped to or
     from.

     For ‘fifo’ the description is the path of the fifo.  (Windows does
     not have fifos, so attempts to use this function there are an
     error.)

     All platforms support ‘file’, ‘gzfile’, ‘bzfile’, ‘xzfile’, ‘unz’
     and ‘url("file://")’ connections.  The other types may be
     partially implemented or not implemented at all.  (They do work on
     most Unix platforms, and all but ‘fifo’ on Windows.)

     The intention is that ‘file’ and ‘gzfile’ can be used generally
     for text input (from files and URLs) and binary input
     respectively.

     ‘open’, ‘close’ and ‘seek’ are generic functions: the following
     applies to the methods relevant to connections.

     ‘open’ opens a connection.  In general functions using connections
     will open them if they are not open, but then close them again, so
     to leave a connection open call ‘open’ explicitly.

     ‘close’ closes and destroys a connection.  This will happen
     automatically in due course (with a warning) if there is no longer
     an R object referring to the connection.

     A maximum of 128 connections can be allocated (not necessarily
     open) at any one time.  Three of these are pre-allocated (see
     ‘stdout’).  The OS will impose limits on the numbers of
     connections of various types, but these are usually larger than
     125.

     ‘flush’ flushes the output stream of a connection open for
     write/append (where implemented).

     If for a ‘file’ or ‘fifo’ connection the description is ‘""’, the
     file/fifo is immediately opened (in ‘"w+"’ mode unless ‘open =
     "w+b"’ is specified) and unlinked from the file system.  This
     provides a temporary file/fifo to write to and then read from.

_V_a_l_u_e:

     ‘file’, ‘pipe’, ‘fifo’, ‘url’, ‘gzfile’, ‘bzfile’, ‘xzfile’, ‘unz’
     and ‘socketConnection’ return a connection object which inherits
     from class ‘"connection"’ and has a first more specific class.

     ‘isOpen’ returns a logical value, whether the connection is
     currently open.

     ‘isIncomplete’ returns a logical value, whether the last read
     attempt was blocked, or for an output text connection whether
     there is unflushed output.

_U_R_L_s:

     A note on ‘file://’ URLs.  The most general form (from RFC1738) is
     ‘file://host/path/to/file’, but R only accepts the form with an
     empty ‘host’ field referring to the local machine.  This is then
     ‘file:///path/to/file’, where ‘path/to/file’ is relative to ‘/’.
     So although the third slash is strictly part of the specification,
     not part of the path, this can be regarded as a way to specify the
     file ‘/path/to/file’.  It is not possible to specify a relative
     path using a file URL.

     No attempt is made to decode an encoded URL: call ‘URLdecode’ if
     necessary.

     Note that ‘https://’ connections are not supported.

_M_o_d_e_s:

     Possible values for the argument ‘open’ are

     ‘"r"’ or ‘"rt"’ Open for reading in text mode.

     ‘"w"’ or ‘"wt"’ Open for writing in text mode.

     ‘"a"’ or ‘"at"’ Open for appending in text mode.

     ‘"rb"’ Open for reading in binary mode.

     ‘"wb"’ Open for writing in binary mode.

     ‘"ab"’ Open for appending in binary mode.

     ‘"r+"’, ‘"r+b"’ Open for reading and writing.

     ‘"w+"’, ‘"w+b"’ Open for reading and writing, truncating file
          initially.

     ‘"a+"’, ‘"a+b"’ Open for reading and appending.

     Not all modes are applicable to all connections: for example URLs
     can only be opened for reading.  Only file and socket connections
     can be opened for both reading and writing.

     If a file or fifo is created on a Unix-alike, its permissions will
     be the maximal allowed by the current setting of ‘umask’ (see
     ‘Sys.umask’).

     For many connections there is little or no difference between text
     and binary modes. For file-like connections on Windows,
     translation of line endings (between LF and CRLF) is done in text
     mode only (but text read operations on connections such as
     ‘readLines’, ‘scan’ and ‘source’ work for any form of line
     ending).  Various R operations are possible in only one of the
     modes: for example ‘pushBack’ is text-oriented and is only allowed
     on connections open for reading in text mode, and binary
     operations such as ‘readBin’, ‘load’ and ‘save’ operations can
     only be done on binary-mode connections.

     The mode of a connection is determined when actually opened, which
     is deferred if ‘open = ""’ is given (the default for all but
     socket connections).  An explicit call to ‘open’ can specify the
     mode, but otherwise the mode will be ‘"r"’.  (‘gzfile’, ‘bzfile’
     and ‘xzfile’ connections are exceptions, as the compressed file
     always has to be opened in binary mode and no conversion of
     line-endings is done even on Windows, so the default mode is
     interpreted as ‘"rb"’.)  Most operations that need write access or
     text-only or binary-only mode will override the default mode of a
     not-yet-open connection.

_C_o_m_p_r_e_s_s_i_o_n:

     R has for a long time supported ‘gzip’ and ‘bzip2’ compression,
     and support for ‘xz’ compression (and read-only support for its
     precursor ‘lzma’ compression) was added in R 2.10.0.

     For reading, the type of compression (if any) can be determined
     from the first few bytes of the file, and this is exploited as
     from R 2.10.0.  Thus for ‘file(raw = FALSE)’ connections, if
     ‘open’ is ‘""’, ‘"r"’ or ‘"rt"’ the connection can read any of the
     compressed file types as well as uncompressed files.  (Using
     ‘"rb"’ will allow compressed files to be read byte-by-byte.)
     Similarly, ‘gzfile’ connections can read any of the forms of
     compression and uncompressed files in any read mode.

     (The type of compression is determined when the connection is
     created if ‘open’ is unspecified and a file of that name exists.
     If the intention is to open the connection to write a file with a
     _different_ form of compression under that name, specify ‘open =
     "w"’ when the connection is created or ‘unlink’ the file before
     creating the connection.)

     For write-mode connections, ‘compress’ specifies how hard the
     compressor works to minimize the file size, and higher values need
     more CPU time and more working memory (up to ca 800Mb for
     ‘xzfile(compress = 9)’).  For ‘xzfile’ negative values of
     ‘compress’ correspond to adding the ‘xz’ argument ‘-e’: this takes
     more time (double?) to compress but may achieve (slightly) better
     compression.  The default (‘6’) has good compression and modest
     (100Mb) memory usage: but if you are using ‘xz’ compression you
     are probably looking for high compression.

     Choosing the type of compression involves tradeoffs: ‘gzip’,
     ‘bzip2’ and ‘xz’ are successively less widely supported, need more
     resources for both compression and decompression, and achieve more
     compression (although individual files may buck the general
     trend).  Typical experience is that ‘bzip2’ compression is 15%
     better on text files than ‘gzip’ compression, and ‘xz’ with
     maximal compression 30% better.  The experience with R ‘save’
     files is similar, but on some large ‘.rda’ files ‘xz’ compression
     is much better than the other two.  With current computers
     decompression times even with ‘compress = 9’ are typically modest
     and reading compressed files is usually faster than uncompressed
     ones because of the reduction in disc activity.

_E_n_c_o_d_i_n_g:

     The encoding of the input/output stream of a connection can be
     specified by name in the same way as it would be given to ‘iconv’:
     see that help page for how to find out what encoding names are
     recognized on your platform.  Additionally, ‘""’ and
     ‘"native.enc"’ both mean the ‘native’ encoding, that is the
     internal encoding of the current locale and hence no translation
     is done.

     Re-encoding only works for connections in text mode.

     The encoding ‘"UCS-2LE"’ is treated specially, as it is the
     appropriate value for Windows ‘Unicode’ text files.  If the first
     two bytes are the Byte Order Mark ‘0xFFFE’ then these are removed
     as most implementations of ‘iconv’ do not accept BOMs.  Note that
     some implementations will handle BOMs using encoding ‘"UCS-2"’ but
     many will not.

     Requesting a conversion that is not supported is an error,
     reported when the connection is opened.  Exactly what happens when
     the requested translation cannot be done is in general
     undocumented.  On output the result is likely to be the output up
     to the error, with a warning.  On input, it will most likely be
     all or some of the input up to the error.

_B_l_o_c_k_i_n_g:

     Whether or not the connection blocks can be specified for file,
     url (default yes), fifo and socket connections (default not).

     In blocking mode, functions using the connection do not return to
     the R evaluator until the read/write is complete.  In non-blocking
     mode, operations return as soon as possible, so on input they will
     return with whatever input is available (possibly none) and for
     output they will return whether or not the write succeeded.

     The function ‘readLines’ behaves differently in respect of
     incomplete last lines in the two modes: see its help page.

     Even when a connection is in blocking mode, attempts are made to
     ensure that it does not block the event loop and hence the
     operation of GUI parts of R.  These do not always succeed, and the
     whole R process will be blocked during a DNS lookup on Unix, for
     example.

     Most blocking operations on HTTP/FTP URLs and on sockets are
     subject to the timeout set by ‘options("timeout")’.  Note that
     this is a timeout for no response, not for the whole operation.
     The timeout is set at the time the connection is opened (more
     precisely, when the last connection of that type - ‘http:’, ‘ftp:’
     or socket - was opened).

_F_i_f_o_s:

     Fifos default to non-blocking.  That follows S version 4 and is
     probably most natural, but it does have some implications.  In
     particular, opening a non-blocking fifo connection for writing
     (only) will fail unless some other process is reading on the fifo.

     Opening a fifo for both reading and writing (in any mode: one can
     only append to fifos) connects both sides of the fifo to the R
     process, and provides a similar facility to ‘file()’.

_C_l_i_p_b_o_a_r_d:

     ‘file’ can be used with ‘description = "clipboard"’ in mode ‘"r"’
     only.  This reads the X11 primary selection (see <URL:
     http://standards.freedesktop.org/clipboards-spec/clipboards-latest.txt>),
     which can also be specified as ‘"X11_primary"’ and the secondary
     selection as ‘"X11_secondary"’.  On most systems the clipboard
     selection (that used by ‘Copy’ from an ‘Edit’ menu) can be
     specified as ‘"X11_clipboard"’.

     When a clipboard is opened for reading, the contents are
     immediately copied to internal storage in the connection.

     Unix users wishing to _write_ to one of the selections may be able
     to do so via ‘xclip’ (<URL:
     http://sourceforge.net/projects/xclip/>), for example by
     ‘pipe("xclip -i", "w")’ for the primary selection.

     Mac OS X users can use ‘pipe("pbpaste")’ and ‘pipe("pbcopy", "w")’
     to read from and write to that system's clipboard.

_N_o_t_e:

     R's connections are modelled on those in S version 4 (see
     Chambers, 1998).  However R goes well beyond the S model, for
     example in output text connections and URL, compressed and socket
     connections.

     The default open mode in R is ‘"r"’ except for socket connections.
     This differs from S, where it is the equivalent of ‘"r+"’, known
     as ‘"*"’.

     On (rare) platforms where ‘vsnprintf’ does not return the needed
     length of output there is a 100,000 character output limit on the
     length of line for ‘fifo’, ‘gzfile’, ‘bzfile’ and ‘xzfile’
     connections: longer lines will be truncated with a warning.

_R_e_f_e_r_e_n_c_e_s:

     Chambers, J. M. (1998) _Programming with Data.  A Guide to the S
     Language._ Springer.

_S_e_e _A_l_s_o:

     ‘textConnection’, ‘seek’, ‘showConnections’, ‘pushBack’.

     Functions making direct use of connections are ‘readLines’,
     ‘readBin’, ‘readChar’, ‘writeLines’, ‘writeBin’, ‘writeChar’,
     ‘cat’, ‘sink’, ‘scan’, ‘parse’, ‘read.dcf’, ‘load’, ‘save’, ‘dput’
     and ‘dump’.

     ‘capabilities’ to see if HTTP/FTP ‘url’, ‘fifo’ and
     ‘socketConnection’ are supported by this build of R.

     ‘gzcon’ to wrap ‘gzip’ (de)compression around a connection.

     ‘memCompress’ for more ways to (de)compress and references on data
     compression.

_E_x_a_m_p_l_e_s:

     zz <- file("ex.data", "w")  # open an output file connection
     cat("TITLE extra line", "2 3 5 7", "", "11 13 17", file = zz, sep = "\n")
     cat("One more line\n", file = zz)
     close(zz)
     readLines("ex.data")
     unlink("ex.data")
     
     zz <- gzfile("ex.gz", "w")  # compressed file
     cat("TITLE extra line", "2 3 5 7", "", "11 13 17", file = zz, sep = "\n")
     close(zz)
     readLines(zz <- gzfile("ex.gz"))
     close(zz)
     unlink("ex.gz")
     
     zz <- bzfile("ex.bz2", "w")  # bzip2-ed file
     cat("TITLE extra line", "2 3 5 7", "", "11 13 17", file = zz, sep = "\n")
     close(zz)
     print(readLines(zz <- bzfile("ex.bz2")))
     close(zz)
     unlink("ex.bz2")
     
     ## An example of a file open for reading and writing
     Tfile <- file("test1", "w+")
     c(isOpen(Tfile, "r"), isOpen(Tfile, "w")) # both TRUE
     cat("abc\ndef\n", file=Tfile)
     readLines(Tfile)
     seek(Tfile, 0, rw="r") # reset to beginning
     readLines(Tfile)
     cat("ghi\n", file=Tfile)
     readLines(Tfile)
     close(Tfile)
     unlink("test1")
     
     ## We can do the same thing with an anonymous file.
     Tfile <- file()
     cat("abc\ndef\n", file=Tfile)
     readLines(Tfile)
     close(Tfile)
     
     ## fifo example -- may fail, e.g. on Cygwin, even with OS support for fifos
     if(capabilities("fifo")) {
       zz <- fifo("foo-fifo", "w+")
       writeLines("abc", zz)
       print(readLines(zz))
       close(zz)
       unlink("foo-fifo")
     }
     
     ## Unix examples of use of pipes
     
     # read listing of current directory
     readLines(pipe("ls -1"))
     
     # remove trailing commas. Suppose
     
     ## Not run:
     % cat data2
     450, 390, 467, 654,  30, 542, 334, 432, 421,
     357, 497, 493, 550, 549, 467, 575, 578, 342,
     446, 547, 534, 495, 979, 479
     ## End(Not run)
     
     # Then read this by
     scan(pipe("sed -e s/,$// data2_"), sep=",")
     
     
     # convert decimal point to comma in output: see also write.table
     # both R strings and (probably) the shell need \ doubled
     zz <- pipe(paste("sed s/\\\\./,/ >", "outfile"), "w")
     cat(format(round(stats::rnorm(48), 4)), fill=70, file = zz)
     close(zz)
     file.show("outfile", delete.file=TRUE)
     
     ## example for a machine running a finger daemon
     
     con <- socketConnection(port = 79, blocking = TRUE)
     writeLines(paste(system("whoami", intern=TRUE), "\r", sep=""), con)
     gsub(" *$", "", readLines(con))
     close(con)
     
     
     ## Not run:
     
     ## two R processes communicating via non-blocking sockets
     # R process 1
     con1 <- socketConnection(port = 6011, server=TRUE)
     writeLines(LETTERS, con1)
     close(con1)
     
     # R process 2
     con2 <- socketConnection(Sys.info()["nodename"], port = 6011)
     # as non-blocking, may need to loop for input
     readLines(con2)
     while(isIncomplete(con2)) {Sys.sleep(1); readLines(con2)}
     close(con2)
     
     ## examples of use of encodings
     # write a file in UTF-8
     cat(x, file = (con <- file("foo", "w", encoding="UTF-8"))); close(con)
     # read a 'Windows Unicode' file
     A <- read.table(con <- file("students", encoding="UCS-2LE")); close(con)
     ## End(Not run)