Skip to content

Commit

Permalink
use read(), not statfs(), to detect files that lie about size
Browse files Browse the repository at this point in the history
Files in some pseudo filesystems, particularly procfs and tracefs on
Linux, lie about their file size in stat(2). They claim to have 0 size,
but do return data when read() from.

Previously, less handled these files by using statfs(2) to check the
filesystem type (f_type field). However, there are 2 problems with this
approach:

- this requires compiling with the Linux "magic.h" header available, and
  if less is compiled without this header present, it will not have
  access to the magic numbers that define procfs and tracefs. Such a
  less binary will not work on these pseudo filesystems
- this approach is not resilient to the addition of new filesystems in
  the future with similar behavior but a new magic number

This patch takes a different approach; it takes advantage of the fact
that less (usually) read()s 256 bytes from the beginning of the file to
determine if the file is binary. The return value of the read() can be
compared with the filesize from stat(2) to determine if the latter is
untrustworthy.

This moves the bin_file() check to happen unconditionally (for regular
files, etc.), instead of only happening when the "--force" flag is
specified. While this does result in an extra syscall in that particular
circumstance, it will result in fewer syscalls overall for the most
common scenario of not specifying "--force".
  • Loading branch information
bertschingert authored and gwsw committed Jan 21, 2024
1 parent 1dd094e commit 1fafd96
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 45 deletions.
51 changes: 20 additions & 31 deletions ch.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,6 @@
#include <windows.h>
#endif

#if HAVE_PROCFS
#include <sys/statfs.h>
#if HAVE_LINUX_MAGIC_H
#include <linux/magic.h>
#endif
#endif

typedef POSITION BLOCKNUM;

public int ignore_eoi;
Expand Down Expand Up @@ -704,38 +697,18 @@ public void ch_flush(void)
bufnode_buf(bn)->block = -1;
}

/*
* Figure out the size of the file, if we can.
*/
ch_fsize = filesize(ch_file);

/*
* Seek to a known position: the beginning of the file.
*/
ch_fpos = 0;
ch_block = 0; /* ch_fpos / LBUFSIZE; */
ch_offset = 0; /* ch_fpos % LBUFSIZE; */

#if HAVE_PROCFS
/*
* This is a kludge to workaround a Linux kernel bug: files in
* /proc have a size of 0 according to fstat() but have readable
* data. They are sometimes, but not always, seekable.
* Force them to be non-seekable here.
*/
if (ch_fsize == 0)
if (ch_flags & CH_NOTRUSTSIZE)
{
struct statfs st;
if (fstatfs(ch_file, &st) == 0)
{
if (st.f_type == PROC_SUPER_MAGIC)
{
ch_fsize = NULL_POSITION;
ch_flags &= ~CH_CANSEEK;
}
}
ch_fsize = NULL_POSITION;
ch_flags &= ~CH_CANSEEK;
}
#endif

if (less_lseek(ch_file, (less_off_t)0, SEEK_SET) == BAD_LSEEK)
{
Expand Down Expand Up @@ -837,7 +810,7 @@ public void ch_set_eof(void)
/*
* Initialize file state for a new file.
*/
public void ch_init(int f, int flags)
public void ch_init(int f, int flags, ssize_t nread)
{
/*
* See if we already have a filestate for this file.
Expand Down Expand Up @@ -868,6 +841,22 @@ public void ch_init(int f, int flags)
}
if (thisfile->file == -1)
thisfile->file = f;

/*
* Figure out the size of the file, if we can.
*/
ch_fsize = filesize(ch_file);

/*
* This is a kludge to workaround a Linux kernel bug: files in some
* pseudo filesystems like /proc and tracefs have a size of 0 according
* to fstat() but have readable data.
*/
if (ch_fsize == 0 && nread > 0)
{
ch_flags |= CH_NOTRUSTSIZE;
}

ch_flush();
}

Expand Down
7 changes: 0 additions & 7 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,6 @@ AH_TEMPLATE([HAVE_CONST],
[Define HAVE_CONST if your compiler supports the "const" modifier.])
AH_TEMPLATE([HAVE_STAT_INO],
[Define HAVE_STAT_INO if your struct stat has st_ino and st_dev.])
AH_TEMPLATE([HAVE_PROCFS],
[Define HAVE_PROCFS if have have fstatfs with f_type and PROC_SUPER_MAGIC.])
AH_TEMPLATE([HAVE_TIME_T],
[Define HAVE_TIME_T if your system supports the "time_t" type.])
AH_TEMPLATE([HAVE_STRERROR],
Expand Down Expand Up @@ -251,11 +249,6 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <time.h>]], [[time_t t = 0;]])],[A
AC_MSG_CHECKING(for st_ino in struct stat)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <sys/types.h>
#include <sys/stat.h>]], [[struct stat s; dev_t dev = s.st_dev; ino_t ino = s.st_ino;]])],[AC_MSG_RESULT(yes); AC_DEFINE(HAVE_STAT_INO)],[AC_MSG_RESULT(no)])
AC_MSG_CHECKING(for procfs)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <sys/statfs.h>
#if HAVE_LINUX_MAGIC_H
#include <linux/magic.h>
#endif]], [[struct statfs s; s.f_type = PROC_SUPER_MAGIC; (void) fstatfs(0,&s); ]])],[AC_MSG_RESULT(yes); AC_DEFINE(HAVE_PROCFS)],[AC_MSG_RESULT(no)])

# Checks for ANSI function prototypes.
AC_MSG_CHECKING(for ANSI function prototypes)
Expand Down
5 changes: 3 additions & 2 deletions edit.c
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,7 @@ public int edit_ifile(IFILE ifile)
IFILE was_curr_ifile;
char *p;
PARG parg;
ssize_t nread;

if (ifile == curr_ifile)
{
Expand Down Expand Up @@ -543,7 +544,7 @@ public int edit_ifile(IFILE ifile)
} else
{
chflags |= CH_CANSEEK;
if (!force_open && !opened(ifile) && bin_file(f))
if (bin_file(f, &nread) && !force_open && !opened(ifile))
{
/*
* Looks like a binary file.
Expand Down Expand Up @@ -607,7 +608,7 @@ public int edit_ifile(IFILE ifile)
set_altpipe(curr_ifile, altpipe);
set_open(curr_ifile); /* File has been opened */
get_pos(curr_ifile, &initial_scrpos);
ch_init(f, chflags);
ch_init(f, chflags, nread);
consecutive_nulls = 0;
check_modelines();

Expand Down
13 changes: 8 additions & 5 deletions filename.c
Original file line number Diff line number Diff line change
Expand Up @@ -451,10 +451,13 @@ public char * fcomplete(constant char *s)
/*
* Try to determine if a file is "binary".
* This is just a guess, and we need not try too hard to make it accurate.
*
* The number of bytes read is returned to the caller, because it will
* be used later to compare to st_size from stat(2) to see if the file
* is lying about its size.
*/
public int bin_file(int f)
public int bin_file(int f, ssize_t *n)
{
ssize_t n;
int bin_count = 0;
char data[256];
constant char* p;
Expand All @@ -464,10 +467,10 @@ public int bin_file(int f)
return (0);
if (less_lseek(f, (less_off_t)0, SEEK_SET) == BAD_LSEEK)
return (0);
n = read(f, data, sizeof(data));
if (n <= 0)
*n = read(f, data, sizeof(data));
if (*n <= 0)
return (0);
edata = &data[n];
edata = &data[*n];
for (p = data; p < edata; )
{
if (utf_mode && !is_utf8_well_formed(p, (int) ptr_diff(edata,p)))
Expand Down
1 change: 1 addition & 0 deletions less.h
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@ typedef enum {
#define CH_POPENED 004
#define CH_HELPFILE 010
#define CH_NODATA 020 /* Special case for zero length files */
#define CH_NOTRUSTSIZE 040 /* For files that claim 0 length size falsely */

#define ch_zero() ((POSITION)0)

Expand Down

0 comments on commit 1fafd96

Please sign in to comment.