[aplusdev] map a very large text file?

John.Mizel at MorganStanley.com John.Mizel at MorganStanley.com
Mon Feb 11 13:28:51 EST 2002


Sasha,

I've attached a small C program that takes any file and prepends the 
header information to make it a valid .m file.

It is simple to use:

  usage: a.out <fromfile> <tofile> [<c|i|f> d1 ... d9]

i.e.
  gcc beamOut.c -o beamOut
  beamOut yourfile.txt  yourfile.m  # Creates a simple character vector

Thanks,
John

On Mon, 11 Feb 2002, Alexander Skomorokhov wrote:

> Brian,
> 
> > How about:
> > 
> > $mode ascii
> > 'large.m' beam sys.readmat 'large.txt'
> Thanks, but...
> 
> Do I have all content in the memory at a point:
> sys.readmat 'large.txt'
> ?
> 
> By "very large" I meant "more than RAM".
> 
> Sasha. 
> 
> 
> 

-- 
 
-------------- next part --------------
/* Takes a file and creates a mapped A+ .m file by adding the A+ header and */
/* copying the contents of the file.  The copying should be fast since it  */
/* uses memory-mapped files to transfer the data */

#include <stdio.h>
#include <stdlib.h>	/* exit() */
#include <string.h>	/* memcpy(), memset() */
#include <errno.h>
#include <limits.h>	/* LONG_MAX */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>	/* mmap() */
#include <fcntl.h>
#include <unistd.h>	/* lseek(), write(), close() */

#ifndef	MAP_FILE	/* 4.3+ BSD defines this & requires it to mmap files */
#define	MAP_FILE 0	/* to compile under systems other than 4.3+ BSD */
#endif

/* Permission bits passed to open() when creating the output file: */
/* read/write for the owner, read-only for group and others.       */
#define FILE_MODE       (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)

/* A+ stuff */
/* Type codes stored in the `type' field of struct aplusHeader. */
#define It 0L			/* Integer type       */
#define Ft 1L			/* Float(double) type */
#define Ct 2L			/* Character type     */

/* C types whose sizeof() main() uses as the element size for each code. */
typedef long I;			/* element of an It array; also every header field */
typedef double F;		/* element of an Ft array */
typedef char C;			/* element of a Ct array  */

/* Header that main() copies verbatim to the start of the output file,   */
/* immediately ahead of the data.  The field order and types are written */
/* out as-is, so they must not be rearranged.                            */
struct aplusHeader
{
  I refCount;			/* main() always stores 0 here */
  I type;			/* one of It, Ft, Ct */
  I rank; /* number of entries of dim[] in use (1 unless dimensions are given) */
  I n; /* total element count: product of the dimensions in use */
  I dim[9];			/* dimension lengths; at most 9 */
  I items; /* main() sets this to dim[0] */
};

/* Default settings */

int
main(int argc, char **argv)
{
  int fdIn, fdOut;
  int i;
  char *src, *dst;
  struct stat statbuf;
  I elementSize;
  I outFileSize=0;

  struct aplusHeader aHdr;

  if (argc < 3)
    {
      fprintf(stderr,"usage: a.out <fromfile> <tofile> [<c|i|f> d1 ... d9\n");
      exit(2);
    }

  if ( (fdIn = open(argv[1], O_RDONLY)) < 0)
    {
      fprintf(stderr,"can't open %s for reading\n", argv[1]);
      exit(2);
    }

  if ( (fdOut = open(argv[2], O_RDWR | O_CREAT | O_TRUNC, FILE_MODE)) < 0)
    {
      fprintf(stderr,"can't create %s for writing\n", argv[1]);
      exit(2);
    }

  if (fstat(fdIn, &statbuf) < 0)	/* need size of input file */
    {
      perror("fstat error");
      exit(2);
    }

  /* Create A+ header */
  /* Set A+ header defaults */
  aHdr.refCount=0;
  aHdr.rank=1;
  aHdr.type=Ct;	

  if( argc > 3 )		/* Type specified */
    if( *argv[3]=='c' )
      aHdr.type=Ct;
    else if( *argv[3]=='i' )
      aHdr.type=It;
    else if( *argv[3]=='f' )
      aHdr.type=Ft;
    else
      {
	fprintf(stderr,
		"usage: a.out <fromfile> <tofile> [<c|i|f> d1 ... d9\n");
	exit(2);
      }
 
  elementSize=aHdr.type==Ct?sizeof(C):aHdr.type==It?sizeof(I):sizeof(F);

  aHdr.n=aHdr.dim[0]=aHdr.items=statbuf.st_size/elementSize;

  if( argc > 4)			/* Dimensions specfied */
    {
      for( aHdr.n=1,aHdr.rank=0,i=4; i<argc; i++,aHdr.rank++)
	{
	  aHdr.dim[aHdr.rank]=atoi(argv[i]);
	  aHdr.n*=aHdr.dim[aHdr.rank];
	}
      aHdr.items=aHdr.dim[0];
    } 

  /* set size of output file */
  outFileSize = aHdr.n*elementSize;

  if (lseek(fdOut, outFileSize + ( sizeof(aHdr) ), SEEK_SET) == -1)
    {
      perror("lseek error");
      exit(2);
    }

  if (write(fdOut, "", 1) != 1)
    {
      perror("write error");
      exit(2);
    }

  if ( (src = mmap(0, statbuf.st_size, PROT_READ, 
		   MAP_FILE | MAP_SHARED, fdIn, 0)) == (caddr_t) -1)
    {
      perror("mmap error for input");
      exit(2);
    }

  if ( (dst = mmap(0, outFileSize+sizeof(aHdr), PROT_READ | PROT_WRITE,
		   MAP_FILE | MAP_SHARED, fdOut, 0)) == (caddr_t) -1)
    {
      perror("mmap error for output");
      exit(2);
    }

  if( outFileSize < statbuf.st_size)
    {
      fprintf(stderr,"warning %s too small... data truncated\n",argv[2]);
    }
  else if( outFileSize>statbuf.st_size)
    {
      fprintf(stderr,"warning %s too small... not enough data\n",argv[1]);
      outFileSize = statbuf.st_size;
    }
				/* Write the header */
  memcpy(dst, &aHdr, sizeof( aHdr ) );
				/* Write the data   */
  memcpy( dst + sizeof(aHdr), src, outFileSize );

  exit(0);
}









More information about the apluslist mailing list