Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check input file sanity in Reader::open #25

Merged
merged 3 commits into from
Jun 30, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions vlsv_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,5 +117,40 @@ namespace vlsv {
else s << bytes/t << " B/s";
return s.str();
}

/** Translate an error code into a human-readable message.
 * @param errorCode Error code to describe.
 * @return Message describing the error. error::UNKNOWN and any code without a
 * dedicated message yield "Unknown or unsupported error code".*/
const std::string getErrorString(const vlsv::error::type& errorCode) {
   switch (errorCode) {
      case error::NONE:                   return "No errors";
      case error::READ_CWD_FAIL:          return "Failed to cwd to input file dir";
      case error::READ_FILE_BAD:          return "Failed to open input file";
      case error::READ_FILE_ALREADY_OPEN: return "vlsv::Reader already has an open input file";
      case error::READ_FILE_ENDIANNESS:   return "Failed to read file endianness";
      case error::READ_NO_FOOTER:         return "Input file broken, footer not found";
      case error::READ_FOOTER_OFFSET:     return "Failed to read footer position";
      case error::READ_FOOTER:            return "Failed to read footer";
      default:                            break;
   }

   // error::UNKNOWN and every value not listed above share the same message.
   return "Unknown or unsupported error code";
}

} // namespace vlsv
18 changes: 18 additions & 0 deletions vlsv_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,22 @@ namespace vlsv {
};
}

/** Error codes used to report failures. Enumerators are numbered
 * consecutively starting from zero, in the order listed.*/
namespace error {
   enum type {
      /** No errors.*/
      NONE,
      /** Unknown or unsupported error.*/
      UNKNOWN,
      /** Reader failed to cwd to directory containing input file.*/
      READ_CWD_FAIL,
      /** Reader failed to open input file.*/
      READ_FILE_BAD,
      /** Reader failed to open file because a file is already open.*/
      READ_FILE_ALREADY_OPEN,
      /** Reader failed to read file endianness.*/
      READ_FILE_ENDIANNESS,
      /** Input file has no footer.*/
      READ_NO_FOOTER,
      /** Reader failed to read footer offset.*/
      READ_FOOTER_OFFSET,
      /** Reader failed to read footer.*/
      READ_FOOTER,
      /** Number of error codes listed above; must remain the last enumerator.*/
      SIZE
   };
}

/** Tells whether a datatype stored in a buffer or a file is a signed or unsigned integer, or a floating point number.
* @brief Datatype description.*/
namespace datatype {
Expand Down Expand Up @@ -134,6 +150,8 @@ namespace vlsv {
};
}
}

/** Get a human-readable description of an error code.
 * @param errorCode Error code to describe.
 * @return String describing the given error code.*/
const std::string getErrorString(const vlsv::error::type& errorCode);

template<typename T> T convertFloat(const char* const ptr);
template<typename T> T convertInteger(const char* const ptr,const bool& swapEndianness=false);
Expand Down
49 changes: 34 additions & 15 deletions vlsv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ namespace vlsv {
return true;
}

const std::string Reader::getErrorString() const {
return vlsv::getErrorString(lastErrorCode);
}

bool Reader::getFileName(std::string& openFile) const {
if (fileOpen == false) {
openFile = "";
Expand Down Expand Up @@ -149,11 +153,9 @@ namespace vlsv {
* @return If true, file was successfully opened.*/
bool Reader::open(const std::string& fname) {
bool success = true;
lastErrorCode = error::NONE;
if (fileOpen == true) {
#ifndef NDEBUG
cerr << "vlsv::Reader ERROR: File '" << fname << "' should be opened, but file '";
cerr << fileName << "' is currently open." << endl;
#endif
lastErrorCode = error::READ_FILE_ALREADY_OPEN;
return false;
}

Expand All @@ -173,7 +175,10 @@ namespace vlsv {
// then chdir back to current working directory.
// Chdir returns zero value if it succeeds
// Not done if the string is empty as chdir fails in that case.
if (fileio::chdir(pathName.c_str()) != 0) success = false;
if (fileio::chdir(pathName.c_str()) != 0) {
lastErrorCode = error::READ_CWD_FAIL;
success = false;
}
}
}

Expand All @@ -186,33 +191,47 @@ namespace vlsv {
} else {
filein.close();
success = false;
lastErrorCode = error::READ_FILE_BAD;
}

if (success == false) {
#ifndef NDEBUG
cerr << "vlsv::Reader ERROR: File '" << fnameWithoutPath << "' could not be opened!" << endl;
#endif
return success;
}

if (success == false) return success;

// Detect file endianness:
char* ptr = reinterpret_cast<char*>(&endiannessFile);
filein.read(ptr,1);
if (filein.good() == false) {
success = false;
lastErrorCode = error::READ_FILE_ENDIANNESS;
return success;
}
if (endiannessFile != endiannessReader) swapIntEndianness = true;

// Read footer offset:
uint64_t footerOffset;
char buffer[16];
filein.seekg(8);
filein.read(buffer,8);
if (filein.good() == false) {
lastErrorCode = error::READ_FOOTER_OFFSET;
success = false;
return success;
}
footerOffset = convUInt64(buffer,swapIntEndianness);

// Read footer XML tree:
filein.seekg(footerOffset);
xmlReader.read(filein);
if (filein.tellg() != footerOffset) {
lastErrorCode = error::READ_NO_FOOTER;
success = false;
}

if (success == true && xmlReader.read(filein) == false) {
lastErrorCode = error::READ_FOOTER;
success = false;
}
filein.clear();
filein.seekg(16);

return success;
}

Expand Down
2 changes: 2 additions & 0 deletions vlsv_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ namespace vlsv {
std::map<std::string,std::string>& attribsOut) const;
virtual bool getArrayInfo(const std::string& tagName,const std::list<std::pair<std::string,std::string> >& attribs,
uint64_t& arraySize,uint64_t& vectorSize,datatype::type& dataType,uint64_t& byteSize);
virtual const std::string getErrorString() const;
virtual bool getFileName(std::string& openFile) const;
virtual bool getUniqueAttributeValues(const std::string& tagName,const std::string& attribName,std::set<std::string>& output) const;
virtual bool loadArray(const std::string& tagName,const std::list<std::pair<std::string,std::string> >& attribs);
Expand All @@ -55,6 +56,7 @@ namespace vlsv {
protected:
unsigned char endiannessFile; /**< Endianness in VLSV file.*/
unsigned char endiannessReader; /**< Endianness of computer which reads the data.*/
error::type lastErrorCode; /**< Code indicating last error that has occurred, if any.*/
std::fstream filein; /**< Input file stream.*/
std::string fileName; /**< Name of the input file.*/
bool fileOpen; /**< If true, a file is currently open.*/
Expand Down
80 changes: 78 additions & 2 deletions vlsv_writer.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/** This file is part of VLSV file format.
*
* Copyright 2011-2015 Finnish Meteorological Institute
* Copyright 2016 Arto Sandroos
* Copyright 2016-2017 Arto Sandroos
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
Expand Down Expand Up @@ -47,6 +47,7 @@ namespace vlsv {
types = NULL;
xmlWriter = NULL;
comm = MPI_COMM_NULL;
writeUsingMasterOnly = false;
}

/** Destructor for Writer. Deallocates XML writer.*/
Expand Down Expand Up @@ -372,7 +373,7 @@ namespace vlsv {
multiwriteUnits.swap(dummy);
}
multiwriteOffsets[0] = numeric_limits<unsigned int>::max();

// Broadcast vectorsize,datatype,and dataSize to all processes:
this->vectorSize = vectorSize;
MPI_Bcast(&(this->vectorSize),1,MPI_Type<uint64_t>(),masterRank,comm);
Expand Down Expand Up @@ -573,6 +574,15 @@ namespace vlsv {
return success;
}

/** Select whether file i/o is performed by the master process only.
 * @param writeUsingMasterOnly If true, only master writes data to file. Otherwise data
 * is written using collective MPI.
 * @return Value of the flag after the call, i.e., true if master is the only
 * process writing to file.*/
bool Writer::setWriteOnMasterOnly(const bool& writeUsingMasterOnly) {
   // Store the requested mode and report back what was stored.
   return (this->writeUsingMasterOnly = writeUsingMasterOnly);
}

/** Write an array to output file.
* @param arrayName Name of the array. Only significant on master process.
* @param attribs XML attributes for the array. Only significant on master process.
Expand All @@ -584,6 +594,10 @@ namespace vlsv {
* @return If true, array was successfully written to the output file. Same value is returned on every process.*/
bool Writer::writeArray(const std::string& arrayName,const std::map<std::string,std::string>& attribs,const std::string& dataType,
const uint64_t& arraySize,const uint64_t& vectorSize,const uint64_t& dataSize,const char* array) {

if (writeUsingMasterOnly == true)
return writeArrayMaster(arrayName,attribs,dataType,arraySize,vectorSize,dataSize,array);

// Check that everything is OK before continuing:
bool success = true;
if (initialized == false) success = false;
Expand All @@ -603,5 +617,67 @@ namespace vlsv {
if (endMultiwrite(arrayName,attribs) == false) success = false;
return success;
}

/** Write an array to file so that file I/O is done on master only. Before writing to file all data is gathered to master.
* @param arrayName Name of the array. Only significant on master process.
* @param attribs XML attributes for the array. Only significant on master process.
* @param dataType String representation of the datatype. Only significant on master process.
* @param arraySize Number of array elements written by this process.
* @param vectorSize Size of the data vector stored in each array element. Only significant on master process.
* @param dataSize Byte size of vector element. Only significant on master process.
* @param array Pointer to data.
* @return If true, array was successfully written to the output file. Same value is returned on every process.*/
bool Writer::writeArrayMaster(const std::string& arrayName,const std::map<std::string,std::string>& attribs,const std::string& dataType,
const uint64_t& arraySize,const uint64_t& vectorSize,const uint64_t& dataSize,const char* array) {

// Check that everything is OK before continuing:
bool success = true;
if (initialized == false) success = false;
if (fileOpen == false) success = false;
if (checkSuccess(success,comm) == false) return false;

this->vectorSize = vectorSize;
this->dataSize = dataSize;
this->dataType = dataType;
this->vlsvType = getVLSVDatatype(dataType);

// Count amount of output data
myBytes = arraySize * vectorSize * dataSize;
MPI_Gather(&myBytes,1,MPI_Type<uint64_t>(),bytesPerProcess,1,MPI_Type<uint64_t>(),masterRank,comm);

// Gather data to master
uint64_t totalBytes = 0;
vector<int> byteCounts(N_processes);
vector<int> byteOffsets(N_processes);
if (myrank == masterRank) {
for (int i=0; i<N_processes; ++i) totalBytes += bytesPerProcess[i];

byteOffsets[0] = 0;
for (size_t p=0; p<byteCounts.size(); ++p) {
byteCounts[p] = bytesPerProcess[p];
if (p > 0 ) byteOffsets[p] = byteOffsets[p-1] + bytesPerProcess[p-1];
}
}
char* ptr = reinterpret_cast<char*>(this);
if (arraySize > 0) ptr = const_cast<char*>(array);
vector<char> buffer;
if (myrank == masterRank) buffer.resize(totalBytes);
MPI_Gatherv(ptr, myBytes, MPI_BYTE,
buffer.data(), byteCounts.data(), byteOffsets.data(),
MPI_BYTE, masterRank, comm);

// Write data at master
if (myrank == masterRank) {
MPI_Status status;
const double t_start = MPI_Wtime();
MPI_File_write_at(fileptr, offset, buffer.data(), totalBytes, MPI_Type<char>(), &status);
writeTime += (MPI_Wtime() - t_start);
}

// Add footer entry
if (multiwriteFooter(arrayName, attribs) == false) success = false;

return checkSuccess(success,comm);
}

} // namespace vlsv
5 changes: 5 additions & 0 deletions vlsv_writer.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/** This file is part of VLSV file format.
*
* Copyright 2011-2016 Finnish Meteorological Institute
* Copyright 2017 Arto Sandroos
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
Expand Down Expand Up @@ -64,10 +65,13 @@ namespace vlsv {
bool endMultiwrite(const std::string& tagName,const std::map<std::string,std::string>& attribs);
bool open(const std::string& fname,MPI_Comm comm,const int& masterProcessID,MPI_Info mpiInfo=MPI_INFO_NULL,bool append=false);
bool setSize(MPI_Offset newSize);
bool setWriteOnMasterOnly(const bool& writeUsingMasterOnly);
void startDryRun();
bool startMultiwrite(const std::string& datatype,const uint64_t& arraySize,const uint64_t& vectorSize,const uint64_t& dataSize);
bool writeArray(const std::string& arrayName,const std::map<std::string,std::string>& attribs,const std::string& dataType,
const uint64_t& arraySize,const uint64_t& vectorSize,const uint64_t& dataSize,const char* array);
bool writeArrayMaster(const std::string& arrayName,const std::map<std::string,std::string>& attribs,const std::string& dataType,
const uint64_t& arraySize,const uint64_t& vectorSize,const uint64_t& dataSize,const char* array);

// ***** TEMPLATE WRAPPER FUNCTIONS ***** //

Expand Down Expand Up @@ -112,6 +116,7 @@ namespace vlsv {
* This variable is used to synchronize threads in endMultiwrite function.*/
bool multiwriteInitialized; /**< If true, multiwrite array writing mode has initialized correctly.
* This variable is used to synchronize threads in startMultiwrite function.*/
bool writeUsingMasterOnly; /**< If true, only master process does file i/o.*/

std::vector<unsigned int> multiwriteOffsets; /**< Offset for each thread using VLSVWriter, used to load
* data into an MPI struct in endMultiwrite.*/
Expand Down