@@ -19,6 +19,14 @@ GNU General Public License for more details.
1919You should have received a copy of the GNU General Public License
2020along with this program. If not, see <http://www.gnu.org/licenses/>.
2121
22+ Specification of the .npy file format is at:
23+
24+ http://www.numpy.org/neps/nep-0001-npy-format.html
25+
26+ Python implementation is at:
27+
28+ https://github.com/numpy/numpy/blob/master/numpy/lib/format.py
29+
2230*/
2331
2432#include " NpyFile.h"
@@ -27,7 +35,6 @@ using namespace BinaryRecordingEngine;
2735
2836NpyFile::NpyFile (String path, const Array<NpyType>& typeList)
2937{
30-
3138 m_dim1 = 1 ;
3239 m_dim2 = 1 ;
3340
@@ -44,12 +51,10 @@ NpyFile::NpyFile(String path, const Array<NpyType>& typeList)
4451 if (!openFile (path))
4552 return ;
4653 writeHeader (typeList);
47-
4854}
4955
5056NpyFile::NpyFile (String path, NpyType type, unsigned int dim)
5157{
52-
5358 if (!openFile (path))
5459 return ;
5560
@@ -58,7 +63,6 @@ NpyFile::NpyFile(String path, NpyType type, unsigned int dim)
5863 m_dim1 = dim;
5964 m_dim2 = type.getTypeLength ();
6065 writeHeader (typeList);
61-
6266}
6367
6468bool NpyFile::openFile (String path)
@@ -67,9 +71,13 @@ bool NpyFile::openFile(String path)
6771 Result res = file.create ();
6872 if (res.failed ())
6973 {
70- std::cerr << " Error creating file " << path << " :" << res.getErrorMessage () << std::endl;
74+ std::cerr << " Error creating file " << path << " :" << res.getErrorMessage ()
75+ << std::endl;
7176 return false ;
7277 }
78+ file.deleteFile (); // overwrite, never append a new .npy file to end of an existing one
79+ // output stream buffer size defaults to 32768 bytes, but is irrelevant because
80+ // each updateHeader() call triggers a m_file->flush() to disk:
7381 m_file = file.createOutputStream ();
7482 if (!m_file)
7583 return false ;
@@ -78,69 +86,103 @@ bool NpyFile::openFile(String path)
7886 return true ;
7987}
8088
89+ String NpyFile::getShapeString ()
90+ {
91+ String shape;
92+ shape.preallocateBytes (32 );
93+ shape = " (" ;
94+ shape += String (m_recordCount) + " ," ;
95+ if (m_dim1 > 1 )
96+ {
97+ shape += " " + String (m_dim1) + " ," ;
98+ }
99+ if (m_dim2 > 1 )
100+ shape += " " + String (m_dim2);
101+ shape += " ), }" ;
102+ return shape;
103+ }
104+
81105void NpyFile::writeHeader (const Array<NpyType>& typeList)
82106{
107+ uint8 magicNum = 0x93 ;
108+ String magicStr = " NUMPY" ;
109+ uint16 ver = 0x0001 ;
110+ // magic = magic number + magic string + magic version
111+ int magicLen = sizeof (uint8) + magicStr.getNumBytesAsUTF8 () + sizeof (uint16);
112+ int nbytesAlign = 64 ; // header should use an integer multiple of this many bytes
113+
83114 bool multiValue = typeList.size () > 1 ;
84- String header = " {'descr': " ;
85- header.preallocateBytes (100 );
115+ String strHeader;
116+ strHeader.preallocateBytes (128 );
117+ strHeader = " {'descr': " ;
86118
87119 if (multiValue)
88- header += " [" ;
120+ strHeader += " [" ;
89121
90122 int nTypes = typeList.size ();
91123
92124 for (int i = 0 ; i < nTypes; i++)
93125 {
94126 NpyType& type = typeList.getReference (i);
95- if (i > 0 ) header += " , " ;
127+ if (i > 0 ) strHeader += " , " ;
96128 if (multiValue)
97- header += " ('" + type.getName () + " ', '" + type.getTypeString () + " ', (" + String (type.getTypeLength ()) + " ,))" ;
129+ strHeader += " ('" + type.getName () + " ', '" + type.getTypeString ()
130+ + " ', (" + String (type.getTypeLength ()) + " ,))" ;
98131 else
99- header += " '" + type.getTypeString () + " '" ;
132+ strHeader += " '" + type.getTypeString () + " '" ;
100133 }
101134 if (multiValue)
102- header += " ]" ;
103- header += " , 'fortran_order': False, 'shape': " ;
104-
105- m_countPos = header.length () + 10 ;
106- header += " (1,), }" ;
107- int padding = (int ((header.length () + 30 ) / 16 ) + 1 ) * 16 ;
108- header = header.paddedRight (' ' , padding);
109- header += ' \n ' ;
135+ strHeader += " ]" ;
136+ strHeader += " , 'fortran_order': False, 'shape': " ;
137+
138+ // save byte offset of shape field in .npy file
139+ // magic + header length field + current string header length:
140+ m_shapePos = magicLen + sizeof (uint16) + strHeader.length ();
141+ strHeader += getShapeString (); // inits to 0 records, i.e. 1st dim has length 0
142+ int baseHeaderLen = magicLen + sizeof (uint16) + strHeader.length () + 1 ; // +1 for newline
143+ int padlen = nbytesAlign - (baseHeaderLen % nbytesAlign);
144+ strHeader = strHeader.paddedRight (' ' , strHeader.length () + padlen);
145+ strHeader += ' \n ' ;
146+ uint16 strHeaderLen = strHeader.length ();
110147
111- uint8 magicNum = 0x093 ;
112148 m_file->write (&magicNum, sizeof (uint8));
113- String magic = " NUMPY" ;
114- uint16 len = header.length ();
115- m_file->write (magic.toUTF8 (), magic.getNumBytesAsUTF8 ());
116- uint16 ver = 0x0001 ;
149+ m_file->write (magicStr.toUTF8 (), magicStr.getNumBytesAsUTF8 ());
117150 m_file->write (&ver, sizeof (uint16));
118- m_file->write (&len, sizeof (uint16));
119- m_file->write (header.toUTF8 (), len);
151+ m_file->write (&strHeaderLen, sizeof (uint16));
152+ m_file->write (strHeader.toUTF8 (), strHeaderLen);
153+ m_headerLen = m_file->getPosition (); // total header length
154+ m_file->flush ();
120155}
121156
122- NpyFile::~NpyFile ()
157+ void NpyFile::updateHeader ()
123158{
124- if (m_file->setPosition (m_countPos))
159+ // overwrite the shape part of the header - even without explicitly calling
160+ // m_file->flush(), overwriting seems to trigger a flush to disk,
161+ // while appending to end of file does not
162+ int currentPos = m_file->getPosition ();
163+ if (m_file->setPosition (m_shapePos))
125164 {
126- String newShape = " (" ;
127- newShape.preallocateBytes (20 );
128- newShape += String (m_recordCount) + " ," ;
129- if (m_dim1 > 1 )
165+ String newShape = getShapeString ();
166+ if (m_shapePos + newShape.getNumBytesAsUTF8 () + 1 > m_headerLen) // +1 for newline
130167 {
131- newShape += String (m_dim1) + " , " ;
168+ std::cerr << " Error. Header has grown too big to update in-place " << std::endl ;
132169 }
133- if (m_dim2 > 1 )
134- newShape += String (m_dim2);
135- newShape += " ), }" ;
136170 m_file->write (newShape.toUTF8 (), newShape.getNumBytesAsUTF8 ());
171+ // m_file->flush(); // doesn't seem to be necessary, already flushed due to overwrite
172+ m_file->setPosition (currentPos); // restore position to end of file
137173 }
138174 else
139175 {
140- std::cerr << " Error. Unable to seek to update header on file " << m_file->getFile ().getFullPathName () << std::endl;
176+ std::cerr << " Error. Unable to seek to update file header"
177+ << m_file->getFile ().getFullPathName () << std::endl;
141178 }
142179}
143180
181+ NpyFile::~NpyFile ()
182+ {
183+ updateHeader ();
184+ }
185+
144186void NpyFile::writeData (const void * data, size_t size)
145187{
146188 m_file->write (data, size);
@@ -149,9 +191,10 @@ void NpyFile::writeData(const void* data, size_t size)
149191void NpyFile::increaseRecordCount (int count)
150192{
151193 m_recordCount += count;
194+ if (m_recordCount % recordBufferSize == 0 )
195+ updateHeader (); // also triggers a flush to disk
152196}
153197
154-
155198NpyType::NpyType (String n, BaseType t, size_t l)
156199 : name(n), type(t), length(l)
157200{
@@ -173,11 +216,11 @@ String NpyType::getTypeString() const
173216 switch (type)
174217 {
175218 case BaseType::CHAR:
176- return " S" + String (length + 1 ); // account for the null separator
219+ return " | S" + String (length + 1 ); // null-terminated bytes, account for null separator
177220 case BaseType::INT8:
178- return " < i1" ;
221+ return " | i1" ;
179222 case BaseType::UINT8:
180- return " < u1" ;
223+ return " | u1" ;
181224 case BaseType::INT16:
182225 return " <i2" ;
183226 case BaseType::UINT16:
@@ -195,7 +238,7 @@ String NpyType::getTypeString() const
195238 case BaseType::DOUBLE:
196239 return " <f8" ;
197240 default :
198- return " <b1 " ;
241+ return " |i1 " ; // signed byte
199242 }
200243}
201244
@@ -215,4 +258,4 @@ String NpyType::getName() const
215258BaseType NpyType::getType () const
216259{
217260 return type;
218- }
261+ }
0 commit comments