Hi, I have a custom python generator that’s pulling data out of a hail blockmatrix to use for deep learning, as previously discussed here.
This has been working pretty well by itself, but when I try to parallelise the data generator (by running multiple workers), I get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-14-4cdb9d3fb71c> in <module>
----> 1 for X,Y in train_loader:
2 print(X)
~/anaconda3/envs/hailpyt/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
817 else:
818 del self._task_info[idx]
--> 819 return self._process_data(data)
820
821 next = __next__ # Python 2 compatibility
~/anaconda3/envs/hailpyt/lib/python3.6/site-packages/torch/utils/data/dataloader.py in _process_data(self, data)
844 self._try_put_index()
845 if isinstance(data, ExceptionWrapper):
--> 846 data.reraise()
847 return data
848
~/anaconda3/envs/hailpyt/lib/python3.6/site-packages/torch/_utils.py in reraise(self)
383 # (https://bugs.python.org/issue2651), so we work around it.
384 msg = KeyErrorMessage(msg)
--> 385 raise self.exc_type(msg)
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/ch283/anaconda3/envs/hailpyt/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
data = fetcher.fetch(index)
File "/home/ch283/anaconda3/envs/hailpyt/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 34, in fetch
data = next(self.dataset_iter)
File "/data/ch283/MyIterableDataset.py", line 62, in __iter__
yield self.__get_data(batch)
File "/data/ch283/MyIterableDataset.py", line 36, in __get_data
X = self.bm.filter_cols(batch).to_numpy()
File "/data/ch283/modelfit_norm.py", line 36, in to_numpy
bm.tofile(uri)
File "<decorator-gen-1467>", line 2, in tofile
File "/home/ch283/anaconda3/envs/hailpyt/lib/python3.6/site-packages/hail/typecheck/check.py", line 614, in wrapper
return __original_func(*args_, **kwargs_)
File "/home/ch283/anaconda3/envs/hailpyt/lib/python3.6/site-packages/hail/linalg/blockmatrix.py", line 1170, in tofile
Env.backend().execute(BlockMatrixWrite(self._bmir, writer))
File "/home/ch283/anaconda3/envs/hailpyt/lib/python3.6/site-packages/hail/backend/spark_backend.py", line 296, in execute
result = json.loads(self._jhc.backend().executeJSON(jir))
File "/home/ch283/anaconda3/envs/hailpyt/lib/python3.6/json/__init__.py", line 348, in loads
'not {!r}'.format(s.__class__.__name__))
TypeError: the JSON object must be str, bytes or bytearray, not 'JavaMap'
Wondering if anyone can shed some light on this. Is it due to multiple workers trying to interact with the blockmatrix at once?