#!/usr/bin/env python3
import numpy as np
import struct
import pycuda.driver as cuda
class GPUStruct(object):
    """Host-side mirror of a C struct that is passed to a CUDA kernel.

    The struct is described by a list of ``(dtype, name, value)`` tuples
    given in the same order as the members of the C struct.  A name that
    starts with ``*`` denotes a pointer member, as in C; its value is the
    host data that the pointer should refer to on the device.

    For example, if the struct is like this::

        struct Results
        {
            unsigned int n;
            float k;
            float *A;
            float *B;
        };

    your initialization could look like this::

        res = GPUStruct([(np.uint32, 'n', 10),
                         (np.float32, 'k', 0),
                         (np.float32, '*A', np.zeros(10, dtype=np.float32)),
                         (np.float32, '*B', np.ones(10, dtype=np.float32))])

    You can then use it like this::

        res.copy_to_gpu()
        func(res.get_ptr(), block=(1, 1, 1))

    And get data back like this::

        res.copy_from_gpu()
        res.A
        res.B
        res.n
    """

    def __init__(self, objs):
        """Record the struct layout and store the initial member values.

        Nothing is allocated on or sent to the device here; that happens
        in :meth:`copy_to_gpu`.

        Args:
            objs: Sequence of ``(dtype, name, value)`` entries, one per
                struct member and in struct order.  ``dtype`` is a callable
                such as ``np.float32`` used to convert the value, ``name``
                is the member name (prefixed with a single ``*`` for a
                pointer member) and ``value`` is the initial host value.

        Raises:
            ValueError: If a name contains ``*`` anywhere other than as a
                single leading pointer marker.
        """
        # Create the device handles first so that __del__ stays safe even
        # if validation below raises part-way through construction.
        self.__ptrs = {}
        self.__ptr = None
        self.__nbytes = {}
        self.__packstr = None
        self.__fromstr = None
        self.__fmt = ""
        self.__unpacked = None

        fields = []
        names = []
        for obj in objs:
            fmt, spec, value = obj[0], obj[1], obj[2]
            is_pointer = spec.startswith("*")
            name = spec[1:] if is_pointer else spec
            if "*" in name:
                raise ValueError(
                    "Invalid member name %r: only a single leading '*' "
                    "is supported." % (spec,)
                )
            fields.append((fmt, name, is_pointer))
            names.append(name)
            if is_pointer:
                # Device memory is allocated lazily in copy_to_gpu.
                self.__ptrs[name] = None
            setattr(self, name, value)

        # Tuples so the layout cannot be modified after construction.
        self.__objs = tuple(fields)
        self.__objnames = tuple(names)

    def __del__(self):
        """Free every device allocation this object still holds."""
        for name, allocation in self.__ptrs.items():
            if allocation is not None:
                allocation.free()
                self.__ptrs[name] = None
        if self.__ptr is not None:
            # Free the allocation holding the packed struct itself.
            self.__ptr.free()
            self.__ptr = None

    def __str__(self):
        """Return one ``name: value`` line per struct member."""
        return "".join(
            "%s: %s\n" % (name, str(getattr(self, name)))
            for name in self.__objnames
        )

    def copy_to_gpu(self, skip=None):
        """Send the current host values of the struct to the device.

        Each pointer member gets a device allocation sized to its current
        host data (re-allocated if the byte size changed since the last
        call), then the packed struct is copied to the device.

        Args:
            skip: Optional collection of pointer member names whose data
                should not be copied.  Their device memory is still
                allocated so the struct holds a valid pointer.
        """
        if skip is None:
            skip = ()

        for fmt, name, is_pointer in self.__objs:
            if not is_pointer:
                continue
            host = np.ascontiguousarray(fmt(getattr(self, name)))
            size = host.nbytes
            # A changed size invalidates the old allocation.
            if self.__ptrs[name] is not None and self.__nbytes.get(name) != size:
                self.__ptrs[name].free()
                self.__ptrs[name] = None
            if self.__ptrs[name] is None:
                self.__nbytes[name] = size
                self.__ptrs[name] = cuda.mem_alloc(size)
            if name not in skip:
                cuda.memcpy_htod(self.__ptrs[name], host)

        # Pack the scalars and device addresses and send the struct itself.
        self.__packstr = self._pack()
        if self.__ptr is None:
            self.__ptr = cuda.to_device(self.__packstr)
        else:
            cuda.memcpy_htod(self.__ptr, self.__packstr)

        # Writable host buffer used to read the struct back later.
        self.__fromstr = np.zeros(len(self.__packstr), dtype=np.uint8)

    def get_ptr(self):
        """Return the device pointer to the packed struct.

        Raises:
            RuntimeError: If :meth:`copy_to_gpu` has not been called yet.
        """
        if self.__ptr is None:
            raise RuntimeError("You never called copy_to_gpu.")
        return self.__ptr

    def get_packed(self):
        """Return the bytes last sent to the device for the struct.

        Raises:
            RuntimeError: If :meth:`copy_to_gpu` has not been called yet.
        """
        if self.__packstr is None:
            raise RuntimeError("You never called copy_to_gpu.")
        return self.__packstr

    def _pack(self):
        """Pack the members into bytes and remember the format string.

        Pointer members are packed as ``P`` using the integer value of
        their device allocation; other members are converted with their
        dtype and packed using that dtype's character code.  No byte-order
        prefix is used, so ``struct`` applies native size and alignment.
        """
        codes = []
        values = []
        for fmt, name, is_pointer in self.__objs:
            if is_pointer:
                codes.append("P")
                # A plain int avoids overflowing a signed np.intp for
                # addresses in the upper half of the address space.
                values.append(int(self.__ptrs[name]))
            else:
                scalar = fmt(getattr(self, name))
                codes.append(scalar.dtype.char)
                values.append(scalar)
        self.__fmt = "".join(codes)
        return struct.pack(self.__fmt, *values)

    def copy_from_gpu(self, skip=None):
        """Refresh the host attributes from the device.

        The packed struct is read back and unpacked to update the
        non-pointer members; the data behind each pointer member is copied
        into a host array of the member's dtype.

        Args:
            skip: Optional collection of pointer member names whose data
                should not be copied back.

        Raises:
            RuntimeError: If :meth:`copy_to_gpu` has not been called yet.
            ValueError: If the host array of a pointer member is larger
                than its device allocation.
        """
        if skip is None:
            skip = ()

        # Make sure the struct has been sent to the device.
        if self.__fromstr is None:
            raise RuntimeError("You never called copy_to_gpu.")

        cuda.memcpy_dtoh(self.__fromstr, self.__ptr)
        self.__unpacked = struct.unpack(self.__fmt, self.__fromstr)

        for index, (fmt, name, is_pointer) in enumerate(self.__objs):
            if not is_pointer:
                # Re-apply the dtype so the attribute keeps its type.
                setattr(self, name, fmt(self.__unpacked[index]))
                continue
            if name in skip:
                continue
            # The destination must be a contiguous array of the right dtype.
            host = np.ascontiguousarray(fmt(getattr(self, name)))
            if host.nbytes > self.__nbytes[name]:
                # The copy length follows the host buffer, so a larger
                # buffer would read past the end of the device allocation.
                raise ValueError(
                    "Host array for %r (%d bytes) is larger than its device "
                    "allocation (%d bytes); call copy_to_gpu first."
                    % (name, host.nbytes, self.__nbytes[name])
                )
            setattr(self, name, host)
            cuda.memcpy_dtoh(host, self.__ptrs[name])
# def __getattr__(self, attr):
# if attr in self.__objnames:
# if self.__unpacked is None:
# # must retrieve first
# self.retrieve()
# # get the index
# ind = self.__objnames.index(attr)
# if '*'+attr == self.__objs[ind]:
# # is pointer, so retrieve from card
# data = getattr(self, self.__objnames[ind]+'_data')
# cuda.memcpy_dtoh(data,getattr(self,self.__objnames[ind]))
# return data
# #return cuda.from_device(getattr(self,self.__objnames[ind]),
# # data.shape,
# # data.dtype)
# else:
# # just lookup in unpacked
# return self.__unpacked[ind]
# else:
# raise AttributeError("Attribute not found %s." % (attr))