Source code for toor.Optimizer.selectable_events

import numpy as np
import os
from array import array
import time
import math
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import matplotlib.pyplot as plt


[docs] class ROIEvents: def __init__(self, EM_obj): self.cuda_drv = EM_obj.cuda_drv self.EM_obj = EM_obj self.a = EM_obj.a self.a_normal = EM_obj.a_normal self.a_cf = EM_obj.a_cf self.b = EM_obj.b self.b_normal = EM_obj.b_normal self.b_cf = EM_obj.b_cf self.c = EM_obj.c self.c_normal = EM_obj.c_normal self.c_cf = EM_obj.c_cf self.d = EM_obj.d self.d_normal = EM_obj.d_normal self.d_cf = EM_obj.d_cf self.A = EM_obj.im_index_x self.B = EM_obj.im_index_y self.C = EM_obj.im_index_z self.im = EM_obj.im self.half_crystal_pitch_xy = EM_obj.half_crystal_pitch_xy # self.half_crystal_pitch_xy = np.float32(2.0) self.half_crystal_pitch_z = EM_obj.half_crystal_pitch_z # self.half_crystal_pitch_z = np.float32(2.0) self.sum_vor = EM_obj.sum_pixel self.active_x = EM_obj.active_pixel_x self.active_y = EM_obj.active_pixel_y self.active_z = EM_obj.active_pixel_z self.distance_between_array_pixel = EM_obj.distance_between_array_pixel self.distance_to_center_plane = EM_obj.distance_to_center_plane self.distance_to_center_plane_normal = EM_obj.distance_to_center_plane_normal self.A_gpu = None self.B_gpu = None self.C_gpu = None self.im_gpu = None self.active_x_gpu = None self.active_y_gpu = None self.active_z_gpu = None self.mod_pixel2pos = None self.valid_vor = None self.weight = np.int32(self.A.shape[0]) self.height = np.int32(self.A.shape[1]) self.depth = np.int32(self.A.shape[2]) self.pixel2pos_source_model_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "PET", "mod_pixel2pos.c") self.pixel2pos_source_model = open(self.pixel2pos_source_model_file) self._load_machine_C_code() def _load_machine_C_code(self): self.mod_pixel2pos = SourceModule("""{}""".format((self.pixel2pos_source_model.read())))
[docs] def pixel2Position(self ): print('Optimizer-Pixel2Position') cuda = self.cuda_drv # device = cuda.Device(0) # enter your gpu id here # ctx = device.make_context() self.A = np.ascontiguousarray(self.A.reshape( self.A.shape[0] * self.A.shape[1] * self.A.shape[2]), dtype=np.int32) self.B = np.ascontiguousarray(self.B.reshape( self.B.shape[0] * self.B.shape[1] * self.B.shape[2]), dtype=np.int32) self.C = np.ascontiguousarray(self.C.reshape( self.C.shape[0] * self.C.shape[1] * self.C.shape[2]), dtype=np.int32) self.im = np.ascontiguousarray(self.im.reshape( self.im.shape[0] * self.im.shape[1] * self.im.shape[2]), dtype=np.float32) number_of_events = np.int32(len(self.a)) number_of_datasets = np.int32(1) # Number of datasets (and concurrent operations) used. # Event as reference point ref = cuda.Event() ref.record() # Create the streams and events needed to calculation stream, event = [], [] marker_names = ['kernel_begin', 'kernel_end'] # Create List to allocate chunks of data for dataset in range(number_of_datasets): stream.append(cuda.Stream()) event.append(dict([(marker_names[l], cuda.Event()) for l in range(len(marker_names))])) one_alloc_original_array = [self.A, self.B, self.C, self.im, self.active_x, self.active_y, self.active_z] one_alloc_gpu_array = [None]*len(one_alloc_original_array) for st in range(len(one_alloc_original_array)): one_alloc_gpu_array[st] = cuda.mem_alloc(one_alloc_original_array[st].size * one_alloc_original_array[st].dtype.itemsize) cuda.memcpy_htod_async(one_alloc_gpu_array[st], one_alloc_original_array[st]) # self.A_gpu = cuda.mem_alloc(self.A.size * self.A.dtype.itemsize) # self.B_gpu = cuda.mem_alloc(self.B.size * self.B.dtype.itemsize) # self.C_gpu = cuda.mem_alloc(self.C.size * self.C.dtype.itemsize) # self.im_gpu = cuda.mem_alloc(self.im.size * self.im.dtype.itemsize) # self.active_x_gpu = cuda.mem_alloc(self.active_x.size * self.active_x.dtype.itemsize) # self.active_y_gpu = cuda.mem_alloc(self.active_y.size * self.active_y.dtype.itemsize) # self.active_z_gpu = cuda.mem_alloc(self.active_z.size * self.active_z.dtype.itemsize) # # cuda.memcpy_htod_async(self.A_gpu, self.A) # cuda.memcpy_htod_async(self.B_gpu, self.B) # cuda.memcpy_htod_async(self.C_gpu, self.C) # cuda.memcpy_htod_async(self.im_gpu, self.im) # cuda.memcpy_htod_async(self.active_x_gpu, self.active_x) # cuda.memcpy_htod_async(self.active_y_gpu, self.active_y) # cuda.memcpy_htod_async(self.active_z_gpu, self.active_z) partial_allocations_original_array = [self.a, self.b, self.c, self.d, self.a_normal, self.b_normal, self.c_normal,self.d_normal, self.a_cf, self.b_cf, self.c_cf, self.d_cf, self.sum_vor, self.distance_to_center_plane, self.distance_to_center_plane_normal] # partial_allocations_cutted_array = [a_cut, b_cut,c_cut, d_cut, # a_normal_cut, b_normal_cut, c_normal_cut, d_normal_cut, # a_cf_cut, b_cf_cut, c_cf_cut, d_cf_cut, sum_vor_cut] # partial_allocations_cutted_array = [[None] * number_of_datasets for _ in range(len(partial_allocations_original_array))] # partial_allocations_cutted_array_gpu = [a_cut_gpu, b_cut_gpu, c_cut_gpu, d_cut_gpu, # a_cut_normal_gpu, b_cut_normal_gpu, c_cut_normal_gpu, d_cut_normal_gpu, # a_cf_cut_gpu, b_cf_cut_gpu, c_cf_cut_gpu, d_cf_cut_gpu, sum_vor_gpu] partial_allocations_cutted_array_gpu = [[None] * number_of_datasets for _ in range(len(partial_allocations_original_array))] for dataset in range(number_of_datasets): begin_dataset = np.int32(dataset * number_of_events / number_of_datasets) end_dataset = np.int32((dataset + 1) * number_of_events / number_of_datasets) for st in range(len(partial_allocations_cutted_array)): partial_allocations_cutted_array[st][dataset] = partial_allocations_original_array[st][begin_dataset:end_dataset] partial_allocations_cutted_array_gpu[st][dataset] = cuda.mem_alloc(partial_allocations_cutted_array[st][dataset].size * partial_allocations_cutted_array[st][dataset].dtype.itemsize) cuda.memcpy_htod_async(partial_allocations_cutted_array_gpu[st][dataset], partial_allocations_cutted_array[st][dataset]) # Cutting dataset # For forward projection the data is cutted by number of events. For backprojection is cutted per image pieces of image # a_cut[dataset] = self.a[begin_dataset:end_dataset] # b_cut[dataset] = self.b[begin_dataset:end_dataset] # c_cut[dataset] = self.c[begin_dataset:end_dataset] # d_cut[dataset] = d[begin_dataset:end_dataset] # a_normal_cut[dataset] = a_normal[begin_dataset:end_dataset] # b_normal_cut[dataset] = b_normal[begin_dataset:end_dataset] # c_normal_cut[dataset] = c_normal[begin_dataset:end_dataset] # d_normal_cut[dataset] = d_normal[begin_dataset:end_dataset] # # a_cf_cut[dataset] = a_cf[begin_dataset:end_dataset] # b_cf_cut[dataset] = b_cf[begin_dataset:end_dataset] # c_cf_cut[dataset] = c_cf[begin_dataset:end_dataset] # d_cf_cut[dataset] = d_cf[begin_dataset:end_dataset] # sum_vor_cut[dataset] = sum_vor[begin_dataset:end_dataset] # Forward # a_cut_gpu[dataset] = cuda.mem_alloc(a_cut[dataset].size * a_cut[dataset].dtype.itemsize) # b_cut_gpu[dataset] = cuda.mem_alloc(b_cut[dataset].size * b_cut[dataset].dtype.itemsize) # c_cut_gpu[dataset] = cuda.mem_alloc(c_cut[dataset].size * c_cut[dataset].dtype.itemsize) # d_cut_gpu[dataset] = cuda.mem_alloc(d_cut[dataset].size * d_cut[dataset].dtype.itemsize) # a_cut_normal_gpu[dataset] = cuda.mem_alloc( # a_normal_cut[dataset].size * a_normal_cut[dataset].dtype.itemsize) # b_cut_normal_gpu[dataset] = cuda.mem_alloc( # b_normal_cut[dataset].size * b_normal_cut[dataset].dtype.itemsize) # c_cut_normal_gpu[dataset] = cuda.mem_alloc( # c_normal_cut[dataset].size * c_normal_cut[dataset].dtype.itemsize) # d_cut_normal_gpu[dataset] = cuda.mem_alloc( # d_normal_cut[dataset].size * d_normal_cut[dataset].dtype.itemsize) # # a_cf_cut_gpu[dataset] = cuda.mem_alloc(a_cf_cut[dataset].size * a_cf_cut[dataset].dtype.itemsize) # b_cf_cut_gpu[dataset] = cuda.mem_alloc(b_cf_cut[dataset].size * b_cf_cut[dataset].dtype.itemsize) # c_cf_cut_gpu[dataset] = cuda.mem_alloc(c_cf_cut[dataset].size * c_cf_cut[dataset].dtype.itemsize) # d_cf_cut_gpu[dataset] = cuda.mem_alloc(d_cf_cut[dataset].size * d_cf_cut[dataset].dtype.itemsize) # # sum_vor_gpu[dataset] = cuda.mem_alloc(sum_vor_cut[dataset].size * sum_vor_cut[dataset].dtype.itemsize) # # cuda.memcpy_htod_async(sum_vor_gpu[dataset], sum_vor_cut[dataset]) # # cuda.memcpy_htod_async(a_cut_gpu[dataset], a_cut[dataset]) # cuda.memcpy_htod_async(b_cut_gpu[dataset], b_cut[dataset]) # cuda.memcpy_htod_async(c_cut_gpu[dataset], c_cut[dataset]) # cuda.memcpy_htod_async(d_cut_gpu[dataset], d_cut[dataset]) # cuda.memcpy_htod_async(a_cut_normal_gpu[dataset], a_normal_cut[dataset]) # cuda.memcpy_htod_async(b_cut_normal_gpu[dataset], b_normal_cut[dataset]) # cuda.memcpy_htod_async(c_cut_normal_gpu[dataset], c_normal_cut[dataset]) # cuda.memcpy_htod_async(d_cut_normal_gpu[dataset], d_normal_cut[dataset]) # # cuda.memcpy_htod_async(a_cf_cut_gpu[dataset], a_cf_cut[dataset]) # cuda.memcpy_htod_async(b_cf_cut_gpu[dataset], b_cf_cut[dataset]) # cuda.memcpy_htod_async(c_cf_cut_gpu[dataset], c_cf_cut[dataset]) # cuda.memcpy_htod_async(d_cf_cut_gpu[dataset], d_cf_cut[dataset]) # Backprojection tic = time.time() number_of_active_pixels = np.int32(len(self.active_x)) for dataset in range(number_of_datasets): begin_dataset = np.int32(dataset * number_of_events / number_of_datasets) end_dataset = np.int32((dataset + 1) * number_of_events / number_of_datasets) threadsperblock = (256, 1, 1) blockspergrid_x = int(math.ceil(((end_dataset - begin_dataset)) / threadsperblock[0])) blockspergrid_y = int(math.ceil(1 / threadsperblock[1])) blockspergrid_z = int(math.ceil(1 / threadsperblock[2])) blockspergrid = (blockspergrid_x, blockspergrid_y, blockspergrid_z) event[dataset]['kernel_begin'].record(stream[dataset]) func_forward = self.mod_pixel2pos.get_function("pixeltoangle") func_forward(self.weight, self.height, self.depth, partial_allocations_cutted_array_gpu[13][dataset], partial_allocations_cutted_array_gpu[14][dataset], self.distance_between_array_pixel, number_of_events, begin_dataset, end_dataset, partial_allocations_cutted_array_gpu[0][dataset], partial_allocations_cutted_array_gpu[4][dataset], partial_allocations_cutted_array_gpu[8][dataset], partial_allocations_cutted_array_gpu[1][dataset], partial_allocations_cutted_array_gpu[5][dataset], partial_allocations_cutted_array_gpu[9][dataset], partial_allocations_cutted_array_gpu[2][dataset], partial_allocations_cutted_array_gpu[6][dataset], partial_allocations_cutted_array_gpu[10][dataset], partial_allocations_cutted_array_gpu[3][dataset], partial_allocations_cutted_array_gpu[7][dataset], partial_allocations_cutted_array_gpu[11][dataset], one_alloc_gpu_array[0], one_alloc_gpu_array[1], one_alloc_gpu_array[2], partial_allocations_cutted_array_gpu[12][dataset], one_alloc_gpu_array[3], one_alloc_gpu_array[4], one_alloc_gpu_array[5], one_alloc_gpu_array[6], number_of_active_pixels, block=threadsperblock, grid=blockspergrid, stream=stream[dataset]) # Sincronization of streams for dataset in range(number_of_datasets): # Commenting out this line should break concurrency. event[dataset]['kernel_end'].record(stream[dataset]) # Transfering data from Optimizer for dataset in range(number_of_datasets): begin_dataset = np.int32(dataset * number_of_events / number_of_datasets) end_dataset = np.int32((dataset + 1) * number_of_events / number_of_datasets) cuda.memcpy_dtoh_async(self.sum_vor, partial_allocations_cutted_array_gpu[12][dataset]) toc = time.time() print('Time part Forward Projection {} : {}'.format(1, toc - tic)) cuda.memcpy_dtoh_async(self.im, one_alloc_gpu_array[3]) print('IM: {}'.format(np.sum(self.im))) # free, total = cuda.mem_get_info() print('Sum_active_pixels: {}'.format(np.sum(self.sum_vor))) valid_vor = np.copy(self.sum_vor) valid_vor[valid_vor > 0] = 1 self.save_gpu_results() self.valid_vor = valid_vor # self.generate_roi_listmode_data() return valid_vor
[docs] def generate_roi_listmode_data(self): roi_listmode = self.EM_obj.easypetdata.fileBodyData[self.valid_vor == 1] # crystal_geometry = self.crystal_geometry # self.path_data_validation = os.path.join(os.path.dirname(self.study_file), "Data_Validation") # if not os.path.isdir(self.path_data_validation): # os.makedirs(self.path_data_validation) f_energy_corrected, ((ax_EA, ax_EB)) = plt.subplots(1, 2) f_energy_corrected.suptitle('Energy Cut (keV)', fontsize=16) # f_individuals f_ids, ((ax_idA, ax_idB)) = plt.subplots(1, 2) f_ids.suptitle('Crystal ID', fontsize=16) f_motor_c, ((ax_top_c, ax_bot_c)) = plt.subplots(1, 2) f_motor_c.suptitle('Motors detection angles (.easypet)', fontsize=16) f_time, ax_time = plt.subplots(1, 1) u_EA, indices_EA = np.unique(roi_listmode[:, 0], return_index=True) u_EB, indices_EB = np.unique(roi_listmode[:, 1], return_index=True) u_idA, indices_idA = np.unique(roi_listmode[:, 2], return_index=True) u_idB, indices_idB = np.unique(roi_listmode[:, 3], return_index=True) u_bot_c, indices_bot = np.unique(roi_listmode[:, 4], return_index=True) u_c, indices = np.unique(roi_listmode[:, 5], return_index=True) u_time, indices_time = np.unique(roi_listmode[:, 6], return_index=True) ax_EA.hist(roi_listmode[:, 0], u_EA, [0, 1200]) ax_EA.set_xlabel("KeV") ax_EA.set_ylabel("Counts") ax_EB.hist(roi_listmode[:, 1], u_EB, [0, 1200]) ax_idA.hist(roi_listmode[:, 2], len(u_idA) + 1, [np.min(roi_listmode[:, 2]), np.max(roi_listmode[:, 2]) + 1]) ax_idB.hist(roi_listmode[:, 3], len(u_idB) + 1, [np.min(roi_listmode[:, 3]), np.max(roi_listmode[:, 3]) + 1]) ax_bot_c.hist(roi_listmode[:, 4], u_bot_c) ax_top_c.hist(roi_listmode[:, 5], u_c) ax_time.hist(roi_listmode[:, 6], u_time) plt.show()
[docs] def save_gpu_results(self): im = self.im.reshape(self.weight, self.height, self.depth) volume = im.astype(np.float32) length = volume.shape[0] * volume.shape[2] * volume.shape[1] data = np.reshape(volume, [1, length], order='F') shapeIm = volume.shape file_name_im = os.path.join(self.EM_obj.directory, "gpu_surface") file_name_selected_events = os.path.join(self.EM_obj.directory, "sum_vor") np.save(file_name_selected_events, self.sum_vor) output_file = open(file_name_im, 'wb') arr = array('f', data[0]) # arr = array('d', data[0]) arr.tofile(output_file) output_file.close()