import numpy as np
import os
from array import array
import time
import math
import pycuda.driver as cuda
# import pycuda.autoinit
from pycuda.compiler import SourceModule
# from ..Segmentation.findisosurface import FindISOSurface
from .selectable_events import ROIEvents
from .CPU import IterativeAlgorithmCPU
from .gpu_shared_memory import GPUSharedMemorySingleKernel
from .gpu_multiple_kernel import GPUSharedMemoryMultipleKernel
[docs]
class EM:
def __init__(self, easypetdata=None, planes_equation=None, algorithm="LM-MLEM", algorithm_options=None,
number_of_iterations=10, number_of_subsets=1, projector_type="Box Counts",
normalization_matrix=None, attenuation_correction=False, attenuation_map=None,
decay_correction=False, time_correction=None, doi_correction=None, doi_mapping=None,
normalization_calculation_flag=False, random_correction=False, scatter_correction=False,
scatter_angle_correction=False, cut_fov=True,
cuda_drv=None, GPU=True, directory=None,
shared_memory=True, saved_image_by_iteration=True, pixeltoangle=False,
entry_im=None, multiple_kernel=False, signals_interface=None,
current_info_step=""):
if planes_equation is None:
return
if directory is None:
return
self.easypetdata = easypetdata
self.algorithm = algorithm
self.algorithm_options = algorithm_options
self.cuda_drv = cuda_drv
self.normalization_calculation_flag = normalization_calculation_flag
self.signals_interface = signals_interface
self.current_info_step = current_info_step
self.directory = directory # study directory
self.number_of_events = planes_equation.number_of_events
# Algorithm
self.number_of_iterations = number_of_iterations
self.number_of_subsets = number_of_subsets
self.saved_image_by_iteration = saved_image_by_iteration
# Optimizer
self.shared_memory = shared_memory
# Geometry
# self.crystal_central_planes = planes_equation.crystal_planes
self.number_of_pixels_x = planes_equation.number_of_pixels_x
self.number_of_pixels_y = planes_equation.number_of_pixels_y
self.number_of_pixels_z = planes_equation.number_of_pixels_z
self.distance_to_center_plane = planes_equation.distance_to_central_plane
self.distance_to_center_plane_normal = planes_equation.distance_to_central_plane_normal
self.im_index_x = planes_equation.im_index_x
self.im_index_y = planes_equation.im_index_y
self.im_index_z = planes_equation.im_index_z
#
# self.p1_list = planes_equation.p1_list
# self.p2_list = planes_equation.p2_list
# self.p3_list = planes_equation.p3_list
# self.p4_list = planes_equation.p4_list
self.a = planes_equation.a
self.b = planes_equation.b
self.c = planes_equation.c
self.d = planes_equation.d
self.a_normal = planes_equation.a_normal
self.b_normal = planes_equation.b_normal
self.c_normal = planes_equation.c_normal
self.d_normal = planes_equation.d_normal
self.a_cf = planes_equation.a_cf
self.b_cf = planes_equation.b_cf
self.c_cf = planes_equation.c_cf
self.d_cf = planes_equation.d_cf
self.half_crystal_pitch_z = planes_equation.half_crystal_pitch_z
self.distance_between_array_pixel = planes_equation.distance_between_array_pixel
self.half_crystal_pitch_xy = planes_equation.half_crystal_pitch_xy
self.x_min_f = planes_equation.x_min_f
self.x_max_f = planes_equation.x_max_f
self.y_min_f = planes_equation.y_min_f
self.y_max_f = planes_equation.y_max_f
self.z_min_f = planes_equation.z_min_f
self.z_max_f = planes_equation.z_max_f
self.voxelSize =[planes_equation.pixelSizeXY, planes_equation.pixelSizeXY, planes_equation.pixelSizeXYZ]
# Corrections
self.isosurface = None
self.surface = None
self.active_pixel_x = None
self.active_pixel_y = None
self.active_pixel_z = None
self.attenuation_matrix = None
self.adjust_coef = None
self.sum_pixel = None
self.doi_mapping = doi_mapping
self.time_correction = time_correction
self.time_correction = np.ascontiguousarray(time_correction, dtype=np.float32)
self.projector_type = projector_type
self.doi_correction = doi_correction
self.decay_correction = decay_correction
self.random_correction = random_correction
self.scatter_correction = scatter_correction
self.scatter_angle_correction = scatter_angle_correction
self.roi_map = None
if normalization_matrix is None:
self.normalization_matrix = np.ascontiguousarray(
np.ones((self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z),
dtype=np.float32))
else:
normalize_sens_size = ((np.array(normalization_matrix.shape) - np.array(self.im_index_x.shape)) / 2).astype(
np.int16)
if normalize_sens_size[0] >= 0:
self.normalization_matrix = normalization_matrix[
normalize_sens_size[0]:(
normalization_matrix.shape[0] - normalize_sens_size[0]),
normalize_sens_size[1]:(
normalization_matrix.shape[1] - normalize_sens_size[1]),
normalize_sens_size[2]:(
normalization_matrix.shape[2] - normalize_sens_size[2])]
else:
mask = np.zeros((self.im_index_x.shape))
normalize_sens_size_abs = np.abs(normalize_sens_size)
rest = normalize_sens_size_abs % 2
mask[normalize_sens_size_abs[0]:mask.shape[0] - normalize_sens_size_abs[0]- rest[0],
normalize_sens_size_abs[1]:mask.shape[1] - normalize_sens_size_abs[1]-rest[1],
normalize_sens_size_abs[2]:mask.shape[2] - normalize_sens_size_abs[2]-rest[2]] = normalization_matrix
self.normalization_matrix = mask
if self.normalization_matrix.shape != self.im_index_x.shape:
self.normalization_matrix = self.normalization_matrix[1:self.im_index_x.shape[0] + 1,
1: self.im_index_x.shape[1] + 1, 0:self.im_index_x.shape[2]]
# self.normalization_matrix = np.ascontiguousarray(self.normalization_matrix, dtype=np.float32)
self.normalization_matrix = np.ascontiguousarray(normalization_matrix, dtype=np.float32)
if cut_fov:
self.apply_circular_mask_fov()
self.load_initial_arrays()
if pixeltoangle:
self.load_roi_map()
if self.roi_map is not None:
self.get_active_roi_voxels()
self.remove_events_outside_ROI()
self.im = self._fullFoVPreviousProbability()
self.im = np.ascontiguousarray(self.im, dtype=np.float32)
# self.normalization_matrix *= im
# self.normalization_matrix /= np.sum(self.normalization_matrix)
# self.normalization_matrix[self.normalization_matrix < 0.00001] = 0
# self.im = np.ascontiguousarray(self.im, dtype=np.float32)
# self.im = np.ascontiguousarray(
# np.ones((self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z),
# dtype=np.float32))
# self.im = self.roi_map / np.max(self.roi_map)
# self.im = self.surface / np.max(self.surface)
# self.im = np.ascontiguousarray(self.im, dtype=np.float32)
self.fov_matrix_cut = self.surface
self.fov_matrix_cut = np.ascontiguousarray(self.fov_matrix_cut, dtype=np.byte)
# if self.normalization_calculation_flag:
# self.im = np.ascontiguousarray(
# np.ones((self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z),
# dtype=np.float32))
# # self.im = self.roi_map / np.max(self.roi_map)
# self.im = self.surface / np.max(self.surface)
if GPU:
tic = time.time()
if not multiple_kernel:
self.im = self._vor_design_gpu_shared_memory(self.a, self.a_normal, self.a_cf, self.im_index_x, self.b,
self.b_normal, self.b_cf, self.im_index_y, self.c,
self.c_normal, self.c_cf, self.im_index_z,
self.d, self.d_normal, self.d_cf, self.adjust_coef,
self.im, self.half_crystal_pitch_xy,
self.half_crystal_pitch_z, self.sum_pixel,
self.fov_matrix_cut,
self.normalization_matrix, self.time_correction)
# self.im = self._vor_design_gpu_shared_memory_multiple_reads(self.a, self.a_normal,self.a_cf, self.im_index_x, self.b, self.b_normal, self.b_cf, self.im_index_y, self.c, self.c_normal,self.c_cf,self.im_index_z,
# self.d, self.d_normal, self.d_cf, self.adjust_coef, self.im, self.half_crystal_pitch_xy,
# self.half_crystal_pitch_z, self.sum_pixel, self.sensivity_matrix,
# self.normalization_matrix, self.time_correction)
elif multiple_kernel:
gpu_alg = GPUSharedMemoryMultipleKernel(self, optimize_reads_and_calcs=False)
# gpu_alg.multikernel_optimized_memory_reads()
gpu_alg.multiplekernel()
self.im = gpu_alg.im
# self.cdrf = True
# self.im = self._vor_design_gpu_shared_memory_multiple_reads(self.a, self.a_normal, self.a_cf,
# self.im_index_x, self.b,
# self.b_normal, self.b_cf,
# self.im_index_y, self.c,
# self.c_normal, self.c_cf,
# self.im_index_z,
# self.d, self.d_normal, self.d_cf,
# self.adjust_coef,
# self.im, self.half_crystal_pitch_xy,
# self.half_crystal_pitch_z,
# self.sum_pixel,
# self.fov_matrix_cut,
# self.normalization_matrix,
# self.time_correction)
# # if use_half_precision:
# self.im = self._vor_design_gpu_shared_memory_multiple_reads_streamed(self.a, self.a_normal,
# self.a_cf, self.im_index_x,
# self.b,
# self.b_normal, self.b_cf,
# self.im_index_y, self.c,
# self.c_normal, self.c_cf,
# self.im_index_z,
# self.d, self.d_normal,
# self.d_cf, self.adjust_coef,
# self.im,
# self.half_crystal_pitch_xy,
# self.half_crystal_pitch_z,
# self.sum_pixel,
# self.fov_matrix_cut,
# self.normalization_matrix,
# self.time_correction)
# else:
toc = time.time()
print('Optimizer CALCULATION OSEM: {}'.format(toc - tic))
# np.save('im.npy', im)
# self.vor_design_cpu()
# self.im =self.im_cpu-self.im
else:
cpu_alg = IterativeAlgorithmCPU(self)
self.im = cpu_alg.im
if self.normalization_calculation_flag:
self._save_image_by_it(self.im, normalization=True)
[docs]
def load_initial_arrays(self):
self.attenuation_matrix = np.ascontiguousarray(
np.ones((self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z),
dtype=np.float32))
self.adjust_coef = np.ascontiguousarray(
np.zeros((self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z), dtype=np.float32))
self.sum_pixel = np.ascontiguousarray(
np.zeros(self.a.shape, dtype=np.float32))
if self.normalization_calculation_flag:
self.im = np.ones((self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z)) * \
len(self.a) / (self.number_of_pixels_x * self.number_of_pixels_y * self.number_of_pixels_z)
self.im = np.ascontiguousarray(self.im, dtype=np.float32)
# self.im = np.ascontiguousarray(
# np.ones((self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z),
# dtype=np.float32))
# self.im = self.roi_map / np.max(self.roi_map)
else:
if self.roi_map is not None:
self.im = np.ones((self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z)) * \
len(self.a) / (self.number_of_pixels_x * self.number_of_pixels_y * self.number_of_pixels_z)
self.im = np.ascontiguousarray(self.im, dtype=np.float32)
else:
self.im = np.ones((self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z)) * \
len(self.a) / (self.number_of_pixels_x * self.number_of_pixels_y * self.number_of_pixels_z)
self.im = np.ascontiguousarray(self.im, dtype=np.float32)
[docs]
def get_active_roi_voxels(self):
self.isosurface = FindISOSurface(volume=self.roi_map, directory=self.directory)
# self.isosurface.threshold3DISOSurface()
self.isosurface.get_active_pixels()
self.isosurface.save_calculated_surface()
active_pixels = self.isosurface.active_pixels
self.surface = self.isosurface.surface_volume
self.active_pixel_x = active_pixels[0]
self.active_pixel_y = active_pixels[1]
self.active_pixel_z = active_pixels[2]
self.active_pixel_x = np.ascontiguousarray(self.active_pixel_x, dtype=np.int32)
self.active_pixel_y = np.ascontiguousarray(self.active_pixel_y, dtype=np.int32)
self.active_pixel_z = np.ascontiguousarray(self.active_pixel_z, dtype=np.int32)
self.sum_pixel = np.ascontiguousarray(
np.zeros(self.a.shape, dtype=np.float32))
# file_name = os.path.join(self.directory, "map_total")
# sizefile = self.number_of_pixels_x * self.number_of_pixels_y * self.number_of_pixels_z
# output_file = open(file_name, 'rb')
# a = array('f')
# a.fromfile(output_file, sizefile)
# output_file.close()
# self.roi_map_total = np.array(a)
# self.roi_map_total = self.roi_map.reshape(
# (self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z),
# order='F')
# self.im = np.ascontiguousarray(
# np.ones((self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z),
# dtype=np.float32)) *self.surface
def _fullFoVPreviousProbability(self):
try:
file_name = os.path.join(self.directory, "whole_body", "ID_26 Jan 2022 - 14h 57m 00s_None_ IMAGE (154, 154, 373).T")
# file_name = os.path.join(self.directory, "map_heart")
sizefile = self.number_of_pixels_x * self.number_of_pixels_y * self.number_of_pixels_z
# sizefile = 101 * 101 * 153
sizeMatrix = [154,154,373]
sizefile = sizeMatrix[0]*sizeMatrix[1]*sizeMatrix[2]
output_file = open(file_name, 'rb')
a = array('f')
a.fromfile(output_file, sizefile)
output_file.close()
map_probability= np.array(a)
# self.roi_map = self.roi_map.reshape(
# (self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z),
# order='F')
# self.roi_map = self.roi_map.reshape(
# (101, 101, 153),
# order='F')
map_probability = map_probability.reshape(
(sizeMatrix),
order='F')
import scipy.ndimage
map_probability = scipy.ndimage.zoom(map_probability, np.array([self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z])/map_probability.shape, order=0)
map_probability = map_probability / np.sum(map_probability)
map_probability = 1 - map_probability
except FileNotFoundError:
map_probability = None
return map_probability
[docs]
def load_roi_map(self):
try:
file_name = os.path.join(self.directory, "map_2")
# file_name = os.path.join(self.directory, "map_heart")
sizefile = self.number_of_pixels_x * self.number_of_pixels_y * self.number_of_pixels_z
# sizefile = 101 * 101 * 153
sizeMatrix = [154, 154, 373]
sizefile = sizeMatrix[0] * sizeMatrix[1] * sizeMatrix[2]
output_file = open(file_name, 'rb')
a = array('f')
a.fromfile(output_file, sizefile)
output_file.close()
self.roi_map = np.array(a)
# self.roi_map = self.roi_map.reshape(
# (self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z),
# order='F')
# self.roi_map = self.roi_map.reshape(
# (101, 101, 153),
# order='F')
self.roi_map = self.roi_map.reshape(
(sizeMatrix),
order='F')
import scipy.ndimage
pixel_size_roi=np.array([0.2,0.2,0.2])
pixel_size_image=np.array([0.8,0.8,0.8])
pixel_relation = 1/((pixel_size_image/pixel_size_roi)*(np.array([self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z])/self.roi_map.shape))
self.roi_map = scipy.ndimage.zoom(self.roi_map, pixel_relation*np.array([self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z])/self.roi_map.shape, order=0)
mask = np.zeros(self.im_index_x.shape)
sens_size = np.abs((np.array(self.roi_map.shape) - np.array(self.im_index_x.shape)) / 2).astype(
np.int16)
mask[sens_size[0]:(mask.shape[0] - sens_size[0]),
sens_size[1]:(mask.shape[1] - sens_size[1]),
sens_size[2]:(mask.shape[2] - sens_size[2])]+=self.roi_map
self.roi_map = mask
# sens_size = ((np.array(self.roi_map.shape) - np.array(self.im_index_x.shape)) / 2).astype(
# np.int16)
# if sens_size[0] >= 0:
# self.roi_map = self.roi_map[
# sens_size[0]:(
# self.roi_map.shape[0] - sens_size[0]),
# sens_size[1]:(
# self.roi_map.shape[1] - sens_size[1]),
# sens_size[2]:(
# self.roi_map.shape[2] - sens_size[2])]
# self.roi_map = self.roi_map / self.roi_map
except FileNotFoundError:
self.roi_map = None
[docs]
def remove_events_outside_ROI(self):
roievents = ROIEvents(self)
valid_vor = roievents.pixel2Position()
self.a = self.a[valid_vor == 1]
self.b = self.b[valid_vor == 1]
self.c = self.c[valid_vor == 1]
self.d = self.d[valid_vor == 1]
self.a_normal = self.a_normal[valid_vor == 1]
self.b_normal = self.b_normal[valid_vor == 1]
self.c_normal = self.c_normal[valid_vor == 1]
self.d_normal = self.d_normal[valid_vor == 1]
self.a_cf = self.a_cf[valid_vor == 1]
self.b_cf = self.b_cf[valid_vor == 1]
self.c_cf = self.c_cf[valid_vor == 1]
self.d_cf = self.d_cf[valid_vor == 1]
self.time_correction = self.time_correction[valid_vor == 1]
self.sum_pixel = self.sum_pixel[valid_vor == 1]
self.distance_to_center_plane_normal = self.distance_to_center_plane_normal[valid_vor == 1]
self.distance_to_center_plane = self.distance_to_center_plane[valid_vor == 1]
# self.distance_between_array_pixel = self.distance_between_array_pixel[valid_vor == 1]
listmode = self.easypetdata
np.save("_listmode.npy", listmode)
listmode = listmode[valid_vor == 1]
np.save("_listmode_cut.npy", listmode)
self.a = np.ascontiguousarray(self.a, dtype=np.float32)
self.b = np.ascontiguousarray(self.b, dtype=np.float32)
self.c = np.ascontiguousarray(self.c, dtype=np.float32)
self.d = np.ascontiguousarray(self.d, dtype=np.float32)
self.a_normal = np.ascontiguousarray(self.a_normal, dtype=np.float32)
self.b_normal = np.ascontiguousarray(self.b_normal, dtype=np.float32)
self.c_normal = np.ascontiguousarray(self.c_normal, dtype=np.float32)
self.d_normal = np.ascontiguousarray(self.d_normal, dtype=np.float32)
self.a_cf = np.ascontiguousarray(self.a_cf, dtype=np.float32)
self.b_cf = np.ascontiguousarray(self.b_cf, dtype=np.float32)
self.c_cf = np.ascontiguousarray(self.c_cf, dtype=np.float32)
self.d_cf = np.ascontiguousarray(self.d_cf, dtype=np.float32)
self.time_correction = np.ascontiguousarray(self.time_correction, dtype=np.float32)
self.sum_pixel = np.ascontiguousarray(self.sum_pixel, dtype=np.float32)
self.distance_to_center_plane_normal = np.ascontiguousarray(self.distance_to_center_plane_normal,
dtype=np.float32)
self.distance_to_center_plane = np.ascontiguousarray(self.distance_to_center_plane, dtype=np.float32)
[docs]
def apply_circular_mask_fov(self):
self.fov_matrix_cut = np.ascontiguousarray(
np.ones((self.number_of_pixels_x, self.number_of_pixels_y, self.number_of_pixels_z),
dtype=np.float32))
xx = (np.tile(np.arange(0, self.im_index_x.shape[0]), (self.im_index_x.shape[0], 1)) - (
self.im_index_x.shape[0] - 1) / 2) ** 2
yy = (np.tile(np.arange(0, self.im_index_x.shape[1]), (self.im_index_x.shape[1], 1)) - (
self.im_index_x.shape[1] - 1) / 2) ** 2
xx = xx.T
# correct xx and yy shape in case of missmatch
if xx.shape[0] != yy.shape[0]:
xx, yy = np.meshgrid(np.arange(0, self.im_index_x.shape[0])-((self.im_index_x.shape[0] - 1) / 2) ** 2,
np.arange(0, self.im_index_x.shape[1])-((self.im_index_x.shape[1] - 1) / 2) ** 2)
xx = xx.T
yy=yy.T
# circle_cut = xx + yy - (self.im_index_x.shape[1] * np.sin(np.radians(easypetdata.half_real_range))*0.5) ** 2
circle_cut = xx + yy - (self.im_index_x.shape[1] * 0.5) ** 2
# circle_cut = xx + yy - (self.im_index_x.shape[1] * np.sin(np.radians(120/2))*.50) ** 2
circle_cut[circle_cut > 0] = 0
circle_cut[circle_cut < 0] = 1
circle_cut = np.tile(circle_cut[:, :, None], (1, 1, self.im_index_x.shape[2]))
self.fov_matrix_cut = self.fov_matrix_cut * circle_cut
self.fov_matrix_cut = np.ascontiguousarray(self.fov_matrix_cut, dtype=np.byte)
# calculate non-zero pixels
# def _vor_design_gpu_shared_memory_multiple_reads(self, a, a_normal, a_cf, A, b, b_normal, b_cf, B, c, c_normal,
# c_cf, C,
# d, d_normal, d_cf, adjust_coef, im, half_crystal_pitch_xy,
# half_crystal_pitch_z,
# sum_vor, fov_cut_matrix, normalization_matrix, time_factor):
# print('Optimizer STARTED - Multiple reads')
# # cuda.init()
# cuda = self.cuda_drv
# # device = cuda.Device(0) # enter your gpu id here
# # ctx = device.make_context()
# number_of_events = np.int32(len(a))
# weight = np.int32(A.shape[0])
# height = np.int32(A.shape[1])
# depth = np.int32(A.shape[2])
# # start_x = np.int32(A[0, 0, 0])
# start_x = np.int32(A[0, 0, 0])
# start_y = np.int32(B[0, 0, 0])
# start_z = np.int32(C[0, 0, 0])
# print("Start_point: {},{},{}".format(start_x, start_y, start_z))
# print('Image size: {},{}, {}'.format(weight, height, depth))
#
# half_distance_between_array_pixel = np.float32(self.distance_between_array_pixel / 2)
# normalization_matrix = normalization_matrix.reshape(
# normalization_matrix.shape[0] * normalization_matrix.shape[1] * normalization_matrix.shape[2])
#
# # SOURCE MODELS (DEVICE CODE)
#
# mod_forward_projection_shared_mem = SourceModule("""
# #include <stdint.h>
#
# texture<char, 1> tex;
#
#
# __global__ void forward_projection
# (const int n, const int m, const int p, const int start_x, const int start_y, const int start_z,
# const float crystal_pitch_XY,const float crystal_pitch_Z,const float distance_between_array_pixel,
# int number_of_events, const int begin_event_gpu_limitation, const int end_event_gpu_limitation,
# float *a, float *a_normal, float *a_cf, float *b,float *b_normal, float *b_cf,float *c,float *c_normal,
# float *c_cf, float *d, float *d_normal, float *d_cf, float *sum_vor, const char *fov_cut_matrix, const float *im_old)
# {
# const int shared_memory_size = 256;
# __shared__ float a_shared[shared_memory_size];
# __shared__ float b_shared[shared_memory_size];
# __shared__ float c_shared[shared_memory_size];
# __shared__ float d_shared[shared_memory_size];
# __shared__ float a_normal_shared[shared_memory_size];
# __shared__ float b_normal_shared[shared_memory_size];
# __shared__ float c_normal_shared[shared_memory_size];
# __shared__ float d_normal_shared[shared_memory_size];
# __shared__ float a_cf_shared[shared_memory_size];
# __shared__ float b_cf_shared[shared_memory_size];
# __shared__ float c_cf_shared[shared_memory_size];
# __shared__ float d_cf_shared[shared_memory_size];
# __shared__ float sum_vor_shared[shared_memory_size];
# __shared__ char fov_cut_matrix_shared[shared_memory_size];
# /*
# const int blockId = blockIdx.x+ blockIdx.y * gridDim.x+ gridDim.x * gridDim.y * blockIdx.z;
# const int threadId= blockId * (blockDim.x * blockDim.y * blockDim.z)+ (threadIdx.z * (blockDim.x * blockDim.y))+ (threadIdx.y * blockDim.x)+ threadIdx.x;
# */
# int threadId=blockIdx.x *blockDim.x + threadIdx.x;
# int e;
#
# float d2;
# float d2_normal;
# float d2_cf;
# float value;
# float value_normal;
# float value_cf;
# float sum_vor_temp;
# int index;
# int x_t;
# int y_t;
# int z_t;
# float max_distance_projector;
# const int number_events_max = end_event_gpu_limitation-begin_event_gpu_limitation;
# const float error_pixel = 0.0000f;
# if (threadIdx.x>shared_memory_size)
# {
# return;
# }
# if(threadId>number_events_max)
# {
# return;
# }
#
# __syncthreads();
# e = threadId;
# int e_m = threadIdx.x;
# a_shared[e_m] = a[e];
# b_shared[e_m] = b[e];
# c_shared[e_m] = c[e];
# d_shared[e_m] = d[e];
# a_normal_shared[e_m] = a_normal[e];
# b_normal_shared[e_m] = b_normal[e];
# c_normal_shared[e_m] = c_normal[e];
# d_normal_shared[e_m] = d_normal[e];
# a_cf_shared[e_m] = a_cf[e];
# b_cf_shared[e_m] = b_cf[e];
# c_cf_shared[e_m] = c_cf[e];
# d_cf_shared[e_m] = d_cf[e];
# sum_vor_shared[e_m] = sum_vor[e];
#
# d2_normal = crystal_pitch_XY * sqrt(a_normal_shared[e_m]*a_normal_shared[e_m]+b_normal_shared[e_m]*b_normal_shared[e_m]+c_normal_shared[e_m]*c_normal_shared[e_m]);
# d2 = crystal_pitch_Z * sqrt(a_shared[e_m]*a_shared[e_m] + b_shared[e_m]*b_shared[e_m] + c_shared[e_m]*c_shared[e_m]);
# d2_cf = distance_between_array_pixel*sqrt(a_cf_shared[e_m]*a_cf_shared[e_m]+b_cf_shared[e_m]*b_cf_shared[e_m]+c_cf_shared[e_m]*c_cf_shared[e_m]);
# max_distance_projector=sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf);
#
# for(int l=0; l<n; l++)
# {
# x_t = l+start_x;
# for(int j=0; j<m; j++)
# {
# y_t = j+start_y;
# for(int k=0; k<p; k++)
# {
# /*
# index = l+j*n+k*m*n;
# fov_cut_matrix_shared[k]=fov_cut_matrix[k+j*p+l*p*m];
# index = l+j*n+k*m*n;
# */
# index = k+j*p+l*p*m;
#
#
# if (fov_cut_matrix[index]!=0)
# {
#
# z_t = k+start_z;
# value_normal = a_normal_shared[e_m]*x_t+b_normal_shared[e_m]*y_t+c_normal_shared[e_m] * z_t -d_normal_shared[e_m];
#
# if (value_normal < d2_normal && value_normal >= -d2_normal)
# {
# value = a_shared[e_m]*x_t+b_shared[e_m]*y_t+c_shared[e_m]*z_t-d_shared[e_m];
#
# if (value < d2 && value >=-d2 )
# {
# value_cf = a_cf_shared[e_m]*x_t+b_cf_shared[e_m]*y_t+c_cf_shared[e_m]*z_t-d_cf_shared[e_m];
#
#
# if (value_cf >= -d2_cf && value_cf<d2_cf)
# {
#
# sum_vor_temp +=im_old[index]*(1-sqrt(value*value+value_normal*value_normal+value_cf*value_cf)/max_distance_projector);
# /* im_old[index]*(1-sqrt(value*value+value_normal*value_normal+value_cf*value_cf)/max_distance_projector)
#
# */
# }
#
#
# }
#
#
#
#
# }
# }
#
# }
# }
# }
#
# /*
# sum_vor[e]= sum_vor_temp;
# sum_vor[e]= sum_vor_shared[e_m];
# __syncthreads();
# sum_vor[e]= sum_vor_shared[e_m];
# */
# sum_vor[e]= sum_vor_temp;
#
#
#
# }""")
# mod_normalization_shared_mem = SourceModule("""
# #include <stdint.h>
# texture<uint8_t, 1> tex;
#
# __global__ void normalization
# ( int dataset_number, int n, int m, int p, const float crystal_pitch_XY, const float crystal_pitch_Z,
# const float distance_between_array_pixel, int number_of_events, const int begin_event_gpu_limitation,
# const int end_event_gpu_limitation, const float *a, const float *a_normal, const float *a_cf, const float *b,
# const float *b_normal, const float *b_cf, const float *c, const float *c_normal, const float *c_cf, const float *d, const float *d_normal,
# const float *d_cf, const short *A, const short *B, const short *C, float *adjust_coef, float *sum_vor,
# char *fov_cut_matrix, float *time_factor)
# {
# extern __shared__ float adjust_coef_shared[];
# int idt=blockIdx.x *blockDim.x + threadIdx.x;
#
#
# float d2;
# float d2_normal;
# float d2_cf;
# float normal_value;
# float value;
# float value_cf;
# short a_temp;
# short b_temp;
# short c_temp;
# char fov_cut_temp;
# int i_s=threadIdx.x;
#
# if (idt>n*m*p)
# {
# return;
# }
#
# __syncthreads();
# adjust_coef_shared[i_s] = adjust_coef[idt];
# a_temp = A[idt];
# b_temp = B[idt];
# c_temp = C[idt];
# fov_cut_temp = fov_cut_matrix[idt];
#
#
#
# for(int e=begin_event_gpu_limitation; e<end_event_gpu_limitation; e++)
# {
# if (fov_cut_temp!=0)
# {
# normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
# d2_normal = crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
#
# if (normal_value< d2_normal && normal_value >= -d2_normal)
# {
# value = a[e]*a_temp+b[e]*b_temp +c[e]*c_temp- d[e];
# d2 = crystal_pitch_Z * sqrt(a[e]*a[e] + b[e]*b[e] + c[e]*c[e]);
#
#
# if (value < d2 && value >=-d2)
# {
# value_cf = a_cf[e]*a_temp+b_cf[e]*b_temp +c_cf[e] * c_temp-d_cf[e];
# d2_cf = distance_between_array_pixel*sqrt(a_cf[e]*a_cf[e]+b_cf[e]*b_cf[e]+c_cf[e]*c_cf[e]);
#
#
# if (value_cf >= -d2_cf && value_cf<d2_cf)
# {
#
#
# adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf));
#
#
# }
#
# /*
#
# adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf));
# adjust_coef_shared[i_s] += 1/sum_vor[e];
# normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
# d2_normal = error_pixel+crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
# adjust_coef_shared[i_s] += /(sum_vor[e]*time_factor[e]);
# adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[0]*time_factor[e]);
# */
#
#
#
#
#
# }
# }
#
# }
#
# }
#
# adjust_coef[idt] = adjust_coef_shared[i_s];
# __syncthreads();
#
#
#
# }
# """)
#
# mod_backward_projection_shared_mem = SourceModule("""
# #include <stdint.h>
# texture<uint8_t, 1> tex;
#
# __global__ void backprojection
# ( int dataset_number, int n, int m, int p, const float crystal_pitch_XY, const float crystal_pitch_Z,
# const float distance_between_array_pixel, int number_of_events, const int begin_event_gpu_limitation,
# const int end_event_gpu_limitation, const float *a, const float *a_normal, const float *a_cf, const float *b,
# const float *b_normal, const float *b_cf, const float *c, const float *c_normal, const float *c_cf, const float *d, const float *d_normal,
# const float *d_cf, const short *A, const short *B, const short *C, float *adjust_coef, float *sum_vor,
# char *fov_cut_matrix, float *time_factor)
# {
# extern __shared__ float adjust_coef_shared[];
# int idt=blockIdx.x *blockDim.x + threadIdx.x;
#
#
# float d2;
# float d2_normal;
# float d2_cf;
# float normal_value;
# float value;
# float value_cf;
# short a_temp;
# short b_temp;
# short c_temp;
# char fov_cut_temp;
# int i_s=threadIdx.x;
#
# if (idt>n*m*p)
# {
# return;
# }
#
# __syncthreads();
# adjust_coef_shared[i_s] = adjust_coef[idt];
# a_temp = A[idt];
# b_temp = B[idt];
# c_temp = C[idt];
# fov_cut_temp = fov_cut_matrix[idt];
#
#
#
# for(int e=begin_event_gpu_limitation; e<end_event_gpu_limitation; e++)
# {
# if (fov_cut_temp!=0)
# {
# normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
# d2_normal = crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
#
# if (normal_value< d2_normal && normal_value >= -d2_normal)
# {
# value = a[e]*a_temp+b[e]*b_temp +c[e]*c_temp- d[e];
# d2 = crystal_pitch_Z * sqrt(a[e]*a[e] + b[e]*b[e] + c[e]*c[e]);
#
#
# if (value < d2 && value >=-d2)
# {
# value_cf = a_cf[e]*a_temp+b_cf[e]*b_temp +c_cf[e] * c_temp-d_cf[e];
# d2_cf = distance_between_array_pixel*sqrt(a_cf[e]*a_cf[e]+b_cf[e]*b_cf[e]+c_cf[e]*c_cf[e]);
#
#
# if (value_cf >= -d2_cf && value_cf<d2_cf)
# {
#
#
#
# adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[e]*time_factor[e]);
#
# }
#
# /*
#
# adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[e]*time_factor[e]);
# adjust_coef_shared[i_s] += 1/sum_vor[e];
# normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
# d2_normal = error_pixel+crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
# adjust_coef_shared[i_s] += /(sum_vor[e]*time_factor[e]);
# adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[0]*time_factor[e]);
# */
#
#
#
#
#
# }
# }
#
# }
#
# }
#
# adjust_coef[idt] = adjust_coef_shared[i_s];
# __syncthreads();
#
#
#
# }
# """)
#
# mod_forward_projection_shared_mem_cdrf = SourceModule("""
# #include <stdint.h>
#
# texture<char, 1> tex;
#
#
# __global__ void forward_projection_cdrf
# (const int n, const int m, const int p, const int start_x, const int start_y, const int start_z,
# float *crystal_pitch_XY, float *crystal_pitch_Z,const float distance_between_array_pixel,
# int number_of_events, const int begin_event_gpu_limitation, const int end_event_gpu_limitation,
# float *a, float *a_normal, float *a_cf, float *b,float *b_normal, float *b_cf,float *c,float *c_normal,
# float *c_cf, float *d, float *d_normal, float *d_cf, float *sum_vor, const char *fov_cut_matrix, const float *im_old)
# {
# const int shared_memory_size = 256;
# __shared__ float a_shared[shared_memory_size];
# __shared__ float b_shared[shared_memory_size];
# __shared__ float c_shared[shared_memory_size];
# __shared__ float d_shared[shared_memory_size];
# __shared__ float a_normal_shared[shared_memory_size];
# __shared__ float b_normal_shared[shared_memory_size];
# __shared__ float c_normal_shared[shared_memory_size];
# __shared__ float d_normal_shared[shared_memory_size];
# __shared__ float a_cf_shared[shared_memory_size];
# __shared__ float b_cf_shared[shared_memory_size];
# __shared__ float c_cf_shared[shared_memory_size];
# __shared__ float d_cf_shared[shared_memory_size];
# __shared__ float sum_vor_shared[shared_memory_size];
# __shared__ char fov_cut_matrix_shared[shared_memory_size];
# __shared__ float crystal_pitch_Z_shared[shared_memory_size];
# __shared__ float crystal_pitch_XY_shared[shared_memory_size];
# /*
# const int blockId = blockIdx.x+ blockIdx.y * gridDim.x+ gridDim.x * gridDim.y * blockIdx.z;
# const int threadId= blockId * (blockDim.x * blockDim.y * blockDim.z)+ (threadIdx.z * (blockDim.x * blockDim.y))+ (threadIdx.y * blockDim.x)+ threadIdx.x;
# */
# int threadId=blockIdx.x *blockDim.x + threadIdx.x;
# int e;
#
# float d2;
# float d2_normal;
# float d2_cf;
# float value;
# float value_normal;
# float value_cf;
# float sum_vor_temp;
# int index;
# int x_t;
# int y_t;
# int z_t;
# float width;
# float height;
# float distance;
# float distance_other;
# float solid_angle;
#
#
# float max_distance_projector;
#
# const int number_events_max = end_event_gpu_limitation-begin_event_gpu_limitation;
# const float error_pixel = 0.0000f;
# if (threadIdx.x>shared_memory_size)
# {
# return;
# }
# if(threadId>number_events_max)
# {
# return;
# }
#
# __syncthreads();
# e = threadId;
# int e_m = threadIdx.x;
# a_shared[e_m] = a[e];
# b_shared[e_m] = b[e];
# c_shared[e_m] = c[e];
# d_shared[e_m] = d[e];
# a_normal_shared[e_m] = a_normal[e];
# b_normal_shared[e_m] = b_normal[e];
# c_normal_shared[e_m] = c_normal[e];
# d_normal_shared[e_m] = d_normal[e];
# a_cf_shared[e_m] = a_cf[e];
# b_cf_shared[e_m] = b_cf[e];
# c_cf_shared[e_m] = c_cf[e];
# d_cf_shared[e_m] = d_cf[e];
# sum_vor_shared[e_m] = sum_vor[e];
# crystal_pitch_Z_shared[e_m] = crystal_pitch_Z[e];
# crystal_pitch_XY_shared[e_m] = crystal_pitch_XY[e];
#
# d2_normal = crystal_pitch_XY_shared[e_m] * sqrt(a_normal_shared[e_m]*a_normal_shared[e_m]+b_normal_shared[e_m]*b_normal_shared[e_m]+c_normal_shared[e_m]*c_normal_shared[e_m]);
# d2 = crystal_pitch_Z_shared[e_m]* sqrt(a_shared[e_m]*a_shared[e_m] + b_shared[e_m]*b_shared[e_m] + c_shared[e_m]*c_shared[e_m]);
# d2_cf = distance_between_array_pixel*sqrt(a_cf_shared[e_m]*a_cf_shared[e_m]+b_cf_shared[e_m]*b_cf_shared[e_m]+c_cf_shared[e_m]*c_cf_shared[e_m]);
# max_distance_projector=sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf);
#
# for(int l=0; l<n; l++)
# {
# x_t = l+start_x;
# for(int j=0; j<m; j++)
# {
# y_t = j+start_y;
# for(int k=0; k<p; k++)
# {
# /*
# index = l+j*n+k*m*n;
# fov_cut_matrix_shared[k]=fov_cut_matrix[k+j*p+l*p*m];
# index = l+j*n+k*m*n;
# */
# index = k+j*p+l*p*m;
#
#
# if (fov_cut_matrix[index]!=0)
# {
#
# z_t = k+start_z;
# value_normal = a_normal_shared[e_m]*x_t+b_normal_shared[e_m]*y_t+c_normal_shared[e_m] * z_t -d_normal_shared[e_m];
#
# if (value_normal < d2_normal && value_normal >= -d2_normal)
# {
# value = a_shared[e_m]*x_t+b_shared[e_m]*y_t+c_shared[e_m]*z_t-d_shared[e_m];
#
# if (value < d2 && value >=-d2 )
# {
# value_cf = a_cf_shared[e_m]*x_t+b_cf_shared[e_m]*y_t+c_cf_shared[e_m]*z_t-d_cf_shared[e_m];
#
#
# if (value_cf >= -d2_cf && value_cf<d2_cf)
# {
# if (sqrt(value*value+value_normal*value_normal+value_cf*value_cf)<=max_distance_projector)
# {
# width = 2*(crystal_pitch_Z_shared[e_m] - abs(value));
# height = 2*(crystal_pitch_XY_shared[e_m] - abs(value_normal));
#
# distance =d2_cf+abs(value_cf);
# distance_other = abs(d2_cf+value_cf);
# solid_angle = 4*(width*width*height*height/(distance*distance*(4*distance*distance+width*width+height*height)));
# sum_vor_shared[e_m]+= im_old[index]*solid_angle;
#
# }
# /*
# sum_vor_shared[e_m]+= im_old[index]*(1-sqrt(value*value+value_normal*value_normal+value_cf*value_cf)/max_distance_projector);
# 4*arctan(
# 4*np.arctan(width*height/(2*distance_to_anh*np.sqrt(4*distance_between_array_pixel-value_cf)**2+width**2+height**2)))
# *(1-sqrt(value*value+value_normal*value_normal+value_cf*value_cf)/max_distance_projector)
# */
# }
#
#
# }
#
#
#
#
# }
# }
#
# }
# }
# }
#
# /*
# sum_vor[e]= sum_vor_temp;
# sum_vor[e]= sum_vor_shared[e_m];
#
#
# sum_vor[e]= sum_vor_temp;
# */
# __syncthreads();
# sum_vor[e]= sum_vor_shared[e_m];
#
#
#
# }""")
#
# mod_normalization_shared_mem_cdrf = SourceModule("""
# #include <stdint.h>
# texture<uint8_t, 1> tex;
#
# __global__ void normalization_cdrf
# ( int dataset_number, int n, int m, int p, float *crystal_pitch_XY, float *crystal_pitch_Z,
# const float distance_between_array_pixel, int number_of_events, const int begin_event_gpu_limitation,
# const int end_event_gpu_limitation, const float *a, const float *a_normal, const float *a_cf, const float *b,
# const float *b_normal, const float *b_cf, const float *c, const float *c_normal, const float *c_cf, const float *d, const float *d_normal,
# const float *d_cf, const short *A, const short *B, const short *C, float *adjust_coef, float *sum_vor,
# char *fov_cut_matrix, float *time_factor)
# {
# extern __shared__ float adjust_coef_shared[];
# int idt=blockIdx.x *blockDim.x + threadIdx.x;
#
#
# float d2;
# float d2_normal;
# float d2_cf;
# float normal_value;
# float value;
# float value_cf;
# short a_temp;
# short b_temp;
# short c_temp;
# char fov_cut_temp;
# int i_s=threadIdx.x;
# float width;
# float height;
# float distance;
# float distance_other;
# float solid_angle;
#
# if (idt>=n*m*p)
# {
# return;
# }
#
# __syncthreads();
# adjust_coef_shared[i_s] = adjust_coef[idt];
# a_temp = A[idt];
# b_temp = B[idt];
# c_temp = C[idt];
# fov_cut_temp = fov_cut_matrix[idt];
#
#
#
# for(int e=begin_event_gpu_limitation; e<end_event_gpu_limitation; e++)
# {
# if (fov_cut_temp!=0)
# {
# normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
# d2_normal = crystal_pitch_XY[e]* sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
#
# if (normal_value< d2_normal && normal_value >= -d2_normal)
# {
# value = a[e]*a_temp+b[e]*b_temp +c[e]*c_temp- d[e];
# d2 = crystal_pitch_Z[e] * sqrt(a[e]*a[e] + b[e]*b[e] + c[e]*c[e]);
#
#
# if (value < d2 && value >=-d2)
# {
# value_cf = a_cf[e]*a_temp+b_cf[e]*b_temp +c_cf[e] * c_temp-d_cf[e];
# d2_cf = distance_between_array_pixel*sqrt(a_cf[e]*a_cf[e]+b_cf[e]*b_cf[e]+c_cf[e]*c_cf[e]);
#
#
# if (value_cf >= -d2_cf && value_cf<d2_cf)
# {
# if (sqrt(value*value+normal_value*normal_value+value_cf*value_cf)<=sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))
# {
# width = 2*(crystal_pitch_Z[e]- abs(value));
# height = 2*(crystal_pitch_XY[e] - abs(normal_value));
# distance =d2_cf+abs(value_cf);
#
# solid_angle = 4*(width*width*height*height/(distance*distance*(4*distance*distance+width*width+height*height)));
# adjust_coef_shared[i_s] += solid_angle/(sum_vor[e]);
# /*
# distance_other =d2_cf-abs(value_cf);
# solid_angle = 4*(width*height/(2*distance*sqrt(4*distance*distance+width*width+height*height)));
# adjust_coef_shared[i_s] += time_factor[e]*(1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[e]);
# */
# }
#
# }
#
# /*
# (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/
# normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
# d2_normal = error_pixel+crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
# adjust_coef_shared[i_s] += /(sum_vor[e]*time_factor[e]);
# adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[0]*time_factor[e]);
# */
#
#
#
#
#
# }
# }
#
# }
#
# }
#
# adjust_coef[idt] = adjust_coef_shared[i_s];
# __syncthreads();
#
#
#
# }
# """)
#
# mod_backward_projection_shared_mem_cdrf = SourceModule("""
# #include <stdint.h>
# texture<uint8_t, 1> tex;
#
# __global__ void backprojection_cdrf
# ( int dataset_number, int n, int m, int p, const float *crystal_pitch_XY, const float *crystal_pitch_Z,
# const float distance_between_array_pixel, int number_of_events, const int begin_event_gpu_limitation,
# const int end_event_gpu_limitation, const float *a, const float *a_normal, const float *a_cf, const float *b,
# const float *b_normal, const float *b_cf, const float *c, const float *c_normal, const float *c_cf, const float *d, const float *d_normal,
# const float *d_cf, const short *A, const short *B, const short *C, float *adjust_coef, float *sum_vor,
# char *fov_cut_matrix, float *time_factor)
# {
# extern __shared__ float adjust_coef_shared[];
# int idt=blockIdx.x *blockDim.x + threadIdx.x;
#
#
# float d2;
# float d2_normal;
# float d2_cf;
# float normal_value;
# float value;
# float value_cf;
# short a_temp;
# short b_temp;
# short c_temp;
# char fov_cut_temp;
# int i_s=threadIdx.x;
# float width;
# float height;
# float distance;
# float distance_other;
# float solid_angle;
#
# if (idt>=n*m*p)
# {
# return;
# }
#
# __syncthreads();
# adjust_coef_shared[i_s] = adjust_coef[idt];
# a_temp = A[idt];
# b_temp = B[idt];
# c_temp = C[idt];
#
# fov_cut_temp = fov_cut_matrix[idt];
#
#
#
# for(int e=begin_event_gpu_limitation; e<end_event_gpu_limitation; e++)
# {
# if (fov_cut_temp!=0)
# {
# normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
# d2_normal = crystal_pitch_XY[e]* sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
#
# if (normal_value< d2_normal && normal_value >= -d2_normal)
# {
# value = a[e]*a_temp+b[e]*b_temp +c[e]*c_temp- d[e];
# d2 = crystal_pitch_Z[e] * sqrt(a[e]*a[e] + b[e]*b[e] + c[e]*c[e]);
#
#
# if (value < d2 && value >=-d2)
# {
# value_cf = a_cf[e]*a_temp+b_cf[e]*b_temp +c_cf[e] * c_temp-d_cf[e];
# d2_cf = distance_between_array_pixel*sqrt(a_cf[e]*a_cf[e]+b_cf[e]*b_cf[e]+c_cf[e]*c_cf[e]);
#
#
# if (value_cf >= -d2_cf && value_cf<d2_cf)
# {
# if (sqrt(value*value+normal_value*normal_value+value_cf*value_cf)<=sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))
# {
# width = 2*(crystal_pitch_Z[e]- abs(value));
# height = 2*(crystal_pitch_XY[e] - abs(normal_value));
# distance =d2_cf+abs(value_cf);
# distance_other =d2_cf-abs(value_cf);
# solid_angle = 4*(width*width*height*height/(distance*distance*(4*distance*distance+width*width+height*height)));
# if (sum_vor[e]!=0)
#
# {
# adjust_coef_shared[i_s] += solid_angle/(sum_vor[e]);
# }
# /*
# (4 * asin(sin(tan(width/distance))*sin(tan(height/distance))))*(4 * asin(sin(tan(width/distance))*sin(tan(height/distance))))/sum_vor[e];
# adjust_coef_shared[i_s] += time_factor[e]*(1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[e]);
#
# */
# }
#
# }
#
# /*
# (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/
# normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
# d2_normal = error_pixel+crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
# adjust_coef_shared[i_s] += /(sum_vor[e]*time_factor[e]);
# adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[0]*time_factor[e]);
# */
#
#
#
#
#
# }
# }
#
# }
#
# }
#
# adjust_coef[idt] = adjust_coef_shared[i_s];
# __syncthreads();
#
#
#
# }
# """)
#
# # float crystal_pitch, int number_of_events, float *a, float *b, float *c, float *d, int *A, int *B, int *C, float *im, float *vector_matrix
# # Host Code B, C, im, vector_matrix,
# number_of_datasets = np.int32(1) # Number of datasets (and concurrent operations) used.
# number_of_datasets_back = np.int32(1) # Number of datasets (and concurrent operations) used.
# # Start concurrency Test
# # Event as reference point
# ref = cuda.Event()
# ref.record()
#
# # Create the streams and events needed to calculation
# stream, event = [], []
# marker_names = ['kernel_begin', 'kernel_end']
# # Create List to allocate chunks of data
# A_cut_gpu, B_cut_gpu, C_cut_gpu = [None] * number_of_datasets, [None] * number_of_datasets, [
# None] * number_of_datasets
# A_cut, B_cut, C_cut = [None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
#
# a_gpu, b_gpu, c_gpu, d_gpu = [None] * number_of_datasets, [None] * number_of_datasets, [
# None] * number_of_datasets, [None] * number_of_datasets
# a_normal_gpu, b_normal_gpu, c_normal_gpu, d_normal_gpu = [None] * number_of_datasets, [
# None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
#
# a_cut, b_cut, c_cut, d_cut = [None] * number_of_datasets, [None] * number_of_datasets, [
# None] * number_of_datasets, [None] * number_of_datasets
# a_normal_cut, b_normal_cut, c_normal_cut, d_normal_cut = [None] * number_of_datasets, [
# None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
#
# a_cf_cut, b_cf_cut, c_cf_cut, d_cf_cut = [None] * number_of_datasets, [
# None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
#
# a_cut_gpu, b_cut_gpu, c_cut_gpu, d_cut_gpu = [None] * number_of_datasets, [None] * number_of_datasets, [
# None] * number_of_datasets, [None] * number_of_datasets
#
# a_cut_normal_gpu, b_cut_normal_gpu, c_cut_normal_gpu, d_cut_normal_gpu = [None] * number_of_datasets, [
# None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
#
# a_cf_cut_gpu, b_cf_cut_gpu, c_cf_cut_gpu, d_cf_cut_gpu = [None] * number_of_datasets, [
# None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
#
# time_factor_cut_gpu = [None] * number_of_datasets
# sum_vor_gpu = [None] * number_of_datasets
# sum_vor_cut = [None] * number_of_datasets
# probability_cut = [None] * number_of_datasets
# probability_gpu = [None] * number_of_datasets
# adjust_coef_cut = [None] * number_of_datasets
# adjust_coef_gpu = [None] * number_of_datasets
# adjust_coef_pinned = [None] * number_of_datasets_back
# fov_cut_matrix_cutted_gpu = [None] * number_of_datasets_back
# fov_cut_matrix_cut = [None] * number_of_datasets
# # fov_cut_matrix_gpu = [None] * number_of_datasets
# sum_vor_pinned = [None] * number_of_datasets
#
# distance_to_center_plane_cut = [None] * number_of_datasets
# distance_to_center_plane_gpu_cut = [None] * number_of_datasets
# distance_to_center_plane_normal_cut = [None] * number_of_datasets
# distance_to_center_plane_normal_gpu_cut = [None] * number_of_datasets
#
# # Streams and Events creation
# for dataset in range(number_of_datasets):
# stream.append(cuda.Stream())
# event.append(dict([(marker_names[l], cuda.Event()) for l in range(len(marker_names))]))
#
# # Foward Projection Memory Allocation
# # Variables that need an unique alocation
# # A_shappened = np.ascontiguousarray(A.reshape(A.shape[0]*A.shape[1]*A.shape[2]), dtype=np.int32)
# # B_shappened = np.ascontiguousarray(B.reshape(B.shape[0]*B.shape[1]*B.shape[2]), dtype=np.int32)
# # C_shappened = np.ascontiguousarray(C.reshape(C.shape[0]*C.shape[1]*C.shape[2]), dtype=np.int32)
# im_shappened = np.ascontiguousarray(im.reshape(im.shape[0] * im.shape[1] * im.shape[2]), dtype=np.float32)
# fov_cut_matrix_shappened = np.ascontiguousarray(
# fov_cut_matrix.reshape(fov_cut_matrix.shape[0] * fov_cut_matrix.shape[1] * fov_cut_matrix.shape[2]),
# dtype=np.byte)
#
# # forward_projection_arrays_page_locked_memory_allocations = [A_gpu, B_gpu, C_gpu, im_gpu]
#
# # A_gpu = cuda.mem_alloc(A_shappened.size * A_shappened.dtype.itemsize)
# # B_gpu = cuda.mem_alloc(B_shappened.size * B_shappened.dtype.itemsize)
# # C_gpu = cuda.mem_alloc(C_shappened.size * C_shappened.dtype.itemsize)
# im_gpu = cuda.mem_alloc(im_shappened.size * im_shappened.dtype.itemsize)
# fov_cut_matrix_gpu = cuda.mem_alloc(fov_cut_matrix_shappened.size * fov_cut_matrix_shappened.dtype.itemsize)
# # texref = mod_forward_projection_shared_mem.get_texref('tex')
# # texref.set_address(fov_cut_matrix_gpu, fov_cut_matrix_shappened.size * fov_cut_matrix_shappened.dtype.itemsize)
# # # texref.set_format(cuda.array_format.UNSIGNED_INT8, 1)
#
# # cuda.memcpy_htod_async(A_gpu, A_shappened)
# # cuda.memcpy_htod_async(B_gpu, B_shappened)
# # cuda.memcpy_htod_async(C_gpu, C_shappened)
# cuda.memcpy_htod_async(im_gpu, im_shappened)
# cuda.memcpy_htod_async(fov_cut_matrix_gpu, fov_cut_matrix_shappened)
#
# a_gpu = cuda.mem_alloc(a.size * a.dtype.itemsize)
# b_gpu = cuda.mem_alloc(b.size * b.dtype.itemsize)
# c_gpu = cuda.mem_alloc(c.size * c.dtype.itemsize)
# d_gpu = cuda.mem_alloc(d.size * d.dtype.itemsize)
# a_normal_gpu = cuda.mem_alloc(a_normal.size * a_normal.dtype.itemsize)
# b_normal_gpu = cuda.mem_alloc(b_normal.size * b_normal.dtype.itemsize)
# c_normal_gpu = cuda.mem_alloc(c_normal.size * c_normal.dtype.itemsize)
# d_normal_gpu = cuda.mem_alloc(d_normal.size * d_normal.dtype.itemsize)
#
# a_cf_gpu = cuda.mem_alloc(a_cf.size * a_cf.dtype.itemsize)
# b_cf_gpu = cuda.mem_alloc(b_cf.size * b_cf.dtype.itemsize)
# c_cf_gpu = cuda.mem_alloc(c_cf.size * c_cf.dtype.itemsize)
# d_cf_gpu = cuda.mem_alloc(d_cf.size * d_cf.dtype.itemsize)
# sum_vor_t_gpu = cuda.mem_alloc(sum_vor.size * sum_vor.dtype.itemsize)
# distance_to_center_plane_gpu = cuda.mem_alloc(
# self.distance_to_center_plane.size * self.distance_to_center_plane.dtype.itemsize)
# distance_to_center_plane_normal_gpu = cuda.mem_alloc(
# self.distance_to_center_plane_normal.size * self.distance_to_center_plane_normal.dtype.itemsize)
#
# time_factor_gpu = cuda.mem_alloc(time_factor.size * time_factor.dtype.itemsize)
# # Transfer memory to Optimizer
# cuda.memcpy_htod_async(a_gpu, a)
# cuda.memcpy_htod_async(b_gpu, b)
# cuda.memcpy_htod_async(c_gpu, c)
# cuda.memcpy_htod_async(d_gpu, d)
# cuda.memcpy_htod_async(a_normal_gpu, a_normal)
# cuda.memcpy_htod_async(b_normal_gpu, b_normal)
# cuda.memcpy_htod_async(c_normal_gpu, c_normal)
# cuda.memcpy_htod_async(d_normal_gpu, d_normal)
#
# cuda.memcpy_htod_async(a_cf_gpu, a_cf)
# cuda.memcpy_htod_async(b_cf_gpu, b_cf)
# cuda.memcpy_htod_async(c_cf_gpu, c_cf)
# cuda.memcpy_htod_async(d_cf_gpu, d_cf)
# cuda.memcpy_htod_async(time_factor_gpu, time_factor)
# cuda.memcpy_htod_async(distance_to_center_plane_gpu, self.distance_to_center_plane)
# cuda.memcpy_htod_async(distance_to_center_plane_normal_gpu, self.distance_to_center_plane_normal)
#
# for dataset in range(number_of_datasets):
# # if dataset == number_of_datasets:
# # begin_dataset = np.int32(dataset * number_of_events / number_of_datasets)
# # end_dataset = number_of_events
# # adjust_coef_cut[dataset] = np.ascontiguousarray(
# # adjust_coef[int(np.floor(im_cut_dim[0] * dataset)):adjust_coef.shape[0],
# # int(np.floor(im_cut_dim[1] * dataset)):adjust_coef.shape[1],
# # int(np.floor(im_cut_dim[2] * dataset)):adjust_coef.shape[2]],
# # dtype=np.float32)
# # fov_cut_matrix_cut[dataset] = np.ascontiguousarray(
# # fov_cut_matrix[int(np.floor(im_cut_dim[0] * dataset)):fov_cut_matrix.shape[0],
# # int(np.floor(im_cut_dim[1] * dataset)):fov_cut_matrix.shape[1],
# # int(np.floor(im_cut_dim[2] * dataset)):fov_cut_matrix.shape[2]],
# # dtype=np.float32)
# # else:
# begin_dataset = np.int32(dataset * number_of_events / number_of_datasets)
# end_dataset = np.int32((dataset + 1) * number_of_events / number_of_datasets)
#
# # Cutting dataset
# # For forward projection the data is cutted by number of events. For backprojection is cutted per image pieces of image
# a_cut[dataset] = a[begin_dataset:end_dataset]
# b_cut[dataset] = b[begin_dataset:end_dataset]
# c_cut[dataset] = c[begin_dataset:end_dataset]
# d_cut[dataset] = d[begin_dataset:end_dataset]
# a_normal_cut[dataset] = a_normal[begin_dataset:end_dataset]
# b_normal_cut[dataset] = b_normal[begin_dataset:end_dataset]
# c_normal_cut[dataset] = c_normal[begin_dataset:end_dataset]
# d_normal_cut[dataset] = d_normal[begin_dataset:end_dataset]
#
# a_cf_cut[dataset] = a_cf[begin_dataset:end_dataset]
# b_cf_cut[dataset] = b_cf[begin_dataset:end_dataset]
# c_cf_cut[dataset] = c_cf[begin_dataset:end_dataset]
# d_cf_cut[dataset] = d_cf[begin_dataset:end_dataset]
# sum_vor_cut[dataset] = sum_vor[begin_dataset:end_dataset]
# distance_to_center_plane_cut[dataset] = self.distance_to_center_plane[begin_dataset:end_dataset]
# distance_to_center_plane_normal_cut[dataset] = self.distance_to_center_plane_normal[
# begin_dataset:end_dataset]
#
# # Forward
# a_cut_gpu[dataset] = cuda.mem_alloc(a_cut[dataset].size * a_cut[dataset].dtype.itemsize)
# b_cut_gpu[dataset] = cuda.mem_alloc(b_cut[dataset].size * b_cut[dataset].dtype.itemsize)
# c_cut_gpu[dataset] = cuda.mem_alloc(c_cut[dataset].size * c_cut[dataset].dtype.itemsize)
# d_cut_gpu[dataset] = cuda.mem_alloc(d_cut[dataset].size * d_cut[dataset].dtype.itemsize)
# a_cut_normal_gpu[dataset] = cuda.mem_alloc(
# a_normal_cut[dataset].size * a_normal_cut[dataset].dtype.itemsize)
# b_cut_normal_gpu[dataset] = cuda.mem_alloc(
# b_normal_cut[dataset].size * b_normal_cut[dataset].dtype.itemsize)
# c_cut_normal_gpu[dataset] = cuda.mem_alloc(
# c_normal_cut[dataset].size * c_normal_cut[dataset].dtype.itemsize)
# d_cut_normal_gpu[dataset] = cuda.mem_alloc(
# d_normal_cut[dataset].size * d_normal_cut[dataset].dtype.itemsize)
#
# a_cf_cut_gpu[dataset] = cuda.mem_alloc(a_cf_cut[dataset].size * a_cf_cut[dataset].dtype.itemsize)
# b_cf_cut_gpu[dataset] = cuda.mem_alloc(b_cf_cut[dataset].size * b_cf_cut[dataset].dtype.itemsize)
# c_cf_cut_gpu[dataset] = cuda.mem_alloc(c_cf_cut[dataset].size * c_cf_cut[dataset].dtype.itemsize)
# d_cf_cut_gpu[dataset] = cuda.mem_alloc(d_cf_cut[dataset].size * d_cf_cut[dataset].dtype.itemsize)
#
# distance_to_center_plane_gpu_cut[dataset] = cuda.mem_alloc(
# distance_to_center_plane_cut[dataset].size * distance_to_center_plane_cut[dataset].dtype.itemsize)
# distance_to_center_plane_normal_gpu_cut[dataset] = cuda.mem_alloc(
# distance_to_center_plane_normal_cut[dataset].size * distance_to_center_plane_normal_cut[
# dataset].dtype.itemsize)
#
# sum_vor_gpu[dataset] = cuda.mem_alloc(sum_vor_cut[dataset].size * sum_vor_cut[dataset].dtype.itemsize)
#
# sum_vor_pinned[dataset] = cuda.register_host_memory(sum_vor_cut[dataset])
# assert np.all(sum_vor_pinned[dataset] == sum_vor_cut[dataset])
# cuda.memcpy_htod_async(sum_vor_gpu[dataset], sum_vor_pinned[dataset], stream[dataset])
# # sum_vor_gpu[dataset] = np.intp(x.base.get_device_pointer())
# # cuda.memcpy_htod_async(probability_gpu[dataset], probability_cut[dataset])
# cuda.memcpy_htod_async(a_cut_gpu[dataset], a_cut[dataset])
# cuda.memcpy_htod_async(b_cut_gpu[dataset], b_cut[dataset])
# cuda.memcpy_htod_async(c_cut_gpu[dataset], c_cut[dataset])
# cuda.memcpy_htod_async(d_cut_gpu[dataset], d_cut[dataset])
# cuda.memcpy_htod_async(a_cut_normal_gpu[dataset], a_normal_cut[dataset])
# cuda.memcpy_htod_async(b_cut_normal_gpu[dataset], b_normal_cut[dataset])
# cuda.memcpy_htod_async(c_cut_normal_gpu[dataset], c_normal_cut[dataset])
# cuda.memcpy_htod_async(d_cut_normal_gpu[dataset], d_normal_cut[dataset])
#
# cuda.memcpy_htod_async(a_cf_cut_gpu[dataset], a_cf_cut[dataset])
# cuda.memcpy_htod_async(b_cf_cut_gpu[dataset], b_cf_cut[dataset])
# cuda.memcpy_htod_async(c_cf_cut_gpu[dataset], c_cf_cut[dataset])
# cuda.memcpy_htod_async(d_cf_cut_gpu[dataset], d_cf_cut[dataset])
# cuda.memcpy_htod_async(distance_to_center_plane_gpu_cut[dataset], distance_to_center_plane_cut[dataset])
# cuda.memcpy_htod_async(distance_to_center_plane_normal_gpu_cut[dataset],
# distance_to_center_plane_normal_cut[dataset])
#
# adjust_coef = np.ascontiguousarray(adjust_coef.reshape(
# adjust_coef.shape[0] * adjust_coef.shape[1] * adjust_coef.shape[2]),
# dtype=np.float32)
# # fov_cut_matrix = np.ascontiguousarray(fov_cut_matrix.reshape(
# # fov_cut_matrix.shape[0] * fov_cut_matrix.shape[1] * fov_cut_matrix.shape[2]),
# # dtype=np.float32)
# A = np.ascontiguousarray(A.reshape(
# A.shape[0] * A.shape[1] * A.shape[2]),
# dtype=np.short)
# B = np.ascontiguousarray(B.reshape(
# B.shape[0] * B.shape[1] * B.shape[2]),
# dtype=np.short)
# C = np.ascontiguousarray(C.reshape(
# C.shape[0] * C.shape[1] * C.shape[2]),
# dtype=np.short)
#
# # ---- Divide into datasets variables backprojection
# for dataset in range(number_of_datasets_back):
# voxels_division = adjust_coef.shape[0] // number_of_datasets_back
# adjust_coef_cut[dataset] = np.ascontiguousarray(
# adjust_coef[dataset * voxels_division:(dataset + 1) * voxels_division],
# dtype=np.float32)
#
# fov_cut_matrix_cut[dataset] = np.ascontiguousarray(
# fov_cut_matrix_shappened[dataset * voxels_division:(dataset + 1) * voxels_division],
# dtype=np.byte)
#
# A_cut[dataset] = np.ascontiguousarray(
# A[dataset * voxels_division:(dataset + 1) * voxels_division],
# dtype=np.short)
#
# B_cut[dataset] = np.ascontiguousarray(
# B[dataset * voxels_division:(dataset + 1) * voxels_division],
# dtype=np.short)
#
# C_cut[dataset] = np.ascontiguousarray(
# C[dataset * voxels_division:(dataset + 1) * voxels_division],
# dtype=np.short)
# # Backprojection
# adjust_coef_gpu[dataset] = cuda.mem_alloc(
# adjust_coef_cut[dataset].size * adjust_coef_cut[dataset].dtype.itemsize)
#
# adjust_coef_pinned[dataset] = cuda.register_host_memory(adjust_coef_cut[dataset])
# assert np.all(adjust_coef_pinned[dataset] == adjust_coef_cut[dataset])
# cuda.memcpy_htod_async(adjust_coef_gpu[dataset], adjust_coef_pinned[dataset], stream[dataset])
#
# fov_cut_matrix_cutted_gpu[dataset] = cuda.mem_alloc(
# fov_cut_matrix_cut[dataset].size * fov_cut_matrix_cut[dataset].dtype.itemsize)
#
# A_cut_gpu[dataset] = cuda.mem_alloc(
# A_cut[dataset].size * A_cut[dataset].dtype.itemsize)
#
# B_cut_gpu[dataset] = cuda.mem_alloc(
# B_cut[dataset].size * B_cut[dataset].dtype.itemsize)
#
# C_cut_gpu[dataset] = cuda.mem_alloc(
# C_cut[dataset].size * C_cut[dataset].dtype.itemsize)
#
# cuda.memcpy_htod_async(adjust_coef_gpu[dataset], adjust_coef_cut[dataset])
# cuda.memcpy_htod_async(fov_cut_matrix_cutted_gpu[dataset], fov_cut_matrix_cut[dataset])
# cuda.memcpy_htod_async(A_cut_gpu[dataset], A_cut[dataset])
# cuda.memcpy_htod_async(B_cut_gpu[dataset], B_cut[dataset])
# cuda.memcpy_htod_async(C_cut_gpu[dataset], C_cut[dataset])
#
# free, total = cuda.mem_get_info()
#
# print('%.1f %% of device memory is free.' % ((free / float(total)) * 100))
#
# # -------------OSEM---------
# it = self.number_of_iterations
# subsets = self.number_of_subsets
# print('Number events for reconstruction: {}'.format(number_of_events))
#
# im = np.ascontiguousarray(im.reshape(im.shape[0] * im.shape[1] * im.shape[2]), dtype=np.float32)
# for i in range(it):
# print('Iteration number: {}\n----------------'.format(i + 1))
# begin_event = np.int32(0)
# end_event = np.int32(number_of_events / subsets)
# for sb in range(subsets):
# print('Subset number: {}'.format(sb))
# number_of_events_subset = np.int32(end_event - begin_event)
# tic = time.time()
# # Cycle forward Projection
# for dataset in range(number_of_datasets):
#
# if dataset == number_of_datasets:
# begin_dataset = np.int32(dataset * number_of_events_subset / number_of_datasets)
# end_dataset = number_of_events_subset
# else:
# begin_dataset = np.int32(dataset * number_of_events_subset / number_of_datasets)
# end_dataset = np.int32((dataset + 1) * number_of_events_subset / number_of_datasets)
#
# threadsperblock = (256, 1, 1)
# blockspergrid_x = int(math.ceil(((end_dataset - begin_dataset)) / threadsperblock[0]))
# blockspergrid_y = int(math.ceil(1 / threadsperblock[1]))
# blockspergrid_z = int(math.ceil(1 / threadsperblock[2]))
# blockspergrid = (blockspergrid_x, blockspergrid_y, blockspergrid_z)
# event[dataset]['kernel_begin'].record(stream[dataset])
# # depth = np.int32(5)
# # weight = np.int32(A.shape[2]/2)
# # height = np.int32(A.shape[2]/2)
# if self.cdrf:
# func_forward = mod_forward_projection_shared_mem_cdrf.get_function("forward_projection_cdrf")
# func_forward(weight, height, depth, start_x, start_y, start_z,
# distance_to_center_plane_normal_gpu_cut[dataset],
# distance_to_center_plane_gpu_cut[dataset],
# half_distance_between_array_pixel,
# number_of_events, begin_dataset, end_dataset, a_cut_gpu[dataset],
# a_cut_normal_gpu[dataset], a_cf_cut_gpu[dataset],
# b_cut_gpu[dataset], b_cut_normal_gpu[dataset], b_cf_cut_gpu[dataset],
# c_cut_gpu[dataset], c_cut_normal_gpu[dataset],
# c_cf_cut_gpu[dataset],
# d_cut_gpu[dataset],
# d_cut_normal_gpu[dataset], d_cf_cut_gpu[dataset],
# sum_vor_gpu[dataset], fov_cut_matrix_gpu, im_gpu,
# block=threadsperblock,
# grid=blockspergrid,
# stream=stream[dataset])
# else:
# func_forward = mod_forward_projection_shared_mem.get_function("forward_projection")
# func_forward(weight, height, depth, start_x, start_y, start_z, half_crystal_pitch_xy,
# half_crystal_pitch_z,
# half_distance_between_array_pixel,
# number_of_events, begin_dataset, end_dataset, a_cut_gpu[dataset],
# a_cut_normal_gpu[dataset], a_cf_cut_gpu[dataset],
# b_cut_gpu[dataset], b_cut_normal_gpu[dataset], b_cf_cut_gpu[dataset],
# c_cut_gpu[dataset], c_cut_normal_gpu[dataset],
# c_cf_cut_gpu[dataset],
# d_cut_gpu[dataset],
# d_cut_normal_gpu[dataset], d_cf_cut_gpu[dataset],
# sum_vor_gpu[dataset], fov_cut_matrix_gpu, im_gpu,
# block=threadsperblock,
# grid=blockspergrid,
# stream=stream[dataset])
#
# # Sincronization of streams
# for dataset in range(number_of_datasets): # Commenting out this line should break concurrency.
# event[dataset]['kernel_end'].record(stream[dataset])
#
# # Transfering data from Optimizer
# for dataset in range(number_of_datasets):
# begin_dataset = np.int32(dataset * number_of_events / number_of_datasets)
# end_dataset = np.int32((dataset + 1) * number_of_events / number_of_datasets)
# # cuda.memcpy_dtoh_async(, sum_vor_gpu[dataset])
# cuda.memcpy_dtoh_async(sum_vor_pinned[dataset], sum_vor_gpu[dataset], stream[dataset])
# sum_vor[begin_dataset:end_dataset] = sum_vor_pinned[dataset]
# # cuda.cudaStream.Synchronize(stream[dataset])
#
# toc = time.time()
#
# cuda.Context.synchronize()
#
# print('Time part Forward Projection {} : {}'.format(1, toc - tic))
# # number_of_datasets = np.int32(2)
# # teste = np.copy(sum_vor)
# # # sum_vor[sum_vor<1]=0
# # sum_vor = np.ascontiguousarray(teste, dtype=np.float32)
# # sum_vor=np.ascontiguousarray(np.ones((self.a.shape)), dtype=np.float32)
# print('SUM VOR: {}'.format(np.sum(sum_vor)))
# # cuda.memcpy_htod_async(sum_vor_t_gpu, sum_vor)
#
# cuda.memcpy_htod_async(sum_vor_t_gpu, sum_vor)
#
# # ------------BACKPROJECTION-----------
#
# for dataset in range(number_of_datasets_back):
# dataset = np.int32(dataset)
# begin_dataset = np.int32(0)
# end_dataset = np.int32(number_of_events_subset)
#
# # begin_dataset = np.int32(0)
# # end_dataset = np.int32(number_of_events)
#
# event[dataset]['kernel_begin'].record(stream[dataset])
# # weight_cutted, height_cutted, depth_cutted = np.int32(adjust_coef_cut[dataset].shape[0]), np.int32(
# # adjust_coef_cut[dataset].shape[1]), np.int32(adjust_coef_cut[dataset].shape[2])
# weight_cutted, height_cutted, depth_cutted = np.int32(adjust_coef_cut[dataset].shape[0]), np.int32(
# 1), np.int32(1)
#
# number_of_voxels_thread = 128
# threadsperblock = (np.int(number_of_voxels_thread), 1, 1)
# blockspergrid_x = int(math.ceil(adjust_coef_cut[dataset].shape[0] / threadsperblock[0]))
# blockspergrid_y = int(math.ceil(1 / threadsperblock[1]))
# blockspergrid_z = int(math.ceil(1 / threadsperblock[2]))
# # blockspergrid_y = int(math.ceil(adjust_coef_cut[dataset].shape[1] / threadsperblock[1]))
# # blockspergrid_z = int(math.ceil(adjust_coef_cut[dataset].shape[2] / threadsperblock[2]))
# blockspergrid = (blockspergrid_x, blockspergrid_y, blockspergrid_z)
# shared_memory = threadsperblock[0] * threadsperblock[1] * threadsperblock[2] * 4
# if self.cdrf:
# if self.normalization_calculation_flag:
# func_backward = mod_normalization_shared_mem_cdrf.get_function("normalization_cdrf")
#
#
# else:
# func_backward = mod_backward_projection_shared_mem_cdrf.get_function("backprojection_cdrf")
# func_backward(dataset, weight_cutted, height_cutted, depth_cutted,
# distance_to_center_plane_normal_gpu,
# distance_to_center_plane_gpu, half_distance_between_array_pixel,
# number_of_events, begin_dataset, end_dataset, a_gpu, a_normal_gpu, a_cf_gpu,
# b_gpu, b_normal_gpu, b_cf_gpu, c_gpu, c_normal_gpu, c_cf_gpu, d_gpu,
# d_normal_gpu, d_cf_gpu, A_cut_gpu[dataset], B_cut_gpu[dataset],
# C_cut_gpu[dataset],
# adjust_coef_gpu[dataset],
# sum_vor_t_gpu, fov_cut_matrix_cutted_gpu[dataset], time_factor_gpu,
# block=threadsperblock,
# grid=blockspergrid,
# shared=np.int(4 * number_of_voxels_thread),
# stream=stream[dataset],
# )
# else:
# if self.normalization_calculation_flag:
# func_backward = mod_normalization_shared_mem.get_function("normalization")
#
#
# else:
# func_backward = mod_backward_projection_shared_mem.get_function("backprojection")
#
# func_backward(dataset, weight_cutted, height_cutted, depth_cutted, half_crystal_pitch_xy,
# half_crystal_pitch_z, half_distance_between_array_pixel,
# number_of_events, begin_dataset, end_dataset, a_gpu, a_normal_gpu, a_cf_gpu,
# b_gpu, b_normal_gpu, b_cf_gpu, c_gpu, c_normal_gpu, c_cf_gpu, d_gpu,
# d_normal_gpu, d_cf_gpu, A_cut_gpu[dataset], B_cut_gpu[dataset],
# C_cut_gpu[dataset],
# adjust_coef_gpu[dataset],
# sum_vor_t_gpu, fov_cut_matrix_cutted_gpu[dataset], time_factor_gpu,
# block=threadsperblock,
# grid=blockspergrid,
# shared=np.int(4 * number_of_voxels_thread),
# stream=stream[dataset],
# )
#
# for dataset in range(number_of_datasets_back): # Commenting out this line should break concurrency.
# event[dataset]['kernel_end'].record(stream[dataset])
#
# for dataset in range(number_of_datasets_back):
# cuda.memcpy_dtoh_async(adjust_coef_cut[dataset], adjust_coef_gpu[dataset])
# adjust_coef[dataset * voxels_division:(dataset + 1) * voxels_division] = adjust_coef_cut[dataset]
#
# cuda.Context.synchronize()
# print('Time part Backward Projection {} : {}'.format(1, time.time() - toc))
#
# # Image Normalization
# # if i ==0:
# # norm_im=np.copy(adjust_coef)
# # norm_im=norm_im/np.max(norm_im)
# # norm_im[norm_im == 0] = np.min(norm_im[np.nonzero(norm_im)])
# # normalization_matrix = gaussian_filter(normalization_matrix, 0.5)
#
# # im_med = np.load("C:\\Users\\pedro.encarnacao\\OneDrive - Universidade de Aveiro\\PhD\\Reconstrução\\NAF+FDG\\Easypet Scan 05 Aug 2019 - 14h 36m 33s\\static_image\\Easypet Scan 05 Aug 2019 - 14h 36m 33s mlem.npy")
# # self.algorithm = "LM-MRP"
# if self.algorithm == "LM-MRP":
# beta = self.algorithm_options[0]
# kernel_filter_size = self.algorithm_options[1]
# im_to_filter = im.reshape(weight, height, depth)
# im_med = median_filter(im_to_filter, kernel_filter_size)
# penalized_term = np.copy(im_to_filter)
# penalized_term[im_med != 0] = 1 + beta * (im_to_filter[im_med != 0] - im_med[im_med != 0]) / im_med[
# im_med != 0]
# penalized_term = np.ascontiguousarray(penalized_term.reshape(weight * height * depth),
# dtype=np.float32)
# # penalized_term = np.ascontiguousarray(penalized_term, dtype=np.float32)
#
# if self.algorithm == "MAP":
# beta = 0.5
# im_map = np.load(
# "C:\\Users\\pedro.encarnacao\\OneDrive - Universidade de Aveiro\\PhD\\Reconstrução\\NAF+FDG\\Easypet Scan 05 Aug 2019 - 14h 36m 33s\\static_image\\Easypet Scan 05 Aug 2019 - 14h 36m 33s mlem.npy")
#
# im[normalization_matrix != 0] = im[normalization_matrix != 0] * adjust_coef[
# normalization_matrix != 0] / (normalization_matrix[normalization_matrix != 0])
# im[normalization_matrix == 0] = 0
# if self.algorithm == "LM-MRP":
# im[penalized_term != 0] = im[penalized_term != 0] / penalized_term[penalized_term != 0]
# # im = fourier_gaussian(im, sigma=0.2)
# # im = gaussian_filter(im, 0.4)
# print('SUM IMAGE: {}'.format(np.sum(im)))
# im = np.ascontiguousarray(im, dtype=np.float32)
# # im = im * adjust_coef / sensivity_matrix[np.nonzero(sensivity_matrix)]
# cuda.memcpy_htod_async(im_gpu, im)
#
# # Clearing variables
# sum_vor = np.ascontiguousarray(
# np.zeros(self.a.shape, dtype=np.float32))
#
# adjust_coef = np.ascontiguousarray(
# np.zeros((self.number_of_pixels_x * self.number_of_pixels_y * self.number_of_pixels_z),
# dtype=np.float32))
#
# for dataset in range(number_of_datasets):
# # if dataset == number_of_datasets:
# # begin_dataset = np.int32(dataset * number_of_events / number_of_datasets)
# # end_dataset = number_of_events
# #
# # else:
# begin_dataset = np.int32(dataset * number_of_events / number_of_datasets)
# end_dataset = np.int32((dataset + 1) * number_of_events / number_of_datasets)
# # adjust_coef_cut[dataset] = np.ascontiguousarray(adjust_coef[:, :,
# # int(np.floor(im_cut_dim[2] * dataset)):int(
# # np.floor(im_cut_dim[2] * (dataset + 1)))],
# # dtype=np.float32)
#
# sum_vor_cut[dataset] = sum_vor[begin_dataset:end_dataset]
# # cuda.memcpy_htod_async(sum_vor_gpu[dataset], sum_vor_cut[dataset])
# sum_vor_pinned[dataset] = cuda.register_host_memory(sum_vor_cut[dataset])
# assert np.all(sum_vor_pinned[dataset] == sum_vor_cut[dataset])
# cuda.memcpy_htod_async(sum_vor_gpu[dataset], sum_vor_pinned[dataset], stream[dataset])
#
# for dataset in range(number_of_datasets_back):
# adjust_coef_cut[dataset] = adjust_coef[dataset * voxels_division:(dataset + 1) * voxels_division]
# adjust_coef_pinned[dataset] = cuda.register_host_memory(adjust_coef_cut[dataset])
# assert np.all(adjust_coef_pinned[dataset] == adjust_coef_cut[dataset])
# cuda.memcpy_htod_async(adjust_coef_gpu[dataset], adjust_coef_pinned[dataset], stream[dataset])
#
# if self.saved_image_by_iteration:
# im = im.reshape(weight, height, depth)
# self._save_image_by_it(im, i, sb)
#
# if self.signals_interface is not None:
# self.signals_interface.trigger_update_label_reconstruction_status.emit(
# "{}: Iteration {}".format(self.current_info_step, i + 1))
# self.signals_interface.trigger_progress_reconstruction_partial.emit(
# int(np.round(100 * (i + 1) * (sb + subsets) / (it * subsets), 0)))
#
# im = im.reshape(weight, height, depth)
# return im * subsets
def _vor_design_gpu_shared_memory_multiple_reads_DOI(self, a, a_normal, a_cf, A, b, b_normal, b_cf, B, c, c_normal,
c_cf, C,
d, d_normal, d_cf, adjust_coef, im, half_crystal_pitch_xy,
half_crystal_pitch_z,
sum_vor, fov_cut_matrix, normalization_matrix, time_factor):
print('Optimizer STARTED - Multiple reads')
# cuda.init()
cuda = self.cuda_drv
# device = cuda.Device(0) # enter your gpu id here
# ctx = device.make_context()
number_of_events = np.int32(len(a))
weight = np.int32(A.shape[0])
height = np.int32(A.shape[1])
depth = np.int32(A.shape[2])
# start_x = np.int32(A[0, 0, 0])
start_x = np.int32(A[0, 0, 0])
start_y = np.int32(B[0, 0, 0])
start_z = np.int32(C[0, 0, 0])
print("Start_point: {},{},{}".format(start_x, start_y, start_z))
print('Image size: {},{}, {}'.format(weight, height, depth))
half_distance_between_array_pixel = np.float32(self.distance_between_array_pixel / 2)
normalization_matrix = normalization_matrix.reshape(
normalization_matrix.shape[0] * normalization_matrix.shape[1] * normalization_matrix.shape[2])
# SOURCE MODELS (DEVICE CODE)
mod_forward_projection_shared_mem = SourceModule("""
#include <stdint.h>
texture<char, 1> tex;
__global__ void forward_projection
(const int n, const int m, const int p, const int start_x, const int start_y, const int start_z,
const float crystal_pitch_XY,const float crystal_pitch_Z,const float distance_between_array_pixel,
int number_of_events, const int begin_event_gpu_limitation, const int end_event_gpu_limitation,
float *a, float *a_normal, float *a_cf, float *b,float *b_normal, float *b_cf,float *c,float *c_normal,
float *c_cf, float *d, float *d_normal, float *d_cf, float *sum_vor, const char *fov_cut_matrix, const float *im_old)
{
const int shared_memory_size = 256;
__shared__ float a_shared[shared_memory_size];
__shared__ float b_shared[shared_memory_size];
__shared__ float c_shared[shared_memory_size];
__shared__ float d_shared[shared_memory_size];
__shared__ float a_normal_shared[shared_memory_size];
__shared__ float b_normal_shared[shared_memory_size];
__shared__ float c_normal_shared[shared_memory_size];
__shared__ float d_normal_shared[shared_memory_size];
__shared__ float a_cf_shared[shared_memory_size];
__shared__ float b_cf_shared[shared_memory_size];
__shared__ float c_cf_shared[shared_memory_size];
__shared__ float d_cf_shared[shared_memory_size];
__shared__ float sum_vor_shared[shared_memory_size];
__shared__ char fov_cut_matrix_shared[shared_memory_size];
/*
const int blockId = blockIdx.x+ blockIdx.y * gridDim.x+ gridDim.x * gridDim.y * blockIdx.z;
const int threadId= blockId * (blockDim.x * blockDim.y * blockDim.z)+ (threadIdx.z * (blockDim.x * blockDim.y))+ (threadIdx.y * blockDim.x)+ threadIdx.x;
*/
int threadId=blockIdx.x *blockDim.x + threadIdx.x;
int e;
float d2;
float d2_normal;
float d2_cf;
float value;
float value_normal;
float value_cf;
float sum_vor_temp;
int index;
int x_t;
int y_t;
int z_t;
float max_distance_projector;
const int number_events_max = end_event_gpu_limitation-begin_event_gpu_limitation;
const float error_pixel = 0.0000f;
if (threadIdx.x>shared_memory_size)
{
return;
}
if(threadId>number_events_max)
{
return;
}
__syncthreads();
e = threadId;
int e_m = threadIdx.x;
a_shared[e_m] = a[e];
b_shared[e_m] = b[e];
c_shared[e_m] = c[e];
d_shared[e_m] = d[e];
a_normal_shared[e_m] = a_normal[e];
b_normal_shared[e_m] = b_normal[e];
c_normal_shared[e_m] = c_normal[e];
d_normal_shared[e_m] = d_normal[e];
a_cf_shared[e_m] = a_cf[e];
b_cf_shared[e_m] = b_cf[e];
c_cf_shared[e_m] = c_cf[e];
d_cf_shared[e_m] = d_cf[e];
sum_vor_shared[e_m] = sum_vor[e];
d2_normal = crystal_pitch_XY * sqrt(a_normal_shared[e_m]*a_normal_shared[e_m]+b_normal_shared[e_m]*b_normal_shared[e_m]+c_normal_shared[e_m]*c_normal_shared[e_m]);
d2 = crystal_pitch_Z * sqrt(a_shared[e_m]*a_shared[e_m] + b_shared[e_m]*b_shared[e_m] + c_shared[e_m]*c_shared[e_m]);
d2_cf = distance_between_array_pixel*sqrt(a_cf_shared[e_m]*a_cf_shared[e_m]+b_cf_shared[e_m]*b_cf_shared[e_m]+c_cf_shared[e_m]*c_cf_shared[e_m]);
max_distance_projector=sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf);
for(int l=0; l<n; l++)
{
x_t = l+start_x;
for(int j=0; j<m; j++)
{
y_t = j+start_y;
for(int k=0; k<p; k++)
{
/*
index = l+j*n+k*m*n;
fov_cut_matrix_shared[k]=fov_cut_matrix[k+j*p+l*p*m];
index = l+j*n+k*m*n;
*/
index = k+j*p+l*p*m;
if (fov_cut_matrix[index]!=0)
{
z_t = k+start_z;
value_normal = a_normal_shared[e_m]*x_t+b_normal_shared[e_m]*y_t+c_normal_shared[e_m] * z_t -d_normal_shared[e_m];
if (value_normal < d2_normal && value_normal >= -d2_normal)
{
value = a_shared[e_m]*x_t+b_shared[e_m]*y_t+c_shared[e_m]*z_t-d_shared[e_m];
if (value < d2 && value >=-d2 )
{
value_cf = a_cf_shared[e_m]*x_t+b_cf_shared[e_m]*y_t+c_cf_shared[e_m]*z_t-d_cf_shared[e_m];
if (value_cf >= -d2_cf && value_cf<d2_cf)
{
sum_vor_temp +=im_old[index]*(1-sqrt(value*value+value_normal*value_normal+value_cf*value_cf)/max_distance_projector);
/* im_old[index]*(1-sqrt(value*value+value_normal*value_normal+value_cf*value_cf)/max_distance_projector)
*/
}
}
}
}
}
}
}
/*
sum_vor[e]= sum_vor_temp;
sum_vor[e]= sum_vor_shared[e_m];
__syncthreads();
sum_vor[e]= sum_vor_shared[e_m];
*/
sum_vor[e]= sum_vor_temp;
}""")
mod_normalization_shared_mem = SourceModule("""
#include <stdint.h>
texture<uint8_t, 1> tex;
__global__ void normalization
( int dataset_number, int n, int m, int p, const float crystal_pitch_XY, const float crystal_pitch_Z,
const float distance_between_array_pixel, int number_of_events, const int begin_event_gpu_limitation,
const int end_event_gpu_limitation, const float *a, const float *a_normal, const float *a_cf, const float *b,
const float *b_normal, const float *b_cf, const float *c, const float *c_normal, const float *c_cf, const float *d, const float *d_normal,
const float *d_cf, const short *A, const short *B, const short *C, float *adjust_coef, float *sum_vor,
char *fov_cut_matrix, float *time_factor)
{
extern __shared__ float adjust_coef_shared[];
int idt=blockIdx.x *blockDim.x + threadIdx.x;
float d2;
float d2_normal;
float d2_cf;
float normal_value;
float value;
float value_cf;
short a_temp;
short b_temp;
short c_temp;
char fov_cut_temp;
int i_s=threadIdx.x;
if (idt>n*m*p)
{
return;
}
__syncthreads();
adjust_coef_shared[i_s] = adjust_coef[idt];
a_temp = A[idt];
b_temp = B[idt];
c_temp = C[idt];
fov_cut_temp = fov_cut_matrix[idt];
for(int e=begin_event_gpu_limitation; e<end_event_gpu_limitation; e++)
{
if (fov_cut_temp!=0)
{
normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
d2_normal = crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
if (normal_value< d2_normal && normal_value >= -d2_normal)
{
value = a[e]*a_temp+b[e]*b_temp +c[e]*c_temp- d[e];
d2 = crystal_pitch_Z * sqrt(a[e]*a[e] + b[e]*b[e] + c[e]*c[e]);
if (value < d2 && value >=-d2)
{
value_cf = a_cf[e]*a_temp+b_cf[e]*b_temp +c_cf[e] * c_temp-d_cf[e];
d2_cf = distance_between_array_pixel*sqrt(a_cf[e]*a_cf[e]+b_cf[e]*b_cf[e]+c_cf[e]*c_cf[e]);
if (value_cf >= -d2_cf && value_cf<d2_cf)
{
adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf));
}
/*
adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf));
adjust_coef_shared[i_s] += 1/sum_vor[e];
normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
d2_normal = error_pixel+crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
adjust_coef_shared[i_s] += /(sum_vor[e]*time_factor[e]);
adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[0]*time_factor[e]);
*/
}
}
}
}
adjust_coef[idt] = adjust_coef_shared[i_s];
__syncthreads();
}
""")
mod_backward_projection_shared_mem = SourceModule("""
#include <stdint.h>
texture<uint8_t, 1> tex;
__global__ void backprojection
( int dataset_number, int n, int m, int p, const float crystal_pitch_XY, const float crystal_pitch_Z,
const float distance_between_array_pixel, int number_of_events, const int begin_event_gpu_limitation,
const int end_event_gpu_limitation, const float *a, const float *a_normal, const float *a_cf, const float *b,
const float *b_normal, const float *b_cf, const float *c, const float *c_normal, const float *c_cf, const float *d, const float *d_normal,
const float *d_cf, const short *A, const short *B, const short *C, float *adjust_coef, float *sum_vor,
char *fov_cut_matrix, float *time_factor)
{
extern __shared__ float adjust_coef_shared[];
int idt=blockIdx.x *blockDim.x + threadIdx.x;
float d2;
float d2_normal;
float d2_cf;
float normal_value;
float value;
float value_cf;
short a_temp;
short b_temp;
short c_temp;
char fov_cut_temp;
int i_s=threadIdx.x;
if (idt>n*m*p)
{
return;
}
__syncthreads();
adjust_coef_shared[i_s] = adjust_coef[idt];
a_temp = A[idt];
b_temp = B[idt];
c_temp = C[idt];
fov_cut_temp = fov_cut_matrix[idt];
for(int e=begin_event_gpu_limitation; e<end_event_gpu_limitation; e++)
{
if (fov_cut_temp!=0)
{
normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
d2_normal = crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
if (normal_value< d2_normal && normal_value >= -d2_normal)
{
value = a[e]*a_temp+b[e]*b_temp +c[e]*c_temp- d[e];
d2 = crystal_pitch_Z * sqrt(a[e]*a[e] + b[e]*b[e] + c[e]*c[e]);
if (value < d2 && value >=-d2)
{
value_cf = a_cf[e]*a_temp+b_cf[e]*b_temp +c_cf[e] * c_temp-d_cf[e];
d2_cf = distance_between_array_pixel*sqrt(a_cf[e]*a_cf[e]+b_cf[e]*b_cf[e]+c_cf[e]*c_cf[e]);
if (value_cf >= -d2_cf && value_cf<d2_cf)
{
adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[e]*time_factor[e]);
}
/*
adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[e]*time_factor[e]);
adjust_coef_shared[i_s] += 1/sum_vor[e];
normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
d2_normal = error_pixel+crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
adjust_coef_shared[i_s] += /(sum_vor[e]*time_factor[e]);
adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[0]*time_factor[e]);
*/
}
}
}
}
adjust_coef[idt] = adjust_coef_shared[i_s];
__syncthreads();
}
""")
mod_forward_projection_shared_mem_cdrf = SourceModule("""
#include <stdint.h>
texture<char, 1> tex;
__global__ void forward_projection_cdrf
(const int n, const int m, const int p, const int start_x, const int start_y, const int start_z,
float *crystal_pitch_XY, float *crystal_pitch_Z,const float distance_between_array_pixel,
int number_of_events, const int begin_event_gpu_limitation, const int end_event_gpu_limitation,
float *a, float *a_normal, float *a_cf, float *b,float *b_normal, float *b_cf,float *c,float *c_normal,
float *c_cf, float *d, float *d_normal, float *d_cf, float *sum_vor, const char *fov_cut_matrix,
const float *im_old, float* m_values, float* b_values, float* m_values_at, float* b_values_at,
float* max_D, float* inflex_points_x, float* linear_attenuation_A,
float* linear_attenuation_B)
{
const int shared_memory_size = 256;
__shared__ float a_shared[shared_memory_size];
__shared__ float b_shared[shared_memory_size];
__shared__ float c_shared[shared_memory_size];
__shared__ float d_shared[shared_memory_size];
__shared__ float a_normal_shared[shared_memory_size];
__shared__ float b_normal_shared[shared_memory_size];
__shared__ float c_normal_shared[shared_memory_size];
__shared__ float d_normal_shared[shared_memory_size];
__shared__ float a_cf_shared[shared_memory_size];
__shared__ float b_cf_shared[shared_memory_size];
__shared__ float c_cf_shared[shared_memory_size];
__shared__ float d_cf_shared[shared_memory_size];
__shared__ float sum_vor_shared[shared_memory_size];
__shared__ char fov_cut_matrix_shared[shared_memory_size];
__shared__ float crystal_pitch_Z_shared[shared_memory_size];
__shared__ float crystal_pitch_XY_shared[shared_memory_size];
__shared__ float m_values_init_shared[shared_memory_size];
__shared__ float m_values_end_shared[shared_memory_size];
__shared__ float b_values_init_shared[shared_memory_size];
__shared__ float b_values_end_shared[shared_memory_size];
__shared__ float m_values_at_shared[shared_memory_size];
__shared__ float b_values_at_shared[shared_memory_size];
__shared__ float max_D_shared[shared_memory_size];
__shared__ float inflex_points_x_init_shared[shared_memory_size];
__shared__ float inflex_points_x_end_shared[shared_memory_size];
__shared__ float linear_attenuation_A_shared[shared_memory_size];
__shared__ float linear_attenuation_B_shared[shared_memory_size];
/*
const int blockId = blockIdx.x+ blockIdx.y * gridDim.x+ gridDim.x * gridDim.y * blockIdx.z;
const int threadId= blockId * (blockDim.x * blockDim.y * blockDim.z)+ (threadIdx.z * (blockDim.x * blockDim.y))+ (threadIdx.y * blockDim.x)+ threadIdx.x;
*/
int threadId=blockIdx.x *blockDim.x + threadIdx.x;
int e;
float d2;
float d2_normal;
float d2_cf;
float value;
float value_normal;
float value_cf;
float sum_vor_temp;
int index;
int x_t;
int y_t;
int z_t;
float width;
float height;
float distance;
float distance_other;
float distance_crystal;
float distance_at;
float idrf;
float max_distance_projector;
const int number_events_max = end_event_gpu_limitation-begin_event_gpu_limitation;
const float error_pixel = 0.0000f;
if (threadIdx.x>shared_memory_size)
{
return;
}
if(threadId>number_events_max)
{
return;
}
__syncthreads();
e = threadId;
int e_m = threadIdx.x;
a_shared[e_m] = a[e];
b_shared[e_m] = b[e];
c_shared[e_m] = c[e];
d_shared[e_m] = d[e];
a_normal_shared[e_m] = a_normal[e];
b_normal_shared[e_m] = b_normal[e];
c_normal_shared[e_m] = c_normal[e];
d_normal_shared[e_m] = d_normal[e];
a_cf_shared[e_m] = a_cf[e];
b_cf_shared[e_m] = b_cf[e];
c_cf_shared[e_m] = c_cf[e];
d_cf_shared[e_m] = d_cf[e];
sum_vor_shared[e_m] = sum_vor[e];
crystal_pitch_Z_shared[e_m] = crystal_pitch_Z[e];
crystal_pitch_XY_shared[e_m] = crystal_pitch_XY[e];
m_values_init_shared[e_m] = m_values[2*e];
m_values_end_shared[e_m] = m_values[2*e+1];
b_values_init_shared[e_m] = b_values[2*e];
b_values_end_shared[e_m] = b_values[2*e+1];
m_values_at_shared[e_m] = m_values_at[e];
b_values_at_shared[e_m] = b_values_at[e];
max_D_shared[e_m] = max_D[e];
inflex_points_x_init_shared[e_m] = inflex_points_x[2*e];
inflex_points_x_end_shared[e_m] = inflex_points_x[2*e+1];
linear_attenuation_A_shared[e_m] = linear_attenuation_A[e];
linear_attenuation_B_shared[e_m] = linear_attenuation_B[e];
d2_normal = crystal_pitch_XY_shared[e_m] * sqrt(a_normal_shared[e_m]*a_normal_shared[e_m]+b_normal_shared[e_m]*b_normal_shared[e_m]+c_normal_shared[e_m]*c_normal_shared[e_m]);
d2 = crystal_pitch_Z_shared[e_m]* sqrt(a_shared[e_m]*a_shared[e_m] + b_shared[e_m]*b_shared[e_m] + c_shared[e_m]*c_shared[e_m]);
d2_cf = distance_between_array_pixel*sqrt(a_cf_shared[e_m]*a_cf_shared[e_m]+b_cf_shared[e_m]*b_cf_shared[e_m]+c_cf_shared[e_m]*c_cf_shared[e_m]);
max_distance_projector=sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf);
for(int l=0; l<n; l++)
{
x_t = l+start_x;
for(int j=0; j<m; j++)
{
y_t = j+start_y;
for(int k=0; k<p; k++)
{
/*
index = l+j*n+k*m*n;
fov_cut_matrix_shared[k]=fov_cut_matrix[k+j*p+l*p*m];
index = l+j*n+k*m*n;
*/
index = k+j*p+l*p*m;
if (fov_cut_matrix[index]!=0)
{
z_t = k+start_z;
value_normal = a_normal_shared[e_m]*x_t+b_normal_shared[e_m]*y_t+c_normal_shared[e_m] * z_t -d_normal_shared[e_m];
if (value_normal < d2_normal && value_normal >= -d2_normal)
{
value = a_shared[e_m]*x_t+b_shared[e_m]*y_t+c_shared[e_m]*z_t-d_shared[e_m];
if (value < d2 && value >=-d2 )
{
value_cf = a_cf_shared[e_m]*x_t+b_cf_shared[e_m]*y_t+c_cf_shared[e_m]*z_t-d_cf_shared[e_m];
if (value_cf >= -d2_cf && value_cf<d2_cf)
{
if (sqrt(value*value+value_normal*value_normal+value_cf*value_cf)<=max_distance_projector)
{
width = 2*(crystal_pitch_Z_shared[e_m] - abs(value));
height = 2*(crystal_pitch_XY_shared[e_m] - abs(value_normal));
distance =d2_cf+abs(value_cf);
distance_other = abs(d2_cf+value_cf);
distance_at = m_values_at_shared[e_m]*value+b_values_at_shared[e_m];
if(value<=inflex_points_x_init_shared[e_m])
{
distance_crystal = m_values_init_shared[e_m]*value+b_values_init_shared[e_m];
distance_at = 0;
}
else if(value>=inflex_points_x_end_shared[e_m])
{
distance_crystal = m_values_end_shared[e_m]*value+b_values_end_shared[e_m];
}
else
{
distance_crystal = max_D_shared[e_m];
}
idrf=(1-exp(-linear_attenuation_A_shared[e_m]*distance_crystal))*exp(-linear_attenuation_A_shared[e_m]*distance_at);
if (idrf<0)
{
idrf = 0;
}
sum_vor_shared[e_m]+= idrf*im_old[index];
}
/*
pow(4*atan(width*height/(2*distance*sqrt(4*distance*distance+width*width+height*height))),2)
sum_vor_shared[e_m]+= im_old[index]*(1-sqrt(value*value+value_normal*value_normal+value_cf*value_cf)/max_distance_projector);
4*arctan(
4*np.arctan(width*height/(2*distance_to_anh*np.sqrt(4*distance_between_array_pixel-value_cf)**2+width**2+height**2)))
*(1-sqrt(value*value+value_normal*value_normal+value_cf*value_cf)/max_distance_projector)
*/
}
}
}
}
}
}
}
/*
sum_vor[e]= sum_vor_temp;
sum_vor[e]= sum_vor_shared[e_m];
sum_vor[e]= sum_vor_temp;
*/
__syncthreads();
sum_vor[e]= sum_vor_shared[e_m];
}""")
mod_normalization_shared_mem_cdrf = SourceModule("""
#include <stdint.h>
texture<uint8_t, 1> tex;
__device__ float* three_plane_intersection(float plane1A, float plane1B,
float plane1C, float plane1D, float plane2A, float plane2B, float plane2C,
float plane2D, float plane3A, float plane3B, float plane3C, float plane3D);
__device__ float intersection_determinant(float matrix[3][3]);
__device__ float point_distance_to_plane(float *point, float A, float B, float C, float D);
__global__ void normalization_cdrf
(int dataset_number, int n, int m, int p, float* crystal_pitch_XY, float* crystal_pitch_Z,
const float distance_between_array_pixel, int number_of_events, const int begin_event_gpu_limitation,
const int end_event_gpu_limitation, const float* a, const float* a_normal, const float* a_cf, const float* b,
const float* b_normal, const float* b_cf, const float* c, const float* c_normal, const float* c_cf, const float* d, const float* d_normal,
const float* d_cf, const short* A, const short* B, const short* C, float* adjust_coef, float* sum_vor,
char* fov_cut_matrix, float* time_factor, float* plane_centerA1_A,
float* plane_centerA1_B, float* plane_centerA1_C, float* plane_centerA1_D,
float* plane_centerB1_A, float* plane_centerB1_B, float* plane_centerB1_C,
float* plane_centerB1_D, float* plane_centerC1_A, float* plane_centerC1_B,
float* plane_centerC1_C, float* plane_centerC1_D, float* intersection_points, float* m_values, float* m_values_at,
float* b_values, float* b_values_at, float* max_D, float* inflex_points_x, float* linear_attenuation_A,
float* linear_attenuation_B)
{
extern __shared__ float adjust_coef_shared[];
int idt = blockIdx.x * blockDim.x + threadIdx.x;
float d2;
float d2_normal;
float d2_cf;
float normal_value;
float value;
float value_cf;
short a_temp;
short b_temp;
short c_temp;
char fov_cut_temp;
int i_s = threadIdx.x;
float width;
float height;
float distance;
float solid_angle;
float face_1_distance_to_center;
float face_2_distance_to_center;
float face_3_distance_to_center;
float* p1;
float* p2;
float* p3;
float* p4;
float* p5;
float* p6;
float dist_p1;
float distance_crystal;
float distance_at;
float idrf;
if (idt >= n * m * p)
{
return;
}
__syncthreads();
adjust_coef_shared[i_s] = adjust_coef[idt];
a_temp = A[idt];
b_temp = B[idt];
c_temp = C[idt];
fov_cut_temp = fov_cut_matrix[idt];
for (int e = begin_event_gpu_limitation; e < end_event_gpu_limitation; e++)
{
if (fov_cut_temp != 0)
{
normal_value = a_normal[e] * a_temp + b_normal[e] * b_temp + c_normal[e] * c_temp - d_normal[e];
d2_normal = crystal_pitch_XY[e] * sqrt(a_normal[e] * a_normal[e] + b_normal[e] * b_normal[e] + c_normal[e] * c_normal[e]);
if (normal_value < d2_normal && normal_value >= -d2_normal)
{
value = a[e] * a_temp + b[e] * b_temp + c[e] * c_temp - d[e];
d2 = crystal_pitch_Z[e] * sqrt(a[e] * a[e] + b[e] * b[e] + c[e] * c[e]);
if (value < d2 && value >= -d2)
{
value_cf = a_cf[e] * a_temp + b_cf[e] * b_temp + c_cf[e] * c_temp - d_cf[e];
d2_cf = distance_between_array_pixel * sqrt(a_cf[e] * a_cf[e] + b_cf[e] * b_cf[e] + c_cf[e] * c_cf[e]);
if (value_cf >= -d2_cf && value_cf < d2_cf)
{
width = 2 * (crystal_pitch_Z[e] - abs(value));
height = 2 * (crystal_pitch_XY[e] - abs(normal_value));
distance = d2_cf + abs(value_cf);
/*
face_1_distance_to_center = 1.0f * sqrt(plane_centerA1_A[e] * plane_centerA1_A[e] + plane_centerA1_B[e] * plane_centerA1_B[e] + plane_centerA1_C[e] * plane_centerA1_C[e]);
face_2_distance_to_center = 1.0f * sqrt(plane_centerB1_A[e] * plane_centerB1_A[e] + plane_centerB1_B[e] * plane_centerB1_B[e] + plane_centerB1_C[e] * plane_centerB1_C[e]);
face_3_distance_to_center = 15.0f * sqrt(plane_centerC1_A[e] * plane_centerC1_A[e] + plane_centerC1_B[e] * plane_centerC1_B[e] + plane_centerC1_C[e] * plane_centerC1_C[e]);
p1 = three_plane_intersection(a[e],
b[e], c[e], d[e],
a_normal[e], b_normal[e], c_normal[e],
d_normal[e], plane_centerA1_A[e], plane_centerA1_B[e],
plane_centerA1_C[e], plane_centerA1_D[e]+face_1_distance_to_center);
p2 = three_plane_intersection(a[e],
b[e], c[e], d[e],
a_normal[e], b_normal[e], c_normal[e],
d_normal[e], plane_centerA1_A[e], plane_centerA1_B[e],
plane_centerA1_C[e], plane_centerA1_D[e]-face_1_distance_to_center);
p3 = three_plane_intersection(a[e],
b[e], c[e], d[e],
a_normal[e], b_normal[e], c_normal[e],
d_normal[e], plane_centerB1_A[e], plane_centerB1_B[e],
plane_centerB1_C[e], plane_centerB1_D[e]+face_2_distance_to_center);
p4 = three_plane_intersection(a[e],
b[e], c[e], d[e],
a_normal[e], b_normal[e], c_normal[e],
d_normal[e], plane_centerB1_A[e], plane_centerB1_B[e],
plane_centerB1_C[e], plane_centerB1_D[e]-face_2_distance_to_center);
p5 = three_plane_intersection(a[e],
b[e], c[e], d[e],
a_normal[e], b_normal[e], c_normal[e],
d_normal[e], plane_centerC1_A[e], plane_centerC1_B[e],
plane_centerC1_C[e], plane_centerC1_D[e]+face_3_distance_to_center);
p6 = three_plane_intersection(a[e],
b[e], c[e], d[e],
a_normal[e], b_normal[e], c_normal[e],
d_normal[e], plane_centerC1_A[e], plane_centerC1_B[e],
plane_centerC1_C[e], plane_centerC1_D[e]-face_3_distance_to_center);
intersection_points[e*6] = point_distance_to_plane(p1, a_cf[e], b_cf[e],c_cf[e], d_cf[e]);
dist_p2 = point_distance_to_plane(p2, a_cf[e], b_cf[e],c_cf[e], d_cf[e]);
printf(" Distance p1: %f", intersection_points[e*6]);
dist_p2 = point_distance_to_plane(p2, a_cf[e], b_cf[e],c_cf[e], d_cf[e]);
dist_p3 = point_distance_to_plane(p3, a_cf[e], b_cf[e],c_cf[e], d_cf[e]);
dist_p4 = point_distance_to_plane(p4, a_cf[e], b_cf[e],c_cf[e], d_cf[e]);
dist_p5 = point_distance_to_plane(p5, a_cf[e], b_cf[e],c_cf[e], d_cf[e]);
dist_p6 = point_distance_to_plane(p6, a_cf[e], b_cf[e],c_cf[e], d_cf[e]);
printf(" Distance p1: %f", dist_p1);
printf(" x: %f, y: %f, z: %f ",p1[0], p1[1], p1[2]);
printf("----------------");
printf(" x_2: %f, y_2: %f, z_2: %f ",p2[0], p2[1], p2[2]);
*/
distance_at = m_values_at[e]*value+b_values_at[e];
if(value<=inflex_points_x[2*e])
{
distance_crystal = m_values[2*e]*value+b_values[2*e];
distance_at = 0;
}
else if(value>=inflex_points_x[2*e+1])
{
distance_crystal = m_values[2*e+1]*value+b_values[2*e+1];
}
else
{
distance_crystal = max_D[e];
}
distance_crystal = max_D[e];
idrf = (1-exp(-linear_attenuation_A[e]*distance_crystal))*exp(-linear_attenuation_A[e]*distance_at);
solid_angle = 4 * width * height / (2 * distance * sqrt(4 * distance * distance + width * width + height * height));
adjust_coef_shared[i_s] += idrf/(sum_vor[e]);
/*
else if(value<=inflex_points_x[2*e+1])
{
distance_crystal = m_values[2*e+1]*value+b_values[2*e+1];
}
printf(" x_2: %f, distance: %f, att: %f ",idrf,distance_crystal, linear_attenuation_A[e] );
if (sqrt(value*value+normal_value*normal_value+value_cf*value_cf)<=sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))
{
adjust_coef_shared[i_s] += time_factor[e]*(1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[e]);
}
*/
}
/*
(1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/
normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
d2_normal = error_pixel+crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
adjust_coef_shared[i_s] += /(sum_vor[e]*time_factor[e]);
adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[0]*time_factor[e]);
*/
}
}
}
}
adjust_coef[idt] = adjust_coef_shared[i_s];
__syncthreads();
}
__device__ float point_distance_to_plane(float* point, float A, float B, float C, float D)
{
float distance;
distance = abs(A*point[0]+B*point[1]+C*point[2]-D)/sqrt(A*A+B*B+C*C);
return distance;
}
__device__ float* three_plane_intersection(float plane1A, float plane1B,
float plane1C, float plane1D, float plane2A, float plane2B, float plane2C,
float plane2D, float plane3A, float plane3B, float plane3C, float plane3D)
{
float m_det[3][3];
float m_x[3][3];
float m_y[3][3];
float m_z[3][3];
float det;
float det_x;
float det_y;
float det_z;
float result[3];
m_det[0][0] = plane1A;
m_det[0][1] = plane1B;
m_det[0][2] = plane1C;
m_det[1][0] = plane2A;
m_det[1][1] = plane2B;
m_det[1][2] = plane2C;
m_det[2][0] = plane3A;
m_det[2][1] = plane3B;
m_det[2][2] = plane3C;
m_x[0][0] = plane1D;
m_x[0][1] = plane1B;
m_x[0][2] = plane1C;
m_x[1][0] = plane2D;
m_x[1][1] = plane2B;
m_x[1][2] = plane2C;
m_x[2][0] = plane3D;
m_x[2][1] = plane3B;
m_x[2][2] = plane3C;
m_y[0][0] = plane1A;
m_y[0][1] = plane1D;
m_y[0][2] = plane1C;
m_y[1][0] = plane2A;
m_y[1][1] = plane2D;
m_y[1][2] = plane2C;
m_y[2][0] = plane3A;
m_y[2][1] = plane3D;
m_y[2][2] = plane3C;
m_z[0][0] = plane1A;
m_z[0][1] = plane1B;
m_z[0][2] = plane1D;
m_z[1][0] = plane2A;
m_z[1][1] = plane2B;
m_z[1][2] = plane2D;
m_z[2][0] = plane3A;
m_z[2][1] = plane3B;
m_z[2][2] = plane3D;
det = intersection_determinant(m_det);
det_x = intersection_determinant(m_x);
det_y = intersection_determinant(m_y);
det_z = intersection_determinant(m_z);
if (det != 0.0f)
{
result[0] = det_x / det;
result[1] = det_y / det;
result[2] = det_z / det;
}
return result;
}
__device__ float intersection_determinant(float matrix[3][3])
{
float a;
float b;
float c;
float d;
float e;
float f;
float g;
float h;
float i;
float det;
a = matrix[0][0];
b = matrix[0][1];
c = matrix[0][2];
d = matrix[1][0];
e = matrix[1][1];
f = matrix[1][2];
g = matrix[2][0];
h = matrix[2][1];
i = matrix[2][2];
det = (a * e * i + b * f * g + c * d * h) - (a * f * h + b * d * i + c * e * g);
return det;
}
""")
mod_backward_projection_shared_mem_cdrf = SourceModule("""
#include <stdint.h>
texture<uint8_t, 1> tex;
__global__ void backprojection_cdrf
(int dataset_number, int n, int m, int p, const float* crystal_pitch_XY, const float* crystal_pitch_Z,
const float distance_between_array_pixel, int number_of_events, const int begin_event_gpu_limitation,
const int end_event_gpu_limitation, const float* a, const float* a_normal, const float* a_cf, const float* b,
const float* b_normal, const float* b_cf, const float* c, const float* c_normal, const float* c_cf, const float* d, const float* d_normal,
const float* d_cf, const short* A, const short* B, const short* C, float* adjust_coef, float* sum_vor,
char* fov_cut_matrix, float* time_factor, float* m_values, float* m_values_at,
float* b_values, float* b_values_at, float* max_D, float* inflex_points_x, float* linear_attenuation_A,
float* linear_attenuation_B)
{
extern __shared__ float adjust_coef_shared[];
int idt = blockIdx.x * blockDim.x + threadIdx.x;
float d2;
float d2_normal;
float d2_cf;
float normal_value;
float value;
float value_cf;
short a_temp;
short b_temp;
short c_temp;
char fov_cut_temp;
int i_s = threadIdx.x;
float width;
float height;
float distance;
float distance_other;
float solid_angle;
float distance_crystal;
float distance_at;
float idrf;
if (idt >= n * m * p)
{
return;
}
__syncthreads();
adjust_coef_shared[i_s] = adjust_coef[idt];
a_temp = A[idt];
b_temp = B[idt];
c_temp = C[idt];
fov_cut_temp = fov_cut_matrix[idt];
for (int e = begin_event_gpu_limitation; e < end_event_gpu_limitation; e++)
{
if (fov_cut_temp != 0)
{
normal_value = a_normal[e] * a_temp + b_normal[e] * b_temp + c_normal[e] * c_temp - d_normal[e];
d2_normal = crystal_pitch_XY[e] * sqrt(a_normal[e] * a_normal[e] + b_normal[e] * b_normal[e] + c_normal[e] * c_normal[e]);
if (normal_value < d2_normal && normal_value >= -d2_normal)
{
value = a[e] * a_temp + b[e] * b_temp + c[e] * c_temp - d[e];
d2 = crystal_pitch_Z[e] * sqrt(a[e] * a[e] + b[e] * b[e] + c[e] * c[e]);
if (value < d2 && value >= -d2)
{
value_cf = a_cf[e] * a_temp + b_cf[e] * b_temp + c_cf[e] * c_temp - d_cf[e];
d2_cf = distance_between_array_pixel * sqrt(a_cf[e] * a_cf[e] + b_cf[e] * b_cf[e] + c_cf[e] * c_cf[e]);
if (value_cf >= -d2_cf && value_cf < d2_cf)
{
if (sqrt(value * value + normal_value * normal_value + value_cf * value_cf) <= sqrt(d2 * d2 + d2_normal * d2_normal + d2_cf * d2_cf))
{
width = 2 * (crystal_pitch_Z[e] - abs(value));
height = 2 * (crystal_pitch_XY[e] - abs(normal_value));
distance = d2_cf + abs(value_cf);
distance_other = d2_cf - abs(value_cf);
distance_at = m_values_at[e] * value + b_values_at[e];
if (value <= inflex_points_x[2 * e])
{
distance_crystal = m_values[2 * e] * value + b_values[2 * e];
distance_at = 0;
}
else if (value > inflex_points_x[2 * e + 1])
{
distance_crystal = m_values[2 * e + 1] * value + b_values[2 * e + 1];
}
else
{
distance_crystal = max_D[e];
}
idrf = (1 - exp(-linear_attenuation_A[e] * distance_crystal)) * exp(-linear_attenuation_A[e] * distance_at);
if (idrf < 0)
{
idrf = 0;
}
solid_angle = 4 * width * height / (2 * distance * sqrt(4 * distance * distance + width * width + height * height));
if (sum_vor[e]!=0)
{
adjust_coef_shared[i_s] += idrf / sum_vor[e];
}
/*
(4 * asin(sin(tan(width/distance))*sin(tan(height/distance))))*(4 * asin(sin(tan(width/distance))*sin(tan(height/distance))))/sum_vor[e];
adjust_coef_shared[i_s] += time_factor[e]*(1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[e]);
*/
}
}
/*
(1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/
normal_value= a_normal[e]*a_temp+b_normal[e]*b_temp+c_normal[e] * c_temp-d_normal[e];
d2_normal = error_pixel+crystal_pitch_XY * sqrt(a_normal[e]*a_normal[e]+b_normal[e]*b_normal[e]+c_normal[e]*c_normal[e]);
adjust_coef_shared[i_s] += /(sum_vor[e]*time_factor[e]);
adjust_coef_shared[i_s] += (1-(sqrt(value*value+normal_value*normal_value+value_cf*value_cf))/sqrt(d2*d2+d2_normal*d2_normal+d2_cf*d2_cf))/(sum_vor[0]*time_factor[e]);
*/
}
}
}
}
adjust_coef[idt] = adjust_coef_shared[i_s];
__syncthreads();
}
""")
# float crystal_pitch, int number_of_events, float *a, float *b, float *c, float *d, int *A, int *B, int *C, float *im, float *vector_matrix
# Host Code B, C, im, vector_matrix,
number_of_datasets = np.int32(1) # Number of datasets (and concurrent operations) used.
number_of_datasets_back = np.int32(1) # Number of datasets (and concurrent operations) used.
# Start concurrency Test
# Event as reference point
ref = cuda.Event()
ref.record()
# Create the streams and events needed to calculation
stream, event = [], []
marker_names = ['kernel_begin', 'kernel_end']
# Create List to allocate chunks of data
A_cut_gpu, B_cut_gpu, C_cut_gpu = [None] * number_of_datasets, [None] * number_of_datasets, [
None] * number_of_datasets
A_cut, B_cut, C_cut = [None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
a_gpu, b_gpu, c_gpu, d_gpu = [None] * number_of_datasets, [None] * number_of_datasets, [
None] * number_of_datasets, [None] * number_of_datasets
a_normal_gpu, b_normal_gpu, c_normal_gpu, d_normal_gpu = [None] * number_of_datasets, [
None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
a_cut, b_cut, c_cut, d_cut = [None] * number_of_datasets, [None] * number_of_datasets, [
None] * number_of_datasets, [None] * number_of_datasets
a_normal_cut, b_normal_cut, c_normal_cut, d_normal_cut = [None] * number_of_datasets, [
None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
a_cf_cut, b_cf_cut, c_cf_cut, d_cf_cut = [None] * number_of_datasets, [
None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
a_cut_gpu, b_cut_gpu, c_cut_gpu, d_cut_gpu = [None] * number_of_datasets, [None] * number_of_datasets, [
None] * number_of_datasets, [None] * number_of_datasets
a_cut_normal_gpu, b_cut_normal_gpu, c_cut_normal_gpu, d_cut_normal_gpu = [None] * number_of_datasets, [
None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
a_cf_cut_gpu, b_cf_cut_gpu, c_cf_cut_gpu, d_cf_cut_gpu = [None] * number_of_datasets, [
None] * number_of_datasets, [None] * number_of_datasets, [None] * number_of_datasets
time_factor_cut_gpu = [None] * number_of_datasets
sum_vor_gpu = [None] * number_of_datasets
sum_vor_cut = [None] * number_of_datasets
adjust_coef_cut = [None] * number_of_datasets
adjust_coef_gpu = [None] * number_of_datasets
adjust_coef_pinned = [None] * number_of_datasets_back
fov_cut_matrix_cutted_gpu = [None] * number_of_datasets_back
fov_cut_matrix_cut = [None] * number_of_datasets
# fov_cut_matrix_gpu = [None] * number_of_datasets
sum_vor_pinned = [None] * number_of_datasets
distance_to_center_plane_cut = [None] * number_of_datasets
distance_to_center_plane_gpu_cut = [None] * number_of_datasets
distance_to_center_plane_normal_cut = [None] * number_of_datasets
distance_to_center_plane_normal_gpu_cut = [None] * number_of_datasets
plane_centerA1_cut = [None] * len(self.crystal_central_planes.plane_centerA1)
plane_centerA1_gpu_cut = [None] * len(self.crystal_central_planes.plane_centerA1)
plane_centerB1_cut = [None] * len(self.crystal_central_planes.plane_centerA1)
plane_centerB1_gpu_cut = [None] * len(self.crystal_central_planes.plane_centerA1)
plane_centerC1_cut = [None] * len(self.crystal_central_planes.plane_centerA1)
plane_centerC1_gpu_cut = [None] * len(self.crystal_central_planes.plane_centerA1)
plane_centerA1_gpu = [None] * len(self.crystal_central_planes.plane_centerA1)
plane_centerB1_gpu = [None] * len(self.crystal_central_planes.plane_centerA1)
plane_centerC1_gpu = [None] * len(self.crystal_central_planes.plane_centerA1)
m_values_listMode_cut = [None] * number_of_datasets
m_values_at_listMode_cut = [None] * number_of_datasets
b_values_listMode_cut = [None] * number_of_datasets
b_values_at_listMode_cut = [None] * number_of_datasets
max_D_listMode_cut = [None] * number_of_datasets
inflex_points_x_listMode_cut = [None] * number_of_datasets
linear_attenuation_crystal_A_listMode_cut = [None] * number_of_datasets
linear_attenuation_crystal_B_listMode_cut = [None] * number_of_datasets
m_values_gpu_cut = [None] * number_of_datasets
b_values_gpu_cut = [None] * number_of_datasets
m_values_at_gpu_cut = [None] * number_of_datasets
b_values_at_gpu_cut = [None] * number_of_datasets
max_D_gpu_cut = [None] * number_of_datasets
inflex_points_x_gpu_cut = [None] * number_of_datasets
linear_attenuation_A_gpu_cut = [None] * number_of_datasets
linear_attenuation_B_gpu_cut = [None] * number_of_datasets
intersection_points = np.ascontiguousarray(np.zeros((number_of_events, 6)), dtype=np.float32)
intersection_points_gpu = cuda.mem_alloc(intersection_points.size
* intersection_points.dtype.itemsize)
cuda.memcpy_htod_async(intersection_points_gpu, intersection_points)
for i in range(len(self.crystal_central_planes.plane_centerA1)):
plane_centerA1_cut[i] = [None] * number_of_datasets
plane_centerA1_gpu_cut[i] = [None] * number_of_datasets
plane_centerB1_cut[i] = [None] * number_of_datasets
plane_centerB1_gpu_cut[i] = [None] * number_of_datasets
plane_centerC1_cut[i] = [None] * number_of_datasets
plane_centerC1_gpu_cut[i] = [None] * number_of_datasets
plane_centerA1_gpu[i] = cuda.mem_alloc(self.crystal_central_planes.plane_centerA1[i].size
* self.crystal_central_planes.plane_centerA1[i].dtype.itemsize)
plane_centerB1_gpu[i] = cuda.mem_alloc(self.crystal_central_planes.plane_centerB1[i].size
* self.crystal_central_planes.plane_centerB1[i].dtype.itemsize)
plane_centerC1_gpu[i] = cuda.mem_alloc(self.crystal_central_planes.plane_centerC1[i].size
* self.crystal_central_planes.plane_centerC1[i].dtype.itemsize)
cuda.memcpy_htod_async(plane_centerA1_gpu[i], self.crystal_central_planes.plane_centerA1[i])
cuda.memcpy_htod_async(plane_centerB1_gpu[i], self.crystal_central_planes.plane_centerB1[i])
cuda.memcpy_htod_async(plane_centerC1_gpu[i], self.crystal_central_planes.plane_centerC1[i])
# Streams and Events creation
for dataset in range(number_of_datasets):
stream.append(cuda.Stream())
event.append(dict([(marker_names[l], cuda.Event()) for l in range(len(marker_names))]))
# Foward Projection Memory Allocation
# Variables that need an unique alocation
# A_shappened = np.ascontiguousarray(A.reshape(A.shape[0]*A.shape[1]*A.shape[2]), dtype=np.int32)
# B_shappened = np.ascontiguousarray(B.reshape(B.shape[0]*B.shape[1]*B.shape[2]), dtype=np.int32)
# C_shappened = np.ascontiguousarray(C.reshape(C.shape[0]*C.shape[1]*C.shape[2]), dtype=np.int32)
im_shappened = np.ascontiguousarray(im.reshape(im.shape[0] * im.shape[1] * im.shape[2]), dtype=np.float32)
fov_cut_matrix_shappened = np.ascontiguousarray(
fov_cut_matrix.reshape(fov_cut_matrix.shape[0] * fov_cut_matrix.shape[1] * fov_cut_matrix.shape[2]),
dtype=np.byte)
# forward_projection_arrays_page_locked_memory_allocations = [A_gpu, B_gpu, C_gpu, im_gpu]
# A_gpu = cuda.mem_alloc(A_shappened.size * A_shappened.dtype.itemsize)
# B_gpu = cuda.mem_alloc(B_shappened.size * B_shappened.dtype.itemsize)
# C_gpu = cuda.mem_alloc(C_shappened.size * C_shappened.dtype.itemsize)
im_gpu = cuda.mem_alloc(im_shappened.size * im_shappened.dtype.itemsize)
fov_cut_matrix_gpu = cuda.mem_alloc(fov_cut_matrix_shappened.size * fov_cut_matrix_shappened.dtype.itemsize)
# texref = mod_forward_projection_shared_mem.get_texref('tex')
# texref.set_address(fov_cut_matrix_gpu, fov_cut_matrix_shappened.size * fov_cut_matrix_shappened.dtype.itemsize)
# # texref.set_format(cuda.array_format.UNSIGNED_INT8, 1)
# cuda.memcpy_htod_async(A_gpu, A_shappened)
# cuda.memcpy_htod_async(B_gpu, B_shappened)
# cuda.memcpy_htod_async(C_gpu, C_shappened)
cuda.memcpy_htod_async(im_gpu, im_shappened)
cuda.memcpy_htod_async(fov_cut_matrix_gpu, fov_cut_matrix_shappened)
a_gpu = cuda.mem_alloc(a.size * a.dtype.itemsize)
b_gpu = cuda.mem_alloc(b.size * b.dtype.itemsize)
c_gpu = cuda.mem_alloc(c.size * c.dtype.itemsize)
d_gpu = cuda.mem_alloc(d.size * d.dtype.itemsize)
a_normal_gpu = cuda.mem_alloc(a_normal.size * a_normal.dtype.itemsize)
b_normal_gpu = cuda.mem_alloc(b_normal.size * b_normal.dtype.itemsize)
c_normal_gpu = cuda.mem_alloc(c_normal.size * c_normal.dtype.itemsize)
d_normal_gpu = cuda.mem_alloc(d_normal.size * d_normal.dtype.itemsize)
a_cf_gpu = cuda.mem_alloc(a_cf.size * a_cf.dtype.itemsize)
b_cf_gpu = cuda.mem_alloc(b_cf.size * b_cf.dtype.itemsize)
c_cf_gpu = cuda.mem_alloc(c_cf.size * c_cf.dtype.itemsize)
d_cf_gpu = cuda.mem_alloc(d_cf.size * d_cf.dtype.itemsize)
sum_vor_t_gpu = cuda.mem_alloc(sum_vor.size * sum_vor.dtype.itemsize)
distance_to_center_plane_gpu = cuda.mem_alloc(
self.distance_to_center_plane.size * self.distance_to_center_plane.dtype.itemsize)
distance_to_center_plane_normal_gpu = cuda.mem_alloc(
self.distance_to_center_plane_normal.size * self.distance_to_center_plane_normal.dtype.itemsize)
time_factor_gpu = cuda.mem_alloc(time_factor.size * time_factor.dtype.itemsize)
# Transfer memory to Optimizer
cuda.memcpy_htod_async(a_gpu, a)
cuda.memcpy_htod_async(b_gpu, b)
cuda.memcpy_htod_async(c_gpu, c)
cuda.memcpy_htod_async(d_gpu, d)
cuda.memcpy_htod_async(a_normal_gpu, a_normal)
cuda.memcpy_htod_async(b_normal_gpu, b_normal)
cuda.memcpy_htod_async(c_normal_gpu, c_normal)
cuda.memcpy_htod_async(d_normal_gpu, d_normal)
cuda.memcpy_htod_async(a_cf_gpu, a_cf)
cuda.memcpy_htod_async(b_cf_gpu, b_cf)
cuda.memcpy_htod_async(c_cf_gpu, c_cf)
cuda.memcpy_htod_async(d_cf_gpu, d_cf)
cuda.memcpy_htod_async(time_factor_gpu, time_factor)
cuda.memcpy_htod_async(distance_to_center_plane_gpu, self.distance_to_center_plane)
cuda.memcpy_htod_async(distance_to_center_plane_normal_gpu, self.distance_to_center_plane_normal)
m_values_gpu = cuda.mem_alloc(
self.doi_mapping.m_values_listMode.size * self.doi_mapping.m_values_listMode.dtype.itemsize)
b_values_gpu = cuda.mem_alloc(
self.doi_mapping.b_values_listMode.size * self.doi_mapping.b_values_listMode.dtype.itemsize)
m_values_at_gpu = cuda.mem_alloc(
self.doi_mapping.m_values_at_listMode.size * self.doi_mapping.m_values_at_listMode.dtype.itemsize)
b_values_at_gpu = cuda.mem_alloc(
self.doi_mapping.b_values_at_listMode.size * self.doi_mapping.b_values_at_listMode.dtype.itemsize)
max_D_gpu = cuda.mem_alloc(
self.doi_mapping.max_D_listMode.size * self.doi_mapping.max_D_listMode.dtype.itemsize)
inflex_points_x_gpu = cuda.mem_alloc(
self.doi_mapping.inflex_points_x_listMode.size * self.doi_mapping.inflex_points_x_listMode.dtype.itemsize)
linear_attenuation_A_gpu = cuda.mem_alloc(
self.doi_mapping.linear_attenuation_crystal_A_listMode.size * self.doi_mapping.linear_attenuation_crystal_A_listMode.dtype.itemsize)
linear_attenuation_B_gpu = cuda.mem_alloc(
self.doi_mapping.linear_attenuation_crystal_B_listMode.size * self.doi_mapping.linear_attenuation_crystal_B_listMode.dtype.itemsize)
cuda.memcpy_htod_async(m_values_gpu, self.doi_mapping.m_values_listMode)
cuda.memcpy_htod_async(b_values_gpu, self.doi_mapping.b_values_listMode)
cuda.memcpy_htod_async(m_values_at_gpu, self.doi_mapping.m_values_at_listMode)
cuda.memcpy_htod_async(b_values_at_gpu, self.doi_mapping.b_values_at_listMode)
cuda.memcpy_htod_async(max_D_gpu, self.doi_mapping.max_D_listMode)
cuda.memcpy_htod_async(inflex_points_x_gpu, self.doi_mapping.inflex_points_x_listMode)
cuda.memcpy_htod_async(linear_attenuation_A_gpu, self.doi_mapping.linear_attenuation_crystal_A_listMode)
cuda.memcpy_htod_async(linear_attenuation_B_gpu, self.doi_mapping.linear_attenuation_crystal_B_listMode)
for dataset in range(number_of_datasets):
# if dataset == number_of_datasets:
# begin_dataset = np.int32(dataset * number_of_events / number_of_datasets)
# end_dataset = number_of_events
# adjust_coef_cut[dataset] = np.ascontiguousarray(
# adjust_coef[int(np.floor(im_cut_dim[0] * dataset)):adjust_coef.shape[0],
# int(np.floor(im_cut_dim[1] * dataset)):adjust_coef.shape[1],
# int(np.floor(im_cut_dim[2] * dataset)):adjust_coef.shape[2]],
# dtype=np.float32)
# fov_cut_matrix_cut[dataset] = np.ascontiguousarray(
# fov_cut_matrix[int(np.floor(im_cut_dim[0] * dataset)):fov_cut_matrix.shape[0],
# int(np.floor(im_cut_dim[1] * dataset)):fov_cut_matrix.shape[1],
# int(np.floor(im_cut_dim[2] * dataset)):fov_cut_matrix.shape[2]],
# dtype=np.float32)
# else:
begin_dataset = np.int32(dataset * number_of_events / number_of_datasets)
end_dataset = np.int32((dataset + 1) * number_of_events / number_of_datasets)
# Cutting dataset
# For forward projection the data is cutted by number of events. For backprojection is cutted per image pieces of image
a_cut[dataset] = a[begin_dataset:end_dataset]
b_cut[dataset] = b[begin_dataset:end_dataset]
c_cut[dataset] = c[begin_dataset:end_dataset]
d_cut[dataset] = d[begin_dataset:end_dataset]
a_normal_cut[dataset] = a_normal[begin_dataset:end_dataset]
b_normal_cut[dataset] = b_normal[begin_dataset:end_dataset]
c_normal_cut[dataset] = c_normal[begin_dataset:end_dataset]
d_normal_cut[dataset] = d_normal[begin_dataset:end_dataset]
a_cf_cut[dataset] = a_cf[begin_dataset:end_dataset]
b_cf_cut[dataset] = b_cf[begin_dataset:end_dataset]
c_cf_cut[dataset] = c_cf[begin_dataset:end_dataset]
d_cf_cut[dataset] = d_cf[begin_dataset:end_dataset]
sum_vor_cut[dataset] = sum_vor[begin_dataset:end_dataset]
distance_to_center_plane_cut[dataset] = self.distance_to_center_plane[begin_dataset:end_dataset]
distance_to_center_plane_normal_cut[dataset] = self.distance_to_center_plane_normal[
begin_dataset:end_dataset]
# Forward
a_cut_gpu[dataset] = cuda.mem_alloc(a_cut[dataset].size * a_cut[dataset].dtype.itemsize)
b_cut_gpu[dataset] = cuda.mem_alloc(b_cut[dataset].size * b_cut[dataset].dtype.itemsize)
c_cut_gpu[dataset] = cuda.mem_alloc(c_cut[dataset].size * c_cut[dataset].dtype.itemsize)
d_cut_gpu[dataset] = cuda.mem_alloc(d_cut[dataset].size * d_cut[dataset].dtype.itemsize)
a_cut_normal_gpu[dataset] = cuda.mem_alloc(
a_normal_cut[dataset].size * a_normal_cut[dataset].dtype.itemsize)
b_cut_normal_gpu[dataset] = cuda.mem_alloc(
b_normal_cut[dataset].size * b_normal_cut[dataset].dtype.itemsize)
c_cut_normal_gpu[dataset] = cuda.mem_alloc(
c_normal_cut[dataset].size * c_normal_cut[dataset].dtype.itemsize)
d_cut_normal_gpu[dataset] = cuda.mem_alloc(
d_normal_cut[dataset].size * d_normal_cut[dataset].dtype.itemsize)
a_cf_cut_gpu[dataset] = cuda.mem_alloc(a_cf_cut[dataset].size * a_cf_cut[dataset].dtype.itemsize)
b_cf_cut_gpu[dataset] = cuda.mem_alloc(b_cf_cut[dataset].size * b_cf_cut[dataset].dtype.itemsize)
c_cf_cut_gpu[dataset] = cuda.mem_alloc(c_cf_cut[dataset].size * c_cf_cut[dataset].dtype.itemsize)
d_cf_cut_gpu[dataset] = cuda.mem_alloc(d_cf_cut[dataset].size * d_cf_cut[dataset].dtype.itemsize)
distance_to_center_plane_gpu_cut[dataset] = cuda.mem_alloc(
distance_to_center_plane_cut[dataset].size * distance_to_center_plane_cut[dataset].dtype.itemsize)
distance_to_center_plane_normal_gpu_cut[dataset] = cuda.mem_alloc(
distance_to_center_plane_normal_cut[dataset].size * distance_to_center_plane_normal_cut[
dataset].dtype.itemsize)
sum_vor_gpu[dataset] = cuda.mem_alloc(sum_vor_cut[dataset].size * sum_vor_cut[dataset].dtype.itemsize)
sum_vor_pinned[dataset] = cuda.register_host_memory(sum_vor_cut[dataset])
assert np.all(sum_vor_pinned[dataset] == sum_vor_cut[dataset])
cuda.memcpy_htod_async(sum_vor_gpu[dataset], sum_vor_pinned[dataset], stream[dataset])
# sum_vor_gpu[dataset] = np.intp(x.base.get_device_pointer())
# cuda.memcpy_htod_async(probability_gpu[dataset], probability_cut[dataset])
cuda.memcpy_htod_async(a_cut_gpu[dataset], a_cut[dataset])
cuda.memcpy_htod_async(b_cut_gpu[dataset], b_cut[dataset])
cuda.memcpy_htod_async(c_cut_gpu[dataset], c_cut[dataset])
cuda.memcpy_htod_async(d_cut_gpu[dataset], d_cut[dataset])
cuda.memcpy_htod_async(a_cut_normal_gpu[dataset], a_normal_cut[dataset])
cuda.memcpy_htod_async(b_cut_normal_gpu[dataset], b_normal_cut[dataset])
cuda.memcpy_htod_async(c_cut_normal_gpu[dataset], c_normal_cut[dataset])
cuda.memcpy_htod_async(d_cut_normal_gpu[dataset], d_normal_cut[dataset])
cuda.memcpy_htod_async(a_cf_cut_gpu[dataset], a_cf_cut[dataset])
cuda.memcpy_htod_async(b_cf_cut_gpu[dataset], b_cf_cut[dataset])
cuda.memcpy_htod_async(c_cf_cut_gpu[dataset], c_cf_cut[dataset])
cuda.memcpy_htod_async(d_cf_cut_gpu[dataset], d_cf_cut[dataset])
cuda.memcpy_htod_async(distance_to_center_plane_gpu_cut[dataset], distance_to_center_plane_cut[dataset])
cuda.memcpy_htod_async(distance_to_center_plane_normal_gpu_cut[dataset],
distance_to_center_plane_normal_cut[dataset])
m_values_listMode_cut[dataset] = np.copy(self.doi_mapping.m_values_listMode[begin_dataset:end_dataset])
m_values_at_listMode_cut[dataset] = np.copy(
self.doi_mapping.m_values_at_listMode[begin_dataset:end_dataset])
b_values_listMode_cut[dataset] = np.copy(self.doi_mapping.b_values_listMode[begin_dataset:end_dataset])
b_values_at_listMode_cut[dataset] = np.copy(
self.doi_mapping.b_values_at_listMode[begin_dataset:end_dataset])
max_D_listMode_cut[dataset] = np.copy(self.doi_mapping.max_D_listMode[begin_dataset:end_dataset])
inflex_points_x_listMode_cut[dataset] = np.copy(
self.doi_mapping.inflex_points_x_listMode[begin_dataset:end_dataset])
linear_attenuation_crystal_A_listMode_cut[dataset] = np.copy(
self.doi_mapping.linear_attenuation_crystal_A_listMode[begin_dataset:end_dataset])
linear_attenuation_crystal_B_listMode_cut[dataset] = np.copy(
self.doi_mapping.linear_attenuation_crystal_B_listMode[begin_dataset:end_dataset])
m_values_gpu_cut[dataset] = cuda.mem_alloc(
m_values_listMode_cut[dataset].size * m_values_listMode_cut[dataset].dtype.itemsize)
b_values_gpu_cut[dataset] = cuda.mem_alloc(
b_values_listMode_cut[dataset].size * b_values_listMode_cut[dataset].dtype.itemsize)
m_values_at_gpu_cut[dataset] = cuda.mem_alloc(
m_values_at_listMode_cut[dataset].size * m_values_at_listMode_cut[dataset].dtype.itemsize)
b_values_at_gpu_cut[dataset] = cuda.mem_alloc(
b_values_at_listMode_cut[dataset].size * b_values_at_listMode_cut[dataset].dtype.itemsize)
max_D_gpu_cut[dataset] = cuda.mem_alloc(
max_D_listMode_cut[dataset].size * max_D_listMode_cut[dataset].dtype.itemsize)
inflex_points_x_gpu_cut[dataset] = cuda.mem_alloc(
inflex_points_x_listMode_cut[dataset].size * inflex_points_x_listMode_cut[dataset].dtype.itemsize)
linear_attenuation_A_gpu_cut[dataset] = cuda.mem_alloc(
linear_attenuation_crystal_A_listMode_cut[dataset].size * linear_attenuation_crystal_A_listMode_cut[
dataset].dtype.itemsize)
linear_attenuation_B_gpu_cut[dataset] = cuda.mem_alloc(
linear_attenuation_crystal_B_listMode_cut[dataset].size * linear_attenuation_crystal_B_listMode_cut[
dataset].dtype.itemsize)
cuda.memcpy_htod_async(m_values_gpu_cut[dataset], m_values_listMode_cut[dataset])
cuda.memcpy_htod_async(b_values_gpu_cut[dataset], b_values_listMode_cut[dataset])
cuda.memcpy_htod_async(m_values_at_gpu_cut[dataset], m_values_at_listMode_cut[dataset])
cuda.memcpy_htod_async(b_values_at_gpu_cut[dataset], b_values_at_listMode_cut[dataset])
cuda.memcpy_htod_async(max_D_gpu_cut[dataset], max_D_listMode_cut[dataset])
cuda.memcpy_htod_async(inflex_points_x_gpu_cut[dataset], inflex_points_x_listMode_cut[dataset])
cuda.memcpy_htod_async(linear_attenuation_A_gpu_cut[dataset],
linear_attenuation_crystal_A_listMode_cut[dataset])
cuda.memcpy_htod_async(linear_attenuation_B_gpu_cut[dataset],
linear_attenuation_crystal_B_listMode_cut[dataset])
adjust_coef = np.ascontiguousarray(adjust_coef.reshape(
adjust_coef.shape[0] * adjust_coef.shape[1] * adjust_coef.shape[2]),
dtype=np.float32)
# fov_cut_matrix = np.ascontiguousarray(fov_cut_matrix.reshape(
# fov_cut_matrix.shape[0] * fov_cut_matrix.shape[1] * fov_cut_matrix.shape[2]),
# dtype=np.float32)
A = np.ascontiguousarray(A.reshape(
A.shape[0] * A.shape[1] * A.shape[2]),
dtype=np.short)
B = np.ascontiguousarray(B.reshape(
B.shape[0] * B.shape[1] * B.shape[2]),
dtype=np.short)
C = np.ascontiguousarray(C.reshape(
C.shape[0] * C.shape[1] * C.shape[2]),
dtype=np.short)
# ---- Divide into datasets variables backprojection
for dataset in range(number_of_datasets_back):
voxels_division = adjust_coef.shape[0] // number_of_datasets_back
adjust_coef_cut[dataset] = np.ascontiguousarray(
adjust_coef[dataset * voxels_division:(dataset + 1) * voxels_division],
dtype=np.float32)
fov_cut_matrix_cut[dataset] = np.ascontiguousarray(
fov_cut_matrix_shappened[dataset * voxels_division:(dataset + 1) * voxels_division],
dtype=np.byte)
A_cut[dataset] = np.ascontiguousarray(
A[dataset * voxels_division:(dataset + 1) * voxels_division],
dtype=np.short)
B_cut[dataset] = np.ascontiguousarray(
B[dataset * voxels_division:(dataset + 1) * voxels_division],
dtype=np.short)
C_cut[dataset] = np.ascontiguousarray(
C[dataset * voxels_division:(dataset + 1) * voxels_division],
dtype=np.short)
# Backprojection
adjust_coef_gpu[dataset] = cuda.mem_alloc(
adjust_coef_cut[dataset].size * adjust_coef_cut[dataset].dtype.itemsize)
adjust_coef_pinned[dataset] = cuda.register_host_memory(adjust_coef_cut[dataset])
assert np.all(adjust_coef_pinned[dataset] == adjust_coef_cut[dataset])
cuda.memcpy_htod_async(adjust_coef_gpu[dataset], adjust_coef_pinned[dataset], stream[dataset])
fov_cut_matrix_cutted_gpu[dataset] = cuda.mem_alloc(
fov_cut_matrix_cut[dataset].size * fov_cut_matrix_cut[dataset].dtype.itemsize)
A_cut_gpu[dataset] = cuda.mem_alloc(
A_cut[dataset].size * A_cut[dataset].dtype.itemsize)
B_cut_gpu[dataset] = cuda.mem_alloc(
B_cut[dataset].size * B_cut[dataset].dtype.itemsize)
C_cut_gpu[dataset] = cuda.mem_alloc(
C_cut[dataset].size * C_cut[dataset].dtype.itemsize)
cuda.memcpy_htod_async(adjust_coef_gpu[dataset], adjust_coef_cut[dataset])
cuda.memcpy_htod_async(fov_cut_matrix_cutted_gpu[dataset], fov_cut_matrix_cut[dataset])
cuda.memcpy_htod_async(A_cut_gpu[dataset], A_cut[dataset])
cuda.memcpy_htod_async(B_cut_gpu[dataset], B_cut[dataset])
cuda.memcpy_htod_async(C_cut_gpu[dataset], C_cut[dataset])
free, total = cuda.mem_get_info()
print('%.1f %% of device memory is free.' % ((free / float(total)) * 100))
# -------------OSEM---------
it = self.number_of_iterations
subsets = self.number_of_subsets
print('Number events for reconstruction: {}'.format(number_of_events))
im = np.ascontiguousarray(im.reshape(im.shape[0] * im.shape[1] * im.shape[2]), dtype=np.float32)
for i in range(it):
print('Iteration number: {}\n----------------'.format(i + 1))
begin_event = np.int32(0)
end_event = np.int32(number_of_events / subsets)
for sb in range(subsets):
print('Subset number: {}'.format(sb))
number_of_events_subset = np.int32(end_event - begin_event)
tic = time.time()
# Cycle forward Projection
for dataset in range(number_of_datasets):
if dataset == number_of_datasets:
begin_dataset = np.int32(dataset * number_of_events_subset / number_of_datasets)
end_dataset = number_of_events_subset
else:
begin_dataset = np.int32(dataset * number_of_events_subset / number_of_datasets)
end_dataset = np.int32((dataset + 1) * number_of_events_subset / number_of_datasets)
threadsperblock = (256, 1, 1)
blockspergrid_x = int(math.ceil(((end_dataset - begin_dataset)) / threadsperblock[0]))
blockspergrid_y = int(math.ceil(1 / threadsperblock[1]))
blockspergrid_z = int(math.ceil(1 / threadsperblock[2]))
blockspergrid = (blockspergrid_x, blockspergrid_y, blockspergrid_z)
event[dataset]['kernel_begin'].record(stream[dataset])
# depth = np.int32(5)
# weight = np.int32(A.shape[2]/2)
# height = np.int32(A.shape[2]/2)
if self.cdrf:
func_forward = mod_forward_projection_shared_mem_cdrf.get_function("forward_projection_cdrf")
func_forward(weight, height, depth, start_x, start_y, start_z,
distance_to_center_plane_normal_gpu_cut[dataset],
distance_to_center_plane_gpu_cut[dataset],
half_distance_between_array_pixel,
number_of_events, begin_dataset, end_dataset, a_cut_gpu[dataset],
a_cut_normal_gpu[dataset], a_cf_cut_gpu[dataset],
b_cut_gpu[dataset], b_cut_normal_gpu[dataset], b_cf_cut_gpu[dataset],
c_cut_gpu[dataset], c_cut_normal_gpu[dataset],
c_cf_cut_gpu[dataset],
d_cut_gpu[dataset],
d_cut_normal_gpu[dataset], d_cf_cut_gpu[dataset],
sum_vor_gpu[dataset], fov_cut_matrix_gpu, im_gpu, m_values_gpu_cut[dataset],
b_values_gpu_cut[dataset], m_values_at_gpu_cut[dataset],
b_values_at_gpu_cut[dataset],
max_D_gpu_cut[dataset], inflex_points_x_gpu_cut[dataset],
linear_attenuation_A_gpu_cut[dataset],
linear_attenuation_B_gpu_cut[dataset],
block=threadsperblock,
grid=blockspergrid,
stream=stream[dataset])
else:
func_forward = mod_forward_projection_shared_mem.get_function("forward_projection")
func_forward(weight, height, depth, start_x, start_y, start_z, half_crystal_pitch_xy,
half_crystal_pitch_z,
half_distance_between_array_pixel,
number_of_events, begin_dataset, end_dataset, a_cut_gpu[dataset],
a_cut_normal_gpu[dataset], a_cf_cut_gpu[dataset],
b_cut_gpu[dataset], b_cut_normal_gpu[dataset], b_cf_cut_gpu[dataset],
c_cut_gpu[dataset], c_cut_normal_gpu[dataset],
c_cf_cut_gpu[dataset],
d_cut_gpu[dataset],
d_cut_normal_gpu[dataset], d_cf_cut_gpu[dataset],
sum_vor_gpu[dataset], fov_cut_matrix_gpu, im_gpu,
block=threadsperblock,
grid=blockspergrid,
stream=stream[dataset])
# Sincronization of streams
for dataset in range(number_of_datasets): # Commenting out this line should break concurrency.
event[dataset]['kernel_end'].record(stream[dataset])
# Transfering data from Optimizer
for dataset in range(number_of_datasets):
begin_dataset = np.int32(dataset * number_of_events / number_of_datasets)
end_dataset = np.int32((dataset + 1) * number_of_events / number_of_datasets)
# cuda.memcpy_dtoh_async(sum_vor[begin_dataset:end_dataset], sum_vor_gpu[dataset])
cuda.memcpy_dtoh_async(sum_vor_pinned[dataset], sum_vor_gpu[dataset], stream[dataset])
# cuda.cudaStream.Synchronize(stream[dataset])
toc = time.time()
cuda.Context.synchronize()
print('Time part Forward Projection {} : {}'.format(1, toc - tic))
# number_of_datasets = np.int32(2)
teste = np.copy(sum_vor)
# sum_vor[sum_vor<1]=0
sum_vor = np.ascontiguousarray(teste, dtype=np.float32)
# sum_vor=np.ascontiguousarray(np.ones((self.a.shape)), dtype=np.float32)
print('SUM VOR: {}'.format(np.sum(teste)))
print('SUM VOR: {}'.format(teste))
# cuda.memcpy_htod_async(sum_vor_t_gpu, sum_vor)
cuda.memcpy_htod_async(sum_vor_t_gpu, sum_vor)
# ------------BACKPROJECTION-----------
for dataset in range(number_of_datasets_back):
dataset = np.int32(dataset)
begin_dataset = np.int32(0)
end_dataset = np.int32(number_of_events_subset)
# begin_dataset = np.int32(0)
# end_dataset = np.int32(number_of_events)
event[dataset]['kernel_begin'].record(stream[dataset])
# weight_cutted, height_cutted, depth_cutted = np.int32(adjust_coef_cut[dataset].shape[0]), np.int32(
# adjust_coef_cut[dataset].shape[1]), np.int32(adjust_coef_cut[dataset].shape[2])
weight_cutted, height_cutted, depth_cutted = np.int32(adjust_coef_cut[dataset].shape[0]), np.int32(
1), np.int32(1)
number_of_voxels_thread = 64
threadsperblock = (np.int(number_of_voxels_thread), 1, 1)
blockspergrid_x = int(math.ceil(adjust_coef_cut[dataset].shape[0] / threadsperblock[0]))
blockspergrid_y = int(math.ceil(1 / threadsperblock[1]))
blockspergrid_z = int(math.ceil(1 / threadsperblock[2]))
# blockspergrid_y = int(math.ceil(adjust_coef_cut[dataset].shape[1] / threadsperblock[1]))
# blockspergrid_z = int(math.ceil(adjust_coef_cut[dataset].shape[2] / threadsperblock[2]))
blockspergrid = (blockspergrid_x, blockspergrid_y, blockspergrid_z)
shared_memory = threadsperblock[0] * threadsperblock[1] * threadsperblock[2] * 4
if self.cdrf:
if self.normalization_calculation_flag:
func_backward = mod_normalization_shared_mem_cdrf.get_function("normalization_cdrf")
func_backward(dataset, weight_cutted, height_cutted, depth_cutted,
distance_to_center_plane_normal_gpu,
distance_to_center_plane_gpu, half_distance_between_array_pixel,
number_of_events, begin_dataset, end_dataset, a_gpu, a_normal_gpu, a_cf_gpu,
b_gpu, b_normal_gpu, b_cf_gpu, c_gpu, c_normal_gpu, c_cf_gpu, d_gpu,
d_normal_gpu, d_cf_gpu, A_cut_gpu[dataset], B_cut_gpu[dataset],
C_cut_gpu[dataset],
adjust_coef_gpu[dataset],
sum_vor_t_gpu, fov_cut_matrix_cutted_gpu[dataset], time_factor_gpu,
plane_centerA1_gpu[0], plane_centerA1_gpu[1], plane_centerA1_gpu[2],
plane_centerA1_gpu[3],
plane_centerB1_gpu[0], plane_centerB1_gpu[1], plane_centerB1_gpu[2],
plane_centerB1_gpu[3],
plane_centerC1_gpu[0], plane_centerC1_gpu[1], plane_centerC1_gpu[2],
plane_centerC1_gpu[3], intersection_points_gpu, m_values_gpu, m_values_at_gpu,
b_values_gpu, b_values_at_gpu,
max_D_gpu, inflex_points_x_gpu, linear_attenuation_A_gpu,
linear_attenuation_B_gpu,
block=threadsperblock,
grid=blockspergrid,
shared=np.int(4 * number_of_voxels_thread),
stream=stream[dataset],
)
else:
func_backward = mod_backward_projection_shared_mem_cdrf.get_function("backprojection_cdrf")
func_backward(dataset, weight_cutted, height_cutted, depth_cutted,
distance_to_center_plane_normal_gpu,
distance_to_center_plane_gpu, half_distance_between_array_pixel,
number_of_events, begin_dataset, end_dataset, a_gpu, a_normal_gpu, a_cf_gpu,
b_gpu, b_normal_gpu, b_cf_gpu, c_gpu, c_normal_gpu, c_cf_gpu, d_gpu,
d_normal_gpu, d_cf_gpu, A_cut_gpu[dataset], B_cut_gpu[dataset],
C_cut_gpu[dataset],
adjust_coef_gpu[dataset],
sum_vor_t_gpu, fov_cut_matrix_cutted_gpu[dataset], time_factor_gpu,
m_values_gpu, m_values_at_gpu,
b_values_gpu, b_values_at_gpu,
max_D_gpu, inflex_points_x_gpu, linear_attenuation_A_gpu,
linear_attenuation_B_gpu,
block=threadsperblock,
grid=blockspergrid,
shared=np.int(4 * number_of_voxels_thread),
stream=stream[dataset],
)
else:
if self.normalization_calculation_flag:
func_backward = mod_normalization_shared_mem.get_function("normalization")
else:
func_backward = mod_backward_projection_shared_mem.get_function("backprojection")
func_backward(dataset, weight_cutted, height_cutted, depth_cutted, half_crystal_pitch_xy,
half_crystal_pitch_z, half_distance_between_array_pixel,
number_of_events, begin_dataset, end_dataset, a_gpu, a_normal_gpu, a_cf_gpu,
b_gpu, b_normal_gpu, b_cf_gpu, c_gpu, c_normal_gpu, c_cf_gpu, d_gpu,
d_normal_gpu, d_cf_gpu, A_cut_gpu[dataset], B_cut_gpu[dataset],
C_cut_gpu[dataset],
adjust_coef_gpu[dataset],
sum_vor_t_gpu, fov_cut_matrix_cutted_gpu[dataset], time_factor_gpu,
block=threadsperblock,
grid=blockspergrid,
shared=np.int(4 * number_of_voxels_thread),
stream=stream[dataset],
)
for dataset in range(number_of_datasets_back): # Commenting out this line should break concurrency.
event[dataset]['kernel_end'].record(stream[dataset])
for dataset in range(number_of_datasets_back):
cuda.memcpy_dtoh_async(adjust_coef_cut[dataset], adjust_coef_gpu[dataset])
adjust_coef[dataset * voxels_division:(dataset + 1) * voxels_division] = adjust_coef_cut[dataset]
cuda.Context.synchronize()
print('Time part Backward Projection {} : {}'.format(1, time.time() - toc))
# Image Normalization
# if i ==0:
# norm_im=np.copy(adjust_coef)
# norm_im=norm_im/np.max(norm_im)
# norm_im[norm_im == 0] = np.min(norm_im[np.nonzero(norm_im)])
# normalization_matrix = gaussian_filter(normalization_matrix, 0.5)
# im_med = np.load("C:\\Users\\pedro.encarnacao\\OneDrive - Universidade de Aveiro\\PhD\\Reconstrução\\NAF+FDG\\Easypet Scan 05 Aug 2019 - 14h 36m 33s\\static_image\\Easypet Scan 05 Aug 2019 - 14h 36m 33s mlem.npy")
# self.algorithm = "LM-MRP"
if self.algorithm == "LM-MRP":
beta = self.algorithm_options[0]
kernel_filter_size = self.algorithm_options[1]
im_to_filter = im.reshape(weight, height, depth)
im_med = median_filter(im_to_filter, kernel_filter_size)
penalized_term = np.copy(im_to_filter)
penalized_term[im_med != 0] = 1 + beta * (im_to_filter[im_med != 0] - im_med[im_med != 0]) / im_med[
im_med != 0]
penalized_term = np.ascontiguousarray(penalized_term.reshape(weight * height * depth),
dtype=np.float32)
# penalized_term = np.ascontiguousarray(penalized_term, dtype=np.float32)
if self.algorithm == "MAP":
beta = 0.5
im_map = np.load(
"C:\\Users\\pedro.encarnacao\\OneDrive - Universidade de Aveiro\\PhD\\Reconstrução\\NAF+FDG\\Easypet Scan 05 Aug 2019 - 14h 36m 33s\\static_image\\Easypet Scan 05 Aug 2019 - 14h 36m 33s mlem.npy")
im[normalization_matrix != 0] = im[normalization_matrix != 0] * adjust_coef[
normalization_matrix != 0] / (normalization_matrix[normalization_matrix != 0])
im[normalization_matrix == 0] = 0
if self.algorithm == "LM-MRP":
im[penalized_term != 0] = im[penalized_term != 0] / penalized_term[penalized_term != 0]
# im = fourier_gaussian(im, sigma=0.2)
# im = gaussian_filter(im, 0.4)
print('SUM IMAGE: {}'.format(np.sum(im)))
im = np.ascontiguousarray(im, dtype=np.float32)
# im = im * adjust_coef / sensivity_matrix[np.nonzero(sensivity_matrix)]
cuda.memcpy_htod_async(im_gpu, im)
# Clearing variables
sum_vor = np.ascontiguousarray(
np.zeros(self.a.shape, dtype=np.float32))
adjust_coef = np.ascontiguousarray(
np.zeros((self.number_of_pixels_x * self.number_of_pixels_y * self.number_of_pixels_z),
dtype=np.float32))
for dataset in range(number_of_datasets):
# if dataset == number_of_datasets:
# begin_dataset = np.int32(dataset * number_of_events / number_of_datasets)
# end_dataset = number_of_events
#
# else:
begin_dataset = np.int32(dataset * number_of_events / number_of_datasets)
end_dataset = np.int32((dataset + 1) * number_of_events / number_of_datasets)
# adjust_coef_cut[dataset] = np.ascontiguousarray(adjust_coef[:, :,
# int(np.floor(im_cut_dim[2] * dataset)):int(
# np.floor(im_cut_dim[2] * (dataset + 1)))],
# dtype=np.float32)
sum_vor_cut[dataset] = sum_vor[begin_dataset:end_dataset]
# cuda.memcpy_htod_async(sum_vor_gpu[dataset], sum_vor_cut[dataset])
sum_vor_pinned[dataset] = cuda.register_host_memory(sum_vor_cut[dataset])
assert np.all(sum_vor_pinned[dataset] == sum_vor_cut[dataset])
cuda.memcpy_htod_async(sum_vor_gpu[dataset], sum_vor_pinned[dataset], stream[dataset])
for dataset in range(number_of_datasets_back):
adjust_coef_cut[dataset] = adjust_coef[dataset * voxels_division:(dataset + 1) * voxels_division]
adjust_coef_pinned[dataset] = cuda.register_host_memory(adjust_coef_cut[dataset])
assert np.all(adjust_coef_pinned[dataset] == adjust_coef_cut[dataset])
cuda.memcpy_htod_async(adjust_coef_gpu[dataset], adjust_coef_pinned[dataset], stream[dataset])
if self.saved_image_by_iteration:
im = im.reshape(weight, height, depth)
self._save_image_by_it(im, i, sb)
if self.signals_interface is not None:
self.signals_interface.trigger_update_label_reconstruction_status.emit(
"{}: Iteration {}".format(self.current_info_step, i + 1))
self.signals_interface.trigger_progress_reconstruction_partial.emit(
int(np.round(100 * (i + 1) * (sb + subsets) / (it * subsets), 0)))
im = im.reshape(weight, height, depth)
return im * subsets
def _save_image_by_it(self, im, normalization=False, it=None, sb=None):
directory = os.path.dirname(os.path.abspath(__file__))
if normalization:
file_name = os.path.join(self.directory, "Normalization".format(it, sb))
else:
file_name = os.path.join(self.directory, "EasyPETScan_it{}_sb{}".format(it, sb))
print(file_name)
volume = im.astype(np.float32)
length = 1
for i in volume.shape:
length *= i
# length = volume.shape[0] * volume.shape[2] * volume.shape[1]
if len(volume.shape) > 1:
data = np.reshape(volume, [1, length], order='F')
else:
data = volume
shapeIm = volume.shape
output_file = open(file_name, 'wb')
arr = array('f', data[0])
arr.tofile(output_file)
output_file.close()
# file_name = directory+"/Acquisitions/SENS_0_5"
# file_name = directory+"/Acquisitions/sens_0_25"