diff --git a/demo_leapctype/d01_standard_geometries.py b/demo_leapctype/d01_standard_geometries.py
index 4bc4bc7..7393313 100644
--- a/demo_leapctype/d01_standard_geometries.py
+++ b/demo_leapctype/d01_standard_geometries.py
@@ -71,7 +71,7 @@
 #leapct.sketch_system()
 
 # Set the backprojector model, 'SF' (the default setting), is more accurate, but 'VD' is faster
-leapct.set_projector('VD')
+#leapct.set_projector('VD')
 
 # Allocate space for the projections and the volume
 # You don't have to use these functions; they are provided just for convenience
diff --git a/demo_leapctype/d23_offsetScan.py b/demo_leapctype/d23_offsetScan.py
index 946208d..e34e11b 100644
--- a/demo_leapctype/d23_offsetScan.py
+++ b/demo_leapctype/d23_offsetScan.py
@@ -56,7 +56,7 @@
 # It is best to do this after the CT geometry is set
 leapct.set_default_volume()
 #leapct.set_diameterFOV(leapct.get_voxelWidth()*leapct.get_numX())
-leapct.convert_to_modularbeam()
+#leapct.convert_to_modularbeam()
 
 # Trouble-Shooting Functions
 leapct.print_parameters()
diff --git a/src/analytic_ray_tracing.cpp b/src/analytic_ray_tracing.cpp
index 35da762..8db6025 100644
--- a/src/analytic_ray_tracing.cpp
+++ b/src/analytic_ray_tracing.cpp
@@ -221,6 +221,18 @@ bool analyticRayTracing::setTrajectory(int iProj, int iRow, int iCol, double* r,
     double cos_phi = cos(phi);
     double sin_phi = sin(phi);
 
+    double cos_tilt = 1.0;
+    double sin_tilt = 0.0;
+    if (fabs(params->tiltAngle) > 1.0e-6 && params->geometry == parameters::CONE)
+    {
+        cos_tilt = cos(params->tiltAngle * PI / 180.0);
+        sin_tilt = sin(params->tiltAngle * PI / 180.0);
+        double u_tilt = u * cos_tilt - v * sin_tilt;
+        double v_tilt = u * sin_tilt + v * cos_tilt;
+        u = u_tilt;
+        v = v_tilt;
+    }
+
     float* s = NULL;
     float* c = NULL;
     float* u_vec = NULL;
diff --git a/src/analytic_ray_tracing_gpu.cu b/src/analytic_ray_tracing_gpu.cu
index 876b2a5..0195455 100644
--- a/src/analytic_ray_tracing_gpu.cu
+++ b/src/analytic_ray_tracing_gpu.cu
@@ -37,6 +37,8 @@ __constant__ int d_CURVED;
 __constant__ float d_sod;
 __constant__ float d_sdd;
 __constant__ float d_tau;
+__constant__ float d_cos_tilt;
+__constant__ float d_sin_tilt;
 __constant__ int4 d_N_g;
 __constant__ float4 d_T_g;
 __constant__ float4 d_startVal_g;
@@ -115,6 +117,14 @@ __device__ float3 setTrajectory(const float phi, const int iProj, const int iRow
     const float cos_phi = cos(phi);
     const float sin_phi = sin(phi);
 
+	float u_tilt = u_val;
+	float v_tilt = v_val;
+	if (d_sin_tilt != 0.0f)
+	{
+		u_tilt = u_val * d_cos_tilt - v_val * d_sin_tilt;
+		v_tilt = u_val * d_sin_tilt + v_val * d_cos_tilt;
+	}
+
     if (d_geometry == d_PARALLEL)
     {
         const float3 r = make_float3(-cos_phi, -sin_phi, 0.0f);
@@ -137,7 +147,7 @@ __device__ float3 setTrajectory(const float phi, const int iProj, const int iRow
         }
         else
         {
-			const float3 r = make_float3(-(cos_phi + u_val * sin_phi), -(sin_phi - u_val * cos_phi), v_val);
+			const float3 r = make_float3(-(cos_phi + u_tilt * sin_phi), -(sin_phi - u_tilt * cos_phi), v_tilt);
 			const float r_mag_inv = rsqrtf(r.x * r.x + r.y * r.y + r.z * r.z);
 			return make_float3(r.x * r_mag_inv, r.y * r_mag_inv, r.z * r_mag_inv);
         }
@@ -838,9 +848,18 @@ void setConstantMemoryGeometryParameters(parameters* params, int oversampling)
     float sod = params->sod;
     float sdd = params->sdd;
     float tau = params->tau;
+	float cos_tilt = 1.0;
+	float sin_tilt = 0.0;
+	if (params->geometry == parameters::CONE)
+	{
+		cos_tilt = cos(params->tiltAngle * PI / 180.0);
+		sin_tilt = sin(params->tiltAngle * PI / 180.0);
+	}
     cudaMemcpyToSymbol(d_sod, &sod, sizeof(float));
     cudaMemcpyToSymbol(d_sdd, &sdd, sizeof(float));
     cudaMemcpyToSymbol(d_tau, &tau, sizeof(float));
+	cudaMemcpyToSymbol(d_cos_tilt, &cos_tilt, sizeof(float));
+	cudaMemcpyToSymbol(d_sin_tilt, &sin_tilt, sizeof(float));
 	cudaStatus = cudaMemcpyToSymbol(d_N_g, &N_g, sizeof(int4));
     cudaMemcpyToSymbol(d_T_g, &T_g, sizeof(float4));
     cudaMemcpyToSymbol(d_startVal_g, &startVal_g, sizeof(float4));
diff --git a/src/backprojectors_VD.cu b/src/backprojectors_VD.cu
index 30a6657..5ada1cf 100644
--- a/src/backprojectors_VD.cu
+++ b/src/backprojectors_VD.cu
@@ -92,33 +92,33 @@ __global__ void modularBeamBackprojectorKernel_vox_stack(cudaTextureObject_t g,
 
     for (int iphi = 0; iphi < N_g.x; iphi++)
     {
+        const float3 sourcePosition = make_float3(sourcePositions[3 * iphi + 0], sourcePositions[3 * iphi + 1], sourcePositions[3 * iphi + 2]);
+        const float3 moduleCenter = make_float3(moduleCenters[3 * iphi + 0], moduleCenters[3 * iphi + 1], moduleCenters[3 * iphi + 2]);
+        const float3 v_vec = make_float3(rowVectors[3 * iphi + 0], rowVectors[3 * iphi + 1], rowVectors[3 * iphi + 2]);
+        const float3 u_vec = make_float3(colVectors[3 * iphi + 0], colVectors[3 * iphi + 1], colVectors[3 * iphi + 2]);
+
         float L = float(iphi) + 0.5f;
-        float* sourcePosition = &sourcePositions[3 * iphi];
-        float* moduleCenter = &moduleCenters[3 * iphi];
-        float* v_vec = &rowVectors[3 * iphi];
-        float* u_vec = &colVectors[3 * iphi];
-        const float3 detNormal = make_float3(u_vec[1] * v_vec[2] - u_vec[2] * v_vec[1],
-            u_vec[2] * v_vec[0] - u_vec[0] * v_vec[2],
-            u_vec[0] * v_vec[1] - u_vec[1] * v_vec[0]);
+        const float3 detNormal = make_float3(u_vec.y * v_vec.z - u_vec.z * v_vec.y,
+                                             u_vec.z * v_vec.x - u_vec.x * v_vec.z,
+                                             u_vec.x * v_vec.y - u_vec.y * v_vec.x);
 
-        const float3 p_minus_c = make_float3(sourcePosition[0] - moduleCenter[0], sourcePosition[1] - moduleCenter[1], sourcePosition[2] - moduleCenter[2]);
+        const float3 p_minus_c = make_float3(sourcePosition.x - moduleCenter.x, sourcePosition.y - moduleCenter.y, sourcePosition.z - moduleCenter.z);
         const float p_minus_c_dot_n = p_minus_c.x * detNormal.x + p_minus_c.y * detNormal.y + p_minus_c.z * detNormal.z;
-        const float p_minus_c_dot_u = p_minus_c.x * u_vec[0] + p_minus_c.y * u_vec[1] + p_minus_c.z * u_vec[2];
-        const float p_minus_c_dot_v = p_minus_c.x * v_vec[0] + p_minus_c.y * v_vec[1] + p_minus_c.z * v_vec[2];
-
+        const float p_minus_c_dot_u = p_minus_c.x * u_vec.x + p_minus_c.y * u_vec.y + p_minus_c.z * u_vec.z;
+        const float p_minus_c_dot_v = p_minus_c.x * v_vec.x + p_minus_c.y * v_vec.y + p_minus_c.z * v_vec.z;
         
         if (fabs(detNormal.z) <= 0.00001f)
         {
-            float3 r = make_float3(x - sourcePosition[0], y - sourcePosition[1], z - sourcePosition[2]);
+            float3 r = make_float3(x - sourcePosition.x, y - sourcePosition.y, z - sourcePosition.z);
 
             const float r_dot_d_inv = 1.0f / (r.x * detNormal.x + r.y * detNormal.y + r.z * detNormal.z);
             const float D = -p_minus_c_dot_n * r_dot_d_inv;
 
-            const float r_dot_u_0 = r.x * u_vec[0] + r.y * u_vec[1] + r.z * u_vec[2];
-            const float r_dot_v_0 = r.x * v_vec[0] + r.y * v_vec[1] + r.z * v_vec[2];
+            const float r_dot_u_0 = r.x * u_vec.x + r.y * u_vec.y + r.z * u_vec.z;
+            const float r_dot_v_0 = r.x * v_vec.x + r.y * v_vec.y + r.z * v_vec.z;
 
-            const float r_dot_u_inc = T_f.z * u_vec[2];
-            const float r_dot_v_inc = T_f.z * v_vec[2];
+            const float r_dot_u_inc = T_f.z * u_vec.z;
+            const float r_dot_v_inc = T_f.z * v_vec.z;
 
             for (int k_offset = 0; k_offset < numZ; k_offset++)
             {
@@ -133,12 +133,12 @@ __global__ void modularBeamBackprojectorKernel_vox_stack(cudaTextureObject_t g,
         {
             for (int k_offset = 0; k_offset < numZ; k_offset++)
             {
-                float3 r = make_float3(x - sourcePosition[0], y - sourcePosition[1], z+k_offset*T_f.z - sourcePosition[2]);
+                float3 r = make_float3(x - sourcePosition.x, y - sourcePosition.y, z+k_offset*T_f.z - sourcePosition.z);
                 const float r_dot_d_inv = 1.0f / (r.x * detNormal.x + r.y * detNormal.y + r.z * detNormal.z);
                 const float D = -p_minus_c_dot_n * r_dot_d_inv;
 
-                const float r_dot_u = r.x * u_vec[0] + r.y * u_vec[1] + r.z * u_vec[2];
-                const float r_dot_v = r.x * v_vec[0] + r.y * v_vec[1] + r.z * v_vec[2];
+                const float r_dot_u = r.x * u_vec.x + r.y * u_vec.y + r.z * u_vec.z;
+                const float r_dot_v = r.x * v_vec.x + r.y * v_vec.y + r.z * v_vec.z;
 
                 const float backprojectionWeight = p_minus_c_dot_n * sqrtf( D * D * (r_dot_u * r_dot_u + r_dot_v * r_dot_v) + p_minus_c_dot_n * p_minus_c_dot_n )*r_dot_d_inv*r_dot_d_inv;
 
@@ -170,6 +170,7 @@ __global__ void modularBeamBackprojectorKernel_vox_stack(cudaTextureObject_t g,
     }
 }
 
+// This function is not used anymore because modularBeamBackprojectorKernel_vox works faster
 __global__ void modularBeamBackprojectorKernel_vox(cudaTextureObject_t g, int4 N_g, float4 T_g, float4 startVals_g, float* f, int4 N_f, float4 T_f, float4 startVals_f, float* sourcePositions, float* moduleCenters, float* rowVectors, float* colVectors, int volumeDimensionOrder, const float rFOV_sq, bool accum)
 {
     const int i = threadIdx.x + blockIdx.x * blockDim.x;
@@ -761,7 +762,7 @@ __global__ void curvedConeBeamBackprojectorKernel_vox(cudaTextureObject_t g, con
     }
 }
 
-__global__ void coneBeamBackprojectorKernel_vox(cudaTextureObject_t g, const int4 N_g, const float4 T_g, const float4 startVals_g, float* f, const int4 N_f, const float4 T_f, const float4 startVals_f, const float R, const float D, const float tau, const float rFOVsq, const float* phis, const int volumeDimensionOrder, bool accum)
+__global__ void coneBeamBackprojectorKernel_vox(cudaTextureObject_t g, const int4 N_g, const float4 T_g, const float4 startVals_g, float* f, const int4 N_f, const float4 T_f, const float4 startVals_f, const float R, const float D, const float tau, const float tiltAngle, const float rFOVsq, const float* phis, const int volumeDimensionOrder, bool accum)
 {
     const int i = threadIdx.x + blockIdx.x * blockDim.x;
     const int j = threadIdx.y + blockIdx.y * blockDim.y;
@@ -796,6 +797,9 @@ __global__ void coneBeamBackprojectorKernel_vox(cudaTextureObject_t g, const int
         return;
     }
 
+    const float cos_tilt = cos(tiltAngle);
+    const float sin_tilt = sin(tiltAngle);
+
     const float v0_over_Tv = startVals_g.y / T_g.y;
     const float Tz_over_Tv = T_f.z / T_g.y;
     const float v_phi_x_start_num = z / T_g.y;
@@ -816,18 +820,32 @@ __global__ void coneBeamBackprojectorKernel_vox(cudaTextureObject_t g, const int
 
         const float R_minus_x_dot_theta_inv = 1.0f / (R - x * cos_phi - y * sin_phi);
 
-        const float u_val = (cos_phi * y - sin_phi * x + tau) * R_minus_x_dot_theta_inv;
-        const float u_arg = (u_val - startVals_g.z) * Tu_inv + 0.5f;
-
         //const float backprojectionWeight = sqrtf(1.0f + u_val * u_val) * R_minus_x_dot_theta_inv * R_minus_x_dot_theta_inv;
         const float backprojectionWeight = R_minus_x_dot_theta_inv * R_minus_x_dot_theta_inv;
 
-        const float v_phi_x_step_A = Tz_over_Tv * R_minus_x_dot_theta_inv;
-        const float v_phi_x_first = (v_phi_x_start_num - z_source_over_T_v) * R_minus_x_dot_theta_inv - v0_over_Tv + 0.5f;
-        for (int k_offset = 0; k_offset < numZ; k_offset++)
+        if (tiltAngle == 0.0f)
         {
-            const float v_arg = (v_phi_x_first + k_offset * v_phi_x_step_A + v0_over_Tv - 0.5f)*T_g.y;
-            vals[k_offset] += tex3D<float>(g, u_arg, v_phi_x_first + k_offset * v_phi_x_step_A, L) * backprojectionWeight * sqrtf(1.0f + u_val * u_val + v_arg * v_arg);
+            const float u_val = (cos_phi * y - sin_phi * x + tau) * R_minus_x_dot_theta_inv;
+            const float u_arg = (u_val - startVals_g.z) * Tu_inv + 0.5f;
+
+            const float v_phi_x_step_A = Tz_over_Tv * R_minus_x_dot_theta_inv;
+            const float v_phi_x_first = (v_phi_x_start_num - z_source_over_T_v) * R_minus_x_dot_theta_inv - v0_over_Tv + 0.5f;
+            for (int k_offset = 0; k_offset < numZ; k_offset++)
+            {
+                const float v_arg = (v_phi_x_first + k_offset * v_phi_x_step_A + v0_over_Tv - 0.5f) * T_g.y;
+                vals[k_offset] += tex3D<float>(g, u_arg, v_phi_x_first + k_offset * v_phi_x_step_A, L) * backprojectionWeight * sqrtf(1.0f + u_val * u_val + v_arg * v_arg);
+            }
+        }
+        else
+        {
+            const float x_dot_theta_perp = cos_phi * y - sin_phi * x + tau;
+            for (int k_offset = 0; k_offset < numZ; k_offset++)
+            {
+                const float u_val = (x_dot_theta_perp * cos_tilt + (z+k_offset*T_f.z) * sin_tilt) * R_minus_x_dot_theta_inv;
+                const float v_val = ((z + k_offset * T_f.z) * cos_tilt - x_dot_theta_perp * sin_tilt) * R_minus_x_dot_theta_inv;
+
+                vals[k_offset] += tex3D<float>(g, (u_val - startVals_g.z) * Tu_inv + 0.5f, (v_val - startVals_g.y) * Tv_inv + 0.5f, L) * backprojectionWeight * sqrtf(1.0f + u_val * u_val + v_val * v_val);
+            }
         }
     }
 
@@ -1164,7 +1182,7 @@ bool backproject_VD(float *g, float *&f, parameters* params, bool data_on_cpu, b
     else if (params->geometry == parameters::CONE)
     {
         if (params->detectorType == parameters::FLAT)
-            coneBeamBackprojectorKernel_vox <<< dimGrid_slab, dimBlock_slab >>> (d_data_txt, N_g, T_g, startVal_g, dev_f, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
+            coneBeamBackprojectorKernel_vox <<< dimGrid_slab, dimBlock_slab >>> (d_data_txt, N_g, T_g, startVal_g, dev_f, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, params->tiltAngle*PI/180.0, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
         else
             curvedConeBeamBackprojectorKernel_vox <<< dimGrid_slab, dimBlock_slab >>> (d_data_txt, N_g, T_g, startVal_g, dev_f, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
     }
diff --git a/src/find_center_cpu.cpp b/src/find_center_cpu.cpp
index 36757d8..1dfff1b 100644
--- a/src/find_center_cpu.cpp
+++ b/src/find_center_cpu.cpp
@@ -256,7 +256,8 @@ float estimateTilt(float* g, parameters* params)
 	//*/
 	//for (int itilt = 0; itilt < N_tilt; itilt++)
 	//	printf("error[%f] = %f\n", itilt * T_tilt + tilt_0, errors[itilt]);
-	float retVal = T_tilt*findMinimum(errors, 0, N_tilt) + tilt_0;
+	float minValue;
+	float retVal = T_tilt*findMinimum(errors, 0, N_tilt, minValue) + tilt_0;
 
 	// for loop over [centerCol-?, centerCol+?] in 1 pixel steps
 	//	for loop over [tiltAngle-4.9, tiltAngle+4.9] in 0.1 degree steps
@@ -317,10 +318,10 @@ float interpolate2D(float* data, float ind_1, float ind_2, int N_1, int N_2)
 		d_1 * ((1.0 - d_2) * data[ind_1_hi * N_2 + ind_2_lo] + d_2 * data[ind_1_hi * N_2 + ind_2_hi]);
 }
 
-bool findCenter_cpu(float* g, parameters* params, int iRow, bool find_tau)
+float findCenter_cpu(float* g, parameters* params, int iRow, bool find_tau)
 {
 	if (g == NULL || params == NULL)
-		return false;
+		return 0.0;
 	if (params->offsetScan == true)
 	{
 		if (find_tau)
@@ -333,7 +334,7 @@ bool findCenter_cpu(float* g, parameters* params, int iRow, bool find_tau)
 		if (find_tau)
 		{
 			printf("Error: find_tau only works for fan- or cone-beam data\n");
-			return false;
+			return 0.0;
 		}
 		else
 			return findCenter_parallel_cpu(g, params, iRow);
@@ -349,16 +350,16 @@ bool findCenter_cpu(float* g, parameters* params, int iRow, bool find_tau)
     else
     {
         printf("Error: currently find_centerCol/find_tau only works for parallel-, fan-, or cone-beam data\n");
-        return false;
+        return 0.0;
     }
 }
 
-bool findCenter_fan_cpu(float* g, parameters* params, int iRow, bool find_tau)
+float findCenter_fan_cpu(float* g, parameters* params, int iRow, bool find_tau)
 {
 	return findCenter_cone_cpu(g, params, iRow, find_tau);
 }
 
-bool findCenter_parallel_cpu(float* g, parameters* params, int iRow)
+float findCenter_parallel_cpu(float* g, parameters* params, int iRow)
 {
     if (iRow < 0 || iRow > params->numRows - 1)
         iRow = max(0, min(params->numRows-1, int(floor(0.5 + params->centerRow))));
@@ -369,7 +370,7 @@ bool findCenter_parallel_cpu(float* g, parameters* params, int iRow)
 	if (params->angularRange + 2.0*fabs(params->T_phi()) * 180.0 / PI < 180.0)
 	{
 		printf("Error: angular range insufficient to estimate centerCol\n");
-		return false;
+		return 0.0;
 	}
 	else if (params->angularRange > 225.0)
 	{
@@ -464,13 +465,14 @@ bool findCenter_parallel_cpu(float* g, parameters* params, int iRow)
 			shiftCosts[i] = 1e12;
 	}
 
-	params->centerCol = findMinimum(shiftCosts, centerCol_low, centerCol_high);
+	float retVal = 0.0;
+	params->centerCol = findMinimum(shiftCosts, centerCol_low, centerCol_high, retVal);
 	free(shiftCosts);
 
-	return true;
+	return retVal;
 }
 
-bool findCenter_cone_cpu(float* g, parameters* params, int iRow, bool find_tau)
+float findCenter_cone_cpu(float* g, parameters* params, int iRow, bool find_tau)
 {
     if (iRow < 0 || iRow > params->numRows - 1)
         iRow = max(0, min(params->numRows - 1, int(floor(0.5 + params->centerRow))));
@@ -480,6 +482,8 @@ bool findCenter_cone_cpu(float* g, parameters* params, int iRow, bool find_tau)
 	rowStart = iRow;
 	rowEnd = iRow;
 
+	float* sino = get_rotated_sinogram(g, params, iRow);
+
 	int conj_ind = 0;
 	if (params->T_phi() > 0.0)
 		conj_ind = int(floor(0.5 + params->phi_inv(params->phis[0] + PI)));
@@ -539,10 +543,12 @@ bool findCenter_cone_cpu(float* g, parameters* params, int iRow, bool find_tau)
 		for (int i = 0; i <= conj_ind - 1; i++)
 		{
 			float phi = params->phis[i];
-			float* projA = &g[uint64(i) * uint64(params->numRows * params->numCols)];
+			//float* projA = &g[uint64(i) * uint64(params->numRows * params->numCols)];
+			float* projA = &sino[uint64(i) * uint64(params->numCols)];
 			for (int j = rowStart; j <= rowEnd; j++)
 			{
-				float* lineA = &projA[j * params->numCols];
+				//float* lineA = &projA[j * params->numCols];
+				float* lineA = projA;
 				for (int k = 0; k < params->numCols; k++)
 				{
 					//float u = params->u(k);
@@ -563,7 +569,8 @@ bool findCenter_cone_cpu(float* g, parameters* params, int iRow, bool find_tau)
 							u_conj_ind = int(0.5 + (tan(u_conj) - u_0) / atanTu);
 
 						float val = lineA[k];
-						float val_conj = g[uint64(phi_conj_ind) * uint64(params->numRows * params->numCols) + uint64(j * params->numCols + u_conj_ind)];
+						//float val_conj = g[uint64(phi_conj_ind) * uint64(params->numRows * params->numCols) + uint64(j * params->numCols + u_conj_ind)];
+						float val_conj = sino[uint64(phi_conj_ind) * uint64(params->numCols) + uint64(u_conj_ind)];
 						//if (val != 0.0 || val_conj != 0.0)
 						//	printf("%f and %f\n", val, val_conj);
 
@@ -584,6 +591,8 @@ bool findCenter_cone_cpu(float* g, parameters* params, int iRow, bool find_tau)
 #endif
 	}
 
+	delete[] sino;
+
 	for (int i = centerCol_low; i <= centerCol_high; i++)
 	{
 		//printf("%f\n", shiftCosts[i]);
@@ -593,17 +602,18 @@ bool findCenter_cone_cpu(float* g, parameters* params, int iRow, bool find_tau)
 
 	params->normalizeConeAndFanCoordinateFunctions = normalizeConeAndFanCoordinateFunctions_save;
 
-	float new_center = findMinimum(shiftCosts, centerCol_low, centerCol_high);
+	float retVal = 0.0;
+	float new_center = findMinimum(shiftCosts, centerCol_low, centerCol_high, retVal);
 	if (find_tau)
 		params->tau = tau_shift * (new_center - 0.5 * float(params->numCols - 1));
 	else
 		params->centerCol = new_center;
 	free(shiftCosts);
 
-    return true;
+    return retVal;
 }
 
-float findMinimum(double* costVec, int startInd, int endInd, bool findOnlyLocalMin/* = true*/)
+float findMinimum(double* costVec, int startInd, int endInd, float& minValue)
 {
 	int localMin_ind = -1;
 	double localMin_value = 1e12;
@@ -629,14 +639,27 @@ float findMinimum(double* costVec, int startInd, int endInd, bool findOnlyLocalM
 
 	if ((minCost_ind == startInd || minCost_ind == endInd) && localMin_ind != -1)
 	{
-		// min cost is at the end of estimation region and these does exist a local minimum
+		// min cost is at the end of estimation region and there does not exist a local minimum
 		// so min cost is likely just an edge effect and thus the local minimum should be used instead
 		minCost_ind = localMin_ind;
+		minValue = localMin_value;
 	}
+	else
+		minValue = minCost;
 
 	float retVal = float(minCost_ind);
 	if (minCost_ind > startInd && minCost_ind < endInd && costVec[minCost_ind - 1] != 1.0e12 && costVec[minCost_ind + 1] != 1.0e12 && costVec[minCost_ind - 1] > 0.0 && costVec[minCost_ind + 1] > 0.0)
+	{
+		// assume error function is locally quadratic and update the minimum location accordingly
 		retVal += 0.5 * (costVec[minCost_ind - 1] - costVec[minCost_ind + 1]) / (costVec[minCost_ind - 1] + costVec[minCost_ind + 1] - 2.0 * costVec[minCost_ind]);
+
+		float a = 0.5 * (costVec[minCost_ind + 1] + costVec[minCost_ind - 1]) - costVec[minCost_ind];
+		float b = 0.5 * (costVec[minCost_ind + 1] - costVec[minCost_ind - 1]);
+		float c = costVec[minCost_ind];
+
+		if (a > 0.0)
+			minValue = c - b * b / (4.0 * a);
+	}
 	return retVal;
 }
 
@@ -672,3 +695,53 @@ bool setDefaultRange_centerCol(parameters* params, int& centerCol_low, int& cent
 	}
 	return true;
 }
+
+float* get_rotated_sinogram(float* g, parameters* params, int iRow)
+{
+	if (g == NULL || params == NULL)
+		return NULL;
+	if (iRow < 0 || iRow > params->numRows - 1)
+		iRow = max(0, min(params->numRows - 1, int(floor(0.5 + params->centerRow))));
+
+	float row_0_centered = -0.5 * float(params->numRows - 1) * params->pixelHeight;
+	float col_0_centered = -0.5 * float(params->numCols - 1) * params->pixelWidth;
+
+	float row_0 = -params->centerRow * params->pixelHeight;
+	float col_0 = -params->centerCol * params->pixelWidth;
+
+	float tiltAngle = params->tiltAngle;
+	float cos_alpha = cos(PI / 180.0 * tiltAngle);
+	float sin_alpha = sin(PI / 180.0 * tiltAngle);
+
+	float* sino = new float[params->numAngles*params->numCols];
+	omp_set_num_threads(omp_get_num_procs());
+	#pragma omp parallel for
+	for (int i = 0; i < params->numAngles; i++)
+	{
+		float* aProj = &g[uint64(i) * uint64(params->numRows) * uint64(params->numCols)];
+		float* sino_line = &sino[uint64(i) * uint64(params->numCols)];
+		if (params->tiltAngle == 0.0)
+		{
+			for (int iCol = 0; iCol < params->numCols; iCol++)
+				sino_line[iCol] = aProj[uint64(iRow)*uint64(params->numRows) + uint64(iCol)];
+		}
+		else
+		{
+			float row = iRow * params->pixelHeight + row_0_centered;
+			for (int iCol = 0; iCol < params->numCols; iCol++)
+			{
+				//float col = iCol * params->pixelWidth + col_0;
+				float col = iCol * params->pixelWidth + col_0_centered;
+
+				float col_A = cos_alpha * col + sin_alpha * row - col_0 + col_0_centered;
+				float row_A = -sin_alpha * col + cos_alpha * row;
+				float col_A_ind = (col_A - col_0) / params->pixelWidth;
+				float row_A_ind = (row_A - row_0_centered) / params->pixelHeight;
+
+				float proj_cur = interpolate2D(aProj, row_A_ind, col_A_ind, params->numRows, params->numCols);
+				sino_line[iCol] = proj_cur;
+			}
+		}
+	}
+	return sino;
+}
\ No newline at end of file
diff --git a/src/find_center_cpu.h b/src/find_center_cpu.h
index 1a57686..4e1616a 100644
--- a/src/find_center_cpu.h
+++ b/src/find_center_cpu.h
@@ -22,11 +22,11 @@
  * which is also known as a half-fan or half-cone.
  */
 
-bool findCenter_cpu(float* g, parameters* params, int iRow = -1, bool find_tau = false);
+float findCenter_cpu(float* g, parameters* params, int iRow = -1, bool find_tau = false);
 
-bool findCenter_parallel_cpu(float* g, parameters* params, int iRow = -1);
-bool findCenter_fan_cpu(float* g, parameters* params, int iRow = -1, bool find_tau = false);
-bool findCenter_cone_cpu(float* g, parameters* params, int iRow = -1, bool find_tau = false);
+float findCenter_parallel_cpu(float* g, parameters* params, int iRow = -1);
+float findCenter_fan_cpu(float* g, parameters* params, int iRow = -1, bool find_tau = false);
+float findCenter_cone_cpu(float* g, parameters* params, int iRow = -1, bool find_tau = false);
 
 float estimateTilt(float* g, parameters* params);
 bool getConjugateDifference(float* g, parameters* params, float alpha, float centerCol, float* diff);
@@ -34,6 +34,8 @@ bool getConjugateProjections(float* g, parameters* params, float*& proj_A, float
 float interpolate2D(float*, float ind_1, float ind_2, int N_1, int N_2);
 
 bool setDefaultRange_centerCol(parameters* params, int& centerCol_low, int& centerCol_high);
-float findMinimum(double* costVec, int startInd, int endInd, bool findOnlyLocalMin = true);
+float findMinimum(double* costVec, int startInd, int endInd, float& minValue);
+
+float* get_rotated_sinogram(float* g, parameters* params, int iRow);
 
 #endif
diff --git a/src/leap_preprocessing_algorithms.py b/src/leap_preprocessing_algorithms.py
index 5860767..417c70f 100644
--- a/src/leap_preprocessing_algorithms.py
+++ b/src/leap_preprocessing_algorithms.py
@@ -21,6 +21,7 @@
 from scipy.optimize import least_squares
 from scipy.spatial.transform import Rotation as R
 from leapctype import *
+leapct_sweep = tomographicModels() # used in parameter_sweep function
 
 def gain_correction(leapct, g, air_scan, dark_scan, calibration_scans=None, ROI=None, badPixelMap=None, flux_response=None):
     r""" Performs gain correction
@@ -677,15 +678,8 @@ def parameter_sweep(leapct, g, values, param='centerCol', iz=None, algorithmName
     
     The CT geometry parameters and the CT volume parameters must be set prior to running this function.
     
-    The parameters to sweep are all standard LEAP CT geometry parameter names, except 'tilt'
-    The 'tilt' parameter is swept by converting the geometry to modular-beam (see the convert_to_modularbeam function).
-    Then the tilt is achieved by rotating the colVecs and rowVecs parameters (note that the data g is not rotated, 
-    just the model of the detector orientation which is better because no interpolation is necessary).  This rotation
-    is achieved with the rotate_detector function.
-    
-    When sweeping the tau parameter, the centerCol parameter is also adjusted because these both apply to the location
-    and the center of rotation of the scanner.  This additional adjustment of centerCol keeps the center of rotation
-    in the same place and thus has the effect of rotating the detector around the axis pointing across the detector rows.
+    The parameters to sweep are all standard LEAP CT geometry parameter names, except 'tilt' which is only available for cone- and modular-beam data.
+    (note that the data g is not rotated, just the model of the detector orientation which is better because no interpolation is necessary).
     
     Args:
         leapct (tomographicModels object): This is just needed to access LEAP algorithms
@@ -727,11 +721,12 @@ def parameter_sweep(leapct, g, values, param='centerCol', iz=None, algorithmName
         f_stack = np.zeros((len(values), leapct.get_numY(), leapct.get_numX()), dtype=np.float32)
 
     g_sweep = g
-    leapct_sweep = tomographicModels()
+    #leapct_sweep = tomographicModels()
     leapct_sweep.copy_parameters(leapct)
 
     if param == 'tilt':
-        leapct_sweep.convert_to_modularbeam()
+        if leapct.get_geometry() != 'CONE':
+            leapct_sweep.convert_to_modularbeam()
     elif leapct_sweep.get_geometry() == 'FAN' or leapct_sweep.get_geometry() == 'PARALLEL':
         leapct_sweep.set_numRows(1)
         if has_torch == True and type(g) is torch.Tensor:
diff --git a/src/leapctype.py b/src/leapctype.py
index f7941e4..f799385 100644
--- a/src/leapctype.py
+++ b/src/leapctype.py
@@ -476,7 +476,7 @@ def set_coneparallel(self, numAngles, numRows, numCols, pixelHeight, pixelWidth,
         self.set_model()
         return self.libprojectors.set_coneparallel(numAngles, numRows, numCols, pixelHeight, pixelWidth, centerRow, centerCol, phis, sod, sdd, tau, helicalPitch)
     
-    def set_conebeam(self, numAngles, numRows, numCols, pixelHeight, pixelWidth, centerRow, centerCol, phis, sod, sdd, tau=0.0, helicalPitch=0.0):
+    def set_conebeam(self, numAngles, numRows, numCols, pixelHeight, pixelWidth, centerRow, centerCol, phis, sod, sdd, tau=0.0, helicalPitch=0.0, tiltAngle=0.0):
         r"""Sets the parameters for a cone-beam CT geometry
         
         The origin of the coordinate system is always at the center of rotation.  The forward (P) and back (P*) projection operators are given by
@@ -517,11 +517,12 @@ def set_conebeam(self, numAngles, numRows, numCols, pixelHeight, pixelWidth, cen
             sdd (float): source to detector distance, measured in mm
             tau (float): center of rotation offset
             helicalPitch (float): the helical pitch (mm/radians)
+            tiltAngle (float) the rotation of the detector around the optical axis (degrees)
             
         Returns:
             True if the parameters were valid, false otherwise
         """
-        self.libprojectors.set_conebeam.argtypes = [ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_float, ctypes.c_float, ctypes.c_float, ctypes.c_float, ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), ctypes.c_float, ctypes.c_float, ctypes.c_float, ctypes.c_float]
+        self.libprojectors.set_conebeam.argtypes = [ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_float, ctypes.c_float, ctypes.c_float, ctypes.c_float, ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), ctypes.c_float, ctypes.c_float, ctypes.c_float, ctypes.c_float, ctypes.c_float]
         self.libprojectors.set_conebeam.restype = ctypes.c_bool
         if has_torch and type(phis) is torch.Tensor:
             phis = phis.cpu().detach().numpy()
@@ -534,7 +535,7 @@ def set_conebeam(self, numAngles, numRows, numCols, pixelHeight, pixelWidth, cen
             return False
             
         self.set_model()
-        return self.libprojectors.set_conebeam(numAngles, numRows, numCols, pixelHeight, pixelWidth, centerRow, centerCol, phis, sod, sdd, tau, helicalPitch)
+        return self.libprojectors.set_conebeam(numAngles, numRows, numCols, pixelHeight, pixelWidth, centerRow, centerCol, phis, sod, sdd, tau, tiltAngle, helicalPitch)
     
     def set_coneBeam(self, numAngles, numRows, numCols, pixelHeight, pixelWidth, centerRow, centerCol, phis, sod, sdd, tau=0.0, helicalPitch=0.0):
         """Alias for set_conebeam
@@ -745,6 +746,20 @@ def set_tau(self, tau):
         self.libprojectors.set_tau.restype = ctypes.c_bool
         self.set_model()
         return self.libprojectors.set_tau(tau)
+        
+    def set_tiltAngle(self, tiltAngle):
+        """Set the tiltAngle parameter
+        
+        Args:
+            tiltAngle (float): the rotation of the detector around the optical axis (degrees)
+            
+        Returns:
+            True if the parameters were valid, false otherwise
+        """
+        self.libprojectors.set_tiltAngle.argtypes = [ctypes.c_float]
+        self.libprojectors.set_tiltAngle.restype = ctypes.c_bool
+        self.set_model()
+        return self.libprojectors.set_tiltAngle(tiltAngle)
     
     def set_helicalPitch(self, helicalPitch):
         r"""Set the helicalPitch parameter
@@ -855,20 +870,19 @@ def find_centerCol(self, g, iRow=-1):
             iRow (int): The detector row index to be used for the estimation, if no value is given, uses the row closest to the centerRow parameter
             
         Returns:
-            g, the same as the input
+            the error metric value
         
         """
         if iRow is None:
             iRow = -1
-        self.libprojectors.find_centerCol.restype = ctypes.c_bool
+        self.libprojectors.find_centerCol.restype = ctypes.c_float
         self.set_model()
         if has_torch == True and type(g) is torch.Tensor:
             self.libprojectors.find_centerCol.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_bool]
-            self.libprojectors.find_centerCol(g.data_ptr(), iRow, g.is_cuda == False)
+            return self.libprojectors.find_centerCol(g.data_ptr(), iRow, g.is_cuda == False)
         else:
             self.libprojectors.find_centerCol.argtypes = [ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), ctypes.c_int, ctypes.c_bool]
-            self.libprojectors.find_centerCol(g, iRow, True)
-        return g
+            return self.libprojectors.find_centerCol(g, iRow, True)
         
     def find_tau(self, g, iRow=-1):
         r"""Find the tau parameter
@@ -896,20 +910,19 @@ def find_tau(self, g, iRow=-1):
             iRow (int): The detector row index to be used for the estimation, if no value is given, uses the row closest to the centerRow parameter
             
         Returns:
-            g, the same as the input
+            the error metric value
         
         """
         if iRow is None:
             iRow = -1
-        self.libprojectors.find_tau.restype = ctypes.c_bool
+        self.libprojectors.find_tau.restype = ctypes.c_float
         self.set_model()
         if has_torch == True and type(g) is torch.Tensor:
             self.libprojectors.find_tau.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_bool]
-            self.libprojectors.find_tau(g.data_ptr(), iRow, g.is_cuda == False)
+            return self.libprojectors.find_tau(g.data_ptr(), iRow, g.is_cuda == False)
         else:
             self.libprojectors.find_tau.argtypes = [ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), ctypes.c_int, ctypes.c_bool]
-            self.libprojectors.find_tau(g, iRow, True)
-        return g
+            return self.libprojectors.find_tau(g, iRow, True)
         
     def estimate_tilt(self, g):
         """Estimates the tilt angle (around the optical axis) of the detector
@@ -923,9 +936,8 @@ def estimate_tilt(self, g):
         See also the conjugate_difference function.
         
         Example Usage:
-        alpha = leapct.estimate_tilt(g)
-        leapct.convert_to_modularbeam()
-        leapct.rotate_detector(alpha)
+        gamma = leapct.estimate_tilt(g)
+        leapct.set_tiltAngle(gamma)
 
         Note that it only works for parallel-beam, fan-beam, cone-beam, and cone-parallel CT geometry types (i.e., everything but modular-beam)
         and one may not get an accurate estimate if the projections are truncated on the right and/or left sides.
@@ -1111,11 +1123,17 @@ def rotate_detector(self, alpha):
             True if the operation was successful, False overwise
         
         """
-        if self.get_geometry() != 'MODULAR':
-            print('Error: can only rotate modular-beam detectors')
+        geom_str = self.get_geometry()
+        
+        if geom_str != 'MODULAR' and geom_str != 'CONE':
+            print('Error: can only rotate cone- and modular-beam detectors')
             print('Use convert_to_modularbeam() first')
             return False
         if type(alpha) is np.ndarray:
+            if geom_str != 'MODULAR':
+                print('Error: can only perform arbitrary rotations with modular-beam geometries')
+                print('Use convert_to_modularbeam() first')
+                return False
             if alpha.size == 3:
                 from scipy.spatial.transform import Rotation as R
                 A = R.from_euler('xyz', alpha, degrees=True).as_matrix()
@@ -1150,9 +1168,12 @@ def rotate_detector(self, alpha):
             
         else:
             self.set_model()
-            self.libprojectors.rotate_detector.restype = ctypes.c_bool
-            self.libprojectors.rotate_detector.argtypes = [ctypes.c_float]
-            return self.libprojectors.rotate_detector(alpha)
+            if geom_str == 'MODULAR':
+                self.libprojectors.rotate_detector.restype = ctypes.c_bool
+                self.libprojectors.rotate_detector.argtypes = [ctypes.c_float]
+                return self.libprojectors.rotate_detector(alpha)
+            else:
+                return self.set_tiltAngle(self.get_tiltAngle()+alpha)
         
     def rotate_coordinate_system(self, R):
         """Rotates the coordinate system
@@ -5990,6 +6011,12 @@ def get_tau(self):
         self.set_model()
         return self.libprojectors.get_tau()
         
+    def get_tiltAngle(self):
+        """Get the tiltAngle parameter"""
+        self.libprojectors.get_tiltAngle.restype = ctypes.c_float
+        self.set_model()
+        return self.libprojectors.get_tiltAngle()
+        
     def get_sourcePositions(self):
         """Get the sourcePositions parameter (modular-beam only)"""
         #bool get_sourcePositions(float*);
diff --git a/src/parameters.cpp b/src/parameters.cpp
index d95e6e4..306a8d5 100644
--- a/src/parameters.cpp
+++ b/src/parameters.cpp
@@ -79,6 +79,7 @@ void parameters::initialize()
 	centerCol = 0.0;
 	centerRow = 0.0;
 	tau = 0.0;
+	tiltAngle = 0.0;
 	helicalPitch = 0.0;
 	z_source_offset = 0.0;
 	rFOVspecified = 0.0;
@@ -182,6 +183,7 @@ void parameters::assign(const parameters& other)
     this->centerCol = other.centerCol;
     this->centerRow = other.centerRow;
     this->tau = other.tau;
+	this->tiltAngle = other.tiltAngle;
 	this->helicalPitch = other.helicalPitch;
 	this->z_source_offset = other.z_source_offset;
     this->rFOVspecified = other.rFOVspecified;
@@ -673,6 +675,12 @@ bool parameters::geometryDefined(bool doPrint)
 			printf("Error: curved detector only defined for cone-beam geometries\n");
 		return false;
 	}
+	if ((geometry != CONE || detectorType == CURVED) && tiltAngle != 0.0) // not: flat cone or tiltAngle==0
+	{
+		if (doPrint)
+			printf("Error: tiltAngle only defined for flat panel cone-beam geometries\n");
+		return false;
+	}
 
 	return true;
 }
@@ -933,6 +941,8 @@ void parameters::printAll()
 		printf("sdd = %f mm\n", sdd);
 		if (tau != 0.0)
 			printf("tau = %f mm\n", tau);
+		if (geometry == CONE)
+			printf("gamma = %f degrees\n", tiltAngle);
 	}
 	else if (geometry == MODULAR)
 	{
@@ -1491,8 +1501,9 @@ float parameters::row(int i)
 	return (i - (centerRow + rowShiftFromFilter)) * pixelHeight;
 }
 
-float parameters::u(int i, int iphi, bool includeModuleShift)
+float parameters::u(int i, int iphi)
 {
+	bool includeModuleShift = true;
 	if (modularbeamIsAxiallyAligned())
 	{
 		float u_val = 0.0;
@@ -1524,8 +1535,9 @@ float parameters::u_inv(float val)
 		return (val - u_0()) / pixelWidth;
 }
 
-float parameters::v(int i, int iphi, bool includeModuleShift)
+float parameters::v(int i, int iphi)
 {
+	bool includeModuleShift = true;
 	if (modularbeamIsAxiallyAligned() /*&& iphi >= 0 && iphi < numAngles*/)
 	{
 		if (includeModuleShift)
@@ -1723,6 +1735,32 @@ bool parameters::set_helicalPitch(float h)
 	}
 }
 
+bool parameters::set_tiltAngle(float tiltAngle_in)
+{
+	if (fabs(tiltAngle_in) < 1.0e-6)
+		tiltAngle_in = 0.0;
+	if (tiltAngle_in == 0.0)
+	{
+		tiltAngle = 0.0;
+		return true;
+	}
+	else if (geometry != CONE || detectorType != FLAT)
+	{
+		printf("Error: CT geometry must be CONE for tilted detector\n");
+		return false;
+	}
+	else if (fabs(tiltAngle_in) > 5.0)
+	{
+		printf("Error: tilt angle cannot exceed five degrees\n");
+		return false;
+	}
+	else
+	{
+		tiltAngle = tiltAngle_in;
+		return true;
+	}
+}
+
 float parameters::normalizedHelicalPitch()
 {
 	//helicalPitch = normalizedHelicalPitch*pzsize*nslices*(sod/sdd)/(2pi)
@@ -1907,8 +1945,12 @@ bool parameters::rowRangeNeededForBackprojection(int firstSlice, int lastSlice,
 			rowsNeeded[1] = numRows - 1;
 		}
 	}
-	else
+	else // CONE || CONE_PARALLEL
 	{
+		//float accountForClocking = fabs(sin(tiltAngle*PI/180.0) * pixelWidth * 0.5 * float(numCols - 1)) / sdd;
+		float u_max = std::max(fabs(col(0)), fabs(col(numCols - 1)));
+		float accountForClocking = fabs(sin(tiltAngle * PI / 180.0) * u_max) / sdd;
+
 		float z_lo = float(firstSlice) * voxelHeight + z_0() - 0.5 * voxelHeight;
 		float z_hi = float(lastSlice) * voxelHeight + z_0() + 0.5 * voxelHeight;
 
@@ -1917,8 +1959,8 @@ bool parameters::rowRangeNeededForBackprojection(int firstSlice, int lastSlice,
 		float T_v = pixelHeight / sdd;
 		float v_denom_min = sod - radiusFOV - voxelWidth;
 		float v_denom_max = sod + radiusFOV + voxelWidth;
-		float v_lo = min(z_lo / v_denom_min, z_lo / v_denom_max) - 0.5 * pixelHeight / sdd;
-		float v_hi = max(z_hi / v_denom_min, z_hi / v_denom_max) + 0.5 * pixelHeight / sdd;
+		float v_lo = min(z_lo / v_denom_min, z_lo / v_denom_max) - 0.5 * pixelHeight / sdd - accountForClocking;
+		float v_hi = max(z_hi / v_denom_min, z_hi / v_denom_max) + 0.5 * pixelHeight / sdd + accountForClocking;
 
 		rowsNeeded[0] = min(numRows - 1, max(0, int(floor((v_lo - row(0) / sdd) / T_v))));
 		rowsNeeded[1] = max(0, min(numRows - 1, int(ceil((v_hi - row(0) / sdd) / T_v))));
@@ -2108,11 +2150,17 @@ bool parameters::sliceRangeNeededForProjection(int firstRow, int lastRow, int* s
 			slicesNeeded[1] = numZ - 1;
 		}
 	}
-	else
+	else // CONE || CONE_PARALLEL
 	{
 		float v_lo = (float(firstRow) * pixelHeight + row(0) - 0.5 * pixelHeight) / sdd;
 		float v_hi = (float(lastRow) * pixelHeight + row(0) + 0.5 * pixelHeight) / sdd;
 
+		//float accountForClocking = fabs(sin(tiltAngle * PI / 180.0) * pixelWidth * 0.5 * float(numCols - 1)) / sdd;
+		float u_max = std::max(fabs(col(0)), fabs(col(numCols - 1)));
+		float accountForClocking = fabs(sin(tiltAngle * PI / 180.0) * u_max) / sdd;
+		v_lo -= accountForClocking;
+		v_hi += accountForClocking;
+
 		// v = z / (R - <x, theta>)
 		// v = z / (R - rFOV())
 		float T_z = voxelHeight;
@@ -2300,6 +2348,14 @@ bool parameters::convert_conebeam_to_modularbeam()
 	float verticalDetectorShift = 0.5 * float(numRows - 1) * pixelHeight + row(0);
 	//printf("horizontalDetectorShift = %f\n", horizontalDetectorShift);
 
+	float cos_tilt = cos(tiltAngle * PI / 180.0);
+	float sin_tilt = sin(tiltAngle * PI / 180.0);
+	if (fabs(tiltAngle) < 1.0e-6)
+	{
+		cos_tilt = 1.0;
+		sin_tilt = 0.0;
+	}
+
 	float* s_pos = new float[3 * numAngles];
 	float* d_pos = new float[3 * numAngles];
 	float* v_vec = new float[3 * numAngles];
@@ -2317,13 +2373,13 @@ bool parameters::convert_conebeam_to_modularbeam()
 		d_pos[3 * iphi + 1] = (sod - sdd) * sin_phi;
 		d_pos[3 * iphi + 2] = z_source(iphi);
 
-		v_vec[3 * iphi + 0] = 0.0;
-		v_vec[3 * iphi + 1] = 0.0;
-		v_vec[3 * iphi + 2] = 1.0;
+		v_vec[3 * iphi + 0] = sin_phi * sin_tilt;
+		v_vec[3 * iphi + 1] = -cos_phi * sin_tilt;
+		v_vec[3 * iphi + 2] = cos_tilt;
 
-		u_vec[3 * iphi + 0] = -sin_phi;
-		u_vec[3 * iphi + 1] = cos_phi;
-		u_vec[3 * iphi + 2] = 0.0;
+		u_vec[3 * iphi + 0] = -sin_phi * cos_tilt;
+		u_vec[3 * iphi + 1] = cos_phi * cos_tilt;
+		u_vec[3 * iphi + 2] = sin_tilt;
 
 		d_pos[3 * iphi + 0] += horizontalDetectorShift * u_vec[3 * iphi + 0];
 		d_pos[3 * iphi + 1] += horizontalDetectorShift * u_vec[3 * iphi + 1];
@@ -2335,6 +2391,7 @@ bool parameters::convert_conebeam_to_modularbeam()
 	}
 	centerRow = 0.5 * float(numRows - 1);
 	centerCol = 0.5 * float(numCols - 1);
+	tiltAngle = 0.0;
 	if (set_sourcesAndModules(s_pos, d_pos, v_vec, u_vec, numAngles))
 		geometry = MODULAR;
 	delete[] s_pos;
diff --git a/src/parameters.h b/src/parameters.h
index fc8249e..5214586 100644
--- a/src/parameters.h
+++ b/src/parameters.h
@@ -239,7 +239,7 @@ class parameters
 	 * \param       i the index of the i-th detector column
 	 * \return      returns the position of the i-th detector column (mm)
 	 */
-	float u(int i, int iphi = -1, bool includeModuleShift = true);
+	float u(int i, int iphi = -1);
 
 	/**
 	 * \fn          u_inv
@@ -255,7 +255,7 @@ class parameters
 	 * \param       i the index of the i-th detector row
 	 * \return      returns the position of the i-th detector row (mm)
 	 */
-	float v(int i, int iphi = -1, bool includeModuleShift = true);
+	float v(int i, int iphi = -1);
 
 	float u_offset(int iphi = -1);
 	float v_offset(int iphi = -1);
@@ -308,6 +308,14 @@ class parameters
 	 */
 	bool set_normalizedHelicalPitch(float h_normalized);
 
+	/**
+	 * \fn          set_tiltAngle
+	 * \brief       sets tiltAngle
+	 * \param[in]   tiltAngle the value for tiltAngle (degrees)
+	 * \return      true if the value is valid, false otherwise
+	 */
+	bool set_tiltAngle(float tiltAngle_in);
+
 	/**
 	 * \fn          convert_conebeam_to_modularbeam
 	 * \brief       sets modular-beam parameters from a cone-beam specification
@@ -335,7 +343,7 @@ class parameters
 	int numCols, numRows, numAngles;
 	float centerCol, centerRow;
 	float* phis;
-	float tau;
+	float tau, tiltAngle;
 	float helicalPitch;
 	float z_source_offset;
 	float helicalFBPWeight;
diff --git a/src/projectors_Joseph.cu b/src/projectors_Joseph.cu
index aee90db..dca65b9 100644
--- a/src/projectors_Joseph.cu
+++ b/src/projectors_Joseph.cu
@@ -31,7 +31,7 @@ __global__ void modularBeamProjectorKernel_SF(float* g, int4 N_g, float4 T_g, fl
     if (l >= N_g.x || m >= N_g.y || n >= N_g.z)
         return;
 
-    float* moduleCenter = &moduleCenters[3 * l];
+    const float3 moduleCenter = make_float3(moduleCenters[3 * l + 0], moduleCenters[3 * l + 1], moduleCenters[3 * l + 2]);
     const float3 p = make_float3(sourcePositions[3 * l + 0], sourcePositions[3 * l + 1], sourcePositions[3 * l + 2]);
     const float3 u_vec = make_float3(colVectors[3 * l + 0], colVectors[3 * l + 1], colVectors[3 * l + 2]);
     const float3 v_vec = make_float3(rowVectors[3 * l + 0], rowVectors[3 * l + 1], rowVectors[3 * l + 2]);
@@ -58,11 +58,11 @@ __global__ void modularBeamProjectorKernel_SF(float* g, int4 N_g, float4 T_g, fl
     const float T_y_inv = 1.0f / T_f.y;
     const float T_z_inv = 1.0f / T_f.z;
 
-    const float3 detPos = make_float3(moduleCenter[0] + u_vec.x * s + v_vec.x * t, moduleCenter[1] + u_vec.y * s + v_vec.y * t, moduleCenter[2] + u_vec.z * s + v_vec.z * t);
+    const float3 detPos = make_float3(moduleCenter.x + u_vec.x * s + v_vec.x * t, moduleCenter.y + u_vec.y * s + v_vec.y * t, moduleCenter.z + u_vec.z * s + v_vec.z * t);
     const float3 r = make_float3(detPos.x - p.x, detPos.y - p.y, detPos.z - p.z);
     const float D = sqrtf(r.x * r.x + r.y * r.y + r.z * r.z);
 
-    const float3 p_minus_c = make_float3(p.x - moduleCenter[0], p.y - moduleCenter[1], p.z - moduleCenter[2]);
+    const float3 p_minus_c = make_float3(p.x - moduleCenter.x, p.y - moduleCenter.y, p.z - moduleCenter.z);
 
     const float p_minus_c_dot_u = p_minus_c.x * u_vec.x + p_minus_c.y * u_vec.y + p_minus_c.z * u_vec.z;
     const float p_minus_c_dot_v = p_minus_c.x * v_vec.x + p_minus_c.y * v_vec.y + p_minus_c.z * v_vec.z;
@@ -470,33 +470,35 @@ __global__ void modularBeamBackprojectorKernel_SF_stack(cudaTextureObject_t g, i
     {
         const float L = (float)iphi + 0.5f;
 
-        float* sourcePosition = &sourcePositions[3 * iphi];
-        float* moduleCenter = &moduleCenters[3 * iphi];
-        float* v_vec = &rowVectors[3 * iphi];
-        float* u_vec = &colVectors[3 * iphi];
-        const float3 detNormal = make_float3(u_vec[1] * v_vec[2] - u_vec[2] * v_vec[1],
-            u_vec[2] * v_vec[0] - u_vec[0] * v_vec[2],
-            u_vec[0] * v_vec[1] - u_vec[1] * v_vec[0]);
+        const float3 sourcePosition = make_float3(sourcePositions[3 * iphi + 0], sourcePositions[3 * iphi + 1], sourcePositions[3 * iphi + 2]);
+        const float3 moduleCenter = make_float3(moduleCenters[3 * iphi + 0], moduleCenters[3 * iphi + 1], moduleCenters[3 * iphi + 2]);
+        const float3 v_vec = make_float3(rowVectors[3 * iphi + 0], rowVectors[3 * iphi + 1], rowVectors[3 * iphi + 2]);
+        const float3 u_vec = make_float3(colVectors[3 * iphi + 0], colVectors[3 * iphi + 1], colVectors[3 * iphi + 2]);
 
-        const float c_minus_s_dot_u = (moduleCenter[0] - sourcePosition[0]) * u_vec[0] + (moduleCenter[1] - sourcePosition[1]) * u_vec[1] + (moduleCenter[2] - sourcePosition[2]) * u_vec[2];
-        const float c_minus_s_dot_v = (moduleCenter[0] - sourcePosition[0]) * v_vec[0] + (moduleCenter[1] - sourcePosition[1]) * v_vec[1] + (moduleCenter[2] - sourcePosition[2]) * v_vec[2];
-        const float c_minus_s_dot_n = (moduleCenter[0] - sourcePosition[0]) * detNormal.x + (moduleCenter[1] - sourcePosition[1]) * detNormal.y + (moduleCenter[2] - sourcePosition[2]) * detNormal.z;
-        if (fabs(x - sourcePosition[0]) > fabs(y - sourcePosition[1]))
+        const float3 n_vec = make_float3(u_vec.y * v_vec.z - u_vec.z * v_vec.y,
+            u_vec.z * v_vec.x - u_vec.x * v_vec.z,
+            u_vec.x * v_vec.y - u_vec.y * v_vec.x);
+
+        const float c_minus_s_dot_u = (moduleCenter.x - sourcePosition.x) * u_vec.x + (moduleCenter.y - sourcePosition.y) * u_vec.y + (moduleCenter.z - sourcePosition.z) * u_vec.z;
+        const float c_minus_s_dot_v = (moduleCenter.x - sourcePosition.x) * v_vec.x + (moduleCenter.y - sourcePosition.y) * v_vec.y + (moduleCenter.z - sourcePosition.z) * v_vec.z;
+        const float c_minus_s_dot_n = (moduleCenter.x - sourcePosition.x) * n_vec.x + (moduleCenter.y - sourcePosition.y) * n_vec.y + (moduleCenter.z - sourcePosition.z) * n_vec.z;
+        if (fabs(x - sourcePosition.x) > fabs(y - sourcePosition.y))
         {
             for (int k_offset = 0; k_offset < numZ; k_offset++)
             {
                 const float z = float(k+k_offset) * T_f.z + startVals_f.z;
+                const float3 x_minus_s = make_float3(x - sourcePosition.x, y - sourcePosition.y, z - sourcePosition.z);
 
-                const float denom = (x - sourcePosition[0]) * detNormal.x + (y - sourcePosition[1]) * detNormal.y + (z - sourcePosition[2]) * detNormal.z;
+                const float denom = x_minus_s.x * n_vec.x + x_minus_s.y * n_vec.y + x_minus_s.z * n_vec.z;
                 const float t_C = c_minus_s_dot_n / denom;
-                const float t_A = c_minus_s_dot_n / (denom - half_T_x * detNormal.y);
-                const float t_B = c_minus_s_dot_n / (denom + half_T_x * detNormal.y);
+                const float t_A = c_minus_s_dot_n / (denom - half_T_x * n_vec.y);
+                const float t_B = c_minus_s_dot_n / (denom + half_T_x * n_vec.y);
 
-                const float u_arg_A = t_A * ((x - sourcePosition[0]) * u_vec[0] + (y - half_T_x - sourcePosition[1]) * u_vec[1] + (z - sourcePosition[2]) * u_vec[2]) - c_minus_s_dot_u;
-                const float u_arg_B = t_B * ((x - sourcePosition[0]) * u_vec[0] + (y + half_T_x - sourcePosition[1]) * u_vec[1] + (z - sourcePosition[2]) * u_vec[2]) - c_minus_s_dot_u;
+                const float u_arg_A = t_A * (x_minus_s.x * u_vec.x + (x_minus_s.y - half_T_x) * u_vec.y + x_minus_s.z * u_vec.z) - c_minus_s_dot_u;
+                const float u_arg_B = t_B * (x_minus_s.x * u_vec.x + (x_minus_s.y + half_T_x) * u_vec.y + x_minus_s.z * u_vec.z) - c_minus_s_dot_u;
 
                 //const float l_phi = sqrtf((x - sourcePosition[0]) * (x - sourcePosition[0]) + (y - sourcePosition[1]) * (y - sourcePosition[1])) / fabs(x - sourcePosition[0]);
-                const float l_phi = sqrtf(((x - sourcePosition[0]) * (x - sourcePosition[0]) + (z - sourcePosition[2]) * (z - sourcePosition[2])) * ((x - sourcePosition[0]) * (x - sourcePosition[0]) + (y - sourcePosition[1]) * (y - sourcePosition[1]))) / ((x - sourcePosition[0]) * (x - sourcePosition[0]));
+                const float l_phi = sqrtf((x_minus_s.x * x_minus_s.x + x_minus_s.z * x_minus_s.z) * (x_minus_s.x * x_minus_s.x + x_minus_s.y * x_minus_s.y)) / (x_minus_s.x * x_minus_s.x);
                 //const float l_phi = sqrtf((x - sourcePosition[0]) * (x - sourcePosition[0]) + (z - sourcePosition[2]) * (z - sourcePosition[2])) / fabs(x - sourcePosition[0]);
 
                 // Weights for u
@@ -515,8 +517,8 @@ __global__ void modularBeamBackprojectorKernel_SF_stack(cudaTextureObject_t g, i
                 //const float vWeight = sqrtf(1.0f + v_val*v_val);
 
                 //const float v_phi_x = (v_val - startVals_g.y) * Tv_inv;
-                const float v_phi_x = (t_C * ((x - sourcePosition[0]) * v_vec[0] + (y - sourcePosition[1]) * v_vec[1] + (z - sourcePosition[2]) * v_vec[2]) - c_minus_s_dot_v - startVals_g.y) * Tv_inv;
-                const float v_phi_x_step_A = t_C * (T_f.z * v_vec[2]) * Tv_inv;
+                const float v_phi_x = (t_C * (x_minus_s.x * v_vec.x + x_minus_s.y * v_vec.y + x_minus_s.z * v_vec.z) - c_minus_s_dot_v - startVals_g.y) * Tv_inv;
+                const float v_phi_x_step_A = t_C * (T_f.z * v_vec.z) * Tv_inv;
 
                 const float row_high_A = floor(v_phi_x - 0.5f * v_phi_x_step_A + 0.5f) + 0.5f;
                 const float z_high_A = v_phi_x + 0.5f * v_phi_x_step_A - row_high_A;
@@ -545,17 +547,18 @@ __global__ void modularBeamBackprojectorKernel_SF_stack(cudaTextureObject_t g, i
             for (int k_offset = 0; k_offset < numZ; k_offset++)
             {
                 const float z = float(k + k_offset) * T_f.z + startVals_f.z;
+                const float3 x_minus_s = make_float3(x - sourcePosition.x, y - sourcePosition.y, z - sourcePosition.z);
 
-                const float denom = (x - sourcePosition[0]) * detNormal.x + (y - sourcePosition[1]) * detNormal.y + (z - sourcePosition[2]) * detNormal.z;
+                const float denom = x_minus_s.x * n_vec.x + x_minus_s.y * n_vec.y + x_minus_s.z * n_vec.z;
                 const float t_C = c_minus_s_dot_n / denom;
-                const float t_A = c_minus_s_dot_n / (denom - half_T_x * detNormal.x);
-                const float t_B = c_minus_s_dot_n / (denom + half_T_x * detNormal.x);
+                const float t_A = c_minus_s_dot_n / (denom - half_T_x * n_vec.x);
+                const float t_B = c_minus_s_dot_n / (denom + half_T_x * n_vec.x);
 
-                const float u_arg_A = t_A * ((x - half_T_x - sourcePosition[0]) * u_vec[0] + (y - sourcePosition[1]) * u_vec[1] + (z - sourcePosition[2]) * u_vec[2]) - c_minus_s_dot_u;
-                const float u_arg_B = t_B * ((x + half_T_x - sourcePosition[0]) * u_vec[0] + (y - sourcePosition[1]) * u_vec[1] + (z - sourcePosition[2]) * u_vec[2]) - c_minus_s_dot_u;
+                const float u_arg_A = t_A * ((x_minus_s.x - half_T_x) * u_vec.x + x_minus_s.y * u_vec.y + x_minus_s.z * u_vec.z) - c_minus_s_dot_u;
+                const float u_arg_B = t_B * ((x_minus_s.x + half_T_x) * u_vec.x + x_minus_s.y * u_vec.y + x_minus_s.z * u_vec.z) - c_minus_s_dot_u;
 
                 //const float l_phi = sqrtf((x - sourcePosition[0]) * (x - sourcePosition[0]) + (y - sourcePosition[1]) * (y - sourcePosition[1])) / fabs(y - sourcePosition[1]);
-                const float l_phi = sqrtf(((y - sourcePosition[1]) * (y - sourcePosition[1]) + (z - sourcePosition[2]) * (z - sourcePosition[2])) * ((x - sourcePosition[0]) * (x - sourcePosition[0]) + (y - sourcePosition[1]) * (y - sourcePosition[1]))) / ((y - sourcePosition[1]) * (y - sourcePosition[1]));
+                const float l_phi = sqrtf((x_minus_s.y * x_minus_s.y + x_minus_s.z * x_minus_s.z) * (x_minus_s.x * x_minus_s.x + x_minus_s.y * x_minus_s.y)) / (x_minus_s.y * x_minus_s.y);
 
                 // Weights for u
                 const float tau_low = (min(u_arg_A, u_arg_B) - startVals_g.z) * Tu_inv;
@@ -573,8 +576,8 @@ __global__ void modularBeamBackprojectorKernel_SF_stack(cudaTextureObject_t g, i
                 //const float vWeight = sqrtf(1.0f + v_val*v_val);
 
                 //const float v_phi_x = (v_val - startVals_g.y) * Tv_inv;
-                const float v_phi_x = (t_C * ((x - sourcePosition[0]) * v_vec[0] + (y - sourcePosition[1]) * v_vec[1] + (z - sourcePosition[2]) * v_vec[2]) - c_minus_s_dot_v - startVals_g.y) * Tv_inv;
-                const float v_phi_x_step_A = t_C * (T_f.z * v_vec[2]) * Tv_inv;
+                const float v_phi_x = (t_C * (x_minus_s.x * v_vec.x + x_minus_s.y * v_vec.y + x_minus_s.z * v_vec.z) - c_minus_s_dot_v - startVals_g.y) * Tv_inv;
+                const float v_phi_x_step_A = t_C * (T_f.z * v_vec.z) * Tv_inv;
 
                 const float row_high_A = floor(v_phi_x - 0.5f * v_phi_x_step_A + 0.5f) + 0.5f;
                 const float z_high_A = v_phi_x + 0.5f * v_phi_x_step_A - row_high_A;
@@ -958,39 +961,40 @@ __global__ void modularBeamBackprojectorKernel_eSF_stack(cudaTextureObject_t g,
     {
         //const float L = (float)iphi + 0.5f;
 
-        float* sourcePosition = &sourcePositions[3 * iphi];
-        float* moduleCenter = &moduleCenters[3 * iphi];
-        float* v_vec = &rowVectors[3 * iphi];
-        float* u_vec = &colVectors[3 * iphi];
-        const float3 detNormal = make_float3(u_vec[1] * v_vec[2] - u_vec[2] * v_vec[1],
-            u_vec[2] * v_vec[0] - u_vec[0] * v_vec[2],
-            u_vec[0] * v_vec[1] - u_vec[1] * v_vec[0]);
+        const float3 sourcePosition = make_float3(sourcePositions[3 * iphi + 0], sourcePositions[3 * iphi + 1], sourcePositions[3 * iphi + 2]);
+        const float3 moduleCenter = make_float3(moduleCenters[3 * iphi + 0], moduleCenters[3 * iphi + 1], moduleCenters[3 * iphi + 2]);
+        const float3 v_vec = make_float3(rowVectors[3 * iphi + 0], rowVectors[3 * iphi + 1], rowVectors[3 * iphi + 2]);
+        const float3 u_vec = make_float3(colVectors[3 * iphi + 0], colVectors[3 * iphi + 1], colVectors[3 * iphi + 2]);
 
-        const float c_minus_s_dot_u = (moduleCenter[0] - sourcePosition[0]) * u_vec[0] + (moduleCenter[1] - sourcePosition[1]) * u_vec[1] + (moduleCenter[2] - sourcePosition[2]) * u_vec[2];
-        const float c_minus_s_dot_v = (moduleCenter[0] - sourcePosition[0]) * v_vec[0] + (moduleCenter[1] - sourcePosition[1]) * v_vec[1] + (moduleCenter[2] - sourcePosition[2]) * v_vec[2];
-        const float c_minus_s_dot_n = (moduleCenter[0] - sourcePosition[0]) * detNormal.x + (moduleCenter[1] - sourcePosition[1]) * detNormal.y + (moduleCenter[2] - sourcePosition[2]) * detNormal.z;
+        const float3 n_vec = make_float3(u_vec.y * v_vec.z - u_vec.z * v_vec.y,
+            u_vec.z * v_vec.x - u_vec.x * v_vec.z,
+            u_vec.x * v_vec.y - u_vec.y * v_vec.x);
 
-        if (fabs(x - sourcePosition[0]) > fabs(y - sourcePosition[1]))
+        const float c_minus_s_dot_u = (moduleCenter.x - sourcePosition.x) * u_vec.x + (moduleCenter.y - sourcePosition.y) * u_vec.y + (moduleCenter.z - sourcePosition.z) * u_vec.z;
+        const float c_minus_s_dot_v = (moduleCenter.x - sourcePosition.x) * v_vec.x + (moduleCenter.y - sourcePosition.y) * v_vec.y + (moduleCenter.z - sourcePosition.z) * v_vec.z;
+        const float c_minus_s_dot_n = (moduleCenter.x - sourcePosition.x) * n_vec.x + (moduleCenter.y - sourcePosition.y) * n_vec.y + (moduleCenter.z - sourcePosition.z) * n_vec.z;
+        if (fabs(x - sourcePosition.x) > fabs(y - sourcePosition.y))
         {
             for (int k_offset = 0; k_offset < numZ; k_offset++)
             {
                 const float z = float(k + k_offset) * T_f.z + startVals_f.z;
-                const float denom = (x - sourcePosition[0]) * detNormal.x + (y - sourcePosition[1]) * detNormal.y + (z - sourcePosition[2]) * detNormal.z;
-                const float t_C = c_minus_s_dot_n / denom;
+                const float3 x_minus_s = make_float3(x - sourcePosition.x, y - sourcePosition.y, z - sourcePosition.z);
 
-                const float v_c = t_C * ((x - sourcePosition[0]) * v_vec[0] + (y - sourcePosition[1]) * v_vec[1] + (z - sourcePosition[2]) * v_vec[2]) - c_minus_s_dot_v;
-                const int div = max(1, int(ceil(half_T_z * v_vec[2] * t_C * Tv_inv)));
+                const float denom = x_minus_s.x * n_vec.x + x_minus_s.y * n_vec.y + x_minus_s.z * n_vec.z;
+                const float t_C = c_minus_s_dot_n / denom;
+                const float v_c = t_C * (x_minus_s.x * v_vec.x + x_minus_s.y * v_vec.y + x_minus_s.z * v_vec.z) - c_minus_s_dot_v;
+                const int div = max(1, int(ceil(half_T_z * v_vec.z * t_C * Tv_inv)));
 
-                const float v_A = (v_c - half_T_z * v_vec[2] * t_C - startVals_g.y) * Tv_inv;
-                const float v_B = (v_c + half_T_z * v_vec[2] * t_C - startVals_g.y) * Tv_inv;
+                const float v_A = (v_c - half_T_z * v_vec.z * t_C - startVals_g.y) * Tv_inv;
+                const float v_B = (v_c + half_T_z * v_vec.z * t_C - startVals_g.y) * Tv_inv;
                 const int iv_min = int(ceil(v_A - 0.5f));
                 const int iv_max = int(floor(v_B + 0.5f));
 
-                const float t_A = c_minus_s_dot_n / (denom - half_T_x * detNormal.y);
-                const float t_B = c_minus_s_dot_n / (denom + half_T_x * detNormal.y);
+                const float t_A = c_minus_s_dot_n / (denom - half_T_x * n_vec.y);
+                const float t_B = c_minus_s_dot_n / (denom + half_T_x * n_vec.y);
 
-                const float u_A = (t_A * ((x - sourcePosition[0]) * u_vec[0] + (y - half_T_x - sourcePosition[1]) * u_vec[1] + (z - sourcePosition[2]) * u_vec[2]) - c_minus_s_dot_u - startVals_g.z) * Tu_inv;
-                const float u_B = (t_B * ((x - sourcePosition[0]) * u_vec[0] + (y + half_T_x - sourcePosition[1]) * u_vec[1] + (z - sourcePosition[2]) * u_vec[2]) - c_minus_s_dot_u - startVals_g.z) * Tu_inv;
+                const float u_A = (t_A * (x_minus_s.x * u_vec.x + (x_minus_s.y - half_T_x) * u_vec.y + x_minus_s.z * u_vec.z) - c_minus_s_dot_u - startVals_g.z) * Tu_inv;
+                const float u_B = (t_B * (x_minus_s.x * u_vec.x + (x_minus_s.y + half_T_x) * u_vec.y + x_minus_s.z * u_vec.z) - c_minus_s_dot_u - startVals_g.z) * Tu_inv;
 
                 const float u_min = min(u_A, u_B);
                 const float u_max = max(u_A, u_B);
@@ -1000,7 +1004,8 @@ __global__ void modularBeamBackprojectorKernel_eSF_stack(cudaTextureObject_t g,
                 const int iu_max = int(floor(u_max + 0.5f));
 
                 //const float l_phi = sqrtf((x - sourcePosition[0]) * (x - sourcePosition[0]) + (y - sourcePosition[1]) * (y - sourcePosition[1])) / fabs(x - sourcePosition[0]);
-                const float l_phi = sqrtf(((x - sourcePosition[0]) * (x - sourcePosition[0]) + (z - sourcePosition[2]) * (z - sourcePosition[2])) * ((x - sourcePosition[0]) * (x - sourcePosition[0]) + (y - sourcePosition[1]) * (y - sourcePosition[1]))) / ((x - sourcePosition[0]) * (x - sourcePosition[0]));
+                //const float l_phi = sqrtf(((x - sourcePosition[0]) * (x - sourcePosition[0]) + (z - sourcePosition[2]) * (z - sourcePosition[2])) * ((x - sourcePosition[0]) * (x - sourcePosition[0]) + (y - sourcePosition[1]) * (y - sourcePosition[1]))) / ((x - sourcePosition[0]) * (x - sourcePosition[0]));
+                const float l_phi = sqrtf((x_minus_s.x * x_minus_s.x + x_minus_s.z * x_minus_s.z) * (x_minus_s.x * x_minus_s.x + x_minus_s.y * x_minus_s.y)) / (x_minus_s.x * x_minus_s.x);
 
                 for (int iu = iu_min; iu <= iu_max; iu += 2)
                 {
@@ -1032,22 +1037,24 @@ __global__ void modularBeamBackprojectorKernel_eSF_stack(cudaTextureObject_t g,
             for (int k_offset = 0; k_offset < numZ; k_offset++)
             {
                 const float z = float(k + k_offset) * T_f.z + startVals_f.z;
-                const float denom = (x - sourcePosition[0]) * detNormal.x + (y - sourcePosition[1]) * detNormal.y + (z - sourcePosition[2]) * detNormal.z;
+                const float3 x_minus_s = make_float3(x - sourcePosition.x, y - sourcePosition.y, z - sourcePosition.z);
+
+                const float denom = x_minus_s.x * n_vec.x + x_minus_s.y * n_vec.y + x_minus_s.z * n_vec.z;
                 const float t_C = c_minus_s_dot_n / denom;
 
-                const float v_c = t_C * ((x - sourcePosition[0]) * v_vec[0] + (y - sourcePosition[1]) * v_vec[1] + (z - sourcePosition[2]) * v_vec[2]) - c_minus_s_dot_v;
-                const int div = max(1, int(ceil(half_T_z * v_vec[2] * t_C * Tv_inv)));
+                const float v_c = t_C * (x_minus_s.x * v_vec.x + x_minus_s.y * v_vec.y + x_minus_s.z * v_vec.z) - c_minus_s_dot_v;
+                const int div = max(1, int(ceil(half_T_z * v_vec.z * t_C * Tv_inv)));
 
-                const float v_A = (v_c - half_T_z * v_vec[2] * t_C - startVals_g.y) * Tv_inv;
-                const float v_B = (v_c + half_T_z * v_vec[2] * t_C - startVals_g.y) * Tv_inv;
+                const float v_A = (v_c - half_T_z * v_vec.z * t_C - startVals_g.y) * Tv_inv;
+                const float v_B = (v_c + half_T_z * v_vec.z * t_C - startVals_g.y) * Tv_inv;
                 const int iv_min = int(ceil(v_A - 0.5f));
                 const int iv_max = int(floor(v_B + 0.5f));
 
-                const float t_A = c_minus_s_dot_n / (denom - half_T_x * detNormal.x);
-                const float t_B = c_minus_s_dot_n / (denom + half_T_x * detNormal.x);
+                const float t_A = c_minus_s_dot_n / (denom - half_T_x * n_vec.x);
+                const float t_B = c_minus_s_dot_n / (denom + half_T_x * n_vec.x);
 
-                const float u_A = (t_A * ((x - half_T_x - sourcePosition[0]) * u_vec[0] + (y - sourcePosition[1]) * u_vec[1] + (z - sourcePosition[2]) * u_vec[2]) - c_minus_s_dot_u - startVals_g.z) * Tu_inv;
-                const float u_B = (t_B * ((x + half_T_x - sourcePosition[0]) * u_vec[0] + (y - sourcePosition[1]) * u_vec[1] + (z - sourcePosition[2]) * u_vec[2]) - c_minus_s_dot_u - startVals_g.z) * Tu_inv;
+                const float u_A = (t_A * ((x_minus_s.x - half_T_x) * u_vec.x + x_minus_s.y * u_vec.y + x_minus_s.z * u_vec.z) - c_minus_s_dot_u - startVals_g.z) * Tu_inv;
+                const float u_B = (t_B * ((x_minus_s.x + half_T_x) * u_vec.x + x_minus_s.y * u_vec.y + x_minus_s.z * u_vec.z) - c_minus_s_dot_u - startVals_g.z) * Tu_inv;
 
                 const float u_min = min(u_A, u_B);
                 const float u_max = max(u_A, u_B);
@@ -1057,7 +1064,8 @@ __global__ void modularBeamBackprojectorKernel_eSF_stack(cudaTextureObject_t g,
                 const int iu_max = int(floor(u_max + 0.5f));
 
                 //const float l_phi = sqrtf((x - sourcePosition[0]) * (x - sourcePosition[0]) + (y - sourcePosition[1]) * (y - sourcePosition[1])) / fabs(y - sourcePosition[1]);
-                const float l_phi = sqrtf(((y - sourcePosition[1]) * (y - sourcePosition[1]) + (z - sourcePosition[2]) * (z - sourcePosition[2])) * ((x - sourcePosition[0]) * (x - sourcePosition[0]) + (y - sourcePosition[1]) * (y - sourcePosition[1]))) / ((y - sourcePosition[1]) * (y - sourcePosition[1]));
+                //const float l_phi = sqrtf(((y - sourcePosition[1]) * (y - sourcePosition[1]) + (z - sourcePosition[2]) * (z - sourcePosition[2])) * ((x - sourcePosition[0]) * (x - sourcePosition[0]) + (y - sourcePosition[1]) * (y - sourcePosition[1]))) / ((y - sourcePosition[1]) * (y - sourcePosition[1]));
+                const float l_phi = sqrtf((x_minus_s.y * x_minus_s.y + x_minus_s.z * x_minus_s.z) * (x_minus_s.x * x_minus_s.x + x_minus_s.y * x_minus_s.y)) / (x_minus_s.y * x_minus_s.y);
 
                 for (int iu = iu_min; iu <= iu_max; iu += 2)
                 {
diff --git a/src/projectors_SF.cu b/src/projectors_SF.cu
index bc55b3c..ac03d2e 100644
--- a/src/projectors_SF.cu
+++ b/src/projectors_SF.cu
@@ -1743,7 +1743,7 @@ __global__ void curvedConeBeamProjectorKernel_SF(float* g, int4 N_g, float4 T_g,
     }
 }
 
-__global__ void coneBeamBackprojectorKernel_SF(cudaTextureObject_t g, const int4 N_g, const float4 T_g, const float4 startVals_g, float* f, const int4 N_f, const float4 T_f, const float4 startVals_f, const float R, const float D, const float tau, const float rFOVsq, const float* phis, const int volumeDimensionOrder, bool accum)
+__global__ void coneBeamBackprojectorKernel_SF(cudaTextureObject_t g, const int4 N_g, const float4 T_g, const float4 startVals_g, float* f, const int4 N_f, const float4 T_f, const float4 startVals_f, const float R, const float D, const float tau, const float tiltAngle, const float rFOVsq, const float* phis, const int volumeDimensionOrder, bool accum)
 {
     const int i = threadIdx.x + blockIdx.x * blockDim.x;
     const int j = threadIdx.y + blockIdx.y * blockDim.y;
@@ -1761,7 +1761,6 @@ __global__ void coneBeamBackprojectorKernel_SF(cudaTextureObject_t g, const int4
 
     const float x = i * T_f.x + startVals_f.x;
     const float y = j * T_f.y + startVals_f.y;
-    const float z = k * T_f.z + startVals_f.z;
     if (x * x + y * y > rFOVsq)
     {
         if (volumeDimensionOrder == 0)
@@ -1781,7 +1780,6 @@ __global__ void coneBeamBackprojectorKernel_SF(cudaTextureObject_t g, const int4
     const float T_x_over_2 = 0.5f * T_f.x;
     const float v0_over_Tv = startVals_g.y / T_g.y;
     const float Tz_over_Tv = T_f.z / T_g.y;
-    const float v_phi_x_start_num = z / T_g.y;
     const float Tv_inv = 1.0f / T_g.y;
     const float Tu_inv = 1.0f / T_g.z;
 
@@ -1789,92 +1787,157 @@ __global__ void coneBeamBackprojectorKernel_SF(cudaTextureObject_t g, const int4
     for (int k_offset = 0; k_offset < numZ; k_offset++)
         vals[k_offset] = 0.0f;
 
-    for (int l = 0; l < N_g.x; l++)
+    if (tiltAngle == 0.0f)
     {
-        const float L = (float)l + 0.5f;
-        const float z_source_over_T_v = (phis[l] * T_g.w + startVals_g.w) * Tv_inv;
-        const float sin_phi = sin(phis[l]);
-        const float cos_phi = cos(phis[l]);
+        const float z = k * T_f.z + startVals_f.z;
+        const float v_phi_x_start_num = z / T_g.y;
+        for (int l = 0; l < N_g.x; l++)
+        {
+            const float L = (float)l + 0.5f;
+            const float z_source_over_T_v = (phis[l] * T_g.w + startVals_g.w) * Tv_inv;
+            const float sin_phi = sin(phis[l]);
+            const float cos_phi = cos(phis[l]);
 
-        float B_x = (sin_phi < 0.0f) ? -cos_phi * T_x_over_2 : cos_phi * T_x_over_2;
-        const float B_y = (cos_phi < 0.0f) ? sin_phi * T_x_over_2 : -sin_phi * T_x_over_2;
+            float B_x = (sin_phi < 0.0f) ? -cos_phi * T_x_over_2 : cos_phi * T_x_over_2;
+            const float B_y = (cos_phi < 0.0f) ? sin_phi * T_x_over_2 : -sin_phi * T_x_over_2;
 
-        const float x_dot_theta_perp = cos_phi * y - sin_phi * x + tau;
-        const float R_minus_x_dot_theta = R - x * cos_phi - y * sin_phi;
-        const float R_minus_x_dot_theta_inv = 1.0f / R_minus_x_dot_theta;
+            const float x_dot_theta_perp = cos_phi * y - sin_phi * x + tau;
+            const float R_minus_x_dot_theta = R - x * cos_phi - y * sin_phi;
+            const float R_minus_x_dot_theta_inv = 1.0f / R_minus_x_dot_theta;
 
-        const float u_arg = x_dot_theta_perp * R_minus_x_dot_theta_inv;
-        const float x_denom = fabs(u_arg * cos_phi - sin_phi);
-        const float y_denom = fabs(u_arg * sin_phi + cos_phi);
-        const float l_phi = T_f.x * sqrtf(1.0f + u_arg * u_arg) / max(x_denom, y_denom);
-        /*
-        float l_phi;
-        if (x * cos_phi + y * sin_phi >= 0.0f)
-        {
-            const float R_minus_x_dot_theta_inv_conj = 1.0f / (R + x * cos_phi + y * sin_phi);
-            const float u_arg_conj = -x_dot_theta_perp * R_minus_x_dot_theta_inv_conj;
-            const float rt = sqrt(1.0f + u_arg_conj * u_arg_conj) * rsqrtf(1.0f + u_arg * u_arg);
-            //const float l_phi_conj = T_f.x * sqrt(1.0f + u_arg_conj * u_arg_conj) / max(fabs(u_arg_conj * cos_phi - sin_phi), fabs(u_arg_conj * sin_phi + cos_phi));
-            l_phi = T_f.x * sqrt(1.0f + u_arg * u_arg) / max(x_denom, y_denom);
-            //l_phi = 2.0f * T_f.x * sqrtf(1.0f + u_arg * u_arg) / max(x_denom, y_denom) * R_minus_x_dot_theta * R_minus_x_dot_theta / (R * R);
-            //l_phi = 2.0f * T_f.x / max(x_denom, y_denom) * R_minus_x_dot_theta* R_minus_x_dot_theta / (R*R) * ((R + x * cos_phi + y * sin_phi)/R);
-            //const float temp = R_minus_x_dot_theta * R_minus_x_dot_theta / (R * R);
-            //l_phi*temp*(1/temp + 1/temp_conv) = l_phi*(1+temp/temp_conj)
-            l_phi = l_phi * (1.0f + rt*R_minus_x_dot_theta * R_minus_x_dot_theta / ((R + x * cos_phi + y * sin_phi) * (R + x * cos_phi + y * sin_phi)));
-            //l_phi += l_phi_conj * R * R / ((R + x * cos_phi + y * sin_phi) * (R + x * cos_phi + y * sin_phi));
-        }
-        else
-            continue;
-        //*/
+            const float u_arg = x_dot_theta_perp * R_minus_x_dot_theta_inv;
+            const float x_denom = fabs(u_arg * cos_phi - sin_phi);
+            const float y_denom = fabs(u_arg * sin_phi + cos_phi);
+            const float l_phi = T_f.x * sqrtf(1.0f + u_arg * u_arg) / max(x_denom, y_denom);
 
-        float A_x;
-        if (x_denom > y_denom)
-            A_x = fabs(sin_phi) * T_x_over_2;
-        else
-        {
-            A_x = fabs(cos_phi) * T_x_over_2;
-            B_x = B_y;
-        }
-        const float tau_low = ((x_dot_theta_perp - A_x) / (R_minus_x_dot_theta - B_x) - startVals_g.z) * Tu_inv;
-        const float tau_high = ((x_dot_theta_perp + A_x) / (R_minus_x_dot_theta + B_x) - startVals_g.z) * Tu_inv;
+            float A_x;
+            if (x_denom > y_denom)
+                A_x = fabs(sin_phi) * T_x_over_2;
+            else
+            {
+                A_x = fabs(cos_phi) * T_x_over_2;
+                B_x = B_y;
+            }
+            const float tau_low = ((x_dot_theta_perp - A_x) / (R_minus_x_dot_theta - B_x) - startVals_g.z) * Tu_inv;
+            const float tau_high = ((x_dot_theta_perp + A_x) / (R_minus_x_dot_theta + B_x) - startVals_g.z) * Tu_inv;
 
-        float ind_first = floor(tau_low + 0.5f); // first detector index
+            float ind_first = floor(tau_low + 0.5f); // first detector index
 
-        const float horizontalWeights_0_A = (min(tau_high, ind_first + 1.5f) - tau_low) * l_phi;
-        const float horizontalWeights_1_A = l_phi * (tau_high - tau_low) - horizontalWeights_0_A;
+            const float horizontalWeights_0_A = (min(tau_high, ind_first + 1.5f) - tau_low) * l_phi;
+            const float horizontalWeights_1_A = l_phi * (tau_high - tau_low) - horizontalWeights_0_A;
 
-        const float ind_last = ind_first + 2.5f;
-        ind_first = ind_first + 0.5f + max(0.0f, min(tau_high - ind_first - 0.5f, 1.0f)) * l_phi / horizontalWeights_0_A;
+            const float ind_last = ind_first + 2.5f;
+            ind_first = ind_first + 0.5f + max(0.0f, min(tau_high - ind_first - 0.5f, 1.0f)) * l_phi / horizontalWeights_0_A;
 
-        const float v_phi_x_step_A = Tz_over_Tv * R_minus_x_dot_theta_inv;
-        const float v_phi_x_first = (v_phi_x_start_num - z_source_over_T_v) * R_minus_x_dot_theta_inv - v0_over_Tv;
-        for (int k_offset = 0; k_offset < numZ; k_offset++)
+            const float v_phi_x_step_A = Tz_over_Tv * R_minus_x_dot_theta_inv;
+            const float v_phi_x_first = (v_phi_x_start_num - z_source_over_T_v) * R_minus_x_dot_theta_inv - v0_over_Tv;
+            for (int k_offset = 0; k_offset < numZ; k_offset++)
+            {
+                //const float v_phi_x = (v_phi_x_start_num + k_offset * Tz_over_Tv - z_source_over_T_v) * R_minus_x_dot_theta_inv - v0_over_Tv;
+                const float v_phi_x = v_phi_x_first + k_offset * v_phi_x_step_A;
+
+                const float v_arg = (v_phi_x + v0_over_Tv) * T_g.y;
+                const float v_weight = sqrtf(1.0f + v_arg * v_arg);
+
+                const float row_high_A = floor(v_phi_x - 0.5f * v_phi_x_step_A + 0.5f) + 0.5f;
+                const float z_high_A = v_phi_x + 0.5f * v_phi_x_step_A - row_high_A;
+
+                const float v_weight_one = min(v_phi_x_step_A, v_phi_x_step_A - z_high_A);
+                const float v_weight_two = max(0.0f, min(z_high_A, 1.0f));
+                const float v_oneAndTwo = v_weight_two / (v_weight_one + v_weight_two);
+                const float row_high_plus_two_A = row_high_A + 2.0f;
+
+                if (z_high_A > 1.0f)
+                {
+                    vals[k_offset] += ((tex3D<float>(g, ind_first, row_high_A + v_oneAndTwo, L) * horizontalWeights_0_A
+                        + tex3D<float>(g, ind_last, row_high_A + v_oneAndTwo, L) * horizontalWeights_1_A) * (v_weight_one + v_weight_two)
+                        + (tex3D<float>(g, ind_first, row_high_plus_two_A, L) * horizontalWeights_0_A
+                            + tex3D<float>(g, ind_last, row_high_plus_two_A, L) * horizontalWeights_1_A) * (z_high_A - 1.0f)) * v_weight;
+                }
+                else
+                {
+                    vals[k_offset] += ((tex3D<float>(g, ind_first, row_high_A + v_oneAndTwo, L) * horizontalWeights_0_A
+                        + tex3D<float>(g, ind_last, row_high_A + v_oneAndTwo, L) * horizontalWeights_1_A) * (v_weight_one + v_weight_two)) * v_weight;
+                }
+            }
+        }
+    }
+    else
+    {
+        const float voxz_half = 0.5f * T_f.z;
+        const float cos_tilt = cos(tiltAngle);
+        const float sin_tilt = sin(tiltAngle);
+        for (int l = 0; l < N_g.x; l++)
         {
-            //const float v_phi_x = (v_phi_x_start_num + k_offset * Tz_over_Tv - z_source_over_T_v) * R_minus_x_dot_theta_inv - v0_over_Tv;
-            const float v_phi_x = v_phi_x_first + k_offset * v_phi_x_step_A;
-
-            const float v_arg = (v_phi_x+v0_over_Tv)*T_g.y;
-            const float v_weight = sqrtf(1.0f + v_arg*v_arg);
+            const float L = (float)l + 0.5f;
+            const float z_source = phis[l] * T_g.w + startVals_g.w;
+            const float sin_phi = sin(phis[l]);
+            const float cos_phi = cos(phis[l]);
 
-            const float row_high_A = floor(v_phi_x - 0.5f * v_phi_x_step_A + 0.5f) + 0.5f;
-            const float z_high_A = v_phi_x + 0.5f * v_phi_x_step_A - row_high_A;
+            float B_x = (sin_phi < 0.0f) ? -cos_phi * T_x_over_2 : cos_phi * T_x_over_2;
+            const float B_y = (cos_phi < 0.0f) ? sin_phi * T_x_over_2 : -sin_phi * T_x_over_2;
 
-            const float v_weight_one = min(v_phi_x_step_A, v_phi_x_step_A - z_high_A);
-            const float v_weight_two = max(0.0f, min(z_high_A, 1.0f));
-            const float v_oneAndTwo = v_weight_two / (v_weight_one + v_weight_two);
-            const float row_high_plus_two_A = row_high_A + 2.0f;
+            const float x_dot_theta_perp = cos_phi * y - sin_phi * x + tau;
+            const float R_minus_x_dot_theta = R - x * cos_phi - y * sin_phi;
+            const float R_minus_x_dot_theta_inv = 1.0f / R_minus_x_dot_theta;
 
-            if (z_high_A > 1.0f)
-            {
-                vals[k_offset] += ((tex3D<float>(g, ind_first, row_high_A + v_oneAndTwo, L) * horizontalWeights_0_A
-                    + tex3D<float>(g, ind_last, row_high_A + v_oneAndTwo, L) * horizontalWeights_1_A) * (v_weight_one + v_weight_two)
-                    + (tex3D<float>(g, ind_first, row_high_plus_two_A, L) * horizontalWeights_0_A
-                        + tex3D<float>(g, ind_last, row_high_plus_two_A, L) * horizontalWeights_1_A) * (z_high_A - 1.0f)) * v_weight;
-            }
-            else
+            for (int k_offset = 0; k_offset < numZ; k_offset++)
             {
-                vals[k_offset] += ((tex3D<float>(g, ind_first, row_high_A + v_oneAndTwo, L) * horizontalWeights_0_A
-                    + tex3D<float>(g, ind_last, row_high_A + v_oneAndTwo, L) * horizontalWeights_1_A) * (v_weight_one + v_weight_two)) * v_weight;
+                const float z = (k+k_offset) * T_f.z + startVals_f.z;
+
+                const float u_num = x_dot_theta_perp * cos_tilt + z * sin_tilt;
+                const float v_num = z * cos_tilt - x_dot_theta_perp * sin_tilt - z_source;
+
+                const float u_arg = u_num * R_minus_x_dot_theta_inv;
+                const float v_arg = v_num * R_minus_x_dot_theta_inv;
+                const float x_denom = fabs(u_arg * cos_phi - sin_phi);
+                const float y_denom = fabs(u_arg * sin_phi + cos_phi);
+                const float l_phi = T_f.x * sqrtf((1.0f + u_arg * u_arg) * (1.0f + v_arg * v_arg)) / max(x_denom, y_denom);
+
+                // Calculate footprint along columns
+                float A_x;
+                if (x_denom > y_denom)
+                    A_x = fabs(sin_phi) * T_x_over_2;
+                else
+                {
+                    A_x = fabs(cos_phi) * T_x_over_2;
+                    B_x = B_y;
+                }
+                const float tau_low = ((u_num - A_x) / (R_minus_x_dot_theta - B_x) - startVals_g.z) * Tu_inv;
+                const float tau_high = ((u_num + A_x) / (R_minus_x_dot_theta + B_x) - startVals_g.z) * Tu_inv;
+
+                float u_ind_first = floor(tau_low + 0.5f); // first detector index
+
+                const float uWeights_0 = (min(tau_high, u_ind_first + 1.5f) - tau_low) * l_phi;
+                const float uWeights_1 = l_phi * (tau_high - tau_low) - uWeights_0;
+
+                const float u_ind_last = u_ind_first + 2.5f;
+                u_ind_first = u_ind_first + 0.5f + max(0.0f, min(tau_high - u_ind_first - 0.5f, 1.0f)) * l_phi / uWeights_0;
+
+                // Calculate footprint along rows
+                const float v_A = (v_arg - voxz_half * R_minus_x_dot_theta_inv - startVals_g.y) * Tv_inv;
+                const float v_B = (v_arg + voxz_half * R_minus_x_dot_theta_inv - startVals_g.y) * Tv_inv;
+
+                float v_ind_first = floor(v_A + 0.5f); // first detector index
+
+                const float vWeights_0 = (min(v_B, v_ind_first + 1.5f) - v_A);
+                const float vWeights_1 = (v_B - v_A) - vWeights_0;
+
+                const float v_ind_last = v_ind_first + 2.5f;
+                v_ind_first = v_ind_first + 0.5f + max(0.0f, min(v_B - v_ind_first - 0.5f, 1.0f)) / vWeights_0;
+
+                if (vWeights_1 > 0.0f)
+                {
+                    vals[k_offset] += (tex3D<float>(g, u_ind_first, v_ind_first, L) * uWeights_0
+                        + tex3D<float>(g, u_ind_last, v_ind_first, L) * uWeights_1) * vWeights_0
+                        + (tex3D<float>(g, u_ind_first, v_ind_last, L) * uWeights_0
+                            + tex3D<float>(g, u_ind_last, v_ind_last, L) * uWeights_1) * vWeights_1;
+                }
+                else
+                {
+                    vals[k_offset] += (tex3D<float>(g, u_ind_first, v_ind_first, L) * uWeights_0
+                        + tex3D<float>(g, u_ind_last, v_ind_first, L) * uWeights_1) * vWeights_0;
+                }
             }
         }
     }
@@ -1907,7 +1970,7 @@ __global__ void coneBeamBackprojectorKernel_SF(cudaTextureObject_t g, const int4
     }
 }
 
-__global__ void coneBeamProjectorKernel_SF(float* g, int4 N_g, float4 T_g, float4 startVals_g, cudaTextureObject_t f, int4 N_f, float4 T_f, float4 startVals_f, float R, float D, float tau, float rFOVsq, float* phis, int volumeDimensionOrder, bool accum)
+__global__ void coneBeamProjectorKernel_SF(float* g, const int4 N_g, const float4 T_g, const float4 startVals_g, cudaTextureObject_t f, const int4 N_f, const float4 T_f, const float4 startVals_f, const float R, const float D, const float tau, const float tiltAngle, const float rFOVsq, const float* phis, const int volumeDimensionOrder, bool accum)
 {
     const int l = threadIdx.x + blockIdx.x * blockDim.x;
     const int m = threadIdx.y + blockIdx.y * blockDim.y;
@@ -1915,16 +1978,37 @@ __global__ void coneBeamProjectorKernel_SF(float* g, int4 N_g, float4 T_g, float
     if (l >= N_g.x || m >= N_g.y || n >= N_g.z)
         return;
 
+    const float cos_tilt = cos(tiltAngle);
+    const float sin_tilt = sin(tiltAngle);
+
+    /*
     const float v = m * T_g.y + startVals_g.y;
     const float u = n * T_g.z + startVals_g.z;
-    
-    const float sin_phi = sin(phis[l]);
-    const float cos_phi = cos(phis[l]);
 
     const float n_minus_half = (float)n - 0.5f + startVals_g.z / T_g.z;
     const float n_plus_half = (float)n + 0.5f + startVals_g.z / T_g.z;
     const float m_minus_half = (float)m - 0.5f;
     const float m_plus_half = (float)m + 0.5f;
+    //*/
+
+    //*
+    const float v_no_tilt = m * T_g.y + startVals_g.y;
+    const float u_no_tilt = n * T_g.z + startVals_g.z;
+    
+    const float u = cos_tilt * u_no_tilt - sin_tilt * v_no_tilt;
+    const float v = sin_tilt * u_no_tilt + cos_tilt * v_no_tilt;
+
+    //const float n_tilt = (u - startVals_g.z) / T_g.z;
+    //const float m_tilt = (v - startVals_g.y) / T_g.y;
+
+    const float n_minus_half = u/T_g.z - 0.5f;
+    const float n_plus_half = u/T_g.z + 0.5f;
+    const float m_minus_half = (v - startVals_g.y) / T_g.y - 0.5f;
+    const float m_plus_half = (v - startVals_g.y) / T_g.y + 0.5f;
+    //*/
+
+    const float sin_phi = sin(phis[l]);
+    const float cos_phi = cos(phis[l]);
 
     const float v0_over_Tv = startVals_g.y / T_g.y;
 
@@ -2181,7 +2265,7 @@ bool project_SF(float *&g, float *f, parameters* params, bool data_on_cpu, bool
     if (params->geometry == parameters::CONE)
     {
         if (params->detectorType == parameters::FLAT)
-            coneBeamProjectorKernel_SF <<< dimGrid, dimBlock >>> (dev_g, N_g, T_g, startVal_g, d_data_txt, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
+            coneBeamProjectorKernel_SF <<< dimGrid, dimBlock >>> (dev_g, N_g, T_g, startVal_g, d_data_txt, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, params->tiltAngle*PI/180.0, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
         else
         {
             curvedConeBeamProjectorKernel_SF <<< dimGrid, dimBlock >>> (dev_g, N_g, T_g, startVal_g, d_data_txt, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
@@ -2376,7 +2460,7 @@ bool backproject_SF(float *g, float *&f, parameters* params, bool data_on_cpu, b
         else
         {
             if (params->detectorType == parameters::FLAT)
-                coneBeamBackprojectorKernel_SF <<< dimGrid_slab, dimBlock_slab >>> (d_data_txt, N_g, T_g, startVal_g, dev_f, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
+                coneBeamBackprojectorKernel_SF <<< dimGrid_slab, dimBlock_slab >>> (d_data_txt, N_g, T_g, startVal_g, dev_f, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, params->tiltAngle*PI/180.0, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
             else
                 curvedConeBeamBackprojectorKernel_SF <<< dimGrid_slab, dimBlock_slab >>> (d_data_txt, N_g, T_g, startVal_g, dev_f, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
         }
diff --git a/src/projectors_extendedSF.cu b/src/projectors_extendedSF.cu
index ffd2da4..b048e15 100644
--- a/src/projectors_extendedSF.cu
+++ b/src/projectors_extendedSF.cu
@@ -226,9 +226,6 @@ __global__ void coneParallelWeightedHelicalBackprojectorKernel_eSF(cudaTextureOb
     }
 
     const float Tv_inv = 1.0f / T_g.y;
-    const float Tu_inv = 1.0f / T_g.z;
-
-    const float vox_half = 0.5f * T_f.x;
     const float voxz_half = 0.5f * T_f.z;
 
     const float asin_tau_over_R = asin(tau / R);
@@ -397,9 +394,6 @@ __global__ void coneParallelBackprojectorKernel_eSF(cudaTextureObject_t g, int4
     }
 
     const float Tv_inv = 1.0f / T_g.y;
-    const float Tu_inv = 1.0f / T_g.z;
-
-    const float vox_half = 0.5f * T_f.x;
     const float voxz_half = 0.5f * T_f.z;
 
     const float asin_tau_over_R = asin(tau / R);
@@ -1289,7 +1283,7 @@ __global__ void curvedConeBeamBackprojectorKernel_eSF(cudaTextureObject_t g, int
         f[ind] = val;
 }
 
-__global__ void coneBeamBackprojectorKernel_eSF(cudaTextureObject_t g, int4 N_g, float4 T_g, float4 startVals_g, float* f, int4 N_f, float4 T_f, float4 startVals_f, float R, float D, float tau, float rFOVsq, float* phis, int volumeDimensionOrder, bool accum)
+__global__ void coneBeamBackprojectorKernel_eSF(cudaTextureObject_t g, const int4 N_g, const float4 T_g, const float4 startVals_g, float* f, const int4 N_f, const float4 T_f, const float4 startVals_f, const float R, const float D, const float tau, const float tiltAngle, const float rFOVsq, const float* phis, int volumeDimensionOrder, bool accum)
 {
     const int i = threadIdx.x + blockIdx.x * blockDim.x;
     const int j = threadIdx.y + blockIdx.y * blockDim.y;
@@ -1327,6 +1321,9 @@ __global__ void coneBeamBackprojectorKernel_eSF(cudaTextureObject_t g, int4 N_g,
     const float vox_half = 0.5f * T_f.x;
     const float voxz_half = 0.5f * T_f.z;
 
+    const float cos_tilt = cos(tiltAngle);
+    const float sin_tilt = sin(tiltAngle);
+
     float val = 0.0;
     for (int l = 0; l < N_g.x; l++)
     {
@@ -1338,10 +1335,11 @@ __global__ void coneBeamBackprojectorKernel_eSF(cudaTextureObject_t g, int4 N_g,
         const float R_minus_x_dot_theta = R - x * cos_phi - y * sin_phi;
         const float R_minus_x_dot_theta_inv = 1.0f / R_minus_x_dot_theta;
 
-        const float u_c = x_dot_theta_perp * R_minus_x_dot_theta_inv;
+        const float u_c_num = x_dot_theta_perp * cos_tilt + z * sin_tilt;
+        const float u_c = u_c_num * R_minus_x_dot_theta_inv;
         const float x_denom = fabs(u_c * cos_phi - sin_phi);
         const float y_denom = fabs(u_c * sin_phi + cos_phi);
-        const float v_c = (z - z_source) * R_minus_x_dot_theta_inv;
+        const float v_c = ((z*cos_tilt - x_dot_theta_perp*sin_tilt) - z_source) * R_minus_x_dot_theta_inv;
         const float l_phi = T_f.x * sqrtf(1.0f + u_c * u_c) / max(x_denom, y_denom) * sqrtf(1.0f + v_c * v_c);
 
         //const int iv_c = (v_c - startVals_g.y) / T_g.y;
@@ -1362,8 +1360,8 @@ __global__ void coneBeamBackprojectorKernel_eSF(cudaTextureObject_t g, int4 N_g,
         {
             //const float z_A = ((v - 0.5f * T_g.y) * rayParam_inv - startVals_f.z) * T_z_inv;
             //const float z_B = ((v + 0.5f * T_g.y) * rayParam_inv - startVals_f.z) * T_z_inv;
-            const float u_A = ((x_dot_theta_perp + sin_phi * vox_half) / (R_minus_x_dot_theta + vox_half * cos_phi) - startVals_g.z) * Tu_inv;
-            const float u_B = ((x_dot_theta_perp - sin_phi * vox_half) / (R_minus_x_dot_theta - vox_half * cos_phi) - startVals_g.z) * Tu_inv;
+            const float u_A = ((u_c_num + sin_phi * vox_half) / (R_minus_x_dot_theta + vox_half * cos_phi) - startVals_g.z) * Tu_inv;
+            const float u_B = ((u_c_num - sin_phi * vox_half) / (R_minus_x_dot_theta - vox_half * cos_phi) - startVals_g.z) * Tu_inv;
 
             const float u_min = min(u_A, u_B);
             const float u_max = max(u_A, u_B);
@@ -1400,8 +1398,8 @@ __global__ void coneBeamBackprojectorKernel_eSF(cudaTextureObject_t g, int4 N_g,
         else
         {
             // use y_lo, y_hi
-            const float u_A = ((x_dot_theta_perp - cos_phi * vox_half) / (R_minus_x_dot_theta + vox_half * sin_phi) - startVals_g.z) * Tu_inv;
-            const float u_B = ((x_dot_theta_perp + cos_phi * vox_half) / (R_minus_x_dot_theta - vox_half * sin_phi) - startVals_g.z) * Tu_inv;
+            const float u_A = ((u_c_num - cos_phi * vox_half) / (R_minus_x_dot_theta + vox_half * sin_phi) - startVals_g.z) * Tu_inv;
+            const float u_B = ((u_c_num + cos_phi * vox_half) / (R_minus_x_dot_theta - vox_half * sin_phi) - startVals_g.z) * Tu_inv;
 
             const float u_min = min(u_A, u_B);
             const float u_max = max(u_A, u_B);
@@ -1661,7 +1659,7 @@ __global__ void fanBeamProjectorKernel_eSF(float* g, int4 N_g, float4 T_g, float
     }
 }
 
-__global__ void coneBeamProjectorKernel_eSF(float* g, int4 N_g, float4 T_g, float4 startVals_g, cudaTextureObject_t f, int4 N_f, float4 T_f, float4 startVals_f, float R, float D, float tau, float rFOVsq, float* phis, int volumeDimensionOrder, bool accum)
+__global__ void coneBeamProjectorKernel_eSF(float* g, const int4 N_g, const float4 T_g, const float4 startVals_g, cudaTextureObject_t f, const int4 N_f, const float4 T_f, const float4 startVals_f, const float R, const float D, const float tau, const float tiltAngle, const float rFOVsq, const float* phis, const int volumeDimensionOrder, bool accum)
 {
     const int l = threadIdx.x + blockIdx.x * blockDim.x;
     const int m = threadIdx.y + blockIdx.y * blockDim.y;
@@ -1669,25 +1667,45 @@ __global__ void coneBeamProjectorKernel_eSF(float* g, int4 N_g, float4 T_g, floa
     if (l >= N_g.x || m >= N_g.y || n >= N_g.z)
         return;
 
+    const float T_u_inv = 1.0f / T_g.z;
+    const float T_v_inv = 1.0f / T_g.y;
+
+    const float cos_tilt = cos(tiltAngle);
+    const float sin_tilt = sin(tiltAngle);
+
+    /*
     const float v = m * T_g.y + startVals_g.y;
     const float u = n * T_g.z + startVals_g.z;
 
-    const float z_source = phis[l] * T_g.w + startVals_g.w;
-
     const float u_lo = n - 0.5f;
     const float u_hi = n + 0.5f;
 
     const float v_lo = m - 0.5f;
     const float v_hi = m + 0.5f;
+    //*/
+
+    //*
+    const float v_no_tilt = m * T_g.y + startVals_g.y;
+    const float u_no_tilt = n * T_g.z + startVals_g.z;
+
+    const float u = cos_tilt * u_no_tilt - sin_tilt * v_no_tilt;
+    const float v = sin_tilt * u_no_tilt + cos_tilt * v_no_tilt;
+
+    //const float n_tilt = (u - startVals_g.z) / T_g.z;
+    //const float m_tilt = (v - startVals_g.y) / T_g.y;
+
+    const float u_lo = (u - startVals_g.z) * T_u_inv - 0.5f;
+    const float u_hi = (u - startVals_g.z) * T_u_inv + 0.5f;
+    const float v_lo = (v - startVals_g.y) * T_v_inv - 0.5f;
+    const float v_hi = (v - startVals_g.y) * T_v_inv + 0.5f;
+    //*/
 
     const float T_x_inv = 1.0f / T_f.x;
     const float T_z_inv = 1.0f / T_f.z;
 
-    const float T_u_inv = 1.0f / T_g.z;
-    const float T_v_inv = 1.0f / T_g.y;
-
     const float vox_half = 0.5f * T_f.x;
 
+    const float z_source = phis[l] * T_g.w + startVals_g.w;
     const float sin_phi = sin(phis[l]);
     const float cos_phi = cos(phis[l]);
 
@@ -2054,7 +2072,7 @@ bool project_eSF(float*& g, float* f, parameters* params, bool data_on_cpu, bool
     if (params->geometry == parameters::CONE)
     {
         if (params->detectorType == parameters::FLAT)
-            coneBeamProjectorKernel_eSF <<< dimGrid, dimBlock >>> (dev_g, N_g, T_g, startVal_g, d_data_txt, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
+            coneBeamProjectorKernel_eSF <<< dimGrid, dimBlock >>> (dev_g, N_g, T_g, startVal_g, d_data_txt, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, params->tiltAngle*PI/180.0, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
         else
             curvedConeBeamProjectorKernel_eSF <<< dimGrid, dimBlock >>> (dev_g, N_g, T_g, startVal_g, d_data_txt, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
     }
@@ -2228,7 +2246,7 @@ bool backproject_eSF(float* g, float*& f, parameters* params, bool data_on_cpu,
         else
         {
             if (params->detectorType == parameters::FLAT)
-                coneBeamBackprojectorKernel_eSF <<< dimGrid, dimBlock >>> (d_data_txt, N_g, T_g, startVal_g, dev_f, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
+                coneBeamBackprojectorKernel_eSF <<< dimGrid, dimBlock >>> (d_data_txt, N_g, T_g, startVal_g, dev_f, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, params->tiltAngle*PI/180.0, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
             else
                 curvedConeBeamBackprojectorKernel_eSF <<< dimGrid, dimBlock >>> (d_data_txt, N_g, T_g, startVal_g, dev_f, N_f, T_f, startVal_f, params->sod, params->sdd, params->tau, rFOVsq, dev_phis, params->volumeDimensionOrder, accum);
         }
diff --git a/src/ramp_filter.cu b/src/ramp_filter.cu
index 58c6dad..4d379e0 100644
--- a/src/ramp_filter.cu
+++ b/src/ramp_filter.cu
@@ -46,12 +46,12 @@ __global__ void zeroPadForOffsetScanKernel(float* g, float* g_pad, const int3 N,
         if (k < N_add)
             g_pad[ind_pad + k + N_add] = 0.0f;
         else
-            g_pad[ind_pad + uint64(k)] = g[ind + uint64(k - N_add)] * 2.0f * offsetScanWeights[i * numCols + k-N_add];
+            g_pad[ind_pad + uint64(k)] = g[ind + uint64(k - N_add)] * 2.0f * offsetScanWeights[j * numCols + k-N_add];
     }
     else
     {
         if (k < numCols)
-            g_pad[ind_pad + uint64(k)] = g[ind + uint64(k)] * 2.0f * offsetScanWeights[i * numCols + k];
+            g_pad[ind_pad + uint64(k)] = g[ind + uint64(k)] * 2.0f * offsetScanWeights[j * numCols + k];
         else
             g_pad[ind_pad + uint64(k)] = 0.0f;
     }
@@ -2117,7 +2117,7 @@ float* zeroPadForOffsetScan_GPU(float* g, parameters* params, float* g_out, bool
                 return NULL;
             }
         }
-        float* dev_offsetScanWeights = copy1DdataToGPU(offsetScanWeights, params->numAngles * params->numCols, params->whichGPU);
+        float* dev_offsetScanWeights = copy1DdataToGPU(offsetScanWeights, params->numRows * params->numCols, params->whichGPU);
         free(offsetScanWeights);
 
         int3 N_g = make_int3(params->numAngles, params->numRows, params->numCols + N_add);
diff --git a/src/ramp_filter_cpu.cpp b/src/ramp_filter_cpu.cpp
index 0702058..29a2cc5 100644
--- a/src/ramp_filter_cpu.cpp
+++ b/src/ramp_filter_cpu.cpp
@@ -726,7 +726,7 @@ float* zeroPadForOffsetScan(float* g, parameters* params, float* g_out)
                     float* aLine = &aProj[j*params->numCols];
                     float* aLine_pad = &aProj_pad[j * (params->numCols+N_add)];
                     for (int k = 0; k < params->numCols; k++)
-                        aLine_pad[k + N_add] = aLine[k] * 2.0 * offsetScanWeights[i * params->numCols + k];
+                        aLine_pad[k + N_add] = aLine[k] * 2.0 * offsetScanWeights[j * params->numCols + k];
                     if (g_out != NULL)
                     {
                         for (int k = 0; k < N_add; k++)
@@ -750,7 +750,7 @@ float* zeroPadForOffsetScan(float* g, parameters* params, float* g_out)
                     float* aLine = &aProj[j * params->numCols];
                     float* aLine_pad = &aProj_pad[j * (params->numCols + N_add)];
                     for (int k = 0; k < params->numCols; k++)
-                        aLine_pad[k] = aLine[k] *2.0 * offsetScanWeights[i * params->numCols + k];
+                        aLine_pad[k] = aLine[k] *2.0 * offsetScanWeights[j * params->numCols + k];
                     if (g_out != NULL)
                     {
                         for (int k = 0; k < N_add; k++)
diff --git a/src/ray_weighting_cpu.cpp b/src/ray_weighting_cpu.cpp
index f580deb..101771d 100644
--- a/src/ray_weighting_cpu.cpp
+++ b/src/ray_weighting_cpu.cpp
@@ -219,34 +219,72 @@ float* setOffsetScanWeights(parameters* params)
 			float delta = min(abs_minVal, abs_maxVal);
 			float s_arg;
 
-			float* retVal = (float*)malloc(sizeof(float) * params->numAngles * params->numCols);
-			for (int j = 0; j < params->numCols; j++)
+			float* retVal = (float*)malloc(sizeof(float) * params->numRows * params->numCols);
+			if (params->geometry == parameters::CONE && params->detectorType == parameters::FLAT && params->tiltAngle != 0.0)
 			{
-				s_arg = params->u(j);
-				if (params->detectorType == parameters::FLAT)
-					s_arg = (params->sod * s_arg - params->tau) / sqrt(1.0 + s_arg * s_arg);
-				else
-					s_arg = params->sod * sin(s_arg) - params->tau * cos(s_arg);
-
-				float theWeight = 1.0;
-				if (fabs(s_arg) <= delta)
+				float cos_tilt = cos(params->tiltAngle * PI / 180.0);
+				float sin_tilt = sin(params->tiltAngle * PI / 180.0);
+				for (int i = 0; i < params->numRows; i++)
 				{
-					theWeight = cos(PI / 4.0 * (s_arg - delta) / delta);
-					theWeight = theWeight * theWeight;
+					float v = params->v(i);
+					for (int j = 0; j < params->numCols; j++)
+					{
+						s_arg = params->u(j);
+						//s_arg = cos_tilt * s_arg - sin_tilt * v;
+						s_arg = (params->sod * s_arg - params->tau) / sqrt(1.0 + s_arg * s_arg);
+						s_arg = cos_tilt * s_arg - sin_tilt * v;
+
+						float theWeight = 1.0;
+						if (fabs(s_arg) <= delta)
+						{
+							theWeight = cos(PI / 4.0 * (s_arg - delta) / delta);
+							theWeight = theWeight * theWeight;
+						}
+						else if (s_arg < -delta)
+							theWeight = 0.0;
+						else
+							theWeight = 1.0;
+
+						if (abs_maxVal < abs_minVal)
+							theWeight = 1.0 - theWeight;
+
+						if (theWeight < 1e-12)
+							theWeight = float(1e-12);
+
+						retVal[i * params->numCols + j] = theWeight;
+					}
 				}
-				else if (s_arg < -delta)
-					theWeight = 0.0;
-				else
-					theWeight = 1.0;
-
-				if (abs_maxVal < abs_minVal)
-					theWeight = 1.0 - theWeight;
-
-				if (theWeight < 1e-12)
-					theWeight = float(1e-12);
+			}
+			else
+			{
+				for (int j = 0; j < params->numCols; j++)
+				{
+					s_arg = params->u(j);
+					if (params->detectorType == parameters::FLAT)
+						s_arg = (params->sod * s_arg - params->tau) / sqrt(1.0 + s_arg * s_arg);
+					else
+						s_arg = params->sod * sin(s_arg) - params->tau * cos(s_arg);
 
-				for (int i = 0; i < params->numAngles; i++)
-					retVal[i * params->numCols + j] = theWeight;
+					float theWeight = 1.0;
+					if (fabs(s_arg) <= delta)
+					{
+						theWeight = cos(PI / 4.0 * (s_arg - delta) / delta);
+						theWeight = theWeight * theWeight;
+					}
+					else if (s_arg < -delta)
+						theWeight = 0.0;
+					else
+						theWeight = 1.0;
+
+					if (abs_maxVal < abs_minVal)
+						theWeight = 1.0 - theWeight;
+
+					if (theWeight < 1e-12)
+						theWeight = float(1e-12);
+
+					for (int i = 0; i < params->numRows; i++)
+						retVal[i * params->numCols + j] = theWeight;
+				}
 			}
 			params->normalizeConeAndFanCoordinateFunctions = normalizeConeAndFanCoordinateFunctions_save;
 			return retVal;
diff --git a/src/tomographic_models.cpp b/src/tomographic_models.cpp
index 984ae2c..b3be7ba 100644
--- a/src/tomographic_models.cpp
+++ b/src/tomographic_models.cpp
@@ -1627,7 +1627,7 @@ float tomographicModels::get_FBPscalar()
 	return FBPscalar(&params);
 }
 
-bool tomographicModels::set_conebeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis, float sod, float sdd, float tau, float helicalPitch)
+bool tomographicModels::set_conebeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis, float sod, float sdd, float tau, float tiltAngle, float helicalPitch)
 {
 	params.geometry = parameters::CONE;
 	params.detectorType = parameters::FLAT;
@@ -1643,6 +1643,7 @@ bool tomographicModels::set_conebeam(int numAngles, int numRows, int numCols, fl
 	params.tau = tau;
 	params.set_angles(phis, numAngles);
 	params.set_helicalPitch(helicalPitch);
+	params.set_tiltAngle(tiltAngle);
 	if (params.geometryDefined())
 	{
 		params.set_offsetScan(params.offsetScan);
@@ -1656,6 +1657,7 @@ bool tomographicModels::set_coneparallel(int numAngles, int numRows, int numCols
 {
 	params.geometry = parameters::CONE_PARALLEL;
 	params.detectorType = parameters::FLAT;
+	params.tiltAngle = 0.0;
 	params.sod = sod;
 	params.sdd = sdd;
 	params.pixelWidth = pixelWidth;
@@ -1670,6 +1672,7 @@ bool tomographicModels::set_coneparallel(int numAngles, int numRows, int numCols
 	params.set_helicalPitch(helicalPitch);
 	if (params.geometryDefined())
 	{
+		params.set_tiltAngle(0.0);
 		params.set_offsetScan(params.offsetScan);
 		return true;
 	}
@@ -1680,6 +1683,7 @@ bool tomographicModels::set_coneparallel(int numAngles, int numRows, int numCols
 bool tomographicModels::set_fanbeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis, float sod, float sdd, float tau)
 {
 	params.detectorType = parameters::FLAT;
+	params.tiltAngle = 0.0;
 
 	params.geometry = parameters::FAN;
 	params.sod = sod;
@@ -1696,6 +1700,7 @@ bool tomographicModels::set_fanbeam(int numAngles, int numRows, int numCols, flo
 	if (params.geometryDefined())
 	{
 		params.set_helicalPitch(0.0);
+		params.set_tiltAngle(0.0);
 		params.set_offsetScan(params.offsetScan);
 		return true;
 	}
@@ -1706,6 +1711,7 @@ bool tomographicModels::set_fanbeam(int numAngles, int numRows, int numCols, flo
 bool tomographicModels::set_parallelbeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis)
 {
 	params.detectorType = parameters::FLAT;
+	params.tiltAngle = 0.0;
 
 	params.geometry = parameters::PARALLEL;
 	params.pixelWidth = pixelWidth;
@@ -1719,6 +1725,7 @@ bool tomographicModels::set_parallelbeam(int numAngles, int numRows, int numCols
 	if (params.geometryDefined())
 	{
 		params.set_helicalPitch(0.0);
+		params.set_tiltAngle(0.0);
 		params.set_offsetScan(params.offsetScan);
 		return true;
 	}
@@ -1730,6 +1737,7 @@ bool tomographicModels::set_modularbeam(int numAngles, int numRows, int numCols,
 {
 	params.detectorType = parameters::FLAT;
 	params.tau = 0.0;
+	params.tiltAngle = 0.0;
 	params.helicalPitch = 0.0;
 
 	params.geometry = parameters::MODULAR;
@@ -1744,6 +1752,7 @@ bool tomographicModels::set_modularbeam(int numAngles, int numRows, int numCols,
 	if (params.geometryDefined())
 	{
 		params.set_helicalPitch(0.0);
+		params.set_tiltAngle(0.0);
 		params.set_offsetScan(params.offsetScan);
 		return true;
 	}
@@ -1767,6 +1776,7 @@ bool tomographicModels::set_curvedDetector()
 	else
 	{
 		params.detectorType = parameters::CURVED;
+		params.tiltAngle = 0.0;
 		return true;
 	}
 }
@@ -2202,6 +2212,16 @@ bool tomographicModels::set_normalizedHelicalPitch(float h_normalized)
 	return params.set_normalizedHelicalPitch(h_normalized);
 }
 
+bool tomographicModels::set_tiltAngle(float tiltAngle)
+{
+	return params.set_tiltAngle(tiltAngle);
+}
+
+float tomographicModels::get_tiltAngle()
+{
+	return params.tiltAngle;
+}
+
 float tomographicModels::get_helicalPitch()
 {
 	return params.helicalPitch;
@@ -3894,25 +3914,25 @@ bool tomographicModels::synthesize_symmetry(float* f_radial, float* f)
 	return symObject.synthesizeSymmetry(f_radial, f);
 }
 
-bool tomographicModels::find_centerCol(float* g, int iRow, bool data_on_cpu)
+float tomographicModels::find_centerCol(float* g, int iRow, bool data_on_cpu)
 {
 	if (data_on_cpu)
 		return findCenter_cpu(g, &params, iRow);
 	else
 	{
 		printf("Error: find_centerCol not yet implemented for data on the GPU\n");
-		return false;
+		return 0.0;
 	}
 }
 
-bool tomographicModels::find_tau(float* g, int iRow, bool data_on_cpu)
+float tomographicModels::find_tau(float* g, int iRow, bool data_on_cpu)
 {
 	if (data_on_cpu)
 		return findCenter_cpu(g, &params, iRow, true);
 	else
 	{
 		printf("Error: find_tau not yet implemented for data on the GPU\n");
-		return false;
+		return 0.0;
 	}
 }
 
diff --git a/src/tomographic_models.h b/src/tomographic_models.h
index a59c96b..7de470b 100644
--- a/src/tomographic_models.h
+++ b/src/tomographic_models.h
@@ -297,10 +297,11 @@ class tomographicModels
 	 * \param[in]   sod source to object distance, measured in mm; this can also be viewed as the source to center of rotation distance
 	 * \param[in]   sdd source to detector distance, measured in mm
 	 * \param[in]   tau the center of rotation horizontal translation (mm)
+	 * \param[in]	tiltAngle the rotation of the detector around the optical axis (degrees)
 	 * \param[in]   helicalPitch the helical pitch (mm/radians)
 	 * \return      true if operation  was sucessful, false otherwise
 	 */
-	bool set_conebeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis, float sod, float sdd, float tau = 0.0, float helicalPitch = 0.0);
+	bool set_conebeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis, float sod, float sdd, float tau = 0.0, float tiltAngle = 0.0, float helicalPitch = 0.0);
 
 	/**
 	 * \fn          set_fanbeam
@@ -572,6 +573,21 @@ class tomographicModels
 	 */
 	bool set_normalizedHelicalPitch(float h_normalized);
 
+	/**
+	 * \fn          set_tiltAngle
+	 * \brief       sets tiltAngle
+	 * \param[in]   tiltAngle the value for tiltAngle (degrees)
+	 * \return      true if the value is valid, false otherwise
+	 */
+	bool set_tiltAngle(float tiltAngle_in);
+
+	/**
+	 * \fn          get_tiltAngle
+	 * \brief       gets tiltAngle
+	 * \return      tiltAngle
+	 */
+	float get_tiltAngle();
+
 	/**
 	 * \fn          get_helicalPitch
 	 * \brief       gets the helicalPitch
@@ -673,7 +689,7 @@ class tomographicModels
 	 * \param[in]   data_on_cpu true if data (g) is on the cpu, false if they are on the gpu
 	 * \return      true if operation  was sucessful, false otherwise
 	 */
-	bool find_centerCol(float* g, int iRow, bool data_on_cpu);
+	float find_centerCol(float* g, int iRow, bool data_on_cpu);
 
 	/**
 	 * \fn          find_tau
@@ -683,7 +699,7 @@ class tomographicModels
 	 * \param[in]   data_on_cpu true if data (g) is on the cpu, false if they are on the gpu
 	 * \return      true if operation  was sucessful, false otherwise
 	 */
-	bool find_tau(float* g, int iRow, bool data_on_cpu);
+	float find_tau(float* g, int iRow, bool data_on_cpu);
 
 	/**
 	 * \fn          estimate_tilt
diff --git a/src/tomographic_models_c_interface.cpp b/src/tomographic_models_c_interface.cpp
index b7888c1..eeeea12 100644
--- a/src/tomographic_models_c_interface.cpp
+++ b/src/tomographic_models_c_interface.cpp
@@ -290,9 +290,9 @@ float get_FBPscalar()
 	return tomo()->get_FBPscalar();
 }
 
-bool set_conebeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis, float sod, float sdd, float tau, float helicalPitch)
+bool set_conebeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis, float sod, float sdd, float tau, float tiltAngle, float helicalPitch)
 {
-	return tomo()->set_conebeam(numAngles, numRows, numCols, pixelHeight, pixelWidth, centerRow, centerCol, phis, sod, sdd, tau, helicalPitch);
+	return tomo()->set_conebeam(numAngles, numRows, numCols, pixelHeight, pixelWidth, centerRow, centerCol, phis, sod, sdd, tau, tiltAngle, helicalPitch);
 }
 
 bool set_fanbeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis, float sod, float sdd, float tau)
@@ -624,6 +624,11 @@ bool set_tau(float tau)
 	return tomo()->set_tau(tau);
 }
 
+bool set_tiltAngle(float tiltAngle)
+{
+	return tomo()->set_tiltAngle(tiltAngle);
+}
+
 bool set_helicalPitch(float h)
 {
 	return tomo()->set_helicalPitch(h);
@@ -807,6 +812,11 @@ float get_tau()
 	return tomo()->params.tau;
 }
 
+float get_tiltAngle()
+{
+	return tomo()->params.tiltAngle;
+}
+
 float get_helicalPitch()
 {
 	return tomo()->get_helicalPitch();
@@ -902,12 +912,12 @@ float get_z0()
 	return tomo()->params.z_0();
 }
 
-bool find_centerCol(float* g, int iRow, bool data_on_cpu)
+float find_centerCol(float* g, int iRow, bool data_on_cpu)
 {
 	return tomo()->find_centerCol(g, iRow, data_on_cpu);
 }
 
-bool find_tau(float* g, int iRow, bool data_on_cpu)
+float find_tau(float* g, int iRow, bool data_on_cpu)
 {
 	return tomo()->find_tau(g, iRow, data_on_cpu);
 }
@@ -1295,6 +1305,7 @@ PYBIND11_MODULE(leapct, m) {
     m.def("get_centerRow", &get_centerRow, "");
     m.def("get_centerCol", &get_centerCol, "");
     m.def("get_tau", &get_tau, "");
+	m.def("get_tiltAngle", &get_tiltAngle, "");
     m.def("get_helicalPitch", &get_helicalPitch, "");
     m.def("get_normalizedHelicalPitch", &get_normalizedHelicalPitch, "");
     m.def("get_z_source_offset", &get_z_source_offset, "");
diff --git a/src/tomographic_models_c_interface.h b/src/tomographic_models_c_interface.h
index ae9f63c..5c566f4 100644
--- a/src/tomographic_models_c_interface.h
+++ b/src/tomographic_models_c_interface.h
@@ -87,7 +87,7 @@ extern "C" PROJECTOR_API bool sensitivity(float* f, bool data_on_cpu);
 
 extern "C" PROJECTOR_API bool windowFOV(float* f, bool data_on_cpu);
 
-extern "C" PROJECTOR_API bool set_conebeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis, float sod, float sdd, float tau, float helicalPitch);
+extern "C" PROJECTOR_API bool set_conebeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis, float sod, float sdd, float tau, float tiltAngle, float helicalPitch);
 extern "C" PROJECTOR_API bool set_fanbeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis, float sod, float sdd, float tau);
 extern "C" PROJECTOR_API bool set_parallelbeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float centerRow, float centerCol, float* phis);
 extern "C" PROJECTOR_API bool set_modularbeam(int numAngles, int numRows, int numCols, float pixelHeight, float pixelWidth, float*, float*, float*, float*);
@@ -169,6 +169,7 @@ extern "C" PROJECTOR_API int get_rampID();
 extern "C" PROJECTOR_API bool set_FBPlowpass(float W);
 extern "C" PROJECTOR_API float get_FBPlowpass();
 extern "C" PROJECTOR_API bool set_tau(float tau);
+extern "C" PROJECTOR_API bool set_tiltAngle(float tiltAngle);
 extern "C" PROJECTOR_API bool set_helicalPitch(float h);
 extern "C" PROJECTOR_API bool set_normalizedHelicalPitch(float h_normalized);
 extern "C" PROJECTOR_API bool set_attenuationMap(float*);
@@ -190,6 +191,7 @@ extern "C" PROJECTOR_API float get_pixelHeight();
 extern "C" PROJECTOR_API float get_centerRow();
 extern "C" PROJECTOR_API float get_centerCol();
 extern "C" PROJECTOR_API float get_tau();
+extern "C" PROJECTOR_API float get_tiltAngle();
 extern "C" PROJECTOR_API float get_helicalPitch();
 extern "C" PROJECTOR_API float get_normalizedHelicalPitch();
 extern "C" PROJECTOR_API float get_z_source_offset();
@@ -213,8 +215,8 @@ extern "C" PROJECTOR_API float get_offsetY();
 extern "C" PROJECTOR_API float get_offsetZ();
 extern "C" PROJECTOR_API float get_z0();
 
-extern "C" PROJECTOR_API bool find_centerCol(float* g, int iRow, bool data_on_cpu);
-extern "C" PROJECTOR_API bool find_tau(float* g, int iRow, bool data_on_cpu);
+extern "C" PROJECTOR_API float find_centerCol(float* g, int iRow, bool data_on_cpu);
+extern "C" PROJECTOR_API float find_tau(float* g, int iRow, bool data_on_cpu);
 extern "C" PROJECTOR_API float consistency_cost(float* g, float Delta_centerRow, float Delta_centerCol, float Delta_tau, float Delta_tilt, bool data_on_cpu);
 extern "C" PROJECTOR_API float estimate_tilt(float* g, bool data_on_cpu);
 extern "C" PROJECTOR_API float conjugate_difference(float* g, float alpha, float centerCol, float* diff, bool data_on_cpu);
diff --git a/unitTests/unit_tests.py b/unitTests/unit_tests.py
index 09bd82a..02571a6 100644
--- a/unitTests/unit_tests.py
+++ b/unitTests/unit_tests.py
@@ -30,12 +30,14 @@
 projection_methods = ['VD', 'SF']
 angularRange = 360.0
 tau = 0.0
+tiltAngle = 1.0
 pitch = 0.0
 
 #voxelScales = [1.0]
 #voxelScales = [2.0]
+#projection_methods = ['SF']
 #projection_methods = ['VD']
-#geometries = [3]
+#geometries = []
 
 for ii in range(len(geometries)):
     igeom = geometries[ii]
@@ -49,12 +51,12 @@
     elif igeom == 2:
         leapct.set_coneparallel(numAngles, numRows, numCols, pixelSize, sod/sdd*pixelSize, centerRow, centerCol, leapct.setAngleArray(numAngles, angularRange), sod, sdd, tau, pitch)
     elif igeom == 3:
-        leapct.set_conebeam(numAngles, numRows, numCols, pixelSize, pixelSize, centerRow, centerCol, leapct.setAngleArray(numAngles, angularRange), sod, sdd, tau, pitch)
+        leapct.set_conebeam(numAngles, numRows, numCols, pixelSize, pixelSize, centerRow, centerCol, leapct.setAngleArray(numAngles, angularRange), sod, sdd, tau, pitch, tiltAngle)
     elif igeom == 4:
         leapct.set_conebeam(numAngles, numRows, numCols, pixelSize, pixelSize, centerRow, centerCol, leapct.setAngleArray(numAngles, angularRange), sod, sdd, tau, pitch)
         leapct.set_curvedDetector()
     elif igeom == 5:
-        leapct.set_conebeam(numAngles, numRows, numCols, pixelSize, pixelSize, centerRow, centerCol, leapct.setAngleArray(numAngles, angularRange), sod, sdd, tau)
+        leapct.set_conebeam(numAngles, numRows, numCols, pixelSize, pixelSize, centerRow, centerCol, leapct.setAngleArray(numAngles, angularRange), sod, sdd, tau, 0.0, tiltAngle)
         leapct.convert_to_modularbeam()
 
     #leapct.set_offsetScan(True)
@@ -118,4 +120,88 @@
             plt.show()
             #"""
             #quit()
-            
+
+#quit()
+
+# Now compare cone and modular to make sure they match
+leapct.reset()
+leapct.set_conebeam(numAngles, numRows, numCols, pixelSize, pixelSize, centerRow, centerCol, leapct.setAngleArray(numAngles, angularRange), sod, sdd, tau, pitch, tiltAngle)
+leapct_mod = tomographicModels()
+leapct_mod.copy_parameters(leapct)
+leapct_mod.convert_to_modularbeam()
+#leapct_mod.rotate_detector(1.0)
+#leapct.set_offsetScan(True)
+
+# Set true projection data
+g_true = leapct.allocate_projections()
+leapct.set_FORBILD(None, includeEar)
+st = time.time()
+leapct.rayTrace(g_true)
+print('ray trace time: ' + str(time.time()-st) + ' sec')
+
+for iscale in range(len(voxelScales)):
+    # Set true phantom
+    leapct.set_default_volume(voxelScales[iscale])
+    leapct_mod.copy_parameters(leapct)
+    leapct_mod.convert_to_modularbeam()
+    f_true = leapct.allocate_volume()
+    leapct.set_FORBILD(f_true, includeEar)
+    
+    leapct.print_parameters()
+    #leapct.display(f_true)
+    #leapct.sketch_system()
+    #quit()
+    
+    # Set true projection data
+    g_mod = leapct_mod.allocate_projections()
+    st = time.time()
+    leapct_mod.project(g_mod, f_true)
+    print('modular-beam project time: ' + str(time.time()-st) + ' sec')
+
+    # Test projection
+    g = leapct.allocate_projections()
+    st = time.time()
+    leapct.project(g, f_true)
+    print('cone-beam project time: ' + str(time.time()-st) + ' sec')
+
+    #leapct.display(g_true)
+    #leapct.display(f_true)
+
+    for imethod in range(len(projection_methods)):
+
+        leapct.set_projector(projection_methods[imethod])
+        
+        f_mod = leapct_mod.allocate_volume()
+        st = time.time()
+        leapct_mod.backproject(g_true, f_mod)
+        print('modular-beam FBP time: ' + str(time.time()-st) + ' sec')
+        
+        # Test FBP
+        f = leapct.allocate_volume()
+        st = time.time()
+        leapct.backproject(g_true, f)
+        print('cone-beam FBP time: ' + str(time.time()-st) + ' sec')
+        
+        #diff_g = g
+        #diff_f = f
+        diff_g = np.clip(100.0*(g-g_mod)/g_true, -10.0, 10.0)
+        diff_f = np.clip(100.0*(f-f_mod)/f_mod, -10.0, 10.0)
+
+        
+        #"""
+        plt.figure()
+        plt.subplot(2, 2, 1)
+        plt.title('projection')
+        plt.imshow(np.squeeze(diff_g[0,:,:]), cmap='gray')
+        plt.subplot(2, 2, 2)
+        plt.title('sinogram')
+        plt.imshow(np.squeeze(diff_g[:,numRows//2,:]), cmap='gray')
+        plt.subplot(2, 2, 3)
+        plt.title('z-slice')
+        plt.imshow(np.squeeze(diff_f[diff_f.shape[0]//2,:,:]), cmap='gray')
+        plt.subplot(2, 2, 4)
+        plt.title('x-slice')
+        plt.imshow(np.squeeze(diff_f[:,diff_f.shape[1]//2,:]), cmap='gray')
+        plt.show()
+        #"""
+        #quit()
\ No newline at end of file