diff --git a/rtengine/color.cc b/rtengine/color.cc
index 3be412b6d..ab9223358 100644
--- a/rtengine/color.cc
+++ b/rtengine/color.cc
@@ -719,7 +719,7 @@ void Color::rgb2hsv(float r, float g, float b, float &h, float &s, float &v)
     if (del_Max < 0.00001 && del_Max > -0.00001) { // no fabs, slow!
         s = 0.f;
     } else {
-        s = del_Max / var_Max;
+        s = del_Max / (var_Max == 0.0 ? 1.0 : var_Max);
 
         if (var_R == var_Max) {
             h = (var_G - var_B) / del_Max;
@@ -1747,15 +1747,41 @@ void Color::Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &
 }
 #endif // __SSE2__
 
+// Reads cachef[f] for f from 0 to 65535 and computes the value directly outside that range.
+inline float Color::computeXYZ2Lab(float f)
+{
+    if (f > 65535.f) { // above 65535: scaled cube root instead of a table read
+        return 327.68f * xcbrtf(f / MAXVALF);
+    } else if (f < 0.f) { // negative input: linear expression in f instead of a table read
+        return 327.68 * ((kappa * f / MAXVALF + 16.0) / 116.0);
+    }
+    return cachef[f];
+}
+
+
+// Reads cachefy[f] for f from 0 to 65535 and computes the value directly outside that range.
+inline float Color::computeXYZ2LabY(float f)
+{
+    if (f > 65535.f) { // above 65535: cube-root formula instead of a table read
+        return 327.68f * (116.f * xcbrtf(f / MAXVALF) - 16.f);
+    } else if (f < 0.f) { // negative input: value proportional to f instead of a table read
+        return 327.68 * (kappa * f / MAXVALF);
+    }
+    return cachefy[f];
+}
+
+
 void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b, const float wp[3][3], int width)
 {
 
 #ifdef __SSE2__
+    vfloat minvalfv = F2V(0.f);
     vfloat maxvalfv = F2V(MAXVALF);
     vfloat c500v = F2V(500.f);
     vfloat c200v = F2V(200.f);
 #endif
     int i = 0;
+    
 #ifdef __SSE2__
     for(;i < width - 3; i+=4) {
         const vfloat rv = LVFU(R[i]);
@@ -1766,17 +1792,18 @@ void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b,
         const vfloat zv = F2V(wp[2][0]) * rv + F2V(wp[2][1]) * gv + F2V(wp[2][2]) * bv;
 
         vmask maxMask = vmaskf_gt(vmaxf(xv, vmaxf(yv, zv)), maxvalfv);
-        if (_mm_movemask_ps((vfloat)maxMask)) {
+        vmask minMask = vmaskf_lt(vminf(xv, vminf(yv, zv)), minvalfv);
+        if (_mm_movemask_ps((vfloat)maxMask) || _mm_movemask_ps((vfloat)minMask)) {
             // take slower code path for all 4 pixels if one of the values is > MAXVALF. Still faster than non SSE2 version
             for(int k = 0; k < 4; ++k) {
                 float x = xv[k];
                 float y = yv[k];
                 float z = zv[k];
-                float fx = (x <= 65535.f ? cachef[x] : (327.68f * xcbrtf(x / MAXVALF)));
-                float fy = (y <= 65535.f ? cachef[y] : (327.68f * xcbrtf(y / MAXVALF)));
-                float fz = (z <= 65535.f ? cachef[z] : (327.68f * xcbrtf(z / MAXVALF)));
+                float fx = computeXYZ2Lab(x);
+                float fy = computeXYZ2Lab(y);
+                float fz = computeXYZ2Lab(z);
 
-                L[i + k] = (y <= 65535.0f ? cachefy[y] : 327.68f * (116.f * xcbrtf(y / MAXVALF) - 16.f));
+                L[i + k] = computeXYZ2LabY(y);
                 a[i + k] = (500.f * (fx - fy) );
                 b[i + k] = (200.f * (fy - fz) );
             }
@@ -1800,11 +1827,11 @@ void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b,
         float z = wp[2][0] * rv + wp[2][1] * gv + wp[2][2] * bv;
         float fx, fy, fz;
 
-        fx = (x <= 65535.0f ? cachef[x] : (327.68f * xcbrtf(x / MAXVALF)));
-        fy = (y <= 65535.0f ? cachef[y] : (327.68f * xcbrtf(y / MAXVALF)));
-        fz = (z <= 65535.0f ? cachef[z] : (327.68f * xcbrtf(z / MAXVALF)));
+        fx = computeXYZ2Lab(x);
+        fy = computeXYZ2Lab(y);
+        fz = computeXYZ2Lab(z);
 
-        L[i] = (y <= 65535.0f ? cachefy[y] : 327.68f * (116.f * xcbrtf(y / MAXVALF) - 16.f));
+        L[i] = computeXYZ2LabY(y);
         a[i] = 500.0f * (fx - fy);
         b[i] = 200.0f * (fy - fz);
     }
@@ -1818,11 +1845,11 @@ void Color::XYZ2Lab(float X, float Y, float Z, float &L, float &a, float &b)
     float y = Y;
     float fx, fy, fz;
 
-    fx = (x <= 65535.0f ? cachef[x] : (327.68f * xcbrtf(x / MAXVALF)));
-    fy = (y <= 65535.0f ? cachef[y] : (327.68f * xcbrtf(y / MAXVALF)));
-    fz = (z <= 65535.0f ? cachef[z] : (327.68f * xcbrtf(z / MAXVALF)));
+    fx = computeXYZ2Lab(x);
+    fy = computeXYZ2Lab(y);
+    fz = computeXYZ2Lab(z);
 
-    L = (y <= 65535.0f ? cachefy[y] : 327.68f * (116.f * xcbrtf(y / MAXVALF) - 16.f));
+    L = computeXYZ2LabY(y);
     a = (500.0f * (fx - fy) );
     b = (200.0f * (fy - fz) );
 }
@@ -1854,11 +1881,11 @@ void Color::Yuv2Lab(float Yin, float u, float v, float &L, float &a, float &b, c
 
     gamutmap(X, Y, Z, wp);
 
-    float fx = (X <= 65535.0 ? cachef[X] : (327.68 * std::cbrt(X / MAXVALF)));
-    float fy = (Y <= 65535.0 ? cachef[Y] : (327.68 * std::cbrt(Y / MAXVALF)));
-    float fz = (Z <= 65535.0 ? cachef[Z] : (327.68 * std::cbrt(Z / MAXVALF)));
+    float fx = computeXYZ2Lab(X);
+    float fy = computeXYZ2Lab(Y);
+    float fz = computeXYZ2Lab(Z);
 
-    L = (Y <= 65535.0f ? cachefy[Y] : 327.68f * (116.f * xcbrtf(Y / MAXVALF) - 16.f));
+    L = computeXYZ2LabY(Y);
     a = (500.0 * (fx - fy) );
     b = (200.0 * (fy - fz) );
 }
diff --git a/rtengine/color.h b/rtengine/color.h
index 9775e0ecb..5b863d735 100644
--- a/rtengine/color.h
+++ b/rtengine/color.h
@@ -97,6 +97,10 @@ private:
 #ifdef __SSE2__
     static vfloat hue2rgb(vfloat p, vfloat q, vfloat t);
 #endif
+
+    static float computeXYZ2Lab(float f);
+    static float computeXYZ2LabY(float f);
+    
 public:
 
     typedef enum Channel {
diff --git a/rtengine/curves.cc b/rtengine/curves.cc
index e856ee1cc..aab74a7de 100644
--- a/rtengine/curves.cc
+++ b/rtengine/curves.cc
@@ -1827,6 +1827,14 @@ void PerceptualToneCurve::BatchApply(const size_t start, const size_t end, float
     const AdobeToneCurve& adobeTC = static_cast<const AdobeToneCurve&>((const ToneCurve&) * this);
 
     for (size_t i = start; i < end; ++i) {
+        const bool oog_r = OOG(rc[i]);
+        const bool oog_g = OOG(gc[i]);
+        const bool oog_b = OOG(bc[i]);
+
+        if (oog_r && oog_g && oog_b) {
+            continue;
+        }
+        
         float r = CLIP(rc[i]);
         float g = CLIP(gc[i]);
         float b = CLIP(bc[i]);
@@ -1848,12 +1856,18 @@ void PerceptualToneCurve::BatchApply(const size_t start, const size_t end, float
 
         if (ar >= 65535.f && ag >= 65535.f && ab >= 65535.f) {
             // clip fast path, will also avoid strange colours of clipped highlights
-            rc[i] = gc[i] = bc[i] = 65535.f;
+            // set to 65535 only the channels that OOG() did not flag on input; flagged ones keep their value
+            if (!oog_r) rc[i] = 65535.f;
+            if (!oog_g) gc[i] = 65535.f;
+            if (!oog_b) bc[i] = 65535.f;
             continue;
         }
 
         if (ar <= 0.f && ag <= 0.f && ab <= 0.f) {
-            rc[i] = gc[i] = bc[i] = 0;
+            // set to 0 only the channels that OOG() did not flag on input; flagged ones keep their value
+            if (!oog_r) rc[i] = 0.f;
+            if (!oog_g) gc[i] = 0.f;
+            if (!oog_b) bc[i] = 0.f;
             continue;
         }
 
@@ -1893,9 +1907,9 @@ void PerceptualToneCurve::BatchApply(const size_t start, const size_t end, float
                 g = newg;
                 b = newb;
             }
-            rc[i] = r;
-            gc[i] = g;
-            bc[i] = b;
+            if (!oog_r) rc[i] = r;
+            if (!oog_g) gc[i] = g;
+            if (!oog_b) bc[i] = b;
 
             continue;
         }
@@ -2003,9 +2017,9 @@ void PerceptualToneCurve::BatchApply(const size_t start, const size_t end, float
                 b = newb;
             }
 
-            rc[i] = r;
-            gc[i] = g;
-            bc[i] = b;
+            if (!oog_r) rc[i] = r;
+            if (!oog_g) gc[i] = g;
+            if (!oog_b) bc[i] = b;
 
             continue;
         }
@@ -2066,9 +2080,9 @@ void PerceptualToneCurve::BatchApply(const size_t start, const size_t end, float
             g = newg;
             b = newb;
         }
-        rc[i] = r;
-        gc[i] = g;
-        bc[i] = b;
+        if (!oog_r) rc[i] = r;
+        if (!oog_g) gc[i] = g;
+        if (!oog_b) bc[i] = b;
     }
 }
 float PerceptualToneCurve::cf_range[2];
diff --git a/rtengine/curves.h b/rtengine/curves.h
index d8e443fc9..23a2aa0c3 100644
--- a/rtengine/curves.h
+++ b/rtengine/curves.h
@@ -45,6 +45,32 @@ namespace rtengine
 class ToneCurve;
 class ColorAppearance;
 
+namespace curves {
+
+// In place: val = lut[val], except that OOG() values get lut[0] (negative) or lut[MAXVALF] - MAXVALF added.
+inline void setLutVal(const LUTf &lut, float &val)
+{
+    if (OOG(val)) {
+        val += (val < 0.f) ? lut[0.f] : (lut[MAXVALF] - MAXVALF);
+    } else {
+        val = lut[std::max(val, 0.f)];
+    }
+}
+
+// Variant for an already looked-up value: stores `lutval` when OOG(val) is false.
+// NOTE(review): a negative OOG `val` stays untouched here, unlike the LUTf overload -- confirm.
+inline void setLutVal(float &val, float lutval, float maxval)
+{
+    if (!OOG(val)) {
+        val = lutval;
+        return;
+    }
+    if (val > 0.f) {
+        val += maxval - MAXVALF;
+    }
+}
+} // namespace curves
+
 class CurveFactory
 {
 
@@ -733,7 +759,7 @@ inline void Lightcurve::Apply (float& Li) const
 
     assert (lutColCurve);
 
-    Li = lutColCurve[Li];
+    curves::setLutVal(lutColCurve, Li);
 }
 
 class Brightcurve : public ColorAppearance
@@ -748,7 +774,7 @@ inline void Brightcurve::Apply (float& Br) const
 
     assert (lutColCurve);
 
-    Br = lutColCurve[Br];
+    curves::setLutVal(lutColCurve, Br);
 }
 
 class Chromacurve : public ColorAppearance
@@ -763,7 +789,7 @@ inline void Chromacurve::Apply (float& Cr) const
 
     assert (lutColCurve);
 
-    Cr = lutColCurve[Cr];
+    curves::setLutVal(lutColCurve, Cr);
 }
 class Saturcurve : public ColorAppearance
 {
@@ -777,7 +803,7 @@ inline void Saturcurve::Apply (float& Sa) const
 
     assert (lutColCurve);
 
-    Sa = lutColCurve[Sa];
+    curves::setLutVal(lutColCurve, Sa);
 }
 
 class Colorfcurve : public ColorAppearance
@@ -792,7 +818,7 @@ inline void Colorfcurve::Apply (float& Cf) const
 
     assert (lutColCurve);
 
-    Cf = lutColCurve[Cf];
+    curves::setLutVal(lutColCurve, Cf);
 }
 
 
@@ -881,9 +907,9 @@ inline void StandardToneCurve::Apply (float& r, float& g, float& b) const
 
     assert (lutToneCurve);
 
-    r = lutToneCurve[r];
-    g = lutToneCurve[g];
-    b = lutToneCurve[b];
+    curves::setLutVal(lutToneCurve, r);
+    curves::setLutVal(lutToneCurve, g);
+    curves::setLutVal(lutToneCurve, b);
 }
 
 inline void StandardToneCurve::BatchApply(
@@ -910,27 +936,36 @@ inline void StandardToneCurve::BatchApply(
             break;
 #endif
         }
-        r[i] = lutToneCurve[r[i]];
-        g[i] = lutToneCurve[g[i]];
-        b[i] = lutToneCurve[b[i]];
+        curves::setLutVal(lutToneCurve, r[i]);
+        curves::setLutVal(lutToneCurve, g[i]);
+        curves::setLutVal(lutToneCurve, b[i]);
         i++;
     }
 
 #ifdef __SSE2__
+    vfloat tmpr;
+    vfloat tmpg;
+    vfloat tmpb;
+    float mv = lutToneCurve[MAXVALF];
     for (; i + 3 < end; i += 4) {
         __m128 r_val = LVF(r[i]);
         __m128 g_val = LVF(g[i]);
         __m128 b_val = LVF(b[i]);
-        STVF(r[i], lutToneCurve[r_val]);
-        STVF(g[i], lutToneCurve[g_val]);
-        STVF(b[i], lutToneCurve[b_val]);
+        STVF(tmpr[0], lutToneCurve[r_val]);
+        STVF(tmpg[0], lutToneCurve[g_val]);
+        STVF(tmpb[0], lutToneCurve[b_val]);
+        for (int j = 0; j < 4; ++j) {
+            curves::setLutVal(r[i+j], tmpr[j], mv);
+            curves::setLutVal(g[i+j], tmpg[j], mv);
+            curves::setLutVal(b[i+j], tmpb[j], mv);
+        }
     }
 
     // Remainder in non-SSE.
     for (; i < end; ++i) {
-        r[i] = lutToneCurve[r[i]];
-        g[i] = lutToneCurve[g[i]];
-        b[i] = lutToneCurve[b[i]];
+        curves::setLutVal(lutToneCurve, r[i]);
+        curves::setLutVal(lutToneCurve, g[i]);
+        curves::setLutVal(lutToneCurve, b[i]);
     }
 #endif
 }
@@ -938,10 +973,13 @@ inline void StandardToneCurve::BatchApply(
 // Tone curve according to Adobe's reference implementation
 // values in 0xffff space
 // inlined to make sure there will be no cache flush when used
-inline void AdobeToneCurve::Apply (float& r, float& g, float& b) const
+inline void AdobeToneCurve::Apply (float& ir, float& ig, float& ib) const
 {
 
     assert (lutToneCurve);
+    float r = CLIP(ir);
+    float g = CLIP(ig);
+    float b = CLIP(ib);
 
     if (r >= g) {
         if      (g > b) {
@@ -964,6 +1002,10 @@ inline void AdobeToneCurve::Apply (float& r, float& g, float& b) const
             RGBTone (g, b, r);    // Case 7: g >= b >  r
         }
     }
+
+    setUnlessOOG(ir, r);
+    setUnlessOOG(ig, g);
+    setUnlessOOG(ib, b);
 }
 
 inline void AdobeToneCurve::RGBTone (float& r, float& g, float& b) const
@@ -976,10 +1018,14 @@ inline void AdobeToneCurve::RGBTone (float& r, float& g, float& b) const
 }
 
 // Modifying the Luminance channel only
-inline void LuminanceToneCurve::Apply(float &r, float &g, float &b) const
+inline void LuminanceToneCurve::Apply(float &ir, float &ig, float &ib) const
 {
     assert (lutToneCurve);
 
+    float r = CLIP(ir);
+    float g = CLIP(ig);
+    float b = CLIP(ib);
+
     float currLuminance = r * 0.2126729f + g * 0.7151521f + b * 0.0721750f;
     const float newLuminance = lutToneCurve[currLuminance];
     currLuminance = currLuminance == 0.f ? 0.00001f : currLuminance;
@@ -987,6 +1033,10 @@ inline void LuminanceToneCurve::Apply(float &r, float &g, float &b) const
     r = LIM<float>(r * coef, 0.f, 65535.f);
     g = LIM<float>(g * coef, 0.f, 65535.f);
     b = LIM<float>(b * coef, 0.f, 65535.f);
+
+    setUnlessOOG(ir, r);
+    setUnlessOOG(ig, g);
+    setUnlessOOG(ib, b);
 }
 
 inline float WeightedStdToneCurve::Triangle(float a, float a1, float b) const
@@ -1020,14 +1070,14 @@ inline vfloat WeightedStdToneCurve::Triangle(vfloat a, vfloat a1, vfloat b) cons
 
 // Tone curve modifying the value channel only, preserving hue and saturation
 // values in 0xffff space
-inline void WeightedStdToneCurve::Apply (float& r, float& g, float& b) const
+inline void WeightedStdToneCurve::Apply (float& ir, float& ig, float& ib) const
 {
 
     assert (lutToneCurve);
 
-    r = CLIP(r);
-    g = CLIP(g);
-    b = CLIP(b);
+    float r = CLIP(ir);
+    float g = CLIP(ig);
+    float b = CLIP(ib);
     float r1 = lutToneCurve[r];
     float g1 = Triangle(r, r1, g);
     float b1 = Triangle(r, r1, b);
@@ -1043,6 +1093,10 @@ inline void WeightedStdToneCurve::Apply (float& r, float& g, float& b) const
     r = CLIP<float>(r1 * 0.50f + r2 * 0.25f + r3 * 0.25f);
     g = CLIP<float>(g1 * 0.25f + g2 * 0.50f + g3 * 0.25f);
     b = CLIP<float>(b1 * 0.25f + b2 * 0.25f + b3 * 0.50f);
+
+    setUnlessOOG(ir, r);
+    setUnlessOOG(ig, g);
+    setUnlessOOG(ib, b);
 }
 
 inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t end, float *r, float *g, float *b) const {
@@ -1076,6 +1130,10 @@ inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t en
     const vfloat zd5v = F2V(0.5f);
     const vfloat zd25v = F2V(0.25f);
 
+    vfloat tmpr;
+    vfloat tmpg;
+    vfloat tmpb;
+
     for (; i + 3 < end; i += 4) {
         vfloat r_val = LIMV(LVF(r[i]), ZEROV, c65535v);
         vfloat g_val = LIMV(LVF(g[i]), ZEROV, c65535v);
@@ -1092,9 +1150,14 @@ inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t en
         vfloat r3 = Triangle(b_val, b3, r_val);
         vfloat g3 = Triangle(b_val, b3, g_val);
 
-        STVF(r[i], LIMV(r1 * zd5v + r2 * zd25v + r3 * zd25v, ZEROV, c65535v));
-        STVF(g[i], LIMV(g1 * zd25v + g2 * zd5v + g3 * zd25v, ZEROV, c65535v));
-        STVF(b[i], LIMV(b1 * zd25v + b2 * zd25v + b3 * zd5v, ZEROV, c65535v));
+        STVF(tmpr[0], LIMV(r1 * zd5v + r2 * zd25v + r3 * zd25v, ZEROV, c65535v));
+        STVF(tmpg[0], LIMV(g1 * zd25v + g2 * zd5v + g3 * zd25v, ZEROV, c65535v));
+        STVF(tmpb[0], LIMV(b1 * zd25v + b2 * zd25v + b3 * zd5v, ZEROV, c65535v));
+        for (int j = 0; j < 4; ++j) {
+            setUnlessOOG(r[i+j], tmpr[j]);
+            setUnlessOOG(g[i+j], tmpg[j]);
+            setUnlessOOG(b[i+j], tmpb[j]);
+        }
     }
 
     // Remainder in non-SSE.
@@ -1106,14 +1169,14 @@ inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t en
 
 // Tone curve modifying the value channel only, preserving hue and saturation
 // values in 0xffff space
-inline void SatAndValueBlendingToneCurve::Apply (float& r, float& g, float& b) const
+inline void SatAndValueBlendingToneCurve::Apply (float& ir, float& ig, float& ib) const
 {
 
     assert (lutToneCurve);
 
-    r = CLIP(r);
-    g = CLIP(g);
-    b = CLIP(b);
+    float r = CLIP(ir);
+    float g = CLIP(ig);
+    float b = CLIP(ib);
 
     const float lum = (r + g + b) / 3.f;
     const float newLum = lutToneCurve[lum];
@@ -1137,6 +1200,10 @@ inline void SatAndValueBlendingToneCurve::Apply (float& r, float& g, float& b) c
         dV = v * coef;
     }
     Color::hsv2rgbdcp(h, s, v + dV, r, g, b);
+
+    setUnlessOOG(ir, r);
+    setUnlessOOG(ig, g);
+    setUnlessOOG(ib, b);
 }
 
 }
diff --git a/rtengine/dcp.cc b/rtengine/dcp.cc
index d00827d73..09d6bb263 100644
--- a/rtengine/dcp.cc
+++ b/rtengine/dcp.cc
@@ -1190,13 +1190,17 @@ void DCPProfile::step2ApplyTile(float* rc, float* gc, float* bc, int width, int
                 }
 
                 // with looktable and tonecurve we need to clip
-                newr = FCLIP(newr);
-                newg = FCLIP(newg);
-                newb = FCLIP(newb);
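+                // newr/newg/newb are no longer clipped unconditionally at this point: the look
+                // table branch below converts FCLIP'ed copies instead and stores the result
+                // with setUnlessOOG().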
 
                 if (as_in.data->apply_look_table) {
+                    float cnewr = FCLIP(newr);
+                    float cnewg = FCLIP(newg);
+                    float cnewb = FCLIP(newb);
+                    
                     float h, s, v;
-                    Color::rgb2hsvdcp(newr, newg, newb, h, s, v);
+                    Color::rgb2hsvdcp(cnewr, cnewg, cnewb, h, s, v);
 
                     hsdApply(look_info, look_table, h, s, v);
                     s = CLIP01(s);
@@ -1209,7 +1213,11 @@ void DCPProfile::step2ApplyTile(float* rc, float* gc, float* bc, int width, int
                         h -= 6.0f;
                     }
 
-                    Color::hsv2rgbdcp( h, s, v, newr, newg, newb);
+                    Color::hsv2rgbdcp( h, s, v, cnewr, cnewg, cnewb);
+
+                    setUnlessOOG(newr, cnewr);
+                    setUnlessOOG(newg, cnewg);
+                    setUnlessOOG(newb, cnewb);
                 }
 
                 if (as_in.data->use_tone_curve) {
diff --git a/rtengine/dirpyr_equalizer.cc b/rtengine/dirpyr_equalizer.cc
index 7d3ace3ff..69c01be8c 100644
--- a/rtengine/dirpyr_equalizer.cc
+++ b/rtengine/dirpyr_equalizer.cc
@@ -240,7 +240,7 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt
 
     for (int i = 0; i < srcheight; i++)
         for (int j = 0; j < srcwidth; j++) {
-            dst[i][j] = CLIP(buffer[i][j]);  // TODO: Really a clip necessary?
+            dst[i][j] = /*CLIP*/(buffer[i][j]);  // TODO: Really a clip necessary?
         }
 
 }
@@ -367,7 +367,7 @@ void ImProcFunctions :: dirpyr_equalizercam (CieImage *ncie, float ** src, float
         for (int i = 0; i < srcheight; i++)
             for (int j = 0; j < srcwidth; j++) {
                 if(ncie->J_p[i][j] > 8.f && ncie->J_p[i][j] < 92.f) {
-                    dst[i][j] = CLIP( buffer[i][j] );    // TODO: Really a clip necessary?
+                    dst[i][j] = /*CLIP*/( buffer[i][j] );    // TODO: Really a clip necessary?
                 } else {
                     dst[i][j] = src[i][j];
                 }
@@ -375,7 +375,7 @@ void ImProcFunctions :: dirpyr_equalizercam (CieImage *ncie, float ** src, float
     } else {
         for (int i = 0; i < srcheight; i++)
             for (int j = 0; j < srcwidth; j++) {
-                dst[i][j] = CLIP( buffer[i][j] );  // TODO: Really a clip necessary?
+                dst[i][j] = /*CLIP*/( buffer[i][j] );  // TODO: Really a clip necessary?
             }
     }
 }
diff --git a/rtengine/iimage.h b/rtengine/iimage.h
index d0fe626c2..d09f46a3a 100644
--- a/rtengine/iimage.h
+++ b/rtengine/iimage.h
@@ -119,7 +119,7 @@ inline void ImageDatas::convertTo(unsigned char src, unsigned short& dst) const
 template<>
 inline void ImageDatas::convertTo(float src, unsigned char& dst) const
 {
-    dst = uint16ToUint8Rounded(src);
+    dst = uint16ToUint8Rounded(CLIP(src));
 }
 template<>
 inline void ImageDatas::convertTo(unsigned char src, float& dst) const
diff --git a/rtengine/imagefloat.cc b/rtengine/imagefloat.cc
index f6cc252fa..4aaeb3e24 100644
--- a/rtengine/imagefloat.cc
+++ b/rtengine/imagefloat.cc
@@ -146,6 +146,9 @@ void Imagefloat::setScanline (int row, unsigned char* buffer, int bps, unsigned
     }
 }
 
+// filmlike_clip() is defined in improcfun.cc; declared locally, presumably because no header exposes it (TODO confirm).
+namespace rtengine { extern void filmlike_clip(float *r, float *g, float *b); }
+
 void Imagefloat::getScanline (int row, unsigned char* buffer, int bps)
 {
 
@@ -163,18 +166,24 @@ void Imagefloat::getScanline (int row, unsigned char* buffer, int bps)
             sbuffer[ix++] = g(row, i) / 65535.f;
             sbuffer[ix++] = b(row, i) / 65535.f;
         }
-    } else if (bps == 16) {
+    } else {
         unsigned short *sbuffer = (unsigned short *)buffer;
         for (int i = 0, ix = 0; i < width; i++) {
-            sbuffer[ix++] = CLIP(r(row, i));
-            sbuffer[ix++] = CLIP(g(row, i));
-            sbuffer[ix++] = CLIP(b(row, i));
-        }
-    } else if (bps == 8) {
-        for (int i = 0, ix = 0; i < width; i++) {
-            buffer[ix++] = rtengine::uint16ToUint8Rounded(CLIP(r(row, i)));
-            buffer[ix++] = rtengine::uint16ToUint8Rounded(CLIP(g(row, i)));
-            buffer[ix++] = rtengine::uint16ToUint8Rounded(CLIP(b(row, i)));
+            float ri = r(row, i);
+            float gi = g(row, i);
+            float bi = b(row, i);
+            if (ri > 65535.f || gi > 65535.f || bi > 65535.f) {
+                filmlike_clip(&ri, &gi, &bi);
+            }
+            if (bps == 16) {
+                sbuffer[ix++] = CLIP(ri);
+                sbuffer[ix++] = CLIP(gi);
+                sbuffer[ix++] = CLIP(bi);
+            } else if (bps == 8) {
+                buffer[ix++] = rtengine::uint16ToUint8Rounded(CLIP(ri));
+                buffer[ix++] = rtengine::uint16ToUint8Rounded(CLIP(gi));
+                buffer[ix++] = rtengine::uint16ToUint8Rounded(CLIP(bi));
+            }
         }
     }
 }
@@ -238,6 +247,8 @@ void Imagefloat::getStdImage (ColorTemp ctemp, int tran, Imagefloat* image, Prev
     gm /= area;
     bm /= area;
 
+    const auto CLIP0 = [](float v) -> float { return std::max(v, 0.f); };
+
 #ifdef _OPENMP
     #pragma omp parallel
     {
@@ -270,9 +281,9 @@ void Imagefloat::getStdImage (ColorTemp ctemp, int tran, Imagefloat* image, Prev
                         continue;
                     }
 
-                    lineR[dst_x] = CLIP(rm2 * r(src_y, src_x));
-                    lineG[dst_x] = CLIP(gm2 * g(src_y, src_x));
-                    lineB[dst_x] = CLIP(bm2 * b(src_y, src_x));
+                    lineR[dst_x] = CLIP0(rm2 * r(src_y, src_x));
+                    lineG[dst_x] = CLIP0(gm2 * g(src_y, src_x));
+                    lineB[dst_x] = CLIP0(bm2 * b(src_y, src_x));
                 }
             } else {
                 // source image, first line of the current destination row
@@ -303,15 +314,15 @@ void Imagefloat::getStdImage (ColorTemp ctemp, int tran, Imagefloat* image, Prev
                     // convert back to gamma and clip
                     if (src_sub_width == skip && src_sub_height == skip) {
                         // Common case where the sub-region is complete
-                        lineR[dst_x] = CLIP(rm * rtot);
-                        lineG[dst_x] = CLIP(gm * gtot);
-                        lineB[dst_x] = CLIP(bm * btot);
+                        lineR[dst_x] = CLIP0(rm * rtot);
+                        lineG[dst_x] = CLIP0(gm * gtot);
+                        lineB[dst_x] = CLIP0(bm * btot);
                     } else {
                         // computing a special factor for this incomplete sub-region
                         float area = src_sub_width * src_sub_height;
-                        lineR[dst_x] = CLIP(rm2 * rtot / area);
-                        lineG[dst_x] = CLIP(gm2 * gtot / area);
-                        lineB[dst_x] = CLIP(bm2 * btot / area);
+                        lineR[dst_x] = CLIP0(rm2 * rtot / area);
+                        lineG[dst_x] = CLIP0(gm2 * gtot / area);
+                        lineB[dst_x] = CLIP0(bm2 * btot / area);
                     }
                 }
             }
@@ -357,9 +368,9 @@ Imagefloat::to8()
 
     for (int h = 0; h < height; ++h) {
         for (int w = 0; w < width; ++w) {
-            img8->r(h, w) = uint16ToUint8Rounded(r(h, w));
-            img8->g(h, w) = uint16ToUint8Rounded(g(h, w));
-            img8->b(h, w) = uint16ToUint8Rounded(b(h, w));
+            img8->r(h, w) = uint16ToUint8Rounded(CLIP(r(h, w)));
+            img8->g(h, w) = uint16ToUint8Rounded(CLIP(g(h, w)));
+            img8->b(h, w) = uint16ToUint8Rounded(CLIP(b(h, w)));
         }
     }
 
@@ -376,9 +387,9 @@ Imagefloat::to16()
 
     for (int h = 0; h < height; ++h) {
         for (int w = 0; w < width; ++w) {
-            img16->r(h, w) = r(h, w);
-            img16->g(h, w) = g(h, w);
-            img16->b(h, w) = b(h, w);
+            img16->r(h, w) = CLIP(r(h, w));
+            img16->g(h, w) = CLIP(g(h, w));
+            img16->b(h, w) = CLIP(b(h, w));
         }
     }
 
diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc
index ad98f10c9..3a25ca453 100644
--- a/rtengine/improcfun.cc
+++ b/rtengine/improcfun.cc
@@ -50,10 +50,13 @@
 namespace {
 
 using namespace rtengine;
-// begin of helper function for rgbProc()
-void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize) {
 
-#ifdef __SSE2__
+
+// begin of helper function for rgbProc()
+void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize)
+{
+
+#if defined( __SSE2__ ) && defined( __x86_64__ )
     vfloat cr = F2V(0.299f);
     vfloat cg = F2V(0.587f);
     vfloat cb = F2V(0.114f);
@@ -61,8 +64,9 @@ void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float
 
     for (int i = istart, ti = 0; i < tH; i++, ti++) {
         int j = jstart, tj = 0;
-#ifdef __SSE2__
-        for (; j < tW - 3; j+=4, tj+=4) {
+#if defined( __SSE2__ ) && defined( __x86_64__ )
+
+        for (; j < tW - 3; j += 4, tj += 4) {
 
             vfloat rv = LVF(rtemp[ti * tileSize + tj]);
             vfloat gv = LVF(gtemp[ti * tileSize + tj]);
@@ -75,7 +79,9 @@ void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float
             STVF(gtemp[ti * tileSize + tj], gv * tonefactorv);
             STVF(btemp[ti * tileSize + tj], bv * tonefactorv);
         }
+
 #endif
+
         for (; j < tW; j++, tj++) {
 
             float r = rtemp[ti * tileSize + tj];
@@ -92,17 +98,19 @@ void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float
     }
 }
 
-void highlightToneCurve(const LUTf &hltonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize, float exp_scale, float comp, float hlrange) {
+void highlightToneCurve(const LUTf &hltonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize, float exp_scale, float comp, float hlrange)
+{
 
-#ifdef __SSE2__
+#if defined( __SSE2__ ) && defined( __x86_64__ )
     vfloat threev = F2V(3.f);
     vfloat maxvalfv = F2V(MAXVALF);
 #endif
 
     for (int i = istart, ti = 0; i < tH; i++, ti++) {
         int j = jstart, tj = 0;
-#ifdef __SSE2__
-        for (; j < tW - 3; j+=4, tj+=4) {
+#if defined( __SSE2__ ) && defined( __x86_64__ )
+
+        for (; j < tW - 3; j += 4, tj += 4) {
 
             vfloat rv = LVF(rtemp[ti * tileSize + tj]);
             vfloat gv = LVF(gtemp[ti * tileSize + tj]);
@@ -111,14 +119,15 @@ void highlightToneCurve(const LUTf &hltonecurve, float *rtemp, float *gtemp, flo
             //TODO: proper treatment of out-of-gamut colors
             //float tonefactor = hltonecurve[(0.299f*r+0.587f*g+0.114f*b)];
             vmask maxMask = vmaskf_ge(vmaxf(rv, vmaxf(gv, bv)), maxvalfv);
-            if(_mm_movemask_ps((vfloat)maxMask)) {
+
+            if (_mm_movemask_ps((vfloat)maxMask)) {
                 for (int k = 0; k < 4; ++k) {
                     float r = rtemp[ti * tileSize + tj + k];
                     float g = gtemp[ti * tileSize + tj + k];
                     float b = btemp[ti * tileSize + tj + k];
-                    float tonefactor = ((r < MAXVALF ? hltonecurve[r] : CurveFactory::hlcurve (exp_scale, comp, hlrange, r) ) +
-                                        (g < MAXVALF ? hltonecurve[g] : CurveFactory::hlcurve (exp_scale, comp, hlrange, g) ) +
-                                        (b < MAXVALF ? hltonecurve[b] : CurveFactory::hlcurve (exp_scale, comp, hlrange, b) ) ) / 3.0;
+                    float tonefactor = ((r < MAXVALF ? hltonecurve[r] : CurveFactory::hlcurve(exp_scale, comp, hlrange, r)) +
+                                        (g < MAXVALF ? hltonecurve[g] : CurveFactory::hlcurve(exp_scale, comp, hlrange, g)) +
+                                        (b < MAXVALF ? hltonecurve[b] : CurveFactory::hlcurve(exp_scale, comp, hlrange, b))) / 3.0;
 
                     // note: tonefactor includes exposure scaling, that is here exposure slider and highlight compression takes place
                     rtemp[ti * tileSize + tj + k] = r * tonefactor;
@@ -133,7 +142,9 @@ void highlightToneCurve(const LUTf &hltonecurve, float *rtemp, float *gtemp, flo
                 STVF(btemp[ti * tileSize + tj], bv * tonefactorv);
             }
         }
+
 #endif
+
         for (; j < tW; j++, tj++) {
 
             float r = rtemp[ti * tileSize + tj];
@@ -142,9 +153,9 @@ void highlightToneCurve(const LUTf &hltonecurve, float *rtemp, float *gtemp, flo
 
             //TODO: proper treatment of out-of-gamut colors
             //float tonefactor = hltonecurve[(0.299f*r+0.587f*g+0.114f*b)];
-            float tonefactor = ((r < MAXVALF ? hltonecurve[r] : CurveFactory::hlcurve (exp_scale, comp, hlrange, r) ) +
-                                (g < MAXVALF ? hltonecurve[g] : CurveFactory::hlcurve (exp_scale, comp, hlrange, g) ) +
-                                (b < MAXVALF ? hltonecurve[b] : CurveFactory::hlcurve (exp_scale, comp, hlrange, b) ) ) / 3.0;
+            float tonefactor = ((r < MAXVALF ? hltonecurve[r] : CurveFactory::hlcurve(exp_scale, comp, hlrange, r)) +
+                                (g < MAXVALF ? hltonecurve[g] : CurveFactory::hlcurve(exp_scale, comp, hlrange, g)) +
+                                (b < MAXVALF ? hltonecurve[b] : CurveFactory::hlcurve(exp_scale, comp, hlrange, b))) / 3.0;
 
             // note: tonefactor includes exposure scaling, that is here exposure slider and highlight compression takes place
             rtemp[ti * tileSize + tj] = r * tonefactor;
@@ -167,8 +178,9 @@ void proPhotoBlue(float *rtemp, float *gtemp, float *btemp, int istart, int tH,
                 for (int k = 0; k < 4; ++k) {
                     float r = rtemp[ti * tileSize + tj + k];
                     float g = gtemp[ti * tileSize + tj + k];
-                    if (r == 0.0f || g == 0.0f) {
-                        float b = btemp[ti * tileSize + tj + k];
+                    float b = btemp[ti * tileSize + tj + k];
+                    
+                    if ((r == 0.0f || g == 0.0f) && rtengine::min(r, g, b) >= 0.f) {
                         float h, s, v;
                         Color::rgb2hsv (r, g, b, h, s, v);
                         s *= 0.99f;
@@ -181,9 +193,9 @@ void proPhotoBlue(float *rtemp, float *gtemp, float *btemp, int istart, int tH,
         for (; j < tW; j++, tj++) {
             float r = rtemp[ti * tileSize + tj];
             float g = gtemp[ti * tileSize + tj];
+            float b = btemp[ti * tileSize + tj];
 
-            if (r == 0.0f || g == 0.0f) {
-                float b = btemp[ti * tileSize + tj];
+            if ((r == 0.0f || g == 0.0f) && rtengine::min(r, g, b) >= 0.f) {
                 float h, s, v;
                 Color::rgb2hsv (r, g, b, h, s, v);
                 s *= 0.99f;
@@ -224,9 +236,6 @@ void customToneCurve(const ToneCurve &customToneCurve, ToneCurveParams::TcMode c
 
         for (int i = istart, ti = 0; i < tH; i++, ti++) {
             for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                rtemp[ti * tileSize + tj] = CLIP<float> (rtemp[ti * tileSize + tj]);
-                gtemp[ti * tileSize + tj] = CLIP<float> (gtemp[ti * tileSize + tj]);
-                btemp[ti * tileSize + tj] = CLIP<float> (btemp[ti * tileSize + tj]);
                 userToneCurve.Apply(rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]);
             }
         }
@@ -3256,7 +3265,7 @@ filmlike_clip_rgb_tone (float *r, float *g, float *b, const float L)
     *b = b_;
 }
 
-static void
+/*static*/ void
 filmlike_clip (float *r, float *g, float *b)
 {
     // This is Adobe's hue-stable film-like curve with a diagonal, ie only used for clipping. Can probably be further optimized.
@@ -3666,6 +3675,9 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
         }
 
         float out_rgbx[4 * TS] ALIGNED16; // Line buffer for CLUT
+        float clutr[TS] ALIGNED16;
+        float clutg[TS] ALIGNED16;
+        float clutb[TS] ALIGNED16;
 
         LUTu histToneCurveThr;
 
@@ -3756,9 +3768,9 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                             filmlike_clip (&r, &g, &b);
                         }
 
-                        rtemp[ti * TS + tj] = r;
-                        gtemp[ti * TS + tj] = g;
-                        btemp[ti * TS + tj] = b;
+                        setUnlessOOG(rtemp[ti * TS + tj], r);
+                        setUnlessOOG(gtemp[ti * TS + tj], g);
+                        setUnlessOOG(btemp[ti * TS + tj], b);
                     }
                 }
 
@@ -3767,30 +3779,43 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                         for (int j = jstart, tj = 0; j < tW; j++, tj++) {
 
                             //brightness/contrast
-                            rtemp[ti * TS + tj] = tonecurve[ rtemp[ti * TS + tj] ];
-                            gtemp[ti * TS + tj] = tonecurve[ gtemp[ti * TS + tj] ];
-                            btemp[ti * TS + tj] = tonecurve[ btemp[ti * TS + tj] ];
+                            float r = tonecurve[ CLIP(rtemp[ti * TS + tj]) ];
+                            float g = tonecurve[ CLIP(gtemp[ti * TS + tj]) ];
+                            float b = tonecurve[ CLIP(btemp[ti * TS + tj]) ];
 
-                            int y = CLIP<int> (lumimulf[0] * Color::gamma2curve[rtemp[ti * TS + tj]] + lumimulf[1] * Color::gamma2curve[gtemp[ti * TS + tj]] + lumimulf[2] * Color::gamma2curve[btemp[ti * TS + tj]]);
+                            int y = CLIP<int> (lumimulf[0] * Color::gamma2curve[r] + lumimulf[1] * Color::gamma2curve[g] + lumimulf[2] * Color::gamma2curve[b]);
                             histToneCurveThr[y >> histToneCurveCompression]++;
+                            // the histogram above is fed with the tone-curved r/g/b (as before this change), not the still-unmodified buffers
+                            setUnlessOOG(rtemp[ti * TS + tj], r);
+                            setUnlessOOG(gtemp[ti * TS + tj], g);
+                            setUnlessOOG(btemp[ti * TS + tj], b);
                         }
                     }
                 } else {
+                    float tmpr[4] ALIGNED16;
+                    float tmpg[4] ALIGNED16;
+                    float tmpb[4] ALIGNED16;
+                    // scratch for one vector of results; plain aligned float arrays so the declarations also compile without __SSE2__
                     for (int i = istart, ti = 0; i < tH; i++, ti++) {
                         int j = jstart, tj = 0;
 #ifdef __SSE2__
                         for (; j < tW - 3; j+=4, tj+=4) {
                             //brightness/contrast
-                            STVF(rtemp[ti * TS + tj], tonecurve(LVF(rtemp[ti * TS + tj])));
-                            STVF(gtemp[ti * TS + tj], tonecurve(LVF(gtemp[ti * TS + tj])));
-                            STVF(btemp[ti * TS + tj], tonecurve(LVF(btemp[ti * TS + tj])));
+                            STVF(tmpr[0], tonecurve(LVF(rtemp[ti * TS + tj])));
+                            STVF(tmpg[0], tonecurve(LVF(gtemp[ti * TS + tj])));
+                            STVF(tmpb[0], tonecurve(LVF(btemp[ti * TS + tj])));
+                            for (int k = 0; k < 4; ++k) {
+                                setUnlessOOG(rtemp[ti * TS + tj + k], tmpr[k]);
+                                setUnlessOOG(gtemp[ti * TS + tj + k], tmpg[k]);
+                                setUnlessOOG(btemp[ti * TS + tj + k], tmpb[k]);
+                            }
                         }
 #endif
                         for (; j < tW; j++, tj++) {
                             //brightness/contrast
-                            rtemp[ti * TS + tj] = tonecurve[rtemp[ti * TS + tj]];
-                            gtemp[ti * TS + tj] = tonecurve[gtemp[ti * TS + tj]];
-                            btemp[ti * TS + tj] = tonecurve[btemp[ti * TS + tj]];
+                            setUnlessOOG(rtemp[ti * TS + tj], tonecurve[rtemp[ti * TS + tj]]);
+                            setUnlessOOG(gtemp[ti * TS + tj], tonecurve[gtemp[ti * TS + tj]]);
+                            setUnlessOOG(btemp[ti * TS + tj], tonecurve[btemp[ti * TS + tj]]);
                         }
                     }
                 }
@@ -3838,17 +3863,17 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                             for (int j = jstart, tj = 0; j < tW; j++, tj++) {
                                 // individual R tone curve
                                 if (rCurve) {
-                                    rtemp[ti * TS + tj] = rCurve[ rtemp[ti * TS + tj] ];
+                                    setUnlessOOG(rtemp[ti * TS + tj], rCurve[ rtemp[ti * TS + tj] ]);
                                 }
 
                                 // individual G tone curve
                                 if (gCurve) {
-                                    gtemp[ti * TS + tj] = gCurve[ gtemp[ti * TS + tj] ];
+                                    setUnlessOOG(gtemp[ti * TS + tj], gCurve[ gtemp[ti * TS + tj] ]);
                                 }
 
                                 // individual B tone curve
                                 if (bCurve) {
-                                    btemp[ti * TS + tj] = bCurve[ btemp[ti * TS + tj] ];
+                                    setUnlessOOG(btemp[ti * TS + tj], bCurve[ btemp[ti * TS + tj] ]);
                                 }
                             }
                         }
@@ -3915,18 +3940,22 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                                     bool neg = false;
                                     bool more_rgb = false;
                                     //gamut control : Lab values are in gamut
-                                    Color::gamutLchonly (HH, sincosval, Lpro, Chpro, rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj], wip, highlight, 0.15f, 0.96f, neg, more_rgb);
+                                    Color::gamutLchonly (HH, sincosval, Lpro, Chpro, r, g, b, wip, highlight, 0.15f, 0.96f, neg, more_rgb);
 #else
                                     //gamut control : Lab values are in gamut
-                                    Color::gamutLchonly (HH, sincosval, Lpro, Chpro, rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj], wip, highlight, 0.15f, 0.96f);
+                                    Color::gamutLchonly (HH, sincosval, Lpro, Chpro, r, g, b, wip, highlight, 0.15f, 0.96f);
 #endif
                                     //end of gamut control
                                 } else {
                                     float x_, y_, z_;
                                     //calculate RGB with L_2 and old value of a and b
                                     Color::Lab2XYZ (L_2, a_1, b_1, x_, y_, z_) ;
-                                    Color::xyz2rgb (x_, y_, z_, rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj], wip);
+                                    Color::xyz2rgb (x_, y_, z_, r, g, b, wip);
                                 }
+
+                                setUnlessOOG(rtemp[ti * TS + tj], r);
+                                setUnlessOOG(gtemp[ti * TS + tj], g);
+                                setUnlessOOG(btemp[ti * TS + tj], b);
                             }
                         }
                     }
@@ -4077,9 +4106,9 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                                     bo *= preserv;
                                 }
 
-                                rtemp[ti * TS + tj] = CLIP(ro);
-                                gtemp[ti * TS + tj] = CLIP(go);
-                                btemp[ti * TS + tj] = CLIP(bo);
+                                setUnlessOOG(rtemp[ti * TS + tj], CLIP(ro));
+                                setUnlessOOG(gtemp[ti * TS + tj], CLIP(go));
+                                setUnlessOOG(btemp[ti * TS + tj], CLIP(bo));
                             }
                         }
                     }
@@ -4133,9 +4162,11 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                                     float b = btemp[ti * TS + tj];
                                     float ro, go, bo;
                                     labtoning (r, g, b, ro, go, bo, algm, metchrom, twoc, satLimit, satLimitOpacity, ctColorCurve, ctOpacityCurve, clToningcurve, cl2Toningcurve, iplow, iphigh, wp, wip);
-                                    rtemp[ti * TS + tj] = CLIP (ro); //I used CLIP because there is a little bug in gamutLchonly that return 65536.ii instead of 65535 ==> crash
-                                    gtemp[ti * TS + tj] = CLIP (go);
-                                    btemp[ti * TS + tj] = CLIP (bo);
+                                    if (!OOG(rtemp[ti * TS + tj]) || !OOG(gtemp[ti * TS + tj]) || !OOG(btemp[ti * TS + tj])) {
+                                        rtemp[ti * TS + tj] = ro;
+                                        gtemp[ti * TS + tj] = go;
+                                        btemp[ti * TS + tj] = bo;
+                                    }
                                 }
                             }
                         }
@@ -4217,9 +4248,9 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                             for (int i = istart, ti = 0; i < tH; i++, ti++) {
                                 for (int j = jstart, tj = 0; j < tW; j++, tj++) {
                                     const SatAndValueBlendingToneCurve& userToneCurvebw = static_cast<const SatAndValueBlendingToneCurve&> (customToneCurvebw1);
-                                    rtemp[ti * TS + tj] = CLIP<float> (rtemp[ti * TS + tj]);
-                                    gtemp[ti * TS + tj] = CLIP<float> (gtemp[ti * TS + tj]);
-                                    btemp[ti * TS + tj] = CLIP<float> (btemp[ti * TS + tj]);
+                                    // rtemp[ti * TS + tj] = CLIP<float> (rtemp[ti * TS + tj]);
+                                    // gtemp[ti * TS + tj] = CLIP<float> (gtemp[ti * TS + tj]);
+                                    // btemp[ti * TS + tj] = CLIP<float> (btemp[ti * TS + tj]);
                                     userToneCurvebw.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
                                 }
                             }
@@ -4227,9 +4258,9 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                             for (int i = istart, ti = 0; i < tH; i++, ti++) {
                                 for (int j = jstart, tj = 0; j < tW; j++, tj++) {
                                     const WeightedStdToneCurve& userToneCurvebw = static_cast<const WeightedStdToneCurve&> (customToneCurvebw1);
-                                    rtemp[ti * TS + tj] = CLIP<float> (rtemp[ti * TS + tj]);
-                                    gtemp[ti * TS + tj] = CLIP<float> (gtemp[ti * TS + tj]);
-                                    btemp[ti * TS + tj] = CLIP<float> (btemp[ti * TS + tj]);
+                                    // rtemp[ti * TS + tj] = CLIP<float> (rtemp[ti * TS + tj]);
+                                    // gtemp[ti * TS + tj] = CLIP<float> (gtemp[ti * TS + tj]);
+                                    // btemp[ti * TS + tj] = CLIP<float> (btemp[ti * TS + tj]);
 
                                     userToneCurvebw.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
                                 }
@@ -4382,28 +4413,32 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                                 Color::rgbxyz (sourceR, sourceG, sourceB, x, y, z, v_work2xyz);
                                 Color::xyz2rgb (x, y, z, sourceR, sourceG, sourceB, v_xyz2clut);
 
-                                STVF (rtemp[ti * TS + tj], sourceR);
-                                STVF (gtemp[ti * TS + tj], sourceG);
-                                STVF (btemp[ti * TS + tj], sourceB);
+                                STVF (clutr[tj], sourceR);
+                                STVF (clutg[tj], sourceG);
+                                STVF (clutb[tj], sourceB);
                             }
 
 #endif
 
                             for (; j < tW; j++, tj++) {
-                                float &sourceR = rtemp[ti * TS + tj];
-                                float &sourceG = gtemp[ti * TS + tj];
-                                float &sourceB = btemp[ti * TS + tj];
+                                float sourceR = rtemp[ti * TS + tj];
+                                float sourceG = gtemp[ti * TS + tj];
+                                float sourceB = btemp[ti * TS + tj];
 
                                 float x, y, z;
                                 Color::rgbxyz ( sourceR, sourceG, sourceB, x, y, z, wprof );
-                                Color::xyz2rgb (x, y, z, sourceR, sourceG, sourceB, xyz2clut);
+                                Color::xyz2rgb (x, y, z, clutr[tj], clutg[tj], clutb[tj], xyz2clut);
                             }
+                        } else {
+                            memcpy(clutr, &rtemp[ti * TS], sizeof(float) * TS);
+                            memcpy(clutg, &gtemp[ti * TS], sizeof(float) * TS);
+                            memcpy(clutb, &btemp[ti * TS], sizeof(float) * TS);
                         }
 
                         for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                            float &sourceR = rtemp[ti * TS + tj];
-                            float &sourceG = gtemp[ti * TS + tj];
-                            float &sourceB = btemp[ti * TS + tj];
+                            float &sourceR = clutr[tj];
+                            float &sourceG = clutg[tj];
+                            float &sourceB = clutb[tj];
 
                             // Apply gamma sRGB (default RT)
                             sourceR = Color::gamma_srgbclipped (sourceR);
@@ -4411,20 +4446,19 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                             sourceB = Color::gamma_srgbclipped (sourceB);
                         }
 
-                        const std::size_t line_offset = ti * TS;
                         hald_clut->getRGB (
                             film_simulation_strength,
                             std::min (TS, tW - jstart),
-                            rtemp + line_offset,
-                            gtemp + line_offset,
-                            btemp + line_offset,
+                            clutr,
+                            clutg,
+                            clutb,
                             out_rgbx
                         );
 
                         for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                            float &sourceR = rtemp[ti * TS + tj];
-                            float &sourceG = gtemp[ti * TS + tj];
-                            float &sourceB = btemp[ti * TS + tj];
+                            float &sourceR = clutr[tj];
+                            float &sourceG = clutg[tj];
+                            float &sourceB = clutb[tj];
 
                             // Apply inverse gamma sRGB
                             sourceR = Color::igamma_srgb (out_rgbx[tj * 4 + 0]);
@@ -4440,9 +4474,9 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
 #ifdef __SSE2__
 
                             for (; j < tW - 3; j += 4, tj += 4) {
-                                vfloat sourceR = LVF (rtemp[ti * TS + tj]);
-                                vfloat sourceG = LVF (gtemp[ti * TS + tj]);
-                                vfloat sourceB = LVF (btemp[ti * TS + tj]);
+                                vfloat sourceR = LVF (clutr[tj]);
+                                vfloat sourceG = LVF (clutg[tj]);
+                                vfloat sourceB = LVF (clutb[tj]);
 
                                 vfloat x;
                                 vfloat y;
@@ -4450,23 +4484,31 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                                 Color::rgbxyz (sourceR, sourceG, sourceB, x, y, z, v_clut2xyz);
                                 Color::xyz2rgb (x, y, z, sourceR, sourceG, sourceB, v_xyz2work);
 
-                                STVF (rtemp[ti * TS + tj], sourceR);
-                                STVF (gtemp[ti * TS + tj], sourceG);
-                                STVF (btemp[ti * TS + tj], sourceB);
+                                STVF (clutr[tj], sourceR);
+                                STVF (clutg[tj], sourceG);
+                                STVF (clutb[tj], sourceB);
                             }
 
 #endif
 
                             for (; j < tW; j++, tj++) {
-                                float &sourceR = rtemp[ti * TS + tj];
-                                float &sourceG = gtemp[ti * TS + tj];
-                                float &sourceB = btemp[ti * TS + tj];
+                                float &sourceR = clutr[tj];
+                                float &sourceG = clutg[tj];
+                                float &sourceB = clutb[tj];
 
                                 float x, y, z;
                                 Color::rgbxyz (sourceR, sourceG, sourceB, x, y, z, clut2xyz);
                                 Color::xyz2rgb ( x, y, z, sourceR, sourceG, sourceB, wiprof );
                             }
                         }
+
+                        for (int j = jstart, tj = 0; j < tW; j++, tj++) {
+                            if (!OOG(rtemp[ti * TS + tj]) || !OOG(gtemp[ti * TS + tj]) || !OOG(btemp[ti * TS + tj])) {
+                                rtemp[ti * TS + tj] = clutr[tj];
+                                gtemp[ti * TS + tj] = clutg[tj];
+                                btemp[ti * TS + tj] = clutb[tj];
+                            }
+                        }
                     }
                 }
 
@@ -4594,7 +4636,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                 for (int j = 0; j < tW; j++) {
 
                     //mix channel
-                    tmpImage->r (i, j) = tmpImage->g (i, j) = tmpImage->b (i, j) = CLIP ((bwr * tmpImage->r (i, j) + bwg * tmpImage->g (i, j) + bwb * tmpImage->b (i, j)) * kcorec);
+                    tmpImage->r (i, j) = tmpImage->g (i, j) = tmpImage->b (i, j) = /*CLIP*/ ((bwr * tmpImage->r (i, j) + bwg * tmpImage->g (i, j) + bwb * tmpImage->b (i, j)) * kcorec);
 
 #ifndef __SSE2__
 
@@ -4651,9 +4693,9 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                     for (int j = 0; j < tW; j++) {
                         const WeightedStdToneCurve& userToneCurve = static_cast<const WeightedStdToneCurve&> (customToneCurvebw2);
 
-                        tmpImage->r (i, j) = CLIP<float> (tmpImage->r (i, j));
-                        tmpImage->g (i, j) = CLIP<float> (tmpImage->g (i, j));
-                        tmpImage->b (i, j) = CLIP<float> (tmpImage->b (i, j));
+                        // tmpImage->r (i, j) = CLIP<float> (tmpImage->r (i, j));
+                        // tmpImage->g (i, j) = CLIP<float> (tmpImage->g (i, j));
+                        // tmpImage->b (i, j) = CLIP<float> (tmpImage->b (i, j));
 
                         userToneCurve.Apply (tmpImage->r (i, j), tmpImage->g (i, j), tmpImage->b (i, j));
                     }
@@ -4690,9 +4732,9 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                                 bo *= preserv;
                             }
 
-                            tmpImage->r(i, j) = CLIP(ro);
-                            tmpImage->g(i, j) = CLIP(go);
-                            tmpImage->b(i, j) = CLIP(bo);
+                            tmpImage->r(i, j) = /*CLIP*/(ro);
+                            tmpImage->g(i, j) = /*CLIP*/(go);
+                            tmpImage->b(i, j) = /*CLIP*/(bo);
                         }
                     }
                 }
@@ -4799,9 +4841,11 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                             float b = tmpImage->b (i, j);
                             float ro, bo, go;
                             labtoning (r, g, b, ro, go, bo, algm, metchrom,  twoc, satLimit, satLimitOpacity, ctColorCurve,  ctOpacityCurve, clToningcurve, cl2Toningcurve,  iplow, iphigh,  wp,  wip);
-                            tmpImage->r (i, j) = CLIP (ro);
-                            tmpImage->g (i, j) = CLIP (go);
-                            tmpImage->b (i, j) = CLIP (bo);
+                            if (!OOG(tmpImage->r(i, j)) || !OOG(tmpImage->g(i, j)) || !OOG(tmpImage->b(i, j))) {
+                                tmpImage->r (i, j) = ro;
+                                tmpImage->g (i, j) = go;
+                                tmpImage->b (i, j) = bo;
+                            }
 
                         }
                     }
@@ -5045,9 +5089,9 @@ void ImProcFunctions::toningsmh(float r, float g, float b, float &ro, float &go,
             r += corr;
         }
 
-        r = CLIP(r);
-        g = CLIP(g);
-        b = CLIP(b);
+        // r = CLIP(r);
+        // g = CLIP(g);
+        // b = CLIP(b);
     }
 
     {
@@ -5059,9 +5103,9 @@ void ImProcFunctions::toningsmh(float r, float g, float b, float &ro, float &go,
             g += corr;
         }
 
-        r = CLIP(r);
-        b = CLIP(b);
-        g = CLIP(g);
+        // r = CLIP(r);
+        // b = CLIP(b);
+        // g = CLIP(g);
     }
 
 
@@ -5075,9 +5119,9 @@ void ImProcFunctions::toningsmh(float r, float g, float b, float &ro, float &go,
             b += corr;
         }
 
-        r = CLIP(r);
-        g = CLIP(g);
-        b = CLIP(b);
+        // r = CLIP(r);
+        // g = CLIP(g);
+        // b = CLIP(b);
     }
 
     // mid tones
@@ -5108,9 +5152,9 @@ void ImProcFunctions::toningsmh(float r, float g, float b, float &ro, float &go,
             g -= 20000.f * RedM;
             b -= 20000.f * RedM;
         }
-        r = CLIP(r);
-        g = CLIP(g);
-        b = CLIP(b);
+        // r = CLIP(r);
+        // g = CLIP(g);
+        // b = CLIP(b);
     }
 
     {
@@ -5125,9 +5169,9 @@ void ImProcFunctions::toningsmh(float r, float g, float b, float &ro, float &go,
             g += 10000.f * GreenM;
             b -= 20000.f * GreenM;
         }
-        r = CLIP(r);
-        g = CLIP(g);
-        b = CLIP(b);
+        // r = CLIP(r);
+        // g = CLIP(g);
+        // b = CLIP(b);
     }
 
     {
@@ -5142,9 +5186,9 @@ void ImProcFunctions::toningsmh(float r, float g, float b, float &ro, float &go,
             g -= 20000.f * BlueM;
             b += 10000.f * BlueM;
         }
-        r = CLIP(r);
-        g = CLIP(g);
-        b = CLIP(b);
+        // r = CLIP(r);
+        // g = CLIP(g);
+        // b = CLIP(b);
     }
 
     //high tones
@@ -5169,9 +5213,9 @@ void ImProcFunctions::toningsmh(float r, float g, float b, float &ro, float &go,
             b -= corr;
         }
 
-        r = CLIP(r);
-        g = CLIP(g);
-        b = CLIP(b);
+        // r = CLIP(r);
+        // g = CLIP(g);
+        // b = CLIP(b);
     }
 
     {
@@ -5184,9 +5228,9 @@ void ImProcFunctions::toningsmh(float r, float g, float b, float &ro, float &go,
             b -= corr;
         }
 
-        r = CLIP(r);
-        g = CLIP(g);
-        b = CLIP(b);
+        // r = CLIP(r);
+        // g = CLIP(g);
+        // b = CLIP(b);
     }
 
     {
@@ -5199,9 +5243,9 @@ void ImProcFunctions::toningsmh(float r, float g, float b, float &ro, float &go,
             g -= corr;
         }
 
-        r = CLIP(r);
-        g = CLIP(g);
-        b = CLIP(b);
+        // r = CLIP(r);
+        // g = CLIP(g);
+        // b = CLIP(b);
     }
 
     ro = r;
@@ -5258,24 +5302,24 @@ void ImProcFunctions::toning2col (float r, float g, float b, float &ro, float &g
             b -= factor * krl;
         }
 
-        g = CLIP(g);
-        b = CLIP(b);
+        // g = CLIP(g);
+        // b = CLIP(b);
 
         if (kgl > 0.f) {
             r -= factor * kgl;
             b -= factor * kgl;
         }
 
-        r = CLIP(r);
-        b = CLIP(b);
+        // r = CLIP(r);
+        // b = CLIP(b);
 
         if (kbl > 0.f) {
             r -= factor * kbl;
             g -= factor * kbl;
         }
 
-        r = CLIP(r);
-        g = CLIP(g);
+        // r = CLIP(r);
+        // g = CLIP(g);
     }
 
     //high tones
@@ -5302,9 +5346,9 @@ void ImProcFunctions::toning2col (float r, float g, float b, float &ro, float &g
         g += factor * (kgh > 0.f ? kgh : 0.f);
         b += factor * (kbh > 0.f ? kbh : 0.f);
 
-        r = CLIP(r);
-        g = CLIP(g);
-        b = CLIP(b);
+        // r = CLIP(r);
+        // g = CLIP(g);
+        // b = CLIP(b);
     }
 
     float preserv = 1.f;
@@ -5313,9 +5357,9 @@ void ImProcFunctions::toning2col (float r, float g, float b, float &ro, float &g
         preserv = lumbefore / lumafter;
     }
 
-    ro = CLIP(r * preserv);
-    go = CLIP(g * preserv);
-    bo = CLIP(b * preserv);
+    setUnlessOOG(ro, CLIP(r * preserv));
+    setUnlessOOG(go, CLIP(g * preserv));
+    setUnlessOOG(bo, CLIP(b * preserv));
 }
 
 /**
@@ -5332,9 +5376,13 @@ void ImProcFunctions::toning2col (float r, float g, float b, float &ro, float &g
 **/
 void ImProcFunctions::labtoning (float r, float g, float b, float &ro, float &go, float &bo, int algm, int metchrom, int twoc, float satLimit, float satLimitOpacity, const ColorGradientCurve & ctColorCurve, const OpacityCurve & ctOpacityCurve, LUTf & clToningcurve, LUTf & cl2Toningcurve, float iplow, float iphigh, double wp[3][3], double wip[3][3]  )
 {
+    ro = CLIP(r);
+    go = CLIP(g);
+    bo = CLIP(b);
+    
     float realL;
     float h, s, l;
-    Color::rgb2hsl (r, g, b, h, s, l);
+    Color::rgb2hsl (ro, go, bo, h, s, l);
     float x2, y2, z2;
     float xl, yl, zl;
 
diff --git a/rtengine/iplab2rgb.cc b/rtengine/iplab2rgb.cc
index a8e35bd18..fe8590470 100644
--- a/rtengine/iplab2rgb.cc
+++ b/rtengine/iplab2rgb.cc
@@ -30,8 +30,66 @@
 namespace rtengine
 {
 
+extern void filmlike_clip(float *r, float *g, float *b);
+
+namespace {
+// Scale a Lab triple by 1/scale; negative L yields zeros, L above 32768 is first passed through RGB and filmlike_clip.
+inline void clipLAB(float iL, float ia, float ib, float &oL, float &oa, float &ob, const float scale, const float wp[3][3], const float wip[3][3])
+{
+    if (iL < 0.f) {
+        // negative lightness: all three outputs become zero
+        oL = oa = ob = 0.f;
+        return;
+    }
+    if (!(iL > 32768.f)) {
+        // no clipping needed: just rescale the inputs
+        oL = iL / scale;
+        oa = ia / scale;
+        ob = ib / scale;
+        return;
+    }
+    // L above 32768: Lab -> XYZ -> RGB (wip), filmlike_clip, RGB -> XYZ (wp) -> Lab, then rescale
+    float X, Y, Z, r, g, b;
+    Color::Lab2XYZ(iL, ia, ib, X, Y, Z);
+    Color::xyz2rgb(X, Y, Z, r, g, b, wip);
+    filmlike_clip(&r, &g, &b);
+    Color::rgbxyz(r, g, b, X, Y, Z, wp);
+    Color::XYZ2Lab(X, Y, Z, oL, oa, ob);
+    oL /= scale;
+    oa /= scale;
+    ob /= scale;
+}
+
+// Overload for double outputs: runs the float clipLAB and widens its three results.
+inline void clipLAB(float iL, float ia, float ib, double &oL, double &oa, double &ob, const float scale, const float wp[3][3], const float wip[3][3])
+{
+    float clipped[3];
+    clipLAB(iL, ia, ib, clipped[0], clipped[1], clipped[2], scale, wp, wip);
+    oL = clipped[0];
+    oa = clipped[1];
+    ob = clipped[2];
+}
+
+} // namespace
+
 extern const Settings* settings;
 
+#define DECLARE_WORKING_MATRICES_(space) /* declares wprof/wp and wiprof/wip in the enclosing scope; caller supplies the final ';' */ \
+    TMatrix wprof = ICCStore::getInstance()->workingSpaceMatrix ( space ); \
+    const float wp[3][3] = {                                            \
+        {static_cast<float> (wprof[0][0]), static_cast<float> (wprof[0][1]), static_cast<float> (wprof[0][2])}, \
+        {static_cast<float> (wprof[1][0]), static_cast<float> (wprof[1][1]), static_cast<float> (wprof[1][2])}, \
+        {static_cast<float> (wprof[2][0]), static_cast<float> (wprof[2][1]), static_cast<float> (wprof[2][2])} \
+    };                                                                  \
+    /* wiprof/wip: the same for the inverse working-space matrix */     \
+    TMatrix wiprof = ICCStore::getInstance()->workingSpaceInverseMatrix ( space ); \
+    const float wip[3][3] = {                                           \
+        {static_cast<float> (wiprof[0][0]), static_cast<float> (wiprof[0][1]), static_cast<float> (wiprof[0][2])}, \
+        {static_cast<float> (wiprof[1][0]), static_cast<float> (wiprof[1][1]), static_cast<float> (wiprof[1][2])}, \
+        {static_cast<float> (wiprof[2][0]), static_cast<float> (wiprof[2][1]), static_cast<float> (wiprof[2][2])} \
+    }
+    
+
 // Used in ImProcCoordinator::updatePreviewImage  (rtengine/improccoordinator.cc)
 //         Crop::update                           (rtengine/dcrop.cc)
 //         Thumbnail::processImage                (rtengine/rtthumbnail.cc)
@@ -40,6 +98,8 @@ extern const Settings* settings;
 // otherwise divide by 327.68, convert to xyz and apply the sRGB transform, before converting with gamma2curve
 void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image)
 {
+    DECLARE_WORKING_MATRICES_(params->icm.working);
+    
     if (monitorTransform) {
 
         int W = lab->W;
@@ -76,9 +136,8 @@ void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image)
                 float* rb = lab->b[i];
 
                 for (int j = 0; j < W; j++) {
-                    buffer[iy++] = rL[j] / 327.68f;
-                    buffer[iy++] = ra[j] / 327.68f;
-                    buffer[iy++] = rb[j] / 327.68f;
+                    clipLAB(rL[j], ra[j], rb[j], buffer[iy], buffer[iy+1], buffer[iy+2], 327.68f, wp, wip);
+                    iy += 3;
                 }
 
                 cmsDoTransform (monitorTransform, buffer, data + ix, W);
@@ -106,12 +165,14 @@ void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image)
 
             float R, G, B;
             float x_, y_, z_;
+            float L, a, b;
 
             for (int j = 0; j < W; ++j) {
 
                 //float L1=rL[j],a1=ra[j],b1=rb[j];//for testing
+                clipLAB(rL[j], ra[j], rb[j], L, a, b, 1.f, wp, wip);
 
-                Color::Lab2XYZ(rL[j], ra[j], rb[j], x_, y_, z_ );
+                Color::Lab2XYZ(L, a, b, x_, y_, z_ );
 
                 Color::xyz2srgb(x_, y_, z_, R, G, B);
 
@@ -136,6 +197,8 @@ void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image)
 // otherwise divide by 327.68, convert to xyz and apply the RGB transform, before converting with gamma2curve
 Image8* ImProcFunctions::lab2rgb (LabImage* lab, int cx, int cy, int cw, int ch, const procparams::ColorManagementParams &icm, bool consider_histogram_settings)
 {
+    DECLARE_WORKING_MATRICES_(icm.working);
+    
     //gamutmap(lab);
 
     if (cx < 0) {
@@ -212,9 +275,8 @@ Image8* ImProcFunctions::lab2rgb (LabImage* lab, int cx, int cy, int cw, int ch,
                 float* rb = lab->b[i];
 
                 for (int j = cx; j < cx + cw; j++) {
-                    buffer[iy++] = rL[j] / 327.68f;
-                    buffer[iy++] = ra[j] / 327.68f;
-                    buffer[iy++] = rb[j] / 327.68f;
+                    clipLAB(rL[j], ra[j], rb[j], buffer[iy], buffer[iy+1], buffer[iy+2], 327.68f, wp, wip);
+                    iy += 3;
                 }
 
                 cmsDoTransform (hTransform, buffer, data + ix, cw);
@@ -242,8 +304,11 @@ Image8* ImProcFunctions::lab2rgb (LabImage* lab, int cx, int cy, int cw, int ch,
 
             float R, G, B;
             float x_, y_, z_;
+            float L, a, b;
 
             for (int j = cx; j < cx + cw; ++j) {
-                Color::Lab2XYZ(rL[j], ra[j], rb[j], x_, y_, z_);
+                // convert the values produced by clipLAB(), not the raw rL/ra/rb input
+                clipLAB(rL[j], ra[j], rb[j], L, a, b, 1.f, wp, wip);
+                Color::Lab2XYZ(L, a, b, x_, y_, z_);
 
                 Color::xyz2rgb(x_, y_, z_, R, G, B, xyz_rgb);
diff --git a/rtengine/ipresize.cc b/rtengine/ipresize.cc
index 03502e99b..f9ff94ef9 100644
--- a/rtengine/ipresize.cc
+++ b/rtengine/ipresize.cc
@@ -162,9 +162,9 @@ void ImProcFunctions::Lanczos (const Imagefloat* src, Imagefloat* dst, float sca
                     b += wh[k] * lb[jj];
                 }
 
-                dst->r (i, j) = CLIP (r);//static_cast<int> (r));
-                dst->g (i, j) = CLIP (g);//static_cast<int> (g));
-                dst->b (i, j) = CLIP (b);//static_cast<int> (b));
+                dst->r (i, j) = /*CLIP*/ (r);//static_cast<int> (r));
+                dst->g (i, j) = /*CLIP*/ (g);//static_cast<int> (g));
+                dst->b (i, j) = /*CLIP*/ (b);//static_cast<int> (b));
             }
         }
 
diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc
index 077974244..a89cfc1bf 100644
--- a/rtengine/rawimagesource.cc
+++ b/rtengine/rawimagesource.cc
@@ -154,7 +154,7 @@ void transLineFuji (const float* const red, const float* const green, const floa
     }
 }
 
-void transLineD1x (const float* const red, const float* const green, const float* const blue, const int i, rtengine::Imagefloat* const image, const int tran, const int imwidth, const int imheight, const bool oddHeight, const bool clip)
+void transLineD1x (const float* const red, const float* const green, const float* const blue, const int i, rtengine::Imagefloat* const image, const int tran, const int imwidth, const int imheight, const bool oddHeight)
 {
     // Nikon D1X has an uncommon sensor with 4028 x 1324 sensels.
     // Vertical sensel size is 2x horizontal sensel size
@@ -223,12 +223,6 @@ void transLineD1x (const float* const red, const float* const green, const float
                     image->r(row, col) = MAX(0.f, -0.0625f * (red[j] + image->r(row + 3, col)) + 0.5625f * (image->r(row - 1, col) + image->r(row + 1, col)));
                     image->g(row, col) = MAX(0.f, -0.0625f * (green[j] + image->g(row + 3, col)) + 0.5625f * (image->g(row - 1, col) + image->g(row + 1, col)));
                     image->b(row, col) = MAX(0.f, -0.0625f * (blue[j] + image->b(row + 3, col)) + 0.5625f * (image->b(row - 1, col) + image->b(row + 1, col)));
-
-                    if(clip) {
-                        image->r(row, col) = MIN(image->r(row, col), rtengine::MAXVALF);
-                        image->g(row, col) = MIN(image->g(row, col), rtengine::MAXVALF);
-                        image->b(row, col) = MIN(image->b(row, col), rtengine::MAXVALF);
-                    }
                 }
             }
 
@@ -286,12 +280,6 @@ void transLineD1x (const float* const red, const float* const green, const float
                     image->r(j, col) = MAX(0.f, -0.0625f * (red[j] + image->r(j, col + 3)) + 0.5625f * (image->r(j, col - 1) + image->r(j, col + 1)));
                     image->g(j, col) = MAX(0.f, -0.0625f * (green[j] + image->g(j, col + 3)) + 0.5625f * (image->g(j, col - 1) + image->g(j, col + 1)));
                     image->b(j, col) = MAX(0.f, -0.0625f * (blue[j] + image->b(j, col + 3)) + 0.5625f * (image->b(j, col - 1) + image->b(j, col + 1)));
-
-                    if(clip) {
-                        image->r(j, col) = MIN(image->r(j, col), rtengine::MAXVALF);
-                        image->g(j, col) = MIN(image->g(j, col), rtengine::MAXVALF);
-                        image->b(j, col) = MIN(image->b(j, col), rtengine::MAXVALF);
-                    }
                 }
             }
 
@@ -319,12 +307,6 @@ void transLineD1x (const float* const red, const float* const green, const float
                     image->g(row, 2 * i - 3) = MAX(0.f, -0.0625f * (green[j] + image->g(row, 2 * i - 6)) + 0.5625f * (image->g(row, 2 * i - 2) + image->g(row, 2 * i - 4)));
                     image->b(row, 2 * i - 3) = MAX(0.f, -0.0625f * (blue[j] + image->b(row, 2 * i - 6)) + 0.5625f * (image->b(row, 2 * i - 2) + image->b(row, 2 * i - 4)));
 
-                    if(clip) {
-                        image->r(row, 2 * i - 3) = MIN(image->r(row, 2 * i - 3), rtengine::MAXVALF);
-                        image->g(row, 2 * i - 3) = MIN(image->g(row, 2 * i - 3), rtengine::MAXVALF);
-                        image->b(row, 2 * i - 3) = MIN(image->b(row, 2 * i - 3), rtengine::MAXVALF);
-                    }
-
                     image->r(row, 2 * i) = red[j];
                     image->g(row, 2 * i) = green[j];
                     image->b(row, 2 * i) = blue[j];
@@ -337,12 +319,6 @@ void transLineD1x (const float* const red, const float* const green, const float
                     image->g(row, 2 * i - 1) = MAX(0.f, -0.0625f * (green[j] + image->g(row, 2 * i - 4)) + 0.5625f * (image->g(row, 2 * i) + image->g(row, 2 * i - 2)));
                     image->b(row, 2 * i - 1) = MAX(0.f, -0.0625f * (blue[j] + image->b(row, 2 * i - 4)) + 0.5625f * (image->b(row, 2 * i) + image->b(row, 2 * i - 2)));
 
-                    if(clip) {
-                        image->r(j, 2 * i - 1) = MIN(image->r(j, 2 * i - 1), rtengine::MAXVALF);
-                        image->g(j, 2 * i - 1) = MIN(image->g(j, 2 * i - 1), rtengine::MAXVALF);
-                        image->b(j, 2 * i - 1) = MIN(image->b(j, 2 * i - 1), rtengine::MAXVALF);
-                    }
-
                     image->r(row, 2 * i + 1) = (red[j] + image->r(row, 2 * i - 1)) / 2;
                     image->g(row, 2 * i + 1) = (green[j] + image->g(row, 2 * i - 1)) / 2;
                     image->b(row, 2 * i + 1) = (blue[j] + image->b(row, 2 * i - 1)) / 2;
@@ -374,12 +350,6 @@ void transLineD1x (const float* const red, const float* const green, const float
                     image->r(2 * i - 3, j) = MAX(0.f, -0.0625f * (red[j] + image->r(2 * i - 6, j)) + 0.5625f * (image->r(2 * i - 2, j) + image->r(2 * i - 4, j)));
                     image->g(2 * i - 3, j) = MAX(0.f, -0.0625f * (green[j] + image->g(2 * i - 6, j)) + 0.5625f * (image->g(2 * i - 2, j) + image->g(2 * i - 4, j)));
                     image->b(2 * i - 3, j) = MAX(0.f, -0.0625f * (blue[j] + image->b(2 * i - 6, j)) + 0.5625f * (image->b(2 * i - 2, j) + image->b(2 * i - 4, j)));
-
-                    if(clip) {
-                        image->r(2 * i - 3, j) = MIN(image->r(2 * i - 3, j), rtengine::MAXVALF);
-                        image->g(2 * i - 3, j) = MIN(image->g(2 * i - 3, j), rtengine::MAXVALF);
-                        image->b(2 * i - 3, j) = MIN(image->b(2 * i - 3, j), rtengine::MAXVALF);
-                    }
                 }
             }
 
@@ -389,12 +359,6 @@ void transLineD1x (const float* const red, const float* const green, const float
                     image->g(2 * i - 1, j) = MAX(0.f, -0.0625f * (green[j] + image->g(2 * i - 4, j)) + 0.5625f * (image->g(2 * i, j) + image->g(2 * i - 2, j)));
                     image->b(2 * i - 1, j) = MAX(0.f, -0.0625f * (blue[j] + image->b(2 * i - 4, j)) + 0.5625f * (image->b(2 * i, j) + image->b(2 * i - 2, j)));
 
-                    if(clip) {
-                        image->r(2 * i - 1, j) = MIN(image->r(2 * i - 1, j), rtengine::MAXVALF);
-                        image->g(2 * i - 1, j) = MIN(image->g(2 * i - 1, j), rtengine::MAXVALF);
-                        image->b(2 * i - 1, j) = MIN(image->b(2 * i - 1, j), rtengine::MAXVALF);
-                    }
-
                     image->r(2 * i + 1, j) = (red[j] + image->r(2 * i - 1, j)) / 2;
                     image->g(2 * i + 1, j) = (green[j] + image->g(2 * i - 1, j)) / 2;
                     image->b(2 * i + 1, j) = (blue[j] + image->b(2 * i - 1, j)) / 2;
@@ -724,8 +688,6 @@ void RawImageSource::getImage (const ColorTemp &ctemp, int tran, Imagefloat* ima
     hlmax[1] = clmax[1] * gm;
     hlmax[2] = clmax[2] * bm;
 
-    const bool doClip = (chmax[0] >= clmax[0] || chmax[1] >= clmax[1] || chmax[2] >= clmax[2]) && !hrp.hrenabled;
-
     float area = skip * skip;
     rm /= area;
     gm /= area;
@@ -768,17 +730,6 @@ void RawImageSource::getImage (const ColorTemp &ctemp, int tran, Imagefloat* ima
                     gtot *= gm;
                     btot *= bm;
 
-                    if (doClip) {
-                        // note: as hlmax[] can be larger than CLIP and we can later apply negative
-                        // exposure this means that we can clip away local highlights which actually
-                        // are not clipped. We have to do that though as we only check pixel by pixel
-                        // and don't know if this will transition into a clipped area, if so we need
-                        // to clip also surrounding to make a good colour transition
-                        rtot = CLIP(rtot);
-                        gtot = CLIP(gtot);
-                        btot = CLIP(btot);
-                    }
-
                     line_red[j] = rtot;
                     line_grn[j] = gtot;
                     line_blue[j] = btot;
@@ -803,12 +754,6 @@ void RawImageSource::getImage (const ColorTemp &ctemp, int tran, Imagefloat* ima
                     gtot *= gm;
                     btot *= bm;
 
-                    if (doClip) {
-                        rtot = CLIP(rtot);
-                        gtot = CLIP(gtot);
-                        btot = CLIP(btot);
-                    }
-
                     line_red[j] = rtot;
                     line_grn[j] = gtot;
                     line_blue[j] = btot;
@@ -822,7 +767,7 @@ void RawImageSource::getImage (const ColorTemp &ctemp, int tran, Imagefloat* ima
             }
 
             if(d1x) {
-                transLineD1x (line_red, line_grn, line_blue, ix, image, tran, imwidth, imheight, d1xHeightOdd, doClip);
+                transLineD1x (line_red, line_grn, line_blue, ix, image, tran, imwidth, imheight, d1xHeightOdd);
             } else if(fuji) {
                 transLineFuji (line_red, line_grn, line_blue, ix, image, tran, imheight, fw);
             } else {
@@ -3931,9 +3876,9 @@ lab2ProphotoRgbD50(float L, float A, float B, float& r, float& g, float& b)
     r = prophoto_xyz[0][0] * X + prophoto_xyz[0][1] * Y + prophoto_xyz[0][2] * Z;
     g = prophoto_xyz[1][0] * X + prophoto_xyz[1][1] * Y + prophoto_xyz[1][2] * Z;
     b = prophoto_xyz[2][0] * X + prophoto_xyz[2][1] * Y + prophoto_xyz[2][2] * Z;
-    r = CLIP01(r);
-    g = CLIP01(g);
-    b = CLIP01(b);
+    // r = CLIP01(r);
+    // g = CLIP01(g);
+    // b = CLIP01(b);
 }
 
 // Converts raw image including ICC input profile to working space - floating point version
diff --git a/rtengine/rt_math.h b/rtengine/rt_math.h
index a1dc11d44..8a32f52b9 100644
--- a/rtengine/rt_math.h
+++ b/rtengine/rt_math.h
@@ -138,6 +138,27 @@ constexpr std::uint8_t uint16ToUint8Rounded(std::uint16_t i)
     return ((i + 128) - ((i + 128) >> 8)) >> 8;
 }
 
+// Range test (OOG presumably stands for "out of gamut"): returns true when
+// val lies outside the closed range [T(0), high]. high defaults to T(MAXVAL).
+template <typename T>
+constexpr bool OOG(const T &val, const T &high=T(MAXVAL))
+{
+    return (val > high) || (val < T(0));
+}
+
+// Stores val into out, unless the value currently held by out fails the
+// OOG() test (with its default upper bound), in which case out is left
+// untouched. Note that the check looks at out, not at val.
+template <typename T>
+void setUnlessOOG(T &out, const T &val)
+{
+    if (OOG(out)) {
+        return;
+    }
+
+    out = val;
+}
+
 
 template <typename T>
 bool invertMatrix(const std::array<std::array<T, 3>, 3> &in, std::array<std::array<T, 3>, 3> &out)
@@ -165,6 +186,7 @@ bool invertMatrix(const std::array<std::array<T, 3>, 3> &in, std::array<std::arr
     return true;
 }
 
+
 template <typename T>
 std::array<std::array<T, 3>, 3> dotProduct(const std::array<std::array<T, 3>, 3> &a, const std::array<std::array<T, 3>, 3> &b)
 {
@@ -199,6 +221,5 @@ std::array<T, 3> dotProduct(const std::array<std::array<T, 3>, 3> &a, const std:
     return res;
 }
 
-
 }
 
diff --git a/rtengine/rtthumbnail.cc b/rtengine/rtthumbnail.cc
index 8b04a7be0..971e8cf2a 100644
--- a/rtengine/rtthumbnail.cc
+++ b/rtengine/rtthumbnail.cc
@@ -1131,11 +1131,11 @@ IImage8* Thumbnail::processImage (const procparams::ProcParams& params, eSensorT
 
         for (int j = 0; j < rwidth; j++) {
             float red = baseImg->r (i, j) * rmi;
-            baseImg->r (i, j) = CLIP (red);
+            baseImg->r (i, j) = /*CLIP*/ (red);
             float green = baseImg->g (i, j) * gmi;
-            baseImg->g (i, j) = CLIP (green);
+            baseImg->g (i, j) = /*CLIP*/ (green);
             float blue = baseImg->b (i, j) * bmi;
-            baseImg->b (i, j) = CLIP (blue);
+            baseImg->b (i, j) = /*CLIP*/ (blue);
 
         }
     }
@@ -1327,6 +1327,7 @@ IImage8* Thumbnail::processImage (const procparams::ProcParams& params, eSensorT
             }
     }
 
+    
     // luminance processing
 //  ipf.EPDToneMap(labView,0,6);
 
@@ -1396,7 +1397,7 @@ IImage8* Thumbnail::processImage (const procparams::ProcParams& params, eSensorT
         ipf.ciecam_02float (cieView, adap, 1, 2, labView, &params, customColCurve1, customColCurve2, customColCurve3, dummy, dummy, CAMBrightCurveJ, CAMBrightCurveQ, CAMMean, 5, sk, execsharp, d, dj, yb, rtt);
         delete cieView;
     }
-
+    
     // color processing
     //ipf.colorCurve (labView, labView);
 
diff --git a/rtengine/tmo_fattal02.cc b/rtengine/tmo_fattal02.cc
index 0a88958d1..c6faef2e7 100644
--- a/rtengine/tmo_fattal02.cc
+++ b/rtengine/tmo_fattal02.cc
@@ -1104,7 +1104,7 @@ void ImProcFunctions::ToneMapFattal02 (Imagefloat *rgb)
     }
 
     float oldMedian;
-    const float percentile = float(LIM(1, params->fattal.anchor, 100)) / 100.f;
+    const float percentile = float(LIM(params->fattal.anchor, 1, 100)) / 100.f;
     findMinMaxPercentile (Yr.data(), Yr.getRows() * Yr.getCols(), percentile, oldMedian, percentile, oldMedian, multiThread);
     // median filter on the deep shadows, to avoid boosting noise
     // because w2 >= w and h2 >= h, we can use the L buffer as temporary buffer for Median_Denoise()
@@ -1159,15 +1159,15 @@ void ImProcFunctions::ToneMapFattal02 (Imagefloat *rgb)
         for (int x = 0; x < w; x++) {
             int xx = x * wr + 1;
 
-            float Y = std::max(Yr (x, y), epsilon);
-            float l = std::max (L (xx, yy), epsilon) * (scale / Y);
-            rgb->r (y, x) = std::max (rgb->r (y, x), 0.f) * l;
-            rgb->g (y, x) = std::max (rgb->g (y, x), 0.f) * l;
-            rgb->b (y, x) = std::max (rgb->b (y, x), 0.f) * l;
+            float Y = std::max(Yr(x, y), epsilon);
+            float l = std::max(L(xx, yy), epsilon) * (scale / Y);
+            rgb->r(y, x) *= l;
+            rgb->g(y, x) *= l;
+            rgb->b(y, x) *= l;
 
-            assert (std::isfinite (rgb->r (y, x)));
-            assert (std::isfinite (rgb->g (y, x)));
-            assert (std::isfinite (rgb->b (y, x)));
+            assert(std::isfinite(rgb->r(y, x)));
+            assert(std::isfinite(rgb->g(y, x)));
+            assert(std::isfinite(rgb->b(y, x)));
         }
     }
 }
diff --git a/rtgui/editorpanel.cc b/rtgui/editorpanel.cc
index 278b14fe0..60dd25e7e 100644
--- a/rtgui/editorpanel.cc
+++ b/rtgui/editorpanel.cc
@@ -1978,6 +1978,7 @@ bool EditorPanel::saveImmediately (const Glib::ustring &filename, const SaveForm
 {
     rtengine::procparams::ProcParams pparams;
     ipc->getParams (&pparams);
+
     rtengine::ProcessingJob *job = rtengine::ProcessingJob::create (ipc->getInitialImage(), pparams);
 
     // save immediately
@@ -1985,7 +1986,9 @@ bool EditorPanel::saveImmediately (const Glib::ustring &filename, const SaveForm
 
     int err = 0;
 
-    if (sf.format == "tif") {
+    if (gimpPlugin) {
+        err = img->saveAsTIFF (filename, 32, true);
+    } else if (sf.format == "tif") {
         err = img->saveAsTIFF (filename, sf.tiffBits, sf.tiffUncompressed);
     } else if (sf.format == "png") {
         err = img->saveAsPNG (filename, sf.pngBits);
diff --git a/rtgui/main.cc b/rtgui/main.cc
index 55d27125c..80f055bbf 100644
--- a/rtgui/main.cc
+++ b/rtgui/main.cc
@@ -662,15 +662,8 @@ int main (int argc, char **argv)
             m.run (*rtWindow);
             gdk_threads_leave();
 
-            if (gimpPlugin &&
-                    rtWindow->epanel && rtWindow->epanel->isRealized()) {
-                SaveFormat sf;
-                sf.format = "tif";
-                sf.tiffBits = 16;
-                sf.tiffUncompressed = true;
-                sf.saveParams = true;
-
-                if (!rtWindow->epanel->saveImmediately (argv2, sf)) {
+            if (gimpPlugin && rtWindow->epanel && rtWindow->epanel->isRealized()) {
+                if (!rtWindow->epanel->saveImmediately(argv2, SaveFormat())) {
                     ret = -2;
                 }
             }