Calculate PerlinNoise faster with an optimization to grad()

A simple optimization to the gradient function makes the noise function twice as fast.

PerlinNoise.grad()

inline float grad(int hash, float x, float y, float z)
   {  
 //float u = (h < 8) ? x : y;
 //float v = (h < 4) ? y : ((h == 12 || h == 14) ? x : z);
 //return ((h & 1) == 0 ? u : -u) + ((h & 2) == 0 ? v : -v);

 switch(hash & 0xF)
 {
  case 0x0: return  x + y;
  case 0x1: return -x + y;
  case 0x2: return  x - y;
  case 0x3: return -x - y;
  case 0x4: return  x + z;
  case 0x5: return -x + z;
  case 0x6: return  x - z;
  case 0x7: return -x - z;
  case 0x8: return  y + z;
  case 0x9: return -y + z;
  case 0xA: return  y - z;
  case 0xB: return -y - z;
  case 0xC: return  y + x;
  case 0xD: return -y + z;
  case 0xE: return  y - x;
  case 0xF: return -y - z;
  default: return 0; // never happens
 }
   }

PerlinNoise.noise() (click code to expand)

int   perm[512];

   void initPerlinNoise()
   {
      int i, permutation[] = { 151, 160, 137, 91, 90, 15, 131, 13, 201, 95, 96, 53, 194, 233, 7, 225, 140, 36, 103, 30, 69, 142, 8, 99, 37, 240, 21, 10, 23, 190, 6, 148, 247, 120, 234, 75, 0, 26, 197, 62, 94, 252, 219, 203, 117, 35, 11, 32, 57, 177, 33, 88, 237, 149, 56, 87, 174, 20, 125, 136, 171, 168, 68, 175, 74, 165, 71, 134, 139, 48, 27, 166, 77, 146, 158, 231, 83, 111, 229, 122, 60, 211, 133, 230, 220, 105, 92, 41, 55, 46, 245, 40, 244, 102, 143, 54, 65, 25, 63, 161, 1, 216, 80, 73, 209, 76, 132, 187, 208, 89, 18, 169, 200, 196, 135, 130, 116, 188, 159, 86, 164, 100, 109, 198, 173, 186, 3, 64, 52, 217, 226, 250, 124, 123, 5, 202, 38, 147, 118, 126, 255, 82, 85, 212, 207, 206, 59, 227, 47, 16, 58, 17, 182, 189, 28, 42, 223, 183, 170, 213, 119, 248, 152, 2, 44, 154, 163, 70, 221, 153, 101, 155, 167, 43, 172, 9, 129, 22, 39, 253, 19, 98, 108, 110, 79, 113, 224, 232, 178, 185, 112, 104, 218, 246, 97, 228, 251, 34, 242, 193, 238, 210, 144, 12, 191, 179, 162, 241, 81, 51, 145, 235, 249,
         14, 239, 107, 49, 192, 214, 31, 181, 199, 106, 157, 184, 84, 204, 176, 115, 121, 50, 45, 127, 4, 150, 254, 138, 236, 205, 93, 222, 114, 67, 29, 24, 72, 243, 141, 128, 195, 78, 66, 215, 61, 156, 180 };

      for (i = 0; i < 256; i++)
         perm[256 + i] = perm[i] = permutation[i];
   }


   inline float fade(float t)
   {
      return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
   }

   inline float lerp(float t, float a, float b)
   {
      return a + t * (b - a);
   }

   inline float grad(int hash, float x, float y, float z)
   {  
 //float u = (h < 8) ? x : y;
 //float v = (h < 4) ? y : ((h == 12 || h == 14) ? x : z);
 //return ((h & 1) == 0 ? u : -u) + ((h & 2) == 0 ? v : -v);

 switch(hash & 0xF)
 {
  case 0x0: return  x + y;
  case 0x1: return -x + y;
  case 0x2: return  x - y;
  case 0x3: return -x - y;
  case 0x4: return  x + x;
  case 0x5: return -x + x;
  case 0x6: return  x - x;
  case 0x7: return -x - x;
  case 0x8: return  y + x;
  case 0x9: return -y + x;
  case 0xA: return  y - x;
  case 0xB: return -y - x;
  case 0xC: return  y + z;
  case 0xD: return -y + x;
  case 0xE: return  y - x;
  case 0xF: return -y - z;
  default: return 0; // never happens
 }
   }

   static inline float __fastcall noise(float x, float y, float z)
   {
      jint ix,iy,iz,gx,gy,gz;
      jint a0,b0,aa,ab,ba,bb;

      float aa0,ab0,ba0,bb0;
      float aa1,ab1,ba1,bb1;
      float a1,a2,a3,a4,a5,a6,a7,a8;
      float u,v,w,a8_5,a4_1;

      ix = (jint)x; x-=ix;
      iy = (jint)y; y-=iy;
      iz = (jint)z; z-=iz;

      gx = ix & 0xFF;
      gy = iy & 0xFF;
      gz = iz & 0xFF;
      
      a0 = gy+perm[gx];
      b0 = gy+perm[gx + 1];
      aa = gz+perm[a0];
      ab = gz+perm[a0 + 1];
      ba = gz+perm[b0];
      bb = gz+perm[b0 + 1];

      aa0 = perm[aa]; aa1 = perm[aa + 1];
      ab0 = perm[ab]; ab1 = perm[ab + 1];
      ba0 = perm[ba]; ba1 = perm[ba + 1];
      bb0 = perm[bb]; bb1 = perm[bb + 1];

      a1 = grad(bb1, x-1, y-1, z-1);
      a2 = grad(ab1, x  , y-1, z-1);
      a3 = grad(ba1, x-1, y  , z-1);
      a4 = grad(aa1, x  , y  , z-1);
      a5 = grad(bb0, x-1, y-1, z  );
      a6 = grad(ab0, x  , y-1, z  );
      a7 = grad(ba0, x-1, y  , z  );
      a8 = grad(aa0, x  , y  , z  );

      u = fade(x);
      v = fade(y);
      w = fade(z);

      a8_5 = lerp(v, lerp(u, a8, a7), lerp(u, a6, a5));
      a4_1 = lerp(v, lerp(u, a4, a3), lerp(u, a2, a1));
      return lerp(w, a8_5, a4_1);
   }

8 comments:

Aaron DwyerMay 16, 2011 at 9:44 PM
Riven, I am currently working on a Perlin noise implementation and I happened across your post. I like how you've cleaned up the original code (with respect to Mr. Perlin). Do you attribute the 2X speed gain to the switch statement you used in grad()?
RivenMay 16, 2011 at 9:48 PM
that was the only part of the code I changed, so I'm quite certain that's the reason for the speedup ;o)
Aaron DwyerMay 16, 2011 at 10:02 PM
Gotcha! Well, I like the switch statement--it's a lot easier to understand. I wonder if there would be less repetition in the noise output if more cases were added to the switch. My understanding of Perlin noise isn't good enough to know really. The original paper talks about computing "gradients", but seems like it's done quite differently there:

http://mrl.nyu.edu/~perlin/paper445.pdf

Thoughts? I'll probably just try it :) I'm curious, what are you using Perlin for?
RivenMay 16, 2011 at 10:13 PM
There are only 16 possible cases. There is no way to add more calculations to make the noise less repetitive.

The real solution is to layer noise and apply all all kinds of operations on it, like adding, multiplying, abs(n), max(m,n)... etc. It's virtually impossible to see any patterns after that.

Finetuning takes ages...
Aaron DwyerMay 16, 2011 at 10:39 PM
Oh I see what you mean. In fact looking at it now I believe there are actually less than 16...i.e. case 0x1 and 0xE are the same, and 0x0 and 0xC are the same. It's (x or y or z) (- or +) (x or y or z without repeating the first choice), or 3 * 2 * 2 = 12 possibilities.

I think this is why in Perlin's code he has a comment "Convert lo 4 bits of hash code into 12 gradient directions". This means the hash is nonuniform...but it still looks good ;)
RivenMay 16, 2011 at 11:00 PM
Well, 16-2 (duplicates) = 14, so the remaining pairs are:
0xB and 0xF
0x9 and 0xD

I have no doubt this contributes absolutely nothing to the discussion :o)
UnknownApril 22, 2015 at 4:41 AM
why is perm[512] 512 long? it only ever touches 257, correct?
UnknownAugust 6, 2018 at 2:51 AM
Ivan, that's not true. `perm[]` result (which can be anything from 0 to 255) is fed into itself added to some value between 0 and 255, so it does use all 512 values.

Riven :: code </> brain :: dump

2010-08-19

Calculate PerlinNoise faster with an optimization to grad()

8 comments:

Search my silly blog

About Me

Riven :: Intro

Blog Archive

License: CC - Attribution

Copyright © 2009, 2012 + any other year + Riven + All rights reserved