Hardware Accelerated 3D on the SSD1306

I found something rather interesting: one can achieve hardware-accelerated 3D graphics on the SSD1306.

Here’s how it works:

  • First, we need to set the screen to mux=1. Then we activate the accelerator by writing to register 0xD2 with the value 4 (or possibly 5 or 0xC? — the exact value is uncertain). It will also work without this register if it is unsupported, but then the VRAM needs to be redrawn by the CPU. This is doable on I2C, though not as good as the accelerator, and it should be easier on SPI, but I have not tested that yet.

Next, here is how to draw triangles directly using the 128x2 image that is created when mux=1.

I shift this image along the y-axis for each triangle. For example, if a triangle has coordinates (10, 5), (20, 12), and (20, 30), the line will smoothly move across the screen from y=5 to y=30.

To achieve this, I use a conventional triangle drawing technique that usually involves calling a function called “horizontal_line (x_start_of_the_line, y, length_of_line, color)”. Typically, when drawing to a buffer, you can simply use memset to draw the line into the buffer.

However, in this case, I’m not drawing to a buffer; I’m drawing directly onto the display. So, instead, I use the “fill rectangle” command to write the horizontal line into row 0. This can often be accomplished with just one 0x24 draw-rectangle command (two at most). Then I move my 128x2 image to the correct line on the screen, fill the row with the next horizontal line, and move the image one pixel down.

see attached code.

(Please note that this triangle drawer is a quick hack and has lots of bugs when it comes to clipping; it only works when the triangle lies within the screen coordinates… it needs fixing.)

By continuously drawing these triangles without stopping, the OLED is showing 3D graphics directly!


/*
 * Write `count` command bytes to the display, bumping i2c_counter once per
 * byte sent.
 */
static void vectoscope_send(const u8 *bytes, u8 count)
{
  for (u8 n = 0; n < count; n++)
  {
    os_i2c_write_byte_fast(bytes[n]);
    i2c_counter++;
  }
}

/*
 * Draw one horizontal span of a triangle into display row `row` (always 0)
 * and move that row to screen line y1.
 *
 *   x1  - first column of the span (clipped to 0..127)
 *   y1  - screen line the row is moved to (calls with y1 outside 0..63 are
 *         ignored)
 *   len - span length; the span covers columns x1 .. x1 + len
 *   c   - not used by this function; the span is always written with the
 *         "set" pattern (1)
 *
 * The previous span and line are remembered in static variables so that only
 * the commands needed to turn the old span into the new one are sent:
 * a 0x24 rectangle command to clear and/or set pixels, and 0xd3 to move the
 * row when y1 changed.
 */
void vectoscopeHline(s16 x1, s16 y1, s16 len, u8 c)
{
  // nothing to draw for lines outside the panel or spans starting right of it
  if (y1 < 0 || y1 > 63 || x1 > 127)
    return;

  // clip the span to columns 0..127
  if (x1 < 0)
  {
    len += x1;
    x1 = 0;
  }
  if (len < 0)
    return;
  if (x1 + len > 127)
    len -= x1 + len - 127;

  // span and line sent by the previous call
  static int last_xstart;
  static int last_xend;
  static int last_y;

  static u8 row = 0;

  const int x_end = x1 + len;

  // 0x24,direction,row_start,clear_pixels,row_end,x_start,x_end
  const u8 clear_row[] = { 0x24, 0, row, 0/* clear*/, row, 0, 127 };
  const u8 set_span[] = { 0x24, 0, row, 1/*set*/, row, x1, x1 + len };
  // move line
  const u8 move_row[] = { 0xd3, y1 & 63 };

  const int start_moved = (last_xstart != x1);
  const int end_moved = (last_xend != x_end);

  if (start_moved || end_moved)
  {
    if (start_moved && end_moved)
    {
      // the row must be cleared first unless the lit area only expanded
      if ((x1 > last_xstart) || (x_end < last_xend))
        vectoscope_send(clear_row, sizeof(clear_row));

      vectoscope_send(set_span, sizeof(set_span));
    }
    else if (!start_moved)
    {
      // only the right end changed
      if (x_end < last_xend)
      {
        // contract: clear everything right of the new end
        const u8 trim_right[] = { 0x24, 0, row, 0/* clear*/, row, x1 + len + 1, 127 };
        vectoscope_send(trim_right, sizeof(trim_right));
      }
      else
      {
        // expand: just draw the new span
        vectoscope_send(set_span, sizeof(set_span));
      }
    }
    else
    {
      // only the left end changed
      if (x1 > last_xstart)
      {
        // contract: clear everything left of the new start
        const u8 trim_left[] = { 0x24, 0, row, 0/* clear*/, row, 0, x1 - 1 };
        vectoscope_send(trim_left, sizeof(trim_left));
      }
      else
      {
        // expand: just draw the new span
        vectoscope_send(set_span, sizeof(set_span));
      }
    }

    last_xstart = x1;
    last_xend = x_end;
  }

  // move the row only when the target line changed
  if (last_y != y1)
  {
    vectoscope_send(move_row, sizeof(move_row));
    last_y = y1;
  }

  // fixed extra amount added on every call that reaches this point
  i2c_counter += 2;
}


/*
 * Draw a filled triangle directly on the display, one horizontal span per
 * scanline, by calling vectoscopeHline() for every y from the top vertex to
 * the bottom vertex.
 *
 *   (x1,y1), (x2,y2), (x3,y3) - vertices in screen coordinates; the triangle
 *       is skipped unless all three satisfy 0 <= x <= 126 and 0 <= y <= 62
 *       (there is no real clipping)
 *   c - passed through to vectoscopeHline()
 *
 * The function opens an I2C transaction (start, address, control bytes) but
 * does not close it; the caller is expected to issue the stop.
 *
 * The vertices are sorted by y. The long edge (1 -> 3) and the current short
 * edge (1 -> 2, then 2 -> 3) are walked in parallel with integer error
 * accumulators (Bresenham-style); for each scanline the smallest and largest
 * x reached on either edge give the span that is drawn.
 */
void VectoscopeTriangle(s16 x1, s16 y1, s16 x2, s16 y2, s16 x3, s16 y3, u8 c)
{
  // NOTE(review): interrupts are disabled here and never re-enabled in this
  // function, including on the early returns below - confirm the caller
  // restores them.
  cli();

  // Skip the triangle unless every vertex is inside the drawable area.
  if (x1 < 0)return;
  if (x1 > 126)return;
  if (y1 < 0)return;
  if (y1 > 62)return;

  if (x2 < 0)return;
  if (x2 > 126)return;
  if (y2 < 0)return;
  if (y2 > 62)return;

  // The original tested vertex 1 a second time here, so vertex 3 was never
  // validated.
  if (x3 < 0)return;
  if (x3 > 126)return;
  if (y3 < 0)return;
  if (y3 > 62)return;

  os_i2c_start();
  os_i2c_write_byte(SSD1306_ADDRESS);
  os_i2c_write_byte(0x0);  // command mode
  // NOTE(review): the original comment labels this byte "command mode" as
  // well; confirm what 0x40 is meant to do at this point.
  os_i2c_write_byte(0x40);

  s16 t1x, t2x, y, minx, maxx, t1xp, t2xp;
  u8 change = 0;   // bit 0: short edge is y-major, bit 1: long edge is y-major

  s16 signx1, signx2, dx1, dy1, dx2, dy2;
  s16 e1, e2;
  // Sort vertices so that y1 <= y2 <= y3
  if (y1 > y2)
  {
    swapu8(y1, y2);
    swapu8(x1, x2);
  }
  if (y1 > y3)
  {
    swapu8(y1, y3);
    swapu8(x1, x3);
  }
  if (y2 > y3)
  {
    swapu8(y2, y3);
    swapu8(x2, x3);
  }

  // Both edge walkers start at the top vertex
  t1x = x1;
  t2x = x1;

  y = y1; // Starting points

  // Short edge 1 -> 2: absolute x delta plus step direction
  dx1 = (s8)(x2 - x1);
  if (dx1 < 0)
  {
    dx1 = -dx1;
    signx1 = -1;
  }
  else
    signx1 = 1;
  dy1 = (s8)(y2 - y1);

  // Long edge 1 -> 3
  dx2 = (s8)(x3 - x1);
  if (dx2 < 0)
  {
    dx2 = -dx2;
    signx2 = -1;
  }
  else signx2 = 1;

  dy2 = (s8)(y3 - y1);

  // Keep the larger delta in dx; remember in `change` when the edge is
  // steeper than 45 degrees and the roles were swapped.
  if (dy1 > dx1)
  { // swap values
    swapu8(dx1, dy1);
    change |= 1; // = true;
  }
  if (dy2 > dx2)
  { // swap values
    swapu8(dy2, dx2);
    change |= 2; // = true;
  }

  e2 = (u8)(dx2 >> 1);
  // Flat top, just process the second half
  if (y1 == y2) goto next;

  e1 = (u8)(dx1 >> 1);

  // First half: from y1 down to (but not including) y2
  for (u8 i = 0; i < dx1;)
  {
    t1xp = 0; t2xp = 0;
    if (t1x < t2x)
    {
      minx = t1x;
      maxx = t2x;
    }
    else
    {
      minx = t2x;
      maxx = t1x;
    }
    // process first line until y value is about to change
    while (i < dx1)
    {
      i++;
      e1 += dy1;
      while (e1 >= dx1)
      {
        e1 -= dx1;
        if (change & 1)
          t1xp = signx1;
        else
          goto next1;
      }
      if (change & 1) break;

      t1x += signx1;
    }
    // Move line
next1:
    // process second line until y value is about to change
    while (1)

    {
      e2 += dy2;
      while (e2 >= dx2) {
        e2 -= dx2;
        if (change & 2)
          t2xp = signx2;
        else
          goto next2;
      }
      if (change & 2)
        break;
      t2x += signx2;
    }
next2:

    // Widen the span to include both edge positions reached on this line
    if (minx > t1x) minx = t1x;
    if (minx > t2x) minx = t2x;
    if (maxx < t1x) maxx = t1x;
    if (maxx < t2x) maxx = t2x;

    vectoscopeHline(minx, y, maxx - minx, c);

    // Now increase y
    if (!(change & 1)) t1x += signx1;
    t1x += t1xp;
    if (!(change & 2)) t2x += signx2;
    t2x += t2xp;
    y += 1;
    if (y == y2) break;

  }
next:
  // Second half: short edge is now 2 -> 3, the long edge walker continues
  dx1 = (s8)(x3 - x2);
  if (dx1 < 0)
  {
    dx1 = -dx1;
    signx1 = -1;
  }
  else
    signx1 = 1;
  dy1 = (s8)(y3 - y2);
  t1x = x2;

  if (dy1 > dx1)
  { // swap values
    swapu8(dy1, dx1);
    change |= 1; // = true;
  }
  else
    change &= 2; //false;

  e1 = (u8)(dx1 >> 1);

  for (u8 i = 0; i <= dx1; i++)
  {
    t1xp = 0; t2xp = 0;
    if (t1x < t2x)
    {
      minx = t1x;
      maxx = t2x;
    }
    else
    {
      minx = t2x;
      maxx = t1x;
    }
    // process first line until y value is about to change
    while (i < dx1) {
      e1 += dy1;
      while (e1 >= dx1)
      {
        e1 -= dx1;
        if (change & 1)
        {
          t1xp = signx1;
          break;
        }
        else
          goto next3;
      }
      if (change & 1)
        break;

      t1x += signx1;
      if (i < dx1) i++;
    }
next3:
    // process second line until y value is about to change
    while (t2x != x3)
    {
      e2 += dy2;
      while (e2 >= dx2)
      {
        e2 -= dx2;
        if (change & 2)
          t2xp = signx2;
        else
          goto next4;
      }
      if (change & 2)
        break;

      t2x += signx2;
    }
next4:

    // Widen the span to include both edge positions reached on this line
    if (minx > t1x) minx = t1x;
    if (minx > t2x) minx = t2x;
    if (maxx < t1x) maxx = t1x;
    if (maxx < t2x) maxx = t2x;

    vectoscopeHline(minx, y, maxx - minx, c);

    // Now increase y
    if (!(change & 1)) t1x += signx1;
    t1x += t1xp;
    if (!(change & 2)) t2x += signx2;
    t2x += t2xp;
    y += 1;
    if (y > y3) return;
  }
}


--------------------------------- thecube:

/* Display size in pixels and the derived screen centre. */
#define SCREEN_WIDTH 128
#define SCREEN_HEIGHT 64
#define NUM_OF_INDICES 12
#define SCREEN_CENTER_X  (SCREEN_WIDTH / 2)
#define SCREEN_CENTER_Y  (SCREEN_HEIGHT / 2)
/* Projection constants used by Calc3D(). The cast is wrapped in parentheses
 * so the macro expands to a single expression in every context. */
#define OBJ_SCALE ((float)2500)
#define CAMERA_DISTANCE 15

/* Object position offsets added after scaling, and the current rotation
 * angles in radians (passed to sin()/cos() and advanced by Calc3D()). */
float objX = 0;
float objY = 0;
float objZ = 10;
float rotationX = .2;
float rotationY = .3;

 /*
  * Cube geometry as a flat list of x,y,z coordinates: 12 triangles with
  * 3 corners each (108 floats). Every corner coordinate is -1 or +1. Each
  * row below is one triangle; the trailing comment names the cube face that
  * all three corners share.
  */
 const float vertices[] = {
  -1.0f,-1.0f,-1.0f,   -1.0f,-1.0f, 1.0f,   -1.0f, 1.0f, 1.0f,  /* x = -1 */
   1.0f, 1.0f,-1.0f,   -1.0f,-1.0f,-1.0f,   -1.0f, 1.0f,-1.0f,  /* z = -1 */
   1.0f,-1.0f, 1.0f,   -1.0f,-1.0f,-1.0f,    1.0f,-1.0f,-1.0f,  /* y = -1 */
   1.0f, 1.0f,-1.0f,    1.0f,-1.0f,-1.0f,   -1.0f,-1.0f,-1.0f,  /* z = -1 */
  -1.0f,-1.0f,-1.0f,   -1.0f, 1.0f, 1.0f,   -1.0f, 1.0f,-1.0f,  /* x = -1 */
   1.0f,-1.0f, 1.0f,   -1.0f,-1.0f, 1.0f,   -1.0f,-1.0f,-1.0f,  /* y = -1 */
  -1.0f, 1.0f, 1.0f,   -1.0f,-1.0f, 1.0f,    1.0f,-1.0f, 1.0f,  /* z = +1 */
   1.0f, 1.0f, 1.0f,    1.0f,-1.0f,-1.0f,    1.0f, 1.0f,-1.0f,  /* x = +1 */
   1.0f,-1.0f,-1.0f,    1.0f, 1.0f, 1.0f,    1.0f,-1.0f, 1.0f,  /* x = +1 */
   1.0f, 1.0f, 1.0f,    1.0f, 1.0f,-1.0f,   -1.0f, 1.0f,-1.0f,  /* y = +1 */
   1.0f, 1.0f, 1.0f,   -1.0f, 1.0f,-1.0f,   -1.0f, 1.0f, 1.0f,  /* y = +1 */
   1.0f, 1.0f, 1.0f,   -1.0f, 1.0f, 1.0f,    1.0f,-1.0f, 1.0f   /* z = +1 */
};

/*
 * Rotate the 2D point (x, y) using a precomputed sine `s` and cosine `c`.
 *
 *   *r1 receives x*c - y*s
 *   *r2 receives y*c + x*s
 *
 * `angle` is accepted but not used; only `s` and `c` enter the computation.
 * x and y are taken by value, so r1/r2 may point at the caller's own
 * coordinate variables.
 */
static void rotate(float x, float y, float angle,float s,float c, float *r1, float *r2)
{
  const float first = x * c - y * s;
  const float second = y * c + x * s;

  *r1 = first;
  *r2 = second;
}
/*
 * One value per cube triangle (12 entries).
 * NOTE(review): presumably a per-triangle brightness/colour; the table is
 * not referenced by the code shown here - confirm its use.
 */
static const u8 tri_color[12] = {
  50, 63, 40, 63, 50, 40,
  40, 50, 50, 63, 63, 40,
};
/*
 * A projected triangle: three screen-space corners plus a colour value `c`
 * and a depth value `z`.
 * NOTE(review): the struct is not used by the code shown here; the meaning
 * of `c` and `z` is inferred from their names - confirm.
 */
typedef struct triangle
{
  u8 x1;
  u8 y1;
  u8 x2;
  u8 y2;
  u8 x3;
  u8 y3;
  u8 c;
  u8 z;
} triangle;



/*
 * Render one pass of the rotating cube.
 *
 * Each of the 36 corner entries in `vertices` is rotated about two axes,
 * scaled, translated, perspective-divided and moved to the screen centre.
 * After every third corner the triangle's orientation is tested with a 2D
 * cross product and only triangles with a positive result are handed to
 * VectoscopeTriangle(). Finally both rotation angles are advanced.
 *
 * Reads objX/objY/objZ; reads and updates rotationX/rotationY.
 */
void Calc3D() {
  // sine/cosine of both angles are the same for every corner in this pass
  const float srx = sin(rotationX);
  const float crx = cos(rotationX);

  const float sry = sin(rotationY);
  const float cry = cos(rotationY);

  u8 cnt = 3;      // corners still missing for the current triangle

  int x[3], y[3];  // projected corners, filled from index 2 down to 0

  for (int i = 0; i < 3*12; i++)   // 12 triangles, 3 corners each
  {
    u8 a = i*3;
    float x1, y1, z1;
    x1 = vertices[a + 0];
    y1 = vertices[a + 1];
    z1 = vertices[a + 2];

    rotate(x1, z1, rotationY,sry,cry,&x1,&z1);
    rotate(y1, z1, rotationX,srx,crx,&y1,&z1);

    x1 = (float)(x1 * OBJ_SCALE);
    y1 = (float)(y1 * OBJ_SCALE);

    x1 += objX;
    y1 += objY;
    z1 += objZ;

    // perspective divide
    float inverse = 1.0/(z1 * CAMERA_DISTANCE);
    x1 *= inverse;
    y1 *= inverse;

    x1 += SCREEN_CENTER_X;
    y1 += SCREEN_CENTER_Y;

    cnt--;
    x[cnt] = x1;
    y[cnt] = y1;
    if (!cnt)
    {
      // Perform hidden surface removal by checking triangle orientation
      float dx1 = x[1] - x[0];
      float dy1 = y[1] - y[0];
      float dx2 = x[2] - x[0];
      float dy2 = y[2] - y[0];
      float cross_product = dx1 * dy2 - dy1 * dx2;

      if (cross_product > 0) {
        // Call the triangle function only for visible triangles
        VectoscopeTriangle(x[0],y[0],x[1],y[1],x[2],y[2],0);
      }

      cnt = 3;
      // NOTE(review): the stop is issued for every triangle, including
      // culled ones for which VectoscopeTriangle() never sent a start -
      // confirm the I2C layer tolerates that.
      os_i2c_stop();
    }
  }

  // Advance the animation. The angles are wrapped to one turn so they do not
  // grow without bound and lose float precision on long runs.
  const float two_pi = 6.28318530718f;
  rotationX += .051;
  if (rotationX > two_pi) rotationX -= two_pi;
  rotationY += .05;
  if (rotationY > two_pi) rotationY -= two_pi;
}



4 Likes

I also created a repository with the hack that achieved this, so I can continue exploring and do not lose it. Please note that this version of the graphics library is massively hacked and probably buggy, so it is only useful as a reference. See the Vectorscope functions; this is where it started, after @Cnlohr suggested using that stuff as a vectorscope.

1 Like

One more explanation of how this works, which I wrote on Discord:

  1. It's actually very simple: set the screen to a height of 2, so you get a 128x2 image. Now, for every triangle, move this image to the top of the triangle on the screen. If the triangle is (10,10), (20,20), (10,30), the two display lines of the SSD1306 are moved to position y=10, the top of the triangle. Now the accelerator is used to clear the rectangle and draw the line of the triangle, the image is moved down one pixel, the accelerator draws the next line… and this happens very quickly. So every triangle is a single frame, maybe only 3 pixels high. If you switch the SSD1306 to 2 lines and tune everything up, it has a frame rate of 200 Hz * 32 or more… so you can move this line very fast over your triangles, and change its content very fast with the accelerator (but this could also be done with the CPU, if you have SPI).

  2. [17:39]

So this really has no frame rate; it is more like a vectorscope, constantly moving. This is where it started: @cnlohr suggested using my stuff as a vectorscope, and it worked very well. OK, so you have a vectorscope: you can move your dot over the screen very fast, and this way you can draw lines, like the Vectrex. I can do this too, but… on the SSD1306 it is not a point, it is a line.

  1. So you should think of this as a vectorscope that has no dot, but a line at the position of the dot, and you can set the length of the line that is attached to the right side of the dot (let's not discuss physics). This is used to draw triangles by moving the line from the top of the triangle down along the left outline, expanding or contracting it for every y position. You can also do that with the MCU, without the undocumented command, by just changing the VRAM; often only very few bytes change. I still have to implement using the CPU when fewer than 4 bytes change, because in some conditions using the CPU to modify the line in VRAM is faster.

  2. [17:47]

Also, on SPI the conditions are a bit better. I use an I2C display, so I have to send: start condition, display address, and 0/0x40 to switch from command to data mode and back. On SPI you only flip a pin and you are in command mode.

  1. [17:47]

And SPI is faster; it can go up to 40 MHz, but I don't know if the SSD1306 can do that.

You really are pushing this controller to the max!
With so many discoveries (and many forum threads!), it’s a bit hard to keep track… will you write up a ‘hackers’ datasheet at some point? It would be useful to pull together all the info in one resource, preferably in a similar style of the official doc :star_struck:

I summarized the registers in the main README of the GitHub grayscale repository
(and, well, a bit is still in the comments of the graphics engine code, where I created “DisplayRegister1234(value…)” functions when fuzzing, then changed them so I could fuzz the bits I wanted to, and commented these functions… this is now in the new 3D repository on GitHub and needs to be collected).

Registers D1 and D2 are interesting… and there are some more. Nobody knows what the chip can really do; there must be more. I scanned the register range and tried to validate the registers across 2 displays (one “new” one which has the “special grayscale command”; I found an additional clock divider on this display, but unfortunately it has died from the plug-in/plug-out violence). When I found the ssd1336>>B<< datasheet I decided to rescan the “advanced graphics command” (or something like that) range, because there are only a few commands left, and for some strange reason I had missed the rectangle command. I did no systematic search for single-byte commands. Now I have some registers that I know, some that I “almost know”/know in part, some where I guess, and still a lot where I know nothing…

and scrolling is a good choice, because you will see if the frame rate changes, so i have a chance to find discharge cycles and stuff… i did not do that for every register, i stopped at D2 for obvious reasons.

And I think experiences with that stuff should be collected (somewhere here in one of the threads?), because it makes it much easier to reason out what the stuff actually does… I actually thought that I would create threads in this forum and somehow “dump” that stuff in a place where people can find it somewhat useful and interesting; maybe/probably someone finds out more…


The artifacts where it appears to be “torn open” come from my camera, which is confused by the many “frames”.

and a better shot:


(In real life there are no black artifacts; it's the camera trying to sync.)

1 Like

The color of the triangle can be set by changing the 1 in the 0x24 command (the byte marked `1/*set*/` in the code above):

Together with 4 charge pump levels, this will give 8 gray levels, I guess…

other charge level:

1 Like

Tip: do not apply sigma-delta modulation to the charge pump register; the SSD1306 crashes badly.

Mega2560, first port of the ESP32 version: 15 triangles without flicker, much brighter than on the ESP, almost no artifacts; the ESP has unstable timing.

the quality can be adjusted on the fly for constant frame rate:

and the accelerator code is much more compact and faster:


/*
 * Quality threshold for vectoscopeHline2_q(): a span is only redrawn when it
 * differs from the last drawn span by at least this many pixels (start and
 * end differences added together).
 */
u8 __3d_accept_error =1;

/*
 * Compact variant of the span drawer with adjustable quality.
 *
 *   x1  - first column of the span (clipped to 0..127)
 *   y1  - screen line the row is moved to (calls with y1 outside 0..63 are
 *         ignored)
 *   len - span length; the span covers columns x1 .. x1 + len
 *   c   - pattern byte placed in the 0x24 "set" command
 *
 * If the span is close enough to the one drawn last (see
 * __3d_accept_error) the row content is left as it is; otherwise the whole
 * row is cleared and the new span is set. In both cases the row is then
 * moved to line y1 with 0xd3.
 *
 * The original skipped the redraw with `goto do_move`, which jumped over
 * initialised declarations and placed a label directly in front of a
 * declaration; the same flow is expressed here with a plain `if`.
 */
void vectoscopeHline2_q (s16 x1, s16 y1, s16 len, u8 c)
{
  if (y1 < 0)return;
  if (y1 > 63)return;
  if (x1 > 127)return;
  if (x1 < 0)len += x1, x1 = 0;
  if (len < 0)return;
  if (x1 + len > 127)
  {
    len -= x1 + len - 127;
  }

  // span drawn by the last call that actually redrew the row
  static u8 row = 0,last_x,last_end;

  // how far start and end moved away from the span currently in the row
  u8 error=abs(last_x-x1)+abs(last_end-x1-len);

  if (error >= __3d_accept_error)
  {
    // 0x24,direction,row_start,clear_pixels,row_end,x_start,x_end,
    const u8 cmd_clear[] = {
      0x24, 0,
      row,
      0/* clear*/,
      row,
      0, 127,
    };
    const u8 cmd_set[] = {
      0x24, 0, row, c/*set*/, row, x1, x1 + len,
    };// set line

    SEND_CMD(cmd_clear);

    SEND_CMD(cmd_set);
    last_x=x1;
    last_end=x1+len;
  }

  // move the row to the requested line
  const u8 cmd_move[] = {
    0xd3, y1 & 63,
  };

  SEND_CMD(cmd_move);
}



.....

    // Demo excerpt: animate 7 triangles whose corners move along sine curves.
    // cnts[] holds one running counter per coordinate; it is static so the
    // values carry over from one pass to the next.
    static u16 cnts[256];
    // every counter advances at its own rate (250 + 90*i per pass)
    for(int i=0;i<256;i++)cnts[i]+=250+90*i;
    
      
    // triangle i uses counters i*6 .. i*6+5: x,y for each of its 3 corners
    for(int i=0;i<7 ;i++){
    
    // counter/256 selects the sintab128 entry; y values are halved.
    // NOTE(review): presumably sintab128 spans 0..127, so halving fits y
    // into the 64-line display - confirm against the table definition.
    VectoscopeTriangle(
      
      sintab128[cnts[i*6+0]/256],
      sintab128[cnts[i*6+1]/256]/2,
      
      sintab128[cnts[i*6+2]/256],
      sintab128[cnts[i*6+3]/256]/2,
      
      sintab128[cnts[i*6+4]/256],
      sintab128[cnts[i*6+5]/256]/2,
      1
      );
   ....

    // Demo excerpt with timing: remember when this batch of triangles began
    long long start=millis();

    // cnts[] holds one running counter per coordinate; it is static so the
    // values carry over from one pass to the next.
    static u16 cnts[256];
    // every counter advances at its own rate (250 + 90*i per pass)
    for(int i=0;i<256;i++)cnts[i]+=250+90*i;
    
      
    // triangle i uses counters i*6 .. i*6+5: x,y for each of its 3 corners
    for(int i=0;i<7 ;i++){
    
    // counter/256 selects the sintab128 entry; y values are halved.
    // NOTE(review): presumably sintab128 spans 0..127, so halving fits y
    // into the 64-line display - confirm against the table definition.
    VectoscopeTriangle(
      
      sintab128[cnts[i*6+0]/256],
      sintab128[cnts[i*6+1]/256]/2,
      
      sintab128[cnts[i*6+2]/256],
      sintab128[cnts[i*6+3]/256]/2,
      
      sintab128[cnts[i*6+4]/256],
      sintab128[cnts[i*6+5]/256]/2,
      1
      );
   
  // Measure how long the batch took and adapt the drawing quality to it.
  long long end=millis();
    int _time=end-start;

    // A batch that finishes within the same millisecond gives _time == 0;
    // treat that as "fast enough" instead of dividing by zero.
    int fps=(_time>0)?1000/_time:1000;

    // Too slow: accept a larger span error so fewer rows are redrawn. The
    // threshold is a u8, so stop at 255 instead of wrapping around to 0.
    // Fast enough: tighten the threshold again, but never below 1.
    if(fps<25)
    {
      if(__3d_accept_error<255) __3d_accept_error++;
    }
    else if(__3d_accept_error>1) __3d_accept_error--;


I built a proper 3D engine with a movable camera; triangles can be batched into render buffers to increase the triangle count, and it can render strips/fans:

5 Likes

That is amazing!

2 Likes

Please say there’s an Arduboy demo coming soon…!

2 Likes