diff --git a/stb_truetype.h b/stb_truetype.h
index 88a2da1..22c9db4 100644
--- a/stb_truetype.h
+++ b/stb_truetype.h
@@ -453,6 +453,14 @@ int main(int arg, char **argv)
 extern "C" {
 #endif
 
+// private structure
+typedef struct
+{
+   stbtt_uint8 *data;
+   int cursor;
+   int size;
+} stbtt__buf;
+
 //////////////////////////////////////////////////////////////////////////////
 //
 // TEXTURE BAKING API
@@ -643,6 +651,13 @@ struct stbtt_fontinfo
    int loca,head,glyf,hhea,hmtx,kern; // table locations as offset from start of .ttf
    int index_map;                     // a cmap mapping for our chosen character encoding
    int indexToLocFormat;              // format needed to map from glyph index to glyph
+
+   stbtt__buf cff;                    // cff font data
+   stbtt__buf charstrings;            // the charstring index
+   stbtt__buf gsubrs;                 // global charstring subroutines index
+   stbtt__buf subrs;                  // private charstring subroutines index
+   stbtt__buf fontdicts;              // array of font dicts
+   stbtt__buf fdselect;               // map from glyph to fontdict
 };
 
 STBTT_DEF int stbtt_InitFont(stbtt_fontinfo *info, const unsigned char *data, int offset);
@@ -720,7 +735,8 @@ STBTT_DEF int  stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, in
    enum {
       STBTT_vmove=1,
       STBTT_vline,
-      STBTT_vcurve
+      STBTT_vcurve,
+      STBTT_vcubic
    };
 #endif
 
@@ -729,7 +745,7 @@ STBTT_DEF int  stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, in
    #define stbtt_vertex_type short // can't use stbtt_int16 because that's not visible in the header file
    typedef struct
    {
-      stbtt_vertex_type x,y,cx,cy;
+      stbtt_vertex_type x,y,cx,cy,cx1,cy1;
       unsigned char type,padding;
    } stbtt_vertex;
 #endif
@@ -951,6 +967,152 @@ typedef int stbtt__test_oversample_pow2[(STBTT_MAX_OVERSAMPLE & (STBTT_MAX_OVERS
 #define STBTT__NOTUSED(v)  (void)sizeof(v)
 #endif
 
+//////////////////////////////////////////////////////////////////////////
+//
+// stbtt__buf helpers to parse data from file
+//
+
+static stbtt_uint8 stbtt__buf_get8(stbtt__buf *b)
+{
+   if (b->cursor >= b->size)
+      return 0;
+   return b->data[b->cursor++];
+}
+
+static stbtt_uint8 stbtt__buf_peek8(stbtt__buf *b)
+{
+   if (b->cursor >= b->size)
+      return 0;
+   return b->data[b->cursor];
+}
+
+static void stbtt__buf_seek(stbtt__buf *b, int o)
+{
+   STBTT_assert(!(o > b->size || o < 0));
+   b->cursor = (o > b->size || o < 0) ? b->size : o;
+}
+
+static void stbtt__buf_skip(stbtt__buf *b, int o)
+{
+   stbtt__buf_seek(b, b->cursor + o);
+}
+
+static stbtt_uint32 stbtt__buf_get(stbtt__buf *b, int n)
+{
+   stbtt_uint32 v = 0;
+   int i;
+   STBTT_assert(n >= 1 && n <= 4);
+   for (i = 0; i < n; i++)
+      v = (v << 8) | stbtt__buf_get8(b);
+   return v;
+}
+
+static stbtt__buf stbtt__new_buf(const void *p, size_t size)
+{
+   stbtt__buf r;
+   STBTT_assert(size < 0x40000000);
+   r.data = (stbtt_uint8*) p;
+   r.size = (int) size;
+   r.cursor = 0;
+   return r;
+}
+
+#define stbtt__buf_get16(b)  stbtt__buf_get((b), 2)
+#define stbtt__buf_get32(b)  stbtt__buf_get((b), 4)
+
+static stbtt__buf stbtt__buf_range(const stbtt__buf *b, int o, int s)
+{
+   stbtt__buf r = stbtt__new_buf(NULL, 0);
+   if (o < 0 || s < 0 || o > b->size || s > b->size - o) return r;
+   r.data = b->data + o;
+   r.size = s;
+   return r;
+}
+
+static stbtt__buf stbtt__cff_get_index(stbtt__buf *b)
+{
+   int count, start, offsize;
+   start = b->cursor;
+   count = stbtt__buf_get16(b);
+   if (count) {
+      offsize = stbtt__buf_get8(b);
+      STBTT_assert(offsize >= 1 && offsize <= 4);
+      stbtt__buf_skip(b, offsize * count);
+      stbtt__buf_skip(b, stbtt__buf_get(b, offsize) - 1);
+   }
+   return stbtt__buf_range(b, start, b->cursor - start);
+}
+
+static stbtt_uint32 stbtt__cff_int(stbtt__buf *b)
+{
+   int b0 = stbtt__buf_get8(b);
+   if (b0 >= 32 && b0 <= 246)       return b0 - 139;
+   else if (b0 >= 247 && b0 <= 250) return (b0 - 247)*256 + stbtt__buf_get8(b) + 108;
+   else if (b0 >= 251 && b0 <= 254) return -(b0 - 251)*256 - stbtt__buf_get8(b) - 108;
+   else if (b0 == 28)               return stbtt__buf_get16(b);
+   else if (b0 == 29)               return stbtt__buf_get32(b);
+   STBTT_assert(0);
+   return 0;
+}
+
+static void stbtt__cff_skip_operand(stbtt__buf *b) {
+   int v, b0 = stbtt__buf_peek8(b);
+   STBTT_assert(b0 >= 28);
+   if (b0 == 30) {
+      stbtt__buf_skip(b, 1);
+      while (b->cursor < b->size) {
+         v = stbtt__buf_get8(b);
+         if ((v & 0xF) == 0xF || (v >> 4) == 0xF)
+            break;
+      }
+   } else {
+      stbtt__cff_int(b);
+   }
+}
+
+static stbtt__buf stbtt__dict_get(stbtt__buf *b, int key)
+{
+   stbtt__buf_seek(b, 0);
+   while (b->cursor < b->size) {
+      int start = b->cursor, end, op;
+      while (stbtt__buf_peek8(b) >= 28)
+         stbtt__cff_skip_operand(b);
+      end = b->cursor;
+      op = stbtt__buf_get8(b);
+      if (op == 12)  op = stbtt__buf_get8(b) | 0x100;
+      if (op == key) return stbtt__buf_range(b, start, end-start);
+   }
+   return stbtt__buf_range(b, 0, 0);
+}
+
+static void stbtt__dict_get_ints(stbtt__buf *b, int key, int outcount, stbtt_uint32 *out)
+{
+   int i;
+   stbtt__buf operands = stbtt__dict_get(b, key);
+   for (i = 0; i < outcount && operands.cursor < operands.size; i++)
+      out[i] = stbtt__cff_int(&operands);
+}
+
+static int stbtt__cff_index_count(stbtt__buf *b)
+{
+   stbtt__buf_seek(b, 0);
+   return stbtt__buf_get16(b);
+}
+
+static stbtt__buf stbtt__cff_index_get(stbtt__buf b, int i)
+{
+   int count, offsize, start, end;
+   stbtt__buf_seek(&b, 0);
+   count = stbtt__buf_get16(&b);
+   offsize = stbtt__buf_get8(&b);
+   STBTT_assert(i >= 0 && i < count);
+   STBTT_assert(offsize >= 1 && offsize <= 4);
+   stbtt__buf_skip(&b, i*offsize);
+   start = stbtt__buf_get(&b, offsize);
+   end = stbtt__buf_get(&b, offsize);
+   return stbtt__buf_range(&b, 2+(count+1)*offsize+start, end - start);
+}
+
 //////////////////////////////////////////////////////////////////////////
 //
 // accessors to parse data from file
@@ -1014,6 +1176,19 @@ static int stbtt_GetFontOffsetForIndex_internal(unsigned char *font_collection,
    return -1;
 }
 
+static stbtt__buf stbtt__get_subrs(stbtt__buf cff, stbtt__buf fontdict)
+{
+   stbtt_uint32 subrsoff = 0, private_loc[2] = { 0, 0 };
+   stbtt__buf pdict;
+   stbtt__dict_get_ints(&fontdict, 18, 2, private_loc);
+   if (!private_loc[1] || !private_loc[0]) return stbtt__new_buf(NULL, 0);
+   pdict = stbtt__buf_range(&cff, private_loc[1], private_loc[0]);
+   stbtt__dict_get_ints(&pdict, 19, 1, &subrsoff);
+   if (!subrsoff) return stbtt__new_buf(NULL, 0);
+   stbtt__buf_seek(&cff, private_loc[1]+subrsoff);
+   return stbtt__cff_get_index(&cff);
+}
+
 static int stbtt_InitFont_internal(stbtt_fontinfo *info, unsigned char *data, int fontstart)
 {
    stbtt_uint32 cmap, t;
@@ -1021,6 +1196,7 @@ static int stbtt_InitFont_internal(stbtt_fontinfo *info, unsigned char *data, in
 
    info->data = data;
    info->fontstart = fontstart;
+   info->cff = stbtt__new_buf(NULL, 0);
 
    cmap = stbtt__find_table(data, fontstart, "cmap");       // required
    info->loca = stbtt__find_table(data, fontstart, "loca"); // required
@@ -1029,8 +1205,61 @@ static int stbtt_InitFont_internal(stbtt_fontinfo *info, unsigned char *data, in
    info->hhea = stbtt__find_table(data, fontstart, "hhea"); // required
    info->hmtx = stbtt__find_table(data, fontstart, "hmtx"); // required
    info->kern = stbtt__find_table(data, fontstart, "kern"); // not required
-   if (!cmap || !info->loca || !info->head || !info->glyf || !info->hhea || !info->hmtx)
+
+   if (!cmap || !info->head || !info->hhea || !info->hmtx)
       return 0;
+   if (info->glyf) {
+      // required for truetype
+      if (!info->loca) return 0;
+   } else {
+      // initialization for CFF / Type2 fonts (OTF)
+      stbtt__buf b, topdict, topdictidx;
+      stbtt_uint32 cstype = 2, charstrings = 0, fdarrayoff = 0, fdselectoff = 0;
+      stbtt_uint32 cff;
+
+      cff = stbtt__find_table(data, fontstart, "CFF ");
+      if (!cff) return 0;
+
+      info->fontdicts = stbtt__new_buf(NULL, 0);
+      info->fdselect = stbtt__new_buf(NULL, 0);
+
+      // @TODO this should use size from table (not 512MB)
+      info->cff = stbtt__new_buf(data+cff, 512*1024*1024);
+      b = info->cff;
+
+      // read the header
+      stbtt__buf_skip(&b, 2);
+      stbtt__buf_seek(&b, stbtt__buf_get8(&b)); // hdrsize
+
+      // @TODO the name INDEX could list multiple fonts,
+      // but we just use the first one.
+      stbtt__cff_get_index(&b);  // name INDEX
+      topdictidx = stbtt__cff_get_index(&b);
+      topdict = stbtt__cff_index_get(topdictidx, 0);
+      stbtt__cff_get_index(&b);  // string INDEX
+      info->gsubrs = stbtt__cff_get_index(&b);
+
+      stbtt__dict_get_ints(&topdict, 17, 1, &charstrings);
+      stbtt__dict_get_ints(&topdict, 0x100 | 6, 1, &cstype);
+      stbtt__dict_get_ints(&topdict, 0x100 | 36, 1, &fdarrayoff);
+      stbtt__dict_get_ints(&topdict, 0x100 | 37, 1, &fdselectoff);
+      info->subrs = stbtt__get_subrs(b, topdict);
+
+      // we only support Type 2 charstrings
+      if (cstype != 2) return 0;
+      if (charstrings == 0) return 0;
+
+      if (fdarrayoff) {
+         // looks like a CID font
+         if (!fdselectoff) return 0;
+         stbtt__buf_seek(&b, fdarrayoff);
+         info->fontdicts = stbtt__cff_get_index(&b);
+         info->fdselect = stbtt__buf_range(&b, fdselectoff, b.size-fdselectoff);
+      }
+
+      stbtt__buf_seek(&b, charstrings);
+      info->charstrings = stbtt__cff_get_index(&b);
+   }
 
    t = stbtt__find_table(data, fontstart, "maxp");
    if (t)
@@ -1181,6 +1410,8 @@ static int stbtt__GetGlyfOffset(const stbtt_fontinfo *info, int glyph_index)
 {
    int g1,g2;
 
+   STBTT_assert(!info->cff.size);
+
    if (glyph_index >= info->numGlyphs) return -1; // glyph index out of range
    if (info->indexToLocFormat >= 2)    return -1; // unknown index->glyph map format
 
@@ -1195,15 +1426,21 @@ static int stbtt__GetGlyfOffset(const stbtt_fontinfo *info, int glyph_index)
    return g1==g2 ? -1 : g1; // if length is 0, return -1
 }
 
+static int stbtt__GetGlyphInfoT2(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1);
+
 STBTT_DEF int stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1)
 {
-   int g = stbtt__GetGlyfOffset(info, glyph_index);
-   if (g < 0) return 0;
+   if (info->cff.size) {
+      stbtt__GetGlyphInfoT2(info, glyph_index, x0, y0, x1, y1);
+   } else {
+      int g = stbtt__GetGlyfOffset(info, glyph_index);
+      if (g < 0) return 0;
 
-   if (x0) *x0 = ttSHORT(info->data + g + 2);
-   if (y0) *y0 = ttSHORT(info->data + g + 4);
-   if (x1) *x1 = ttSHORT(info->data + g + 6);
-   if (y1) *y1 = ttSHORT(info->data + g + 8);
+      if (x0) *x0 = ttSHORT(info->data + g + 2);
+      if (y0) *y0 = ttSHORT(info->data + g + 4);
+      if (x1) *x1 = ttSHORT(info->data + g + 6);
+      if (y1) *y1 = ttSHORT(info->data + g + 8);
+   }
    return 1;
 }
 
@@ -1215,7 +1452,10 @@ STBTT_DEF int stbtt_GetCodepointBox(const stbtt_fontinfo *info, int codepoint, i
 STBTT_DEF int stbtt_IsGlyphEmpty(const stbtt_fontinfo *info, int glyph_index)
 {
    stbtt_int16 numberOfContours;
-   int g = stbtt__GetGlyfOffset(info, glyph_index);
+   int g;
+   if (info->cff.size)
+      return stbtt__GetGlyphInfoT2(info, glyph_index, NULL, NULL, NULL, NULL) == 0;
+   g = stbtt__GetGlyfOffset(info, glyph_index);
    if (g < 0) return 1;
    numberOfContours = ttSHORT(info->data + g);
    return numberOfContours == 0;
@@ -1237,7 +1477,7 @@ static int stbtt__close_shape(stbtt_vertex *vertices, int num_vertices, int was_
    return num_vertices;
 }
 
-STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
+static int stbtt__GetGlyphShapeTT(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
 {
    stbtt_int16 numberOfContours;
    stbtt_uint8 *endPtsOfContours;
@@ -1463,6 +1703,416 @@ STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, s
    return num_vertices;
 }
 
+typedef struct
+{
+   int bounds;
+   int started;
+   float first_x, first_y;
+   float x, y;
+   stbtt_int32 min_x, max_x, min_y, max_y;
+
+   stbtt_vertex *pvertices;
+   int num_vertices;
+} stbtt__csctx;
+
+#define STBTT__CSCTX_INIT(bounds) {bounds,0, 0,0, 0,0, 0,0,0,0, NULL, 0}
+
+static void stbtt__track_vertex(stbtt__csctx *c, stbtt_int32 x, stbtt_int32 y)
+{
+   if (x > c->max_x || !c->started) c->max_x = x;
+   if (y > c->max_y || !c->started) c->max_y = y;
+   if (x < c->min_x || !c->started) c->min_x = x;
+   if (y < c->min_y || !c->started) c->min_y = y;
+   c->started = 1;
+}
+
+static void stbtt__cstx_v(stbtt__csctx *c, stbtt_uint8 type, stbtt_int32 x, stbtt_int32 y, stbtt_int32 cx, stbtt_int32 cy, stbtt_int32 cx1, stbtt_int32 cy1)
+{
+   if (c->bounds) {
+      stbtt__track_vertex(c, x, y);
+      if (type == STBTT_vcubic) {
+         stbtt__track_vertex(c, cx, cy);
+         stbtt__track_vertex(c, cx1, cy1);
+      }
+   } else {
+      stbtt_setvertex(&c->pvertices[c->num_vertices], type, x, y, cx, cy);
+      c->pvertices[c->num_vertices].cx1 = (stbtt_int16) cx1;
+      c->pvertices[c->num_vertices].cy1 = (stbtt_int16) cy1;
+   }
+   c->num_vertices++;
+}
+
+static void stbtt__csctx_close_shape(stbtt__csctx *ctx)
+{
+   if (ctx->first_x != ctx->x || ctx->first_y != ctx->y)
+      stbtt__cstx_v(ctx, STBTT_vline, (int)ctx->first_x, (int)ctx->first_y, 0, 0, 0, 0);
+}
+
+static void stbtt__csctx_rmove_to(stbtt__csctx *ctx, float dx, float dy)
+{
+   stbtt__csctx_close_shape(ctx);
+   ctx->first_x = ctx->x = ctx->x + dx;
+   ctx->first_y = ctx->y = ctx->y + dy;
+   stbtt__cstx_v(ctx, STBTT_vmove, (int)ctx->x, (int)ctx->y, 0, 0, 0, 0);
+}
+
+static void stbtt__csctx_rline_to(stbtt__csctx *ctx, float dx, float dy)
+{
+   ctx->x += dx;
+   ctx->y += dy;
+   stbtt__cstx_v(ctx, STBTT_vline, (int)ctx->x, (int)ctx->y, 0, 0, 0, 0);
+}
+
+static void stbtt__csctx_rccurve_to(stbtt__csctx *ctx, float dx1, float dy1, float dx2, float dy2, float dx3, float dy3)
+{
+   float cx1 = ctx->x + dx1;
+   float cy1 = ctx->y + dy1;
+   float cx2 = cx1 + dx2;
+   float cy2 = cy1 + dy2;
+   ctx->x = cx2 + dx3;
+   ctx->y = cy2 + dy3;
+   stbtt__cstx_v(ctx, STBTT_vcubic, (int)ctx->x, (int)ctx->y, (int)cx1, (int)cy1, (int)cx2, (int)cy2);
+}
+
+static stbtt__buf stbtt__get_subr(stbtt__buf idx, int n)
+{
+   int count = stbtt__cff_index_count(&idx);
+   int bias = 107;
+   if (count >= 33900)
+      bias = 32768;
+   else if (count >= 1240)
+      bias = 1131;
+   n += bias;
+   if (n < 0 || n >= count)
+      return stbtt__new_buf(NULL, 0);
+   return stbtt__cff_index_get(idx, n);
+}
+
+static stbtt__buf stbtt__cid_get_glyph_subrs(const stbtt_fontinfo *info, int glyph_index)
+{
+   stbtt__buf fdselect = info->fdselect;
+   int nranges, start, end, v, fmt, fdselector = -1, i;
+
+   stbtt__buf_seek(&fdselect, 0);
+   fmt = stbtt__buf_get8(&fdselect);
+   if (fmt == 0) {
+      // untested
+      stbtt__buf_skip(&fdselect, glyph_index);
+      fdselector = stbtt__buf_get8(&fdselect);
+   } else if (fmt == 3) {
+      nranges = stbtt__buf_get16(&fdselect);
+      start = stbtt__buf_get16(&fdselect);
+      for (i = 0; i < nranges; i++) {
+         v = stbtt__buf_get8(&fdselect);
+         end = stbtt__buf_get16(&fdselect);
+         if (glyph_index >= start && glyph_index < end) {
+            fdselector = v;
+            break;
+         }
+         start = end;
+      }
+   }
+   if (fdselector == -1) stbtt__new_buf(NULL, 0);
+   return stbtt__get_subrs(info->cff, stbtt__cff_index_get(info->fontdicts, fdselector));
+}
+
+static int stbtt__run_charstring(const stbtt_fontinfo *info, int glyph_index, stbtt__csctx *c)
+{
+   int in_header = 1, maskbits = 0, subr_stack_height = 0, sp = 0, v, i, b0;
+   int has_subrs = 0, clear_stack;
+   float s[48];
+   stbtt__buf subr_stack[10], subrs = info->subrs, b;
+   float f;
+
+#define STBTT__CSERR(s) (0)
+
+   // this currently ignores the initial width value, which isn't needed if we have hmtx
+   b = stbtt__cff_index_get(info->charstrings, glyph_index);
+   while (b.cursor < b.size) {
+      i = 0;
+      clear_stack = 1;
+      b0 = stbtt__buf_get8(&b);
+      switch (b0) {
+      // @TODO implement hinting
+      case 0x13: // hintmask
+      case 0x14: // cntrmask
+         if (in_header)
+            maskbits += (sp / 2); // implicit "vstem"
+         in_header = 0;
+         stbtt__buf_skip(&b, (maskbits + 7) / 8);
+         break;
+
+      case 0x01: // hstem
+      case 0x03: // vstem
+      case 0x12: // hstemhm
+      case 0x17: // vstemhm
+         maskbits += (sp / 2);
+         break;
+
+      case 0x15: // rmoveto
+         in_header = 0;
+         if (sp < 2) return STBTT__CSERR("rmoveto stack");
+         stbtt__csctx_rmove_to(c, s[sp-2], s[sp-1]);
+         break;
+      case 0x04: // vmoveto
+         in_header = 0;
+         if (sp < 1) return STBTT__CSERR("vmoveto stack");
+         stbtt__csctx_rmove_to(c, 0, s[sp-1]);
+         break;
+      case 0x16: // hmoveto
+         in_header = 0;
+         if (sp < 1) return STBTT__CSERR("hmoveto stack");
+         stbtt__csctx_rmove_to(c, s[sp-1], 0);
+         break;
+
+      case 0x05: // rlineto
+         if (sp < 2) return STBTT__CSERR("rlineto stack");
+         for (; i + 1 < sp; i += 2)
+            stbtt__csctx_rline_to(c, s[i], s[i+1]);
+         break;
+
+      // hlineto/vlineto and vhcurveto/hvcurveto alternate horizontal and vertical
+      // starting from a different place.
+
+      case 0x07: // vlineto
+         if (sp < 1) return STBTT__CSERR("vlineto stack");
+         goto vlineto;
+      case 0x06: // hlineto
+         if (sp < 1) return STBTT__CSERR("hlineto stack");
+         for (;;) {
+            if (i >= sp) break;
+            stbtt__csctx_rline_to(c, s[i], 0);
+            i++;
+      vlineto:
+            if (i >= sp) break;
+            stbtt__csctx_rline_to(c, 0, s[i]);
+            i++;
+         }
+         break;
+
+      case 0x1F: // hvcurveto
+         if (sp < 4) return STBTT__CSERR("hvcurveto stack");
+         goto hvcurveto;
+      case 0x1E: // vhcurveto
+         if (sp < 4) return STBTT__CSERR("vhcurveto stack");
+         for (;;) {
+            if (i + 3 >= sp) break;
+            stbtt__csctx_rccurve_to(c, 0, s[i], s[i+1], s[i+2], s[i+3], (sp - i == 5) ? s[i + 4] : 0.0f);
+            i += 4;
+      hvcurveto:
+            if (i + 3 >= sp) break;
+            stbtt__csctx_rccurve_to(c, s[i], 0, s[i+1], s[i+2], (sp - i == 5) ? s[i+4] : 0.0f, s[i+3]);
+            i += 4;
+         }
+         break;
+
+      case 0x08: // rrcurveto
+         if (sp < 6) return STBTT__CSERR("rcurveline stack");
+         for (; i + 5 < sp; i += 6)
+            stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
+         break;
+
+      case 0x18: // rcurveline
+         if (sp < 8) return STBTT__CSERR("rcurveline stack");
+         for (; i + 5 < sp - 2; i += 6)
+            stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
+         if (i + 1 >= sp) return STBTT__CSERR("rcurveline stack");
+         stbtt__csctx_rline_to(c, s[i], s[i+1]);
+         break;
+
+      case 0x19: // rlinecurve
+         if (sp < 8) return STBTT__CSERR("rlinecurve stack");
+         for (; i + 1 < sp - 6; i += 2)
+            stbtt__csctx_rline_to(c, s[i], s[i+1]);
+         if (i + 5 >= sp) return STBTT__CSERR("rlinecurve stack");
+         stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
+         break;
+
+      case 0x1A: // vvcurveto
+      case 0x1B: // hhcurveto
+         if (sp < 4) return STBTT__CSERR("(vv|hh)curveto stack");
+         f = 0.0;
+         if (sp & 1) { f = s[i]; i++; }
+         for (; i + 3 < sp; i += 4) {
+            if (b0 == 0x1B)
+               stbtt__csctx_rccurve_to(c, s[i], f, s[i+1], s[i+2], s[i+3], 0.0);
+            else
+               stbtt__csctx_rccurve_to(c, f, s[i], s[i+1], s[i+2], 0.0, s[i+3]);
+            f = 0.0;
+         }
+         break;
+
+      case 0x0A: // callsubr
+         if (!has_subrs) {
+            if (info->fdselect.size)
+               subrs = stbtt__cid_get_glyph_subrs(info, glyph_index);
+            has_subrs = 1;
+         }
+         // fallthrough
+      case 0x1D: // callgsubr
+         if (sp < 1) return STBTT__CSERR("call(g|)subr stack");
+         v = (int) s[--sp];
+         if (subr_stack_height >= 10) return STBTT__CSERR("recursion limit");
+         subr_stack[subr_stack_height++] = b;
+         b = stbtt__get_subr(b0 == 0x0A ? subrs : info->gsubrs, v);
+         if (b.size == 0) return STBTT__CSERR("subr not found");
+         b.cursor = 0;
+         clear_stack = 0;
+         break;
+
+      case 0x0B: // return
+         if (subr_stack_height <= 0) return STBTT__CSERR("return outside subr");
+         b = subr_stack[--subr_stack_height];
+         clear_stack = 0;
+         break;
+
+      case 0x0E: // endchar
+         stbtt__csctx_close_shape(c);
+         return 1;
+
+      case 0x0C: { // two-byte escape
+         float dx1, dx2, dx3, dx4, dx5, dx6, dy1, dy2, dy3, dy4, dy5, dy6;
+         float dx, dy;
+         int b1 = stbtt__buf_get8(&b);
+         switch (b1) {
+         // @TODO These "flex" implementations ignore the flex-depth and resolution,
+         // and always draw beziers.
+         case 0x22: // hflex
+            if (sp < 7) return STBTT__CSERR("hflex stack");
+            dx1 = s[0];
+            dx2 = s[1];
+            dy2 = s[2];
+            dx3 = s[3];
+            dx4 = s[4];
+            dx5 = s[5];
+            dx6 = s[6];
+            stbtt__csctx_rccurve_to(c, dx1, 0, dx2, dy2, dx3, 0);
+            stbtt__csctx_rccurve_to(c, dx4, 0, dx5, -dy2, dx6, 0);
+            break;
+
+         case 0x23: // flex
+            if (sp < 13) return STBTT__CSERR("flex stack");
+            dx1 = s[0];
+            dy1 = s[1];
+            dx2 = s[2];
+            dy2 = s[3];
+            dx3 = s[4];
+            dy3 = s[5];
+            dx4 = s[6];
+            dy4 = s[7];
+            dx5 = s[8];
+            dy5 = s[9];
+            dx6 = s[10];
+            dy6 = s[11];
+            //fd is s[12]
+            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, dy3);
+            stbtt__csctx_rccurve_to(c, dx4, dy4, dx5, dy5, dx6, dy6);
+            break;
+
+         case 0x24: // hflex1
+            if (sp < 9) return STBTT__CSERR("hflex1 stack");
+            dx1 = s[0];
+            dy1 = s[1];
+            dx2 = s[2];
+            dy2 = s[3];
+            dx3 = s[4];
+            dx4 = s[5];
+            dx5 = s[6];
+            dy5 = s[7];
+            dx6 = s[8];
+            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, 0);
+            stbtt__csctx_rccurve_to(c, dx4, 0, dx5, dy5, dx6, -(dy1+dy2+dy5));
+            break;
+
+         case 0x25: // flex1
+            if (sp < 11) return STBTT__CSERR("flex1 stack");
+            dx1 = s[0];
+            dy1 = s[1];
+            dx2 = s[2];
+            dy2 = s[3];
+            dx3 = s[4];
+            dy3 = s[5];
+            dx4 = s[6];
+            dy4 = s[7];
+            dx5 = s[8];
+            dy5 = s[9];
+            dx6 = dy6 = s[10];
+            dx = dx1+dx2+dx3+dx4+dx5;
+            dy = dy1+dy2+dy3+dy4+dy5;
+            if (STBTT_fabs(dx) > STBTT_fabs(dy))
+               dy6 = -dy;
+            else
+               dx6 = -dx;
+            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, dy3);
+            stbtt__csctx_rccurve_to(c, dx4, dy4, dx5, dy5, dx6, dy6);
+            break;
+
+         default:
+            return STBTT__CSERR("unimplemented");
+         }
+      } break;
+
+      default:
+         if (b0 != 255 && b0 != 28 && (b0 < 32 || b0 > 254))
+            return STBTT__CSERR("reserved operator");
+
+         // push immediate
+         if (b0 == 255) {
+            f = (float)stbtt__buf_get32(&b) / 0x10000;
+         } else {
+            stbtt__buf_skip(&b, -1);
+            f = (float)(stbtt_int16)stbtt__cff_int(&b);
+         }
+         if (sp >= 48) return STBTT__CSERR("push stack overflow");
+         s[sp++] = f;
+         clear_stack = 0;
+         break;
+      }
+      if (clear_stack) sp = 0;
+   }
+   return STBTT__CSERR("no endchar");
+
+#undef STBTT__CSERR
+}
+
+static int stbtt__GetGlyphShapeT2(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
+{
+   // runs the charstring twice, once to count and once to output (to avoid realloc)
+   stbtt__csctx count_ctx = STBTT__CSCTX_INIT(1);
+   stbtt__csctx output_ctx = STBTT__CSCTX_INIT(0);
+   if (stbtt__run_charstring(info, glyph_index, &count_ctx)) {
+      *pvertices = (stbtt_vertex*)STBTT_malloc(count_ctx.num_vertices*sizeof(stbtt_vertex), info->userdata);
+      output_ctx.pvertices = *pvertices;
+      if (stbtt__run_charstring(info, glyph_index, &output_ctx)) {
+         STBTT_assert(output_ctx.num_vertices == count_ctx.num_vertices);
+         return output_ctx.num_vertices;
+      }
+   }
+   *pvertices = NULL;
+   return 0;
+}
+
+static int stbtt__GetGlyphInfoT2(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1)
+{
+   stbtt__csctx c = STBTT__CSCTX_INIT(1);
+   int r = stbtt__run_charstring(info, glyph_index, &c);
+   if (x0) {
+      *x0 = r ? c.min_x : 0;
+      *y0 = r ? c.min_y : 0;
+      *x1 = r ? c.max_x : 0;
+      *y1 = r ? c.max_y : 0;
+   }
+   return r ? c.num_vertices : 0;
+}
+
+STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
+{
+   if (!info->cff.size)
+      return stbtt__GetGlyphShapeTT(info, glyph_index, pvertices);
+   else
+      return stbtt__GetGlyphShapeT2(info, glyph_index, pvertices);
+}
+
 STBTT_DEF void stbtt_GetGlyphHMetrics(const stbtt_fontinfo *info, int glyph_index, int *advanceWidth, int *leftSideBearing)
 {
    stbtt_uint16 numOfLongHorMetrics = ttUSHORT(info->data+info->hhea + 34);
@@ -2333,6 +2983,48 @@ static int stbtt__tesselate_curve(stbtt__point *points, int *num_points, float x
    return 1;
 }
 
+static void stbtt__tesselate_cubic(stbtt__point *points, int *num_points, float x0, float y0, float x1, float y1, float x2, float y2, float x3, float y3, float objspace_flatness_squared, int n)
+{
+   // @TODO this "flatness" calculation is just made-up nonsense that seems to work well enough
+   float dx0 = x1-x0;
+   float dy0 = y1-y0;
+   float dx1 = x2-x1;
+   float dy1 = y2-y1;
+   float dx2 = x3-x2;
+   float dy2 = y3-y2;
+   float dx = x3-x0;
+   float dy = y3-y0;
+   float longlen = (float) (STBTT_sqrt(dx0*dx0+dy0*dy0)+STBTT_sqrt(dx1*dx1+dy1*dy1)+STBTT_sqrt(dx2*dx2+dy2*dy2));
+   float shortlen = (float) STBTT_sqrt(dx*dx+dy*dy);
+   float flatness_squared = longlen*longlen-shortlen*shortlen;
+
+   if (n > 16) // 65536 segments on one curve better be enough!
+      return;
+
+   if (flatness_squared > objspace_flatness_squared) {
+      float x01 = (x0+x1)/2;
+      float y01 = (y0+y1)/2;
+      float x12 = (x1+x2)/2;
+      float y12 = (y1+y2)/2;
+      float x23 = (x2+x3)/2;
+      float y23 = (y2+y3)/2;
+
+      float xa = (x01+x12)/2;
+      float ya = (y01+y12)/2;
+      float xb = (x12+x23)/2;
+      float yb = (y12+y23)/2;
+
+      float mx = (xa+xb)/2;
+      float my = (ya+yb)/2;
+
+      stbtt__tesselate_cubic(points, num_points, x0,y0, x01,y01, xa,ya, mx,my, objspace_flatness_squared,n+1);
+      stbtt__tesselate_cubic(points, num_points, mx,my, xb,yb, x23,y23, x3,y3, objspace_flatness_squared,n+1);
+   } else {
+      stbtt__add_point(points, *num_points,x3,y3);
+      *num_points = *num_points+1;
+   }
+}
+
 // returns number of contours
 static stbtt__point *stbtt_FlattenCurves(stbtt_vertex *vertices, int num_verts, float objspace_flatness, int **contour_lengths, int *num_contours, void *userdata)
 {
@@ -2389,6 +3081,14 @@ static stbtt__point *stbtt_FlattenCurves(stbtt_vertex *vertices, int num_verts,
                                         objspace_flatness_squared, 0);
                x = vertices[i].x, y = vertices[i].y;
                break;
+            case STBTT_vcubic:
+               stbtt__tesselate_cubic(points, &num_points, x,y,
+                                        vertices[i].cx, vertices[i].cy,
+                                        vertices[i].cx1, vertices[i].cy1,
+                                        vertices[i].x,  vertices[i].y,
+                                        objspace_flatness_squared, 0);
+               x = vertices[i].x, y = vertices[i].y;
+               break;
          }
       }
       (*contour_lengths)[n] = num_points - start;