For the curious, the code in question was a Duff's-device-style memset() implementation that writes 64-bit-aligned data as doubles (which is achievable on PPC):
[size=small]
/*
	memset64 - fill len bytes starting at p with the low 8 bits of c.

	p   - start of the region to fill (any alignment)
	c   - fill value; only (c & 0xFF) is used
	len - number of bytes to fill

	Regions longer than 32 bytes are filled in three phases: single bytes
	up to the next 8-byte boundary, then 8-byte stores of a double whose
	object representation is the fill byte repeated (unrolled 16x as a
	Duff's device), then single bytes for whatever is left. Shorter
	regions are filled bytewise. Exactly len bytes are written.

	NOTE: the middle phase stores through a double*. That is what makes
	the 64-bit stores possible on 32-bit PPC, but it sidesteps the C
	effective-type (aliasing) rules when the target object has another
	declared type; build with aliasing optimisations in mind.
*/
void memset64(void* p, int c, size_t len)
{
	const unsigned char fill = (unsigned char)(c & 0xFF);
	/* seems vbcc doesn't like *((uint8*)p)++ = x; so walk a typed copy */
	register unsigned char* pC = (unsigned char*)p;

	/*
		Don't bother going into the duff loop unless we have more than
		32 bytes to set. The sizeof test is a compile-time constant that
		disables the fast path where a double is not exactly 8 bytes.
	*/
	if (sizeof(double) == 8 && len > 32)
	{
		union {
			double f;
			unsigned char b[sizeof(double)];
		} data64;
		size_t i;
		/*
			Our code only wants to write 64-bit data to 64-bit aligned
			addresses. This is the distance (0..7) to the next boundary;
			only the low three address bits matter, so converting the
			pointer to size_t is sufficient.
		*/
		size_t bytesBefore = (8 - ((size_t)p & 7)) & 7;

		/*
			The head bytes count against len: without this the aligned
			loop and tail would still cover the full original length and
			run bytesBefore bytes past the end of the region.
		*/
		len -= bytesBefore;
		while (bytesBefore--) {
			*pC++ = fill;
		}

		/*
			Build the 64-bit pattern bytewise so it depends on neither
			the width of int nor the byte order. A repeated byte never
			yields a signalling NaN (only 0xFF gives a NaN at all, and
			that one is quiet), so moving it through a floating point
			register preserves the bits.
		*/
		for (i = 0; i < sizeof(data64.b); i++) {
			data64.b[i] = fill;
		}

		{
			/* seems vbcc doesn't like *((float64*)p)++ = x; */
			register double* pF = (double*)(void*)pC;
			register double f = data64.f;
			/*
				len is at least 26 here (more than 32, minus at most 7
				head bytes), so doubles >= 3 and blocks >= 1.
			*/
			size_t doubles = len >> 3;           /* whole 8-byte stores */
			size_t blocks = (doubles + 15) >> 4; /* passes through the loop */

			/*
				Duff's device: jump into the unrolled body so the first
				pass performs doubles % 16 stores (16 when that is zero);
				every later pass performs 16. All cases fall through on
				purpose.
			*/
			switch (doubles & 15) {
			case 0: do { *pF++ = f;
			case 15:     *pF++ = f;
			case 14:     *pF++ = f;
			case 13:     *pF++ = f;
			case 12:     *pF++ = f;
			case 11:     *pF++ = f;
			case 10:     *pF++ = f;
			case 9:      *pF++ = f;
			case 8:      *pF++ = f;
			case 7:      *pF++ = f;
			case 6:      *pF++ = f;
			case 5:      *pF++ = f;
			case 4:      *pF++ = f;
			case 3:      *pF++ = f;
			case 2:      *pF++ = f;
			case 1:      *pF++ = f;
			        } while (--blocks);
			}
			pC = (unsigned char*)pF;
		}

		/* adjust length for any trailing bytes after the aligned area */
		len &= 7;
	}

	/* handle any trailing bytes (or the entire thing if len was <= 32) */
	while (len--) {
		*pC++ = fill;
	}
}
[/size]