int
main(void)
{
    //
    // Pin mask covering all three LED pins; it is passed on every write while
    // the value argument selects the single colour to show.
    //
    const unsigned long all_leds = RED_LED | BLUE_LED | GREEN_LED;

    //
    // Colours shown, in order, on each pass through the main loop.
    //
    const unsigned long sequence[] = { RED_LED, BLUE_LED, GREEN_LED };
    const unsigned int num_colours = sizeof(sequence) / sizeof(sequence[0]);

    //
    // Loop count handed to ROM_SysCtlDelay() after each colour change.
    //
    const unsigned long hold_count = 5000000;

    unsigned int idx;

    //
    // Run the system clock at 50 MHz from the PLL, using the 16 MHz main
    // oscillator crystal as the reference.
    //
    ROM_SysCtlClockSet(SYSCTL_SYSDIV_4 | SYSCTL_USE_PLL | SYSCTL_XTAL_16MHZ |
                       SYSCTL_OSC_MAIN);

    //
    // Power up GPIO port F and configure the LED pins as outputs.
    //
    ROM_SysCtlPeripheralEnable(SYSCTL_PERIPH_GPIOF);
    ROM_GPIOPinTypeGPIOOutput(GPIO_PORTF_BASE, all_leds);

    //
    // Cycle red -> blue -> green forever; main() never returns.
    //
    for(;;)
    {
        for(idx = 0; idx < num_colours; idx++)
        {
            //
            // Show the current colour, then hold it for the delay period.
            //
            ROM_GPIOPinWrite(GPIO_PORTF_BASE, all_leds, sequence[idx]);
            ROM_SysCtlDelay(hold_count);
        }
    }
}
The actual answer here likely has to do with the fact that ROM is always accessed in a single cycle, whereas flash may have different numbers of wait states depending upon the system clock frequency in use and the part you are running the code on. To further complicate matters, different parts have different instruction prefetching capabilities. Adding this all up, you will find that the nominal 3-cycle loop inside SysCtlDelay() may not take 3 clock cycles to run when you run it from flash.
We should update the function documentation to make this clear, but if you really want 3-cycle delay loops, you should use ROM_SysCtlDelay() instead (as you found out). Note that even this will be affected by interrupt latencies, so the accuracy will be determined by the longest ISR in your system.