This began as an issue caching location publishes on TrackerOne’s when the device was offline, to be published when back in cellular service. Details of that exact use case are in the thread here.
The cached publishes are being written to a file as the entire publish JSON, followed by two bytes indicating the length of the publish. When back in cell service, the cache file is read starting at the end. The JSON is read from the file and uploaded, and the file is then truncated to remove the uploaded publish. This setup has mostly worked for us on TrackerOne’s going out of cellular service for 12 hours + a day, publishing locations every minute, but in some cases has produced SOS Assertion Failures. In these scenarios, the TrackOnes affected have had to be flashed over USB with firmware directing the device to delete the cache file in setup()
.
I have been able to reproduce this failure using the minimal main.cpp
file below, which registers a cloud function emulating the writing and reading of publishes to and from flash. The number of publishes and the length of each publish is sent, and the tracker then writes this many publishes of random data to flash, and reads it back, truncating the file after each read. The file consistently becomes corrupted during truncation at certain file sizes, which is likely caused by a bug demonstrated in the LittleFS Github here, which was eventually patched 8 days ago. Same workflow reading a given length from a file and then truncating.
I am wondering if merging this patch into the implementation of LittleFS in deviceOS would fix the file corruption issues demonstrated by the example below.
Tests:
# Publishes | Publish Length | Result | Truncation Failure File Size | Total File Size Written |
---|---|---|---|---|
100 | 200 | Success | 20400 | |
1440 | 221 | Consistent assertion failure while truncating file after point 1322, file size 294583. | 294583 | 318682 |
1440 | 139 | Consistent assertion failure while truncating file after point 698, file size 98277. | 98277 | 200438 |
100 | 300 | Success | 30600 | |
100 | 1000 | Success | 102000 | |
1200 | 1000 | Consistent assertion failure while truncating file after point 716, file size 716430. | 716430 | 1202000 |
main.cpp:
/*
* Bill DeVoe 12/2/2021
* Testing offline caching functionality by rapidly reading/writing
* example publish JSON to FS to emulate truncation bug listed here:
* https://github.com/littlefs-project/littlefs/issues/268
*/
#include "Particle.h"
#include <stdint.h>
#include <fcntl.h>
#include <sys/stat.h>
SYSTEM_THREAD(ENABLED);
SYSTEM_MODE(SEMI_AUTOMATIC);
SerialLogHandler logHandler(115200, LOG_LEVEL_TRACE, {
{ "app.gps.nmea", LOG_LEVEL_INFO },
{ "app.gps.ubx", LOG_LEVEL_INFO },
{ "ncp.at", LOG_LEVEL_INFO },
{ "net.ppp.client", LOG_LEVEL_INFO },
});
#define TEST_FILE "/test_file"
// Buffer to read/write dummy publishes
char rw_buffer[particle::protocol::MAX_EVENT_DATA_LENGTH];
// Cloud function to send the number of publishes and publish length to test, ie "100,300" is 100 publishes, 300 bytes each
int test_fs(String points_str) {
// Split by comma to get number of publishes, length of each publish
int comma = points_str.indexOf(',');
if (comma == -1) {
Log.warn("No comma found in received command.");
return -1;
}
// Parse integer from number of points sent to function
uint16_t points = static_cast<uint16_t>(strtoul(points_str.substring(0,comma), nullptr, 0));
uint16_t publish_size = static_cast<uint16_t>(strtoul(points_str.substring(comma+1), nullptr, 0));
// If the split string cannot be parsed to uint16, ie if someone sent "hi" or -10 or even 0,
// strtoul returns 0, so return -1
if (points == 0 || publish_size == 0) {
Log.warn("Invalid number of points or publish size to write received.");
return -1;
}
if (publish_size > particle::protocol::MAX_EVENT_DATA_LENGTH) {
publish_size = particle::protocol::MAX_EVENT_DATA_LENGTH;
}
// Else continue with write/read test
Log.info("Writing %i test points of %i length each to file...", points, publish_size);
uint16_t i = 0;
while (i < points) {
// Generate random text buffer of given size
for (int k = 0; k < publish_size - 1; k++) {
rw_buffer[k] = (unsigned char) (rand() % 255 + 1);
}
rw_buffer[publish_size-1] = '\0';
// Now write to file
int fd = open(TEST_FILE, O_WRONLY | O_CREAT | O_APPEND);
if(fd == -1) {
Log.error("File not open for writing");
System.enableReset();
return SYSTEM_ERROR_FILE;
}
struct stat buf;
int retval = fstat(fd, &buf);
if (retval == 0) {
uint16_t size = strlen(rw_buffer)+1;
retval = write(fd, rw_buffer, size);
Log.trace("Wrote point %i of %i. Previous file size: %ld. Wrote %d bytes to file", i+1, points, buf.st_size, retval);
if (write(fd, (void *)&size, 2) == 2) {
retval+=2;
} else {
retval = SYSTEM_ERROR_FILE;
}
} else {
retval = SYSTEM_ERROR_FILE;
}
close(fd);
// Increment points written
i++;
}
// All points written successfully
Log.info("All test points written to FS, beginning read/truncate test...");
// Keep repeating until file is empty
while (true) {
struct stat stat_buf;
// File is empty, break
if(!(stat(TEST_FILE, &stat_buf) == 0 && stat_buf.st_size > 2)) {
Log.info("All points successfully read from file.");
return 1;
}
int fd = open(TEST_FILE, O_RDWR);
if(fd == -1) {
Log.error("File not found for reading");
return SYSTEM_ERROR_FILE;
}
// Last two bytes of each record is the length of the publish
if (lseek(fd, -2, SEEK_END) < 0) goto read_error;
uint16_t len;
if (read(fd, (void *)&len, 2) == -1) goto read_error;
// Seek length of publish plus 2 bytes for len from end of file
if(lseek(fd, -2-len, SEEK_END) < 0) goto read_error;
// Then read in the length of the publish to get the JSON location publish string
if(read(fd, rw_buffer, len) < len) goto read_error;
if(fstat(fd, &stat_buf) == -1) goto read_error;
close(fd);
// Truncate the file to remove the last record just read
if(truncate(TEST_FILE, stat_buf.st_size - len - 2) == -1) goto read_error;
Log.info("Point %i of %i read success. New file size: %ld.", i+1, points, stat_buf.st_size);
i--;
continue;
read_error:
Log.error("Error accessing file");
close(fd);
return SYSTEM_ERROR_FILE;
}
}
bool reset_reason_sent = false;
void setup() {
// Enable system reset information
System.enableFeature(FEATURE_RESET_INFO);
// If reset due to panic (SOS Assertion Failure after flash corrupted) remove the corrupt file
if (System.resetReason() == RESET_REASON_PANIC) {
unlink(TEST_FILE);
}
// Register cloud function to test file read/write
Particle.function("test_fs", test_fs);
Particle.connect();
}
void loop() {
if (Particle.connected() && !reset_reason_sent) {
// Reset reason codes here: https://github.com/particle-iot/device-os/blob/0bc16d94772999c016aa4c70c1b330f53321f90f/services/inc/system_defs.h#L49
Particle.publish("reset_reason", (String)System.resetReason());
reset_reason_sent = true;
}
}```