#include "unused.h"
#define MEMBLK 8192
+/* DEBUG 2 prints information obtained via mincore(2) */
+// #define DEBUG 2
+/* do not calculate exact madvise hints but assume 1 page for headers and
+ * set DONTNEED for the rest, which is assumed to be data */
+//#define ONE_PAGE 1
+/* Avoid calling madvise on areas that were already hinted. May be beneficial if
+ * your syscalls are very slow */
+#define CHECK_MADVISE_OVERLAPS 1
+
#ifdef HAVE_MMAP
+/* the cast to void* is there to avoid this warning seen on ia64 with certain
+ versions of gcc: 'cast increases required alignment of target type'
+*/
#define __rrd_read(dst, dst_t, cnt) \
- (dst) = (dst_t*) (data + offset); \
+ (dst) = (dst_t*)(void*) (data + offset); \
offset += sizeof(dst_t) * (cnt)
#else
#define __rrd_read(dst, dst_t, cnt) \
offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt))
#endif
-/* open a database file, return its header and an open filehandle */
-/* positioned to the first cdp in the first rra */
+/* Page-alignment helpers. Both expand to an expression that reads an integer
+ * `_page_size`, which must be in scope where the macro is used; the mask
+ * arithmetic assumes that value is a power of two. */
+/* next page-aligned (i.e. page-align up) */
+#ifndef PAGE_ALIGN
+#define PAGE_ALIGN(addr) (((addr)+_page_size-1)&(~(_page_size-1)))
+#endif
+/* previous page-aligned (i.e. page-align down): clear the low bits only,
+ * without adding _page_size-1 first, so the result never exceeds addr */
+#ifndef PAGE_ALIGN_DOWN
+#define PAGE_ALIGN_DOWN(addr) ((addr)&(~(_page_size-1)))
+#endif
+
+#ifdef HAVE_MMAP
+/* vector of last madvise hint: the range most recently handed to madvise
+ * through the _madvise macro when CHECK_MADVISE_OVERLAPS is defined */
+typedef struct _madvise_vec_t {
+ void *start; /* start address of that range */
+ ssize_t length; /* length of that range, as passed to madvise */
+} _madvise_vec_t;
+_madvise_vec_t _madv_vec = { NULL, 0 }; /* nothing recorded yet */
+#endif
+
+/* Issue a madvise(2) hint for the range starting at _start of length _off.
+ * With CHECK_MADVISE_OVERLAPS defined, the call is skipped only when the
+ * range is identical (same start AND same length) to the one recorded in
+ * _madv_vec; a range that differs in either respect is recorded and hinted.
+ * The hint value itself is neither recorded nor compared, and the return
+ * value of madvise is not checked.
+ * The arguments are evaluated more than once; use as a statement followed by
+ * a semicolon. */
+#if defined CHECK_MADVISE_OVERLAPS
+#define _madvise(_start, _off, _hint) \
+ do { \
+ if ((_start) != _madv_vec.start || \
+ (ssize_t)(_off) != _madv_vec.length) { \
+ _madv_vec.start = (_start); \
+ _madv_vec.length = (_off); \
+ madvise((_start), (_off), (_hint)); \
+ } \
+ } while (0)
+#else
+#define _madvise(_start, _off, _hint) \
+ madvise((_start), (_off), (_hint))
+#endif
+
+/* Open a database file, return its header and an open filehandle,
+ * positioned to the first cdp in the first rra.
+ * In the error path of rrd_open, only rrd_free(&rrd) has to be called
+ * before returning an error. Do not call rrd_close upon failure of rrd_open.
+ */
rrd_file_t *rrd_open(
const char *const file_name,
int version;
#ifdef HAVE_MMAP
+ ssize_t _page_size = sysconf(_SC_PAGESIZE);
int mm_prot = PROT_READ, mm_flags = 0;
char *data;
#endif
off_t offset = 0;
struct stat statb;
- rrd_file_t *rrd_file = malloc(sizeof(rrd_file_t));
+ rrd_file_t *rrd_file = NULL;
+ off_t newfile_size = 0;
+ if (rdwr & RRD_CREAT) {
+ newfile_size = (off_t) rrd->stat_head->float_cookie;
+ free(rrd->stat_head);
+ }
+ rrd_init(rrd);
+ rrd_file = malloc(sizeof(rrd_file_t));
if (rrd_file == NULL) {
rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
return NULL;
}
memset(rrd_file, 0, sizeof(rrd_file_t));
- rrd_init(rrd);
+
#ifdef DEBUG
if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
(RRD_READONLY | RRD_READWRITE)) {
#ifdef HAVE_MMAP
mm_flags = MAP_PRIVATE;
# ifdef MAP_NORESERVE
- mm_flags |= MAP_NORESERVE;
+ mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
# endif
- mm_flags |= MAP_PRIVATE;
#endif
} else {
if (rdwr & RRD_READWRITE) {
}
if (rdwr & RRD_READAHEAD) {
#ifdef MAP_POPULATE
- mm_flags |= MAP_POPULATE;
+ mm_flags |= MAP_POPULATE; /* populate ptes and data */
#endif
-#if defined MAP_NONBLOCK && !defined USE_DIRECT_IO
- mm_flags |= MAP_NONBLOCK; /* just populage ptes */
+#if defined MAP_NONBLOCK
+ mm_flags |= MAP_NONBLOCK; /* just populate ptes */
#endif
- } else {
#ifdef USE_DIRECT_IO
+ } else {
flags |= O_DIRECT;
#endif
-#if 0 //def O_NONBLOCK
- flags |= O_NONBLOCK;
-#endif
}
+#ifdef O_NONBLOCK
+ flags |= O_NONBLOCK;
+#endif
if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
- return NULL;
+ goto out_free;
}
/* Better try to avoid seeks as much as possible. stat may be heavy but
* many concurrent seeks are even worse. */
- if ((fstat(rrd_file->fd, &statb)) < 0) {
+ if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
goto out_close;
}
- rrd_file->file_len = statb.st_size;
-
+ if (newfile_size == 0) {
+ rrd_file->file_len = statb.st_size;
+ } else {
+ rrd_file->file_len = newfile_size;
+ lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
+ write(rrd_file->fd, "\0", 1); /* poke */
+ lseek(rrd_file->fd, 0, SEEK_SET);
+ }
#ifdef HAVE_POSIX_FADVISE
/* In general we need no read-ahead when dealing with rrd_files.
When we stop reading, it is highly unlikely that we start up again.
/* lets see if the first read worked */
if (data == MAP_FAILED) {
- rrd_set_error("error mmaping file '%s': %s", file_name,
+ rrd_set_error("mmaping file '%s': %s", file_name,
rrd_strerror(errno));
goto out_close;
}
rrd_file->file_start = data;
-#else
-#endif
-#ifdef USE_MADVISE
- if (rdwr & RRD_COPY) { /*XXX: currently not used! */
- /* We will read everything in a moment (copying) */
- madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
+ if (rdwr & RRD_CREAT) {
+ memset(data, DNAN, newfile_size - 1);
goto out_done;
}
- /* We do not need to read anything in for the moment */
- madvise(data, rrd_file->file_len, MADV_DONTNEED);
#endif
-
+ if (rdwr & RRD_CREAT)
+ goto out_done;
#ifdef USE_MADVISE
- /* the stat_head will be needed soonish, so hint accordingly */
- madvise(data + offset, sizeof(stat_head_t), MADV_WILLNEED);
+ if (rdwr & RRD_COPY) {
+ /* We will read everything in a moment (copying) */
+ _madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
+ } else {
+# ifndef ONE_PAGE
+ /* We do not need to read anything in for the moment */
+ _madvise(data, rrd_file->file_len, MADV_DONTNEED);
+ /* the stat_head will be needed soonish, so hint accordingly */
+ _madvise(data + PAGE_ALIGN_DOWN(offset),
+ PAGE_ALIGN(sizeof(stat_head_t)),
+ MADV_WILLNEED | MADV_RANDOM);
+
+# else
+/* alternatively: keep 1 page worth of data, likely headers,
+ * don't need the rest. */
+ _madvise(data, _page_size, MADV_WILLNEED | MADV_SEQUENTIAL);
+ _madvise(data + _page_size, (rrd_file->file_len >= _page_size)
+ ? rrd_file->file_len - _page_size : 0, MADV_DONTNEED);
+# endif
+ }
#endif
__rrd_read(rrd->stat_head, stat_head_t,
}
if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
- rrd_set_error("This RRD was created on other architecture");
+ rrd_set_error("This RRD was created on another architecture");
goto out_nullify_head;
}
rrd->stat_head->version);
goto out_nullify_head;
}
-#ifdef USE_MADVISE
+#if defined USE_MADVISE && !defined ONE_PAGE
/* the ds_def will be needed soonish, so hint accordingly */
- madvise(data + offset, sizeof(ds_def_t) * rrd->stat_head->ds_cnt,
- MADV_WILLNEED);
+ _madvise(data + PAGE_ALIGN_DOWN(offset),
+ PAGE_ALIGN(sizeof(ds_def_t) * rrd->stat_head->ds_cnt),
+ MADV_WILLNEED);
#endif
__rrd_read(rrd->ds_def, ds_def_t,
rrd->stat_head->ds_cnt);
-#ifdef USE_MADVISE
+#if defined USE_MADVISE && !defined ONE_PAGE
/* the rra_def will be needed soonish, so hint accordingly */
- madvise(data + offset, sizeof(rra_def_t) * rrd->stat_head->rra_cnt,
- MADV_WILLNEED);
+ _madvise(data + PAGE_ALIGN_DOWN(offset),
+ PAGE_ALIGN(sizeof(rra_def_t) * rrd->stat_head->rra_cnt),
+ MADV_WILLNEED);
#endif
__rrd_read(rrd->rra_def, rra_def_t,
rrd->stat_head->rra_cnt);
#endif
rrd->live_head->last_up_usec = 0;
} else {
-#ifdef USE_MADVISE
+#if defined USE_MADVISE && !defined ONE_PAGE
/* the live_head will be needed soonish, so hint accordingly */
- madvise(data + offset, sizeof(live_head_t), MADV_WILLNEED);
+ _madvise(data + PAGE_ALIGN_DOWN(offset),
+ PAGE_ALIGN(sizeof(live_head_t)), MADV_WILLNEED);
#endif
__rrd_read(rrd->live_head, live_head_t,
1);
__rrd_read(rrd->rra_ptr, rra_ptr_t,
rrd->stat_head->rra_cnt);
-#ifdef USE_MADVISE
- out_done:
-#endif
rrd_file->header_len = offset;
rrd_file->pos = offset;
-/* we could close(rrd_file->fd); here, the mapping is still valid anyway */
+ out_done:
return (rrd_file);
out_nullify_head:
rrd->stat_head = NULL;
out_close:
close(rrd_file->fd);
+ out_free:
+ free(rrd_file);
return NULL;
}
+
/* Close a reference to an rrd_file. */
+
int rrd_close(
rrd_file_t *rrd_file)
{
int ret;
+#if defined HAVE_MMAP || defined DEBUG
+ ssize_t _page_size = sysconf(_SC_PAGESIZE);
+#endif
+#if defined DEBUG && DEBUG > 1
+ /* pretty print blocks in core */
+ off_t off;
+ unsigned char *vec;
+
+ off = rrd_file->file_len +
+ ((rrd_file->file_len + _page_size - 1) / _page_size);
+ vec = malloc(off);
+ if (vec != NULL) {
+ memset(vec, 0, off);
+ if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
+ int prev;
+ unsigned is_in = 0, was_in = 0;
+
+ for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
+ is_in = vec[off] & 1; /* if lsb set then is core resident */
+ if (off == 0)
+ was_in = is_in;
+ if (was_in != is_in) {
+ fprintf(stderr, "%sin core: %p len %ld\n",
+ was_in ? "" : "not ", vec + prev, off - prev);
+ was_in = is_in;
+ prev = off;
+ }
+ }
+ fprintf(stderr,
+ "%sin core: %p len %ld\n",
+ was_in ? "" : "not ", vec + prev, off - prev);
+ } else
+ fprintf(stderr, "mincore: %s", rrd_strerror(errno));
+ }
+#endif /* DEBUG */
+
+#ifdef USE_MADVISE
+# ifdef ONE_PAGE
+ /* Keep headers around, round up to next page boundary. */
+ ret =
+ PAGE_ALIGN(rrd_file->header_len % _page_size + rrd_file->header_len);
+ if (rrd_file->file_len > ret)
+ _madvise(rrd_file->file_start + ret,
+ rrd_file->file_len - ret, MADV_DONTNEED);
+# else
+ /* ignoring errors from RRDs that are smaller than the file_len+rounding */
+ _madvise(rrd_file->file_start + PAGE_ALIGN_DOWN(rrd_file->header_len),
+ rrd_file->file_len - PAGE_ALIGN(rrd_file->header_len),
+ MADV_DONTNEED);
+# endif
+#endif
#ifdef HAVE_MMAP
ret = munmap(rrd_file->file_start, rrd_file->file_len);
if (ret != 0)
return ret;
}
+
/* Set position of rrd_file. */
+
off_t rrd_seek(
rrd_file_t *rrd_file,
off_t off,
return ret == -1; //XXX: or just ret to mimic lseek
}
+
/* Get current position in rrd_file. */
+
inline off_t rrd_tell(
rrd_file_t *rrd_file)
{
return rrd_file->pos;
}
+
/* read count bytes into buffer buf, starting at rrd_file->pos.
- * Returns the number of bytes read. */
-ssize_t rrd_read(
+ * Returns the number of bytes read or <0 on error.
+ * In the mmap case the read is clamped to rrd_file->file_len: a request that
+ * runs past the end is a short read, and a position at or beyond the end
+ * returns 0 (EOF). rrd_file->pos advances by the number of bytes returned. */
+
+inline ssize_t rrd_read(
rrd_file_t *rrd_file,
void *buf,
size_t count)
{
#ifdef HAVE_MMAP
- char *pos = rrd_file->file_start + rrd_file->pos;
+ size_t _cnt = count;
+ size_t _avail;
+
+ /* Nothing is left once pos has reached (or been moved past) the end.
+ * Testing this first keeps the subtraction below non-negative; otherwise
+ * the unsigned byte count would wrap and memcpy would overrun the map. */
+ if (rrd_file->pos >= rrd_file->file_len)
+ return 0; /* EOF */
+ _avail = (size_t) (rrd_file->file_len - rrd_file->pos);
+ if (_cnt > _avail) /* short read */
+ _cnt = _avail;
+ if (_cnt == 0)
+ return 0;
+ memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
- buf = memmove(buf, pos, count);
- rrd_file->pos += count; /* mimmic read() semantics */
- return count;
+ rrd_file->pos += _cnt; /* mimic read() semantics */
+ return _cnt;
#else
ssize_t ret;
ret = read(rrd_file->fd, buf, count);
- //XXX: eventually add generic rrd_set_error(""); here
- rrd_file->pos += count; /* mimmic read() semantics */
+ if (ret > 0)
+ rrd_file->pos += ret; /* mimic read() semantics */
return ret;
#endif
}
+
/* write count bytes from buffer buf to the current position
* rrd_file->pos of rrd_file->fd.
* Returns the number of bytes written. */
-ssize_t rrd_write(
+
+/* rrd_file->pos advances by the number of bytes written.
+ * In the mmap case a write that does not fit inside rrd_file->file_len is
+ * refused: rrd_set_error is called, nothing is copied and -1 is returned. */
+inline ssize_t rrd_write(
rrd_file_t *rrd_file,
const void *buf,
size_t count)
{
#ifdef HAVE_MMAP
- memmove(rrd_file->file_start + rrd_file->pos, buf, count);
+ /* the copy goes straight into the mapping, so it must stay inside it */
+ if (rrd_file->pos > rrd_file->file_len ||
+ count > (size_t) (rrd_file->file_len - rrd_file->pos)) {
+ rrd_set_error("attempting to write beyond end of file");
+ return -1;
+ }
+ memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
+ rrd_file->pos += count;
return count; /* mimmic write() semantics */
#else
- return write(rrd_file->fd, buf, count);
+ ssize_t _sz = write(rrd_file->fd, buf, count);
+
+ if (_sz > 0)
+ rrd_file->pos += _sz;
+ return _sz;
#endif
}
+
/* flush all data pending to be written to FD. */
+
inline void rrd_flush(
rrd_file_t *rrd_file)
{
}
}
+
+/* Initialize RRD header. */
+
void rrd_init(
rrd_t *rrd)
{
rrd->rrd_value = NULL;
}
-void rrd_free(
+
+/* free RRD header data.
+ * HAVE_MMAP build: the body is empty and nothing is released here; header
+ * pointers assigned through __rrd_read point into the mapped file rather
+ * than at separately allocated blocks.
+ * Otherwise: free() is called on live_head, stat_head, ds_def, rra_def,
+ * pdp_prep, cdp_prep and rrd_value. The pointers are not reset afterwards. */
+
+#ifdef HAVE_MMAP
+inline void rrd_free(
rrd_t UNUSED(*rrd))
{
-#ifndef HAVE_MMAP
- if (atoi(rrd->stat_head->version) < 3)
- free(rrd->live_head);
+}
+#else
+void rrd_free(
+ rrd_t *rrd)
+{
+ free(rrd->live_head);
free(rrd->stat_head);
free(rrd->ds_def);
free(rrd->rra_def);
free(rrd->pdp_prep);
free(rrd->cdp_prep);
free(rrd->rrd_value);
-//XXX: ? rrd_init(rrd);
-#endif
}
+#endif
+
/* routine used by external libraries to free memory allocated by
* rrd library */
+
void rrd_freemem(
void *mem)
{
free(mem);
}
-int readfile(
+
+/* XXX: FIXME: missing documentation. */
+/*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
+
+int /*_rrd_*/ readfile(
const char *file_name,
char **buffer,
int skipfirst)