mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
(unsigned long long)block, nr, flags, inode);
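+ /* Read-ahead requests are only valid when they are also cached
+ * requests and an inode is provided to track completion against,
+ * so catch bad callers before any I/O is issued. */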
+ BUG_ON((flags & OCFS2_BH_READAHEAD) &&
+ (!inode || !(flags & OCFS2_BH_CACHED)));
+
if (osb == NULL || osb->sb == NULL || bhs == NULL) {
status = -EINVAL;
mlog_errno(status);
bh = bhs[i];
ignore_cache = 0;
+ /* There are three read-ahead cases here which we need to
+ * be concerned with. All three assume a buffer has
+ * previously been submitted with OCFS2_BH_READAHEAD
+ * and it hasn't yet completed I/O.
+ *
+ * 1) The current request is sync to disk. This rarely
+ * happens these days, and never when performance
+ * matters - the code can just wait on the buffer
+ * lock and re-submit.
+ *
+ * 2) The current request is cached, but not
+ * readahead. ocfs2_buffer_uptodate() will return
+ * false anyway, so we'll wind up waiting on the
+ * buffer lock to do I/O. We re-check the request
+ * after getting the lock to avoid a re-submit.
+ *
+ * 3) The current request is readahead (and so must
+ * also be a caching one). We short circuit if the
+ * buffer is locked (under I/O) and if it's in the
+ * uptodate cache. The re-check from #2 catches the
+ * case where the previous read-ahead completes just
+ * before our is-it-in-flight check.
+ */
+
if (flags & OCFS2_BH_CACHED &&
!ocfs2_buffer_uptodate(inode, bh)) {
mlog(ML_UPTODATE,
continue;
}
+ /* A read-ahead request was made - if the
+ * buffer is already under read-ahead from a
+ * previously submitted request, then we are
+ * done here. */
+ if ((flags & OCFS2_BH_READAHEAD)
+ && ocfs2_buffer_read_ahead(inode, bh))
+ continue;
+
lock_buffer(bh);
if (buffer_jbd(bh)) {
#ifdef CATCH_BH_JBD_RACES
continue;
#endif
}
+
+ /* Re-check ocfs2_buffer_uptodate() as a
+ * previously read-ahead buffer may have
+ * completed I/O while we were waiting for the
+ * buffer lock. */
+ if ((flags & OCFS2_BH_CACHED)
+ && !(flags & OCFS2_BH_READAHEAD)
+ && ocfs2_buffer_uptodate(inode, bh)) {
+ unlock_buffer(bh);
+ continue;
+ }
+
clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
bh->b_end_io = end_buffer_read_sync;
- if (flags & OCFS2_BH_READAHEAD)
- submit_bh(READA, bh);
- else
- submit_bh(READ, bh);
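+ /* Even read-ahead requests go out as plain READs now; the
+ * read-ahead distinction is handled by the short circuit above
+ * and by skipping the wait below. */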
+ submit_bh(READ, bh);
continue;
}
}
for (i = (nr - 1); i >= 0; i--) {
bh = bhs[i];
- /* We know this can't have changed as we hold the
- * inode sem. Avoid doing any work on the bh if the
- * journal has it. */
- if (!buffer_jbd(bh))
- wait_on_buffer(bh);
-
- if (!buffer_uptodate(bh)) {
- /* Status won't be cleared from here on out,
- * so we can safely record this and loop back
- * to cleanup the other buffers. Don't need to
- * remove the clustered uptodate information
- * for this bh as it's not marked locally
- * uptodate. */
- status = -EIO;
- brelse(bh);
- bhs[i] = NULL;
- continue;
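+ /* Read-ahead I/O completes asynchronously, so don't block
+ * on it here. */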
+ if (!(flags & OCFS2_BH_READAHEAD)) {
+ /* We know this can't have changed as we hold the
+ * inode sem. Avoid doing any work on the bh if the
+ * journal has it. */
+ if (!buffer_jbd(bh))
+ wait_on_buffer(bh);
+
+ if (!buffer_uptodate(bh)) {
+ /* Status won't be cleared from here on out,
+ * so we can safely record this and loop back
+ * to cleanup the other buffers. Don't need to
+ * remove the clustered uptodate information
+ * for this bh as it's not marked locally
+ * uptodate. */
+ status = -EIO;
+ brelse(bh);
+ bhs[i] = NULL;
+ continue;
+ }
}
+ /* Always set the buffer in the cache, even if it was
+ * a forced read or a read-ahead which hasn't yet
+ * completed. */
if (inode)
ocfs2_set_buffer_uptodate(inode, bh);
}
if (inode)
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
- mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n",
+ mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
(unsigned long long)block, nr,
- (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
+ (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
bail:
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
int error = 0;
- unsigned long offset, blk;
- int i, num, stored;
+ unsigned long offset, blk, last_ra_blk = 0;
+ int i, stored;
struct buffer_head * bh, * tmp;
struct ocfs2_dir_entry * de;
int err;
struct inode *inode = filp->f_dentry->d_inode;
struct super_block * sb = inode->i_sb;
- int have_disk_lock = 0;
+ unsigned int ra_sectors = 16;
mlog_entry("dirino=%llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
mlog_errno(error);
/* we haven't got any yet, so propagate the error. */
stored = error;
- goto bail;
+ goto bail_nolock;
}
- have_disk_lock = 1;
offset = filp->f_pos & (sb->s_blocksize - 1);
continue;
}
- /*
- * Do the readahead (8k)
- */
- if (!offset) {
- for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
+ /* The idea here is to begin with 8k read-ahead and to stay
+ * 4k ahead of our current position.
+ *
+ * TODO: Use the pagecache for this. We just need to
+ * make sure it's cluster-safe... */
+ if (!last_ra_blk
+ || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
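+ /* ra_sectors is in 512-byte sectors; the shift converts it
+ * to filesystem blocks for ocfs2_bread(). With a 4k block
+ * size (s_blocksize_bits - 9 == 3), the initial 16 sectors
+ * prime 16 >> 3 == 2 blocks (8k) and the steady-state 8
+ * sectors refill 8 >> 3 == 1 block (4k) at a time. */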
+ for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
i > 0; i--) {
tmp = ocfs2_bread(inode, ++blk, &err, 1);
if (tmp)
brelse(tmp);
}
+ last_ra_blk = blk;
+ ra_sectors = 8;
}
revalidate:
stored = 0;
bail:
- if (have_disk_lock)
- ocfs2_meta_unlock(inode, 0);
+ ocfs2_meta_unlock(inode, 0);
+bail_nolock:
mlog_exit(stored);
return stored;
}
/* Warning: even if it returns true, this does *not* guarantee that
- * the block is stored in our inode metadata cache. */
+ * the block is stored in our inode metadata cache.
+ *
+ * This can be called under lock_buffer()
+ */
int ocfs2_buffer_uptodate(struct inode *inode,
struct buffer_head *bh)
{
return ocfs2_buffer_cached(OCFS2_I(inode), bh);
}
+/*
+ * Determine whether a buffer is currently out on a read-ahead request.
+ * ip_io_mutex should be held to serialize submitters with the logic here.
+ */
+int ocfs2_buffer_read_ahead(struct inode *inode,
+ struct buffer_head *bh)
+{
+ return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh);
+}
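+
+/* A rough caller-side sketch (illustrative only, assuming the
+ * ocfs2_read_blocks() signature shown at the top of this change):
+ * prime a block asynchronously, then read it for real later.
+ *
+ *	ocfs2_read_blocks(osb, blkno, 1, &bh,
+ *			  OCFS2_BH_CACHED | OCFS2_BH_READAHEAD, inode);
+ *	...
+ *	ocfs2_read_blocks(osb, blkno, 1, &bh, OCFS2_BH_CACHED, inode);
+ *
+ * If the read-ahead has already completed, the second call is
+ * satisfied without another submit; otherwise it waits on the
+ * buffer lock and the re-check there avoids a duplicate submit.
+ */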
+
/* Requires ip_lock */
static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
sector_t block)
*
* Note that this function may actually fail to insert the block if
* memory cannot be allocated. This is not fatal however (but may
- * result in a performance penalty) */
+ * result in a performance penalty).
+ *
+ * Readahead buffers can be passed in here before the I/O request is
+ * completed.
+ */
void ocfs2_set_buffer_uptodate(struct inode *inode,
struct buffer_head *bh)
{