glock.o \
glops.o \
inode.o \
- jdata.o \
lm.o \
log.o \
lops.o \
#include "bmap.h"
#include "glock.h"
#include "inode.h"
-#include "jdata.h"
#include "meta_io.h"
#include "page.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
+#include "dir.h"
/* This doesn't need to be that large as max 64 bit pointers in a 4k
* block is 512, so __u16 is fine for that. It saves stack space to
{
struct buffer_head *bh, *dibh;
uint64_t block = 0;
- int journaled = gfs2_is_jdata(ip);
+ int isdir = gfs2_is_dir(ip);
int error;
down_write(&ip->i_rw_mutex);
/* Get a free block, fill it with the stuffed data,
and write it out to disk */
- if (journaled) {
+ if (isdir) {
block = gfs2_alloc_meta(ip);
- error = gfs2_jdata_get_buffer(ip, block, 1, &bh);
+ error = gfs2_dir_get_buffer(ip, block, 1, &bh);
if (error)
goto out_brelse;
gfs2_buffer_copy_tail(bh,
if (ip->i_di.di_size > size)
size = ip->i_di.di_size;
- if (gfs2_is_jdata(ip)) {
+ if (gfs2_is_dir(ip)) {
arr = sdp->sd_jheightsize;
max = sdp->sd_max_jheight;
} else {
return;
if (height == ip->i_di.di_height - 1 &&
- !gfs2_is_jdata(ip))
+ !gfs2_is_dir(ip))
*block = gfs2_alloc_data(ip);
else
*block = gfs2_alloc_meta(ip);
if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
goto out;
- bsize = (gfs2_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
+ bsize = (gfs2_is_dir(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
height = calc_tree_height(ip, (lblock + 1) * bsize);
if (ip->i_di.di_height < height) {
sm->sm_first = 0;
}
- metadata = (height != ip->i_di.di_height - 1) || gfs2_is_jdata(ip);
+ metadata = (height != ip->i_di.di_height - 1);
if (metadata)
revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
return error;
}
-static int truncator_journaled(struct gfs2_inode *ip, uint64_t size)
-{
- uint64_t lbn, dbn;
- uint32_t off;
- struct buffer_head *bh;
- int new = 0;
- int error;
-
- lbn = size;
- off = do_div(lbn, ip->i_sbd->sd_jbsize);
-
- error = gfs2_block_map(ip, lbn, &new, &dbn, NULL);
- if (error || !dbn)
- return error;
-
- error = gfs2_jdata_get_buffer(ip, dbn, 0, &bh);
- if (error)
- return error;
-
- gfs2_trans_add_bh(ip->i_gl, bh, 1);
- gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header) + off);
-
- brelse(bh);
-
- return 0;
-}
-
static int trunc_start(struct gfs2_inode *ip, uint64_t size)
{
struct gfs2_sbd *sdp = ip->i_sbd;
error = 1;
} else {
- if (journaled) {
- uint64_t junk = size;
- /* we're just interested in the modulus */
- if (do_div(junk, sdp->sd_jbsize))
- error = truncator_journaled(ip, size);
- } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1))
+ if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1))
error = gfs2_block_truncate_page(ip->i_vnode->i_mapping);
if (!error) {
if (!size)
lblock = 0;
- else if (gfs2_is_jdata(ip)) {
- lblock = size - 1;
- do_div(lblock, ip->i_sbd->sd_jbsize);
- } else
+ else
lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift;
find_metapath(ip, lblock, &mp);
struct gfs2_sbd *sdp = ip->i_sbd;
unsigned int tmp;
- if (gfs2_is_jdata(ip)) {
+ if (gfs2_is_dir(ip)) {
*data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2;
*ind_blocks = 3 * (sdp->sd_max_jheight - 1);
} else {
return 0;
}
- if (gfs2_is_jdata(ip)) {
+ if (gfs2_is_dir(ip)) {
unsigned int bsize = sdp->sd_jbsize;
lblock = offset;
do_div(lblock, bsize);
uint32_t index, uint32_t len, uint64_t leaf_no,
void *data);
-static int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
- struct buffer_head **bhp)
+int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
+ struct buffer_head **bhp)
{
struct buffer_head *bh;
int error = 0;
int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename,
int *alloc_required);
+int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
+ struct buffer_head **bhp);
#endif /* __DIR_DOT_H__ */
return ip->i_di.di_flags & GFS2_DIF_JDATA;
}
+/* Returns non-zero when the inode's on-disk mode (di_mode) is a directory. */
+static inline int gfs2_is_dir(struct gfs2_inode *ip)
+{
+ return S_ISDIR(ip->i_di.di_mode);
+}
+
void gfs2_inode_attr_in(struct gfs2_inode *ip);
void gfs2_inode_attr_out(struct gfs2_inode *ip);
struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip);
err = gfs2_lookupi(get_v2ip(dip), &qstr, 1, &ip);
if (err == 0) {
*ipp = gfs2_ip2v(ip);
+ gfs2_inode_put(ip);
if (*ipp == NULL)
err = -ENOMEM;
- gfs2_inode_put(ip);
}
return err;
}
+++ /dev/null
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <asm/semaphore.h>
-#include <asm/uaccess.h>
-
-#include "gfs2.h"
-#include "bmap.h"
-#include "inode.h"
-#include "jdata.h"
-#include "meta_io.h"
-#include "trans.h"
-
-int gfs2_internal_read(struct gfs2_inode *ip,
- struct file_ra_state *ra_state,
- char *buf, loff_t *pos, unsigned size)
-{
- return gfs2_jdata_read_mem(ip, buf, *pos, size);
-}
-
-int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
- struct buffer_head **bhp)
-{
- struct buffer_head *bh;
- int error = 0;
-
- if (new) {
- bh = gfs2_meta_new(ip->i_gl, block);
- gfs2_trans_add_bh(ip->i_gl, bh, 1);
- gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
- gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
- } else {
- error = gfs2_meta_read(ip->i_gl, block,
- DIO_START | DIO_WAIT, &bh);
- if (error)
- return error;
- if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) {
- brelse(bh);
- return -EIO;
- }
- }
-
- *bhp = bh;
-
- return 0;
-}
-
-/**
- * gfs2_copy2mem - Trivial copy function for gfs2_jdata_read()
- * @bh: The buffer to copy from, or NULL meaning zero the buffer
- * @buf: The buffer to copy/zero
- * @offset: The offset in the buffer to copy from
- * @size: The amount of data to copy/zero
- *
- * Returns: errno
- */
-
-int gfs2_copy2mem(struct buffer_head *bh, char **buf, unsigned int offset,
- unsigned int size)
-{
- if (bh)
- memcpy(*buf, bh->b_data + offset, size);
- else
- memset(*buf, 0, size);
- *buf += size;
- return 0;
-}
-
-/**
- * gfs2_copy2user - Copy bytes to user space for gfs2_jdata_read()
- * @bh: The buffer
- * @buf: The destination of the data
- * @offset: The offset into the buffer
- * @size: The amount of data to copy
- *
- * Returns: errno
- */
-
-int gfs2_copy2user(struct buffer_head *bh, char **buf, unsigned int offset,
- unsigned int size)
-{
- int error;
-
- if (bh)
- error = copy_to_user(*buf, bh->b_data + offset, size);
- else
- error = clear_user(*buf, size);
-
- if (error)
- error = -EFAULT;
- else
- *buf += size;
-
- return error;
-}
-
-static int jdata_read_stuffed(struct gfs2_inode *ip, char *buf,
- unsigned int offset, unsigned int size,
- read_copy_fn_t copy_fn)
-{
- struct buffer_head *dibh;
- int error;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (!error) {
- error = copy_fn(dibh, &buf,
- offset + sizeof(struct gfs2_dinode), size);
- brelse(dibh);
- }
-
- return (error) ? error : size;
-}
-
-/**
- * gfs2_jdata_read - Read a jdata file
- * @ip: The GFS2 Inode
- * @buf: The buffer to place result into
- * @offset: File offset to begin jdata_readng from
- * @size: Amount of data to transfer
- * @copy_fn: Function to actually perform the copy
- *
- * The @copy_fn only copies a maximum of a single block at once so
- * we are safe calling it with int arguments. It is done so that
- * we don't needlessly put 64bit arguments on the stack and it
- * also makes the code in the @copy_fn nicer too.
- *
- * Returns: The amount of data actually copied or the error
- */
-
-int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf, uint64_t offset,
- unsigned int size, read_copy_fn_t copy_fn)
-{
- struct gfs2_sbd *sdp = ip->i_sbd;
- uint64_t lblock, dblock;
- uint32_t extlen = 0;
- unsigned int o;
- int copied = 0;
- int error = 0;
-
- if (offset >= ip->i_di.di_size)
- return 0;
-
- if ((offset + size) > ip->i_di.di_size)
- size = ip->i_di.di_size - offset;
-
- if (!size)
- return 0;
-
- if (gfs2_is_stuffed(ip))
- return jdata_read_stuffed(ip, buf, (unsigned int)offset, size,
- copy_fn);
-
- if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
- return -EINVAL;
-
- lblock = offset;
- o = do_div(lblock, sdp->sd_jbsize) +
- sizeof(struct gfs2_meta_header);
-
- while (copied < size) {
- unsigned int amount;
- struct buffer_head *bh;
- int new;
-
- amount = size - copied;
- if (amount > sdp->sd_sb.sb_bsize - o)
- amount = sdp->sd_sb.sb_bsize - o;
-
- if (!extlen) {
- new = 0;
- error = gfs2_block_map(ip, lblock, &new,
- &dblock, &extlen);
- if (error)
- goto fail;
- }
-
- if (extlen > 1)
- gfs2_meta_ra(ip->i_gl, dblock, extlen);
-
- if (dblock) {
- error = gfs2_jdata_get_buffer(ip, dblock, new, &bh);
- if (error)
- goto fail;
- dblock++;
- extlen--;
- } else
- bh = NULL;
-
- error = copy_fn(bh, &buf, o, amount);
- brelse(bh);
- if (error)
- goto fail;
-
- copied += amount;
- lblock++;
-
- o = sizeof(struct gfs2_meta_header);
- }
-
- return copied;
-
- fail:
- return (copied) ? copied : error;
-}
-
-/**
- * gfs2_copy_from_mem - Trivial copy function for gfs2_jdata_write()
- * @bh: The buffer to copy to or clear
- * @buf: The buffer to copy from
- * @offset: The offset in the buffer to write to
- * @size: The amount of data to write
- *
- * Returns: errno
- */
-
-int gfs2_copy_from_mem(struct gfs2_inode *ip, struct buffer_head *bh,
- const char **buf, unsigned int offset, unsigned int size)
-{
- gfs2_trans_add_bh(ip->i_gl, bh, 1);
- memcpy(bh->b_data + offset, *buf, size);
-
- *buf += size;
-
- return 0;
-}
-
-/**
- * gfs2_copy_from_user - Copy bytes from user space for gfs2_jdata_write()
- * @bh: The buffer to copy to or clear
- * @buf: The buffer to copy from
- * @offset: The offset in the buffer to write to
- * @size: The amount of data to write
- *
- * Returns: errno
- */
-
-int gfs2_copy_from_user(struct gfs2_inode *ip, struct buffer_head *bh,
- const char __user **buf, unsigned int offset, unsigned int size)
-{
- int error = 0;
-
- gfs2_trans_add_bh(ip->i_gl, bh, 1);
- if (copy_from_user(bh->b_data + offset, *buf, size))
- error = -EFAULT;
- else
- *buf += size;
-
- return error;
-}
-
-static int jdata_write_stuffed(struct gfs2_inode *ip, char *buf,
- unsigned int offset, unsigned int size,
- write_copy_fn_t copy_fn)
-{
- struct buffer_head *dibh;
- int error;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- return error;
-
- error = copy_fn(ip,
- dibh, &buf,
- offset + sizeof(struct gfs2_dinode), size);
- if (!error) {
- if (ip->i_di.di_size < offset + size)
- ip->i_di.di_size = offset + size;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- }
-
- brelse(dibh);
-
- return (error) ? error : size;
-}
-
-/**
- * gfs2_jdata_write - Write bytes to a file
- * @ip: The GFS2 inode
- * @buf: The buffer containing information to be written
- * @offset: The file offset to start writing at
- * @size: The amount of data to write
- * @copy_fn: Function to do the actual copying
- *
- * Returns: The number of bytes correctly written or error code
- */
-
-int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf, uint64_t offset,
- unsigned int size, write_copy_fn_t copy_fn)
-{
- struct gfs2_sbd *sdp = ip->i_sbd;
- struct buffer_head *dibh;
- uint64_t lblock, dblock;
- uint32_t extlen = 0;
- unsigned int o;
- int copied = 0;
- int error = 0;
-
- if (!size)
- return 0;
-
- if (gfs2_is_stuffed(ip) &&
- offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
- return jdata_write_stuffed(ip, buf, (unsigned int)offset, size,
- copy_fn);
-
- if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
- return -EINVAL;
-
- if (gfs2_is_stuffed(ip)) {
- error = gfs2_unstuff_dinode(ip, NULL, NULL);
- if (error)
- return error;
- }
-
- lblock = offset;
- o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
-
- while (copied < size) {
- unsigned int amount;
- struct buffer_head *bh;
- int new;
-
- amount = size - copied;
- if (amount > sdp->sd_sb.sb_bsize - o)
- amount = sdp->sd_sb.sb_bsize - o;
-
- if (!extlen) {
- new = 1;
- error = gfs2_block_map(ip, lblock, &new,
- &dblock, &extlen);
- if (error)
- goto fail;
- error = -EIO;
- if (gfs2_assert_withdraw(sdp, dblock))
- goto fail;
- }
-
- error = gfs2_jdata_get_buffer(ip, dblock,
- (amount == sdp->sd_jbsize) ? 1 : new,
- &bh);
- if (error)
- goto fail;
-
- error = copy_fn(ip, bh, &buf, o, amount);
- brelse(bh);
- if (error)
- goto fail;
-
- copied += amount;
- lblock++;
- dblock++;
- extlen--;
-
- o = sizeof(struct gfs2_meta_header);
- }
-
- out:
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- return error;
-
- if (ip->i_di.di_size < offset + copied)
- ip->i_di.di_size = offset + copied;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- brelse(dibh);
-
- return copied;
-
- fail:
- if (copied)
- goto out;
- return error;
-}
-
+++ /dev/null
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- */
-
-#ifndef __FILE_DOT_H__
-#define __FILE_DOT_H__
-
-int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
- struct buffer_head **bhp);
-
-typedef int (*read_copy_fn_t) (struct buffer_head *bh, char **buf,
- unsigned int offset, unsigned int size);
-typedef int (*write_copy_fn_t) (struct gfs2_inode *ip,
- struct buffer_head *bh, const char **buf,
- unsigned int offset, unsigned int size);
-
-int gfs2_copy2mem(struct buffer_head *bh, char **buf,
- unsigned int offset, unsigned int size);
-int gfs2_copy2user(struct buffer_head *bh, char __user **buf,
- unsigned int offset, unsigned int size);
-int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf,
- uint64_t offset, unsigned int size,
- read_copy_fn_t copy_fn);
-
-int gfs2_copy_from_mem(struct gfs2_inode *ip,
- struct buffer_head *bh, const char **buf,
- unsigned int offset, unsigned int size);
-int gfs2_copy_from_user(struct gfs2_inode *ip,
- struct buffer_head *bh, const char __user **buf,
- unsigned int offset, unsigned int size);
-int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf,
- uint64_t offset, unsigned int size,
- write_copy_fn_t copy_fn);
-
-static inline int gfs2_jdata_read_mem(struct gfs2_inode *ip, char *buf,
- uint64_t offset, unsigned int size)
-{
- return gfs2_jdata_read(ip, (__force char __user *)buf, offset, size, gfs2_copy2mem);
-}
-
-static inline int gfs2_jdata_write_mem(struct gfs2_inode *ip, const char *buf,
- uint64_t offset, unsigned int size)
-{
- return gfs2_jdata_write(ip, (__force const char __user *)buf, offset, size, gfs2_copy_from_mem);
-}
-
-#endif /* __FILE_DOT_H__ */
bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
atomic_set(&bh->b_count, 1);
bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
- set_bh_page(bh, virt_to_page(real->b_data),
- ((unsigned long)real->b_data) & (PAGE_SIZE - 1));
+ set_bh_page(bh, real->b_page, bh_offset(real));
bh->b_blocknr = blkno;
bh->b_size = sdp->sd_sb.sb_bsize;
bh->b_bdev = sdp->sd_vfs->s_bdev;
gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
+ gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
}
+/**
+ * databuf_lo_add - Add a databuf to the transaction.
+ * @sdp: the filesystem superblock data
+ * @le: the log element; it is embedded in a struct gfs2_bufdata (bd_le)
+ *
+ * This is used in two distinct cases:
+ * i) In ordered write mode
+ * We put the data buffer on a list so that we can ensure that it's
+ * synced to disk at the right time
+ * ii) In journaled data mode
+ * We need to journal the data block in the same way as metadata in
+ * the functions above. The difference is that here we have a tag
+ * which is two __be64's being the block number (as per meta data)
+ * and a flag which says whether the data block needs escaping or
+ * not. This means we need a new log entry for each 251 or so data
+ * blocks, which isn't an enormous overhead but twice as much as
+ * for normal metadata blocks.
+ *
+ * The owning inode is found through the buffer's page mapping; its
+ * GFS2_DIF_JDATA flag selects the journaled case and also decides
+ * whether sd_log_num_jdata is incremented alongside sd_log_num_databuf.
+ *
+ * NOTE(review): the journaled branch is only taken when bd_list_tr is
+ * already non-empty, and it then list_add()s that same node again.
+ * This looks inverted (list_empty() expected) - confirm before relying
+ * on journaled buffers being pinned here.
+ */
static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
- get_transaction->tr_touched = 1;
+ struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
+ struct gfs2_trans *tr = get_transaction;
+ struct address_space *mapping = bd->bd_bh->b_page->mapping;
+ struct gfs2_inode *ip = get_v2ip(mapping->host);
+ tr->tr_touched = 1;
+ if (!list_empty(&bd->bd_list_tr) &&
+ (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
+ tr->tr_num_buf++;
+ gfs2_trans_add_gl(bd->bd_gl);
+ list_add(&bd->bd_list_tr, &tr->tr_list_buf);
+ gfs2_pin(sdp, bd->bd_bh);
+ } else {
+ clear_buffer_pinned(bd->bd_bh);
+ }
gfs2_log_lock(sdp);
+ if (ip->i_di.di_flags & GFS2_DIF_JDATA)
+ sdp->sd_log_num_jdata++;
sdp->sd_log_num_databuf++;
list_add(&le->le_list, &sdp->sd_log_le_databuf);
gfs2_log_unlock(sdp);
}
+/*
+ * gfs2_check_magic - Test whether a data block starts with the GFS2 magic
+ * @bh: buffer head of the data block to inspect
+ *
+ * Temporarily maps the buffer's page and compares the first 32-bit word
+ * at the buffer's offset within that page against GFS2_MAGIC (big endian).
+ *
+ * Returns: 1 if the block begins with GFS2_MAGIC, 0 otherwise
+ */
+static int gfs2_check_magic(struct buffer_head *bh)
+{
+ struct page *page = bh->b_page;
+ void *kaddr;
+ __be32 *ptr;
+ int rv = 0;
+
+ kaddr = kmap_atomic(page, KM_USER0);
+ ptr = kaddr + bh_offset(bh);
+ if (*ptr == cpu_to_be32(GFS2_MAGIC))
+ rv = 1;
+ /* Unmap using the address kmap_atomic() returned, not the page */
+ kunmap_atomic(kaddr, KM_USER0);
+
+ return rv;
+}
+
+/**
+ * databuf_lo_before_commit - Scan the data buffers, writing as we go
+ * @sdp: the filesystem superblock data
+ *
+ * Here we scan through the lists of buffers and make the assumption
+ * that any buffer that's been pinned is being journaled, and that
+ * any unpinned buffer is an ordered write data buffer and therefore
+ * will be written back rather than journaled.
+ *
+ * Journaled buffers are written to the log in batches: a descriptor
+ * block holding one two-__be64 tag (block number, escape flag) per
+ * buffer, followed by the data blocks themselves. Ordered buffers are
+ * moved to a local list, submitted for write, and freed at the end.
+ */
static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
- struct list_head *head = &sdp->sd_log_le_databuf;
LIST_HEAD(started);
- struct gfs2_bufdata *bd;
- struct buffer_head *bh;
+ struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
+ struct buffer_head *bh = NULL;
+ unsigned int offset = sizeof(struct gfs2_log_descriptor);
+ struct gfs2_log_descriptor *ld;
+ unsigned int limit;
+ unsigned int total_dbuf = sdp->sd_log_num_databuf;
+ unsigned int total_jdata = sdp->sd_log_num_jdata;
+ unsigned int num, n;
+ __be64 *ptr;
- while (!list_empty(head)) {
- bd = list_entry(head->prev, struct gfs2_bufdata, bd_le.le_list);
- list_move(&bd->bd_le.le_list, &started);
+ offset += (2*sizeof(__be64) - 1);
+ offset &= ~(2*sizeof(__be64) - 1);
+ /*
+ * Every journaled block consumes a tag of two __be64's in the
+ * descriptor, so the number of blocks per descriptor is the free
+ * space divided by the size of a whole tag.
+ */
+ limit = (sdp->sd_sb.sb_bsize - offset)/(2*sizeof(__be64));
- gfs2_log_lock(sdp);
- bh = bd->bd_bh;
+ /* printk(KERN_INFO "totals: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */
+ /*
+ * Start writing ordered buffers, write journaled buffers
+ * into the log along with a header
+ */
+ bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, bd_le.le_list);
+ while(total_dbuf) {
+ num = total_jdata;
+ if (num > limit)
+ num = limit;
+ n = 0;
+ list_for_each_entry_safe_continue(bd1, bdt, &sdp->sd_log_le_databuf, bd_le.le_list) {
+ gfs2_log_lock(sdp);
+ /* An ordered write buffer */
+ if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
+ list_move(&bd1->bd_le.le_list, &started);
+ if (bd1 == bd2) {
+ bd2 = NULL;
+ bd2 = list_prepare_entry(bd2, &sdp->sd_log_le_databuf, bd_le.le_list);
+ }
+ total_dbuf--;
+ if (bd1->bd_bh) {
+ get_bh(bd1->bd_bh);
+ gfs2_log_unlock(sdp);
+ if (buffer_dirty(bd1->bd_bh)) {
+ wait_on_buffer(bd1->bd_bh);
+ ll_rw_block(WRITE, 1, &bd1->bd_bh);
+ }
+ brelse(bd1->bd_bh);
+ continue;
+ }
+ gfs2_log_unlock(sdp);
+ continue;
+ } else if (bd1->bd_bh) { /* A journaled buffer */
+ int magic;
+ gfs2_log_unlock(sdp);
+ /* printk(KERN_INFO "journaled buffer\n"); */
+ if (!bh) {
+ bh = gfs2_log_get_buf(sdp);
+ ld = (struct gfs2_log_descriptor *)bh->b_data;
+ ptr = (__be64 *)(bh->b_data + offset);
+ ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+ ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD);
+ ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD);
+ ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_JDATA);
+ ld->ld_length = cpu_to_be32(num + 1);
+ ld->ld_data1 = cpu_to_be32(num);
+ ld->ld_data2 = cpu_to_be32(0);
+ memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
+ }
+ magic = gfs2_check_magic(bd1->bd_bh);
+ *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
+ *ptr++ = cpu_to_be64((__u64)magic);
+ clear_buffer_escaped(bd1->bd_bh);
+ if (unlikely(magic != 0))
+ set_buffer_escaped(bd1->bd_bh);
+ /* NOTE(review): the data loop below stops with ++n >= num; confirm this n++ > num bound is intended */
+ if (n++ > num)
+ break;
+ }
+ }
if (bh) {
- get_bh(bh);
- gfs2_log_unlock(sdp);
- if (buffer_dirty(bh)) {
- wait_on_buffer(bh);
- ll_rw_block(WRITE, 1, &bh);
+ set_buffer_dirty(bh);
+ ll_rw_block(WRITE, 1, &bh);
+ bh = NULL;
+ }
+ n = 0;
+ /* printk(KERN_INFO "totals2: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */
+ list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, bd_le.le_list) {
+ if (!bd2->bd_bh)
+ continue;
+ /* copy buffer if it needs escaping */
+ if (unlikely(buffer_escaped(bd2->bd_bh))) {
+ void *kaddr;
+ struct page *page = bd2->bd_bh->b_page;
+ bh = gfs2_log_get_buf(sdp);
+ kaddr = kmap_atomic(page, KM_USER0);
+ memcpy(bh->b_data, kaddr + bh_offset(bd2->bd_bh), sdp->sd_sb.sb_bsize);
+ /* Unmap using the address kmap_atomic() returned, not the page */
+ kunmap_atomic(kaddr, KM_USER0);
+ *(__be32 *)bh->b_data = 0;
+ } else {
+ bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
}
- brelse(bh);
- } else
- gfs2_log_unlock(sdp);
+ set_buffer_dirty(bh);
+ ll_rw_block(WRITE, 1, &bh);
+ if (++n >= num)
+ break;
+ }
+ bh = NULL;
+ total_dbuf -= num;
+ total_jdata -= num;
}
-
+ /* printk(KERN_INFO "wait on ordered data buffers\n"); */
+ /* Wait on all ordered buffers */
while (!list_empty(&started)) {
- bd = list_entry(started.next, struct gfs2_bufdata,
- bd_le.le_list);
- list_del(&bd->bd_le.le_list);
+ bd1 = list_entry(started.next, struct gfs2_bufdata, bd_le.le_list);
+ list_del(&bd1->bd_le.le_list);
sdp->sd_log_num_databuf--;
gfs2_log_lock(sdp);
- bh = bd->bd_bh;
+ bh = bd1->bd_bh;
if (bh) {
set_v2bd(bh, NULL);
gfs2_log_unlock(sdp);
} else
gfs2_log_unlock(sdp);
- kfree(bd);
+ kfree(bd1);
}
+ /* printk(KERN_INFO "sd_log_num_databuf %u sd_log_num_jdata %u\n", sdp->sd_log_num_databuf, sdp->sd_log_num_jdata); */
+ /* We've removed all the ordered write bufs here, so only jdata left */
+ gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
+}
+
+/**
+ * databuf_lo_scan_elements - Replay the journaled data blocks of one descriptor
+ * @jd: the journal being scanned
+ * @start: log position of the descriptor; advanced with gfs2_replay_incr_blk()
+ * @ld: the log descriptor; ld_data1 holds the number of tagged blocks
+ * @ptr: the tags, two __be64's per block: block number, then escape flag
+ * @pass: scan pass; anything other than pass 1 is ignored
+ *
+ * Descriptors whose type is not GFS2_LOG_DESC_JDATA are ignored. For each
+ * tagged block that gfs2_revoke_check() does not reject, the log copy is
+ * read, copied into a buffer obtained for the in-place block number, has
+ * its first word restored to GFS2_MAGIC if the escape flag is set, and is
+ * marked dirty.
+ *
+ * Returns: 0, or the error from gfs2_replay_read_block()
+ */
+static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
+ struct gfs2_log_descriptor *ld,
+ __be64 *ptr, int pass)
+{
+ struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+ struct gfs2_glock *gl = jd->jd_inode->i_gl;
+ unsigned int blks = be32_to_cpu(ld->ld_data1);
+ struct buffer_head *bh_log, *bh_ip;
+ uint64_t blkno;
+ uint64_t esc;
+ int error = 0;
+
+ if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
+ return 0;
+
+ gfs2_replay_incr_blk(sdp, &start);
+ for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
+ blkno = be64_to_cpu(*ptr++);
+ esc = be64_to_cpu(*ptr++);
+
+ sdp->sd_found_blocks++;
+
+ if (gfs2_revoke_check(sdp, blkno, start))
+ continue;
+
+ error = gfs2_replay_read_block(jd, start, &bh_log);
+ if (error)
+ return error;
+
+ bh_ip = gfs2_meta_new(gl, blkno);
+ memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
+
+ /* Unescape */
+ if (esc) {
+ __be32 *eptr = (__be32 *)bh_ip->b_data;
+ *eptr = cpu_to_be32(GFS2_MAGIC);
+ }
+ mark_buffer_dirty(bh_ip);
+
+ brelse(bh_log);
+ brelse(bh_ip);
+ if (error)
+ break;
+
+ sdp->sd_replayed_blocks++;
+ }
+
+ return error;
+}
+
+/* FIXME: sort out accounting for log blocks etc. */
+
+/**
+ * databuf_lo_after_scan - Finish a replay pass for journaled data
+ * @jd: the journal that was scanned
+ * @error: non-zero if the scan failed
+ * @pass: the scan pass that just completed
+ *
+ * On error the journal inode's glock is synced and nothing is reported.
+ * Otherwise only pass 1 does any work: it syncs and then logs how many
+ * of the found data blocks were replayed.
+ */
+static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
+{
+ struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+
+ if (error) {
+ gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
+ return;
+ }
+ if (pass != 1)
+ return;
+
+ /* data sync? */
+ gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
+
+ fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
+ jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
+}
+
+/**
+ * databuf_lo_after_commit - Release the data buffers left after a commit
+ * @sdp: the filesystem superblock data
+ * @ai: passed through to gfs2_unpin() for each buffer
+ *
+ * Empties sd_log_le_databuf: each entry is unlinked, both the databuf and
+ * jdata counters are decremented, the buffer is unpinned and released, and
+ * the bufdata is freed. Both counters are expected to reach zero.
+ */
+static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+ struct list_head *head = &sdp->sd_log_le_databuf;
+ struct gfs2_bufdata *bd;
+
+ while (!list_empty(head)) {
+ bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
+ list_del_init(&bd->bd_le.le_list);
+ sdp->sd_log_num_databuf--;
+ sdp->sd_log_num_jdata--;
+ gfs2_unpin(sdp, bd->bd_bh, ai);
+ brelse(bd->bd_bh);
+ kfree(bd);
+ }
gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
+ gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
}
+
struct gfs2_log_operations gfs2_glock_lops = {
.lo_add = glock_lo_add,
.lo_after_commit = glock_lo_after_commit,
struct gfs2_log_operations gfs2_databuf_lops = {
.lo_add = databuf_lo_add,
+ .lo_incore_commit = buf_lo_incore_commit,
.lo_before_commit = databuf_lo_before_commit,
+ .lo_after_commit = databuf_lo_after_commit,
+ .lo_scan_elements = databuf_lo_scan_elements,
+ .lo_after_scan = databuf_lo_after_scan,
.lo_name = "databuf"
};
{
struct gfs2_bufdata *bd;
- lock_page(bh->b_page);
+ if (meta)
+ lock_page(bh->b_page);
if (get_v2bd(bh)) {
- unlock_page(bh->b_page);
+ if (meta)
+ unlock_page(bh->b_page);
return;
}
bd->bd_gl = gl;
INIT_LIST_HEAD(&bd->bd_list_tr);
- if (meta)
+ if (meta) {
lops_init_le(&bd->bd_le, &gfs2_buf_lops);
- else
+ } else {
lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
-
+ get_bh(bh);
+ }
set_v2bd(bh, bd);
- unlock_page(bh->b_page);
+ if (meta)
+ unlock_page(bh->b_page);
}
/**
#include "bmap.h"
#include "glock.h"
#include "inode.h"
-#include "jdata.h"
#include "log.h"
#include "meta_io.h"
#include "ops_address.h"
#include "page.h"
#include "quota.h"
#include "trans.h"
+#include "rgrp.h"
/**
* gfs2_get_block - Fills in a buffer head with details about a block
*
* Returns: errno
*
- * Use Linux VFS block_write_full_page() to write one page,
- * using GFS2's get_block_noalloc to find which blocks to write.
+ * Some of this is copied from block_write_full_page() although we still
+ * call it to do most of the work.
*/
static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
{
+ struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = get_v2ip(page->mapping->host);
struct gfs2_sbd *sdp = ip->i_sbd;
+ loff_t i_size = i_size_read(inode);
+ pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+ unsigned offset;
int error;
+ int done_trans = 0;
atomic_inc(&sdp->sd_ops_address);
-
if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
unlock_page(page);
return -EIO;
}
- if (get_transaction) {
- redirty_page_for_writepage(wbc, page);
+ /* Already inside a transaction: leave the page dirty for later */
+ if (get_transaction)
+ goto out_ignore;
+
+ /*
+ * Is the page fully outside i_size? (truncate in progress)
+ * A zero offset only means "outside" for the page at end_index
+ * itself; pages below end_index are entirely inside i_size and
+ * must still be written when i_size is page aligned.
+ */
+ offset = i_size & (PAGE_CACHE_SIZE-1);
+ if (page->index > end_index || (page->index == end_index && !offset)) {
+ page->mapping->a_ops->invalidatepage(page, 0);
unlock_page(page);
- return 0;
+ return 0; /* don't care */
}
- error = block_write_full_page(page, get_block_noalloc, wbc);
+ /* Ordered and journaled data need their buffers added to a transaction */
+ if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
+ error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
+ if (error)
+ goto out_ignore;
+ gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
+ done_trans = 1;
+ }
+ error = block_write_full_page(page, get_block_noalloc, wbc);
+ if (done_trans)
+ gfs2_trans_end(sdp);
gfs2_meta_cache_flush(ip);
-
return error;
+
+out_ignore:
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return 0;
}
/**
return 0;
}
-/**
- * jdata_readpage - readpage that goes through gfs2_jdata_read_mem()
- * @ip:
- * @page: The page to read
- *
- * Returns: errno
- */
-
-static int jdata_readpage(struct gfs2_inode *ip, struct page *page)
-{
- void *kaddr;
- int ret;
-
- kaddr = kmap(page);
-
- ret = gfs2_jdata_read_mem(ip, kaddr,
- (uint64_t)page->index << PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE);
- if (ret >= 0) {
- if (ret < PAGE_CACHE_SIZE)
- memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret);
- SetPageUptodate(page);
- ret = 0;
- }
-
- kunmap(page);
-
- unlock_page(page);
-
- return ret;
-}
-
/**
* gfs2_readpage - readpage with locking
- * @file: The file to read a page for
+ * @file: The file to read a page for. N.B. This may be NULL if we are
+ * reading an internal file.
* @page: The page to read
*
* Returns: errno
{
struct gfs2_inode *ip = get_v2ip(page->mapping->host);
struct gfs2_sbd *sdp = ip->i_sbd;
+ struct gfs2_holder gh;
int error;
atomic_inc(&sdp->sd_ops_address);
- if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) {
- unlock_page(page);
- return -EOPNOTSUPP;
- }
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+ error = gfs2_glock_nq_m_atime(1, &gh);
+ if (error)
+ goto out_unlock;
- if (!gfs2_is_jdata(ip)) {
- if (gfs2_is_stuffed(ip)) {
- if (!page->index) {
- error = stuffed_readpage(ip, page);
- unlock_page(page);
- } else
- error = zero_readpage(page);
+ if (gfs2_is_stuffed(ip)) {
+ if (!page->index) {
+ error = stuffed_readpage(ip, page);
+ unlock_page(page);
} else
- error = mpage_readpage(page, gfs2_get_block);
+ error = zero_readpage(page);
} else
- error = jdata_readpage(ip, page);
+ error = mpage_readpage(page, gfs2_get_block);
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
error = -EIO;
+ gfs2_glock_dq_m(1, &gh);
+ gfs2_holder_uninit(&gh);
+out:
return error;
+out_unlock:
+ unlock_page(page);
+ goto out;
}
/**
{
struct gfs2_inode *ip = get_v2ip(page->mapping->host);
struct gfs2_sbd *sdp = ip->i_sbd;
+ unsigned int data_blocks, ind_blocks, rblocks;
+ int alloc_required;
int error = 0;
+ loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
+ loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+ struct gfs2_alloc *al;
atomic_inc(&sdp->sd_ops_address);
- if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
- return -EOPNOTSUPP;
+ /* The glock, and on success everything acquired below, stays held */
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh);
+ error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
+ if (error)
+ goto out_uninit;
- if (gfs2_is_stuffed(ip)) {
- uint64_t file_size;
- file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
+ gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
+
+ /* The length written is to - from, the same as used just above */
+ error = gfs2_write_alloc_required(ip, pos, to - from, &alloc_required);
+ if (error)
+ goto out_unlock;
- if (file_size > sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_dinode)) {
- error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
- page);
- if (!error)
- error = block_prepare_write(page, from, to,
- gfs2_get_block);
- } else if (!PageUptodate(page))
+
+ if (alloc_required) {
+ al = gfs2_alloc_get(ip);
+
+ error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+ if (error)
+ goto out_alloc_put;
+
+ error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+ if (error)
+ goto out_qunlock;
+
+ al->al_requested = data_blocks + ind_blocks;
+ error = gfs2_inplace_reserve(ip);
+ if (error)
+ goto out_qunlock;
+ }
+
+ rblocks = RES_DINODE + ind_blocks;
+ if (gfs2_is_jdata(ip))
+ rblocks += data_blocks ? data_blocks : 1;
+ if (ind_blocks || data_blocks)
+ rblocks += RES_STATFS + RES_QUOTA;
+
+ /* No transaction exists if this fails, so skip gfs2_trans_end() */
+ error = gfs2_trans_begin(sdp, rblocks, 0);
+ if (error)
+ goto out_trans_fail;
+
+ if (gfs2_is_stuffed(ip)) {
+ if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
+ error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, page);
+ if (error)
+ goto out;
+ } else if (!PageUptodate(page)) {
error = stuffed_readpage(ip, page);
- } else
- error = block_prepare_write(page, from, to, gfs2_get_block);
+ goto out;
+ }
+ }
+
+ error = block_prepare_write(page, from, to, gfs2_get_block);
+
+out:
+ /* Unwind only on failure; the labels below are entered with error set */
+ if (error) {
+ gfs2_trans_end(sdp);
+out_trans_fail:
+ if (alloc_required) {
+ gfs2_inplace_release(ip);
+out_qunlock:
+ gfs2_quota_unlock(ip);
+out_alloc_put:
+ gfs2_alloc_put(ip);
+ }
+out_unlock:
+ gfs2_glock_dq_m(1, &ip->i_gh);
+out_uninit:
+ gfs2_holder_uninit(&ip->i_gh);
+ }
return error;
}
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = get_v2ip(inode);
struct gfs2_sbd *sdp = ip->i_sbd;
- int error;
+ int error = -EOPNOTSUPP;
+ struct buffer_head *dibh;
+ struct gfs2_alloc *al = &ip->i_alloc;
atomic_inc(&sdp->sd_ops_address);
+
+ if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
+ goto fail_nounlock;
+
+ error = gfs2_meta_inode_buffer(ip, &dibh);
+ if (error)
+ goto fail_endtrans;
+
+ gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+
if (gfs2_is_stuffed(ip)) {
- struct buffer_head *dibh;
uint64_t file_size;
void *kaddr;
file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail;
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-
- kaddr = kmap(page);
+ kaddr = kmap_atomic(page, KM_USER0);
memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
- (char *)kaddr + from,
- to - from);
- kunmap(page);
-
- brelse(dibh);
+ (char *)kaddr + from, to - from);
+ /* kunmap_atomic() takes the address returned by kmap_atomic(), not the page */
+ kunmap_atomic(kaddr, KM_USER0);
SetPageUptodate(page);
if (inode->i_size < file_size)
i_size_write(inode, file_size);
} else {
- if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
+ if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
gfs2_page_add_databufs(ip, page, from, to);
error = generic_commit_write(file, page, from, to);
if (error)
goto fail;
}
+ if (ip->i_di.di_size < inode->i_size)
+ ip->i_di.di_size = inode->i_size;
+
+ gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ brelse(dibh);
+ gfs2_trans_end(sdp);
+ if (al->al_requested) {
+ gfs2_inplace_release(ip);
+ gfs2_quota_unlock(ip);
+ gfs2_alloc_put(ip);
+ }
+ gfs2_glock_dq_m(1, &ip->i_gh);
+ gfs2_holder_uninit(&ip->i_gh);
return 0;
- fail:
+fail:
+ brelse(dibh);
+fail_endtrans:
+ gfs2_trans_end(sdp);
+ if (al->al_requested) {
+ gfs2_inplace_release(ip);
+ gfs2_quota_unlock(ip);
+ gfs2_alloc_put(ip);
+ }
+ gfs2_glock_dq_m(1, &ip->i_gh);
+ gfs2_holder_uninit(&ip->i_gh);
+fail_nounlock:
ClearPageUptodate(page);
-
return error;
}
atomic_inc(&sdp->sd_ops_address);
- if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
- gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
+ if (gfs2_is_jdata(ip))
return -EINVAL;
- if (rw == WRITE && !get_transaction)
- gb = get_blocks_noalloc;
+ if (rw == WRITE) {
+ return -EOPNOTSUPP; /* for now */
+ } else {
+ if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
+ gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
+ return -EINVAL;
+ }
return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, gb, NULL);
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/gfs2_ioctl.h>
+#include <linux/fs.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
#include "glock.h"
#include "glops.h"
#include "inode.h"
-#include "jdata.h"
#include "lm.h"
#include "log.h"
#include "meta_io.h"
void *fdr_opaque;
};
-typedef ssize_t(*do_rw_t) (struct file *file,
- char __user *buf,
- size_t size, loff_t *offset,
- unsigned int num_gh, struct gfs2_holder *ghs);
+/**
+ * gfs2_read_actor - copy part of a page into a read descriptor's buffer
+ * @desc: read descriptor; ->count is the number of bytes still wanted and
+ *        ->arg.buf is the destination
+ * @page: the page to copy from
+ * @offset: byte offset within @page at which to start copying
+ * @size: number of bytes available from @offset
+ *
+ * At most desc->count bytes are copied with a plain memcpy(), so the
+ * destination is presumably a kernel buffer rather than user memory.
+ * The descriptor's count, written and arg.buf fields are advanced by the
+ * amount copied.
+ *
+ * Returns: the number of bytes copied
+ */
+static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
+			   unsigned long offset, unsigned long size)
+{
+	unsigned long remaining = desc->count;
+	unsigned long nbytes = (size > remaining) ? remaining : size;
+	char *src;
+
+	src = kmap(page);
+	memcpy(desc->arg.buf, src + offset, nbytes);
+	kunmap(page);
+
+	desc->arg.buf += nbytes;
+	desc->written += nbytes;
+	desc->count = remaining - nbytes;
+	return nbytes;
+}
+
+/**
+ * gfs2_internal_read - read from an inode's mapping into a buffer
+ * @ip: the GFS2 inode to read from
+ * @ra_state: readahead state handed to do_generic_mapping_read()
+ * @buf: destination buffer, filled by gfs2_read_actor()
+ * @pos: file position, passed by reference to do_generic_mapping_read()
+ * @size: number of bytes requested
+ *
+ * Returns: the number of bytes copied if any were copied, otherwise the
+ *          error value recorded in the read descriptor
+ */
+int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
+		       char *buf, loff_t *pos, unsigned size)
+{
+	struct address_space *mapping = ip->i_vnode->i_mapping;
+	read_descriptor_t rd;
+
+	rd.error = 0;
+	rd.written = 0;
+	rd.count = size;
+	rd.arg.buf = buf;
+
+	do_generic_mapping_read(mapping, ra_state, NULL, pos, &rd,
+				gfs2_read_actor);
+	if (rd.written)
+		return rd.written;
+	return rd.error;
+}
/**
* gfs2_llseek - seek to a location in a file
return error;
}
-static inline unsigned int vma2state(struct vm_area_struct *vma)
-{
- if ((vma->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) ==
- (VM_MAYWRITE | VM_MAYSHARE))
- return LM_ST_EXCLUSIVE;
- return LM_ST_SHARED;
-}
-static ssize_t walk_vm_hard(struct file *file, const char __user *buf, size_t size,
- loff_t *offset, do_rw_t operation)
+static ssize_t gfs2_direct_IO_read(struct kiocb *iocb, const struct iovec *iov,
+ loff_t offset, unsigned long nr_segs)
{
- struct gfs2_holder *ghs;
- unsigned int num_gh = 0;
- ssize_t count;
- struct super_block *sb = file->f_dentry->d_inode->i_sb;
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long start = (unsigned long)buf;
- unsigned long end = start + size;
- int dumping = (current->flags & PF_DUMPCORE);
- unsigned int x = 0;
-
- for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
- if (end <= vma->vm_start)
- break;
- if (vma->vm_file &&
- vma->vm_file->f_dentry->d_inode->i_sb == sb) {
- num_gh++;
- }
- }
-
- ghs = kcalloc((num_gh + 1), sizeof(struct gfs2_holder), GFP_KERNEL);
- if (!ghs) {
- if (!dumping)
- up_read(&mm->mmap_sem);
- return -ENOMEM;
- }
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ ssize_t retval;
- for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
- if (end <= vma->vm_start)
- break;
- if (vma->vm_file) {
- struct inode *inode = vma->vm_file->f_dentry->d_inode;
- if (inode->i_sb == sb)
- gfs2_holder_init(get_v2ip(inode)->i_gl,
- vma2state(vma), 0, &ghs[x++]);
- }
+ retval = filemap_write_and_wait(mapping);
+ if (retval == 0) {
+ retval = mapping->a_ops->direct_IO(READ, iocb, iov, offset,
+ nr_segs);
}
-
- if (!dumping)
- up_read(&mm->mmap_sem);
-
- gfs2_assert(get_v2sdp(sb), x == num_gh);
-
- count = operation(file, buf, size, offset, num_gh, ghs);
-
- while (num_gh--)
- gfs2_holder_uninit(&ghs[num_gh]);
- kfree(ghs);
-
- return count;
+ return retval;
}
/**
- * walk_vm - Walk the vmas associated with a buffer for read or write.
- * If any of them are gfs2, pass the gfs2 inode down to the read/write
- * worker function so that locks can be acquired in the correct order.
- * @file: The file to read/write from/to
- * @buf: The buffer to copy to/from
- * @size: The amount of data requested
- * @offset: The current file offset
- * @operation: The read or write worker function
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
+ * __gfs2_file_aio_read - The main GFS2 read function
+ *
+ * N.B. This is almost, but not quite, the same as __generic_file_aio_read();
+ * the important subtle difference being that inode->i_size isn't valid
+ * unless we are holding a lock, and we do this _only_ on the O_DIRECT
+ * path since otherwise locking is done entirely at the page cache
+ * layer.
*/
-
-static ssize_t walk_vm(struct file *file, const char __user *buf, size_t size,
- loff_t *offset, do_rw_t operation)
+static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos)
{
+ struct file *filp = iocb->ki_filp;
+ struct gfs2_inode *ip = get_v2ip(filp->f_mapping->host);
struct gfs2_holder gh;
-
- if (current->mm) {
- struct super_block *sb = file->f_dentry->d_inode->i_sb;
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long start = (unsigned long)buf;
- unsigned long end = start + size;
- int dumping = (current->flags & PF_DUMPCORE);
-
- if (!dumping)
- down_read(&mm->mmap_sem);
-
- for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
- if (end <= vma->vm_start)
- break;
- if (vma->vm_file &&
- vma->vm_file->f_dentry->d_inode->i_sb == sb)
- goto do_locks;
- }
-
- if (!dumping)
- up_read(&mm->mmap_sem);
- }
-
- return operation(file, buf, size, offset, 0, &gh);
-
-do_locks:
- return walk_vm_hard(file, buf, size, offset, operation);
-}
-
-static ssize_t do_jdata_read(struct file *file, char __user *buf, size_t size,
- loff_t *offset)
-{
- struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
- ssize_t count = 0;
-
- if (*offset < 0)
+ ssize_t retval;
+ unsigned long seg;
+ size_t count;
+
+ count = 0;
+ for (seg = 0; seg < nr_segs; seg++) {
+ const struct iovec *iv = &iov[seg];
+
+ /*
+ * If any segment has a negative length, or the cumulative
+ * length ever wraps negative then return -EINVAL.
+ */
+ count += iv->iov_len;
+ if (unlikely((ssize_t)(count|iv->iov_len) < 0))
return -EINVAL;
- if (!access_ok(VERIFY_WRITE, buf, size))
+ if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
+ continue;
+ if (seg == 0)
return -EFAULT;
+ nr_segs = seg;
+ count -= iv->iov_len; /* This segment is no good */
+ break;
+ }
+
+ /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
+ if (filp->f_flags & O_DIRECT) {
+ loff_t pos = *ppos, size;
+ struct address_space *mapping;
+ struct inode *inode;
+
+ mapping = filp->f_mapping;
+ inode = mapping->host;
+ retval = 0;
+ if (!count)
+ goto out; /* skip atime */
+
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+ retval = gfs2_glock_nq_m_atime(1, &gh);
+ if (retval) {
+ /* Enqueue failed, but the holder was initialised above: release it */
+ gfs2_holder_uninit(&gh);
+ goto out;
+ }
- if (!(file->f_flags & O_LARGEFILE)) {
- if (*offset >= MAX_NON_LFS)
- return -EFBIG;
- if (*offset + size > MAX_NON_LFS)
- size = MAX_NON_LFS - *offset;
- }
-
- count = gfs2_jdata_read(ip, buf, *offset, size, gfs2_copy2user);
-
- if (count > 0)
- *offset += count;
-
- return count;
-}
-
-/**
- * do_read_direct - Read bytes from a file
- * @file: The file to read from
- * @buf: The buffer to copy into
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @ghs: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes read
- *
- * Returns: The number of bytes read, errno on failure
- */
-
-static ssize_t do_read_direct(struct file *file, char __user *buf, size_t size,
- loff_t *offset, unsigned int num_gh,
- struct gfs2_holder *ghs)
-{
- struct inode *inode = file->f_mapping->host;
- struct gfs2_inode *ip = get_v2ip(inode);
- unsigned int state = LM_ST_DEFERRED;
- int flags = 0;
- unsigned int x;
- ssize_t count = 0;
- int error;
-
- for (x = 0; x < num_gh; x++)
- if (ghs[x].gh_gl == ip->i_gl) {
- state = LM_ST_SHARED;
- flags |= GL_LOCAL_EXCL;
- break;
+ size = i_size_read(inode);
+ if (pos < size) {
+ retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
+ if (retval > 0 && !is_sync_kiocb(iocb))
+ retval = -EIOCBQUEUED;
+ if (retval > 0)
+ *ppos = pos + retval;
}
-
- gfs2_holder_init(ip->i_gl, state, flags, &ghs[num_gh]);
-
- error = gfs2_glock_nq_m(num_gh + 1, ghs);
- if (error)
+ file_accessed(filp);
+ gfs2_glock_dq_m(1, &gh);
+ gfs2_holder_uninit(&gh);
goto out;
+ }
- error = -EINVAL;
- if (gfs2_is_jdata(ip))
- goto out_gunlock;
-
- if (gfs2_is_stuffed(ip)) {
- size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1;
-
- if (((*offset) & mask) || (((unsigned long)buf) & mask))
- goto out_gunlock;
-
- count = do_jdata_read(file, buf, size & ~mask, offset);
- } else
- count = generic_file_read(file, buf, size, offset);
-
- error = 0;
-
- out_gunlock:
- gfs2_glock_dq_m(num_gh + 1, ghs);
-
- out:
- gfs2_holder_uninit(&ghs[num_gh]);
-
- return (count) ? count : error;
-}
-
-/**
- * do_read_buf - Read bytes from a file
- * @file: The file to read from
- * @buf: The buffer to copy into
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @ghs: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes read
- *
- * Returns: The number of bytes read, errno on failure
- */
-
-static ssize_t do_read_buf(struct file *file, char __user *buf, size_t size,
- loff_t *offset, unsigned int num_gh,
- struct gfs2_holder *ghs)
-{
- struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
- ssize_t count = 0;
- int error;
-
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]);
-
- error = gfs2_glock_nq_m_atime(num_gh + 1, ghs);
- if (error)
- goto out;
-
- if (gfs2_is_jdata(ip))
- count = do_jdata_read(file, buf, size, offset);
- else
- count = generic_file_read(file, buf, size, offset);
-
- gfs2_glock_dq_m(num_gh + 1, ghs);
-
- out:
- gfs2_holder_uninit(&ghs[num_gh]);
-
- return (count) ? count : error;
+ retval = 0;
+ if (count) {
+ for (seg = 0; seg < nr_segs; seg++) {
+ read_descriptor_t desc;
+
+ desc.written = 0;
+ desc.arg.buf = iov[seg].iov_base;
+ desc.count = iov[seg].iov_len;
+ if (desc.count == 0)
+ continue;
+ desc.error = 0;
+ do_generic_file_read(filp,ppos,&desc,file_read_actor);
+ retval += desc.written;
+ if (desc.error) {
+ retval = retval ?: desc.error;
+ break;
+ }
+ }
+ }
+out:
+ return retval;
}
/**
* Returns: The number of bytes read, errno on failure
*/
-static ssize_t gfs2_read(struct file *file, char __user *buf, size_t size,
+static ssize_t gfs2_read(struct file *filp, char __user *buf, size_t size,
loff_t *offset)
{
- atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
-
- if (file->f_flags & O_DIRECT)
- return walk_vm(file, buf, size, offset, do_read_direct);
- else
- return walk_vm(file, buf, size, offset, do_read_buf);
-}
-
-/**
- * grope_mapping - feel up a mapping that needs to be written
- * @buf: the start of the memory to be written
- * @size: the size of the memory to be written
- *
- * We do this after acquiring the locks on the mapping,
- * but before starting the write transaction. We need to make
- * sure that we don't cause recursive transactions if blocks
- * need to be allocated to the file backing the mapping.
- *
- * Returns: errno
- */
-
-static int grope_mapping(const char __user *buf, size_t size)
-{
- const char __user *stop = buf + size;
- char c;
-
- while (buf < stop) {
- if (copy_from_user(&c, buf, 1))
- return -EFAULT;
- buf += PAGE_CACHE_SIZE;
- buf = (const char __user *)PAGE_ALIGN((unsigned long)buf);
- }
-
- return 0;
-}
-
-/**
- * do_write_direct_alloc - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_write_direct_alloc(struct file *file, const char __user *buf, size_t size,
- loff_t *offset)
-{
- struct inode *inode = file->f_mapping->host;
- struct gfs2_inode *ip = get_v2ip(inode);
- struct gfs2_sbd *sdp = ip->i_sbd;
- struct gfs2_alloc *al = NULL;
struct iovec local_iov = { .iov_base = buf, .iov_len = size };
- struct buffer_head *dibh;
- unsigned int data_blocks, ind_blocks;
- ssize_t count;
- int error;
-
- gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
-
- al = gfs2_alloc_get(ip);
-
- error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
- if (error)
- goto fail;
-
- error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
- if (error)
- goto fail_gunlock_q;
-
- al->al_requested = data_blocks + ind_blocks;
-
- error = gfs2_inplace_reserve(ip);
- if (error)
- goto fail_gunlock_q;
-
- error = gfs2_trans_begin(sdp,
- al->al_rgd->rd_ri.ri_length + ind_blocks +
- RES_DINODE + RES_STATFS + RES_QUOTA, 0);
- if (error)
- goto fail_ipres;
-
- if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail_end_trans;
-
- ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
- (~(S_ISUID | S_ISGID)) : (~S_ISUID);
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- brelse(dibh);
- }
-
- if (gfs2_is_stuffed(ip)) {
- error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_sync, NULL);
- if (error)
- goto fail_end_trans;
- }
-
- count = generic_file_write_nolock(file, &local_iov, 1, offset);
- if (count < 0) {
- error = count;
- goto fail_end_trans;
- }
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail_end_trans;
-
- if (ip->i_di.di_size < inode->i_size)
- ip->i_di.di_size = inode->i_size;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- brelse(dibh);
-
- gfs2_trans_end(sdp);
+ struct kiocb kiocb;
+ ssize_t ret;
- if (file->f_flags & O_SYNC)
- gfs2_log_flush_glock(ip->i_gl);
-
- gfs2_inplace_release(ip);
- gfs2_quota_unlock(ip);
- gfs2_alloc_put(ip);
-
- if (file->f_mapping->nrpages) {
- error = filemap_fdatawrite(file->f_mapping);
- if (!error)
- error = filemap_fdatawait(file->f_mapping);
- }
- if (error)
- return error;
-
- return count;
-
- fail_end_trans:
- gfs2_trans_end(sdp);
-
- fail_ipres:
- gfs2_inplace_release(ip);
-
- fail_gunlock_q:
- gfs2_quota_unlock(ip);
-
- fail:
- gfs2_alloc_put(ip);
+ atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
- return error;
-}
-
-/**
- * do_write_direct - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @gh: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_write_direct(struct file *file, const char __user *buf, size_t size,
- loff_t *offset, unsigned int num_gh,
- struct gfs2_holder *ghs)
-{
- struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
- struct gfs2_sbd *sdp = ip->i_sbd;
- struct gfs2_file *fp = get_v2fp(file);
- unsigned int state = LM_ST_DEFERRED;
- int alloc_required;
- unsigned int x;
- size_t s;
- ssize_t count = 0;
- int error;
-
- if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags))
- state = LM_ST_EXCLUSIVE;
- else
- for (x = 0; x < num_gh; x++)
- if (ghs[x].gh_gl == ip->i_gl) {
- state = LM_ST_EXCLUSIVE;
- break;
- }
-
- restart:
- gfs2_holder_init(ip->i_gl, state, 0, &ghs[num_gh]);
-
- error = gfs2_glock_nq_m(num_gh + 1, ghs);
- if (error)
- goto out;
-
- error = -EINVAL;
- if (gfs2_is_jdata(ip))
- goto out_gunlock;
-
- if (num_gh) {
- error = grope_mapping(buf, size);
- if (error)
- goto out_gunlock;
- }
-
- if (file->f_flags & O_APPEND)
- *offset = ip->i_di.di_size;
-
- if (!(file->f_flags & O_LARGEFILE)) {
- error = -EFBIG;
- if (*offset >= MAX_NON_LFS)
- goto out_gunlock;
- if (*offset + size > MAX_NON_LFS)
- size = MAX_NON_LFS - *offset;
- }
-
- if (gfs2_is_stuffed(ip) ||
- *offset + size > ip->i_di.di_size ||
- ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)))
- alloc_required = 1;
- else {
- error = gfs2_write_alloc_required(ip, *offset, size,
- &alloc_required);
- if (error)
- goto out_gunlock;
- }
-
- if (alloc_required && state != LM_ST_EXCLUSIVE) {
- gfs2_glock_dq_m(num_gh + 1, ghs);
- gfs2_holder_uninit(&ghs[num_gh]);
- state = LM_ST_EXCLUSIVE;
- goto restart;
- }
-
- if (alloc_required) {
- set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
-
- /* split large writes into smaller atomic transactions */
- while (size) {
- s = gfs2_tune_get(sdp, gt_max_atomic_write);
- if (s > size)
- s = size;
-
- error = do_write_direct_alloc(file, buf, s, offset);
- if (error < 0)
- goto out_gunlock;
-
- buf += error;
- size -= error;
- count += error;
- }
- } else {
- struct iovec local_iov = { .iov_base = buf, .iov_len = size };
- struct gfs2_holder t_gh;
-
- clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
-
- error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
- GL_NEVER_RECURSE, &t_gh);
- if (error)
- goto out_gunlock;
-
- count = generic_file_write_nolock(file, &local_iov, 1, offset);
-
- gfs2_glock_dq_uninit(&t_gh);
- }
-
- error = 0;
-
- out_gunlock:
- gfs2_glock_dq_m(num_gh + 1, ghs);
-
- out:
- gfs2_holder_uninit(&ghs[num_gh]);
-
- return (count) ? count : error;
+ init_sync_kiocb(&kiocb, filp);
+ ret = __gfs2_file_aio_read(&kiocb, &local_iov, 1, offset);
+ if (-EIOCBQUEUED == ret)
+ ret = wait_on_sync_kiocb(&kiocb);
+ return ret;
}
-/**
- * do_do_write_buf - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_do_write_buf(struct file *file, const char __user *buf, size_t size,
- loff_t *offset)
+static ssize_t gfs2_file_readv(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos)
{
- struct inode *inode = file->f_mapping->host;
- struct gfs2_inode *ip = get_v2ip(inode);
- struct gfs2_sbd *sdp = ip->i_sbd;
- struct gfs2_alloc *al = NULL;
- struct buffer_head *dibh;
- unsigned int data_blocks, ind_blocks;
- int alloc_required, journaled;
- ssize_t count;
- int error;
-
- journaled = gfs2_is_jdata(ip);
-
- gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
-
- error = gfs2_write_alloc_required(ip, *offset, size, &alloc_required);
- if (error)
- return error;
-
- if (alloc_required) {
- al = gfs2_alloc_get(ip);
-
- error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
- if (error)
- goto fail;
-
- error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
- if (error)
- goto fail_gunlock_q;
-
- al->al_requested = data_blocks + ind_blocks;
-
- error = gfs2_inplace_reserve(ip);
- if (error)
- goto fail_gunlock_q;
-
- error = gfs2_trans_begin(sdp,
- al->al_rgd->rd_ri.ri_length +
- ind_blocks +
- ((journaled) ? data_blocks : 0) +
- RES_DINODE + RES_STATFS + RES_QUOTA,
- 0);
- if (error)
- goto fail_ipres;
- } else {
- error = gfs2_trans_begin(sdp,
- ((journaled) ? data_blocks : 0) +
- RES_DINODE,
- 0);
- if (error)
- goto fail_ipres;
- }
-
- if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail_end_trans;
-
- ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
- (~(S_ISUID | S_ISGID)) : (~S_ISUID);
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- brelse(dibh);
- }
+ struct kiocb kiocb;
+ ssize_t ret;
- if (journaled) {
- count = gfs2_jdata_write(ip, buf, *offset, size,
- gfs2_copy_from_user);
- if (count < 0) {
- error = count;
- goto fail_end_trans;
- }
-
- *offset += count;
- } else {
- struct iovec local_iov = { .iov_base = buf, .iov_len = size };
-
- count = generic_file_write_nolock(file, &local_iov, 1, offset);
- if (count < 0) {
- error = count;
- goto fail_end_trans;
- }
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail_end_trans;
-
- if (ip->i_di.di_size < inode->i_size)
- ip->i_di.di_size = inode->i_size;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- brelse(dibh);
- }
-
- gfs2_trans_end(sdp);
-
- if (file->f_flags & O_SYNC || IS_SYNC(inode)) {
- gfs2_log_flush_glock(ip->i_gl);
- error = filemap_fdatawrite(file->f_mapping);
- if (error == 0)
- error = filemap_fdatawait(file->f_mapping);
- if (error)
- goto fail_ipres;
- }
-
- if (alloc_required) {
- gfs2_assert_warn(sdp, count != size ||
- al->al_alloced);
- gfs2_inplace_release(ip);
- gfs2_quota_unlock(ip);
- gfs2_alloc_put(ip);
- }
-
- return count;
-
- fail_end_trans:
- gfs2_trans_end(sdp);
-
- fail_ipres:
- if (alloc_required)
- gfs2_inplace_release(ip);
-
- fail_gunlock_q:
- if (alloc_required)
- gfs2_quota_unlock(ip);
+ atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
- fail:
- if (alloc_required)
- gfs2_alloc_put(ip);
-
- return error;
+ init_sync_kiocb(&kiocb, filp);
+ ret = __gfs2_file_aio_read(&kiocb, iov, nr_segs, ppos);
+ if (-EIOCBQUEUED == ret)
+ ret = wait_on_sync_kiocb(&kiocb);
+ return ret;
}
-/**
- * do_write_buf - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @gh: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_write_buf(struct file *file, const char __user *buf, size_t size,
- loff_t *offset, unsigned int num_gh,
- struct gfs2_holder *ghs)
+static ssize_t gfs2_file_aio_read(struct kiocb *iocb, char __user *buf,
+ size_t count, loff_t pos)
{
- struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
- struct gfs2_sbd *sdp = ip->i_sbd;
- size_t s;
- ssize_t count = 0;
- int error;
-
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]);
-
- error = gfs2_glock_nq_m(num_gh + 1, ghs);
- if (error)
- goto out;
-
- if (num_gh) {
- error = grope_mapping(buf, size);
- if (error)
- goto out_gunlock;
- }
-
- if (file->f_flags & O_APPEND)
- *offset = ip->i_di.di_size;
-
- if (!(file->f_flags & O_LARGEFILE)) {
- error = -EFBIG;
- if (*offset >= MAX_NON_LFS)
- goto out_gunlock;
- if (*offset + size > MAX_NON_LFS)
- size = MAX_NON_LFS - *offset;
- }
-
- /* split large writes into smaller atomic transactions */
- while (size) {
- s = gfs2_tune_get(sdp, gt_max_atomic_write);
- if (s > size)
- s = size;
-
- error = do_do_write_buf(file, buf, s, offset);
- if (error < 0)
- goto out_gunlock;
-
- buf += error;
- size -= error;
- count += error;
- }
-
- error = 0;
+ struct file *filp = iocb->ki_filp;
+ struct iovec local_iov = { .iov_base = buf, .iov_len = count };
- out_gunlock:
- gfs2_glock_dq_m(num_gh + 1, ghs);
+ atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
- out:
- gfs2_holder_uninit(&ghs[num_gh]);
-
- return (count) ? count : error;
+ BUG_ON(iocb->ki_pos != pos);
+ return __gfs2_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
}
-/**
- * gfs2_write - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t gfs2_write(struct file *file, const char __user *buf,
- size_t size, loff_t *offset)
-{
- struct inode *inode = file->f_mapping->host;
- ssize_t count;
-
- atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_file);
-
- if (*offset < 0)
- return -EINVAL;
- if (!access_ok(VERIFY_READ, buf, size))
- return -EFAULT;
-
- mutex_lock(&inode->i_mutex);
- if (file->f_flags & O_DIRECT)
- count = walk_vm(file, buf, size, offset,
- do_write_direct);
- else
- count = walk_vm(file, buf, size, offset, do_write_buf);
- mutex_unlock(&inode->i_mutex);
-
- return count;
-}
/**
* filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read()
if (flags & (GFS2_DIF_JDATA|GFS2_DIF_DIRECTIO)) {
if (!S_ISREG(ip->i_di.di_mode))
goto out;
- /* FIXME: Would be nice not to require the following test */
- if ((flags & GFS2_DIF_JDATA) && ip->i_di.di_size)
- goto out;
}
if (flags & (GFS2_DIF_INHERIT_JDATA|GFS2_DIF_INHERIT_DIRECTIO)) {
if (!S_ISDIR(ip->i_di.di_mode))
return error;
}
- if (gfs2_is_jdata(ip)) {
- if (vma->vm_flags & VM_MAYSHARE)
- error = -EOPNOTSUPP;
- else
- vma->vm_ops = &gfs2_vm_ops_private;
- } else {
- /* This is VM_MAYWRITE instead of VM_WRITE because a call
- to mprotect() can turn on VM_WRITE later. */
-
- if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
- (VM_MAYSHARE | VM_MAYWRITE))
- vma->vm_ops = &gfs2_vm_ops_sharewrite;
- else
- vma->vm_ops = &gfs2_vm_ops_private;
- }
+ /* This is VM_MAYWRITE instead of VM_WRITE because a call
+ to mprotect() can turn on VM_WRITE later. */
+
+ if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
+ (VM_MAYSHARE | VM_MAYWRITE))
+ vma->vm_ops = &gfs2_vm_ops_sharewrite;
+ else
+ vma->vm_ops = &gfs2_vm_ops_private;
gfs2_glock_dq_uninit(&i_gh);
if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
file->f_flags |= O_DIRECT;
- /* Don't let the user open O_DIRECT on a jdata file */
-
- if ((file->f_flags & O_DIRECT) && gfs2_is_jdata(ip)) {
- error = -EINVAL;
- goto fail_gunlock;
- }
-
gfs2_glock_dq_uninit(&i_gh);
}
read_actor_t actor, void *target)
{
struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host);
- struct gfs2_holder gh;
- ssize_t retval;
atomic_inc(&ip->i_sbd->sd_ops_file);
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
-
- retval = gfs2_glock_nq_atime(&gh);
- if (retval)
- goto out;
-
- if (gfs2_is_jdata(ip))
- retval = -EOPNOTSUPP;
- else
- retval = generic_file_sendfile(in_file, offset, count, actor,
- target);
-
- gfs2_glock_dq(&gh);
-
- out:
- gfs2_holder_uninit(&gh);
-
- return retval;
+ return generic_file_sendfile(in_file, offset, count, actor, target);
}
static int do_flock(struct file *file, int cmd, struct file_lock *fl)
struct file_operations gfs2_file_fops = {
.llseek = gfs2_llseek,
.read = gfs2_read,
- .write = gfs2_write,
+ .readv = gfs2_file_readv,
+ .aio_read = gfs2_file_aio_read,
+ .write = generic_file_write,
+ .writev = generic_file_writev,
+ .aio_write = generic_file_aio_write,
.ioctl = gfs2_ioctl,
.mmap = gfs2_mmap,
.open = gfs2_open,
if (error)
return NULL;
- if (gfs2_is_jdata(ip))
- goto out;
-
set_bit(GIF_PAGED, &ip->i_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
map_bh(bh, inode->i_sb, block);
set_buffer_uptodate(bh);
- if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
- gfs2_trans_add_databuf(sdp, bh);
+ if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED) || gfs2_is_jdata(ip))
+ gfs2_trans_add_bh(ip->i_gl, bh, 0);
mark_buffer_dirty(bh);
if (release) {
goto unlock;
}
- if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED/* || gfs2_is_jdata(ip)*/)
- gfs2_trans_add_databuf(sdp, bh);
+ if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+ gfs2_trans_add_bh(ip->i_gl, bh, 0);
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + offset, 0, length);
end = start + bsize;
if (end <= from || start >= to)
continue;
- gfs2_trans_add_databuf(ip->i_sbd, bh);
+ gfs2_trans_add_bh(ip->i_gl, bh, 0);
}
}
#include <linux/buffer_head.h>
#include <linux/tty.h>
#include <linux/sort.h>
+#include <linux/fs.h>
#include <asm/semaphore.h>
#include "gfs2.h"
#include "bmap.h"
#include "glock.h"
#include "glops.h"
-#include "jdata.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
+#include "inode.h"
#include "ops_file.h"
+#include "ops_address.h"
#define QUOTA_USER 1
#define QUOTA_GROUP 0
up(&sdp->sd_quota_mutex);
}
+/**
+ * gfs2_adjust_quota - apply a change to a quota value held in the quota file
+ * @ip: the GFS2 inode whose mapping holds the quota records
+ * @loc: byte offset of the struct gfs2_quota record to update
+ * @change: signed amount to add to the record's qu_value
+ * @qd: quota data whose qd_qb magic and value are refreshed on success
+ *
+ * The page containing @loc is grabbed from the page cache, the buffer
+ * covering the record is mapped with gfs2_get_block() and read in if it is
+ * not up to date, then handed to gfs2_trans_add_bh() before the record is
+ * modified in place.
+ *
+ * This function was mostly borrowed from gfs2_block_truncate_page which was
+ * in turn mostly borrowed from ext3
+ *
+ * Returns: 0 on success, -ENOMEM if the page cannot be grabbed, or -EIO if
+ *          the buffer cannot be mapped or read
+ */
+static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
+			     int64_t change, struct gfs2_quota_data *qd)
+{
+	struct inode *inode = gfs2_ip2v(ip);
+	struct address_space *mapping = inode->i_mapping;
+	unsigned long index = loc >> PAGE_CACHE_SHIFT;
+	/* Offset within the page: mask with the page size, not the shift count */
+	unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
+	unsigned blocksize, iblock, pos;
+	struct buffer_head *bh;
+	struct page *page;
+	struct gfs2_quota q;
+	void *kaddr;
+	int err = -EIO;
+
+	page = grab_cache_page(mapping, index);
+	if (!page)
+		return -ENOMEM;
+
+	blocksize = inode->i_sb->s_blocksize;
+	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, blocksize, 0);
+
+	/* Walk the page's buffers to the one that covers @offset */
+	bh = page_buffers(page);
+	pos = blocksize;
+	while (offset >= pos) {
+		bh = bh->b_this_page;
+		iblock++;
+		pos += blocksize;
+	}
+
+	if (!buffer_mapped(bh)) {
+		gfs2_get_block(inode, iblock, bh, 1);
+		if (!buffer_mapped(bh))
+			goto unlock;
+	}
+
+	if (PageUptodate(page))
+		set_buffer_uptodate(bh);
+
+	if (!buffer_uptodate(bh)) {
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh))
+			goto unlock;
+	}
+
+	gfs2_trans_add_bh(ip->i_gl, bh, 0);
+
+	/*
+	 * Decode the whole record, adjust qu_value in CPU byte order and
+	 * encode it again, as the code this replaces did.  Adding to the
+	 * first 64 bits at @loc would alter whichever field comes first in
+	 * struct gfs2_quota instead of qu_value.
+	 */
+	kaddr = kmap_atomic(page, KM_USER0);
+	gfs2_quota_in(&q, kaddr + offset);
+	q.qu_value += change;
+	gfs2_quota_out(&q, kaddr + offset);
+	flush_dcache_page(page);
+	kunmap_atomic(kaddr, KM_USER0);
+	err = 0;
+	qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
+#if 0
+	qd->qd_qb.qb_limit = cpu_to_be64(q.qu_limit);
+	qd->qd_qb.qb_warn = cpu_to_be64(q.qu_warn);
+#endif
+	/* q.qu_value is in CPU order here, so convert exactly once */
+	qd->qd_qb.qb_value = cpu_to_be64(q.qu_value);
+unlock:
+	unlock_page(page);
+	page_cache_release(page);
+	return err;
+}
+
static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
{
struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
file_ra_state_init(&ra_state, ip->i_vnode->i_mapping);
for (x = 0; x < num_qd; x++) {
- char buf[sizeof(struct gfs2_quota)];
- struct gfs2_quota q;
-
qd = qda[x];
offset = qd2offset(qd);
-
- /* The quota file may not be a multiple of
- sizeof(struct gfs2_quota) bytes. */
- memset(buf, 0, sizeof(struct gfs2_quota));
-
- error = gfs2_internal_read(ip, &ra_state, buf, &offset,
- sizeof(struct gfs2_quota));
- if (error < 0)
+ error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
+ (struct gfs2_quota_data *)qd->qd_gl->gl_lvb);
+ if (error)
goto out_end_trans;
- gfs2_quota_in(&q, buf);
- q.qu_value += qda[x]->qd_change_sync;
- gfs2_quota_out(&q, buf);
-
- error = gfs2_jdata_write_mem(ip, buf, offset,
- sizeof(struct gfs2_quota));
- if (error < 0)
- goto out_end_trans;
- else if (error != sizeof(struct gfs2_quota)) {
- error = -EIO;
- goto out_end_trans;
- }
-
do_qc(qd, -qd->qd_change_sync);
-
- memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb));
- qd->qd_qb.qb_magic = GFS2_MAGIC;
- qd->qd_qb.qb_limit = q.qu_limit;
- qd->qd_qb.qb_warn = q.qu_warn;
- qd->qd_qb.qb_value = q.qu_value;
-
- gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
}
error = 0;
gfs2_attach_bufdata(gl, bh, meta);
bd = get_v2bd(bh);
}
-
lops_add(sdp, &bd->bd_le);
}
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno)
{
struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
- GFP_KERNEL | __GFP_NOFAIL);
+ GFP_NOFS | __GFP_NOFAIL);
lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
rv->rv_blkno = blkno;
lops_add(sdp, &rv->rv_le);
lops_add(rgd->rd_sbd, &rgd->rd_le);
}
-void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh)
-{
- struct gfs2_bufdata *bd;
-
- bd = get_v2bd(bh);
- if (!bd) {
- bd = kmalloc(sizeof(struct gfs2_bufdata),
- GFP_NOFS | __GFP_NOFAIL);
- lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
- get_bh(bh);
- bd->bd_bh = bh;
- set_v2bd(bh, bd);
- lops_add(sdp, &bd->bd_le);
- }
-}
-
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno);
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno);
void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
-void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh);
#endif /* __TRANS_DOT_H__ */
"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
sdp->sd_fsname, assertion,
sdp->sd_fsname, function, file, line);
+ dump_stack();
return (me) ? -1 : -2;
}
if (sdp->sd_args.ar_debug)
BUG();
+ else
+ dump_stack();
sdp->sd_last_warning = jiffies;
/* ld_data1 is the number of revoke blocks in the descriptor.
ld_data2 is unused. */
+#define GFS2_LOG_DESC_JDATA 302
+/* ld_data1 is the number of data blocks in the descriptor.
+ ld_data2 is unused. */
+
struct gfs2_log_descriptor {
struct gfs2_meta_header ld_header;
__be32 qc_id;
};
+#ifdef __KERNEL__
/* Translation functions */
extern void gfs2_inum_in(struct gfs2_inum *no, char *buf);
extern void gfs2_unlinked_tag_print(struct gfs2_unlinked_tag *ut);
extern void gfs2_quota_change_print(struct gfs2_quota_change *qc);
+#endif /* __KERNEL__ */
+
#endif /* __GFS2_ONDISK_DOT_H__ */