diff --git a/Makefile b/Makefile
index cff6d56..8d10f24 100644
--- a/Makefile
+++ b/Makefile
@@ -10,6 +10,9 @@ all: $(MKFS)
 IMAGE ?= test.img
 IMAGESIZE ?= 200
 
+JOURNAL ?= journal.img
+JOURNALSIZE ?= 8
+
 # To test max files(40920) in directory, the image size should be at least 159.85 MiB
 # 40920 * 4096(block size) ~= 159.85 MiB
 
@@ -20,12 +23,18 @@ $(IMAGE): $(MKFS)
 	dd if=/dev/zero of=${IMAGE} bs=1M count=${IMAGESIZE}
 	./$< $(IMAGE)
 
+journal: $(JOURNAL)
+
+$(JOURNAL):
+	dd if=/dev/zero of=$(JOURNAL) bs=1M count=$(JOURNALSIZE)
+	mke2fs -b 4096 -O journal_dev $(JOURNAL)
+
 check: all
 	script/test.sh $(IMAGE) $(IMAGESIZE) $(MKFS)
 
 clean:
 	make -C $(KDIR) M=$(PWD) clean
 	rm -f *~ $(PWD)/*.ur-safe
-	rm -f $(MKFS) $(IMAGE)
+	rm -f $(MKFS) $(IMAGE) $(JOURNAL)
 
-.PHONY: all clean
+.PHONY: all clean journal
diff --git a/README.md b/README.md
index 48861b1..9a67f7b 100644
--- a/README.md
+++ b/README.md
@@ -172,9 +172,85 @@ struct simplefs_extent
 +---------+
 ```
 
-## TODO
+### Journaling support
 
-- journalling support
+Simplefs now includes support for an external journal device, leveraging the journaling block device (jbd2) subsystem in the Linux kernel. This enhancement improves the file system's resilience by maintaining a log of changes, which helps prevent corruption and facilitates recovery in the event of a crash or power failure.
+
+
+The journaling support in simplefs is implemented on top of the jbd2 subsystem, a widely used journaling layer in Linux. Currently, simplefs stores its journal only on an external journal device.
+
+For a detailed introduction to journaling, please refer to these two documents:
+- [Journal (jbd2) documentation](https://www.kernel.org/doc/html/latest/filesystems/ext4/journal.html)
+- [Journal (jbd2) API](https://docs.kernel.org/filesystems/journalling.html)
+
+External journal device disk layout:
+
+    +--------------------+------------------+--------------------------+--------------+
+    | Journal Superblock | Descriptor Block | Metadata/Data (modified) | Commit Block |
+    +--------------------+------------------+--------------------------+--------------+
+
+Hint:
+Each transaction starts with a descriptor block, followed by the modified metadata or data blocks, and ends with a commit block. Every piece of modified metadata (such as an inode or a bitmap) occupies its own block. Currently, simplefs only records "extent" metadata.
+
+
+How to Enable Journaling in simplefs:
+
+Step 1: Create the Journal Disk Image
+To create an 8 MB disk image for the journal, use the `make journal` command shown after the following note.
+
+Note:
+The external journal device is assumed to be 8 MB, which is an arbitrary choice for now. The journal length is therefore fixed at 2048 blocks, i.e. the device size (8 MB) divided by the block size (4096 bytes).
+
+```shell
+$ make journal
+```
+
+Step 2: Make Sure the SimpleFS Kernel Module Is Loaded
+
+```shell
+$ insmod simplefs/simplefs.ko
+```
+
+Step 3: Set Up the Loop Device for the Journal
+Find an available loop device and associate it with the journal image:
+
+```shell
+$ loop_device=$(losetup -f)
+$ losetup $loop_device /simplefs/journal.img
+```
+
+You should see the following kernel message:
+```
+loop0: detected capacity change from 0 to 16384
+```
+
+Step 4: Mount the SimpleFS File System with the External Journal
+Mount the SimpleFS file system together with the external journal device using the following command:
+
+```shell
+mount -o loop,rw,owner,group,users,journal_path="$loop_device" -t simplefs /simplefs/test.img /test
+```
+
+Corresponding kernel messages:
+```
+loop1: detected capacity change from 0 to 409600
+simplefs: simplefs_parse_options: parsing options 'owner,group,journal_path=/dev/loop0'
+simplefs: '/dev/loop1' mount success
+```
+
+Current Limitations and Known Issues
+
+1. External Journal Device Size:
+
+- The exact size of the external journal device is not yet detected at mount time. As a temporary solution, the journal length is hard-coded to 2048 blocks, which corresponds to an 8 MB external journal device divided by the 4096-byte block size.
+
+2. Metadata Recording:
+
+- At present, only "extent" metadata is recorded. In the future, additional metadata such as the super block and inode metadata can be included.
+
+3. Implementation of External Journal Device:
+
+- Only an external journal device is supported. Future improvements can include an internal journal (inode journal). However, this will require the addition of a bmap function and appropriate adjustments to the disk layout during mkfs.
 
## License diff --git a/simplefs.h b/simplefs.h index 301f2ab..5bb0f03 100644 --- a/simplefs.h +++ b/simplefs.h @@ -39,6 +39,9 @@ * | blocks | rest of the blocks * +---------------+ */ +#ifdef __KERNEL__ +#include +#endif struct simplefs_inode { uint32_t i_mode; /* File mode */ @@ -71,6 +74,10 @@ struct simplefs_sb_info { uint32_t nr_free_blocks; /* Number of free blocks */ #ifdef __KERNEL__ + journal_t *journal; + struct block_device *s_journal_bdev; /* v5.10+ external journal device */ + struct bdev_handle + *s_journal_bdev_handle; /* v6.7+ external journal device */ unsigned long *ifree_bitmap; /* In-memory free inodes bitmap */ unsigned long *bfree_bitmap; /* In-memory free blocks bitmap */ #endif diff --git a/super.c b/super.c index e966ec9..1a7a303 100644 --- a/super.c +++ b/super.c @@ -7,6 +7,11 @@ #include #include +#include +#include +#include +#include + #include "simplefs.h" struct dentry *simplefs_mount(struct file_system_type *fs_type, @@ -109,6 +114,47 @@ static int simplefs_write_inode(struct inode *inode, static void simplefs_put_super(struct super_block *sb) { struct simplefs_sb_info *sbi = SIMPLEFS_SB(sb); + int aborted = 0; + int err; + + if (sbi->journal) { + aborted = is_journal_aborted(sbi->journal); + err = jbd2_journal_destroy(sbi->journal); + sbi->journal = NULL; + if ((err < 0) && !aborted) { + pr_err("Couldn't clean up the journal, error %d\n", -err); + } + } + + sync_blockdev(sb->s_bdev); + invalidate_bdev(sb->s_bdev); + +#if SIMPLEFS_AT_LEAST(6, 7, 0) + if (sbi->s_journal_bdev_handle) { + sync_blockdev(sbi->s_journal_bdev_handle->bdev); + invalidate_bdev(sbi->s_journal_bdev_handle->bdev); + } +#elif SIMPLEFS_AT_LEAST(6, 6, 0) + if (sbi->s_journal_bdev) { + sync_blockdev(sbi->s_journal_bdev); + invalidate_bdev(sbi->s_journal_bdev); + } +#elif SIMPLEFS_AT_LEAST(6, 5, 0) + if (sbi->s_journal_bdev) { + sync_blockdev(sbi->s_journal_bdev); + invalidate_bdev(sbi->s_journal_bdev); + blkdev_put(sbi->s_journal_bdev, sb); + sbi->s_journal_bdev = 
NULL; + } +#elif SIMPLEFS_AT_LEAST(5, 10, 0) + if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) { + sync_blockdev(sbi->s_journal_bdev); + invalidate_bdev(sbi->s_journal_bdev); + blkdev_put(sbi->s_journal_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + sbi->s_journal_bdev = NULL; + } +#endif + if (sbi) { kfree(sbi->ifree_bitmap); kfree(sbi->bfree_bitmap); @@ -196,6 +242,254 @@ static int simplefs_statfs(struct dentry *dentry, struct kstatfs *stat) return 0; } +/* Code related to the external journal device settings */ + +static journal_t *simplefs_get_dev_journal(struct super_block *sb, + dev_t journal_dev) +{ + struct simplefs_sb_info *sbi = SIMPLEFS_SB(sb); + struct buffer_head *bh; + struct block_device *bdev; + int hblock, blocksize; + unsigned long long sb_block, start, len; + unsigned long offset; + journal_t *journal; + int errno = 0; + +#if SIMPLEFS_AT_LEAST(6, 8, 0) + struct bdev_handle *bdev_handle; + bdev_handle = bdev_open_by_dev( + journal_dev, BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES, + sb, &fs_holder_ops); +#elif SIMPLEFS_AT_LEAST(6, 7, 0) + struct bdev_handle *bdev_handle; + up_write(&sb->s_umount); + bdev_handle = bdev_open_by_dev(journal_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, + sb, &fs_holder_ops); + down_write(&sb->s_umount); +#elif SIMPLEFS_AT_LEAST(6, 6, 0) + up_write(&sb->s_umount); + bdev = blkdev_get_by_dev(journal_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb, + &fs_holder_ops); + down_write(&sb->s_umount); +#elif SIMPLEFS_AT_LEAST(6, 5, 0) + bdev = blkdev_get_by_dev(journal_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb, + NULL); +#elif SIMPLEFS_AT_LEAST(5, 10, 0) + bdev = blkdev_get_by_dev(journal_dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, + sb); +#endif + + +#if SIMPLEFS_AT_LEAST(6, 8, 0) + if (IS_ERR(bdev_handle)) { + printk(KERN_ERR + "failed to open journal device unknown-block(%u,%u) %ld\n", + MAJOR(journal_dev), MINOR(journal_dev), PTR_ERR(bdev_handle)); + return ERR_CAST(bdev_handle); + } + bdev = bdev_handle->bdev; 
+#elif SIMPLEFS_AT_LEAST(5, 10, 0) + if (IS_ERR(bdev)) { + printk(KERN_ERR "failed to open block device (%u:%u), error: %ld\n", + MAJOR(journal_dev), MINOR(journal_dev), PTR_ERR(bdev)); + return ERR_CAST(bdev); + } +#endif + + blocksize = sb->s_blocksize; + hblock = bdev_logical_block_size(bdev); + + if (blocksize < hblock) { + pr_err("blocksize too small for journal device\n"); + errno = -EINVAL; + goto out_bdev; + } + + sb_block = SIMPLEFS_BLOCK_SIZE / blocksize; + offset = SIMPLEFS_BLOCK_SIZE % blocksize; + set_blocksize(bdev, blocksize); + bh = __bread(bdev, sb_block, blocksize); + + if (!bh) { + pr_err("couldn't read superblock of external journal\n"); + errno = -EINVAL; + goto out_bdev; + } + /* + * FIXME: Currently, the exact size of the external journal device is not + * available. Therefore, we use the device size divided by the block size to + * set `len`. Hint: External device size available now is 8MB. + * + * Future implementation might need to change to: + * len = CapacityOfJournalDevice / SIMPLEFS_BLOCK_SIZE + */ + + len = 2048; + start = sb_block; + brelse(bh); + +#if SIMPLEFS_AT_LEAST(6, 8, 0) + journal = jbd2_journal_init_dev(bdev_handle->bdev, sb->s_bdev, start, len, + sb->s_blocksize); +#elif SIMPLEFS_AT_LEAST(5, 15, 0) + journal = jbd2_journal_init_dev(bdev, sb->s_bdev, start, len, blocksize); +#endif + + if (IS_ERR(journal)) { + pr_err( + "simplefs_get_dev_journal: failed to initialize journal, error " + "%ld\n", + PTR_ERR(journal)); + errno = PTR_ERR(journal); + goto out_bdev; + } + +#if SIMPLEFS_AT_LEAST(6, 8, 0) + sbi->s_journal_bdev_handle = bdev_handle; +#elif SIMPLEFS_AT_LEAST(5, 15, 0) + sbi->s_journal_bdev = bdev; +#endif + + journal->j_private = sb; + return journal; + +out_bdev: +#if SIMPLEFS_AT_LEAST(6, 7, 0) + bdev_release(bdev_handle); +#elif SIMPLEFS_AT_LEAST(6, 5, 0) + blkdev_put(bdev, sb); +#elif SIMPLEFS_AT_LEAST(5, 10, 0) + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); +#endif + return NULL; +} + +static int 
simplefs_load_journal(struct super_block *sb, + unsigned long journal_devnum) +{ + journal_t *journal; + struct simplefs_sb_info *sbi = SIMPLEFS_SB(sb); + dev_t journal_dev; + int err = 0; + journal_dev = new_decode_dev(journal_devnum); + int really_read_only; + int journal_dev_ro; + + journal = simplefs_get_dev_journal(sb, journal_dev); + if (IS_ERR(journal)) { + pr_err("Failed to get journal from device, error %ld\n", + PTR_ERR(journal)); + return PTR_ERR(journal); + } + + journal_dev_ro = bdev_read_only(journal->j_dev); + really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro; + + if (journal_dev_ro && !sb_rdonly(sb)) { + pr_err("journal device read-only, try mounting with '-o ro'\n"); + err = -EROFS; + goto err_out; + } + + err = jbd2_journal_wipe(journal, !really_read_only); + + if (!err) { + err = jbd2_journal_load(journal); + if (err) { + pr_err("error loading journal, error %d\n", err); + goto err_out; + } + } + + sbi->journal = journal; + + return 0; + +err_out: + jbd2_journal_destroy(journal); + return err; +} + +/* we use SIMPLEFS_OPT_JOURNAL_PATH case to load external journal device now */ +#define SIMPLEFS_OPT_JOURNAL_DEV 1 +#define SIMPLEFS_OPT_JOURNAL_PATH 2 +static const match_table_t tokens = { + {SIMPLEFS_OPT_JOURNAL_DEV, "journal_dev=%u"}, + {SIMPLEFS_OPT_JOURNAL_PATH, "journal_path=%s"}, +}; +static int simplefs_parse_options(struct super_block *sb, char *options) +{ + substring_t args[MAX_OPT_ARGS]; + int token, ret = 0, arg; + char *p; + char *journal_path; + struct inode *journal_inode; + struct path path; + + pr_info("simplefs_parse_options: parsing options '%s'\n", options); + + while ((p = strsep(&options, ","))) { + if (!*p) + continue; + + args[0].to = args[0].from = NULL; + token = match_token(p, tokens, args); + + switch (token) { + case SIMPLEFS_OPT_JOURNAL_DEV: + if (args->from && match_int(args, &arg)) { + pr_err("simplefs_parse_options: match_int failed\n"); + return 1; + } + if ((ret = simplefs_load_journal(sb, arg))) { + 
pr_err( + "simplefs_parse_options: simplefs_load_journal failed with " + "%d\n", + ret); + return ret; + } + break; + + case SIMPLEFS_OPT_JOURNAL_PATH: { + journal_path = match_strdup(&args[0]); + if (!journal_path) { + pr_err("simplefs_parse_options: match_strdup failed\n"); + return -ENOMEM; + } + ret = kern_path(journal_path, LOOKUP_FOLLOW, &path); + if (ret) { + pr_err( + "simplefs_parse_options: kern_path failed with error %d\n", + ret); + kfree(journal_path); + return ret; + } + + journal_inode = path.dentry->d_inode; + + path_put(&path); + kfree(journal_path); + + if (S_ISBLK(journal_inode->i_mode)) { + unsigned long journal_devnum = + new_encode_dev(journal_inode->i_rdev); + if ((ret = simplefs_load_journal(sb, journal_devnum))) { + pr_err( + "simplefs_parse_options: simplefs_load_journal failed " + "with %d\n", + ret); + return ret; + } + } + break; + } + } + } + + return 0; +} + static struct super_operations simplefs_super_ops = { .put_super = simplefs_put_super, .alloc_inode = simplefs_alloc_inode, @@ -319,6 +613,13 @@ int simplefs_fill_super(struct super_block *sb, void *data, int silent) goto iput; } + ret = simplefs_parse_options(sb, data); + if (ret) { + pr_err("simplefs_fill_super: Failed to parse options, error code: %d\n", + ret); + return ret; + } + return 0; iput: