4343#include <linux/blkdev.h>
4444#include <linux/uuid.h>
4545#include <linux/btrfs.h>
46+ #include <linux/uaccess.h>
4647#include "compat.h"
4748#include "ctree.h"
4849#include "disk-io.h"
5758#include "send.h"
5859#include "dev-replace.h"
5960
/*
 * Forward declaration: btrfs_extent_same() further down calls btrfs_clone()
 * before the point in this file where btrfs_clone() is defined.
 */
61+ static int btrfs_clone (struct inode * src , struct inode * inode ,
62+ u64 off , u64 olen , u64 olen_aligned , u64 destoff );
63+
6064/* Mask out flags that are inappropriate for the given type of inode. */
6165static inline __u32 btrfs_mask_flags (umode_t mode , __u32 flags )
6266{
@@ -2470,6 +2474,34 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
24702474 return ret ;
24712475}
24722476
2477+ static struct page * extent_same_get_page (struct inode * inode , u64 off )
2478+ {
2479+ struct page * page ;
2480+ pgoff_t index ;
2481+ struct extent_io_tree * tree = & BTRFS_I (inode )-> io_tree ;
2482+
2483+ index = off >> PAGE_CACHE_SHIFT ;
2484+
2485+ page = grab_cache_page (inode -> i_mapping , index );
2486+ if (!page )
2487+ return NULL ;
2488+
2489+ if (!PageUptodate (page )) {
2490+ if (extent_read_full_page_nolock (tree , page , btrfs_get_extent ,
2491+ 0 ))
2492+ return NULL ;
2493+ lock_page (page );
2494+ if (!PageUptodate (page )) {
2495+ unlock_page (page );
2496+ page_cache_release (page );
2497+ return NULL ;
2498+ }
2499+ }
2500+ unlock_page (page );
2501+
2502+ return page ;
2503+ }
2504+
24732505static inline void lock_extent_range (struct inode * inode , u64 off , u64 len )
24742506{
24752507 /* do any pending delalloc/csum calc on src, one way or
@@ -2490,6 +2522,251 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
24902522 }
24912523}
24922524
2525+ static void btrfs_double_unlock (struct inode * inode1 , u64 loff1 ,
2526+ struct inode * inode2 , u64 loff2 , u64 len )
2527+ {
2528+ unlock_extent (& BTRFS_I (inode1 )-> io_tree , loff1 , loff1 + len - 1 );
2529+ unlock_extent (& BTRFS_I (inode2 )-> io_tree , loff2 , loff2 + len - 1 );
2530+
2531+ mutex_unlock (& inode1 -> i_mutex );
2532+ mutex_unlock (& inode2 -> i_mutex );
2533+ }
2534+
2535+ static void btrfs_double_lock (struct inode * inode1 , u64 loff1 ,
2536+ struct inode * inode2 , u64 loff2 , u64 len )
2537+ {
2538+ if (inode1 < inode2 ) {
2539+ swap (inode1 , inode2 );
2540+ swap (loff1 , loff2 );
2541+ }
2542+
2543+ mutex_lock_nested (& inode1 -> i_mutex , I_MUTEX_PARENT );
2544+ lock_extent_range (inode1 , loff1 , len );
2545+ if (inode1 != inode2 ) {
2546+ mutex_lock_nested (& inode2 -> i_mutex , I_MUTEX_CHILD );
2547+ lock_extent_range (inode2 , loff2 , len );
2548+ }
2549+ }
2550+
2551+ static int btrfs_cmp_data (struct inode * src , u64 loff , struct inode * dst ,
2552+ u64 dst_loff , u64 len )
2553+ {
2554+ int ret = 0 ;
2555+ struct page * src_page , * dst_page ;
2556+ unsigned int cmp_len = PAGE_CACHE_SIZE ;
2557+ void * addr , * dst_addr ;
2558+
2559+ while (len ) {
2560+ if (len < PAGE_CACHE_SIZE )
2561+ cmp_len = len ;
2562+
2563+ src_page = extent_same_get_page (src , loff );
2564+ if (!src_page )
2565+ return - EINVAL ;
2566+ dst_page = extent_same_get_page (dst , dst_loff );
2567+ if (!dst_page ) {
2568+ page_cache_release (src_page );
2569+ return - EINVAL ;
2570+ }
2571+ addr = kmap_atomic (src_page );
2572+ dst_addr = kmap_atomic (dst_page );
2573+
2574+ flush_dcache_page (src_page );
2575+ flush_dcache_page (dst_page );
2576+
2577+ if (memcmp (addr , dst_addr , cmp_len ))
2578+ ret = BTRFS_SAME_DATA_DIFFERS ;
2579+
2580+ kunmap_atomic (addr );
2581+ kunmap_atomic (dst_addr );
2582+ page_cache_release (src_page );
2583+ page_cache_release (dst_page );
2584+
2585+ if (ret )
2586+ break ;
2587+
2588+ loff += cmp_len ;
2589+ dst_loff += cmp_len ;
2590+ len -= cmp_len ;
2591+ }
2592+
2593+ return ret ;
2594+ }
2595+
2596+ static int extent_same_check_offsets (struct inode * inode , u64 off , u64 len )
2597+ {
2598+ u64 bs = BTRFS_I (inode )-> root -> fs_info -> sb -> s_blocksize ;
2599+
2600+ if (off + len > inode -> i_size || off + len < off )
2601+ return - EINVAL ;
2602+ /* Check that we are block aligned - btrfs_clone() requires this */
2603+ if (!IS_ALIGNED (off , bs ) || !IS_ALIGNED (off + len , bs ))
2604+ return - EINVAL ;
2605+
2606+ return 0 ;
2607+ }
2608+
2609+ static int btrfs_extent_same (struct inode * src , u64 loff , u64 len ,
2610+ struct inode * dst , u64 dst_loff )
2611+ {
2612+ int ret ;
2613+
2614+ /*
2615+ * btrfs_clone() can't handle extents in the same file
2616+ * yet. Once that works, we can drop this check and replace it
2617+ * with a check for the same inode, but overlapping extents.
2618+ */
2619+ if (src == dst )
2620+ return - EINVAL ;
2621+
2622+ btrfs_double_lock (src , loff , dst , dst_loff , len );
2623+
2624+ ret = extent_same_check_offsets (src , loff , len );
2625+ if (ret )
2626+ goto out_unlock ;
2627+
2628+ ret = extent_same_check_offsets (dst , dst_loff , len );
2629+ if (ret )
2630+ goto out_unlock ;
2631+
2632+ /* don't make the dst file partly checksummed */
2633+ if ((BTRFS_I (src )-> flags & BTRFS_INODE_NODATASUM ) !=
2634+ (BTRFS_I (dst )-> flags & BTRFS_INODE_NODATASUM )) {
2635+ ret = - EINVAL ;
2636+ goto out_unlock ;
2637+ }
2638+
2639+ ret = btrfs_cmp_data (src , loff , dst , dst_loff , len );
2640+ if (ret == 0 )
2641+ ret = btrfs_clone (src , dst , loff , len , len , dst_loff );
2642+
2643+ out_unlock :
2644+ btrfs_double_unlock (src , loff , dst , dst_loff , len );
2645+
2646+ return ret ;
2647+ }
2648+
2649+ #define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
2650+
2651+ static long btrfs_ioctl_file_extent_same (struct file * file ,
2652+ void __user * argp )
2653+ {
2654+ struct btrfs_ioctl_same_args * args = argp ;
2655+ struct btrfs_ioctl_same_args same ;
2656+ struct btrfs_ioctl_same_extent_info info ;
2657+ struct inode * src = file -> f_dentry -> d_inode ;
2658+ struct file * dst_file = NULL ;
2659+ struct inode * dst ;
2660+ u64 off ;
2661+ u64 len ;
2662+ int i ;
2663+ int ret ;
2664+ u64 bs = BTRFS_I (src )-> root -> fs_info -> sb -> s_blocksize ;
2665+ bool is_admin = capable (CAP_SYS_ADMIN );
2666+
2667+ if (!(file -> f_mode & FMODE_READ ))
2668+ return - EINVAL ;
2669+
2670+ ret = mnt_want_write_file (file );
2671+ if (ret )
2672+ return ret ;
2673+
2674+ if (copy_from_user (& same ,
2675+ (struct btrfs_ioctl_same_args __user * )argp ,
2676+ sizeof (same ))) {
2677+ ret = - EFAULT ;
2678+ goto out ;
2679+ }
2680+
2681+ off = same .logical_offset ;
2682+ len = same .length ;
2683+
2684+ /*
2685+ * Limit the total length we will dedupe for each operation.
2686+ * This is intended to bound the total time spent in this
2687+ * ioctl to something sane.
2688+ */
2689+ if (len > BTRFS_MAX_DEDUPE_LEN )
2690+ len = BTRFS_MAX_DEDUPE_LEN ;
2691+
2692+ if (WARN_ON_ONCE (bs < PAGE_CACHE_SIZE )) {
2693+ /*
2694+ * Btrfs does not support blocksize < page_size. As a
2695+ * result, btrfs_cmp_data() won't correctly handle
2696+ * this situation without an update.
2697+ */
2698+ ret = - EINVAL ;
2699+ goto out ;
2700+ }
2701+
2702+ ret = - EISDIR ;
2703+ if (S_ISDIR (src -> i_mode ))
2704+ goto out ;
2705+
2706+ ret = - EACCES ;
2707+ if (!S_ISREG (src -> i_mode ))
2708+ goto out ;
2709+
2710+ ret = 0 ;
2711+ for (i = 0 ; i < same .dest_count ; i ++ ) {
2712+ if (copy_from_user (& info , & args -> info [i ], sizeof (info ))) {
2713+ ret = - EFAULT ;
2714+ goto out ;
2715+ }
2716+
2717+ info .bytes_deduped = 0 ;
2718+
2719+ dst_file = fget (info .fd );
2720+ if (!dst_file ) {
2721+ info .status = - EBADF ;
2722+ goto next ;
2723+ }
2724+
2725+ if (!(is_admin || (dst_file -> f_mode & FMODE_WRITE ))) {
2726+ info .status = - EINVAL ;
2727+ goto next ;
2728+ }
2729+
2730+ info .status = - EXDEV ;
2731+ if (file -> f_path .mnt != dst_file -> f_path .mnt )
2732+ goto next ;
2733+
2734+ dst = dst_file -> f_dentry -> d_inode ;
2735+ if (src -> i_sb != dst -> i_sb )
2736+ goto next ;
2737+
2738+ if (S_ISDIR (dst -> i_mode )) {
2739+ info .status = - EISDIR ;
2740+ goto next ;
2741+ }
2742+
2743+ if (!S_ISREG (dst -> i_mode )) {
2744+ info .status = - EACCES ;
2745+ goto next ;
2746+ }
2747+
2748+ info .status = btrfs_extent_same (src , off , len , dst ,
2749+ info .logical_offset );
2750+ if (info .status == 0 )
2751+ info .bytes_deduped += len ;
2752+
2753+ next :
2754+ if (dst_file )
2755+ fput (dst_file );
2756+
2757+ if (__put_user_unaligned (info .status , & args -> info [i ].status ) ||
2758+ __put_user_unaligned (info .bytes_deduped ,
2759+ & args -> info [i ].bytes_deduped )) {
2760+ ret = - EFAULT ;
2761+ goto out ;
2762+ }
2763+ }
2764+
2765+ out :
2766+ mnt_drop_write_file (file );
2767+ return ret ;
2768+ }
2769+
24932770/**
24942771 * btrfs_clone() - clone a range from inode file to another
24952772 *
@@ -4242,6 +4519,8 @@ long btrfs_ioctl(struct file *file, unsigned int
42424519 return btrfs_ioctl_get_fslabel (file , argp );
42434520 case BTRFS_IOC_SET_FSLABEL :
42444521 return btrfs_ioctl_set_fslabel (file , argp );
4522+ case BTRFS_IOC_FILE_EXTENT_SAME :
4523+ return btrfs_ioctl_file_extent_same (file , argp );
42454524 }
42464525
42474526 return - ENOTTY ;
0 commit comments