diff options
Diffstat (limited to 'fs/ocfs2/localalloc.c')
| -rw-r--r-- | fs/ocfs2/localalloc.c | 275 | 
1 files changed, 192 insertions, 83 deletions
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index c983715d8d8..3d7419682dc 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
 
 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
 					     struct ocfs2_dinode *alloc,
-					     u32 numbits);
+					     u32 *numbits,
+					     struct ocfs2_alloc_reservation *resv);
 
 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
 
@@ -74,6 +75,144 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 					  struct inode *local_alloc_inode);
 
+/*
+ * ocfs2_la_default_mb() - compute the default local alloc window size
+ * for @osb and return it in megabytes.
+ *
+ * Generally, we'd like to pick as large a local alloc as
+ * possible. Performance on large workloads tends to scale
+ * proportionally to la size. In addition to that, the reservations
+ * code functions more efficiently as it can reserve more windows for
+ * write.
+ *
+ * Some things work against us when trying to choose a large local alloc:
+ *
+ * - We need to ensure our sizing is picked to leave enough space in
+ *   group descriptors for other allocations (such as block groups,
+ *   etc). Picking default sizes which are a multiple of 4 could help
+ *   - block groups are allocated in 2mb and 4mb chunks.
+ *
+ * - Likewise, we don't want to starve other nodes of bits on small
+ *   file systems. This can easily be taken care of by limiting our
+ *   default to a reasonable size (256M) on larger cluster sizes.
+ *
+ * - Some file systems can't support very large sizes - 4k and 8k in
+ *   particular are limited to less than 128 and 256 megabytes respectively.
+ *
+ * The following reference table shows group descriptor and local
+ * alloc maximums at various cluster sizes (4k blocksize)
+ *
+ * csize: 4K	group: 126M	la: 121M
+ * csize: 8K	group: 252M	la: 243M
+ * csize: 16K	group: 504M	la: 486M
+ * csize: 32K	group: 1008M	la: 972M
+ * csize: 64K	group: 2016M	la: 1944M
+ * csize: 128K	group: 4032M	la: 3888M
+ * csize: 256K	group: 8064M	la: 7776M
+ * csize: 512K	group: 16128M	la: 15552M
+ * csize: 1024K	group: 32256M	la: 31104M
+ */
+#define	OCFS2_LA_MAX_DEFAULT_MB	256
+#define	OCFS2_LA_OLD_DEFAULT	8
+unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
+{
+	unsigned int la_mb;
+	unsigned int gd_mb;
+	unsigned int megs_per_slot;
+	struct super_block *sb = osb->sb;
+
+	gd_mb = ocfs2_clusters_to_megabytes(sb,
+		8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat));
+
+	/*
+	 * This takes care of file systems with very small group
+	 * descriptors - 512 byte blocksize at cluster sizes lower
+	 * than 16K and also 1k blocksize with 4k cluster size.
+	 */
+	if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
+	    || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
+		return OCFS2_LA_OLD_DEFAULT;
+
+	/*
+	 * Leave enough room for some block groups and round the final
+	 * value we work from down to a multiple of 4 (clear bits 0-1).
+	 */
+	gd_mb -= 16;
+	gd_mb &= 0xFFFFFFFC;
+
+	la_mb = gd_mb;
+
+	/*
+	 * Keep window sizes down to a reasonable default
+	 */
+	if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
+		/*
+		 * Some clustersize / blocksize combinations will have
+		 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
+		 * default size, but get poor distribution when
+		 * limited to exactly 256 megabytes.
+		 *
+		 * As an example, 16K clustersize at 4K blocksize
+		 * gives us a cluster group size of 504M. Paring the
+		 * local alloc size down to 256 however, would give us
+		 * only one window and around 200MB left in the
+		 * cluster group. Instead, find the first size below
+		 * 256 which would give us an even distribution.
+		 *
+		 * Larger cluster group sizes actually work out pretty
+		 * well when pared to 256, so we don't have to do this
+		 * for any group that fits more than two
+		 * OCFS2_LA_MAX_DEFAULT_MB windows.
+		 */
+		if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
+			la_mb = OCFS2_LA_MAX_DEFAULT_MB;
+		else {
+			unsigned int gd_mult = gd_mb;
+
+			while (gd_mult > OCFS2_LA_MAX_DEFAULT_MB)
+				gd_mult = gd_mult >> 1;
+
+			la_mb = gd_mult;
+		}
+	}
+
+	megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
+	megs_per_slot = ocfs2_clusters_to_megabytes(sb, megs_per_slot);
+	/* Too many nodes, too few disk clusters. */
+	if (megs_per_slot < la_mb)
+		la_mb = megs_per_slot;
+
+	return la_mb;
+}
+
+void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
+{
+	struct super_block *sb = osb->sb;
+	unsigned int la_default_mb = ocfs2_la_default_mb(osb);
+	unsigned int la_max_mb;
+
+	la_max_mb = ocfs2_clusters_to_megabytes(sb,
+						ocfs2_local_alloc_size(sb) * 8);
+
+	mlog(0, "requested: %dM, max: %uM, default: %uM\n",
+	     requested_mb, la_max_mb, la_default_mb);
+
+	if (requested_mb == -1) {
+		/* -1 means no user request - use the computed default */
+		osb->local_alloc_default_bits =
+			ocfs2_megabytes_to_clusters(sb, la_default_mb);
+	} else if (requested_mb > la_max_mb) {
+		/* Request exceeds la_max_mb - clamp to the maximum */
+		osb->local_alloc_default_bits =
+			ocfs2_megabytes_to_clusters(sb, la_max_mb);
+	} else {
+		osb->local_alloc_default_bits =
+			ocfs2_megabytes_to_clusters(sb, requested_mb);
+	}
+
+	osb->local_alloc_bits = osb->local_alloc_default_bits;
+}
+
 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
 {
 	return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
@@ -156,7 +295,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 		     osb->local_alloc_bits, (osb->bitmap_cpg - 1));
 		osb->local_alloc_bits =
 			ocfs2_megabytes_to_clusters(osb->sb,
-						    
OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); +						    ocfs2_la_default_mb(osb));  	}  	/* read the alloc off disk */ @@ -262,6 +401,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)  	osb->local_alloc_state = OCFS2_LA_DISABLED; +	ocfs2_resmap_uninit(&osb->osb_la_resmap); +  	main_bm_inode = ocfs2_get_system_file_inode(osb,  						    GLOBAL_BITMAP_SYSTEM_INODE,  						    OCFS2_INVALID_SLOT); @@ -305,12 +446,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)  	}  	ocfs2_clear_local_alloc(alloc); - -	status = ocfs2_journal_dirty(handle, bh); -	if (status < 0) { -		mlog_errno(status); -		goto out_commit; -	} +	ocfs2_journal_dirty(handle, bh);  	brelse(bh);  	osb->local_alloc_bh = NULL; @@ -481,46 +617,6 @@ out:  	return status;  } -/* Check to see if the local alloc window is within ac->ac_max_block */ -static int ocfs2_local_alloc_in_range(struct inode *inode, -				      struct ocfs2_alloc_context *ac, -				      u32 bits_wanted) -{ -	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); -	struct ocfs2_dinode *alloc; -	struct ocfs2_local_alloc *la; -	int start; -	u64 block_off; - -	if (!ac->ac_max_block) -		return 1; - -	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; -	la = OCFS2_LOCAL_ALLOC(alloc); - -	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); -	if (start == -1) { -		mlog_errno(-ENOSPC); -		return 0; -	} - -	/* -	 * Converting (bm_off + start + bits_wanted) to blocks gives us -	 * the blkno just past our actual allocation.  This is perfect -	 * to compare with ac_max_block. -	 */ -	block_off = ocfs2_clusters_to_blocks(inode->i_sb, -					     le32_to_cpu(la->la_bm_off) + -					     start + bits_wanted); -	mlog(0, "Checking %llu against %llu\n", -	     (unsigned long long)block_off, -	     (unsigned long long)ac->ac_max_block); -	if (block_off > ac->ac_max_block) -		return 0; - -	return 1; -} -  /*   * make sure we've got at least bits_wanted contiguous bits in the   * local alloc. You lose them when you drop i_mutex. 
@@ -613,17 +709,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,  		mlog(0, "Calling in_range for max block %llu\n",  		     (unsigned long long)ac->ac_max_block); -	if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac, -					bits_wanted)) { -		/* -		 * The window is outside ac->ac_max_block. -		 * This errno tells the caller to keep localalloc enabled -		 * but to get the allocation from the main bitmap. -		 */ -		status = -EFBIG; -		goto bail; -	} -  	ac->ac_inode = local_alloc_inode;  	/* We should never use localalloc from another slot */  	ac->ac_alloc_slot = osb->slot_num; @@ -664,7 +749,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,  	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;  	la = OCFS2_LOCAL_ALLOC(alloc); -	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); +	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted, +						  ac->ac_resv);  	if (start == -1) {  		/* TODO: Shouldn't we just BUG here? */  		status = -ENOSPC; @@ -674,8 +760,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,  	bitmap = la->la_bitmap;  	*bit_off = le32_to_cpu(la->la_bm_off) + start; -	/* local alloc is always contiguous by nature -- we never -	 * delete bits from it! 
*/  	*num_bits = bits_wanted;  	status = ocfs2_journal_access_di(handle, @@ -687,18 +771,15 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,  		goto bail;  	} +	ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start, +				  bits_wanted); +  	while(bits_wanted--)  		ocfs2_set_bit(start++, bitmap);  	le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); +	ocfs2_journal_dirty(handle, osb->local_alloc_bh); -	status = ocfs2_journal_dirty(handle, osb->local_alloc_bh); -	if (status < 0) { -		mlog_errno(status); -		goto bail; -	} - -	status = 0;  bail:  	mlog_exit(status);  	return status; @@ -722,13 +803,17 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)  }  static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, -					     struct ocfs2_dinode *alloc, -					     u32 numbits) +				     struct ocfs2_dinode *alloc, +				     u32 *numbits, +				     struct ocfs2_alloc_reservation *resv)  {  	int numfound, bitoff, left, startoff, lastzero; +	int local_resv = 0; +	struct ocfs2_alloc_reservation r;  	void *bitmap = NULL; +	struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap; -	mlog_entry("(numbits wanted = %u)\n", numbits); +	mlog_entry("(numbits wanted = %u)\n", *numbits);  	if (!alloc->id1.bitmap1.i_total) {  		mlog(0, "No bits in my window!\n"); @@ -736,6 +821,30 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,  		goto bail;  	} +	if (!resv) { +		local_resv = 1; +		ocfs2_resv_init_once(&r); +		ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP); +		resv = &r; +	} + +	numfound = *numbits; +	if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) { +		if (numfound < *numbits) +			*numbits = numfound; +		goto bail; +	} + +	/* +	 * Code error. While reservations are enabled, local +	 * allocation should _always_ go through them. +	 */ +	BUG_ON(osb->osb_resv_level != 0); + +	/* +	 * Reservations are disabled. Handle this the old way. 
+	 */ +  	bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;  	numfound = bitoff = startoff = 0; @@ -761,7 +870,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,  			startoff = bitoff+1;  		}  		/* we got everything we needed */ -		if (numfound == numbits) { +		if (numfound == *numbits) {  			/* mlog(0, "Found it all!\n"); */  			break;  		} @@ -770,12 +879,15 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,  	mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,  	     numfound); -	if (numfound == numbits) +	if (numfound == *numbits)  		bitoff = startoff - numfound;  	else  		bitoff = -1;  bail: +	if (local_resv) +		ocfs2_resv_discard(resmap, resv); +  	mlog_exit(bitoff);  	return bitoff;  } @@ -1049,7 +1161,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,  	/* we used the generic suballoc reserve function, but we set  	 * everything up nicely, so there's no reason why we can't use  	 * the more specific cluster api to claim bits. 
*/ -	status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, +	status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits,  				      &cluster_off, &cluster_count);  	if (status == -ENOSPC) {  retry_enospc: @@ -1063,7 +1175,7 @@ retry_enospc:  			goto bail;  		ac->ac_bits_wanted = osb->local_alloc_default_bits; -		status = ocfs2_claim_clusters(osb, handle, ac, +		status = ocfs2_claim_clusters(handle, ac,  					      osb->local_alloc_bits,  					      &cluster_off,  					      &cluster_count); @@ -1098,6 +1210,9 @@ retry_enospc:  	memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,  	       le16_to_cpu(la->la_size)); +	ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count, +			     OCFS2_LOCAL_ALLOC(alloc)->la_bitmap); +  	mlog(0, "New window allocated:\n");  	mlog(0, "window la_bm_off = %u\n",  	     OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); @@ -1169,12 +1284,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,  	}  	ocfs2_clear_local_alloc(alloc); - -	status = ocfs2_journal_dirty(handle, osb->local_alloc_bh); -	if (status < 0) { -		mlog_errno(status); -		goto bail; -	} +	ocfs2_journal_dirty(handle, osb->local_alloc_bh);  	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,  					  main_bm_inode, main_bm_bh); @@ -1192,7 +1302,6 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,  	atomic_inc(&osb->alloc_stats.moves); -	status = 0;  bail:  	if (handle)  		ocfs2_commit_trans(osb, handle);  |