--- linux/fs/inode.c.orig	Tue Mar 13 12:48:43 2001
+++ linux/fs/inode.c	Tue Mar 13 12:48:47 2001
@@ -103,6 +103,7 @@
 		INIT_LIST_HEAD(&inode->i_dirty_buffers);
 		sema_init(&inode->i_sem, 1);
 		sema_init(&inode->i_zombie, 1);
+		spin_lock_init(&inode->i_data.page_lock);
 		spin_lock_init(&inode->i_data.i_shared_lock);
 	}
 }
--- linux/mm/vmscan.c.orig	Tue Mar 13 12:48:44 2001
+++ linux/mm/vmscan.c	Tue Mar 13 12:48:48 2001
@@ -320,27 +320,38 @@
  */
 struct page * reclaim_page(zone_t * zone)
 {
+	struct address_space *mapping;
 	struct page * page = NULL;
 	struct list_head * page_lru;
+	spinlock_t *pg_lock;
 	int maxscan;
 
-	/*
-	 * We only need the pagemap_lru_lock if we don't reclaim the page,
-	 * but we have to grab the pagecache_lock before the pagemap_lru_lock
-	 * to avoid deadlocks and most of the time we'll succeed anyway.
+	/* pagemap_lru_lock is taken before PAGECACHE_LOCK here, against the
+	 * documented order: trylock, and on failure drop the lock and rescan.
 	 */
-	spin_lock(&pagecache_lock);
+repeat:
 	spin_lock(&pagemap_lru_lock);
 	maxscan = zone->inactive_clean_pages;
 	while ((page_lru = zone->inactive_clean_list.prev) !=
 			&zone->inactive_clean_list && maxscan--) {
 		page = list_entry(page_lru, struct page, lru);
 
+		if (!page->mapping)
+			goto bogus_page;
+
+		pg_lock = PAGECACHE_LOCK(page);
+		if (!spin_trylock(pg_lock)) {
+			maxscan++;
+			spin_unlock(&pagemap_lru_lock);
+			goto repeat;
+		}
+
 		/* Wrong page on list?! (list corruption, should not happen) */
 		if (!PageInactiveClean(page)) {
 			printk("VM: reclaim_page, wrong page on list.\n");
 			list_del(page_lru);
 			page->zone->inactive_clean_pages--;
+			spin_unlock(pg_lock);
 			continue;
 		}
 
@@ -349,6 +360,7 @@
 				(!page->buffers && page_count(page) > 1)) {
 			del_page_from_inactive_clean_list(page);
 			add_page_to_active_list(page);
+			spin_unlock(pg_lock);
 			continue;
 		}
 
@@ -356,6 +368,7 @@
 		if (page->buffers || PageDirty(page) || TryLockPage(page)) {
 			del_page_from_inactive_clean_list(page);
 			add_page_to_inactive_dirty_list(page);
+			spin_unlock(pg_lock);
 			continue;
 		}
 
@@ -365,12 +378,18 @@
 			goto found_page;
 		}
 
-		if (page->mapping) {
+		mapping = page->mapping;
+		if (mapping) {
+			spin_lock(&mapping->page_lock);
 			__remove_inode_page(page);
+			spin_unlock(&mapping->page_lock);
 			goto found_page;
 		}
 
 		/* We should never ever get here. */
+		spin_unlock(pg_lock);
+
+	bogus_page:
 		printk(KERN_ERR "VM: reclaim_page, found unknown page\n");
 		list_del(page_lru);
 		zone->inactive_clean_pages--;
@@ -381,15 +400,16 @@
 	goto out;
 
 found_page:
+	spin_unlock(pg_lock);
 	del_page_from_inactive_clean_list(page);
 	UnlockPage(page);
 	page->age = PAGE_AGE_START;
 	if (page_count(page) != 1)
 		printk("VM: reclaim_page, found page with count %d!\n",
 				page_count(page));
+
 out:
 	spin_unlock(&pagemap_lru_lock);
-	spin_unlock(&pagecache_lock);
 	memory_pressure++;
 	return page;
 }
@@ -700,12 +720,8 @@
 		page = list_entry(page_lru, struct page, lru);
 
 		/* Wrong page on list?! (list corruption, should not happen) */
-		if (!PageActive(page)) {
-			printk("VM: refill_inactive, wrong page on list.\n");
-			list_del(page_lru);
-			nr_active_pages--;
-			continue;
-		}
+		if (!PageActive(page))
+			BUG();
 
 		/* Do aging on the pages. */
 		if (PageTestandClearReferenced(page)) {
@@ -1011,7 +1027,10 @@
 		 * we'll be woken up earlier...
 		 */
 		if (!free_shortage() || !inactive_shortage()) {
+repeat_sleep:
 			interruptible_sleep_on_timeout(&kswapd_wait, HZ);
+			if (!free_shortage() && !inactive_shortage())
+				goto repeat_sleep;
 		/*
 		 * If we couldn't free enough memory, we see if it was
 		 * due to the system just not having enough memory.
--- linux/mm/filemap.c.orig	Tue Mar 13 12:48:44 2001
+++ linux/mm/filemap.c	Tue Mar 13 12:48:48 2001
@@ -42,13 +42,31 @@
 
 atomic_t page_cache_size = ATOMIC_INIT(0);
 unsigned int page_hash_bits;
-struct page **page_hash_table;
+struct page_cache_bucket *page_hash_table;
 
-spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
-/*
- * NOTE: to avoid deadlocking you must never acquire the pagecache_lock with
- *       the pagemap_lru_lock held.
+/* Page-cache SMP locking rules:
+ *
+ * 1) The identity of a page (mapping, index) is only changed
+ *    while holding PAGECACHE_LOCK(page), its hash-bucket lock.
+ *
+ * 2) The deadlock-free ordering of lock acquisition is
+ *    PAGECACHE_LOCK ==> pagemap_lru_lock ==> mapping->page_lock
+ *    Some paths need two of these locks at once but already hold
+ *    the later one.  Such a path must never spin on the earlier
+ *    lock: it uses spin_trylock() and, on failure, backs off:
+ *
+ *    repeat:
+ *	    spin_lock(&pagemap_lru_lock);
+ *	    some_loop_over_lru_pages() {
+ *	    ...
+ *		    if (!spin_trylock(PAGECACHE_LOCK(page))) {
+ *			    spin_unlock(&pagemap_lru_lock);
+ *			    goto repeat;
+ *		    }
+ *	    ...
+ *	    }
  */
+
 spinlock_t pagemap_lru_lock = SPIN_LOCK_UNLOCKED;
 
 #define CLUSTER_PAGES		(1 << page_cluster)
@@ -72,15 +90,23 @@
 {
 	struct list_head *head = &mapping->clean_pages;
 
+	spin_lock(&mapping->page_lock);
 	mapping->nrpages++;
 	list_add(&page->list, head);
 	page->mapping = mapping;
+	spin_unlock(&mapping->page_lock);
 }
 
 static inline void remove_page_from_inode_queue(struct page * page)
 {
 	struct address_space * mapping = page->mapping;
 
+#if CONFIG_SMP
+	if (!spin_is_locked(PAGECACHE_LOCK(page)))
+		BUG();
+	if (!spin_is_locked(&mapping->page_lock))
+		BUG();
+#endif
 	mapping->nrpages--;
 	list_del(&page->list);
 	page->mapping = NULL;
@@ -113,12 +139,81 @@
 
 void remove_inode_page(struct page *page)
 {
+	struct address_space * mapping;
+	spinlock_t *pg_lock;
+
 	if (!PageLocked(page))
 		PAGE_BUG(page);
 
-	spin_lock(&pagecache_lock);
+	pg_lock = PAGECACHE_LOCK(page);	/* hash-bucket lock for (mapping, index) */
+
+	spin_lock(pg_lock);
+	mapping = page->mapping;	/* identity only changes under pg_lock */
+	spin_lock(&mapping->page_lock);	/* after the bucket lock, per lock order */
 	__remove_inode_page(page);
-	spin_unlock(&pagecache_lock);
+	spin_unlock(&mapping->page_lock);
+	spin_unlock(pg_lock);
+}
+
+/* Drop the pages on the inode's clean list from the LRU and the page cache.
+ * The page at index i_size >> PAGE_CACHE_SHIFT and locked pages are skipped.
+ */
+void flush_inode_pages (struct inode * inode)
+{
+	unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	struct address_space * mapping = inode->i_mapping;
+	struct list_head *head, *curr;
+	struct page * page;
+
+retry:
+	head = &inode->i_mapping->clean_pages;
+	spin_lock(&pagemap_lru_lock);
+	spin_lock(&mapping->page_lock);
+	curr = head->next;
+
+	while (curr != head) {
+		spinlock_t *pg_lock;
+
+		page = list_entry(curr, struct page, list);
+		curr = curr->next;
+
+		if (page->index == end_index)	/* page holding byte offset i_size */
+			continue;
+		pg_lock = PAGECACHE_LOCK(page);
+		if (!spin_trylock(pg_lock)) {	/* out of lock order: never spin here */
+			spin_unlock(&mapping->page_lock);
+			spin_unlock(&pagemap_lru_lock);
+			goto retry;	/* rescan the list with both locks retaken */
+		}
+
+		/* The page lock is already taken: leave this page alone */
+		if (TryLockPage(page)) {
+			spin_unlock(pg_lock);
+			continue;
+		}
+
+		/*
+		 * Buffers attached: drop every spinlock, try to free them, rescan.
+		 */
+		if (page->buffers) {
+			spin_unlock(pg_lock);
+			spin_unlock(&mapping->page_lock);
+			spin_unlock(&pagemap_lru_lock);
+			try_to_free_buffers(page, 2);
+			UnlockPage(page);
+			goto retry;
+		}
+
+		__lru_cache_del(page);	/* BUGs on SMP unless pagemap_lru_lock is held */
+		__remove_inode_page(page);
+		spin_unlock(pg_lock);
+
+		UnlockPage(page);
+		page_cache_release(page);
+	}
+
+	spin_unlock(&mapping->page_lock);
+	spin_unlock(&pagemap_lru_lock);
 }
 
 static inline int sync_page(struct page *page)
@@ -135,12 +230,20 @@
  */
 void __set_page_dirty(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping;
+	spinlock_t *pg_lock;
+
+	pg_lock = PAGECACHE_LOCK(page);
+	spin_lock(pg_lock);
+
+	mapping = page->mapping;
+	spin_lock(&mapping->page_lock);
 
-	spin_lock(&pagecache_lock);
 	list_del(&page->list);
 	list_add(&page->list, &mapping->dirty_pages);
-	spin_unlock(&pagecache_lock);
+
+	spin_unlock(&mapping->page_lock);
+	spin_unlock(pg_lock);
 
 	if (mapping->host)
 		mark_inode_dirty_pages(mapping->host);
@@ -156,39 +259,58 @@
 
 void invalidate_inode_pages(struct inode * inode)
 {
+	struct address_space * mapping = inode->i_mapping;
 	struct list_head *head, *curr;
 	struct page * page;
 
 	head = &inode->i_mapping->clean_pages;
 
-	spin_lock(&pagecache_lock);
-	spin_lock(&pagemap_lru_lock);
+retry:
+	spin_lock(&pagemap_lru_lock);
+	spin_lock(&mapping->page_lock);
 	curr = head->next;
 
 	while (curr != head) {
+		spinlock_t *pg_lock;
+
 		page = list_entry(curr, struct page, list);
+		pg_lock = PAGECACHE_LOCK(page);
+		if (!spin_trylock(pg_lock)) {	/* out of lock order: never spin here */
+			spin_unlock(&mapping->page_lock);
+			spin_unlock(&pagemap_lru_lock);
+			goto retry;	/* rescan from the list head */
+		}
 		curr = curr->next;
 
 		/* We cannot invalidate something in use.. */
-		if (page_count(page) != 1)
+		if (page_count(page) != 1) {
+			spin_unlock(pg_lock);
 			continue;
+		}
 
 		/* ..or dirty.. */
-		if (PageDirty(page))
+		if (PageDirty(page)) {
+			spin_unlock(pg_lock);
 			continue;
+		}
 
 		/* ..or locked */
-		if (TryLockPage(page))
+		if (TryLockPage(page)) {
+			spin_unlock(pg_lock);
 			continue;
+		}
 
 		__lru_cache_del(page);
 		__remove_inode_page(page);
+		spin_unlock(pg_lock);
+
 		UnlockPage(page);
 		page_cache_release(page);
+
 	}
 
+	spin_unlock(&mapping->page_lock);
 	spin_unlock(&pagemap_lru_lock);
-	spin_unlock(&pagecache_lock);
 }
 
 static inline void truncate_partial_page(struct page *page, unsigned partial)
@@ -202,7 +324,6 @@
 
 static inline void truncate_complete_page(struct page *page)
 {
-	/* Leave it on the LRU if it gets converted into anonymous buffers */
 	if (!page->buffers || block_flushpage(page, 0))
 		lru_cache_del(page);
 
@@ -217,13 +338,12 @@
 	 * all sorts of fun problems ...  
 	 */
 	ClearPageDirty(page);
-	ClearPageUptodate(page);
 	remove_inode_page(page);
 	page_cache_release(page);
 }
 
-static int FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *));
-static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial)
+static int FASTCALL(truncate_list_pages(struct address_space * mapping, struct list_head *, unsigned long, unsigned *));
+static int truncate_list_pages(struct address_space * mapping, struct list_head *head, unsigned long start, unsigned *partial)
 {
 	struct list_head *curr;
 	struct page * page;
@@ -234,24 +354,35 @@
 
 		page = list_entry(curr, struct page, list);
 		curr = curr->next;
-		offset = page->index;
 
 		/* Is one of the pages to truncate? */
+		offset = page->index;
 		if ((offset >= start) || (*partial && (offset + 1) == start)) {
+			spinlock_t *pg_lock;
+
+			pg_lock = PAGECACHE_LOCK(page);
+
+			if (!spin_trylock(pg_lock)) {
+				spin_unlock(&mapping->page_lock);
+				return 1;
+			}
+
 			if (TryLockPage(page)) {
 				page_cache_get(page);
-				spin_unlock(&pagecache_lock);
+				spin_unlock(pg_lock);
+				spin_unlock(&mapping->page_lock);
 				wait_on_page(page);
 				page_cache_release(page);
 				return 1;
 			}
 			page_cache_get(page);
-			spin_unlock(&pagecache_lock);
+			spin_unlock(pg_lock);
+			spin_unlock(&mapping->page_lock);
 
 			if (*partial && (offset + 1) == start) {
 				truncate_partial_page(page, *partial);
 				*partial = 0;
-			} else 
+			} else
 				truncate_complete_page(page);
 
 			UnlockPage(page);
@@ -278,14 +409,14 @@
 	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 
 repeat:
-	spin_lock(&pagecache_lock);
-	if (truncate_list_pages(&mapping->clean_pages, start, &partial))
+	spin_lock(&mapping->page_lock);
+	if (truncate_list_pages(mapping,&mapping->clean_pages, start, &partial))
 		goto repeat;
-	if (truncate_list_pages(&mapping->dirty_pages, start, &partial))
+	if (truncate_list_pages(mapping,&mapping->dirty_pages, start, &partial))
 		goto repeat;
-	if (truncate_list_pages(&mapping->locked_pages, start, &partial))
+	if (truncate_list_pages(mapping,&mapping->locked_pages,start, &partial))
 		goto repeat;
-	spin_unlock(&pagecache_lock);
+	spin_unlock(&mapping->page_lock);
 }
 
 /*
@@ -294,7 +425,7 @@
  * ideal for ->writepage() clustering and other places where you don't
  * want to mark the page referenced.
  *
- * The caller needs to hold the pagecache_lock.
+ * The caller needs to hold __PAGECACHE_LOCK(mapping, index), the bucket lock.
  */
 struct page * __find_page_simple(struct address_space *mapping, unsigned long index)
 {
@@ -378,13 +509,13 @@
 	return error;
 }
 
-static int do_buffer_fdatasync(struct list_head *head, unsigned long start, unsigned long end, int (*fn)(struct page *))
+static int do_buffer_fdatasync(struct address_space * mapping, struct list_head *head, unsigned long start, unsigned long end, int (*fn)(struct page *))
 {
 	struct list_head *curr;
 	struct page *page;
 	int retval = 0;
 
-	spin_lock(&pagecache_lock);
+	spin_lock(&mapping->page_lock);
 	curr = head->next;
 	while (curr != head) {
 		page = list_entry(curr, struct page, list);
@@ -397,7 +528,7 @@
 			continue;
 
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(&mapping->page_lock);
 		lock_page(page);
 
 		/* The buffers could have been free'd while we waited for the page lock */
@@ -405,11 +536,11 @@
 			retval |= fn(page);
 
 		UnlockPage(page);
-		spin_lock(&pagecache_lock);
+		spin_lock(&mapping->page_lock);
 		curr = page->list.next;
 		page_cache_release(page);
 	}
-	spin_unlock(&pagecache_lock);
+	spin_unlock(&mapping->page_lock);
 
 	return retval;
 }
@@ -420,17 +551,18 @@
  */
 int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsigned long end_idx)
 {
+	struct address_space * mapping = inode->i_mapping;
 	int retval;
 
 	/* writeout dirty buffers on pages from both clean and dirty lists */
-	retval = do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, writeout_one_page);
-	retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, writeout_one_page);
-	retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, writeout_one_page);
+	retval = do_buffer_fdatasync(mapping, &mapping->dirty_pages, start_idx, end_idx, writeout_one_page);
+	retval |= do_buffer_fdatasync(mapping, &mapping->clean_pages, start_idx, end_idx, writeout_one_page);
+	retval |= do_buffer_fdatasync(mapping, &mapping->locked_pages, start_idx, end_idx, writeout_one_page);
 
 	/* now wait for locked buffers on pages from both clean and dirty lists */
-	retval |= do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, waitfor_one_page);
-	retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, waitfor_one_page);
-	retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, waitfor_one_page);
+	retval |= do_buffer_fdatasync(mapping, &mapping->dirty_pages, start_idx, end_idx, waitfor_one_page);
+	retval |= do_buffer_fdatasync(mapping, &mapping->clean_pages, start_idx, end_idx, waitfor_one_page);
+	retval |= do_buffer_fdatasync(mapping, &mapping->locked_pages, start_idx, end_idx, waitfor_one_page);
 
 	return retval;
 }
@@ -446,19 +578,29 @@
 {
 	int (*writepage)(struct page *) = mapping->a_ops->writepage;
 
-	spin_lock(&pagecache_lock);
+repeat:
+	spin_lock(&mapping->page_lock);
 
         while (!list_empty(&mapping->dirty_pages)) {
 		struct page *page = list_entry(mapping->dirty_pages.next, struct page, list);
+		spinlock_t *pg_lock;
 
+		pg_lock = PAGECACHE_LOCK(page);
+		if (!spin_trylock(pg_lock)) {
+			spin_unlock(&mapping->page_lock);
+			goto repeat;
+		}
 		list_del(&page->list);
 		list_add(&page->list, &mapping->locked_pages);
 
-		if (!PageDirty(page))
+		if (!PageDirty(page)) {
+			spin_unlock(pg_lock);
 			continue;
+		}
 
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(pg_lock);
+		spin_unlock(&mapping->page_lock);
 
 		lock_page(page);
 
@@ -469,9 +611,9 @@
 			UnlockPage(page);
 
 		page_cache_release(page);
-		spin_lock(&pagecache_lock);
+		spin_lock(&mapping->page_lock);
 	}
-	spin_unlock(&pagecache_lock);
+	spin_unlock(&mapping->page_lock);
 }
 
 /**
@@ -483,26 +625,36 @@
  */
 void filemap_fdatawait(struct address_space * mapping)
 {
-	spin_lock(&pagecache_lock);
+repeat:
+	spin_lock(&mapping->page_lock);
 
         while (!list_empty(&mapping->locked_pages)) {
 		struct page *page = list_entry(mapping->locked_pages.next, struct page, list);
+		spinlock_t *pg_lock;
 
+		pg_lock = PAGECACHE_LOCK(page);
+		if (!spin_trylock(pg_lock)) {
+			spin_unlock(&mapping->page_lock);
+			goto repeat;
+		}
 		list_del(&page->list);
 		list_add(&page->list, &mapping->clean_pages);
 
-		if (!PageLocked(page))
+		if (!PageLocked(page)) {
+			spin_unlock(pg_lock);
 			continue;
+		}
 
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(pg_lock);
+		spin_unlock(&mapping->page_lock);
 
 		___wait_on_page(page);
 
 		page_cache_release(page);
-		spin_lock(&pagecache_lock);
+		spin_lock(&mapping->page_lock);
 	}
-	spin_unlock(&pagecache_lock);
+	spin_unlock(&mapping->page_lock);
 }
 
 /*
@@ -513,16 +665,19 @@
  */
 void add_to_page_cache_locked(struct page * page, struct address_space *mapping, unsigned long index)
 {
+	spinlock_t *pg_lock;
+
 	if (!PageLocked(page))
 		BUG();
 
 	page_cache_get(page);
-	spin_lock(&pagecache_lock);
+	pg_lock = __PAGECACHE_LOCK(mapping, index);
+	spin_lock(pg_lock);
 	page->index = index;
 	add_page_to_inode_queue(mapping, page);
 	add_page_to_hash_queue(page, page_hash(mapping, index));
 	lru_cache_add(page);
-	spin_unlock(&pagecache_lock);
+	spin_unlock(pg_lock);
 }
 
 /*
@@ -549,9 +704,11 @@
 
 void add_to_page_cache(struct page * page, struct address_space * mapping, unsigned long offset)
 {
-	spin_lock(&pagecache_lock);
+	spinlock_t *pg_lock = __PAGECACHE_LOCK(mapping, offset);
+
+	spin_lock(pg_lock);
 	__add_to_page_cache(page, mapping, offset, page_hash(mapping, offset));
-	spin_unlock(&pagecache_lock);
+	spin_unlock(pg_lock);
 }
 
 static int add_to_page_cache_unique(struct page * page,
@@ -560,8 +717,11 @@
 {
 	int err;
 	struct page *alias;
+	spinlock_t *pg_lock;
+
+	pg_lock = __PAGECACHE_LOCK(mapping, offset);
+	spin_lock(pg_lock);
 
-	spin_lock(&pagecache_lock);
 	alias = __find_page_nolock(mapping, offset, *hash);
 
 	err = 1;
@@ -570,7 +730,8 @@
 		err = 0;
 	}
 
-	spin_unlock(&pagecache_lock);
+	spin_unlock(pg_lock);
+
 	return err;
 }
 
@@ -585,9 +746,9 @@
 	struct page **hash = page_hash(mapping, offset);
 	struct page *page; 
 
-	spin_lock(&pagecache_lock);
+	spin_lock(__PAGECACHE_LOCK(mapping, offset));
 	page = __find_page_nolock(mapping, offset, *hash); 
-	spin_unlock(&pagecache_lock);
+	spin_unlock(__PAGECACHE_LOCK(mapping, offset));
 	if (page)
 		return 0;
 
@@ -702,11 +863,11 @@
 	 * We scan the hash list read-only. Addition to and removal from
 	 * the hash-list needs a held write-lock.
 	 */
-	spin_lock(&pagecache_lock);
+	spin_lock(__PAGECACHE_LOCK(mapping, offset));
 	page = __find_page_nolock(mapping, offset, *hash);
 	if (page)
 		page_cache_get(page);
-	spin_unlock(&pagecache_lock);
+	spin_unlock(__PAGECACHE_LOCK(mapping, offset));
 	return page;
 }
 
@@ -723,11 +884,11 @@
 	 * the hash-list needs a held write-lock.
 	 */
 repeat:
-	spin_lock(&pagecache_lock);
+	spin_lock(__PAGECACHE_LOCK(mapping, offset));
 	page = __find_page_nolock(mapping, offset, *hash);
 	if (page) {
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(__PAGECACHE_LOCK(mapping, offset));
 
 		lock_page(page);
 
@@ -740,7 +901,7 @@
 		page_cache_release(page);
 		goto repeat;
 	}
-	spin_unlock(&pagecache_lock);
+	spin_unlock(__PAGECACHE_LOCK(mapping, offset));
 	return NULL;
 }
 
@@ -784,14 +945,16 @@
 	 * been increased since the last time we were called, we
 	 * stop when the page isn't there.
 	 */
-	spin_lock(&pagecache_lock);
 	while (--index >= start) {
+		spin_lock(__PAGECACHE_LOCK(mapping, index));
 		page = __find_page_simple(mapping, index);
-		if (!page)
+		if (!page) {
+			spin_unlock(__PAGECACHE_LOCK(mapping, index));
 			break;
+		}
 		deactivate_page(page);
+		spin_unlock(__PAGECACHE_LOCK(mapping, index));
 	}
-	spin_unlock(&pagecache_lock);
 }
 
 /*
@@ -1036,7 +1199,7 @@
  * This is really ugly. But the goto's actually try to clarify some
  * of the logic when it comes to error handling etc.
  */
-void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor)
+void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor, int nonblock)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
@@ -1110,17 +1273,24 @@
 		 */
 		hash = page_hash(mapping, index);
 
-		spin_lock(&pagecache_lock);
+		spin_lock(__PAGECACHE_LOCK(mapping, index));
 		page = __find_page_nolock(mapping, index, *hash);
 		if (!page)
 			goto no_cached_page;
 found_page:
 		page_cache_get(page);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(__PAGECACHE_LOCK(mapping, index));
 
-		if (!Page_Uptodate(page))
+		if (!Page_Uptodate(page)) {
+			if (nonblock) {
+				page_cache_release(page);
+				desc->error = -EWOULDBLOCKIO;
+				break;
+			}
 			goto page_not_up_to_date;
-		generic_file_readahead(reada_ok, filp, inode, page);
+		}
+		if (!nonblock)
+			generic_file_readahead(reada_ok, filp, inode, page);
 page_ok:
 		/* If users can be writing to this page using arbitrary
 		 * virtual addresses, take care about potential aliasing
@@ -1196,6 +1366,11 @@
 		break;
 
 no_cached_page:
+		if (nonblock) {
+			spin_unlock(__PAGECACHE_LOCK(mapping, index));
+			desc->error = -EWOULDBLOCKIO;
+			break;
+		}
 		/*
 		 * Ok, it wasn't cached, so we need to create a new
 		 * page..
@@ -1203,7 +1378,7 @@
-		 * We get here with the page cache lock held.
+		 * We get here with __PAGECACHE_LOCK(mapping, index) held.
 		 */
 		if (!cached_page) {
-			spin_unlock(&pagecache_lock);
+			spin_unlock(__PAGECACHE_LOCK(mapping, index));
 			cached_page = page_cache_alloc(mapping);
 			if (!cached_page) {
 				desc->error = -ENOMEM;
@@ -1214,7 +1389,7 @@
 			 * Somebody may have added the page while we
 			 * dropped the page cache lock. Check for that.
 			 */
-			spin_lock(&pagecache_lock);
+			spin_lock(__PAGECACHE_LOCK(mapping, index));
 			page = __find_page_nolock(mapping, index, *hash);
 			if (page)
 				goto found_page;
@@ -1225,7 +1400,7 @@
 		 */
 		page = cached_page;
 		__add_to_page_cache(page, mapping, index, hash);
-		spin_unlock(&pagecache_lock);
+		spin_unlock(__PAGECACHE_LOCK(mapping, index));
 		cached_page = NULL;
 
 		goto readpage;
@@ -1279,7 +1454,7 @@
 			desc.count = count;
 			desc.buf = buf;
 			desc.error = 0;
-			do_generic_file_read(filp, ppos, &desc, file_read_actor);
+			do_generic_file_read(filp, ppos, &desc, file_read_actor, 0);
 
 			retval = desc.written;
 			if (!retval)
@@ -1289,7 +1464,7 @@
 	return retval;
 }
 
-static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
+int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
 {
 	ssize_t written;
 	unsigned long count = desc->count;
@@ -1382,7 +1557,7 @@
 		desc.count = count;
 		desc.buf = (char *) out_file;
 		desc.error = 0;
-		do_generic_file_read(in_file, ppos, &desc, file_send_actor);
+		do_generic_file_read(in_file, ppos, &desc, file_send_actor, 0);
 
 		retval = desc.written;
 		if (!retval)
@@ -2201,11 +2376,11 @@
 	struct address_space * as = &vma->vm_file->f_dentry->d_inode->i_data;
 	struct page * page, ** hash = page_hash(as, pgoff);
 
-	spin_lock(&pagecache_lock);
+	spin_lock(__PAGECACHE_LOCK(as, pgoff));
 	page = __find_page_nolock(as, pgoff, *hash);
 	if ((page) && (Page_Uptodate(page)))
 		present = 1;
-	spin_unlock(&pagecache_lock);
+	spin_unlock(__PAGECACHE_LOCK(as, pgoff));
 
 	return present;
 }
@@ -2510,7 +2685,7 @@
 	 * Check whether we've reached the file size limit.
 	 */
 	err = -EFBIG;
-	
+
 	if (limit != RLIM_INFINITY) {
 		if (pos >= limit) {
 			send_sig(SIGXFSZ, current, 0);
@@ -2669,21 +2844,21 @@
 
 void __init page_cache_init(unsigned long mempages)
 {
-	unsigned long htable_size, order;
+	unsigned long htable_size, order, i;
 
 	htable_size = mempages;
-	htable_size *= sizeof(struct page *);
+	htable_size *= sizeof(struct page_cache_bucket);
 	for(order = 0; (PAGE_SIZE << order) < htable_size; order++)
 		;
 
 	do {
-		unsigned long tmp = (PAGE_SIZE << order) / sizeof(struct page *);
+		unsigned long tmp = (PAGE_SIZE << order) / sizeof(struct page_cache_bucket);
 
 		page_hash_bits = 0;
 		while((tmp >>= 1UL) != 0UL)
 			page_hash_bits++;
 
-		page_hash_table = (struct page **)
+		page_hash_table = (struct page_cache_bucket *)
 			__get_free_pages(GFP_ATOMIC, order);
 	} while(page_hash_table == NULL && --order > 0);
 
@@ -2691,5 +2866,9 @@
 	       (1 << page_hash_bits), order, (PAGE_SIZE << order));
 	if (!page_hash_table)
 		panic("Failed to allocate page hash table\n");
-	memset((void *)page_hash_table, 0, PAGE_HASH_SIZE * sizeof(struct page *));
+
+	for (i = 0; i < PAGE_HASH_SIZE; i++) {
+		spin_lock_init(&page_hash_table[i].lock);
+		page_hash_table[i].chain = NULL;
+	}
 }
--- linux/mm/swap_state.c.orig	Sat Dec 30 00:04:27 2000
+++ linux/mm/swap_state.c	Tue Mar 13 12:48:48 2001
@@ -33,6 +33,7 @@
 	LIST_HEAD_INIT(swapper_space.dirty_pages),
 	LIST_HEAD_INIT(swapper_space.locked_pages),
 	0,				/* nrpages	*/
+	SPIN_LOCK_UNLOCKED,
 	&swap_aops,
 };
 
@@ -80,7 +81,9 @@
 
 	PageClearSwapCache(page);
 	ClearPageDirty(page);
+	spin_lock(&mapping->page_lock);
 	__remove_inode_page(page);
+	spin_unlock(&mapping->page_lock);
 }
 
 /*
@@ -106,16 +109,18 @@
  */
 void delete_from_swap_cache_nolock(struct page *page)
 {
+	spinlock_t *pg_lock;
+
 	if (!PageLocked(page))
 		BUG();
-
 	if (block_flushpage(page, 0))
 		lru_cache_del(page);
 
-	spin_lock(&pagecache_lock);
+	pg_lock = PAGECACHE_LOCK(page);
+	spin_lock(pg_lock);
 	ClearPageDirty(page);
 	__delete_from_swap_cache(page);
-	spin_unlock(&pagecache_lock);
+	spin_unlock(pg_lock);
 	page_cache_release(page);
 }
 
--- linux/mm/swap.c.orig	Tue Mar 13 12:48:34 2001
+++ linux/mm/swap.c	Tue Mar 13 12:48:48 2001
@@ -41,6 +41,7 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
+
 /*
  * This variable contains the amount of page steals the system
  * is doing, averaged over a minute. We use this to determine how
@@ -92,11 +93,12 @@
 	 */
 	if (!page->age)
 		activate_page_nolock(page);
-
-	/* The actual page aging bit */
-	page->age += PAGE_AGE_ADV;
-	if (page->age > PAGE_AGE_MAX)
-		page->age = PAGE_AGE_MAX;
+	else {
+		/* The actual page aging bit */
+		page->age += PAGE_AGE_ADV;
+		if (page->age > PAGE_AGE_MAX)
+			page->age = PAGE_AGE_MAX;
+	}
 }
 
 /*
@@ -248,6 +250,10 @@
  */
 void __lru_cache_del(struct page * page)
 {
+#if CONFIG_SMP
+	if (!spin_is_locked(&pagemap_lru_lock))
+		BUG();
+#endif
 	if (PageActive(page)) {
 		del_page_from_active_list(page);
 	} else if (PageInactiveDirty(page)) {
--- linux/include/linux/swap.h.orig	Tue Mar 13 12:48:44 2001
+++ linux/include/linux/swap.h	Tue Mar 13 12:48:48 2001
@@ -90,7 +90,6 @@
 extern struct address_space swapper_space;
 extern atomic_t page_cache_size;
 extern atomic_t buffermem_pages;
-extern spinlock_t pagecache_lock;
 extern void __remove_inode_page(struct page *);
 
 /* Incomplete types for prototype declarations: */
@@ -208,6 +207,10 @@
  * Since we do exponential decay of the page age, we
  * can chose a fairly large maximum.
  */
+#define DEFAULT_PAGE_AGE_START 1
+#define DEFAULT_PAGE_AGE_ADV 1
+#define DEFAULT_PAGE_AGE_MAX 8192
+
 #define PAGE_AGE_START 2
 #define PAGE_AGE_ADV 3
 #define PAGE_AGE_MAX 64
@@ -223,22 +226,22 @@
 #define ZERO_PAGE_BUG \
 	if (page_count(page) == 0) BUG();
 
-#define add_page_to_active_list(page) { \
+#define add_page_to_active_list(page) ({ \
 	DEBUG_ADD_PAGE \
 	ZERO_PAGE_BUG \
 	SetPageActive(page); \
 	list_add(&(page)->lru, &active_list); \
 	nr_active_pages++; \
-}
+})
 
-#define add_page_to_inactive_dirty_list(page) { \
+#define add_page_to_inactive_dirty_list(page) ({ \
 	DEBUG_ADD_PAGE \
 	ZERO_PAGE_BUG \
 	SetPageInactiveDirty(page); \
 	list_add(&(page)->lru, &inactive_dirty_list); \
 	nr_inactive_dirty_pages++; \
 	page->zone->inactive_dirty_pages++; \
-}
+})
 
 #define add_page_to_inactive_clean_list(page) { \
 	DEBUG_ADD_PAGE \
@@ -248,30 +251,30 @@
 	page->zone->inactive_clean_pages++; \
 }
 
-#define del_page_from_active_list(page) { \
+#define del_page_from_active_list(page) ({ \
 	list_del(&(page)->lru); \
 	ClearPageActive(page); \
 	nr_active_pages--; \
 	DEBUG_ADD_PAGE \
 	ZERO_PAGE_BUG \
-}
+})
 
-#define del_page_from_inactive_dirty_list(page) { \
+#define del_page_from_inactive_dirty_list(page) ({ \
 	list_del(&(page)->lru); \
 	ClearPageInactiveDirty(page); \
 	nr_inactive_dirty_pages--; \
 	page->zone->inactive_dirty_pages--; \
 	DEBUG_ADD_PAGE \
 	ZERO_PAGE_BUG \
-}
+})
 
-#define del_page_from_inactive_clean_list(page) { \
+#define del_page_from_inactive_clean_list(page) ({ \
 	list_del(&(page)->lru); \
 	ClearPageInactiveClean(page); \
 	page->zone->inactive_clean_pages--; \
 	DEBUG_ADD_PAGE \
 	ZERO_PAGE_BUG \
-}
+})
 
 /*
  * In mm/swap.c::recalculate_vm_stats(), we substract
--- linux/include/linux/errno.h.orig	Wed Mar 29 03:51:39 2000
+++ linux/include/linux/errno.h	Tue Mar 13 12:48:48 2001
@@ -21,6 +21,9 @@
 #define EBADTYPE	527	/* Type not supported by server */
 #define EJUKEBOX	528	/* Request initiated, but will not complete before timeout */
 
+/* Defined for TUX async IO */
+#define EWOULDBLOCKIO	530	/* Would block due to block-IO */
+
 #endif
 
 #endif
--- linux/include/linux/fs.h.orig	Tue Mar 13 12:48:44 2001
+++ linux/include/linux/fs.h	Tue Mar 13 12:48:48 2001
@@ -377,6 +377,8 @@
 	struct list_head	dirty_pages;	/* list of dirty pages */
 	struct list_head	locked_pages;	/* list of locked pages */
 	unsigned long		nrpages;	/* number of total pages */
+	spinlock_t		page_lock;	/* and spinlock protecting them */
+
 	struct address_space_operations *a_ops;	/* methods */
 	struct inode		*host;		/* owner: inode, block_device */
 	struct vm_area_struct	*i_mmap;	/* list of private mappings */
@@ -1302,6 +1304,7 @@
 extern int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
 				unsigned long *);
 extern int block_sync_page(struct page *);
+extern void flush_inode_pages (struct inode * inode);
 
 int generic_block_bmap(struct address_space *, long, get_block_t *);
 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
@@ -1311,7 +1314,7 @@
 extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
 extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *);
 extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *);
-extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t);
+extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t, int);
 
 extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *);
 extern int generic_file_open(struct inode *, struct file *);
--- linux/include/linux/pagemap.h.orig	Tue Mar 13 12:48:44 2001
+++ linux/include/linux/pagemap.h	Tue Mar 13 12:48:48 2001
@@ -42,12 +42,17 @@
  */
 #define page_cache_entry(x)	virt_to_page(x)
 
+struct page_cache_bucket {
+	spinlock_t	lock;
+	struct page	*chain;
+} __attribute__((__aligned__(8)));
+
 extern unsigned int page_hash_bits;
 #define PAGE_HASH_BITS (page_hash_bits)
 #define PAGE_HASH_SIZE (1 << PAGE_HASH_BITS)
 
 extern atomic_t page_cache_size; /* # of pages currently in the hash table */
-extern struct page **page_hash_table;
+extern struct page_cache_bucket *page_hash_table;
 
 extern void page_cache_init(unsigned long);
 
@@ -69,7 +74,18 @@
 #undef s
 }
 
-#define page_hash(mapping,index) (page_hash_table+_page_hashfn(mapping,index))
+/*
+ * page_hash() evaluates to the address of a bucket's chain pointer and
+ * __PAGECACHE_LOCK() to the address of the spinlock in that same bucket.
+ * Each expansion is wrapped in parentheses so it remains one operand even
+ * when the use site applies a postfix operator such as [] or -> to it.
+ */
+#define page_hash(mapping,index) \
+	(&((page_hash_table+_page_hashfn(mapping,index))->chain))
+#define __PAGECACHE_LOCK(mapping,index) \
+	(&((page_hash_table+_page_hashfn(mapping,index))->lock))
+#define PAGECACHE_LOCK(page) \
+	__PAGECACHE_LOCK((page)->mapping, (page)->index)
 
 extern struct page * __find_get_page(struct address_space *mapping,
 				     unsigned long offset, struct page **hash);
--- linux/net/khttpd/datasending.c.orig	Fri Nov 17 20:36:27 2000
+++ linux/net/khttpd/datasending.c	Tue Mar 13 12:48:48 2001
@@ -127,7 +127,7 @@
 				desc.count = ReadSize;
 				desc.buf = (char *) CurrentRequest->sock;
 				desc.error = 0;
-				do_generic_file_read(CurrentRequest->filp, ppos, &desc, sock_send_actor);
+				do_generic_file_read(CurrentRequest->filp, ppos, &desc, sock_send_actor, 0);
 				if (desc.written>0)
 				{	
 					CurrentRequest->BytesSent += desc.written;
--- linux/drivers/block/loop.c.orig	Tue Mar 13 12:48:40 2001
+++ linux/drivers/block/loop.c	Tue Mar 13 12:48:48 2001
@@ -268,7 +268,7 @@
 	spin_lock_irq(&lo->lo_lock);
 	file = lo->lo_backing_file;
 	spin_unlock_irq(&lo->lo_lock);
-	do_generic_file_read(file, &pos, &desc, lo_read_actor);
+	do_generic_file_read(file, &pos, &desc, lo_read_actor, 0);
 	return desc.error;
 }