--- linux-source-2.6.22-2.6.22.orig/scripts/kconfig/conf.c
+++ linux-source-2.6.22-2.6.22/scripts/kconfig/conf.c
@@ -64,7 +64,7 @@
}
}
-static void conf_askvalue(struct symbol *sym, const char *def)
+static int conf_askvalue(struct symbol *sym, const char *def)
{
enum symbol_type type = sym_get_type(sym);
tristate val;
@@ -79,7 +79,7 @@
printf("%s\n", def);
line[0] = '\n';
line[1] = 0;
- return;
+ return 0;
}
switch (input_mode) {
@@ -89,23 +89,23 @@
case set_random:
if (sym_has_value(sym)) {
printf("%s\n", def);
- return;
+ return 0;
}
break;
case ask_new:
case ask_silent:
if (sym_has_value(sym)) {
printf("%s\n", def);
- return;
+ return 0;
}
check_stdin();
case ask_all:
fflush(stdout);
fgets(line, 128, stdin);
- return;
+ return 1;
case set_default:
printf("%s\n", def);
- return;
+ return 1;
default:
break;
}
@@ -115,7 +115,7 @@
case S_HEX:
case S_STRING:
printf("%s\n", def);
- return;
+ return 1;
default:
;
}
@@ -166,6 +166,7 @@
break;
}
printf("%s", line);
+ return 1;
}
int conf_string(struct menu *menu)
@@ -179,7 +180,8 @@
def = sym_get_string_value(sym);
if (sym_get_string_value(sym))
printf("[%s] ", def);
- conf_askvalue(sym, def);
+ if (!conf_askvalue(sym, def))
+ return 0;
switch (line[0]) {
case '\n':
break;
@@ -236,7 +238,8 @@
if (sym->help)
printf("/?");
printf("] ");
- conf_askvalue(sym, sym_get_string_value(sym));
+ if (!conf_askvalue(sym, sym_get_string_value(sym)))
+ return 0;
strip(line);
switch (line[0]) {
--- linux-source-2.6.22-2.6.22.orig/MAINTAINERS
+++ linux-source-2.6.22-2.6.22/MAINTAINERS
@@ -3593,6 +3593,15 @@
W: http://www.kernel.dk
S: Maintained
+UNIONFS
+P: Erez Zadok
+M: ezk@cs.sunysb.edu
+P: Josef "Jeff" Sipek
+M: jsipek@cs.sunysb.edu
+L: unionfs@filesystems.org
+W: http://unionfs.filesystems.org
+S: Maintained
+
USB ACM DRIVER
P: Oliver Neukum
M: oliver@neukum.name
--- linux-source-2.6.22-2.6.22.orig/Makefile
+++ linux-source-2.6.22-2.6.22/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 22
-EXTRAVERSION =
+EXTRAVERSION = .9
NAME = Holy Dancing Manatees, Batman!
# *DOCUMENTATION*
--- linux-source-2.6.22-2.6.22.orig/init/initramfs.c
+++ linux-source-2.6.22-2.6.22/init/initramfs.c
@@ -541,6 +541,26 @@
#endif
+/* Tries to read the initramfs if it's already there, for ACPI table overriding. */
+void __init early_populate_rootfs(void)
+{
+ char *err = unpack_to_rootfs(__initramfs_start,
+ __initramfs_end - __initramfs_start, 0);
+ if (err)
+ return; /* unpack_to_rootfs() reported a problem: give up without logging */
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (initrd_start) {
+ printk(KERN_INFO "Early unpacking initramfs...");
+ err = unpack_to_rootfs((char *)initrd_start,
+ initrd_end - initrd_start, 0);
+ if (err)
+ return; /* NOTE: the "..." log line above is left without its newline */
+ printk(" done\n");
+ }
+#endif
+ return;
+}
+
static int __init populate_rootfs(void)
{
char *err = unpack_to_rootfs(__initramfs_start,
--- linux-source-2.6.22-2.6.22.orig/init/main.c
+++ linux-source-2.6.22-2.6.22/init/main.c
@@ -97,8 +97,10 @@
extern void free_initmem(void);
#ifdef CONFIG_ACPI
extern void acpi_early_init(void);
+extern void early_populate_rootfs(void);
#else
static inline void acpi_early_init(void) { }
+static inline void early_populate_rootfs(void) { }
#endif
#ifndef CONFIG_DEBUG_RODATA
static inline void mark_rodata_ro(void) { }
@@ -630,6 +632,7 @@
check_bugs();
+ early_populate_rootfs(); /* For DSDT override from initramfs */
acpi_early_init(); /* before LAPIC and SMP init */
/* Do the rest non-__init'ed, we're now alive */
--- linux-source-2.6.22-2.6.22.orig/init/version.c
+++ linux-source-2.6.22-2.6.22/init/version.c
@@ -36,7 +36,11 @@
/* FIXED STRINGS! Don't touch! */
const char linux_banner[] =
"Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
- LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
+ LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION
+#ifdef CONFIG_VERSION_SIGNATURE
+ " (" CONFIG_VERSION_SIGNATURE ")"
+#endif
+ "\n";
const char linux_proc_banner[] =
"%s version %s"
--- linux-source-2.6.22-2.6.22.orig/init/Kconfig
+++ linux-source-2.6.22-2.6.22/init/Kconfig
@@ -95,6 +95,15 @@
which is done within the script "scripts/setlocalversion".)
+config VERSION_SIGNATURE
+ string "Arbitrary version signature"
+ help
+ This string will be created in a file, /proc/version_signature. It
+ is useful in determining arbitrary data about your kernel. For instance,
+ if you have several kernels of the same version, but need to keep track
+ of a revision of the same kernel, but not affect its ability to load
+ compatible modules, this is the easiest way to do that.
+
config SWAP
bool "Support for paging of anonymous memory (swap)"
depends on MMU && BLOCK
@@ -505,6 +514,7 @@
config TIMERFD
bool "Enable timerfd() system call" if EMBEDDED
depends on ANON_INODES
+ depends on BROKEN
default y
help
Enable the timerfd() system call that allows to receive timer
--- linux-source-2.6.22-2.6.22.orig/mm/filemap_xip.c
+++ linux-source-2.6.22-2.6.22/mm/filemap_xip.c
@@ -406,7 +406,7 @@
if (count == 0)
goto out_backing;
- ret = remove_suid(filp->f_path.dentry);
+ ret = remove_suid(&filp->f_path);
if (ret)
goto out_backing;
--- linux-source-2.6.22-2.6.22.orig/mm/hugetlb.c
+++ linux-source-2.6.22-2.6.22/mm/hugetlb.c
@@ -101,13 +101,20 @@
static int alloc_fresh_huge_page(void)
{
- static int nid = 0;
+ static int prev_nid;
struct page *page;
- page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
- HUGETLB_PAGE_ORDER);
- nid = next_node(nid, node_online_map);
+ static DEFINE_SPINLOCK(nid_lock);
+ int nid;
+
+ spin_lock(&nid_lock);
+ nid = next_node(prev_nid, node_online_map);
if (nid == MAX_NUMNODES)
nid = first_node(node_online_map);
+ prev_nid = nid;
+ spin_unlock(&nid_lock);
+
+ page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
+ HUGETLB_PAGE_ORDER);
if (page) {
set_compound_page_dtor(page, free_huge_page);
spin_lock(&hugetlb_lock);
--- linux-source-2.6.22-2.6.22.orig/mm/filemap.c
+++ linux-source-2.6.22-2.6.22/mm/filemap.c
@@ -1905,20 +1905,20 @@
}
EXPORT_SYMBOL(should_remove_suid);
-int __remove_suid(struct dentry *dentry, int kill)
+int __remove_suid(struct path *path, int kill)
{
struct iattr newattrs;
newattrs.ia_valid = ATTR_FORCE | kill;
- return notify_change(dentry, &newattrs);
+ return notify_change(path->dentry, path->mnt, &newattrs);
}
-int remove_suid(struct dentry *dentry)
+int remove_suid(struct path *path)
{
- int kill = should_remove_suid(dentry);
+ int kill = should_remove_suid(path->dentry);
if (unlikely(kill))
- return __remove_suid(dentry, kill);
+ return __remove_suid(path, kill);
return 0;
}
@@ -2146,22 +2146,9 @@
}
status = a_ops->prepare_write(file, page, offset, offset+bytes);
- if (unlikely(status)) {
- loff_t isize = i_size_read(inode);
+ if (unlikely(status))
+ goto fs_write_aop_error;
- if (status != AOP_TRUNCATED_PAGE)
- unlock_page(page);
- page_cache_release(page);
- if (status == AOP_TRUNCATED_PAGE)
- continue;
- /*
- * prepare_write() may have instantiated a few blocks
- * outside i_size. Trim these off again.
- */
- if (pos + bytes > isize)
- vmtruncate(inode, isize);
- break;
- }
if (likely(nr_segs == 1))
copied = filemap_copy_from_user(page, offset,
buf, bytes);
@@ -2170,41 +2157,54 @@
cur_iov, iov_base, bytes);
flush_dcache_page(page);
status = a_ops->commit_write(file, page, offset, offset+bytes);
- if (status == AOP_TRUNCATED_PAGE) {
- page_cache_release(page);
- continue;
+ if (unlikely(status < 0 || status == AOP_TRUNCATED_PAGE))
+ goto fs_write_aop_error;
+ if (unlikely(copied != bytes)) {
+ status = -EFAULT;
+ goto fs_write_aop_error;
}
zero_length_segment:
- if (likely(copied >= 0)) {
- if (!status)
- status = copied;
+ if (unlikely(status > 0)) /* filesystem did partial write */
+ copied = status;
- if (status >= 0) {
- written += status;
- count -= status;
- pos += status;
- buf += status;
- if (unlikely(nr_segs > 1)) {
- filemap_set_next_iovec(&cur_iov,
- &iov_base, status);
- if (count)
- buf = cur_iov->iov_base +
- iov_base;
- } else {
- iov_base += status;
- }
+ if (likely(copied >= 0)) {
+ written += copied;
+ count -= copied;
+ pos += copied;
+ buf += copied;
+ if (unlikely(nr_segs > 1)) {
+ filemap_set_next_iovec(&cur_iov,
+ &iov_base, copied);
+ if (count)
+ buf = cur_iov->iov_base + iov_base;
+ } else {
+ iov_base += copied;
}
}
- if (unlikely(copied != bytes))
- if (status >= 0)
- status = -EFAULT;
unlock_page(page);
mark_page_accessed(page);
page_cache_release(page);
- if (status < 0)
- break;
balance_dirty_pages_ratelimited(mapping);
cond_resched();
+ continue;
+
+fs_write_aop_error:
+ if (status != AOP_TRUNCATED_PAGE)
+ unlock_page(page);
+ page_cache_release(page);
+
+ /*
+ * prepare_write() may have instantiated a few blocks
+ * outside i_size. Trim these off again. Don't need
+ * i_size_read because we hold i_mutex.
+ */
+ if (pos + bytes > inode->i_size)
+ vmtruncate(inode, inode->i_size);
+ if (status == AOP_TRUNCATED_PAGE)
+ continue;
+ else
+ break;
+
} while (count);
*ppos = pos;
@@ -2269,7 +2269,7 @@
if (count == 0)
goto out;
- err = remove_suid(file->f_path.dentry);
+ err = remove_suid(&file->f_path);
if (err)
goto out;
--- linux-source-2.6.22-2.6.22.orig/mm/mmap.c
+++ linux-source-2.6.22-2.6.22/mm/mmap.c
@@ -2157,7 +2157,7 @@
vma->vm_start = addr;
vma->vm_end = addr + len;
- vma->vm_flags = vm_flags | mm->def_flags;
+ vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
vma->vm_page_prot = protection_map[vma->vm_flags & 7];
vma->vm_ops = &special_mapping_vmops;
--- linux-source-2.6.22-2.6.22.orig/mm/shmem.c
+++ linux-source-2.6.22-2.6.22/mm/shmem.c
@@ -1051,7 +1051,7 @@
pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
pvma.vm_pgoff = idx;
pvma.vm_end = PAGE_SIZE;
- page = alloc_page_vma(gfp | __GFP_ZERO, &pvma, 0);
+ page = alloc_page_vma(gfp, &pvma, 0);
mpol_free(pvma.vm_policy);
return page;
}
@@ -1071,7 +1071,7 @@
static inline struct page *
shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx)
{
- return alloc_page(gfp | __GFP_ZERO);
+ return alloc_page(gfp);
}
#endif
@@ -1280,6 +1280,7 @@
info->alloced++;
spin_unlock(&info->lock);
+ clear_highpage(filepage);
flush_dcache_page(filepage);
SetPageUptodate(filepage);
}
@@ -1518,7 +1519,7 @@
if (err || !count)
goto out;
- err = remove_suid(file->f_path.dentry);
+ err = remove_suid(&file->f_path);
if (err)
goto out;
--- linux-source-2.6.22-2.6.22.orig/mm/mlock.c
+++ linux-source-2.6.22-2.6.22/mm/mlock.c
@@ -244,9 +244,12 @@
locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+ if (lock_limit == RLIM_INFINITY)
+ allowed = 1;
lock_limit >>= PAGE_SHIFT;
spin_lock(&shmlock_user_lock);
- if (locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
+ if (!allowed &&
+ locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
goto out;
get_uid(user);
user->locked_shm += locked;
--- linux-source-2.6.22-2.6.22.orig/mm/tiny-shmem.c
+++ linux-source-2.6.22-2.6.22/mm/tiny-shmem.c
@@ -86,7 +86,7 @@
file->f_mode = FMODE_WRITE | FMODE_READ;
/* notify everyone as to the change of file size */
- error = do_truncate(dentry, size, 0, file);
+ error = do_truncate(dentry, file->f_path.mnt, size, 0, file);
if (error < 0)
goto close_file;
--- linux-source-2.6.22-2.6.22.orig/mm/vmalloc.c
+++ linux-source-2.6.22-2.6.22/mm/vmalloc.c
@@ -578,9 +578,13 @@
}
+/*
+ * GFP mask selected per configuration: the DMA zone flag ORed with GFP_KERNEL.
+ * Each expansion is parenthesized so it stays a single operand at every use.
+ */
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
-#define GFP_VMALLOC32 GFP_DMA32
+#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
-#define GFP_VMALLOC32 GFP_DMA
+#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
#else
#define GFP_VMALLOC32 GFP_KERNEL
#endif
--- linux-source-2.6.22-2.6.22.orig/mm/readahead.c
+++ linux-source-2.6.22-2.6.22/mm/readahead.c
@@ -21,8 +21,16 @@
}
EXPORT_SYMBOL(default_unplug_io_fn);
+/*
+ * Convenient macros for min/max read-ahead pages.
+ * Note that MAX_RA_PAGES is rounded down, while MIN_RA_PAGES is rounded up.
+ * The latter is necessary for systems with large page sizes (e.g. 64k).
+ */
+#define MAX_RA_PAGES (VM_MAX_READAHEAD*1024 / PAGE_CACHE_SIZE)
+#define MIN_RA_PAGES DIV_ROUND_UP(VM_MIN_READAHEAD*1024, PAGE_CACHE_SIZE)
+
struct backing_dev_info default_backing_dev_info = {
- .ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE,
+ .ra_pages = MAX_RA_PAGES,
.state = 0,
.capabilities = BDI_CAP_MAP_COPY,
.unplug_io_fn = default_unplug_io_fn,
@@ -51,7 +59,7 @@
static inline unsigned long get_min_readahead(struct file_ra_state *ra)
{
- return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+ return MIN_RA_PAGES;
}
static inline void reset_ahead_window(struct file_ra_state *ra)
--- linux-source-2.6.22-2.6.22.orig/Documentation/filesystems/unionfs/usage.txt
+++ linux-source-2.6.22-2.6.22/Documentation/filesystems/unionfs/usage.txt
@@ -0,0 +1,97 @@
+Unionfs is a stackable unification file system, which can appear to merge
+the contents of several directories (branches), while keeping their physical
+content separate. Unionfs is useful for unified source tree management,
+merged contents of split CD-ROM, merged separate software package
+directories, data grids, and more. Unionfs allows any mix of read-only and
+read-write branches, as well as insertion and deletion of branches anywhere
+in the fan-out. To maintain Unix semantics, Unionfs handles elimination of
+duplicates, partial-error conditions, and more.
+
+# mount -t unionfs -o branch-option[,union-options[,...]] none MOUNTPOINT
+
+The available branch-option for the mount command is:
+
+ dirs=branch[=ro|=rw][:...]
+
+specifies a colon-separated list of the directories that compose the union.
+Directories that come earlier in the list have a higher precedence than
+those which come later. Additionally, read-only or read-write permissions of
+the branch can be specified by appending =ro or =rw (default) to each
+directory.
+
+Syntax:
+
+ dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
+
+Example:
+
+ dirs=/writable_branch=rw:/read-only_branch=ro
+
+
+DYNAMIC BRANCH MANAGEMENT AND REMOUNTS
+======================================
+
+You can remount a union and change its overall mode, or reconfigure the
+branches, as follows.
+
+To downgrade a union from read-write to read-only:
+
+# mount -t unionfs -o remount,ro none MOUNTPOINT
+
+To upgrade a union from read-only to read-write:
+
+# mount -t unionfs -o remount,rw none MOUNTPOINT
+
+To delete a branch /foo, regardless where it is in the current union:
+
+# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
+
+To insert (add) a branch /foo before /bar:
+
+# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
+
+To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
+
+# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
+
+To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
+new highest-priority branch), you can use the above syntax, or use a short
+hand version as follows:
+
+# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
+
+To append a branch to the very end (new lowest-priority branch):
+
+# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
+
+To append a branch to the very end (new lowest-priority branch), in
+read-only mode:
+
+# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT
+
+Finally, to change the mode of one existing branch, say /foo, from read-only
+to read-write, and change /bar from read-write to read-only:
+
+# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
+
+
+CACHE CONSISTENCY
+=================
+
+If you modify any file on any of the lower branches directly, while there is
+a Unionfs 2.0 mounted above any of those branches, you should tell Unionfs
+to purge its caches and re-get the objects. To do that, you have to
+increment the generation number of the superblock using the following
+command:
+
+# mount -t unionfs -o remount,incgen none MOUNTPOINT
+
+Note that the older way of incrementing the generation number using an
+ioctl, is no longer supported in Unionfs 2.0. Ioctls in general are not
+encouraged. Plus, an ioctl is per-file concept, whereas the generation
+number is a per-file-system concept. Worse, such an ioctl requires an open
+file, which then has to be invalidated by the very nature of the generation
+number increase (read: the old generation increase ioctl was pretty racy).
+
+
+For more information, see <http://unionfs.filesystems.org/>.
--- linux-source-2.6.22-2.6.22.orig/Documentation/filesystems/unionfs/issues.txt
+++ linux-source-2.6.22-2.6.22/Documentation/filesystems/unionfs/issues.txt
@@ -0,0 +1,12 @@
+KNOWN Unionfs 2.1 ISSUES:
+=========================
+
+1. Unionfs should not use lookup_one_len() on the underlying f/s as it
+ confuses NFSv4. Currently, unionfs_lookup() passes lookup intents to the
+ lower file-system, this eliminates part of the problem. The remaining
+ calls to lookup_one_len may need to be changed to pass an intent. We are
+ currently introducing VFS changes to fs/namei.c's do_path_lookup() to
+ allow proper file lookup and opening in stackable file systems.
+
+
+For more information, see <http://unionfs.filesystems.org/>.
--- linux-source-2.6.22-2.6.22.orig/Documentation/filesystems/unionfs/00-INDEX
+++ linux-source-2.6.22-2.6.22/Documentation/filesystems/unionfs/00-INDEX
@@ -0,0 +1,10 @@
+00-INDEX
+ - this file.
+concepts.txt
+ - A brief introduction of concepts.
+issues.txt
+ - A summary of known issues with unionfs.
+rename.txt
+ - Information regarding rename operations.
+usage.txt
+ - Usage information and examples.
--- linux-source-2.6.22-2.6.22.orig/Documentation/filesystems/unionfs/rename.txt
+++ linux-source-2.6.22-2.6.22/Documentation/filesystems/unionfs/rename.txt
@@ -0,0 +1,31 @@
+Rename is a complex beast. The following table shows which rename(2) operations
+should succeed and which should fail.
+
+o: success
+E: error (either unionfs or vfs)
+X: EXDEV
+
+none = file does not exist
+file = file is a file
+dir = file is an empty directory
+child= file is a non-empty directory
+wh = file is a directory containing only whiteouts; this makes it logically
+ empty
+
+ none file dir child wh
+file o o E E E
+dir o E o E o
+child X E X E X
+wh o E o E o
+
+
+Renaming directories:
+=====================
+
+Whenever an empty (either physically or logically) directory is being renamed,
+the following sequence of events should take place:
+
+1) Remove whiteouts from both source and destination directory
+2) Rename source to destination
+3) Make destination opaque to prevent anything under it from showing up
+
--- linux-source-2.6.22-2.6.22.orig/Documentation/filesystems/unionfs/concepts.txt
+++ linux-source-2.6.22-2.6.22/Documentation/filesystems/unionfs/concepts.txt
@@ -0,0 +1,181 @@
+Unionfs 2.0 CONCEPTS:
+=====================
+
+This file describes the concepts needed by a namespace unification file
+system.
+
+
+Branch Priority:
+================
+
+Each branch is assigned a unique priority - starting from 0 (highest
+priority). No two branches can have the same priority.
+
+
+Branch Mode:
+============
+
+Each branch is assigned a mode - read-write or read-only. This allows
+directories on media mounted read-write to be used in a read-only manner.
+
+
+Whiteouts:
+==========
+
+A whiteout removes a file name from the namespace. Whiteouts are needed when
+one attempts to remove a file on a read-only branch.
+
+Suppose we have a two-branch union, where branch 0 is read-write and branch
+1 is read-only. And a file 'foo' on branch 1:
+
+./b0/
+./b1/
+./b1/foo
+
+The unified view would simply be:
+
+./union/
+./union/foo
+
+Since 'foo' is stored on a read-only branch, it cannot be removed. A
+whiteout is used to remove the name 'foo' from the unified namespace. Again,
+since branch 1 is read-only, the whiteout cannot be created there. So, we
+try on a higher priority (lower numerically) branch and create the whiteout
+there.
+
+./b0/
+./b0/.wh.foo
+./b1/
+./b1/foo
+
+Later, when Unionfs traverses branches (due to lookup or readdir), it
+eliminates 'foo' from the namespace (as well as the whiteout itself.)
+
+
+Duplicate Elimination:
+======================
+
+It is possible for files on different branches to have the same name.
+Unionfs then has to select which instance of the file to show to the user.
+Given the fact that each branch has a priority associated with it, the
+simplest solution is to take the instance from the highest priority
+(numerically lowest value) and "hide" the others.
+
+
+Copyup:
+=======
+
+When a change is made to the contents of a file's data or meta-data, they
+have to be stored somewhere. The best way is to create a copy of the
+original file on a branch that is writable, and then redirect the write
+through to this copy. The copy must be made on a higher priority branch so
+that lookup and readdir return this newer "version" of the file rather than
+the original (see duplicate elimination).
+
+
+Cache Coherency:
+================
+
+Unionfs users often want to be able to modify files and directories directly
+on the lower branches, and have those changes be visible at the Unionfs
+level. This means that data (e.g., pages) and meta-data (dentries, inodes,
+open files, etc.) have to be synchronized between the upper and lower
+layers. In other words, the newest changes from a layer below have to be
+propagated to the Unionfs layer above. If the two layers are not in sync, a
+cache incoherency ensues, which could lead to application failures and even
+oopses. The Linux kernel, however, has a rather limited set of mechanisms
+to ensure this inter-layer cache coherency---so Unionfs has to do most of
+the hard work on its own.
+
+Maintaining Invariants:
+
+The way Unionfs ensures cache coherency is as follows. At each entry point
+to a Unionfs file system method, we call a utility function to validate the
+primary objects of this method. Generally, we call unionfs_file_revalidate
+on open files, and __unionfs_d_revalidate_chain on dentries (which also
+validates inodes). These utility functions check to see whether the upper
+Unionfs object is in sync with any of the lower objects that it represents.
+The checks we perform include whether the Unionfs superblock has a newer
+generation number, or if any of the lower objects mtime's or ctime's are
+newer. (Note: generation numbers change when branch-management commands are
+issued, so in a way, maintaining cache coherency is also very important for
+branch-management.) If indeed we determine that any Unionfs object is no
+longer in sync with its lower counterparts, then we rebuild that object
+similarly to how we do so for branch-management.
+
+While rebuilding Unionfs's objects, we also purge any page mappings and
+truncate inode pages (see fs/unionfs/dentry.c:purge_inode_data). This is to
+ensure that Unionfs will re-get the newer data from the lower branches. We
+perform this purging only if the Unionfs operation in question is a reading
+operation; if Unionfs is performing a data writing operation (e.g., ->write,
+->commit_write, etc.) then we do NOT flush the lower mappings/pages: this is
+because (1) a self-deadlock could occur and (2) the upper Unionfs pages are
+considered more authoritative anyway, as they are newer and will overwrite
+any lower pages.
+
+Unionfs maintains the following important invariant regarding mtime's,
+ctime's, and atime's: the upper inode object's times are the max() of all of
+the lower ones. For non-directory objects, there's only one object below,
+so the mapping is simple; for directory objects, there could be multiple
+lower objects and we have to sync up with the newest one of all the lower
+ones. This invariant is important to maintain, especially for directories
+(besides, we need this to be POSIX compliant). A union could comprise
+multiple writable branches, each of which could change. If we don't reflect
+the newest possible mtime/ctime, some applications could fail. For example,
+NFSv2/v3 exports check for newer directory mtimes on the server to determine
+if the client-side attribute cache should be purged.
+
+To maintain these important invariants, of course, Unionfs carefully
+synchronizes upper and lower times in various places. For example, if we
+copy-up a file to a top-level branch, the parent directory where the file
+was copied up to will now have a new mtime: so after a successful copy-up,
+we sync up with the new top-level branch's parent directory mtime.
+
+Implementation:
+
+This cache-coherency implementation is efficient because it defers any
+synchronizing between the upper and lower layers until absolutely needed.
+Consider, for example, a common situation where users perform a lot of lower
+changes, such as untarring a whole package. While these take place,
+typically the user doesn't access the files via Unionfs; only after the
+lower changes are done, does the user try to access the lower files. With
+our cache-coherency implementation, the entirety of the changes to the lower
+branches will not result in a single CPU cycle spent at the Unionfs level
+until the user invokes a system call that goes through Unionfs.
+
+We have considered two alternate cache-coherency designs. (1) Using the
+dentry/inode notify functionality to register interest in finding out about
+any lower changes. This is a somewhat limited and also a heavy-handed
+approach which could result in many notifications to the Unionfs layer upon
+each small change at the lower layer (imagine a file being modified multiple
+times in rapid succession). (2) Rewriting the VFS to support explicit
+callbacks from lower objects to upper objects. We began exploring such an
+implementation, but found it to be very complicated--it would have resulted
+in massive VFS/MM changes which are unlikely to be accepted by the LKML
+community. We therefore believe that our current cache-coherency design and
+implementation represent the best approach at this time.
+
+Limitations:
+
+Our implementation works in that as long as a user process will have caused
+Unionfs to be called, directly or indirectly, even to just do
+->d_revalidate; then we will have purged the current Unionfs data and the
+process will see the new data. For example, a process that continually
+re-reads the same file's data will see the NEW data as soon as the lower
+file had changed, upon the next read(2) syscall (even if the file is still
+open!) However, this doesn't work when the process re-reads the open file's
+data via mmap(2) (unless the user unmaps/closes the file and remaps/reopens
+it). Once we respond to ->readpage(s), then the kernel maps the page into
+the process's address space and there doesn't appear to be a way to force
+the kernel to invalidate those pages/mappings, and force the process to
+re-issue ->readpage. If there's a way to invalidate active mappings and
+force a ->readpage, let us know please (invalidate_inode_pages2 doesn't do
+the trick).
+
+Our current Unionfs code has to perform many file-revalidation calls. It
+would be really nice if the VFS would export an optional file system hook
+->file_revalidate (similarly to dentry->d_revalidate) that will be called
+before each VFS op that has a "struct file" in it.
+
+
+For more information, see <http://unionfs.filesystems.org/>.
--- linux-source-2.6.22-2.6.22.orig/Documentation/filesystems/00-INDEX
+++ linux-source-2.6.22-2.6.22/Documentation/filesystems/00-INDEX
@@ -84,6 +84,8 @@
- info and mount options for the UDF filesystem.
ufs.txt
- info on the ufs filesystem.
+unionfs/
+ - info on the unionfs filesystem
vfat.txt
- info on using the VFAT filesystem used in Windows NT and Windows 95
vfs.txt
--- linux-source-2.6.22-2.6.22.orig/Documentation/dvb/get_dvb_firmware
+++ linux-source-2.6.22-2.6.22/Documentation/dvb/get_dvb_firmware
@@ -56,7 +56,7 @@
sub sp8870 {
my $sourcefile = "tt_Premium_217g.zip";
- my $url = "http://www.technotrend.de/new/217g/$sourcefile";
+ my $url = "http://www.softwarepatch.pl/9999ccd06a4813cb827dbb0005071c71/$sourcefile";
my $hash = "53970ec17a538945a6d8cb608a7b3899";
my $outfile = "dvb-fe-sp8870.fw";
my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
@@ -110,21 +110,21 @@
}
sub tda10046 {
- my $sourcefile = "tt_budget_217g.zip";
- my $url = "http://www.technotrend.de/new/217g/$sourcefile";
- my $hash = "6a7e1e2f2644b162ff0502367553c72d";
- my $outfile = "dvb-fe-tda10046.fw";
- my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
+ my $sourcefile = "TT_PCI_2.19h_28_11_2006.zip";
+ my $url = "http://technotrend-online.com/download/software/219/$sourcefile";
+ my $hash = "6a7e1e2f2644b162ff0502367553c72d";
+ my $outfile = "dvb-fe-tda10046.fw";
+ my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
- checkstandard();
+ checkstandard();
- wgetfile($sourcefile, $url);
- unzip($sourcefile, $tmpdir);
- extract("$tmpdir/software/OEM/PCI/App/ttlcdacc.dll", 0x3f731, 24478, "$tmpdir/fwtmp");
- verify("$tmpdir/fwtmp", $hash);
- copy("$tmpdir/fwtmp", $outfile);
+ wgetfile($sourcefile, $url);
+ unzip($sourcefile, $tmpdir);
+ extract("$tmpdir/TT_PCI_2.19h_28_11_2006/software/OEM/PCI/App/ttlcdacc.dll", 0x65389, 24478, "$tmpdir/fwtmp");
+ verify("$tmpdir/fwtmp", $hash);
+ copy("$tmpdir/fwtmp", $outfile);
- $outfile;
+ $outfile;
}
sub tda10046lifeview {
--- linux-source-2.6.22-2.6.22.orig/Documentation/dsdt-initrd.txt
+++ linux-source-2.6.22-2.6.22/Documentation/dsdt-initrd.txt
@@ -0,0 +1,98 @@
+ACPI Custom DSDT read from initramfs
+
+2003 by Markuss Gaugusch < dsdt at gaugusch dot org >
+Special thanks go to Thomas Renninger from SuSE, who updated the patch for
+2.6.0 and later modified it to read inside initramfs
+2004 - 2007 maintained by Eric Piel < eric dot piel at tremplin-utc dot net >
+
+This option is intended for people who would like to hack their DSDT and don't want
+to recompile their kernel after every change. It can also be useful to distros
+which offer pre-compiled kernels and want to allow their users to use a
+modified DSDT. In the Kernel config, enable the initial RAM filesystem support
+(in Device Drivers|Block Devices) and enable ACPI_CUSTOM_DSDT_INITRD at the ACPI
+options (General Setup|ACPI Support|Read custom DSDT from initrd).
+
+A custom DSDT (Differentiated System Description Table) is useful when your
+computer uses ACPI but problems occur due to a broken implementation. Typically,
+your computer works but there are some troubles with the hardware detection or
+the power management. You can check that troubles come from errors in the DSDT by
+activating the ACPI debug option and reading the logs. This table is provided
+by the BIOS, therefore it might be a good idea to check for BIOS update on your
+vendor website before going any further. Errors are often caused by vendors
+testing their hardware only with Windows or because there is code which is
+executed only on a specific OS with a specific version and Linux hasn't been
+considered during the development.
+
+Before you run away from customising your DSDT, you should note that already
+corrected tables are available for a fair number of computers on this web-page:
+http://acpi.sf.net/dsdt . If you are one of the unlucky ones who cannot find
+their hardware in this database, you can modify your DSDT by yourself. This
+process is less painful than it sounds. Download the Intel ASL
+compiler/decompiler at http://www.intel.com/technology/IAPC/acpi/downloads.htm .
+As root, you then have to dump your DSDT and decompile it. By using the
+compiler messages as well as the kernel ACPI debug messages and the reference book
+(available at the Intel website and also at http://www.acpi.info), it is quite
+easy to obtain a fully working table.
+
+Once your new DSDT is ready you'll have to add it to an initrd so that the
+kernel can read the table at the very beginning of the boot. As the file has
+to be accessed very early during the boot process the initrd has to be an
+initramfs. The file is contained in the initramfs under the name /DSDT.aml .
+To obtain such an initrd, you might have to modify your mkinitrd script or you
+can add it later to the initrd with the script appended to this document. The
+command will look like:
+initrd-add-dsdt initrd.img my-dsdt.aml
+
+In case you don't use any initrd, the possibilities you have are to either start
+using one (try mkinitrd or yaird), or use the "Include Custom DSDT" configure
+option to directly include your DSDT inside the kernel.
+
+The message "Looking for DSDT in initramfs..." will tell you if the DSDT was
+found or not. If you need to update your DSDT, generate a new initrd and
+perform the steps above. Don't forget that with Lilo, you'll have to re-run it.
+
+
+======================= Here starts initrd-add-dsdt ===============================
+#!/bin/bash
+# Adds a DSDT file to the initrd (if it's an initramfs)
+# first argument is the name of archive
+# second argument is the name of the file to add
+# The file will be copied as /DSDT.aml
+
+# 20060126: fix "Premature end of file" with some old cpio (Roland Robic)
+# 20060205: this time it should really work
+
+# check the arguments
+if [ $# -ne 2 ]; then
+ program_name=$(basename $0)
+ echo "\
+$program_name: too few arguments
+Usage: $program_name initrd-name.img DSDT-to-add.aml
+Adds a DSDT file to an initrd (in initramfs format)
+
+ initrd-name.img: filename of the initrd in initramfs format
+ DSDT-to-add.aml: filename of the DSDT file to add
+ " 1>&2
+ exit 1
+fi
+
+# we should check it's an initramfs
+
+tempcpio=$(mktemp -d)
+# cleanup on exit, hangup, interrupt, quit, termination
+trap 'rm -rf $tempcpio' 0 1 2 3 15
+
+# extract the archive
+gunzip -c "$1" > "$tempcpio"/initramfs.cpio || exit 1
+
+# copy the DSDT file at the root of the directory so that we can call it "/DSDT.aml"
+cp -f "$2" "$tempcpio"/DSDT.aml
+
+# add the file
+cd "$tempcpio"
+(echo DSDT.aml | cpio --quiet -H newc -o -A -O "$tempcpio"/initramfs.cpio) || exit 1
+cd "$OLDPWD"
+
+# re-compress the archive
+gzip -c "$tempcpio"/initramfs.cpio > "$1"
+
--- linux-source-2.6.22-2.6.22.orig/Documentation/kernel-parameters.txt
+++ linux-source-2.6.22-2.6.22/Documentation/kernel-parameters.txt
@@ -850,11 +850,6 @@
lasi= [HW,SCSI] PARISC LASI driver for the 53c700 chip
Format: addr:,irq:
- legacy_serial.force [HW,IA-32,X86-64]
- Probe for COM ports at legacy addresses even
- if PNPBIOS or ACPI should describe them. This
- is for working around firmware defects.
-
llsc*= [IA64] See function print_params() in
arch/ia64/sn/kernel/llsc4.c.
@@ -1312,6 +1307,8 @@
Mechanism 1.
conf2 [IA-32] Force use of PCI Configuration
Mechanism 2.
+ mmconf [IA-32,X86_64] Enable use of MMCONFIG for PCI
+ Configuration
nommconf [IA-32,X86_64] Disable use of MMCONFIG for PCI
Configuration
nomsi [MSI] If the PCI_MSI kernel config parameter is
--- linux-source-2.6.22-2.6.22.orig/crypto/blkcipher.c
+++ linux-source-2.6.22-2.6.22/crypto/blkcipher.c
@@ -59,11 +59,13 @@
scatterwalk_unmap(walk->dst.virt.addr, 1);
}
+/* Get a spot of the specified length that does not straddle a page.
+ * The caller needs to ensure that there is enough space for this operation.
+ */
static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len)
{
- if (offset_in_page(start + len) < len)
- return (u8 *)((unsigned long)(start + len) & PAGE_MASK);
- return start;
+ u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK);
+ return start > end_page ? start : end_page;
}
static inline unsigned int blkcipher_done_slow(struct crypto_blkcipher *tfm,
@@ -155,7 +157,8 @@
if (walk->buffer)
goto ok;
- n = bsize * 2 + (alignmask & ~(crypto_tfm_ctx_alignment() - 1));
+ n = bsize * 3 - (alignmask + 1) +
+ (alignmask & ~(crypto_tfm_ctx_alignment() - 1));
walk->buffer = kmalloc(n, GFP_ATOMIC);
if (!walk->buffer)
return blkcipher_walk_done(desc, walk, -ENOMEM);
--- linux-source-2.6.22-2.6.22.orig/sound/oss/via82cxxx_audio.c
+++ linux-source-2.6.22-2.6.22/sound/oss/via82cxxx_audio.c
@@ -2104,6 +2104,7 @@
{
struct via_info *card = vma->vm_private_data;
struct via_channel *chan = &card->ch_out;
+ unsigned long max_bufs;
struct page *dmapage;
unsigned long pgoff;
int rd, wr;
@@ -2127,14 +2128,11 @@
rd = card->ch_in.is_mapped;
wr = card->ch_out.is_mapped;
-#ifndef VIA_NDEBUG
- {
- unsigned long max_bufs = chan->frag_number;
- if (rd && wr) max_bufs *= 2;
- /* via_dsp_mmap() should ensure this */
- assert (pgoff < max_bufs);
- }
-#endif
+ max_bufs = chan->frag_number;
+ if (rd && wr)
+ max_bufs *= 2;
+ if (pgoff >= max_bufs)
+ return NOPAGE_SIGBUS;
/* if full-duplex (read+write) and we have two sets of bufs,
* then the playback buffers come first, sez soundcard.c */
--- linux-source-2.6.22-2.6.22.orig/sound/core/seq/oss/seq_oss_synth.c
+++ linux-source-2.6.22-2.6.22/sound/core/seq/oss/seq_oss_synth.c
@@ -599,6 +599,9 @@
{
struct seq_oss_synth *rec;
+ if (dev < 0 || dev >= dp->max_synthdev)
+ return -ENXIO;
+
if (dp->synths[dev].is_midi) {
struct midi_info minf;
snd_seq_oss_midi_make_info(dp, dp->synths[dev].midi_mapped, &minf);
--- linux-source-2.6.22-2.6.22.orig/sound/core/memalloc.c
+++ linux-source-2.6.22-2.6.22/sound/core/memalloc.c
@@ -27,6 +27,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -481,53 +482,54 @@
#define SND_MEM_PROC_FILE "driver/snd-page-alloc"
static struct proc_dir_entry *snd_mem_proc;
-static int snd_mem_proc_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+static int snd_mem_proc_read(struct seq_file *seq, void *offset)
{
- int len = 0;
long pages = snd_allocated_pages >> (PAGE_SHIFT-12);
struct snd_mem_list *mem;
int devno;
static char *types[] = { "UNKNOWN", "CONT", "DEV", "DEV-SG", "SBUS" };
mutex_lock(&list_mutex);
- len += snprintf(page + len, count - len,
- "pages : %li bytes (%li pages per %likB)\n",
- pages * PAGE_SIZE, pages, PAGE_SIZE / 1024);
+ seq_printf(seq, "pages : %li bytes (%li pages per %likB)\n",
+ pages * PAGE_SIZE, pages, PAGE_SIZE / 1024);
devno = 0;
list_for_each_entry(mem, &mem_list_head, list) {
devno++;
- len += snprintf(page + len, count - len,
- "buffer %d : ID %08x : type %s\n",
- devno, mem->id, types[mem->buffer.dev.type]);
- len += snprintf(page + len, count - len,
- " addr = 0x%lx, size = %d bytes\n",
- (unsigned long)mem->buffer.addr, (int)mem->buffer.bytes);
+ seq_printf(seq, "buffer %d : ID %08x : type %s\n",
+ devno, mem->id, types[mem->buffer.dev.type]);
+ seq_printf(seq, " addr = 0x%lx, size = %d bytes\n",
+ (unsigned long)mem->buffer.addr,
+ (int)mem->buffer.bytes);
}
mutex_unlock(&list_mutex);
- return len;
+ return 0;
+}
+
+static int snd_mem_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, snd_mem_proc_read, NULL);
}
/* FIXME: for pci only - other bus? */
#ifdef CONFIG_PCI
#define gettoken(bufp) strsep(bufp, " \t\n")
-static int snd_mem_proc_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+static ssize_t snd_mem_proc_write(struct file *file, const char __user * buffer,
+ size_t count, loff_t * ppos)
{
char buf[128];
char *token, *p;
- if (count > ARRAY_SIZE(buf) - 1)
- count = ARRAY_SIZE(buf) - 1;
+ if (count > sizeof(buf) - 1)
+ return -EINVAL;
if (copy_from_user(buf, buffer, count))
return -EFAULT;
- buf[ARRAY_SIZE(buf) - 1] = '\0';
+ buf[count] = '\0';
p = buf;
token = gettoken(&p);
if (! token || *token == '#')
- return (int)count;
+ return count;
if (strcmp(token, "add") == 0) {
char *endp;
int vendor, device, size, buffers;
@@ -548,7 +550,7 @@
(buffers = simple_strtol(token, NULL, 0)) <= 0 ||
buffers > 4) {
printk(KERN_ERR "snd-page-alloc: invalid proc write format\n");
- return (int)count;
+ return count;
}
vendor &= 0xffff;
device &= 0xffff;
@@ -560,7 +562,7 @@
if (pci_set_dma_mask(pci, mask) < 0 ||
pci_set_consistent_dma_mask(pci, mask) < 0) {
printk(KERN_ERR "snd-page-alloc: cannot set DMA mask %lx for pci %04x:%04x\n", mask, vendor, device);
- return (int)count;
+ return count;
}
}
for (i = 0; i < buffers; i++) {
@@ -570,7 +572,7 @@
size, &dmab) < 0) {
printk(KERN_ERR "snd-page-alloc: cannot allocate buffer pages (size = %d)\n", size);
pci_dev_put(pci);
- return (int)count;
+ return count;
}
snd_dma_reserve_buf(&dmab, snd_dma_pci_buf_id(pci));
}
@@ -596,9 +598,21 @@
free_all_reserved_pages();
else
printk(KERN_ERR "snd-page-alloc: invalid proc cmd\n");
- return (int)count;
+ return count;
}
#endif /* CONFIG_PCI */
+
+static const struct file_operations snd_mem_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = snd_mem_proc_open,
+ .read = seq_read,
+#ifdef CONFIG_PCI
+ .write = snd_mem_proc_write,
+#endif
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
#endif /* CONFIG_PROC_FS */
/*
@@ -609,12 +623,8 @@
{
#ifdef CONFIG_PROC_FS
snd_mem_proc = create_proc_entry(SND_MEM_PROC_FILE, 0644, NULL);
- if (snd_mem_proc) {
- snd_mem_proc->read_proc = snd_mem_proc_read;
-#ifdef CONFIG_PCI
- snd_mem_proc->write_proc = snd_mem_proc_write;
-#endif
- }
+ if (snd_mem_proc)
+ snd_mem_proc->proc_fops = &snd_mem_proc_fops;
#endif
return 0;
}
--- linux-source-2.6.22-2.6.22.orig/sound/pci/hda/hda_intel.c
+++ linux-source-2.6.22-2.6.22/sound/pci/hda/hda_intel.c
@@ -72,6 +72,12 @@
module_param(enable_msi, int, 0);
MODULE_PARM_DESC(enable_msi, "Enable Message Signaled Interrupt (MSI)");
+/* Workaround for Poulsbo SI bugs that affect the stream descriptor offset and
+ * corresponding control bits
+ */
+static int sd_offset_fixup;
+static int sd_bit_fixup;
+
/* just for backward compatibility */
static int enable;
@@ -79,21 +85,21 @@
MODULE_LICENSE("GPL");
MODULE_SUPPORTED_DEVICE("{{Intel, ICH6},"
- "{Intel, ICH6M},"
- "{Intel, ICH7},"
- "{Intel, ESB2},"
- "{Intel, ICH8},"
- "{Intel, ICH9},"
- "{ATI, SB450},"
- "{ATI, SB600},"
- "{ATI, RS600},"
- "{ATI, RS690},"
- "{ATI, RS780},"
- "{ATI, R600},"
- "{VIA, VT8251},"
- "{VIA, VT8237A},"
- "{SiS, SIS966},"
- "{ULI, M5461}}");
+ "{Intel, ICH6M},"
+ "{Intel, ICH7},"
+ "{Intel, ESB2},"
+ "{Intel, ICH8},"
+ "{Intel, ICH9},"
+ "{ATI, SB450},"
+ "{ATI, SB600},"
+ "{ATI, RS600},"
+ "{ATI, RS690},"
+ "{ATI, RS780},"
+ "{ATI, R600},"
+ "{VIA, VT8251},"
+ "{VIA, VT8237A},"
+ "{SiS, SIS966},"
+ "{ULI, M5461}}");
MODULE_DESCRIPTION("Intel HDA driver");
#define SFX "hda-intel: "
@@ -252,6 +258,10 @@
/* Defines for Nvidia HDA support */
#define NVIDIA_HDA_TRANSREG_ADDR 0x4e
#define NVIDIA_HDA_ENABLE_COHBITS 0x0f
+/* Defines for Intel SCH HDA snoop control */
+#define INTEL_SCH_HDA_DEVC 0x78
+#define INTEL_SCH_HDA_DEVC_NOSNOOP (0x1<<11)
+
/*
*/
@@ -367,30 +377,30 @@
/*
* macros for easy use
*/
-#define azx_writel(chip,reg,value) \
+#define azx_writel(chip, reg, value) \
writel(value, (chip)->remap_addr + ICH6_REG_##reg)
-#define azx_readl(chip,reg) \
+#define azx_readl(chip, reg) \
readl((chip)->remap_addr + ICH6_REG_##reg)
-#define azx_writew(chip,reg,value) \
+#define azx_writew(chip, reg, value) \
writew(value, (chip)->remap_addr + ICH6_REG_##reg)
-#define azx_readw(chip,reg) \
+#define azx_readw(chip, reg) \
readw((chip)->remap_addr + ICH6_REG_##reg)
-#define azx_writeb(chip,reg,value) \
+#define azx_writeb(chip, reg, value) \
writeb(value, (chip)->remap_addr + ICH6_REG_##reg)
-#define azx_readb(chip,reg) \
+#define azx_readb(chip, reg) \
readb((chip)->remap_addr + ICH6_REG_##reg)
-#define azx_sd_writel(dev,reg,value) \
+#define azx_sd_writel(dev, reg, value) \
writel(value, (dev)->sd_addr + ICH6_REG_##reg)
-#define azx_sd_readl(dev,reg) \
+#define azx_sd_readl(dev, reg) \
readl((dev)->sd_addr + ICH6_REG_##reg)
-#define azx_sd_writew(dev,reg,value) \
+#define azx_sd_writew(dev, reg, value) \
writew(value, (dev)->sd_addr + ICH6_REG_##reg)
-#define azx_sd_readw(dev,reg) \
+#define azx_sd_readw(dev, reg) \
readw((dev)->sd_addr + ICH6_REG_##reg)
-#define azx_sd_writeb(dev,reg,value) \
+#define azx_sd_writeb(dev, reg, value) \
writeb(value, (dev)->sd_addr + ICH6_REG_##reg)
-#define azx_sd_readb(dev,reg) \
+#define azx_sd_readb(dev, reg) \
readb((dev)->sd_addr + ICH6_REG_##reg)
/* for pcm support */
@@ -528,7 +538,7 @@
struct azx *chip = codec->bus->private_data;
unsigned long timeout;
- again:
+again:
timeout = jiffies + msecs_to_jiffies(1000);
do {
if (chip->polling_mode) {
@@ -759,8 +769,19 @@
static void azx_stream_start(struct azx *chip, struct azx_dev *azx_dev)
{
/* enable SIE */
- azx_writeb(chip, INTCTL,
- azx_readb(chip, INTCTL) | (1 << azx_dev->index));
+ if (!sd_bit_fixup) {
+ azx_writel(chip, INTCTL,
+ azx_readl(chip, INTCTL) | (1 << azx_dev->index));
+ } else {
+ if (azx_dev->index < sd_bit_fixup) {
+ azx_writel(chip, INTCTL,
+ azx_readl(chip, INTCTL) | (1 << azx_dev->index));
+ } else {
+ azx_writel(chip, INTCTL,
+ azx_readl(chip, INTCTL) |
+ (1 << (azx_dev->index+sd_bit_fixup)));
+ }
+ }
/* set DMA start and interrupt mask */
azx_sd_writeb(azx_dev, SD_CTL, azx_sd_readb(azx_dev, SD_CTL) |
SD_CTL_DMA_START | SD_INT_MASK);
@@ -774,8 +795,19 @@
~(SD_CTL_DMA_START | SD_INT_MASK));
azx_sd_writeb(azx_dev, SD_STS, SD_INT_MASK); /* to be sure */
/* disable SIE */
- azx_writeb(chip, INTCTL,
- azx_readb(chip, INTCTL) & ~(1 << azx_dev->index));
+ if (!sd_bit_fixup) {
+ azx_writeb(chip, INTCTL,
+ azx_readb(chip, INTCTL) & ~(1 << azx_dev->index));
+ } else {
+ if (azx_dev->index < sd_bit_fixup ) {
+ azx_writeb(chip, INTCTL,
+ azx_readb(chip, INTCTL) & ~(1 << azx_dev->index));
+ } else {
+ azx_writeb(chip, INTCTL,
+ azx_readb(chip, INTCTL) & ~(1 << (azx_dev->index+sd_bit_fixup)));
+
+ }
+ }
}
@@ -785,7 +817,7 @@
static void azx_init_chip(struct azx *chip)
{
unsigned char reg;
-
+ unsigned short reg16;
/* Clear bits 0-2 of PCI register TCSEL (at offset 0x44)
* TCSEL == Traffic Class Select Register, which sets PCI express QOS
* Ensuring these bits are 0 clears playback static on some HD Audio codecs
@@ -822,6 +854,22 @@
pci_write_config_byte(chip->pci,NVIDIA_HDA_TRANSREG_ADDR,
(reg & 0xf0) | NVIDIA_HDA_ENABLE_COHBITS);
break;
+ case AZX_DRIVER_ICH:
+ /* Snoop is disabled in SCH (Poulsbo) at reset, enable it */
+ if (chip->pci->device == PCI_DEVICE_ID_INTEL_POULSBO_HDA) {
+ pci_read_config_word(chip->pci, INTEL_SCH_HDA_DEVC, \
+ ®16);
+ if (reg16 & INTEL_SCH_HDA_DEVC_NOSNOOP ) {
+ pci_write_config_word(chip->pci,
+ INTEL_SCH_HDA_DEVC, \
+ reg16 & (~INTEL_SCH_HDA_DEVC_NOSNOOP));
+ pci_read_config_word(chip->pci,
+ INTEL_SCH_HDA_DEVC, ®16);
+ snd_printk(KERN_INFO "HDA snoop disabled, try to enable ... %s\n", \
+ (reg16&INTEL_SCH_HDA_DEVC_NOSNOOP)? "Failed" : "OK");
+ }
+ }
+ break;
}
}
@@ -1396,11 +1444,24 @@
struct azx_dev *azx_dev = &chip->azx_dev[i];
azx_dev->bdl = (u32 *)(chip->bdl.area + off);
azx_dev->bdl_addr = chip->bdl.addr + off;
- azx_dev->posbuf = (u32 __iomem *)(chip->posbuf.area + i * 8);
/* offset: SDI0=0x80, SDI1=0xa0, ... SDO3=0x160 */
- azx_dev->sd_addr = chip->remap_addr + (0x20 * i + 0x80);
- /* int mask: SDI0=0x01, SDI1=0x02, ... SDO3=0x80 */
- azx_dev->sd_int_sta_mask = 1 << i;
+ if (!sd_bit_fixup) {
+ azx_dev->posbuf = (u32 __iomem *)(chip->posbuf.area + i * 8);
+ azx_dev->sd_addr = chip->remap_addr + (0x20 * i + 0x80);
+ /* int mask: SDI0=0x01, SDI1=0x02, ... SDO3=0x80 */
+ azx_dev->sd_int_sta_mask = 1 << i;
+ } else {
+ if (i < sd_bit_fixup) {
+ azx_dev->posbuf = (u32 __iomem *)(chip->posbuf.area + i * 8);
+ azx_dev->sd_addr = chip->remap_addr + (0x20 * i + 0x80);
+ azx_dev->sd_int_sta_mask = 1 << i;
+ } else {
+ azx_dev->sd_addr = chip->remap_addr + (0x20 * i + 0x80 + sd_offset_fixup);
+ azx_dev->posbuf = (u32 __iomem *)(chip->posbuf.area + (2+i) * 8);
+ azx_dev->sd_int_sta_mask = 1 << (i+sd_bit_fixup);
+ }
+ }
+
/* stream tag: must be non-zero and unique */
azx_dev->index = i;
azx_dev->stream_tag = i + 1;
@@ -1566,9 +1627,11 @@
{
struct azx *chip;
int err;
+ unsigned short stepping;
static struct snd_device_ops ops = {
.dev_free = azx_dev_free,
};
+ unsigned short gcap;
*rchip = NULL;
@@ -1646,10 +1709,39 @@
chip->capture_index_offset = ATIHDMI_CAPTURE_INDEX;
break;
default:
- chip->playback_streams = ICH6_NUM_PLAYBACK;
- chip->capture_streams = ICH6_NUM_CAPTURE;
- chip->playback_index_offset = ICH6_PLAYBACK_INDEX;
- chip->capture_index_offset = ICH6_CAPTURE_INDEX;
+ /* read number of streams from GCAP register instead of using
+ * hardcoded value
+ */
+ gcap = azx_readw(chip, GCAP);
+ if (!gcap) {
+ snd_printk(KERN_ERR "Device has no streams \n");
+ goto errout;
+ };
+ chip->playback_streams = (gcap&(0xF<<12))>>12;
+ chip->capture_streams = (gcap&(0xF<<8))>>8;
+ chip->playback_index_offset = (gcap&(0xF<<12))>>12;
+ chip->capture_index_offset = 0;
+ /* do fixup for poulsbo */
+ if (pci->device == PCI_DEVICE_ID_INTEL_POULSBO_HDA) {
+ snd_printk(KERN_INFO "Do fixup for Poulsbo ");
+ pci_bus_read_config_word(pci->bus, 0, 0x8, &stepping);
+ switch (stepping) {
+ case 0:
+ /* A2 has wrong OSD0 offset and control bits */
+ snd_printk(KERN_INFO "A2 stepping\n");
+ sd_offset_fixup = 0x40;
+ sd_bit_fixup = 0x2;
+ break;
+ case 2:
+ /* B0 moved OSD0 offset but not control bits */
+ snd_printk(KERN_INFO "B0 stepping\n");
+ sd_bit_fixup = 0x2;
+ break;
+ default:
+ snd_printk(KERN_ERR "Unknow stepping\n");
+ break;
+ }
+ }
break;
}
chip->num_streams = chip->playback_streams + chip->capture_streams;
@@ -1703,7 +1795,7 @@
*rchip = chip;
return 0;
- errout:
+errout:
azx_free(chip);
return err;
}
@@ -1771,6 +1863,7 @@
{ 0x8086, 0x284b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ICH }, /* ICH8 */
{ 0x8086, 0x293e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ICH }, /* ICH9 */
{ 0x8086, 0x293f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ICH }, /* ICH9 */
+ { 0x8086, 0x811b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ICH }, /* POULSBO */
{ 0x1002, 0x437b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ATI }, /* ATI SB450 */
{ 0x1002, 0x4383, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ATI }, /* ATI SB600 */
{ 0x1002, 0x793b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ATIHDMI }, /* ATI RS600 HDMI */
--- linux-source-2.6.22-2.6.22.orig/sound/pci/hda/patch_sigmatel.c
+++ linux-source-2.6.22-2.6.22/sound/pci/hda/patch_sigmatel.c
@@ -478,6 +478,8 @@
"Dell Inspiron 640m", STAC_REF),
SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f5,
"Dell Inspiron 1501", STAC_REF),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01dd,
+ "Dell Inspiron 1420", STAC_REF),
/* Panasonic */
SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-74", STAC_REF),
--- linux-source-2.6.22-2.6.22.orig/sound/usb/usx2y/usx2yhwdeppcm.c
+++ linux-source-2.6.22-2.6.22/sound/usb/usx2y/usx2yhwdeppcm.c
@@ -728,7 +728,7 @@
return -ENODEV;
}
area->vm_ops = &snd_usX2Y_hwdep_pcm_vm_ops;
- area->vm_flags |= VM_RESERVED;
+ area->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
area->vm_private_data = hw->private_data;
return 0;
}
--- linux-source-2.6.22-2.6.22.orig/sound/usb/usx2y/usX2Yhwdep.c
+++ linux-source-2.6.22-2.6.22/sound/usb/usx2y/usX2Yhwdep.c
@@ -88,7 +88,7 @@
us428->us428ctls_sharedmem->CtlSnapShotLast = -2;
}
area->vm_ops = &us428ctls_vm_ops;
- area->vm_flags |= VM_RESERVED;
+ area->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
area->vm_private_data = hw->private_data;
return 0;
}
--- linux-source-2.6.22-2.6.22.orig/fs/dcache.c
+++ linux-source-2.6.22-2.6.22/fs/dcache.c
@@ -1761,92 +1761,132 @@
}
/**
- * d_path - return the path of a dentry
+ * __d_path - return the path of a dentry
* @dentry: dentry to report
* @vfsmnt: vfsmnt to which the dentry belongs
* @root: root dentry
* @rootmnt: vfsmnt to which the root dentry belongs
* @buffer: buffer to return value in
* @buflen: buffer length
+ * @fail_deleted: what to return for deleted files
*
- * Convert a dentry into an ASCII path name. If the entry has been deleted
+ * Convert a dentry into an ASCII path name. If the entry has been deleted,
+ * then if @fail_deleted is true, ERR_PTR(-ENOENT) is returned. Otherwise,
* the string " (deleted)" is appended. Note that this is ambiguous.
*
- * Returns the buffer or an error code if the path was too long.
+ * If @dentry is not connected to @root, the path returned will be relative
+ * (i.e., it will not start with a slash).
*
- * "buflen" should be positive. Caller holds the dcache_lock.
+ * Returns the buffer or an error code.
*/
-static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
- struct dentry *root, struct vfsmount *rootmnt,
- char *buffer, int buflen)
-{
- char * end = buffer+buflen;
- char * retval;
- int namelen;
+char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
+ struct dentry *root, struct vfsmount *rootmnt,
+ char *buffer, int buflen, int fail_deleted)
+{
+ int namelen, is_slash, vfsmount_locked = 0;
+
+ if (buflen < 2)
+ return ERR_PTR(-ENAMETOOLONG);
+ buffer += --buflen;
+ *buffer = '\0';
- *--end = '\0';
- buflen--;
+ spin_lock(&dcache_lock);
if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
- buflen -= 10;
- end -= 10;
- if (buflen < 0)
+ if (fail_deleted) {
+ buffer = ERR_PTR(-ENOENT);
+ goto out;
+ }
+ if (buflen < 10)
goto Elong;
- memcpy(end, " (deleted)", 10);
+ buflen -= 10;
+ buffer -= 10;
+ memcpy(buffer, " (deleted)", 10);
}
-
- if (buflen < 1)
- goto Elong;
- /* Get '/' right */
- retval = end-1;
- *retval = '/';
-
- for (;;) {
+ while (dentry != root || vfsmnt != rootmnt) {
struct dentry * parent;
- if (dentry == root && vfsmnt == rootmnt)
- break;
if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
- /* Global root? */
- spin_lock(&vfsmount_lock);
- if (vfsmnt->mnt_parent == vfsmnt) {
- spin_unlock(&vfsmount_lock);
- goto global_root;
+ if (!vfsmount_locked) {
+ spin_lock(&vfsmount_lock);
+ vfsmount_locked = 1;
}
+ if (vfsmnt->mnt_parent == vfsmnt)
+ goto global_root;
dentry = vfsmnt->mnt_mountpoint;
vfsmnt = vfsmnt->mnt_parent;
- spin_unlock(&vfsmount_lock);
continue;
}
parent = dentry->d_parent;
prefetch(parent);
namelen = dentry->d_name.len;
- buflen -= namelen + 1;
- if (buflen < 0)
+ if (buflen < namelen + 1)
goto Elong;
- end -= namelen;
- memcpy(end, dentry->d_name.name, namelen);
- *--end = '/';
- retval = end;
+ buflen -= namelen + 1;
+ buffer -= namelen;
+ memcpy(buffer, dentry->d_name.name, namelen);
+ *--buffer = '/';
dentry = parent;
}
+ /* Get '/' right. */
+ if (*buffer != '/')
+ *--buffer = '/';
- return retval;
+out:
+ if (vfsmount_locked)
+ spin_unlock(&vfsmount_lock);
+ spin_unlock(&dcache_lock);
+ return buffer;
global_root:
+ /*
+ * We went past the (vfsmount, dentry) we were looking for and have
+ * either hit a root dentry, a lazily unmounted dentry, an
+ * unconnected dentry, or the file is on a pseudo filesystem.
+ */
namelen = dentry->d_name.len;
- buflen -= namelen;
- if (buflen < 0)
+ is_slash = (namelen == 1 && *dentry->d_name.name == '/');
+ if (is_slash || (dentry->d_sb->s_flags & MS_NOUSER)) {
+ /*
+ * Make sure we won't return a pathname starting with '/'.
+ *
+ * Historically, we also glue together the root dentry and
+ * remaining name for pseudo filesystems like pipefs, which
+ * have the MS_NOUSER flag set. This results in pathnames
+ * like "pipe:[439336]".
+ */
+ if (*buffer == '/') {
+ buffer++;
+ buflen++;
+ }
+ if (is_slash)
+ goto out;
+ }
+ if (buflen < namelen)
goto Elong;
- retval -= namelen-1; /* hit the slash */
- memcpy(retval, dentry->d_name.name, namelen);
- return retval;
+ buffer -= namelen;
+ memcpy(buffer, dentry->d_name.name, namelen);
+ goto out;
+
Elong:
- return ERR_PTR(-ENAMETOOLONG);
+ buffer = ERR_PTR(-ENAMETOOLONG);
+ goto out;
+}
+
+static char *__connect_d_path(char *path, char *buffer)
+{
+ if (!IS_ERR(path) && *path != '/') {
+ /* Pretend that disconnected paths are hanging off the root. */
+ if (path == buffer)
+ path = ERR_PTR(-ENAMETOOLONG);
+ else
+ *--path = '/';
+ }
+ return path;
}
/* write full pathname into buffer and return start of pathname */
-char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
- char *buf, int buflen)
+char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt, char *buf,
+ int buflen)
{
char *res;
struct vfsmount *rootmnt;
@@ -1866,9 +1906,8 @@
rootmnt = mntget(current->fs->rootmnt);
root = dget(current->fs->root);
read_unlock(¤t->fs->lock);
- spin_lock(&dcache_lock);
- res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
- spin_unlock(&dcache_lock);
+ res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen, 0);
+ res = __connect_d_path(res, buf);
dput(root);
mntput(rootmnt);
return res;
@@ -1915,10 +1954,10 @@
*/
asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
{
- int error;
+ int error, len;
struct vfsmount *pwdmnt, *rootmnt;
struct dentry *pwd, *root;
- char *page = (char *) __get_free_page(GFP_USER);
+ char *page = (char *) __get_free_page(GFP_USER), *cwd;
if (!page)
return -ENOMEM;
@@ -1930,29 +1969,19 @@
root = dget(current->fs->root);
read_unlock(¤t->fs->lock);
- error = -ENOENT;
- /* Has the current directory has been unlinked? */
- spin_lock(&dcache_lock);
- if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
- unsigned long len;
- char * cwd;
-
- cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
- spin_unlock(&dcache_lock);
-
- error = PTR_ERR(cwd);
- if (IS_ERR(cwd))
- goto out;
-
- error = -ERANGE;
- len = PAGE_SIZE + page - cwd;
- if (len <= size) {
- error = len;
- if (copy_to_user(buf, cwd, len))
- error = -EFAULT;
- }
- } else
- spin_unlock(&dcache_lock);
+ cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE, 1);
+ cwd = __connect_d_path(cwd, page);
+ error = PTR_ERR(cwd);
+ if (IS_ERR(cwd))
+ goto out;
+
+ error = -ERANGE;
+ len = PAGE_SIZE + page - cwd;
+ if (len <= size) {
+ error = len;
+ if (copy_to_user(buf, cwd, len))
+ error = -EFAULT;
+ }
out:
dput(pwd);
--- linux-source-2.6.22-2.6.22.orig/fs/fat/file.c
+++ linux-source-2.6.22-2.6.22/fs/fat/file.c
@@ -92,7 +92,7 @@
}
/* This MUST be done before doing anything irreversible... */
- err = notify_change(filp->f_path.dentry, &ia);
+ err = notify_change(filp->f_path.dentry, filp->f_path.mnt, &ia);
if (err)
goto up;
--- linux-source-2.6.22-2.6.22.orig/fs/ntfs/file.c
+++ linux-source-2.6.22-2.6.22/fs/ntfs/file.c
@@ -2122,7 +2122,7 @@
goto out;
if (!count)
goto out;
- err = remove_suid(file->f_path.dentry);
+ err = remove_suid(&file->f_path);
if (err)
goto out;
file_update_time(file);
--- linux-source-2.6.22-2.6.22.orig/fs/signalfd.c
+++ linux-source-2.6.22-2.6.22/fs/signalfd.c
@@ -56,12 +56,18 @@
sighand = lock_task_sighand(lk->tsk, &lk->flags);
rcu_read_unlock();
- if (sighand && !ctx->tsk) {
+ if (!sighand)
+ return 0;
+
+ if (!ctx->tsk) {
unlock_task_sighand(lk->tsk, &lk->flags);
- sighand = NULL;
+ return 0;
}
- return sighand != NULL;
+ if (lk->tsk->tgid == current->tgid)
+ lk->tsk = current;
+
+ return 1;
}
static void signalfd_unlock(struct signalfd_lockctx *lk)
@@ -331,7 +337,7 @@
init_waitqueue_head(&ctx->wqh);
ctx->sigmask = sigmask;
- ctx->tsk = current;
+ ctx->tsk = current->group_leader;
sighand = current->sighand;
/*
--- linux-source-2.6.22-2.6.22.orig/fs/ncpfs/mmap.c
+++ linux-source-2.6.22-2.6.22/fs/ncpfs/mmap.c
@@ -47,9 +47,6 @@
pos = address - area->vm_start + (area->vm_pgoff << PAGE_SHIFT);
count = PAGE_SIZE;
- if (address + PAGE_SIZE > area->vm_end) {
- count = area->vm_end - address;
- }
/* what we can read in one go */
bufsize = NCP_SERVER(inode)->buffer_size;
--- linux-source-2.6.22-2.6.22.orig/fs/timerfd.c
+++ linux-source-2.6.22-2.6.22/fs/timerfd.c
@@ -95,7 +95,7 @@
{
struct timerfd_ctx *ctx = file->private_data;
ssize_t res;
- u32 ticks = 0;
+ u64 ticks = 0;
DECLARE_WAITQUEUE(wait, current);
if (count < sizeof(ticks))
@@ -130,7 +130,7 @@
* callback to avoid DoS attacks specifying a very
* short timer period.
*/
- ticks = (u32)
+ ticks = (u64)
hrtimer_forward(&ctx->tmr,
hrtimer_cb_get_time(&ctx->tmr),
ctx->tintv);
@@ -140,7 +140,7 @@
}
spin_unlock_irq(&ctx->wqh.lock);
if (ticks)
- res = put_user(ticks, buf) ? -EFAULT: sizeof(ticks);
+ res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
return res;
}
--- linux-source-2.6.22-2.6.22.orig/fs/direct-io.c
+++ linux-source-2.6.22-2.6.22/fs/direct-io.c
@@ -974,6 +974,7 @@
dio->get_block = get_block;
dio->end_io = end_io;
dio->map_bh.b_private = NULL;
+ dio->map_bh.b_state = 0;
dio->final_block_in_bio = -1;
dio->next_block_for_io = -1;
--- linux-source-2.6.22-2.6.22.orig/fs/jbd2/commit.c
+++ linux-source-2.6.22-2.6.22/fs/jbd2/commit.c
@@ -896,7 +896,8 @@
journal->j_committing_transaction = NULL;
spin_unlock(&journal->j_state_lock);
- if (commit_transaction->t_checkpoint_list == NULL) {
+ if (commit_transaction->t_checkpoint_list == NULL &&
+ commit_transaction->t_checkpoint_io_list == NULL) {
__jbd2_journal_drop_transaction(journal, commit_transaction);
} else {
if (journal->j_checkpoint_transactions == NULL) {
--- linux-source-2.6.22-2.6.22.orig/fs/afs/mntpt.c
+++ linux-source-2.6.22-2.6.22/fs/afs/mntpt.c
@@ -235,8 +235,8 @@
err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
switch (err) {
case 0:
- mntput(nd->mnt);
dput(nd->dentry);
+ mntput(nd->mnt);
nd->mnt = newmnt;
nd->dentry = dget(newmnt->mnt_root);
schedule_delayed_work(&afs_mntpt_expiry_timer,
--- linux-source-2.6.22-2.6.22.orig/fs/splice.c
+++ linux-source-2.6.22-2.6.22/fs/splice.c
@@ -601,7 +601,7 @@
ret = add_to_page_cache_lru(page, mapping, index,
GFP_KERNEL);
if (unlikely(ret))
- goto out;
+ goto out_release;
}
ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
@@ -657,8 +657,9 @@
*/
mark_page_accessed(page);
out:
- page_cache_release(page);
unlock_page(page);
+out_release:
+ page_cache_release(page);
out_ret:
return ret;
}
@@ -807,7 +808,7 @@
ssize_t ret;
int err;
- err = remove_suid(out->f_path.dentry);
+ err = remove_suid(&out->f_path);
if (unlikely(err))
return err;
@@ -860,7 +861,7 @@
err = should_remove_suid(out->f_path.dentry);
if (unlikely(err)) {
mutex_lock(&inode->i_mutex);
- err = __remove_suid(out->f_path.dentry, err);
+ err = __remove_suid(&out->f_path, err);
mutex_unlock(&inode->i_mutex);
if (err)
return err;
@@ -1010,7 +1011,7 @@
max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
ret = do_splice_to(in, ppos, pipe, max_read_len, flags);
- if (unlikely(ret < 0))
+ if (unlikely(ret <= 0))
goto out_release;
read_len = ret;
@@ -1022,7 +1023,7 @@
*/
ret = do_splice_from(pipe, out, &out_off, read_len,
flags & ~SPLICE_F_NONBLOCK);
- if (unlikely(ret < 0))
+ if (unlikely(ret <= 0))
goto out_release;
bytes += ret;
@@ -1181,6 +1182,9 @@
if (unlikely(!base))
break;
+ if (!access_ok(VERIFY_READ, base, len))
+ break;
+
/*
* Get this base offset and number of pages, then map
* in the user pages.
--- linux-source-2.6.22-2.6.22.orig/fs/proc/proc_misc.c
+++ linux-source-2.6.22-2.6.22/fs/proc/proc_misc.c
@@ -623,6 +623,19 @@
return proc_calc_metrics(page, start, off, count, eof, len);
}
+#ifdef CONFIG_VERSION_SIGNATURE
+static int version_signature_read_proc(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len;
+
+ strcpy(page, CONFIG_VERSION_SIGNATURE);
+ strcat(page, "\n");
+ len = strlen(page);
+ return proc_calc_metrics(page, start, off, count, eof, len);
+}
+#endif
+
#ifdef CONFIG_MAGIC_SYSRQ
/*
* writing 'C' to /proc/sysrq-trigger is like sysrq-C
@@ -675,6 +688,9 @@
{"cmdline", cmdline_read_proc},
{"locks", locks_read_proc},
{"execdomains", execdomains_read_proc},
+#ifdef CONFIG_VERSION_SIGNATURE
+ {"version_signature", version_signature_read_proc},
+#endif
{NULL,}
};
for (p = simple_ones; p->name; p++)
--- linux-source-2.6.22-2.6.22.orig/fs/jffs2/os-linux.h
+++ linux-source-2.6.22-2.6.22/fs/jffs2/os-linux.h
@@ -173,12 +173,15 @@
extern const struct inode_operations jffs2_symlink_inode_operations;
/* fs.c */
+struct posix_acl;
+
int jffs2_setattr (struct dentry *, struct iattr *);
+int jffs2_do_setattr (struct inode *, struct iattr *);
void jffs2_read_inode (struct inode *);
void jffs2_clear_inode (struct inode *);
void jffs2_dirty_inode(struct inode *inode);
struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
- struct jffs2_raw_inode *ri);
+ struct jffs2_raw_inode *ri, struct posix_acl **acl);
int jffs2_statfs (struct dentry *, struct kstatfs *);
void jffs2_write_super (struct super_block *);
int jffs2_remount_fs (struct super_block *, int *, char *);
--- linux-source-2.6.22-2.6.22.orig/fs/jffs2/write.c
+++ linux-source-2.6.22-2.6.22/fs/jffs2/write.c
@@ -553,6 +553,9 @@
struct jffs2_full_dirent **prev = &dir_f->dents;
uint32_t nhash = full_name_hash(name, namelen);
+ /* We don't actually want to reserve any space, but we do
+ want to be holding the alloc_sem when we write to flash */
+ down(&c->alloc_sem);
down(&dir_f->sem);
while ((*prev) && (*prev)->nhash <= nhash) {
--- linux-source-2.6.22-2.6.22.orig/fs/jffs2/acl.c
+++ linux-source-2.6.22-2.6.22/fs/jffs2/acl.c
@@ -176,7 +176,7 @@
spin_unlock(&inode->i_lock);
}
-static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
+struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
{
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
struct posix_acl *acl;
@@ -247,8 +247,13 @@
if (rc < 0)
return rc;
if (inode->i_mode != mode) {
- inode->i_mode = mode;
- jffs2_dirty_inode(inode);
+ struct iattr attr;
+
+ attr.ia_valid = ATTR_MODE;
+ attr.ia_mode = mode;
+ rc = jffs2_do_setattr(inode, &attr);
+ if (rc < 0)
+ return rc;
}
if (rc == 0)
acl = NULL;
@@ -307,22 +312,16 @@
return generic_permission(inode, mask, jffs2_check_acl);
}
-int jffs2_init_acl(struct inode *inode, struct inode *dir)
+int jffs2_init_acl(struct inode *inode, struct posix_acl *acl)
{
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
- struct posix_acl *acl = NULL, *clone;
+ struct posix_acl *clone;
mode_t mode;
int rc = 0;
f->i_acl_access = JFFS2_ACL_NOT_CACHED;
f->i_acl_default = JFFS2_ACL_NOT_CACHED;
- if (!S_ISLNK(inode->i_mode)) {
- acl = jffs2_get_acl(dir, ACL_TYPE_DEFAULT);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (!acl)
- inode->i_mode &= ~current->fs->umask;
- }
+
if (acl) {
if (S_ISDIR(inode->i_mode)) {
rc = jffs2_set_acl(inode, ACL_TYPE_DEFAULT, acl);
--- linux-source-2.6.22-2.6.22.orig/fs/jffs2/fs.c
+++ linux-source-2.6.22-2.6.22/fs/jffs2/fs.c
@@ -24,7 +24,7 @@
static int jffs2_flash_setup(struct jffs2_sb_info *c);
-static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
+int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
{
struct jffs2_full_dnode *old_metadata, *new_metadata;
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
@@ -36,10 +36,8 @@
unsigned int ivalid;
uint32_t alloclen;
int ret;
+
D1(printk(KERN_DEBUG "jffs2_setattr(): ino #%lu\n", inode->i_ino));
- ret = inode_change_ok(inode, iattr);
- if (ret)
- return ret;
/* Special cases - we don't want more than one data node
for these types on the medium at any time. So setattr
@@ -183,9 +181,14 @@
{
int rc;
+ rc = inode_change_ok(dentry->d_inode, iattr);
+ if (rc)
+ return rc;
+
rc = jffs2_do_setattr(dentry->d_inode, iattr);
if (!rc && (iattr->ia_valid & ATTR_MODE))
rc = jffs2_acl_chmod(dentry->d_inode);
+
return rc;
}
@@ -399,7 +402,8 @@
/* jffs2_new_inode: allocate a new inode and inocache, add it to the hash,
fill in the raw_inode while you're at it. */
-struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri)
+struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri,
+ struct posix_acl **acl)
{
struct inode *inode;
struct super_block *sb = dir_i->i_sb;
@@ -431,7 +435,23 @@
} else {
ri->gid = cpu_to_je16(current->fsgid);
}
- ri->mode = cpu_to_jemode(mode);
+
+ /* POSIX ACLs have to be processed now, at least partly.
+ The umask is only applied if there's no default ACL */
+ if (!S_ISLNK(mode)) {
+ *acl = jffs2_get_acl(dir_i, ACL_TYPE_DEFAULT);
+ if (IS_ERR(*acl)) {
+ make_bad_inode(inode);
+ iput(inode);
+ inode = (void *)*acl;
+ *acl = NULL;
+ return inode;
+ }
+ if (!(*acl))
+ mode &= ~current->fs->umask;
+ } else {
+ *acl = NULL;
+ }
ret = jffs2_do_new_inode (c, f, mode, ri);
if (ret) {
make_bad_inode(inode);
@@ -627,7 +647,7 @@
struct inode *inode = OFNI_EDONI_2SFFJ(f);
struct page *pg;
- pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
+ pg = read_cache_page_async(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
(void *)jffs2_do_readpage_unlock, inode);
if (IS_ERR(pg))
return (void *)pg;
--- linux-source-2.6.22-2.6.22.orig/fs/jffs2/acl.h
+++ linux-source-2.6.22-2.6.22/fs/jffs2/acl.h
@@ -28,9 +28,10 @@
#define JFFS2_ACL_NOT_CACHED ((void *)-1)
+extern struct posix_acl *jffs2_get_acl(struct inode *inode, int type);
extern int jffs2_permission(struct inode *, int, struct nameidata *);
extern int jffs2_acl_chmod(struct inode *);
-extern int jffs2_init_acl(struct inode *, struct inode *);
+extern int jffs2_init_acl(struct inode *, struct posix_acl *);
extern void jffs2_clear_acl(struct jffs2_inode_info *);
extern struct xattr_handler jffs2_acl_access_xattr_handler;
@@ -38,6 +39,7 @@
#else
+#define jffs2_get_acl(inode, type) (NULL)
#define jffs2_permission NULL
#define jffs2_acl_chmod(inode) (0)
#define jffs2_init_acl(inode,dir) (0)
--- linux-source-2.6.22-2.6.22.orig/fs/jffs2/dir.c
+++ linux-source-2.6.22-2.6.22/fs/jffs2/dir.c
@@ -182,6 +182,7 @@
struct jffs2_inode_info *f, *dir_f;
struct jffs2_sb_info *c;
struct inode *inode;
+ struct posix_acl *acl;
int ret;
ri = jffs2_alloc_raw_inode();
@@ -192,7 +193,7 @@
D1(printk(KERN_DEBUG "jffs2_create()\n"));
- inode = jffs2_new_inode(dir_i, mode, ri);
+ inode = jffs2_new_inode(dir_i, mode, ri, &acl);
if (IS_ERR(inode)) {
D1(printk(KERN_DEBUG "jffs2_new_inode() failed\n"));
@@ -212,12 +213,12 @@
dentry->d_name.name, dentry->d_name.len);
if (ret)
- goto fail;
+ goto fail_acl;
ret = jffs2_init_security(inode, dir_i);
if (ret)
- goto fail;
- ret = jffs2_init_acl(inode, dir_i);
+ goto fail_acl;
+ ret = jffs2_init_acl(inode, acl);
if (ret)
goto fail;
@@ -230,6 +231,8 @@
inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages));
return 0;
+ fail_acl:
+ posix_acl_release(acl);
fail:
make_bad_inode(inode);
iput(inode);
@@ -306,6 +309,7 @@
struct jffs2_full_dirent *fd;
int namelen;
uint32_t alloclen;
+ struct posix_acl *acl;
int ret, targetlen = strlen(target);
/* FIXME: If you care. We'd need to use frags for the target
@@ -332,7 +336,7 @@
return ret;
}
- inode = jffs2_new_inode(dir_i, S_IFLNK | S_IRWXUGO, ri);
+ inode = jffs2_new_inode(dir_i, S_IFLNK | S_IRWXUGO, ri, &acl);
if (IS_ERR(inode)) {
jffs2_free_raw_inode(ri);
@@ -362,6 +366,7 @@
up(&f->sem);
jffs2_complete_reservation(c);
jffs2_clear_inode(inode);
+ posix_acl_release(acl);
return PTR_ERR(fn);
}
@@ -372,6 +377,7 @@
up(&f->sem);
jffs2_complete_reservation(c);
jffs2_clear_inode(inode);
+ posix_acl_release(acl);
return -ENOMEM;
}
@@ -389,9 +395,10 @@
ret = jffs2_init_security(inode, dir_i);
if (ret) {
jffs2_clear_inode(inode);
+ posix_acl_release(acl);
return ret;
}
- ret = jffs2_init_acl(inode, dir_i);
+ ret = jffs2_init_acl(inode, acl);
if (ret) {
jffs2_clear_inode(inode);
return ret;
@@ -469,6 +476,7 @@
struct jffs2_full_dirent *fd;
int namelen;
uint32_t alloclen;
+ struct posix_acl *acl;
int ret;
mode |= S_IFDIR;
@@ -491,7 +499,7 @@
return ret;
}
- inode = jffs2_new_inode(dir_i, mode, ri);
+ inode = jffs2_new_inode(dir_i, mode, ri, &acl);
if (IS_ERR(inode)) {
jffs2_free_raw_inode(ri);
@@ -518,6 +526,7 @@
up(&f->sem);
jffs2_complete_reservation(c);
jffs2_clear_inode(inode);
+ posix_acl_release(acl);
return PTR_ERR(fn);
}
/* No data here. Only a metadata node, which will be
@@ -531,9 +540,10 @@
ret = jffs2_init_security(inode, dir_i);
if (ret) {
jffs2_clear_inode(inode);
+ posix_acl_release(acl);
return ret;
}
- ret = jffs2_init_acl(inode, dir_i);
+ ret = jffs2_init_acl(inode, acl);
if (ret) {
jffs2_clear_inode(inode);
return ret;
@@ -629,6 +639,7 @@
union jffs2_device_node dev;
int devlen = 0;
uint32_t alloclen;
+ struct posix_acl *acl;
int ret;
if (!new_valid_dev(rdev))
@@ -655,7 +666,7 @@
return ret;
}
- inode = jffs2_new_inode(dir_i, mode, ri);
+ inode = jffs2_new_inode(dir_i, mode, ri, &acl);
if (IS_ERR(inode)) {
jffs2_free_raw_inode(ri);
@@ -684,6 +695,7 @@
up(&f->sem);
jffs2_complete_reservation(c);
jffs2_clear_inode(inode);
+ posix_acl_release(acl);
return PTR_ERR(fn);
}
/* No data here. Only a metadata node, which will be
@@ -697,9 +709,10 @@
ret = jffs2_init_security(inode, dir_i);
if (ret) {
jffs2_clear_inode(inode);
+ posix_acl_release(acl);
return ret;
}
- ret = jffs2_init_acl(inode, dir_i);
+ ret = jffs2_init_acl(inode, acl);
if (ret) {
jffs2_clear_inode(inode);
return ret;
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/meta_io.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/meta_io.c
@@ -387,12 +387,18 @@
if (test_clear_buffer_pinned(bh)) {
struct gfs2_trans *tr = current->journal_info;
+ struct gfs2_inode *bh_ip =
+ GFS2_I(bh->b_page->mapping->host);
+
gfs2_log_lock(sdp);
list_del_init(&bd->bd_le.le_list);
gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
sdp->sd_log_num_buf--;
gfs2_log_unlock(sdp);
- tr->tr_num_buf_rm++;
+ if (bh_ip->i_inode.i_private != NULL)
+ tr->tr_num_databuf_rm++;
+ else
+ tr->tr_num_buf_rm++;
brelse(bh);
}
if (bd) {
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/incore.h
+++ linux-source-2.6.22-2.6.22/fs/gfs2/incore.h
@@ -28,6 +28,14 @@
typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
+struct gfs2_log_header_host {
+ u64 lh_sequence; /* Sequence number of this transaction */
+ u32 lh_flags; /* GFS2_LOG_HEAD_... */
+ u32 lh_tail; /* Block number of log tail */
+ u32 lh_blkno;
+ u32 lh_hash;
+};
+
/*
* Structure of operations that are associated with each
* type of element in the log.
@@ -60,12 +68,23 @@
u32 bi_len;
};
+struct gfs2_rgrp_host {
+ u32 rg_flags;
+ u32 rg_free;
+ u32 rg_dinodes;
+ u64 rg_igeneration;
+};
+
struct gfs2_rgrpd {
struct list_head rd_list; /* Link with superblock */
struct list_head rd_list_mru;
struct list_head rd_recent; /* Recently used rgrps */
struct gfs2_glock *rd_gl; /* Glock for this rgrp */
- struct gfs2_rindex_host rd_ri;
+ u64 rd_addr; /* grp block disk address */
+ u64 rd_data0; /* first data location */
+ u32 rd_length; /* length of rgrp header in fs blocks */
+ u32 rd_data; /* num of data blocks in rgrp */
+ u32 rd_bitbytes; /* number of bytes in data bitmaps */
struct gfs2_rgrp_host rd_rg;
u64 rd_rg_vn;
struct gfs2_bitmap *rd_bits;
@@ -76,6 +95,8 @@
u32 rd_last_alloc_data;
u32 rd_last_alloc_meta;
struct gfs2_sbd *rd_sbd;
+ unsigned long rd_flags;
+#define GFS2_RDF_CHECK 0x0001 /* Need to check for unlinked inodes */
};
enum gfs2_state_bits {
@@ -211,10 +232,24 @@
GIF_SW_PAGED = 3,
};
+struct gfs2_dinode_host {
+ u64 di_size; /* number of bytes in file */
+ u64 di_blocks; /* number of blocks in file */
+ u64 di_goal_meta; /* rgrp to alloc from next */
+ u64 di_goal_data; /* data block goal */
+ u64 di_generation; /* generation number for NFS */
+ u32 di_flags; /* GFS2_DIF_... */
+ u16 di_height; /* height of metadata */
+ /* These only apply to directories */
+ u16 di_depth; /* Number of bits in the table */
+ u32 di_entries; /* The number of entries in the directory */
+ u64 di_eattr; /* extended attribute block number */
+};
+
struct gfs2_inode {
struct inode i_inode;
- struct gfs2_inum_host i_num;
-
+ u64 i_no_addr;
+ u64 i_no_formal_ino;
unsigned long i_flags; /* GIF_... */
struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
@@ -275,14 +310,6 @@
QDF_LOCKED = 2,
};
-struct gfs2_quota_lvb {
- __be32 qb_magic;
- u32 __pad;
- __be64 qb_limit; /* Hard limit of # blocks to alloc */
- __be64 qb_warn; /* Warn user when alloc is above this # */
- __be64 qb_value; /* Current # blocks allocated */
-};
-
struct gfs2_quota_data {
struct list_head qd_list;
unsigned int qd_count;
@@ -327,7 +354,9 @@
unsigned int tr_num_buf;
unsigned int tr_num_buf_new;
+ unsigned int tr_num_databuf_new;
unsigned int tr_num_buf_rm;
+ unsigned int tr_num_databuf_rm;
struct list_head tr_list_buf;
unsigned int tr_num_revoke;
@@ -354,6 +383,12 @@
unsigned int jd_blocks;
};
+struct gfs2_statfs_change_host {
+ s64 sc_total;
+ s64 sc_free;
+ s64 sc_dinodes;
+};
+
#define GFS2_GLOCKD_DEFAULT 1
#define GFS2_GLOCKD_MAX 16
@@ -426,6 +461,28 @@
#define GFS2_FSNAME_LEN 256
+struct gfs2_inum_host {
+ u64 no_formal_ino;
+ u64 no_addr;
+};
+
+struct gfs2_sb_host {
+ u32 sb_magic;
+ u32 sb_type;
+ u32 sb_format;
+
+ u32 sb_fs_format;
+ u32 sb_multihost_format;
+ u32 sb_bsize;
+ u32 sb_bsize_shift;
+
+ struct gfs2_inum_host sb_master_dir;
+ struct gfs2_inum_host sb_root_dir;
+
+ char sb_lockproto[GFS2_LOCKNAME_LEN];
+ char sb_locktable[GFS2_LOCKNAME_LEN];
+};
+
struct gfs2_sbd {
struct super_block *sd_vfs;
struct super_block *sd_vfs_meta;
@@ -544,6 +601,7 @@
unsigned int sd_log_blks_reserved;
unsigned int sd_log_commited_buf;
+ unsigned int sd_log_commited_databuf;
unsigned int sd_log_commited_revoke;
unsigned int sd_log_num_gl;
@@ -552,7 +610,6 @@
unsigned int sd_log_num_rg;
unsigned int sd_log_num_databuf;
unsigned int sd_log_num_jdata;
- unsigned int sd_log_num_hdrs;
struct list_head sd_log_le_gl;
struct list_head sd_log_le_buf;
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/ops_file.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/ops_file.c
@@ -502,7 +502,7 @@
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
struct lm_lockname name =
- { .ln_number = ip->i_num.no_addr,
+ { .ln_number = ip->i_no_addr,
.ln_type = LM_TYPE_PLOCK };
if (!(fl->fl_flags & FL_POSIX))
@@ -557,7 +557,7 @@
gfs2_glock_dq_uninit(fl_gh);
} else {
error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
- ip->i_num.no_addr, &gfs2_flock_glops,
+ ip->i_no_addr, &gfs2_flock_glops,
CREATE, &gl);
if (error)
goto out;
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/daemon.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/daemon.c
@@ -16,6 +16,7 @@
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
+#include <linux/freezer.h>
#include "gfs2.h"
#include "incore.h"
@@ -49,6 +50,8 @@
while (!kthread_should_stop()) {
gfs2_scand_internal(sdp);
t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
+ if (freezing(current))
+ refrigerator();
schedule_timeout_interruptible(t);
}
@@ -74,6 +77,8 @@
wait_event_interruptible(sdp->sd_reclaim_wq,
(atomic_read(&sdp->sd_reclaim_count) ||
kthread_should_stop()));
+ if (freezing(current))
+ refrigerator();
}
return 0;
@@ -93,6 +98,8 @@
while (!kthread_should_stop()) {
gfs2_check_journals(sdp);
t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
+ if (freezing(current))
+ refrigerator();
schedule_timeout_interruptible(t);
}
@@ -141,6 +148,8 @@
}
t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
+ if (freezing(current))
+ refrigerator();
schedule_timeout_interruptible(t);
}
@@ -191,6 +200,8 @@
gfs2_quota_scan(sdp);
t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
+ if (freezing(current))
+ refrigerator();
schedule_timeout_interruptible(t);
}
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/ops_fstype.h
+++ linux-source-2.6.22-2.6.22/fs/gfs2/ops_fstype.h
@@ -14,5 +14,6 @@
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;
+extern struct export_operations gfs2_export_ops;
#endif /* __OPS_FSTYPE_DOT_H__ */
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/locking.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/locking.c
@@ -181,4 +181,7 @@
EXPORT_SYMBOL_GPL(gfs2_register_lockproto);
EXPORT_SYMBOL_GPL(gfs2_unregister_lockproto);
+EXPORT_SYMBOL_GPL(gfs2_unmount_lockproto);
+EXPORT_SYMBOL_GPL(gfs2_mount_lockproto);
+EXPORT_SYMBOL_GPL(gfs2_withdraw_lockproto);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/inode.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/inode.c
@@ -38,12 +38,17 @@
#include "trans.h"
#include "util.h"
+struct gfs2_inum_range_host {
+ u64 ir_start;
+ u64 ir_length;
+};
+
static int iget_test(struct inode *inode, void *opaque)
{
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_inum_host *inum = opaque;
+ u64 *no_addr = opaque;
- if (ip->i_num.no_addr == inum->no_addr &&
+ if (ip->i_no_addr == *no_addr &&
inode->i_private != NULL)
return 1;
@@ -53,37 +58,70 @@
static int iget_set(struct inode *inode, void *opaque)
{
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_inum_host *inum = opaque;
+ u64 *no_addr = opaque;
- ip->i_num = *inum;
- inode->i_ino = inum->no_addr;
+ inode->i_ino = (unsigned long)*no_addr;
+ ip->i_no_addr = *no_addr;
return 0;
}
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum)
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
+{
+ unsigned long hash = (unsigned long)no_addr;
+ return ilookup5(sb, hash, iget_test, &no_addr);
+}
+
+static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
{
- return ilookup5(sb, (unsigned long)inum->no_addr,
- iget_test, inum);
+ unsigned long hash = (unsigned long)no_addr;
+ return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
}
-static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum)
+/**
+ * GFS2 lookup code fills in vfs inode contents based on info obtained
+ * from directory entry inside gfs2_inode_lookup(). This has caused issues
+ * with NFS code path since its get_dentry routine doesn't have the relevant
+ * directory entry when gfs2_inode_lookup() is invoked. Part of the code
+ * segment inside gfs2_inode_lookup code needs to get moved around.
+ *
+ * Clean up I_LOCK and I_NEW as well.
+ **/
+
+void gfs2_set_iop(struct inode *inode)
{
- return iget5_locked(sb, (unsigned long)inum->no_addr,
- iget_test, iget_set, inum);
+ umode_t mode = inode->i_mode;
+
+ if (S_ISREG(mode)) {
+ inode->i_op = &gfs2_file_iops;
+ inode->i_fop = &gfs2_file_fops;
+ inode->i_mapping->a_ops = &gfs2_file_aops;
+ } else if (S_ISDIR(mode)) {
+ inode->i_op = &gfs2_dir_iops;
+ inode->i_fop = &gfs2_dir_fops;
+ } else if (S_ISLNK(mode)) {
+ inode->i_op = &gfs2_symlink_iops;
+ } else {
+ inode->i_op = &gfs2_dev_iops;
+ }
+
+ unlock_new_inode(inode);
}
/**
* gfs2_inode_lookup - Lookup an inode
* @sb: The super block
- * @inum: The inode number
+ * @no_addr: The inode number
* @type: The type of the inode
*
* Returns: A VFS inode, or an error
*/
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type)
+struct inode *gfs2_inode_lookup(struct super_block *sb,
+ unsigned int type,
+ u64 no_addr,
+ u64 no_formal_ino)
{
- struct inode *inode = gfs2_iget(sb, inum);
+ struct inode *inode = gfs2_iget(sb, no_addr);
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_glock *io_gl;
int error;
@@ -93,29 +131,15 @@
if (inode->i_state & I_NEW) {
struct gfs2_sbd *sdp = GFS2_SB(inode);
- umode_t mode = DT2IF(type);
inode->i_private = ip;
- inode->i_mode = mode;
+ ip->i_no_formal_ino = no_formal_ino;
- if (S_ISREG(mode)) {
- inode->i_op = &gfs2_file_iops;
- inode->i_fop = &gfs2_file_fops;
- inode->i_mapping->a_ops = &gfs2_file_aops;
- } else if (S_ISDIR(mode)) {
- inode->i_op = &gfs2_dir_iops;
- inode->i_fop = &gfs2_dir_fops;
- } else if (S_ISLNK(mode)) {
- inode->i_op = &gfs2_symlink_iops;
- } else {
- inode->i_op = &gfs2_dev_iops;
- }
-
- error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
+ error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
if (unlikely(error))
goto fail;
ip->i_gl->gl_object = ip;
- error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
+ error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (unlikely(error))
goto fail_put;
@@ -123,12 +147,38 @@
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
goto fail_iopen;
+ ip->i_iopen_gh.gh_gl->gl_object = ip;
gfs2_glock_put(io_gl);
- unlock_new_inode(inode);
+
+ if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
+ goto gfs2_nfsbypass;
+
+ inode->i_mode = DT2IF(type);
+
+ /*
+ * We must read the inode in order to work out its type in
+ * this case. Note that this doesn't happen often as we normally
+ * know the type beforehand. This code path only occurs during
+ * unlinked inode recovery (where it is safe to do this glock,
+ * which is not true in the general case).
+ */
+ if (type == DT_UNKNOWN) {
+ struct gfs2_holder gh;
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ if (unlikely(error))
+ goto fail_glock;
+ /* Inode is now uptodate */
+ gfs2_glock_dq_uninit(&gh);
+ }
+
+ gfs2_set_iop(inode);
}
+gfs2_nfsbypass:
return inode;
+fail_glock:
+ gfs2_glock_dq(&ip->i_iopen_gh);
fail_iopen:
gfs2_glock_put(io_gl);
fail_put:
@@ -144,14 +194,12 @@
struct gfs2_dinode_host *di = &ip->i_di;
const struct gfs2_dinode *str = buf;
- if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) {
+ if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) {
if (gfs2_consist_inode(ip))
gfs2_dinode_print(ip);
return -EIO;
}
- if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino))
- return -ESTALE;
-
+ ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
ip->i_inode.i_rdev = 0;
switch (ip->i_inode.i_mode & S_IFMT) {
@@ -175,11 +223,11 @@
di->di_blocks = be64_to_cpu(str->di_blocks);
gfs2_set_inode_blocks(&ip->i_inode);
ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
- ip->i_inode.i_atime.tv_nsec = 0;
+ ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
- ip->i_inode.i_mtime.tv_nsec = 0;
+ ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
- ip->i_inode.i_ctime.tv_nsec = 0;
+ ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
di->di_goal_data = be64_to_cpu(str->di_goal_data);
@@ -247,7 +295,7 @@
if (error)
goto out_qs;
- rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
if (!rgd) {
gfs2_consist_inode(ip);
error = -EIO;
@@ -314,7 +362,7 @@
else
drop_nlink(&ip->i_inode);
- ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -366,9 +414,7 @@
struct super_block *sb = dir->i_sb;
struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_holder d_gh;
- struct gfs2_inum_host inum;
- unsigned int type;
- int error;
+ int error = 0;
struct inode *inode = NULL;
int unlock = 0;
@@ -395,12 +441,9 @@
goto out;
}
- error = gfs2_dir_search(dir, name, &inum, &type);
- if (error)
- goto out;
-
- inode = gfs2_inode_lookup(sb, &inum, type);
-
+ inode = gfs2_dir_search(dir, name);
+ if (IS_ERR(inode))
+ error = PTR_ERR(inode);
out:
if (unlock)
gfs2_glock_dq_uninit(&d_gh);
@@ -409,6 +452,22 @@
return inode ? inode : ERR_PTR(error);
}
+static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
+{
+ const struct gfs2_inum_range *str = buf;
+
+ ir->ir_start = be64_to_cpu(str->ir_start);
+ ir->ir_length = be64_to_cpu(str->ir_length);
+}
+
+static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
+{
+ struct gfs2_inum_range *str = buf;
+
+ str->ir_start = cpu_to_be64(ir->ir_start);
+ str->ir_length = cpu_to_be64(ir->ir_length);
+}
+
static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
{
struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
@@ -548,7 +607,7 @@
if (!dip->i_inode.i_nlink)
return -EPERM;
- error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL);
+ error = gfs2_dir_check(&dip->i_inode, name, NULL);
switch (error) {
case -ENOENT:
error = 0;
@@ -588,8 +647,7 @@
*gid = current->fsgid;
}
-static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
- u64 *generation)
+static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
int error;
@@ -605,7 +663,7 @@
if (error)
goto out_ipreserv;
- inum->no_addr = gfs2_alloc_di(dip, generation);
+ *no_addr = gfs2_alloc_di(dip, generation);
gfs2_trans_end(sdp);
@@ -635,6 +693,7 @@
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_dinode *di;
struct buffer_head *dibh;
+ struct timespec tv = CURRENT_TIME;
dibh = gfs2_meta_new(gl, inum->no_addr);
gfs2_trans_add_bh(gl, dibh, 1);
@@ -650,7 +709,7 @@
di->di_nlink = 0;
di->di_size = 0;
di->di_blocks = cpu_to_be64(1);
- di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
+ di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
di->di_major = cpu_to_be32(MAJOR(dev));
di->di_minor = cpu_to_be32(MINOR(dev));
di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
@@ -680,6 +739,9 @@
di->di_entries = 0;
memset(&di->__pad4, 0, sizeof(di->__pad4));
di->di_eattr = 0;
+ di->di_atime_nsec = cpu_to_be32(tv.tv_nsec);
+ di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
+ di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
memset(&di->di_reserved, 0, sizeof(di->di_reserved));
brelse(dibh);
@@ -749,7 +811,7 @@
goto fail_quota_locks;
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
- al->al_rgd->rd_ri.ri_length +
+ al->al_rgd->rd_length +
2 * RES_DINODE +
RES_STATFS + RES_QUOTA, 0);
if (error)
@@ -760,7 +822,7 @@
goto fail_quota_locks;
}
- error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode));
+ error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode));
if (error)
goto fail_end_trans;
@@ -840,11 +902,11 @@
struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
unsigned int mode, dev_t dev)
{
- struct inode *inode;
+ struct inode *inode = NULL;
struct gfs2_inode *dip = ghs->gh_gl->gl_object;
struct inode *dir = &dip->i_inode;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
- struct gfs2_inum_host inum;
+ struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
int error;
u64 generation;
@@ -864,7 +926,7 @@
if (error)
goto fail_gunlock;
- error = alloc_dinode(dip, &inum, &generation);
+ error = alloc_dinode(dip, &inum.no_addr, &generation);
if (error)
goto fail_gunlock;
@@ -877,34 +939,36 @@
if (error)
goto fail_gunlock2;
- inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode));
+ inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
+ inum.no_addr,
+ inum.no_formal_ino);
if (IS_ERR(inode))
goto fail_gunlock2;
error = gfs2_inode_refresh(GFS2_I(inode));
if (error)
- goto fail_iput;
+ goto fail_gunlock2;
error = gfs2_acl_create(dip, GFS2_I(inode));
if (error)
- goto fail_iput;
+ goto fail_gunlock2;
error = gfs2_security_init(dip, GFS2_I(inode));
if (error)
- goto fail_iput;
+ goto fail_gunlock2;
error = link_dinode(dip, name, GFS2_I(inode));
if (error)
- goto fail_iput;
+ goto fail_gunlock2;
if (!inode)
return ERR_PTR(-ENOMEM);
return inode;
-fail_iput:
- iput(inode);
fail_gunlock2:
gfs2_glock_dq_uninit(ghs + 1);
+ if (inode && !IS_ERR(inode)) /* a failed lookup leaves an ERR_PTR here; never iput() it */
+ iput(inode);
fail_gunlock:
gfs2_glock_dq(ghs);
fail:
@@ -976,10 +1040,8 @@
*/
int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
- struct gfs2_inode *ip)
+ const struct gfs2_inode *ip)
{
- struct gfs2_inum_host inum;
- unsigned int type;
int error;
if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
@@ -997,18 +1059,10 @@
if (error)
return error;
- error = gfs2_dir_search(&dip->i_inode, name, &inum, &type);
+ error = gfs2_dir_check(&dip->i_inode, name, ip);
if (error)
return error;
- if (!gfs2_inum_equal(&inum, &ip->i_num))
- return -ENOENT;
-
- if (IF2DT(ip->i_inode.i_mode) != type) {
- gfs2_consist_inode(dip);
- return -EIO;
- }
-
return 0;
}
@@ -1132,10 +1186,11 @@
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_inode *ip = gl->gl_object;
- s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
+ s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
unsigned int state;
int flags;
int error;
+ struct timespec tv = CURRENT_TIME;
if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
@@ -1153,8 +1208,7 @@
(sdp->sd_vfs->s_flags & MS_RDONLY))
return 0;
- curtime = get_seconds();
- if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
+ if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
gfs2_glock_dq(gh);
gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
gh);
@@ -1165,8 +1219,8 @@
/* Verify that atime hasn't been updated while we were
trying to get exclusive lock. */
- curtime = get_seconds();
- if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
+ tv = CURRENT_TIME;
+ if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
struct buffer_head *dibh;
struct gfs2_dinode *di;
@@ -1180,11 +1234,12 @@
if (error)
goto fail_end_trans;
- ip->i_inode.i_atime.tv_sec = curtime;
+ ip->i_inode.i_atime = tv;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
di = (struct gfs2_dinode *)dibh->b_data;
di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+ di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
brelse(dibh);
gfs2_trans_end(sdp);
@@ -1252,3 +1307,66 @@
return error;
}
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+{
+ const struct gfs2_dinode_host *di = &ip->i_di;
+ struct gfs2_dinode *str = buf;
+
+ str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+ str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+ str->di_header.__pad0 = 0;
+ str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+ str->di_header.__pad1 = 0;
+ str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
+ str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+ str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
+ str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+ str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
+ str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
+ str->di_size = cpu_to_be64(di->di_size);
+ str->di_blocks = cpu_to_be64(di->di_blocks);
+ str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+ str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+ str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
+
+ str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
+ str->di_goal_data = cpu_to_be64(di->di_goal_data);
+ str->di_generation = cpu_to_be64(di->di_generation);
+
+ str->di_flags = cpu_to_be32(di->di_flags);
+ str->di_height = cpu_to_be16(di->di_height);
+ str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+ !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
+ GFS2_FORMAT_DE : 0);
+ str->di_depth = cpu_to_be16(di->di_depth);
+ str->di_entries = cpu_to_be32(di->di_entries);
+
+ str->di_eattr = cpu_to_be64(di->di_eattr);
+ str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
+ str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
+ str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
+}
+
+void gfs2_dinode_print(const struct gfs2_inode *ip)
+{
+ const struct gfs2_dinode_host *di = &ip->i_di;
+
+ printk(KERN_INFO " no_formal_ino = %llu\n",
+ (unsigned long long)ip->i_no_formal_ino);
+ printk(KERN_INFO " no_addr = %llu\n",
+ (unsigned long long)ip->i_no_addr);
+ printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
+ printk(KERN_INFO " di_blocks = %llu\n",
+ (unsigned long long)di->di_blocks);
+ printk(KERN_INFO " di_goal_meta = %llu\n",
+ (unsigned long long)di->di_goal_meta);
+ printk(KERN_INFO " di_goal_data = %llu\n",
+ (unsigned long long)di->di_goal_data);
+ printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags);
+ printk(KERN_INFO " di_height = %u\n", di->di_height);
+ printk(KERN_INFO " di_depth = %u\n", di->di_depth);
+ printk(KERN_INFO " di_entries = %u\n", di->di_entries);
+ printk(KERN_INFO " di_eattr = %llu\n",
+ (unsigned long long)di->di_eattr);
+}
+
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/quota.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/quota.c
@@ -66,6 +66,18 @@
#define QUOTA_USER 1
#define QUOTA_GROUP 0
+struct gfs2_quota_host { /* CPU-byte-order values; filled by gfs2_quota_in(), stored by gfs2_quota_out() */
+ u64 qu_limit;
+ u64 qu_warn;
+ s64 qu_value; /* the only signed member */
+};
+
+struct gfs2_quota_change_host { /* CPU-byte-order values decoded by gfs2_quota_change_in() */
+ u64 qc_change;
+ u32 qc_flags; /* GFS2_QCF_... */
+ u32 qc_id;
+};
+
static u64 qd2offset(struct gfs2_quota_data *qd)
{
u64 offset;
@@ -561,6 +573,25 @@
mutex_unlock(&sdp->sd_quota_mutex);
}
+static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf) /* decode big-endian quota fields from buf into *qu */
+{
+ const struct gfs2_quota *str = buf; /* buf is read as a struct gfs2_quota */
+
+ qu->qu_limit = be64_to_cpu(str->qu_limit);
+ qu->qu_warn = be64_to_cpu(str->qu_warn);
+ qu->qu_value = be64_to_cpu(str->qu_value); /* qu_reserved is not read */
+}
+
+static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf) /* encode *qu into buf as big-endian quota fields */
+{
+ struct gfs2_quota *str = buf; /* buf is written as a struct gfs2_quota */
+
+ str->qu_limit = cpu_to_be64(qu->qu_limit);
+ str->qu_warn = cpu_to_be64(qu->qu_warn);
+ str->qu_value = cpu_to_be64(qu->qu_value);
+ memset(&str->qu_reserved, 0, sizeof(str->qu_reserved)); /* reserved area is always written as zeros */
+}
+
/**
* gfs2_adjust_quota
*
@@ -573,12 +604,13 @@
struct inode *inode = &ip->i_inode;
struct address_space *mapping = inode->i_mapping;
unsigned long index = loc >> PAGE_CACHE_SHIFT;
- unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
+ unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
unsigned blocksize, iblock, pos;
struct buffer_head *bh;
struct page *page;
void *kaddr;
- __be64 *ptr;
+ char *ptr;
+ struct gfs2_quota_host qp;
s64 value;
int err = -EIO;
@@ -620,13 +652,17 @@
kaddr = kmap_atomic(page, KM_USER0);
ptr = kaddr + offset;
- value = (s64)be64_to_cpu(*ptr) + change;
- *ptr = cpu_to_be64(value);
+ gfs2_quota_in(&qp, ptr);
+ qp.qu_value += change;
+ value = qp.qu_value;
+ gfs2_quota_out(&qp, ptr);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
err = 0;
qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
qd->qd_qb.qb_value = cpu_to_be64(value);
+ ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC);
+ ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value);
unlock:
unlock_page(page);
page_cache_release(page);
@@ -689,7 +725,7 @@
goto out_alloc;
error = gfs2_trans_begin(sdp,
- al->al_rgd->rd_ri.ri_length +
+ al->al_rgd->rd_length +
num_qd * data_blocks +
nalloc * ind_blocks +
RES_DINODE + num_qd +
@@ -709,7 +745,7 @@
offset = qd2offset(qd);
error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
(struct gfs2_quota_data *)
- qd->qd_gl->gl_lvb);
+ qd);
if (error)
goto out_end_trans;
@@ -1050,6 +1086,15 @@
return error;
}
+static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf) /* decode big-endian fields from buf into *qc */
+{
+ const struct gfs2_quota_change *str = buf; /* buf is read as a struct gfs2_quota_change */
+
+ qc->qc_change = be64_to_cpu(str->qc_change); /* 64-bit field */
+ qc->qc_flags = be32_to_cpu(str->qc_flags); /* 32-bit fields below */
+ qc->qc_id = be32_to_cpu(str->qc_id);
+}
+
int gfs2_quota_init(struct gfs2_sbd *sdp)
{
struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/Makefile
+++ linux-source-2.6.22-2.6.22/fs/gfs2/Makefile
@@ -1,7 +1,7 @@
obj-$(CONFIG_GFS2_FS) += gfs2.o
gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
- mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
+ mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
recovery.o rgrp.o super.o sys.o trans.o util.o
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/ops_dentry.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/ops_dentry.c
@@ -21,6 +21,7 @@
#include "glock.h"
#include "ops_dentry.h"
#include "util.h"
+#include "inode.h"
/**
* gfs2_drevalidate - Check directory lookup consistency
@@ -40,14 +41,15 @@
struct gfs2_inode *dip = GFS2_I(parent->d_inode);
struct inode *inode = dentry->d_inode;
struct gfs2_holder d_gh;
- struct gfs2_inode *ip;
- struct gfs2_inum_host inum;
- unsigned int type;
+ struct gfs2_inode *ip = NULL;
int error;
int had_lock=0;
- if (inode && is_bad_inode(inode))
- goto invalid;
+ if (inode) {
+ if (is_bad_inode(inode))
+ goto invalid;
+ ip = GFS2_I(inode);
+ }
if (sdp->sd_args.ar_localcaching)
goto valid;
@@ -59,7 +61,7 @@
goto fail;
}
- error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
+ error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
switch (error) {
case 0:
if (!inode)
@@ -73,16 +75,6 @@
goto fail_gunlock;
}
- ip = GFS2_I(inode);
-
- if (!gfs2_inum_equal(&ip->i_num, &inum))
- goto invalid_gunlock;
-
- if (IF2DT(ip->i_inode.i_mode) != type) {
- gfs2_consist_inode(dip);
- goto fail_gunlock;
- }
-
valid_gunlock:
if (!had_lock)
gfs2_glock_dq_uninit(&d_gh);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/rgrp.h
+++ linux-source-2.6.22-2.6.22/fs/gfs2/rgrp.h
@@ -65,5 +65,6 @@
void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
int flags);
void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
+u64 gfs2_ri_total(struct gfs2_sbd *sdp);
#endif /* __RGRP_DOT_H__ */
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/glock.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/glock.c
@@ -422,11 +422,11 @@
static void gfs2_holder_wake(struct gfs2_holder *gh)
{
clear_bit(HIF_WAIT, &gh->gh_iflags);
- smp_mb();
+ smp_mb__after_clear_bit();
wake_up_bit(&gh->gh_iflags, HIF_WAIT);
}
-static int holder_wait(void *word)
+static int just_schedule(void *word)
{
schedule();
return 0;
@@ -435,7 +435,20 @@
static void wait_on_holder(struct gfs2_holder *gh)
{
might_sleep();
- wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE);
+ wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
+}
+
+static void gfs2_demote_wake(struct gfs2_glock *gl) /* clear GLF_DEMOTE and wake anyone waiting on that bit */
+{
+ clear_bit(GLF_DEMOTE, &gl->gl_flags);
+ smp_mb__after_clear_bit(); /* memory barrier between the clear and wake_up_bit() */
+ wake_up_bit(&gl->gl_flags, GLF_DEMOTE); /* same word and bit that wait_on_demote() sleeps on */
+}
+
+static void wait_on_demote(struct gfs2_glock *gl) /* block until GLF_DEMOTE is clear in gl->gl_flags */
+{
+ might_sleep(); /* annotates that this function can block */
+ wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); /* just_schedule() only calls schedule(); sleep is uninterruptible */
}
/**
@@ -528,7 +541,7 @@
if (gl->gl_state == gl->gl_demote_state ||
gl->gl_state == LM_ST_UNLOCKED) {
- clear_bit(GLF_DEMOTE, &gl->gl_flags);
+ gfs2_demote_wake(gl);
return 0;
}
set_bit(GLF_LOCK, &gl->gl_flags);
@@ -666,12 +679,22 @@
* practise: LM_ST_SHARED and LM_ST_UNLOCKED
*/
-static void handle_callback(struct gfs2_glock *gl, unsigned int state)
+static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
{
spin_lock(&gl->gl_spin);
if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
gl->gl_demote_state = state;
gl->gl_demote_time = jiffies;
+ if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
+ gl->gl_object) {
+ struct inode *inode = igrab(gl->gl_object);
+ spin_unlock(&gl->gl_spin);
+ if (inode) {
+ d_prune_aliases(inode);
+ iput(inode);
+ }
+ return;
+ }
} else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
gl->gl_demote_state = state;
}
@@ -740,7 +763,7 @@
if (ret & LM_OUT_CANCELED)
op_done = 0;
else
- clear_bit(GLF_DEMOTE, &gl->gl_flags);
+ gfs2_demote_wake(gl);
} else {
spin_lock(&gl->gl_spin);
list_del_init(&gh->gh_list);
@@ -848,7 +871,7 @@
gfs2_assert_warn(sdp, !ret);
state_change(gl, LM_ST_UNLOCKED);
- clear_bit(GLF_DEMOTE, &gl->gl_flags);
+ gfs2_demote_wake(gl);
if (glops->go_inval)
glops->go_inval(gl, DIO_METADATA);
@@ -1174,7 +1197,7 @@
const struct gfs2_glock_operations *glops = gl->gl_ops;
if (gh->gh_flags & GL_NOCACHE)
- handle_callback(gl, LM_ST_UNLOCKED);
+ handle_callback(gl, LM_ST_UNLOCKED, 0);
gfs2_glmutex_lock(gl);
@@ -1196,6 +1219,13 @@
spin_unlock(&gl->gl_spin);
}
+void gfs2_glock_dq_wait(struct gfs2_holder *gh) /* dequeue gh, then wait for GLF_DEMOTE on its glock to clear */
+{
+ struct gfs2_glock *gl = gh->gh_gl; /* glock pointer is read before the holder is dequeued */
+ gfs2_glock_dq(gh);
+ wait_on_demote(gl); /* may sleep */
+}
+
/**
* gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
* @gh: the holder structure
@@ -1297,10 +1327,6 @@
* @num_gh: the number of structures
* @ghs: an array of struct gfs2_holder structures
*
- * Figure out how big an impact this function has. Either:
- * 1) Replace this code with code that calls gfs2_glock_prefetch()
- * 2) Forget async stuff and just call nq_m_sync()
- * 3) Leave it like it is
*
* Returns: 0 on success (all glocks acquired),
* errno on failure (no glocks acquired)
@@ -1308,62 +1334,28 @@
int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
- int *e;
- unsigned int x;
- int borked = 0, serious = 0;
+ struct gfs2_holder *tmp[4];
+ struct gfs2_holder **pph = tmp;
int error = 0;
- if (!num_gh)
+ switch(num_gh) {
+ case 0:
return 0;
-
- if (num_gh == 1) {
+ case 1:
ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
return gfs2_glock_nq(ghs);
- }
-
- e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
- if (!e)
- return -ENOMEM;
-
- for (x = 0; x < num_gh; x++) {
- ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
- error = gfs2_glock_nq(&ghs[x]);
- if (error) {
- borked = 1;
- serious = error;
- num_gh = x;
+ default:
+ if (num_gh <= 4)
break;
- }
+ pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS);
+ if (!pph)
+ return -ENOMEM;
}
- for (x = 0; x < num_gh; x++) {
- error = e[x] = glock_wait_internal(&ghs[x]);
- if (error) {
- borked = 1;
- if (error != GLR_TRYFAILED && error != GLR_CANCELED)
- serious = error;
- }
- }
-
- if (!borked) {
- kfree(e);
- return 0;
- }
-
- for (x = 0; x < num_gh; x++)
- if (!e[x])
- gfs2_glock_dq(&ghs[x]);
-
- if (serious)
- error = serious;
- else {
- for (x = 0; x < num_gh; x++)
- gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
- &ghs[x]);
- error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
- }
+ error = nq_m_sync(num_gh, ghs, pph);
- kfree(e);
+ if (pph != tmp)
+ kfree(pph);
return error;
}
@@ -1456,7 +1448,7 @@
if (!gl)
return;
- handle_callback(gl, state);
+ handle_callback(gl, state, 1);
spin_lock(&gl->gl_spin);
run_queue(gl);
@@ -1596,7 +1588,7 @@
if (gfs2_glmutex_trylock(gl)) {
if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
- handle_callback(gl, LM_ST_UNLOCKED);
+ handle_callback(gl, LM_ST_UNLOCKED, 0);
gfs2_glmutex_unlock(gl);
}
@@ -1709,7 +1701,7 @@
if (gfs2_glmutex_trylock(gl)) {
if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED)
- handle_callback(gl, LM_ST_UNLOCKED);
+ handle_callback(gl, LM_ST_UNLOCKED, 0);
gfs2_glmutex_unlock(gl);
}
}
@@ -1823,7 +1815,8 @@
print_dbg(gi, " Inode:\n");
print_dbg(gi, " num = %llu/%llu\n",
- ip->i_num.no_formal_ino, ip->i_num.no_addr);
+ (unsigned long long)ip->i_no_formal_ino,
+ (unsigned long long)ip->i_no_addr);
print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode));
print_dbg(gi, " i_flags =");
for (x = 0; x < 32; x++)
@@ -1909,8 +1902,8 @@
}
if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n",
- gl->gl_demote_state,
- (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ));
+ gl->gl_demote_state, (unsigned long long)
+ (jiffies - gl->gl_demote_time)*(1000000/HZ));
}
if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/super.h
+++ linux-source-2.6.22-2.6.22/fs/gfs2/super.h
@@ -16,7 +16,7 @@
int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
-struct page *gfs2_read_super(struct super_block *sb, sector_t sector);
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
{
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/ops_vm.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/ops_vm.c
@@ -66,7 +66,7 @@
if (error)
goto out_gunlock_q;
- error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length +
+ error = gfs2_trans_begin(sdp, al->al_rgd->rd_length +
ind_blocks + RES_DINODE +
RES_STATFS + RES_QUOTA, 0);
if (error)
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/ops_address.h
+++ linux-source-2.6.22-2.6.22/fs/gfs2/ops_address.h
@@ -1,6 +1,6 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/log.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/log.c
@@ -83,6 +83,11 @@
gfs2_assert(sdp, bd->bd_ail == ai);
+ if (!bh){
+ list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+ continue;
+ }
+
if (!buffer_busy(bh)) {
if (!buffer_uptodate(bh)) {
gfs2_log_unlock(sdp);
@@ -125,6 +130,11 @@
bd_ail_st_list) {
bh = bd->bd_bh;
+ if (!bh){
+ list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+ continue;
+ }
+
gfs2_assert(sdp, bd->bd_ail == ai);
if (buffer_busy(bh)) {
@@ -227,7 +237,10 @@
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
- brelse(bd->bd_bh);
+ if (bd->bd_bh)
+ brelse(bd->bd_bh);
+ else
+ kmem_cache_free(gfs2_bufdata_cachep, bd);
}
}
@@ -262,8 +275,8 @@
* @sdp: The GFS2 superblock
* @blks: The number of blocks to reserve
*
- * Note that we never give out the last 6 blocks of the journal. Thats
- * due to the fact that there is are a small number of header blocks
+ * Note that we never give out the last few blocks of the journal. That's
+ * due to the fact that there is a small number of header blocks
* associated with each log flush. The exact number can't be known until
* flush time, so we ensure that we have just enough free blocks at all
* times to avoid running out during a log flush.
@@ -274,6 +287,7 @@
int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
{
unsigned int try = 0;
+ unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
if (gfs2_assert_warn(sdp, blks) ||
gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
@@ -281,7 +295,7 @@
mutex_lock(&sdp->sd_log_reserve_mutex);
gfs2_log_lock(sdp);
- while(sdp->sd_log_blks_free <= (blks + 6)) {
+ while(sdp->sd_log_blks_free <= (blks + reserved_blks)) {
gfs2_log_unlock(sdp);
gfs2_ail1_empty(sdp, 0);
gfs2_log_flush(sdp, NULL);
@@ -357,6 +371,58 @@
return dist;
}
+/**
+ * calc_reserved - Calculate the number of blocks to reserve when
+ * refunding a transaction's unused buffers.
+ * @sdp: The GFS2 superblock
+ *
+ * This is complex. We need to reserve room for all our currently used
+ * metadata buffers (e.g. normal file I/O rewriting file time stamps) and
+ * all our journaled data buffers for journaled files (e.g. files in the
+ * meta_fs like rindex, or files for which chattr +j was done.)
+ * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
+ * will count it as free space (sd_log_blks_free) and corruption will follow.
+ *
+ * We can have metadata bufs and jdata bufs in the same journal. So each
+ * type gets its own log header, for which we need to reserve a block.
+ * In fact, each type has the potential for needing more than one header
+ * in cases where we have more buffers than will fit on a journal page.
+ * Metadata journal entries take up half the space of journaled buffer entries.
+ * Thus, metadata entries have buf_limit (502) and journaled buffers have
+ * databuf_limit (251) before they cause a wrap around.
+ *
+ * Also, we need to reserve blocks for revoke journal entries and one for an
+ * overall header for the lot.
+ *
+ * Returns: the number of blocks reserved
+ */
+static unsigned int calc_reserved(struct gfs2_sbd *sdp)
+{
+ unsigned int reserved = 0;
+ unsigned int mbuf_limit, metabufhdrs_needed;
+ unsigned int dbuf_limit, databufhdrs_needed;
+ unsigned int revokes = 0; /* stays 0 when no revokes are committed */
+
+ mbuf_limit = buf_limit(sdp);
+ metabufhdrs_needed = (sdp->sd_log_commited_buf +
+ (mbuf_limit - 1)) / mbuf_limit; /* round up: one header per mbuf_limit metadata buffers */
+ dbuf_limit = databuf_limit(sdp);
+ databufhdrs_needed = (sdp->sd_log_commited_databuf +
+ (dbuf_limit - 1)) / dbuf_limit; /* round up: one header per dbuf_limit data buffers */
+
+ if (sdp->sd_log_commited_revoke)
+ revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
+ sizeof(u64)); /* each revoke is sized as one u64 */
+
+ reserved = sdp->sd_log_commited_buf + metabufhdrs_needed +
+ sdp->sd_log_commited_databuf + databufhdrs_needed +
+ revokes; /* buffers + their headers + revoke blocks */
+ /* One for the overall header, but only when something is reserved */
+ if (reserved)
+ reserved++;
+ return reserved;
+}
+
static unsigned int current_tail(struct gfs2_sbd *sdp)
{
struct gfs2_ail *ai;
@@ -447,14 +513,14 @@
return bh;
}
-static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
+static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
{
unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
ail2_empty(sdp, new_tail);
gfs2_log_lock(sdp);
- sdp->sd_log_blks_free += dist - (pull ? 1 : 0);
+ sdp->sd_log_blks_free += dist;
gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
gfs2_log_unlock(sdp);
@@ -504,7 +570,7 @@
brelse(bh);
if (sdp->sd_log_tail != tail)
- log_pull_tail(sdp, tail, pull);
+ log_pull_tail(sdp, tail);
else
gfs2_assert_withdraw(sdp, !pull);
@@ -565,7 +631,10 @@
INIT_LIST_HEAD(&ai->ai_ail1_list);
INIT_LIST_HEAD(&ai->ai_ail2_list);
- gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
+ gfs2_assert_withdraw(sdp,
+ sdp->sd_log_num_buf + sdp->sd_log_num_jdata ==
+ sdp->sd_log_commited_buf +
+ sdp->sd_log_commited_databuf);
gfs2_assert_withdraw(sdp,
sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
@@ -576,16 +645,19 @@
lops_before_commit(sdp);
if (!list_empty(&sdp->sd_log_flush_list))
log_flush_commit(sdp);
- else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
+ else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
+ gfs2_log_lock(sdp);
+ sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */
+ gfs2_log_unlock(sdp);
log_write_header(sdp, 0, PULL);
+ }
lops_after_commit(sdp, ai);
gfs2_log_lock(sdp);
sdp->sd_log_head = sdp->sd_log_flush_head;
- sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
sdp->sd_log_blks_reserved = 0;
sdp->sd_log_commited_buf = 0;
- sdp->sd_log_num_hdrs = 0;
+ sdp->sd_log_commited_databuf = 0;
sdp->sd_log_commited_revoke = 0;
if (!list_empty(&ai->ai_ail1_list)) {
@@ -602,32 +674,26 @@
static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
- unsigned int reserved = 0;
+ unsigned int reserved;
unsigned int old;
gfs2_log_lock(sdp);
sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
- gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
+ sdp->sd_log_commited_databuf += tr->tr_num_databuf_new -
+ tr->tr_num_databuf_rm;
+ gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) &&
+ (((int)sdp->sd_log_commited_databuf) >= 0)); /* both counters must stay non-negative; || hid a single underflow */
sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
-
- if (sdp->sd_log_commited_buf)
- reserved += sdp->sd_log_commited_buf;
- if (sdp->sd_log_commited_revoke)
- reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
- sizeof(u64));
- if (reserved)
- reserved++;
-
+ reserved = calc_reserved(sdp);
old = sdp->sd_log_blks_free;
sdp->sd_log_blks_free += tr->tr_reserved -
(reserved - sdp->sd_log_blks_reserved);
gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
- gfs2_assert_withdraw(sdp,
- sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
- sdp->sd_log_num_hdrs);
+ gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <=
+ sdp->sd_jdesc->jd_blocks);
sdp->sd_log_blks_reserved = reserved;
@@ -673,13 +739,13 @@
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
- gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
sdp->sd_log_flush_head = sdp->sd_log_head;
sdp->sd_log_flush_wrapped = 0;
- log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
+ log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT,
+ (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);
gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/ops_fstype.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/ops_fstype.c
@@ -27,7 +27,6 @@
#include "inode.h"
#include "lm.h"
#include "mount.h"
-#include "ops_export.h"
#include "ops_fstype.h"
#include "ops_super.h"
#include "recovery.h"
@@ -105,6 +104,7 @@
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
sb->s_export_op = &gfs2_export_ops;
+ sb->s_time_gran = 1;
sb->s_maxbytes = MAX_LFS_FILESIZE;
if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
@@ -116,7 +116,6 @@
static int init_names(struct gfs2_sbd *sdp, int silent)
{
- struct page *page;
char *proto, *table;
int error = 0;
@@ -126,14 +125,9 @@
/* Try to autodetect */
if (!proto[0] || !table[0]) {
- struct gfs2_sb *sb;
- page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
- if (!page)
- return -ENOBUFS;
- sb = kmap(page);
- gfs2_sb_in(&sdp->sd_sb, sb);
- kunmap(page);
- __free_page(page);
+ error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+ if (error)
+ return error;
error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
if (error)
@@ -151,6 +145,9 @@
snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
+ while ((table = strchr(sdp->sd_table_name, '/')))
+ *table = '_';
+
out:
return error;
}
@@ -236,17 +233,17 @@
return error;
}
-static struct inode *gfs2_lookup_root(struct super_block *sb,
- struct gfs2_inum_host *inum)
+static inline struct inode *gfs2_lookup_root(struct super_block *sb,
+ u64 no_addr)
{
- return gfs2_inode_lookup(sb, inum, DT_DIR);
+ return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
}
static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
{
struct super_block *sb = sdp->sd_vfs;
struct gfs2_holder sb_gh;
- struct gfs2_inum_host *inum;
+ u64 no_addr;
struct inode *inode;
int error = 0;
@@ -289,10 +286,10 @@
sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
/* Get the root inode */
- inum = &sdp->sd_sb.sb_root_dir;
+ no_addr = sdp->sd_sb.sb_root_dir.no_addr;
if (sb->s_type == &gfs2meta_fs_type)
- inum = &sdp->sd_sb.sb_master_dir;
- inode = gfs2_lookup_root(sb, inum);
+ no_addr = sdp->sd_sb.sb_master_dir.no_addr;
+ inode = gfs2_lookup_root(sb, no_addr);
if (IS_ERR(inode)) {
error = PTR_ERR(inode);
fs_err(sdp, "can't read in root inode: %d\n", error);
@@ -449,7 +446,7 @@
if (undo)
goto fail_qinode;
- inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir);
+ inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
if (IS_ERR(inode)) {
error = PTR_ERR(inode);
fs_err(sdp, "can't read in master directory: %d\n", error);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/locking/dlm/thread.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/locking/dlm/thread.c
@@ -44,6 +44,13 @@
ls->fscb(ls->sdp, cb, &lp->lockname);
}
+static void wake_up_ast(struct gdlm_lock *lp) /* clear LFL_AST_WAIT and wake anyone waiting on that bit */
+{
+ clear_bit(LFL_AST_WAIT, &lp->flags);
+ smp_mb__after_clear_bit(); /* memory barrier between the clear and wake_up_bit() */
+ wake_up_bit(&lp->flags, LFL_AST_WAIT); /* replaces the former complete(&lp->ast_wait) */
+}
+
static void process_complete(struct gdlm_lock *lp)
{
struct gdlm_ls *ls = lp->ls;
@@ -136,7 +143,7 @@
*/
if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
- complete(&lp->ast_wait);
+ wake_up_ast(lp);
return;
}
@@ -214,7 +221,7 @@
if (test_bit(LFL_INLOCK, &lp->flags)) {
clear_bit(LFL_NOBLOCK, &lp->flags);
lp->cur = lp->req;
- complete(&lp->ast_wait);
+ wake_up_ast(lp);
return;
}
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/locking/dlm/mount.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/locking/dlm/mount.c
@@ -147,7 +147,7 @@
error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
&ls->dlm_lockspace,
- nodir ? DLM_LSFL_NODIR : 0,
+ DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0),
GDLM_LVB_SIZE);
if (error) {
log_error("dlm_new_lockspace error %d", error);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/locking/dlm/lock.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/locking/dlm/lock.c
@@ -174,7 +174,6 @@
lp->cur = DLM_LOCK_IV;
lp->lvb = NULL;
lp->hold_null = NULL;
- init_completion(&lp->ast_wait);
INIT_LIST_HEAD(&lp->clist);
INIT_LIST_HEAD(&lp->blist);
INIT_LIST_HEAD(&lp->delay_list);
@@ -399,6 +398,12 @@
lp->lksb.sb_lvbptr = NULL;
}
+static int gdlm_ast_wait(void *word) /* action callback passed to wait_on_bit(); word is unused */
+{
+ schedule(); /* give up the CPU until woken */
+ return 0; /* always returns 0 */
+}
+
/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
the completion) because gfs won't call hold_lvb() during a callback (from
the context of a lock_dlm thread). */
@@ -424,10 +429,10 @@
lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
set_bit(LFL_NOBAST, &lpn->flags);
set_bit(LFL_INLOCK, &lpn->flags);
+ set_bit(LFL_AST_WAIT, &lpn->flags);
- init_completion(&lpn->ast_wait);
gdlm_do_lock(lpn);
- wait_for_completion(&lpn->ast_wait);
+ wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
error = lpn->lksb.sb_status;
if (error) {
printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/locking/dlm/plock.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/locking/dlm/plock.c
@@ -242,7 +242,7 @@
op->info.number = name->ln_number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
-
+ op->info.owner = (__u64)(long) fl->fl_owner;
send_op(op);
wait_event(recv_wq, (op->done != 0));
@@ -254,16 +254,20 @@
}
spin_unlock(&ops_lock);
+ /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
+ -ENOENT if there are no locks on the file */
+
rv = op->info.rv;
fl->fl_type = F_UNLCK;
if (rv == -ENOENT)
rv = 0;
- else if (rv == 0 && op->info.pid != fl->fl_pid) {
+ else if (rv > 0) {
fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
fl->fl_pid = op->info.pid;
fl->fl_start = op->info.start;
fl->fl_end = op->info.end;
+ rv = 0;
}
kfree(op);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/locking/dlm/lock_dlm.h
+++ linux-source-2.6.22-2.6.22/fs/gfs2/locking/dlm/lock_dlm.h
@@ -101,6 +101,7 @@
LFL_NOBAST = 10,
LFL_HEADQUE = 11,
LFL_UNLOCK_DELETE = 12,
+ LFL_AST_WAIT = 13,
};
struct gdlm_lock {
@@ -117,7 +118,6 @@
unsigned long flags; /* lock_dlm flags LFL_ */
int bast_mode; /* protected by async_lock */
- struct completion ast_wait;
struct list_head clist; /* complete */
struct list_head blist; /* blocking */
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/meta_io.h
+++ linux-source-2.6.22-2.6.22/fs/gfs2/meta_io.h
@@ -63,7 +63,7 @@
static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
struct buffer_head **bhp)
{
- return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
+ return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp);
}
struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/ops_super.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/ops_super.c
@@ -326,8 +326,10 @@
gfs2_glock_schedule_for_reclaim(ip->i_gl);
gfs2_glock_put(ip->i_gl);
ip->i_gl = NULL;
- if (ip->i_iopen_gh.gh_gl)
+ if (ip->i_iopen_gh.gh_gl) {
+ ip->i_iopen_gh.gh_gl->gl_object = NULL;
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+ }
}
}
@@ -422,13 +424,13 @@
if (!inode->i_private)
goto out;
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
if (unlikely(error)) {
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
goto out;
}
- gfs2_glock_dq(&ip->i_iopen_gh);
+ gfs2_glock_dq_wait(&ip->i_iopen_gh);
gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
error = gfs2_glock_nq(&ip->i_iopen_gh);
if (error)
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/mount.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/mount.c
@@ -82,20 +82,19 @@
char *options, *o, *v;
int error = 0;
- if (!remount) {
- /* If someone preloaded options, use those instead */
- spin_lock(&gfs2_sys_margs_lock);
- if (gfs2_sys_margs) {
- data = gfs2_sys_margs;
- gfs2_sys_margs = NULL;
- }
- spin_unlock(&gfs2_sys_margs_lock);
-
- /* Set some defaults */
- args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
- args->ar_quota = GFS2_QUOTA_DEFAULT;
- args->ar_data = GFS2_DATA_DEFAULT;
+ /* If someone preloaded options, use those instead */
+ spin_lock(&gfs2_sys_margs_lock);
+ if (!remount && gfs2_sys_margs) {
+ data = gfs2_sys_margs;
+ gfs2_sys_margs = NULL;
}
+ spin_unlock(&gfs2_sys_margs_lock);
+
+ /* Set some defaults */
+ memset(args, 0, sizeof(struct gfs2_args));
+ args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
+ args->ar_quota = GFS2_QUOTA_DEFAULT;
+ args->ar_data = GFS2_DATA_DEFAULT;
/* Split the options into tokens with the "," character and
process them */
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/glock.h
+++ linux-source-2.6.22-2.6.22/fs/gfs2/glock.h
@@ -87,6 +87,7 @@
int gfs2_glock_poll(struct gfs2_holder *gh);
int gfs2_glock_wait(struct gfs2_holder *gh);
void gfs2_glock_dq(struct gfs2_holder *gh);
+void gfs2_glock_dq_wait(struct gfs2_holder *gh);
void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/ops_address.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/ops_address.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
@@ -32,6 +32,7 @@
#include "trans.h"
#include "rgrp.h"
#include "ops_file.h"
+#include "super.h"
#include "util.h"
#include "glops.h"
@@ -49,6 +50,8 @@
end = start + bsize;
if (end <= from || start >= to)
continue;
+ if (gfs2_is_jdata(ip))
+ set_buffer_uptodate(bh);
gfs2_trans_add_bh(ip->i_gl, bh, 0);
}
}
@@ -134,7 +137,9 @@
return 0; /* don't care */
}
- if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
+ if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) &&
+ PageChecked(page)) {
+ ClearPageChecked(page);
error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
if (error)
goto out_ignore;
@@ -203,11 +208,7 @@
* so we need to supply one here. It doesn't happen often.
*/
if (unlikely(page->index)) {
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
- kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(page);
- SetPageUptodate(page);
+ zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
return 0;
}
@@ -450,6 +451,31 @@
}
/**
+ * adjust_fs_space - Adjusts the free space available due to gfs2_grow
+ * @inode: the rindex inode
+ */
+static void adjust_fs_space(struct inode *inode)
+{
+ struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+ struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+ struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+ u64 fs_total, new_free;
+
+ /* Total up the file system space, according to the latest rindex. */
+ fs_total = gfs2_ri_total(sdp);
+
+ spin_lock(&sdp->sd_statfs_spin); /* the two statfs totals are read under this lock */
+ if (fs_total > (m_sc->sc_total + l_sc->sc_total))
+ new_free = fs_total - (m_sc->sc_total + l_sc->sc_total); /* growth beyond master + local totals */
+ else
+ new_free = 0; /* clamp: never report a negative growth */
+ spin_unlock(&sdp->sd_statfs_spin);
+ fs_warn(sdp, "File system extended by %llu blocks.\n",
+ (unsigned long long)new_free); /* logged even when new_free is 0 */
+ gfs2_statfs_change(sdp, new_free, new_free, 0); /* called after the lock is dropped; new_free passed for both of the first two deltas */
+}
+
+/**
* gfs2_commit_write - Commit write to a file
* @file: The file to write to
* @page: The page containing the data
@@ -511,6 +537,9 @@
di->di_size = cpu_to_be64(inode->i_size);
}
+ if (inode == sdp->sd_rindex)
+ adjust_fs_space(inode);
+
brelse(dibh);
gfs2_trans_end(sdp);
if (al->al_requested) {
@@ -543,6 +572,23 @@
}
/**
+ * gfs2_set_page_dirty - Page dirtying function
+ * @page: The page to dirty
+ *
+ * Returns: 1 if it dirtied the page, or 0 otherwise
+ */
+
+static int gfs2_set_page_dirty(struct page *page) /* .set_page_dirty hook: tag the page, then dirty its buffers */
+{
+ struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+ struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+
+ if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+ SetPageChecked(page); /* the patched writepage path requires and then clears PageChecked */
+ return __set_page_dirty_buffers(page); /* return value is passed through unchanged */
+}
+
+/**
* gfs2_bmap - Block map function
* @mapping: Address space info
* @lblock: The block to map
@@ -578,6 +624,8 @@
if (bd) {
bd->bd_bh = NULL;
bh->b_private = NULL;
+ if (!bd->bd_ail && list_empty(&bd->bd_le.le_list))
+ kmem_cache_free(gfs2_bufdata_cachep, bd);
}
gfs2_log_unlock(sdp);
@@ -598,6 +646,8 @@
unsigned int curr_off = 0;
BUG_ON(!PageLocked(page));
+ if (offset == 0)
+ ClearPageChecked(page);
if (!page_has_buffers(page))
return;
@@ -728,8 +778,8 @@
return;
fs_warn(sdp, "ip = %llu %llu\n",
- (unsigned long long)ip->i_num.no_formal_ino,
- (unsigned long long)ip->i_num.no_addr);
+ (unsigned long long)ip->i_no_formal_ino,
+ (unsigned long long)ip->i_no_addr);
for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
fs_warn(sdp, "ip->i_cache[%u] = %s\n",
@@ -810,6 +860,7 @@
.sync_page = block_sync_page,
.prepare_write = gfs2_prepare_write,
.commit_write = gfs2_commit_write,
+ .set_page_dirty = gfs2_set_page_dirty,
.bmap = gfs2_bmap,
.invalidatepage = gfs2_invalidatepage,
.releasepage = gfs2_releasepage,
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/dir.h
+++ linux-source-2.6.22-2.6.22/fs/gfs2/dir.h
@@ -16,15 +16,16 @@
struct gfs2_inode;
struct gfs2_inum;
-int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
- struct gfs2_inum_host *inum, unsigned int *type);
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename);
+int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
+ const struct gfs2_inode *ip);
int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
- const struct gfs2_inum_host *inum, unsigned int type);
+ const struct gfs2_inode *ip, unsigned int type);
int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
filldir_t filldir);
int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
- struct gfs2_inum_host *new_inum, unsigned int new_type);
+ const struct gfs2_inode *nip, unsigned int new_type);
int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/lops.h
+++ linux-source-2.6.22-2.6.22/fs/gfs2/lops.h
@@ -13,6 +13,13 @@
#include <linux/list.h>
#include "incore.h"
+#define BUF_OFFSET /* descriptor size rounded up to sizeof(__be64) */ \
+ ((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \
+ ~(sizeof(__be64) - 1))
+#define DATABUF_OFFSET /* descriptor size rounded up to 2 * sizeof(__be64) */ \
+ ((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \
+ ~(2 * sizeof(__be64) - 1))
+
extern const struct gfs2_log_operations gfs2_glock_lops;
extern const struct gfs2_log_operations gfs2_buf_lops;
extern const struct gfs2_log_operations gfs2_revoke_lops;
@@ -21,6 +28,22 @@
extern const struct gfs2_log_operations *gfs2_log_ops[];
+static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
+{
+	const struct gfs2_sb_host *sb = &sdp->sd_sb;
+
+	/* __be64 slots that fit in a block once BUF_OFFSET is set aside */
+	return (sb->sb_bsize - BUF_OFFSET) / sizeof(__be64);
+}
+
+static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
+{
+	const struct gfs2_sb_host *sb = &sdp->sd_sb;
+
+	/* pairs of __be64 that fit in a block past DATABUF_OFFSET */
+	return (sb->sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64));
+}
+
static inline void lops_init_le(struct gfs2_log_element *le,
const struct gfs2_log_operations *lops)
{
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/super.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/super.c
@@ -95,8 +95,8 @@
{
unsigned int x;
- if (sb->sb_header.mh_magic != GFS2_MAGIC ||
- sb->sb_header.mh_type != GFS2_METATYPE_SB) {
+ if (sb->sb_magic != GFS2_MAGIC ||
+ sb->sb_type != GFS2_METATYPE_SB) {
if (!silent)
printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
return -EINVAL;
@@ -174,10 +174,31 @@
return 0;
}
+static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+{
+	const struct gfs2_sb *disk = buf;
+
+	/* Names are copied as raw bytes; numbers go big-endian -> CPU order. */
+	memcpy(sb->sb_lockproto, disk->sb_lockproto, GFS2_LOCKNAME_LEN);
+	memcpy(sb->sb_locktable, disk->sb_locktable, GFS2_LOCKNAME_LEN);
+	sb->sb_magic = be32_to_cpu(disk->sb_header.mh_magic);
+	sb->sb_type = be32_to_cpu(disk->sb_header.mh_type);
+	sb->sb_format = be32_to_cpu(disk->sb_header.mh_format);
+	sb->sb_fs_format = be32_to_cpu(disk->sb_fs_format);
+	sb->sb_multihost_format = be32_to_cpu(disk->sb_multihost_format);
+	sb->sb_bsize = be32_to_cpu(disk->sb_bsize);
+	sb->sb_bsize_shift = be32_to_cpu(disk->sb_bsize_shift);
+	sb->sb_root_dir.no_formal_ino = be64_to_cpu(disk->sb_root_dir.no_formal_ino);
+	sb->sb_root_dir.no_addr = be64_to_cpu(disk->sb_root_dir.no_addr);
+	sb->sb_master_dir.no_formal_ino = be64_to_cpu(disk->sb_master_dir.no_formal_ino);
+	sb->sb_master_dir.no_addr = be64_to_cpu(disk->sb_master_dir.no_addr);
+}
+
/**
* gfs2_read_super - Read the gfs2 super block from disk
- * @sb: The VFS super block
+ * @sdp: The GFS2 super block
* @sector: The location of the super block
+ * @error: The error code to return
*
* This uses the bio functions to read the super block from disk
* because we want to be 100% sure that we never read cached data.
@@ -189,17 +210,19 @@
* the master directory (contains pointers to journals etc) and the
* root directory.
*
- * Returns: A page containing the sb or NULL
+ * Returns: 0 on success or error
*/
-struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
{
+ struct super_block *sb = sdp->sd_vfs;
+ struct gfs2_sb *p;
struct page *page;
struct bio *bio;
page = alloc_page(GFP_KERNEL);
if (unlikely(!page))
- return NULL;
+ return -ENOBUFS;
ClearPageUptodate(page);
ClearPageDirty(page);
@@ -208,7 +231,7 @@
bio = bio_alloc(GFP_KERNEL, 1);
if (unlikely(!bio)) {
__free_page(page);
- return NULL;
+ return -ENOBUFS;
}
bio->bi_sector = sector * (sb->s_blocksize >> 9);
@@ -222,9 +245,13 @@
bio_put(bio);
if (!PageUptodate(page)) {
__free_page(page);
- return NULL;
+ return -EIO;
}
- return page;
+ p = kmap(page);
+ gfs2_sb_in(&sdp->sd_sb, p);
+ kunmap(page);
+ __free_page(page);
+ return 0;
}
/**
@@ -241,19 +268,13 @@
u32 tmp_blocks;
unsigned int x;
int error;
- struct page *page;
- char *sb;
- page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
- if (!page) {
+ error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+ if (error) {
if (!silent)
fs_err(sdp, "can't read superblock\n");
- return -EIO;
+ return error;
}
- sb = kmap(page);
- gfs2_sb_in(&sdp->sd_sb, sb);
- kunmap(page);
- __free_page(page);
error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
if (error)
@@ -360,7 +381,7 @@
name.len = sprintf(buf, "journal%u", sdp->sd_journals);
name.hash = gfs2_disk_hash(name.name, name.len);
- error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
+ error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
if (error == -ENOENT) {
error = 0;
break;
@@ -593,6 +614,24 @@
return error;
}
+static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
+{
+	const struct gfs2_statfs_change *disk = buf;	/* big-endian source */
+
+	sc->sc_dinodes = be64_to_cpu(disk->sc_dinodes);
+	sc->sc_free = be64_to_cpu(disk->sc_free);
+	sc->sc_total = be64_to_cpu(disk->sc_total);
+}
+
+static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
+{
+	struct gfs2_statfs_change *disk = buf;	/* filled in as big-endian */
+
+	disk->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
+	disk->sc_free = cpu_to_be64(sc->sc_free);
+	disk->sc_total = cpu_to_be64(sc->sc_total);
+}
+
int gfs2_statfs_init(struct gfs2_sbd *sdp)
{
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -772,7 +811,7 @@
struct gfs2_statfs_change_host *sc)
{
gfs2_rgrp_verify(rgd);
- sc->sc_total += rgd->rd_ri.ri_data;
+ sc->sc_total += rgd->rd_data;
sc->sc_free += rgd->rd_rg.rg_free;
sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
return 0;
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/glops.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/glops.c
@@ -156,9 +156,9 @@
ip = NULL;
if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
- gfs2_log_flush(gl->gl_sbd, gl);
if (ip)
filemap_fdatawrite(ip->i_inode.i_mapping);
+ gfs2_log_flush(gl->gl_sbd, gl);
gfs2_meta_sync(gl);
if (ip) {
struct address_space *mapping = ip->i_inode.i_mapping;
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/eattr.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/eattr.c
@@ -254,7 +254,7 @@
if (error)
return error;
- error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + RES_DINODE +
+ error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE +
RES_EATTR + RES_STATFS + RES_QUOTA, blks);
if (error)
goto out_gunlock;
@@ -300,7 +300,7 @@
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
- ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -700,7 +700,7 @@
goto out_gunlock_q;
error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
- blks + al->al_rgd->rd_ri.ri_length +
+ blks + al->al_rgd->rd_length +
RES_DINODE + RES_STATFS + RES_QUOTA, 0);
if (error)
goto out_ipres;
@@ -717,7 +717,7 @@
(er->er_mode & S_IFMT));
ip->i_inode.i_mode = er->er_mode;
}
- ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -852,7 +852,7 @@
(ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
ip->i_inode.i_mode = er->er_mode;
}
- ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -1133,7 +1133,7 @@
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
- ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -1352,7 +1352,7 @@
for (x = 0; x < rlist.rl_rgrps; x++) {
struct gfs2_rgrpd *rgd;
rgd = rlist.rl_ghs[x].gh_gl->gl_object;
- rg_blocks += rgd->rd_ri.ri_length;
+ rg_blocks += rgd->rd_length;
}
error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/bmap.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/bmap.c
@@ -718,7 +718,7 @@
for (x = 0; x < rlist.rl_rgrps; x++) {
struct gfs2_rgrpd *rgd;
rgd = rlist.rl_ghs[x].gh_gl->gl_object;
- rg_blocks += rgd->rd_ri.ri_length;
+ rg_blocks += rgd->rd_length;
}
error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
@@ -772,7 +772,7 @@
gfs2_free_data(ip, bstart, blen);
}
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_dinode_out(ip, dibh->b_data);
@@ -824,7 +824,7 @@
goto out_gunlock_q;
error = gfs2_trans_begin(sdp,
- sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
+ sdp->sd_max_height + al->al_rgd->rd_length +
RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
if (error)
goto out_ipres;
@@ -847,7 +847,7 @@
}
ip->i_di.di_size = size;
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
@@ -885,7 +885,6 @@
unsigned blocksize, iblock, length, pos;
struct buffer_head *bh;
struct page *page;
- void *kaddr;
int err;
page = grab_cache_page(mapping, index);
@@ -928,15 +927,13 @@
/* Uhhuh. Read error. Complain and punt. */
if (!buffer_uptodate(bh))
goto unlock;
+ err = 0;
}
if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
gfs2_trans_add_bh(ip->i_gl, bh, 0);
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + offset, 0, length);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, offset, length, KM_USER0);
unlock:
unlock_page(page);
@@ -962,7 +959,7 @@
if (gfs2_is_stuffed(ip)) {
ip->i_di.di_size = size;
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -974,7 +971,7 @@
if (!error) {
ip->i_di.di_size = size;
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1044,10 +1041,10 @@
ip->i_di.di_height = 0;
ip->i_di.di_goal_meta =
ip->i_di.di_goal_data =
- ip->i_num.no_addr;
+ ip->i_no_addr;
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
}
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/recovery.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/recovery.c
@@ -116,6 +116,22 @@
}
}
+static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
+{
+	const struct gfs2_log_header *disk = buf;
+
+	if (disk->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC))
+		return 1;	/* wrong magic: *lh is left untouched */
+	if (disk->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
+		return 1;	/* not a log header block */
+	lh->lh_hash = be32_to_cpu(disk->lh_hash);
+	lh->lh_blkno = be32_to_cpu(disk->lh_blkno);
+	lh->lh_tail = be32_to_cpu(disk->lh_tail);
+	lh->lh_flags = be32_to_cpu(disk->lh_flags);
+	lh->lh_sequence = be64_to_cpu(disk->lh_sequence);
+	return 0;
+}
+
/**
* get_log_header - read the log header for a given segment
* @jd: the journal
@@ -147,12 +163,10 @@
sizeof(u32));
hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
hash ^= (u32)~0;
- gfs2_log_header_in(&lh, bh->b_data);
+ error = gfs2_log_header_in(&lh, bh->b_data);
brelse(bh);
- if (lh.lh_header.mh_magic != GFS2_MAGIC ||
- lh.lh_header.mh_type != GFS2_METATYPE_LH ||
- lh.lh_blkno != blk || lh.lh_hash != hash)
+ if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
return 1;
*head = lh;
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/ops_inode.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/ops_inode.c
@@ -157,7 +157,7 @@
if (error)
goto out_gunlock;
- error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL);
+ error = gfs2_dir_check(dir, &dentry->d_name, NULL);
switch (error) {
case -ENOENT:
break;
@@ -206,7 +206,7 @@
goto out_gunlock_q;
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
- al->al_rgd->rd_ri.ri_length +
+ al->al_rgd->rd_length +
2 * RES_DINODE + RES_STATFS +
RES_QUOTA, 0);
if (error)
@@ -217,8 +217,7 @@
goto out_ipres;
}
- error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
- IF2DT(inode->i_mode));
+ error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
if (error)
goto out_end_trans;
@@ -275,7 +274,7 @@
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
- rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
@@ -420,7 +419,7 @@
dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
- gfs2_inum_out(&dip->i_num, &dent->de_inum);
+ gfs2_inum_out(dip, dent);
dent->de_type = cpu_to_be16(DT_DIR);
gfs2_dinode_out(ip, di);
@@ -472,7 +471,7 @@
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
- rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
error = gfs2_glock_nq_m(3, ghs);
@@ -614,7 +613,7 @@
* this is the case of the target file already existing
* so we unlink before doing the rename
*/
- nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
+ nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
if (nrgd)
gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
}
@@ -653,7 +652,7 @@
if (error)
goto out_gunlock;
- error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL);
+ error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
switch (error) {
case -ENOENT:
error = 0;
@@ -712,7 +711,7 @@
goto out_gunlock_q;
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
- al->al_rgd->rd_ri.ri_length +
+ al->al_rgd->rd_length +
4 * RES_DINODE + 4 * RES_LEAF +
RES_STATFS + RES_QUOTA + 4, 0);
if (error)
@@ -750,7 +749,7 @@
if (error)
goto out_end_trans;
- error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
+ error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR);
if (error)
goto out_end_trans;
} else {
@@ -758,7 +757,7 @@
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
goto out_end_trans;
- ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -768,8 +767,7 @@
if (error)
goto out_end_trans;
- error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
- IF2DT(ip->i_inode.i_mode));
+ error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
if (error)
goto out_end_trans;
@@ -905,8 +903,8 @@
}
error = gfs2_truncatei(ip, attr->ia_size);
- if (error)
- return error;
+ if (error && (inode->i_size != ip->i_di.di_size))
+ i_size_write(inode, ip->i_di.di_size);
return error;
}
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/inode.h
+++ linux-source-2.6.22-2.6.22/fs/gfs2/inode.h
@@ -10,17 +10,17 @@
#ifndef __INODE_DOT_H__
#define __INODE_DOT_H__
-static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
+static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
{
return !ip->i_di.di_height;
}
-static inline int gfs2_is_jdata(struct gfs2_inode *ip)
+static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
{
return ip->i_di.di_flags & GFS2_DIF_JDATA;
}
-static inline int gfs2_is_dir(struct gfs2_inode *ip)
+static inline int gfs2_is_dir(const struct gfs2_inode *ip)
{
return S_ISDIR(ip->i_inode.i_mode);
}
@@ -32,9 +32,25 @@
(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
}
+static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr,
+ u64 no_formal_ino) /* nonzero only if both numbers equal ip's */
+{
+ return ip->i_no_addr == no_addr && ip->i_no_formal_ino == no_formal_ino;
+}
+
+/* Store ip's inode number pair into dent->de_inum in big-endian form. */
+static inline void gfs2_inum_out(const struct gfs2_inode *ip,
+				 struct gfs2_dirent *dent)
+{
+	dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr);
+	dent->de_inum.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+}
+
void gfs2_inode_attr_in(struct gfs2_inode *ip);
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
+void gfs2_set_iop(struct inode *inode);
+struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
+ u64 no_addr, u64 no_formal_ino);
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
int gfs2_inode_refresh(struct gfs2_inode *ip);
@@ -47,12 +63,14 @@
int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
struct gfs2_inode *ip);
int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
- struct gfs2_inode *ip);
+ const struct gfs2_inode *ip);
int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
int gfs2_glock_nq_atime(struct gfs2_holder *gh);
int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
+void gfs2_dinode_print(const struct gfs2_inode *ip);
#endif /* __INODE_DOT_H__ */
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/dir.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/dir.c
@@ -130,7 +130,7 @@
memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
if (ip->i_di.di_size < offset + size)
ip->i_di.di_size = offset + size;
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -228,7 +228,7 @@
if (ip->i_di.di_size < offset + copied)
ip->i_di.di_size = offset + copied;
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1456,7 +1456,7 @@
if (dip->i_di.di_entries != g.offset) {
fs_warn(sdp, "Number of entries corrupt in dir %llu, "
"ip->i_di.di_entries (%u) != g.offset (%u)\n",
- (unsigned long long)dip->i_num.no_addr,
+ (unsigned long long)dip->i_no_addr,
dip->i_di.di_entries,
g.offset);
error = -EIO;
@@ -1488,24 +1488,55 @@
* Returns: errno
*/
-int gfs2_dir_search(struct inode *dir, const struct qstr *name,
- struct gfs2_inum_host *inum, unsigned int *type)
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
{
struct buffer_head *bh;
struct gfs2_dirent *dent;
+ struct inode *inode;
+
+ dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
+ if (dent) {
+ if (IS_ERR(dent))
+ return ERR_PTR(PTR_ERR(dent)); /* pass the search error through */
+ inode = gfs2_inode_lookup(dir->i_sb,
+ be16_to_cpu(dent->de_type),
+ be64_to_cpu(dent->de_inum.no_addr),
+ be64_to_cpu(dent->de_inum.no_formal_ino));
+ brelse(bh); /* release the buffer handed back by the search */
+ return inode; /* whatever gfs2_inode_lookup() produced */
+ }
+ return ERR_PTR(-ENOENT); /* a NULL dent is reported as -ENOENT */
+}
+
+int gfs2_dir_check(struct inode *dir, const struct qstr *name,
+ const struct gfs2_inode *ip)
+{
+ struct buffer_head *bh;
+ struct gfs2_dirent *dent;
+ int ret = -ENOENT; /* also the result when the entry's numbers differ from ip's */
dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
if (dent) {
if (IS_ERR(dent))
return PTR_ERR(dent);
- if (inum)
- gfs2_inum_in(inum, (char *)&dent->de_inum);
- if (type)
- *type = be16_to_cpu(dent->de_type);
+ if (ip) { /* optional: the entry must also refer to ip */
+ if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
+ goto out;
+ if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
+ ip->i_no_formal_ino)
+ goto out;
+ if (unlikely(IF2DT(ip->i_inode.i_mode) !=
+ be16_to_cpu(dent->de_type))) {
+ gfs2_consist_inode(GFS2_I(dir));
+ ret = -EIO; /* numbers match but the recorded type does not */
+ goto out;
+ }
+ }
+ ret = 0; /* entry exists and, if ip was given, matches it */
+out:
brelse(bh);
- return 0;
}
- return -ENOENT;
+ return ret;
}
static int dir_new_leaf(struct inode *inode, const struct qstr *name)
@@ -1565,7 +1596,7 @@
*/
int gfs2_dir_add(struct inode *inode, const struct qstr *name,
- const struct gfs2_inum_host *inum, unsigned type)
+ const struct gfs2_inode *nip, unsigned type)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct buffer_head *bh;
@@ -1580,7 +1611,7 @@
if (IS_ERR(dent))
return PTR_ERR(dent);
dent = gfs2_init_dirent(inode, dent, name, bh);
- gfs2_inum_out(inum, (char *)&dent->de_inum);
+ gfs2_inum_out(nip, dent);
dent->de_type = cpu_to_be16(type);
if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
leaf = (struct gfs2_leaf *)bh->b_data;
@@ -1592,7 +1623,7 @@
break;
gfs2_trans_add_bh(ip->i_gl, bh, 1);
ip->i_di.di_entries++;
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
error = 0;
@@ -1678,7 +1709,7 @@
gfs2_consist_inode(dip);
gfs2_trans_add_bh(dip->i_gl, bh, 1);
dip->i_di.di_entries--;
- dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
mark_inode_dirty(&dip->i_inode);
@@ -1700,7 +1731,7 @@
*/
int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
- struct gfs2_inum_host *inum, unsigned int new_type)
+ const struct gfs2_inode *nip, unsigned int new_type)
{
struct buffer_head *bh;
struct gfs2_dirent *dent;
@@ -1715,7 +1746,7 @@
return PTR_ERR(dent);
gfs2_trans_add_bh(dip->i_gl, bh, 1);
- gfs2_inum_out(inum, (char *)&dent->de_inum);
+ gfs2_inum_out(nip, dent);
dent->de_type = cpu_to_be16(new_type);
if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
@@ -1726,7 +1757,7 @@
gfs2_trans_add_bh(dip->i_gl, bh, 1);
}
- dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+ dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
return 0;
@@ -1867,7 +1898,7 @@
for (x = 0; x < rlist.rl_rgrps; x++) {
struct gfs2_rgrpd *rgd;
rgd = rlist.rl_ghs[x].gh_gl->gl_object;
- rg_blocks += rgd->rd_ri.ri_length;
+ rg_blocks += rgd->rd_length;
}
error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/ops_export.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/ops_export.c
@@ -22,10 +22,13 @@
#include "glops.h"
#include "inode.h"
#include "ops_dentry.h"
-#include "ops_export.h"
+#include "ops_fstype.h"
#include "rgrp.h"
#include "util.h"
+#define GFS2_SMALL_FH_SIZE 4
+#define GFS2_LARGE_FH_SIZE 8
+
static struct dentry *gfs2_decode_fh(struct super_block *sb,
__u32 *p,
int fh_len,
@@ -35,11 +38,8 @@
void *context)
{
__be32 *fh = (__force __be32 *)p;
- struct gfs2_fh_obj fh_obj;
- struct gfs2_inum_host *this, parent;
+ struct gfs2_inum_host inum, parent;
- this = &fh_obj.this;
- fh_obj.imode = DT_UNKNOWN;
memset(&parent, 0, sizeof(struct gfs2_inum));
switch (fh_len) {
@@ -48,18 +48,17 @@
parent.no_formal_ino |= be32_to_cpu(fh[5]);
parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
parent.no_addr |= be32_to_cpu(fh[7]);
- fh_obj.imode = be32_to_cpu(fh[8]);
case GFS2_SMALL_FH_SIZE:
- this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
- this->no_formal_ino |= be32_to_cpu(fh[1]);
- this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
- this->no_addr |= be32_to_cpu(fh[3]);
+ inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
+ inum.no_formal_ino |= be32_to_cpu(fh[1]);
+ inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
+ inum.no_addr |= be32_to_cpu(fh[3]);
break;
default:
return NULL;
}
- return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent,
+ return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent,
acceptable, context);
}
@@ -75,10 +74,10 @@
(connectable && *len < GFS2_LARGE_FH_SIZE))
return 255;
- fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
- fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
- fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32);
- fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
+ fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
+ fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
+ fh[2] = cpu_to_be32(ip->i_no_addr >> 32);
+ fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
*len = GFS2_SMALL_FH_SIZE;
if (!connectable || inode == sb->s_root->d_inode)
@@ -90,13 +89,10 @@
igrab(inode);
spin_unlock(&dentry->d_lock);
- fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
- fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
- fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32);
- fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
-
- fh[8] = cpu_to_be32(inode->i_mode);
- fh[9] = 0; /* pad to double word */
+ fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
+ fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
+ fh[6] = cpu_to_be32(ip->i_no_addr >> 32);
+ fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
*len = GFS2_LARGE_FH_SIZE;
iput(inode);
@@ -144,7 +140,8 @@
ip = GFS2_I(inode);
*name = 0;
- gnfd.inum = ip->i_num;
+ gnfd.inum.no_addr = ip->i_no_addr;
+ gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
gnfd.name = name;
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
@@ -192,8 +189,7 @@
static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
- struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
- struct gfs2_inum_host *inum = &fh_obj->this;
+ struct gfs2_inum_host *inum = inum_obj;
struct gfs2_holder i_gh, ri_gh, rgd_gh;
struct gfs2_rgrpd *rgd;
struct inode *inode;
@@ -202,9 +198,9 @@
/* System files? */
- inode = gfs2_ilookup(sb, inum);
+ inode = gfs2_ilookup(sb, inum->no_addr);
if (inode) {
- if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) {
+ if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
iput(inode);
return ERR_PTR(-ESTALE);
}
@@ -236,7 +232,9 @@
gfs2_glock_dq_uninit(&rgd_gh);
gfs2_glock_dq_uninit(&ri_gh);
- inode = gfs2_inode_lookup(sb, inum, fh_obj->imode);
+ inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
+ inum->no_addr,
+ 0);
if (!inode)
goto fail;
if (IS_ERR(inode)) {
@@ -250,6 +248,15 @@
goto fail;
}
+ /* Pick up the works we bypass in gfs2_inode_lookup */
+ if (inode->i_state & I_NEW)
+ gfs2_set_iop(inode);
+
+ if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
+ iput(inode);
+ goto fail;
+ }
+
error = -EIO;
if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
iput(inode);
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/util.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/util.c
@@ -115,8 +115,8 @@
"GFS2: fsid=%s: inode = %llu %llu\n"
"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
sdp->sd_fsname,
- sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino,
- (unsigned long long)ip->i_num.no_addr,
+ sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino,
+ (unsigned long long)ip->i_no_addr,
sdp->sd_fsname, function, file, line);
return rv;
}
@@ -137,7 +137,7 @@
"GFS2: fsid=%s: RG = %llu\n"
"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
sdp->sd_fsname,
- sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr,
+ sdp->sd_fsname, (unsigned long long)rgd->rd_addr,
sdp->sd_fsname, function, file, line);
return rv;
}
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/rgrp.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/rgrp.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
@@ -28,6 +28,7 @@
#include "ops_file.h"
#include "util.h"
#include "log.h"
+#include "inode.h"
#define BFITNOENT ((u32)~0)
@@ -50,6 +51,9 @@
1, 0, 0, 0
};
+static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
+ unsigned char old_state, unsigned char new_state);
+
/**
* gfs2_setbit - Set a bit in the bitmaps
* @buffer: the buffer that holds the bitmaps
@@ -204,7 +208,7 @@
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
struct gfs2_bitmap *bi = NULL;
- u32 length = rgd->rd_ri.ri_length;
+ u32 length = rgd->rd_length;
u32 count[4], tmp;
int buf, x;
@@ -227,7 +231,7 @@
return;
}
- tmp = rgd->rd_ri.ri_data -
+ tmp = rgd->rd_data -
rgd->rd_rg.rg_free -
rgd->rd_rg.rg_dinodes;
if (count[1] + count[2] != tmp) {
@@ -253,10 +257,10 @@
}
-static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block)
+static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
{
- u64 first = ri->ri_data0;
- u64 last = first + ri->ri_data;
+ u64 first = rgd->rd_data0;
+ u64 last = first + rgd->rd_data;
return first <= block && block < last;
}
@@ -275,7 +279,7 @@
spin_lock(&sdp->sd_rindex_spin);
list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
- if (rgrp_contains_block(&rgd->rd_ri, blk)) {
+ if (rgrp_contains_block(rgd, blk)) {
list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
spin_unlock(&sdp->sd_rindex_spin);
return rgd;
@@ -354,6 +358,15 @@
mutex_unlock(&sdp->sd_rindex_mutex);
}
+static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
+{ /* labels use the ri_* names; the values are read from rgd->rd_* */
+ printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
+ printk(KERN_INFO " ri_length = %u\n", rgd->rd_length);
+ printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
+ printk(KERN_INFO " ri_data = %u\n", rgd->rd_data);
+ printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes);
+}
+
/**
* gfs2_compute_bitstructs - Compute the bitmap sizes
* @rgd: The resource group descriptor
@@ -367,7 +380,7 @@
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
struct gfs2_bitmap *bi;
- u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
+ u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
u32 bytes_left, bytes;
int x;
@@ -378,7 +391,7 @@
if (!rgd->rd_bits)
return -ENOMEM;
- bytes_left = rgd->rd_ri.ri_bitbytes;
+ bytes_left = rgd->rd_bitbytes;
for (x = 0; x < length; x++) {
bi = rgd->rd_bits + x;
@@ -399,14 +412,14 @@
} else if (x + 1 == length) {
bytes = bytes_left;
bi->bi_offset = sizeof(struct gfs2_meta_header);
- bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+ bi->bi_start = rgd->rd_bitbytes - bytes_left;
bi->bi_len = bytes;
/* other blocks */
} else {
bytes = sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_meta_header);
bi->bi_offset = sizeof(struct gfs2_meta_header);
- bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+ bi->bi_start = rgd->rd_bitbytes - bytes_left;
bi->bi_len = bytes;
}
@@ -418,9 +431,9 @@
return -EIO;
}
bi = rgd->rd_bits + (length - 1);
- if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
+ if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
if (gfs2_consist_rgrpd(rgd)) {
- gfs2_rindex_print(&rgd->rd_ri);
+ gfs2_rindex_print(rgd);
fs_err(sdp, "start=%u len=%u offset=%u\n",
bi->bi_start, bi->bi_len, bi->bi_offset);
}
@@ -431,9 +444,104 @@
}
/**
- * gfs2_ri_update - Pull in a new resource index from the disk
+ * gfs2_ri_total - Total up the file system space, according to the rindex.
+ * @sdp: The GFS2 superblock
+ */
+u64 gfs2_ri_total(struct gfs2_sbd *sdp)
+{ /* Returns the sum of ri_data over every complete on-disk rindex entry. */
+ u64 total_data = 0;
+ struct inode *inode = sdp->sd_rindex;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ char buf[sizeof(struct gfs2_rindex)];
+ struct file_ra_state ra_state;
+ int error, rgrps;
+
+ mutex_lock(&sdp->sd_rindex_mutex); /* held for the whole scan */
+ file_ra_state_init(&ra_state, inode->i_mapping);
+ for (rgrps = 0;; rgrps++) {
+ loff_t pos = rgrps * sizeof(struct gfs2_rindex);
+
+ if (pos + sizeof(struct gfs2_rindex) > ip->i_di.di_size) /* partial */
+ break; /* '>' (not '>=') so the last full entry is counted */
+ error = gfs2_internal_read(ip, &ra_state, buf, &pos,
+ sizeof(struct gfs2_rindex));
+ if (error != sizeof(struct gfs2_rindex)) /* short read or error */
+ break;
+ total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
+ }
+ mutex_unlock(&sdp->sd_rindex_mutex);
+ return total_data;
+}
+
+static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
+{ /* Copy the big-endian rindex record at @buf into @rgd in CPU byte order. */
+ const struct gfs2_rindex *str = buf;
+
+ rgd->rd_addr = be64_to_cpu(str->ri_addr);
+ rgd->rd_length = be32_to_cpu(str->ri_length);
+ rgd->rd_data0 = be64_to_cpu(str->ri_data0);
+ rgd->rd_data = be32_to_cpu(str->ri_data);
+ rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes);
+}
+
+/**
+ * read_rindex_entry - Pull in a new resource index entry from the disk
* @gl: The glock covering the rindex inode
*
+ * Returns: 0 on success, error code otherwise
+ */
+
+static int read_rindex_entry(struct gfs2_inode *ip,
+ struct file_ra_state *ra_state)
+{ /* Reads entry number sdp->sd_rgrps of the rindex and builds its rgd. */
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
+ char buf[sizeof(struct gfs2_rindex)];
+ int error;
+ struct gfs2_rgrpd *rgd;
+
+ error = gfs2_internal_read(ip, ra_state, buf, &pos,
+ sizeof(struct gfs2_rindex));
+ if (!error) /* zero bytes read: reported as success, no rgd created */
+ return 0;
+ if (error != sizeof(struct gfs2_rindex)) {
+ if (error > 0) /* short read of a partial entry */
+ error = -EIO;
+ return error;
+ }
+
+ rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
+ error = -ENOMEM;
+ if (!rgd)
+ return error;
+
+ mutex_init(&rgd->rd_mutex);
+ lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
+ rgd->rd_sbd = sdp;
+
+ /* NOTE(review): rgd is on both lists before the steps below can fail;
+ list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
+ list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
+ presumably the callers' clear_rgrpdi() releases it - confirm. */
+ gfs2_rindex_in(rgd, buf);
+ error = compute_bitstructs(rgd);
+ if (error)
+ return error;
+
+ error = gfs2_glock_get(sdp, rgd->rd_addr,
+ &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
+ if (error)
+ return error;
+
+ rgd->rd_gl->gl_object = rgd;
+ rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+ rgd->rd_flags |= GFS2_RDF_CHECK; /* get_local_rgrp() tests this flag */
+ return error;
+}
+
+/**
+ * gfs2_ri_update - Pull in a new resource index from the disk
+ * @ip: pointer to the rindex inode
+ *
* Returns: 0 on successful update, error code otherwise
*/
@@ -441,13 +549,11 @@
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct inode *inode = &ip->i_inode;
- struct gfs2_rgrpd *rgd;
- char buf[sizeof(struct gfs2_rindex)];
struct file_ra_state ra_state;
- u64 junk = ip->i_di.di_size;
+ u64 rgrp_count = ip->i_di.di_size;
int error;
- if (do_div(junk, sizeof(struct gfs2_rindex))) {
+ if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
gfs2_consist_inode(ip);
return -EIO;
}
@@ -455,50 +561,50 @@
clear_rgrpdi(sdp);
file_ra_state_init(&ra_state, inode->i_mapping);
- for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
- loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
- error = gfs2_internal_read(ip, &ra_state, buf, &pos,
- sizeof(struct gfs2_rindex));
- if (!error)
- break;
- if (error != sizeof(struct gfs2_rindex)) {
- if (error > 0)
- error = -EIO;
- goto fail;
+ for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
+ error = read_rindex_entry(ip, &ra_state);
+ if (error) {
+ clear_rgrpdi(sdp);
+ return error;
}
+ }
- rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
- error = -ENOMEM;
- if (!rgd)
- goto fail;
-
- mutex_init(&rgd->rd_mutex);
- lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
- rgd->rd_sbd = sdp;
-
- list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
- list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
-
- gfs2_rindex_in(&rgd->rd_ri, buf);
- error = compute_bitstructs(rgd);
- if (error)
- goto fail;
+ sdp->sd_rindex_vn = ip->i_gl->gl_vn;
+ return 0;
+}
- error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
- &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
- if (error)
- goto fail;
+/**
+ * gfs2_ri_update_special - Pull in a new resource index from the disk
+ *
+ * This is a special version that's safe to call from gfs2_inplace_reserve_i.
+ * In this case we know that we don't have any resource groups in memory yet.
+ *
+ * @ip: pointer to the rindex inode
+ *
+ * Returns: 0 on successful update, error code otherwise
+ */
+static int gfs2_ri_update_special(struct gfs2_inode *ip)
+{ /* Reads every complete rindex entry; undoes all of them on any error. */
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct inode *inode = &ip->i_inode;
+ struct file_ra_state ra_state;
+ int error;
- rgd->rd_gl->gl_object = rgd;
- rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+ file_ra_state_init(&ra_state, inode->i_mapping);
+ for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
+ /* Ignore partials: stop once the next entry would pass di_size */
+ if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
+ ip->i_di.di_size)
+ break;
+ error = read_rindex_entry(ip, &ra_state);
+ if (error) {
+ clear_rgrpdi(sdp);
+ return error;
+ }
}
sdp->sd_rindex_vn = ip->i_gl->gl_vn;
return 0;
-
-fail:
- clear_rgrpdi(sdp);
- return error;
}
/**
@@ -543,6 +649,28 @@
return error;
}
+static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
+{ /* Decode the big-endian rgrp header at @buf into the host-order @rg. */
+ const struct gfs2_rgrp *str = buf;
+
+ rg->rg_flags = be32_to_cpu(str->rg_flags);
+ rg->rg_free = be32_to_cpu(str->rg_free);
+ rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
+ rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
+}
+
+static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
+{ /* Encode host-order @rg into the big-endian rgrp header at @buf. */
+ struct gfs2_rgrp *str = buf;
+
+ str->rg_flags = cpu_to_be32(rg->rg_flags);
+ str->rg_free = cpu_to_be32(rg->rg_free);
+ str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
+ str->__pad = cpu_to_be32(0); /* padding is written as zero */
+ str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
+ memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); /* zeroed too */
+}
+
/**
* gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
* @rgd: the struct gfs2_rgrpd describing the RG to read in
@@ -557,7 +685,7 @@
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
struct gfs2_glock *gl = rgd->rd_gl;
- unsigned int length = rgd->rd_ri.ri_length;
+ unsigned int length = rgd->rd_length;
struct gfs2_bitmap *bi;
unsigned int x, y;
int error;
@@ -575,7 +703,7 @@
for (x = 0; x < length; x++) {
bi = rgd->rd_bits + x;
- error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh);
+ error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
if (error)
goto fail;
}
@@ -637,7 +765,7 @@
void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
- int x, length = rgd->rd_ri.ri_length;
+ int x, length = rgd->rd_length;
spin_lock(&sdp->sd_rindex_spin);
gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
@@ -660,7 +788,7 @@
void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
- unsigned int length = rgd->rd_ri.ri_length;
+ unsigned int length = rgd->rd_length;
unsigned int x;
for (x = 0; x < length; x++) {
@@ -722,6 +850,38 @@
}
/**
+ * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
+ * @rgd: The rgrp
+ * @last_unlinked: address of the last unlinked block tried (updated here)
+ * Returns: The inode, if one has been found
+ */
+
+static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
+{ /* Returns a looked-up unlinked inode, or NULL once none are left to try. */
+ struct inode *inode;
+ u32 goal, blk; /* goal: where to resume; blk: what the search returned */
+ u64 no_addr;
+
+ for (goal = 0; goal < rgd->rd_data; goal = blk + 1) { /* always advances */
+ blk = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
+ GFS2_BLKST_UNLINKED);
+ if (blk == 0 || blk < goal) /* 0 = not found; < goal = seen before */
+ break;
+ no_addr = blk + rgd->rd_data0;
+ if (no_addr <= *last_unlinked) /* tried on an earlier call */
+ continue;
+ *last_unlinked = no_addr;
+ inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
+ no_addr, -1);
+ if (!IS_ERR(inode))
+ return inode;
+ }
+
+ rgd->rd_flags &= ~GFS2_RDF_CHECK; /* nothing left to try in this rgrp */
+ return NULL;
+}
+
+/**
* recent_rgrp_first - get first RG from "recent" list
* @sdp: The GFS2 superblock
* @rglast: address of the rgrp used last
@@ -743,7 +903,7 @@
goto first;
list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
- if (rgd->rd_ri.ri_addr == rglast)
+ if (rgd->rd_addr == rglast)
goto out;
}
@@ -882,8 +1042,9 @@
* Returns: errno
*/
-static int get_local_rgrp(struct gfs2_inode *ip)
+static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
{
+ struct inode *inode = NULL;
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd, *begin = NULL;
struct gfs2_alloc *al = &ip->i_alloc;
@@ -903,7 +1064,11 @@
case 0:
if (try_rgrp_fit(rgd, al))
goto out;
+ if (rgd->rd_flags & GFS2_RDF_CHECK)
+ inode = try_rgrp_unlink(rgd, last_unlinked);
gfs2_glock_dq_uninit(&al->al_rgd_gh);
+ if (inode)
+ return inode;
rgd = recent_rgrp_next(rgd, 1);
break;
@@ -912,7 +1077,7 @@
break;
default:
- return error;
+ return ERR_PTR(error);
}
}
@@ -927,7 +1092,11 @@
case 0:
if (try_rgrp_fit(rgd, al))
goto out;
+ if (rgd->rd_flags & GFS2_RDF_CHECK)
+ inode = try_rgrp_unlink(rgd, last_unlinked);
gfs2_glock_dq_uninit(&al->al_rgd_gh);
+ if (inode)
+ return inode;
break;
case GLR_TRYFAILED:
@@ -935,7 +1104,7 @@
break;
default:
- return error;
+ return ERR_PTR(error);
}
rgd = gfs2_rgrpd_get_next(rgd);
@@ -944,7 +1113,7 @@
if (rgd == begin) {
if (++loops >= 3)
- return -ENOSPC;
+ return ERR_PTR(-ENOSPC);
if (!skipped)
loops++;
flags = 0;
@@ -954,7 +1123,7 @@
}
out:
- ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
+ ip->i_last_rg_alloc = rgd->rd_addr;
if (begin) {
recent_rgrp_add(rgd);
@@ -964,7 +1133,7 @@
forward_rgrp_set(sdp, rgd);
}
- return 0;
+ return NULL;
}
/**
@@ -978,19 +1147,33 @@
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_alloc *al = &ip->i_alloc;
- int error;
+ struct inode *inode;
+ int error = 0;
+ u64 last_unlinked = 0;
if (gfs2_assert_warn(sdp, al->al_requested))
return -EINVAL;
- error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+try_again:
+ /* We need to hold the rindex unless the inode we're using is
+ the rindex itself, in which case it's already held. */
+ if (ip != GFS2_I(sdp->sd_rindex))
+ error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+ else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */
+ error = gfs2_ri_update_special(ip);
+
if (error)
return error;
- error = get_local_rgrp(ip);
- if (error) {
- gfs2_glock_dq_uninit(&al->al_ri_gh);
- return error;
+ inode = get_local_rgrp(ip, &last_unlinked);
+ if (inode) {
+ if (ip != GFS2_I(sdp->sd_rindex))
+ gfs2_glock_dq_uninit(&al->al_ri_gh);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+ iput(inode);
+ gfs2_log_flush(sdp, NULL);
+ goto try_again;
}
al->al_file = file;
@@ -1019,7 +1202,8 @@
al->al_rgd = NULL;
gfs2_glock_dq_uninit(&al->al_rgd_gh);
- gfs2_glock_dq_uninit(&al->al_ri_gh);
+ if (ip != GFS2_I(sdp->sd_rindex))
+ gfs2_glock_dq_uninit(&al->al_ri_gh);
}
/**
@@ -1037,8 +1221,8 @@
unsigned int buf;
unsigned char type;
- length = rgd->rd_ri.ri_length;
- rgrp_block = block - rgd->rd_ri.ri_data0;
+ length = rgd->rd_length;
+ rgrp_block = block - rgd->rd_data0;
for (buf = 0; buf < length; buf++) {
bi = rgd->rd_bits + buf;
@@ -1077,10 +1261,10 @@
*/
static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
- unsigned char old_state, unsigned char new_state)
+ unsigned char old_state, unsigned char new_state)
{
struct gfs2_bitmap *bi = NULL;
- u32 length = rgd->rd_ri.ri_length;
+ u32 length = rgd->rd_length;
u32 blk = 0;
unsigned int buf, x;
@@ -1118,17 +1302,18 @@
goal = 0;
}
- if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
- blk = 0;
+ if (old_state != new_state) {
+ gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
- gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
- gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
- bi->bi_len, blk, new_state);
- if (bi->bi_clone)
- gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+ gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+ gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
bi->bi_len, blk, new_state);
+ if (bi->bi_clone)
+ gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+ bi->bi_len, blk, new_state);
+ }
- return bi->bi_start * GFS2_NBBY + blk;
+ return (blk == BFITNOENT) ? 0 : (bi->bi_start * GFS2_NBBY) + blk;
}
/**
@@ -1156,9 +1341,9 @@
return NULL;
}
- length = rgd->rd_ri.ri_length;
+ length = rgd->rd_length;
- rgrp_blk = bstart - rgd->rd_ri.ri_data0;
+ rgrp_blk = bstart - rgd->rd_data0;
while (blen--) {
for (buf = 0; buf < length; buf++) {
@@ -1202,15 +1387,15 @@
u32 goal, blk;
u64 block;
- if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
- goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
+ if (rgrp_contains_block(rgd, ip->i_di.di_goal_data))
+ goal = ip->i_di.di_goal_data - rgd->rd_data0;
else
goal = rgd->rd_last_alloc_data;
blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
rgd->rd_last_alloc_data = blk;
- block = rgd->rd_ri.ri_data0 + blk;
+ block = rgd->rd_data0 + blk;
ip->i_di.di_goal_data = block;
gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1246,15 +1431,15 @@
u32 goal, blk;
u64 block;
- if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
- goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
+ if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta))
+ goal = ip->i_di.di_goal_meta - rgd->rd_data0;
else
goal = rgd->rd_last_alloc_meta;
blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
rgd->rd_last_alloc_meta = blk;
- block = rgd->rd_ri.ri_data0 + blk;
+ block = rgd->rd_data0 + blk;
ip->i_di.di_goal_meta = block;
gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1296,7 +1481,7 @@
rgd->rd_last_alloc_meta = blk;
- block = rgd->rd_ri.ri_data0 + blk;
+ block = rgd->rd_data0 + blk;
gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
rgd->rd_rg.rg_free--;
@@ -1379,7 +1564,7 @@
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_rgrpd *rgd;
- u64 blkno = ip->i_num.no_addr;
+ u64 blkno = ip->i_no_addr;
rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
if (!rgd)
@@ -1414,9 +1599,9 @@
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
- gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
+ gfs2_free_uninit_di(rgd, ip->i_no_addr);
gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
- gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
+ gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}
/**
--- linux-source-2.6.22-2.6.22.orig/fs/gfs2/lops.c
+++ linux-source-2.6.22-2.6.22/fs/gfs2/lops.c
@@ -17,6 +17,7 @@
#include "gfs2.h"
#include "incore.h"
+#include "inode.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
@@ -117,15 +118,13 @@
struct gfs2_log_descriptor *ld;
struct gfs2_bufdata *bd1 = NULL, *bd2;
unsigned int total = sdp->sd_log_num_buf;
- unsigned int offset = sizeof(struct gfs2_log_descriptor);
+ unsigned int offset = BUF_OFFSET;
unsigned int limit;
unsigned int num;
unsigned n;
__be64 *ptr;
- offset += sizeof(__be64) - 1;
- offset &= ~(sizeof(__be64) - 1);
- limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
+ limit = buf_limit(sdp);
/* for 4k blocks, limit = 503 */
bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
@@ -134,7 +133,6 @@
if (total > limit)
num = limit;
bh = gfs2_log_get_buf(sdp);
- sdp->sd_log_num_hdrs++;
ld = (struct gfs2_log_descriptor *)bh->b_data;
ptr = (__be64 *)(bh->b_data + offset);
ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -469,25 +467,28 @@
struct gfs2_inode *ip = GFS2_I(mapping->host);
gfs2_log_lock(sdp);
+ if (!list_empty(&bd->bd_list_tr)) {
+ gfs2_log_unlock(sdp);
+ return;
+ }
tr->tr_touched = 1;
- if (list_empty(&bd->bd_list_tr) &&
- (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
+ if (gfs2_is_jdata(ip)) {
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
- gfs2_log_unlock(sdp);
- gfs2_pin(sdp, bd->bd_bh);
- tr->tr_num_buf_new++;
- } else {
- gfs2_log_unlock(sdp);
}
+ gfs2_log_unlock(sdp);
+ if (!list_empty(&le->le_list))
+ return;
+
gfs2_trans_add_gl(bd->bd_gl);
- gfs2_log_lock(sdp);
- if (list_empty(&le->le_list)) {
- if (ip->i_di.di_flags & GFS2_DIF_JDATA)
- sdp->sd_log_num_jdata++;
- sdp->sd_log_num_databuf++;
- list_add(&le->le_list, &sdp->sd_log_le_databuf);
+ if (gfs2_is_jdata(ip)) {
+ sdp->sd_log_num_jdata++;
+ gfs2_pin(sdp, bd->bd_bh);
+ tr->tr_num_databuf_new++;
}
+ sdp->sd_log_num_databuf++;
+ gfs2_log_lock(sdp);
+ list_add(&le->le_list, &sdp->sd_log_le_databuf);
gfs2_log_unlock(sdp);
}
@@ -520,7 +521,6 @@
LIST_HEAD(started);
struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
struct buffer_head *bh = NULL,*bh1 = NULL;
- unsigned int offset = sizeof(struct gfs2_log_descriptor);
struct gfs2_log_descriptor *ld;
unsigned int limit;
unsigned int total_dbuf = sdp->sd_log_num_databuf;
@@ -528,9 +528,7 @@
unsigned int num, n;
__be64 *ptr = NULL;
- offset += 2*sizeof(__be64) - 1;
- offset &= ~(2*sizeof(__be64) - 1);
- limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
+ limit = databuf_limit(sdp);
/*
* Start writing ordered buffers, write journaled buffers
@@ -581,10 +579,10 @@
gfs2_log_unlock(sdp);
if (!bh) {
bh = gfs2_log_get_buf(sdp);
- sdp->sd_log_num_hdrs++;
ld = (struct gfs2_log_descriptor *)
bh->b_data;
- ptr = (__be64 *)(bh->b_data + offset);
+ ptr = (__be64 *)(bh->b_data +
+ DATABUF_OFFSET);
ld->ld_header.mh_magic =
cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type =
@@ -605,7 +603,7 @@
if (unlikely(magic != 0))
set_buffer_escaped(bh1);
gfs2_log_lock(sdp);
- if (n++ > num)
+ if (++n >= num)
break;
} else if (!bh1) {
total_dbuf--;
@@ -622,6 +620,7 @@
}
gfs2_log_unlock(sdp);
if (bh) {
+ set_buffer_mapped(bh);
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
bh = NULL;
--- linux-source-2.6.22-2.6.22.orig/fs/locks.c
+++ linux-source-2.6.22-2.6.22/fs/locks.c
@@ -786,7 +786,7 @@
if (request->fl_flags & FL_ACCESS)
goto out;
locks_copy_lock(new_fl, request);
- locks_insert_lock(&inode->i_flock, new_fl);
+ locks_insert_lock(before, new_fl);
new_fl = NULL;
error = 0;
@@ -1733,6 +1733,7 @@
struct file_lock *file_lock = locks_alloc_lock();
struct flock flock;
struct inode *inode;
+ struct file *f;
int error;
if (file_lock == NULL)
@@ -1803,7 +1804,15 @@
* Attempt to detect a close/fcntl race and recover by
* releasing the lock that was just acquired.
*/
- if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
+ /*
+ * we need that spin_lock here - it prevents reordering between
+ * update of inode->i_flock and check for it done in close().
+ * rcu_read_lock() wouldn't do.
+ */
+ spin_lock(&current->files->file_lock);
+ f = fcheck(fd);
+ spin_unlock(&current->files->file_lock);
+ if (!error && f != filp && flock.l_type != F_UNLCK) {
flock.l_type = F_UNLCK;
goto again;
}
@@ -1859,6 +1868,7 @@
struct file_lock *file_lock = locks_alloc_lock();
struct flock64 flock;
struct inode *inode;
+ struct file *f;
int error;
if (file_lock == NULL)
@@ -1929,7 +1939,10 @@
* Attempt to detect a close/fcntl race and recover by
* releasing the lock that was just acquired.
*/
- if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
+ spin_lock(&current->files->file_lock);
+ f = fcheck(fd);
+ spin_unlock(&current->files->file_lock);
+ if (!error && f != filp && flock.l_type != F_UNLCK) {
flock.l_type = F_UNLCK;
goto again;
}
--- linux-source-2.6.22-2.6.22.orig/fs/hpfs/namei.c
+++ linux-source-2.6.22-2.6.22/fs/hpfs/namei.c
@@ -426,7 +426,7 @@
/*printk("HPFS: truncating file before delete.\n");*/
newattrs.ia_size = 0;
newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
- err = notify_change(dentry, &newattrs);
+ err = notify_change(dentry, NULL, &newattrs);
put_write_access(inode);
if (!err)
goto again;
--- linux-source-2.6.22-2.6.22.orig/fs/ext4/extents.c
+++ linux-source-2.6.22-2.6.22/fs/ext4/extents.c
@@ -1445,7 +1445,7 @@
static void
ext4_ext_put_in_cache(struct inode *inode, __u32 block,
- __u32 len, __u32 start, int type)
+ __u32 len, ext4_fsblk_t start, int type)
{
struct ext4_ext_cache *cex;
BUG_ON(len == 0);
--- linux-source-2.6.22-2.6.22.orig/fs/ext4/namei.c
+++ linux-source-2.6.22-2.6.22/fs/ext4/namei.c
@@ -140,7 +140,8 @@
struct dx_map_entry
{
u32 hash;
- u32 offs;
+ u16 offs;
+ u16 size;
};
#ifdef CONFIG_EXT4_INDEX
@@ -379,13 +380,28 @@
entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +439,15 @@
if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +455,10 @@
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.", dir->i_ino);
return NULL;
}
@@ -671,6 +698,10 @@
* Directory block splitting, compacting
*/
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
{
@@ -684,7 +715,8 @@
ext4fs_dirhash(de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
- map_tail->offs = (u32) ((char *) de - base);
+ map_tail->offs = (u16) ((char *) de - base);
+ map_tail->size = le16_to_cpu(de->rec_len);
count++;
cond_resched();
}
@@ -694,6 +726,7 @@
return count;
}
+/* Sort map by hash value */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1079,6 +1112,10 @@
}
#ifdef CONFIG_EXT4_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
static struct ext4_dir_entry_2 *
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
{
@@ -1097,6 +1134,10 @@
return (struct ext4_dir_entry_2 *) (to - rec_len);
}
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
{
struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
@@ -1119,6 +1160,11 @@
return prev;
}
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
struct dx_hash_info *hinfo, int *error)
@@ -1130,7 +1176,7 @@
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
- unsigned split;
+ unsigned split, move, size, i;
struct ext4_dir_entry_2 *de = NULL, *de2;
int err = 0;
@@ -1158,8 +1204,19 @@
count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
- split = count/2; // need to adjust to actual middle
dx_sort_map (map, count);
+ /* Split the existing block in the middle, size-wise */
+ size = 0;
+ move = 0;
+ for (i = count-1; i >= 0; i--) {
+ /* is more than half of this entry in 2nd half of the block? */
+ if (size + map[i].size/2 > blocksize/2)
+ break;
+ size += map[i].size;
+ move++;
+ }
+ /* map index at which we will split */
+ split = count - move;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n",
--- linux-source-2.6.22-2.6.22.orig/fs/stat.c
+++ linux-source-2.6.22-2.6.22/fs/stat.c
@@ -306,7 +306,7 @@
error = -EINVAL;
if (inode->i_op && inode->i_op->readlink) {
- error = security_inode_readlink(nd.dentry);
+ error = security_inode_readlink(nd.dentry, nd.mnt);
if (!error) {
touch_atime(nd.mnt, nd.dentry);
error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
--- linux-source-2.6.22-2.6.22.orig/fs/nfsd/nfs3acl.c
+++ linux-source-2.6.22-2.6.22/fs/nfsd/nfs3acl.c
@@ -37,7 +37,7 @@
fh = fh_copy(&resp->fh, &argp->fh);
if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
- RETURN_STATUS(nfserr_inval);
+ RETURN_STATUS(nfserr);
if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
RETURN_STATUS(nfserr_inval);
--- linux-source-2.6.22-2.6.22.orig/fs/nfsd/nfsfh.c
+++ linux-source-2.6.22-2.6.22/fs/nfsd/nfsfh.c
@@ -565,13 +565,23 @@
case FSID_DEV:
case FSID_ENCODE_DEV:
case FSID_MAJOR_MINOR:
- return FSIDSOURCE_DEV;
+ if (fhp->fh_export->ex_dentry->d_inode->i_sb->s_type->fs_flags
+ & FS_REQUIRES_DEV)
+ return FSIDSOURCE_DEV;
+ break;
case FSID_NUM:
- return FSIDSOURCE_FSID;
- default:
if (fhp->fh_export->ex_flags & NFSEXP_FSID)
return FSIDSOURCE_FSID;
- else
- return FSIDSOURCE_UUID;
+ break;
+ default:
+ break;
}
+ /* either a UUID type filehandle, or the filehandle doesn't
+ * match the export.
+ */
+ if (fhp->fh_export->ex_flags & NFSEXP_FSID)
+ return FSIDSOURCE_FSID;
+ if (fhp->fh_export->ex_uuid)
+ return FSIDSOURCE_UUID;
+ return FSIDSOURCE_DEV;
}
--- linux-source-2.6.22-2.6.22.orig/fs/nfsd/nfs2acl.c
+++ linux-source-2.6.22-2.6.22/fs/nfsd/nfs2acl.c
@@ -41,7 +41,7 @@
fh = fh_copy(&resp->fh, &argp->fh);
if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
- RETURN_STATUS(nfserr_inval);
+ RETURN_STATUS(nfserr);
if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
RETURN_STATUS(nfserr_inval);
--- linux-source-2.6.22-2.6.22.orig/fs/nfsd/vfs.c
+++ linux-source-2.6.22-2.6.22/fs/nfsd/vfs.c
@@ -358,7 +358,7 @@
err = nfserr_notsync;
if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
fh_lock(fhp);
- host_err = notify_change(dentry, iap);
+ host_err = notify_change(dentry, fhp->fh_export->ex_mnt, iap);
err = nfserrno(host_err);
fh_unlock(fhp);
}
@@ -378,11 +378,12 @@
#if defined(CONFIG_NFSD_V2_ACL) || \
defined(CONFIG_NFSD_V3_ACL) || \
defined(CONFIG_NFSD_V4)
-static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
+static ssize_t nfsd_getxattr(struct dentry *dentry, struct vfsmount *mnt,
+ char *key, void **buf)
{
ssize_t buflen;
- buflen = vfs_getxattr(dentry, key, NULL, 0);
+ buflen = vfs_getxattr(dentry, mnt, key, NULL, 0, NULL);
if (buflen <= 0)
return buflen;
@@ -390,13 +391,14 @@
if (!*buf)
return -ENOMEM;
- return vfs_getxattr(dentry, key, *buf, buflen);
+ return vfs_getxattr(dentry, mnt, key, *buf, buflen, NULL);
}
#endif
#if defined(CONFIG_NFSD_V4)
static int
-set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
+set_nfsv4_acl_one(struct dentry *dentry, struct vfsmount *mnt,
+ struct posix_acl *pacl, char *key)
{
int len;
size_t buflen;
@@ -415,7 +417,7 @@
goto out;
}
- error = vfs_setxattr(dentry, key, buf, len, 0);
+ error = vfs_setxattr(dentry, mnt, key, buf, len, 0, NULL);
out:
kfree(buf);
return error;
@@ -428,6 +430,7 @@
__be32 error;
int host_error;
struct dentry *dentry;
+ struct vfsmount *mnt;
struct inode *inode;
struct posix_acl *pacl = NULL, *dpacl = NULL;
unsigned int flags = 0;
@@ -438,6 +441,7 @@
goto out;
dentry = fhp->fh_dentry;
+ mnt = fhp->fh_export->ex_mnt;
inode = dentry->d_inode;
if (S_ISDIR(inode->i_mode))
flags = NFS4_ACL_DIR;
@@ -449,12 +453,14 @@
} else if (host_error < 0)
goto out_nfserr;
- host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
+ host_error = set_nfsv4_acl_one(dentry, mnt, pacl,
+ POSIX_ACL_XATTR_ACCESS);
if (host_error < 0)
goto out_nfserr;
if (S_ISDIR(inode->i_mode)) {
- host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
+ host_error = set_nfsv4_acl_one(dentry, mnt, dpacl,
+ POSIX_ACL_XATTR_DEFAULT);
if (host_error < 0)
goto out_nfserr;
}
@@ -474,13 +480,13 @@
}
static struct posix_acl *
-_get_posix_acl(struct dentry *dentry, char *key)
+_get_posix_acl(struct dentry *dentry, struct vfsmount *mnt, char *key)
{
void *buf = NULL;
struct posix_acl *pacl = NULL;
int buflen;
- buflen = nfsd_getxattr(dentry, key, &buf);
+ buflen = nfsd_getxattr(dentry, mnt, key, &buf);
if (!buflen)
buflen = -ENODATA;
if (buflen <= 0)
@@ -492,14 +498,15 @@
}
int
-nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl)
+nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
+ struct vfsmount *mnt, struct nfs4_acl **acl)
{
struct inode *inode = dentry->d_inode;
int error = 0;
struct posix_acl *pacl = NULL, *dpacl = NULL;
unsigned int flags = 0;
- pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
+ pacl = _get_posix_acl(dentry, mnt, POSIX_ACL_XATTR_ACCESS);
if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
if (IS_ERR(pacl)) {
@@ -509,7 +516,7 @@
}
if (S_ISDIR(inode->i_mode)) {
- dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
+ dpacl = _get_posix_acl(dentry, mnt, POSIX_ACL_XATTR_DEFAULT);
if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
dpacl = NULL;
else if (IS_ERR(dpacl)) {
@@ -893,13 +900,13 @@
return err;
}
-static void kill_suid(struct dentry *dentry)
+static void kill_suid(struct dentry *dentry, struct vfsmount *mnt)
{
struct iattr ia;
ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID;
mutex_lock(&dentry->d_inode->i_mutex);
- notify_change(dentry, &ia);
+ notify_change(dentry, mnt, &ia);
mutex_unlock(&dentry->d_inode->i_mutex);
}
@@ -958,7 +965,7 @@
/* clear setuid/setgid flag after write */
if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
- kill_suid(dentry);
+ kill_suid(dentry, exp->ex_mnt);
if (host_err >= 0 && stable) {
static ino_t last_ino;
@@ -1115,6 +1122,7 @@
int type, dev_t rdev, struct svc_fh *resfhp)
{
struct dentry *dentry, *dchild = NULL;
+ struct svc_export *exp;
struct inode *dirp;
__be32 err;
int host_err;
@@ -1131,6 +1139,7 @@
goto out;
dentry = fhp->fh_dentry;
+ exp = fhp->fh_export;
dirp = dentry->d_inode;
err = nfserr_notdir;
@@ -1147,7 +1156,7 @@
host_err = PTR_ERR(dchild);
if (IS_ERR(dchild))
goto out_nfserr;
- err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
+ err = fh_compose(resfhp, exp, dchild, fhp);
if (err)
goto out;
} else {
@@ -1186,13 +1195,14 @@
host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
break;
case S_IFDIR:
- host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+ host_err = vfs_mkdir(dirp, dchild, exp->ex_mnt, iap->ia_mode);
break;
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
+ host_err = vfs_mknod(dirp, dchild, exp->ex_mnt, iap->ia_mode,
+ rdev);
break;
default:
printk("nfsd: bad file type %o in nfsd_create\n", type);
@@ -1201,7 +1211,7 @@
if (host_err < 0)
goto out_nfserr;
- if (EX_ISSYNC(fhp->fh_export)) {
+ if (EX_ISSYNC(exp)) {
err = nfserrno(nfsd_sync_dir(dentry));
write_inode_now(dchild->d_inode, 1);
}
@@ -1433,6 +1443,7 @@
struct iattr *iap)
{
struct dentry *dentry, *dnew;
+ struct svc_export *exp;
__be32 err, cerr;
int host_err;
umode_t mode;
@@ -1459,6 +1470,7 @@
if (iap && (iap->ia_valid & ATTR_MODE))
mode = iap->ia_mode & S_IALLUGO;
+ exp = fhp->fh_export;
if (unlikely(path[plen] != 0)) {
char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
if (path_alloced == NULL)
@@ -1466,20 +1478,22 @@
else {
strncpy(path_alloced, path, plen);
path_alloced[plen] = 0;
- host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
+ host_err = vfs_symlink(dentry->d_inode, dnew,
+ exp->ex_mnt, path_alloced, mode);
kfree(path_alloced);
}
} else
- host_err = vfs_symlink(dentry->d_inode, dnew, path, mode);
+ host_err = vfs_symlink(dentry->d_inode, dnew, exp->ex_mnt, path,
+ mode);
if (!host_err) {
- if (EX_ISSYNC(fhp->fh_export))
+ if (EX_ISSYNC(exp))
host_err = nfsd_sync_dir(dentry);
}
err = nfserrno(host_err);
fh_unlock(fhp);
- cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
+ cerr = fh_compose(resfhp, exp, dnew, fhp);
dput(dnew);
if (err==0) err = cerr;
out:
@@ -1529,7 +1543,8 @@
dold = tfhp->fh_dentry;
dest = dold->d_inode;
- host_err = vfs_link(dold, dirp, dnew);
+ host_err = vfs_link(dold, tfhp->fh_export->ex_mnt, dirp,
+ dnew, ffhp->fh_export->ex_mnt);
if (!host_err) {
if (EX_ISSYNC(ffhp->fh_export)) {
err = nfserrno(nfsd_sync_dir(ddir));
@@ -1622,7 +1637,8 @@
host_err = -EPERM;
} else
#endif
- host_err = vfs_rename(fdir, odentry, tdir, ndentry);
+ host_err = vfs_rename(fdir, odentry, ffhp->fh_export->ex_mnt,
+ tdir, ndentry, tfhp->fh_export->ex_mnt);
if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
host_err = nfsd_sync_dir(tdentry);
if (!host_err)
@@ -1658,6 +1674,7 @@
char *fname, int flen)
{
struct dentry *dentry, *rdentry;
+ struct svc_export *exp;
struct inode *dirp;
__be32 err;
int host_err;
@@ -1672,6 +1689,7 @@
fh_lock_nested(fhp, I_MUTEX_PARENT);
dentry = fhp->fh_dentry;
dirp = dentry->d_inode;
+ exp = fhp->fh_export;
rdentry = lookup_one_len(fname, dentry, flen);
host_err = PTR_ERR(rdentry);
@@ -1689,21 +1707,21 @@
if (type != S_IFDIR) { /* It's UNLINK */
#ifdef MSNFS
- if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
+ if ((exp->ex_flags & NFSEXP_MSNFS) &&
(atomic_read(&rdentry->d_count) > 1)) {
host_err = -EPERM;
} else
#endif
- host_err = vfs_unlink(dirp, rdentry);
+ host_err = vfs_unlink(dirp, rdentry, exp->ex_mnt);
} else { /* It's RMDIR */
- host_err = vfs_rmdir(dirp, rdentry);
+ host_err = vfs_rmdir(dirp, rdentry, exp->ex_mnt);
}
dput(rdentry);
if (host_err)
goto out_nfserr;
- if (EX_ISSYNC(fhp->fh_export))
+ if (EX_ISSYNC(exp))
host_err = nfsd_sync_dir(dentry);
out_nfserr:
@@ -1890,7 +1908,7 @@
raparm_hash[i].pb_head = NULL;
spin_lock_init(&raparm_hash[i].pb_lock);
}
- nperbucket = cache_size >> RAPARM_HASH_BITS;
+ nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
for (i = 0; i < cache_size - 1; i++) {
if (i % nperbucket == 0)
raparm_hash[j++].pb_head = raparml + i;
@@ -1926,7 +1944,8 @@
return ERR_PTR(-EOPNOTSUPP);
}
- size = nfsd_getxattr(fhp->fh_dentry, name, &value);
+ size = nfsd_getxattr(fhp->fh_dentry, fhp->fh_export->ex_mnt, name,
+ &value);
if (size < 0)
return ERR_PTR(size);
@@ -1938,6 +1957,7 @@
int
nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
{
+ struct vfsmount *mnt;
struct inode *inode = fhp->fh_dentry->d_inode;
char *name;
void *value = NULL;
@@ -1970,13 +1990,16 @@
} else
size = 0;
+ mnt = fhp->fh_export->ex_mnt;
if (size)
- error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
+ error = vfs_setxattr(fhp->fh_dentry, mnt, name, value, size, 0,
+ NULL);
else {
if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
error = 0;
else {
- error = vfs_removexattr(fhp->fh_dentry, name);
+ error = vfs_removexattr(fhp->fh_dentry, mnt, name,
+ NULL);
if (error == -ENODATA)
error = 0;
}
--- linux-source-2.6.22-2.6.22.orig/fs/nfsd/nfs4recover.c
+++ linux-source-2.6.22-2.6.22/fs/nfsd/nfs4recover.c
@@ -156,7 +156,8 @@
dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
goto out_put;
}
- status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
+ status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, rec_dir.mnt,
+ S_IRWXU);
out_put:
dput(dentry);
out_unlock:
@@ -260,7 +261,7 @@
return -EINVAL;
}
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
- status = vfs_unlink(dir->d_inode, dentry);
+ status = vfs_unlink(dir->d_inode, dentry, rec_dir.mnt);
mutex_unlock(&dir->d_inode->i_mutex);
return status;
}
@@ -275,7 +276,7 @@
* a kernel from the future.... */
nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
- status = vfs_rmdir(dir->d_inode, dentry);
+ status = vfs_rmdir(dir->d_inode, dentry, rec_dir.mnt);
mutex_unlock(&dir->d_inode->i_mutex);
return status;
}
--- linux-source-2.6.22-2.6.22.orig/fs/nfsd/nfs4xdr.c
+++ linux-source-2.6.22-2.6.22/fs/nfsd/nfs4xdr.c
@@ -1453,7 +1453,8 @@
err = vfs_getattr(exp->ex_mnt, dentry, &stat);
if (err)
goto out_nfserr;
- if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL)) ||
+ if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL |
+ FATTR4_WORD0_MAXNAME)) ||
(bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
FATTR4_WORD1_SPACE_TOTAL))) {
err = vfs_statfs(dentry, &statfs);
@@ -1469,7 +1470,7 @@
}
if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT
| FATTR4_WORD0_SUPPORTED_ATTRS)) {
- err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
+ err = nfsd4_get_nfs4_acl(rqstp, dentry, exp->ex_mnt, &acl);
aclsupport = (err == 0);
if (bmval0 & FATTR4_WORD0_ACL) {
if (err == -EOPNOTSUPP)
@@ -1699,7 +1700,7 @@
if (bmval0 & FATTR4_WORD0_MAXNAME) {
if ((buflen -= 4) < 0)
goto out_resource;
- WRITE32(~(u32) 0);
+ WRITE32(statfs.f_namelen);
}
if (bmval0 & FATTR4_WORD0_MAXREAD) {
if ((buflen -= 8) < 0)
--- linux-source-2.6.22-2.6.22.orig/fs/xattr.c
+++ linux-source-2.6.22-2.6.22/fs/xattr.c
@@ -69,8 +69,8 @@
}
int
-vfs_setxattr(struct dentry *dentry, char *name, void *value,
- size_t size, int flags)
+vfs_setxattr(struct dentry *dentry, struct vfsmount *mnt, char *name,
+ void *value, size_t size, int flags, struct file *file)
{
struct inode *inode = dentry->d_inode;
int error;
@@ -80,7 +80,7 @@
return error;
mutex_lock(&inode->i_mutex);
- error = security_inode_setxattr(dentry, name, value, size, flags);
+ error = security_inode_setxattr(dentry, mnt, name, value, size, flags, file);
if (error)
goto out;
error = -EOPNOTSUPP;
@@ -88,7 +88,7 @@
error = inode->i_op->setxattr(dentry, name, value, size, flags);
if (!error) {
fsnotify_xattr(dentry);
- security_inode_post_setxattr(dentry, name, value,
+ security_inode_post_setxattr(dentry, mnt, name, value,
size, flags);
}
} else if (!strncmp(name, XATTR_SECURITY_PREFIX,
@@ -106,7 +106,8 @@
EXPORT_SYMBOL_GPL(vfs_setxattr);
ssize_t
-vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
+vfs_getxattr(struct dentry *dentry, struct vfsmount *mnt, char *name,
+ void *value, size_t size, struct file *file)
{
struct inode *inode = dentry->d_inode;
int error;
@@ -115,7 +116,7 @@
if (error)
return error;
- error = security_inode_getxattr(dentry, name);
+ error = security_inode_getxattr(dentry, mnt, name, file);
if (error)
return error;
@@ -142,18 +143,20 @@
EXPORT_SYMBOL_GPL(vfs_getxattr);
ssize_t
-vfs_listxattr(struct dentry *d, char *list, size_t size)
+vfs_listxattr(struct dentry *dentry, struct vfsmount *mnt, char *list,
+ size_t size, struct file *file)
{
+ struct inode *inode = dentry->d_inode;
ssize_t error;
- error = security_inode_listxattr(d);
+ error = security_inode_listxattr(dentry, mnt, file);
if (error)
return error;
error = -EOPNOTSUPP;
- if (d->d_inode->i_op && d->d_inode->i_op->listxattr) {
- error = d->d_inode->i_op->listxattr(d, list, size);
- } else {
- error = security_inode_listsecurity(d->d_inode, list, size);
+ if (inode->i_op && inode->i_op->listxattr)
+ error = inode->i_op->listxattr(dentry, list, size);
+ else {
+ error = security_inode_listsecurity(inode, list, size);
if (size && error > size)
error = -ERANGE;
}
@@ -162,7 +165,8 @@
EXPORT_SYMBOL_GPL(vfs_listxattr);
int
-vfs_removexattr(struct dentry *dentry, char *name)
+vfs_removexattr(struct dentry *dentry, struct vfsmount *mnt, char *name,
+ struct file *file)
{
struct inode *inode = dentry->d_inode;
int error;
@@ -174,7 +178,7 @@
if (error)
return error;
- error = security_inode_removexattr(dentry, name);
+ error = security_inode_removexattr(dentry, mnt, name, file);
if (error)
return error;
@@ -193,8 +197,8 @@
* Extended attribute SET operations
*/
static long
-setxattr(struct dentry *d, char __user *name, void __user *value,
- size_t size, int flags)
+setxattr(struct dentry *dentry, struct vfsmount *mnt, char __user *name,
+ void __user *value, size_t size, int flags, struct file *file)
{
int error;
void *kvalue = NULL;
@@ -221,7 +225,7 @@
}
}
- error = vfs_setxattr(d, kname, kvalue, size, flags);
+ error = vfs_setxattr(dentry, mnt, kname, kvalue, size, flags, file);
kfree(kvalue);
return error;
}
@@ -236,7 +240,7 @@
error = user_path_walk(path, &nd);
if (error)
return error;
- error = setxattr(nd.dentry, name, value, size, flags);
+ error = setxattr(nd.dentry, nd.mnt, name, value, size, flags, NULL);
path_release(&nd);
return error;
}
@@ -251,7 +255,7 @@
error = user_path_walk_link(path, &nd);
if (error)
return error;
- error = setxattr(nd.dentry, name, value, size, flags);
+ error = setxattr(nd.dentry, nd.mnt, name, value, size, flags, NULL);
path_release(&nd);
return error;
}
@@ -269,7 +273,7 @@
return error;
dentry = f->f_path.dentry;
audit_inode(NULL, dentry->d_inode);
- error = setxattr(dentry, name, value, size, flags);
+ error = setxattr(dentry, f->f_vfsmnt, name, value, size, flags, f);
fput(f);
return error;
}
@@ -278,7 +282,8 @@
* Extended attribute GET operations
*/
static ssize_t
-getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
+getxattr(struct dentry *dentry, struct vfsmount *mnt, char __user *name,
+ void __user *value, size_t size, struct file *file)
{
ssize_t error;
void *kvalue = NULL;
@@ -298,7 +303,7 @@
return -ENOMEM;
}
- error = vfs_getxattr(d, kname, kvalue, size);
+ error = vfs_getxattr(dentry, mnt, kname, kvalue, size, file);
if (error > 0) {
if (size && copy_to_user(value, kvalue, error))
error = -EFAULT;
@@ -321,7 +326,7 @@
error = user_path_walk(path, &nd);
if (error)
return error;
- error = getxattr(nd.dentry, name, value, size);
+ error = getxattr(nd.dentry, nd.mnt, name, value, size, NULL);
path_release(&nd);
return error;
}
@@ -336,7 +341,7 @@
error = user_path_walk_link(path, &nd);
if (error)
return error;
- error = getxattr(nd.dentry, name, value, size);
+ error = getxattr(nd.dentry, nd.mnt, name, value, size, NULL);
path_release(&nd);
return error;
}
@@ -351,7 +356,7 @@
if (!f)
return error;
audit_inode(NULL, f->f_path.dentry->d_inode);
- error = getxattr(f->f_path.dentry, name, value, size);
+ error = getxattr(f->f_path.dentry, f->f_path.mnt, name, value, size, f);
fput(f);
return error;
}
@@ -360,7 +365,8 @@
* Extended attribute LIST operations
*/
static ssize_t
-listxattr(struct dentry *d, char __user *list, size_t size)
+listxattr(struct dentry *dentry, struct vfsmount *mnt, char __user *list,
+ size_t size, struct file *file)
{
ssize_t error;
char *klist = NULL;
@@ -373,7 +379,7 @@
return -ENOMEM;
}
- error = vfs_listxattr(d, klist, size);
+ error = vfs_listxattr(dentry, mnt, klist, size, file);
if (error > 0) {
if (size && copy_to_user(list, klist, error))
error = -EFAULT;
@@ -395,7 +401,7 @@
error = user_path_walk(path, &nd);
if (error)
return error;
- error = listxattr(nd.dentry, list, size);
+ error = listxattr(nd.dentry, nd.mnt, list, size, NULL);
path_release(&nd);
return error;
}
@@ -409,7 +415,7 @@
error = user_path_walk_link(path, &nd);
if (error)
return error;
- error = listxattr(nd.dentry, list, size);
+ error = listxattr(nd.dentry, nd.mnt, list, size, NULL);
path_release(&nd);
return error;
}
@@ -424,7 +430,7 @@
if (!f)
return error;
audit_inode(NULL, f->f_path.dentry->d_inode);
- error = listxattr(f->f_path.dentry, list, size);
+ error = listxattr(f->f_path.dentry, f->f_path.mnt, list, size, f);
fput(f);
return error;
}
@@ -433,7 +439,8 @@
* Extended attribute REMOVE operations
*/
static long
-removexattr(struct dentry *d, char __user *name)
+removexattr(struct dentry *dentry, struct vfsmount *mnt, char __user *name,
+ struct file *file)
{
int error;
char kname[XATTR_NAME_MAX + 1];
@@ -444,7 +451,7 @@
if (error < 0)
return error;
- return vfs_removexattr(d, kname);
+ return vfs_removexattr(dentry, mnt, kname, file);
}
asmlinkage long
@@ -456,7 +463,7 @@
error = user_path_walk(path, &nd);
if (error)
return error;
- error = removexattr(nd.dentry, name);
+ error = removexattr(nd.dentry, nd.mnt, name, NULL);
path_release(&nd);
return error;
}
@@ -470,7 +477,7 @@
error = user_path_walk_link(path, &nd);
if (error)
return error;
- error = removexattr(nd.dentry, name);
+ error = removexattr(nd.dentry, nd.mnt, name, NULL);
path_release(&nd);
return error;
}
@@ -487,7 +494,7 @@
return error;
dentry = f->f_path.dentry;
audit_inode(NULL, dentry->d_inode);
- error = removexattr(dentry, name);
+ error = removexattr(dentry, f->f_path.mnt, name, f);
fput(f);
return error;
}
--- linux-source-2.6.22-2.6.22.orig/fs/cifs/cifsglob.h
+++ linux-source-2.6.22-2.6.22/fs/cifs/cifsglob.h
@@ -442,6 +442,17 @@
#define CIFS_LARGE_BUFFER 2
#define CIFS_IOVEC 4 /* array of response buffers */
+/* Request type flags passed to SendReceive2 (and as long_op elsewhere) */
+#define CIFS_STD_OP 0 /* normal request timeout (15 seconds) */
+#define CIFS_LONG_OP 1 /* long op (up to 45 sec, oplock time) */
+#define CIFS_VLONG_OP 2 /* very slow op - can take up to 180 seconds */
+#define CIFS_BLOCKING_OP 4 /* operation can block */
+#define CIFS_ASYNC_OP 8 /* do not wait for response */
+#define CIFS_TIMEOUT_MASK 0x00F /* mask for the 5 values above; one per req */
+#define CIFS_LOG_ERROR 0x010 /* log NT STATUS if non-zero */
+#define CIFS_LARGE_BUF_OP 0x020 /* large request buffer */
+#define CIFS_NO_RESP 0x040 /* no response buffer required */
+
/* Security Flags: indicate type of session setup needed */
#define CIFSSEC_MAY_SIGN 0x00001
#define CIFSSEC_MAY_NTLM 0x00002
--- linux-source-2.6.22-2.6.22.orig/fs/cifs/connect.c
+++ linux-source-2.6.22-2.6.22/fs/cifs/connect.c
@@ -2273,7 +2273,7 @@
pSMB->req_no_secext.ByteCount = cpu_to_le16(count);
rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
- &bytes_returned, 1);
+ &bytes_returned, CIFS_LONG_OP);
if (rc) {
/* rc = map_smb_to_linux_error(smb_buffer_response); now done in SendReceive */
} else if ((smb_buffer_response->WordCount == 3)
@@ -2559,7 +2559,7 @@
pSMB->req.ByteCount = cpu_to_le16(count);
rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
- &bytes_returned, 1);
+ &bytes_returned, CIFS_LONG_OP);
if (smb_buffer_response->Status.CifsError ==
cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))
@@ -2985,7 +2985,7 @@
pSMB->req.ByteCount = cpu_to_le16(count);
rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
- &bytes_returned, 1);
+ &bytes_returned, CIFS_LONG_OP);
if (rc) {
/* rc = map_smb_to_linux_error(smb_buffer_response); *//* done in SendReceive now */
} else if ((smb_buffer_response->WordCount == 3)
@@ -3256,7 +3256,8 @@
pSMB->hdr.smb_buf_length += count;
pSMB->ByteCount = cpu_to_le16(count);
- rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, 0);
+ rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
+ CIFS_STD_OP);
/* if (rc) rc = map_smb_to_linux_error(smb_buffer_response); */
/* above now done in SendReceive */
--- linux-source-2.6.22-2.6.22.orig/fs/cifs/cifsproto.h
+++ linux-source-2.6.22-2.6.22/fs/cifs/cifsproto.h
@@ -48,10 +48,12 @@
struct smb_hdr * /* input */ ,
struct smb_hdr * /* out */ ,
int * /* bytes returned */ , const int long_op);
+extern int SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
+ struct smb_hdr *in_buf, int flags);
extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
- struct kvec *, int /* nvec to send */,
- int * /* type of buf returned */ , const int long_op);
-extern int SendReceiveBlockingLock(const unsigned int /* xid */ ,
+ struct kvec *, int /* nvec to send */,
+ int * /* type of buf returned */ , const int flags);
+extern int SendReceiveBlockingLock(const unsigned int /* xid */ ,
struct cifsTconInfo *,
struct smb_hdr * /* input */ ,
struct smb_hdr * /* out */ ,
--- linux-source-2.6.22-2.6.22.orig/fs/cifs/asn1.c
+++ linux-source-2.6.22-2.6.22/fs/cifs/asn1.c
@@ -182,6 +182,11 @@
}
}
}
+
+ /* don't trust len bigger than ctx buffer */
+ if (*len > ctx->end - ctx->pointer)
+ return 0;
+
return 1;
}
@@ -199,6 +204,10 @@
if (!asn1_length_decode(ctx, &def, &len))
return 0;
+ /* primitive shall be definite, indefinite shall be constructed */
+ if (*con == ASN1_PRI && !def)
+ return 0;
+
if (def)
*eoc = ctx->pointer + len;
else
@@ -385,7 +394,12 @@
unsigned long *optr;
size = eoc - ctx->pointer + 1;
- *oid = kmalloc(size * sizeof (unsigned long), GFP_ATOMIC);
+
+ /* first subid actually encodes first two subids */
+ if (size < 2 || size > UINT_MAX/sizeof(unsigned long))
+ return 0;
+
+ *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
if (*oid == NULL) {
return 0;
}
--- linux-source-2.6.22-2.6.22.orig/fs/cifs/cifssmb.c
+++ linux-source-2.6.22-2.6.22/fs/cifs/cifssmb.c
@@ -660,9 +660,7 @@
CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon)
{
struct smb_hdr *smb_buffer;
- struct smb_hdr *smb_buffer_response; /* BB removeme BB */
int rc = 0;
- int length;
cFYI(1, ("In tree disconnect"));
/*
@@ -699,16 +697,12 @@
if (rc) {
up(&tcon->tconSem);
return rc;
- } else {
- smb_buffer_response = smb_buffer; /* BB removeme BB */
}
- rc = SendReceive(xid, tcon->ses, smb_buffer, smb_buffer_response,
- &length, 0);
+
+ rc = SendReceiveNoRsp(xid, tcon->ses, smb_buffer, 0);
if (rc)
cFYI(1, ("Tree disconnect failed %d", rc));
- if (smb_buffer)
- cifs_small_buf_release(smb_buffer);
up(&tcon->tconSem);
/* No need to return error on this operation if tid invalidated and
@@ -722,10 +716,8 @@
int
CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses)
{
- struct smb_hdr *smb_buffer_response;
LOGOFF_ANDX_REQ *pSMB;
int rc = 0;
- int length;
cFYI(1, ("In SMBLogoff for session disconnect"));
if (ses)
@@ -744,9 +736,7 @@
return rc;
}
- smb_buffer_response = (struct smb_hdr *)pSMB; /* BB removeme BB */
-
- if(ses->server) {
+ if (ses->server) {
pSMB->hdr.Mid = GetNextMid(ses->server);
if(ses->server->secMode &
@@ -757,8 +747,7 @@
pSMB->hdr.Uid = ses->Suid;
pSMB->AndXCommand = 0xFF;
- rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
- smb_buffer_response, &length, 0);
+ rc = SendReceiveNoRsp(xid, ses, (struct smb_hdr *) pSMB, 0);
if (ses->server) {
atomic_dec(&ses->server->socketUseCount);
if (atomic_read(&ses->server->socketUseCount) == 0) {
@@ -769,7 +758,6 @@
}
}
up(&ses->sesSem);
- cifs_small_buf_release(pSMB);
/* if session dead then we do not need to do ulogoff,
since server closed smb session, no sense reporting
@@ -1143,7 +1131,7 @@
pSMB->ByteCount = cpu_to_le16(count);
/* long_op set to 1 to allow for oplock break timeouts */
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, 1);
+ (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP);
cifs_stats_inc(&tcon->num_opens);
if (rc) {
cFYI(1, ("Error in Open = %d", rc));
@@ -1257,7 +1245,7 @@
pSMB->ByteCount = cpu_to_le16(count);
/* long_op set to 1 to allow for oplock break timeouts */
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, 1);
+ (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP);
cifs_stats_inc(&tcon->num_opens);
if (rc) {
cFYI(1, ("Error in Open = %d", rc));
@@ -1335,9 +1323,8 @@
iov[0].iov_base = (char *)pSMB;
iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
- rc = SendReceive2(xid, tcon->ses, iov,
- 1 /* num iovecs */,
- &resp_buf_type, 0);
+ rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
+ &resp_buf_type, CIFS_STD_OP | CIFS_LOG_ERROR);
cifs_stats_inc(&tcon->num_reads);
pSMBr = (READ_RSP *)iov[0].iov_base;
if (rc) {
@@ -1596,7 +1583,7 @@
int timeout = 0;
__u16 count;
- cFYI(1, ("In CIFSSMBLock - timeout %d numLock %d",waitFlag,numLock));
+ cFYI(1, ("CIFSSMBLock timeout %d numLock %d", waitFlag, numLock));
rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB);
if (rc)
@@ -1604,11 +1591,11 @@
pSMBr = (LOCK_RSP *)pSMB; /* BB removeme BB */
- if(lockType == LOCKING_ANDX_OPLOCK_RELEASE) {
- timeout = -1; /* no response expected */
+ if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) {
+ timeout = CIFS_ASYNC_OP; /* no response expected */
pSMB->Timeout = 0;
} else if (waitFlag == TRUE) {
- timeout = 3; /* blocking operation, no timeout */
+ timeout = CIFS_BLOCKING_OP; /* blocking operation, no timeout */
pSMB->Timeout = cpu_to_le32(-1);/* blocking - do not time out */
} else {
pSMB->Timeout = 0;
@@ -1638,15 +1625,16 @@
if (waitFlag) {
rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned);
+ cifs_small_buf_release(pSMB);
} else {
- rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, timeout);
+ rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *)pSMB,
+ timeout);
+ /* SMB buffer freed by function above */
}
cifs_stats_inc(&tcon->num_locks);
if (rc) {
cFYI(1, ("Send error in Lock = %d", rc));
}
- cifs_small_buf_release(pSMB);
/* Note: On -EAGAIN error only caller can retry on handle based calls
since file handle passed in no longer valid */
@@ -1666,7 +1654,9 @@
int rc = 0;
int timeout = 0;
int bytes_returned = 0;
+ int resp_buf_type = 0;
__u16 params, param_offset, offset, byte_count, count;
+ struct kvec iov[1];
cFYI(1, ("Posix Lock"));
@@ -1709,8 +1699,8 @@
(((char *) &pSMB->hdr.Protocol) + offset);
parm_data->lock_type = cpu_to_le16(lock_type);
- if(waitFlag) {
- timeout = 3; /* blocking operation, no timeout */
+ if (waitFlag) {
+ timeout = CIFS_BLOCKING_OP; /* blocking operation, no timeout */
parm_data->lock_flags = cpu_to_le16(1);
pSMB->Timeout = cpu_to_le32(-1);
} else
@@ -1730,8 +1720,13 @@
rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned);
} else {
- rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, timeout);
+ iov[0].iov_base = (char *)pSMB;
+ iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+ rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
+ &resp_buf_type, timeout);
+ pSMB = NULL; /* request buf already freed by SendReceive2. Do
+ not try to free it twice below on exit */
+ pSMBr = (struct smb_com_transaction2_sfi_rsp *)iov[0].iov_base;
}
if (rc) {
@@ -1766,6 +1761,11 @@
if (pSMB)
cifs_small_buf_release(pSMB);
+ if (resp_buf_type == CIFS_SMALL_BUFFER)
+ cifs_small_buf_release(iov[0].iov_base);
+ else if (resp_buf_type == CIFS_LARGE_BUFFER)
+ cifs_buf_release(iov[0].iov_base);
+
/* Note: On -EAGAIN error only caller can retry on handle based calls
since file handle passed in no longer valid */
@@ -1778,8 +1778,6 @@
{
int rc = 0;
CLOSE_REQ *pSMB = NULL;
- CLOSE_RSP *pSMBr = NULL;
- int bytes_returned;
cFYI(1, ("In CIFSSMBClose"));
/* do not retry on dead session on close */
@@ -1789,13 +1787,10 @@
if (rc)
return rc;
- pSMBr = (CLOSE_RSP *)pSMB; /* BB removeme BB */
-
pSMB->FileID = (__u16) smb_file_id;
pSMB->LastWriteTime = 0xFFFFFFFF;
pSMB->ByteCount = 0;
- rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+ rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
cifs_stats_inc(&tcon->num_closes);
if (rc) {
if(rc!=-EINTR) {
@@ -1804,8 +1799,6 @@
}
}
- cifs_small_buf_release(pSMB);
-
/* Since session is dead, file will be closed on server already */
if(rc == -EAGAIN)
rc = 0;
@@ -2989,7 +2982,8 @@
iov[0].iov_base = (char *)pSMB;
iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
- rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 0);
+ rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
+ CIFS_STD_OP);
cifs_stats_inc(&tcon->num_acl_get);
if (rc) {
cFYI(1, ("Send error in QuerySecDesc = %d", rc));
@@ -3634,8 +3628,6 @@
{
int rc = 0;
FINDCLOSE_REQ *pSMB = NULL;
- CLOSE_RSP *pSMBr = NULL; /* BB removeme BB */
- int bytes_returned;
cFYI(1, ("In CIFSSMBFindClose"));
rc = small_smb_init(SMB_COM_FIND_CLOSE2, 1, tcon, (void **)&pSMB);
@@ -3647,16 +3639,13 @@
if (rc)
return rc;
- pSMBr = (CLOSE_RSP *)pSMB; /* BB removeme BB */
pSMB->FileID = searchHandle;
pSMB->ByteCount = 0;
- rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+ rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
if (rc) {
cERROR(1, ("Send error in FindClose = %d", rc));
}
cifs_stats_inc(&tcon->num_fclose);
- cifs_small_buf_release(pSMB);
/* Since session is dead, search handle closed on server already */
if (rc == -EAGAIN)
@@ -4571,11 +4560,9 @@
__u16 fid, __u32 pid_of_opener, int SetAllocation)
{
struct smb_com_transaction2_sfi_req *pSMB = NULL;
- struct smb_com_transaction2_sfi_rsp *pSMBr = NULL;
char *data_offset;
struct file_end_of_file_info *parm_data;
int rc = 0;
- int bytes_returned = 0;
__u16 params, param_offset, offset, byte_count, count;
cFYI(1, ("SetFileSize (via SetFileInfo) %lld",
@@ -4585,8 +4572,6 @@
if (rc)
return rc;
- pSMBr = (struct smb_com_transaction2_sfi_rsp *)pSMB;
-
pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
@@ -4637,18 +4622,14 @@
pSMB->Reserved4 = 0;
pSMB->hdr.smb_buf_length += byte_count;
pSMB->ByteCount = cpu_to_le16(byte_count);
- rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+ rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
if (rc) {
cFYI(1,
("Send error in SetFileInfo (SetFileSize) = %d",
rc));
}
- if (pSMB)
- cifs_small_buf_release(pSMB);
-
- /* Note: On -EAGAIN error only caller can retry on handle based calls
+ /* Note: On -EAGAIN error only caller can retry on handle based calls
since file handle passed in no longer valid */
return rc;
@@ -4665,10 +4646,8 @@
__u16 fid)
{
struct smb_com_transaction2_sfi_req *pSMB = NULL;
- struct smb_com_transaction2_sfi_rsp *pSMBr = NULL;
char *data_offset;
int rc = 0;
- int bytes_returned = 0;
__u16 params, param_offset, offset, byte_count, count;
cFYI(1, ("Set Times (via SetFileInfo)"));
@@ -4677,8 +4656,6 @@
if (rc)
return rc;
- pSMBr = (struct smb_com_transaction2_sfi_rsp *)pSMB;
-
/* At this point there is no need to override the current pid
with the pid of the opener, but that could change if we someday
use an existing handle (rather than opening one on the fly) */
@@ -4717,16 +4694,13 @@
pSMB->Reserved4 = 0;
pSMB->hdr.smb_buf_length += byte_count;
pSMB->ByteCount = cpu_to_le16(byte_count);
- memcpy(data_offset,data,sizeof(FILE_BASIC_INFO));
- rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+ memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
+ rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
if (rc) {
cFYI(1,("Send error in Set Time (SetFileInfo) = %d",rc));
}
- cifs_small_buf_release(pSMB);
-
- /* Note: On -EAGAIN error only caller can retry on handle based calls
+ /* Note: On -EAGAIN error only caller can retry on handle based calls
since file handle passed in no longer valid */
return rc;
@@ -5016,7 +4990,8 @@
pSMB->ByteCount = 0;
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, -1);
+ (struct smb_hdr *)pSMBr, &bytes_returned,
+ CIFS_ASYNC_OP);
if (rc) {
cFYI(1, ("Error in Notify = %d", rc));
} else {
--- linux-source-2.6.22-2.6.22.orig/fs/cifs/transport.c
+++ linux-source-2.6.22-2.6.22/fs/cifs/transport.c
@@ -308,7 +308,7 @@
static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op)
{
- if(long_op == -1) {
+ if (long_op == CIFS_ASYNC_OP) {
/* oplock breaks must not be held up */
atomic_inc(&ses->server->inFlight);
} else {
@@ -337,7 +337,7 @@
they are allowed to block on server */
/* update # of requests on the wire to server */
- if (long_op < 3)
+ if (long_op != CIFS_BLOCKING_OP)
atomic_inc(&ses->server->inFlight);
spin_unlock(&GlobalMid_Lock);
break;
@@ -416,17 +416,48 @@
}
}
+
+/*
+ * Send an SMB request whose response body the caller does not need; only
+ * the return code of SendReceive2 is passed back. in_buf is wrapped in a
+ * one-element kvec and sent with CIFS_NO_RESP or'ed into flags.
+ *
+ * flags: request type (timeout class) plus optional CIFS_* modifier bits.
+ * NOTE: callers in this patch no longer release in_buf after this call
+ * ("SMB buffer freed by function above"), so do not free it again.
+ */
+int
+SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
+ struct smb_hdr *in_buf, int flags)
+{
+ int rc;
+ struct kvec iov[1];
+ int resp_buf_type;
+
+ iov[0].iov_base = (char *)in_buf;
+ iov[0].iov_len = in_buf->smb_buf_length + 4;
+ flags |= CIFS_NO_RESP;
+ rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
+#ifdef CONFIG_CIFS_DEBUG2
+ cFYI(1, ("SendRcvNoR flags %d rc %d", flags, rc));
+#endif
+ return rc;
+}
+
int
-SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
- struct kvec *iov, int n_vec, int * pRespBufType /* ret */,
- const int long_op)
+SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
+ struct kvec *iov, int n_vec, int *pRespBufType /* ret */,
+ const int flags)
{
int rc = 0;
+ int long_op;
unsigned int receive_len;
unsigned long timeout;
struct mid_q_entry *midQ;
struct smb_hdr *in_buf = iov[0].iov_base;
-
+
+ long_op = flags & CIFS_TIMEOUT_MASK;
+
*pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */
if ((ses == NULL) || (ses->server == NULL)) {
@@ -485,15 +516,22 @@
if(rc < 0)
goto out;
- if (long_op == -1)
- goto out;
- else if (long_op == 2) /* writes past end of file can take loong time */
+ if (long_op == CIFS_STD_OP)
+ timeout = 15 * HZ;
+ else if (long_op == CIFS_VLONG_OP) /* e.g. slow writes past EOF */
timeout = 180 * HZ;
- else if (long_op == 1)
- timeout = 45 * HZ; /* should be greater than
+ else if (long_op == CIFS_LONG_OP)
+ timeout = 45 * HZ; /* should be greater than
servers oplock break timeout (about 43 seconds) */
- else
- timeout = 15 * HZ;
+ else if (long_op == CIFS_ASYNC_OP)
+ goto out;
+ else if (long_op == CIFS_BLOCKING_OP)
+ timeout = 0x7FFFFFFF; /* large, but not so large as to wrap */
+ else {
+ cERROR(1, ("unknown timeout flag %d", long_op));
+ rc = -EIO;
+ goto out;
+ }
/* wait for 15 seconds or until woken up due to response arriving or
due to last connection to this server being unmounted */
@@ -568,7 +606,6 @@
}
/* BB special case reconnect tid and uid here? */
- /* BB special case Errbadpassword and pwdexpired here */
rc = map_smb_to_linux_error(midQ->resp_buf);
/* convert ByteCount if necessary */
@@ -578,8 +615,10 @@
(2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
BCC(midQ->resp_buf) =
le16_to_cpu(BCC_LE(midQ->resp_buf));
- midQ->resp_buf = NULL; /* mark it so will not be freed
- by DeleteMidQEntry */
+ if ((flags & CIFS_NO_RESP) == 0)
+ midQ->resp_buf = NULL; /* mark it so buf will
+ not be freed by
+ DeleteMidQEntry */
} else {
rc = -EIO;
cFYI(1,("Bad MID state?"));
@@ -667,17 +706,25 @@
if(rc < 0)
goto out;
- if (long_op == -1)
+ if (long_op == CIFS_STD_OP)
+ timeout = 15 * HZ;
+ /* wait for 15 seconds or until woken up due to response arriving or
+ due to last connection to this server being unmounted */
+ else if (long_op == CIFS_ASYNC_OP)
goto out;
- else if (long_op == 2) /* writes past end of file can take loong time */
+ else if (long_op == CIFS_VLONG_OP) /* writes past EOF can be slow */
timeout = 180 * HZ;
- else if (long_op == 1)
- timeout = 45 * HZ; /* should be greater than
+ else if (long_op == CIFS_LONG_OP)
+ timeout = 45 * HZ; /* should be greater than
servers oplock break timeout (about 43 seconds) */
- else
- timeout = 15 * HZ;
- /* wait for 15 seconds or until woken up due to response arriving or
- due to last connection to this server being unmounted */
+ else if (long_op == CIFS_BLOCKING_OP)
+ timeout = 0x7FFFFFFF; /* large but no so large as to wrap */
+ else {
+ cERROR(1, ("unknown timeout flag %d", long_op));
+ rc = -EIO;
+ goto out;
+ }
+
if (signal_pending(current)) {
/* if signal pending do not hold up user for full smb timeout
but we still give response a chance to complete */
@@ -817,7 +864,7 @@
pSMB->hdr.Mid = GetNextMid(ses->server);
return SendReceive(xid, ses, in_buf, out_buf,
- &bytes_returned, 0);
+ &bytes_returned, CIFS_STD_OP);
}
int
@@ -849,7 +896,7 @@
to the same server. We may make this configurable later or
use ses->maxReq */
- rc = wait_for_free_request(ses, 3);
+ rc = wait_for_free_request(ses, CIFS_BLOCKING_OP);
if (rc)
return rc;
--- linux-source-2.6.22-2.6.22.orig/fs/cifs/file.c
+++ linux-source-2.6.22-2.6.22/fs/cifs/file.c
@@ -809,9 +809,9 @@
xid = GetXid();
if (*poffset > file->f_path.dentry->d_inode->i_size)
- long_op = 2; /* writes past end of file can take a long time */
+ long_op = CIFS_VLONG_OP; /* writes past EOF take long time */
else
- long_op = 1;
+ long_op = CIFS_LONG_OP;
for (total_written = 0; write_size > total_written;
total_written += bytes_written) {
@@ -858,7 +858,7 @@
}
} else
*poffset += bytes_written;
- long_op = FALSE; /* subsequent writes fast -
+ long_op = CIFS_STD_OP; /* subsequent writes fast -
15 seconds is plenty */
}
@@ -908,9 +908,9 @@
xid = GetXid();
if (*poffset > file->f_path.dentry->d_inode->i_size)
- long_op = 2; /* writes past end of file can take a long time */
+ long_op = CIFS_VLONG_OP; /* writes past EOF can be slow */
else
- long_op = 1;
+ long_op = CIFS_LONG_OP;
for (total_written = 0; write_size > total_written;
total_written += bytes_written) {
@@ -976,7 +976,7 @@
}
} else
*poffset += bytes_written;
- long_op = FALSE; /* subsequent writes fast -
+ long_op = CIFS_STD_OP; /* subsequent writes fast -
15 seconds is plenty */
}
@@ -1276,7 +1276,7 @@
open_file->netfid,
bytes_to_write, offset,
&bytes_written, iov, n_iov,
- 1);
+ CIFS_LONG_OP);
atomic_dec(&open_file->wrtPending);
if (rc || bytes_written < bytes_to_write) {
cERROR(1,("Write2 ret %d, written = %d",
--- linux-source-2.6.22-2.6.22.orig/fs/cifs/sess.c
+++ linux-source-2.6.22-2.6.22/fs/cifs/sess.c
@@ -488,8 +488,9 @@
BCC_LE(smb_buf) = cpu_to_le16(count);
iov[1].iov_base = str_area;
- iov[1].iov_len = count;
- rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type, 0);
+ iov[1].iov_len = count;
+ rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type,
+ CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR);
/* SMB request buf freed in SendReceive2 */
cFYI(1,("ssetup rc from sendrecv2 is %d",rc));
--- linux-source-2.6.22-2.6.22.orig/fs/xfs/linux-2.6/xfs_lrw.c
+++ linux-source-2.6.22-2.6.22/fs/xfs/linux-2.6/xfs_lrw.c
@@ -798,7 +798,7 @@
!capable(CAP_FSETID)) {
error = xfs_write_clear_setuid(xip);
if (likely(!error))
- error = -remove_suid(file->f_path.dentry);
+ error = -remove_suid(&file->f_path);
if (unlikely(error)) {
goto out_unlock_internal;
}
--- linux-source-2.6.22-2.6.22.orig/fs/9p/conv.c
+++ linux-source-2.6.22-2.6.22/fs/9p/conv.c
@@ -742,6 +742,7 @@
if (err) {
kfree(fc);
fc = ERR_PTR(err);
+ goto error;
}
if (buf_check_overflow(bufp)) {
--- linux-source-2.6.22-2.6.22.orig/fs/reiserfs/xattr.c
+++ linux-source-2.6.22-2.6.22/fs/reiserfs/xattr.c
@@ -479,7 +479,7 @@
newattrs.ia_size = buffer_size;
newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
mutex_lock(&xinode->i_mutex);
- err = notify_change(fp->f_path.dentry, &newattrs);
+ err = notify_change(fp->f_path.dentry, NULL, &newattrs);
if (err)
goto out_filp;
@@ -775,7 +775,7 @@
if (dir->d_inode->i_nlink <= 2) {
root = get_xa_root(inode->i_sb, XATTR_REPLACE);
reiserfs_write_lock_xattrs(inode->i_sb);
- err = vfs_rmdir(root->d_inode, dir);
+ err = vfs_rmdir(root->d_inode, dir, NULL);
reiserfs_write_unlock_xattrs(inode->i_sb);
dput(root);
} else {
@@ -819,7 +819,7 @@
}
if (!S_ISDIR(xafile->d_inode->i_mode))
- err = notify_change(xafile, attrs);
+ err = notify_change(xafile, NULL, attrs);
dput(xafile);
return err;
@@ -871,7 +871,7 @@
goto out_dir;
}
- err = notify_change(dir, attrs);
+ err = notify_change(dir, NULL, attrs);
unlock_kernel();
out_dir:
--- linux-source-2.6.22-2.6.22.orig/fs/reiserfs/file.c
+++ linux-source-2.6.22-2.6.22/fs/reiserfs/file.c
@@ -1335,7 +1335,7 @@
if (count == 0)
goto out;
- res = remove_suid(file->f_path.dentry);
+ res = remove_suid(&file->f_path);
if (res)
goto out;
--- linux-source-2.6.22-2.6.22.orig/fs/dnotify.c
+++ linux-source-2.6.22-2.6.22/fs/dnotify.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
+#include <linux/file.h>
int dir_notify_enable __read_mostly = 1;
@@ -66,6 +67,7 @@
struct dnotify_struct **prev;
struct inode *inode;
fl_owner_t id = current->files;
+ struct file *f;
int error = 0;
if ((arg & ~DN_MULTISHOT) == 0) {
@@ -92,6 +94,15 @@
prev = &odn->dn_next;
}
+ rcu_read_lock();
+ f = fcheck(fd);
+ rcu_read_unlock();
+ /* we'd lost the race with close(), sod off silently */
+ /* note that inode->i_lock prevents reordering problems
+ * between accesses to descriptor table and ->i_dnotify */
+ if (f != filp)
+ goto out_free;
+
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
if (error)
goto out_free;
--- linux-source-2.6.22-2.6.22.orig/fs/Kconfig
+++ linux-source-2.6.22-2.6.22/fs/Kconfig
@@ -1030,6 +1030,22 @@
endmenu
+menu "Layered filesystems"
+
+config ECRYPT_FS
+ tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && KEYS && CRYPTO && NET
+ help
+ Encrypted filesystem that operates on the VFS layer. See
+ <file:Documentation/ecryptfs.txt> to learn more about
+ eCryptfs. Userspace components are required and can be
+ obtained from <http://ecryptfs.sf.net>.
+
+ To compile this file system support as a module, choose M here: the
+ module will be called ecryptfs.
+
+endmenu
+
menu "Miscellaneous filesystems"
config ADFS_FS
@@ -1082,18 +1098,6 @@
To compile this file system support as a module, choose M here: the
module will be called affs. If unsure, say N.
-config ECRYPT_FS
- tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && KEYS && CRYPTO && NET
- help
- Encrypted filesystem that operates on the VFS layer. See
- <file:Documentation/ecryptfs.txt> to learn more about
- eCryptfs. Userspace components are required and can be
- obtained from <http://ecryptfs.sf.net>.
-
- To compile this file system support as a module, choose M here: the
- module will be called ecryptfs.
-
config HFS_FS
tristate "Apple Macintosh file system support (EXPERIMENTAL)"
depends on BLOCK && EXPERIMENTAL
--- linux-source-2.6.22-2.6.22.orig/fs/exec.c
+++ linux-source-2.6.22-2.6.22/fs/exec.c
@@ -586,18 +586,12 @@
int count;
/*
- * Tell all the sighand listeners that this sighand has
- * been detached. The signalfd_detach() function grabs the
- * sighand lock, if signal listeners are present on the sighand.
- */
- signalfd_detach(tsk);
-
- /*
* If we don't share sighandlers, then we aren't sharing anything
* and we can just re-use it all.
*/
if (atomic_read(&oldsighand->count) <= 1) {
BUG_ON(atomic_read(&sig->count) != 1);
+ signalfd_detach(tsk);
exit_itimers(sig);
return 0;
}
@@ -736,6 +730,7 @@
sig->flags = 0;
no_thread_group:
+ signalfd_detach(tsk);
exit_itimers(sig);
if (leader)
release_task(leader);
@@ -890,9 +885,12 @@
*/
current->mm->task_size = TASK_SIZE;
- if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
- file_permission(bprm->file, MAY_READ) ||
- (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
+ if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) {
+ suid_keys(current);
+ current->mm->dumpable = suid_dumpable;
+ current->pdeath_signal = 0;
+ } else if (file_permission(bprm->file, MAY_READ) ||
+ (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
suid_keys(current);
current->mm->dumpable = suid_dumpable;
}
@@ -983,8 +981,10 @@
{
int unsafe;
- if (bprm->e_uid != current->uid)
+ if (bprm->e_uid != current->uid) {
suid_keys(current);
+ current->pdeath_signal = 0;
+ }
exec_keys(current);
task_lock(current);
@@ -1484,9 +1484,23 @@
return core_waiters;
}
+#define CORE_ENV_MAX_ARGS 8
+
int do_coredump(long signr, int exit_code, struct pt_regs * regs)
{
char corename[CORENAME_MAX_SIZE + 1];
+ char *core_argv[2];
+ char *core_envp[CORE_ENV_MAX_ARGS + 1];
+ /* Gotta love scope... */
+ char core_pid[CORENAME_MAX_SIZE + 1];
+ char core_uid[CORENAME_MAX_SIZE + 1];
+ char core_gid[CORENAME_MAX_SIZE + 1];
+ char core_signal[CORENAME_MAX_SIZE + 1];
+ char core_time[CORENAME_MAX_SIZE + 1];
+ char core_hostname[CORENAME_MAX_SIZE + 1];
+ char core_comm[CORENAME_MAX_SIZE + 1];
+ char core_rlim[CORENAME_MAX_SIZE + 1];
+ int old_rlim = -1;
struct mm_struct *mm = current->mm;
struct linux_binfmt * binfmt;
struct inode * inode;
@@ -1495,6 +1509,7 @@
int fsuid = current->fsuid;
int flag = 0;
int ispipe = 0;
+ struct subprocess_info *sub_info = NULL;
audit_core_dumps(signr);
@@ -1522,14 +1537,27 @@
if (retval < 0)
goto fail;
+ /* For piped cores, we rely on the script to limit what it writes
+ * out. Since we aren't writing directly to the fs, we shouldn't
+ * worry too much (and pipe should infer we always want to do the
+ * core). We tell the script the original value for rlim_cur via
+ * env, so it can make intelligent decisions. */
+ if (core_pattern[0] == '|') {
+ old_rlim = current->signal->rlim[RLIMIT_CORE].rlim_cur;
+ current->signal->rlim[RLIMIT_CORE].rlim_cur =
+ 1024 * 1024 * 1024;
+ }
+
/*
* Clear any false indication of pending signals that might
* be seen by the filesystem code called to write the core file.
*/
clear_thread_flag(TIF_SIGPENDING);
- if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
+ if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump &&
+ core_pattern[0] != '|') {
goto fail_unlock;
+ }
/*
* lock_kernel() because format_corename() is controlled by sysctl, which
@@ -1539,8 +1567,43 @@
ispipe = format_corename(corename, core_pattern, signr);
unlock_kernel();
if (ispipe) {
+ int i = 0;
+ struct timeval tv;
+
+#define CORE_ENV_ADD(__buf, __fmt, __arg) \
+do { \
+if (i < CORE_ENV_MAX_ARGS ) { \
+ snprintf(__buf, sizeof(__buf), __fmt, __arg); \
+ core_envp[i++] = __buf; \
+} else \
+ WARN_ON(1); \
+} while(0)
+
+ /* Create the env */
+ CORE_ENV_ADD(core_pid, "CORE_PID=%d", current->tgid);
+ CORE_ENV_ADD(core_uid, "CORE_UID=%d", current->uid);
+ CORE_ENV_ADD(core_gid, "CORE_GID=%d", current->gid);
+ CORE_ENV_ADD(core_signal, "CORE_SIGNAL=%ld", signr);
+ CORE_ENV_ADD(core_comm, "CORE_EXECUTABLE=%s", current->comm);
+ CORE_ENV_ADD(core_rlim, "CORE_REAL_RLIM=%d", old_rlim);
+
+ do_gettimeofday(&tv);
+ CORE_ENV_ADD(core_time, "CORE_TIME=%lu", tv.tv_sec);
+
+ down_read(&uts_sem);
+ CORE_ENV_ADD(core_hostname, "CORE_HOSTNAME=%s", utsname()->nodename);
+ up_read(&uts_sem);
+
+#undef CORE_ENV_ADD
+
+ core_envp[i] = NULL;
+
+ core_argv[0] = corename+1;
+ core_argv[1] = NULL;
+
/* SIGPIPE can happen, but it's just never processed */
- if(call_usermodehelper_pipe(corename+1, NULL, NULL, &file)) {
+ if (call_usermodehelper_pipe(core_argv[0], core_argv,
+ core_envp, &file, &sub_info)) {
printk(KERN_INFO "Core dump to %s pipe failed\n",
corename);
goto fail_unlock;
@@ -1554,18 +1617,25 @@
inode = file->f_path.dentry->d_inode;
if (inode->i_nlink > 1)
goto close_fail; /* multiple links - don't dump */
- if (!ispipe && d_unhashed(file->f_path.dentry))
+ if (!sub_info && d_unhashed(file->f_path.dentry))
goto close_fail;
/* AK: actually i see no reason to not allow this for named pipes etc.,
but keep the previous behaviour for now. */
- if (!ispipe && !S_ISREG(inode->i_mode))
+ if (!sub_info && !S_ISREG(inode->i_mode))
+ goto close_fail;
+ /*
+ * Don't allow local users to get cute and trick others to coredump
+ * into their pre-created files:
+ */
+ if (inode->i_uid != current->fsuid)
goto close_fail;
if (!file->f_op)
goto close_fail;
if (!file->f_op->write)
goto close_fail;
- if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
+ if (!sub_info &&
+ do_truncate(file->f_path.dentry, file->f_path.mnt, 0, 0, file) != 0)
goto close_fail;
retval = binfmt->core_dump(signr, regs, file);
@@ -1574,9 +1644,13 @@
current->signal->group_exit_code |= 0x80;
close_fail:
filp_close(file, NULL);
+ if (sub_info)
+ finish_usermodehelper_pipe(sub_info);
fail_unlock:
current->fsuid = fsuid;
complete_all(&mm->core_done);
+ if (old_rlim >= 0)
+ current->signal->rlim[RLIMIT_CORE].rlim_cur = old_rlim;
fail:
return retval;
}
--- linux-source-2.6.22-2.6.22.orig/fs/ecryptfs/inode.c
+++ linux-source-2.6.22-2.6.22/fs/ecryptfs/inode.c
@@ -280,7 +280,9 @@
int rc = 0;
struct dentry *lower_dir_dentry;
struct dentry *lower_dentry;
+ struct dentry *dentry_save;
struct vfsmount *lower_mnt;
+ struct vfsmount *mnt_save;
char *encoded_name;
unsigned int encoded_namelen;
struct ecryptfs_crypt_stat *crypt_stat = NULL;
@@ -308,9 +310,13 @@
}
ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
"= [%d]\n", encoded_name, encoded_namelen);
- lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
- encoded_namelen - 1);
+ dentry_save = nd->dentry;
+ mnt_save = nd->mnt;
+ lower_dentry = lookup_one_len_nd(encoded_name, lower_dir_dentry,
+ (encoded_namelen - 1), nd);
kfree(encoded_name);
+ nd->mnt = mnt_save;
+ nd->dentry = dentry_save;
if (IS_ERR(lower_dentry)) {
ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
rc = PTR_ERR(lower_dentry);
@@ -411,19 +417,24 @@
struct dentry *new_dentry)
{
struct dentry *lower_old_dentry;
+ struct vfsmount *lower_old_mnt;
struct dentry *lower_new_dentry;
+ struct vfsmount *lower_new_mnt;
struct dentry *lower_dir_dentry;
u64 file_size_save;
int rc;
file_size_save = i_size_read(old_dentry->d_inode);
lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
+ lower_old_mnt = ecryptfs_dentry_to_lower_mnt(old_dentry);
lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
+ lower_new_mnt = ecryptfs_dentry_to_lower_mnt(new_dentry);
dget(lower_old_dentry);
dget(lower_new_dentry);
lower_dir_dentry = lock_parent(lower_new_dentry);
- rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
- lower_new_dentry);
+ rc = vfs_link(lower_old_dentry, lower_old_mnt,
+ lower_dir_dentry->d_inode, lower_new_dentry,
+ lower_new_mnt);
if (rc || !lower_new_dentry->d_inode)
goto out_lock;
rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0);
@@ -448,10 +459,11 @@
{
int rc = 0;
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
lock_parent(lower_dentry);
- rc = vfs_unlink(lower_dir_inode, lower_dentry);
+ rc = vfs_unlink(lower_dir_inode, lower_dentry, lower_mnt);
if (rc) {
printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
goto out_unlock;
@@ -470,6 +482,7 @@
{
int rc;
struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
struct dentry *lower_dir_dentry;
umode_t mode;
char *encoded_symname;
@@ -478,6 +491,7 @@
lower_dentry = ecryptfs_dentry_to_lower(dentry);
dget(lower_dentry);
+ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
lower_dir_dentry = lock_parent(lower_dentry);
mode = S_IALLUGO;
encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname,
@@ -487,7 +501,7 @@
rc = encoded_symlen;
goto out_lock;
}
- rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
+ rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, lower_mnt,
encoded_symname, mode);
kfree(encoded_symname);
if (rc || !lower_dentry->d_inode)
@@ -509,11 +523,14 @@
{
int rc;
struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
struct dentry *lower_dir_dentry;
lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
lower_dir_dentry = lock_parent(lower_dentry);
- rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode);
+ rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, lower_mnt,
+ mode);
if (rc || !lower_dentry->d_inode)
goto out;
rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
@@ -532,14 +549,16 @@
static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
struct dentry *lower_dir_dentry;
int rc;
lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
dget(dentry);
lower_dir_dentry = lock_parent(lower_dentry);
dget(lower_dentry);
- rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
+ rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry, lower_mnt);
dput(lower_dentry);
if (!rc)
d_delete(lower_dentry);
@@ -557,11 +576,14 @@
{
int rc;
struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
struct dentry *lower_dir_dentry;
lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
lower_dir_dentry = lock_parent(lower_dentry);
- rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev);
+ rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, lower_mnt, mode,
+ dev);
if (rc || !lower_dentry->d_inode)
goto out;
rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
@@ -582,24 +604,29 @@
{
int rc;
struct dentry *lower_old_dentry;
+ struct vfsmount *lower_old_mnt;
struct dentry *lower_new_dentry;
+ struct vfsmount *lower_new_mnt;
struct dentry *lower_old_dir_dentry;
struct dentry *lower_new_dir_dentry;
lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
+ lower_old_mnt = ecryptfs_dentry_to_lower_mnt(old_dentry);
lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
+ lower_new_mnt = ecryptfs_dentry_to_lower_mnt(new_dentry);
dget(lower_old_dentry);
dget(lower_new_dentry);
lower_old_dir_dentry = dget_parent(lower_old_dentry);
lower_new_dir_dentry = dget_parent(lower_new_dentry);
lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
- lower_new_dir_dentry->d_inode, lower_new_dentry);
+ lower_old_mnt, lower_new_dir_dentry->d_inode,
+ lower_new_dentry, lower_new_mnt);
if (rc)
goto out_lock;
- fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
+ fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
if (new_dir != old_dir)
- fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
+ fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
out_lock:
unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
dput(lower_new_dentry->d_parent);
@@ -889,6 +916,7 @@
{
int rc = 0;
struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
struct inode *inode;
struct inode *lower_inode;
struct ecryptfs_crypt_stat *crypt_stat;
@@ -899,11 +927,13 @@
inode = dentry->d_inode;
lower_inode = ecryptfs_inode_to_lower(inode);
lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
mutex_lock(&crypt_stat->cs_mutex);
if (S_ISDIR(dentry->d_inode->i_mode))
crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
- else if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
- || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
+ else if (S_ISREG(dentry->d_inode->i_mode)
+ && (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
+ || !(crypt_stat->flags & ECRYPTFS_KEY_VALID))) {
struct vfsmount *lower_mnt;
struct file *lower_file = NULL;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
@@ -954,9 +984,9 @@
if (rc < 0)
goto out;
}
- rc = notify_change(lower_dentry, ia);
+ rc = notify_change(lower_dentry, lower_mnt, ia);
out:
- fsstack_copy_attr_all(inode, lower_inode, NULL);
+ fsstack_copy_attr_all(inode, lower_inode);
return rc;
}
--- linux-source-2.6.22-2.6.22.orig/fs/ecryptfs/dentry.c
+++ linux-source-2.6.22-2.6.22/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@
struct inode *lower_inode =
ecryptfs_inode_to_lower(dentry->d_inode);
- fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
+ fsstack_copy_attr_all(dentry->d_inode, lower_inode);
}
out:
return rc;
--- linux-source-2.6.22-2.6.22.orig/fs/ecryptfs/main.c
+++ linux-source-2.6.22-2.6.22/fs/ecryptfs/main.c
@@ -151,7 +151,7 @@
d_add(dentry, inode);
else
d_instantiate(dentry, inode);
- fsstack_copy_attr_all(inode, lower_inode, NULL);
+ fsstack_copy_attr_all(inode, lower_inode);
/* This size will be overwritten for real files w/ headers and
* other metadata */
fsstack_copy_inode_size(inode, lower_inode);
--- linux-source-2.6.22-2.6.22.orig/fs/sysfs/file.c
+++ linux-source-2.6.22-2.6.22/fs/sysfs/file.c
@@ -283,6 +283,7 @@
mutex_lock(&inode->i_mutex);
if (!(set = inode->i_private)) {
if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) {
+ mutex_unlock(&inode->i_mutex);
error = -ENOMEM;
goto Done;
} else {
@@ -554,7 +555,7 @@
newattrs.ia_mode = (mode & S_IALLUGO) |
(inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- res = notify_change(victim, &newattrs);
+ res = notify_change(victim, NULL, &newattrs);
mutex_unlock(&inode->i_mutex);
}
dput(victim);
--- linux-source-2.6.22-2.6.22.orig/fs/namespace.c
+++ linux-source-2.6.22-2.6.22/fs/namespace.c
@@ -37,7 +37,8 @@
static struct list_head *mount_hashtable __read_mostly;
static int hash_mask __read_mostly, hash_bits __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
-static struct rw_semaphore namespace_sem;
+struct rw_semaphore namespace_sem;
+EXPORT_SYMBOL_GPL(namespace_sem);
/* /sys/fs */
decl_subsys(fs, NULL, NULL);
@@ -1868,3 +1869,30 @@
release_mounts(&umount_list);
kfree(ns);
}
+
+char *d_namespace_path(struct dentry *dentry, struct vfsmount *vfsmnt,
+ char *buf, int buflen)
+{
+ struct vfsmount *rootmnt, *nsrootmnt = NULL;
+ struct dentry *root = NULL;
+ char *res;
+
+ read_lock(&current->fs->lock);
+ rootmnt = mntget(current->fs->rootmnt);
+ read_unlock(&current->fs->lock);
+ spin_lock(&vfsmount_lock);
+ if (rootmnt->mnt_ns)
+ nsrootmnt = mntget(rootmnt->mnt_ns->root);
+ spin_unlock(&vfsmount_lock);
+ mntput(rootmnt);
+ if (nsrootmnt)
+ root = dget(nsrootmnt->mnt_root);
+ res = __d_path(dentry, vfsmnt, root, nsrootmnt, buf, buflen, 1);
+ dput(root);
+ mntput(nsrootmnt);
+ /* Prevent empty path for lazily unmounted filesystems. */
+ if (!IS_ERR(res) && *res == '\0')
+ *--res = '.';
+ return res;
+}
+EXPORT_SYMBOL(d_namespace_path);
--- linux-source-2.6.22-2.6.22.orig/fs/ocfs2/file.c
+++ linux-source-2.6.22-2.6.22/fs/ocfs2/file.c
@@ -1353,7 +1353,7 @@
else
src_page = ERR_PTR(-EFAULT);
} else {
- bp->b_src_buf = buf;
+ bp->b_src_buf = (char *)((unsigned long)buf & PAGE_CACHE_MASK);
}
return src_page;
--- linux-source-2.6.22-2.6.22.orig/fs/nfs/inode.c
+++ linux-source-2.6.22-2.6.22/fs/nfs/inode.c
@@ -961,8 +961,8 @@
goto out_changed;
server = NFS_SERVER(inode);
- /* Update the fsid if and only if this is the root directory */
- if (inode == inode->i_sb->s_root->d_inode
+ /* Update the fsid? */
+ if (S_ISDIR(inode->i_mode)
&& !nfs_fsid_equal(&server->fsid, &fattr->fsid))
server->fsid = fattr->fsid;
--- linux-source-2.6.22-2.6.22.orig/fs/nfs/write.c
+++ linux-source-2.6.22-2.6.22/fs/nfs/write.c
@@ -710,6 +710,17 @@
}
/*
+ * If the page cache is marked as unsafe or invalid, then we can't rely on
+ * the PageUptodate() flag. In this case, we will need to turn off
+ * write optimisations that depend on the page contents being correct.
+ */
+static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
+{
+ return PageUptodate(page) &&
+ !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
+}
+
+/*
* Update and possibly write a cached page of an NFS file.
*
* XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
@@ -730,10 +741,13 @@
(long long)(page_offset(page) +offset));
/* If we're not using byte range locks, and we know the page
- * is entirely in cache, it may be more efficient to avoid
- * fragmenting write requests.
+ * is up to date, it may be more efficient to extend the write
+ * to cover the entire page in order to avoid fragmentation
+ * inefficiencies.
*/
- if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) {
+ if (nfs_write_pageuptodate(page, inode) &&
+ inode->i_flock == NULL &&
+ !(file->f_mode & O_SYNC)) {
count = max(count + offset, nfs_page_length(page));
offset = 0;
}
--- linux-source-2.6.22-2.6.22.orig/fs/nfs/getroot.c
+++ linux-source-2.6.22-2.6.22/fs/nfs/getroot.c
@@ -175,6 +175,9 @@
path++;
name.len = path - (const char *) name.name;
+ if (name.len > NFS4_MAXNAMLEN)
+ return -ENAMETOOLONG;
+
eat_dot_dir:
while (*path == '/')
path++;
--- linux-source-2.6.22-2.6.22.orig/fs/nfs/client.c
+++ linux-source-2.6.22-2.6.22/fs/nfs/client.c
@@ -433,9 +433,6 @@
*/
static void nfs_destroy_server(struct nfs_server *server)
{
- if (!IS_ERR(server->client_acl))
- rpc_shutdown_client(server->client_acl);
-
if (!(server->flags & NFS_MOUNT_NONLM))
lockd_down(); /* release rpc.lockd */
}
@@ -445,6 +442,7 @@
*/
static int nfs_start_lockd(struct nfs_server *server)
{
+ static int warned;
int error = 0;
if (server->nfs_client->cl_nfsversion > 3)
@@ -453,9 +451,28 @@
goto out;
error = lockd_up((server->flags & NFS_MOUNT_TCP) ?
IPPROTO_TCP : IPPROTO_UDP);
- if (error < 0)
+ if (error < 0) {
+ /*
+ * Ubuntu: fix NFS mounting regression from Edgy->Feisty.
+ * In 2.6.18 and older kernels any failures to start lockd were
+ * ignored. This meant an Edgy user could successfully mount
+ * NFS filesystems without having installed nfs-common.
+ *
+ * This behaviour has been changed in 2.6.19 and later kernels,
+ * and so mounting NFS filesystems without nfs-common fails with
+ * "can't read superblock".
+ *
+ * This workaround fixes this by issuing a warning (on the first
+ * lockd start failure), and then allowing the mount to continue
+ * without locking.
+ */
+ if (warned++ == 0) {
+ printk(KERN_ERR "nfs: Starting lockd failed (do you have nfs-common installed?).\n");
+ printk(KERN_ERR "nfs: Continuing anyway, but this workaround will go away soon.\n");
+ }
server->flags |= NFS_MOUNT_NONLM;
- else
+ error = 0;
+ } else
server->destroy = nfs_destroy_server;
out:
return error;
@@ -614,16 +631,6 @@
server->namelen = data->namlen;
/* Create a client RPC handle for the NFSv3 ACL management interface */
nfs_init_server_aclclient(server);
- if (clp->cl_nfsversion == 3) {
- if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
- server->namelen = NFS3_MAXNAMLEN;
- if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
- server->caps |= NFS_CAP_READDIRPLUS;
- } else {
- if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
- server->namelen = NFS2_MAXNAMLEN;
- }
-
dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
return 0;
@@ -781,6 +788,9 @@
if (server->destroy != NULL)
server->destroy(server);
+
+ if (!IS_ERR(server->client_acl))
+ rpc_shutdown_client(server->client_acl);
if (!IS_ERR(server->client))
rpc_shutdown_client(server->client);
@@ -820,6 +830,16 @@
error = nfs_probe_fsinfo(server, mntfh, &fattr);
if (error < 0)
goto error;
+ if (server->nfs_client->rpc_ops->version == 3) {
+ if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
+ server->namelen = NFS3_MAXNAMLEN;
+ if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
+ server->caps |= NFS_CAP_READDIRPLUS;
+ } else {
+ if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
+ server->namelen = NFS2_MAXNAMLEN;
+ }
+
if (!(fattr.valid & NFS_ATTR_FATTR)) {
error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
if (error < 0) {
@@ -1010,6 +1030,9 @@
if (error < 0)
goto error;
+ if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+ server->namelen = NFS4_MAXNAMLEN;
+
BUG_ON(!server->nfs_client);
BUG_ON(!server->nfs_client->rpc_ops);
BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
@@ -1082,6 +1105,9 @@
if (error < 0)
goto error;
+ if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+ server->namelen = NFS4_MAXNAMLEN;
+
dprintk("Referral FSID: %llx:%llx\n",
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
@@ -1141,6 +1167,9 @@
if (error < 0)
goto out_free_server;
+ if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+ server->namelen = NFS4_MAXNAMLEN;
+
dprintk("Cloned FSID: %llx:%llx\n",
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
--- linux-source-2.6.22-2.6.22.orig/fs/nfs/super.c
+++ linux-source-2.6.22-2.6.22/fs/nfs/super.c
@@ -181,8 +181,8 @@
remove_shrinker(acl_shrinker);
#ifdef CONFIG_NFS_V4
unregister_filesystem(&nfs4_fs_type);
- nfs_unregister_sysctl();
#endif
+ nfs_unregister_sysctl();
unregister_filesystem(&nfs_fs_type);
}
@@ -584,27 +584,71 @@
nfs_initialise_sb(sb);
}
-static int nfs_set_super(struct super_block *s, void *_server)
-{
- struct nfs_server *server = _server;
- int ret;
+#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
- s->s_fs_info = server;
- ret = set_anon_super(s, server);
- if (ret == 0)
- server->s_dev = s->s_dev;
- return ret;
+static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
+{
+ const struct nfs_server *a = s->s_fs_info;
+ const struct rpc_clnt *clnt_a = a->client;
+ const struct rpc_clnt *clnt_b = b->client;
+
+ if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
+ goto Ebusy;
+ if (a->nfs_client != b->nfs_client)
+ goto Ebusy;
+ if (a->flags != b->flags)
+ goto Ebusy;
+ if (a->wsize != b->wsize)
+ goto Ebusy;
+ if (a->rsize != b->rsize)
+ goto Ebusy;
+ if (a->acregmin != b->acregmin)
+ goto Ebusy;
+ if (a->acregmax != b->acregmax)
+ goto Ebusy;
+ if (a->acdirmin != b->acdirmin)
+ goto Ebusy;
+ if (a->acdirmax != b->acdirmax)
+ goto Ebusy;
+ if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
+ goto Ebusy;
+ return 1;
+Ebusy:
+ return 0;
+}
+
+struct nfs_sb_mountdata {
+ struct nfs_server *server;
+ int mntflags;
+};
+
+static int nfs_set_super(struct super_block *s, void *data)
+{
+ struct nfs_sb_mountdata *sb_mntdata = data;
+ struct nfs_server *server = sb_mntdata->server;
+ int ret;
+
+ s->s_flags = sb_mntdata->mntflags;
+ s->s_fs_info = server;
+ ret = set_anon_super(s, server);
+ if (ret == 0)
+ server->s_dev = s->s_dev;
+ return ret;
}
static int nfs_compare_super(struct super_block *sb, void *data)
{
- struct nfs_server *server = data, *old = NFS_SB(sb);
-
- if (old->nfs_client != server->nfs_client)
- return 0;
- if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
- return 0;
- return 1;
+ struct nfs_sb_mountdata *sb_mntdata = data;
+ struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
+ int mntflags = sb_mntdata->mntflags;
+
+ if (memcmp(&old->nfs_client->cl_addr,
+ &server->nfs_client->cl_addr,
+ sizeof(old->nfs_client->cl_addr)) != 0)
+ return 0;
+ if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
+ return 0;
+ return nfs_compare_mount_options(sb, server, mntflags);
}
static int nfs_get_sb(struct file_system_type *fs_type,
@@ -615,6 +659,9 @@
struct nfs_fh mntfh;
struct nfs_mount_data *data = raw_data;
struct dentry *mntroot;
+ struct nfs_sb_mountdata sb_mntdata = {
+ .mntflags = flags,
+ };
int error;
/* Validate the mount data */
@@ -629,8 +676,10 @@
goto out_err_noserver;
}
+ sb_mntdata.server = server;
+
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+ s = sget(fs_type, nfs_compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_err_nosb;
@@ -643,7 +692,6 @@
if (!s->s_root) {
/* initial superblock/root creation */
- s->s_flags = flags;
nfs_fill_super(s, data);
}
@@ -691,6 +739,9 @@
struct super_block *s;
struct nfs_server *server;
struct dentry *mntroot;
+ struct nfs_sb_mountdata sb_mntdata = {
+ .mntflags = flags,
+ };
int error;
dprintk("--> nfs_xdev_get_sb()\n");
@@ -701,9 +752,10 @@
error = PTR_ERR(server);
goto out_err_noserver;
}
+ sb_mntdata.server = server;
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+ s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_err_nosb;
@@ -716,7 +768,6 @@
if (!s->s_root) {
/* initial superblock/root creation */
- s->s_flags = flags;
nfs_clone_super(s, data->sb);
}
@@ -808,6 +859,9 @@
struct dentry *mntroot;
char *mntpath = NULL, *hostname = NULL, ip_addr[16];
void *p;
+ struct nfs_sb_mountdata sb_mntdata = {
+ .mntflags = flags,
+ };
int error;
if (data == NULL) {
@@ -878,9 +932,10 @@
error = PTR_ERR(server);
goto out_err_noserver;
}
+ sb_mntdata.server = server;
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+ s = sget(fs_type, nfs_compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_free;
@@ -893,7 +948,6 @@
if (!s->s_root) {
/* initial superblock/root creation */
- s->s_flags = flags;
nfs4_fill_super(s);
}
@@ -949,6 +1003,9 @@
struct super_block *s;
struct nfs_server *server;
struct dentry *mntroot;
+ struct nfs_sb_mountdata sb_mntdata = {
+ .mntflags = flags,
+ };
int error;
dprintk("--> nfs4_xdev_get_sb()\n");
@@ -959,9 +1016,10 @@
error = PTR_ERR(server);
goto out_err_noserver;
}
+ sb_mntdata.server = server;
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+ s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_err_nosb;
@@ -974,7 +1032,6 @@
if (!s->s_root) {
/* initial superblock/root creation */
- s->s_flags = flags;
nfs4_clone_super(s, data->sb);
}
@@ -1016,6 +1073,9 @@
struct nfs_server *server;
struct dentry *mntroot;
struct nfs_fh mntfh;
+ struct nfs_sb_mountdata sb_mntdata = {
+ .mntflags = flags,
+ };
int error;
dprintk("--> nfs4_referral_get_sb()\n");
@@ -1026,9 +1086,10 @@
error = PTR_ERR(server);
goto out_err_noserver;
}
+ sb_mntdata.server = server;
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+ s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_err_nosb;
@@ -1041,7 +1102,6 @@
if (!s->s_root) {
/* initial superblock/root creation */
- s->s_flags = flags;
nfs4_fill_super(s);
}
--- linux-source-2.6.22-2.6.22.orig/fs/nfs/dir.c
+++ linux-source-2.6.22-2.6.22/fs/nfs/dir.c
@@ -897,14 +897,13 @@
return (nd->intent.open.flags & O_EXCL) != 0;
}
-static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
- struct nfs_fh *fh, struct nfs_fattr *fattr)
+static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
{
struct nfs_server *server = NFS_SERVER(dir);
if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
- /* Revalidate fsid on root dir */
- return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
+ /* Revalidate fsid using the parent directory */
+ return __nfs_revalidate_inode(server, dir);
return 0;
}
@@ -946,7 +945,7 @@
res = ERR_PTR(error);
goto out_unlock;
}
- error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
+ error = nfs_reval_fsid(dir, &fattr);
if (error < 0) {
res = ERR_PTR(error);
goto out_unlock;
@@ -1163,6 +1162,8 @@
}
if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
return NULL;
+ if (name.len > NFS_SERVER(dir)->namelen)
+ return NULL;
/* Note: caller is already holding the dir->i_mutex! */
dentry = d_alloc(parent, &name);
if (dentry == NULL)
--- linux-source-2.6.22-2.6.22.orig/fs/attr.c
+++ linux-source-2.6.22-2.6.22/fs/attr.c
@@ -100,7 +100,8 @@
}
EXPORT_SYMBOL(inode_setattr);
-int notify_change(struct dentry * dentry, struct iattr * attr)
+int notify_change(struct dentry *dentry, struct vfsmount *mnt,
+ struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
mode_t mode;
@@ -143,13 +144,13 @@
down_write(&dentry->d_inode->i_alloc_sem);
if (inode->i_op && inode->i_op->setattr) {
- error = security_inode_setattr(dentry, attr);
+ error = security_inode_setattr(dentry, mnt, attr);
if (!error)
error = inode->i_op->setattr(dentry, attr);
} else {
error = inode_change_ok(inode, attr);
if (!error)
- error = security_inode_setattr(dentry, attr);
+ error = security_inode_setattr(dentry, mnt, attr);
if (!error) {
if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
--- linux-source-2.6.22-2.6.22.orig/fs/jbd/commit.c
+++ linux-source-2.6.22-2.6.22/fs/jbd/commit.c
@@ -887,7 +887,8 @@
journal->j_committing_transaction = NULL;
spin_unlock(&journal->j_state_lock);
- if (commit_transaction->t_checkpoint_list == NULL) {
+ if (commit_transaction->t_checkpoint_list == NULL &&
+ commit_transaction->t_checkpoint_io_list == NULL) {
__journal_drop_transaction(journal, commit_transaction);
} else {
if (journal->j_checkpoint_transactions == NULL) {
--- linux-source-2.6.22-2.6.22.orig/fs/drop_caches.c
+++ linux-source-2.6.22-2.6.22/fs/drop_caches.c
@@ -3,6 +3,7 @@
*/
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/writeback.h>
@@ -12,7 +13,7 @@
/* A global variable is a bit ugly, but it keeps the code simple */
int sysctl_drop_caches;
-static void drop_pagecache_sb(struct super_block *sb)
+void drop_pagecache_sb(struct super_block *sb)
{
struct inode *inode;
@@ -24,6 +25,7 @@
}
spin_unlock(&inode_lock);
}
+EXPORT_SYMBOL(drop_pagecache_sb);
void drop_pagecache(void)
{
--- linux-source-2.6.22-2.6.22.orig/fs/minix/itree_v2.c
+++ linux-source-2.6.22-2.6.22/fs/minix/itree_v2.c
@@ -23,12 +23,17 @@
static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
{
int n = 0;
+ char b[BDEVNAME_SIZE];
struct super_block *sb = inode->i_sb;
if (block < 0) {
- printk("minix_bmap: block<0\n");
+ printk("MINIX-fs: block_to_path: block %ld < 0 on dev %s\n",
+ block, bdevname(sb->s_bdev, b));
} else if (block >= (minix_sb(inode->i_sb)->s_max_size/sb->s_blocksize)) {
- printk("minix_bmap: block>big\n");
+ if (printk_ratelimit())
+ printk("MINIX-fs: block_to_path: "
+ "block %ld too big on dev %s\n",
+ block, bdevname(sb->s_bdev, b));
} else if (block < 7) {
offsets[n++] = block;
} else if ((block -= 7) < 256) {
--- linux-source-2.6.22-2.6.22.orig/fs/minix/itree_v1.c
+++ linux-source-2.6.22-2.6.22/fs/minix/itree_v1.c
@@ -23,11 +23,16 @@
static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
{
int n = 0;
+ char b[BDEVNAME_SIZE];
if (block < 0) {
- printk("minix_bmap: block<0\n");
+ printk("MINIX-fs: block_to_path: block %ld < 0 on dev %s\n",
+ block, bdevname(inode->i_sb->s_bdev, b));
} else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) {
- printk("minix_bmap: block>big\n");
+ if (printk_ratelimit())
+ printk("MINIX-fs: block_to_path: "
+ "block %ld too big on dev %s\n",
+ block, bdevname(inode->i_sb->s_bdev, b));
} else if (block < 7) {
offsets[n++] = block;
} else if ((block -= 7) < 512) {
--- linux-source-2.6.22-2.6.22.orig/fs/namei.c
+++ linux-source-2.6.22-2.6.22/fs/namei.c
@@ -296,7 +296,13 @@
*/
int file_permission(struct file *file, int mask)
{
- return permission(file->f_path.dentry->d_inode, mask, NULL);
+ struct nameidata nd;
+
+ nd.dentry = file->f_path.dentry;
+ nd.mnt = file->f_path.mnt;
+ nd.flags = LOOKUP_ACCESS;
+
+ return permission(nd.dentry->d_inode, mask, &nd);
}
/*
@@ -487,7 +493,14 @@
*/
result = d_lookup(parent, name);
if (!result) {
- struct dentry * dentry = d_alloc(parent, name);
+ struct dentry *dentry;
+
+ /* Don't create child dentry for a dead directory. */
+ result = ERR_PTR(-ENOENT);
+ if (IS_DEADDIR(dir))
+ goto out_unlock;
+
+ dentry = d_alloc(parent, name);
result = ERR_PTR(-ENOMEM);
if (dentry) {
result = dir->i_op->lookup(dir, dentry, nd);
@@ -496,6 +509,7 @@
else
result = dentry;
}
+out_unlock:
mutex_unlock(&dir->i_mutex);
return result;
}
@@ -1124,31 +1138,34 @@
nd->mnt = mntget(fs->rootmnt);
nd->dentry = dget(fs->root);
read_unlock(&fs->lock);
+ } else if (flags & LOOKUP_ONE) {
+ /* nd->mnt and nd->dentry already set, just grab references */
+ mntget(nd->mnt);
+ dget(nd->dentry);
} else if (dfd == AT_FDCWD) {
read_lock(&fs->lock);
nd->mnt = mntget(fs->pwdmnt);
nd->dentry = dget(fs->pwd);
read_unlock(&fs->lock);
} else {
- struct dentry *dentry;
-
file = fget_light(dfd, &fput_needed);
retval = -EBADF;
if (!file)
goto out_fail;
- dentry = file->f_path.dentry;
+ nd->dentry = file->f_path.dentry;
+ nd->mnt = file->f_path.mnt;
retval = -ENOTDIR;
- if (!S_ISDIR(dentry->d_inode->i_mode))
+ if (!S_ISDIR(nd->dentry->d_inode->i_mode))
goto fput_fail;
- retval = file_permission(file, MAY_EXEC);
+ retval = vfs_permission(nd, MAY_EXEC);
if (retval)
goto fput_fail;
- nd->mnt = mntget(file->f_path.mnt);
- nd->dentry = dget(dentry);
+ mntget(nd->mnt);
+ dget(nd->dentry);
fput_light(file, fput_needed);
}
@@ -1261,7 +1278,14 @@
dentry = cached_lookup(base, name, nd);
if (!dentry) {
- struct dentry *new = d_alloc(base, name);
+ struct dentry *new;
+
+ /* Don't create child dentry for a dead directory. */
+ dentry = ERR_PTR(-ENOENT);
+ if (IS_DEADDIR(inode))
+ goto out;
+
+ new = d_alloc(base, name);
dentry = ERR_PTR(-ENOMEM);
if (!new)
goto out;
@@ -1325,7 +1349,8 @@
return 0;
}
-struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+struct dentry *lookup_one_len_nd(const char *name, struct dentry *base,
+ int len, struct nameidata *nd)
{
int err;
struct qstr this;
@@ -1333,7 +1358,7 @@
err = __lookup_one_len(name, &this, base, len);
if (err)
return ERR_PTR(err);
- return __lookup_hash(&this, base, NULL);
+ return __lookup_hash(&this, base, nd);
}
struct dentry *lookup_one_len_kern(const char *name, struct dentry *base, int len)
@@ -1409,6 +1434,10 @@
BUG_ON(victim->d_parent->d_inode != dir);
audit_inode_child(victim->d_name.name, victim->d_inode, dir);
+#if 0
+ if (nd)
+ nd->flags |= LOOKUP_CONTINUE;
+#endif
error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
if (error)
return error;
@@ -1446,6 +1475,8 @@
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
+ if (nd)
+ nd->flags |= LOOKUP_CONTINUE;
return permission(dir,MAY_WRITE | MAY_EXEC, nd);
}
@@ -1521,7 +1552,7 @@
return -EACCES; /* shouldn't it be ENOSYS? */
mode &= S_IALLUGO;
mode |= S_IFREG;
- error = security_inode_create(dir, dentry, mode);
+ error = security_inode_create(dir, dentry, nd ? nd->mnt : NULL, mode);
if (error)
return error;
DQUOT_INIT(dir);
@@ -1543,7 +1574,7 @@
if (S_ISLNK(inode->i_mode))
return -ELOOP;
- if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
+ if (S_ISDIR(inode->i_mode) && (acc_mode & MAY_WRITE))
return -EISDIR;
error = vfs_permission(nd, acc_mode);
@@ -1562,7 +1593,7 @@
return -EACCES;
flag &= ~O_TRUNC;
- } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))
+ } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE))
return -EROFS;
/*
* An append-only file must be opened in append mode for writing.
@@ -1598,7 +1629,8 @@
if (!error) {
DQUOT_INIT(inode);
- error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL);
+ error = do_truncate(dentry, nd->mnt, 0,
+ ATTR_MTIME|ATTR_CTIME, NULL);
}
put_write_access(inode);
if (error)
@@ -1854,7 +1886,8 @@
}
EXPORT_SYMBOL_GPL(lookup_create);
-int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+int vfs_mknod(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt,
+ int mode, dev_t dev)
{
int error = may_create(dir, dentry, NULL);
@@ -1867,7 +1900,7 @@
if (!dir->i_op || !dir->i_op->mknod)
return -EPERM;
- error = security_inode_mknod(dir, dentry, mode, dev);
+ error = security_inode_mknod(dir, dentry, mnt, mode, dev);
if (error)
return error;
@@ -1906,11 +1939,12 @@
error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
break;
case S_IFCHR: case S_IFBLK:
- error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
- new_decode_dev(dev));
+ error = vfs_mknod(nd.dentry->d_inode, dentry, nd.mnt,
+ mode, new_decode_dev(dev));
break;
case S_IFIFO: case S_IFSOCK:
- error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
+ error = vfs_mknod(nd.dentry->d_inode, dentry, nd.mnt,
+ mode, 0);
break;
case S_IFDIR:
error = -EPERM;
@@ -1933,7 +1967,8 @@
return sys_mknodat(AT_FDCWD, filename, mode, dev);
}
-int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+int vfs_mkdir(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt,
+ int mode)
{
int error = may_create(dir, dentry, NULL);
@@ -1944,7 +1979,7 @@
return -EPERM;
mode &= (S_IRWXUGO|S_ISVTX);
- error = security_inode_mkdir(dir, dentry, mode);
+ error = security_inode_mkdir(dir, dentry, mnt, mode);
if (error)
return error;
@@ -1977,7 +2012,7 @@
if (!IS_POSIXACL(nd.dentry->d_inode))
mode &= ~current->fs->umask;
- error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
+ error = vfs_mkdir(nd.dentry->d_inode, dentry, nd.mnt, mode);
dput(dentry);
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
@@ -2020,7 +2055,7 @@
spin_unlock(&dcache_lock);
}
-int vfs_rmdir(struct inode *dir, struct dentry *dentry)
+int vfs_rmdir(struct inode *dir, struct dentry *dentry,struct vfsmount *mnt)
{
int error = may_delete(dir, dentry, 1);
@@ -2030,6 +2065,10 @@
if (!dir->i_op || !dir->i_op->rmdir)
return -EPERM;
+ error = security_inode_rmdir(dir, dentry, mnt);
+ if (error)
+ return error;
+
DQUOT_INIT(dir);
mutex_lock(&dentry->d_inode->i_mutex);
@@ -2037,12 +2076,9 @@
if (d_mountpoint(dentry))
error = -EBUSY;
else {
- error = security_inode_rmdir(dir, dentry);
- if (!error) {
- error = dir->i_op->rmdir(dir, dentry);
- if (!error)
- dentry->d_inode->i_flags |= S_DEAD;
- }
+ error = dir->i_op->rmdir(dir, dentry);
+ if (!error)
+ dentry->d_inode->i_flags |= S_DEAD;
}
mutex_unlock(&dentry->d_inode->i_mutex);
if (!error) {
@@ -2084,7 +2120,7 @@
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit2;
- error = vfs_rmdir(nd.dentry->d_inode, dentry);
+ error = vfs_rmdir(nd.dentry->d_inode, dentry, nd.mnt);
dput(dentry);
exit2:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
@@ -2100,7 +2136,7 @@
return do_rmdir(AT_FDCWD, pathname);
}
-int vfs_unlink(struct inode *dir, struct dentry *dentry)
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt)
{
int error = may_delete(dir, dentry, 0);
@@ -2116,7 +2152,7 @@
if (d_mountpoint(dentry))
error = -EBUSY;
else {
- error = security_inode_unlink(dir, dentry);
+ error = security_inode_unlink(dir, dentry, mnt);
if (!error)
error = dir->i_op->unlink(dir, dentry);
}
@@ -2164,7 +2200,7 @@
inode = dentry->d_inode;
if (inode)
atomic_inc(&inode->i_count);
- error = vfs_unlink(nd.dentry->d_inode, dentry);
+ error = vfs_unlink(nd.dentry->d_inode, dentry, nd.mnt);
exit2:
dput(dentry);
}
@@ -2199,7 +2235,8 @@
return do_unlinkat(AT_FDCWD, pathname);
}
-int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
+int vfs_symlink(struct inode *dir, struct dentry *dentry, struct vfsmount *mnt,
+ const char *oldname, int mode)
{
int error = may_create(dir, dentry, NULL);
@@ -2209,7 +2246,7 @@
if (!dir->i_op || !dir->i_op->symlink)
return -EPERM;
- error = security_inode_symlink(dir, dentry, oldname);
+ error = security_inode_symlink(dir, dentry, mnt, oldname);
if (error)
return error;
@@ -2245,7 +2282,8 @@
if (IS_ERR(dentry))
goto out_unlock;
- error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
+ error = vfs_symlink(nd.dentry->d_inode, dentry, nd.mnt, from,
+ S_IALLUGO);
dput(dentry);
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
@@ -2262,7 +2300,7 @@
return sys_symlinkat(oldname, AT_FDCWD, newname);
}
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
+int vfs_link(struct dentry *old_dentry, struct vfsmount *old_mnt, struct inode *dir, struct dentry *new_dentry, struct vfsmount *new_mnt)
{
struct inode *inode = old_dentry->d_inode;
int error;
@@ -2287,7 +2325,8 @@
if (S_ISDIR(old_dentry->d_inode->i_mode))
return -EPERM;
- error = security_inode_link(old_dentry, dir, new_dentry);
+ error = security_inode_link(old_dentry, old_mnt, dir, new_dentry,
+ new_mnt);
if (error)
return error;
@@ -2340,7 +2379,8 @@
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto out_unlock;
- error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+ error = vfs_link(old_nd.dentry, old_nd.mnt, nd.dentry->d_inode,
+ new_dentry, nd.mnt);
dput(new_dentry);
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
@@ -2392,7 +2432,8 @@
* locking].
*/
static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+ struct vfsmount *old_mnt, struct inode *new_dir,
+ struct dentry *new_dentry, struct vfsmount *new_mnt)
{
int error = 0;
struct inode *target;
@@ -2407,7 +2448,8 @@
return error;
}
- error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
+ error = security_inode_rename(old_dir, old_dentry, old_mnt,
+ new_dir, new_dentry, new_mnt);
if (error)
return error;
@@ -2435,12 +2477,14 @@
}
static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+ struct vfsmount *old_mnt, struct inode *new_dir,
+ struct dentry *new_dentry, struct vfsmount *new_mnt)
{
struct inode *target;
int error;
- error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
+ error = security_inode_rename(old_dir, old_dentry, old_mnt,
+ new_dir, new_dentry, new_mnt);
if (error)
return error;
@@ -2463,7 +2507,8 @@
}
int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
+ struct vfsmount *old_mnt, struct inode *new_dir,
+ struct dentry *new_dentry, struct vfsmount *new_mnt)
{
int error;
int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
@@ -2492,9 +2537,11 @@
old_name = fsnotify_oldname_init(old_dentry->d_name.name);
if (is_dir)
- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
+ error = vfs_rename_dir(old_dir, old_dentry, old_mnt,
+ new_dir, new_dentry, new_mnt);
else
- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
+ error = vfs_rename_other(old_dir, old_dentry, old_mnt,
+ new_dir, new_dentry, new_mnt);
if (!error) {
const char *new_name = old_dentry->d_name.name;
fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
@@ -2566,8 +2613,8 @@
if (new_dentry == trap)
goto exit5;
- error = vfs_rename(old_dir->d_inode, old_dentry,
- new_dir->d_inode, new_dentry);
+ error = vfs_rename(old_dir->d_inode, old_dentry, oldnd.mnt,
+ new_dir->d_inode, new_dentry, newnd.mnt);
exit5:
dput(new_dentry);
exit4:
@@ -2766,7 +2813,7 @@
EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
EXPORT_SYMBOL(getname);
EXPORT_SYMBOL(lock_rename);
-EXPORT_SYMBOL(lookup_one_len);
+EXPORT_SYMBOL(lookup_one_len_nd);
EXPORT_SYMBOL(page_follow_link_light);
EXPORT_SYMBOL(page_put_link);
EXPORT_SYMBOL(page_readlink);
--- linux-source-2.6.22-2.6.22.orig/fs/utimes.c
+++ linux-source-2.6.22-2.6.22/fs/utimes.c
@@ -38,6 +38,19 @@
#endif
+static bool nsec_special(long nsec)
+{
+ return nsec == UTIME_OMIT || nsec == UTIME_NOW;
+}
+
+static bool nsec_valid(long nsec)
+{
+	/* Ordinary values must lie in 0..999999999 nanoseconds. */
+	bool in_range = nsec >= 0 && nsec <= 999999999;
+
+	return nsec_special(nsec) || in_range;
+}
+
/* If times==NULL, set access and modification to current time,
* must be owner or have write permission.
* Else, update from *times, must be owner or super user.
@@ -46,12 +59,17 @@
{
int error;
struct nameidata nd;
- struct dentry *dentry;
+ struct path path;
struct inode *inode;
struct iattr newattrs;
struct file *f = NULL;
error = -EINVAL;
+ if (times && (!nsec_valid(times[0].tv_nsec) ||
+ !nsec_valid(times[1].tv_nsec))) {
+ goto out;
+ }
+
if (flags & ~AT_SYMLINK_NOFOLLOW)
goto out;
@@ -64,16 +82,17 @@
f = fget(dfd);
if (!f)
goto out;
- dentry = f->f_path.dentry;
+ path = f->f_path;
} else {
error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
if (error)
goto out;
- dentry = nd.dentry;
+ path.dentry = nd.dentry;
+ path.mnt = nd.mnt;
}
- inode = dentry->d_inode;
+ inode = path.dentry->d_inode;
error = -EROFS;
if (IS_RDONLY(inode))
@@ -101,7 +120,15 @@
newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
newattrs.ia_valid |= ATTR_MTIME_SET;
}
- } else {
+ }
+
+ /*
+ * If times is NULL or both times are either UTIME_OMIT or
+ * UTIME_NOW, then need to check permissions, because
+ * inode_change_ok() won't do it.
+ */
+ if (!times || (nsec_special(times[0].tv_nsec) &&
+ nsec_special(times[1].tv_nsec))) {
error = -EACCES;
if (IS_IMMUTABLE(inode))
goto dput_and_out;
@@ -118,7 +145,7 @@
}
}
mutex_lock(&inode->i_mutex);
- error = notify_change(dentry, &newattrs);
+ error = notify_change(path.dentry, path.mnt, &newattrs);
mutex_unlock(&inode->i_mutex);
dput_and_out:
if (f)
--- linux-source-2.6.22-2.6.22.orig/fs/ext3/namei.c
+++ linux-source-2.6.22-2.6.22/fs/ext3/namei.c
@@ -140,7 +140,8 @@
struct dx_map_entry
{
u32 hash;
- u32 offs;
+ u16 offs;
+ u16 size;
};
#ifdef CONFIG_EXT3_INDEX
@@ -379,13 +380,28 @@
entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +439,15 @@
if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +455,10 @@
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.", dir->i_ino);
return NULL;
}
@@ -671,6 +698,10 @@
* Directory block splitting, compacting
*/
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
{
@@ -684,7 +715,8 @@
ext3fs_dirhash(de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
- map_tail->offs = (u32) ((char *) de - base);
+ map_tail->offs = (u16) ((char *) de - base);
+ map_tail->size = le16_to_cpu(de->rec_len);
count++;
cond_resched();
}
@@ -694,6 +726,7 @@
return count;
}
+/* Sort map by hash value */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1081,6 +1114,10 @@
}
#ifdef CONFIG_EXT3_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
static struct ext3_dir_entry_2 *
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
{
@@ -1099,6 +1136,10 @@
return (struct ext3_dir_entry_2 *) (to - rec_len);
}
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
{
struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
@@ -1121,6 +1162,11 @@
return prev;
}
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
struct dx_hash_info *hinfo, int *error)
@@ -1132,7 +1178,7 @@
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
- unsigned split;
+ unsigned split, move, size, i;
struct ext3_dir_entry_2 *de = NULL, *de2;
int err = 0;
@@ -1160,8 +1206,26 @@
count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
- split = count/2; // need to adjust to actual middle
dx_sort_map (map, count);
+ /* Split the existing block in the middle, size-wise */
+ size = 0;
+ move = 0;
+ /*
+ * i is unsigned, so "i >= 0" can never end the loop; test before
+ * decrementing so the walk stops after map[0].
+ */
+ for (i = count; i-- > 0; ) {
+ /* is more than half of this entry in 2nd half of the block? */
+ if (size + map[i].size/2 > blocksize/2)
+ break;
+ size += map[i].size;
+ move++;
+ }
+ /*
+ * map index at which we will split; if every entry was counted,
+ * split by count instead so that map[split - 1] is not map[-1]
+ */
+ split = (move < count) ? count - move : count/2;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n",
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/debug_fs.c
+++ linux-source-2.6.22-2.6.22/fs/dlm/debug_fs.c
@@ -17,6 +17,7 @@
#include <linux/debugfs.h>
#include "dlm_internal.h"
+#include "lock.h"
#define DLM_DEBUG_BUF_LEN 4096
static char debug_buf[DLM_DEBUG_BUF_LEN];
@@ -26,6 +27,8 @@
struct rsb_iter {
int entry;
+ int master;
+ int header;
struct dlm_ls *ls;
struct list_head *next;
struct dlm_rsb *rsb;
@@ -85,6 +88,8 @@
struct dlm_lkb *lkb;
int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
+ lock_rsb(res);
+
seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
for (i = 0; i < res->res_length; i++) {
if (isprint(res->res_name[i]))
@@ -151,6 +156,59 @@
seq_printf(s, "\n");
}
out:
+ unlock_rsb(res);
+ return 0;
+}
+
+static void print_master_lock(struct seq_file *s, struct dlm_lkb *lkb,
+ struct dlm_rsb *r)
+{
+ struct dlm_user_args *ua;
+ unsigned int waiting = 0;
+ uint64_t xid = 0;
+
+ if (lkb->lkb_flags & DLM_IFL_USER) {
+ ua = (struct dlm_user_args *) lkb->lkb_astparam;
+ if (ua)
+ xid = ua->xid;
+ }
+
+ if (lkb->lkb_timestamp)
+ waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
+
+ /* id nodeid remid pid xid flags sts grmode rqmode time_ms len name */
+
+ seq_printf(s, "%x %d %x %u %llu %x %d %d %d %u %d \"%s\"\n",
+ lkb->lkb_id,
+ lkb->lkb_nodeid,
+ lkb->lkb_remid,
+ lkb->lkb_ownpid,
+ (unsigned long long)xid,
+ lkb->lkb_exflags,
+ lkb->lkb_status,
+ lkb->lkb_grmode,
+ lkb->lkb_rqmode,
+ waiting,
+ r->res_length,
+ r->res_name);
+}
+
+static int print_master_resource(struct dlm_rsb *r, struct seq_file *s)
+{
+ struct dlm_lkb *lkb;
+
+ lock_rsb(r);
+
+ list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
+ print_master_lock(s, lkb, r);
+
+ list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
+ print_master_lock(s, lkb, r);
+
+ list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
+ print_master_lock(s, lkb, r);
+
+ unlock_rsb(r);
return 0;
}
@@ -166,6 +224,9 @@
read_lock(&ls->ls_rsbtbl[i].lock);
if (!list_empty(&ls->ls_rsbtbl[i].list)) {
ri->next = ls->ls_rsbtbl[i].list.next;
+ ri->rsb = list_entry(ri->next, struct dlm_rsb,
+ res_hashchain);
+ dlm_hold_rsb(ri->rsb);
read_unlock(&ls->ls_rsbtbl[i].lock);
break;
}
@@ -176,6 +237,7 @@
if (ri->entry >= ls->ls_rsbtbl_size)
return 1;
} else {
+ struct dlm_rsb *old = ri->rsb;
i = ri->entry;
read_lock(&ls->ls_rsbtbl[i].lock);
ri->next = ri->next->next;
@@ -184,11 +246,14 @@
ri->next = NULL;
ri->entry++;
read_unlock(&ls->ls_rsbtbl[i].lock);
+ dlm_put_rsb(old);
goto top;
}
+ ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
+ dlm_hold_rsb(ri->rsb);
read_unlock(&ls->ls_rsbtbl[i].lock);
+ dlm_put_rsb(old);
}
- ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
return 0;
}
@@ -202,7 +267,7 @@
{
struct rsb_iter *ri;
- ri = kmalloc(sizeof *ri, GFP_KERNEL);
+ ri = kzalloc(sizeof *ri, GFP_KERNEL);
if (!ri)
return NULL;
@@ -260,7 +325,17 @@
{
struct rsb_iter *ri = iter_ptr;
- print_resource(ri->rsb, file);
+ if (ri->master) {
+ if (ri->header) {
+ seq_printf(file, "id nodeid remid pid xid flags sts "
+ "grmode rqmode time_ms len name\n");
+ ri->header = 0;
+ }
+ if (is_master(ri->rsb))
+ print_master_resource(ri->rsb, file);
+ } else {
+ print_resource(ri->rsb, file);
+ }
return 0;
}
@@ -296,6 +371,83 @@
};
/*
+ * Dump master lock state
+ */
+
+static struct rsb_iter *master_iter_init(struct dlm_ls *ls, loff_t *pos)
+{
+	struct rsb_iter *iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+
+	if (iter == NULL)
+		return NULL;
+
+	/* Begin at table entry 0 with no current position, in master mode. */
+	iter->master = 1;
+	iter->ls = ls;
+	iter->next = NULL;
+	iter->entry = 0;
+
+	/* Request the header line only when the read starts at position 0. */
+	iter->header = (*pos == 0);
+
+	if (rsb_iter_next(iter) != 0) {
+		rsb_iter_free(iter);
+		iter = NULL;
+	}
+
+	return iter;
+}
+
+static void *master_seq_start(struct seq_file *file, loff_t *pos)
+{
+	loff_t remaining = *pos;
+	struct rsb_iter *iter = master_iter_init(file->private, pos);
+
+	if (iter == NULL)
+		return NULL;
+
+	/* Step the fresh iterator forward *pos times to reach the entry. */
+	for (; remaining != 0; remaining--) {
+		if (rsb_iter_next(iter)) {
+			rsb_iter_free(iter);
+			return NULL;
+		}
+	}
+
+	return iter;
+}
+
+static struct seq_operations master_seq_ops = {
+ .start = master_seq_start,
+ .next = rsb_seq_next,
+ .stop = rsb_seq_stop,
+ .show = rsb_seq_show,
+};
+
+static int master_open(struct inode *inode, struct file *file)
+{
+	int err = seq_open(file, &master_seq_ops);
+
+	if (err != 0)
+		return err;
+
+	/*
+	 * After seq_open() the seq_file is read from file->private_data;
+	 * store inode->i_private in it, which master_seq_start() reads back.
+	 */
+	((struct seq_file *)file->private_data)->private = inode->i_private;
+	return 0;
+}
+
+static const struct file_operations master_fops = {
+ .owner = THIS_MODULE,
+ .open = master_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+/*
* dump lkb's on the ls_waiters list
*/
@@ -362,6 +514,20 @@
return -ENOMEM;
}
+ memset(name, 0, sizeof(name));
+ snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_master", ls->ls_name);
+
+ ls->ls_debug_master_dentry = debugfs_create_file(name,
+ S_IFREG | S_IRUGO,
+ dlm_root,
+ ls,
+ &master_fops);
+ if (!ls->ls_debug_master_dentry) {
+ debugfs_remove(ls->ls_debug_waiters_dentry);
+ debugfs_remove(ls->ls_debug_rsb_dentry);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -371,6 +537,8 @@
debugfs_remove(ls->ls_debug_rsb_dentry);
if (ls->ls_debug_waiters_dentry)
debugfs_remove(ls->ls_debug_waiters_dentry);
+ if (ls->ls_debug_master_dentry)
+ debugfs_remove(ls->ls_debug_master_dentry);
}
int dlm_register_debugfs(void)
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/lockspace.c
+++ linux-source-2.6.22-2.6.22/fs/dlm/lockspace.c
@@ -197,13 +197,24 @@
else
kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
+ log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
+
+ /* dlm_controld will see the uevent, do the necessary group management
+ and then write to sysfs to wake us */
+
error = wait_event_interruptible(ls->ls_uevent_wait,
test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
+
+ log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
+
if (error)
goto out;
error = ls->ls_uevent_result;
out:
+ if (error)
+ log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
+ error, ls->ls_uevent_result);
return error;
}
@@ -234,8 +245,13 @@
struct dlm_ls *ls;
while (!kthread_should_stop()) {
- list_for_each_entry(ls, &lslist, ls_list)
- dlm_scan_rsbs(ls);
+ list_for_each_entry(ls, &lslist, ls_list) {
+ if (dlm_lock_recovery_try(ls)) {
+ dlm_scan_rsbs(ls);
+ dlm_scan_timeout(ls);
+ dlm_unlock_recovery(ls);
+ }
+ }
schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
}
return 0;
@@ -395,6 +411,7 @@
{
struct dlm_ls *ls;
int i, size, error = -ENOMEM;
+ int do_unreg = 0;
if (namelen > DLM_LOCKSPACE_LEN)
return -EINVAL;
@@ -417,11 +434,22 @@
goto out;
memcpy(ls->ls_name, name, namelen);
ls->ls_namelen = namelen;
- ls->ls_exflags = flags;
ls->ls_lvblen = lvblen;
ls->ls_count = 0;
ls->ls_flags = 0;
+ if (flags & DLM_LSFL_TIMEWARN)
+ set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+
+ if (flags & DLM_LSFL_FS)
+ ls->ls_allocation = GFP_NOFS;
+ else
+ ls->ls_allocation = GFP_KERNEL;
+
+ /* ls_exflags are forced to match among nodes, and we don't
+ need to require all nodes to have TIMEWARN or FS set */
+ ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
+
size = dlm_config.ci_rsbtbl_size;
ls->ls_rsbtbl_size = size;
@@ -461,6 +489,8 @@
mutex_init(&ls->ls_waiters_mutex);
INIT_LIST_HEAD(&ls->ls_orphans);
mutex_init(&ls->ls_orphans_mutex);
+ INIT_LIST_HEAD(&ls->ls_timeout);
+ mutex_init(&ls->ls_timeout_mutex);
INIT_LIST_HEAD(&ls->ls_nodes);
INIT_LIST_HEAD(&ls->ls_nodes_gone);
@@ -477,6 +507,8 @@
init_waitqueue_head(&ls->ls_uevent_wait);
ls->ls_uevent_result = 0;
+ init_completion(&ls->ls_members_done);
+ ls->ls_members_result = -1;
ls->ls_recoverd_task = NULL;
mutex_init(&ls->ls_recoverd_active);
@@ -513,32 +545,49 @@
error = dlm_recoverd_start(ls);
if (error) {
log_error(ls, "can't start dlm_recoverd %d", error);
- goto out_rcomfree;
+ goto out_delist;
}
- dlm_create_debug_file(ls);
-
error = kobject_setup(ls);
if (error)
- goto out_del;
+ goto out_stop;
error = kobject_register(&ls->ls_kobj);
if (error)
- goto out_del;
+ goto out_stop;
+
+ /* let kobject handle freeing of ls if there's an error */
+ do_unreg = 1;
+
+ /* This uevent triggers dlm_controld in userspace to add us to the
+ group of nodes that are members of this lockspace (managed by the
+ cluster infrastructure.) Once it's done that, it tells us who the
+ current lockspace members are (via configfs) and then tells the
+ lockspace to start running (via sysfs) in dlm_ls_start(). */
error = do_uevent(ls, 1);
if (error)
- goto out_unreg;
+ goto out_stop;
+
+ wait_for_completion(&ls->ls_members_done);
+ error = ls->ls_members_result;
+ if (error)
+ goto out_members;
+
+ dlm_create_debug_file(ls);
+
+ log_debug(ls, "join complete");
*lockspace = ls;
return 0;
- out_unreg:
- kobject_unregister(&ls->ls_kobj);
- out_del:
- dlm_delete_debug_file(ls);
+ out_members:
+ do_uevent(ls, 0);
+ dlm_clear_members(ls);
+ kfree(ls->ls_node_array);
+ out_stop:
dlm_recoverd_stop(ls);
- out_rcomfree:
+ out_delist:
spin_lock(&lslist_lock);
list_del(&ls->ls_list);
spin_unlock(&lslist_lock);
@@ -550,7 +599,10 @@
out_rsbfree:
kfree(ls->ls_rsbtbl);
out_lsfree:
- kfree(ls);
+ if (do_unreg)
+ kobject_unregister(&ls->ls_kobj);
+ else
+ kfree(ls);
out:
module_put(THIS_MODULE);
return error;
@@ -570,6 +622,8 @@
error = new_lockspace(name, namelen, lockspace, flags, lvblen);
if (!error)
ls_count++;
+ else if (!ls_count)
+ threads_stop();
out:
mutex_unlock(&ls_lock);
return error;
@@ -696,7 +750,7 @@
dlm_clear_members_gone(ls);
kfree(ls->ls_node_array);
kobject_unregister(&ls->ls_kobj);
- /* The ls structure will be freed when the kobject is done with */
+ /* The ls structure will be freed when the kobject is done with */
mutex_lock(&ls_lock);
ls_count--;
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/config.c
+++ linux-source-2.6.22-2.6.22/fs/dlm/config.c
@@ -90,6 +90,7 @@
unsigned int cl_scan_secs;
unsigned int cl_log_debug;
unsigned int cl_protocol;
+ unsigned int cl_timewarn_cs;
};
enum {
@@ -103,6 +104,7 @@
CLUSTER_ATTR_SCAN_SECS,
CLUSTER_ATTR_LOG_DEBUG,
CLUSTER_ATTR_PROTOCOL,
+ CLUSTER_ATTR_TIMEWARN_CS,
};
struct cluster_attribute {
@@ -162,6 +164,7 @@
CLUSTER_ATTR(scan_secs, 1);
CLUSTER_ATTR(log_debug, 0);
CLUSTER_ATTR(protocol, 0);
+CLUSTER_ATTR(timewarn_cs, 1);
static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -174,6 +177,7 @@
[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
+ [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
NULL,
};
@@ -429,6 +433,8 @@
cl->cl_toss_secs = dlm_config.ci_toss_secs;
cl->cl_scan_secs = dlm_config.ci_scan_secs;
cl->cl_log_debug = dlm_config.ci_log_debug;
+ cl->cl_protocol = dlm_config.ci_protocol;
+ cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
space_list = &sps->ss_group;
comm_list = &cms->cs_group;
@@ -748,9 +754,16 @@
static struct space *get_space(char *name)
{
+ struct config_item *i;
+
if (!space_list)
return NULL;
- return to_space(config_group_find_obj(space_list, name));
+
+ down(&space_list->cg_subsys->su_sem);
+ i = config_group_find_obj(space_list, name);
+ up(&space_list->cg_subsys->su_sem);
+
+ return to_space(i);
}
static void put_space(struct space *sp)
@@ -776,20 +789,20 @@
if (cm->nodeid != nodeid)
continue;
found = 1;
+ config_item_get(i);
break;
} else {
if (!cm->addr_count ||
memcmp(cm->addr[0], addr, sizeof(*addr)))
continue;
found = 1;
+ config_item_get(i);
break;
}
}
up(&clusters_root.subsys.su_sem);
- if (found)
- config_item_get(i);
- else
+ if (!found)
cm = NULL;
return cm;
}
@@ -909,6 +922,7 @@
#define DEFAULT_SCAN_SECS 5
#define DEFAULT_LOG_DEBUG 0
#define DEFAULT_PROTOCOL 0
+#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
struct dlm_config_info dlm_config = {
.ci_tcp_port = DEFAULT_TCP_PORT,
@@ -920,6 +934,7 @@
.ci_toss_secs = DEFAULT_TOSS_SECS,
.ci_scan_secs = DEFAULT_SCAN_SECS,
.ci_log_debug = DEFAULT_LOG_DEBUG,
- .ci_protocol = DEFAULT_PROTOCOL
+ .ci_protocol = DEFAULT_PROTOCOL,
+ .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
};
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/Makefile
+++ linux-source-2.6.22-2.6.22/fs/dlm/Makefile
@@ -8,6 +8,7 @@
member.o \
memory.o \
midcomms.o \
+ netlink.o \
lowcomms.o \
rcom.o \
recover.o \
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/dlm_internal.h
+++ linux-source-2.6.22-2.6.22/fs/dlm/dlm_internal.h
@@ -151,6 +151,7 @@
void *bastaddr;
int mode;
struct dlm_lksb *lksb;
+ unsigned long timeout;
};
@@ -213,6 +214,9 @@
#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
#define DLM_IFL_OVERLAP_CANCEL 0x00100000
#define DLM_IFL_ENDOFLIFE 0x00200000
+#define DLM_IFL_WATCH_TIMEWARN 0x00400000
+#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
+#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
@@ -243,6 +247,9 @@
struct list_head lkb_wait_reply; /* waiting for remote reply */
struct list_head lkb_astqueue; /* need ast to be sent */
struct list_head lkb_ownqueue; /* list of locks for a process */
+ struct list_head lkb_time_list;
+ unsigned long lkb_timestamp;
+ unsigned long lkb_timeout_cs;
char *lkb_lvbptr;
struct dlm_lksb *lkb_lksb; /* caller's status block */
@@ -447,12 +454,16 @@
struct mutex ls_orphans_mutex;
struct list_head ls_orphans;
+ struct mutex ls_timeout_mutex;
+ struct list_head ls_timeout;
+
struct list_head ls_nodes; /* current nodes in ls */
struct list_head ls_nodes_gone; /* dead node list, recovery */
int ls_num_nodes; /* number of nodes in ls */
int ls_low_nodeid;
int ls_total_weight;
int *ls_node_array;
+ gfp_t ls_allocation;
struct dlm_rsb ls_stub_rsb; /* for returning errors */
struct dlm_lkb ls_stub_lkb; /* for returning errors */
@@ -460,9 +471,12 @@
struct dentry *ls_debug_rsb_dentry; /* debugfs */
struct dentry *ls_debug_waiters_dentry; /* debugfs */
+ struct dentry *ls_debug_master_dentry; /* debugfs */
wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result;
+ struct completion ls_members_done;
+ int ls_members_result;
struct miscdevice ls_device;
@@ -472,6 +486,7 @@
struct task_struct *ls_recoverd_task;
struct mutex ls_recoverd_active;
spinlock_t ls_recover_lock;
+ unsigned long ls_recover_begin; /* jiffies timestamp */
uint32_t ls_recover_status; /* DLM_RS_ */
uint64_t ls_recover_seq;
struct dlm_recover *ls_recover_args;
@@ -501,6 +516,7 @@
#define LSFL_RCOM_READY 3
#define LSFL_RCOM_WAIT 4
#define LSFL_UEVENT_WAIT 5
+#define LSFL_TIMEWARN 6
/* much of this is just saving user space pointers associated with the
lock that we pass back to the user lib with an ast */
@@ -518,6 +534,7 @@
void __user *castaddr;
void __user *bastparam;
void __user *bastaddr;
+ uint64_t xid;
};
#define DLM_PROC_FLAGS_CLOSING 1
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/recoverd.c
+++ linux-source-2.6.22-2.6.22/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -190,6 +190,8 @@
dlm_clear_members_gone(ls);
+ dlm_adjust_timeouts(ls);
+
error = enable_locking(ls, rv->seq);
if (error) {
log_debug(ls, "enable_locking failed %d", error);
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/lock.h
+++ linux-source-2.6.22-2.6.22/fs/dlm/lock.h
@@ -1,7 +1,7 @@
/******************************************************************************
*******************************************************************************
**
-** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -24,6 +24,10 @@
void dlm_hold_rsb(struct dlm_rsb *r);
int dlm_put_lkb(struct dlm_lkb *lkb);
void dlm_scan_rsbs(struct dlm_ls *ls);
+int dlm_lock_recovery_try(struct dlm_ls *ls);
+void dlm_unlock_recovery(struct dlm_ls *ls);
+void dlm_scan_timeout(struct dlm_ls *ls);
+void dlm_adjust_timeouts(struct dlm_ls *ls);
int dlm_purge_locks(struct dlm_ls *ls);
void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
@@ -34,15 +38,18 @@
int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
- uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid);
+ uint32_t flags, void *name, unsigned int namelen,
+ unsigned long timeout_cs);
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
- int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
+ int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+ unsigned long timeout_cs);
int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid);
int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
int nodeid, int pid);
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
static inline int is_master(struct dlm_rsb *r)
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/lock.c
+++ linux-source-2.6.22-2.6.22/fs/dlm/lock.c
@@ -82,10 +82,13 @@
static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_remove(struct dlm_rsb *r);
static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
struct dlm_message *ms);
static int receive_extralen(struct dlm_message *ms);
static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
+static void del_timeout(struct dlm_lkb *lkb);
+void dlm_timeout_warn(struct dlm_lkb *lkb);
/*
* Lock compatibilty matrix - thanks Steve
@@ -194,17 +197,17 @@
/* Threads cannot use the lockspace while it's being recovered */
-static inline void lock_recovery(struct dlm_ls *ls)
+static inline void dlm_lock_recovery(struct dlm_ls *ls)
{
down_read(&ls->ls_in_recovery);
}
-static inline void unlock_recovery(struct dlm_ls *ls)
+void dlm_unlock_recovery(struct dlm_ls *ls)
{
up_read(&ls->ls_in_recovery);
}
-static inline int lock_recovery_try(struct dlm_ls *ls)
+int dlm_lock_recovery_try(struct dlm_ls *ls)
{
return down_read_trylock(&ls->ls_in_recovery);
}
@@ -286,8 +289,22 @@
if (is_master_copy(lkb))
return;
+ del_timeout(lkb);
+
DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
+ /* if the operation was a cancel, then return -DLM_ECANCEL, if a
+ timeout caused the cancel then return -ETIMEDOUT */
+ if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
+ lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
+ rv = -ETIMEDOUT;
+ }
+
+ if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
+ lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
+ rv = -EDEADLK;
+ }
+
lkb->lkb_lksb->sb_status = rv;
lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
@@ -581,6 +598,7 @@
kref_init(&lkb->lkb_ref);
INIT_LIST_HEAD(&lkb->lkb_ownqueue);
INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
+ INIT_LIST_HEAD(&lkb->lkb_time_list);
get_random_bytes(&bucket, sizeof(bucket));
bucket &= (ls->ls_lkbtbl_size - 1);
@@ -985,15 +1003,136 @@
{
int i;
- if (dlm_locking_stopped(ls))
- return;
-
for (i = 0; i < ls->ls_rsbtbl_size; i++) {
shrink_bucket(ls, i);
+ if (dlm_locking_stopped(ls))
+ break;
cond_resched();
}
}
+static void add_timeout(struct dlm_lkb *lkb)
+{
+ struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+ /* decide whether lkb has to be tracked on ls->ls_timeout */
+ if (is_master_copy(lkb)) { /* master copies are timestamped, not listed */
+ lkb->lkb_timestamp = jiffies;
+ return;
+ }
+
+ if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
+ !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+ lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN; /* scan should warn on it */
+ goto add_it;
+ }
+ if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
+ goto add_it;
+ return; /* neither a warning nor a timeout applies */
+
+ add_it:
+ DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
+ mutex_lock(&ls->ls_timeout_mutex);
+ hold_lkb(lkb); /* list membership holds a ref; see del_timeout() */
+ lkb->lkb_timestamp = jiffies;
+ list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
+ mutex_unlock(&ls->ls_timeout_mutex);
+}
+/* take lkb off ls->ls_timeout if it is listed and drop the list's reference */
+static void del_timeout(struct dlm_lkb *lkb)
+{
+ struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+
+ mutex_lock(&ls->ls_timeout_mutex);
+ if (!list_empty(&lkb->lkb_time_list)) { /* no-op when not listed */
+ list_del_init(&lkb->lkb_time_list);
+ unhold_lkb(lkb);
+ }
+ mutex_unlock(&ls->ls_timeout_mutex);
+}
+
+/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
+ lkb_timeout_cs without lock_rsb? Note: we can't lock timeout_mutex
+ and then lock rsb because of lock ordering in add_timeout. We may need
+ to specify some special timeout-related bits in the lkb that are just to
+ be accessed under the timeout_mutex. */
+/* cancel and/or warn about lkbs on ls->ls_timeout whose interval has elapsed */
+void dlm_scan_timeout(struct dlm_ls *ls)
+{
+ struct dlm_rsb *r;
+ struct dlm_lkb *lkb;
+ int do_cancel, do_warn;
+
+ for (;;) { /* each pass handles the first expired lkb found */
+ if (dlm_locking_stopped(ls))
+ break;
+
+ do_cancel = 0;
+ do_warn = 0;
+ mutex_lock(&ls->ls_timeout_mutex);
+ list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+ /* intervals are in centiseconds; "* HZ/100" converts to jiffies */
+ if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
+ time_after_eq(jiffies, lkb->lkb_timestamp +
+ lkb->lkb_timeout_cs * HZ/100))
+ do_cancel = 1;
+
+ if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+ time_after_eq(jiffies, lkb->lkb_timestamp +
+ dlm_config.ci_timewarn_cs * HZ/100))
+ do_warn = 1;
+
+ if (!do_cancel && !do_warn)
+ continue;
+ hold_lkb(lkb); /* keep lkb usable once the mutex is dropped */
+ break;
+ }
+ mutex_unlock(&ls->ls_timeout_mutex);
+
+ if (!do_cancel && !do_warn) /* nothing on the list has expired */
+ break;
+
+ r = lkb->lkb_resource;
+ hold_rsb(r);
+ lock_rsb(r);
+
+ if (do_warn) {
+ /* clear flag so we only warn once */
+ lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+ if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
+ del_timeout(lkb);
+ dlm_timeout_warn(lkb);
+ }
+
+ if (do_cancel) {
+ log_debug(ls, "timeout cancel %x node %d %s",
+ lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+ lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+ lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL; /* cancel is timeout-driven */
+ del_timeout(lkb);
+ _cancel_lock(r, lkb);
+ }
+
+ unlock_rsb(r);
+ unhold_rsb(r);
+ dlm_put_lkb(lkb); /* drops the hold_lkb() taken in the list walk */
+ }
+}
+
+/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
+ dlm_recoverd before checking/setting ls_recover_begin. */
+/* advance every timestamp on ls->ls_timeout by the jiffies since ls_recover_begin */
+void dlm_adjust_timeouts(struct dlm_ls *ls)
+{
+ struct dlm_lkb *lkb;
+ long adj = jiffies - ls->ls_recover_begin; /* elapsed jiffies */
+
+ ls->ls_recover_begin = 0; /* value consumed above; reset */
+ mutex_lock(&ls->ls_timeout_mutex);
+ list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
+ lkb->lkb_timestamp += adj;
+ mutex_unlock(&ls->ls_timeout_mutex);
+}
+
/* lkb is master or local copy */
static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1275,10 +1414,8 @@
* queue for one resource. The granted mode of each lock blocks the requested
* mode of the other lock."
*
- * Part 2: if the granted mode of lkb is preventing the first lkb in the
- * convert queue from being granted, then demote lkb (set grmode to NL).
- * This second form requires that we check for conv-deadlk even when
- * now == 0 in _can_be_granted().
+ * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
+ * convert queue from being granted, then deadlk/demote lkb.
*
* Example:
* Granted Queue: empty
@@ -1287,41 +1424,52 @@
*
* The first lock can't be granted because of the granted mode of the second
* lock and the second lock can't be granted because it's not first in the
- * list. We demote the granted mode of the second lock (the lkb passed to this
- * function).
+ * list. We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
+ * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
+ * flag set and return DEMOTED in the lksb flags.
+ *
+ * Originally, this function detected conv-deadlk in a more limited scope:
+ * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
+ * - if lkb1 was the first entry in the queue (not just earlier), and was
+ * blocked by the granted mode of lkb2, and there was nothing on the
+ * granted queue preventing lkb1 from being granted immediately, i.e.
+ * lkb2 was the only thing preventing lkb1 from being granted.
+ *
+ * That second condition meant we'd only say there was conv-deadlk if
+ * resolving it (by demotion) would lead to the first lock on the convert
+ * queue being granted right away. It allowed conversion deadlocks to exist
+ * between locks on the convert queue while they couldn't be granted anyway.
*
- * After the resolution, the "grant pending" function needs to go back and try
- * to grant locks on the convert queue again since the first lock can now be
- * granted.
+ * Now, we detect and take action on conversion deadlocks immediately when
+ * they're created, even if they may not be immediately consequential. If
+ * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
+ * mode that would prevent lkb1's conversion from being granted, we do a
+ * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
+ * I think this means that the lkb_is_ahead condition below should always
+ * be zero, i.e. there will never be conv-deadlk between two locks that are
+ * both already on the convert queue.
*/
-static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
+static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
{
- struct dlm_lkb *this, *first = NULL, *self = NULL;
+ struct dlm_lkb *lkb1;
+ int lkb_is_ahead = 0;
- list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
- if (!first)
- first = this;
- if (this == lkb) {
- self = lkb;
+ list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
+ if (lkb1 == lkb2) {
+ lkb_is_ahead = 1;
continue;
}
- if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
- return 1;
- }
-
- /* if lkb is on the convert queue and is preventing the first
- from being granted, then there's deadlock and we demote lkb.
- multiple converting locks may need to do this before the first
- converting lock can be granted. */
-
- if (self && self != first) {
- if (!modes_compat(lkb, first) &&
- !queue_conflict(&rsb->res_grantqueue, first))
- return 1;
+ if (!lkb_is_ahead) {
+ if (!modes_compat(lkb2, lkb1))
+ return 1;
+ } else {
+ if (!modes_compat(lkb2, lkb1) &&
+ !modes_compat(lkb1, lkb2))
+ return 1;
+ }
}
-
return 0;
}
@@ -1450,42 +1598,57 @@
if (!now && !conv && list_empty(&r->res_convertqueue) &&
first_in_list(lkb, &r->res_waitqueue))
return 1;
-
out:
- /*
- * The following, enabled by CONVDEADLK, departs from VMS.
- */
-
- if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
- conversion_deadlock_detect(r, lkb)) {
- lkb->lkb_grmode = DLM_LOCK_NL;
- lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
- }
-
return 0;
}
-/*
- * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a
- * simple way to provide a big optimization to applications that can use them.
- */
-
-static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
+static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
+ int *err)
{
- uint32_t flags = lkb->lkb_exflags;
int rv;
int8_t alt = 0, rqmode = lkb->lkb_rqmode;
+ int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
+
+ if (err)
+ *err = 0;
rv = _can_be_granted(r, lkb, now);
if (rv)
goto out;
- if (lkb->lkb_sbflags & DLM_SBF_DEMOTED)
+ /*
+ * The CONVDEADLK flag is non-standard and tells the dlm to resolve
+ * conversion deadlocks by demoting grmode to NL, otherwise the dlm
+ * cancels one of the locks.
+ */
+
+ if (is_convert && can_be_queued(lkb) &&
+ conversion_deadlock_detect(r, lkb)) {
+ if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
+ lkb->lkb_grmode = DLM_LOCK_NL;
+ lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
+ } else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+ if (err)
+ *err = -EDEADLK;
+ else {
+ log_print("can_be_granted deadlock %x now %d",
+ lkb->lkb_id, now);
+ dlm_dump_rsb(r);
+ }
+ }
goto out;
+ }
- if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR)
+ /*
+ * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
+ * to grant a request in a mode other than the normal rqmode. It's a
+ * simple way to provide a big optimization to applications that can
+ * use them.
+ */
+
+ if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
alt = DLM_LOCK_PR;
- else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW)
+ else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
alt = DLM_LOCK_CW;
if (alt) {
@@ -1500,10 +1663,20 @@
return rv;
}
+/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
+ for locks pending on the convert list. Once verified (watch for these
+ log_prints), we should be able to just call _can_be_granted() and not
+ bother with the demote/deadlk cases here (and there's no easy way to deal
+ with a deadlk here, we'd have to generate something like grant_lock with
+ the deadlk error.) */
+
+/* returns the highest requested mode of all blocked conversions */
+
static int grant_pending_convert(struct dlm_rsb *r, int high)
{
struct dlm_lkb *lkb, *s;
int hi, demoted, quit, grant_restart, demote_restart;
+ int deadlk;
quit = 0;
restart:
@@ -1513,14 +1686,29 @@
list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
demoted = is_demoted(lkb);
- if (can_be_granted(r, lkb, 0)) {
+ deadlk = 0;
+
+ if (can_be_granted(r, lkb, 0, &deadlk)) {
grant_lock_pending(r, lkb);
grant_restart = 1;
- } else {
- hi = max_t(int, lkb->lkb_rqmode, hi);
- if (!demoted && is_demoted(lkb))
- demote_restart = 1;
+ continue;
+ }
+
+ if (!demoted && is_demoted(lkb)) {
+ log_print("WARN: pending demoted %x node %d %s",
+ lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+ demote_restart = 1;
+ continue;
+ }
+
+ if (deadlk) {
+ log_print("WARN: pending deadlock %x node %d %s",
+ lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+ dlm_dump_rsb(r);
+ continue;
}
+
+ hi = max_t(int, lkb->lkb_rqmode, hi);
}
if (grant_restart)
@@ -1538,7 +1726,7 @@
struct dlm_lkb *lkb, *s;
list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
- if (can_be_granted(r, lkb, 0))
+ if (can_be_granted(r, lkb, 0, NULL))
grant_lock_pending(r, lkb);
else
high = max_t(int, lkb->lkb_rqmode, high);
@@ -1733,7 +1921,7 @@
}
static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
- int namelen, uint32_t parent_lkid, void *ast,
+ int namelen, unsigned long timeout_cs, void *ast,
void *astarg, void *bast, struct dlm_args *args)
{
int rv = -EINVAL;
@@ -1776,10 +1964,6 @@
if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
goto out;
- /* parent/child locks not yet supported */
- if (parent_lkid)
- goto out;
-
if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
goto out;
@@ -1791,6 +1975,7 @@
args->astaddr = ast;
args->astparam = (long) astarg;
args->bastaddr = bast;
+ args->timeout = timeout_cs;
args->mode = mode;
args->lksb = lksb;
rv = 0;
@@ -1845,6 +2030,7 @@
lkb->lkb_lksb = args->lksb;
lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
lkb->lkb_ownpid = (int) current->pid;
+ lkb->lkb_timeout_cs = args->timeout;
rv = 0;
out:
return rv;
@@ -1903,6 +2089,9 @@
if (is_overlap(lkb))
goto out;
+ /* don't let scand try to do a cancel */
+ del_timeout(lkb);
+
if (lkb->lkb_flags & DLM_IFL_RESEND) {
lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
rv = -EBUSY;
@@ -1934,6 +2123,9 @@
if (is_overlap_unlock(lkb))
goto out;
+ /* don't let scand try to do a cancel */
+ del_timeout(lkb);
+
if (lkb->lkb_flags & DLM_IFL_RESEND) {
lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
rv = -EBUSY;
@@ -1984,7 +2176,7 @@
{
int error = 0;
- if (can_be_granted(r, lkb, 1)) {
+ if (can_be_granted(r, lkb, 1, NULL)) {
grant_lock(r, lkb);
queue_cast(r, lkb, 0);
goto out;
@@ -1994,6 +2186,7 @@
error = -EINPROGRESS;
add_lkb(r, lkb, DLM_LKSTS_WAITING);
send_blocking_asts(r, lkb);
+ add_timeout(lkb);
goto out;
}
@@ -2009,16 +2202,32 @@
static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
int error = 0;
+ int deadlk = 0;
/* changing an existing lock may allow others to be granted */
- if (can_be_granted(r, lkb, 1)) {
+ if (can_be_granted(r, lkb, 1, &deadlk)) {
grant_lock(r, lkb);
queue_cast(r, lkb, 0);
grant_pending_locks(r);
goto out;
}
+ /* can_be_granted() detected that this lock would block in a conversion
+ deadlock, so we leave it on the granted queue and return EDEADLK in
+ the ast for the convert. */
+
+ if (deadlk) {
+ /* it's left on the granted queue */
+ log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
+ lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
+ lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
+ revert_lock(r, lkb);
+ queue_cast(r, lkb, -EDEADLK);
+ error = -EDEADLK;
+ goto out;
+ }
+
/* is_demoted() means the can_be_granted() above set the grmode
to NL, and left us on the granted queue. This auto-demotion
(due to CONVDEADLK) might mean other locks, and/or this lock, are
@@ -2041,6 +2250,7 @@
del_lkb(r, lkb);
add_lkb(r, lkb, DLM_LKSTS_CONVERT);
send_blocking_asts(r, lkb);
+ add_timeout(lkb);
goto out;
}
@@ -2274,7 +2484,7 @@
if (!ls)
return -EINVAL;
- lock_recovery(ls);
+ dlm_lock_recovery(ls);
if (convert)
error = find_lkb(ls, lksb->sb_lkid, &lkb);
@@ -2284,7 +2494,7 @@
if (error)
goto out;
- error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast,
+ error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
astarg, bast, &args);
if (error)
goto out_put;
@@ -2299,10 +2509,10 @@
out_put:
if (convert || error)
__put_lkb(ls, lkb);
- if (error == -EAGAIN)
+ if (error == -EAGAIN || error == -EDEADLK)
error = 0;
out:
- unlock_recovery(ls);
+ dlm_unlock_recovery(ls);
dlm_put_lockspace(ls);
return error;
}
@@ -2322,7 +2532,7 @@
if (!ls)
return -EINVAL;
- lock_recovery(ls);
+ dlm_lock_recovery(ls);
error = find_lkb(ls, lkid, &lkb);
if (error)
@@ -2344,7 +2554,7 @@
out_put:
dlm_put_lkb(lkb);
out:
- unlock_recovery(ls);
+ dlm_unlock_recovery(ls);
dlm_put_lockspace(ls);
return error;
}
@@ -2384,7 +2594,7 @@
pass into lowcomms_commit and a message buffer (mb) that we
write our data into */
- mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+ mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
if (!mh)
return -ENOBUFS;
@@ -3111,9 +3321,10 @@
lkb->lkb_remid = ms->m_lkid;
if (is_altmode(lkb))
munge_altmode(lkb, ms);
- if (result)
+ if (result) {
add_lkb(r, lkb, DLM_LKSTS_WAITING);
- else {
+ add_timeout(lkb);
+ } else {
grant_lock_pc(r, lkb, ms);
queue_cast(r, lkb, 0);
}
@@ -3172,6 +3383,12 @@
queue_cast(r, lkb, -EAGAIN);
break;
+ case -EDEADLK:
+ receive_flags_reply(lkb, ms);
+ revert_lock_pc(r, lkb);
+ queue_cast(r, lkb, -EDEADLK);
+ break;
+
case -EINPROGRESS:
/* convert was queued on remote master */
receive_flags_reply(lkb, ms);
@@ -3179,6 +3396,7 @@
munge_demoted(lkb, ms);
del_lkb(r, lkb);
add_lkb(r, lkb, DLM_LKSTS_CONVERT);
+ add_timeout(lkb);
break;
case 0:
@@ -3298,8 +3516,7 @@
case -DLM_ECANCEL:
receive_flags_reply(lkb, ms);
revert_lock_pc(r, lkb);
- if (ms->m_result)
- queue_cast(r, lkb, -DLM_ECANCEL);
+ queue_cast(r, lkb, -DLM_ECANCEL);
break;
case 0:
break;
@@ -3424,7 +3641,7 @@
}
}
- if (lock_recovery_try(ls))
+ if (dlm_lock_recovery_try(ls))
break;
schedule();
}
@@ -3503,7 +3720,7 @@
log_error(ls, "unknown message type %d", ms->m_type);
}
- unlock_recovery(ls);
+ dlm_unlock_recovery(ls);
out:
dlm_put_lockspace(ls);
dlm_astd_wake();
@@ -4034,13 +4251,13 @@
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
int mode, uint32_t flags, void *name, unsigned int namelen,
- uint32_t parent_lkid)
+ unsigned long timeout_cs)
{
struct dlm_lkb *lkb;
struct dlm_args args;
int error;
- lock_recovery(ls);
+ dlm_lock_recovery(ls);
error = create_lkb(ls, &lkb);
if (error) {
@@ -4062,7 +4279,7 @@
When DLM_IFL_USER is set, the dlm knows that this is a userspace
lock and that lkb_astparam is the dlm_user_args structure. */
- error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid,
+ error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
lkb->lkb_flags |= DLM_IFL_USER;
ua->old_mode = DLM_LOCK_IV;
@@ -4094,19 +4311,20 @@
list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
spin_unlock(&ua->proc->locks_spin);
out:
- unlock_recovery(ls);
+ dlm_unlock_recovery(ls);
return error;
}
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
- int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
+ int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+ unsigned long timeout_cs)
{
struct dlm_lkb *lkb;
struct dlm_args args;
struct dlm_user_args *ua;
int error;
- lock_recovery(ls);
+ dlm_lock_recovery(ls);
error = find_lkb(ls, lkid, &lkb);
if (error)
@@ -4127,6 +4345,7 @@
if (lvb_in && ua->lksb.sb_lvbptr)
memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
+ ua->xid = ua_tmp->xid;
ua->castparam = ua_tmp->castparam;
ua->castaddr = ua_tmp->castaddr;
ua->bastparam = ua_tmp->bastparam;
@@ -4134,19 +4353,19 @@
ua->user_lksb = ua_tmp->user_lksb;
ua->old_mode = lkb->lkb_grmode;
- error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST,
- ua, DLM_FAKE_USER_AST, &args);
+ error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
+ DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
if (error)
goto out_put;
error = convert_lock(ls, lkb, &args);
- if (error == -EINPROGRESS || error == -EAGAIN)
+ if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
error = 0;
out_put:
dlm_put_lkb(lkb);
out:
- unlock_recovery(ls);
+ dlm_unlock_recovery(ls);
kfree(ua_tmp);
return error;
}
@@ -4159,7 +4378,7 @@
struct dlm_user_args *ua;
int error;
- lock_recovery(ls);
+ dlm_lock_recovery(ls);
error = find_lkb(ls, lkid, &lkb);
if (error)
@@ -4194,7 +4413,7 @@
out_put:
dlm_put_lkb(lkb);
out:
- unlock_recovery(ls);
+ dlm_unlock_recovery(ls);
kfree(ua_tmp);
return error;
}
@@ -4207,7 +4426,7 @@
struct dlm_user_args *ua;
int error;
- lock_recovery(ls);
+ dlm_lock_recovery(ls);
error = find_lkb(ls, lkid, &lkb);
if (error)
@@ -4231,11 +4450,59 @@
out_put:
dlm_put_lkb(lkb);
out:
- unlock_recovery(ls);
+ dlm_unlock_recovery(ls);
kfree(ua_tmp);
return error;
}
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
+{
+ struct dlm_lkb *lkb;
+ struct dlm_args args;
+ struct dlm_user_args *ua;
+ struct dlm_rsb *r;
+ int error;
+ /* cancel lock lkid, marking the cancel as deadlock-triggered */
+ dlm_lock_recovery(ls); /* takes ls_in_recovery for reading */
+
+ error = find_lkb(ls, lkid, &lkb);
+ if (error)
+ goto out;
+
+ ua = (struct dlm_user_args *)lkb->lkb_astparam; /* user locks keep ua here */
+
+ error = set_unlock_args(flags, ua, &args);
+ if (error)
+ goto out_put;
+
+ /* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
+
+ r = lkb->lkb_resource;
+ hold_rsb(r);
+ lock_rsb(r);
+
+ error = validate_unlock_args(lkb, &args);
+ if (error)
+ goto out_r;
+ lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
+
+ error = _cancel_lock(r, lkb);
+ out_r:
+ unlock_rsb(r);
+ put_rsb(r); /* pairs with hold_rsb() above */
+
+ if (error == -DLM_ECANCEL) /* not reported to the caller as a failure */
+ error = 0;
+ /* from validate_unlock_args() */
+ if (error == -EBUSY)
+ error = 0;
+ out_put:
+ dlm_put_lkb(lkb); /* presumably drops the ref from find_lkb(); confirm */
+ out:
+ dlm_unlock_recovery(ls);
+ return error;
+}
+
/* lkb's that are removed from the waiters list by revert are just left on the
orphans list with the granted orphan locks, to be freed by purge */
@@ -4314,12 +4581,13 @@
{
struct dlm_lkb *lkb, *safe;
- lock_recovery(ls);
+ dlm_lock_recovery(ls);
while (1) {
lkb = del_proc_lock(ls, proc);
if (!lkb)
break;
+ del_timeout(lkb);
if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
orphan_proc_lock(ls, lkb);
else
@@ -4347,7 +4615,7 @@
}
mutex_unlock(&ls->ls_clear_proc_locks);
- unlock_recovery(ls);
+ dlm_unlock_recovery(ls);
}
static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
@@ -4429,12 +4697,12 @@
if (nodeid != dlm_our_nodeid()) {
error = send_purge(ls, nodeid, pid);
} else {
- lock_recovery(ls);
+ dlm_lock_recovery(ls);
if (pid == current->pid)
purge_proc_locks(ls, proc);
else
do_purge(ls, nodeid, pid);
- unlock_recovery(ls);
+ dlm_unlock_recovery(ls);
}
return error;
}
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/netlink.c
+++ linux-source-2.6.22-2.6.22/fs/dlm/netlink.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <net/genetlink.h>
+#include <linux/dlm.h>
+#include <linux/dlm_netlink.h>
+
+#include "dlm_internal.h"
+
+/* sequence number passed to genlmsg_put() for each message built here */
+static uint32_t dlm_nl_seqnum;
+/* netlink pid recorded by user_cmd(); destination used by send_data() */
+static uint32_t listener_nlpid;
+
+static struct genl_family family = {
+ .id = GENL_ID_GENERATE,
+ .name = DLM_GENL_NAME,
+ .version = DLM_GENL_VERSION,
+};
+
+/*
+ * Allocate an skb for a message with "size" bytes of payload and put the
+ * generic netlink header for "cmd" into it.  Returns 0 with the skb stored
+ * in *skbp, or -ENOMEM/-EINVAL with the skb (if any) already freed.
+ */
+static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
+{
+ struct sk_buff *skb;
+ void *data;
+
+ skb = genlmsg_new(size, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ /* add the message headers */
+ data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd);
+ if (!data) {
+ nlmsg_free(skb);
+ return -EINVAL;
+ }
+
+ *skbp = skb;
+ return 0;
+}
+
+/*
+ * Reserve a DLM_TYPE_LOCK attribute sized for one struct dlm_lock_data in
+ * skb.  Returns a pointer to the attribute payload, or NULL if
+ * nla_reserve() fails.
+ */
+static struct dlm_lock_data *mk_data(struct sk_buff *skb)
+{
+ struct nlattr *ret;
+
+ ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data));
+ if (!ret)
+ return NULL;
+ return nla_data(ret);
+}
+
+/*
+ * Finish the message in skb with genlmsg_end() and unicast it to
+ * listener_nlpid.  The skb is freed here only when genlmsg_end() fails;
+ * after genlmsg_unicast() it is not touched again (presumably the netlink
+ * layer consumes it - confirm).  Returns the genlmsg_unicast() result or
+ * the negative genlmsg_end() error.
+ */
+static int send_data(struct sk_buff *skb)
+{
+ struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+ void *data = genlmsg_data(genlhdr);
+ int rv;
+
+ rv = genlmsg_end(skb, data);
+ if (rv < 0) {
+ nlmsg_free(skb);
+ return rv;
+ }
+
+ return genlmsg_unicast(skb, listener_nlpid);
+}
+
+/*
+ * .doit handler for DLM_CMD_HELLO: remember the sender's netlink pid as the
+ * destination for later send_data() calls.  Always returns 0.
+ */
+static int user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+ listener_nlpid = info->snd_pid;
+ printk("user_cmd nlpid %u\n", listener_nlpid);
+ return 0;
+}
+
+static struct genl_ops dlm_nl_ops = {
+ .cmd = DLM_CMD_HELLO,
+ .doit = user_cmd,
+};
+
+/*
+ * Register the genetlink family and then its single operation.  If the
+ * operation cannot be registered the family is unregistered again.
+ * Returns 0 on success or the error from the failing registration.
+ */
+int dlm_netlink_init(void)
+{
+ int rv;
+
+ rv = genl_register_family(&family);
+ if (rv)
+ return rv;
+
+ rv = genl_register_ops(&family, &dlm_nl_ops);
+ if (rv < 0)
+ goto err;
+ return 0;
+ err:
+ genl_unregister_family(&family);
+ return rv;
+}
+
+/* Undo dlm_netlink_init(): unregister the operation, then the family. */
+void dlm_netlink_exit(void)
+{
+ genl_unregister_ops(&family, &dlm_nl_ops);
+ genl_unregister_family(&family);
+}
+
+/*
+ * Zero *data and fill it from lkb.  xid is set only when lkb_astparam is
+ * non-NULL, and the lockspace id and resource name only when the lkb has a
+ * resource.
+ */
+static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
+{
+ struct dlm_rsb *r = lkb->lkb_resource;
+ struct dlm_user_args *ua = (struct dlm_user_args *) lkb->lkb_astparam;
+
+ memset(data, 0, sizeof(struct dlm_lock_data));
+
+ data->version = DLM_LOCK_DATA_VERSION;
+ data->nodeid = lkb->lkb_nodeid;
+ data->ownpid = lkb->lkb_ownpid;
+ data->id = lkb->lkb_id;
+ data->remid = lkb->lkb_remid;
+ data->status = lkb->lkb_status;
+ data->grmode = lkb->lkb_grmode;
+ data->rqmode = lkb->lkb_rqmode;
+ data->timestamp = lkb->lkb_timestamp;
+ if (ua)
+ data->xid = ua->xid;
+ if (r) {
+ data->lockspace_id = r->res_ls->ls_global_id;
+ data->resource_namelen = r->res_length;
+ /* NOTE(review): no bound check here; assumes res_length fits
+ in data->resource_name - confirm */
+ memcpy(data->resource_name, r->res_name, r->res_length);
+ }
+}
+
+/*
+ * Build a DLM_CMD_TIMEOUT message describing lkb and send it to the
+ * registered listener.  Failures to allocate, reserve or send are not
+ * reported to the caller.
+ */
+void dlm_timeout_warn(struct dlm_lkb *lkb)
+{
+ struct dlm_lock_data *data;
+ struct sk_buff *send_skb;
+ size_t size;
+ int rv;
+
+ size = nla_total_size(sizeof(struct dlm_lock_data)) +
+ nla_total_size(0); /* why this? */
+
+ rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size);
+ if (rv < 0)
+ return;
+
+ data = mk_data(send_skb);
+ if (!data) {
+ nlmsg_free(send_skb);
+ return;
+ }
+
+ fill_data(data, lkb);
+
+ send_data(send_skb);
+}
+
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/main.c
+++ linux-source-2.6.22-2.6.22/fs/dlm/main.c
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -25,6 +25,8 @@
static inline int dlm_register_debugfs(void) { return 0; }
static inline void dlm_unregister_debugfs(void) { }
#endif
+int dlm_netlink_init(void);
+void dlm_netlink_exit(void);
static int __init init_dlm(void)
{
@@ -50,10 +52,16 @@
if (error)
goto out_debug;
+ error = dlm_netlink_init();
+ if (error)
+ goto out_user;
+
printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
return 0;
+ out_user:
+ dlm_user_exit();
out_debug:
dlm_unregister_debugfs();
out_config:
@@ -68,6 +76,7 @@
static void __exit exit_dlm(void)
{
+ dlm_netlink_exit();
dlm_user_exit();
dlm_config_exit();
dlm_memory_exit();
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/config.h
+++ linux-source-2.6.22-2.6.22/fs/dlm/config.h
@@ -27,6 +27,7 @@
int ci_scan_secs;
int ci_log_debug;
int ci_protocol;
+ int ci_timewarn_cs;
};
extern struct dlm_config_info dlm_config;
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/user.c
+++ linux-source-2.6.22-2.6.22/fs/dlm/user.c
@@ -33,16 +33,17 @@
struct dlm_lock_params32 {
__u8 mode;
__u8 namelen;
- __u16 flags;
+ __u16 unused;
+ __u32 flags;
__u32 lkid;
__u32 parent;
-
+ __u64 xid;
+ __u64 timeout;
__u32 castparam;
__u32 castaddr;
__u32 bastparam;
__u32 bastaddr;
__u32 lksb;
-
char lvb[DLM_USER_LVB_LEN];
char name[0];
};
@@ -68,6 +69,7 @@
};
struct dlm_lock_result32 {
+ __u32 version[3];
__u32 length;
__u32 user_astaddr;
__u32 user_astparam;
@@ -102,6 +104,8 @@
kb->i.lock.flags = kb32->i.lock.flags;
kb->i.lock.lkid = kb32->i.lock.lkid;
kb->i.lock.parent = kb32->i.lock.parent;
+ kb->i.lock.xid = kb32->i.lock.xid;
+ kb->i.lock.timeout = kb32->i.lock.timeout;
kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
@@ -115,6 +119,10 @@
static void compat_output(struct dlm_lock_result *res,
struct dlm_lock_result32 *res32)
{
+ res32->version[0] = res->version[0];
+ res32->version[1] = res->version[1];
+ res32->version[2] = res->version[2];
+
res32->user_astaddr = (__u32)(long)res->user_astaddr;
res32->user_astparam = (__u32)(long)res->user_astparam;
res32->user_lksb = (__u32)(long)res->user_lksb;
@@ -130,6 +138,36 @@
}
#endif
+/* Figure out if this lock is at the end of its life and no longer
+ available for the application to use. The lkb still exists until
+ the final ast is read. A lock becomes EOL in three situations:
+ 1. a noqueue request fails with EAGAIN
+ 2. an unlock completes with EUNLOCK
+ 3. a cancel of a waiting request completes with ECANCEL/ETIMEDOUT/EDEADLK
+ An EOL lock needs to be removed from the process's list of locks.
+ And we can't allow any new operation on an EOL lock. This is
+ not related to the lifetime of the lkb struct which is managed
+ entirely by refcount. */
+
+static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
+{
+ /* a completed unlock always ends the lock's life */
+ if (sb_status == -DLM_EUNLOCK)
+ return 1;
+
+ /* a cancel, timeout or deadlock status ends it only while the
+ granted mode is still DLM_LOCK_IV */
+ if (sb_status == -DLM_ECANCEL || sb_status == -ETIMEDOUT ||
+ sb_status == -EDEADLK)
+ return lkb->lkb_grmode == DLM_LOCK_IV;
+
+ /* -EAGAIN additionally has to arrive on a completion ast */
+ if (sb_status == -EAGAIN)
+ return type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV;
+
+ return 0;
+}
+
/* we could possibly check if the cancel of an orphan has resulted in the lkb
being removed and then remove that lkb from the orphans list and free it */
@@ -176,25 +214,7 @@
log_debug(ls, "ast overlap %x status %x %x",
lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
- /* Figure out if this lock is at the end of its life and no longer
- available for the application to use. The lkb still exists until
- the final ast is read. A lock becomes EOL in three situations:
- 1. a noqueue request fails with EAGAIN
- 2. an unlock completes with EUNLOCK
- 3. a cancel of a waiting request completes with ECANCEL
- An EOL lock needs to be removed from the process's list of locks.
- And we can't allow any new operation on an EOL lock. This is
- not related to the lifetime of the lkb struct which is managed
- entirely by refcount. */
-
- if (type == AST_COMP &&
- lkb->lkb_grmode == DLM_LOCK_IV &&
- ua->lksb.sb_status == -EAGAIN)
- eol = 1;
- else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
- (ua->lksb.sb_status == -DLM_ECANCEL &&
- lkb->lkb_grmode == DLM_LOCK_IV))
- eol = 1;
+ eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
if (eol) {
lkb->lkb_ast_type &= ~AST_BAST;
lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
@@ -252,16 +272,18 @@
ua->castaddr = params->castaddr;
ua->bastparam = params->bastparam;
ua->bastaddr = params->bastaddr;
+ ua->xid = params->xid;
if (params->flags & DLM_LKF_CONVERT)
error = dlm_user_convert(ls, ua,
params->mode, params->flags,
- params->lkid, params->lvb);
+ params->lkid, params->lvb,
+ (unsigned long) params->timeout);
else {
error = dlm_user_request(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
- params->parent);
+ (unsigned long) params->timeout);
if (!error)
error = ua->lksb.sb_lkid;
}
@@ -299,6 +321,22 @@
return error;
}
+/* Run dlm_user_deadlock() on params->lkid in the lockspace proc refers to. */
+static int device_user_deadlock(struct dlm_user_proc *proc,
+ struct dlm_lock_params *params)
+{
+ struct dlm_ls *lsp = dlm_find_lockspace_local(proc->lockspace);
+ int rv;
+
+ if (!lsp)
+ return -ENOENT;
+
+ /* the lookup above is paired with dlm_put_lockspace() below */
+ rv = dlm_user_deadlock(lsp, params->flags, params->lkid);
+ dlm_put_lockspace(lsp);
+ return rv;
+}
+
static int create_misc_device(struct dlm_ls *ls, char *name)
{
int error, len;
@@ -348,7 +386,7 @@
return -EPERM;
error = dlm_new_lockspace(params->name, strlen(params->name),
- &lockspace, 0, DLM_USER_LVB_LEN);
+ &lockspace, params->flags, DLM_USER_LVB_LEN);
if (error)
return error;
@@ -524,6 +562,14 @@
error = device_user_unlock(proc, &kbuf->i.lock);
break;
+ case DLM_USER_DEADLOCK:
+ if (!proc) {
+ log_print("no locking on control device");
+ goto out_sig;
+ }
+ error = device_user_deadlock(proc, &kbuf->i.lock);
+ break;
+
case DLM_USER_CREATE_LOCKSPACE:
if (proc) {
log_print("create/remove only on control device");
@@ -641,6 +687,9 @@
int struct_len;
memset(&result, 0, sizeof(struct dlm_lock_result));
+ result.version[0] = DLM_DEVICE_VERSION_MAJOR;
+ result.version[1] = DLM_DEVICE_VERSION_MINOR;
+ result.version[2] = DLM_DEVICE_VERSION_PATCH;
memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
result.user_lksb = ua->user_lksb;
@@ -699,6 +748,20 @@
return error;
}
+/* Copy the device version to buf (count is unused); size or -EFAULT. */
+static int copy_version_to_user(char __user *buf, size_t count)
+{
+ struct dlm_device_version v;
+
+ memset(&v, 0, sizeof(v));
+ v.version[0] = DLM_DEVICE_VERSION_MAJOR;
+ v.version[1] = DLM_DEVICE_VERSION_MINOR;
+ v.version[2] = DLM_DEVICE_VERSION_PATCH;
+ if (copy_to_user(buf, &v, sizeof(v)))
+ return -EFAULT;
+ return sizeof(v);
+}
+
/* a read returns a single ast described in a struct dlm_lock_result */
static ssize_t device_read(struct file *file, char __user *buf, size_t count,
@@ -710,6 +773,16 @@
DECLARE_WAITQUEUE(wait, current);
int error, type=0, bmode=0, removed = 0;
+ if (count == sizeof(struct dlm_device_version)) {
+ error = copy_version_to_user(buf, count);
+ return error;
+ }
+
+ if (!proc) {
+ log_print("non-version read from control device %zu", count);
+ return -EINVAL;
+ }
+
#ifdef CONFIG_COMPAT
if (count < sizeof(struct dlm_lock_result32))
#else
@@ -747,11 +820,6 @@
}
}
- if (list_empty(&proc->asts)) {
- spin_unlock(&proc->asts_spin);
- return -EAGAIN;
- }
-
/* there may be both completion and blocking asts to return for
the lkb, don't remove lkb from asts list unless no asts remain */
@@ -823,6 +891,7 @@
static const struct file_operations ctl_device_fops = {
.open = ctl_device_open,
.release = ctl_device_close,
+ .read = device_read,
.write = device_write,
.owner = THIS_MODULE,
};
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/member.c
+++ linux-source-2.6.22-2.6.22/fs/dlm/member.c
@@ -1,7 +1,7 @@
/******************************************************************************
*******************************************************************************
**
-** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -233,6 +233,12 @@
*neg_out = neg;
error = ping_members(ls);
+ if (!error || error == -EPROTO) {
+ /* new_lockspace() may be waiting to know if the config
+ is good or bad */
+ ls->ls_members_result = error;
+ complete(&ls->ls_members_done);
+ }
if (error)
goto out;
@@ -284,6 +290,9 @@
dlm_recoverd_suspend(ls);
ls->ls_recover_status = 0;
dlm_recoverd_resume(ls);
+
+ if (!ls->ls_recover_begin)
+ ls->ls_recover_begin = jiffies;
return 0;
}
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/rcom.c
+++ linux-source-2.6.22-2.6.22/fs/dlm/rcom.c
@@ -38,7 +38,7 @@
char *mb;
int mb_len = sizeof(struct dlm_rcom) + len;
- mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+ mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
if (!mh) {
log_print("create_rcom to %d type %d len %d ENOBUFS",
to_nodeid, type, len);
@@ -90,7 +90,7 @@
log_error(ls, "version mismatch: %x nodeid %d: %x",
DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
rc->rc_header.h_version);
- return -EINVAL;
+ return -EPROTO;
}
if (rf->rf_lvblen != ls->ls_lvblen ||
@@ -98,7 +98,7 @@
log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
ls->ls_lvblen, ls->ls_exflags,
nodeid, rf->rf_lvblen, rf->rf_lsflags);
- return -EINVAL;
+ return -EPROTO;
}
return 0;
}
@@ -386,7 +386,8 @@
dlm_recover_process_copy(ls, rc_in);
}
-static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
+static int send_ls_not_ready(struct dlm_ls *ls, int nodeid,
+ struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct rcom_config *rf;
@@ -394,7 +395,7 @@
char *mb;
int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
- mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb);
+ mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb);
if (!mh)
return -ENOBUFS;
memset(mb, 0, mb_len);
@@ -464,7 +465,7 @@
log_print("lockspace %x from %d type %x not found",
hd->h_lockspace, nodeid, rc->rc_type);
if (rc->rc_type == DLM_RCOM_STATUS)
- send_ls_not_ready(nodeid, rc);
+ send_ls_not_ready(ls, nodeid, rc);
return;
}
--- linux-source-2.6.22-2.6.22.orig/fs/dlm/lowcomms.c
+++ linux-source-2.6.22-2.6.22/fs/dlm/lowcomms.c
@@ -260,7 +260,7 @@
static void lowcomms_data_ready(struct sock *sk, int count_unused)
{
struct connection *con = sock2con(sk);
- if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+ if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
queue_work(recv_workqueue, &con->rwork);
}
@@ -268,7 +268,7 @@
{
struct connection *con = sock2con(sk);
- if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+ if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
queue_work(send_workqueue, &con->swork);
}
@@ -313,6 +313,7 @@
in6_addr->sin6_port = cpu_to_be16(port);
*addr_len = sizeof(struct sockaddr_in6);
}
+ memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len);
}
/* Close a remote connection and tidy up */
@@ -332,6 +333,7 @@
__free_page(con->rx_page);
con->rx_page = NULL;
}
+
con->retries = 0;
mutex_unlock(&con->sock_mutex);
}
@@ -631,7 +633,7 @@
out_close:
mutex_unlock(&con->sock_mutex);
- if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
+ if (ret != -EAGAIN) {
close_connection(con, false);
/* Reconnect when there is something to send */
}
@@ -719,12 +721,20 @@
INIT_WORK(&othercon->swork, process_send_sockets);
INIT_WORK(&othercon->rwork, process_recv_sockets);
set_bit(CF_IS_OTHERCON, &othercon->flags);
+ }
+ if (!othercon->sock) {
newcon->othercon = othercon;
+ othercon->sock = newsock;
+ newsock->sk->sk_user_data = othercon;
+ add_sock(newsock, othercon);
+ addcon = othercon;
+ }
+ else {
+ printk("Extra connection from node %d attempted\n", nodeid);
+ result = -EAGAIN;
+ mutex_unlock(&newcon->sock_mutex);
+ goto accept_err;
}
- othercon->sock = newsock;
- newsock->sk->sk_user_data = othercon;
- add_sock(newsock, othercon);
- addcon = othercon;
}
else {
newsock->sk->sk_user_data = newcon;
@@ -854,7 +864,7 @@
static void tcp_connect_to_sock(struct connection *con)
{
int result = -EHOSTUNREACH;
- struct sockaddr_storage saddr;
+ struct sockaddr_storage saddr, src_addr;
int addr_len;
struct socket *sock;
@@ -888,6 +898,17 @@
con->connect_action = tcp_connect_to_sock;
add_sock(sock, con);
+ /* Bind to our cluster-known address before connecting, to avoid
+ routing problems */
+ memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
+ make_sockaddr(&src_addr, 0, &addr_len);
+ result = sock->ops->bind(sock, (struct sockaddr *) &src_addr,
+ addr_len);
+ if (result < 0) {
+ printk("dlm: could not bind for connect: %d\n", result);
+ /* This *may* not indicate a critical error */
+ }
+
make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
log_print("connecting to %d", con->nodeid);
@@ -1116,8 +1137,6 @@
log_print("Using TCP for communications");
- set_bit(CF_IS_OTHERCON, &con->flags);
-
sock = tcp_create_listen_sock(con, dlm_local_addr[0]);
if (sock) {
add_sock(sock, con);
@@ -1256,14 +1275,15 @@
if (len) {
ret = sendpage(con->sock, e->page, offset, len,
msg_flags);
- if (ret == -EAGAIN || ret == 0)
+ if (ret == -EAGAIN || ret == 0) {
+ cond_resched();
goto out;
+ }
if (ret <= 0)
goto send_error;
- } else {
+ }
/* Don't starve people filling buffers */
cond_resched();
- }
spin_lock(&con->writequeue_lock);
e->offset += ret;
@@ -1400,8 +1420,11 @@
down(&connections_lock);
for (i = 0; i <= max_nodeid; i++) {
con = __nodeid2con(i, 0);
- if (con)
- con->flags |= 0xFF;
+ if (con) {
+ con->flags |= 0x0F;
+ if (con->sock)
+ con->sock->sk->sk_user_data = NULL;
+ }
}
up(&connections_lock);
--- linux-source-2.6.22-2.6.22.orig/fs/open.c
+++ linux-source-2.6.22-2.6.22/fs/open.c
@@ -193,8 +193,8 @@
return error;
}
-int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
- struct file *filp)
+int do_truncate(struct dentry *dentry, struct vfsmount *mnt, loff_t length,
+ unsigned int time_attrs, struct file *filp)
{
int err;
struct iattr newattrs;
@@ -214,7 +214,7 @@
newattrs.ia_valid |= should_remove_suid(dentry);
mutex_lock(&dentry->d_inode->i_mutex);
- err = notify_change(dentry, &newattrs);
+ err = notify_change(dentry, mnt, &newattrs);
mutex_unlock(&dentry->d_inode->i_mutex);
return err;
}
@@ -269,7 +269,7 @@
error = locks_verify_truncate(inode, NULL, length);
if (!error) {
DQUOT_INIT(inode);
- error = do_truncate(nd.dentry, length, 0, NULL);
+ error = do_truncate(nd.dentry, nd.mnt, length, 0, NULL);
}
put_write_access(inode);
@@ -321,7 +321,8 @@
error = locks_verify_truncate(inode, file, length);
if (!error)
- error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
+ error = do_truncate(dentry, file->f_path.mnt, length,
+ ATTR_MTIME|ATTR_CTIME, file);
out_putf:
fput(file);
out:
@@ -439,10 +440,8 @@
asmlinkage long sys_fchdir(unsigned int fd)
{
+ struct nameidata nd;
struct file *file;
- struct dentry *dentry;
- struct inode *inode;
- struct vfsmount *mnt;
int error;
error = -EBADF;
@@ -450,17 +449,17 @@
if (!file)
goto out;
- dentry = file->f_path.dentry;
- mnt = file->f_path.mnt;
- inode = dentry->d_inode;
+ nd.dentry = file->f_path.dentry;
+ nd.mnt = file->f_path.mnt;
+ nd.flags = 0;
error = -ENOTDIR;
- if (!S_ISDIR(inode->i_mode))
+ if (!S_ISDIR(nd.dentry->d_inode->i_mode))
goto out_putf;
- error = file_permission(file, MAY_EXEC);
+ error = vfs_permission(&nd, MAY_EXEC);
if (!error)
- set_fs_pwd(current->fs, mnt, dentry);
+ set_fs_pwd(current->fs, nd.mnt, nd.dentry);
out_putf:
fput(file);
out:
@@ -521,7 +520,9 @@
mode = inode->i_mode;
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- err = notify_change(dentry, &newattrs);
+ newattrs.ia_valid |= ATTR_FILE;
+ newattrs.ia_file = file;
+ err = notify_change(dentry, file->f_path.mnt, &newattrs);
mutex_unlock(&inode->i_mutex);
out_putf:
@@ -556,7 +557,7 @@
mode = inode->i_mode;
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- error = notify_change(nd.dentry, &newattrs);
+ error = notify_change(nd.dentry, nd.mnt, &newattrs);
mutex_unlock(&inode->i_mutex);
dput_and_out:
@@ -570,7 +571,8 @@
return sys_fchmodat(AT_FDCWD, filename, mode);
}
-static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
+static int chown_common(struct dentry * dentry, struct vfsmount *mnt,
+ uid_t user, gid_t group, struct file *file)
{
struct inode * inode;
int error;
@@ -598,8 +600,12 @@
}
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
+ if (file) {
+ newattrs.ia_file = file;
+ newattrs.ia_valid |= ATTR_FILE;
+ }
mutex_lock(&inode->i_mutex);
- error = notify_change(dentry, &newattrs);
+ error = notify_change(dentry, mnt, &newattrs);
mutex_unlock(&inode->i_mutex);
out:
return error;
@@ -613,7 +619,7 @@
error = user_path_walk(filename, &nd);
if (error)
goto out;
- error = chown_common(nd.dentry, user, group);
+ error = chown_common(nd.dentry, nd.mnt, user, group, NULL);
path_release(&nd);
out:
return error;
@@ -633,7 +639,7 @@
error = __user_walk_fd(dfd, filename, follow, &nd);
if (error)
goto out;
- error = chown_common(nd.dentry, user, group);
+ error = chown_common(nd.dentry, nd.mnt, user, group, NULL);
path_release(&nd);
out:
return error;
@@ -647,7 +653,7 @@
error = user_path_walk_link(filename, &nd);
if (error)
goto out;
- error = chown_common(nd.dentry, user, group);
+ error = chown_common(nd.dentry, nd.mnt, user, group, NULL);
path_release(&nd);
out:
return error;
@@ -666,7 +672,7 @@
dentry = file->f_path.dentry;
audit_inode(NULL, dentry->d_inode);
- error = chown_common(dentry, user, group);
+ error = chown_common(dentry, file->f_path.mnt, user, group, file);
fput(file);
out:
return error;
--- linux-source-2.6.22-2.6.22.orig/fs/stack.c
+++ linux-source-2.6.22-2.6.22/fs/stack.c
@@ -1,8 +1,20 @@
+/*
+ * Copyright (c) 2006-2007 Erez Zadok
+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2006-2007 Stony Brook University
+ * Copyright (c) 2006-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/fs_stack.h>
-/* does _NOT_ require i_mutex to be held.
+/*
+ * does _NOT_ require i_mutex to be held.
*
* This function cannot be inlined since i_size_{read,write} is rather
* heavy-weight on 32-bit systems
@@ -14,11 +26,11 @@
}
EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
-/* copy all attributes; get_nlinks is optional way to override the i_nlink
+/*
+ * copy all attributes; get_nlinks is optional way to override the i_nlink
* copying
*/
-void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
- int (*get_nlinks)(struct inode *))
+void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
{
dest->i_mode = src->i_mode;
dest->i_uid = src->i_uid;
@@ -29,14 +41,6 @@
dest->i_ctime = src->i_ctime;
dest->i_blkbits = src->i_blkbits;
dest->i_flags = src->i_flags;
-
- /*
- * Update the nlinks AFTER updating the above fields, because the
- * get_links callback may depend on them.
- */
- if (!get_nlinks)
- dest->i_nlink = src->i_nlink;
- else
- dest->i_nlink = (*get_nlinks)(dest);
+ dest->i_nlink = src->i_nlink;
}
EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
--- linux-source-2.6.22-2.6.22.orig/kernel/sys.c
+++ linux-source-2.6.22-2.6.22/kernel/sys.c
@@ -1428,7 +1428,6 @@
* Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
* LBT 04.03.94
*/
-
asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
{
struct task_struct *p;
@@ -1456,7 +1455,7 @@
if (!thread_group_leader(p))
goto out;
- if (p->real_parent == group_leader) {
+ if (p->real_parent->tgid == group_leader->tgid) {
err = -EPERM;
if (task_session(p) != task_session(group_leader))
goto out;
--- linux-source-2.6.22-2.6.22.orig/kernel/signal.c
+++ linux-source-2.6.22-2.6.22/kernel/signal.c
@@ -368,7 +368,7 @@
/* We only dequeue private signals from ourselves, we don't let
* signalfd steal them
*/
- if (tsk == current)
+ if (likely(tsk == current))
signr = __dequeue_signal(&tsk->pending, mask, info);
if (!signr) {
signr = __dequeue_signal(&tsk->signal->shared_pending,
@@ -415,7 +415,7 @@
if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
}
- if ( signr &&
+ if (signr && likely(tsk == current) &&
((info->si_code & __SI_MASK) == __SI_TIMER) &&
info->si_sys_private){
/*
@@ -1259,20 +1259,19 @@
void sigqueue_free(struct sigqueue *q)
{
unsigned long flags;
+ spinlock_t *lock = &current->sighand->siglock;
+
BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
/*
* If the signal is still pending remove it from the
- * pending queue.
+ * pending queue. We must hold ->siglock while testing
+ * q->list to serialize with collect_signal().
*/
- if (unlikely(!list_empty(&q->list))) {
- spinlock_t *lock = &current->sighand->siglock;
- read_lock(&tasklist_lock);
- spin_lock_irqsave(lock, flags);
- if (!list_empty(&q->list))
- list_del_init(&q->list);
- spin_unlock_irqrestore(lock, flags);
- read_unlock(&tasklist_lock);
- }
+ spin_lock_irqsave(lock, flags);
+ if (!list_empty(&q->list))
+ list_del_init(&q->list);
+ spin_unlock_irqrestore(lock, flags);
+
q->flags &= ~SIGQUEUE_PREALLOC;
__sigqueue_free(q);
}
--- linux-source-2.6.22-2.6.22.orig/kernel/relay.c
+++ linux-source-2.6.22-2.6.22/kernel/relay.c
@@ -91,6 +91,7 @@
return -EINVAL;
vma->vm_ops = &relay_file_mmap_ops;
+ vma->vm_flags |= VM_DONTEXPAND;
vma->vm_private_data = buf;
buf->chan->cb->buf_mapped(buf, filp);
--- linux-source-2.6.22-2.6.22.orig/kernel/exit.c
+++ linux-source-2.6.22-2.6.22/kernel/exit.c
@@ -1336,8 +1336,7 @@
int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
exit_code = p->exit_code;
- if (unlikely(!exit_code) ||
- unlikely(p->state & TASK_TRACED))
+ if (unlikely(!exit_code) || unlikely(p->exit_state))
goto bail_ref;
return wait_noreap_copyout(p, pid, uid,
why, (exit_code << 8) | 0x7f,
--- linux-source-2.6.22-2.6.22.orig/kernel/power/console.c
+++ linux-source-2.6.22-2.6.22/kernel/power/console.c
@@ -16,6 +16,7 @@
int pm_prepare_console(void)
{
+#ifndef CONFIG_PM_DISABLE_CONSOLE
acquire_console_sem();
orig_fgconsole = fg_console;
@@ -44,15 +45,18 @@
}
orig_kmsg = kmsg_redirect;
kmsg_redirect = SUSPEND_CONSOLE;
+#endif
return 0;
}
void pm_restore_console(void)
{
+#ifndef CONFIG_PM_DISABLE_CONSOLE
acquire_console_sem();
set_console(orig_fgconsole);
release_console_sem();
kmsg_redirect = orig_kmsg;
+#endif
return;
}
#endif
--- linux-source-2.6.22-2.6.22.orig/kernel/power/Kconfig
+++ linux-source-2.6.22-2.6.22/kernel/power/Kconfig
@@ -33,13 +33,20 @@
bool "Power Management Debug Support"
depends on PM
---help---
- This option enables verbose debugging support in the Power Management
- code. This is helpful when debugging and reporting various PM bugs,
- like suspend support.
+ This option enables various debugging support in the Power Management
+ code. This is helpful when debugging and reporting PM bugs, like
+ suspend support.
+
+config PM_VERBOSE
+ bool "Verbose Power Management debugging"
+ depends on PM_DEBUG
+ default n
+ ---help---
+ This option enables verbose messages from the Power Management code.
config DISABLE_CONSOLE_SUSPEND
bool "Keep console(s) enabled during suspend/resume (DANGEROUS)"
- depends on PM && PM_DEBUG
+ depends on PM_DEBUG
default n
---help---
This option turns off the console suspend mechanism that prevents
@@ -50,7 +57,7 @@
config PM_TRACE
bool "Suspend/resume event tracing"
- depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL
+ depends on PM_DEBUG && X86_32 && EXPERIMENTAL
default n
---help---
This enables some cheesy code to save the last PM event point in the
@@ -77,6 +84,21 @@
handle the wide variability of device power states; any replacements
are likely to be bus or driver specific.
+config PM_DISABLE_CONSOLE
+ bool "Disable Power Management messing with the active console"
+ depends on PM
+ default n
+ ---help---
+ By default, PM will take over the active console (generally, this means
+ switching to the console when suspending from X). This can at times cause
+ problems, especially if userspace suspend scripts try to do things with the
+ console before or after suspending (e.g. calling vbestate).
+
+ To work around this, enable this option so that PM will not handle the
+ console.
+
+ If unsure, say N.
+
config SOFTWARE_SUSPEND
bool "Software Suspend (Hibernation)"
depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
--- linux-source-2.6.22-2.6.22.orig/kernel/power/snapshot.c
+++ linux-source-2.6.22-2.6.22/kernel/power/snapshot.c
@@ -709,7 +709,8 @@
region->end_pfn << PAGE_SHIFT);
for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
- memory_bm_set_bit(bm, pfn);
+ if (pfn_valid(pfn))
+ memory_bm_set_bit(bm, pfn);
}
}
--- linux-source-2.6.22-2.6.22.orig/kernel/kmod.c
+++ linux-source-2.6.22-2.6.22/kernel/kmod.c
@@ -305,23 +305,32 @@
EXPORT_SYMBOL(call_usermodehelper_keys);
int call_usermodehelper_pipe(char *path, char **argv, char **envp,
- struct file **filp)
+ struct file **filp, struct subprocess_info **sub_info)
{
- DECLARE_COMPLETION(done);
- struct subprocess_info sub_info = {
- .work = __WORK_INITIALIZER(sub_info.work,
- __call_usermodehelper),
- .complete = &done,
- .path = path,
- .argv = argv,
- .envp = envp,
- .retval = 0,
- };
struct file *f;
+ struct subprocess_info *sinfo;
if (!khelper_wq)
return -EBUSY;
+ sinfo = kzalloc(sizeof(struct subprocess_info), GFP_KERNEL);
+ if (!sinfo)
+ return -ENOMEM;
+
+ sinfo->complete = kmalloc(sizeof(struct completion), GFP_KERNEL);
+ if (!sinfo->complete) {
+ kfree(sinfo);
+ return -ENOMEM;
+ }
+
+ *sub_info = sinfo;
+ INIT_WORK(&sinfo->work, __call_usermodehelper);
+ init_completion(sinfo->complete);
+ sinfo->path = path;
+ sinfo->argv = argv;
+ sinfo->envp = envp;
+ sinfo->wait = 1;
+
if (path[0] == '\0')
return 0;
@@ -335,14 +344,37 @@
free_write_pipe(*filp);
return PTR_ERR(f);
}
- sub_info.stdin = f;
+ sinfo->stdin = f;
- queue_work(khelper_wq, &sub_info.work);
- wait_for_completion(&done);
- return sub_info.retval;
+ queue_work(khelper_wq, &sinfo->work);
+ return 0;
}
EXPORT_SYMBOL(call_usermodehelper_pipe);
+/*
+ * Wait on sub_info->complete, free sub_info together with its completion,
+ * and return the retval that was stored in sub_info.  sub_info is gone
+ * when this returns and must not be used by the caller again.
+ *
+ * NOTE(review): call_usermodehelper_pipe() returns 0 without queueing any
+ * work when path is empty; presumably callers must not wait here in that
+ * case - confirm.
+ */
+int finish_usermodehelper_pipe(struct subprocess_info *sub_info)
+{
+ int retval;
+
+ wait_for_completion(sub_info->complete);
+
+ /* fetch retval first: sub_info must not be read once it is freed */
+ retval = sub_info->retval;
+ kfree(sub_info->complete);
+ kfree(sub_info);
+
+ return retval;
+}
+EXPORT_SYMBOL(finish_usermodehelper_pipe);
+
void __init usermodehelper_init(void)
{
khelper_wq = create_singlethread_workqueue("khelper");
--- linux-source-2.6.22-2.6.22.orig/kernel/futex_compat.c
+++ linux-source-2.6.22-2.6.22/kernel/futex_compat.c
@@ -29,6 +29,15 @@
return 0;
}
+static void __user *futex_uaddr(struct robust_list *entry,
+ compat_long_t futex_offset)
+{
+ compat_uptr_t entry_addr = ptr_to_compat(entry);
+
+ /* the offset is applied to the compat_uptr_t value, then converted back */
+ return compat_ptr(entry_addr + futex_offset);
+}
+
/*
* Walk curr->robust_list (very carefully, it's a userspace list!)
* and mark any locks found there dead, and notify any waiters.
@@ -61,19 +70,23 @@
if (fetch_robust_entry(&upending, &pending,
&head->list_op_pending, &pip))
return;
- if (upending)
- handle_futex_death((void __user *)pending + futex_offset, curr, pip);
+ if (pending) {
+ void __user *uaddr = futex_uaddr(pending, futex_offset);
- while (compat_ptr(uentry) != &head->list) {
+ handle_futex_death(uaddr, curr, pip);
+ }
+
+ while (entry != (struct robust_list __user *) &head->list) {
/*
* A pending lock might already be on the list, so
* dont process it twice:
*/
- if (entry != pending)
- if (handle_futex_death((void __user *)entry + futex_offset,
- curr, pi))
+ if (entry != pending) {
+ void __user *uaddr = futex_uaddr(entry,
+ futex_offset);
+ if (handle_futex_death(uaddr, curr, pi))
return;
-
+ }
/*
* Fetch the next entry in the list:
*/
@@ -154,7 +167,7 @@
t = timespec_to_ktime(ts);
if (cmd == FUTEX_WAIT)
- t = ktime_add(ktime_get(), t);
+ t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
--- linux-source-2.6.22-2.6.22.orig/kernel/lockdep_proc.c
+++ linux-source-2.6.22-2.6.22/kernel/lockdep_proc.c
@@ -339,7 +339,7 @@
.open = lockdep_stats_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = single_release,
};
static int __init lockdep_proc_init(void)
--- linux-source-2.6.22-2.6.22.orig/kernel/posix-timers.c
+++ linux-source-2.6.22-2.6.22/kernel/posix-timers.c
@@ -764,9 +764,11 @@
/* SIGEV_NONE timers are not queued ! See common_timer_get */
if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
/* Setup correct expiry time for relative timers */
- if (mode == HRTIMER_MODE_REL)
- timer->expires = ktime_add(timer->expires,
- timer->base->get_time());
+ if (mode == HRTIMER_MODE_REL) {
+ timer->expires =
+ ktime_add_safe(timer->expires,
+ timer->base->get_time());
+ }
return 0;
}
--- linux-source-2.6.22-2.6.22.orig/kernel/audit.c
+++ linux-source-2.6.22-2.6.22/kernel/audit.c
@@ -1054,8 +1054,7 @@
* will be called a second time. Currently, we assume that a printk
* can't format message larger than 1024 bytes, so we don't either.
*/
-static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
- va_list args)
+void audit_log_vformat(struct audit_buffer *ab, const char *fmt, va_list args)
{
int len, avail;
struct sk_buff *skb;
@@ -1311,3 +1310,6 @@
EXPORT_SYMBOL(audit_log_end);
EXPORT_SYMBOL(audit_log_format);
EXPORT_SYMBOL(audit_log);
+EXPORT_SYMBOL_GPL(audit_log_vformat);
+EXPORT_SYMBOL_GPL(audit_log_untrustedstring);
+EXPORT_SYMBOL_GPL(audit_log_d_path);
--- linux-source-2.6.22-2.6.22.orig/kernel/hrtimer.c
+++ linux-source-2.6.22-2.6.22/kernel/hrtimer.c
@@ -305,6 +305,23 @@
}
#endif /* BITS_PER_LONG >= 64 */
+/*
+ * ktime_add_safe - add two ktime values, clamping instead of wrapping
+ *
+ * A sum that is negative, or smaller than either operand, is treated
+ * as an overflow.  In that case the result is replaced by
+ * KTIME_SEC_MAX seconds, the maximum timeout which we can return to
+ * user space in a timespec.
+ */
+ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
+{
+	const ktime_t sum = ktime_add(lhs, rhs);
+
+	/* Plausible result: non-negative and not below either input. */
+	if (sum.tv64 >= 0 && sum.tv64 >= lhs.tv64 && sum.tv64 >= rhs.tv64)
+		return sum;
+
+	return ktime_set(KTIME_SEC_MAX, 0);
+}
+
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -659,13 +676,7 @@
*/
orun++;
}
- timer->expires = ktime_add(timer->expires, interval);
- /*
- * Make sure, that the result did not wrap with a very large
- * interval.
- */
- if (timer->expires.tv64 < 0)
- timer->expires = ktime_set(KTIME_SEC_MAX, 0);
+ timer->expires = ktime_add_safe(timer->expires, interval);
return orun;
}
@@ -814,7 +825,7 @@
new_base = switch_hrtimer_base(timer, base);
if (mode == HRTIMER_MODE_REL) {
- tim = ktime_add(tim, new_base->get_time());
+ tim = ktime_add_safe(tim, new_base->get_time());
/*
* CONFIG_TIME_LOW_RES is a temporary way for architectures
* to signal that they simply return xtime in
@@ -823,7 +834,7 @@
* timeouts. This will go away with the GTOD framework.
*/
#ifdef CONFIG_TIME_LOW_RES
- tim = ktime_add(tim, base->resolution);
+ tim = ktime_add_safe(tim, base->resolution);
#endif
}
timer->expires = tim;
@@ -1406,7 +1417,7 @@
static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
- long cpu = (long)hcpu;
+ unsigned int cpu = (long)hcpu;
switch (action) {
--- linux-source-2.6.22-2.6.22.orig/kernel/auditsc.c
+++ linux-source-2.6.22-2.6.22/kernel/auditsc.c
@@ -1998,19 +1998,19 @@
extern uid_t audit_sig_uid;
extern u32 audit_sig_sid;
- if (audit_pid && t->tgid == audit_pid &&
- (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1)) {
- audit_sig_pid = tsk->pid;
- if (ctx)
- audit_sig_uid = ctx->loginuid;
- else
- audit_sig_uid = tsk->uid;
- selinux_get_task_sid(tsk, &audit_sig_sid);
+ if (audit_pid && t->tgid == audit_pid) {
+ if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
+ audit_sig_pid = tsk->pid;
+ if (ctx)
+ audit_sig_uid = ctx->loginuid;
+ else
+ audit_sig_uid = tsk->uid;
+ selinux_get_task_sid(tsk, &audit_sig_sid);
+ }
+ if (!audit_signals || audit_dummy_context())
+ return 0;
}
- if (!audit_signals) /* audit_context checked in wrapper */
- return 0;
-
/* optimize the common case by putting first signal recipient directly
* in audit_context */
if (!ctx->target_pid) {
--- linux-source-2.6.22-2.6.22.orig/kernel/sysctl.c
+++ linux-source-2.6.22-2.6.22/kernel/sysctl.c
@@ -1110,6 +1110,33 @@
return NULL;
}
+/*
+ * sysctl_pathname - build the "/sys/..." path name of a sysctl table entry
+ * @table:  entry to name; its ->parent chain is walked up to the root
+ * @buffer: caller-supplied scratch buffer
+ * @buflen: size of @buffer in bytes
+ *
+ * The string is assembled backwards from the end of @buffer: first the
+ * terminating NUL, then "/<procname>" for @table and for each ancestor,
+ * and finally the "/sys" prefix.  The returned pointer therefore points
+ * into @buffer, but not necessarily at its start.
+ *
+ * Returns NULL when @buffer is too small or when an entry on the chain
+ * has no procname.
+ */
+char *sysctl_pathname(ctl_table *table, char *buffer, int buflen)
+{
+	if (buflen < 1)
+		return NULL;
+	/* Reserve the last byte for the terminator and build leftwards. */
+	buffer += --buflen;
+	*buffer = '\0';
+
+	while (table) {
+		int namelen;
+
+		/* An entry without a text name cannot be rendered. */
+		if (!table->procname)
+			return NULL;
+		namelen = strlen(table->procname);
+
+		/* Need room for the name plus its leading '/'. */
+		if (buflen < namelen + 1)
+			return NULL;
+		buflen -= namelen + 1;
+		buffer -= namelen;
+		memcpy(buffer, table->procname, namelen);
+		*--buffer = '/';
+		table = table->parent;
+	}
+	if (buflen < 4)
+		return NULL;
+	buffer -= 4;
+	memcpy(buffer, "/sys", 4);
+
+	return buffer;
+}
+EXPORT_SYMBOL(sysctl_pathname);
+
#ifdef CONFIG_SYSCTL_SYSCALL
int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
--- linux-source-2.6.22-2.6.22.orig/kernel/time/tick-broadcast.c
+++ linux-source-2.6.22-2.6.22/kernel/time/tick-broadcast.c
@@ -364,11 +364,7 @@
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
-
- if(!cpus_empty(tick_broadcast_oneshot_mask))
- tick_broadcast_set_event(ktime_get(), 1);
-
- return cpu_isset(smp_processor_id(), tick_broadcast_oneshot_mask);
+ return 0;
}
/*
--- linux-source-2.6.22-2.6.22.orig/kernel/time/timer_stats.c
+++ linux-source-2.6.22-2.6.22/kernel/time/timer_stats.c
@@ -391,7 +391,7 @@
.read = seq_read,
.write = tstats_write,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = single_release,
};
void __init init_timer_stats(void)
--- linux-source-2.6.22-2.6.22.orig/kernel/time/timer_list.c
+++ linux-source-2.6.22-2.6.22/kernel/time/timer_list.c
@@ -267,7 +267,7 @@
.open = timer_list_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = single_release,
};
static int __init init_timer_list_procfs(void)
--- linux-source-2.6.22-2.6.22.orig/kernel/workqueue.c
+++ linux-source-2.6.22-2.6.22/kernel/workqueue.c
@@ -739,18 +739,17 @@
if (cwq->thread == NULL)
return;
+ flush_cpu_workqueue(cwq);
/*
- * If the caller is CPU_DEAD the single flush_cpu_workqueue()
- * is not enough, a concurrent flush_workqueue() can insert a
- * barrier after us.
+ * If the caller is CPU_DEAD and cwq->worklist was not empty,
+ * a concurrent flush_workqueue() can insert a barrier after us.
+ * However, in that case run_workqueue() won't return and check
+ * kthread_should_stop() until it flushes all work_struct's.
* When ->worklist becomes empty it is safe to exit because no
* more work_structs can be queued on this cwq: flush_workqueue
* checks list_empty(), and a "normal" queue_work() can't use
* a dead CPU.
*/
- while (flush_cpu_workqueue(cwq))
- ;
-
kthread_stop(cwq->thread);
cwq->thread = NULL;
}
--- linux-source-2.6.22-2.6.22.orig/kernel/futex.c
+++ linux-source-2.6.22-2.6.22/kernel/futex.c
@@ -2056,13 +2056,15 @@
t = timespec_to_ktime(ts);
if (cmd == FUTEX_WAIT)
- t = ktime_add(ktime_get(), t);
+ t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
/*
* requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
+ * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
*/
- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+ cmd == FUTEX_WAKE_OP)
val2 = (u32) (unsigned long) utime;
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
--- linux-source-2.6.22-2.6.22.orig/arch/i386/Makefile
+++ linux-source-2.6.22-2.6.22/arch/i386/Makefile
@@ -51,8 +51,8 @@
CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)
# do binutils support CFI?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
-AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+cflags-y += $(call as-instr,.cfi_startproc\n.cfi_rel_offset esp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset esp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
# is .cfi_signal_frame supported too?
cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
--- linux-source-2.6.22-2.6.22.orig/arch/i386/Kconfig.debug
+++ linux-source-2.6.22-2.6.22/arch/i386/Kconfig.debug
@@ -19,6 +19,12 @@
with klogd/syslogd or the X server. You should normally N here,
unless you want to debug such a crash.
+config WRAPPER_PRINT
+ bool "Boot wrapper print" if EMBEDDED
+ default y
+ help
+ Enable informational output from the bootwrapper (bzImage and zImage).
+
config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL
--- linux-source-2.6.22-2.6.22.orig/arch/i386/mm/fault.c
+++ linux-source-2.6.22-2.6.22/arch/i386/mm/fault.c
@@ -249,9 +249,10 @@
pmd_k = pmd_offset(pud_k, address);
if (!pmd_present(*pmd_k))
return NULL;
- if (!pmd_present(*pmd))
+ if (!pmd_present(*pmd)) {
set_pmd(pmd, *pmd_k);
- else
+ arch_flush_lazy_mmu_mode();
+ } else
BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
return pmd_k;
}
--- linux-source-2.6.22-2.6.22.orig/arch/i386/pci/common.c
+++ linux-source-2.6.22-2.6.22/arch/i386/pci/common.c
@@ -17,8 +17,7 @@
#include "pci.h"
-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
- PCI_PROBE_MMCONF;
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
static int pci_bf_sort;
int pci_routeirq;
@@ -145,6 +144,22 @@
}
#endif
+#ifdef CONFIG_PCI_MMCONFIG
+/*
+ * Set PCI_PROBE_MMCONF in pci_probe.  Always returns 0.
+ * NOTE(review): presumably installed as a dmi_system_id callback for
+ * machines known to have working MMCONFIG - confirm against the table.
+ */
+static int __devinit working_mmconfig(struct dmi_system_id *d)
+{
+	pci_probe |= PCI_PROBE_MMCONF;
+	return 0;
+}
+
+/*
+ * Clear PCI_PROBE_MMCONF in pci_probe and log the ident string of the
+ * entry that triggered it.  Always returns 0.
+ */
+static int __devinit blacklist_mmconfig(struct dmi_system_id *d)
+{
+	pci_probe &= ~PCI_PROBE_MMCONF;
+	/* End the line so the next printk does not get appended to it. */
+	printk(KERN_INFO "%s detected: disabling MMCONFIG PCI access\n",
+	       d->ident);
+	return 0;
+}
+#endif /*CONFIG_PCI_MMCONFIG*/
+
static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
#ifdef __i386__
/*
@@ -388,6 +403,10 @@
pci_probe &= ~PCI_PROBE_MMCONF;
return NULL;
}
+ else if (!strcmp(str, "mmconf")) {
+ pci_probe |= PCI_PROBE_MMCONF;
+ return NULL;
+ }
#endif
else if (!strcmp(str, "noacpi")) {
acpi_noirq_set();
--- linux-source-2.6.22-2.6.22.orig/arch/i386/kernel/acpi/boot.c
+++ linux-source-2.6.22-2.6.22/arch/i386/kernel/acpi/boot.c
@@ -592,9 +592,25 @@
* RSDP signature.
*/
for (offset = 0; offset < length; offset += 16) {
- if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len))
- continue;
- return (start + offset);
+		if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len) == 0) {
+			/* 2007-09-24 TJ
+			 * The ACPI specification states the first 20 bytes of the RSDP table
+			 * must have a checksum of 0 (ACPI 1.0b RSDP table is 20 bytes long).
+			 * The signature can appear in multiple memory locations so don't rely
+			 * on it as the sole proof of a valid table.
+			 * This fixes broken/disabled ACPI problems with Acer Travelmate C100
+			 * (and others) where the first signature match is accepted without
+			 * confirming the checksum.
+			 */
+			unsigned int i;
+			unsigned char checksum;
+			unsigned char *table = (unsigned char *)(phys_to_virt(start) + offset);
+
+			/* checksum is an unsigned char, so the sum wraps modulo 256. */
+			for (checksum = 0, i = 0; i < 20; i++)
+				checksum += table[i];
+
+			/* %lX takes an unsigned long, so convert the pointer explicitly. */
+			printk(KERN_WARNING PREFIX "RSDP signature @ 0x%08lX checksum %d\n",
+			       (unsigned long)table, checksum);
+			/* Only a candidate whose 20 bytes sum to zero is accepted. */
+			if (checksum == 0)
+				return (start + offset);
+		}
}
return 0;
--- linux-source-2.6.22-2.6.22.orig/arch/i386/kernel/Makefile
+++ linux-source-2.6.22-2.6.22/arch/i386/kernel/Makefile
@@ -35,7 +35,6 @@
obj-$(CONFIG_ACPI_SRAT) += srat.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
-obj-$(CONFIG_SERIAL_8250) += legacy_serial.o
obj-$(CONFIG_VM86) += vm86.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_HPET_TIMER) += hpet.o
--- linux-source-2.6.22-2.6.22.orig/arch/i386/kernel/doublefault.c
+++ linux-source-2.6.22-2.6.22/arch/i386/kernel/doublefault.c
@@ -13,7 +13,7 @@
static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
-#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000)
+#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
static void doublefault_fn(void)
{
@@ -23,23 +23,23 @@
store_gdt(&gdt_desc);
gdt = gdt_desc.address;
- printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
+ printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
if (ptr_ok(gdt)) {
gdt += GDT_ENTRY_TSS << 3;
tss = *(u16 *)(gdt+2);
tss += *(u8 *)(gdt+4) << 16;
tss += *(u8 *)(gdt+7) << 24;
- printk("double fault, tss at %08lx\n", tss);
+ printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
if (ptr_ok(tss)) {
struct i386_hw_tss *t = (struct i386_hw_tss *)tss;
- printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
+ printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", t->eip, t->esp);
- printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
+ printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
t->eax, t->ebx, t->ecx, t->edx);
- printk("esi = %08lx, edi = %08lx\n",
+ printk(KERN_EMERG "esi = %08lx, edi = %08lx\n",
t->esi, t->edi);
}
}
@@ -63,6 +63,7 @@
.cs = __KERNEL_CS,
.ss = __KERNEL_DS,
.ds = __USER_DS,
+ .fs = __KERNEL_PERCPU,
.__cr3 = __pa(swapper_pg_dir)
}
--- linux-source-2.6.22-2.6.22.orig/arch/i386/kernel/sysenter.c
+++ linux-source-2.6.22-2.6.22/arch/i386/kernel/sysenter.c
@@ -336,7 +336,9 @@
+/*
+ * Return nonzero when addr lies inside [vm_start, vm_end) of the vma
+ * that get_gate_vma(task) returns, and 0 when no vma is returned.
+ */
int in_gate_area(struct task_struct *task, unsigned long addr)
{
- return 0;
+ const struct vm_area_struct *vma = get_gate_vma(task);
+
+ return vma && addr >= vma->vm_start && addr < vma->vm_end;
}
int in_gate_area_no_task(unsigned long addr)
--- linux-source-2.6.22-2.6.22.orig/arch/i386/kernel/hpet.c
+++ linux-source-2.6.22-2.6.22/arch/i386/kernel/hpet.c
@@ -226,7 +226,8 @@
{
unsigned long id;
uint64_t hpet_freq;
- u64 tmp;
+ u64 tmp, start, now;
+ cycle_t t1;
if (!is_hpet_capable())
return 0;
@@ -273,6 +274,27 @@
/* Start the counter */
hpet_start_counter();
+ /* Verify whether hpet counter works */
+ t1 = read_hpet();
+ rdtscll(start);
+
+ /*
+ * We don't know the TSC frequency yet, but waiting for
+ * 200000 TSC cycles is safe:
+ * 4 GHz == 50us
+ * 1 GHz == 200us
+ */
+ do {
+ rep_nop();
+ rdtscll(now);
+ } while ((now - start) < 200000UL);
+
+ if (t1 == read_hpet()) {
+ printk(KERN_WARNING
+ "HPET counter not counting. HPET disabled\n");
+ goto out_nohpet;
+ }
+
/* Initialize and register HPET clocksource
*
* hpet period is in femto seconds per cycle
--- linux-source-2.6.22-2.6.22.orig/arch/i386/kernel/cpu/perfctr-watchdog.c
+++ linux-source-2.6.22-2.6.22/arch/i386/kernel/cpu/perfctr-watchdog.c
@@ -346,7 +346,9 @@
perfctr_msr = MSR_P6_PERFCTR0;
evntsel_msr = MSR_P6_EVNTSEL0;
- wrmsrl(perfctr_msr, 0UL);
+ /* KVM doesn't implement this MSR */
+ if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
+ return 0;
evntsel = P6_EVNTSEL_INT
| P6_EVNTSEL_OS
--- linux-source-2.6.22-2.6.22.orig/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ linux-source-2.6.22-2.6.22/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -76,7 +76,10 @@
/* Return a frequency in MHz, given an input fid and did */
static u32 find_freq_from_fiddid(u32 fid, u32 did)
{
- return 100 * (fid + 0x10) >> did;
+ /*
+ * The constant added to fid depends on current_cpu_data.x86
+ * (presumably the CPU family - confirm): 0x10 when it equals 0x10,
+ * 0x8 otherwise.  '*' binds tighter than '>>', so the whole product
+ * is shifted right by did.
+ */
+ if (current_cpu_data.x86 == 0x10)
+ return 100 * (fid + 0x10) >> did;
+ else
+ return 100 * (fid + 0x8) >> did;
}
static u32 find_khz_freq_from_fiddid(u32 fid, u32 did)
--- linux-source-2.6.22-2.6.22.orig/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ linux-source-2.6.22-2.6.22/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -167,11 +167,13 @@
static void do_drv_write(struct drv_cmd *cmd)
{
- u32 h = 0;
+ u32 lo, hi;
switch (cmd->type) {
case SYSTEM_INTEL_MSR_CAPABLE:
- wrmsr(cmd->addr.msr.reg, cmd->val, h);
+ rdmsr(cmd->addr.msr.reg, lo, hi);
+ lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
+ wrmsr(cmd->addr.msr.reg, lo, hi);
break;
case SYSTEM_IO_CAPABLE:
acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
@@ -372,7 +374,6 @@
struct cpufreq_freqs freqs;
cpumask_t online_policy_cpus;
struct drv_cmd cmd;
- unsigned int msr;
unsigned int next_state = 0; /* Index into freq_table */
unsigned int next_perf_state = 0; /* Index into perf table */
unsigned int i;
@@ -417,11 +418,7 @@
case SYSTEM_INTEL_MSR_CAPABLE:
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
- msr =
- (u32) perf->states[next_perf_state].
- control & INTEL_MSR_RANGE;
- cmd.val = get_cur_val(online_policy_cpus);
- cmd.val = (cmd.val & ~INTEL_MSR_RANGE) | msr;
+ cmd.val = (u32) perf->states[next_perf_state].control;
break;
case SYSTEM_IO_CAPABLE:
cmd.type = SYSTEM_IO_CAPABLE;
--- linux-source-2.6.22-2.6.22.orig/arch/i386/kernel/cpu/amd.c
+++ linux-source-2.6.22-2.6.22/arch/i386/kernel/cpu/amd.c
@@ -3,6 +3,7 @@
#include
#include
#include
+#include
#include "cpu.h"
@@ -22,6 +23,7 @@
extern void vide(void);
__asm__(".align 4\nvide: ret");
+#ifdef CONFIG_X86_LOCAL_APIC
#define ENABLE_C1E_MASK 0x18000000
#define CPUID_PROCESSOR_SIGNATURE 1
#define CPUID_XFAM 0x0ff00000
@@ -52,6 +54,7 @@
}
return 0;
}
+#endif
int force_mwait __cpuinitdata;
@@ -275,8 +278,10 @@
if (cpuid_eax(0x80000000) >= 0x80000006)
num_cache_leaves = 3;
+#ifdef CONFIG_X86_LOCAL_APIC
if (amd_apic_timer_broken())
- set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
+ local_apic_timer_disabled = 1;
+#endif
if (c->x86 == 0x10 && !force_mwait)
clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
--- linux-source-2.6.22-2.6.22.orig/arch/i386/kernel/apic.c
+++ linux-source-2.6.22-2.6.22/arch/i386/kernel/apic.c
@@ -61,8 +61,9 @@
/* Local APIC timer verification ok */
static int local_apic_timer_verify_ok;
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int local_apic_timer_disabled;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk
+ or using CPU MSR check */
+int local_apic_timer_disabled;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -367,9 +368,6 @@
long delta, deltapm;
int pm_referenced = 0;
- if (boot_cpu_has(X86_FEATURE_LAPIC_TIMER_BROKEN))
- local_apic_timer_disabled = 1;
-
/*
* The local apic timer can be disabled via the kernel
* commandline or from the test above. Register the lapic
--- linux-source-2.6.22-2.6.22.orig/arch/i386/boot/compressed/misc.c
+++ linux-source-2.6.22-2.6.22/arch/i386/boot/compressed/misc.c
@@ -184,8 +184,6 @@
static void *memset(void *s, int c, unsigned n);
static void *memcpy(void *dest, const void *src, unsigned n);
-static void putstr(const char *);
-
static unsigned long free_mem_ptr;
static unsigned long free_mem_end_ptr;
@@ -232,7 +230,8 @@
{
free_mem_ptr = (unsigned long) *ptr;
}
-
+
+#ifdef CONFIG_WRAPPER_PRINT
static void scroll(void)
{
int i;
@@ -278,6 +277,9 @@
outb_p(15, vidport);
outb_p(0xff & (pos >> 1), vidport+1);
}
+#else
+#define putstr(__x) do{}while(0)
+#endif /* CONFIG_WRAPPER_PRINT */
static void* memset(void* s, int c, unsigned n)
{
--- linux-source-2.6.22-2.6.22.orig/arch/ia64/kernel/unaligned.c
+++ linux-source-2.6.22-2.6.22/arch/ia64/kernel/unaligned.c
@@ -1487,16 +1487,19 @@
case LDFA_OP:
case LDFCCLR_OP:
case LDFCNC_OP:
- case LDF_IMM_OP:
- case LDFA_IMM_OP:
- case LDFCCLR_IMM_OP:
- case LDFCNC_IMM_OP:
if (u.insn.x)
ret = emulate_load_floatpair(ifa, u.insn, regs);
else
ret = emulate_load_float(ifa, u.insn, regs);
break;
+ case LDF_IMM_OP:
+ case LDFA_IMM_OP:
+ case LDFCCLR_IMM_OP:
+ case LDFCNC_IMM_OP:
+ ret = emulate_load_float(ifa, u.insn, regs);
+ break;
+
case STF_OP:
case STF_IMM_OP:
ret = emulate_store_float(ifa, u.insn, regs);
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/mm/init.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/mm/init.c
@@ -1135,14 +1135,9 @@
}
}
-static void __init kernel_physical_mapping_init(void)
+static void __init init_kpte_bitmap(void)
{
unsigned long i;
-#ifdef CONFIG_DEBUG_PAGEALLOC
- unsigned long mem_alloced = 0UL;
-#endif
-
- read_obp_memory("reg", &pall[0], &pall_ents);
for (i = 0; i < pall_ents; i++) {
unsigned long phys_start, phys_end;
@@ -1151,14 +1146,24 @@
phys_end = phys_start + pall[i].reg_size;
mark_kpte_bitmap(phys_start, phys_end);
+ }
+}
+static void __init kernel_physical_mapping_init(void)
+{
#ifdef CONFIG_DEBUG_PAGEALLOC
+ unsigned long i, mem_alloced = 0UL;
+
+ for (i = 0; i < pall_ents; i++) {
+ unsigned long phys_start, phys_end;
+
+ phys_start = pall[i].phys_addr;
+ phys_end = phys_start + pall[i].reg_size;
+
mem_alloced += kernel_map_range(phys_start, phys_end,
PAGE_KERNEL);
-#endif
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
printk("Allocated %ld bytes for kernel page tables.\n",
mem_alloced);
@@ -1400,6 +1405,10 @@
inherit_prom_mappings();
+ read_obp_memory("reg", &pall[0], &pall_ents);
+
+ init_kpte_bitmap();
+
/* Ok, we can use our TLB miss and window trap handlers safely. */
setup_tba();
@@ -1854,7 +1863,9 @@
"wrpr %0, %1, %%pstate"
: "=r" (pstate)
: "i" (PSTATE_IE));
- if (tlb_type == spitfire) {
+ if (tlb_type == hypervisor) {
+ sun4v_mmu_demap_all();
+ } else if (tlb_type == spitfire) {
for (i = 0; i < 64; i++) {
/* Spitfire Errata #32 workaround */
/* NOTE: Always runs on spitfire, so no
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/mm/fault.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/mm/fault.c
@@ -112,15 +112,12 @@
static void bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr)
{
- unsigned long *ksp;
-
printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n",
regs->tpc);
printk(KERN_CRIT "OOPS: RPC [%016lx]\n", regs->u_regs[15]);
print_symbol("RPC: <%s>\n", regs->u_regs[15]);
printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr);
- __asm__("mov %%sp, %0" : "=r" (ksp));
- show_stack(current, ksp);
+ dump_stack();
unhandled_fault(regs->tpc, current, regs);
}
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/prom/console.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/prom/console.c
@@ -73,88 +73,3 @@
P1275_INOUT(3,1),
prom_stdout, s, P1275_SIZE(len));
}
-
-/* Query for input device type */
-enum prom_input_device
-prom_query_input_device(void)
-{
- int st_p;
- char propb[64];
-
- st_p = prom_inst2pkg(prom_stdin);
- if(prom_node_has_property(st_p, "keyboard"))
- return PROMDEV_IKBD;
- prom_getproperty(st_p, "device_type", propb, sizeof(propb));
- if(strncmp(propb, "serial", 6))
- return PROMDEV_I_UNK;
- /* FIXME: Is there any better way how to find out? */
- memset(propb, 0, sizeof(propb));
- st_p = prom_finddevice ("/options");
- prom_getproperty(st_p, "input-device", propb, sizeof(propb));
-
- /*
- * If we get here with propb == 'keyboard', we are on ttya, as
- * the PROM defaulted to this due to 'no input device'.
- */
- if (!strncmp(propb, "keyboard", 8))
- return PROMDEV_ITTYA;
-
- if (!strncmp (propb, "rsc", 3))
- return PROMDEV_IRSC;
-
- if (!strncmp (propb, "virtual-console", 3))
- return PROMDEV_IVCONS;
-
- if (strncmp (propb, "tty", 3) || !propb[3])
- return PROMDEV_I_UNK;
-
- switch (propb[3]) {
- case 'a': return PROMDEV_ITTYA;
- case 'b': return PROMDEV_ITTYB;
- default: return PROMDEV_I_UNK;
- }
-}
-
-/* Query for output device type */
-
-enum prom_output_device
-prom_query_output_device(void)
-{
- int st_p;
- char propb[64];
- int propl;
-
- st_p = prom_inst2pkg(prom_stdout);
- propl = prom_getproperty(st_p, "device_type", propb, sizeof(propb));
- if (propl >= 0 && propl == sizeof("display") &&
- strncmp("display", propb, sizeof("display")) == 0)
- return PROMDEV_OSCREEN;
- if(strncmp("serial", propb, 6))
- return PROMDEV_O_UNK;
- /* FIXME: Is there any better way how to find out? */
- memset(propb, 0, sizeof(propb));
- st_p = prom_finddevice ("/options");
- prom_getproperty(st_p, "output-device", propb, sizeof(propb));
-
- /*
- * If we get here with propb == 'screen', we are on ttya, as
- * the PROM defaulted to this due to 'no input device'.
- */
- if (!strncmp(propb, "screen", 6))
- return PROMDEV_OTTYA;
-
- if (!strncmp (propb, "rsc", 3))
- return PROMDEV_ORSC;
-
- if (!strncmp (propb, "virtual-console", 3))
- return PROMDEV_OVCONS;
-
- if (strncmp (propb, "tty", 3) || !propb[3])
- return PROMDEV_O_UNK;
-
- switch (propb[3]) {
- case 'a': return PROMDEV_OTTYA;
- case 'b': return PROMDEV_OTTYB;
- default: return PROMDEV_O_UNK;
- }
-}
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/prom/tree.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/prom/tree.c
@@ -13,6 +13,7 @@
#include
#include
+#include
/* Return the child of node 'node' or zero if no this node has no
* direct descendent.
@@ -261,9 +262,17 @@
int
prom_setprop(int node, const char *pname, char *value, int size)
{
- if(size == 0) return 0;
- if((pname == 0) || (value == 0)) return 0;
+ if (size == 0)
+ return 0;
+ if ((pname == 0) || (value == 0))
+ return 0;
+#ifdef CONFIG_SUN_LDOMS
+ if (ldom_domaining_enabled) {
+ ldom_set_var(pname, value);
+ return 0;
+ }
+#endif
return p1275_cmd ("setprop", P1275_ARG(1,P1275_ARG_IN_STRING)|
P1275_ARG(2,P1275_ARG_IN_BUF)|
P1275_INOUT(4, 1),
@@ -295,3 +304,11 @@
if (node == -1) return 0;
return node;
}
+
+/*
+ * Issue the PROM "instance-to-path" service for handle, passing buffer
+ * (flagged as an output buffer) and bufsize, and return whatever
+ * p1275_cmd() returns.
+ * NOTE(review): presumably buffer receives the device path of the
+ * instance and the return value is its length - confirm.
+ */
+int prom_ihandle2path(int handle, char *buffer, int bufsize)
+{
+ return p1275_cmd("instance-to-path",
+ P1275_ARG(1,P1275_ARG_OUT_BUF)|
+ P1275_INOUT(3, 1),
+ handle, buffer, P1275_SIZE(bufsize));
+}
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/prom/p1275.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/prom/p1275.c
@@ -16,6 +16,7 @@
#include
#include
#include
+#include
struct {
long prom_callback; /* 0x00 */
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/prom/misc.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/prom/misc.c
@@ -14,6 +14,7 @@
#include
#include
#include
+#include
int prom_service_exists(const char *service_name)
{
@@ -37,6 +38,10 @@
/* Reset and reboot the machine with the command 'bcommand'. */
void prom_reboot(const char *bcommand)
{
+#ifdef CONFIG_SUN_LDOMS
+ /*
+ * With domaining enabled, hand the request to ldom_reboot() first.
+ * If that call returns, execution continues with the PROM "boot"
+ * command below.
+ */
+ if (ldom_domaining_enabled)
+ ldom_reboot(bcommand);
+#endif
p1275_cmd("boot", P1275_ARG(0, P1275_ARG_IN_STRING) |
P1275_INOUT(1, 0), bcommand);
}
@@ -67,7 +72,7 @@
local_irq_save(flags);
- if (!serial_console && prom_palette)
+ if (prom_palette)
prom_palette(1);
#ifdef CONFIG_SMP
@@ -80,7 +85,7 @@
smp_release();
#endif
- if (!serial_console && prom_palette)
+ if (prom_palette)
prom_palette(0);
local_irq_restore(flags);
@@ -91,6 +96,10 @@
*/
void prom_halt(void)
{
+#ifdef CONFIG_SUN_LDOMS
+ if (ldom_domaining_enabled)
+ ldom_power_off();
+#endif
again:
p1275_cmd("exit", P1275_INOUT(0, 0));
goto again; /* PROM is out to get me -DaveM */
@@ -98,6 +107,10 @@
void prom_halt_power_off(void)
{
+#ifdef CONFIG_SUN_LDOMS
+ if (ldom_domaining_enabled)
+ ldom_power_off();
+#endif
p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0));
/* if nothing else helps, we just halt */
@@ -130,22 +143,6 @@
return 0xff;
}
-/* Install Linux trap table so PROM uses that instead of its own. */
-void prom_set_trap_table(unsigned long tba)
-{
- p1275_cmd("SUNW,set-trap-table",
- (P1275_ARG(0, P1275_ARG_IN_64B) |
- P1275_INOUT(1, 0)), tba);
-}
-
-void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa)
-{
- p1275_cmd("SUNW,set-trap-table",
- (P1275_ARG(0, P1275_ARG_IN_64B) |
- P1275_ARG(1, P1275_ARG_IN_64B) |
- P1275_INOUT(2, 0)), tba, mmfsa);
-}
-
int prom_get_mmu_ihandle(void)
{
int node, ret;
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/sysfs.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/sysfs.c
@@ -193,7 +193,6 @@
}
SHOW_CPUDATA_ULONG_NAME(clock_tick, clock_tick);
-SHOW_CPUDATA_ULONG_NAME(udelay_val, udelay_val);
SHOW_CPUDATA_UINT_NAME(l1_dcache_size, dcache_size);
SHOW_CPUDATA_UINT_NAME(l1_dcache_line_size, dcache_line_size);
SHOW_CPUDATA_UINT_NAME(l1_icache_size, icache_size);
@@ -203,7 +202,6 @@
static struct sysdev_attribute cpu_core_attrs[] = {
_SYSDEV_ATTR(clock_tick, 0444, show_clock_tick, NULL),
- _SYSDEV_ATTR(udelay_val, 0444, show_udelay_val, NULL),
_SYSDEV_ATTR(l1_dcache_size, 0444, show_l1_dcache_size, NULL),
_SYSDEV_ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL),
_SYSDEV_ATTR(l1_icache_size, 0444, show_l1_icache_size, NULL),
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/head.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/head.S
@@ -97,7 +97,8 @@
.globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache
.globl prom_boot_mapped_pc, prom_boot_mapping_mode
.globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low
- .globl is_sun4v
+ .globl prom_compatible_name, prom_cpu_path, prom_cpu_compatible
+ .globl is_sun4v, sun4v_chip_type, prom_set_trap_table_name
prom_peer_name:
.asciz "peer"
prom_compatible_name:
@@ -106,6 +107,8 @@
.asciz "finddevice"
prom_chosen_path:
.asciz "/chosen"
+prom_cpu_path:
+ .asciz "/cpu"
prom_getprop_name:
.asciz "getprop"
prom_mmu_name:
@@ -118,11 +121,17 @@
.asciz "map"
prom_unmap_name:
.asciz "unmap"
+prom_set_trap_table_name:
+ .asciz "SUNW,set-trap-table"
prom_sun4v_name:
.asciz "sun4v"
+prom_niagara_prefix:
+ .asciz "SUNW,UltraSPARC-T"
.align 4
prom_root_compatible:
.skip 64
+prom_cpu_compatible:
+ .skip 64
prom_root_node:
.word 0
prom_mmu_ihandle_cache:
@@ -138,6 +147,8 @@
.xword 0
is_sun4v:
.word 0
+sun4v_chip_type:
+ .word SUN4V_CHIP_INVALID
1:
rd %pc, %l0
@@ -296,13 +307,13 @@
sethi %hi(prom_sun4v_name), %g7
or %g7, %lo(prom_sun4v_name), %g7
mov 5, %g3
-1: ldub [%g7], %g2
+90: ldub [%g7], %g2
ldub [%g1], %g4
cmp %g2, %g4
- bne,pn %icc, 2f
+ bne,pn %icc, 80f
add %g7, 1, %g7
subcc %g3, 1, %g3
- bne,pt %xcc, 1b
+ bne,pt %xcc, 90b
add %g1, 1, %g1
sethi %hi(is_sun4v), %g1
@@ -310,7 +321,80 @@
mov 1, %g7
stw %g7, [%g1]
-2:
+ /* cpu_node = prom_finddevice("/cpu") */
+ mov (1b - prom_finddev_name), %l1
+ mov (1b - prom_cpu_path), %l2
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %sp, (192 + 128), %sp
+
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice"
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, "/cpu"
+ stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ ldx [%sp + 2047 + 128 + 0x20], %l4 ! cpu device node
+
+ mov (1b - prom_getprop_name), %l1
+ mov (1b - prom_compatible_name), %l2
+ mov (1b - prom_cpu_compatible), %l5
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %l0, %l5, %l5
+
+ /* prom_getproperty(cpu_node, "compatible",
+ * &prom_cpu_compatible, 64)
+ */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop"
+ mov 4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, cpu_node
+ stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible"
+ stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_cpu_compatible
+ mov 64, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size
+ stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ add %sp, (192 + 128), %sp
+
+ sethi %hi(prom_cpu_compatible), %g1
+ or %g1, %lo(prom_cpu_compatible), %g1
+ sethi %hi(prom_niagara_prefix), %g7
+ or %g7, %lo(prom_niagara_prefix), %g7
+ mov 17, %g3
+90: ldub [%g7], %g2
+ ldub [%g1], %g4
+ cmp %g2, %g4
+ bne,pn %icc, 4f
+ add %g7, 1, %g7
+ subcc %g3, 1, %g3
+ bne,pt %xcc, 90b
+ add %g1, 1, %g1
+
+ sethi %hi(prom_cpu_compatible), %g1
+ or %g1, %lo(prom_cpu_compatible), %g1
+ ldub [%g1 + 17], %g2
+ cmp %g2, '1'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_NIAGARA1, %g4
+ cmp %g2, '2'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_NIAGARA2, %g4
+4:
+ mov SUN4V_CHIP_UNKNOWN, %g4
+5: sethi %hi(sun4v_chip_type), %g2
+ or %g2, %lo(sun4v_chip_type), %g2
+ stw %g4, [%g2]
+
+80:
BRANCH_IF_SUN4V(g1, jump_to_sun4u_init)
BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
@@ -413,6 +497,33 @@
stw %g2, [%g1 + %lo(tlb_type)]
/* Patch copy/clear ops. */
+ sethi %hi(sun4v_chip_type), %g1
+ lduw [%g1 + %lo(sun4v_chip_type)], %g1
+ cmp %g1, SUN4V_CHIP_NIAGARA1
+ be,pt %xcc, niagara_patch
+ cmp %g1, SUN4V_CHIP_NIAGARA2
+ be,pt %xcc, niagara2_patch
+ nop
+
+ call generic_patch_copyops
+ nop
+ call generic_patch_bzero
+ nop
+ call generic_patch_pageops
+ nop
+
+ ba,a,pt %xcc, 80f
+niagara2_patch:
+ call niagara2_patch_copyops
+ nop
+ call niagara_patch_bzero
+ nop
+ call niagara2_patch_pageops
+ nop
+
+ ba,a,pt %xcc, 80f
+
+niagara_patch:
call niagara_patch_copyops
nop
call niagara_patch_bzero
@@ -420,6 +531,7 @@
call niagara_patch_pageops
nop
+80:
/* Patch TLB/cache ops. */
call hypervisor_patch_cachetlbops
nop
@@ -458,7 +570,6 @@
or %g6, %lo(init_thread_union), %g6
ldx [%g6 + TI_TASK], %g4
mov %sp, %l6
- mov %o4, %l7
wr %g0, ASI_P, %asi
mov 1, %g1
@@ -579,15 +690,38 @@
sethi %hi(kern_base), %g3
ldx [%g3 + %lo(kern_base)], %g3
add %g2, %g3, %o1
+ sethi %hi(sparc64_ttable_tl0), %o0
- call prom_set_trap_table_sun4v
- sethi %hi(sparc64_ttable_tl0), %o0
+ set prom_set_trap_table_name, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 2, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 0, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ stx %o0, [%sp + 2047 + 128 + 0x18]
+ stx %o1, [%sp + 2047 + 128 + 0x20]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
ba,pt %xcc, 2f
nop
-1: call prom_set_trap_table
- sethi %hi(sparc64_ttable_tl0), %o0
+1: sethi %hi(sparc64_ttable_tl0), %o0
+ set prom_set_trap_table_name, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 1, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 0, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ stx %o0, [%sp + 2047 + 128 + 0x18]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
/* Start using proper page size encodings in ctx register. */
2: sethi %hi(sparc64_kern_pri_context), %g3
@@ -603,12 +737,13 @@
membar #Sync
+ BRANCH_IF_SUN4V(o2, 1f)
+
/* Kill PROM timer */
sethi %hi(0x80000000), %o2
sllx %o2, 32, %o2
wr %o2, 0, %tick_cmpr
- BRANCH_IF_SUN4V(o2, 1f)
BRANCH_IF_ANY_CHEETAH(o2, o3, 1f)
ba,pt %xcc, 2f
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/time.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/time.c
@@ -849,9 +849,6 @@
{
struct device_node *dp;
unsigned long clock;
-#ifdef CONFIG_SMP
- extern void smp_tick_init(void);
-#endif
dp = of_find_node_by_path("/");
if (tlb_type == spitfire) {
@@ -874,10 +871,6 @@
clock = of_getintprop_default(dp, "stick-frequency", 0);
}
-#ifdef CONFIG_SMP
- smp_tick_init();
-#endif
-
return clock;
}
@@ -1038,10 +1031,31 @@
sparc64_clockevent.mult = mult;
}
+static unsigned long tb_ticks_per_usec __read_mostly;
+/* Busy-wait until the tick counter has advanced by at least @loops ticks. */
+void __delay(unsigned long loops)
+{
+	unsigned long start = tick_ops->get_tick();
+	unsigned long elapsed;
+
+	do {
+		elapsed = tick_ops->get_tick() - start;
+	} while (elapsed < loops);
+}
+EXPORT_SYMBOL(__delay);
+/* Spin for @usecs microseconds: scale by tb_ticks_per_usec and hand the tick count to __delay(). */
+void udelay(unsigned long usecs)
+{
+ __delay(tb_ticks_per_usec * usecs);
+}
+EXPORT_SYMBOL(udelay);
+
void __init time_init(void)
{
unsigned long clock = sparc64_init_timers();
+ tb_ticks_per_usec = clock / USEC_PER_SEC;
+
timer_ticks_per_nsec_quotient =
clocksource_hz2mult(clock, SPARC64_NSEC_PER_CYC_SHIFT);
@@ -1102,7 +1116,7 @@
* Not having a register set can lead to trouble.
* Also starfire doesn't have a tod clock.
*/
- if (!mregs && !dregs & !bregs)
+ if (!mregs && !dregs && !bregs)
return -1;
if (mregs) {
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/viohs.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/viohs.c
@@ -0,0 +1,822 @@
+/* viohs.c: LDOM Virtual I/O handshake helper layer.
+ *
+ * Copyright (C) 2007 David S. Miller
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+/* Write @len bytes of @data to the LDC channel, retrying while ldc_write() returns -EAGAIN. */
+int vio_ldc_send(struct vio_driver_state *vio, void *data, int len)
+{
+	int tries_left, rc = -EINVAL;
+
+	/* At most 1000 attempts, 1us apart; any result other than -EAGAIN ends the loop. */
+	for (tries_left = 1000; tries_left > 0; tries_left--) {
+		rc = ldc_write(vio->lp, data, len);
+		if (rc != -EAGAIN)
+			break;
+		udelay(1);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(vio_ldc_send);
+/* Stamp @tag with the SID from vio_send_sid(), then send @len bytes starting at @tag. */
+static int send_ctrl(struct vio_driver_state *vio,
+ struct vio_msg_tag *tag, int len)
+{
+ tag->sid = vio_send_sid(vio);
+ return vio_ldc_send(vio, tag, len);
+}
+/* Set the type, subtype and subtype-envelope fields of @tag; no other field is written. */
+static void init_tag(struct vio_msg_tag *tag, u8 type, u8 stype, u16 stype_env)
+{
+ tag->type = type;
+ tag->stype = stype;
+ tag->stype_env = stype_env;
+}
+/* Send a VER_INFO/INFO control packet proposing version @major.@minor for our device class. */
+static int send_version(struct vio_driver_state *vio, u16 major, u16 minor)
+{
+ struct vio_ver_info pkt;
+
+ vio->_local_sid = (u32) sched_clock(); /* a new local SID is picked on every version offer */
+
+ memset(&pkt, 0, sizeof(pkt));
+ init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_VER_INFO);
+ pkt.major = major;
+ pkt.minor = minor;
+ pkt.dev_class = vio->dev_class;
+
+ viodbg(HS, "SEND VERSION INFO maj[%u] min[%u] devclass[%u]\n",
+ major, minor, vio->dev_class);
+
+ return send_ctrl(vio, &pkt.tag, sizeof(pkt));
+}
+/* Reset the handshake state and offer the peer the first entry of our version table. */
+static int start_handshake(struct vio_driver_state *vio)
+{
+	struct vio_version *first = &vio->ver_table[0];
+	int rc;
+
+	viodbg(HS, "START HANDSHAKE\n");
+
+	vio->hs_state = VIO_HS_INVALID;
+
+	rc = send_version(vio, first->major, first->minor);
+	/* Only failures propagate; any non-negative send result is reported as 0. */
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+/* Drop the registered RX ring: free desc_buf and clear the ring state, keeping only ident. */
+static void flush_rx_dring(struct vio_driver_state *vio)
+{
+	struct vio_dring_state *dr;
+	u64 ident;
+
+	BUG_ON(!(vio->dr_state & VIO_DR_STATE_RXREG));
+
+	dr = &vio->drings[VIO_DRIVER_RX_RING];
+	ident = dr->ident;	/* carried across the memset() below */
+	BUG_ON(!vio->desc_buf);
+	kfree(vio->desc_buf);
+	vio->desc_buf = NULL;
+	vio->desc_buf_len = 0;	/* keep the length in step with the freed buffer */
+
+	memset(dr, 0, sizeof(*dr));
+	dr->ident = ident;
+}
+/* React to an LDC link event: LDC_EVENT_UP starts a handshake, LDC_EVENT_RESET tears state down. */
+void vio_link_state_change(struct vio_driver_state *vio, int event)
+{
+ if (event == LDC_EVENT_UP) {
+ vio->hs_state = VIO_HS_INVALID;
+
+ switch (vio->dev_class) { /* choose which ring directions this class requires */
+ case VDEV_NETWORK:
+ case VDEV_NETWORK_SWITCH:
+ vio->dr_state = (VIO_DR_STATE_TXREQ |
+ VIO_DR_STATE_RXREQ);
+ break;
+
+ case VDEV_DISK:
+ vio->dr_state = VIO_DR_STATE_TXREQ;
+ break;
+ case VDEV_DISK_SERVER:
+ vio->dr_state = VIO_DR_STATE_RXREQ;
+ break;
+ }
+ start_handshake(vio); /* the return value is not checked here */
+ } else if (event == LDC_EVENT_RESET) {
+ vio->hs_state = VIO_HS_INVALID;
+
+ if (vio->dr_state & VIO_DR_STATE_RXREG)
+ flush_rx_dring(vio);
+
+ vio->dr_state = 0x00;
+ memset(&vio->ver, 0, sizeof(vio->ver));
+
+ ldc_disconnect(vio->lp);
+ }
+}
+EXPORT_SYMBOL(vio_link_state_change);
+/* Abandon the handshake: clear ring-registered flags, RX ring and desc_buf; returns -ECONNRESET. */
+static int handshake_failure(struct vio_driver_state *vio)
+{
+ struct vio_dring_state *dr;
+
+ /* XXX Put policy here... Perhaps start a timer to fire
+ * XXX in 100 ms, which will bring the link up and retry
+ * XXX the handshake.
+ */
+
+ viodbg(HS, "HANDSHAKE FAILURE\n");
+
+ vio->dr_state &= ~(VIO_DR_STATE_TXREG |
+ VIO_DR_STATE_RXREG);
+
+ dr = &vio->drings[VIO_DRIVER_RX_RING];
+ memset(dr, 0, sizeof(*dr)); /* unlike flush_rx_dring(), the ring ident is not preserved */
+
+ kfree(vio->desc_buf);
+ vio->desc_buf = NULL;
+ vio->desc_buf_len = 0;
+
+ vio->hs_state = VIO_HS_INVALID;
+
+ return -ECONNRESET;
+}
+/* Log an unrecognised control packet, disconnect the LDC channel and return -ECONNRESET. */
+static int process_unknown(struct vio_driver_state *vio, void *arg)
+{
+ struct vio_msg_tag *pkt = arg;
+
+ viodbg(HS, "UNKNOWN CONTROL [%02x:%02x:%04x:%08x]\n",
+ pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
+
+ printk(KERN_ERR "vio: ID[%lu] Resetting connection.\n",
+ vio->vdev->channel_id);
+
+ ldc_disconnect(vio->lp);
+
+ return -ECONNRESET;
+}
+/* Send a DRING_REG INFO packet describing our TX ring and carrying all of its cookies. */
+static int send_dreg(struct vio_driver_state *vio)
+{
+ struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_TX_RING];
+ union { /* sized at run time: the fixed packet plus room for dr->ncookies cookies */
+ struct vio_dring_register pkt;
+ char all[sizeof(struct vio_dring_register) +
+ (sizeof(struct ldc_trans_cookie) *
+ dr->ncookies)];
+ } u;
+ int i;
+
+ memset(&u, 0, sizeof(u));
+ init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG);
+ u.pkt.dring_ident = 0;
+ u.pkt.num_descr = dr->num_entries;
+ u.pkt.descr_size = dr->entry_size;
+ u.pkt.options = VIO_TX_DRING;
+ u.pkt.num_cookies = dr->ncookies;
+
+ viodbg(HS, "SEND DRING_REG INFO ndesc[%u] dsz[%u] opt[0x%x] "
+ "ncookies[%u]\n",
+ u.pkt.num_descr, u.pkt.descr_size, u.pkt.options,
+ u.pkt.num_cookies);
+
+ for (i = 0; i < dr->ncookies; i++) {
+ u.pkt.cookies[i] = dr->cookies[i];
+
+ viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
+ i,
+ (unsigned long long) u.pkt.cookies[i].cookie_addr,
+ (unsigned long long) u.pkt.cookies[i].cookie_size);
+ }
+
+ return send_ctrl(vio, &u.pkt.tag, sizeof(u)); /* sizeof(u) covers the cookie space too */
+}
+/* Send a zero-initialised RDX INFO control packet. */
+static int send_rdx(struct vio_driver_state *vio)
+{
+ struct vio_rdx pkt;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX);
+
+ viodbg(HS, "SEND RDX INFO\n");
+
+ return send_ctrl(vio, &pkt.tag, sizeof(pkt));
+}
+/* Delegate attribute sending to the driver's send_attr() op and return its result. */
+static int send_attr(struct vio_driver_state *vio)
+{
+ return vio->ops->send_attr(vio);
+}
+/* Return the first version-table entry whose major is <= @major, or NULL if there is none. */
+static struct vio_version *find_by_major(struct vio_driver_state *vio,
+					 u16 major)
+{
+	int idx;
+
+	for (idx = 0; idx < vio->ver_table_entries; idx++) {
+		struct vio_version *entry = &vio->ver_table[idx];
+
+		/* Table order decides which match wins: stop at the first hit. */
+		if (entry->major <= major)
+			return entry;
+	}
+
+	return NULL;
+}
+/* Handle a peer's VERSION INFO: ACK a major we support (minor clamped to ours), else NACK. */
+static int process_ver_info(struct vio_driver_state *vio,
+ struct vio_ver_info *pkt)
+{
+ struct vio_version *vap;
+ int err;
+
+ viodbg(HS, "GOT VERSION INFO maj[%u] min[%u] devclass[%u]\n",
+ pkt->major, pkt->minor, pkt->dev_class);
+
+ if (vio->hs_state != VIO_HS_INVALID) {
+ /* XXX Perhaps invoke start_handshake? XXX */
+ memset(&vio->ver, 0, sizeof(vio->ver));
+ vio->hs_state = VIO_HS_INVALID;
+ }
+
+ vap = find_by_major(vio, pkt->major);
+
+ vio->_peer_sid = pkt->tag.sid; /* the peer's SID is taken from this packet */
+
+ if (!vap) { /* nothing usable in our table: NACK with 0.0 */
+ pkt->tag.stype = VIO_SUBTYPE_NACK;
+ pkt->major = 0;
+ pkt->minor = 0;
+ viodbg(HS, "SEND VERSION NACK maj[0] min[0]\n");
+ err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+ } else if (vap->major != pkt->major) { /* NACK, counter-proposing the entry we found */
+ pkt->tag.stype = VIO_SUBTYPE_NACK;
+ pkt->major = vap->major;
+ pkt->minor = vap->minor;
+ viodbg(HS, "SEND VERSION NACK maj[%u] min[%u]\n",
+ pkt->major, pkt->minor);
+ err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+ } else {
+ struct vio_version ver = {
+ .major = pkt->major,
+ .minor = pkt->minor,
+ };
+ if (ver.minor > vap->minor)
+ ver.minor = vap->minor;
+ pkt->minor = ver.minor;
+ pkt->tag.stype = VIO_SUBTYPE_ACK;
+ viodbg(HS, "SEND VERSION ACK maj[%u] min[%u]\n",
+ pkt->major, pkt->minor);
+ err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+ if (err > 0) { /* the version is recorded only when the ACK send returned > 0 */
+ vio->ver = ver;
+ vio->hs_state = VIO_HS_GOTVERS;
+ }
+ }
+ if (err < 0)
+ return handshake_failure(vio);
+
+ return 0;
+}
+/* Handle VERSION ACK: adopt (or cross-check) the version, then send attrs for NETWORK/DISK. */
+static int process_ver_ack(struct vio_driver_state *vio,
+ struct vio_ver_info *pkt)
+{
+ viodbg(HS, "GOT VERSION ACK maj[%u] min[%u] devclass[%u]\n",
+ pkt->major, pkt->minor, pkt->dev_class);
+
+ if (vio->hs_state & VIO_HS_GOTVERS) { /* a version is already held: the ACK must match it */
+ if (vio->ver.major != pkt->major ||
+ vio->ver.minor != pkt->minor) {
+ pkt->tag.stype = VIO_SUBTYPE_NACK;
+ (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+ return handshake_failure(vio);
+ }
+ } else {
+ vio->ver.major = pkt->major;
+ vio->ver.minor = pkt->minor;
+ vio->hs_state = VIO_HS_GOTVERS;
+ }
+
+ switch (vio->dev_class) {
+ case VDEV_NETWORK:
+ case VDEV_DISK:
+ if (send_attr(vio) < 0)
+ return handshake_failure(vio);
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+/* Handle VERSION NACK: re-offer our entry for the peer's major; fail on 0.0 or no entry. */
+static int process_ver_nack(struct vio_driver_state *vio,
+ struct vio_ver_info *pkt)
+{
+ struct vio_version *nver;
+
+ viodbg(HS, "GOT VERSION NACK maj[%u] min[%u] devclass[%u]\n",
+ pkt->major, pkt->minor, pkt->dev_class);
+
+ if ((pkt->major == 0 && pkt->minor == 0) ||
+ !(nver = find_by_major(vio, pkt->major)))
+ return handshake_failure(vio);
+
+ if (send_version(vio, nver->major, nver->minor) < 0)
+ return handshake_failure(vio);
+
+ return 0;
+}
+/* Dispatch a version packet on its subtype; anything but INFO/ACK/NACK fails the handshake. */
+static int process_ver(struct vio_driver_state *vio, struct vio_ver_info *pkt)
+{
+ switch (pkt->tag.stype) {
+ case VIO_SUBTYPE_INFO:
+ return process_ver_info(vio, pkt);
+
+ case VIO_SUBTYPE_ACK:
+ return process_ver_ack(vio, pkt);
+
+ case VIO_SUBTYPE_NACK:
+ return process_ver_nack(vio, pkt);
+
+ default:
+ return handshake_failure(vio);
+ };
+}
+/* Pass an attribute packet to the driver's handle_attr(), then offer our TX ring if needed. */
+static int process_attr(struct vio_driver_state *vio, void *pkt)
+{
+	int rc;
+
+	if (!(vio->hs_state & VIO_HS_GOTVERS))
+		return handshake_failure(vio);
+
+	rc = vio->ops->handle_attr(vio, pkt);
+	if (rc < 0)
+		return handshake_failure(vio);
+
+	vio->hs_state |= VIO_HS_GOT_ATTR;
+
+	/* Offer our TX ring only if one is required and has not been offered yet. */
+	if (!(vio->dr_state & VIO_DR_STATE_TXREQ) ||
+	    (vio->hs_state & VIO_HS_SENT_DREG))
+		return 0;
+	if (send_dreg(vio) < 0)
+		return handshake_failure(vio);
+
+	vio->hs_state |= VIO_HS_SENT_DREG;
+	return 0;
+}
+/* Return 1 when every ring direction flagged as required in dr_state is also registered. */
+static int all_drings_registered(struct vio_driver_state *vio)
+{
+	int rx_required = (vio->dr_state & VIO_DR_STATE_RXREQ);
+	int tx_required = (vio->dr_state & VIO_DR_STATE_TXREQ);
+	int rx_pending, tx_pending;
+
+	/* "Pending" means required but not yet registered. */
+	rx_pending = rx_required && !(vio->dr_state & VIO_DR_STATE_RXREG);
+	tx_pending = tx_required && !(vio->dr_state & VIO_DR_STATE_TXREG);
+
+	if (rx_pending)
+		return 0;
+	if (tx_pending)
+		return 0;
+
+	return 1;
+}
+/* Handle the peer's DRING_REG INFO: record its ring as our RX ring and ACK, or NACK and fail. */
+static int process_dreg_info(struct vio_driver_state *vio,
+ struct vio_dring_register *pkt)
+{
+ struct vio_dring_state *dr;
+ int i, len;
+
+ viodbg(HS, "GOT DRING_REG INFO ident[%llx] "
+ "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
+ (unsigned long long) pkt->dring_ident,
+ pkt->num_descr, pkt->descr_size, pkt->options,
+ pkt->num_cookies);
+
+ if (!(vio->dr_state & VIO_DR_STATE_RXREQ))
+ goto send_nack;
+
+ if (vio->dr_state & VIO_DR_STATE_RXREG)
+ goto send_nack;
+
+ BUG_ON(vio->desc_buf);
+
+ vio->desc_buf = kzalloc(pkt->descr_size, GFP_ATOMIC); /* NOTE(review): size is taken from the packet unchecked */
+ if (!vio->desc_buf)
+ goto send_nack;
+
+ vio->desc_buf_len = pkt->descr_size;
+
+ dr = &vio->drings[VIO_DRIVER_RX_RING];
+
+ dr->num_entries = pkt->num_descr;
+ dr->entry_size = pkt->descr_size;
+ dr->ncookies = pkt->num_cookies; /* NOTE(review): no bound against the capacity of dr->cookies is visible here — confirm */
+ for (i = 0; i < dr->ncookies; i++) {
+ dr->cookies[i] = pkt->cookies[i];
+
+ viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
+ i,
+ (unsigned long long)
+ pkt->cookies[i].cookie_addr,
+ (unsigned long long)
+ pkt->cookies[i].cookie_size);
+ }
+
+ pkt->tag.stype = VIO_SUBTYPE_ACK;
+ pkt->dring_ident = ++dr->ident; /* the ACK carries a freshly incremented ring ident */
+
+ viodbg(HS, "SEND DRING_REG ACK ident[%llx]\n",
+ (unsigned long long) pkt->dring_ident);
+
+ len = (sizeof(*pkt) +
+ (dr->ncookies * sizeof(struct ldc_trans_cookie)));
+ if (send_ctrl(vio, &pkt->tag, len) < 0)
+ goto send_nack;
+
+ vio->dr_state |= VIO_DR_STATE_RXREG;
+
+ return 0;
+
+send_nack:
+ pkt->tag.stype = VIO_SUBTYPE_NACK;
+ viodbg(HS, "SEND DRING_REG NACK\n");
+ (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+
+ return handshake_failure(vio);
+}
+/* Handle DRING_REG ACK for our TX ring; once all required rings are registered, send RDX. */
+static int process_dreg_ack(struct vio_driver_state *vio,
+ struct vio_dring_register *pkt)
+{
+ struct vio_dring_state *dr;
+
+ viodbg(HS, "GOT DRING_REG ACK ident[%llx] "
+ "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
+ (unsigned long long) pkt->dring_ident,
+ pkt->num_descr, pkt->descr_size, pkt->options,
+ pkt->num_cookies);
+
+ dr = &vio->drings[VIO_DRIVER_TX_RING];
+
+ if (!(vio->dr_state & VIO_DR_STATE_TXREQ))
+ return handshake_failure(vio);
+
+ dr->ident = pkt->dring_ident; /* adopt the ident carried in the ACK */
+ vio->dr_state |= VIO_DR_STATE_TXREG;
+
+ if (all_drings_registered(vio)) {
+ if (send_rdx(vio) < 0)
+ return handshake_failure(vio);
+ vio->hs_state = VIO_HS_SENT_RDX; /* plain assignment: previously set hs_state bits are dropped */
+ }
+ return 0;
+}
+/* Handle DRING_REG NACK: log the packet and fail the handshake. */
+static int process_dreg_nack(struct vio_driver_state *vio,
+ struct vio_dring_register *pkt)
+{
+ viodbg(HS, "GOT DRING_REG NACK ident[%llx] "
+ "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
+ (unsigned long long) pkt->dring_ident,
+ pkt->num_descr, pkt->descr_size, pkt->options,
+ pkt->num_cookies);
+
+ return handshake_failure(vio);
+}
+/* Dispatch a DRING_REG packet on its subtype; requires VIO_HS_GOTVERS to be set first. */
+static int process_dreg(struct vio_driver_state *vio,
+ struct vio_dring_register *pkt)
+{
+ if (!(vio->hs_state & VIO_HS_GOTVERS))
+ return handshake_failure(vio);
+
+ switch (pkt->tag.stype) {
+ case VIO_SUBTYPE_INFO:
+ return process_dreg_info(vio, pkt);
+
+ case VIO_SUBTYPE_ACK:
+ return process_dreg_ack(vio, pkt);
+
+ case VIO_SUBTYPE_NACK:
+ return process_dreg_nack(vio, pkt);
+
+ default:
+ return handshake_failure(vio);
+ }
+}
+/* Handle DRING_UNREG: if the ident matches our RX ring, unregister it and free desc_buf. */
+static int process_dunreg(struct vio_driver_state *vio,
+ struct vio_dring_unregister *pkt)
+{
+ struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_RX_RING];
+
+ viodbg(HS, "GOT DRING_UNREG\n");
+
+ if (pkt->dring_ident != dr->ident)
+ return 0; /* a request for some other ring ident is ignored, not failed */
+
+ vio->dr_state &= ~VIO_DR_STATE_RXREG;
+
+ memset(dr, 0, sizeof(*dr));
+
+ kfree(vio->desc_buf);
+ vio->desc_buf = NULL;
+ vio->desc_buf_len = 0;
+
+ return 0;
+}
+/* Handle RDX INFO: turn the packet into an ACK, send it back and set VIO_HS_SENT_RDX_ACK. */
+static int process_rdx_info(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+ viodbg(HS, "GOT RDX INFO\n");
+
+ pkt->tag.stype = VIO_SUBTYPE_ACK;
+ viodbg(HS, "SEND RDX ACK\n");
+ if (send_ctrl(vio, &pkt->tag, sizeof(*pkt)) < 0)
+ return handshake_failure(vio);
+
+ vio->hs_state |= VIO_HS_SENT_RDX_ACK;
+ return 0;
+}
+/* Handle RDX ACK: valid only after we sent RDX; on success set VIO_HS_GOT_RDX_ACK. */
+static int process_rdx_ack(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+ viodbg(HS, "GOT RDX ACK\n");
+
+ if (!(vio->hs_state & VIO_HS_SENT_RDX))
+ return handshake_failure(vio);
+
+ vio->hs_state |= VIO_HS_GOT_RDX_ACK;
+ return 0;
+}
+/* Handle RDX NACK: always fails the handshake. */
+static int process_rdx_nack(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+ viodbg(HS, "GOT RDX NACK\n");
+
+ return handshake_failure(vio);
+}
+/* Handle an RDX packet: require all needed rings registered, then dispatch on its subtype. */
+static int process_rdx(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+	if (!all_drings_registered(vio))
+		return handshake_failure(vio);	/* stop: the state was just reset */
+
+	switch (pkt->tag.stype) {
+	case VIO_SUBTYPE_INFO:
+		return process_rdx_info(vio, pkt);
+
+	case VIO_SUBTYPE_ACK:
+		return process_rdx_ack(vio, pkt);
+
+	case VIO_SUBTYPE_NACK:
+		return process_rdx_nack(vio, pkt);
+
+	default:
+		return handshake_failure(vio);
+	}
+}
+/* Dispatch one control packet on tag->stype_env; may invoke the handshake_complete() op. */
+int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt)
+{
+ struct vio_msg_tag *tag = pkt;
+ u8 prev_state = vio->hs_state; /* snapshot used below to detect a state change */
+ int err;
+
+ switch (tag->stype_env) {
+ case VIO_VER_INFO:
+ err = process_ver(vio, pkt);
+ break;
+
+ case VIO_ATTR_INFO:
+ err = process_attr(vio, pkt);
+ break;
+
+ case VIO_DRING_REG:
+ err = process_dreg(vio, pkt);
+ break;
+
+ case VIO_DRING_UNREG:
+ err = process_dunreg(vio, pkt);
+ break;
+
+ case VIO_RDX:
+ err = process_rdx(vio, pkt);
+ break;
+
+ default:
+ err = process_unknown(vio, pkt);
+ break;
+ }
+ if (!err &&
+ vio->hs_state != prev_state &&
+ (vio->hs_state & VIO_HS_COMPLETE))
+ vio->ops->handshake_complete(vio);
+
+ return err;
+}
+EXPORT_SYMBOL(vio_control_pkt_engine);
+/* Exported hook with an empty body: currently does nothing. */
+void vio_conn_reset(struct vio_driver_state *vio)
+{
+}
+EXPORT_SYMBOL(vio_conn_reset);
+
+/* The Solaris virtual disk server just mirrors the SID values it gets
+ * from the client peer.  vio_{validate,send}_sid() work around that so
+ * the drivers don't need to be aware of it.  vio_validate_sid() returns
+ * 0 when @tp carries the expected SID and -EINVAL otherwise.
+ */
+int vio_validate_sid(struct vio_driver_state *vio, struct vio_msg_tag *tp)
+{
+ u32 sid;
+
+ /* Always let VERSION+INFO packets through unchecked, they
+ * define the new SID.
+ */
+ if (tp->type == VIO_TYPE_CTRL &&
+ tp->stype == VIO_SUBTYPE_INFO &&
+ tp->stype_env == VIO_VER_INFO)
+ return 0;
+
+ /* Ok, now figure out which SID to use. */
+ switch (vio->dev_class) {
+ case VDEV_NETWORK:
+ case VDEV_NETWORK_SWITCH:
+ case VDEV_DISK_SERVER:
+ default:
+ sid = vio->_peer_sid;
+ break;
+
+ case VDEV_DISK:
+ sid = vio->_local_sid; /* disk clients expect their own SID to come back */
+ break;
+ }
+
+ if (sid == tp->sid)
+ return 0;
+ viodbg(DATA, "BAD SID tag->sid[%08x] peer_sid[%08x] local_sid[%08x]\n",
+ tp->sid, vio->_peer_sid, vio->_local_sid);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(vio_validate_sid);
+/* Pick the SID for outgoing packets: the peer's for VDEV_DISK_SERVER, our local one otherwise. */
+u32 vio_send_sid(struct vio_driver_state *vio)
+{
+ switch (vio->dev_class) {
+ case VDEV_NETWORK:
+ case VDEV_NETWORK_SWITCH:
+ case VDEV_DISK:
+ default:
+ return vio->_local_sid;
+
+ case VDEV_DISK_SERVER:
+ return vio->_peer_sid;
+ }
+}
+EXPORT_SYMBOL(vio_send_sid);
+/* Allocate the LDC channel from a copy of @base_cfg using the vdev's IRQs; 0 or PTR_ERR(). */
+int vio_ldc_alloc(struct vio_driver_state *vio,
+ struct ldc_channel_config *base_cfg,
+ void *event_arg)
+{
+ struct ldc_channel_config cfg = *base_cfg; /* local copy: @base_cfg itself is not modified */
+ struct ldc_channel *lp;
+
+ cfg.tx_irq = vio->vdev->tx_irq;
+ cfg.rx_irq = vio->vdev->rx_irq;
+
+ lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg);
+ if (IS_ERR(lp))
+ return PTR_ERR(lp);
+
+ vio->lp = lp;
+
+ return 0;
+}
+EXPORT_SYMBOL(vio_ldc_alloc);
+/* Free the LDC channel and the descriptor buffer, clearing the pointers and the length. */
+void vio_ldc_free(struct vio_driver_state *vio)
+{
+ ldc_free(vio->lp);
+ vio->lp = NULL;
+
+ kfree(vio->desc_buf);
+ vio->desc_buf = NULL;
+ vio->desc_buf_len = 0;
+}
+EXPORT_SYMBOL(vio_ldc_free);
+/* Bind (if still in INIT state) and connect the channel under vio->lock; retry by timer on error. */
+void vio_port_up(struct vio_driver_state *vio)
+{
+ unsigned long flags;
+ int err, state;
+
+ spin_lock_irqsave(&vio->lock, flags);
+
+ state = ldc_state(vio->lp);
+
+ err = 0;
+ if (state == LDC_STATE_INIT) {
+ err = ldc_bind(vio->lp, vio->name);
+ if (err)
+ printk(KERN_WARNING "%s: Port %lu bind failed, "
+ "err=%d\n",
+ vio->name, vio->vdev->channel_id, err);
+ }
+
+ if (!err) { /* connect only when no bind error occurred */
+ err = ldc_connect(vio->lp);
+ if (err)
+ printk(KERN_WARNING "%s: Port %lu connect failed, "
+ "err=%d\n",
+ vio->name, vio->vdev->channel_id, err);
+ }
+ if (err) {
+ unsigned long expires = jiffies + HZ;
+
+ expires = round_jiffies(expires);
+ mod_timer(&vio->timer, expires); /* vio_port_timer() will call vio_port_up() again */
+ }
+
+ spin_unlock_irqrestore(&vio->lock, flags);
+}
+EXPORT_SYMBOL(vio_port_up);
+/* Timer callback: @_arg is the vio_driver_state pointer registered in vio_driver_init(). */
+static void vio_port_timer(unsigned long _arg)
+{
+ struct vio_driver_state *vio = (struct vio_driver_state *) _arg;
+
+ vio_port_up(vio);
+}
+/* Validate the arguments and initialise @vio; returns 0, or -EINVAL on a bad argument. */
+int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev,
+		    u8 dev_class, struct vio_version *ver_table,
+		    int ver_table_size, struct vio_driver_ops *ops,
+		    char *name)
+{
+	switch (dev_class) {
+	case VDEV_NETWORK:
+	case VDEV_NETWORK_SWITCH:
+	case VDEV_DISK:
+	case VDEV_DISK_SERVER:
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (!ops->send_attr ||
+	    !ops->handle_attr ||
+	    !ops->handshake_complete)
+		return -EINVAL;
+	/* start_handshake() reads ver_table[0], so an empty table is unusable. */
+	if (!ver_table || ver_table_size <= 0)
+		return -EINVAL;
+
+	if (!name)
+		return -EINVAL;
+
+	spin_lock_init(&vio->lock);
+
+	vio->name = name;
+
+	vio->dev_class = dev_class;
+	vio->vdev = vdev;
+
+	vio->ver_table = ver_table;
+	vio->ver_table_entries = ver_table_size;
+
+	vio->ops = ops;
+
+	setup_timer(&vio->timer, vio_port_timer, (unsigned long) vio);
+
+	return 0;
+}
+EXPORT_SYMBOL(vio_driver_init);
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/ds.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/ds.c
@@ -0,0 +1,1248 @@
+/* ds.c: Domain Services driver for Logical Domains
+ *
+ * Copyright (C) 2007 David S. Miller
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define DRV_MODULE_NAME "ds"
+#define PFX DRV_MODULE_NAME ": "
+#define DRV_MODULE_VERSION "1.0"
+#define DRV_MODULE_RELDATE "Jul 11, 2007"
+
+static char version[] __devinitdata =
+ DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("Sun LDOM domain services driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+/* Tag that leads each DS message structure defined in this file. */
+struct ds_msg_tag {
+ __u32 type; /* one of the DS_* codes listed below */
+#define DS_INIT_REQ 0x00
+#define DS_INIT_ACK 0x01
+#define DS_INIT_NACK 0x02
+#define DS_REG_REQ 0x03
+#define DS_REG_ACK 0x04
+#define DS_REG_NACK 0x05
+#define DS_UNREG_REQ 0x06
+#define DS_UNREG_ACK 0x07
+#define DS_UNREG_NACK 0x08
+#define DS_DATA 0x09
+#define DS_NACK 0x0a
+
+ __u32 len; /* senders in this file store the message size minus sizeof(this tag) */
+};
+
+/* Result codes */
+#define DS_OK 0x00
+#define DS_REG_VER_NACK 0x01
+#define DS_REG_DUP 0x02
+#define DS_INV_HDL 0x03
+#define DS_TYPE_UNKNOWN 0x04
+/* Message layouts; all but ds_version start with a ds_msg_tag. Pairing with DS_* codes is presumably by name — confirm. */
+struct ds_version {
+ __u16 major;
+ __u16 minor;
+};
+
+struct ds_ver_req {
+ struct ds_msg_tag tag;
+ struct ds_version ver;
+};
+
+struct ds_ver_ack {
+ struct ds_msg_tag tag;
+ __u16 minor;
+};
+
+struct ds_ver_nack {
+ struct ds_msg_tag tag;
+ __u16 major;
+};
+
+struct ds_reg_req {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u16 major;
+ __u16 minor;
+ char svc_id[0]; /* zero-length trailing array */
+};
+
+struct ds_reg_ack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u16 minor;
+};
+
+struct ds_reg_nack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u16 major;
+};
+
+struct ds_unreg_req {
+ struct ds_msg_tag tag;
+ __u64 handle;
+};
+
+struct ds_unreg_ack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+};
+
+struct ds_unreg_nack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+};
+
+struct ds_data {
+ struct ds_msg_tag tag;
+ __u64 handle; /* handlers in this file echo cp->handle here in their replies */
+};
+
+struct ds_data_nack {
+ struct ds_msg_tag tag;
+ __u64 handle;
+ __u64 result;
+};
+/* Per-service capability record; ds_info is forward-declared for the callback's signature. */
+struct ds_info;
+struct ds_cap_state {
+ __u64 handle; /* find_cap() uses the upper 32 bits as an index into ds_states */
+
+ void (*data)(struct ds_info *dp, /* presumably called for DS_DATA messages — dispatcher not visible here */
+ struct ds_cap_state *cp,
+ void *buf, int len);
+
+ const char *service_id; /* name compared by find_cap_by_string() */
+
+ u8 state; /* one of the CAP_STATE_* values below */
+#define CAP_STATE_UNKNOWN 0x00
+#define CAP_STATE_REG_SENT 0x01
+#define CAP_STATE_REGISTERED 0x02
+};
+
+static void md_update_data(struct ds_info *dp, struct ds_cap_state *cp,
+ void *buf, int len);
+static void domain_shutdown_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+static void domain_panic_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+#ifdef CONFIG_HOTPLUG_CPU
+static void dr_cpu_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+#endif
+static void ds_pri_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+static void ds_var_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len);
+/* Table of services: each entry pairs a service_id string with its data handler. */
+struct ds_cap_state ds_states_template[] = {
+ {
+ .service_id = "md-update",
+ .data = md_update_data,
+ },
+ {
+ .service_id = "domain-shutdown",
+ .data = domain_shutdown_data,
+ },
+ {
+ .service_id = "domain-panic",
+ .data = domain_panic_data,
+ },
+#ifdef CONFIG_HOTPLUG_CPU
+ {
+ .service_id = "dr-cpu",
+ .data = dr_cpu_data,
+ },
+#endif
+ {
+ .service_id = "pri",
+ .data = ds_pri_data,
+ },
+ {
+ .service_id = "var-config",
+ .data = ds_var_data,
+ },
+ {
+ .service_id = "var-config-backup",
+ .data = ds_var_data,
+ },
+};
+
+static DEFINE_SPINLOCK(ds_lock);
+/* State for one domain-services channel; instances are chained through ->next. */
+struct ds_info {
+ struct ldc_channel *lp; /* channel the handlers send replies on */
+ u8 hs_state; /* DS_HS_* value */
+#define DS_HS_START 0x01
+#define DS_HS_DONE 0x02
+
+ u64 id; /* printed as "ds-<id>" in log messages */
+
+ void *rcv_buf;
+ int rcv_buf_len;
+
+ struct ds_cap_state *ds_states; /* array of num_ds_states entries, see find_cap() */
+ int num_ds_states;
+
+ struct ds_info *next;
+};
+
+static struct ds_info *ds_info_list;
+/* Look up a capability by handle: its upper 32 bits index ds_states; NULL when out of range. */
+static struct ds_cap_state *find_cap(struct ds_info *dp, u64 handle)
+{
+ unsigned int index = handle >> 32;
+
+ if (index >= dp->num_ds_states)
+ return NULL;
+ return &dp->ds_states[index];
+}
+/* Return the capability whose service_id equals @name exactly, or NULL if there is none. */
+static struct ds_cap_state *find_cap_by_string(struct ds_info *dp,
+					       const char *name)
+{
+	int idx;
+
+	for (idx = 0; idx < dp->num_ds_states; idx++) {
+		struct ds_cap_state *cap = &dp->ds_states[idx];
+
+		if (strcmp(cap->service_id, name) == 0)
+			return cap;
+	}
+	return NULL;
+}
+/* Write to the LDC channel, retrying up to 1000 times (1us apart) while it returns -EAGAIN. */
+static int __ds_send(struct ldc_channel *lp, void *data, int len)
+{
+	int attempt, rc = -EINVAL;
+
+	for (attempt = 0; attempt < 1000; attempt++) {
+		rc = ldc_write(lp, data, len);
+
+		/* Any result other than -EAGAIN ends the retry loop. */
+		if (rc != -EAGAIN)
+			break;
+		udelay(1);
+	}
+	return rc;
+}
+/* Locked wrapper: calls __ds_send() with ds_lock held and interrupts disabled. */
+static int ds_send(struct ldc_channel *lp, void *data, int len)
+{
+ unsigned long flags;
+ int err;
+
+ spin_lock_irqsave(&ds_lock, flags);
+ err = __ds_send(lp, data, len);
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ return err;
+}
+/* Request/response payloads for "md-update"; each follows a struct ds_data in the message. */
+struct ds_md_update_req {
+ __u64 req_num;
+};
+
+struct ds_md_update_res {
+ __u64 req_num; /* copied from the request by md_update_data() */
+ __u32 result;
+};
+/* "md-update" handler: call mdesc_update(), then reply DS_OK echoing the request number. */
+static void md_update_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ldc_channel *lp = dp->lp;
+ struct ds_data *dpkt = buf;
+ struct ds_md_update_req *rp;
+ struct {
+ struct ds_data data;
+ struct ds_md_update_res res;
+ } pkt;
+
+ rp = (struct ds_md_update_req *) (dpkt + 1); /* payload follows the ds_data header; @len is not checked */
+
+ printk(KERN_INFO "ds-%lu: Machine description update.\n", dp->id);
+
+ mdesc_update();
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.data.tag.type = DS_DATA;
+ pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
+ pkt.data.handle = cp->handle;
+ pkt.res.req_num = rp->req_num;
+ pkt.res.result = DS_OK;
+
+ ds_send(lp, &pkt, sizeof(pkt)); /* the send result is ignored */
+}
+/* Request/response payloads for "domain-shutdown"; each follows a struct ds_data. */
+struct ds_shutdown_req {
+ __u64 req_num;
+ __u32 ms_delay; /* not read by domain_shutdown_data() */
+};
+
+struct ds_shutdown_res {
+ __u64 req_num;
+ __u32 result;
+ char reason[1]; /* domain_shutdown_data() sends an empty string here */
+};
+/* "domain-shutdown" handler: acknowledge with DS_OK, then call wake_up_powerd(). */
+static void domain_shutdown_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ldc_channel *lp = dp->lp;
+ struct ds_data *dpkt = buf;
+ struct ds_shutdown_req *rp;
+ struct {
+ struct ds_data data;
+ struct ds_shutdown_res res;
+ } pkt;
+
+ rp = (struct ds_shutdown_req *) (dpkt + 1); /* payload follows the ds_data header; @len is not checked */
+
+ printk(KERN_ALERT "ds-%lu: Shutdown request from "
+ "LDOM manager received.\n", dp->id);
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.data.tag.type = DS_DATA;
+ pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
+ pkt.data.handle = cp->handle;
+ pkt.res.req_num = rp->req_num;
+ pkt.res.result = DS_OK;
+ pkt.res.reason[0] = 0;
+
+ ds_send(lp, &pkt, sizeof(pkt)); /* the reply goes out before the shutdown is triggered */
+
+ wake_up_powerd();
+}
+/* Request/response payloads for "domain-panic"; each follows a struct ds_data. */
+struct ds_panic_req {
+ __u64 req_num;
+};
+
+struct ds_panic_res {
+ __u64 req_num;
+ __u32 result;
+ char reason[1]; /* domain_panic_data() sends an empty string here */
+};
+/* "domain-panic" handler: acknowledge with DS_OK, then panic() the kernel. */
+static void domain_panic_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ldc_channel *lp = dp->lp;
+ struct ds_data *dpkt = buf;
+ struct ds_panic_req *rp;
+ struct {
+ struct ds_data data;
+ struct ds_panic_res res;
+ } pkt;
+
+ rp = (struct ds_panic_req *) (dpkt + 1); /* payload follows the ds_data header; @len is not checked */
+
+ printk(KERN_ALERT "ds-%lu: Panic request from "
+ "LDOM manager received.\n", dp->id);
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.data.tag.type = DS_DATA;
+ pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
+ pkt.data.handle = cp->handle;
+ pkt.res.req_num = rp->req_num;
+ pkt.res.result = DS_OK;
+ pkt.res.reason[0] = 0;
+
+ ds_send(lp, &pkt, sizeof(pkt)); /* the reply goes out before the panic */
+
+ panic("PANIC requested by LDOM manager.");
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+struct dr_cpu_tag { /* follows the ds_data header in "dr-cpu" messages */
+ __u64 req_num; /* echoed back in responses */
+ __u32 type; /* request code, or DR_CPU_OK / DR_CPU_ERROR in a response */
+#define DR_CPU_CONFIGURE 0x43
+#define DR_CPU_UNCONFIGURE 0x55
+#define DR_CPU_FORCE_UNCONFIGURE 0x46
+#define DR_CPU_STATUS 0x53
+
+/* Responses */
+#define DR_CPU_OK 0x6f
+#define DR_CPU_ERROR 0x65
+
+ __u32 num_records; /* responses built here set this to the number of entries that follow */
+};
+/* One per-CPU record in a "dr-cpu" response; an array of these follows the dr_cpu_tag. */
+struct dr_cpu_resp_entry {
+ __u32 cpu;
+ __u32 result; /* DR_CPU_RES_* */
+#define DR_CPU_RES_OK 0x00
+#define DR_CPU_RES_FAILURE 0x01
+#define DR_CPU_RES_BLOCKED 0x02
+#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03
+#define DR_CPU_RES_NOT_IN_MD 0x04
+
+ __u32 stat; /* DR_CPU_STAT_* */
+#define DR_CPU_STAT_NOT_PRESENT 0x00
+#define DR_CPU_STAT_UNCONFIGURED 0x01
+#define DR_CPU_STAT_CONFIGURED 0x02
+
+ __u32 str_off; /* never set by the code visible here (left zero by kzalloc) */
+};
+/* Send a DR_CPU_ERROR reply with no records, echoing the request number; no locking is done here. */
+static void __dr_cpu_send_error(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ struct ds_data *data)
+{
+ struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); /* request tag follows the ds_data header */
+ struct {
+ struct ds_data data;
+ struct dr_cpu_tag tag;
+ } pkt;
+ int msg_len;
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.data.tag.type = DS_DATA;
+ pkt.data.handle = cp->handle;
+ pkt.tag.req_num = tag->req_num;
+ pkt.tag.type = DR_CPU_ERROR;
+ pkt.tag.num_records = 0;
+
+ msg_len = (sizeof(struct ds_data) +
+ sizeof(struct dr_cpu_tag));
+
+ pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
+
+ __ds_send(dp->lp, &pkt, msg_len);
+}
+/* Locked wrapper: calls __dr_cpu_send_error() with ds_lock held and interrupts disabled. */
+static void dr_cpu_send_error(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ struct ds_data *data)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ds_lock, flags);
+ __dr_cpu_send_error(dp, cp, data);
+ spin_unlock_irqrestore(&ds_lock, flags);
+}
+
+#define CPU_SENTINEL 0xffffffff
+/* Keep the first occurrence of each value in @list; overwrite later repeats with CPU_SENTINEL. */
+static void purge_dups(u32 *list, u32 num_ents)
+{
+	unsigned int first, later;
+
+	for (first = 0; first < num_ents; first++) {
+		u32 keep = list[first];
+
+		/* Slots holding the sentinel are skipped rather than matched. */
+		if (keep == CPU_SENTINEL)
+			continue;
+		for (later = first + 1; later < num_ents; later++) {
+			if (list[later] != keep)
+				continue;
+			list[later] = CPU_SENTINEL;
+		}
+	}
+}
+/* Byte size of a dr-cpu response: ds_data header + dr_cpu_tag + @ncpus response entries. */
+static int dr_cpu_size_response(int ncpus)
+{
+ return (sizeof(struct ds_data) +
+ sizeof(struct dr_cpu_tag) +
+ (sizeof(struct dr_cpu_resp_entry) * ncpus));
+}
+/* Fill @resp as a DR_CPU_OK reply with one DR_CPU_RES_OK entry per CPU set in @mask. */
+static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
+ u64 handle, int resp_len, int ncpus,
+ cpumask_t *mask, u32 default_stat)
+{
+ struct dr_cpu_resp_entry *ent;
+ struct dr_cpu_tag *tag;
+ int i, cpu;
+
+ tag = (struct dr_cpu_tag *) (resp + 1); /* tag sits right after the ds_data header */
+ ent = (struct dr_cpu_resp_entry *) (tag + 1); /* entries sit right after the tag */
+
+ resp->tag.type = DS_DATA;
+ resp->tag.len = resp_len - sizeof(struct ds_msg_tag);
+ resp->handle = handle;
+ tag->req_num = req_num;
+ tag->type = DR_CPU_OK;
+ tag->num_records = ncpus;
+
+ i = 0;
+ for_each_cpu_mask(cpu, *mask) {
+ ent[i].cpu = cpu;
+ ent[i].result = DR_CPU_RES_OK;
+ ent[i].stat = default_stat;
+ i++;
+ }
+ BUG_ON(i != ncpus); /* @ncpus must equal the number of CPUs set in @mask */
+}
+/* Set result/stat on the first response entry whose cpu equals @cpu; no-op if none matches. */
+static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus,
+ u32 res, u32 stat)
+{
+ struct dr_cpu_resp_entry *ent;
+ struct dr_cpu_tag *tag;
+ int i;
+
+ tag = (struct dr_cpu_tag *) (resp + 1);
+ ent = (struct dr_cpu_resp_entry *) (tag + 1);
+
+ for (i = 0; i < ncpus; i++) {
+ if (ent[i].cpu != cpu)
+ continue;
+ ent[i].result = res;
+ ent[i].stat = stat;
+ break;
+ }
+}
+/* cpu_up() each CPU in @mask, report per-CPU results, then fixup_irqs(); 0 or -ENOMEM. */
+static int dr_cpu_configure(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ u64 req_num,
+ cpumask_t *mask)
+{
+ struct ds_data *resp;
+ int resp_len, ncpus, cpu;
+ unsigned long flags;
+
+ ncpus = cpus_weight(*mask);
+ resp_len = dr_cpu_size_response(ncpus);
+ resp = kzalloc(resp_len, GFP_KERNEL);
+ if (!resp)
+ return -ENOMEM;
+
+ dr_cpu_init_response(resp, req_num, cp->handle, /* every entry starts as OK / CONFIGURED */
+ resp_len, ncpus, mask,
+ DR_CPU_STAT_CONFIGURED);
+
+ mdesc_fill_in_cpu_data(*mask);
+
+ for_each_cpu_mask(cpu, *mask) {
+ int err;
+
+ printk(KERN_INFO "ds-%lu: Starting cpu %d...\n",
+ dp->id, cpu);
+ err = cpu_up(cpu);
+ if (err) { /* downgrade this CPU's entry to the matching failure code */
+ __u32 res = DR_CPU_RES_FAILURE;
+ __u32 stat = DR_CPU_STAT_UNCONFIGURED;
+
+ if (!cpu_present(cpu)) {
+ /* CPU not present in MD */
+ res = DR_CPU_RES_NOT_IN_MD;
+ stat = DR_CPU_STAT_NOT_PRESENT;
+ } else if (err == -ENODEV) {
+ /* CPU did not call in successfully */
+ res = DR_CPU_RES_CPU_NOT_RESPONDING;
+ }
+
+ printk(KERN_INFO "ds-%lu: CPU startup failed err=%d\n",
+ dp->id, err);
+ dr_cpu_mark(resp, cpu, ncpus, res, stat);
+ }
+ }
+
+ spin_lock_irqsave(&ds_lock, flags);
+ __ds_send(dp->lp, resp, resp_len); /* the send result is ignored */
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ kfree(resp);
+
+ /* Redistribute IRQs, taking into account the new cpus. */
+ fixup_irqs();
+
+ return 0;
+}
+
+static int dr_cpu_unconfigure(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ u64 req_num,
+ cpumask_t *mask)
+{
+ struct ds_data *resp;
+ int resp_len, ncpus, cpu;
+ unsigned long flags;
+ /* cpu_down() each cpu in mask, then send one reply; -ENOMEM or 0 is returned. */
+ ncpus = cpus_weight(*mask);
+ resp_len = dr_cpu_size_response(ncpus);
+ resp = kzalloc(resp_len, GFP_KERNEL);
+ if (!resp)
+ return -ENOMEM;
+
+ dr_cpu_init_response(resp, req_num, cp->handle,
+ resp_len, ncpus, mask,
+ DR_CPU_STAT_UNCONFIGURED); /* assume success, failures are marked below */
+
+ for_each_cpu_mask(cpu, *mask) {
+ int err;
+
+ printk(KERN_INFO "ds-%lu: Shutting down cpu %d...\n",
+ dp->id, cpu);
+ err = cpu_down(cpu);
+ if (err) /* cpu stayed up: report it as still configured */
+ dr_cpu_mark(resp, cpu, ncpus,
+ DR_CPU_RES_FAILURE,
+ DR_CPU_STAT_CONFIGURED);
+ }
+
+ spin_lock_irqsave(&ds_lock, flags);
+ __ds_send(dp->lp, resp, resp_len); /* return value is not examined */
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ kfree(resp);
+
+ return 0; /* per-cpu failures are reported in the reply, not here */
+}
+
+static void dr_cpu_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ds_data *data = buf;
+ struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
+ u32 *cpu_list = (u32 *) (tag + 1); /* cpu ids follow the tag */
+ u64 req_num = tag->req_num;
+ cpumask_t mask;
+ unsigned int i;
+ int err;
+ /* Handle a DR cpu request: dedupe the id list, build a mask, (un)configure. */
+ switch (tag->type) {
+ case DR_CPU_CONFIGURE:
+ case DR_CPU_UNCONFIGURE:
+ case DR_CPU_FORCE_UNCONFIGURE:
+ break;
+
+ default: /* any other request type is answered with an error */
+ dr_cpu_send_error(dp, cp, data);
+ return;
+ }
+ /* NOTE(review): num_records is not checked against len - confirm bounds. */
+ purge_dups(cpu_list, tag->num_records);
+
+ cpus_clear(mask);
+ for (i = 0; i < tag->num_records; i++) {
+ if (cpu_list[i] == CPU_SENTINEL)
+ continue;
+
+ if (cpu_list[i] < NR_CPUS) /* ids >= NR_CPUS are silently dropped */
+ cpu_set(cpu_list[i], mask);
+ }
+
+ if (tag->type == DR_CPU_CONFIGURE)
+ err = dr_cpu_configure(dp, cp, req_num, &mask);
+ else /* UNCONFIGURE and FORCE_UNCONFIGURE take the same path */
+ err = dr_cpu_unconfigure(dp, cp, req_num, &mask);
+
+ if (err)
+ dr_cpu_send_error(dp, cp, data);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+struct ds_pri_msg { /* read by ds_pri_data() from just past struct ds_data */
+ __u64 req_num;
+ __u64 type; /* one of the DS_PRI_* values below */
+#define DS_PRI_REQUEST 0x00
+#define DS_PRI_DATA 0x01
+#define DS_PRI_UPDATE 0x02
+};
+
+static void ds_pri_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ds_data *dpkt = buf;
+ struct ds_pri_msg *rp;
+ /* Only logs the request number, type and length; no reply is sent here. */
+ rp = (struct ds_pri_msg *) (dpkt + 1);
+
+ printk(KERN_INFO "ds-%lu: PRI REQ [%lx:%lx], len=%d\n",
+ dp->id, rp->req_num, rp->type, len);
+}
+
+struct ds_var_hdr { /* leading field of every var-config message below */
+ __u32 type; /* one of the DS_VAR_*_REQ / DS_VAR_*_RESP values */
+#define DS_VAR_SET_REQ 0x00
+#define DS_VAR_DELETE_REQ 0x01
+#define DS_VAR_SET_RESP 0x02
+#define DS_VAR_DELETE_RESP 0x03
+};
+
+struct ds_var_set_msg {
+ struct ds_var_hdr hdr;
+ char name_and_value[0]; /* ldom_set_var() writes "name\0value\0" here */
+};
+
+struct ds_var_delete_msg {
+ struct ds_var_hdr hdr;
+ char name[0];
+};
+
+struct ds_var_resp { /* reply layout parsed by ds_var_data() */
+ struct ds_var_hdr hdr;
+ __u32 result; /* one of the DS_VAR_* result codes below */
+#define DS_VAR_SUCCESS 0x00
+#define DS_VAR_NO_SPACE 0x01
+#define DS_VAR_INVALID_VAR 0x02
+#define DS_VAR_INVALID_VAL 0x03
+#define DS_VAR_NOT_PRESENT 0x04
+};
+
+static DEFINE_MUTEX(ds_var_mutex); /* held by ldom_set_var() around send + wait */
+static int ds_var_doorbell; /* set to 1 by ds_var_data() when a reply arrives */
+static int ds_var_response; /* result code copied from the last reply */
+
+static void ds_var_data(struct ds_info *dp,
+ struct ds_cap_state *cp,
+ void *buf, int len)
+{
+ struct ds_data *dpkt = buf;
+ struct ds_var_resp *rp;
+ /* Record the result of a SET/DELETE reply and signal the waiter. */
+ rp = (struct ds_var_resp *) (dpkt + 1);
+
+ if (rp->hdr.type != DS_VAR_SET_RESP &&
+ rp->hdr.type != DS_VAR_DELETE_RESP)
+ return; /* other message types are ignored */
+
+ ds_var_response = rp->result;
+ wmb(); /* order the result store before the doorbell store */
+ ds_var_doorbell = 1;
+}
+
+void ldom_set_var(const char *var, const char *value)
+{
+ struct ds_cap_state *cp, *tmp;
+ struct ds_info *dp;
+ unsigned long flags;
+ /* Prefer a registered "var-config" service, else "var-config-backup". */
+ spin_lock_irqsave(&ds_lock, flags);
+ cp = NULL;
+ for (dp = ds_info_list; dp; dp = dp->next) {
+ tmp = find_cap_by_string(dp, "var-config");
+ if (tmp && tmp->state == CAP_STATE_REGISTERED) {
+ cp = tmp;
+ break;
+ }
+ }
+ if (!cp) {
+ for (dp = ds_info_list; dp; dp = dp->next) {
+ tmp = find_cap_by_string(dp, "var-config-backup");
+ if (tmp && tmp->state == CAP_STATE_REGISTERED) {
+ cp = tmp;
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ if (cp) {
+ union {
+ struct {
+ struct ds_data data;
+ struct ds_var_set_msg msg;
+ } header;
+ char all[512];
+ } pkt;
+ char *base, *p;
+ int msg_len, loops;
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.header.data.tag.type = DS_DATA;
+ pkt.header.data.handle = cp->handle;
+ pkt.header.msg.hdr.type = DS_VAR_SET_REQ;
+ base = p = &pkt.header.msg.name_and_value[0];
+ if ((base - pkt.all) + strlen(var) + strlen(value) + 2 >
+ sizeof(pkt)) { /* both strings and their NULs must fit in pkt */
+ printk(KERN_ERR PFX "var-config [%s] is too long.\n", var);
+ return;
+ }
+ strcpy(p, var);
+ p += strlen(var) + 1;
+ strcpy(p, value);
+ p += strlen(value) + 1;
+
+ msg_len = (sizeof(struct ds_data) +
+ sizeof(struct ds_var_set_msg) +
+ (p - base));
+ msg_len = (msg_len + 3) & ~3; /* round up to a multiple of 4 bytes */
+ pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
+
+ mutex_lock(&ds_var_mutex);
+ spin_lock_irqsave(&ds_lock, flags);
+ ds_var_doorbell = 0;
+ ds_var_response = -1;
+
+ __ds_send(dp->lp, &pkt, msg_len);
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ loops = 1000; /* poll for ds_var_data() in 100us steps */
+ while (ds_var_doorbell == 0) {
+ if (loops-- < 0)
+ break;
+ barrier();
+ udelay(100);
+ }
+
+ mutex_unlock(&ds_var_mutex);
+
+ if (ds_var_doorbell == 0 ||
+ ds_var_response != DS_VAR_SUCCESS)
+ printk(KERN_ERR "ds-%lu: var-config [%s:%s] "
+ "failed, response(%d).\n",
+ dp->id, var, value,
+ ds_var_response);
+ } else {
+ printk(KERN_ERR PFX "var-config not registered so "
+ "could not set (%s) variable to (%s).\n",
+ var, value);
+ }
+}
+
+void ldom_reboot(const char *boot_command)
+{
+ /* Don't bother with any of this if the boot_command
+ * is empty.
+ */
+ if (boot_command && strlen(boot_command)) {
+ char full_boot_str[256];
+ /* Bounded format: an over-long command is truncated, never overrun. */
+ snprintf(full_boot_str, sizeof(full_boot_str), "boot %s",
+ boot_command);
+
+ ldom_set_var("reboot-command", full_boot_str);
+ }
+ sun4v_mach_sir();
+}
+
+void ldom_power_off(void)
+{
+ sun4v_mach_exit(0); /* hand off to sun4v_mach_exit() with argument 0 */
+}
+
+static void ds_conn_reset(struct ds_info *dp)
+{
+ printk(KERN_ERR "ds-%lu: ds_conn_reset() from %p\n", /* log only: */
+ dp->id, __builtin_return_address(0)); /* no state is changed here */
+}
+
+static int register_services(struct ds_info *dp)
+{
+ struct ldc_channel *lp = dp->lp;
+ int i;
+ /* Send a version 1.0 DS_REG_REQ for every capability not yet registered. */
+ for (i = 0; i < dp->num_ds_states; i++) {
+ struct {
+ struct ds_reg_req req;
+ u8 id_buf[256]; /* room behind req for the service id string */
+ } pbuf;
+ struct ds_cap_state *cp = &dp->ds_states[i];
+ int err, msg_len;
+ u64 new_count;
+
+ if (cp->state == CAP_STATE_REGISTERED)
+ continue;
+ /* handle = table index in the high word, clock bits in the low word */
+ new_count = sched_clock() & 0xffffffff;
+ cp->handle = ((u64) i << 32) | new_count;
+
+ msg_len = (sizeof(struct ds_reg_req) +
+ strlen(cp->service_id)); /* the terminating NUL is not counted */
+
+ memset(&pbuf, 0, sizeof(pbuf));
+ pbuf.req.tag.type = DS_REG_REQ;
+ pbuf.req.tag.len = (msg_len - sizeof(struct ds_msg_tag));
+ pbuf.req.handle = cp->handle;
+ pbuf.req.major = 1;
+ pbuf.req.minor = 0;
+ strcpy(pbuf.req.svc_id, cp->service_id);
+
+ err = __ds_send(lp, &pbuf, msg_len);
+ if (err > 0) /* state advances only on a positive send result */
+ cp->state = CAP_STATE_REG_SENT;
+ }
+ return 0; /* send failures are not reported to the caller */
+}
+
+static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt)
+{
+ /* Advance the handshake; returns 0, or -ECONNRESET on an unexpected packet. */
+ if (dp->hs_state == DS_HS_START) {
+ if (pkt->type != DS_INIT_ACK)
+ goto conn_reset;
+
+ dp->hs_state = DS_HS_DONE;
+
+ return register_services(dp);
+ }
+
+ if (dp->hs_state != DS_HS_DONE)
+ goto conn_reset;
+
+ if (pkt->type == DS_REG_ACK) {
+ struct ds_reg_ack *ap = (struct ds_reg_ack *) pkt;
+ struct ds_cap_state *cp = find_cap(dp, ap->handle);
+
+ if (!cp) {
+ printk(KERN_ERR "ds-%lu: REG ACK for unknown "
+ "handle %lx\n", dp->id, ap->handle);
+ return 0;
+ }
+ printk(KERN_INFO "ds-%lu: Registered %s service.\n",
+ dp->id, cp->service_id);
+ cp->state = CAP_STATE_REGISTERED;
+ } else if (pkt->type == DS_REG_NACK) {
+ struct ds_reg_nack *np = (struct ds_reg_nack *) pkt;
+ struct ds_cap_state *cp = find_cap(dp, np->handle);
+
+ if (!cp) {
+ printk(KERN_ERR "ds-%lu: REG NACK for "
+ "unknown handle %lx\n",
+ dp->id, np->handle);
+ return 0;
+ }
+ cp->state = CAP_STATE_UNKNOWN; /* refused: back to the initial state */
+ }
+
+ return 0; /* other packet types in DS_HS_DONE are ignored */
+
+conn_reset:
+ ds_conn_reset(dp);
+ return -ECONNRESET;
+}
+
+static void __send_ds_nack(struct ds_info *dp, u64 handle)
+{
+ struct ds_data_nack nack = {
+ .tag = {
+ .type = DS_NACK,
+ .len = (sizeof(struct ds_data_nack) -
+ sizeof(struct ds_msg_tag)),
+ },
+ .handle = handle,
+ .result = DS_INV_HDL,
+ };
+ /* Send a DS_NACK/DS_INV_HDL for handle; callers in view hold ds_lock. */
+ __ds_send(dp->lp, &nack, sizeof(nack));
+}
+
+static LIST_HEAD(ds_work_list); /* ds_queue_entry items waiting for ds_thread() */
+static DECLARE_WAIT_QUEUE_HEAD(ds_wait); /* ds_thread() sleeps on this */
+
+struct ds_queue_entry {
+ struct list_head list;
+ struct ds_info *dp;
+ int req_len; /* byte count handed to the capability's data() handler */
+ int __pad;
+ u64 req[0]; /* copy of the received packet, starting at its tag */
+};
+
+static void process_ds_work(void)
+{
+ struct ds_queue_entry *qp, *tmp;
+ unsigned long flags;
+ LIST_HEAD(todo);
+ /* Detach the whole work list under ds_lock, then process it unlocked. */
+ spin_lock_irqsave(&ds_lock, flags);
+ list_splice(&ds_work_list, &todo);
+ INIT_LIST_HEAD(&ds_work_list);
+ spin_unlock_irqrestore(&ds_lock, flags);
+
+ list_for_each_entry_safe(qp, tmp, &todo, list) {
+ struct ds_data *dpkt = (struct ds_data *) qp->req;
+ struct ds_info *dp = qp->dp;
+ struct ds_cap_state *cp = find_cap(dp, dpkt->handle);
+ int req_len = qp->req_len; /* whatever the enqueuer stored */
+
+ if (!cp) {
+ printk(KERN_ERR "ds-%lu: Data for unknown "
+ "handle %lu\n",
+ dp->id, dpkt->handle);
+
+ spin_lock_irqsave(&ds_lock, flags);
+ __send_ds_nack(dp, dpkt->handle);
+ spin_unlock_irqrestore(&ds_lock, flags);
+ } else {
+ cp->data(dp, cp, dpkt, req_len); /* runs without ds_lock held */
+ }
+
+ list_del(&qp->list);
+ kfree(qp);
+ }
+}
+
+static int ds_thread(void *__unused)
+{
+ DEFINE_WAIT(wait);
+ /* Worker loop: sleep on ds_wait until work is queued, then process it. */
+ while (1) {
+ prepare_to_wait(&ds_wait, &wait, TASK_INTERRUPTIBLE);
+ if (list_empty(&ds_work_list)) /* checked without taking ds_lock */
+ schedule();
+ finish_wait(&ds_wait, &wait);
+
+ if (kthread_should_stop())
+ break;
+
+ process_ds_work();
+ }
+
+ return 0;
+}
+
+static int ds_data(struct ds_info *dp, struct ds_msg_tag *pkt, int len)
+{
+ struct ds_queue_entry *qp;
+ /* Copy the packet onto ds_work_list for the worker; NACK if out of memory. */
+ qp = kmalloc(sizeof(struct ds_queue_entry) + len, GFP_ATOMIC);
+ if (!qp) {
+ __send_ds_nack(dp, ((struct ds_data *) pkt)->handle);
+ } else {
+ qp->dp = dp;
+ qp->req_len = len; /* process_ds_work() hands this to cp->data() */
+ memcpy(&qp->req, pkt, len);
+ list_add_tail(&qp->list, &ds_work_list);
+ wake_up(&ds_wait);
+ }
+ return 0;
+}
+
+static void ds_up(struct ds_info *dp)
+{
+ struct ldc_channel *lp = dp->lp;
+ struct ds_ver_req req;
+ int err;
+ /* Start the handshake by sending a version 1.0 DS_INIT_REQ. */
+ req.tag.type = DS_INIT_REQ;
+ req.tag.len = sizeof(req) - sizeof(struct ds_msg_tag);
+ req.ver.major = 1;
+ req.ver.minor = 0;
+
+ err = __ds_send(lp, &req, sizeof(req));
+ if (err > 0) /* state advances only on a positive send result */
+ dp->hs_state = DS_HS_START;
+}
+
+static void ds_reset(struct ds_info *dp)
+{
+ int i;
+ /* Forget handshake progress and mark every capability unregistered. */
+ dp->hs_state = 0;
+
+ for (i = 0; i < dp->num_ds_states; i++) {
+ struct ds_cap_state *cp = &dp->ds_states[i];
+
+ cp->state = CAP_STATE_UNKNOWN;
+ }
+}
+
+static void ds_event(void *arg, int event)
+{
+ struct ds_info *dp = arg;
+ struct ldc_channel *lp = dp->lp;
+ unsigned long flags;
+ int err;
+ /* LDC event callback; everything below runs with ds_lock held. */
+ spin_lock_irqsave(&ds_lock, flags);
+
+ if (event == LDC_EVENT_UP) {
+ ds_up(dp);
+ goto out;
+ }
+
+ if (event == LDC_EVENT_RESET) {
+ ds_reset(dp);
+ goto out;
+ }
+
+ if (event != LDC_EVENT_DATA_READY) {
+ printk(KERN_WARNING "ds-%lu: Unexpected LDC event %d\n",
+ dp->id, event);
+ goto out;
+ }
+
+ while (1) { /* one iteration per packet: tag first, then payload */
+ struct ds_msg_tag *tag;
+
+ err = ldc_read(lp, dp->rcv_buf, sizeof(*tag));
+
+ if (unlikely(err < 0)) {
+ if (err == -ECONNRESET)
+ ds_conn_reset(dp);
+ break;
+ }
+ if (err == 0)
+ break;
+
+ tag = dp->rcv_buf;
+ if (tag->len > dp->rcv_buf_len - sizeof(*tag)) {
+ printk(KERN_ERR "ds-%lu: Oversized packet.\n", dp->id);
+ break; /* payload would not fit behind the tag in rcv_buf */
+ }
+ err = ldc_read(lp, tag + 1, tag->len);
+
+ if (unlikely(err < 0)) {
+ if (err == -ECONNRESET)
+ ds_conn_reset(dp);
+ break;
+ }
+ if (err < tag->len) /* short read: stop processing */
+ break;
+
+ if (tag->type < DS_DATA) /* types below DS_DATA go to the handshake */
+ err = ds_handshake(dp, dp->rcv_buf);
+ else
+ err = ds_data(dp, dp->rcv_buf,
+ sizeof(*tag) + err);
+ if (err == -ECONNRESET)
+ break;
+ }
+out:
+ spin_unlock_irqrestore(&ds_lock, flags);
+}
+
+static int __devinit ds_probe(struct vio_dev *vdev,
+ const struct vio_device_id *id)
+{
+ static int ds_version_printed;
+ struct ldc_channel_config ds_cfg = {
+ .event = ds_event,
+ .mtu = 4096,
+ .mode = LDC_MODE_STREAM,
+ };
+ struct mdesc_handle *hp;
+ struct ldc_channel *lp;
+ struct ds_info *dp;
+ const u64 *val;
+ int err, i;
+ /* Allocate a ds_info, bind its LDC channel, and add it to ds_info_list. */
+ if (ds_version_printed++ == 0) /* banner is printed on the first probe only */
+ printk(KERN_INFO "%s", version);
+
+ dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+ err = -ENOMEM; /* default for the allocation failures below */
+ if (!dp)
+ goto out_err;
+
+ hp = mdesc_grab(); /* NOTE(review): result is not NULL-checked - confirm */
+ val = mdesc_get_property(hp, vdev->mp, "id", NULL);
+ if (val)
+ dp->id = *val;
+ mdesc_release(hp);
+
+ dp->rcv_buf = kzalloc(4096, GFP_KERNEL);
+ if (!dp->rcv_buf)
+ goto out_free_dp;
+
+ dp->rcv_buf_len = 4096;
+
+ dp->ds_states = kzalloc(sizeof(ds_states_template),
+ GFP_KERNEL);
+ if (!dp->ds_states)
+ goto out_free_rcv_buf;
+
+ memcpy(dp->ds_states, ds_states_template,
+ sizeof(ds_states_template));
+ dp->num_ds_states = ARRAY_SIZE(ds_states_template);
+
+ for (i = 0; i < dp->num_ds_states; i++) /* seed handles with the index */
+ dp->ds_states[i].handle = ((u64)i << 32);
+
+ ds_cfg.tx_irq = vdev->tx_irq;
+ ds_cfg.rx_irq = vdev->rx_irq;
+
+ lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp);
+ if (IS_ERR(lp)) {
+ err = PTR_ERR(lp);
+ goto out_free_ds_states;
+ }
+ dp->lp = lp;
+
+ err = ldc_bind(lp, "DS");
+ if (err)
+ goto out_free_ldc;
+
+ spin_lock_irq(&ds_lock);
+ dp->next = ds_info_list; /* push onto the head of the global list */
+ ds_info_list = dp;
+ spin_unlock_irq(&ds_lock);
+
+ start_powerd();
+
+ return err; /* 0 here: ldc_bind() succeeded */
+
+out_free_ldc:
+ ldc_free(dp->lp);
+
+out_free_ds_states:
+ kfree(dp->ds_states);
+
+out_free_rcv_buf:
+ kfree(dp->rcv_buf);
+
+out_free_dp:
+ kfree(dp);
+
+out_err:
+ return err;
+}
+
+static int ds_remove(struct vio_dev *vdev)
+{
+ return 0; /* stub: nothing allocated by ds_probe() is released here */
+}
+
+static struct vio_device_id ds_match[] = {
+ {
+ .type = "domain-services-port", /* matched against vio_dev->type */
+ },
+ {}, /* empty entry ends the table */
+};
+
+static struct vio_driver ds_driver = {
+ .id_table = ds_match,
+ .probe = ds_probe,
+ .remove = ds_remove,
+ .driver = {
+ .name = "ds",
+ .owner = THIS_MODULE,
+ }
+};
+
+static int __init ds_init(void)
+{
+ struct task_struct *t = kthread_run(ds_thread, NULL, "kldomd");
+ /* Without the worker thread queued requests would never be processed. */
+ return IS_ERR(t) ? PTR_ERR(t) : vio_register_driver(&ds_driver);
+}
+
+subsys_initcall(ds_init);
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/smp.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/smp.c
@@ -1,6 +1,6 @@
/* smp.c: Sparc64 SMP support.
*
- * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net)
*/
#include
@@ -28,6 +28,8 @@
#include
#include
#include
+#include
+#include
#include
#include
@@ -41,22 +43,26 @@
#include
#include
#include
+#include
+#include
extern void calibrate_delay(void);
int sparc64_multi_core __read_mostly;
-/* Please don't make this stuff initdata!!! --DaveM */
-unsigned char boot_cpu_id;
-
+cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
-cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+
+EXPORT_SYMBOL(cpu_possible_map);
+EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL(cpu_sibling_map);
+EXPORT_SYMBOL(cpu_core_map);
+
static cpumask_t smp_commenced_mask;
-static cpumask_t cpu_callout_map;
void smp_info(struct seq_file *m)
{
@@ -73,18 +79,17 @@
for_each_online_cpu(i)
seq_printf(m,
- "Cpu%dBogo\t: %lu.%02lu\n"
"Cpu%dClkTck\t: %016lx\n",
- i, cpu_data(i).udelay_val / (500000/HZ),
- (cpu_data(i).udelay_val / (5000/HZ)) % 100,
i, cpu_data(i).clock_tick);
}
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
+
extern void setup_sparc64_timer(void);
static volatile unsigned long callin_flag = 0;
-void __init smp_callin(void)
+void __devinit smp_callin(void)
{
int cpuid = hard_smp_processor_id();
@@ -102,8 +107,6 @@
local_irq_enable();
- calibrate_delay();
- cpu_data(cpuid).udelay_val = loops_per_jiffy;
callin_flag = 1;
__asm__ __volatile__("membar #Sync\n\t"
"flush %%g6" : : : "memory");
@@ -120,7 +123,9 @@
while (!cpu_isset(cpuid, smp_commenced_mask))
rmb();
+ spin_lock(&call_lock);
cpu_set(cpuid, cpu_online_map);
+ spin_unlock(&call_lock);
/* idle thread is expected to have preempt disabled */
preempt_disable();
@@ -268,7 +273,66 @@
spin_unlock_irqrestore(&itc_sync_lock, flags);
}
-extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load);
+#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
+/* XXX Put this in some common place. XXX */
+static unsigned long kimage_addr_to_ra(void *p)
+{
+ unsigned long val = (unsigned long) p;
+ /* Offset of p from KERNBASE, rebased onto kern_base. */
+ return kern_base + (val - KERNBASE);
+}
+
+static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
+{
+ extern unsigned long sparc64_ttable_tl0;
+ extern unsigned long kern_locked_tte_data;
+ extern int bigkernel;
+ struct hvtramp_descr *hdesc;
+ unsigned long trampoline_ra;
+ struct trap_per_cpu *tb;
+ u64 tte_vaddr, tte_data;
+ unsigned long hv_err;
+ /* Fill in an hvtramp descriptor for cpu and pass it to sun4v_cpu_start(). */
+ hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL);
+ if (!hdesc) {
+ printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
+ "hvtramp_descr.\n");
+ return; /* nothing is started; the caller gets no error code */
+ }
+
+ hdesc->cpu = cpu;
+ hdesc->num_mappings = (bigkernel ? 2 : 1);
+
+ tb = &trap_block[cpu];
+ tb->hdesc = hdesc; /* stored per cpu; not freed in this function */
+
+ hdesc->fault_info_va = (unsigned long) &tb->fault_info;
+ hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
+
+ hdesc->thread_reg = thread_reg;
+
+ tte_vaddr = (unsigned long) KERNBASE;
+ tte_data = kern_locked_tte_data;
+
+ hdesc->maps[0].vaddr = tte_vaddr;
+ hdesc->maps[0].tte = tte_data;
+ if (bigkernel) { /* second mapping sits 0x400000 above the first */
+ tte_vaddr += 0x400000;
+ tte_data += 0x400000;
+ hdesc->maps[1].vaddr = tte_vaddr;
+ hdesc->maps[1].tte = tte_data;
+ }
+
+ trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
+
+ hv_err = sun4v_cpu_start(cpu, trampoline_ra,
+ kimage_addr_to_ra(&sparc64_ttable_tl0),
+ __pa(hdesc));
+ if (hv_err) /* failure is only logged */
+ printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
+ "gives error %lu\n", hv_err);
+}
+#endif
extern unsigned long sparc64_cpu_startup;
@@ -280,6 +344,7 @@
static int __devinit smp_boot_one_cpu(unsigned int cpu)
{
+ struct trap_per_cpu *tb = &trap_block[cpu];
unsigned long entry =
(unsigned long)(&sparc64_cpu_startup);
unsigned long cookie =
@@ -288,22 +353,26 @@
int timeout, ret;
p = fork_idle(cpu);
+ if (IS_ERR(p))
+ return PTR_ERR(p);
callin_flag = 0;
cpu_new_thread = task_thread_info(p);
- cpu_set(cpu, cpu_callout_map);
if (tlb_type == hypervisor) {
- /* Alloc the mondo queues, cpu will load them. */
- sun4v_init_mondo_queues(0, cpu, 1, 0);
-
- prom_startcpu_cpuid(cpu, entry, cookie);
+#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
+ if (ldom_domaining_enabled)
+ ldom_startcpu_cpuid(cpu,
+ (unsigned long) cpu_new_thread);
+ else
+#endif
+ prom_startcpu_cpuid(cpu, entry, cookie);
} else {
struct device_node *dp = of_find_node_by_cpuid(cpu);
prom_startcpu(dp->node, entry, cookie);
}
- for (timeout = 0; timeout < 5000000; timeout++) {
+ for (timeout = 0; timeout < 50000; timeout++) {
if (callin_flag)
break;
udelay(100);
@@ -313,11 +382,15 @@
ret = 0;
} else {
printk("Processor %d is stuck.\n", cpu);
- cpu_clear(cpu, cpu_callout_map);
ret = -ENODEV;
}
cpu_new_thread = NULL;
+ if (tb->hdesc) {
+ kfree(tb->hdesc);
+ tb->hdesc = NULL;
+ }
+
return ret;
}
@@ -403,7 +476,7 @@
*/
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
- u64 pstate, ver;
+ u64 pstate, ver, busy_mask;
int nack_busy_id, is_jbus, need_more;
if (cpus_empty(mask))
@@ -435,14 +508,20 @@
"i" (ASI_INTR_W));
nack_busy_id = 0;
+ busy_mask = 0;
{
int i;
for_each_cpu_mask(i, mask) {
u64 target = (i << 14) | 0x70;
- if (!is_jbus)
+ if (is_jbus) {
+ busy_mask |= (0x1UL << (i * 2));
+ } else {
target |= (nack_busy_id << 24);
+ busy_mask |= (0x1UL <<
+ (nack_busy_id * 2));
+ }
__asm__ __volatile__(
"stxa %%g0, [%0] %1\n\t"
"membar #Sync\n\t"
@@ -458,15 +537,16 @@
/* Now, poll for completion. */
{
- u64 dispatch_stat;
+ u64 dispatch_stat, nack_mask;
long stuck;
stuck = 100000 * nack_busy_id;
+ nack_mask = busy_mask << 1;
do {
__asm__ __volatile__("ldxa [%%g0] %1, %0"
: "=r" (dispatch_stat)
: "i" (ASI_INTR_DISPATCH_STAT));
- if (dispatch_stat == 0UL) {
+ if (!(dispatch_stat & (busy_mask | nack_mask))) {
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
: : "r" (pstate));
if (unlikely(need_more)) {
@@ -483,12 +563,12 @@
}
if (!--stuck)
break;
- } while (dispatch_stat & 0x5555555555555555UL);
+ } while (dispatch_stat & busy_mask);
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
: : "r" (pstate));
- if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) {
+ if (dispatch_stat & busy_mask) {
/* Busy bits will not clear, continue instead
* of freezing up on this cpu.
*/
@@ -720,7 +800,6 @@
int wait;
};
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
static struct call_data_struct *call_data;
extern unsigned long xcall_call_function;
@@ -1152,11 +1231,6 @@
preempt_enable();
}
-void __init smp_tick_init(void)
-{
- boot_cpu_id = hard_smp_processor_id();
-}
-
/* /proc/profile writes can call this, don't __init it please. */
int setup_profiling_timer(unsigned int multiplier)
{
@@ -1189,23 +1263,8 @@
smallest / 1024U / 1024U);
}
-/* Constrain the number of cpus to max_cpus. */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- int i;
-
- if (num_possible_cpus() > max_cpus) {
- for_each_possible_cpu(i) {
- if (i != boot_cpu_id) {
- cpu_clear(i, phys_cpu_present_map);
- cpu_clear(i, cpu_present_map);
- if (num_possible_cpus() <= max_cpus)
- break;
- }
- }
- }
-
- cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
smp_tune_scheduling();
}
@@ -1217,30 +1276,32 @@
{
unsigned int i;
- for_each_possible_cpu(i) {
+ for_each_present_cpu(i) {
unsigned int j;
+ cpus_clear(cpu_core_map[i]);
if (cpu_data(i).core_id == 0) {
cpu_set(i, cpu_core_map[i]);
continue;
}
- for_each_possible_cpu(j) {
+ for_each_present_cpu(j) {
if (cpu_data(i).core_id ==
cpu_data(j).core_id)
cpu_set(j, cpu_core_map[i]);
}
}
- for_each_possible_cpu(i) {
+ for_each_present_cpu(i) {
unsigned int j;
+ cpus_clear(cpu_sibling_map[i]);
if (cpu_data(i).proc_id == -1) {
cpu_set(i, cpu_sibling_map[i]);
continue;
}
- for_each_possible_cpu(j) {
+ for_each_present_cpu(j) {
if (cpu_data(i).proc_id ==
cpu_data(j).proc_id)
cpu_set(j, cpu_sibling_map[i]);
@@ -1269,18 +1330,112 @@
return ret;
}
-void __init smp_cpus_done(unsigned int max_cpus)
+#ifdef CONFIG_HOTPLUG_CPU
+void cpu_play_dead(void)
{
- unsigned long bogosum = 0;
+ int cpu = smp_processor_id();
+ unsigned long pstate;
+ /* Final code run on a cpu being taken offline; this never returns. */
+ idle_task_exit();
+
+ if (tlb_type == hypervisor) {
+ struct trap_per_cpu *tb = &trap_block[cpu];
+ /* Reconfigure all four queues of this cpu with a last argument of 0. */
+ sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
+ tb->cpu_mondo_pa, 0);
+ sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
+ tb->dev_mondo_pa, 0);
+ sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
+ tb->resum_mondo_pa, 0);
+ sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
+ tb->nonresum_mondo_pa, 0);
+ }
+
+ cpu_clear(cpu, smp_commenced_mask); /* __cpu_die() polls for this bit */
+ membar_safe("#Sync");
+
+ local_irq_disable();
+ /* NOTE(review): wrpr XORs its operands, so this flips PSTATE_IE - confirm. */
+ __asm__ __volatile__(
+ "rdpr %%pstate, %0\n\t"
+ "wrpr %0, %1, %%pstate"
+ : "=r" (pstate)
+ : "i" (PSTATE_IE));
+
+ while (1) /* spin until another cpu stops this one */
+ barrier();
+}
+
+int __cpu_disable(void)
+{
+ int cpu = smp_processor_id();
+ cpuinfo_sparc *c;
int i;
- for_each_online_cpu(i)
- bogosum += cpu_data(i).udelay_val;
- printk("Total of %ld processors activated "
- "(%lu.%02lu BogoMIPS).\n",
- (long) num_online_cpus(),
- bogosum/(500000/HZ),
- (bogosum/(5000/HZ))%100);
+ for_each_cpu_mask(i, cpu_core_map[cpu]) /* drop cpu from its peers' core maps */
+ cpu_clear(cpu, cpu_core_map[i]);
+ cpus_clear(cpu_core_map[cpu]);
+
+ for_each_cpu_mask(i, cpu_sibling_map[cpu]) /* same for the sibling maps */
+ cpu_clear(cpu, cpu_sibling_map[i]);
+ cpus_clear(cpu_sibling_map[cpu]);
+
+ c = &cpu_data(cpu);
+ /* Reset the topology ids of this cpu. */
+ c->core_id = 0;
+ c->proc_id = -1;
+
+ spin_lock(&call_lock); /* online map changes are made under call_lock */
+ cpu_clear(cpu, cpu_online_map);
+ spin_unlock(&call_lock);
+
+ smp_wmb();
+
+ /* Make sure no interrupts point to this cpu. */
+ fixup_irqs();
+
+ local_irq_enable(); /* open a 1ms window with interrupts enabled */
+ mdelay(1);
+ local_irq_disable();
+
+ return 0; /* always reports success */
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ int i;
+ /* Poll up to 100 times, 100ms apart, for cpu to leave smp_commenced_mask. */
+ for (i = 0; i < 100; i++) {
+ smp_rmb();
+ if (!cpu_isset(cpu, smp_commenced_mask))
+ break;
+ msleep(100);
+ }
+ if (cpu_isset(cpu, smp_commenced_mask)) {
+ printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+ } else {
+#if defined(CONFIG_SUN_LDOMS)
+ unsigned long hv_err;
+ int limit = 100; /* maximum number of sun4v_cpu_stop() attempts */
+
+ do {
+ hv_err = sun4v_cpu_stop(cpu);
+ if (hv_err == HV_EOK) {
+ cpu_clear(cpu, cpu_present_map);
+ break; /* leaves limit > 0, so no error is logged below */
+ }
+ } while (--limit > 0);
+ if (limit <= 0) {
+ printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
+ hv_err);
+ }
+#endif
+ }
+}
+#endif
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
}
void smp_send_reschedule(int cpu)
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/vio.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/vio.c
@@ -0,0 +1,453 @@
+/* vio.c: Virtual I/O channel devices probing infrastructure.
+ *
+ * Copyright (c) 2003-2005 IBM Corp.
+ * Dave Engebretsen engebret@us.ibm.com
+ * Santiago Leon santil@us.ibm.com
+ * Hollis Blanchard
+ * Stephen Rothwell
+ *
+ * Adapted to sparc64 by David S. Miller davem@davemloft.net
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+
+static const struct vio_device_id *vio_match_device(
+ const struct vio_device_id *matches,
+ const struct vio_dev *dev)
+{
+ const char *type, *compat;
+ int len;
+ /* Return the first table entry that matches dev, or NULL if none does. */
+ type = dev->type;
+ compat = dev->compat;
+ len = dev->compat_len;
+
+ while (matches->type[0] || matches->compat[0]) { /* all-empty entry ends it */
+ int match = 1;
+ if (matches->type[0]) /* a type given in the entry must compare equal */
+ match &= !strcmp(matches->type, type);
+
+ if (matches->compat[0]) { /* a compat must be found in dev's compat list */
+ match &= len &&
+ of_find_in_proplist(compat, matches->compat, len);
+ }
+ if (match)
+ return matches;
+ matches++;
+ }
+ return NULL;
+}
+
+static int vio_bus_match(struct device *dev, struct device_driver *drv)
+{
+ struct vio_dev *vio_dev = to_vio_dev(dev);
+ struct vio_driver *vio_drv = to_vio_driver(drv);
+ const struct vio_device_id *matches = vio_drv->id_table;
+ /* Bus match hook: nonzero when the driver's id table matches the device. */
+ if (!matches) /* a driver without an id table matches nothing */
+ return 0;
+
+ return vio_match_device(matches, vio_dev) != NULL;
+}
+
+static int vio_device_probe(struct device *dev)
+{
+ struct vio_dev *vdev = to_vio_dev(dev);
+ struct vio_driver *drv = to_vio_driver(dev->driver);
+ const struct vio_device_id *id;
+ int error = -ENODEV; /* returned when there is no probe hook or no match */
+ /* Bus probe hook: re-run the id match and call the driver's probe(). */
+ if (drv->probe) {
+ id = vio_match_device(drv->id_table, vdev);
+ if (id)
+ error = drv->probe(vdev, id);
+ }
+
+ return error;
+}
+
+static int vio_device_remove(struct device *dev)
+{
+ struct vio_dev *vdev = to_vio_dev(dev);
+ struct vio_driver *drv = to_vio_driver(dev->driver);
+ /* Bus remove hook: forward to the driver's remove() when it has one. */
+ if (drv->remove)
+ return drv->remove(vdev);
+
+ return 1; /* no remove hook: report 1 */
+}
+
+static ssize_t devspec_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct vio_dev *vdev = to_vio_dev(dev);
+ const char *str = "none"; /* reported for every other device type */
+ /* "devspec" attribute: "vnet", "vdisk" or "none" depending on the type. */
+ if (!strcmp(vdev->type, "vnet-port"))
+ str = "vnet";
+ else if (!strcmp(vdev->type, "vdc-port"))
+ str = "vdisk";
+
+ return sprintf(buf, "%s\n", str);
+}
+
+static ssize_t type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct vio_dev *vdev = to_vio_dev(dev);
+ return sprintf(buf, "%s\n", vdev->type); /* "type" attribute: raw type string */
+}
+
+static struct device_attribute vio_dev_attrs[] = {
+ __ATTR_RO(devspec), /* served by devspec_show() */
+ __ATTR_RO(type), /* served by type_show() */
+ __ATTR_NULL
+};
+
+static struct bus_type vio_bus_type = {
+ .name = "vio",
+ .dev_attrs = vio_dev_attrs,
+ .match = vio_bus_match,
+ .probe = vio_device_probe,
+ .remove = vio_device_remove,
+};
+
+int vio_register_driver(struct vio_driver *viodrv)
+{
+ viodrv->driver.bus = &vio_bus_type; /* attach the driver to the vio bus */
+
+ return driver_register(&viodrv->driver); /* result is passed through */
+}
+EXPORT_SYMBOL(vio_register_driver);
+
+void vio_unregister_driver(struct vio_driver *viodrv)
+{
+ driver_unregister(&viodrv->driver); /* counterpart of vio_register_driver() */
+}
+EXPORT_SYMBOL(vio_unregister_driver);
+
+static void vio_dev_release(struct device *dev)
+{
+ kfree(to_vio_dev(dev)); /* release hook: may run long after init text is gone */
+}
+
+static ssize_t
+show_pciobppath_attr(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct vio_dev *vdev;
+ struct device_node *dp;
+ /* "obppath" attribute: full name of the device_node stored in vdev->dp. */
+ vdev = to_vio_dev(dev);
+ dp = vdev->dp; /* vio_create_one() adds this attribute only when dp is set */
+
+ return snprintf (buf, PAGE_SIZE, "%s\n", dp->full_name);
+}
+
+static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH,
+ show_pciobppath_attr, NULL);
+
+struct device_node *cdev_node; /* "channel-devices" node found by vio_init() */
+
+static struct vio_dev *root_vdev; /* root device created by vio_init() */
+static u64 cdev_cfg_handle; /* cfg-handle of the channel-devices MD node */
+
+static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp,
+ struct vio_dev *vdev)
+{
+ u64 a;
+ /* Copy tx/rx irq and channel id from mp's forward arc targets into vdev. */
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+ const u64 *chan_id;
+ const u64 *irq;
+ u64 target;
+
+ target = mdesc_arc_target(hp, a);
+ /* Each field is overwritten by every target that has the property. */
+ irq = mdesc_get_property(hp, target, "tx-ino", NULL);
+ if (irq)
+ vdev->tx_irq = sun4v_build_virq(cdev_cfg_handle, *irq);
+
+ irq = mdesc_get_property(hp, target, "rx-ino", NULL);
+ if (irq)
+ vdev->rx_irq = sun4v_build_virq(cdev_cfg_handle, *irq);
+
+ chan_id = mdesc_get_property(hp, target, "id", NULL);
+ if (chan_id)
+ vdev->channel_id = *chan_id;
+ }
+}
+
+static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp,
+ struct device *parent)
+{
+ const char *type, *compat, *bus_id_name;
+ struct device_node *dp;
+ struct vio_dev *vdev;
+ int err, tlen, clen;
+ const u64 *id, *cfg_handle;
+ u64 a;
+ /* Build and register a vio_dev for MD node mp; returns NULL on failure. */
+ type = mdesc_get_property(hp, mp, "device-type", &tlen);
+ if (!type) { /* fall back to the "name" property, then to the node name */
+ type = mdesc_get_property(hp, mp, "name", &tlen);
+ if (!type) {
+ type = mdesc_node_name(hp, mp);
+ tlen = strlen(type) + 1;
+ }
+ }
+ if (tlen > VIO_MAX_TYPE_LEN) {
+ printk(KERN_ERR "VIO: Type string [%s] is too long.\n",
+ type);
+ return NULL;
+ }
+
+ id = mdesc_get_property(hp, mp, "id", NULL);
+
+ cfg_handle = NULL; /* taken from the first back-arc target that has one */
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
+ u64 target;
+
+ target = mdesc_arc_target(hp, a);
+ cfg_handle = mdesc_get_property(hp, target,
+ "cfg-handle", NULL);
+ if (cfg_handle)
+ break;
+ }
+
+ bus_id_name = type;
+ if (!strcmp(type, "domain-services-port")) /* shortened in the bus id */
+ bus_id_name = "ds";
+
+ if (strlen(bus_id_name) >= KOBJ_NAME_LEN - 4) {
+ printk(KERN_ERR "VIO: bus_id_name [%s] is too long.\n",
+ bus_id_name);
+ return NULL;
+ }
+
+ compat = mdesc_get_property(hp, mp, "device-type", &clen);
+ if (!compat) {
+ clen = 0;
+ } else if (clen > VIO_MAX_COMPAT_LEN) {
+ printk(KERN_ERR "VIO: Compat len %d for [%s] is too long.\n",
+ clen, type);
+ return NULL;
+ }
+
+ vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+ if (!vdev) {
+ printk(KERN_ERR "VIO: Could not allocate vio_dev\n");
+ return NULL;
+ }
+
+ vdev->mp = mp;
+ memcpy(vdev->type, type, tlen);
+ if (compat)
+ memcpy(vdev->compat, compat, clen);
+ else
+ memset(vdev->compat, 0, sizeof(vdev->compat));
+ vdev->compat_len = clen;
+
+ vdev->channel_id = ~0UL; /* all-ones defaults until the MD supplies values */
+ vdev->tx_irq = ~0;
+ vdev->rx_irq = ~0;
+
+ vio_fill_channel_info(hp, mp, vdev);
+
+ if (!id) { /* bus id is "name", "name-id" or "name-cfg_handle-id" */
+ snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%s",
+ bus_id_name);
+ vdev->dev_no = ~(u64)0;
+ } else if (!cfg_handle) {
+ snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%s-%lu",
+ bus_id_name, *id);
+ vdev->dev_no = *id;
+ } else {
+ snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%s-%lu-%lu",
+ bus_id_name, *cfg_handle, *id);
+ vdev->dev_no = *cfg_handle;
+ }
+
+ vdev->dev.parent = parent;
+ vdev->dev.bus = &vio_bus_type;
+ vdev->dev.release = vio_dev_release;
+
+ if (parent == NULL) { /* the root device maps to cdev_node itself */
+ dp = cdev_node;
+ } else if (to_vio_dev(parent) == root_vdev) {
+ dp = of_get_next_child(cdev_node, NULL);
+ while (dp) { /* first child of cdev_node with the same type */
+ if (!strcmp(dp->type, type))
+ break;
+
+ dp = of_get_next_child(cdev_node, dp);
+ }
+ } else {
+ dp = to_vio_dev(parent)->dp;
+ }
+ vdev->dp = dp; /* may be NULL when no child matched */
+
+ printk(KERN_INFO "VIO: Adding device %s\n", vdev->dev.bus_id);
+
+ err = device_register(&vdev->dev);
+ if (err) {
+ printk(KERN_ERR "VIO: Could not register device %s, err=%d\n",
+ vdev->dev.bus_id, err);
+ put_device(&vdev->dev); /* frees vdev through vio_dev_release() */
+ return NULL;
+ }
+ if (vdev->dp) /* a failure to create the attribute is ignored */
+ err = sysfs_create_file(&vdev->dev.kobj,
+ &dev_attr_obppath.attr);
+
+ return vdev;
+}
+
+static void vio_add(struct mdesc_handle *hp, u64 node)
+{
+ (void) vio_create_one(hp, node, &root_vdev->dev); /* result is discarded */
+}
+
+static int vio_md_node_match(struct device *dev, void *arg)
+{
+ struct vio_dev *vdev = to_vio_dev(dev);
+ /* Match callback: arg is an MD node number carried in a pointer. */
+ if (vdev->mp == (u64) arg)
+ return 1;
+
+ return 0;
+}
+
+static void vio_remove(struct mdesc_handle *hp, u64 node)
+{
+ struct device *dev;
+ /* Unregister the child of the root device whose MD node equals node. */
+ dev = device_find_child(&root_vdev->dev, (void *) node,
+ vio_md_node_match);
+ if (dev) {
+ printk(KERN_INFO "VIO: Removing device %s\n", dev->bus_id);
+ device_unregister(dev);
+ put_device(dev); /* drop the reference taken by device_find_child() */
+ }
+}
+
+static struct mdesc_notifier_client vio_device_notifier = {
+ .add = vio_add,
+ .remove = vio_remove,
+ .node_name = "virtual-device-port", /* registered by vio_init() */
+};
+
+/* We are only interested in domain service ports under the
+ * "domain-services" node. On control nodes there is another port
+ * under "openboot" that we should not mess with as apparently that is
+ * reserved exclusively for OBP use.
+ */
+static void vio_add_ds(struct mdesc_handle *hp, u64 node)
+{
+ int found;
+ u64 a;
+ /* Create the device only if a back arc leads to a "domain-services" node. */
+ found = 0;
+ mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
+ u64 target = mdesc_arc_target(hp, a);
+ const char *name = mdesc_node_name(hp, target);
+
+ if (!strcmp(name, "domain-services")) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (found)
+ (void) vio_create_one(hp, node, &root_vdev->dev);
+}
+
+static struct mdesc_notifier_client vio_ds_notifier = {
+ .add = vio_add_ds, /* filtered add: see vio_add_ds() */
+ .remove = vio_remove,
+ .node_name = "domain-services-port",
+};
+
+const char *channel_devices_node = "channel-devices"; /* MD node looked up by vio_init() */
+const char *channel_devices_compat = "SUNW,sun4v-channel-devices"; /* required compat */
+const char *cfg_handle_prop = "cfg-handle"; /* property read by vio_init() */
+
+static int __init vio_init(void)
+{
+ struct mdesc_handle *hp;
+ const char *compat;
+ const u64 *cfg_handle;
+ int err, len;
+ u64 root;
+ /* Register the vio bus, create the root device, hook MD add/remove. */
+ err = bus_register(&vio_bus_type);
+ if (err) {
+ printk(KERN_ERR "VIO: Could not register bus type err=%d\n",
+ err);
+ return err;
+ }
+
+ hp = mdesc_grab();
+ if (!hp) /* no machine description: nothing to enumerate */
+ return 0;
+
+ root = mdesc_node_by_name(hp, MDESC_NODE_NULL, channel_devices_node);
+ if (root == MDESC_NODE_NULL) {
+ printk(KERN_INFO "VIO: No channel-devices MDESC node.\n");
+ mdesc_release(hp);
+ return 0;
+ }
+
+ cdev_node = of_find_node_by_name(NULL, "channel-devices");
+ err = -ENODEV; /* result for every failure below */
+ if (!cdev_node) {
+ printk(KERN_INFO "VIO: No channel-devices OBP node.\n");
+ goto out_release;
+ }
+
+ compat = mdesc_get_property(hp, root, "compatible", &len);
+ if (!compat) {
+ printk(KERN_ERR "VIO: Channel devices lacks compatible "
+ "property\n");
+ goto out_release;
+ }
+ if (!of_find_in_proplist(compat, channel_devices_compat, len)) {
+ printk(KERN_ERR "VIO: Channel devices node lacks (%s) "
+ "compat entry.\n", channel_devices_compat);
+ goto out_release;
+ }
+
+ cfg_handle = mdesc_get_property(hp, root, cfg_handle_prop, NULL);
+ if (!cfg_handle) {
+ printk(KERN_ERR "VIO: Channel devices lacks %s property\n",
+ cfg_handle_prop);
+ goto out_release;
+ }
+
+ cdev_cfg_handle = *cfg_handle;
+
+ root_vdev = vio_create_one(hp, root, NULL);
+ err = -ENODEV;
+ if (!root_vdev) {
+ printk(KERN_ERR "VIO: Coult not create root device.\n");
+ goto out_release;
+ }
+
+ mdesc_register_notifier(&vio_device_notifier);
+ mdesc_register_notifier(&vio_ds_notifier);
+
+ mdesc_release(hp);
+
+ return 0; /* success; err still holds the -ENODEV staged above */
+
+out_release:
+ mdesc_release(hp);
+ return err;
+}
+
+postcore_initcall(vio_init);
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/pci_fire.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/pci_fire.c
@@ -6,9 +6,12 @@
#include
#include
#include
+#include
+#include
#include
#include
+#include
#include "pci_impl.h"
@@ -80,6 +83,440 @@
fire_write(iommu->iommu_control, control);
}
+#ifdef CONFIG_PCI_MSI
+struct pci_msiq_entry { /* one entry of an MSI event queue */
+ u64 word0; /* fields selected by the MSIQ_WORD0_* masks */
+#define MSIQ_WORD0_RESV 0x8000000000000000UL
+#define MSIQ_WORD0_FMT_TYPE 0x7f00000000000000UL
+#define MSIQ_WORD0_FMT_TYPE_SHIFT 56
+#define MSIQ_WORD0_LEN 0x00ffc00000000000UL
+#define MSIQ_WORD0_LEN_SHIFT 46
+#define MSIQ_WORD0_ADDR0 0x00003fff00000000UL
+#define MSIQ_WORD0_ADDR0_SHIFT 32
+#define MSIQ_WORD0_RID 0x00000000ffff0000UL
+#define MSIQ_WORD0_RID_SHIFT 16
+#define MSIQ_WORD0_DATA0 0x000000000000ffffUL
+#define MSIQ_WORD0_DATA0_SHIFT 0
+/* Compared against (FMT_TYPE field >> 3) in pci_msi_prehandler(). */
+#define MSIQ_TYPE_MSG 0x6
+#define MSIQ_TYPE_MSI32 0xb
+#define MSIQ_TYPE_MSI64 0xf
+
+ u64 word1; /* fields selected by the MSIQ_WORD1_* masks */
+#define MSIQ_WORD1_ADDR1 0xffffffffffff0000UL
+#define MSIQ_WORD1_ADDR1_SHIFT 16
+#define MSIQ_WORD1_DATA1 0x000000000000ffffUL
+#define MSIQ_WORD1_DATA1_SHIFT 0
+
+ u64 resv[6]; /* brings the entry to eight u64 words in total */
+};
+
+/* All MSI registers are offset from pbm->pbm_regs */
+#define EVENT_QUEUE_BASE_ADDR_REG 0x010000UL
+#define EVENT_QUEUE_BASE_ADDR_ALL_ONES 0xfffc000000000000UL
+
+#define EVENT_QUEUE_CONTROL_SET(EQ) (0x011000UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_CONTROL_SET_OFLOW 0x0200000000000000UL
+#define EVENT_QUEUE_CONTROL_SET_EN 0x0000100000000000UL
+
+#define EVENT_QUEUE_CONTROL_CLEAR(EQ) (0x011200UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_CONTROL_CLEAR_OF 0x0200000000000000UL
+#define EVENT_QUEUE_CONTROL_CLEAR_E2I 0x0000800000000000UL
+#define EVENT_QUEUE_CONTROL_CLEAR_DIS 0x0000100000000000UL
+
+#define EVENT_QUEUE_STATE(EQ) (0x011400UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_STATE_MASK 0x0000000000000007UL
+#define EVENT_QUEUE_STATE_IDLE 0x0000000000000001UL
+#define EVENT_QUEUE_STATE_ACTIVE 0x0000000000000002UL
+#define EVENT_QUEUE_STATE_ERROR 0x0000000000000004UL
+
+#define EVENT_QUEUE_TAIL(EQ) (0x011600UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_TAIL_OFLOW 0x0200000000000000UL
+#define EVENT_QUEUE_TAIL_VAL 0x000000000000007fUL
+
+#define EVENT_QUEUE_HEAD(EQ) (0x011800UL + (EQ) * 0x8UL)
+#define EVENT_QUEUE_HEAD_VAL 0x000000000000007fUL
+
+#define MSI_MAP(MSI) (0x020000UL + (MSI) * 0x8UL)
+#define MSI_MAP_VALID 0x8000000000000000UL
+#define MSI_MAP_EQWR_N 0x4000000000000000UL
+#define MSI_MAP_EQNUM 0x000000000000003fUL
+
+#define MSI_CLEAR(MSI) (0x028000UL + (MSI) * 0x8UL)
+#define MSI_CLEAR_EQWR_N 0x4000000000000000UL
+
+#define IMONDO_DATA0 0x02C000UL
+#define IMONDO_DATA0_DATA 0xffffffffffffffc0UL
+
+#define IMONDO_DATA1 0x02C008UL
+#define IMONDO_DATA1_DATA 0xffffffffffffffffUL
+
+#define MSI_32BIT_ADDR 0x034000UL
+#define MSI_32BIT_ADDR_VAL 0x00000000ffff0000UL
+
+#define MSI_64BIT_ADDR 0x034008UL
+#define MSI_64BIT_ADDR_VAL 0xffffffffffff0000UL
+
+/* For now this just runs as a pre-handler for the real interrupt handler.
+ * So we just walk through the queue and ACK all the entries, update the
+ * head pointer, and return.
+ *
+ * In the longer term it would be nice to do something more integrated
+ * wherein we can pass in some of this MSI info to the drivers. This
+ * would be most useful for PCIe fabric error messages, although we could
+ * invoke those directly from the loop here in order to pass the info around.
+ */
+static void pci_msi_prehandler(unsigned int ino, void *data1, void *data2)
+{
+ unsigned long msiqid, orig_head, head, type_fmt, type;
+ struct pci_pbm_info *pbm = data1;
+ struct pci_msiq_entry *base, *ep;
+ /* data1 is the pbm, data2 the queue id, as installed at MSI setup. */
+ msiqid = (unsigned long) data2;
+
+ head = fire_read(pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid));
+ /* Queues sit 8192 apart (assuming msi_queues is a byte pointer). */
+ orig_head = head;
+ base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) * 8192));
+ ep = &base[head];
+ while ((ep->word0 & MSIQ_WORD0_FMT_TYPE) != 0) { /* zero ends scan */
+ unsigned long msi_num;
+
+ type_fmt = ((ep->word0 & MSIQ_WORD0_FMT_TYPE) >>
+ MSIQ_WORD0_FMT_TYPE_SHIFT);
+ type = (type_fmt >>3); /* discard the low three bits */
+ if (unlikely(type != MSIQ_TYPE_MSI32 &&
+ type != MSIQ_TYPE_MSI64))
+ goto bad_type;
+ /* DATA0 of an MSI entry is the MSI number to clear. */
+ msi_num = ((ep->word0 & MSIQ_WORD0_DATA0) >>
+ MSIQ_WORD0_DATA0_SHIFT);
+
+ fire_write(pbm->pbm_regs + MSI_CLEAR(msi_num),
+ MSI_CLEAR_EQWR_N);
+
+ /* Clear the entry. */
+ ep->word0 &= ~MSIQ_WORD0_FMT_TYPE;
+
+ /* Go to next entry in ring. */
+ head++;
+ if (head >= pbm->msiq_ent_count)
+ head = 0;
+ ep = &base[head];
+ }
+
+ if (likely(head != orig_head)) {
+ /* ACK entries by updating head pointer. */
+ fire_write(pbm->pbm_regs +
+ EVENT_QUEUE_HEAD(msiqid),
+ head);
+ }
+ return;
+
+bad_type:
+ printk(KERN_EMERG "MSI: Entry has bad type %lx\n", type);
+ return; /* the head register is not updated on this path */
+}
+
+static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
+{
+ unsigned long size, bits_per_ulong;
+ /* Round msi_num bits up to whole longs, then convert to bytes. */
+ bits_per_ulong = sizeof(unsigned long) * 8;
+ size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
+ size /= 8;
+ BUG_ON(size % sizeof(unsigned long));
+
+ pbm->msi_bitmap = kzalloc(size, GFP_KERNEL); /* all bits clear */
+ if (!pbm->msi_bitmap)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void msi_bitmap_free(struct pci_pbm_info *pbm)
+{
+ kfree(pbm->msi_bitmap); /* counterpart of msi_bitmap_alloc() */
+ pbm->msi_bitmap = NULL; /* do not keep the stale pointer */
+}
+
+static int msi_queue_alloc(struct pci_pbm_info *pbm)
+{
+ unsigned long pages, order, i;
+ /* One zeroed 512KB region backs pbm->msi_queues. */
+ order = get_order(512 * 1024);
+ pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
+ if (pages == 0UL) {
+ printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
+ order);
+ return -ENOMEM;
+ }
+ memset((char *)pages, 0, PAGE_SIZE << order);
+ pbm->msi_queues = (void *) pages;
+ /* Hand the physical address of the region to the base register. */
+ fire_write(pbm->pbm_regs + EVENT_QUEUE_BASE_ADDR_REG,
+ (EVENT_QUEUE_BASE_ADDR_ALL_ONES |
+ __pa(pbm->msi_queues)));
+ /* IMONDO data word 0 gets the port id in bits 6 and up. */
+ fire_write(pbm->pbm_regs + IMONDO_DATA0,
+ pbm->portid << 6);
+ fire_write(pbm->pbm_regs + IMONDO_DATA1, 0);
+ /* Program the 32-bit and 64-bit MSI address registers. */
+ fire_write(pbm->pbm_regs + MSI_32BIT_ADDR,
+ pbm->msi32_start);
+ fire_write(pbm->pbm_regs + MSI_64BIT_ADDR,
+ pbm->msi64_start);
+ /* NOTE(review): indexes from 0, not pbm->msiq_first - confirm. */
+ for (i = 0; i < pbm->msiq_num; i++) {
+ fire_write(pbm->pbm_regs + EVENT_QUEUE_HEAD(i), 0);
+ fire_write(pbm->pbm_regs + EVENT_QUEUE_TAIL(i), 0);
+ }
+
+ return 0;
+}
+
+static int alloc_msi(struct pci_pbm_info *pbm)
+{
+ int i;
+ /* Claim the first clear bit; the result is offset by msi_first. */
+ for (i = 0; i < pbm->msi_num; i++) {
+ if (!test_and_set_bit(i, pbm->msi_bitmap))
+ return i + pbm->msi_first;
+ }
+
+ return -ENOENT; /* every one of the msi_num bits was already set */
+}
+
+static void free_msi(struct pci_pbm_info *pbm, int msi_num)
+{
+ msi_num -= pbm->msi_first; /* back to a bitmap index */
+ clear_bit(msi_num, pbm->msi_bitmap); /* alloc_msi() may reuse it */
+}
+
+static int pci_setup_msi_irq(unsigned int *virt_irq_p,
+ struct pci_dev *pdev,
+ struct msi_desc *entry)
+{
+ struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+ unsigned long devino, msiqid, cregs, imap_off;
+ struct msi_msg msg;
+ int msi_num, err;
+ u64 val;
+ /* Allocate an MSI, bind it to an event queue and a virtual IRQ. */
+ *virt_irq_p = 0;
+
+ msi_num = alloc_msi(pbm);
+ if (msi_num < 0)
+ return msi_num;
+
+ cregs = (unsigned long) pbm->pbm_regs;
+
+ err = sun4u_build_msi(pbm->portid, virt_irq_p,
+ pbm->msiq_first_devino,
+ (pbm->msiq_first_devino +
+ pbm->msiq_num),
+ cregs + 0x001000UL,
+ cregs + 0x001400UL);
+ if (err < 0)
+ goto out_err;
+ devino = err; /* a non-negative result is used as the devino */
+
+ imap_off = 0x001000UL + (devino * 0x8UL);
+ /* Sets bits 63 and 6; presumably valid/enable bits - TODO confirm. */
+ val = fire_read(pbm->pbm_regs + imap_off);
+ val |= (1UL << 63) | (1UL << 6);
+ fire_write(pbm->pbm_regs + imap_off, val);
+ /* Queue ids follow devinos, offset from msiq_first_devino. */
+ msiqid = ((devino - pbm->msiq_first_devino) +
+ pbm->msiq_first);
+
+ fire_write(pbm->pbm_regs +
+ EVENT_QUEUE_CONTROL_SET(msiqid),
+ EVENT_QUEUE_CONTROL_SET_EN);
+ /* Point the MSI at that queue, clear it, then mark it valid. */
+ val = fire_read(pbm->pbm_regs + MSI_MAP(msi_num));
+ val &= ~(MSI_MAP_EQNUM);
+ val |= msiqid;
+ fire_write(pbm->pbm_regs + MSI_MAP(msi_num), val);
+
+ fire_write(pbm->pbm_regs + MSI_CLEAR(msi_num),
+ MSI_CLEAR_EQWR_N);
+
+ val = fire_read(pbm->pbm_regs + MSI_MAP(msi_num));
+ val |= MSI_MAP_VALID;
+ fire_write(pbm->pbm_regs + MSI_MAP(msi_num), val);
+
+ sparc64_set_msi(*virt_irq_p, msi_num);
+ /* Address comes from the 64- or 32-bit start; data is the MSI. */
+ if (entry->msi_attrib.is_64) {
+ msg.address_hi = pbm->msi64_start >> 32;
+ msg.address_lo = pbm->msi64_start & 0xffffffff;
+ } else {
+ msg.address_hi = 0;
+ msg.address_lo = pbm->msi32_start;
+ }
+ msg.data = msi_num;
+
+ set_irq_msi(*virt_irq_p, entry);
+ write_msi_msg(*virt_irq_p, &msg);
+ /* The pre-handler receives pbm and the queue id as its data. */
+ irq_install_pre_handler(*virt_irq_p,
+ pci_msi_prehandler,
+ pbm, (void *) msiqid);
+
+ return 0;
+
+out_err:
+ free_msi(pbm, msi_num); /* give the MSI number back */
+ return err;
+}
+
+static void pci_teardown_msi_irq(unsigned int virt_irq,
+ struct pci_dev *pdev)
+{
+ struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+ unsigned long msiqid, msi_num;
+ u64 val;
+ /* Invalidate the MSI mapping, disable its queue, free the MSI. */
+ msi_num = sparc64_get_msi(virt_irq);
+
+ val = fire_read(pbm->pbm_regs + MSI_MAP(msi_num));
+ /* The queue number is recovered from the MSI map register. */
+ msiqid = (val & MSI_MAP_EQNUM);
+
+ val &= ~MSI_MAP_VALID;
+ fire_write(pbm->pbm_regs + MSI_MAP(msi_num), val);
+
+ fire_write(pbm->pbm_regs + EVENT_QUEUE_CONTROL_CLEAR(msiqid),
+ EVENT_QUEUE_CONTROL_CLEAR_DIS);
+
+ free_msi(pbm, msi_num);
+
+ /* The sun4u_destroy_msi() will liberate the devino and thus the MSIQ
+ * allocation.
+ */
+ sun4u_destroy_msi(virt_irq);
+}
+
+static void pci_fire_msi_init(struct pci_pbm_info *pbm)
+{
+ const u32 *val;
+ int len;
+ /* Read the MSI layout from OBP properties; any mismatch means no MSI. */
+ val = of_get_property(pbm->prom_node, "#msi-eqs", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msiq_num = *val;
+ if (pbm->msiq_num) {
+ const struct msiq_prop {
+ u32 first_msiq;
+ u32 num_msiq;
+ u32 first_devino;
+ } *mqp;
+ const struct msi_range_prop {
+ u32 first_msi;
+ u32 num_msi;
+ } *mrng;
+ const struct addr_range_prop {
+ u32 msi32_high;
+ u32 msi32_low;
+ u32 msi32_len;
+ u32 msi64_high;
+ u32 msi64_low;
+ u32 msi64_len;
+ } *arng;
+ /* msi-eq-size becomes the ring length used by the pre-handler. */
+ val = of_get_property(pbm->prom_node, "msi-eq-size", &len);
+ if (!val || len != 4)
+ goto no_msi;
+
+ pbm->msiq_ent_count = *val;
+
+ mqp = of_get_property(pbm->prom_node,
+ "msi-eq-to-devino", &len);
+ if (!mqp) /* fall back to the alternate property name */
+ mqp = of_get_property(pbm->prom_node,
+ "msi-eq-devino", &len);
+ if (!mqp || len != sizeof(struct msiq_prop))
+ goto no_msi;
+
+ pbm->msiq_first = mqp->first_msiq;
+ pbm->msiq_first_devino = mqp->first_devino;
+
+ val = of_get_property(pbm->prom_node, "#msi", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msi_num = *val;
+
+ mrng = of_get_property(pbm->prom_node, "msi-ranges", &len);
+ if (!mrng || len != sizeof(struct msi_range_prop))
+ goto no_msi;
+ pbm->msi_first = mrng->first_msi;
+
+ val = of_get_property(pbm->prom_node, "msi-data-mask", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msi_data_mask = *val;
+
+ val = of_get_property(pbm->prom_node, "msix-data-width", &len);
+ if (!val || len != 4)
+ goto no_msi;
+ pbm->msix_data_width = *val;
+
+ arng = of_get_property(pbm->prom_node, "msi-address-ranges",
+ &len);
+ if (!arng || len != sizeof(struct addr_range_prop))
+ goto no_msi;
+ pbm->msi32_start = ((u64)arng->msi32_high << 32) |
+ (u64) arng->msi32_low;
+ pbm->msi64_start = ((u64)arng->msi64_high << 32) |
+ (u64) arng->msi64_low;
+ pbm->msi32_len = arng->msi32_len;
+ pbm->msi64_len = arng->msi64_len;
+ /* Allocation failures also end up on the no_msi path. */
+ if (msi_bitmap_alloc(pbm))
+ goto no_msi;
+
+ if (msi_queue_alloc(pbm)) {
+ msi_bitmap_free(pbm);
+ goto no_msi;
+ }
+
+ printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
+ "devino[0x%x]\n",
+ pbm->name,
+ pbm->msiq_first, pbm->msiq_num,
+ pbm->msiq_ent_count,
+ pbm->msiq_first_devino);
+ printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
+ "width[%u]\n",
+ pbm->name,
+ pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
+ pbm->msix_data_width);
+ printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
+ "addr64[0x%lx:0x%x]\n",
+ pbm->name,
+ pbm->msi32_start, pbm->msi32_len,
+ pbm->msi64_start, pbm->msi64_len);
+ printk(KERN_INFO "%s: MSI queues at RA [%016lx]\n",
+ pbm->name,
+ __pa(pbm->msi_queues));
+ }
+ pbm->setup_msi_irq = pci_setup_msi_irq;
+ pbm->teardown_msi_irq = pci_teardown_msi_irq;
+ /* NOTE(review): hooks above are set even if #msi-eqs read as 0. */
+ return;
+
+no_msi:
+ pbm->msiq_num = 0; /* the hooks are not installed on this path */
+ printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
+}
+#else /* CONFIG_PCI_MSI */
+static void pci_fire_msi_init(struct pci_pbm_info *pbm)
+{
+}
+#endif /* !(CONFIG_PCI_MSI) */
+
/* Based at pbm->controller_regs */
#define FIRE_PARITY_CONTROL 0x470010UL
#define FIRE_PARITY_ENAB 0x8000000000000000UL
@@ -204,6 +641,7 @@
pci_fire_hw_init(pbm);
pci_fire_pbm_iommu_init(pbm);
+ pci_fire_msi_init(pbm);
}
static inline int portid_compare(u32 x, u32 y)
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/Makefile
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/Makefile
@@ -18,7 +18,7 @@
obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \
pci_psycho.o pci_sabre.o pci_schizo.o \
pci_sun4v.o pci_sun4v_asm.o pci_fire.o
-obj-$(CONFIG_SMP) += smp.o trampoline.o
+obj-$(CONFIG_SMP) += smp.o trampoline.o hvtramp.o
obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o
obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o
obj-$(CONFIG_BINFMT_AOUT32) += binfmt_aout32.o
@@ -26,6 +26,7 @@
obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o
obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o
obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_AUDIT)$(CONFIG_SPARC32_COMPAT) += compat_audit.o
obj-y += $(obj-yy)
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/sys_sparc.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/sys_sparc.c
@@ -436,7 +436,7 @@
asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second,
unsigned long third, void __user *ptr, long fifth)
{
- int err;
+ long err;
/* No need for backward compatibility. We can start fresh... */
if (call <= SEMCTL) {
@@ -453,16 +453,9 @@
err = sys_semget(first, (int)second, (int)third);
goto out;
case SEMCTL: {
- union semun fourth;
- err = -EINVAL;
- if (!ptr)
- goto out;
- err = -EFAULT;
- if (get_user(fourth.__pad,
- (void __user * __user *) ptr))
- goto out;
- err = sys_semctl(first, (int)second | IPC_64,
- (int)third, fourth);
+ err = sys_semctl(first, third,
+ (int)second | IPC_64,
+ (union semun) ptr);
goto out;
}
default:
@@ -555,13 +548,13 @@
if (len >= STACK_TOP32)
return -EINVAL;
- if ((flags & MAP_FIXED) && addr > STACK_TOP32 - len)
+ if (addr > STACK_TOP32 - len)
return -EINVAL;
} else {
if (len >= VA_EXCLUDE_START)
return -EINVAL;
- if ((flags & MAP_FIXED) && invalid_64bit_range(addr, len))
+ if (invalid_64bit_range(addr, len))
return -EINVAL;
}
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/prom.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/prom.c
@@ -276,6 +276,21 @@
}
EXPORT_SYMBOL(of_set_property);
+int of_find_in_proplist(const char *list, const char *match, int len)
+{
+ while (len > 0) {
+ int l;
+ /* 'list' is walked as back-to-back NUL-terminated strings. */
+ if (!strcmp(list, match))
+ return 1;
+ l = strlen(list) + 1; /* step over the terminator too */
+ list += l;
+ len -= l;
+ }
+ return 0; /* no entry within 'len' bytes equals 'match' */
+}
+EXPORT_SYMBOL(of_find_in_proplist);
+
static unsigned int prom_early_allocated;
static void * __init prom_early_alloc(unsigned long size)
@@ -1200,7 +1215,8 @@
if (!strcmp(dp->name, "fhc") &&
!strcmp(dp->parent->name, "central"))
return central_irq_trans_init(dp);
- if (!strcmp(dp->name, "virtual-devices"))
+ if (!strcmp(dp->name, "virtual-devices") ||
+ !strcmp(dp->name, "niu"))
return sun4v_vdev_irq_trans_init(dp);
}
@@ -1737,8 +1753,12 @@
ncpus_probed++;
#ifdef CONFIG_SMP
- if (cpuid >= NR_CPUS)
+ if (cpuid >= NR_CPUS) {
+ printk(KERN_WARNING "Ignoring CPU %d which is "
+ ">= NR_CPUS (%d)\n",
+ cpuid, NR_CPUS);
continue;
+ }
#else
/* On uniprocessor we only want the values for the
* real physical cpu the kernel booted onto, however
@@ -1808,13 +1828,67 @@
#ifdef CONFIG_SMP
cpu_set(cpuid, cpu_present_map);
- cpu_set(cpuid, phys_cpu_present_map);
+ cpu_set(cpuid, cpu_possible_map);
#endif
}
smp_fill_in_sib_core_maps();
}
+struct device_node *of_console_device;
+EXPORT_SYMBOL(of_console_device);
+
+char *of_console_path;
+EXPORT_SYMBOL(of_console_path);
+
+char *of_console_options;
+EXPORT_SYMBOL(of_console_options);
+
+static void __init of_console_init(void)
+{
+ char *msg = "OF stdout device is: %s\n";
+ struct device_node *dp;
+ const char *type;
+ phandle node;
+ /* Record path, options and device node of the firmware stdout. */
+ of_console_path = prom_early_alloc(256);
+ if (prom_ihandle2path(prom_stdout, of_console_path, 256) < 0) {
+ prom_printf("Cannot obtain path of stdout.\n");
+ prom_halt();
+ }
+ of_console_options = strrchr(of_console_path, ':');
+ if (of_console_options) {
+ of_console_options++; /* text after the last ':' */
+ if (*of_console_options == '\0')
+ of_console_options = NULL;
+ }
+
+ node = prom_inst2pkg(prom_stdout);
+ if (!node) {
+ prom_printf("Cannot resolve stdout node from "
+ "instance %08x.\n", prom_stdout);
+ prom_halt();
+ }
+ /* A node missing from the tree is handled like a missing property. */
+ dp = of_find_node_by_phandle(node);
+ type = dp ? of_get_property(dp, "device_type", NULL) : NULL;
+ if (!type) {
+ prom_printf("Console stdout lacks device_type property.\n");
+ prom_halt();
+ }
+
+ if (strcmp(type, "display") && strcmp(type, "serial")) {
+ prom_printf("Console device_type is neither display "
+ "nor serial.\n");
+ prom_halt();
+ }
+
+ of_console_device = dp;
+
+ prom_printf(msg, of_console_path);
+ printk(msg, of_console_path);
+}
+
void __init prom_build_devicetree(void)
{
struct device_node **nextp;
@@ -1827,6 +1901,8 @@
allnodes->child = build_tree(allnodes,
prom_getchild(allnodes->node),
&nextp);
+ of_console_init();
+
printk("PROM: Built device tree with %u bytes of memory.\n",
prom_early_allocated);
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/mdesc.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/mdesc.c
@@ -6,6 +6,10 @@
#include
#include
#include
+#include
+#include
+#include
+#include
#include
#include
@@ -29,7 +33,7 @@
u32 node_sz; /* node block size */
u32 name_sz; /* name block size */
u32 data_sz; /* data block size */
-};
+} __attribute__((aligned(16)));
struct mdesc_elem {
u8 tag;
@@ -53,338 +57,536 @@
} d;
};
-static struct mdesc_hdr *main_mdesc;
-static struct mdesc_node *allnodes;
+struct mdesc_mem_ops {
+ struct mdesc_handle *(*alloc)(unsigned int mdesc_size);
+ void (*free)(struct mdesc_handle *handle);
+};
+
+struct mdesc_handle {
+ struct list_head list;
+ struct mdesc_mem_ops *mops;
+ void *self_base;
+ atomic_t refcnt;
+ unsigned int handle_size;
+ struct mdesc_hdr mdesc;
+};
-static struct mdesc_node *allnodes_tail;
-static unsigned int unique_id;
+static void mdesc_handle_init(struct mdesc_handle *hp,
+ unsigned int handle_size,
+ void *base)
+{
+ BUG_ON(((unsigned long)&hp->mdesc) & (16UL - 1));
-static struct mdesc_node **mdesc_hash;
-static unsigned int mdesc_hash_size;
+ memset(hp, 0, handle_size);
+ INIT_LIST_HEAD(&hp->list);
+ hp->self_base = base;
+ atomic_set(&hp->refcnt, 1);
+ hp->handle_size = handle_size;
+}
-static inline unsigned int node_hashfn(u64 node)
+static struct mdesc_handle * __init mdesc_bootmem_alloc(unsigned int mdesc_size)
{
- return ((unsigned int) (node ^ (node >> 8) ^ (node >> 16)))
- & (mdesc_hash_size - 1);
+ struct mdesc_handle *hp;
+ unsigned int handle_size, alloc_size;
+
+ handle_size = (sizeof(struct mdesc_handle) -
+ sizeof(struct mdesc_hdr) +
+ mdesc_size);
+ alloc_size = PAGE_ALIGN(handle_size);
+
+ hp = __alloc_bootmem(alloc_size, PAGE_SIZE, 0UL);
+ if (hp)
+ mdesc_handle_init(hp, handle_size, hp);
+
+ return hp;
}
-static inline void hash_node(struct mdesc_node *mp)
+static void mdesc_bootmem_free(struct mdesc_handle *hp)
{
- struct mdesc_node **head = &mdesc_hash[node_hashfn(mp->node)];
+ unsigned int alloc_size, handle_size = hp->handle_size;
+ unsigned long start, end;
- mp->hash_next = *head;
- *head = mp;
+ BUG_ON(atomic_read(&hp->refcnt) != 0);
+ BUG_ON(!list_empty(&hp->list));
- if (allnodes_tail) {
- allnodes_tail->allnodes_next = mp;
- allnodes_tail = mp;
- } else {
- allnodes = allnodes_tail = mp;
+ alloc_size = PAGE_ALIGN(handle_size);
+
+ start = (unsigned long) hp;
+ end = start + alloc_size;
+
+ while (start < end) {
+ struct page *p;
+
+ p = virt_to_page(start);
+ ClearPageReserved(p);
+ __free_page(p);
+ start += PAGE_SIZE;
}
}
-static struct mdesc_node *find_node(u64 node)
+static struct mdesc_mem_ops bootmem_mdesc_ops = {
+ .alloc = mdesc_bootmem_alloc,
+ .free = mdesc_bootmem_free,
+};
+
+static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
{
- struct mdesc_node *mp = mdesc_hash[node_hashfn(node)];
+ unsigned int handle_size;
+ void *base;
- while (mp) {
- if (mp->node == node)
- return mp;
+ handle_size = (sizeof(struct mdesc_handle) -
+ sizeof(struct mdesc_hdr) +
+ mdesc_size);
+
+ base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL);
+ if (base) {
+ struct mdesc_handle *hp;
+ unsigned long addr;
+
+ addr = (unsigned long)base;
+ addr = (addr + 15UL) & ~15UL;
+ hp = (struct mdesc_handle *) addr;
- mp = mp->hash_next;
+ mdesc_handle_init(hp, handle_size, base);
+ return hp;
}
+
return NULL;
}
-struct property *md_find_property(const struct mdesc_node *mp,
- const char *name,
- int *lenp)
+static void mdesc_kfree(struct mdesc_handle *hp)
{
- struct property *pp;
+ BUG_ON(atomic_read(&hp->refcnt) != 0);
+ BUG_ON(!list_empty(&hp->list));
- for (pp = mp->properties; pp != 0; pp = pp->next) {
- if (strcasecmp(pp->name, name) == 0) {
- if (lenp)
- *lenp = pp->length;
- break;
- }
- }
- return pp;
+ kfree(hp->self_base);
}
-EXPORT_SYMBOL(md_find_property);
-/*
- * Find a property with a given name for a given node
- * and return the value.
- */
-const void *md_get_property(const struct mdesc_node *mp, const char *name,
- int *lenp)
+static struct mdesc_mem_ops kmalloc_mdesc_memops = {
+ .alloc = mdesc_kmalloc,
+ .free = mdesc_kfree,
+};
+
+static struct mdesc_handle *mdesc_alloc(unsigned int mdesc_size,
+ struct mdesc_mem_ops *mops)
{
- struct property *pp = md_find_property(mp, name, lenp);
- return pp ? pp->value : NULL;
+ struct mdesc_handle *hp = mops->alloc(mdesc_size);
+
+ if (hp)
+ hp->mops = mops;
+
+ return hp;
}
-EXPORT_SYMBOL(md_get_property);
-struct mdesc_node *md_find_node_by_name(struct mdesc_node *from,
- const char *name)
+static void mdesc_free(struct mdesc_handle *hp)
{
- struct mdesc_node *mp;
+ hp->mops->free(hp);
+}
- mp = from ? from->allnodes_next : allnodes;
- for (; mp != NULL; mp = mp->allnodes_next) {
- if (strcmp(mp->name, name) == 0)
- break;
+static struct mdesc_handle *cur_mdesc;
+static LIST_HEAD(mdesc_zombie_list);
+static DEFINE_SPINLOCK(mdesc_lock);
+
+struct mdesc_handle *mdesc_grab(void)
+{
+ struct mdesc_handle *hp;
+ unsigned long flags;
+ /* Take a reference on the current machine description, if any. */
+ spin_lock_irqsave(&mdesc_lock, flags);
+ hp = cur_mdesc;
+ if (hp)
+ atomic_inc(&hp->refcnt);
+ spin_unlock_irqrestore(&mdesc_lock, flags);
+
+ return hp; /* NULL when cur_mdesc is not set */
+}
+EXPORT_SYMBOL(mdesc_grab);
+
+void mdesc_release(struct mdesc_handle *hp)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdesc_lock, flags);
+ if (atomic_dec_and_test(&hp->refcnt)) {
+ list_del_init(&hp->list);
+ hp->mops->free(hp);
}
- return mp;
+ spin_unlock_irqrestore(&mdesc_lock, flags);
}
-EXPORT_SYMBOL(md_find_node_by_name);
+EXPORT_SYMBOL(mdesc_release);
-static unsigned int mdesc_early_allocated;
+static DEFINE_MUTEX(mdesc_mutex);
+static struct mdesc_notifier_client *client_list;
-static void * __init mdesc_early_alloc(unsigned long size)
+void mdesc_register_notifier(struct mdesc_notifier_client *client)
{
- void *ret;
+ u64 node;
- ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL);
- if (ret == NULL) {
- prom_printf("MDESC: alloc of %lu bytes failed.\n", size);
- prom_halt();
+ mutex_lock(&mdesc_mutex);
+ client->next = client_list;
+ client_list = client;
+
+ mdesc_for_each_node_by_name(cur_mdesc, node, client->node_name)
+ client->add(cur_mdesc, node);
+
+ mutex_unlock(&mdesc_mutex);
+}
+
+static const u64 *parent_cfg_handle(struct mdesc_handle *hp, u64 node)
+{
+ const u64 *id;
+ u64 a;
+
+ id = NULL;
+ mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
+ u64 target;
+
+ target = mdesc_arc_target(hp, a);
+ id = mdesc_get_property(hp, target,
+ "cfg-handle", NULL);
+ if (id)
+ break;
}
- memset(ret, 0, size);
+ return id;
+}
- mdesc_early_allocated += size;
+/* Run 'func' on nodes which are in A but not in B. */
+static void invoke_on_missing(const char *name,
+ struct mdesc_handle *a,
+ struct mdesc_handle *b,
+ void (*func)(struct mdesc_handle *, u64))
+{
+ u64 node;
- return ret;
+ mdesc_for_each_node_by_name(a, node, name) {
+ int found = 0, is_vdc_port = 0;
+ const char *name_prop;
+ const u64 *id;
+ u64 fnode;
+ /* vdc-port is keyed by a parent's cfg-handle, others by "id". */
+ name_prop = mdesc_get_property(a, node, "name", NULL);
+ if (name_prop && !strcmp(name_prop, "vdc-port")) {
+ is_vdc_port = 1;
+ id = parent_cfg_handle(a, node);
+ } else
+ id = mdesc_get_property(a, node, "id", NULL);
+
+ if (!id) {
+ printk(KERN_ERR "MD: Cannot find ID for %s node.\n",
+ (name_prop ? name_prop : name));
+ continue;
+ }
+
+ mdesc_for_each_node_by_name(b, fnode, name) {
+ const u64 *fid;
+
+ if (is_vdc_port) {
+ name_prop = mdesc_get_property(b, fnode,
+ "name", NULL);
+ if (!name_prop ||
+ strcmp(name_prop, "vdc-port"))
+ continue;
+ fid = parent_cfg_handle(b, fnode);
+ if (!fid) {
+ printk(KERN_ERR "MD: Cannot find ID "
+ "for vdc-port node.\n");
+ continue;
+ }
+ } else
+ fid = mdesc_get_property(b, fnode,
+ "id", NULL);
+ /* A node in 'b' with no "id" property can never match. */
+ if (fid && *id == *fid) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ func(a, node);
+ }
}
-static unsigned int __init count_arcs(struct mdesc_elem *ep)
+static void notify_one(struct mdesc_notifier_client *p,
+ struct mdesc_handle *old_hp,
+ struct mdesc_handle *new_hp)
{
- unsigned int ret = 0;
+ invoke_on_missing(p->node_name, old_hp, new_hp, p->remove);
+ invoke_on_missing(p->node_name, new_hp, old_hp, p->add);
+}
- ep++;
- while (ep->tag != MD_NODE_END) {
- if (ep->tag == MD_PROP_ARC)
- ret++;
- ep++;
+static void mdesc_notify_clients(struct mdesc_handle *old_hp,
+ struct mdesc_handle *new_hp)
+{
+ struct mdesc_notifier_client *p = client_list;
+
+ while (p) {
+ notify_one(p, old_hp, new_hp);
+ p = p->next;
}
- return ret;
}
-static void __init mdesc_node_alloc(u64 node, struct mdesc_elem *ep, const char *names)
+void mdesc_update(void)
{
- unsigned int num_arcs = count_arcs(ep);
- struct mdesc_node *mp;
+ unsigned long len, real_len, status;
+ struct mdesc_handle *hp, *orig_hp;
+ unsigned long flags;
+
+ mutex_lock(&mdesc_mutex);
- mp = mdesc_early_alloc(sizeof(*mp) +
- (num_arcs * sizeof(struct mdesc_arc)));
- mp->name = names + ep->name_offset;
- mp->node = node;
- mp->unique_id = unique_id++;
- mp->num_arcs = num_arcs;
+ (void) sun4v_mach_desc(0UL, 0UL, &len);
+
+ hp = mdesc_alloc(len, &kmalloc_mdesc_memops);
+ if (!hp) {
+ printk(KERN_ERR "MD: mdesc alloc fails\n");
+ goto out;
+ }
+
+ status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
+ if (status != HV_EOK || real_len > len) {
+ printk(KERN_ERR "MD: mdesc reread fails with %lu\n",
+ status);
+ atomic_dec(&hp->refcnt);
+ mdesc_free(hp);
+ goto out;
+ }
- hash_node(mp);
+ spin_lock_irqsave(&mdesc_lock, flags);
+ orig_hp = cur_mdesc;
+ cur_mdesc = hp;
+ spin_unlock_irqrestore(&mdesc_lock, flags);
+
+ mdesc_notify_clients(orig_hp, hp);
+
+ spin_lock_irqsave(&mdesc_lock, flags);
+ if (atomic_dec_and_test(&orig_hp->refcnt))
+ mdesc_free(orig_hp);
+ else
+ list_add(&orig_hp->list, &mdesc_zombie_list);
+ spin_unlock_irqrestore(&mdesc_lock, flags);
+
+out:
+ mutex_unlock(&mdesc_mutex);
}
-static inline struct mdesc_elem *node_block(struct mdesc_hdr *mdesc)
+static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc)
{
return (struct mdesc_elem *) (mdesc + 1);
}
-static inline void *name_block(struct mdesc_hdr *mdesc)
+static void *name_block(struct mdesc_hdr *mdesc)
{
return ((void *) node_block(mdesc)) + mdesc->node_sz;
}
-static inline void *data_block(struct mdesc_hdr *mdesc)
+static void *data_block(struct mdesc_hdr *mdesc)
{
return ((void *) name_block(mdesc)) + mdesc->name_sz;
}
-/* In order to avoid recursion (the graph can be very deep) we use a
- * two pass algorithm. First we allocate all the nodes and hash them.
- * Then we iterate over each node, filling in the arcs and properties.
- */
-static void __init build_all_nodes(struct mdesc_hdr *mdesc)
+u64 mdesc_node_by_name(struct mdesc_handle *hp,
+ u64 from_node, const char *name)
{
- struct mdesc_elem *start, *ep;
- struct mdesc_node *mp;
- const char *names;
- void *data;
- u64 last_node;
+ struct mdesc_elem *ep = node_block(&hp->mdesc);
+ const char *names = name_block(&hp->mdesc);
+ u64 last_node = hp->mdesc.node_sz / 16;
+ u64 ret;
+
+ if (from_node == MDESC_NODE_NULL) {
+ ret = from_node = 0;
+ } else if (from_node >= last_node) {
+ return MDESC_NODE_NULL;
+ } else {
+ ret = ep[from_node].d.val;
+ }
- start = ep = node_block(mdesc);
- last_node = mdesc->node_sz / 16;
+ while (ret < last_node) {
+ if (ep[ret].tag != MD_NODE)
+ return MDESC_NODE_NULL;
+ if (!strcmp(names + ep[ret].name_offset, name))
+ break;
+ ret = ep[ret].d.val;
+ }
+ if (ret >= last_node)
+ ret = MDESC_NODE_NULL;
+ return ret;
+}
+EXPORT_SYMBOL(mdesc_node_by_name);
- names = name_block(mdesc);
+const void *mdesc_get_property(struct mdesc_handle *hp, u64 node,
+ const char *name, int *lenp)
+{
+ const char *names = name_block(&hp->mdesc);
+ u64 last_node = hp->mdesc.node_sz / 16;
+ void *data = data_block(&hp->mdesc);
+ struct mdesc_elem *ep;
- while (1) {
- u64 node = ep - start;
+ if (node == MDESC_NODE_NULL || node >= last_node)
+ return NULL;
- if (ep->tag == MD_LIST_END)
+ ep = node_block(&hp->mdesc) + node;
+ ep++;
+ for (; ep->tag != MD_NODE_END; ep++) {
+ void *val = NULL;
+ int len = 0;
+
+ switch (ep->tag) {
+ case MD_PROP_VAL:
+ val = &ep->d.val;
+ len = 8;
break;
- if (ep->tag != MD_NODE) {
- prom_printf("MDESC: Inconsistent element list.\n");
- prom_halt();
- }
-
- mdesc_node_alloc(node, ep, names);
+ case MD_PROP_STR:
+ case MD_PROP_DATA:
+ val = data + ep->d.data.data_offset;
+ len = ep->d.data.data_len;
+ break;
- if (ep->d.val >= last_node) {
- printk("MDESC: Warning, early break out of node scan.\n");
- printk("MDESC: Next node [%lu] last_node [%lu].\n",
- node, last_node);
+ default:
break;
}
+ if (!val)
+ continue;
- ep = start + ep->d.val;
+ if (!strcmp(names + ep->name_offset, name)) {
+ if (lenp)
+ *lenp = len;
+ return val;
+ }
}
- data = data_block(mdesc);
- for (mp = allnodes; mp; mp = mp->allnodes_next) {
- struct mdesc_elem *ep = start + mp->node;
- struct property **link = &mp->properties;
- unsigned int this_arc = 0;
-
- ep++;
- while (ep->tag != MD_NODE_END) {
- switch (ep->tag) {
- case MD_PROP_ARC: {
- struct mdesc_node *target;
-
- if (this_arc >= mp->num_arcs) {
- prom_printf("MDESC: ARC overrun [%u:%u]\n",
- this_arc, mp->num_arcs);
- prom_halt();
- }
- target = find_node(ep->d.val);
- if (!target) {
- printk("MDESC: Warning, arc points to "
- "missing node, ignoring.\n");
- break;
- }
- mp->arcs[this_arc].name =
- (names + ep->name_offset);
- mp->arcs[this_arc].arc = target;
- this_arc++;
- break;
- }
+ return NULL;
+}
+EXPORT_SYMBOL(mdesc_get_property);
- case MD_PROP_VAL:
- case MD_PROP_STR:
- case MD_PROP_DATA: {
- struct property *p = mdesc_early_alloc(sizeof(*p));
-
- p->unique_id = unique_id++;
- p->name = (char *) names + ep->name_offset;
- if (ep->tag == MD_PROP_VAL) {
- p->value = &ep->d.val;
- p->length = 8;
- } else {
- p->value = data + ep->d.data.data_offset;
- p->length = ep->d.data.data_len;
- }
- *link = p;
- link = &p->next;
- break;
- }
+u64 mdesc_next_arc(struct mdesc_handle *hp, u64 from, const char *arc_type)
+{
+ struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+ const char *names = name_block(&hp->mdesc);
+ u64 last_node = hp->mdesc.node_sz / 16;
- case MD_NOOP:
- break;
+ if (from == MDESC_NODE_NULL || from >= last_node)
+ return MDESC_NODE_NULL;
- default:
- printk("MDESC: Warning, ignoring unknown tag type %02x\n",
- ep->tag);
- }
- ep++;
- }
+ ep = base + from;
+
+ ep++;
+ for (; ep->tag != MD_NODE_END; ep++) {
+ if (ep->tag != MD_PROP_ARC)
+ continue;
+
+ if (strcmp(names + ep->name_offset, arc_type))
+ continue;
+
+ return ep - base;
}
+
+ return MDESC_NODE_NULL;
}
+EXPORT_SYMBOL(mdesc_next_arc);
-static unsigned int __init count_nodes(struct mdesc_hdr *mdesc)
+u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc)
{
- struct mdesc_elem *ep = node_block(mdesc);
- struct mdesc_elem *end;
- unsigned int cnt = 0;
-
- end = ((void *)ep) + mdesc->node_sz;
- while (ep < end) {
- if (ep->tag == MD_NODE)
- cnt++;
- ep++;
- }
- return cnt;
+ struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+
+ ep = base + arc;
+
+ return ep->d.val;
}
+EXPORT_SYMBOL(mdesc_arc_target);
+
+const char *mdesc_node_name(struct mdesc_handle *hp, u64 node)
+{
+ struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+ const char *names = name_block(&hp->mdesc);
+ u64 last_node = hp->mdesc.node_sz / 16; /* presumably element size */
+ /* Reject the null node and indices past the node block. */
+ if (node == MDESC_NODE_NULL || node >= last_node)
+ return NULL;
+
+ ep = base + node;
+ if (ep->tag != MD_NODE) /* index is not the start of a node */
+ return NULL;
+
+ return names + ep->name_offset; /* string inside the name block */
+}
+EXPORT_SYMBOL(mdesc_node_name);
static void __init report_platform_properties(void)
{
- struct mdesc_node *pn = md_find_node_by_name(NULL, "platform");
+ struct mdesc_handle *hp = mdesc_grab();
+ u64 pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
const char *s;
const u64 *v;
- if (!pn) {
+ if (pn == MDESC_NODE_NULL) {
prom_printf("No platform node in machine-description.\n");
prom_halt();
}
- s = md_get_property(pn, "banner-name", NULL);
+ s = mdesc_get_property(hp, pn, "banner-name", NULL);
printk("PLATFORM: banner-name [%s]\n", s);
- s = md_get_property(pn, "name", NULL);
+ s = mdesc_get_property(hp, pn, "name", NULL);
printk("PLATFORM: name [%s]\n", s);
- v = md_get_property(pn, "hostid", NULL);
+ v = mdesc_get_property(hp, pn, "hostid", NULL);
if (v)
printk("PLATFORM: hostid [%08lx]\n", *v);
- v = md_get_property(pn, "serial#", NULL);
+ v = mdesc_get_property(hp, pn, "serial#", NULL);
if (v)
printk("PLATFORM: serial# [%08lx]\n", *v);
- v = md_get_property(pn, "stick-frequency", NULL);
+ v = mdesc_get_property(hp, pn, "stick-frequency", NULL);
printk("PLATFORM: stick-frequency [%08lx]\n", *v);
- v = md_get_property(pn, "mac-address", NULL);
+ v = mdesc_get_property(hp, pn, "mac-address", NULL);
if (v)
printk("PLATFORM: mac-address [%lx]\n", *v);
- v = md_get_property(pn, "watchdog-resolution", NULL);
+ v = mdesc_get_property(hp, pn, "watchdog-resolution", NULL);
if (v)
printk("PLATFORM: watchdog-resolution [%lu ms]\n", *v);
- v = md_get_property(pn, "watchdog-max-timeout", NULL);
+ v = mdesc_get_property(hp, pn, "watchdog-max-timeout", NULL);
if (v)
printk("PLATFORM: watchdog-max-timeout [%lu ms]\n", *v);
- v = md_get_property(pn, "max-cpus", NULL);
+ v = mdesc_get_property(hp, pn, "max-cpus", NULL);
if (v)
printk("PLATFORM: max-cpus [%lu]\n", *v);
-}
-static int inline find_in_proplist(const char *list, const char *match, int len)
-{
- while (len > 0) {
- int l;
+#ifdef CONFIG_SMP
+ {
+ int max_cpu, i;
- if (!strcmp(list, match))
- return 1;
- l = strlen(list) + 1;
- list += l;
- len -= l;
+ if (v) {
+ max_cpu = *v;
+ if (max_cpu > NR_CPUS)
+ max_cpu = NR_CPUS;
+ } else {
+ max_cpu = NR_CPUS;
+ }
+ for (i = 0; i < max_cpu; i++)
+ cpu_set(i, cpu_possible_map);
}
- return 0;
+#endif
+
+ mdesc_release(hp);
}
-static void __init fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_node *mp)
+static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
+ struct mdesc_handle *hp,
+ u64 mp)
{
- const u64 *level = md_get_property(mp, "level", NULL);
- const u64 *size = md_get_property(mp, "size", NULL);
- const u64 *line_size = md_get_property(mp, "line-size", NULL);
+ const u64 *level = mdesc_get_property(hp, mp, "level", NULL);
+ const u64 *size = mdesc_get_property(hp, mp, "size", NULL);
+ const u64 *line_size = mdesc_get_property(hp, mp, "line-size", NULL);
const char *type;
int type_len;
- type = md_get_property(mp, "type", &type_len);
+ type = mdesc_get_property(hp, mp, "type", &type_len);
switch (*level) {
case 1:
- if (find_in_proplist(type, "instn", type_len)) {
+ if (of_find_in_proplist(type, "instn", type_len)) {
c->icache_size = *size;
c->icache_line_size = *line_size;
- } else if (find_in_proplist(type, "data", type_len)) {
+ } else if (of_find_in_proplist(type, "data", type_len)) {
c->dcache_size = *size;
c->dcache_line_size = *line_size;
}
@@ -400,48 +602,45 @@
}
if (*level == 1) {
- unsigned int i;
-
- for (i = 0; i < mp->num_arcs; i++) {
- struct mdesc_node *t = mp->arcs[i].arc;
+ u64 a;
- if (strcmp(mp->arcs[i].name, "fwd"))
- continue;
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+ u64 target = mdesc_arc_target(hp, a);
+ const char *name = mdesc_node_name(hp, target);
- if (!strcmp(t->name, "cache"))
- fill_in_one_cache(c, t);
+ if (!strcmp(name, "cache"))
+ fill_in_one_cache(c, hp, target);
}
}
}
-static void __init mark_core_ids(struct mdesc_node *mp, int core_id)
+static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
+ int core_id)
{
- unsigned int i;
+ u64 a;
- for (i = 0; i < mp->num_arcs; i++) {
- struct mdesc_node *t = mp->arcs[i].arc;
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
+ u64 t = mdesc_arc_target(hp, a);
+ const char *name;
const u64 *id;
- if (strcmp(mp->arcs[i].name, "back"))
- continue;
-
- if (!strcmp(t->name, "cpu")) {
- id = md_get_property(t, "id", NULL);
+ name = mdesc_node_name(hp, t);
+ if (!strcmp(name, "cpu")) {
+ id = mdesc_get_property(hp, t, "id", NULL);
if (*id < NR_CPUS)
cpu_data(*id).core_id = core_id;
} else {
- unsigned int j;
+ u64 j;
- for (j = 0; j < t->num_arcs; j++) {
- struct mdesc_node *n = t->arcs[j].arc;
+ mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) {
+ u64 n = mdesc_arc_target(hp, j);
+ const char *n_name;
- if (strcmp(t->arcs[j].name, "back"))
+ n_name = mdesc_node_name(hp, n);
+ if (strcmp(n_name, "cpu"))
continue;
- if (strcmp(n->name, "cpu"))
- continue;
-
- id = md_get_property(n, "id", NULL);
+ id = mdesc_get_property(hp, n, "id", NULL);
if (*id < NR_CPUS)
cpu_data(*id).core_id = core_id;
}
@@ -449,78 +648,81 @@
}
}
-static void __init set_core_ids(void)
+static void __devinit set_core_ids(struct mdesc_handle *hp)
{
- struct mdesc_node *mp;
int idx;
+ u64 mp;
idx = 1;
- md_for_each_node_by_name(mp, "cache") {
- const u64 *level = md_get_property(mp, "level", NULL);
+ mdesc_for_each_node_by_name(hp, mp, "cache") {
+ const u64 *level;
const char *type;
int len;
+ level = mdesc_get_property(hp, mp, "level", NULL);
if (*level != 1)
continue;
- type = md_get_property(mp, "type", &len);
- if (!find_in_proplist(type, "instn", len))
+ type = mdesc_get_property(hp, mp, "type", &len);
+ if (!of_find_in_proplist(type, "instn", len))
continue;
- mark_core_ids(mp, idx);
+ mark_core_ids(hp, mp, idx);
idx++;
}
}
-static void __init mark_proc_ids(struct mdesc_node *mp, int proc_id)
+static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
+ int proc_id)
{
- int i;
+ u64 a;
- for (i = 0; i < mp->num_arcs; i++) {
- struct mdesc_node *t = mp->arcs[i].arc;
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
+ u64 t = mdesc_arc_target(hp, a);
+ const char *name;
const u64 *id;
- if (strcmp(mp->arcs[i].name, "back"))
+ name = mdesc_node_name(hp, t);
+ if (strcmp(name, "cpu"))
continue;
- if (strcmp(t->name, "cpu"))
- continue;
-
- id = md_get_property(t, "id", NULL);
+ id = mdesc_get_property(hp, t, "id", NULL);
if (*id < NR_CPUS)
cpu_data(*id).proc_id = proc_id;
}
}
-static void __init __set_proc_ids(const char *exec_unit_name)
+static void __devinit __set_proc_ids(struct mdesc_handle *hp,
+ const char *exec_unit_name)
{
- struct mdesc_node *mp;
int idx;
+ u64 mp;
idx = 0;
- md_for_each_node_by_name(mp, exec_unit_name) {
+ mdesc_for_each_node_by_name(hp, mp, exec_unit_name) {
const char *type;
int len;
- type = md_get_property(mp, "type", &len);
- if (!find_in_proplist(type, "int", len) &&
- !find_in_proplist(type, "integer", len))
+ type = mdesc_get_property(hp, mp, "type", &len);
+ if (!of_find_in_proplist(type, "int", len) &&
+ !of_find_in_proplist(type, "integer", len))
continue;
- mark_proc_ids(mp, idx);
+ mark_proc_ids(hp, mp, idx);
idx++;
}
}
-static void __init set_proc_ids(void)
+static void __devinit set_proc_ids(struct mdesc_handle *hp)
{
- __set_proc_ids("exec_unit");
- __set_proc_ids("exec-unit");
+ __set_proc_ids(hp, "exec_unit");
+ __set_proc_ids(hp, "exec-unit");
}
-static void __init get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned char def)
+static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
+ unsigned char def)
{
u64 val;
@@ -538,42 +740,50 @@
*mask = ((1U << def) * 64U) - 1U;
}
-static void __init get_mondo_data(struct mdesc_node *mp, struct trap_per_cpu *tb)
+static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
+ struct trap_per_cpu *tb)
{
const u64 *val;
- val = md_get_property(mp, "q-cpu-mondo-#bits", NULL);
+ val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL);
get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7);
- val = md_get_property(mp, "q-dev-mondo-#bits", NULL);
+ val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL);
get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7);
- val = md_get_property(mp, "q-resumable-#bits", NULL);
+ val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL);
get_one_mondo_bits(val, &tb->resum_qmask, 6);
- val = md_get_property(mp, "q-nonresumable-#bits", NULL);
+ val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL);
get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
}
-static void __init mdesc_fill_in_cpu_data(void)
+void __devinit mdesc_fill_in_cpu_data(cpumask_t mask)
{
- struct mdesc_node *mp;
+ struct mdesc_handle *hp = mdesc_grab();
+ u64 mp;
ncpus_probed = 0;
- md_for_each_node_by_name(mp, "cpu") {
- const u64 *id = md_get_property(mp, "id", NULL);
- const u64 *cfreq = md_get_property(mp, "clock-frequency", NULL);
+ mdesc_for_each_node_by_name(hp, mp, "cpu") {
+ const u64 *id = mdesc_get_property(hp, mp, "id", NULL);
+ const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
struct trap_per_cpu *tb;
cpuinfo_sparc *c;
- unsigned int i;
int cpuid;
+ u64 a;
ncpus_probed++;
cpuid = *id;
#ifdef CONFIG_SMP
- if (cpuid >= NR_CPUS)
+ if (cpuid >= NR_CPUS) {
+ printk(KERN_WARNING "Ignoring CPU %d which is "
+ ">= NR_CPUS (%d)\n",
+ cpuid, NR_CPUS);
+ continue;
+ }
+ if (!cpu_isset(cpuid, mask))
continue;
#else
/* On uniprocessor we only want the values for the
@@ -589,35 +799,30 @@
c->clock_tick = *cfreq;
tb = &trap_block[cpuid];
- get_mondo_data(mp, tb);
-
- for (i = 0; i < mp->num_arcs; i++) {
- struct mdesc_node *t = mp->arcs[i].arc;
- unsigned int j;
+ get_mondo_data(hp, mp, tb);
- if (strcmp(mp->arcs[i].name, "fwd"))
- continue;
-
- if (!strcmp(t->name, "cache")) {
- fill_in_one_cache(c, t);
+ mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+ u64 j, t = mdesc_arc_target(hp, a);
+ const char *t_name;
+
+ t_name = mdesc_node_name(hp, t);
+ if (!strcmp(t_name, "cache")) {
+ fill_in_one_cache(c, hp, t);
continue;
}
- for (j = 0; j < t->num_arcs; j++) {
- struct mdesc_node *n;
-
- n = t->arcs[j].arc;
- if (strcmp(t->arcs[j].name, "fwd"))
- continue;
-
- if (!strcmp(n->name, "cache"))
- fill_in_one_cache(c, n);
+ mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) {
+ u64 n = mdesc_arc_target(hp, j);
+ const char *n_name;
+
+ n_name = mdesc_node_name(hp, n);
+ if (!strcmp(n_name, "cache"))
+ fill_in_one_cache(c, hp, n);
}
}
#ifdef CONFIG_SMP
cpu_set(cpuid, cpu_present_map);
- cpu_set(cpuid, phys_cpu_present_map);
#endif
c->core_id = 0;
@@ -628,45 +833,80 @@
sparc64_multi_core = 1;
#endif
- set_core_ids();
- set_proc_ids();
+ set_core_ids(hp);
+ set_proc_ids(hp);
smp_fill_in_sib_core_maps();
+
+ mdesc_release(hp);
}
+static ssize_t mdesc_read(struct file *file, char __user *buf,
+ size_t len, loff_t *offp)
+{
+ struct mdesc_handle *hp = mdesc_grab();
+ int err;
+
+ if (!hp)
+ return -ENODEV;
+
+ err = hp->handle_size;
+ if (len < hp->handle_size)
+ err = -EMSGSIZE;
+ else if (copy_to_user(buf, &hp->mdesc, hp->handle_size))
+ err = -EFAULT;
+ mdesc_release(hp);
+
+ return err;
+}
+
+static const struct file_operations mdesc_fops = {
+ .read = mdesc_read,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice mdesc_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "mdesc",
+ .fops = &mdesc_fops,
+};
+
+static int __init mdesc_misc_init(void)
+{
+ return misc_register(&mdesc_misc);
+}
+
+__initcall(mdesc_misc_init);
+
void __init sun4v_mdesc_init(void)
{
+ struct mdesc_handle *hp;
unsigned long len, real_len, status;
+ cpumask_t mask;
(void) sun4v_mach_desc(0UL, 0UL, &len);
printk("MDESC: Size is %lu bytes.\n", len);
- main_mdesc = mdesc_early_alloc(len);
+ hp = mdesc_alloc(len, &bootmem_mdesc_ops);
+ if (hp == NULL) {
+ prom_printf("MDESC: alloc of %lu bytes failed.\n", len);
+ prom_halt();
+ }
- status = sun4v_mach_desc(__pa(main_mdesc), len, &real_len);
+ status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
if (status != HV_EOK || real_len > len) {
prom_printf("sun4v_mach_desc fails, err(%lu), "
"len(%lu), real_len(%lu)\n",
status, len, real_len);
+ mdesc_free(hp);
prom_halt();
}
- len = count_nodes(main_mdesc);
- printk("MDESC: %lu nodes.\n", len);
-
- len = roundup_pow_of_two(len);
-
- mdesc_hash = mdesc_early_alloc(len * sizeof(struct mdesc_node *));
- mdesc_hash_size = len;
-
- printk("MDESC: Hash size %lu entries.\n", len);
-
- build_all_nodes(main_mdesc);
-
- printk("MDESC: Built graph with %u bytes of memory.\n",
- mdesc_early_allocated);
+ cur_mdesc = hp;
report_platform_properties();
- mdesc_fill_in_cpu_data();
+
+ cpus_setall(mask);
+ mdesc_fill_in_cpu_data(mask);
}
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/hvtramp.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/hvtramp.S
@@ -0,0 +1,137 @@
+/* hvtramp.S: Hypervisor start-cpu trampoline code.
+ *
+ * Copyright (C) 2007 David S. Miller
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+ .text
+ .align 8
+ .globl hv_cpu_startup, hv_cpu_startup_end
+
+ /* This code executes directly out of the hypervisor
+ * with physical addressing (va==pa). %o0 contains
+ * our client argument which for Linux points to
+ * a descriptor data structure which defines the
+ * MMU entries we need to load up.
+ *
+ * After we set things up we enable the MMU and call
+ * into the kernel.
+ *
+ * First set up basic privileged cpu state.
+ */
+hv_cpu_startup:
+ SET_GL(0)
+ wrpr %g0, 15, %pil
+ wrpr %g0, 0, %canrestore
+ wrpr %g0, 0, %otherwin
+ wrpr %g0, 6, %cansave
+ wrpr %g0, 6, %cleanwin
+ wrpr %g0, 0, %cwp
+ wrpr %g0, 0, %wstate
+ wrpr %g0, 0, %tl
+
+ sethi %hi(sparc64_ttable_tl0), %g1
+ wrpr %g1, %tba
+
+ mov %o0, %l0
+
+ lduw [%l0 + HVTRAMP_DESCR_CPU], %g1
+ mov SCRATCHPAD_CPUID, %g2
+ stxa %g1, [%g2] ASI_SCRATCHPAD
+
+ ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2
+ stxa %g2, [%g0] ASI_SCRATCHPAD
+
+ mov 0, %l1
+ lduw [%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2
+ add %l0, HVTRAMP_DESCR_MAPS, %l3
+
+1: ldx [%l3 + HVTRAMP_MAPPING_VADDR], %o0
+ clr %o1
+ ldx [%l3 + HVTRAMP_MAPPING_TTE], %o2
+ mov HV_MMU_IMMU | HV_MMU_DMMU, %o3
+ mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
+ ta HV_FAST_TRAP
+
+ brnz,pn %o0, 80f
+ nop
+
+ add %l1, 1, %l1
+ cmp %l1, %l2
+ blt,a,pt %xcc, 1b
+ add %l3, HVTRAMP_MAPPING_SIZE, %l3
+
+ ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0
+ mov HV_FAST_MMU_FAULT_AREA_CONF, %o5
+ ta HV_FAST_TRAP
+
+ brnz,pn %o0, 80f
+ nop
+
+ wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
+
+ ldx [%l0 + HVTRAMP_DESCR_THREAD_REG], %l6
+
+ mov 1, %o0
+ set 1f, %o1
+ mov HV_FAST_MMU_ENABLE, %o5
+ ta HV_FAST_TRAP
+
+ ba,pt %xcc, 80f
+ nop
+
+1:
+ wr %g0, 0, %fprs
+ wr %g0, ASI_P, %asi
+
+ mov PRIMARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU
+ membar #Sync
+
+ mov SECONDARY_CONTEXT, %g7
+ stxa %g0, [%g7] ASI_MMU
+ membar #Sync
+
+ mov %l6, %g6
+ ldx [%g6 + TI_TASK], %g4
+
+ mov 1, %g5
+ sllx %g5, THREAD_SHIFT, %g5
+ sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
+ add %g6, %g5, %sp
+ mov 0, %fp
+
+ call init_irqwork_curcpu
+ nop
+ call hard_smp_processor_id
+ nop
+
+ call sun4v_register_mondo_queues
+ nop
+
+ call init_cur_cpu_trap
+ mov %g6, %o0
+
+ wrpr %g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate
+
+ call smp_callin
+ nop
+ call cpu_idle
+ mov 0, %o0
+ call cpu_panic
+ nop
+
+80: ba,pt %xcc, 80b
+ nop
+
+ .align 8
+hv_cpu_startup_end:
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/process.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/process.c
@@ -29,6 +29,7 @@
#include
#include
#include
+#include
#include
#include
@@ -49,7 +50,7 @@
/* #define VERBOSE_SHOWREGS */
-static void sparc64_yield(void)
+static void sparc64_yield(int cpu)
{
if (tlb_type != hypervisor)
return;
@@ -57,7 +58,7 @@
clear_thread_flag(TIF_POLLING_NRFLAG);
smp_mb__after_clear_bit();
- while (!need_resched()) {
+ while (!need_resched() && !cpu_is_offline(cpu)) {
unsigned long pstate;
/* Disable interrupts. */
@@ -68,7 +69,7 @@
: "=&r" (pstate)
: "i" (PSTATE_IE));
- if (!need_resched())
+ if (!need_resched() && !cpu_is_offline(cpu))
sun4v_cpu_yield();
/* Re-enable interrupts. */
@@ -86,15 +87,25 @@
/* The idle loop on sparc64. */
void cpu_idle(void)
{
+ int cpu = smp_processor_id();
+
set_thread_flag(TIF_POLLING_NRFLAG);
while(1) {
tick_nohz_stop_sched_tick();
- while (!need_resched())
- sparc64_yield();
+
+ while (!need_resched() && !cpu_is_offline(cpu))
+ sparc64_yield(cpu);
+
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
+
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu_is_offline(cpu))
+ cpu_play_dead();
+#endif
+
schedule();
preempt_disable();
}
@@ -108,7 +119,7 @@
void machine_halt(void)
{
sstate_halt();
- if (!serial_console && prom_palette)
+ if (prom_palette)
prom_palette (1);
if (prom_keyboard)
prom_keyboard();
@@ -119,7 +130,7 @@
void machine_alt_power_off(void)
{
sstate_poweroff();
- if (!serial_console && prom_palette)
+ if (prom_palette)
prom_palette(1);
if (prom_keyboard)
prom_keyboard();
@@ -134,7 +145,7 @@
sstate_reboot();
p = strchr (reboot_command, '\n');
if (p) *p = 0;
- if (!serial_console && prom_palette)
+ if (prom_palette)
prom_palette (1);
if (prom_keyboard)
prom_keyboard();
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/setup.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/setup.c
@@ -133,33 +133,6 @@
}
}
-static void __init process_console(char *commands)
-{
- serial_console = 0;
- commands += 8;
- /* Linux-style serial */
- if (!strncmp(commands, "ttyS", 4))
- serial_console = simple_strtoul(commands + 4, NULL, 10) + 1;
- else if (!strncmp(commands, "tty", 3)) {
- char c = *(commands + 3);
- /* Solaris-style serial */
- if (c == 'a' || c == 'b') {
- serial_console = c - 'a' + 1;
- prom_printf ("Using /dev/tty%c as console.\n", c);
- }
- /* else Linux-style fbcon, not serial */
- }
-#if defined(CONFIG_PROM_CONSOLE)
- if (!strncmp(commands, "prom", 4)) {
- char *p;
-
- for (p = commands - 8; *p && *p != ' '; p++)
- *p = ' ';
- conswitchp = &prom_con;
- }
-#endif
-}
-
static void __init boot_flags_init(char *commands)
{
while (*commands) {
@@ -176,9 +149,7 @@
process_switch(*commands++);
continue;
}
- if (!strncmp(commands, "console=", 8)) {
- process_console(commands);
- } else if (!strncmp(commands, "mem=", 4)) {
+ if (!strncmp(commands, "mem=", 4)) {
/*
* "mem=XXX[kKmM]" overrides the PROM-reported
* memory size.
@@ -378,44 +349,6 @@
paging_init();
}
-static int __init set_preferred_console(void)
-{
- int idev, odev;
-
- /* The user has requested a console so this is already set up. */
- if (serial_console >= 0)
- return -EBUSY;
-
- idev = prom_query_input_device();
- odev = prom_query_output_device();
- if (idev == PROMDEV_IKBD && odev == PROMDEV_OSCREEN) {
- serial_console = 0;
- } else if (idev == PROMDEV_ITTYA && odev == PROMDEV_OTTYA) {
- serial_console = 1;
- } else if (idev == PROMDEV_ITTYB && odev == PROMDEV_OTTYB) {
- serial_console = 2;
- } else if (idev == PROMDEV_IRSC && odev == PROMDEV_ORSC) {
- serial_console = 3;
- } else if (idev == PROMDEV_IVCONS && odev == PROMDEV_OVCONS) {
- /* sunhv_console_init() doesn't check the serial_console
- * value anyways...
- */
- serial_console = 4;
- return add_preferred_console("ttyHV", 0, NULL);
- } else {
- prom_printf("Inconsistent console: "
- "input %d, output %d\n",
- idev, odev);
- prom_halt();
- }
-
- if (serial_console)
- return add_preferred_console("ttyS", serial_console - 1, NULL);
-
- return -ENODEV;
-}
-console_initcall(set_preferred_console);
-
/* BUFFER is PAGE_SIZE bytes long. */
extern char *sparc_cpu_type;
@@ -442,7 +375,6 @@
"D$ parity tl1\t: %u\n"
"I$ parity tl1\t: %u\n"
#ifndef CONFIG_SMP
- "Cpu0Bogo\t: %lu.%02lu\n"
"Cpu0ClkTck\t: %016lx\n"
#endif
,
@@ -457,9 +389,7 @@
dcache_parity_tl1_occurred,
icache_parity_tl1_occurred
#ifndef CONFIG_SMP
- , cpu_data(0).udelay_val/(500000/HZ),
- (cpu_data(0).udelay_val/(5000/HZ)) % 100,
- cpu_data(0).clock_tick
+ , cpu_data(0).clock_tick
#endif
);
#ifdef CONFIG_SMP
@@ -511,5 +441,4 @@
prom_cmdline();
}
-int serial_console = -1;
int stop_a_enabled = 1;
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/pci.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/pci.c
@@ -404,7 +404,6 @@
sd->host_controller = pbm;
sd->prom_node = node;
sd->op = of_find_device_by_node(node);
- sd->msi_num = 0xffffffff;
type = of_get_property(node, "device_type", NULL);
if (type == NULL)
@@ -422,10 +421,15 @@
dev->multifunction = 0; /* maybe a lie? */
if (host_controller) {
- dev->vendor = 0x108e;
- dev->device = 0x8000;
- dev->subsystem_vendor = 0x0000;
- dev->subsystem_device = 0x0000;
+ if (tlb_type != hypervisor) {
+ pci_read_config_word(dev, PCI_VENDOR_ID,
+ &dev->vendor);
+ pci_read_config_word(dev, PCI_DEVICE_ID,
+ &dev->device);
+ } else {
+ dev->vendor = PCI_VENDOR_ID_SUN;
+ dev->device = 0x80f0;
+ }
dev->cfg_size = 256;
dev->class = PCI_CLASS_BRIDGE_HOST << 8;
sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
@@ -746,7 +750,7 @@
{
struct device_node *child;
const u32 *reg;
- int reglen, devfn;
+ int reglen, devfn, prev_devfn;
struct pci_dev *dev;
if (ofpci_verbose)
@@ -754,14 +758,25 @@
node->full_name, bus->number);
child = NULL;
+ prev_devfn = -1;
while ((child = of_get_next_child(node, child)) != NULL) {
if (ofpci_verbose)
printk(" * %s\n", child->full_name);
reg = of_get_property(child, "reg", ®len);
if (reg == NULL || reglen < 20)
continue;
+
devfn = (reg[0] >> 8) & 0xff;
+ /* This is a workaround for some device trees
+ * which list PCI devices twice. On the V100
+ * for example, device number 3 is listed twice.
+ * Once as "pm" and once again as "lomp".
+ */
+ if (devfn == prev_devfn)
+ continue;
+ prev_devfn = devfn;
+
/* create a new pci_dev for this device */
dev = of_create_pci_dev(pbm, child, bus, devfn, 0);
if (!dev)
@@ -817,7 +832,7 @@
{
static u8 fake_pci_config[] = {
0x8e, 0x10, /* Vendor: 0x108e (Sun) */
- 0x00, 0x80, /* Device: 0x8000 (PBM) */
+ 0xf0, 0x80, /* Device: 0x80f0 (Fire) */
0x46, 0x01, /* Command: 0x0146 (SERR, PARITY, MASTER, MEM) */
0xa0, 0x22, /* Status: 0x02a0 (DEVSEL_MED, FB2B, 66MHZ) */
0x00, 0x00, 0x00, 0x06, /* Class: 0x06000000 host bridge */
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/irq.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/irq.c
@@ -87,7 +87,11 @@
*/
#define irq_work(__cpu) &(trap_block[(__cpu)].irq_worklist)
-static unsigned int virt_to_real_irq_table[NR_IRQS];
+static struct {
+ unsigned int irq;
+ unsigned int dev_handle;
+ unsigned int dev_ino;
+} virt_to_real_irq_table[NR_IRQS];
static unsigned char virt_irq_alloc(unsigned int real_irq)
{
@@ -96,7 +100,7 @@
BUILD_BUG_ON(NR_IRQS >= 256);
for (ent = 1; ent < NR_IRQS; ent++) {
- if (!virt_to_real_irq_table[ent])
+ if (!virt_to_real_irq_table[ent].irq)
break;
}
if (ent >= NR_IRQS) {
@@ -104,7 +108,7 @@
return 0;
}
- virt_to_real_irq_table[ent] = real_irq;
+ virt_to_real_irq_table[ent].irq = real_irq;
return ent;
}
@@ -117,8 +121,8 @@
if (virt_irq >= NR_IRQS)
return;
- real_irq = virt_to_real_irq_table[virt_irq];
- virt_to_real_irq_table[virt_irq] = 0;
+ real_irq = virt_to_real_irq_table[virt_irq].irq;
+ virt_to_real_irq_table[virt_irq].irq = 0;
__bucket(real_irq)->virt_irq = 0;
}
@@ -126,7 +130,7 @@
static unsigned int virt_to_real_irq(unsigned char virt_irq)
{
- return virt_to_real_irq_table[virt_irq];
+ return virt_to_real_irq_table[virt_irq].irq;
}
/*
@@ -213,8 +217,27 @@
void (*pre_handler)(unsigned int, void *, void *);
void *pre_handler_arg1;
void *pre_handler_arg2;
+
+ u32 msi;
};
+void sparc64_set_msi(unsigned int virt_irq, u32 msi)
+{
+ struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+
+ if (data)
+ data->msi = msi;
+}
+
+u32 sparc64_get_msi(unsigned int virt_irq)
+{
+ struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+
+ if (data)
+ return data->msi;
+ return 0xffffffff;
+}
+
static inline struct ino_bucket *virt_irq_to_bucket(unsigned int virt_irq)
{
unsigned int real_irq = virt_to_real_irq(virt_irq);
@@ -293,13 +316,18 @@
}
}
+static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+ sun4u_irq_enable(virt_irq);
+}
+
static void sun4u_irq_disable(unsigned int virt_irq)
{
struct irq_handler_data *data = get_irq_chip_data(virt_irq);
if (likely(data)) {
unsigned long imap = data->imap;
- u32 tmp = upa_readq(imap);
+ unsigned long tmp = upa_readq(imap);
tmp &= ~IMAP_VALID;
upa_writeq(tmp, imap);
@@ -327,19 +355,37 @@
err = sun4v_intr_settarget(ino, cpuid);
if (err != HV_EOK)
- printk("sun4v_intr_settarget(%x,%lu): err(%d)\n",
- ino, cpuid, err);
+ printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
+ "err(%d)\n", ino, cpuid, err);
err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
if (err != HV_EOK)
- printk("sun4v_intr_setstate(%x): "
+ printk(KERN_ERR "sun4v_intr_setstate(%x): "
"err(%d)\n", ino, err);
err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED);
if (err != HV_EOK)
- printk("sun4v_intr_setenabled(%x): err(%d)\n",
+ printk(KERN_ERR "sun4v_intr_setenabled(%x): err(%d)\n",
ino, err);
}
}
+static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+ struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
+ unsigned int ino = bucket - &ivector_table[0];
+
+ if (likely(bucket)) {
+ unsigned long cpuid;
+ int err;
+
+ cpuid = irq_choose_cpu(virt_irq);
+
+ err = sun4v_intr_settarget(ino, cpuid);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
+ "err(%d)\n", ino, cpuid, err);
+ }
+}
+
static void sun4v_irq_disable(unsigned int virt_irq)
{
struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
@@ -350,12 +396,24 @@
err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
if (err != HV_EOK)
- printk("sun4v_intr_setenabled(%x): "
+ printk(KERN_ERR "sun4v_intr_setenabled(%x): "
"err(%d)\n", ino, err);
}
}
#ifdef CONFIG_PCI_MSI
+static void sun4u_msi_enable(unsigned int virt_irq)
+{
+ sun4u_irq_enable(virt_irq);
+ unmask_msi_irq(virt_irq);
+}
+
+static void sun4u_msi_disable(unsigned int virt_irq)
+{
+ mask_msi_irq(virt_irq);
+ sun4u_irq_disable(virt_irq);
+}
+
static void sun4v_msi_enable(unsigned int virt_irq)
{
sun4v_irq_enable(virt_irq);
@@ -379,7 +437,7 @@
err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
if (err != HV_EOK)
- printk("sun4v_intr_setstate(%x): "
+ printk(KERN_ERR "sun4v_intr_setstate(%x): "
"err(%d)\n", ino, err);
}
}
@@ -387,7 +445,6 @@
static void sun4v_virq_enable(unsigned int virt_irq)
{
struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
- unsigned int ino = bucket - &ivector_table[0];
if (likely(bucket)) {
unsigned long cpuid, dev_handle, dev_ino;
@@ -395,45 +452,65 @@
cpuid = irq_choose_cpu(virt_irq);
- dev_handle = ino & IMAP_IGN;
- dev_ino = ino & IMAP_INO;
+ dev_handle = virt_to_real_irq_table[virt_irq].dev_handle;
+ dev_ino = virt_to_real_irq_table[virt_irq].dev_ino;
err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
if (err != HV_EOK)
- printk("sun4v_vintr_set_target(%lx,%lx,%lu): "
+ printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
"err(%d)\n",
dev_handle, dev_ino, cpuid, err);
err = sun4v_vintr_set_state(dev_handle, dev_ino,
HV_INTR_STATE_IDLE);
if (err != HV_EOK)
- printk("sun4v_vintr_set_state(%lx,%lx,"
+ printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
"HV_INTR_STATE_IDLE): err(%d)\n",
dev_handle, dev_ino, err);
err = sun4v_vintr_set_valid(dev_handle, dev_ino,
HV_INTR_ENABLED);
if (err != HV_EOK)
- printk("sun4v_vintr_set_state(%lx,%lx,"
+ printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
"HV_INTR_ENABLED): err(%d)\n",
dev_handle, dev_ino, err);
}
}
+static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+ struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
+
+ if (likely(bucket)) {
+ unsigned long cpuid, dev_handle, dev_ino;
+ int err;
+
+ cpuid = irq_choose_cpu(virt_irq);
+
+ dev_handle = virt_to_real_irq_table[virt_irq].dev_handle;
+ dev_ino = virt_to_real_irq_table[virt_irq].dev_ino;
+
+ err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+ if (err != HV_EOK)
+ printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
+ "err(%d)\n",
+ dev_handle, dev_ino, cpuid, err);
+ }
+}
+
static void sun4v_virq_disable(unsigned int virt_irq)
{
struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
- unsigned int ino = bucket - &ivector_table[0];
if (likely(bucket)) {
unsigned long dev_handle, dev_ino;
int err;
- dev_handle = ino & IMAP_IGN;
- dev_ino = ino & IMAP_INO;
+ dev_handle = virt_to_real_irq_table[virt_irq].dev_handle;
+ dev_ino = virt_to_real_irq_table[virt_irq].dev_ino;
err = sun4v_vintr_set_valid(dev_handle, dev_ino,
HV_INTR_DISABLED);
if (err != HV_EOK)
- printk("sun4v_vintr_set_state(%lx,%lx,"
+ printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
"HV_INTR_DISABLED): err(%d)\n",
dev_handle, dev_ino, err);
}
@@ -442,20 +519,21 @@
static void sun4v_virq_end(unsigned int virt_irq)
{
struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
- unsigned int ino = bucket - &ivector_table[0];
+ struct irq_desc *desc = irq_desc + virt_irq;
- if (likely(bucket)) {
+ if (likely(bucket &&
+ !(desc->status & IRQ_INPROGRESS))) {
unsigned long dev_handle, dev_ino;
int err;
- dev_handle = ino & IMAP_IGN;
- dev_ino = ino & IMAP_INO;
+ dev_handle = virt_to_real_irq_table[virt_irq].dev_handle;
+ dev_ino = virt_to_real_irq_table[virt_irq].dev_ino;
err = sun4v_vintr_set_state(dev_handle, dev_ino,
HV_INTR_STATE_IDLE);
if (err != HV_EOK)
- printk("sun4v_vintr_set_state(%lx,%lx,"
- "HV_INTR_STATE_IDLE): err(%d)\n",
+ printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+ "HV_INTR_STATE_IDLE): err(%d)\n",
dev_handle, dev_ino, err);
}
}
@@ -477,6 +555,7 @@
.enable = sun4u_irq_enable,
.disable = sun4u_irq_disable,
.end = sun4u_irq_end,
+ .set_affinity = sun4u_set_affinity,
};
static struct irq_chip sun4u_irq_ack = {
@@ -485,6 +564,7 @@
.disable = sun4u_irq_disable,
.ack = run_pre_handler,
.end = sun4u_irq_end,
+ .set_affinity = sun4u_set_affinity,
};
static struct irq_chip sun4v_irq = {
@@ -492,6 +572,7 @@
.enable = sun4v_irq_enable,
.disable = sun4v_irq_disable,
.end = sun4v_irq_end,
+ .set_affinity = sun4v_set_affinity,
};
static struct irq_chip sun4v_irq_ack = {
@@ -500,9 +581,21 @@
.disable = sun4v_irq_disable,
.ack = run_pre_handler,
.end = sun4v_irq_end,
+ .set_affinity = sun4v_set_affinity,
};
#ifdef CONFIG_PCI_MSI
+static struct irq_chip sun4u_msi = {
+ .typename = "sun4u+msi",
+ .mask = mask_msi_irq,
+ .unmask = unmask_msi_irq,
+ .enable = sun4u_msi_enable,
+ .disable = sun4u_msi_disable,
+ .ack = run_pre_handler,
+ .end = sun4u_irq_end,
+ .set_affinity = sun4u_set_affinity,
+};
+
static struct irq_chip sun4v_msi = {
.typename = "sun4v+msi",
.mask = mask_msi_irq,
@@ -511,6 +604,7 @@
.disable = sun4v_msi_disable,
.ack = run_pre_handler,
.end = sun4v_irq_end,
+ .set_affinity = sun4v_set_affinity,
};
#endif
@@ -519,6 +613,7 @@
.enable = sun4v_virq_enable,
.disable = sun4v_virq_disable,
.end = sun4v_virq_end,
+ .set_affinity = sun4v_virt_set_affinity,
};
static struct irq_chip sun4v_virq_ack = {
@@ -527,6 +622,7 @@
.disable = sun4v_virq_disable,
.ack = run_pre_handler,
.end = sun4v_virq_end,
+ .set_affinity = sun4v_virt_set_affinity,
};
void irq_install_pre_handler(int virt_irq,
@@ -545,6 +641,7 @@
chip == &sun4v_irq_ack ||
chip == &sun4v_virq_ack
#ifdef CONFIG_PCI_MSI
+ || chip == &sun4u_msi
|| chip == &sun4v_msi
#endif
)
@@ -636,11 +733,12 @@
unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
{
unsigned long sysino, hv_err;
+ unsigned int virq;
- BUG_ON(devhandle & ~IMAP_IGN);
- BUG_ON(devino & ~IMAP_INO);
+ BUG_ON(devhandle & devino);
sysino = devhandle | devino;
+ BUG_ON(sysino & ~(IMAP_IGN | IMAP_INO));
hv_err = sun4v_vintr_set_cookie(devhandle, devino, sysino);
if (hv_err) {
@@ -649,7 +747,12 @@
prom_halt();
}
- return sun4v_build_common(sysino, &sun4v_virq);
+ virq = sun4v_build_common(sysino, &sun4v_virq);
+
+ virt_to_real_irq_table[virq].dev_handle = devhandle;
+ virt_to_real_irq_table[virq].dev_ino = devino;
+
+ return virq;
}
#ifdef CONFIG_PCI_MSI
@@ -671,7 +774,7 @@
break;
}
if (devino >= msi_end)
- return 0;
+ return -ENOSPC;
sysino = sun4v_devino_to_sysino(devhandle, devino);
bucket = &ivector_table[sysino];
@@ -685,8 +788,8 @@
data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
if (unlikely(!data)) {
- prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
- prom_halt();
+ virt_irq_free(*virt_irq_p);
+ return -ENOMEM;
}
set_irq_chip_data(bucket->virt_irq, data);
@@ -700,6 +803,53 @@
{
virt_irq_free(virt_irq);
}
+
+unsigned int sun4u_build_msi(u32 portid, unsigned int *virt_irq_p,
+ unsigned int msi_start, unsigned int msi_end,
+ unsigned long imap_base, unsigned long iclr_base)
+{
+ struct ino_bucket *bucket;
+ struct irq_handler_data *data;
+ unsigned long sysino;
+ unsigned int devino;
+
+ /* Find a free devino in the given range. */
+ for (devino = msi_start; devino < msi_end; devino++) {
+ sysino = (portid << 6) | devino;
+ bucket = &ivector_table[sysino];
+ if (!bucket->virt_irq)
+ break;
+ }
+ if (devino >= msi_end)
+ return -ENOSPC;
+
+ sysino = (portid << 6) | devino;
+ bucket = &ivector_table[sysino];
+ bucket->virt_irq = virt_irq_alloc(__irq(bucket));
+ *virt_irq_p = bucket->virt_irq;
+ set_irq_chip(bucket->virt_irq, &sun4u_msi);
+
+ data = get_irq_chip_data(bucket->virt_irq);
+ if (unlikely(data))
+ return devino;
+
+ data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+ if (unlikely(!data)) {
+ virt_irq_free(*virt_irq_p);
+ return -ENOMEM;
+ }
+ set_irq_chip_data(bucket->virt_irq, data);
+
+ data->imap = (imap_base + (devino * 0x8UL));
+ data->iclr = (iclr_base + (devino * 0x8UL));
+
+ return devino;
+}
+
+void sun4u_destroy_msi(unsigned int virt_irq)
+{
+ virt_irq_free(virt_irq);
+}
#endif
void ack_bad_irq(unsigned int virt_irq)
@@ -739,6 +889,26 @@
set_irq_regs(old_regs);
}
+#ifdef CONFIG_HOTPLUG_CPU
+void fixup_irqs(void)
+{
+ unsigned int irq;
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&irq_desc[irq].lock, flags);
+ if (irq_desc[irq].action &&
+ !(irq_desc[irq].status & IRQ_PER_CPU)) {
+ if (irq_desc[irq].chip->set_affinity)
+ irq_desc[irq].chip->set_affinity(irq,
+ irq_desc[irq].affinity);
+ }
+ spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
+ }
+}
+#endif
+
struct sun5_timer {
u64 count0;
u64 limit0;
@@ -839,7 +1009,7 @@
}
}
-static void __cpuinit sun4v_register_mondo_queues(int this_cpu)
+void __cpuinit sun4v_register_mondo_queues(int this_cpu)
{
struct trap_per_cpu *tb = &trap_block[this_cpu];
@@ -853,20 +1023,10 @@
tb->nonresum_qmask);
}
-static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask, int use_bootmem)
+static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask)
{
unsigned long size = PAGE_ALIGN(qmask + 1);
- unsigned long order = get_order(size);
- void *p = NULL;
-
- if (use_bootmem) {
- p = __alloc_bootmem_low(size, size, 0);
- } else {
- struct page *page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, order);
- if (page)
- p = page_address(page);
- }
-
+ void *p = __alloc_bootmem(size, size, 0);
if (!p) {
prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
prom_halt();
@@ -875,19 +1035,10 @@
*pa_ptr = __pa(p);
}
-static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask, int use_bootmem)
+static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask)
{
unsigned long size = PAGE_ALIGN(qmask + 1);
- unsigned long order = get_order(size);
- void *p = NULL;
-
- if (use_bootmem) {
- p = __alloc_bootmem_low(size, size, 0);
- } else {
- struct page *page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, order);
- if (page)
- p = page_address(page);
- }
+ void *p = __alloc_bootmem(size, size, 0);
if (!p) {
prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
@@ -897,18 +1048,14 @@
*pa_ptr = __pa(p);
}
-static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_bootmem)
+static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
{
#ifdef CONFIG_SMP
void *page;
BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
- if (use_bootmem)
- page = alloc_bootmem_low_pages(PAGE_SIZE);
- else
- page = (void *) get_zeroed_page(GFP_ATOMIC);
-
+ page = alloc_bootmem_pages(PAGE_SIZE);
if (!page) {
prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
prom_halt();
@@ -919,30 +1066,27 @@
#endif
}
-/* Allocate and register the mondo and error queues for this cpu. */
-void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load)
+/* Allocate mondo and error queues for all possible cpus. */
+static void __init sun4v_init_mondo_queues(void)
{
- struct trap_per_cpu *tb = &trap_block[cpu];
+ int cpu;
- if (alloc) {
- alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask, use_bootmem);
- alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask, use_bootmem);
- alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask, use_bootmem);
- alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask, use_bootmem);
- alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask, use_bootmem);
- alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, tb->nonresum_qmask, use_bootmem);
-
- init_cpu_send_mondo_info(tb, use_bootmem);
- }
-
- if (load) {
- if (cpu != hard_smp_processor_id()) {
- prom_printf("SUN4V: init mondo on cpu %d not %d\n",
- cpu, hard_smp_processor_id());
- prom_halt();
- }
- sun4v_register_mondo_queues(cpu);
+ for_each_possible_cpu(cpu) {
+ struct trap_per_cpu *tb = &trap_block[cpu];
+
+ alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask);
+ alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask);
+ alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask);
+ alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask);
+ alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
+ alloc_one_kbuf(&tb->nonresum_kernel_buf_pa,
+ tb->nonresum_qmask);
+
+ init_cpu_send_mondo_info(tb);
}
+
+ /* Load up the boot cpu's entries. */
+ sun4v_register_mondo_queues(hard_smp_processor_id());
}
static struct irqaction timer_irq_action = {
@@ -957,7 +1101,7 @@
memset(&ivector_table[0], 0, sizeof(ivector_table));
if (tlb_type == hypervisor)
- sun4v_init_mondo_queues(1, hard_smp_processor_id(), 1, 1);
+ sun4v_init_mondo_queues();
/* We need to clear any IRQ's pending in the soft interrupt
* registers, a spurious one could be left around from the
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/cpu.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/cpu.c
@@ -1,7 +1,7 @@
/* cpu.c: Dinky routines to look for the kind of Sparc cpu
* we are on.
*
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net)
*/
#include
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
@@ -61,21 +62,40 @@
#define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips)
-char *sparc_cpu_type = "cpu-oops";
-char *sparc_fpu_type = "fpu-oops";
+char *sparc_cpu_type;
+char *sparc_fpu_type;
unsigned int fsr_storage;
+static void __init sun4v_cpu_probe(void)
+{
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_NIAGARA1:
+ sparc_cpu_type = "UltraSparc T1 (Niagara)";
+ sparc_fpu_type = "UltraSparc T1 integrated FPU";
+ break;
+
+ case SUN4V_CHIP_NIAGARA2:
+ sparc_cpu_type = "UltraSparc T2 (Niagara2)";
+ sparc_fpu_type = "UltraSparc T2 integrated FPU";
+ break;
+
+ default:
+ printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n",
+ prom_cpu_compatible);
+ sparc_cpu_type = "Unknown SUN4V CPU";
+ sparc_fpu_type = "Unknown SUN4V FPU";
+ break;
+ }
+}
+
void __init cpu_probe(void)
{
unsigned long ver, fpu_vers, manuf, impl, fprs;
int i;
- if (tlb_type == hypervisor) {
- sparc_cpu_type = "UltraSparc T1 (Niagara)";
- sparc_fpu_type = "UltraSparc T1 integrated FPU";
- return;
- }
+ if (tlb_type == hypervisor)
+ return sun4v_cpu_probe();
fprs = fprs_read();
fprs_write(FPRS_FEF);
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/sparc64_ksyms.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/sparc64_ksyms.c
@@ -1,7 +1,6 @@
-/* $Id: sparc64_ksyms.c,v 1.121 2002/02/09 19:49:31 davem Exp $
- * arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support.
+/* arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support.
*
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net)
* Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
* Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
*/
@@ -28,7 +27,6 @@
#include
#include
-#include
#include
#include
#include
@@ -124,10 +122,6 @@
EXPORT_SYMBOL(__write_unlock);
EXPORT_SYMBOL(__write_trylock);
-/* CPU online map and active count. */
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(phys_cpu_present_map);
-
EXPORT_SYMBOL(smp_call_function);
#endif /* CONFIG_SMP */
@@ -174,6 +168,7 @@
EXPORT_SYMBOL(__flushw_user);
EXPORT_SYMBOL(tlb_type);
+EXPORT_SYMBOL(sun4v_chip_type);
EXPORT_SYMBOL(get_fb_unmapped_area);
EXPORT_SYMBOL(flush_icache_range);
@@ -330,19 +325,12 @@
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(strncmp);
-/* Delay routines. */
-EXPORT_SYMBOL(__udelay);
-EXPORT_SYMBOL(__ndelay);
-EXPORT_SYMBOL(__const_udelay);
-EXPORT_SYMBOL(__delay);
-
void VISenter(void);
/* RAID code needs this */
EXPORT_SYMBOL(VISenter);
/* for input/keybdev */
EXPORT_SYMBOL(sun_do_break);
-EXPORT_SYMBOL(serial_console);
EXPORT_SYMBOL(stop_a_enabled);
#ifdef CONFIG_DEBUG_BUGVERBOSE
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/traps.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/traps.c
@@ -2134,12 +2134,20 @@
void show_stack(struct task_struct *tsk, unsigned long *_ksp)
{
unsigned long pc, fp, thread_base, ksp;
- void *tp = task_stack_page(tsk);
+ struct thread_info *tp;
struct reg_window *rw;
int count = 0;
ksp = (unsigned long) _ksp;
-
+ if (!tsk)
+ tsk = current;
+ tp = task_thread_info(tsk);
+ if (ksp == 0UL) {
+ if (tsk == current)
+ asm("mov %%fp, %0" : "=r" (ksp));
+ else
+ ksp = tp->ksp;
+ }
if (tp == current_thread_info())
flushw_all();
@@ -2168,11 +2176,7 @@
void dump_stack(void)
{
- unsigned long *ksp;
-
- __asm__ __volatile__("mov %%fp, %0"
- : "=r" (ksp));
- show_stack(current, ksp);
+ show_stack(current, NULL);
}
EXPORT_SYMBOL(dump_stack);
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/trampoline.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/trampoline.S
@@ -95,14 +95,13 @@
membar #Sync
startup_continue:
+ mov %o0, %l0
+ BRANCH_IF_SUN4V(g1, niagara_lock_tlb)
+
sethi %hi(0x80000000), %g2
sllx %g2, 32, %g2
wr %g2, 0, %tick_cmpr
- mov %o0, %l0
-
- BRANCH_IF_SUN4V(g1, niagara_lock_tlb)
-
/* Call OBP by hand to lock KERNBASE into i/d tlbs.
* We lock 2 consequetive entries if we are 'bigkernel'.
*/
@@ -346,7 +345,7 @@
sethi %hi(tramp_stack), %g1
or %g1, %lo(tramp_stack), %g1
add %g1, TRAMP_STACK_SIZE, %g1
- sub %g1, STACKFRAME_SZ + STACK_BIAS, %sp
+ sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
mov 0, %fp
/* Put garbage in these registers to trap any access to them. */
@@ -366,11 +365,8 @@
call hard_smp_processor_id
nop
- mov %o0, %o1
- mov 0, %o0
- mov 0, %o2
- call sun4v_init_mondo_queues
- mov 1, %o3
+ call sun4v_register_mondo_queues
+ nop
1: call init_cur_cpu_trap
ldx [%l0], %o0
@@ -415,15 +411,38 @@
sethi %hi(kern_base), %g3
ldx [%g3 + %lo(kern_base)], %g3
add %g2, %g3, %o1
+ sethi %hi(sparc64_ttable_tl0), %o0
- call prom_set_trap_table_sun4v
- sethi %hi(sparc64_ttable_tl0), %o0
+ set prom_set_trap_table_name, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 2, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 0, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ stx %o0, [%sp + 2047 + 128 + 0x18]
+ stx %o1, [%sp + 2047 + 128 + 0x20]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
ba,pt %xcc, 2f
nop
-1: call prom_set_trap_table
- sethi %hi(sparc64_ttable_tl0), %o0
+1: sethi %hi(sparc64_ttable_tl0), %o0
+ set prom_set_trap_table_name, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x00]
+ mov 1, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x08]
+ mov 0, %g2
+ stx %g2, [%sp + 2047 + 128 + 0x10]
+ stx %o0, [%sp + 2047 + 128 + 0x18]
+ sethi %hi(p1275buf), %g2
+ or %g2, %lo(p1275buf), %g2
+ ldx [%g2 + 0x08], %o1
+ call %o1
+ add %sp, (2047 + 128), %o0
2: ldx [%l0], %g6
ldx [%g6 + TI_TASK], %g4
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/power.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/power.c
@@ -1,7 +1,6 @@
-/* $Id: power.c,v 1.10 2001/12/11 01:57:16 davem Exp $
- * power.c: Power management driver.
+/* power.c: Power management driver.
*
- * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
*/
#include
@@ -19,6 +18,7 @@
#include
#include
#include
+#include
#include
#include
@@ -29,24 +29,26 @@
*/
int scons_pwroff = 1;
-#ifdef CONFIG_PCI
-#include
static void __iomem *power_reg;
static DECLARE_WAIT_QUEUE_HEAD(powerd_wait);
static int button_pressed;
-static irqreturn_t power_handler(int irq, void *dev_id)
+void wake_up_powerd(void)
{
if (button_pressed == 0) {
button_pressed = 1;
wake_up(&powerd_wait);
}
+}
+
+static irqreturn_t power_handler(int irq, void *dev_id)
+{
+ wake_up_powerd();
/* FIXME: Check registers for status... */
return IRQ_HANDLED;
}
-#endif /* CONFIG_PCI */
extern void machine_halt(void);
extern void machine_alt_power_off(void);
@@ -55,20 +57,19 @@
void machine_power_off(void)
{
sstate_poweroff();
- if (!serial_console || scons_pwroff) {
-#ifdef CONFIG_PCI
+ if (strcmp(of_console_device->type, "serial") || scons_pwroff) {
if (power_reg) {
/* Both register bits seem to have the
* same effect, so until I figure out
* what the difference is...
*/
writel(AUXIO_PCIO_CPWR_OFF | AUXIO_PCIO_SPWR_OFF, power_reg);
- } else
-#endif /* CONFIG_PCI */
+ } else {
if (poweroff_method != NULL) {
poweroff_method();
/* not reached */
}
+ }
}
machine_halt();
}
@@ -76,7 +77,6 @@
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL(pm_power_off);
-#ifdef CONFIG_PCI
static int powerd(void *__unused)
{
static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
@@ -86,7 +86,7 @@
daemonize("powerd");
add_wait_queue(&powerd_wait, &wait);
-again:
+
for (;;) {
set_task_state(current, TASK_INTERRUPTIBLE);
if (button_pressed)
@@ -100,16 +100,28 @@
/* Ok, down we go... */
button_pressed = 0;
if (kernel_execve("/sbin/shutdown", argv, envp) < 0) {
- printk("powerd: shutdown execution failed\n");
- add_wait_queue(&powerd_wait, &wait);
- goto again;
+ printk(KERN_ERR "powerd: shutdown execution failed\n");
+ machine_power_off();
}
return 0;
}
+int start_powerd(void)
+{
+ int err;
+
+ err = kernel_thread(powerd, NULL, CLONE_FS);
+ if (err < 0)
+ printk(KERN_ERR "power: Failed to start power daemon.\n");
+ else
+ printk(KERN_INFO "power: powerd running.\n");
+
+ return err;
+}
+
static int __init has_button_interrupt(unsigned int irq, struct device_node *dp)
{
- if (irq == PCI_IRQ_NONE)
+ if (irq == 0xffffffff)
return 0;
if (!of_find_property(dp, "button", NULL))
return 0;
@@ -130,17 +142,14 @@
poweroff_method = machine_halt; /* able to use the standard halt */
if (has_button_interrupt(irq, op->node)) {
- if (kernel_thread(powerd, NULL, CLONE_FS) < 0) {
- printk("Failed to start power daemon.\n");
+ if (start_powerd() < 0)
return 0;
- }
- printk("powerd running.\n");
if (request_irq(irq,
power_handler, 0, "power", NULL) < 0)
- printk("power: Error, cannot register IRQ handler.\n");
+ printk(KERN_ERR "power: Cannot setup IRQ handler.\n");
} else {
- printk("not using powerd.\n");
+ printk(KERN_INFO "power: Not using powerd.\n");
}
return 0;
@@ -164,4 +173,3 @@
of_register_driver(&power_driver, &of_bus_type);
return;
}
-#endif /* CONFIG_PCI */
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/ldc.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/ldc.c
@@ -0,0 +1,2373 @@
+/* ldc.c: Logical Domain Channel link-layer protocol driver.
+ *
+ * Copyright (C) 2007 David S. Miller
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#define DRV_MODULE_NAME "ldc"
+#define PFX DRV_MODULE_NAME ": "
+#define DRV_MODULE_VERSION "1.0"
+#define DRV_MODULE_RELDATE "June 25, 2007"
+
+static char version[] __devinitdata =
+ DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+#define LDC_PACKET_SIZE 64
+
+/* Packet header layout for unreliable and reliable mode frames.
+ * When in RAW mode, packets are simply straight 64-byte payloads
+ * with no headers.
+ */
+struct ldc_packet {
+ u8 type;
+#define LDC_CTRL 0x01
+#define LDC_DATA 0x02
+#define LDC_ERR 0x10
+
+ u8 stype;
+#define LDC_INFO 0x01
+#define LDC_ACK 0x02
+#define LDC_NACK 0x04
+
+ u8 ctrl;
+#define LDC_VERS 0x01 /* Link Version */
+#define LDC_RTS 0x02 /* Request To Send */
+#define LDC_RTR 0x03 /* Ready To Receive */
+#define LDC_RDX 0x04 /* Ready for Data eXchange */
+#define LDC_CTRL_MSK 0x0f
+
+ u8 env;
+#define LDC_LEN 0x3f
+#define LDC_FRAG_MASK 0xc0
+#define LDC_START 0x40
+#define LDC_STOP 0x80
+
+ u32 seqid;
+
+ union {
+ u8 u_data[LDC_PACKET_SIZE - 8];
+ struct {
+ u32 pad;
+ u32 ackid;
+ u8 r_data[LDC_PACKET_SIZE - 8 - 8];
+ } r;
+ } u;
+};
+
+struct ldc_version {
+ u16 major;
+ u16 minor;
+};
+
+/* Ordered from largest major to lowest. */
+static struct ldc_version ver_arr[] = {
+ { .major = 1, .minor = 0 },
+};
+
+#define LDC_DEFAULT_MTU (4 * LDC_PACKET_SIZE)
+#define LDC_DEFAULT_NUM_ENTRIES (PAGE_SIZE / LDC_PACKET_SIZE)
+
+struct ldc_channel;
+
+struct ldc_mode_ops {
+ int (*write)(struct ldc_channel *, const void *, unsigned int);
+ int (*read)(struct ldc_channel *, void *, unsigned int);
+};
+
+static const struct ldc_mode_ops raw_ops;
+static const struct ldc_mode_ops nonraw_ops;
+static const struct ldc_mode_ops stream_ops;
+
+int ldom_domaining_enabled;
+
+struct ldc_iommu {
+ /* Protects arena alloc/free. */
+ spinlock_t lock;
+ struct iommu_arena arena;
+ struct ldc_mtable_entry *page_table;
+};
+
+struct ldc_channel {
+ /* Protects all operations that depend upon channel state. */
+ spinlock_t lock;
+
+ unsigned long id;
+
+ u8 *mssbuf;
+ u32 mssbuf_len;
+ u32 mssbuf_off;
+
+ struct ldc_packet *tx_base;
+ unsigned long tx_head;
+ unsigned long tx_tail;
+ unsigned long tx_num_entries;
+ unsigned long tx_ra;
+
+ unsigned long tx_acked;
+
+ struct ldc_packet *rx_base;
+ unsigned long rx_head;
+ unsigned long rx_tail;
+ unsigned long rx_num_entries;
+ unsigned long rx_ra;
+
+ u32 rcv_nxt;
+ u32 snd_nxt;
+
+ unsigned long chan_state;
+
+ struct ldc_channel_config cfg;
+ void *event_arg;
+
+ const struct ldc_mode_ops *mops;
+
+ struct ldc_iommu iommu;
+
+ struct ldc_version ver;
+
+ u8 hs_state;
+#define LDC_HS_CLOSED 0x00
+#define LDC_HS_OPEN 0x01
+#define LDC_HS_GOTVERS 0x02
+#define LDC_HS_SENTRTR 0x03
+#define LDC_HS_GOTRTR 0x04
+#define LDC_HS_COMPLETE 0x10
+
+ u8 flags;
+#define LDC_FLAG_ALLOCED_QUEUES 0x01
+#define LDC_FLAG_REGISTERED_QUEUES 0x02
+#define LDC_FLAG_REGISTERED_IRQS 0x04
+#define LDC_FLAG_RESET 0x10
+
+ u8 mss;
+ u8 state;
+
+#define LDC_IRQ_NAME_MAX 32
+ char rx_irq_name[LDC_IRQ_NAME_MAX];
+ char tx_irq_name[LDC_IRQ_NAME_MAX];
+
+ struct hlist_head mh_list;
+
+ struct hlist_node list;
+};
+
+#define ldcdbg(TYPE, f, a...) \
+do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
+ printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
+} while (0)
+
+static const char *state_to_str(u8 state)
+{
+ switch (state) {
+ case LDC_STATE_INVALID:
+ return "INVALID";
+ case LDC_STATE_INIT:
+ return "INIT";
+ case LDC_STATE_BOUND:
+ return "BOUND";
+ case LDC_STATE_READY:
+ return "READY";
+ case LDC_STATE_CONNECTED:
+ return "CONNECTED";
+ default:
+ return "";
+ }
+}
+
+static void ldc_set_state(struct ldc_channel *lp, u8 state)
+{
+ ldcdbg(STATE, "STATE (%s) --> (%s)\n",
+ state_to_str(lp->state),
+ state_to_str(state));
+
+ lp->state = state;
+}
+
+static unsigned long __advance(unsigned long off, unsigned long num_entries)
+{
+ off += LDC_PACKET_SIZE;
+ if (off == (num_entries * LDC_PACKET_SIZE))
+ off = 0;
+
+ return off;
+}
+
+static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
+{
+ return __advance(off, lp->rx_num_entries);
+}
+
+static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
+{
+ return __advance(off, lp->tx_num_entries);
+}
+
+static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
+ unsigned long *new_tail)
+{
+ struct ldc_packet *p;
+ unsigned long t;
+
+ t = tx_advance(lp, lp->tx_tail);
+ if (t == lp->tx_head)
+ return NULL;
+
+ *new_tail = t;
+
+ p = lp->tx_base;
+ return p + (lp->tx_tail / LDC_PACKET_SIZE);
+}
+
+/* When we are in reliable or stream mode, we have to track the next packet
+ * we haven't gotten an ACK for in the TX queue using tx_acked. We have
+ * to be careful not to stomp over the queue past that point. During
+ * the handshake, we don't have TX data packets pending in the queue
+ * and that's why handshake_get_tx_packet() need not be mindful of
+ * lp->tx_acked.
+ */
+static unsigned long head_for_data(struct ldc_channel *lp)
+{
+ if (lp->cfg.mode == LDC_MODE_STREAM)
+ return lp->tx_acked;
+ return lp->tx_head;
+}
+
+static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
+{
+ unsigned long limit, tail, new_tail, diff;
+ unsigned int mss;
+
+ limit = head_for_data(lp);
+ tail = lp->tx_tail;
+ new_tail = tx_advance(lp, tail);
+ if (new_tail == limit)
+ return 0;
+
+ if (limit > new_tail)
+ diff = limit - new_tail;
+ else
+ diff = (limit +
+ ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
+ diff /= LDC_PACKET_SIZE;
+ mss = lp->mss;
+
+ if (diff * mss < size)
+ return 0;
+
+ return 1;
+}
+
+static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
+ unsigned long *new_tail)
+{
+ struct ldc_packet *p;
+ unsigned long h, t;
+
+ h = head_for_data(lp);
+ t = tx_advance(lp, lp->tx_tail);
+ if (t == h)
+ return NULL;
+
+ *new_tail = t;
+
+ p = lp->tx_base;
+ return p + (lp->tx_tail / LDC_PACKET_SIZE);
+}
+
+static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
+{
+ unsigned long orig_tail = lp->tx_tail;
+ int limit = 1000;
+
+ lp->tx_tail = tail;
+ while (limit-- > 0) {
+ unsigned long err;
+
+ err = sun4v_ldc_tx_set_qtail(lp->id, tail);
+ if (!err)
+ return 0;
+
+ if (err != HV_EWOULDBLOCK) {
+ lp->tx_tail = orig_tail;
+ return -EINVAL;
+ }
+ udelay(1);
+ }
+
+ lp->tx_tail = orig_tail;
+ return -EBUSY;
+}
+
+/* This just updates the head value in the hypervisor using
+ * a polling loop with a timeout. The caller takes care of
+ * updating software state representing the head change, if any.
+ */
+static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
+{
+ int limit = 1000;
+
+ while (limit-- > 0) {
+ unsigned long err;
+
+ err = sun4v_ldc_rx_set_qhead(lp->id, head);
+ if (!err)
+ return 0;
+
+ if (err != HV_EWOULDBLOCK)
+ return -EINVAL;
+
+ udelay(1);
+ }
+
+ return -EBUSY;
+}
+
+static int send_tx_packet(struct ldc_channel *lp,
+ struct ldc_packet *p,
+ unsigned long new_tail)
+{
+ BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
+
+ return set_tx_tail(lp, new_tail);
+}
+
+static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
+ u8 stype, u8 ctrl,
+ void *data, int dlen,
+ unsigned long *new_tail)
+{
+ struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
+
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ p->type = LDC_CTRL;
+ p->stype = stype;
+ p->ctrl = ctrl;
+ if (data)
+ memcpy(p->u.u_data, data, dlen);
+ }
+ return p;
+}
+
+static int start_handshake(struct ldc_channel *lp)
+{
+ struct ldc_packet *p;
+ struct ldc_version *ver;
+ unsigned long new_tail;
+
+ ver = &ver_arr[0];
+
+ ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
+ ver->major, ver->minor);
+
+ p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
+ ver, sizeof(*ver), &new_tail);
+ if (p) {
+ int err = send_tx_packet(lp, p, new_tail);
+ if (!err)
+ lp->flags &= ~LDC_FLAG_RESET;
+ return err;
+ }
+ return -EBUSY;
+}
+
+static int send_version_nack(struct ldc_channel *lp,
+ u16 major, u16 minor)
+{
+ struct ldc_packet *p;
+ struct ldc_version ver;
+ unsigned long new_tail;
+
+ ver.major = major;
+ ver.minor = minor;
+
+ p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
+ &ver, sizeof(ver), &new_tail);
+ if (p) {
+ ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
+ ver.major, ver.minor);
+
+ return send_tx_packet(lp, p, new_tail);
+ }
+ return -EBUSY;
+}
+
+static int send_version_ack(struct ldc_channel *lp,
+ struct ldc_version *vp)
+{
+ struct ldc_packet *p;
+ unsigned long new_tail;
+
+ p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
+ vp, sizeof(*vp), &new_tail);
+ if (p) {
+ ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
+ vp->major, vp->minor);
+
+ return send_tx_packet(lp, p, new_tail);
+ }
+ return -EBUSY;
+}
+
+static int send_rts(struct ldc_channel *lp)
+{
+ struct ldc_packet *p;
+ unsigned long new_tail;
+
+ p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
+ &new_tail);
+ if (p) {
+ p->env = lp->cfg.mode;
+ p->seqid = 0;
+ lp->rcv_nxt = 0;
+
+ ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
+ p->env, p->seqid);
+
+ return send_tx_packet(lp, p, new_tail);
+ }
+ return -EBUSY;
+}
+
+static int send_rtr(struct ldc_channel *lp)
+{
+ struct ldc_packet *p;
+ unsigned long new_tail;
+
+ p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
+ &new_tail);
+ if (p) {
+ p->env = lp->cfg.mode;
+ p->seqid = 0;
+
+ ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
+ p->env, p->seqid);
+
+ return send_tx_packet(lp, p, new_tail);
+ }
+ return -EBUSY;
+}
+
+static int send_rdx(struct ldc_channel *lp)
+{
+ struct ldc_packet *p;
+ unsigned long new_tail;
+
+ p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
+ &new_tail);
+ if (p) {
+ p->env = 0;
+ p->seqid = ++lp->snd_nxt;
+ p->u.r.ackid = lp->rcv_nxt;
+
+ ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
+ p->env, p->seqid, p->u.r.ackid);
+
+ return send_tx_packet(lp, p, new_tail);
+ }
+ return -EBUSY;
+}
+
+static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
+{
+ struct ldc_packet *p;
+ unsigned long new_tail;
+ int err;
+
+ p = data_get_tx_packet(lp, &new_tail);
+ if (!p)
+ return -EBUSY;
+ memset(p, 0, sizeof(*p));
+ p->type = data_pkt->type;
+ p->stype = LDC_NACK;
+ p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
+ p->seqid = lp->snd_nxt + 1;
+ p->u.r.ackid = lp->rcv_nxt;
+
+ ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
+ p->type, p->ctrl, p->seqid, p->u.r.ackid);
+
+ err = send_tx_packet(lp, p, new_tail);
+ if (!err)
+ lp->snd_nxt++;
+
+ return err;
+}
+
+static int ldc_abort(struct ldc_channel *lp)
+{
+ unsigned long hv_err;
+
+ ldcdbg(STATE, "ABORT\n");
+
+ /* We report but do not act upon the hypervisor errors because
+ * there really isn't much we can do if they fail at this point.
+ */
+ hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
+ lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
+
+ hv_err = sun4v_ldc_tx_get_state(lp->id,
+ &lp->tx_head,
+ &lp->tx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
+ lp->id, hv_err);
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
+ lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
+
+ /* Refetch the RX queue state as well, because we could be invoked
+ * here in the queue processing context.
+ */
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &lp->rx_head,
+ &lp->rx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ printk(KERN_ERR PFX "ldc_abort: "
+ "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
+ lp->id, hv_err);
+
+ return -ECONNRESET;
+}
+
+static struct ldc_version *find_by_major(u16 major)
+{
+ struct ldc_version *ret = NULL;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
+ struct ldc_version *v = &ver_arr[i];
+ if (v->major <= major) {
+ ret = v;
+ break;
+ }
+ }
+ return ret;
+}
+
+static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
+{
+ struct ldc_version *vap;
+ int err;
+
+ ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
+ vp->major, vp->minor);
+
+ if (lp->hs_state == LDC_HS_GOTVERS) {
+ lp->hs_state = LDC_HS_OPEN;
+ memset(&lp->ver, 0, sizeof(lp->ver));
+ }
+
+ vap = find_by_major(vp->major);
+ if (!vap) {
+ err = send_version_nack(lp, 0, 0);
+ } else if (vap->major != vp->major) {
+ err = send_version_nack(lp, vap->major, vap->minor);
+ } else {
+ struct ldc_version ver = *vp;
+ if (ver.minor > vap->minor)
+ ver.minor = vap->minor;
+ err = send_version_ack(lp, &ver);
+ if (!err) {
+ lp->ver = ver;
+ lp->hs_state = LDC_HS_GOTVERS;
+ }
+ }
+ if (err)
+ return ldc_abort(lp);
+
+ return 0;
+}
+
+static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
+{
+ ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
+ vp->major, vp->minor);
+
+ if (lp->hs_state == LDC_HS_GOTVERS) {
+ if (lp->ver.major != vp->major ||
+ lp->ver.minor != vp->minor)
+ return ldc_abort(lp);
+ } else {
+ lp->ver = *vp;
+ lp->hs_state = LDC_HS_GOTVERS;
+ }
+ if (send_rts(lp))
+ return ldc_abort(lp);
+ return 0;
+}
+
+static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
+{
+ struct ldc_version *vap;
+
+ if ((vp->major == 0 && vp->minor == 0) ||
+ !(vap = find_by_major(vp->major))) {
+ return ldc_abort(lp);
+ } else {
+ struct ldc_packet *p;
+ unsigned long new_tail;
+
+ p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
+ vap, sizeof(*vap),
+ &new_tail);
+ if (p)
+ return send_tx_packet(lp, p, new_tail);
+ else
+ return ldc_abort(lp);
+ }
+}
+
+static int process_version(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ struct ldc_version *vp;
+
+ vp = (struct ldc_version *) p->u.u_data;
+
+ switch (p->stype) {
+ case LDC_INFO:
+ return process_ver_info(lp, vp);
+
+ case LDC_ACK:
+ return process_ver_ack(lp, vp);
+
+ case LDC_NACK:
+ return process_ver_nack(lp, vp);
+
+ default:
+ return ldc_abort(lp);
+ }
+}
+
+static int process_rts(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
+ p->stype, p->seqid, p->env);
+
+ if (p->stype != LDC_INFO ||
+ lp->hs_state != LDC_HS_GOTVERS ||
+ p->env != lp->cfg.mode)
+ return ldc_abort(lp);
+
+ lp->snd_nxt = p->seqid;
+ lp->rcv_nxt = p->seqid;
+ lp->hs_state = LDC_HS_SENTRTR;
+ if (send_rtr(lp))
+ return ldc_abort(lp);
+
+ return 0;
+}
+
+static int process_rtr(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
+ p->stype, p->seqid, p->env);
+
+ if (p->stype != LDC_INFO ||
+ p->env != lp->cfg.mode)
+ return ldc_abort(lp);
+
+ lp->snd_nxt = p->seqid;
+ lp->hs_state = LDC_HS_COMPLETE;
+ ldc_set_state(lp, LDC_STATE_CONNECTED);
+ send_rdx(lp);
+
+ return LDC_EVENT_UP;
+}
+
+static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
+{
+ return lp->rcv_nxt + 1 == seqid;
+}
+
+static int process_rdx(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
+ p->stype, p->seqid, p->env, p->u.r.ackid);
+
+ if (p->stype != LDC_INFO ||
+ !(rx_seq_ok(lp, p->seqid)))
+ return ldc_abort(lp);
+
+ lp->rcv_nxt = p->seqid;
+
+ lp->hs_state = LDC_HS_COMPLETE;
+ ldc_set_state(lp, LDC_STATE_CONNECTED);
+
+ return LDC_EVENT_UP;
+}
+
+static int process_control_frame(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ switch (p->ctrl) {
+ case LDC_VERS:
+ return process_version(lp, p);
+
+ case LDC_RTS:
+ return process_rts(lp, p);
+
+ case LDC_RTR:
+ return process_rtr(lp, p);
+
+ case LDC_RDX:
+ return process_rdx(lp, p);
+
+ default:
+ return ldc_abort(lp);
+ }
+}
+
+static int process_error_frame(struct ldc_channel *lp,
+ struct ldc_packet *p)
+{
+ return ldc_abort(lp);
+}
+
+/* Process the ackid carried by a data ACK.
+ *
+ * Walks the TX ring starting at lp->tx_acked looking for the packet
+ * whose seqid equals the ackid.  When found, tx_acked is moved to the
+ * slot just after that packet and 0 is returned.  If the walk reaches
+ * lp->tx_tail without a match the result of ldc_abort() is returned.
+ */
+static int process_data_ack(struct ldc_channel *lp,
+			    struct ldc_packet *ack)
+{
+	u32 wanted = ack->u.r.ackid;
+	unsigned long pos = lp->tx_acked;
+
+	for (;;) {
+		struct ldc_packet *sent = lp->tx_base + (pos / LDC_PACKET_SIZE);
+
+		pos = tx_advance(lp, pos);
+
+		if (sent->seqid == wanted)
+			break;
+		if (pos == lp->tx_tail)
+			return ldc_abort(lp);
+	}
+
+	lp->tx_acked = pos;
+	return 0;
+}
+
+/* Invoke the channel's event callback once for every event bit set in
+ * event_mask, always in the order RESET, UP, DATA_READY.
+ */
+static void send_events(struct ldc_channel *lp, unsigned int event_mask)
+{
+	static const unsigned int delivery_order[] = {
+		LDC_EVENT_RESET,
+		LDC_EVENT_UP,
+		LDC_EVENT_DATA_READY,
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(delivery_order) / sizeof(delivery_order[0]); i++) {
+		unsigned int ev = delivery_order[i];
+
+		if (event_mask & ev)
+			lp->cfg.event(lp->event_arg, ev);
+	}
+}
+
+/* RX interrupt handler; dev_id is the struct ldc_channel that ldc_bind()
+ * passed to request_irq().
+ *
+ * With lp->lock held it re-reads the RX queue head/tail and the channel
+ * state from the hypervisor.  While the handshake is still in progress
+ * it consumes the queued frames itself; once the handshake is complete
+ * it only works out which events to raise.  The accumulated event_mask
+ * is delivered through send_events() after the lock has been released.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ldc_rx(int irq, void *dev_id)
+{
+ struct ldc_channel *lp = dev_id;
+ unsigned long orig_state, hv_err, flags;
+ unsigned int event_mask;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ orig_state = lp->chan_state;
+ /* NOTE(review): hv_err is assigned but never examined in this
+ * function -- confirm that ignoring a failed query is intended.
+ */
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &lp->rx_head,
+ &lp->rx_tail,
+ &lp->chan_state);
+
+ ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
+ orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
+
+ event_mask = 0;
+
+ /* RAW mode: mark the channel connected as soon as the hypervisor
+ * reports it UP.
+ */
+ if (lp->cfg.mode == LDC_MODE_RAW &&
+ lp->chan_state == LDC_CHANNEL_UP) {
+ lp->hs_state = LDC_HS_COMPLETE;
+ ldc_set_state(lp, LDC_STATE_CONNECTED);
+
+ event_mask |= LDC_EVENT_UP;
+
+ /* Resync so the state-change check below sees no change. */
+ orig_state = lp->chan_state;
+ }
+
+ /* If we are in reset state, flush the RX queue and ignore
+ * everything.
+ */
+ if (lp->flags & LDC_FLAG_RESET) {
+ (void) __set_rx_head(lp, lp->rx_tail);
+ goto out;
+ }
+
+ /* Once we finish the handshake, we let the ldc_read()
+ * paths do all of the control frame and state management.
+ * Just trigger the callback.
+ */
+ if (lp->hs_state == LDC_HS_COMPLETE) {
+ /* Also entered from the frame loop below when a frame completes
+ * the handshake.
+ */
+handshake_complete:
+ if (lp->chan_state != orig_state) {
+ unsigned int event = LDC_EVENT_RESET;
+
+ if (lp->chan_state == LDC_CHANNEL_UP)
+ event = LDC_EVENT_UP;
+
+ event_mask |= event;
+ }
+ if (lp->rx_head != lp->rx_tail)
+ event_mask |= LDC_EVENT_DATA_READY;
+
+ goto out;
+ }
+
+ /* Channel state changed while the handshake is still incomplete:
+ * process no frames on this interrupt.
+ */
+ if (lp->chan_state != orig_state)
+ goto out;
+
+ /* Handshake in progress: consume queued frames one at a time. */
+ while (lp->rx_head != lp->rx_tail) {
+ struct ldc_packet *p;
+ unsigned long new;
+ int err;
+
+ p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
+
+ switch (p->type) {
+ case LDC_CTRL:
+ err = process_control_frame(lp, p);
+ /* Positive results are event bits to report. */
+ if (err > 0)
+ event_mask |= err;
+ break;
+
+ case LDC_DATA:
+ event_mask |= LDC_EVENT_DATA_READY;
+ err = 0;
+ break;
+
+ case LDC_ERR:
+ err = process_error_frame(lp, p);
+ break;
+
+ default:
+ err = ldc_abort(lp);
+ break;
+ }
+
+ if (err < 0)
+ break;
+
+ /* Step the head past this packet, wrapping at the ring end. */
+ new = lp->rx_head;
+ new += LDC_PACKET_SIZE;
+ if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
+ new = 0;
+ lp->rx_head = new;
+
+ err = __set_rx_head(lp, new);
+ if (err < 0) {
+ (void) ldc_abort(lp);
+ break;
+ }
+ if (lp->hs_state == LDC_HS_COMPLETE)
+ goto handshake_complete;
+ }
+
+out:
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ /* Callbacks run without lp->lock held. */
+ send_events(lp, event_mask);
+
+ return IRQ_HANDLED;
+}
+
+/* TX interrupt handler; dev_id is the struct ldc_channel.
+ *
+ * Under lp->lock it refreshes tx_head, tx_tail and chan_state from the
+ * hypervisor.  For a RAW-mode channel that is UP it marks the handshake
+ * complete and the channel connected, and reports LDC_EVENT_UP through
+ * send_events() once the lock has been dropped.  Always returns
+ * IRQ_HANDLED.
+ */
+static irqreturn_t ldc_tx(int irq, void *dev_id)
+{
+	struct ldc_channel *lp = dev_id;
+	unsigned long flags, hv_err, orig_state;
+	unsigned int event_mask;
+	int raw_and_up;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	orig_state = lp->chan_state;
+	hv_err = sun4v_ldc_tx_get_state(lp->id,
+					&lp->tx_head,
+					&lp->tx_tail,
+					&lp->chan_state);
+
+	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
+	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
+
+	raw_and_up = (lp->cfg.mode == LDC_MODE_RAW &&
+		      lp->chan_state == LDC_CHANNEL_UP);
+	event_mask = raw_and_up ? LDC_EVENT_UP : 0;
+	if (raw_and_up) {
+		lp->hs_state = LDC_HS_COMPLETE;
+		ldc_set_state(lp, LDC_STATE_CONNECTED);
+	}
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	send_events(lp, event_mask);
+
+	return IRQ_HANDLED;
+}
+
+/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
+ * XXX that addition to and removal from the ldc_channel_list are
+ * XXX atomic, otherwise the __ldc_channel_exists() check is
+ * XXX totally pointless as another thread can slip into ldc_alloc()
+ * XXX and add a channel with the same ID. There also needs to be
+ * XXX a spinlock for ldc_channel_list.
+ */
+static HLIST_HEAD(ldc_channel_list);
+
+/* Return 1 if a channel with the given id is on ldc_channel_list,
+ * 0 otherwise.  No locking is done here.
+ */
+static int __ldc_channel_exists(unsigned long id)
+{
+	struct hlist_node *pos;
+	struct ldc_channel *chan;
+	int found = 0;
+
+	hlist_for_each_entry(chan, pos, &ldc_channel_list, list) {
+		if (chan->id == id) {
+			found = 1;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/* Allocate and zero a page-order block large enough to hold
+ * num_entries packets of LDC_PACKET_SIZE bytes.
+ *
+ * On success *base receives the kernel virtual address, *ra receives
+ * __pa() of it, and 0 is returned.  On allocation failure an error is
+ * logged (name identifies the queue in the message) and -ENOMEM is
+ * returned with *base and *ra untouched.
+ */
+static int alloc_queue(const char *name, unsigned long num_entries,
+		       struct ldc_packet **base, unsigned long *ra)
+{
+	unsigned long bytes = num_entries * LDC_PACKET_SIZE;
+	unsigned long order = get_order(bytes);
+	void *mem = (void *) __get_free_pages(GFP_KERNEL, order);
+
+	if (!mem) {
+		printk(KERN_ERR PFX "Alloc of %s queue failed with "
+		       "size=%lu order=%lu\n", name, bytes, order);
+		return -ENOMEM;
+	}
+
+	memset(mem, 0, PAGE_SIZE << order);
+
+	*base = mem;
+	*ra = __pa(mem);
+
+	return 0;
+}
+
+/* Free a queue obtained from alloc_queue().  num_entries must be the
+ * value used at allocation time, since the page order is recomputed
+ * from it.  A NULL q is ignored.
+ */
+static void free_queue(unsigned long num_entries, struct ldc_packet *q)
+{
+	if (q) {
+		unsigned long bytes = num_entries * LDC_PACKET_SIZE;
+		unsigned long order = get_order(bytes);
+
+		free_pages((unsigned long)q, order);
+	}
+}
+
+/* Number of entries in each channel's map table; also the number of
+ * bits tracked by the allocation arena.
+ * XXX Make this configurable... XXX
+ */
+#define LDC_IOTABLE_SIZE (8 * 1024)
+
+/* Set up lp->iommu: allocate the arena bitmap and the zeroed map
+ * table, then register the table with the hypervisor.
+ *
+ * Returns 0 on success, -ENOMEM if either allocation fails, or -EINVAL
+ * if sun4v_ldc_set_map_table() reports an error.  On failure
+ * everything allocated here is freed and the pointers are reset to
+ * NULL.
+ */
+static int ldc_iommu_init(struct ldc_channel *lp)
+{
+ unsigned long sz, num_tsb_entries, tsbsize, order;
+ struct ldc_iommu *iommu = &lp->iommu;
+ struct ldc_mtable_entry *table;
+ unsigned long hv_err;
+ int err;
+
+ num_tsb_entries = LDC_IOTABLE_SIZE;
+ tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
+
+ spin_lock_init(&iommu->lock);
+
+ /* One bit per table entry, rounded up to a multiple of 8 bytes. */
+ sz = num_tsb_entries / 8;
+ sz = (sz + 7UL) & ~7UL;
+ iommu->arena.map = kzalloc(sz, GFP_KERNEL);
+ if (!iommu->arena.map) {
+ printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
+ return -ENOMEM;
+ }
+
+ iommu->arena.limit = num_tsb_entries;
+
+ order = get_order(tsbsize);
+
+ table = (struct ldc_mtable_entry *)
+ __get_free_pages(GFP_KERNEL, order);
+ err = -ENOMEM;
+ if (!table) {
+ printk(KERN_ERR PFX "Alloc of MTE table failed, "
+ "size=%lu order=%lu\n", tsbsize, order);
+ goto out_free_map;
+ }
+
+ memset(table, 0, PAGE_SIZE << order);
+
+ iommu->page_table = table;
+
+ /* The hypervisor is given the table's physical address. */
+ hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
+ num_tsb_entries);
+ err = -EINVAL;
+ if (hv_err)
+ goto out_free_table;
+
+ return 0;
+
+out_free_table:
+ free_pages((unsigned long) table, order);
+ iommu->page_table = NULL;
+
+out_free_map:
+ kfree(iommu->arena.map);
+ iommu->arena.map = NULL;
+
+ return err;
+}
+
+/* Undo ldc_iommu_init(): clear the hypervisor's map table registration
+ * for this channel (result ignored), then free the map table and the
+ * arena bitmap and reset both pointers to NULL.  The table's page order
+ * is recomputed from arena.limit.
+ */
+static void ldc_iommu_release(struct ldc_channel *lp)
+{
+	struct ldc_iommu *iommu = &lp->iommu;
+	unsigned long entries, table_bytes;
+
+	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
+
+	entries = iommu->arena.limit;
+	table_bytes = entries * sizeof(struct ldc_mtable_entry);
+
+	free_pages((unsigned long) iommu->page_table, get_order(table_bytes));
+	iommu->page_table = NULL;
+
+	kfree(iommu->arena.map);
+	iommu->arena.map = NULL;
+}
+
+/* Allocate and initialise a channel object for hypervisor channel 'id'.
+ *
+ * *cfgp is copied into the channel (an mtu of zero is replaced by
+ * LDC_DEFAULT_MTU) and event_arg is stored for later use as the first
+ * argument of the cfgp->event callback.  On success the channel is in
+ * LDC_STATE_INIT / LDC_HS_CLOSED, has its map table and TX/RX queues
+ * allocated, and has been added to ldc_channel_list.
+ *
+ * Returns the channel, or ERR_PTR() of:
+ *   -ENODEV  domaining is not enabled, or the hypervisor answers
+ *            HV_ECHANNEL for this id
+ *   -EINVAL  cfgp is NULL, cfgp->mode is not a known mode, or one of
+ *            cfgp->event, event_arg, cfgp->rx_irq, cfgp->tx_irq is
+ *            NULL/zero
+ *   -EEXIST  a channel with this id is already on ldc_channel_list
+ *   -ENOMEM  a memory allocation failed
+ * Errors from ldc_iommu_init() are passed through unchanged.
+ */
+struct ldc_channel *ldc_alloc(unsigned long id,
+ const struct ldc_channel_config *cfgp,
+ void *event_arg)
+{
+ struct ldc_channel *lp;
+ const struct ldc_mode_ops *mops;
+ unsigned long dummy1, dummy2, hv_err;
+ u8 mss, *mssbuf;
+ int err;
+
+ err = -ENODEV;
+ if (!ldom_domaining_enabled)
+ goto out_err;
+
+ err = -EINVAL;
+ if (!cfgp)
+ goto out_err;
+
+ /* Pick the read/write ops and the per-packet payload size (mss)
+ * for the requested mode.
+ */
+ switch (cfgp->mode) {
+ case LDC_MODE_RAW:
+ mops = &raw_ops;
+ mss = LDC_PACKET_SIZE;
+ break;
+
+ case LDC_MODE_UNRELIABLE:
+ mops = &nonraw_ops;
+ mss = LDC_PACKET_SIZE - 8;
+ break;
+
+ case LDC_MODE_STREAM:
+ mops = &stream_ops;
+ mss = LDC_PACKET_SIZE - 8 - 8;
+ break;
+
+ default:
+ goto out_err;
+ }
+
+ if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
+ goto out_err;
+
+ /* Only used to find out whether the hypervisor knows this id. */
+ hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
+ err = -ENODEV;
+ if (hv_err == HV_ECHANNEL)
+ goto out_err;
+
+ err = -EEXIST;
+ if (__ldc_channel_exists(id))
+ goto out_err;
+
+ mssbuf = NULL;
+
+ lp = kzalloc(sizeof(*lp), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!lp)
+ goto out_err;
+
+ spin_lock_init(&lp->lock);
+
+ lp->id = id;
+
+ err = ldc_iommu_init(lp);
+ if (err)
+ goto out_free_ldc;
+
+ lp->mops = mops;
+ lp->mss = mss;
+
+ lp->cfg = *cfgp;
+ if (!lp->cfg.mtu)
+ lp->cfg.mtu = LDC_DEFAULT_MTU;
+
+ /* STREAM mode needs an mtu-sized staging buffer (see read_stream()). */
+ if (lp->cfg.mode == LDC_MODE_STREAM) {
+ mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
+ if (!mssbuf) {
+ err = -ENOMEM;
+ goto out_free_iommu;
+ }
+ lp->mssbuf = mssbuf;
+ }
+
+ lp->event_arg = event_arg;
+
+ /* XXX allow setting via ldc_channel_config to override defaults
+ * XXX or use some formula based upon mtu
+ */
+ lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
+ lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
+
+ err = alloc_queue("TX", lp->tx_num_entries,
+ &lp->tx_base, &lp->tx_ra);
+ if (err)
+ goto out_free_mssbuf;
+
+ err = alloc_queue("RX", lp->rx_num_entries,
+ &lp->rx_base, &lp->rx_ra);
+ if (err)
+ goto out_free_txq;
+
+ lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
+
+ lp->hs_state = LDC_HS_CLOSED;
+ ldc_set_state(lp, LDC_STATE_INIT);
+
+ INIT_HLIST_NODE(&lp->list);
+ hlist_add_head(&lp->list, &ldc_channel_list);
+
+ INIT_HLIST_HEAD(&lp->mh_list);
+
+ return lp;
+
+ /* Error unwinding: each label releases what was set up before the
+ * failing step and falls through to the labels below it.
+ */
+out_free_txq:
+ free_queue(lp->tx_num_entries, lp->tx_base);
+
+out_free_mssbuf:
+ if (mssbuf)
+ kfree(mssbuf);
+
+out_free_iommu:
+ ldc_iommu_release(lp);
+
+out_free_ldc:
+ kfree(lp);
+
+out_err:
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ldc_alloc);
+
+/* Destroy a channel created by ldc_alloc().
+ *
+ * Depending on the flags set on the channel this frees the RX/TX IRQs,
+ * unconfigures both queues with the hypervisor and frees the queue
+ * memory.  The channel is then removed from ldc_channel_list, its
+ * stream buffer and map table are released, and lp itself is freed.
+ * lp must not be used after this returns.
+ */
+void ldc_free(struct ldc_channel *lp)
+{
+	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
+		free_irq(lp->cfg.rx_irq, lp);
+		free_irq(lp->cfg.tx_irq, lp);
+	}
+
+	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
+		sun4v_ldc_tx_qconf(lp->id, 0, 0);
+		sun4v_ldc_rx_qconf(lp->id, 0, 0);
+		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
+	}
+	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
+		free_queue(lp->tx_num_entries, lp->tx_base);
+		free_queue(lp->rx_num_entries, lp->rx_base);
+		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
+	}
+
+	hlist_del(&lp->list);
+
+	/* kfree(NULL) is a no-op, so no guard is needed for channels
+	 * that never allocated a stream buffer.
+	 */
+	kfree(lp->mssbuf);
+
+	ldc_iommu_release(lp);
+
+	kfree(lp);
+}
+EXPORT_SYMBOL(ldc_free);
+
+/* Bind the channel. This registers the LDC queues with
+ * the hypervisor and puts the channel into a pseudo-listening
+ * state. This does not initiate a handshake, ldc_connect() does
+ * that.
+ *
+ * name is used to build the "<name> RX" / "<name> TX" IRQ names.
+ * The channel must be in LDC_STATE_INIT.  On success it is left in
+ * LDC_STATE_BOUND / LDC_HS_OPEN and 0 is returned.
+ *
+ * Returns -EINVAL for a NULL name or wrong state, the request_irq()
+ * error if an IRQ cannot be obtained, -ENODEV if a queue cannot be
+ * (un)configured, or -EBUSY if the TX state cannot be read.  On
+ * failure the IRQs and queue registrations made here are undone.
+ *
+ * NOTE(review): request_irq() and free_irq() are called below with
+ * lp->lock held and interrupts disabled -- confirm that neither can
+ * sleep in this kernel.
+ */
+int ldc_bind(struct ldc_channel *lp, const char *name)
+{
+ unsigned long hv_err, flags;
+ int err = -EINVAL;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ if (!name)
+ goto out_err;
+
+ if (lp->state != LDC_STATE_INIT)
+ goto out_err;
+
+ snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
+ snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
+
+ err = request_irq(lp->cfg.rx_irq, ldc_rx,
+ IRQF_SAMPLE_RANDOM | IRQF_SHARED,
+ lp->rx_irq_name, lp);
+ if (err)
+ goto out_err;
+
+ err = request_irq(lp->cfg.tx_irq, ldc_tx,
+ IRQF_SAMPLE_RANDOM | IRQF_SHARED,
+ lp->tx_irq_name, lp);
+ if (err)
+ goto out_free_rx_irq;
+
+
+ lp->flags |= LDC_FLAG_REGISTERED_IRQS;
+
+ /* Each queue is first unconfigured (0, 0) and then configured
+ * with its real address and entry count.
+ */
+ err = -ENODEV;
+ hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_free_tx_irq;
+
+ hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+ if (hv_err)
+ goto out_free_tx_irq;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_unmap_tx;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+ if (hv_err)
+ goto out_unmap_tx;
+
+ lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
+
+ hv_err = sun4v_ldc_tx_get_state(lp->id,
+ &lp->tx_head,
+ &lp->tx_tail,
+ &lp->chan_state);
+ err = -EBUSY;
+ if (hv_err)
+ goto out_unmap_rx;
+
+ /* Nothing is outstanding yet: start ACK tracking at the head. */
+ lp->tx_acked = lp->tx_head;
+
+ lp->hs_state = LDC_HS_OPEN;
+ ldc_set_state(lp, LDC_STATE_BOUND);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return 0;
+
+ /* Error unwinding; each label falls through to the ones below. */
+out_unmap_rx:
+ lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
+ sun4v_ldc_rx_qconf(lp->id, 0, 0);
+
+out_unmap_tx:
+ sun4v_ldc_tx_qconf(lp->id, 0, 0);
+
+out_free_tx_irq:
+ lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
+ free_irq(lp->cfg.tx_irq, lp);
+
+out_free_rx_irq:
+ free_irq(lp->cfg.rx_irq, lp);
+
+out_err:
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL(ldc_bind);
+
+/* Start the handshake on a bound channel.
+ *
+ * Returns -EINVAL for RAW-mode channels, and also when the queues are
+ * not both allocated and registered or the handshake state is not
+ * LDC_HS_OPEN.  Otherwise returns the result of start_handshake(),
+ * which is called with lp->lock held.
+ */
+int ldc_connect(struct ldc_channel *lp)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+
+	if (lp->cfg.mode == LDC_MODE_RAW)
+		return -EINVAL;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	if ((lp->flags & LDC_FLAG_ALLOCED_QUEUES) &&
+	    (lp->flags & LDC_FLAG_REGISTERED_QUEUES) &&
+	    lp->hs_state == LDC_HS_OPEN)
+		err = start_handshake(lp);
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL(ldc_connect);
+
+/* Disconnect a channel by re-registering its queues with the
+ * hypervisor.
+ *
+ * Not valid for RAW-mode channels or for channels whose queues are not
+ * both allocated and registered (-EINVAL).  On success both queues
+ * have been unconfigured and configured again, the channel is back in
+ * LDC_STATE_BOUND / LDC_HS_OPEN, LDC_FLAG_RESET is set and 0 is
+ * returned.
+ *
+ * If any hypervisor call fails, both queues are unconfigured, both
+ * IRQs are freed, the REGISTERED_IRQS/REGISTERED_QUEUES flags are
+ * cleared, the channel drops to LDC_STATE_INIT and -ENODEV is
+ * returned.
+ *
+ * NOTE(review): the flag checks run before lp->lock is taken, and
+ * free_irq() in the error path runs with the lock held and interrupts
+ * disabled -- confirm both are safe.
+ */
+int ldc_disconnect(struct ldc_channel *lp)
+{
+ unsigned long hv_err, flags;
+ int err;
+
+ if (lp->cfg.mode == LDC_MODE_RAW)
+ return -EINVAL;
+
+ if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
+ !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
+ return -EINVAL;
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ err = -ENODEV;
+ hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_err;
+
+ hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+ if (hv_err)
+ goto out_err;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
+ if (hv_err)
+ goto out_err;
+
+ hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+ if (hv_err)
+ goto out_err;
+
+ ldc_set_state(lp, LDC_STATE_BOUND);
+ lp->hs_state = LDC_HS_OPEN;
+ /* While this flag is set ldc_rx() flushes the RX queue. */
+ lp->flags |= LDC_FLAG_RESET;
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return 0;
+
+out_err:
+ sun4v_ldc_tx_qconf(lp->id, 0, 0);
+ sun4v_ldc_rx_qconf(lp->id, 0, 0);
+ free_irq(lp->cfg.tx_irq, lp);
+ free_irq(lp->cfg.rx_irq, lp);
+ lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
+ LDC_FLAG_REGISTERED_QUEUES);
+ ldc_set_state(lp, LDC_STATE_INIT);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return err;
+}
+EXPORT_SYMBOL(ldc_disconnect);
+
+/* Return the channel's current state value (lp->state).  The value is
+ * read without taking lp->lock.
+ */
+int ldc_state(struct ldc_channel *lp)
+{
+	int cur = lp->state;
+
+	return cur;
+}
+EXPORT_SYMBOL(ldc_state);
+
+/* RAW-mode write: copy up to one packet's worth of bytes straight into
+ * the next TX slot and send it.
+ *
+ * Returns size on success, -EMSGSIZE if size exceeds LDC_PACKET_SIZE,
+ * -EAGAIN if no TX slot is available, or the error returned by
+ * send_tx_packet().
+ */
+static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
+{
+	unsigned long new_tail;
+	struct ldc_packet *slot;
+	int err;
+
+	if (size > LDC_PACKET_SIZE)
+		return -EMSGSIZE;
+
+	slot = data_get_tx_packet(lp, &new_tail);
+	if (!slot)
+		return -EAGAIN;
+
+	memcpy(slot, buf, size);
+
+	err = send_tx_packet(lp, slot, new_tail);
+	if (err)
+		return err;
+
+	return size;
+}
+
+/* RAW-mode read: hand the caller one whole packet from the RX ring.
+ *
+ * buf must have room for LDC_PACKET_SIZE bytes, otherwise -EINVAL.
+ * Returns LDC_PACKET_SIZE when a packet was copied, 0 when the ring is
+ * empty, -ECONNRESET when the channel is down/resetting or the head
+ * update fails, or the result of ldc_abort() when the RX state cannot
+ * be read.
+ */
+static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+	struct ldc_packet *pkt;
+	unsigned long hv_err, next;
+
+	if (size < LDC_PACKET_SIZE)
+		return -EINVAL;
+
+	hv_err = sun4v_ldc_rx_get_state(lp->id,
+					&lp->rx_head,
+					&lp->rx_tail,
+					&lp->chan_state);
+	if (hv_err)
+		return ldc_abort(lp);
+
+	switch (lp->chan_state) {
+	case LDC_CHANNEL_DOWN:
+	case LDC_CHANNEL_RESETTING:
+		return -ECONNRESET;
+	default:
+		break;
+	}
+
+	if (lp->rx_head == lp->rx_tail)
+		return 0;
+
+	pkt = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
+	memcpy(buf, pkt, LDC_PACKET_SIZE);
+
+	next = rx_advance(lp, lp->rx_head);
+	lp->rx_head = next;
+
+	if (__set_rx_head(lp, next) < 0)
+		return -ECONNRESET;
+
+	return LDC_PACKET_SIZE;
+}
+
+/* Read/write methods selected by ldc_alloc() for LDC_MODE_RAW. */
+static const struct ldc_mode_ops raw_ops = {
+ .write = write_raw,
+ .read = read_raw,
+};
+
+/* Non-RAW write: split 'size' bytes into data packets of at most
+ * lp->mss payload bytes each and queue them on the TX ring.
+ *
+ * The first packet carries LDC_START and the last carries LDC_STOP in
+ * env (a single packet carries both).  Sequence numbers continue from
+ * lp->snd_nxt; snd_nxt is only updated once the new tail has been
+ * handed to set_tx_tail() successfully.
+ *
+ * Returns size on success, -EBUSY if the TX state cannot be read, the
+ * result of ldc_abort() if the channel is not UP, -EAGAIN if the ring
+ * lacks space for the whole message, or the set_tx_tail() error.
+ */
+static int write_nonraw(struct ldc_channel *lp, const void *buf,
+ unsigned int size)
+{
+ unsigned long hv_err, tail;
+ unsigned int copied;
+ u32 seq;
+ int err;
+
+ hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
+ &lp->chan_state);
+ if (unlikely(hv_err))
+ return -EBUSY;
+
+ if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
+ return ldc_abort(lp);
+
+ if (!tx_has_space_for(lp, size))
+ return -EAGAIN;
+
+ seq = lp->snd_nxt;
+ copied = 0;
+ tail = lp->tx_tail;
+ while (copied < size) {
+ struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
+ /* The payload area used depends on the channel mode. */
+ u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
+ p->u.u_data :
+ p->u.r.r_data);
+ int data_len;
+
+ p->type = LDC_DATA;
+ p->stype = LDC_INFO;
+ p->ctrl = 0;
+
+ data_len = size - copied;
+ if (data_len > lp->mss)
+ data_len = lp->mss;
+
+ BUG_ON(data_len > LDC_LEN);
+
+ /* env = payload length plus START/STOP fragment markers. */
+ p->env = (data_len |
+ (copied == 0 ? LDC_START : 0) |
+ (data_len == size - copied ? LDC_STOP : 0));
+
+ p->seqid = ++seq;
+
+ ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
+ p->type,
+ p->stype,
+ p->ctrl,
+ p->env,
+ p->seqid);
+
+ memcpy(data, buf, data_len);
+ buf += data_len;
+ copied += data_len;
+
+ tail = tx_advance(lp, tail);
+ }
+
+ /* Publish all packets at once by moving the tail. */
+ err = set_tx_tail(lp, tail);
+ if (!err) {
+ lp->snd_nxt = seq;
+ err = size;
+ }
+
+ return err;
+}
+
+/* Recover from a packet with an unexpected sequence number.
+ *
+ * If a fragmented message was in progress, rcv_nxt is rewound to just
+ * before its first fragment.  A NACK is then sent for p and the RX
+ * head is moved to the tail, discarding everything queued.
+ *
+ * Returns 0 on success, the send_data_nack() error, or the result of
+ * ldc_abort() if the head cannot be updated.
+ */
+static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
+		      struct ldc_packet *first_frag)
+{
+	int err;
+
+	if (first_frag != NULL)
+		lp->rcv_nxt = first_frag->seqid - 1;
+
+	err = send_data_nack(lp, p);
+	if (err != 0)
+		return err;
+
+	if (__set_rx_head(lp, lp->rx_tail) < 0)
+		return ldc_abort(lp);
+
+	return 0;
+}
+
+/* Act on the ACK/NACK bits of a data packet's stype.
+ *
+ * An ACK is passed to process_data_ack(); if that fails its error is
+ * returned.  A NACK returns the result of ldc_abort().  Returns 0 when
+ * neither step reports an error.
+ */
+static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
+{
+	int err = 0;
+
+	if (p->stype & LDC_ACK)
+		err = process_data_ack(lp, p);
+
+	if (!err && (p->stype & LDC_NACK))
+		err = ldc_abort(lp);
+
+	return err;
+}
+
+/* Poll the hypervisor until the RX tail moves away from cur_head.
+ *
+ * Polls up to 1000 times with a udelay(1) between attempts, refreshing
+ * lp->rx_tail and lp->chan_state each time (lp->rx_head is left alone).
+ *
+ * Returns 0 once more data is queued, -EAGAIN if the poll budget runs
+ * out, -ECONNRESET if the channel goes down or is resetting, or the
+ * result of ldc_abort() if the RX state cannot be read.
+ */
+static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
+{
+	unsigned long polled_head;
+	int tries;
+
+	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
+	       cur_head, lp->rx_head, lp->rx_tail);
+
+	for (tries = 0; tries < 1000; tries++) {
+		unsigned long hv_err;
+
+		hv_err = sun4v_ldc_rx_get_state(lp->id,
+						&polled_head,
+						&lp->rx_tail,
+						&lp->chan_state);
+		if (hv_err)
+			return ldc_abort(lp);
+
+		if (lp->chan_state == LDC_CHANNEL_DOWN ||
+		    lp->chan_state == LDC_CHANNEL_RESETTING)
+			return -ECONNRESET;
+
+		if (cur_head != lp->rx_tail) {
+			ldcdbg(DATA, "DATA WAIT DONE "
+			       "head[%lx] tail[%lx] chan_state[%lx]\n",
+			       polled_head, lp->rx_tail, lp->chan_state);
+			return 0;
+		}
+
+		udelay(1);
+	}
+
+	return -EAGAIN;
+}
+
+/* Move the RX head to 'head' via __set_rx_head() and, on success,
+ * record it in lp->rx_head.  Returns 0, or the result of ldc_abort()
+ * if the update fails (lp->rx_head is then left unchanged).
+ */
+static int rx_set_head(struct ldc_channel *lp, unsigned long head)
+{
+	if (__set_rx_head(lp, head) < 0)
+		return ldc_abort(lp);
+
+	lp->rx_head = head;
+	return 0;
+}
+
+/* Send a data ACK carrying lp->rcv_nxt as its ackid.
+ *
+ * Best effort: if no TX slot is available nothing is sent, and
+ * lp->snd_nxt only advances when send_tx_packet() succeeds.
+ */
+static void send_data_ack(struct ldc_channel *lp)
+{
+	unsigned long new_tail;
+	struct ldc_packet *ack;
+
+	ack = data_get_tx_packet(lp, &new_tail);
+	if (unlikely(!ack))
+		return;
+
+	memset(ack, 0, sizeof(*ack));
+	ack->type = LDC_DATA;
+	ack->stype = LDC_ACK;
+	ack->ctrl = 0;
+	ack->seqid = lp->snd_nxt + 1;
+	ack->u.r.ackid = lp->rcv_nxt;
+
+	if (!send_tx_packet(lp, ack, new_tail))
+		lp->snd_nxt++;
+}
+
+/* Non-RAW read: reassemble one message from the RX ring into buf.
+ *
+ * Packets are walked starting at lp->rx_head using the local cursor
+ * 'new'.  Control frames and payload-less ACK/NACK frames met on the
+ * way are processed and skipped.  Data fragments are copied until a
+ * packet with LDC_STOP is seen; if the ring runs dry in the middle of
+ * a message, rx_data_wait() is used to wait for more.
+ *
+ * Returns the number of bytes copied (0 if nothing was queued or only
+ * non-data frames were consumed before an out-of-sequence packet), or
+ * a negative value: -ECONNRESET if the channel is down/resetting,
+ * -EMSGSIZE if buf is too small for the message (the ring is left
+ * unconsumed), or an error from one of the helpers.  In modes other
+ * than LDC_MODE_UNRELIABLE a data ACK is sent after a successful
+ * non-empty read.
+ */
+static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+ struct ldc_packet *first_frag;
+ unsigned long hv_err, new;
+ int err, copied;
+
+ hv_err = sun4v_ldc_rx_get_state(lp->id,
+ &lp->rx_head,
+ &lp->rx_tail,
+ &lp->chan_state);
+ if (hv_err)
+ return ldc_abort(lp);
+
+ if (lp->chan_state == LDC_CHANNEL_DOWN ||
+ lp->chan_state == LDC_CHANNEL_RESETTING)
+ return -ECONNRESET;
+
+ if (lp->rx_head == lp->rx_tail)
+ return 0;
+
+ first_frag = NULL;
+ copied = err = 0;
+ new = lp->rx_head;
+ while (1) {
+ struct ldc_packet *p;
+ int pkt_len;
+
+ BUG_ON(new == lp->rx_tail);
+ p = lp->rx_base + (new / LDC_PACKET_SIZE);
+
+ ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
+ "rcv_nxt[%08x]\n",
+ p->type,
+ p->stype,
+ p->ctrl,
+ p->env,
+ p->seqid,
+ p->u.r.ackid,
+ lp->rcv_nxt);
+
+ /* Out of sequence: NACK, flush the ring, report no data. */
+ if (unlikely(!rx_seq_ok(lp, p->seqid))) {
+ err = rx_bad_seq(lp, p, first_frag);
+ copied = 0;
+ break;
+ }
+
+ if (p->type & LDC_CTRL) {
+ err = process_control_frame(lp, p);
+ if (err < 0)
+ break;
+ /* Positive (event) results are dropped here. */
+ err = 0;
+ }
+
+ lp->rcv_nxt = p->seqid;
+
+ /* Not a data frame: step over it. */
+ if (!(p->type & LDC_DATA)) {
+ new = rx_advance(lp, new);
+ goto no_data;
+ }
+ if (p->stype & (LDC_ACK | LDC_NACK)) {
+ err = data_ack_nack(lp, p);
+ if (err)
+ break;
+ }
+ /* ACK/NACK without an INFO payload: consume it right away. */
+ if (!(p->stype & LDC_INFO)) {
+ new = rx_advance(lp, new);
+ err = rx_set_head(lp, new);
+ if (err)
+ break;
+ goto no_data;
+ }
+
+ pkt_len = p->env & LDC_LEN;
+
+ /* Every initial packet starts with the START bit set.
+ *
+ * Singleton packets will have both START+STOP set.
+ *
+ * Fragments will have START set in the first frame, STOP
+ * set in the last frame, and neither bit set in middle
+ * frames of the packet.
+ *
+ * Therefore if we are at the beginning of a packet and
+ * we don't see START, or we are in the middle of a fragmented
+ * packet and do see START, we are unsynchronized and should
+ * flush the RX queue.
+ */
+ if ((first_frag == NULL && !(p->env & LDC_START)) ||
+ (first_frag != NULL && (p->env & LDC_START))) {
+ if (!first_frag)
+ new = rx_advance(lp, new);
+
+ err = rx_set_head(lp, new);
+ if (err)
+ break;
+
+ if (!first_frag)
+ goto no_data;
+ }
+ if (!first_frag)
+ first_frag = p;
+
+ if (pkt_len > size - copied) {
+ /* User didn't give us a big enough buffer,
+ * what to do? This is a pretty serious error.
+ *
+ * Since we haven't updated the RX ring head to
+ * consume any of the packets, signal the error
+ * to the user and just leave the RX ring alone.
+ *
+ * This seems the best behavior because this allows
+ * a user of the LDC layer to start with a small
+ * RX buffer for ldc_read() calls and use -EMSGSIZE
+ * as a cue to enlarge its read buffer.
+ */
+ err = -EMSGSIZE;
+ break;
+ }
+
+ /* Ok, we are gonna eat this one. */
+ new = rx_advance(lp, new);
+
+ memcpy(buf,
+ (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
+ p->u.u_data : p->u.r.r_data), pkt_len);
+ buf += pkt_len;
+ copied += pkt_len;
+
+ if (p->env & LDC_STOP)
+ break;
+
+no_data:
+ /* Caught up with the producer: wait for more packets. */
+ if (new == lp->rx_tail) {
+ err = rx_data_wait(lp, new);
+ if (err)
+ break;
+ }
+ }
+
+ /* Commit everything consumed so far. */
+ if (!err)
+ err = rx_set_head(lp, new);
+
+ /* On failure, expect the first fragment's seqid again next time. */
+ if (err && first_frag)
+ lp->rcv_nxt = first_frag->seqid - 1;
+
+ if (!err) {
+ err = copied;
+ if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
+ send_data_ack(lp);
+ }
+
+ return err;
+}
+
+/* Read/write methods selected by ldc_alloc() for LDC_MODE_UNRELIABLE. */
+static const struct ldc_mode_ops nonraw_ops = {
+ .write = write_nonraw,
+ .read = read_nonraw,
+};
+
+/* STREAM-mode write: send at most lp->cfg.mtu bytes through
+ * write_nonraw().  The return value (bytes accepted or a negative
+ * error) may therefore be smaller than the size requested.
+ */
+static int write_stream(struct ldc_channel *lp, const void *buf,
+			unsigned int size)
+{
+	unsigned int chunk = size;
+
+	if (chunk > lp->cfg.mtu)
+		chunk = lp->cfg.mtu;
+
+	return write_nonraw(lp, buf, chunk);
+}
+
+/* STREAM-mode read: serve bytes from the staging buffer lp->mssbuf,
+ * refilling it with one read_nonraw() call when it is empty.
+ *
+ * Returns the number of bytes copied into buf (at most size, possibly
+ * 0), or the negative error from read_nonraw().
+ */
+static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+	unsigned int take = size;
+
+	if (lp->mssbuf_len == 0) {
+		int filled = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
+
+		if (filled < 0)
+			return filled;
+
+		lp->mssbuf_len = filled;
+		lp->mssbuf_off = 0;
+	}
+
+	if (take > lp->mssbuf_len)
+		take = lp->mssbuf_len;
+	memcpy(buf, lp->mssbuf + lp->mssbuf_off, take);
+
+	lp->mssbuf_off += take;
+	lp->mssbuf_len -= take;
+
+	return take;
+}
+
+/* Read/write methods selected by ldc_alloc() for LDC_MODE_STREAM. */
+static const struct ldc_mode_ops stream_ops = {
+ .write = write_stream,
+ .read = read_stream,
+};
+
+/* Write size bytes from buf to the channel using the mode-specific
+ * write method, with lp->lock held.
+ *
+ * Returns -EINVAL for a NULL buf, 0 for a zero size, -ENOTCONN if the
+ * handshake has not completed, otherwise whatever the mode's write
+ * method returns.
+ */
+int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
+{
+	unsigned long flags;
+	int err = -ENOTCONN;
+
+	if (!buf)
+		return -EINVAL;
+
+	if (!size)
+		return 0;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	if (lp->hs_state == LDC_HS_COMPLETE)
+		err = lp->mops->write(lp, buf, size);
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL(ldc_write);
+
+/* Read up to size bytes from the channel into buf using the
+ * mode-specific read method, with lp->lock held.
+ *
+ * Returns -EINVAL for a NULL buf, 0 for a zero size, -ENOTCONN if the
+ * handshake has not completed, otherwise whatever the mode's read
+ * method returns.
+ */
+int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+	unsigned long flags;
+	int err = -ENOTCONN;
+
+	if (!buf)
+		return -EINVAL;
+
+	if (!size)
+		return 0;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	if (lp->hs_state == LDC_HS_COMPLETE)
+		err = lp->mops->read(lp, buf, size);
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL(ldc_read);
+
+/* Reserve npages consecutive entries in the arena bitmap.
+ *
+ * The search starts at arena->hint; if it runs off the end it is
+ * restarted once from 0 with the old start as the new upper limit.
+ * On success the bits are set, arena->hint is moved just past the
+ * reserved run and the index of the first entry is returned.
+ * Returns -1 if no suitable run is found.
+ *
+ * The callers in this file hold iommu->lock around alloc_npages().
+ */
+static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
+{
+ struct iommu_arena *arena = &iommu->arena;
+ unsigned long n, i, start, end, limit;
+ int pass;
+
+ limit = arena->limit;
+ start = arena->hint;
+ pass = 0;
+
+again:
+ n = find_next_zero_bit(arena->map, limit, start);
+ end = n + npages;
+ /* NOTE(review): '>=' also rejects a run ending exactly at limit --
+ * confirm whether that is intended.
+ */
+ if (unlikely(end >= limit)) {
+ if (likely(pass < 1)) {
+ limit = start;
+ start = 0;
+ pass++;
+ goto again;
+ } else {
+ /* Scanned the whole thing, give up. */
+ return -1;
+ }
+ }
+
+ /* Make sure the whole run is free; if not, retry past the
+ * first busy entry.
+ */
+ for (i = n; i < end; i++) {
+ if (test_bit(i, arena->map)) {
+ start = i + 1;
+ goto again;
+ }
+ }
+
+ for (i = n; i < end; i++)
+ __set_bit(i, arena->map);
+
+ arena->hint = end;
+
+ return n;
+}
+
+/* The top four bits of a cookie hold the page size code. */
+#define COOKIE_PGSZ_CODE 0xf000000000000000ULL
+#define COOKIE_PGSZ_CODE_SHIFT 60ULL
+
+/* Map the kernel's PAGE_SIZE to the page size code used in cookies and
+ * map table entries: 8K=0, 64K=1, 512K=2, 4M=3, 32M=4, 256M=5.  Any
+ * other page size is reported as code 0.
+ */
+static u64 pagesize_code(void)
+{
+	switch (PAGE_SIZE) {
+	case (64ULL * 1024ULL):
+		return 1;
+	case (512ULL * 1024ULL):
+		return 2;
+	case (4ULL * 1024ULL * 1024ULL):
+		return 3;
+	case (32ULL * 1024ULL * 1024ULL):
+		return 4;
+	case (256ULL * 1024ULL * 1024ULL):
+		return 5;
+	case (8ULL * 1024ULL):
+	default:
+		return 0;
+	}
+}
+
+/* Build a cookie value: page size code in the top bits, map table
+ * index shifted up by PAGE_SHIFT, and the offset within the page in
+ * the low bits.
+ */
+static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
+{
+	u64 cookie = pgsz_code << COOKIE_PGSZ_CODE_SHIFT;
+
+	cookie |= index << PAGE_SHIFT;
+	cookie |= page_offset;
+
+	return cookie;
+}
+
+/* Extract the map table index from a cookie.
+ *
+ * The page size code is taken from the cookie's top bits; *shift is
+ * set to three times that code, and the index is the remaining cookie
+ * bits shifted right by 13 plus that amount.
+ */
+static u64 cookie_to_index(u64 cookie, unsigned long *shift)
+{
+	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
+	u64 addr_bits = cookie & ~COOKIE_PGSZ_CODE;
+
+	*shift = szcode * 3;
+
+	return addr_bits >> (13ULL + (szcode * 3ULL));
+}
+
+/* Reserve npages consecutive map table entries via arena_alloc() and
+ * return a pointer to the first one, or NULL if none are available.
+ */
+static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
+					     unsigned long npages)
+{
+	long first = arena_alloc(iommu, npages);
+
+	if (likely(first >= 0))
+		return iommu->page_table + first;
+
+	return NULL;
+}
+
+/* Translate LDC_MAP_* permission bits into the permission part of a
+ * map table entry, starting from the page size code.
+ *
+ * The R/W (and, for direct mappings, X) bits are applied separately for
+ * each of the SHADOW, DIRECT and IO mapping kinds present in map_perm.
+ */
+static u64 perm_to_mte(unsigned int map_perm)
+{
+	const unsigned int rd = map_perm & LDC_MAP_R;
+	const unsigned int wr = map_perm & LDC_MAP_W;
+	const unsigned int ex = map_perm & LDC_MAP_X;
+	u64 mte = pagesize_code();
+
+	if (map_perm & LDC_MAP_SHADOW) {
+		if (rd)
+			mte |= LDC_MTE_COPY_R;
+		if (wr)
+			mte |= LDC_MTE_COPY_W;
+	}
+
+	if (map_perm & LDC_MAP_DIRECT) {
+		if (rd)
+			mte |= LDC_MTE_READ;
+		if (wr)
+			mte |= LDC_MTE_WRITE;
+		if (ex)
+			mte |= LDC_MTE_EXEC;
+	}
+
+	if (map_perm & LDC_MAP_IO) {
+		if (rd)
+			mte |= LDC_MTE_IOMMU_R;
+		if (wr)
+			mte |= LDC_MTE_IOMMU_W;
+	}
+
+	return mte;
+}
+
+/* Count the pages touched by the len bytes starting at address base.
+ * The first page is always counted, so a zero length yields 1.
+ */
+static int pages_in_region(unsigned long base, long len)
+{
+	int pages = 0;
+
+	do {
+		unsigned long next_page = (base + PAGE_SIZE) & PAGE_MASK;
+
+		len -= (next_page - base);
+		base = next_page;
+		pages++;
+	} while (len > 0);
+
+	return pages;
+}
+
+/* Working state shared across fill_cookies() calls for one mapping. */
+struct cookie_state {
+ /* Map table whose entries are being written. */
+ struct ldc_mtable_entry *page_table;
+ /* Caller's output array of cookies. */
+ struct ldc_trans_cookie *cookies;
+ /* Bits OR-ed with the physical address to form each entry. */
+ u64 mte_base;
+ /* Cookie value that would directly continue the last cookie. */
+ u64 prev_cookie;
+ /* Index of the next map table entry to write. */
+ u32 pte_idx;
+ /* Number of cookies emitted so far. */
+ u32 nc;
+};
+
+/* Write map table entries and cookies for one physically contiguous
+ * region.
+ *
+ * pa is the page-aligned physical address of the first page, off the
+ * starting offset inside that page and len the byte count.  One map
+ * table entry is written per page.  A page whose cookie directly
+ * continues the previous cookie extends that cookie's size; otherwise
+ * a new cookie is appended.
+ */
+static void fill_cookies(struct cookie_state *sp, unsigned long pa,
+			 unsigned long off, unsigned long len)
+{
+	do {
+		/* Bytes of this page that belong to the region. */
+		unsigned long chunk = PAGE_SIZE - off;
+		u64 cookie;
+
+		if (chunk > len)
+			chunk = len;
+
+		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
+
+		cookie = make_cookie(sp->pte_idx, pagesize_code(), off);
+
+		/* Only the first page can start at a non-zero offset. */
+		off = 0;
+
+		if (cookie == sp->prev_cookie) {
+			sp->cookies[sp->nc - 1].cookie_size += chunk;
+		} else {
+			struct ldc_trans_cookie *slot = &sp->cookies[sp->nc];
+
+			slot->cookie_addr = cookie;
+			slot->cookie_size = chunk;
+			sp->nc++;
+		}
+		sp->prev_cookie = cookie + chunk;
+
+		sp->pte_idx++;
+
+		len -= chunk;
+		pa += PAGE_SIZE;
+	} while (len > 0);
+}
+
+/* Return the number of pages covered by one scatterlist entry, or
+ * -EFAULT if its offset or length is not a multiple of 8.
+ */
+static int sg_count_one(struct scatterlist *sg)
+{
+	unsigned long start;
+	long len = sg->length;
+
+	if ((sg->offset | len) & (8UL - 1))
+		return -EFAULT;
+
+	start = page_to_pfn(sg->page) << PAGE_SHIFT;
+	start += sg->offset;
+
+	return pages_in_region(start, len);
+}
+
+/* Sum sg_count_one() over the first num_sg scatterlist entries.
+ * Returns the total page count, or the first negative error met.
+ */
+static int sg_count_pages(struct scatterlist *sg, int num_sg)
+{
+	int total = 0;
+	int i;
+
+	for (i = 0; i < num_sg; i++) {
+		int n = sg_count_one(&sg[i]);
+
+		if (n < 0)
+			return n;
+		total += n;
+	}
+
+	return total;
+}
+
+/* Map a scatterlist into the channel's map table and describe it with
+ * cookies.
+ *
+ * Returns the number of cookies written to cookies[], -EINVAL if
+ * map_perm has bits outside LDC_MAP_ALL, -EFAULT if an entry is not
+ * 8-byte aligned in offset or length, -EMSGSIZE if the page count
+ * exceeds ncookies, or -ENOMEM if the map table has no room.
+ */
+int ldc_map_sg(struct ldc_channel *lp,
+	       struct scatterlist *sg, int num_sg,
+	       struct ldc_trans_cookie *cookies, int ncookies,
+	       unsigned int map_perm)
+{
+	struct ldc_iommu *iommu = &lp->iommu;
+	unsigned long i, npages, flags;
+	struct ldc_mtable_entry *base;
+	struct cookie_state state;
+	int err;
+
+	if (map_perm & ~LDC_MAP_ALL)
+		return -EINVAL;
+
+	err = sg_count_pages(sg, num_sg);
+	if (err < 0)
+		return err;
+	if (err > ncookies)
+		return -EMSGSIZE;
+	npages = err;
+
+	/* The arena is only locked for the reservation itself. */
+	spin_lock_irqsave(&iommu->lock, flags);
+	base = alloc_npages(iommu, npages);
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	if (!base)
+		return -ENOMEM;
+
+	state.nc = 0;
+	state.pte_idx = (base - iommu->page_table);
+	state.prev_cookie = ~(u64)0;
+	state.mte_base = perm_to_mte(map_perm);
+	state.cookies = cookies;
+	state.page_table = iommu->page_table;
+
+	for (i = 0; i < num_sg; i++) {
+		struct scatterlist *ent = &sg[i];
+
+		fill_cookies(&state, page_to_pfn(ent->page) << PAGE_SHIFT,
+			     ent->offset, ent->length);
+	}
+
+	return state.nc;
+}
+EXPORT_SYMBOL(ldc_map_sg);
+
+/* Map one virtually contiguous kernel buffer into the channel's map
+ * table and describe it with a single cookie.
+ *
+ * Returns the number of cookies written (always 1; anything else trips
+ * a BUG_ON), -EINVAL if map_perm has bits outside LDC_MAP_ALL or
+ * ncookies is below 1, -EFAULT if the buffer's physical address or len
+ * is not a multiple of 8, or -ENOMEM if the map table has no room.
+ */
+int ldc_map_single(struct ldc_channel *lp,
+		   void *buf, unsigned int len,
+		   struct ldc_trans_cookie *cookies, int ncookies,
+		   unsigned int map_perm)
+{
+	struct ldc_iommu *iommu = &lp->iommu;
+	unsigned long npages, pa, flags;
+	struct ldc_mtable_entry *base;
+	struct cookie_state state;
+
+	if (map_perm & ~LDC_MAP_ALL)
+		return -EINVAL;
+	if (ncookies < 1)
+		return -EINVAL;
+
+	pa = __pa(buf);
+	if ((pa | len) & (8UL - 1))
+		return -EFAULT;
+
+	npages = pages_in_region(pa, len);
+
+	/* The arena is only locked for the reservation itself. */
+	spin_lock_irqsave(&iommu->lock, flags);
+	base = alloc_npages(iommu, npages);
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	if (!base)
+		return -ENOMEM;
+
+	state.nc = 0;
+	state.pte_idx = (base - iommu->page_table);
+	state.prev_cookie = ~(u64)0;
+	state.mte_base = perm_to_mte(map_perm);
+	state.cookies = cookies;
+	state.page_table = iommu->page_table;
+
+	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
+	BUG_ON(state.nc != 1);
+
+	return state.nc;
+}
+EXPORT_SYMBOL(ldc_map_single);
+
+/* Release the map table entries covered by one cookie.
+ *
+ * The number of pages is derived from the cookie's in-page offset plus
+ * size, and the first entry from cookie_to_index().  For every covered
+ * entry: if the entry has a cookie recorded it is revoked through the
+ * hypervisor, then the entry's mte is cleared and its arena bit is
+ * released.
+ *
+ * ldc_unmap() calls this with iommu->lock held.
+ */
+static void free_npages(unsigned long id, struct ldc_iommu *iommu,
+			u64 cookie, u64 size)
+{
+	struct iommu_arena *arena = &iommu->arena;
+	unsigned long i, shift, index, npages;
+	struct ldc_mtable_entry *base;
+
+	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
+	index = cookie_to_index(cookie, &shift);
+	base = iommu->page_table + index;
+
+	BUG_ON(index > arena->limit ||
+	       (index + npages) > arena->limit);
+
+	/* Step 'base' along with 'i' so that every entry of a multi-page
+	 * cookie is revoked and cleared, not just the first one.
+	 *
+	 * NOTE(review): the address passed to sun4v_ldc_revoke() advances
+	 * by (i << shift), where shift comes from the page size code and
+	 * is 0 for code 0 -- confirm this matches what the hypervisor
+	 * expects for the second and later pages.
+	 */
+	for (i = 0; i < npages; i++, base++) {
+		if (base->cookie)
+			sun4v_ldc_revoke(id, cookie + (i << shift),
+					 base->cookie);
+		base->mte = 0;
+		__clear_bit(index + i, arena->map);
+	}
+}
+
+/* Release the map table entries behind each of the first ncookies
+ * cookies.  The whole operation runs under iommu->lock.
+ */
+void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
+	       int ncookies)
+{
+	struct ldc_iommu *iommu = &lp->iommu;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	for (i = 0; i < ncookies; i++)
+		free_npages(lp->id, iommu,
+			    cookies[i].cookie_addr, cookies[i].cookie_size);
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
+}
+EXPORT_SYMBOL(ldc_unmap);
+
+/* Copy between a local buffer and the memory described by a list of
+ * cookies, using the hypervisor's LDC copy service.
+ *
+ * copy_dir must be LDC_COPY_IN or LDC_COPY_OUT.  buf's physical
+ * address, len and offset must all be multiples of 8.  offset is the
+ * number of bytes to skip at the start of the cookie list.
+ *
+ * Returns the number of bytes actually copied, which may be less than
+ * len if the cookies run out (handling that is left to the caller),
+ * or: -EINVAL for a bad copy_dir, -EFAULT for misalignment or a
+ * hypervisor error while the link is still up, -ECONNRESET if the
+ * handshake is not complete or the channel is flagged as reset.
+ */
+int ldc_copy(struct ldc_channel *lp, int copy_dir,
+ void *buf, unsigned int len, unsigned long offset,
+ struct ldc_trans_cookie *cookies, int ncookies)
+{
+ unsigned int orig_len;
+ unsigned long ra;
+ int i;
+
+ if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
+ lp->id, copy_dir);
+ return -EINVAL;
+ }
+
+ ra = __pa(buf);
+ if ((ra | len | offset) & (8UL - 1)) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
+ "ra[%lx] len[%x] offset[%lx]\n",
+ lp->id, ra, len, offset);
+ return -EFAULT;
+ }
+
+ if (lp->hs_state != LDC_HS_COMPLETE ||
+ (lp->flags & LDC_FLAG_RESET)) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
+ "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
+ return -ECONNRESET;
+ }
+
+ orig_len = len;
+ for (i = 0; i < ncookies; i++) {
+ unsigned long cookie_raddr = cookies[i].cookie_addr;
+ unsigned long this_len = cookies[i].cookie_size;
+ unsigned long actual_len;
+
+ /* Skip 'offset' bytes, possibly spanning whole cookies. */
+ if (unlikely(offset)) {
+ unsigned long this_off = offset;
+
+ if (this_off > this_len)
+ this_off = this_len;
+
+ offset -= this_off;
+ this_len -= this_off;
+ if (!this_len)
+ continue;
+ cookie_raddr += this_off;
+ }
+
+ if (this_len > len)
+ this_len = len;
+
+ /* The hypervisor may copy less than requested; keep going
+ * until this cookie's share has been transferred.
+ * NOTE(review): a call that succeeds with actual_len == 0
+ * would make no progress here -- confirm the hypervisor never
+ * does that.
+ */
+ while (1) {
+ unsigned long hv_err;
+
+ hv_err = sun4v_ldc_copy(lp->id, copy_dir,
+ cookie_raddr, ra,
+ this_len, &actual_len);
+ if (unlikely(hv_err)) {
+ printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
+ "HV error %lu\n",
+ lp->id, hv_err);
+ if (lp->hs_state != LDC_HS_COMPLETE ||
+ (lp->flags & LDC_FLAG_RESET))
+ return -ECONNRESET;
+ else
+ return -EFAULT;
+ }
+
+ cookie_raddr += actual_len;
+ ra += actual_len;
+ len -= actual_len;
+ if (actual_len == this_len)
+ break;
+
+ this_len -= actual_len;
+ }
+
+ if (!len)
+ break;
+ }
+
+ /* It is caller policy what to do about short copies.
+ * For example, a networking driver can declare the
+ * packet a runt and drop it.
+ */
+
+ return orig_len - len;
+}
+EXPORT_SYMBOL(ldc_copy);
+
+/* Allocate a zeroed buffer of len bytes and map it with
+ * ldc_map_single().
+ *
+ * On entry *ncookies is the capacity of cookies[]; on success it is
+ * replaced by the number of cookies written and the buffer is
+ * returned.  Returns ERR_PTR(-EINVAL) if len is not a multiple of 8,
+ * ERR_PTR(-ENOMEM) if the allocation fails, or ERR_PTR() of the
+ * ldc_map_single() error (the buffer is freed in that case).
+ */
+void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
+			  struct ldc_trans_cookie *cookies, int *ncookies,
+			  unsigned int map_perm)
+{
+	void *ring;
+	int mapped;
+
+	if (len & (8UL - 1))
+		return ERR_PTR(-EINVAL);
+
+	ring = kzalloc(len, GFP_KERNEL);
+	if (!ring)
+		return ERR_PTR(-ENOMEM);
+
+	mapped = ldc_map_single(lp, ring, len, cookies, *ncookies, map_perm);
+	if (mapped >= 0) {
+		*ncookies = mapped;
+		return ring;
+	}
+
+	kfree(ring);
+	return ERR_PTR(mapped);
+}
+EXPORT_SYMBOL(ldc_alloc_exp_dring);
+
+/* Undo ldc_alloc_exp_dring(): unmap the cookies, then free the buffer.
+ * The len argument is not used.
+ */
+void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
+			struct ldc_trans_cookie *cookies, int ncookies)
+{
+	/* Unmap first so the buffer is never freed while still mapped. */
+	ldc_unmap(lp, cookies, ncookies);
+
+	kfree(buf);
+}
+EXPORT_SYMBOL(ldc_free_exp_dring);
+
+/* Boot-time initialisation of the LDC layer.
+ *
+ * Looks up the "domaining-enabled" property of the "platform" node in
+ * the machine description, registers the HV_GRP_LDOM hypervisor API
+ * group (major 1, minor 0) and, if the property value is non-zero,
+ * sets ldom_domaining_enabled.
+ *
+ * Returns 0 when domaining is enabled, -ENODEV in every other case.
+ * The machine description handle obtained from mdesc_grab() is
+ * released on every path once it has been obtained.
+ */
+static int __init ldc_init(void)
+{
+	unsigned long major, minor;
+	struct mdesc_handle *hp;
+	const u64 *v;
+	int err;
+	u64 mp;
+
+	hp = mdesc_grab();
+	if (!hp)
+		return -ENODEV;
+
+	err = -ENODEV;
+
+	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
+	if (mp == MDESC_NODE_NULL)
+		goto out;
+
+	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
+	if (!v)
+		goto out;
+
+	major = 1;
+	minor = 0;
+	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
+		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
+		goto out;
+	}
+
+	printk(KERN_INFO "%s", version);
+
+	/* v points into the machine description, so it must be read
+	 * before the handle is released below.
+	 */
+	if (!*v) {
+		printk(KERN_INFO PFX "Domaining disabled.\n");
+		goto out;
+	}
+	ldom_domaining_enabled = 1;
+	err = 0;
+
+out:
+	mdesc_release(hp);
+	return err;
+}
+
+core_initcall(ldc_init);
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/chmc.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/chmc.c
@@ -1,7 +1,6 @@
-/* $Id: chmc.c,v 1.4 2002/01/08 16:00:14 davem Exp $
- * memctrlr.c: Driver for UltraSPARC-III memory controller.
+/* memctrlr.c: Driver for UltraSPARC-III memory controller.
*
- * Copyright (C) 2001 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2001, 2007 David S. Miller (davem@davemloft.net)
*/
#include
@@ -16,6 +15,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -242,8 +242,11 @@
*/
static u64 read_mcreg(struct mctrl_info *mp, unsigned long offset)
{
- unsigned long ret;
- int this_cpu = get_cpu();
+ unsigned long ret, this_cpu;
+
+ preempt_disable();
+
+ this_cpu = real_hard_smp_processor_id();
if (mp->portid == this_cpu) {
__asm__ __volatile__("ldxa [%1] %2, %0"
@@ -255,7 +258,8 @@
: "r" (mp->regs + offset),
"i" (ASI_PHYS_BYPASS_EC_E));
}
- put_cpu();
+
+ preempt_enable();
return ret;
}
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/pci_sun4v.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/pci_sun4v.c
@@ -928,13 +928,13 @@
if (msi_num < 0)
return msi_num;
- devino = sun4v_build_msi(pbm->devhandle, virt_irq_p,
- pbm->msiq_first_devino,
- (pbm->msiq_first_devino +
- pbm->msiq_num));
- err = -ENOMEM;
- if (!devino)
+ err = sun4v_build_msi(pbm->devhandle, virt_irq_p,
+ pbm->msiq_first_devino,
+ (pbm->msiq_first_devino +
+ pbm->msiq_num));
+ if (err < 0)
goto out_err;
+ devino = err;
msiqid = ((devino - pbm->msiq_first_devino) +
pbm->msiq_first);
@@ -959,7 +959,7 @@
if (pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_VALID))
goto out_err;
- pdev->dev.archdata.msi_num = msi_num;
+ sparc64_set_msi(*virt_irq_p, msi_num);
if (entry->msi_attrib.is_64) {
msg.address_hi = pbm->msi64_start >> 32;
@@ -981,8 +981,6 @@
out_err:
free_msi(pbm, msi_num);
- sun4v_destroy_msi(*virt_irq_p);
- *virt_irq_p = 0;
return err;
}
@@ -994,7 +992,7 @@
unsigned long msiqid, err;
unsigned int msi_num;
- msi_num = pdev->dev.archdata.msi_num;
+ msi_num = sparc64_get_msi(virt_irq);
err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi_num, &msiqid);
if (err) {
printk(KERN_ERR "%s: getmsiq gives error %lu\n",
@@ -1129,7 +1127,7 @@
}
#endif /* !(CONFIG_PCI_MSI) */
-static void pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 devhandle)
+static void __init pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 devhandle)
{
struct pci_pbm_info *pbm;
@@ -1163,7 +1161,7 @@
pci_sun4v_msi_init(pbm);
}
-void sun4v_pci_init(struct device_node *dp, char *model_name)
+void __init sun4v_pci_init(struct device_node *dp, char *model_name)
{
static int hvapi_negotiated = 0;
struct pci_controller_info *p;
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/entry.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/entry.S
@@ -2593,3 +2593,15 @@
retl
nop
.size sun4v_mmustat_info, .-sun4v_mmustat_info
+/* sun4v_mmu_demap_all: reads no arguments; issues the HV_FAST_MMU_DEMAP_ALL trap with HV_MMU_ALL. */
+ .globl sun4v_mmu_demap_all
+ .type sun4v_mmu_demap_all,#function
+sun4v_mmu_demap_all:
+ clr %o0 /* %o0 and %o1 are zeroed before the trap */
+ clr %o1
+ mov HV_MMU_ALL, %o2
+ mov HV_FAST_MMU_DEMAP_ALL, %o5 /* presumably the function selector for the fast trap -- confirm against the hypervisor API */
+ ta HV_FAST_TRAP
+ retl /* leaf return; %o0 is not modified after the trap */
+ nop /* branch delay slot */
+ .size sun4v_mmu_demap_all, .-sun4v_mmu_demap_all
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/kernel/pci_common.c
+++ linux-source-2.6.22-2.6.22/arch/sparc64/kernel/pci_common.c
@@ -44,6 +44,67 @@
return (void *) (pbm->config_space | bus | devfn | reg);
}
+/* At least on Sabre, it is necessary to access all PCI host controller
+ * registers at their natural size, otherwise zeros are returned.
+ * Strange but true, and I see no language in the UltraSPARC-IIi
+ * programmer's manual that mentions this even indirectly.
+ */
+static int sun4u_read_pci_cfg_host(struct pci_pbm_info *pbm,
+ unsigned char bus, unsigned int devfn,
+ int where, int size, u32 *value)
+{
+ u32 tmp32, *addr;
+ u16 tmp16;
+ u8 tmp8;
+ /* Always returns PCIBIOS_SUCCESSFUL; *value is written only if addr is non-NULL and size is 1, 2 or 4. */
+ addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
+ if (!addr)
+ return PCIBIOS_SUCCESSFUL; /* *value is left exactly as the caller set it */
+
+ switch (size) {
+ case 1:
+ if (where < 8) { /* offsets 0-7: read the enclosing 16-bit word, then pick one byte */
+ unsigned long align = (unsigned long) addr;
+
+ align &= ~1; /* round the address down to an even boundary */
+ pci_config_read16((u16 *)align, &tmp16);
+ if (where & 1)
+ *value = tmp16 >> 8; /* odd offset: upper byte of the word */
+ else
+ *value = tmp16 & 0xff; /* even offset: lower byte of the word */
+ } else {
+ pci_config_read8((u8 *)addr, &tmp8);
+ *value = (u32) tmp8;
+ }
+ break;
+
+ case 2:
+ if (where < 8) {
+ pci_config_read16((u16 *)addr, &tmp16);
+ *value = (u32) tmp16;
+ } else { /* offsets >= 8: build the 16-bit value from two byte reads, low byte first */
+ pci_config_read8((u8 *)addr, &tmp8);
+ *value = (u32) tmp8;
+ pci_config_read8(((u8 *)addr) + 1, &tmp8);
+ *value |= ((u32) tmp8) << 8;
+ }
+ break;
+
+ case 4: /* two recursive 16-bit reads: low half at where, high half at where + 2 */
+ tmp32 = 0xffffffff; /* preset, so a nested read that stores nothing yields all ones */
+ sun4u_read_pci_cfg_host(pbm, bus, devfn,
+ where, 2, &tmp32);
+ *value = tmp32;
+
+ tmp32 = 0xffffffff;
+ sun4u_read_pci_cfg_host(pbm, bus, devfn,
+ where + 2, 2, &tmp32);
+ *value |= tmp32 << 16;
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
static int sun4u_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
int where, int size, u32 *value)
{
@@ -53,10 +114,6 @@
u16 tmp16;
u8 tmp8;
- if (bus_dev == pbm->pci_bus && devfn == 0x00)
- return pci_host_bridge_read_pci_cfg(bus_dev, devfn, where,
- size, value);
-
switch (size) {
case 1:
*value = 0xff;
@@ -69,6 +126,10 @@
break;
}
+ if (!bus_dev->number && !PCI_SLOT(devfn))
+ return sun4u_read_pci_cfg_host(pbm, bus, devfn, where,
+ size, value);
+
addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
if (!addr)
return PCIBIOS_SUCCESSFUL;
@@ -101,6 +162,53 @@
return PCIBIOS_SUCCESSFUL;
}
+static int sun4u_write_pci_cfg_host(struct pci_pbm_info *pbm,
+ unsigned char bus, unsigned int devfn,
+ int where, int size, u32 value)
+{
+ u32 *addr;
+ /* Write value at config offset where: 16-bit accesses below offset 8, byte accesses from 8 up. */
+ addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
+ if (!addr)
+ return PCIBIOS_SUCCESSFUL; /* nothing is written when no address can be formed */
+
+ switch (size) {
+ case 1:
+ if (where < 8) { /* read-modify-write of the enclosing 16-bit word */
+ unsigned long align = (unsigned long) addr;
+ u16 tmp16;
+
+ align &= ~1;
+ pci_config_read16((u16 *)align, &tmp16);
+ if (where & 1) {
+ tmp16 &= 0x00ff; /* keep the lower byte, replace the upper one */
+ tmp16 |= value << 8;
+ } else {
+ tmp16 &= 0xff00; /* keep the upper byte, replace the lower one */
+ tmp16 |= value; /* NOTE(review): value is not masked; bits above 0xff would alter the kept byte -- confirm callers pass a byte */
+ }
+ pci_config_write16((u16 *)align, tmp16);
+ } else
+ pci_config_write8((u8 *)addr, value);
+ break;
+ case 2:
+ if (where < 8) {
+ pci_config_write16((u16 *)addr, value);
+ } else { /* two byte writes, low byte first */
+ pci_config_write8((u8 *)addr, value & 0xff);
+ pci_config_write8(((u8 *)addr) + 1, value >> 8);
+ }
+ break;
+ case 4: /* two recursive 16-bit writes: low half at where, high half at where + 2 */
+ sun4u_write_pci_cfg_host(pbm, bus, devfn,
+ where, 2, value & 0xffff);
+ sun4u_write_pci_cfg_host(pbm, bus, devfn,
+ where + 2, 2, value >> 16);
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
static int sun4u_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
int where, int size, u32 value)
{
@@ -108,9 +216,10 @@
unsigned char bus = bus_dev->number;
u32 *addr;
- if (bus_dev == pbm->pci_bus && devfn == 0x00)
- return pci_host_bridge_write_pci_cfg(bus_dev, devfn, where,
- size, value);
+ if (!bus_dev->number && !PCI_SLOT(devfn))
+ return sun4u_write_pci_cfg_host(pbm, bus, devfn, where,
+ size, value);
+
addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
if (!addr)
return PCIBIOS_SUCCESSFUL;
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/Kconfig
+++ linux-source-2.6.22-2.6.22/arch/sparc64/Kconfig
@@ -108,6 +108,15 @@
source kernel/Kconfig.hz
+config HOTPLUG_CPU
+ bool "Support for hot-pluggable CPUs"
+ depends on SMP
+ select HOTPLUG
+ ---help---
+ Say Y here to experiment with turning CPUs off and on. CPUs
+ can be controlled through /sys/devices/system/cpu/cpu#.
+ Say N if you want to disable CPU hotplug.
+
source "init/Kconfig"
config SYSVIPC_COMPAT
@@ -305,6 +314,12 @@
bool
default y
+config SUN_LDOMS
+ bool "Sun Logical Domains support"
+ help
+ Say Y here if you want to support virtual devices via
+ Logical Domains.
+
config PCI
bool "PCI support"
select ARCH_SUPPORTS_MSI
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/NGcopy_to_user.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/NGcopy_to_user.S
@@ -1,6 +1,6 @@
/* NGcopy_to_user.S: Niagara optimized copy to userspace.
*
- * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
*/
#define EX_ST(x) \
@@ -8,8 +8,8 @@
.section .fixup; \
.align 4; \
99: wr %g0, ASI_AIUS, %asi;\
- retl; \
- mov 1, %o0; \
+ ret; \
+ restore %g0, 1, %o0; \
.section __ex_table,"a";\
.align 4; \
.word 98b, 99b; \
@@ -23,7 +23,7 @@
#define FUNC_NAME NGcopy_to_user
#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS
-#define EX_RETVAL(x) 0
+#define EX_RETVAL(x) %g0
#ifdef __KERNEL__
/* Writing to %asi is _expensive_ so we hardcode it.
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/GENpatch.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/GENpatch.S
@@ -0,0 +1,33 @@
+/* GENpatch.S: Patch Ultra-I routines with generic variant.
+ *
+ * Copyright (C) 2007 David S. Miller
+ */
+/* GEN_DO_PATCH(OLD, NEW): store BRANCH_ALWAYS | bits 20..2 of (NEW - OLD) at OLD and NOP at OLD + 4, then flush OLD. Uses %g1-%g3. */
+#define BRANCH_ALWAYS 0x10680000 /* instruction word with a zero displacement field; the displacement is OR'ed in */
+#define NOP 0x01000000 /* word stored in the slot right after the branch */
+#define GEN_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+/* generic_patch_copyops: overwrite the entry of each routine below with a branch to its GEN* variant. */
+ .globl generic_patch_copyops
+ .type generic_patch_copyops,#function
+generic_patch_copyops:
+ GEN_DO_PATCH(memcpy, GENmemcpy)
+ GEN_DO_PATCH(___copy_from_user, GENcopy_from_user)
+ GEN_DO_PATCH(___copy_to_user, GENcopy_to_user)
+ retl /* leaf return; no value is set up in %o0 */
+ nop
+ .size generic_patch_copyops,.-generic_patch_copyops
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/NG2copy_to_user.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/NG2copy_to_user.S
@@ -0,0 +1,49 @@
+/* NG2copy_to_user.S: Niagara-2 optimized copy to userspace.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+/* EX_ST(x): emit x at label 98 and pair it in __ex_table with fixup 99, which sets %asi to ASI_AIUS and returns 1 in %o0. */
+#define EX_ST(x) \
+98: x; \
+ .section .fixup; \
+ .align 4; \
+99: wr %g0, ASI_AIUS, %asi;\
+ retl; \
+ mov 1, %o0; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+#ifndef ASI_AIUS /* fallback values, used only when the ASI names are not already defined */
+#define ASI_AIUS 0x11
+#endif
+
+#ifndef ASI_BLK_AIUS_4V
+#define ASI_BLK_AIUS_4V 0x17
+#endif
+
+#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS
+#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23
+#endif
+/* Overrides picked up by the shared body included at the end (presumably via #ifndef defaults there -- confirm in NG2memcpy.S). */
+#define FUNC_NAME NG2copy_to_user
+#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS /* appends 'a' to the mnemonic: alternate-space store through ASI_AIUS */
+#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS
+#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS_4V
+#define EX_RETVAL(x) 0 /* ignores its argument and expands to 0 */
+
+#ifdef __KERNEL__
+ /* Writing to %asi is _expensive_ so we hardcode it.
+ * Reading %asi to check for KERNEL_DS is comparatively
+ * cheap.
+ */
+#define PREAMBLE \
+ rd %asi, %g1; \
+ cmp %g1, ASI_AIUS; \
+ bne,pn %icc, memcpy_user_stub; \
+ nop
+#endif
+
+#include "NG2memcpy.S" /* routine body; not visible in this file */
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/GENcopy_from_user.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/GENcopy_from_user.S
@@ -0,0 +1,34 @@
+/* GENcopy_from_user.S: Generic sparc64 copy from userspace.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+/* EX_LD(x): emit x at label 98 and pair it in __ex_table with fixup 99, which returns 1 in %o0. */
+#define EX_LD(x) \
+98: x; \
+ .section .fixup; \
+ .align 4; \
+99: retl; \
+ mov 1, %o0; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+#ifndef ASI_AIUS /* fallback value, used only when ASI_AIUS is not already defined */
+#define ASI_AIUS 0x11
+#endif
+
+#define FUNC_NAME GENcopy_from_user
+#define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest /* appends 'a' to the mnemonic: alternate-space load through ASI_AIUS */
+#define EX_RETVAL(x) 0 /* ignores its argument and expands to 0 */
+
+#ifdef __KERNEL__ /* PREAMBLE: branch to memcpy_user_stub unless %asi currently equals ASI_AIUS */
+#define PREAMBLE \
+ rd %asi, %g1; \
+ cmp %g1, ASI_AIUS; \
+ bne,pn %icc, memcpy_user_stub; \
+ nop
+#endif
+
+#include "GENmemcpy.S" /* routine body; not visible in this file */
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/Makefile
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/Makefile
@@ -1,4 +1,4 @@
-# $Id: Makefile,v 1.25 2000/12/14 22:57:25 davem Exp $
+#
# Makefile for Sparc64 library files..
#
@@ -13,7 +13,11 @@
U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \
NGpage.o NGbzero.o \
+ NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o NG2patch.o \
+ NG2page.o \
+ GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o GENpatch.o \
+ GENpage.o GENbzero.o \
copy_in_user.o user_fixup.o memmove.o \
- mcount.o ipcsum.o rwsem.o xor.o delay.o
+ mcount.o ipcsum.o rwsem.o xor.o
obj-y += iomap.o
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/xor.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/xor.S
@@ -491,12 +491,12 @@
ldda [%i1 + 0x10] %asi, %i2 /* %i2/%i3 = src1 + 0x10 */
xor %g2, %i4, %g2
xor %g3, %i5, %g3
- ldda [%i7 + 0x10] %asi, %i4 /* %i4/%i5 = src2 + 0x10 */
+ ldda [%l7 + 0x10] %asi, %i4 /* %i4/%i5 = src2 + 0x10 */
xor %l0, %g2, %l0
xor %l1, %g3, %l1
stxa %l0, [%i0 + 0x00] %asi
stxa %l1, [%i0 + 0x08] %asi
- ldda [%i6 + 0x10] %asi, %g2 /* %g2/%g3 = src3 + 0x10 */
+ ldda [%l6 + 0x10] %asi, %g2 /* %g2/%g3 = src3 + 0x10 */
ldda [%i0 + 0x10] %asi, %l0 /* %l0/%l1 = dest + 0x10 */
xor %i4, %i2, %i4
@@ -504,12 +504,12 @@
ldda [%i1 + 0x20] %asi, %i2 /* %i2/%i3 = src1 + 0x20 */
xor %g2, %i4, %g2
xor %g3, %i5, %g3
- ldda [%i7 + 0x20] %asi, %i4 /* %i4/%i5 = src2 + 0x20 */
+ ldda [%l7 + 0x20] %asi, %i4 /* %i4/%i5 = src2 + 0x20 */
xor %l0, %g2, %l0
xor %l1, %g3, %l1
stxa %l0, [%i0 + 0x10] %asi
stxa %l1, [%i0 + 0x18] %asi
- ldda [%i6 + 0x20] %asi, %g2 /* %g2/%g3 = src3 + 0x20 */
+ ldda [%l6 + 0x20] %asi, %g2 /* %g2/%g3 = src3 + 0x20 */
ldda [%i0 + 0x20] %asi, %l0 /* %l0/%l1 = dest + 0x20 */
xor %i4, %i2, %i4
@@ -517,12 +517,12 @@
ldda [%i1 + 0x30] %asi, %i2 /* %i2/%i3 = src1 + 0x30 */
xor %g2, %i4, %g2
xor %g3, %i5, %g3
- ldda [%i7 + 0x30] %asi, %i4 /* %i4/%i5 = src2 + 0x30 */
+ ldda [%l7 + 0x30] %asi, %i4 /* %i4/%i5 = src2 + 0x30 */
xor %l0, %g2, %l0
xor %l1, %g3, %l1
stxa %l0, [%i0 + 0x20] %asi
stxa %l1, [%i0 + 0x28] %asi
- ldda [%i6 + 0x30] %asi, %g2 /* %g2/%g3 = src3 + 0x30 */
+ ldda [%l6 + 0x30] %asi, %g2 /* %g2/%g3 = src3 + 0x30 */
ldda [%i0 + 0x30] %asi, %l0 /* %l0/%l1 = dest + 0x30 */
prefetch [%i1 + 0x40], #one_read
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/NG2copy_from_user.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/NG2copy_from_user.S
@@ -0,0 +1,40 @@
+/* NG2copy_from_user.S: Niagara-2 optimized copy from userspace.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+/* EX_LD(x): emit x at label 98 and pair it in __ex_table with fixup 99, which sets %asi to ASI_AIUS and returns 1 in %o0. */
+#define EX_LD(x) \
+98: x; \
+ .section .fixup; \
+ .align 4; \
+99: wr %g0, ASI_AIUS, %asi;\
+ retl; \
+ mov 1, %o0; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+#ifndef ASI_AIUS /* fallback values, used only when the ASI names are not already defined */
+#define ASI_AIUS 0x11
+#endif
+
+#ifndef ASI_BLK_AIUS_4V
+#define ASI_BLK_AIUS_4V 0x17
+#endif
+
+#define FUNC_NAME NG2copy_from_user
+#define LOAD(type,addr,dest) type##a [addr] %asi, dest /* alternate-space load through the current %asi register, not an immediate ASI */
+#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_AIUS_4V, dest
+#define EX_RETVAL(x) 0 /* ignores its argument and expands to 0 */
+
+#ifdef __KERNEL__ /* PREAMBLE: branch to memcpy_user_stub unless %asi currently equals ASI_AIUS */
+#define PREAMBLE \
+ rd %asi, %g1; \
+ cmp %g1, ASI_AIUS; \
+ bne,pn %icc, memcpy_user_stub; \
+ nop
+#endif
+
+#include "NG2memcpy.S" /* routine body; not visible in this file */
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/NG2page.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/NG2page.S
@@ -0,0 +1,61 @@
+/* NG2page.S: Niagara-2 optimized clear and copy page.
+ *
+ * Copyright (C) 2007 (davem@davemloft.net)
+ */
+
+#include
+#include
+#include
+
+ .text
+ .align 32
+
+ /* This is heavily simplified from the sun4u variants
+ * because Niagara-2 does not have any D-cache aliasing issues.
+ */
+NG2copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
+ prefetch [%o1 + 0x00], #one_read
+ prefetch [%o1 + 0x40], #one_read
+ VISEntryHalf
+ set PAGE_SIZE, %g7 /* %g7 = bytes left to copy */
+ sub %o0, %o1, %g3 /* %g3 = dest - src, so [%o1 + %g3] addresses the destination */
+1: stxa %g0, [%o1 + %g3] ASI_BLK_INIT_QUAD_LDD_P /* zero store to the destination block before the block store below */
+ subcc %g7, 64, %g7 /* one 64-byte block per iteration; sets the condition codes for the bne */
+ ldda [%o1] ASI_BLK_P, %f0 /* block load from the source into FP registers starting at %f0 */
+ stda %f0, [%o1 + %g3] ASI_BLK_P /* block store of the same registers to the destination */
+ add %o1, 64, %o1
+ bne,pt %xcc, 1b /* loop until %g7 reaches zero */
+ prefetch [%o1 + 0x40], #one_read
+ membar #Sync
+ VISExitHalf
+ retl /* %o2 (vaddr) is never read by this routine */
+ nop
+/* NG_DO_PATCH(OLD, NEW): store BRANCH_ALWAYS | bits 20..2 of (NEW - OLD) at OLD and NOP at OLD + 4, then flush OLD. Uses %g1-%g3. */
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define NG_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+/* niagara2_patch_pageops: redirect the page routines; only copy_user_page gets an NG2 target, the clear routines go to NG* ones. */
+ .globl niagara2_patch_pageops
+ .type niagara2_patch_pageops,#function
+niagara2_patch_pageops:
+ NG_DO_PATCH(copy_user_page, NG2copy_user_page)
+ NG_DO_PATCH(_clear_page, NGclear_page)
+ NG_DO_PATCH(clear_user_page, NGclear_user_page)
+ retl
+ nop
+ .size niagara2_patch_pageops,.-niagara2_patch_pageops
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/NGmemcpy.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/NGmemcpy.S
@@ -1,6 +1,6 @@
/* NGmemcpy.S: Niagara optimized memcpy.
*
- * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
*/
#ifdef __KERNEL__
@@ -16,6 +16,12 @@
wr %g0, ASI_PNF, %asi
#endif
+#ifdef __sparc_v9__
+#define SAVE_AMOUNT 128
+#else
+#define SAVE_AMOUNT 64
+#endif
+
#ifndef STORE_ASI
#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P
#endif
@@ -50,7 +56,11 @@
#endif
#ifndef STORE_INIT
+#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_INIT(src,addr) stxa src, [addr] %asi
+#else
+#define STORE_INIT(src,addr) stx src, [addr + 0x00]
+#endif
#endif
#ifndef FUNC_NAME
@@ -73,18 +83,19 @@
.globl FUNC_NAME
.type FUNC_NAME,#function
-FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
- srlx %o2, 31, %g2
+FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
+ PREAMBLE
+ save %sp, -SAVE_AMOUNT, %sp
+ srlx %i2, 31, %g2
cmp %g2, 0
tne %xcc, 5
- PREAMBLE
- mov %o0, GLOBAL_SPARE
- cmp %o2, 0
+ mov %i0, %o0
+ cmp %i2, 0
be,pn %XCC, 85f
- or %o0, %o1, %o3
- cmp %o2, 16
+ or %o0, %i1, %i3
+ cmp %i2, 16
blu,a,pn %XCC, 80f
- or %o3, %o2, %o3
+ or %i3, %i2, %i3
/* 2 blocks (128 bytes) is the minimum we can do the block
* copy with. We need to ensure that we'll iterate at least
@@ -93,31 +104,31 @@
* to (64 - 1) bytes from the length before we perform the
* block copy loop.
*/
- cmp %o2, (2 * 64)
+ cmp %i2, (2 * 64)
blu,pt %XCC, 70f
- andcc %o3, 0x7, %g0
+ andcc %i3, 0x7, %g0
/* %o0: dst
- * %o1: src
- * %o2: len (known to be >= 128)
+ * %i1: src
+ * %i2: len (known to be >= 128)
*
- * The block copy loops will use %o4/%o5,%g2/%g3 as
+ * The block copy loops will use %i4/%i5,%g2/%g3 as
* temporaries while copying the data.
*/
- LOAD(prefetch, %o1, #one_read)
+ LOAD(prefetch, %i1, #one_read)
wr %g0, STORE_ASI, %asi
/* Align destination on 64-byte boundary. */
- andcc %o0, (64 - 1), %o4
+ andcc %o0, (64 - 1), %i4
be,pt %XCC, 2f
- sub %o4, 64, %o4
- sub %g0, %o4, %o4 ! bytes to align dst
- sub %o2, %o4, %o2
-1: subcc %o4, 1, %o4
- EX_LD(LOAD(ldub, %o1, %g1))
+ sub %i4, 64, %i4
+ sub %g0, %i4, %i4 ! bytes to align dst
+ sub %i2, %i4, %i2
+1: subcc %i4, 1, %i4
+ EX_LD(LOAD(ldub, %i1, %g1))
EX_ST(STORE(stb, %g1, %o0))
- add %o1, 1, %o1
+ add %i1, 1, %i1
bne,pt %XCC, 1b
add %o0, 1, %o0
@@ -136,111 +147,155 @@
* aligned store data at a time, this is easy to ensure.
*/
2:
- andcc %o1, (16 - 1), %o4
- andn %o2, (64 - 1), %g1 ! block copy loop iterator
- sub %o2, %g1, %o2 ! final sub-block copy bytes
+ andcc %i1, (16 - 1), %i4
+ andn %i2, (64 - 1), %g1 ! block copy loop iterator
be,pt %XCC, 50f
- cmp %o4, 8
- be,a,pt %XCC, 10f
- sub %o1, 0x8, %o1
+ sub %i2, %g1, %i2 ! final sub-block copy bytes
+
+ cmp %i4, 8
+ be,pt %XCC, 10f
+ sub %i1, %i4, %i1
/* Neither 8-byte nor 16-byte aligned, shift and mask. */
- mov %g1, %o4
- and %o1, 0x7, %g1
- sll %g1, 3, %g1
- mov 64, %o3
- andn %o1, 0x7, %o1
- EX_LD(LOAD(ldx, %o1, %g2))
- sub %o3, %g1, %o3
- sllx %g2, %g1, %g2
+ and %i4, 0x7, GLOBAL_SPARE
+ sll GLOBAL_SPARE, 3, GLOBAL_SPARE
+ mov 64, %i5
+ EX_LD(LOAD_TWIN(%i1, %g2, %g3))
+ sub %i5, GLOBAL_SPARE, %i5
+ mov 16, %o4
+ mov 32, %o5
+ mov 48, %o7
+ mov 64, %i3
-#define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\
- EX_LD(LOAD(ldx, SRC, TMP1)); \
- srlx TMP1, PRE_SHIFT, TMP2; \
- or TMP2, PRE_VAL, TMP2; \
- EX_ST(STORE_INIT(TMP2, DST)); \
- sllx TMP1, POST_SHIFT, PRE_VAL;
-
-1: add %o1, 0x8, %o1
- SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00)
- add %o1, 0x8, %o1
- SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08)
- add %o1, 0x8, %o1
- SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10)
- add %o1, 0x8, %o1
- SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18)
- add %o1, 32, %o1
- LOAD(prefetch, %o1, #one_read)
- sub %o1, 32 - 8, %o1
- SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20)
- add %o1, 8, %o1
- SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28)
- add %o1, 8, %o1
- SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30)
- add %o1, 8, %o1
- SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38)
- subcc %o4, 64, %o4
- bne,pt %XCC, 1b
+ bg,pn %XCC, 9f
+ nop
+
+#define MIX_THREE_WORDS(WORD1, WORD2, WORD3, PRE_SHIFT, POST_SHIFT, TMP) \
+ sllx WORD1, POST_SHIFT, WORD1; \
+ srlx WORD2, PRE_SHIFT, TMP; \
+ sllx WORD2, POST_SHIFT, WORD2; \
+ or WORD1, TMP, WORD1; \
+ srlx WORD3, PRE_SHIFT, TMP; \
+ or WORD2, TMP, WORD2;
+
+8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
+ MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
+ LOAD(prefetch, %i1 + %i3, #one_read)
+
+ EX_ST(STORE_INIT(%g2, %o0 + 0x00))
+ EX_ST(STORE_INIT(%g3, %o0 + 0x08))
+
+ EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
+ MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
+
+ EX_ST(STORE_INIT(%o2, %o0 + 0x10))
+ EX_ST(STORE_INIT(%o3, %o0 + 0x18))
+
+ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+ MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
+
+ EX_ST(STORE_INIT(%g2, %o0 + 0x20))
+ EX_ST(STORE_INIT(%g3, %o0 + 0x28))
+
+ EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
+ add %i1, 64, %i1
+ MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
+
+ EX_ST(STORE_INIT(%o2, %o0 + 0x30))
+ EX_ST(STORE_INIT(%o3, %o0 + 0x38))
+
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 8b
add %o0, 64, %o0
-#undef SWIVEL_ONE_DWORD
+ ba,pt %XCC, 60f
+ add %i1, %i4, %i1
+
+9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
+ MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
+ LOAD(prefetch, %i1 + %i3, #one_read)
+
+ EX_ST(STORE_INIT(%g3, %o0 + 0x00))
+ EX_ST(STORE_INIT(%o2, %o0 + 0x08))
+
+ EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
+ MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
+
+ EX_ST(STORE_INIT(%o3, %o0 + 0x10))
+ EX_ST(STORE_INIT(%g2, %o0 + 0x18))
+
+ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+ MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
+
+ EX_ST(STORE_INIT(%g3, %o0 + 0x20))
+ EX_ST(STORE_INIT(%o2, %o0 + 0x28))
+
+ EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
+ add %i1, 64, %i1
+ MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
+
+ EX_ST(STORE_INIT(%o3, %o0 + 0x30))
+ EX_ST(STORE_INIT(%g2, %o0 + 0x38))
+
+ subcc %g1, 64, %g1
+ bne,pt %XCC, 9b
+ add %o0, 64, %o0
- srl %g1, 3, %g1
ba,pt %XCC, 60f
- add %o1, %g1, %o1
+ add %i1, %i4, %i1
10: /* Destination is 64-byte aligned, source was only 8-byte
* aligned but it has been subtracted by 8 and we perform
* one twin load ahead, then add 8 back into source when
* we finish the loop.
*/
- EX_LD(LOAD_TWIN(%o1, %o4, %o5))
-1: add %o1, 16, %o1
- EX_LD(LOAD_TWIN(%o1, %g2, %g3))
- add %o1, 16 + 32, %o1
- LOAD(prefetch, %o1, #one_read)
- sub %o1, 32, %o1
+ EX_LD(LOAD_TWIN(%i1, %o4, %o5))
+ mov 16, %o7
+ mov 32, %g2
+ mov 48, %g3
+ mov 64, %o1
+1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+ LOAD(prefetch, %i1 + %o1, #one_read)
EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line
- EX_ST(STORE_INIT(%g2, %o0 + 0x08))
- EX_LD(LOAD_TWIN(%o1, %o4, %o5))
- add %o1, 16, %o1
- EX_ST(STORE_INIT(%g3, %o0 + 0x10))
+ EX_ST(STORE_INIT(%o2, %o0 + 0x08))
+ EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
+ EX_ST(STORE_INIT(%o3, %o0 + 0x10))
EX_ST(STORE_INIT(%o4, %o0 + 0x18))
- EX_LD(LOAD_TWIN(%o1, %g2, %g3))
- add %o1, 16, %o1
+ EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
EX_ST(STORE_INIT(%o5, %o0 + 0x20))
- EX_ST(STORE_INIT(%g2, %o0 + 0x28))
- EX_LD(LOAD_TWIN(%o1, %o4, %o5))
- EX_ST(STORE_INIT(%g3, %o0 + 0x30))
+ EX_ST(STORE_INIT(%o2, %o0 + 0x28))
+ EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5))
+ add %i1, 64, %i1
+ EX_ST(STORE_INIT(%o3, %o0 + 0x30))
EX_ST(STORE_INIT(%o4, %o0 + 0x38))
subcc %g1, 64, %g1
bne,pt %XCC, 1b
add %o0, 64, %o0
ba,pt %XCC, 60f
- add %o1, 0x8, %o1
+ add %i1, 0x8, %i1
50: /* Destination is 64-byte aligned, and source is 16-byte
* aligned.
*/
-1: EX_LD(LOAD_TWIN(%o1, %o4, %o5))
- add %o1, 16, %o1
- EX_LD(LOAD_TWIN(%o1, %g2, %g3))
- add %o1, 16 + 32, %o1
- LOAD(prefetch, %o1, #one_read)
- sub %o1, 32, %o1
+ mov 16, %o7
+ mov 32, %g2
+ mov 48, %g3
+ mov 64, %o1
+1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5))
+ EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+ LOAD(prefetch, %i1 + %o1, #one_read)
EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line
EX_ST(STORE_INIT(%o5, %o0 + 0x08))
- EX_LD(LOAD_TWIN(%o1, %o4, %o5))
- add %o1, 16, %o1
- EX_ST(STORE_INIT(%g2, %o0 + 0x10))
- EX_ST(STORE_INIT(%g3, %o0 + 0x18))
- EX_LD(LOAD_TWIN(%o1, %g2, %g3))
- add %o1, 16, %o1
+ EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
+ EX_ST(STORE_INIT(%o2, %o0 + 0x10))
+ EX_ST(STORE_INIT(%o3, %o0 + 0x18))
+ EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
+ add %i1, 64, %i1
EX_ST(STORE_INIT(%o4, %o0 + 0x20))
EX_ST(STORE_INIT(%o5, %o0 + 0x28))
- EX_ST(STORE_INIT(%g2, %o0 + 0x30))
- EX_ST(STORE_INIT(%g3, %o0 + 0x38))
+ EX_ST(STORE_INIT(%o2, %o0 + 0x30))
+ EX_ST(STORE_INIT(%o3, %o0 + 0x38))
subcc %g1, 64, %g1
bne,pt %XCC, 1b
add %o0, 64, %o0
@@ -249,47 +304,47 @@
60:
membar #Sync
- /* %o2 contains any final bytes still needed to be copied
+ /* %i2 contains any final bytes still needed to be copied
* over. If anything is left, we copy it one byte at a time.
*/
- RESTORE_ASI(%o3)
- brz,pt %o2, 85f
- sub %o0, %o1, %o3
+ RESTORE_ASI(%i3)
+ brz,pt %i2, 85f
+ sub %o0, %i1, %i3
ba,a,pt %XCC, 90f
.align 64
70: /* 16 < len <= 64 */
bne,pn %XCC, 75f
- sub %o0, %o1, %o3
+ sub %o0, %i1, %i3
72:
- andn %o2, 0xf, %o4
- and %o2, 0xf, %o2
-1: subcc %o4, 0x10, %o4
- EX_LD(LOAD(ldx, %o1, %o5))
- add %o1, 0x08, %o1
- EX_LD(LOAD(ldx, %o1, %g1))
- sub %o1, 0x08, %o1
- EX_ST(STORE(stx, %o5, %o1 + %o3))
- add %o1, 0x8, %o1
- EX_ST(STORE(stx, %g1, %o1 + %o3))
+ andn %i2, 0xf, %i4
+ and %i2, 0xf, %i2
+1: subcc %i4, 0x10, %i4
+ EX_LD(LOAD(ldx, %i1, %o4))
+ add %i1, 0x08, %i1
+ EX_LD(LOAD(ldx, %i1, %g1))
+ sub %i1, 0x08, %i1
+ EX_ST(STORE(stx, %o4, %i1 + %i3))
+ add %i1, 0x8, %i1
+ EX_ST(STORE(stx, %g1, %i1 + %i3))
bgu,pt %XCC, 1b
- add %o1, 0x8, %o1
-73: andcc %o2, 0x8, %g0
+ add %i1, 0x8, %i1
+73: andcc %i2, 0x8, %g0
be,pt %XCC, 1f
nop
- sub %o2, 0x8, %o2
- EX_LD(LOAD(ldx, %o1, %o5))
- EX_ST(STORE(stx, %o5, %o1 + %o3))
- add %o1, 0x8, %o1
-1: andcc %o2, 0x4, %g0
+ sub %i2, 0x8, %i2
+ EX_LD(LOAD(ldx, %i1, %o4))
+ EX_ST(STORE(stx, %o4, %i1 + %i3))
+ add %i1, 0x8, %i1
+1: andcc %i2, 0x4, %g0
be,pt %XCC, 1f
nop
- sub %o2, 0x4, %o2
- EX_LD(LOAD(lduw, %o1, %o5))
- EX_ST(STORE(stw, %o5, %o1 + %o3))
- add %o1, 0x4, %o1
-1: cmp %o2, 0
+ sub %i2, 0x4, %i2
+ EX_LD(LOAD(lduw, %i1, %i5))
+ EX_ST(STORE(stw, %i5, %i1 + %i3))
+ add %i1, 0x4, %i1
+1: cmp %i2, 0
be,pt %XCC, 85f
nop
ba,pt %xcc, 90f
@@ -300,71 +355,71 @@
sub %g1, 0x8, %g1
be,pn %icc, 2f
sub %g0, %g1, %g1
- sub %o2, %g1, %o2
+ sub %i2, %g1, %i2
1: subcc %g1, 1, %g1
- EX_LD(LOAD(ldub, %o1, %o5))
- EX_ST(STORE(stb, %o5, %o1 + %o3))
+ EX_LD(LOAD(ldub, %i1, %i5))
+ EX_ST(STORE(stb, %i5, %i1 + %i3))
bgu,pt %icc, 1b
- add %o1, 1, %o1
+ add %i1, 1, %i1
-2: add %o1, %o3, %o0
- andcc %o1, 0x7, %g1
+2: add %i1, %i3, %o0
+ andcc %i1, 0x7, %g1
bne,pt %icc, 8f
sll %g1, 3, %g1
- cmp %o2, 16
+ cmp %i2, 16
bgeu,pt %icc, 72b
nop
ba,a,pt %xcc, 73b
-8: mov 64, %o3
- andn %o1, 0x7, %o1
- EX_LD(LOAD(ldx, %o1, %g2))
- sub %o3, %g1, %o3
- andn %o2, 0x7, %o4
+8: mov 64, %i3
+ andn %i1, 0x7, %i1
+ EX_LD(LOAD(ldx, %i1, %g2))
+ sub %i3, %g1, %i3
+ andn %i2, 0x7, %i4
sllx %g2, %g1, %g2
-1: add %o1, 0x8, %o1
- EX_LD(LOAD(ldx, %o1, %g3))
- subcc %o4, 0x8, %o4
- srlx %g3, %o3, %o5
- or %o5, %g2, %o5
- EX_ST(STORE(stx, %o5, %o0))
+1: add %i1, 0x8, %i1
+ EX_LD(LOAD(ldx, %i1, %g3))
+ subcc %i4, 0x8, %i4
+ srlx %g3, %i3, %i5
+ or %i5, %g2, %i5
+ EX_ST(STORE(stx, %i5, %o0))
add %o0, 0x8, %o0
bgu,pt %icc, 1b
sllx %g3, %g1, %g2
srl %g1, 3, %g1
- andcc %o2, 0x7, %o2
+ andcc %i2, 0x7, %i2
be,pn %icc, 85f
- add %o1, %g1, %o1
+ add %i1, %g1, %i1
ba,pt %xcc, 90f
- sub %o0, %o1, %o3
+ sub %o0, %i1, %i3
.align 64
80: /* 0 < len <= 16 */
- andcc %o3, 0x3, %g0
+ andcc %i3, 0x3, %g0
bne,pn %XCC, 90f
- sub %o0, %o1, %o3
+ sub %o0, %i1, %i3
1:
- subcc %o2, 4, %o2
- EX_LD(LOAD(lduw, %o1, %g1))
- EX_ST(STORE(stw, %g1, %o1 + %o3))
+ subcc %i2, 4, %i2
+ EX_LD(LOAD(lduw, %i1, %g1))
+ EX_ST(STORE(stw, %g1, %i1 + %i3))
bgu,pt %XCC, 1b
- add %o1, 4, %o1
+ add %i1, 4, %i1
-85: retl
- mov EX_RETVAL(GLOBAL_SPARE), %o0
+85: ret
+ restore EX_RETVAL(%i0), %g0, %o0
.align 32
90:
- subcc %o2, 1, %o2
- EX_LD(LOAD(ldub, %o1, %g1))
- EX_ST(STORE(stb, %g1, %o1 + %o3))
+ subcc %i2, 1, %i2
+ EX_LD(LOAD(ldub, %i1, %g1))
+ EX_ST(STORE(stb, %g1, %i1 + %i3))
bgu,pt %XCC, 90b
- add %o1, 1, %o1
- retl
- mov EX_RETVAL(GLOBAL_SPARE), %o0
+ add %i1, 1, %i1
+ ret
+ restore EX_RETVAL(%i0), %g0, %o0
.size FUNC_NAME, .-FUNC_NAME
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/GENcopy_to_user.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/GENcopy_to_user.S
@@ -0,0 +1,38 @@
+/* GENcopy_to_user.S: Generic sparc64 copy to userspace.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+/* EX_ST(x): emit x at label 98 and pair it in __ex_table with fixup 99, which returns 1 in %o0. */
+#define EX_ST(x) \
+98: x; \
+ .section .fixup; \
+ .align 4; \
+99: retl; \
+ mov 1, %o0; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+#ifndef ASI_AIUS /* fallback value, used only when ASI_AIUS is not already defined */
+#define ASI_AIUS 0x11
+#endif
+
+#define FUNC_NAME GENcopy_to_user
+#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS /* appends 'a' to the mnemonic: alternate-space store through ASI_AIUS */
+#define EX_RETVAL(x) 0 /* ignores its argument and expands to 0 */
+
+#ifdef __KERNEL__
+ /* Writing to %asi is _expensive_ so we hardcode it.
+ * Reading %asi to check for KERNEL_DS is comparatively
+ * cheap.
+ */
+#define PREAMBLE \
+ rd %asi, %g1; \
+ cmp %g1, ASI_AIUS; \
+ bne,pn %icc, memcpy_user_stub; \
+ nop
+#endif
+
+#include "GENmemcpy.S" /* routine body; not visible in this file */
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/NGcopy_from_user.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/NGcopy_from_user.S
@@ -1,6 +1,6 @@
/* NGcopy_from_user.S: Niagara optimized copy from userspace.
*
- * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
*/
#define EX_LD(x) \
@@ -8,8 +8,8 @@
.section .fixup; \
.align 4; \
99: wr %g0, ASI_AIUS, %asi;\
- retl; \
- mov 1, %o0; \
+ ret; \
+ restore %g0, 1, %o0; \
.section __ex_table,"a";\
.align 4; \
.word 98b, 99b; \
@@ -24,7 +24,7 @@
#define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest
#define LOAD_TWIN(addr_reg,dest0,dest1) \
ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_AIUS, dest0
-#define EX_RETVAL(x) 0
+#define EX_RETVAL(x) %g0
#ifdef __KERNEL__
#define PREAMBLE \
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/NG2patch.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/NG2patch.S
@@ -0,0 +1,33 @@
+/* NG2patch.S: Patch Ultra-I routines with Niagara-2 variant.
+ *
+ * Copyright (C) 2007 David S. Miller
+ */
+/* NG_DO_PATCH(OLD, NEW): store BRANCH_ALWAYS | bits 20..2 of (NEW - OLD) at OLD and NOP at OLD + 4, then flush OLD. Uses %g1-%g3. */
+#define BRANCH_ALWAYS 0x10680000 /* instruction word with a zero displacement field; the displacement is OR'ed in */
+#define NOP 0x01000000 /* word stored in the slot right after the branch */
+#define NG_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+/* niagara2_patch_copyops: overwrite the entry of each routine below with a branch to its NG2* variant. */
+ .globl niagara2_patch_copyops
+ .type niagara2_patch_copyops,#function
+niagara2_patch_copyops:
+ NG_DO_PATCH(memcpy, NG2memcpy)
+ NG_DO_PATCH(___copy_from_user, NG2copy_from_user)
+ NG_DO_PATCH(___copy_to_user, NG2copy_to_user)
+ retl /* leaf return; no value is set up in %o0 */
+ nop
+ .size niagara2_patch_copyops,.-niagara2_patch_copyops
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/GENbzero.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/GENbzero.S
@@ -0,0 +1,160 @@
+/* GENbzero.S: Generic sparc64 memset/clear_user.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+#include <asm/asi.h>
+
+#define EX_ST(x,y) \
+98: x,y; \
+ .section .fixup; \
+ .align 4; \
+99: retl; \
+ mov %o1, %o0; \
+ .section __ex_table; \
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+ .align 32
+ .text
+
+ .globl GENmemset
+ .type GENmemset, #function
+GENmemset: /* %o0=buf, %o1=pat, %o2=len */
+ and %o1, 0xff, %o3
+ mov %o2, %o1
+ sllx %o3, 8, %g1
+ or %g1, %o3, %o2
+ sllx %o2, 16, %g1
+ or %g1, %o2, %o2
+ sllx %o2, 32, %g1
+ ba,pt %xcc, 1f
+ or %g1, %o2, %o2
+
+ .globl GENbzero
+ .type GENbzero, #function
+GENbzero:
+ clr %o2
+1: brz,pn %o1, GENbzero_return
+ mov %o0, %o3
+
+ /* %o5: saved %asi, restored at GENbzero_done
+ * %o4: store %asi to use
+ */
+ rd %asi, %o5
+ mov ASI_P, %o4
+ wr %o4, 0x0, %asi
+
+GENbzero_from_clear_user:
+ cmp %o1, 15
+ bl,pn %icc, GENbzero_tiny
+ andcc %o0, 0x7, %g1
+ be,pt %xcc, 2f
+ mov 8, %g2
+ sub %g2, %g1, %g1
+ sub %o1, %g1, %o1
+1: EX_ST(stba %o2, [%o0 + 0x00] %asi)
+ subcc %g1, 1, %g1
+ bne,pt %xcc, 1b
+ add %o0, 1, %o0
+2: cmp %o1, 128
+ bl,pn %icc, GENbzero_medium
+ andcc %o0, (64 - 1), %g1
+ be,pt %xcc, GENbzero_pre_loop
+ mov 64, %g2
+ sub %g2, %g1, %g1
+ sub %o1, %g1, %o1
+1: EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+ subcc %g1, 8, %g1
+ bne,pt %xcc, 1b
+ add %o0, 8, %o0
+
+GENbzero_pre_loop:
+ andn %o1, (64 - 1), %g1
+ sub %o1, %g1, %o1
+GENbzero_loop:
+ EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x08] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x10] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x18] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x20] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x28] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x30] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x38] %asi)
+ subcc %g1, 64, %g1
+ bne,pt %xcc, GENbzero_loop
+ add %o0, 64, %o0
+
+ membar #Sync
+ wr %o4, 0x0, %asi
+ brz,pn %o1, GENbzero_done
+GENbzero_medium:
+ andncc %o1, 0x7, %g1
+ be,pn %xcc, 2f
+ sub %o1, %g1, %o1
+1: EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+ subcc %g1, 8, %g1
+ bne,pt %xcc, 1b
+ add %o0, 8, %o0
+2: brz,pt %o1, GENbzero_done
+ nop
+
+GENbzero_tiny:
+1: EX_ST(stba %o2, [%o0 + 0x00] %asi)
+ subcc %o1, 1, %o1
+ bne,pt %icc, 1b
+ add %o0, 1, %o0
+
+ /* fallthrough */
+
+GENbzero_done:
+ wr %o5, 0x0, %asi
+
+GENbzero_return:
+ retl
+ mov %o3, %o0
+ .size GENbzero, .-GENbzero
+ .size GENmemset, .-GENmemset
+
+ .globl GENclear_user
+ .type GENclear_user, #function
+GENclear_user: /* %o0=buf, %o1=len */
+ rd %asi, %o5
+ brz,pn %o1, GENbzero_done
+ clr %o3
+ cmp %o5, ASI_AIUS
+ bne,pn %icc, GENbzero
+ clr %o2
+ ba,pt %xcc, GENbzero_from_clear_user
+ mov ASI_AIUS, %o4
+ .size GENclear_user, .-GENclear_user
+
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define GEN_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+
+ .globl generic_patch_bzero
+ .type generic_patch_bzero,#function
+generic_patch_bzero:
+ GEN_DO_PATCH(memset, GENmemset)
+ GEN_DO_PATCH(__bzero, GENbzero)
+ GEN_DO_PATCH(__clear_user, GENclear_user)
+ retl
+ nop
+ .size generic_patch_bzero,.-generic_patch_bzero
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/NG2memcpy.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/NG2memcpy.S
@@ -0,0 +1,520 @@
+/* NG2memcpy.S: Niagara-2 optimized memcpy.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifdef __KERNEL__
+#include <asm/visasm.h>
+#include <asm/asi.h>
+#define GLOBAL_SPARE %g7
+#else
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
+#define FPRS_FEF 0x04
+#ifdef MEMCPY_DEBUG
+#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
+ clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
+#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#else
+#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
+#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#endif
+#define GLOBAL_SPARE %g5
+#endif
+
+#ifndef STORE_ASI
+#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
+#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P
+#else
+#define STORE_ASI 0x80 /* ASI_P */
+#endif
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x) x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x) x
+#endif
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+#endif
+
+#ifndef LOAD
+#define LOAD(type,addr,dest) type [addr], dest
+#endif
+
+#ifndef LOAD_BLK
+#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_P, dest
+#endif
+
+#ifndef STORE
+#ifndef MEMCPY_DEBUG
+#define STORE(type,src,addr) type src, [addr]
+#else
+#define STORE(type,src,addr) type##a src, [addr] 0x80
+#endif
+#endif
+
+#ifndef STORE_BLK
+#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_P
+#endif
+
+#ifndef STORE_INIT
+#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME NG2memcpy
+#endif
+
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#ifndef XCC
+#define XCC xcc
+#endif
+
+#define FREG_FROB(x0, x1, x2, x3, x4, x5, x6, x7, x8) \
+ faligndata %x0, %x1, %f0; \
+ faligndata %x1, %x2, %f2; \
+ faligndata %x2, %x3, %f4; \
+ faligndata %x3, %x4, %f6; \
+ faligndata %x4, %x5, %f8; \
+ faligndata %x5, %x6, %f10; \
+ faligndata %x6, %x7, %f12; \
+ faligndata %x7, %x8, %f14;
+
+#define FREG_MOVE_1(x0) \
+ fmovd %x0, %f0;
+#define FREG_MOVE_2(x0, x1) \
+ fmovd %x0, %f0; \
+ fmovd %x1, %f2;
+#define FREG_MOVE_3(x0, x1, x2) \
+ fmovd %x0, %f0; \
+ fmovd %x1, %f2; \
+ fmovd %x2, %f4;
+#define FREG_MOVE_4(x0, x1, x2, x3) \
+ fmovd %x0, %f0; \
+ fmovd %x1, %f2; \
+ fmovd %x2, %f4; \
+ fmovd %x3, %f6;
+#define FREG_MOVE_5(x0, x1, x2, x3, x4) \
+ fmovd %x0, %f0; \
+ fmovd %x1, %f2; \
+ fmovd %x2, %f4; \
+ fmovd %x3, %f6; \
+ fmovd %x4, %f8;
+#define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \
+ fmovd %x0, %f0; \
+ fmovd %x1, %f2; \
+ fmovd %x2, %f4; \
+ fmovd %x3, %f6; \
+ fmovd %x4, %f8; \
+ fmovd %x5, %f10;
+#define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \
+ fmovd %x0, %f0; \
+ fmovd %x1, %f2; \
+ fmovd %x2, %f4; \
+ fmovd %x3, %f6; \
+ fmovd %x4, %f8; \
+ fmovd %x5, %f10; \
+ fmovd %x6, %f12;
+#define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \
+ fmovd %x0, %f0; \
+ fmovd %x1, %f2; \
+ fmovd %x2, %f4; \
+ fmovd %x3, %f6; \
+ fmovd %x4, %f8; \
+ fmovd %x5, %f10; \
+ fmovd %x6, %f12; \
+ fmovd %x7, %f14;
+#define FREG_LOAD_1(base, x0) \
+ EX_LD(LOAD(ldd, base + 0x00, %x0))
+#define FREG_LOAD_2(base, x0, x1) \
+ EX_LD(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD(LOAD(ldd, base + 0x08, %x1));
+#define FREG_LOAD_3(base, x0, x1, x2) \
+ EX_LD(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD(LOAD(ldd, base + 0x08, %x1)); \
+ EX_LD(LOAD(ldd, base + 0x10, %x2));
+#define FREG_LOAD_4(base, x0, x1, x2, x3) \
+ EX_LD(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD(LOAD(ldd, base + 0x08, %x1)); \
+ EX_LD(LOAD(ldd, base + 0x10, %x2)); \
+ EX_LD(LOAD(ldd, base + 0x18, %x3));
+#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
+ EX_LD(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD(LOAD(ldd, base + 0x08, %x1)); \
+ EX_LD(LOAD(ldd, base + 0x10, %x2)); \
+ EX_LD(LOAD(ldd, base + 0x18, %x3)); \
+ EX_LD(LOAD(ldd, base + 0x20, %x4));
+#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
+ EX_LD(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD(LOAD(ldd, base + 0x08, %x1)); \
+ EX_LD(LOAD(ldd, base + 0x10, %x2)); \
+ EX_LD(LOAD(ldd, base + 0x18, %x3)); \
+ EX_LD(LOAD(ldd, base + 0x20, %x4)); \
+ EX_LD(LOAD(ldd, base + 0x28, %x5));
+#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
+ EX_LD(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD(LOAD(ldd, base + 0x08, %x1)); \
+ EX_LD(LOAD(ldd, base + 0x10, %x2)); \
+ EX_LD(LOAD(ldd, base + 0x18, %x3)); \
+ EX_LD(LOAD(ldd, base + 0x20, %x4)); \
+ EX_LD(LOAD(ldd, base + 0x28, %x5)); \
+ EX_LD(LOAD(ldd, base + 0x30, %x6));
+
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+ .text
+ .align 64
+
+ .globl FUNC_NAME
+ .type FUNC_NAME,#function
+FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ srlx %o2, 31, %g2
+ cmp %g2, 0
+ tne %xcc, 5
+ PREAMBLE
+ mov %o0, GLOBAL_SPARE
+ cmp %o2, 0
+ be,pn %XCC, 85f
+ or %o0, %o1, %o3
+ cmp %o2, 16
+ blu,a,pn %XCC, 80f
+ or %o3, %o2, %o3
+
+ /* 2 blocks (128 bytes) is the minimum we can do the block
+ * copy with. We need to ensure that we'll iterate at least
+ * once in the block copy loop. At worst we'll need to align
+ * the destination to a 64-byte boundary which can chew up
+ * to (64 - 1) bytes from the length before we perform the
+ * block copy loop.
+ *
+ * However, the cut-off point, performance wise, is around
+ * 4 64-byte blocks.
+ */
+ cmp %o2, (4 * 64)
+ blu,pt %XCC, 75f
+ andcc %o3, 0x7, %g0
+
+ /* %o0: dst
+ * %o1: src
+ * %o2: len (known to be >= 128)
+ *
+ * The block copy loops can use %o4, %g2, %g3 as
+ * temporaries while copying the data. %o5 must
+ * be preserved between VISEntryHalf and VISExitHalf
+ */
+
+ LOAD(prefetch, %o1 + 0x000, #one_read)
+ LOAD(prefetch, %o1 + 0x040, #one_read)
+ LOAD(prefetch, %o1 + 0x080, #one_read)
+
+ /* Align destination on 64-byte boundary. */
+ andcc %o0, (64 - 1), %o4
+ be,pt %XCC, 2f
+ sub %o4, 64, %o4
+ sub %g0, %o4, %o4 ! bytes to align dst
+ sub %o2, %o4, %o2
+1: subcc %o4, 1, %o4
+ EX_LD(LOAD(ldub, %o1, %g1))
+ EX_ST(STORE(stb, %g1, %o0))
+ add %o1, 1, %o1
+ bne,pt %XCC, 1b
+ add %o0, 1, %o0
+
+2:
+ /* Clobbers o5/g1/g2/g3/g7/icc/xcc. We must preserve
+ * o5 from here until we hit VISExitHalf.
+ */
+ VISEntryHalf
+
+ alignaddr %o1, %g0, %g0
+
+ add %o1, (64 - 1), %o4
+ andn %o4, (64 - 1), %o4
+ andn %o2, (64 - 1), %g1
+ sub %o2, %g1, %o2
+
+ and %o1, (64 - 1), %g2
+ add %o1, %g1, %o1
+ sub %o0, %o4, %g3
+ brz,pt %g2, 190f
+ cmp %g2, 32
+ blu,a 5f
+ cmp %g2, 16
+ cmp %g2, 48
+ blu,a 4f
+ cmp %g2, 40
+ cmp %g2, 56
+ blu 170f
+ nop
+ ba,a,pt %xcc, 180f
+
+4: /* 32 <= low bits < 48 */
+ blu 150f
+ nop
+ ba,a,pt %xcc, 160f
+5: /* 0 < low bits < 32 */
+ blu,a 6f
+ cmp %g2, 8
+ cmp %g2, 24
+ blu 130f
+ nop
+ ba,a,pt %xcc, 140f
+6: /* 0 < low bits < 16 */
+ bgeu 120f
+ nop
+ /* fall through for 0 < low bits < 8 */
+110: sub %o4, 64, %g2
+ EX_LD(LOAD_BLK(%g2, %f0))
+1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD(LOAD_BLK(%o4, %f16))
+ FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
+ EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+ bne,pt %xcc, 1b
+ LOAD(prefetch, %o4 + 64, #one_read)
+ ba,pt %xcc, 195f
+ nop
+
+120: sub %o4, 56, %g2
+ FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
+1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD(LOAD_BLK(%o4, %f16))
+ FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
+ EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+ bne,pt %xcc, 1b
+ LOAD(prefetch, %o4 + 64, #one_read)
+ ba,pt %xcc, 195f
+ nop
+
+130: sub %o4, 48, %g2
+ FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
+1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD(LOAD_BLK(%o4, %f16))
+ FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
+ EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+ bne,pt %xcc, 1b
+ LOAD(prefetch, %o4 + 64, #one_read)
+ ba,pt %xcc, 195f
+ nop
+
+140: sub %o4, 40, %g2
+ FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
+1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD(LOAD_BLK(%o4, %f16))
+ FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
+ EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ FREG_MOVE_5(f22, f24, f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+ bne,pt %xcc, 1b
+ LOAD(prefetch, %o4 + 64, #one_read)
+ ba,pt %xcc, 195f
+ nop
+
+150: sub %o4, 32, %g2
+ FREG_LOAD_4(%g2, f0, f2, f4, f6)
+1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD(LOAD_BLK(%o4, %f16))
+ FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
+ EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ FREG_MOVE_4(f24, f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+ bne,pt %xcc, 1b
+ LOAD(prefetch, %o4 + 64, #one_read)
+ ba,pt %xcc, 195f
+ nop
+
+160: sub %o4, 24, %g2
+ FREG_LOAD_3(%g2, f0, f2, f4)
+1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD(LOAD_BLK(%o4, %f16))
+ FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
+ EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ FREG_MOVE_3(f26, f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+ bne,pt %xcc, 1b
+ LOAD(prefetch, %o4 + 64, #one_read)
+ ba,pt %xcc, 195f
+ nop
+
+170: sub %o4, 16, %g2
+ FREG_LOAD_2(%g2, f0, f2)
+1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD(LOAD_BLK(%o4, %f16))
+ FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
+ EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ FREG_MOVE_2(f28, f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+ bne,pt %xcc, 1b
+ LOAD(prefetch, %o4 + 64, #one_read)
+ ba,pt %xcc, 195f
+ nop
+
+180: sub %o4, 8, %g2
+ FREG_LOAD_1(%g2, f0)
+1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD(LOAD_BLK(%o4, %f16))
+ FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
+ EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ FREG_MOVE_1(f30)
+ subcc %g1, 64, %g1
+ add %o4, 64, %o4
+ bne,pt %xcc, 1b
+ LOAD(prefetch, %o4 + 64, #one_read)
+ ba,pt %xcc, 195f
+ nop
+
+190:
+1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
+ subcc %g1, 64, %g1
+ EX_LD(LOAD_BLK(%o4, %f0))
+ EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ add %o4, 64, %o4
+ bne,pt %xcc, 1b
+ LOAD(prefetch, %o4 + 64, #one_read)
+
+195:
+ add %o4, %g3, %o0
+ membar #Sync
+
+ VISExitHalf
+
+ /* %o2 contains any final bytes still needed to be copied
+ * over. If anything is left, we copy it one byte at a time.
+ */
+ brz,pt %o2, 85f
+ sub %o0, %o1, %o3
+ ba,a,pt %XCC, 90f
+
+ .align 64
+75: /* 16 <= len < 256 */
+ bne,pn %XCC, 75f
+ sub %o0, %o1, %o3
+
+72:
+ andn %o2, 0xf, %o4
+ and %o2, 0xf, %o2
+1: subcc %o4, 0x10, %o4
+ EX_LD(LOAD(ldx, %o1, %o5))
+ add %o1, 0x08, %o1
+ EX_LD(LOAD(ldx, %o1, %g1))
+ sub %o1, 0x08, %o1
+ EX_ST(STORE(stx, %o5, %o1 + %o3))
+ add %o1, 0x8, %o1
+ EX_ST(STORE(stx, %g1, %o1 + %o3))
+ bgu,pt %XCC, 1b
+ add %o1, 0x8, %o1
+73: andcc %o2, 0x8, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x8, %o2
+ EX_LD(LOAD(ldx, %o1, %o5))
+ EX_ST(STORE(stx, %o5, %o1 + %o3))
+ add %o1, 0x8, %o1
+1: andcc %o2, 0x4, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x4, %o2
+ EX_LD(LOAD(lduw, %o1, %o5))
+ EX_ST(STORE(stw, %o5, %o1 + %o3))
+ add %o1, 0x4, %o1
+1: cmp %o2, 0
+ be,pt %XCC, 85f
+ nop
+ ba,pt %xcc, 90f
+ nop
+
+75:
+ andcc %o0, 0x7, %g1
+ sub %g1, 0x8, %g1
+ be,pn %icc, 2f
+ sub %g0, %g1, %g1
+ sub %o2, %g1, %o2
+
+1: subcc %g1, 1, %g1
+ EX_LD(LOAD(ldub, %o1, %o5))
+ EX_ST(STORE(stb, %o5, %o1 + %o3))
+ bgu,pt %icc, 1b
+ add %o1, 1, %o1
+
+2: add %o1, %o3, %o0
+ andcc %o1, 0x7, %g1
+ bne,pt %icc, 8f
+ sll %g1, 3, %g1
+
+ cmp %o2, 16
+ bgeu,pt %icc, 72b
+ nop
+ ba,a,pt %xcc, 73b
+
+8: mov 64, %o3
+ andn %o1, 0x7, %o1
+ EX_LD(LOAD(ldx, %o1, %g2))
+ sub %o3, %g1, %o3
+ andn %o2, 0x7, %o4
+ sllx %g2, %g1, %g2
+1: add %o1, 0x8, %o1
+ EX_LD(LOAD(ldx, %o1, %g3))
+ subcc %o4, 0x8, %o4
+ srlx %g3, %o3, %o5
+ or %o5, %g2, %o5
+ EX_ST(STORE(stx, %o5, %o0))
+ add %o0, 0x8, %o0
+ bgu,pt %icc, 1b
+ sllx %g3, %g1, %g2
+
+ srl %g1, 3, %g1
+ andcc %o2, 0x7, %o2
+ be,pn %icc, 85f
+ add %o1, %g1, %o1
+ ba,pt %xcc, 90f
+ sub %o0, %o1, %o3
+
+ .align 64
+80: /* 0 < len < 16 */
+ andcc %o3, 0x3, %g0
+ bne,pn %XCC, 90f
+ sub %o0, %o1, %o3
+
+1:
+ subcc %o2, 4, %o2
+ EX_LD(LOAD(lduw, %o1, %g1))
+ EX_ST(STORE(stw, %g1, %o1 + %o3))
+ bgu,pt %XCC, 1b
+ add %o1, 4, %o1
+
+85: retl
+ mov EX_RETVAL(GLOBAL_SPARE), %o0
+
+ .align 32
+90:
+ subcc %o2, 1, %o2
+ EX_LD(LOAD(ldub, %o1, %g1))
+ EX_ST(STORE(stb, %g1, %o1 + %o3))
+ bgu,pt %XCC, 90b
+ add %o1, 1, %o1
+ retl
+ mov EX_RETVAL(GLOBAL_SPARE), %o0
+
+ .size FUNC_NAME, .-FUNC_NAME
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/GENpage.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/GENpage.S
@@ -0,0 +1,80 @@
+/* GENpage.S: Generic clear and copy page.
+ *
+ * Copyright (C) 2007 (davem@davemloft.net)
+ */
+#include <asm/page.h>
+
+ .text
+ .align 32
+
+GENcopy_user_page: /* %o0=dest, %o1=src; copies PAGE_SIZE bytes, 64 per iteration */
+ set PAGE_SIZE, %g7
+1: ldx [%o1 + 0x00], %o2
+ ldx [%o1 + 0x08], %o3
+ ldx [%o1 + 0x10], %o4
+ ldx [%o1 + 0x18], %o5
+ stx %o2, [%o0 + 0x00]
+ stx %o3, [%o0 + 0x08]
+ stx %o4, [%o0 + 0x10]
+ stx %o5, [%o0 + 0x18]
+ ldx [%o1 + 0x20], %o2
+ ldx [%o1 + 0x28], %o3
+ ldx [%o1 + 0x30], %o4
+ ldx [%o1 + 0x38], %o5
+ stx %o2, [%o0 + 0x20]
+ stx %o3, [%o0 + 0x28]
+ stx %o4, [%o0 + 0x30]
+ stx %o5, [%o0 + 0x38]
+ subcc %g7, 64, %g7
+ add %o1, 64, %o1
+ bne,pt %xcc, 1b
+ add %o0, 64, %o0
+ retl
+ nop
+
+GENclear_page: /* %o0=dest; zeroes PAGE_SIZE bytes, 64 per iteration */
+GENclear_user_page: /* %o0=dest */
+ set PAGE_SIZE, %g7
+1: stx %g0, [%o0 + 0x00]
+ stx %g0, [%o0 + 0x08]
+ stx %g0, [%o0 + 0x10]
+ stx %g0, [%o0 + 0x18]
+ stx %g0, [%o0 + 0x20]
+ stx %g0, [%o0 + 0x28]
+ stx %g0, [%o0 + 0x30]
+ stx %g0, [%o0 + 0x38]
+ subcc %g7, 64, %g7
+ bne,pt %xcc, 1b
+ add %o0, 64, %o0
+ /* Return to the caller; do not fall through into generic_patch_pageops. */
+ retl
+ nop
+
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define GEN_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+
+ .globl generic_patch_pageops
+ .type generic_patch_pageops,#function
+generic_patch_pageops:
+ GEN_DO_PATCH(copy_user_page, GENcopy_user_page)
+ GEN_DO_PATCH(_clear_page, GENclear_page)
+ GEN_DO_PATCH(clear_user_page, GENclear_user_page)
+ retl
+ nop
+ .size generic_patch_pageops,.-generic_patch_pageops
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/GENmemcpy.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/GENmemcpy.S
@@ -0,0 +1,121 @@
+/* GENmemcpy.S: Generic sparc64 memcpy.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifdef __KERNEL__
+#define GLOBAL_SPARE %g7
+#else
+#define GLOBAL_SPARE %g5
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x) x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x) x
+#endif
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+#endif
+
+#ifndef LOAD
+#define LOAD(type,addr,dest) type [addr], dest
+#endif
+
+#ifndef STORE
+#define STORE(type,src,addr) type src, [addr]
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME GENmemcpy
+#endif
+
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#ifndef XCC
+#define XCC xcc
+#endif
+
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+ .text
+ .align 64
+
+ .globl FUNC_NAME
+ .type FUNC_NAME,#function
+FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ srlx %o2, 31, %g2
+ cmp %g2, 0
+ tne %XCC, 5
+ PREAMBLE
+ mov %o0, GLOBAL_SPARE
+
+ cmp %o2, 0
+ be,pn %XCC, 85f
+ or %o0, %o1, %o3
+ cmp %o2, 16
+ blu,a,pn %XCC, 80f
+ or %o3, %o2, %o3
+
+ xor %o0, %o1, %o4
+ andcc %o4, 0x7, %g0
+ bne,a,pn %XCC, 90f
+ sub %o0, %o1, %o3
+
+ and %o0, 0x7, %o4
+ sub %o4, 0x8, %o4
+ sub %g0, %o4, %o4
+ sub %o2, %o4, %o2
+1: subcc %o4, 1, %o4
+ EX_LD(LOAD(ldub, %o1, %g1))
+ EX_ST(STORE(stb, %g1, %o0))
+ add %o1, 1, %o1
+ bne,pt %XCC, 1b
+ add %o0, 1, %o0
+
+ andn %o2, 0x7, %g1
+ sub %o2, %g1, %o2
+1: subcc %g1, 0x8, %g1
+ EX_LD(LOAD(ldx, %o1, %g2))
+ EX_ST(STORE(stx, %g2, %o0))
+ add %o1, 0x8, %o1
+ bne,pt %XCC, 1b
+ add %o0, 0x8, %o0
+
+ brz,pt %o2, 85f
+ sub %o0, %o1, %o3
+ ba,a,pt %XCC, 90f
+
+ .align 64
+80: /* 0 < len < 16 */
+ andcc %o3, 0x3, %g0
+ bne,pn %XCC, 90f
+ sub %o0, %o1, %o3
+
+1:
+ subcc %o2, 4, %o2
+ EX_LD(LOAD(lduw, %o1, %g1))
+ EX_ST(STORE(stw, %g1, %o1 + %o3))
+ bgu,pt %XCC, 1b
+ add %o1, 4, %o1
+
+85: retl
+ mov EX_RETVAL(GLOBAL_SPARE), %o0
+
+ .align 32
+90:
+ subcc %o2, 1, %o2
+ EX_LD(LOAD(ldub, %o1, %g1))
+ EX_ST(STORE(stb, %g1, %o1 + %o3))
+ bgu,pt %XCC, 90b
+ add %o1, 1, %o1
+ retl
+ mov EX_RETVAL(GLOBAL_SPARE), %o0
+
+ .size FUNC_NAME, .-FUNC_NAME
--- linux-source-2.6.22-2.6.22.orig/arch/sparc64/lib/NGpage.S
+++ linux-source-2.6.22-2.6.22/arch/sparc64/lib/NGpage.S
@@ -45,6 +45,7 @@
retl
nop
+ .globl NGclear_page, NGclear_user_page
NGclear_page: /* %o0=dest */
NGclear_user_page: /* %o0=dest, %o1=vaddr */
mov 8, %g1
--- linux-source-2.6.22-2.6.22.orig/arch/parisc/kernel/syscall_table.S
+++ linux-source-2.6.22-2.6.22/arch/parisc/kernel/syscall_table.S
@@ -222,9 +222,7 @@
ENTRY_SAME(setfsgid)
/* I think this might work */
ENTRY_SAME(llseek) /* 140 */
- /* struct linux_dirent has longs, like 'unsigned long d_ino' which
- * almost definitely should be 'ino_t d_ino' but it's too late now */
- ENTRY_DIFF(getdents)
+ ENTRY_COMP(getdents)
/* it is POSSIBLE that select will be OK because even though fd_set
* contains longs, the macros and sizes are clever. */
ENTRY_COMP(select)
--- linux-source-2.6.22-2.6.22.orig/arch/parisc/kernel/sys_parisc32.c
+++ linux-source-2.6.22-2.6.22/arch/parisc/kernel/sys_parisc32.c
@@ -285,147 +285,6 @@
return err;
}
-struct linux32_dirent {
- u32 d_ino;
- compat_off_t d_off;
- u16 d_reclen;
- char d_name[1];
-};
-
-struct old_linux32_dirent {
- u32 d_ino;
- u32 d_offset;
- u16 d_namlen;
- char d_name[1];
-};
-
-struct getdents32_callback {
- struct linux32_dirent __user * current_dir;
- struct linux32_dirent __user * previous;
- int count;
- int error;
-};
-
-struct readdir32_callback {
- struct old_linux32_dirent __user * dirent;
- int count;
-};
-
-#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
-static int filldir32 (void *__buf, const char *name, int namlen,
- loff_t offset, u64 ino, unsigned int d_type)
-{
- struct linux32_dirent __user * dirent;
- struct getdents32_callback * buf = (struct getdents32_callback *) __buf;
- int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 1, 4);
- u32 d_ino;
-
- buf->error = -EINVAL; /* only used if we fail.. */
- if (reclen > buf->count)
- return -EINVAL;
- d_ino = ino;
- if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
- return -EOVERFLOW;
- dirent = buf->previous;
- if (dirent)
- put_user(offset, &dirent->d_off);
- dirent = buf->current_dir;
- buf->previous = dirent;
- put_user(d_ino, &dirent->d_ino);
- put_user(reclen, &dirent->d_reclen);
- copy_to_user(dirent->d_name, name, namlen);
- put_user(0, dirent->d_name + namlen);
- dirent = ((void __user *)dirent) + reclen;
- buf->current_dir = dirent;
- buf->count -= reclen;
- return 0;
-}
-
-asmlinkage long
-sys32_getdents (unsigned int fd, void __user * dirent, unsigned int count)
-{
- struct file * file;
- struct linux32_dirent __user * lastdirent;
- struct getdents32_callback buf;
- int error;
-
- error = -EFAULT;
- if (!access_ok(VERIFY_WRITE, dirent, count))
- goto out;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
-
- buf.current_dir = (struct linux32_dirent __user *) dirent;
- buf.previous = NULL;
- buf.count = count;
- buf.error = 0;
-
- error = vfs_readdir(file, filldir32, &buf);
- if (error < 0)
- goto out_putf;
- error = buf.error;
- lastdirent = buf.previous;
- if (lastdirent) {
- if (put_user(file->f_pos, &lastdirent->d_off))
- error = -EFAULT;
- else
- error = count - buf.count;
- }
-
-out_putf:
- fput(file);
-out:
- return error;
-}
-
-static int fillonedir32(void * __buf, const char * name, int namlen,
- loff_t offset, u64 ino, unsigned int d_type)
-{
- struct readdir32_callback * buf = (struct readdir32_callback *) __buf;
- struct old_linux32_dirent __user * dirent;
- u32 d_ino;
-
- if (buf->count)
- return -EINVAL;
- d_ino = ino;
- if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
- return -EOVERFLOW;
- buf->count++;
- dirent = buf->dirent;
- put_user(d_ino, &dirent->d_ino);
- put_user(offset, &dirent->d_offset);
- put_user(namlen, &dirent->d_namlen);
- copy_to_user(dirent->d_name, name, namlen);
- put_user(0, dirent->d_name + namlen);
- return 0;
-}
-
-asmlinkage long
-sys32_readdir (unsigned int fd, void __user * dirent, unsigned int count)
-{
- int error;
- struct file * file;
- struct readdir32_callback buf;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
-
- buf.count = 0;
- buf.dirent = dirent;
-
- error = vfs_readdir(file, fillonedir32, &buf);
- if (error >= 0)
- error = buf.count;
- fput(file);
-out:
- return error;
-}
-
/*** copied from mips64 ***/
/*
* Ooo, nasty. We need here to frob 32-bit unsigned longs to
--- linux-source-2.6.22-2.6.22.orig/arch/powerpc/mm/slice.c
+++ linux-source-2.6.22-2.6.22/arch/powerpc/mm/slice.c
@@ -405,6 +405,8 @@
if (len > mm->task_size)
return -ENOMEM;
+ if (len & ((1ul << pshift) - 1))
+ return -EINVAL;
if (fixed && (addr & ((1ul << pshift) - 1)))
return -EINVAL;
if (fixed && addr > (mm->task_size - len))
--- linux-source-2.6.22-2.6.22.orig/arch/powerpc/kernel/misc_32.S
+++ linux-source-2.6.22-2.6.22/arch/powerpc/kernel/misc_32.S
@@ -728,6 +728,27 @@
or r4,r4,r7 # LSW |= t2
blr
+/*
+ * __ucmpdi2: 64-bit comparison
+ *
+ * R3/R4 has 64 bit value A
+ * R5/R6 has 64 bit value B
+ * result in R3: 0 for A < B
+ * 1 for A == B
+ * 2 for A > B
+ */
+_GLOBAL(__ucmpdi2)
+ cmplw r7,r3,r5 # compare high words
+ li r3,0
+ blt r7,2f # a < b ... return 0
+ bgt r7,1f # a > b ... return 2
+ cmplw r6,r4,r6 # compare low words
+ blt r6,2f # a < b ... return 0
+ li r3,1
+ ble r6,2f # a = b ... return 1
+1: li r3,2
+2: blr
+
_GLOBAL(abs)
srawi r4,r3,31
xor r3,r3,r4
--- linux-source-2.6.22-2.6.22.orig/arch/powerpc/kernel/process.c
+++ linux-source-2.6.22-2.6.22/arch/powerpc/kernel/process.c
@@ -83,7 +83,7 @@
*/
BUG_ON(tsk != current);
#endif
- giveup_fpu(current);
+ giveup_fpu(tsk);
}
preempt_enable();
}
@@ -143,7 +143,7 @@
#ifdef CONFIG_SMP
BUG_ON(tsk != current);
#endif
- giveup_altivec(current);
+ giveup_altivec(tsk);
}
preempt_enable();
}
@@ -182,7 +182,7 @@
#ifdef CONFIG_SMP
BUG_ON(tsk != current);
#endif
- giveup_spe(current);
+ giveup_spe(tsk);
}
preempt_enable();
}
--- linux-source-2.6.22-2.6.22.orig/arch/powerpc/kernel/prom_parse.c
+++ linux-source-2.6.22-2.6.22/arch/powerpc/kernel/prom_parse.c
@@ -24,7 +24,7 @@
/* Max address size we deal with */
#define OF_MAX_ADDR_CELLS 4
#define OF_CHECK_COUNTS(na, ns) ((na) > 0 && (na) <= OF_MAX_ADDR_CELLS && \
- (ns) >= 0)
+ (ns) > 0)
static struct of_bus *of_match_bus(struct device_node *np);
static int __of_address_to_resource(struct device_node *dev,
--- linux-source-2.6.22-2.6.22.orig/arch/powerpc/kernel/ppc_ksyms.c
+++ linux-source-2.6.22-2.6.22/arch/powerpc/kernel/ppc_ksyms.c
@@ -158,9 +158,11 @@
long long __ashrdi3(long long, int);
long long __ashldi3(long long, int);
long long __lshrdi3(long long, int);
+int __ucmpdi2(uint64_t, uint64_t);
EXPORT_SYMBOL(__ashrdi3);
EXPORT_SYMBOL(__ashldi3);
EXPORT_SYMBOL(__lshrdi3);
+EXPORT_SYMBOL(__ucmpdi2);
#endif
EXPORT_SYMBOL(memcpy);
--- linux-source-2.6.22-2.6.22.orig/arch/powerpc/platforms/chrp/setup.c
+++ linux-source-2.6.22-2.6.22/arch/powerpc/platforms/chrp/setup.c
@@ -116,7 +116,7 @@
seq_printf(m, "machine\t\t: CHRP %s\n", model);
/* longtrail (goldengate) stuff */
- if (!strncmp(model, "IBM,LongTrail", 13)) {
+ if (model && !strncmp(model, "IBM,LongTrail", 13)) {
/* VLSI VAS96011/12 `Golden Gate 2' */
/* Memory banks */
sdramen = (in_le32(gg2_pci_config_base + GG2_PCI_DRAM_CTRL)
@@ -204,15 +204,20 @@
static void __init sio_init(void)
{
struct device_node *root;
+ const char *model;
- if ((root = of_find_node_by_path("/")) &&
- !strncmp(of_get_property(root, "model", NULL),
- "IBM,LongTrail", 13)) {
+ root = of_find_node_by_path("/");
+ if (!root)
+ return;
+
+ model = of_get_property(root, "model", NULL);
+ if (model && !strncmp(model, "IBM,LongTrail", 13)) {
/* logical device 0 (KBC/Keyboard) */
sio_fixup_irq("keyboard", 0, 1, 2);
/* select logical device 1 (KBC/Mouse) */
sio_fixup_irq("mouse", 1, 12, 2);
}
+
of_node_put(root);
}
--- linux-source-2.6.22-2.6.22.orig/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ linux-source-2.6.22-2.6.22/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -147,7 +147,6 @@
{ .type = "soc", },
{ .compatible = "soc", },
{ .type = "qe", },
- { .type = "mdio", },
{},
};
--- linux-source-2.6.22-2.6.22.orig/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ linux-source-2.6.22-2.6.22/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -75,7 +75,6 @@
{ .type = "soc", },
{ .compatible = "soc", },
{ .type = "qe", },
- { .type = "mdio", },
{},
};
--- linux-source-2.6.22-2.6.22.orig/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ linux-source-2.6.22-2.6.22/arch/powerpc/platforms/83xx/mpc832x_mds.c
@@ -111,7 +111,6 @@
{ .type = "soc", },
{ .compatible = "soc", },
{ .type = "qe", },
- { .type = "mdio", },
{},
};
--- linux-source-2.6.22-2.6.22.orig/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ linux-source-2.6.22-2.6.22/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -118,7 +118,6 @@
{ .type = "soc", },
{ .compatible = "soc", },
{ .type = "qe", },
- { .type = "mdio", },
{},
};
--- linux-source-2.6.22-2.6.22.orig/arch/powerpc/platforms/powermac/setup.c
+++ linux-source-2.6.22-2.6.22/arch/powerpc/platforms/powermac/setup.c
@@ -643,12 +643,10 @@
DMA_MODE_READ = 1;
DMA_MODE_WRITE = 2;
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
+#if defined(CONFIG_BLK_DEV_IDE) && defined(CONFIG_BLK_DEV_IDE_PMAC)
ppc_ide_md.ide_init_hwif = pmac_ide_init_hwif_ports;
ppc_ide_md.default_io_base = pmac_ide_get_base;
-#endif /* CONFIG_BLK_DEV_IDE_PMAC */
-#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */
+#endif
#endif /* CONFIG_PPC32 */
--- linux-source-2.6.22-2.6.22.orig/arch/um/drivers/ubd_kern.c
+++ linux-source-2.6.22-2.6.22/arch/um/drivers/ubd_kern.c
@@ -612,6 +612,8 @@
ubd_dev->fd = fd;
if(ubd_dev->cow.file != NULL){
+ blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long));
+
err = -ENOMEM;
ubd_dev->cow.bitmap = (void *) vmalloc(ubd_dev->cow.bitmap_len);
if(ubd_dev->cow.bitmap == NULL){
--- linux-source-2.6.22-2.6.22.orig/arch/um/os-Linux/user_syms.c
+++ linux-source-2.6.22-2.6.22/arch/um/os-Linux/user_syms.c
@@ -5,7 +5,8 @@
* so I *must* declare good prototypes for them and then EXPORT them.
* The kernel code uses the macro defined by include/linux/string.h,
* so I undef macros; the userspace code does not include that and I
- * add an EXPORT for the glibc one.*/
+ * add an EXPORT for the glibc one.
+ */
#undef strlen
#undef strstr
@@ -61,12 +62,18 @@
EXPORT_SYMBOL_PROTO(__xstat);
EXPORT_SYMBOL_PROTO(__lxstat);
EXPORT_SYMBOL_PROTO(__lxstat64);
+EXPORT_SYMBOL_PROTO(__fxstat64);
EXPORT_SYMBOL_PROTO(lseek);
EXPORT_SYMBOL_PROTO(lseek64);
EXPORT_SYMBOL_PROTO(chown);
+EXPORT_SYMBOL_PROTO(fchown);
EXPORT_SYMBOL_PROTO(truncate);
+EXPORT_SYMBOL_PROTO(ftruncate64);
EXPORT_SYMBOL_PROTO(utime);
+EXPORT_SYMBOL_PROTO(utimes);
+EXPORT_SYMBOL_PROTO(futimes);
EXPORT_SYMBOL_PROTO(chmod);
+EXPORT_SYMBOL_PROTO(fchmod);
EXPORT_SYMBOL_PROTO(rename);
EXPORT_SYMBOL_PROTO(__xmknod);
@@ -102,14 +109,3 @@
extern long __guard __attribute__((weak));
EXPORT_SYMBOL(__guard);
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
--- linux-source-2.6.22-2.6.22.orig/arch/sparc/prom/console.c
+++ linux-source-2.6.22-2.6.22/arch/sparc/prom/console.c
@@ -102,119 +102,3 @@
while(prom_nbputchar(c) == -1) ;
return;
}
-
-/* Query for input device type */
-enum prom_input_device
-prom_query_input_device(void)
-{
- unsigned long flags;
- int st_p;
- char propb[64];
- char *p;
- int propl;
-
- switch(prom_vers) {
- case PROM_V0:
- case PROM_V2:
- case PROM_SUN4:
- default:
- switch(*romvec->pv_stdin) {
- case PROMDEV_KBD: return PROMDEV_IKBD;
- case PROMDEV_TTYA: return PROMDEV_ITTYA;
- case PROMDEV_TTYB: return PROMDEV_ITTYB;
- default:
- return PROMDEV_I_UNK;
- };
- case PROM_V3:
- spin_lock_irqsave(&prom_lock, flags);
- st_p = (*romvec->pv_v2devops.v2_inst2pkg)(*romvec->pv_v2bootargs.fd_stdin);
- restore_current();
- spin_unlock_irqrestore(&prom_lock, flags);
- if(prom_node_has_property(st_p, "keyboard"))
- return PROMDEV_IKBD;
- if (prom_getproperty(st_p, "name", propb, sizeof(propb)) != -1) {
- if(strncmp(propb, "keyboard", sizeof("serial")) == 0)
- return PROMDEV_IKBD;
- }
- if (prom_getproperty(st_p, "device_type", propb, sizeof(propb)) != -1) {
- if(strncmp(propb, "serial", sizeof("serial")))
- return PROMDEV_I_UNK;
- }
- propl = prom_getproperty(prom_root_node, "stdin-path", propb, sizeof(propb));
- if(propl > 2) {
- p = propb;
- while(*p) p++; p -= 2;
- if(p[0] == ':') {
- if(p[1] == 'a')
- return PROMDEV_ITTYA;
- else if(p[1] == 'b')
- return PROMDEV_ITTYB;
- }
- }
- return PROMDEV_I_UNK;
- }
-}
-
-/* Query for output device type */
-
-enum prom_output_device
-prom_query_output_device(void)
-{
- unsigned long flags;
- int st_p;
- char propb[64];
- char *p;
- int propl;
-
- switch(prom_vers) {
- case PROM_V0:
- case PROM_SUN4:
- switch(*romvec->pv_stdin) {
- case PROMDEV_SCREEN: return PROMDEV_OSCREEN;
- case PROMDEV_TTYA: return PROMDEV_OTTYA;
- case PROMDEV_TTYB: return PROMDEV_OTTYB;
- };
- break;
- case PROM_V2:
- case PROM_V3:
- spin_lock_irqsave(&prom_lock, flags);
- st_p = (*romvec->pv_v2devops.v2_inst2pkg)(*romvec->pv_v2bootargs.fd_stdout);
- restore_current();
- spin_unlock_irqrestore(&prom_lock, flags);
- propl = prom_getproperty(st_p, "device_type", propb, sizeof(propb));
- if (propl == sizeof("display") &&
- strncmp("display", propb, sizeof("display")) == 0)
- {
- return PROMDEV_OSCREEN;
- }
- if(prom_vers == PROM_V3) {
- if(propl >= 0 &&
- strncmp("serial", propb, sizeof("serial")) != 0)
- return PROMDEV_O_UNK;
- propl = prom_getproperty(prom_root_node, "stdout-path",
- propb, sizeof(propb));
- if(propl == CON_SIZE_JMC &&
- strncmp(propb, con_name_jmc, CON_SIZE_JMC) == 0)
- return PROMDEV_OTTYA;
- if(propl > 2) {
- p = propb;
- while(*p) p++; p-= 2;
- if(p[0]==':') {
- if(p[1] == 'a')
- return PROMDEV_OTTYA;
- else if(p[1] == 'b')
- return PROMDEV_OTTYB;
- }
- }
- } else {
- switch(*romvec->pv_stdin) {
- case PROMDEV_TTYA: return PROMDEV_OTTYA;
- case PROMDEV_TTYB: return PROMDEV_OTTYB;
- };
- }
- break;
- default:
- ;
- };
- return PROMDEV_O_UNK;
-}
--- linux-source-2.6.22-2.6.22.orig/arch/sparc/prom/misc.c
+++ linux-source-2.6.22-2.6.22/arch/sparc/prom/misc.c
@@ -58,7 +58,7 @@
extern void install_linux_ticker(void);
unsigned long flags;
- if(!serial_console && prom_palette)
+ if (prom_palette)
prom_palette (1);
spin_lock_irqsave(&prom_lock, flags);
install_obp_ticker();
@@ -69,7 +69,7 @@
#ifdef CONFIG_SUN_AUXIO
set_auxio(AUXIO_LED, 0);
#endif
- if(!serial_console && prom_palette)
+ if (prom_palette)
prom_palette (0);
}
--- linux-source-2.6.22-2.6.22.orig/arch/sparc/kernel/sys_sparc.c
+++ linux-source-2.6.22-2.6.22/arch/sparc/kernel/sys_sparc.c
@@ -224,8 +224,7 @@
{
if (ARCH_SUN4C_SUN4 &&
(len > 0x20000000 ||
- ((flags & MAP_FIXED) &&
- addr < 0xe0000000 && addr + len > 0x20000000)))
+ (addr < 0xe0000000 && addr + len > 0x20000000)))
return -EINVAL;
/* See asm-sparc/uaccess.h */
--- linux-source-2.6.22-2.6.22.orig/arch/sparc/kernel/prom.c
+++ linux-source-2.6.22-2.6.22/arch/sparc/kernel/prom.c
@@ -271,6 +271,21 @@
}
EXPORT_SYMBOL(of_set_property);
+/*
+ * Look for 'match' in a property value made of consecutive NUL-terminated
+ * strings.  'list' points at the first string and 'len' is the number of
+ * bytes to consider.  Returns 1 if one of the strings is identical to
+ * 'match', 0 otherwise.
+ */
+int of_find_in_proplist(const char *list, const char *match, int len)
+{
+	const char *entry = list;
+	int remaining;
+
+	for (remaining = len; remaining > 0; ) {
+		/* Length of this entry including its terminating NUL. */
+		int step = strlen(entry) + 1;
+
+		if (strcmp(entry, match) == 0)
+			return 1;
+
+		entry += step;
+		remaining -= step;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(of_find_in_proplist);
+
static unsigned int prom_early_allocated;
static void * __init prom_early_alloc(unsigned long size)
@@ -566,6 +581,135 @@
return dp;
}
+struct device_node *of_console_device;
+EXPORT_SYMBOL(of_console_device);
+
+char *of_console_path;
+EXPORT_SYMBOL(of_console_path);
+
+char *of_console_options;
+EXPORT_SYMBOL(of_console_options);
+
+extern void restore_current(void);
+
+/*
+ * Determine the firmware's console output device and publish it through
+ * of_console_device, of_console_path and of_console_options.
+ *
+ * PROM_V0 / PROM_SUN4: the device is derived from *romvec->pv_stdout; for
+ * a serial console the first ("ttya") or second ("ttyb") node of type
+ * "serial" is picked and ":a" / ":b" is appended to the path.
+ *
+ * PROM_V2 / PROM_V3 (and any other version): the stdout instance handle is
+ * converted to a package node, which must have device_type "display" or
+ * "serial".  V2 builds the path from the node name plus ":a"/":b"; later
+ * versions copy the root node's "stdout-path" property.
+ *
+ * of_console_options is set to the text following the last ':' in the
+ * path, or NULL when there is none.  Any inconsistency halts via
+ * prom_halt().
+ */
+static void __init of_console_init(void)
+{
+	char *msg = "OF stdout device is: %s\n";
+	struct device_node *dp;
+	unsigned long flags;
+	const char *type;
+	phandle node;
+	int skip, tmp, fd;
+
+	of_console_path = prom_early_alloc(256);
+
+	switch (prom_vers) {
+	case PROM_V0:
+	case PROM_SUN4:
+		skip = 0;
+		switch (*romvec->pv_stdout) {
+		case PROMDEV_SCREEN:
+			type = "display";
+			break;
+
+		case PROMDEV_TTYB:
+			skip = 1;
+			/* FALLTHRU */
+
+		case PROMDEV_TTYA:
+			type = "serial";
+			break;
+
+		default:
+			prom_printf("Invalid PROM_V0 stdout value %u\n",
+				    *romvec->pv_stdout);
+			prom_halt();
+		}
+
+		/*
+		 * Count down on a scratch copy: 'skip' itself is still needed
+		 * below to choose between the ":a" and ":b" suffix, and
+		 * decrementing it here would always leave it non-zero.
+		 */
+		tmp = skip;
+		for_each_node_by_type(dp, type) {
+			if (!tmp--)
+				break;
+		}
+		if (!dp) {
+			prom_printf("Cannot find PROM_V0 console node.\n");
+			prom_halt();
+		}
+		of_console_device = dp;
+
+		strcpy(of_console_path, dp->full_name);
+		if (!strcmp(type, "serial")) {
+			strcat(of_console_path,
+			       (skip ? ":b" : ":a"));
+		}
+		break;
+
+	default:
+	case PROM_V2:
+	case PROM_V3:
+		fd = *romvec->pv_v2bootargs.fd_stdout;
+
+		/* Firmware calls are made under prom_lock. */
+		spin_lock_irqsave(&prom_lock, flags);
+		node = (*romvec->pv_v2devops.v2_inst2pkg)(fd);
+		restore_current();
+		spin_unlock_irqrestore(&prom_lock, flags);
+
+		if (!node) {
+			prom_printf("Cannot resolve stdout node from "
+				    "instance %08x.\n", fd);
+			prom_halt();
+		}
+		dp = of_find_node_by_phandle(node);
+		type = of_get_property(dp, "device_type", NULL);
+
+		if (!type) {
+			prom_printf("Console stdout lacks "
+				    "device_type property.\n");
+			prom_halt();
+		}
+
+		if (strcmp(type, "display") && strcmp(type, "serial")) {
+			prom_printf("Console device_type is neither display "
+				    "nor serial.\n");
+			prom_halt();
+		}
+
+		of_console_device = dp;
+
+		if (prom_vers == PROM_V2) {
+			strcpy(of_console_path, dp->full_name);
+			switch (*romvec->pv_stdout) {
+			case PROMDEV_TTYA:
+				strcat(of_console_path, ":a");
+				break;
+			case PROMDEV_TTYB:
+				strcat(of_console_path, ":b");
+				break;
+			}
+		} else {
+			const char *path;
+
+			dp = of_find_node_by_path("/");
+			path = of_get_property(dp, "stdout-path", NULL);
+			if (!path) {
+				prom_printf("No stdout-path in root node.\n");
+				prom_halt();
+			}
+			strcpy(of_console_path, path);
+		}
+		break;
+	}
+
+	/* Everything after the last ':' is treated as console options. */
+	of_console_options = strrchr(of_console_path, ':');
+	if (of_console_options) {
+		of_console_options++;
+		if (*of_console_options == '\0')
+			of_console_options = NULL;
+	}
+
+	prom_printf(msg, of_console_path);
+	printk(msg, of_console_path);
+}
+
void __init prom_build_devicetree(void)
{
struct device_node **nextp;
@@ -578,6 +722,8 @@
allnodes->child = build_tree(allnodes,
prom_getchild(allnodes->node),
&nextp);
+ of_console_init();
+
printk("PROM: Built device tree with %u bytes of memory.\n",
prom_early_allocated);
}
--- linux-source-2.6.22-2.6.22.orig/arch/sparc/kernel/process.c
+++ linux-source-2.6.22-2.6.22/arch/sparc/kernel/process.c
@@ -39,6 +39,7 @@
#include
#include
#include
+#include
#include
/*
@@ -150,7 +151,7 @@
local_irq_enable();
mdelay(8);
local_irq_disable();
- if (!serial_console && prom_palette)
+ if (prom_palette)
prom_palette (1);
prom_halt();
panic("Halt failed!");
@@ -166,7 +167,7 @@
p = strchr (reboot_command, '\n');
if (p) *p = 0;
- if (!serial_console && prom_palette)
+ if (prom_palette)
prom_palette (1);
if (cmd)
prom_reboot(cmd);
@@ -179,7 +180,8 @@
void machine_power_off(void)
{
#ifdef CONFIG_SUN_AUXIO
- if (auxio_power_register && (!serial_console || scons_pwroff))
+ if (auxio_power_register &&
+ (strcmp(of_console_device->type, "serial") || scons_pwroff))
*auxio_power_register |= AUXIO_POWER_OFF;
#endif
machine_halt();
--- linux-source-2.6.22-2.6.22.orig/arch/sparc/kernel/setup.c
+++ linux-source-2.6.22-2.6.22/arch/sparc/kernel/setup.c
@@ -146,31 +146,6 @@
}
}
-static void __init process_console(char *commands)
-{
- serial_console = 0;
- commands += 8;
- /* Linux-style serial */
- if (!strncmp(commands, "ttyS", 4))
- serial_console = simple_strtoul(commands + 4, NULL, 10) + 1;
- else if (!strncmp(commands, "tty", 3)) {
- char c = *(commands + 3);
- /* Solaris-style serial */
- if (c == 'a' || c == 'b')
- serial_console = c - 'a' + 1;
- /* else Linux-style fbcon, not serial */
- }
-#if defined(CONFIG_PROM_CONSOLE)
- if (!strncmp(commands, "prom", 4)) {
- char *p;
-
- for (p = commands - 8; *p && *p != ' '; p++)
- *p = ' ';
- conswitchp = &prom_con;
- }
-#endif
-}
-
static void __init boot_flags_init(char *commands)
{
while (*commands) {
@@ -187,9 +162,7 @@
process_switch(*commands++);
continue;
}
- if (!strncmp(commands, "console=", 8)) {
- process_console(commands);
- } else if (!strncmp(commands, "mem=", 4)) {
+ if (!strncmp(commands, "mem=", 4)) {
/*
* "mem=XXX[kKmM] overrides the PROM-reported
* memory size.
@@ -341,41 +314,6 @@
smp_setup_cpu_possible_map();
}
-static int __init set_preferred_console(void)
-{
- int idev, odev;
-
- /* The user has requested a console so this is already set up. */
- if (serial_console >= 0)
- return -EBUSY;
-
- idev = prom_query_input_device();
- odev = prom_query_output_device();
- if (idev == PROMDEV_IKBD && odev == PROMDEV_OSCREEN) {
- serial_console = 0;
- } else if (idev == PROMDEV_ITTYA && odev == PROMDEV_OTTYA) {
- serial_console = 1;
- } else if (idev == PROMDEV_ITTYB && odev == PROMDEV_OTTYB) {
- serial_console = 2;
- } else if (idev == PROMDEV_I_UNK && odev == PROMDEV_OTTYA) {
- prom_printf("MrCoffee ttya\n");
- serial_console = 1;
- } else if (idev == PROMDEV_I_UNK && odev == PROMDEV_OSCREEN) {
- serial_console = 0;
- prom_printf("MrCoffee keyboard\n");
- } else {
- prom_printf("Confusing console (idev %d, odev %d)\n",
- idev, odev);
- serial_console = 1;
- }
-
- if (serial_console)
- return add_preferred_console("ttyS", serial_console - 1, NULL);
-
- return -ENODEV;
-}
-console_initcall(set_preferred_console);
-
extern char *sparc_cpu_type;
extern char *sparc_fpu_type;
@@ -461,7 +399,6 @@
prom_cmdline();
}
-int serial_console = -1;
int stop_a_enabled = 1;
static int __init topology_init(void)
--- linux-source-2.6.22-2.6.22.orig/arch/sparc/kernel/entry.S
+++ linux-source-2.6.22-2.6.22/arch/sparc/kernel/entry.S
@@ -1749,8 +1749,8 @@
__ndelay:
save %sp, -STACKFRAME_SZ, %sp
mov %i0, %o0
- call .umul
- mov 0x1ad, %o1 ! 2**32 / (1 000 000 000 / HZ)
+ call .umul ! round multiplier up so large ns ok
+ mov 0x1ae, %o1 ! 2**32 / (1 000 000 000 / HZ)
call .umul
mov %i1, %o1 ! udelay_val
ba delay_continue
@@ -1760,11 +1760,17 @@
__udelay:
save %sp, -STACKFRAME_SZ, %sp
mov %i0, %o0
- sethi %hi(0x10c6), %o1
+ sethi %hi(0x10c7), %o1 ! round multiplier up so large us ok
call .umul
- or %o1, %lo(0x10c6), %o1 ! 2**32 / 1 000 000
+ or %o1, %lo(0x10c7), %o1 ! 2**32 / 1 000 000
call .umul
mov %i1, %o1 ! udelay_val
+	/*
+	 * Add the rounding constant (2**32 * 0.009 999 = 0x028f4b62) to the
+	 * low word of the product and propagate the carry into the high word.
+	 * The "or" must combine with the sethi result in %l0; using %g0 as
+	 * the source would throw the upper 22 bits of the constant away.
+	 */
+	sethi	%hi(0x028f4b62), %l0	! Add in rounding constant * 2**32,
+	or	%l0, %lo(0x028f4b62), %l0
+	addcc	%o0, %l0, %o0		! 2**32 * 0.009 999
+	bcs,a	3f
+	 add	%o1, 0x01, %o1
+3:
call .umul
mov HZ, %o0 ! >>32 earlier for wider range
--- linux-source-2.6.22-2.6.22.orig/arch/sparc/lib/memset.S
+++ linux-source-2.6.22-2.6.22/arch/sparc/lib/memset.S
@@ -162,7 +162,7 @@
8:
add %o0, 1, %o0
subcc %o1, 1, %o1
- bne,a 8b
+ bne 8b
EX(stb %g3, [%o0 - 1], add %o1, 1)
0:
retl
--- linux-source-2.6.22-2.6.22.orig/arch/x86_64/Makefile
+++ linux-source-2.6.22-2.6.22/arch/x86_64/Makefile
@@ -57,8 +57,8 @@
cflags-y += -maccumulate-outgoing-args
# do binutils support CFI?
-cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
-AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+cflags-y += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
# is .cfi_signal_frame supported too?
cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
--- linux-source-2.6.22-2.6.22.orig/arch/x86_64/Kconfig.debug
+++ linux-source-2.6.22-2.6.22/arch/x86_64/Kconfig.debug
@@ -6,6 +6,12 @@
source "lib/Kconfig.debug"
+config WRAPPER_PRINT
+ bool "Boot wrapper print" if EMBEDDED
+ default y
+ help
+ Enable informational output from the bootwrapper (bzImage and zImage).
+
config DEBUG_RODATA
bool "Write protect kernel read-only data structures"
depends on DEBUG_KERNEL
--- linux-source-2.6.22-2.6.22.orig/arch/x86_64/kernel/head.S
+++ linux-source-2.6.22-2.6.22/arch/x86_64/kernel/head.S
@@ -326,8 +326,7 @@
/* 40MB kernel mapping. The kernel code cannot be bigger than that.
When you change this change KERNEL_TEXT_SIZE in page.h too. */
/* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
- PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
- KERNEL_TEXT_SIZE/PMD_SIZE)
+ PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE)
/* Module mapping starts here */
.fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
--- linux-source-2.6.22-2.6.22.orig/arch/x86_64/kernel/ptrace.c
+++ linux-source-2.6.22-2.6.22/arch/x86_64/kernel/ptrace.c
@@ -223,10 +223,6 @@
{
unsigned long tmp;
- /* Some code in the 64bit emulation may not be 64bit clean.
- Don't take any chances. */
- if (test_tsk_thread_flag(child, TIF_IA32))
- value &= 0xffffffff;
switch (regno) {
case offsetof(struct user_regs_struct,fs):
if (value && (value & 3) != 3)
--- linux-source-2.6.22-2.6.22.orig/arch/x86_64/kernel/Makefile
+++ linux-source-2.6.22-2.6.22/arch/x86_64/kernel/Makefile
@@ -32,7 +32,6 @@
obj-$(CONFIG_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
-obj-$(CONFIG_SERIAL_8250) += legacy_serial.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
obj-$(CONFIG_X86_VSMP) += vsmp.o
@@ -50,7 +49,6 @@
therm_throt-y += ../../i386/kernel/cpu/mcheck/therm_throt.o
bootflag-y += ../../i386/kernel/bootflag.o
-legacy_serial-y += ../../i386/kernel/legacy_serial.o
cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o
topology-y += ../../i386/kernel/topology.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o
--- linux-source-2.6.22-2.6.22.orig/arch/x86_64/kernel/entry.S
+++ linux-source-2.6.22-2.6.22/arch/x86_64/kernel/entry.S
@@ -775,7 +775,7 @@
swapgs
paranoid_restore\trace:
RESTORE_ALL 8
- iretq
+ jmp iret_label
paranoid_userspace\trace:
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%ebx
--- linux-source-2.6.22-2.6.22.orig/arch/x86_64/ia32/ia32entry.S
+++ linux-source-2.6.22-2.6.22/arch/x86_64/ia32/ia32entry.S
@@ -38,6 +38,18 @@
movq %rax,R8(%rsp)
.endm
+ /*
+  * LOAD_ARGS32 offset
+  *
+  * Reload the saved argument registers from the stack frame starting at
+  * \offset(%rsp), using 32-bit movl loads into the 32-bit register names
+  * (%r11d ... %eax).  The slot at \offset+32 is not loaded.
+  */
+ .macro LOAD_ARGS32 offset
+ movl \offset(%rsp),%r11d
+ movl \offset+8(%rsp),%r10d
+ movl \offset+16(%rsp),%r9d
+ movl \offset+24(%rsp),%r8d
+ movl \offset+40(%rsp),%ecx
+ movl \offset+48(%rsp),%edx
+ movl \offset+56(%rsp),%esi
+ movl \offset+64(%rsp),%edi
+ movl \offset+72(%rsp),%eax
+ .endm
+
.macro CFI_STARTPROC32 simple
CFI_STARTPROC \simple
CFI_UNDEFINED r8
@@ -152,7 +164,7 @@
movq $-ENOSYS,RAX(%rsp) /* really needed? */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
- LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
movl %ebp, %ebp
/* no need to do an access_ok check here because rbp has been
@@ -255,7 +267,7 @@
movq $-ENOSYS,RAX(%rsp) /* really needed? */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
- LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
movl RSP-ARGOFFSET(%rsp), %r8d
/* no need to do an access_ok check here because r8 has been
@@ -333,7 +345,7 @@
movq $-ENOSYS,RAX(%rsp) /* really needed? */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
- LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
jmp ia32_do_syscall
END(ia32_syscall)
--- linux-source-2.6.22-2.6.22.orig/arch/x86_64/boot/compressed/misc.c
+++ linux-source-2.6.22-2.6.22/arch/x86_64/boot/compressed/misc.c
@@ -184,8 +184,6 @@
static void *memset(void *s, int c, unsigned n);
static void *memcpy(void *dest, const void *src, unsigned n);
-static void putstr(const char *);
-
static long free_mem_ptr;
static long free_mem_end_ptr;
@@ -228,7 +226,8 @@
{
free_mem_ptr = (long) *ptr;
}
-
+
+#ifdef CONFIG_WRAPPER_PRINT
static void scroll(void)
{
int i;
@@ -274,6 +273,9 @@
outb_p(15, vidport);
outb_p(0xff & (pos >> 1), vidport+1);
}
+#else
+#define putstr(__x) do{}while(0)
+#endif /* CONFIG_WRAPPER_PRINT */
static void* memset(void* s, int c, unsigned n)
{
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/events/evgpeblk.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/events/evgpeblk.c
@@ -586,6 +586,10 @@
flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
if (gpe_xrupt->previous) {
gpe_xrupt->previous->next = gpe_xrupt->next;
+ } else {
+ /* No previous, update list head */
+
+ acpi_gbl_gpe_xrupt_list_head = gpe_xrupt->next;
}
if (gpe_xrupt->next) {
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/processor_idle.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/processor_idle.c
@@ -324,6 +324,23 @@
#endif
+/*
+ * Suspend / resume control
+ *
+ * acpi_idle_suspend is non-zero between the processor driver's suspend and
+ * resume callbacks.  acpi_processor_idle() tests it and, while it is set,
+ * takes the same path as when no power state is selected.
+ */
+static int acpi_idle_suspend;
+
+/* Driver .suspend hook: flag that a suspend is in progress.  Returns 0. */
+int acpi_processor_suspend(struct acpi_device *device, pm_message_t state)
+{
+	acpi_idle_suspend = 1;
+
+	return 0;
+}
+
+/* Driver .resume hook: clear the suspend-in-progress flag.  Returns 0. */
+int acpi_processor_resume(struct acpi_device *device)
+{
+	acpi_idle_suspend = 0;
+
+	return 0;
+}
+
static void acpi_processor_idle(void)
{
struct acpi_processor *pr = NULL;
@@ -354,7 +371,7 @@
}
cx = pr->power.state;
- if (!cx) {
+ if (!cx || acpi_idle_suspend) {
if (pm_idle_save)
pm_idle_save();
else
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/osl.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/osl.c
@@ -275,6 +275,67 @@
return AE_OK;
}
+#ifdef CONFIG_ACPI_CUSTOM_DSDT_INITRD
+/*
+ * Try to load a replacement DSDT from the file "/DSDT.aml".
+ *
+ * Returns a buffer obtained from ACPI_ALLOCATE() holding the complete file,
+ * or NULL when the file does not exist, is 4 bytes or smaller, cannot be
+ * opened, cannot be read in full, or the buffer cannot be allocated.  A
+ * non-NULL buffer is not freed here; it belongs to the caller.
+ */
+struct acpi_table_header *acpi_find_dsdt_initrd(void)
+{
+	struct file *firmware_file;
+	mm_segment_t oldfs;
+	unsigned long len;
+	ssize_t nread;
+	struct acpi_table_header *dsdt_buffer, *ret = NULL;
+	struct kstat stat;
+	/* maybe this could be an argument on the cmd line, but let's keep it simple for now */
+	char *ramfs_dsdt_name = "/DSDT.aml";
+
+	printk(KERN_INFO PREFIX "Looking for DSDT in initramfs... ");
+
+	/*
+	 * Never do this at home, only the user-space is allowed to open a file.
+	 * The clean way would be to use the firmware loader. But this code must be run
+	 * before there is any userspace available. So we need a static/init firmware
+	 * infrastructure, which doesn't exist yet...
+	 */
+	if (vfs_stat(ramfs_dsdt_name, &stat) < 0) {
+		printk("error, file %s not found.\n", ramfs_dsdt_name);
+		return ret;
+	}
+
+	len = stat.size;
+	/* check especially against empty files */
+	if (len <= 4) {
+		printk("error file is too small, only %lu bytes.\n", len);
+		return ret;
+	}
+
+	firmware_file = filp_open(ramfs_dsdt_name, O_RDONLY, 0);
+	if (IS_ERR(firmware_file)) {
+		printk("error, could not open file %s.\n", ramfs_dsdt_name);
+		return ret;
+	}
+
+	dsdt_buffer = ACPI_ALLOCATE(len);
+	if (!dsdt_buffer) {
+		printk("error when allocating %lu bytes of memory.\n", len);
+		goto err;
+	}
+
+	/* vfs_read() checks a user pointer; widen the limit for our kernel buffer. */
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	nread = vfs_read(firmware_file, (char __user *)dsdt_buffer, len, &firmware_file->f_pos);
+	set_fs(oldfs);
+
+	/*
+	 * vfs_read() returns a negative errno on failure.  Keep the result
+	 * signed so that an error is not mistaken for a huge byte count and
+	 * an unfilled buffer handed back as a valid table.
+	 */
+	if (nread < 0 || (unsigned long)nread < len) {
+		printk("error trying to read %lu bytes from %s.\n", len, ramfs_dsdt_name);
+		ACPI_FREE(dsdt_buffer);
+		goto err;
+	}
+
+	printk("successfully read %lu bytes from %s.\n", len, ramfs_dsdt_name);
+	ret = dsdt_buffer;
+err:
+	filp_close(firmware_file, NULL);
+	return ret;
+}
+#endif
+
acpi_status
acpi_os_table_override(struct acpi_table_header * existing_table,
struct acpi_table_header ** new_table)
@@ -282,13 +343,18 @@
if (!existing_table || !new_table)
return AE_BAD_PARAMETER;
+ *new_table = NULL;
+
#ifdef CONFIG_ACPI_CUSTOM_DSDT
if (strncmp(existing_table->signature, "DSDT", 4) == 0)
*new_table = (struct acpi_table_header *)AmlCode;
- else
- *new_table = NULL;
-#else
- *new_table = NULL;
+#endif
+#ifdef CONFIG_ACPI_CUSTOM_DSDT_INITRD
+ if (strncmp(existing_table->signature, "DSDT", 4) == 0) {
+ struct acpi_table_header* initrd_table = acpi_find_dsdt_initrd();
+ if (initrd_table)
+ *new_table = initrd_table;
+ }
#endif
return AE_OK;
}
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/ec.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/ec.c
@@ -801,8 +801,8 @@
ec->handle = handle;
- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "GPE=0x%02lx, ports=0x%2lx, 0x%2lx",
- ec->gpe, ec->command_addr, ec->data_addr));
+ printk(KERN_INFO PREFIX "GPE=0x%02lx, ports=0x%2lx, 0x%2lx\n",
+ ec->gpe, ec->command_addr, ec->data_addr);
return AE_CTRL_TERMINATE;
}
@@ -819,19 +819,22 @@
/*
* Generate a boot ec context
*/
-
status = acpi_get_table(ACPI_SIG_ECDT, 1,
(struct acpi_table_header **)&ecdt_ptr);
- if (ACPI_FAILURE(status))
- goto error;
-
- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found ECDT"));
-
- boot_ec->command_addr = ecdt_ptr->control.address;
- boot_ec->data_addr = ecdt_ptr->data.address;
- boot_ec->gpe = ecdt_ptr->gpe;
- boot_ec->handle = ACPI_ROOT_OBJECT;
-
+ if (ACPI_SUCCESS(status)) {
+ printk(KERN_INFO PREFIX "Found ECDT\n");
+ boot_ec->command_addr = ecdt_ptr->control.address;
+ boot_ec->data_addr = ecdt_ptr->data.address;
+ boot_ec->gpe = ecdt_ptr->gpe;
+ boot_ec->handle = ACPI_ROOT_OBJECT;
+ } else {
+ printk(KERN_DEBUG PREFIX "Look up EC in DSDT\n");
+ status = acpi_get_devices(ACPI_EC_HID, ec_parse_device,
+ boot_ec, NULL);
+ if (ACPI_FAILURE(status))
+ goto error;
+ }
+
ret = ec_install_handlers(boot_ec);
if (!ret) {
first_ec = boot_ec;
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/processor_core.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/processor_core.c
@@ -93,6 +93,8 @@
.add = acpi_processor_add,
.remove = acpi_processor_remove,
.start = acpi_processor_start,
+ .suspend = acpi_processor_suspend,
+ .resume = acpi_processor_resume,
},
};
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/scan.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/scan.c
@@ -9,6 +9,8 @@
#include
#include /* for acpi_ex_eisa_id_to_string() */
+#include
+#include
#define _COMPONENT ACPI_BUS_COMPONENT
ACPI_MODULE_NAME("scan");
@@ -1380,6 +1382,19 @@
return result;
}
+/*
+ * Set the AML_METHOD_NOTIFY flag on the object attached to the namespace
+ * node named by 'pathname'.
+ *
+ * Returns 0 on success, or -ENODEV when the path cannot be resolved or the
+ * node has no attached object.
+ *
+ * NOTE(review): the attached object is assumed to be a control method; its
+ * type is not verified here -- confirm that callers only pass method paths.
+ */
+int acpi_method_notify_enable(char *pathname)
+{
+	struct acpi_namespace_node *method = NULL;
+	union acpi_operand_object *obj;
+	acpi_status status;
+
+	/* Do not rely on the out-parameter being written when lookup fails. */
+	status = acpi_ns_get_node(ACPI_NS_ALL, pathname, 0, &method);
+	if (ACPI_FAILURE(status) || !method)
+		return -ENODEV;
+
+	/* A node without an attached object would be a NULL dereference. */
+	obj = acpi_ns_get_attached_object(method);
+	if (!obj)
+		return -ENODEV;
+
+	obj->method.method_flags |= AML_METHOD_NOTIFY;
+	return 0;
+}
+
+EXPORT_SYMBOL (acpi_method_notify_enable);
+
static int __init acpi_scan_init(void)
{
int result;
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/toshiba_acpi.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/toshiba_acpi.c
@@ -27,13 +27,28 @@
* engineering the Windows drivers
* Yasushi Nagato - changes for linux kernel 2.4 -> 2.5
* Rob Miller - TV out and hotkeys help
+ * Daniel Silverstone - Punting of hotkeys via acpi using a thread
*
+ * PLEASE NOTE
+ *
+ * This is an experimental version of toshiba_acpi which includes emulation
+ * of the original toshiba driver's /proc/toshiba and /dev/toshiba,
+ * allowing Toshiba userspace utilities to work. The relevant code was
+ * based on toshiba.c (copyright 1996-2001 Jonathan A. Buzzard) and
+ * incorporated into this driver with help from Gintautas Miliauskas,
+ * Charles Schwieters, and Christoph Burger-Scheidlin.
+ *
+ * Caveats:
+ * * hotkey status in /proc/toshiba is not implemented
+ * * to make accesses to /dev/toshiba load this driver instead of
+ * the original driver, you will have to modify your module
+ * auto-loading configuration
*
* TODO
*
*/
-#define TOSHIBA_ACPI_VERSION "0.18"
+#define TOSHIBA_ACPI_VERSION "0.19a-dev"
#define PROC_INTERFACE_VERSION 1
#include
@@ -41,12 +56,32 @@
#include
#include
#include
+#include
+#include
+#include
+#include
+#include
#include
-
+#include
#include
#include
+/*
+ * Compatibility definitions for the legacy isa_* accessors.
+ *
+ * Only used when isa_readb is not already defined: each isa_* helper is
+ * then mapped onto the corresponding generic MMIO helper (readb, writeb,
+ * memcpy_fromio, ...) with the address offset by __ISA_IO_base.
+ */
+#ifndef isa_readb
+
+#define isa_readb(a) readb(__ISA_IO_base + (a))
+#define isa_readw(a) readw(__ISA_IO_base + (a))
+#define isa_readl(a) readl(__ISA_IO_base + (a))
+#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
+#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
+#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
+#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c))
+#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c))
+#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c))
+
+#endif
+
MODULE_AUTHOR("John Belmonte");
MODULE_DESCRIPTION("Toshiba Laptop ACPI Extras Driver");
MODULE_LICENSE("GPL");
@@ -216,6 +251,11 @@
static int force_fan;
static int last_key_event;
static int key_event_valid;
+/*
+ * hotkeys_over_acpi: when non-zero, hotkey events are not reported through
+ * the proc "keys" file (it then shows hotkey_ready 0).
+ * hotkeys_check_per_sec: polling rate of the hotkey thread; it is used as
+ * a divisor of HZ, so it must not be zero.
+ *
+ * Both variables are plain ints, so they are registered with the matching
+ * "int" parameter type rather than "uint".
+ */
+static int hotkeys_over_acpi = 1;
+static int hotkeys_check_per_sec = 2;
+
+module_param(hotkeys_over_acpi, int, 0400);
+module_param(hotkeys_check_per_sec, int, 0400);
typedef struct _ProcItem {
const char *name;
@@ -443,27 +483,34 @@
u32 hci_result;
u32 value;
- if (!key_event_valid) {
- hci_read1(HCI_SYSTEM_EVENT, &value, &hci_result);
- if (hci_result == HCI_SUCCESS) {
- key_event_valid = 1;
- last_key_event = value;
- } else if (hci_result == HCI_EMPTY) {
- /* better luck next time */
- } else if (hci_result == HCI_NOT_SUPPORTED) {
- /* This is a workaround for an unresolved issue on
- * some machines where system events sporadically
- * become disabled. */
- hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result);
- printk(MY_NOTICE "Re-enabled hotkeys\n");
- } else {
- printk(MY_ERR "Error reading hotkey status\n");
- goto end;
+ if (!hotkeys_over_acpi) {
+ if (!key_event_valid) {
+ hci_read1(HCI_SYSTEM_EVENT, &value, &hci_result);
+ if (hci_result == HCI_SUCCESS) {
+ key_event_valid = 1;
+ last_key_event = value;
+ } else if (hci_result == HCI_EMPTY) {
+ /* better luck next time */
+ } else if (hci_result == HCI_NOT_SUPPORTED) {
+ /* This is a workaround for an
+ * unresolved issue on some machines
+ * where system events sporadically
+ * become disabled. */
+ hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result);
+ printk(MY_NOTICE "Re-enabled hotkeys\n");
+ } else {
+ printk(MY_ERR "Error reading hotkey status\n");
+ goto end;
+ }
}
+ } else {
+ key_event_valid = 0;
+ last_key_event = 0;
}
p += sprintf(p, "hotkey_ready: %d\n", key_event_valid);
p += sprintf(p, "hotkey: 0x%04x\n", last_key_event);
+ p += sprintf(p, "hotkeys_via_acpi: %d\n", hotkeys_over_acpi);
end:
return p;
@@ -490,6 +537,179 @@
return p;
}
+/* /dev/toshiba and /proc/toshiba handlers {{{
+ *
+ * ISSUE: lots of magic numbers and mysterious code
+ */
+
+#define TOSH_MINOR_DEV 181
+#define OLD_PROC_TOSHIBA "toshiba"
+
+/*
+ * Run one HCI call using 'regs' as both the input and the output register
+ * block.  Returns 0 when hci_raw() reports AE_OK and the status byte in
+ * regs->eax (mask 0xff00) equals HCI_SUCCESS, -EINVAL otherwise.
+ */
+static int
+tosh_acpi_bridge(SMMRegisters* regs)
+{
+	/* assert(sizeof(SMMRegisters) == sizeof(u32)*HCI_WORDS); */
+	u32 *words = (u32 *)regs;
+	acpi_status status = hci_raw(words, words);
+
+	if (status != AE_OK)
+		return -EINVAL;
+
+	if ((regs->eax & 0xff00) != HCI_SUCCESS)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * ioctl handler for the emulated /dev/toshiba device.
+ *
+ * Only TOSH_SMM with a non-NULL argument is accepted.  The SMMRegisters
+ * block is copied in from user space, passed to tosh_acpi_bridge(), and
+ * copied back out regardless of the bridge result.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported command, a blocked
+ * register selector or a failed HCI call, and -EFAULT when the user buffer
+ * cannot be read or written.
+ */
+static int
+tosh_ioctl(struct inode* ip, struct file* fp, unsigned int cmd,
+	unsigned long arg)
+{
+	SMMRegisters regs;
+	unsigned short ax,bx;
+	int err;
+
+	if ((!arg) || (cmd != TOSH_SMM))
+		return -EINVAL;
+
+	if (copy_from_user(&regs, (SMMRegisters __user *)arg, sizeof(SMMRegisters)))
+		return -EFAULT;
+
+	ax = regs.eax & 0xff00;
+	bx = regs.ebx & 0xffff;
+
+	/* block HCI calls to read/write memory & PCI devices */
+	if (((ax==HCI_SET) || (ax==HCI_GET)) && (bx>0x0069))
+		return -EINVAL;
+
+	err = tosh_acpi_bridge(&regs);
+
+	if (copy_to_user((SMMRegisters __user *)arg, &regs, sizeof(SMMRegisters)))
+		return -EFAULT;
+
+	return err;
+}
+
+/*
+ * Read the machine identification number from the legacy BIOS area.
+ *
+ * The initial id is built from the bytes at 0xffffe (high byte) and
+ * 0xffffa (low byte).  If that value is 0xfc2f, a chain of three 16-bit
+ * reads starting at 0xf0000 + 0xe6f5 is followed and the id is the final
+ * word with its two bytes swapped.
+ *
+ * NOTE(review): the addresses and offsets are undocumented magic numbers
+ * taken over from the original toshiba driver -- verify before changing.
+ */
+static int
+tosh_get_machine_id(void)
+{
+ int id;
+ unsigned short bx,cx;
+ unsigned long address;
+
+ id = (0x100*(int)isa_readb(0xffffe))+((int)isa_readb(0xffffa));
+
+ /* do we have a SCTTable machine identification number on our hands? */
+ if (id==0xfc2f) {
+ bx = 0xe6f5; /* cheat: hard-coded starting offset */
+ /* now twiddle with our pointer a bit: each read yields the next offset */
+ address = 0x000f0000+bx;
+ cx = isa_readw(address);
+ address = 0x000f0009+bx+cx;
+ cx = isa_readw(address);
+ address = 0x000f000a+cx;
+ cx = isa_readw(address);
+ /* now construct our machine identification number (byte-swap cx) */
+ id = ((cx & 0xff)<<8)+((cx & 0xff00)>>8);
+ }
+
+ return id;
+}
+
+static int tosh_id;
+static int tosh_bios;
+static int tosh_date;
+static int tosh_sci;
+
+/* File operations of the emulated /dev/toshiba node: ioctl only. */
+static struct file_operations tosh_fops = {
+	.owner	= THIS_MODULE,
+	.ioctl	= tosh_ioctl,
+};
+
+/* Misc device registered under the fixed minor TOSH_MINOR_DEV. */
+static struct miscdevice tosh_device = {
+	.minor	= TOSH_MINOR_DEV,
+	.name	= "toshiba",
+	.fops	= &tosh_fops,
+};
+
+/*
+ * Fill in tosh_id, tosh_bios and tosh_date from the legacy BIOS area.
+ *
+ * The version and date are stored as ASCII digits; each digit is read with
+ * isa_readb() and converted by subtracting '0'.  The 'bios' argument is
+ * not used by this function.
+ */
+static void
+setup_tosh_info(void __iomem *bios)
+{
+	int major, minor;
+	int day, month, year;
+	int packed_date;
+
+	tosh_id = tosh_get_machine_id();
+
+	/* BIOS version: one major digit, two minor digits */
+	major = isa_readb(0xfe009) - '0';
+	minor = (isa_readb(0xfe00b) - '0') * 10;
+	minor += isa_readb(0xfe00c) - '0';
+	tosh_bios = (major * 0x100) + minor;
+
+	/* BIOS date: two digits each for day, month and year */
+	day = (isa_readb(0xffff5) - '0') * 10;
+	day += isa_readb(0xffff6) - '0';
+	month = (isa_readb(0xffff8) - '0') * 10;
+	month += isa_readb(0xffff9) - '0';
+	year = (isa_readb(0xffffb) - '0') * 10;
+	year += isa_readb(0xffffc) - '0';
+
+	/* pack as (year - 90) in bits 10..14, month in 6..9, day in 1..5 */
+	packed_date = ((year - 90) & 0x1f) << 10;
+	packed_date |= (month & 0xf) << 6;
+	packed_date |= (day & 0x1f) << 1;
+	tosh_date = packed_date;
+}
+
+/*
+ * /proc/toshiba read handler.
+ *
+ * Writes a single line into 'buffer' and returns its length.  Fields:
+ *   0) Linux driver version (this will change if format changes)
+ *   1) Machine ID
+ *   2) SCI version (major.minor)
+ *   3) BIOS version (major.minor)
+ *   4) BIOS date (in SCI date format)
+ *   5) Fn Key status (always 0 for now)
+ * 'start', 'fpos' and 'length' are not used.
+ */
+static int
+tosh_get_info(char* buffer, char** start, off_t fpos, int length)
+{
+	/* TODO: tosh_fn_status() */
+	const int key = 0;
+	const int sci_major = (tosh_sci & 0xff00) >> 8;
+	const int sci_minor = tosh_sci & 0xff;
+	const int bios_major = (tosh_bios & 0xff00) >> 8;
+	const int bios_minor = tosh_bios & 0xff;
+
+	return sprintf(buffer, "1.1 0x%04x %d.%d %d.%d 0x%04x 0x%02x\n",
+		       tosh_id,
+		       sci_major, sci_minor,
+		       bios_major, bios_minor,
+		       tosh_date,
+		       key);
+}
+
+/*
+ * Set up the emulation of the original toshiba driver: register the
+ * /dev/toshiba misc device, gather the BIOS information and create the
+ * /proc/toshiba entry.
+ *
+ * Returns 0 on success, -ENOMEM when the BIOS area cannot be mapped, or
+ * the error returned by misc_register().
+ */
+static int __init
+old_driver_emulation_init(void)
+{
+	int status;
+	void __iomem *bios = ioremap(0xf0000, 0x10000);
+
+	if (!bios)
+		return -ENOMEM;
+
+	status = misc_register(&tosh_device);
+	if (status) {
+		printk(MY_ERR "failed to register misc device %d (\"%s\")\n",
+			tosh_device.minor, tosh_device.name);
+		/* do not leak the BIOS mapping on the error path */
+		iounmap(bios);
+		return status;
+	}
+
+	setup_tosh_info(bios);
+	create_proc_info_entry(OLD_PROC_TOSHIBA, 0, NULL, tosh_get_info);
+
+	/* the mapping is only needed while the BIOS data is collected */
+	iounmap(bios);
+
+	return 0;
+}
+
+/*
+ * Undo old_driver_emulation_init(): remove the /proc/toshiba entry and
+ * deregister the /dev/toshiba misc device.
+ */
+static void __exit
+old_driver_emulation_exit(void)
+{
+ remove_proc_entry(OLD_PROC_TOSHIBA, NULL);
+ misc_deregister(&tosh_device);
+}
+
+/* }}} end of /dev/toshiba and /proc/toshiba handlers */
+
/* proc and module init
*/
@@ -538,16 +758,144 @@
.update_status = set_lcd_status,
};
+/*
+ * Handshake with the ktoshkeyd thread: the thread does up() once it has
+ * started and once more right before it returns; module init and exit
+ * do down() to wait for those two points.
+ */
+static DECLARE_MUTEX_LOCKED(thread_sem);
+/* Set to 1 by toshiba_acpi_exit() to ask the thread to stop. */
+static int thread_should_die;
+
+/* ACPI device that hotkey events are generated for; unset while none is bound. */
+static struct acpi_device *threaded_device = 0;
+
+/*
+ * Turn one HCI system event value into an ACPI bus event on
+ * threaded_device.  Nothing is sent while no device is bound.
+ *
+ * Values with bit 0x80 set are reported as type 1 with that bit cleared
+ * (presumably the key-release flag - TODO confirm); all others are
+ * reported as type 0 unchanged.
+ */
+static void thread_deliver_button_event(u32 value)
+{
+	if (!threaded_device)
+		return;
+
+	/* Ignore FN on its own */
+	if (value == 0x0100)
+		return;
+
+	if (value & 0x80)
+		acpi_bus_generate_event(threaded_device, 1, value & ~0x80);
+	else
+		acpi_bus_generate_event(threaded_device, 0, value);
+}
+
+/*
+ * Body of the "ktoshkeyd" kernel thread.
+ *
+ * First drains the HCI system event queue, counting what it throws away.
+ * Then polls the queue hotkeys_check_per_sec times per second and passes
+ * every event read successfully to thread_deliver_button_event().
+ * Leaves when thread_should_die is set.
+ *
+ * @data is unused.  Always returns 0.
+ */
+static int toshiba_acpi_thread(void *data)
+{
+ int dropped = 0;
+ u32 hci_result, value;
+
+ daemonize("ktoshkeyd");
+ set_user_nice(current, 4);
+ thread_should_die = 0;
+
+ /* tell the starter, which is waiting in down(), that we are running */
+ up(&thread_sem);
+
+ /* discard events that were queued before the thread started */
+ do {
+ /* In case we get stuck; we can rmmod the module here */
+ if (thread_should_die)
+ break;
+
+ hci_read1(HCI_SYSTEM_EVENT, &value, &hci_result);
+ if (hci_result == HCI_SUCCESS) {
+ dropped++;
+ } else if (hci_result == HCI_EMPTY) {
+ /* better luck next time */
+ } else if (hci_result == HCI_NOT_SUPPORTED) {
+ /* This is a workaround for an unresolved issue on
+ * some machines where system events sporadically
+ * become disabled. */
+ /* note: this overwrites hci_result, which the loop condition tests */
+ hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result);
+ printk(MY_NOTICE "Re-enabled hotkeys\n");
+ }
+ /* NOTE(review): any result other than HCI_EMPTY keeps this loop running without sleeping */
+ } while (hci_result != HCI_EMPTY);
+
+ printk(MY_INFO "Dropped %d keys from the queue on startup\n", dropped);
+
+ for (;;) {
+ /* sleep for one polling interval */
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ / hotkeys_check_per_sec);
+
+ if (thread_should_die)
+ break;
+
+ /* skip this round when try_to_freeze() reports a non-zero result */
+ if (try_to_freeze())
+ continue;
+
+ /* deliver events for as long as reads keep succeeding */
+ do {
+ hci_read1(HCI_SYSTEM_EVENT, &value, &hci_result);
+ if (hci_result == HCI_SUCCESS) {
+ thread_deliver_button_event(value);
+ } else if (hci_result == HCI_EMPTY) {
+ /* better luck next time */
+ } else if (hci_result == HCI_NOT_SUPPORTED) {
+ /* This is a workaround for an
+ * unresolved issue on some machines
+ * where system events sporadically
+ * become disabled. */
+ hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result);
+ printk(MY_NOTICE "Re-enabled hotkeys\n");
+ }
+ } while (hci_result == HCI_SUCCESS);
+ }
+ set_user_nice(current, -20); /* Become nasty so we are cleaned up
+ * before the module exits making us oops */
+ /* release toshiba_acpi_exit(), which is waiting in down() */
+ up(&thread_sem);
+ return 0;
+}
+
+/*
+ * ACPI .add callback: remember @device as the target for hotkey events
+ * and set its name and class strings.  Always returns 0.
+ */
+static int acpi_toshkeys_add (struct acpi_device *device)
+{
+ threaded_device = device;
+ strcpy(acpi_device_name(device), "Toshiba laptop hotkeys");
+ strcpy(acpi_device_class(device), "hkey");
+ return 0;
+}
+
+/*
+ * ACPI .remove callback: forget @device if it is the one hotkey events
+ * are currently reported against.  @type is unused.  Always returns 0.
+ */
+static int acpi_toshkeys_remove (struct acpi_device *device, int type)
+{
+	if (threaded_device != device)
+		return 0;
+
+	threaded_device = NULL;
+	return 0;
+}
+
+/* ACPI driver that binds the hotkey event device to the listed TOS62xx ids. */
+static struct acpi_driver acpi_threaded_toshkeys = {
+ .name = "Toshiba laptop hotkeys driver",
+ .class = "hkey",
+ .ids = "TOS6200,TOS6207,TOS6208",
+ .ops = {
+ .add = acpi_toshkeys_add,
+ .remove = acpi_toshkeys_remove,
+ },
+};
+
+/*
+ * Register the hotkey ACPI driver.
+ *
+ * Returns the result of acpi_bus_register_driver(); a negative value
+ * means the registration failed.
+ */
+static int __init init_threaded_acpi(void)
+{
+	/*
+	 * Keep this a signed int: with the unsigned acpi_status type the
+	 * "< 0" test below can never be true and failures go unreported.
+	 */
+	int result = acpi_bus_register_driver(&acpi_threaded_toshkeys);
+
+	if (result < 0)
+		printk(MY_ERR "Registration of toshkeys acpi device failed\n");
+	return result;
+}
+
+/* Unregister the hotkey ACPI driver registered by init_threaded_acpi(). */
+static void kill_threaded_acpi(void)
+{
+ acpi_bus_unregister_driver(&acpi_threaded_toshkeys);
+}
+
static void toshiba_acpi_exit(void)
{
if (toshiba_backlight_device)
backlight_device_unregister(toshiba_backlight_device);
+ if (hotkeys_over_acpi) {
+ thread_should_die = 1;
+ down(&thread_sem);
+ kill_threaded_acpi();
+ }
+
remove_device();
if (toshiba_proc_dir)
remove_proc_entry(PROC_TOSHIBA, acpi_root_dir);
+ old_driver_emulation_exit();
+
return;
}
@@ -555,6 +903,7 @@
{
acpi_status status = AE_OK;
u32 hci_result;
+ int status2;
if (acpi_disabled)
return -ENODEV;
@@ -571,6 +920,9 @@
TOSHIBA_ACPI_VERSION);
printk(MY_INFO " HCI method: %s\n", method_hci);
+ if ((status2 = old_driver_emulation_init()))
+ return status2;
+
force_fan = 0;
key_event_valid = 0;
@@ -597,6 +949,25 @@
}
toshiba_backlight_device->props.max_brightness = HCI_LCD_BRIGHTNESS_LEVELS - 1;
+	if (hotkeys_over_acpi && ACPI_SUCCESS(status)) {
+		int started = 0;
+
+		printk(MY_INFO "Toshiba hotkeys are sent as ACPI events\n");
+		/* clamp the polling rate to 1..10 checks per second */
+		if (hotkeys_check_per_sec < 1)
+			hotkeys_check_per_sec = 1;
+		if (hotkeys_check_per_sec > 10)
+			hotkeys_check_per_sec = 10;
+		printk(MY_INFO "ktoshkeyd will check %d time%s per second\n",
+			hotkeys_check_per_sec, hotkeys_check_per_sec==1?"":"s");
+		if (init_threaded_acpi() >= 0) {
+			if (kernel_thread(toshiba_acpi_thread, NULL,
+					  CLONE_KERNEL) >= 0) {
+				/* wait until the thread signals that it is running */
+				down(&thread_sem);
+				started = 1;
+			} else {
+				/*
+				 * No thread exists to up() thread_sem, so
+				 * waiting on it would block forever.
+				 */
+				kill_threaded_acpi();
+			}
+		}
+		if (!started) {
+			/*
+			 * NOTE(review): the backlight device and the old
+			 * driver emulation set up earlier in this function
+			 * are not undone on this path - confirm.
+			 */
+			remove_device();
+			remove_proc_entry(PROC_TOSHIBA, acpi_root_dir);
+			status = AE_ERROR;
+			printk(MY_INFO "ktoshkeyd initialisation failed. Refusing to load module\n");
+		}
+	}
+
return (ACPI_SUCCESS(status)) ? 0 : -ENODEV;
}
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/video.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/video.c
@@ -70,6 +70,10 @@
MODULE_DESCRIPTION("ACPI Video Driver");
MODULE_LICENSE("GPL");
+/*
+ * When non-zero (the default) brightness notifications are only passed
+ * on as ACPI events; the driver does not switch the brightness itself.
+ * Declared unsigned to match the "uint" module parameter type.
+ */
+static unsigned int no_automatic_changes = 1;
+
+module_param(no_automatic_changes, uint, 0600);
+MODULE_PARM_DESC(no_automatic_changes,
+		 "Do not change brightness in the driver on notifications, only generate events");
+
static int acpi_video_bus_add(struct acpi_device *device);
static int acpi_video_bus_remove(struct acpi_device *device, int type);
@@ -381,7 +385,6 @@
arg0.integer.value = level;
status = acpi_evaluate_object(device->dev->handle, "_BCM", &args, NULL);
- printk(KERN_DEBUG "set_level status: %x\n", status);
return status;
}
@@ -1715,8 +1718,6 @@
struct acpi_video_bus *video = data;
struct acpi_device *device = NULL;
- printk("video bus notify\n");
-
if (!video)
return;
@@ -1772,7 +1773,8 @@
case ACPI_VIDEO_NOTIFY_DEC_BRIGHTNESS: /* Decrease brightness */
case ACPI_VIDEO_NOTIFY_ZERO_BRIGHTNESS: /* zero brightnesss */
case ACPI_VIDEO_NOTIFY_DISPLAY_OFF: /* display device off */
- acpi_video_switch_brightness(video_device, event);
+ if (!no_automatic_changes)
+ acpi_video_switch_brightness(video_device, event);
acpi_bus_generate_event(device, event, 0);
break;
default:
@@ -1783,6 +1785,7 @@
return;
}
+static int instance;
static int acpi_video_bus_add(struct acpi_device *device)
{
int result = 0;
@@ -1797,6 +1800,13 @@
if (!video)
return -ENOMEM;
+ /* a hack to fix the duplicate name "VID" problem on T61 */
+ if (!strcmp(device->pnp.bus_id, "VID")) {
+ if (instance)
+ device->pnp.bus_id[3] = '0' + instance;
+ instance ++;
+ }
+
video->device = device;
strcpy(acpi_device_name(device), ACPI_VIDEO_BUS_NAME);
strcpy(acpi_device_class(device), ACPI_VIDEO_CLASS);
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/button.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/button.c
@@ -68,6 +68,7 @@
static int acpi_button_add(struct acpi_device *device);
static int acpi_button_remove(struct acpi_device *device, int type);
+static int acpi_button_resume(struct acpi_device *device);
static int acpi_button_info_open_fs(struct inode *inode, struct file *file);
static int acpi_button_state_open_fs(struct inode *inode, struct file *file);
@@ -77,6 +78,7 @@
.ids = "button_power,button_sleep,PNP0C0D,PNP0C0C,PNP0C0E",
.ops = {
.add = acpi_button_add,
+ .resume = acpi_button_resume,
.remove = acpi_button_remove,
},
};
@@ -487,6 +489,29 @@
return 0;
}
+/*
+ * Resume callback: this is needed to learn about changes made in
+ * suspended state.
+ *
+ * Evaluates _LID on the button's device and, when that succeeds, reports
+ * the result as SW_LID.  On devices where _LID cannot be evaluated
+ * nothing is reported.
+ *
+ * Returns -EINVAL for a NULL @device, 0 otherwise.
+ */
+static int acpi_button_resume(struct acpi_device *device)
+{
+	struct acpi_button *button;
+	unsigned long state;
+
+	if (!device)
+		return -EINVAL;
+
+	button = device->driver_data;
+	if (!button || !button->device || !button->input)
+		return 0;
+
+	/*
+	 * On resume we send the state; if it matches to what input layer
+	 * thinks then the event will not even reach userspace.
+	 */
+	if (!ACPI_FAILURE(acpi_evaluate_integer(button->device->handle, "_LID",
+						NULL, &state))) {
+		input_report_switch(button->input, SW_LID, !state);
+		/* flush the report so it is delivered now, not with a later event */
+		input_sync(button->input);
+	}
+
+	return 0;
+}
+
static int __init acpi_button_init(void)
{
int result;
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/parser/psparse.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/parser/psparse.c
@@ -56,6 +56,7 @@
#include
#include
#include
+#include
#define _COMPONENT ACPI_PARSER
ACPI_MODULE_NAME("psparse")
@@ -452,6 +453,7 @@
struct acpi_thread_state *thread;
struct acpi_thread_state *prev_walk_list = acpi_gbl_current_walk_list;
struct acpi_walk_state *previous_walk_state;
+ struct acpi_namespace_node *method_node;
ACPI_FUNCTION_TRACE(ps_parse_aml);
@@ -495,6 +497,18 @@
status = AE_OK;
while (walk_state) {
+		method_node = walk_state->method_call_node;
+
+		/*
+		 * Generate a "METHOD" bus event, named after the method, for
+		 * methods flagged AML_METHOD_NOTIFY.
+		 */
+		if (method_node) {
+			union acpi_operand_object *obj_desc =
+			    acpi_ns_get_attached_object(method_node);
+
+			if (obj_desc &&
+			    (obj_desc->method.method_flags & AML_METHOD_NOTIFY)) {
+				/* This is suboptimal */
+				struct acpi_device *device =
+				    kzalloc(sizeof(*device), GFP_ATOMIC);
+
+				/* best effort: no event if the allocation fails */
+				if (device) {
+					strcpy(device->pnp.device_class, "METHOD");
+					/*
+					 * name.ascii is a fixed-size field
+					 * without a terminating NUL, so copy
+					 * it by length; kzalloc() has already
+					 * zeroed bus_id, which terminates it.
+					 */
+					memcpy(device->pnp.bus_id,
+					       method_node->name.ascii,
+					       ACPI_MIN(sizeof(method_node->name.ascii),
+							sizeof(device->pnp.bus_id) - 1));
+					acpi_bus_generate_event(device, 0, 0);
+
+					kfree(device);
+				}
+			}
+		}
+
if (ACPI_SUCCESS(status)) {
/*
* The parse_loop executes AML until the method terminates
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/Kconfig
+++ linux-source-2.6.22-2.6.22/drivers/acpi/Kconfig
@@ -261,6 +261,23 @@
Enter the full path name to the file which includes the AmlCode
declaration.
+config ACPI_CUSTOM_DSDT_INITRD
+ bool "Read Custom DSDT from initramfs"
+ depends on BLK_DEV_INITRD
+ default y
+ help
+ The DSDT (Differentiated System Description Table) often needs to be
+ overridden because of broken BIOS implementations. If this feature is
+ activated you will be able to provide a customized DSDT by adding it
+ to your initramfs. For now you need to use a special mkinitrd tool.
+ For more details see <file:Documentation/dsdt-initrd.txt> or
+ <http://gaugusch.at/kernel.shtml>. If there is no table found, it
+ will fallback to the custom DSDT in-kernel (if activated) or to the
+ DSDT from the BIOS.
+
+ Even if you do not need a new one at the moment, you may want to use a
+ better implemented DSDT later. It is safe to say Y here.
+
config ACPI_BLACKLIST_YEAR
int "Disable ACPI for systems before Jan 1st this year" if X86_32
default 0
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/dock.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/dock.c
@@ -716,6 +716,7 @@
if (ret) {
printk(KERN_ERR PREFIX "Error %d registering dock device\n", ret);
kfree(dock_station);
+ dock_station = NULL;
return ret;
}
ret = device_create_file(&dock_device.dev, &dev_attr_docked);
@@ -723,6 +724,7 @@
printk("Error %d adding sysfs file\n", ret);
platform_device_unregister(&dock_device);
kfree(dock_station);
+ dock_station = NULL;
return ret;
}
ret = device_create_file(&dock_device.dev, &dev_attr_undock);
@@ -731,6 +733,7 @@
device_remove_file(&dock_device.dev, &dev_attr_docked);
platform_device_unregister(&dock_device);
kfree(dock_station);
+ dock_station = NULL;
return ret;
}
ret = device_create_file(&dock_device.dev, &dev_attr_uid);
@@ -738,6 +741,7 @@
printk("Error %d adding sysfs file\n", ret);
platform_device_unregister(&dock_device);
kfree(dock_station);
+ dock_station = NULL;
return ret;
}
@@ -750,6 +754,7 @@
dd = alloc_dock_dependent_device(handle);
if (!dd) {
kfree(dock_station);
+ dock_station = NULL;
ret = -ENOMEM;
goto dock_add_err_unregister;
}
@@ -777,6 +782,7 @@
device_remove_file(&dock_device.dev, &dev_attr_undock);
platform_device_unregister(&dock_device);
kfree(dock_station);
+ dock_station = NULL;
return ret;
}
@@ -810,6 +816,7 @@
/* free dock station memory */
kfree(dock_station);
+ dock_station = NULL;
return 0;
}
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/tables/tbfadt.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/tables/tbfadt.c
@@ -211,14 +211,17 @@
* DESCRIPTION: Get a local copy of the FADT and convert it to a common format.
* Performs validation on some important FADT fields.
*
+ * NOTE: We create a local copy of the FADT regardless of the version.
+ *
******************************************************************************/
void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length)
{
/*
- * Check if the FADT is larger than what we know about (ACPI 2.0 version).
- * Truncate the table, but make some noise.
+ * Check if the FADT is larger than the largest table that we expect
+ * (the ACPI 2.0/3.0 version). If so, truncate the table, and issue
+ * a warning.
*/
if (length > sizeof(struct acpi_table_fadt)) {
ACPI_WARNING((AE_INFO,
@@ -227,10 +230,12 @@
sizeof(struct acpi_table_fadt)));
}
- /* Copy the entire FADT locally. Zero first for tb_convert_fadt */
+ /* Clear the entire local FADT */
ACPI_MEMSET(&acpi_gbl_FADT, 0, sizeof(struct acpi_table_fadt));
+ /* Copy the original FADT, up to sizeof (struct acpi_table_fadt) */
+
ACPI_MEMCPY(&acpi_gbl_FADT, table,
ACPI_MIN(length, sizeof(struct acpi_table_fadt)));
@@ -251,7 +256,7 @@
* RETURN: None
*
* DESCRIPTION: Converts all versions of the FADT to a common internal format.
- * -> Expand all 32-bit addresses to 64-bit.
+ * Expand all 32-bit addresses to 64-bit.
*
* NOTE: acpi_gbl_FADT must be of size (struct acpi_table_fadt),
* and must contain a copy of the actual FADT.
@@ -292,8 +297,23 @@
}
/*
- * Expand the 32-bit V1.0 addresses to the 64-bit "X" generic address
- * structures as necessary.
+ * For ACPI 1.0 FADTs (revision 1 or 2), ensure that reserved fields which
+ * should be zero are indeed zero. This will workaround BIOSs that
+ * inadvertently place values in these fields.
+ *
+ * The ACPI 1.0 reserved fields that will be zeroed are the bytes located at
+ * offset 45, 55, 95, and the word located at offset 109, 110.
+ */
+ if (acpi_gbl_FADT.header.revision < 3) {
+ acpi_gbl_FADT.preferred_profile = 0;
+ acpi_gbl_FADT.pstate_control = 0;
+ acpi_gbl_FADT.cst_control = 0;
+ acpi_gbl_FADT.boot_flags = 0;
+ }
+
+ /*
+ * Expand the ACPI 1.0 32-bit V1.0 addresses to the ACPI 2.0 64-bit "X"
+ * generic address structures as necessary.
*/
for (i = 0; i < ACPI_FADT_INFO_ENTRIES; i++) {
target =
@@ -349,18 +369,6 @@
acpi_gbl_FADT.xpm1a_event_block.space_id;
}
-
- /*
- * For ACPI 1.0 FADTs, ensure that reserved fields (which should be zero)
- * are indeed zero. This will workaround BIOSs that inadvertently placed
- * values in these fields.
- */
- if (acpi_gbl_FADT.header.revision < 3) {
- acpi_gbl_FADT.preferred_profile = 0;
- acpi_gbl_FADT.pstate_control = 0;
- acpi_gbl_FADT.cst_control = 0;
- acpi_gbl_FADT.boot_flags = 0;
- }
}
/******************************************************************************
--- linux-source-2.6.22-2.6.22.orig/drivers/acpi/tables/tbutils.c
+++ linux-source-2.6.22-2.6.22/drivers/acpi/tables/tbutils.c
@@ -51,6 +51,65 @@
static acpi_physical_address
acpi_tb_get_root_table_entry(u8 * table_entry,
acpi_native_uint table_entry_size);
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_tb_check_xsdt
+ *
+ * PARAMETERS:  address                 - Physical address of the XSDT
+ *
+ * RETURN:      Status
+ *                  AE_OK                   - no NULL entry was found
+ *                  AE_NO_MEMORY            - the XSDT could not be mapped
+ *                  AE_INVALID_TABLE_LENGTH - length is smaller than a header
+ *                  AE_NULL_ENTRY           - the XSDT contains a NULL entry
+ *
+ * DESCRIPTION: Map the XSDT and scan its 64-bit entries for a NULL address.
+ *
+ ******************************************************************************/
+
+static acpi_status
+acpi_tb_check_xsdt(acpi_physical_address address)
+{
+	struct acpi_table_header *xsdt;
+	acpi_status status = AE_OK;
+	u32 xsdt_length;
+	u32 entry_count;
+	u64 entry;
+	u8 *cursor;
+	int idx;
+
+	/* Map only the header first, to learn the length of the whole table */
+	xsdt = acpi_os_map_memory(address, sizeof(struct acpi_table_header));
+	if (!xsdt)
+		return AE_NO_MEMORY;
+
+	xsdt_length = xsdt->length;
+	acpi_os_unmap_memory(xsdt, sizeof(struct acpi_table_header));
+
+	if (xsdt_length < sizeof(struct acpi_table_header))
+		return AE_INVALID_TABLE_LENGTH;
+
+	/* Now map the complete table */
+	xsdt = acpi_os_map_memory(address, xsdt_length);
+	if (!xsdt)
+		return AE_NO_MEMORY;
+
+	/* Calculate the number of tables described in XSDT */
+	entry_count =
+	    (u32) ((xsdt->length -
+		    sizeof(struct acpi_table_header)) / sizeof(u64));
+	cursor = ACPI_CAST_PTR(u8, xsdt) + sizeof(struct acpi_table_header);
+
+	for (idx = 0; idx < entry_count; idx++, cursor += sizeof(u64)) {
+		ACPI_MOVE_64_TO_64(&entry, cursor);
+		if (!entry) {
+			/* XSDT has NULL entry */
+			status = AE_NULL_ENTRY;
+			break;
+		}
+	}
+
+	acpi_os_unmap_memory(xsdt, xsdt_length);
+	return status;
+}
/*******************************************************************************
*
@@ -341,6 +400,7 @@
u32 table_count;
struct acpi_table_header *table;
acpi_physical_address address;
+ acpi_physical_address rsdt_address;
u32 length;
u8 *table_entry;
acpi_status status;
@@ -369,6 +429,8 @@
*/
address = (acpi_physical_address) rsdp->xsdt_physical_address;
table_entry_size = sizeof(u64);
+ rsdt_address = (acpi_physical_address)
+ rsdp->rsdt_physical_address;
} else {
/* Root table is an RSDT (32-bit physical addresses) */
@@ -382,6 +444,15 @@
*/
acpi_os_unmap_memory(rsdp, sizeof(struct acpi_table_rsdp));
+	/*
+	 * An XSDT was selected above: fall back to the RSDT when the XSDT
+	 * contains a NULL entry.
+	 */
+	if (table_entry_size == sizeof(u64)) {
+		if (acpi_tb_check_xsdt(address) == AE_NULL_ENTRY) {
+			/* XSDT has NULL entry, RSDT is used */
+			address = rsdt_address;
+			table_entry_size = sizeof(u32);
+			ACPI_WARNING((AE_INFO, "BIOS XSDT has NULL entry, "
+				      "using RSDT"));
+		}
+	}
/* Map the RSDT/XSDT table header to get the full table length */
table = acpi_os_map_memory(address, sizeof(struct acpi_table_header));
--- linux-source-2.6.22-2.6.22.orig/drivers/i2c/busses/Makefile
+++ linux-source-2.6.22-2.6.22/drivers/i2c/busses/Makefile
@@ -32,6 +32,7 @@
obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o
obj-$(CONFIG_I2C_PCA_ISA) += i2c-pca-isa.o
obj-$(CONFIG_I2C_PIIX4) += i2c-piix4.o
+obj-$(CONFIG_I2C_POULSBO) += i2c-sch.o
obj-$(CONFIG_I2C_PNX) += i2c-pnx.o
obj-$(CONFIG_I2C_PROSAVAGE) += i2c-prosavage.o
obj-$(CONFIG_I2C_PXA) += i2c-pxa.o
--- linux-source-2.6.22-2.6.22.orig/drivers/i2c/busses/i2c-sch.c
+++ linux-source-2.6.22-2.6.22/drivers/i2c/busses/i2c-sch.c
@@ -0,0 +1,396 @@
+/*
+ * i2c-sch.c - Part of lm_sensors, Linux kernel modules for hardware
+ * monitoring
+ *
+ * Based on piix4.c
+ * Copyright (c) 1998 - 2002 Frodo Looijaard and
+ * Philip Edelbrock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * Supports:
+ * Intel POULSBO
+ *
+ * Note: we assume there can only be one device, with one SMBus interface.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * NOTE(review): nothing in this file refers to struct sd; presumably a
+ * leftover from the driver this one is based on - confirm before removing.
+ */
+struct sd {
+ const unsigned short mfr;
+ const unsigned short dev;
+ const unsigned char fn;
+ const char *name;
+};
+
+/* POULSBO SMBus address offsets */
+#define SMBHSTCNT (0 + poulsbo_smba)
+#define SMBHSTSTS (1 + poulsbo_smba)
+#define SMBHSTADD (4 + poulsbo_smba) /* TSA */
+#define SMBHSTCMD (5 + poulsbo_smba)
+#define SMBHSTDAT0 (6 + poulsbo_smba)
+#define SMBHSTDAT1 (7 + poulsbo_smba)
+#define SMBBLKDAT (0x20 + poulsbo_smba)
+
+/* count for request_region */
+#define SMBIOSIZE 8
+
+/* PCI Address Constants */
+#define SMBBA_SCH 0x040
+
+/* Other settings */
+#define MAX_TIMEOUT 500
+#define ENABLE_INT9 0
+
+/* POULSBO constants */
+#define POULSBO_QUICK 0x00
+#define POULSBO_BYTE 0x01
+#define POULSBO_BYTE_DATA 0x02
+#define POULSBO_WORD_DATA 0x03
+#define POULSBO_BLOCK_DATA 0x05
+
+/* insmod parameters */
+
+/* If force is set to anything different from 0, we forcibly enable the
+ POULSBO. DANGEROUS! */
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Forcibly enable the POULSBO. DANGEROUS!");
+
+static int poulsbo_transaction(void);
+
+static unsigned short poulsbo_smba;
+static struct pci_driver poulsbo_driver;
+static struct i2c_adapter poulsbo_adapter;
+
+/*
+ * Find the SMBus I/O base of the SCH (Poulsbo) LPC device and reserve
+ * the SMBIOSIZE ports starting there.
+ *
+ * On success poulsbo_smba holds the base address and 0 is returned.
+ * Returns -ENODEV when the PCI function does not match, the base
+ * address reads as zero, or the I/O region is already in use.
+ */
+static int __devinit poulsbo_setup(struct pci_dev *POULSBO_dev,
+				   const struct pci_device_id *id)
+{
+	unsigned short smbase;
+
+	if (POULSBO_dev->device != PCI_DEVICE_ID_INTEL_POULSBO_LPC) {
+		/* match up the function */
+		if (PCI_FUNC(POULSBO_dev->devfn) != id->driver_data)
+			return -ENODEV;
+		dev_info(&POULSBO_dev->dev, "Found %s device\n",
+			 pci_name(POULSBO_dev));
+		/* the base address is only read from the LPC device */
+		poulsbo_smba = 0;
+	} else {
+		dev_info(&POULSBO_dev->dev, "Found POULSBO SMBUS %s device\n",
+			 pci_name(POULSBO_dev));
+		/* find SMBUS base address */
+		pci_read_config_word(POULSBO_dev, SMBBA_SCH, &smbase);
+		dev_info(&POULSBO_dev->dev, "POULSBO SM base = 0x%04x\n",
+			 smbase);
+		poulsbo_smba = smbase;
+	}
+
+	/* the low four bits are not part of the address */
+	poulsbo_smba &= 0xfff0;
+	if (poulsbo_smba == 0) {
+		/* this driver has no parameter for supplying an address */
+		dev_err(&POULSBO_dev->dev, "SMB base address "
+			"uninitialized - upgrade BIOS\n");
+		return -ENODEV;
+	}
+
+	if (!request_region(poulsbo_smba, SMBIOSIZE, poulsbo_driver.name)) {
+		dev_err(&POULSBO_dev->dev, "SMB region 0x%x already in use!\n",
+			poulsbo_smba);
+		return -ENODEV;
+	}
+
+	dev_dbg(&POULSBO_dev->dev, "SMBA = 0x%X\n", poulsbo_smba);
+
+	return 0;
+}
+
+/*
+ * Internal helper: run the transaction that poulsbo_access() has set up
+ * in the host registers.
+ *
+ * Clears stale status, starts the transfer, polls the status register
+ * once per millisecond until bit 3 clears or MAX_TIMEOUT polls have
+ * passed, then checks and clears the resulting status bits.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int poulsbo_transaction(void)
+{
+	int temp;
+	int result = 0;
+	int timeout = 0;
+
+	dev_dbg(&poulsbo_adapter.dev, "Transaction (pre): CNT=%02x, CMD=%02x, "
+		"ADD=%02x, DAT0=%02x, DAT1=%02x\n", inb_p(SMBHSTCNT),
+		inb_p(SMBHSTCMD), inb_p(SMBHSTADD), inb_p(SMBHSTDAT0),
+		inb_p(SMBHSTDAT1));
+
+	/* Make sure the SMBus host is ready to start transmitting */
+	temp = inb_p(SMBHSTSTS);
+	if (temp != 0x00) {
+		if (temp == 1) {
+			dev_dbg(&poulsbo_adapter.dev, "Completion (%02x). "
+				"clear...\n", temp);
+			outb_p(temp, SMBHSTSTS);
+
+		} else if (temp & 0xe) {
+			dev_dbg(&poulsbo_adapter.dev, "SMBus error (%02x). "
+				"Resetting...\n", temp);
+			outb_p(temp, SMBHSTSTS);
+		}
+		temp = inb_p(SMBHSTSTS);
+		if (temp != 0x00) {
+			dev_err(&poulsbo_adapter.dev, "Failed! (%02x)\n", temp);
+			return -1;
+		} else {
+			dev_dbg(&poulsbo_adapter.dev, "Successfull!\n");
+		}
+	}
+
+	/* start the transaction by setting bit 4 */
+	outb_p(inb(SMBHSTCNT) | 0x10, SMBHSTCNT);
+
+	/* Errata: We will always wait for a fraction of a second! */
+	do {
+		msleep(1);
+		temp = inb_p(SMBHSTSTS);
+	} while ((temp & 0x08) && (timeout++ < MAX_TIMEOUT));
+
+	/*
+	 * If the SMBus is still busy, we give up.  The loop increments
+	 * timeout once more when it gives up, so only a value above
+	 * MAX_TIMEOUT means the polls ran out; a transfer that finishes on
+	 * the last poll leaves it at exactly MAX_TIMEOUT.
+	 */
+	if (timeout > MAX_TIMEOUT) {
+		dev_err(&poulsbo_adapter.dev, "SMBus Timeout!\n");
+		result = -1;
+	}
+
+	if (temp & 0x10) {
+		result = -1;
+		dev_err(&poulsbo_adapter.dev,
+			"Error: Failed bus transaction\n");
+	}
+
+	if (temp & 0x08) {
+		result = -1;
+		dev_dbg(&poulsbo_adapter.dev, "Bus collision! SMBus may be "
+			"locked until next hard reset. (sorry!)\n");
+		/* Clock stops and slave is stuck in mid-transmission */
+	}
+
+	if (temp & 0x04) {
+		result = -1;
+		dev_dbg(&poulsbo_adapter.dev, "Error: no response!\n");
+	}
+
+	/* write the status bits back to clear them */
+	temp = inb_p(SMBHSTSTS);
+	if (temp != 0x00) {
+		if (temp == 0x1) {
+			dev_dbg(&poulsbo_adapter.dev, "post complete!\n");
+			outb_p(temp, SMBHSTSTS);
+		} else if (temp & 0xe) {
+			dev_dbg(&poulsbo_adapter.dev, "Error: bus, etc!\n");
+			outb_p(inb(SMBHSTSTS), SMBHSTSTS);
+		}
+	}
+	msleep(1);
+
+	temp = inb_p(SMBHSTSTS);
+	if (temp & 0xe) {
+		/* BSY, device or bus error */
+		dev_err(&poulsbo_adapter.dev, "Failed reset at end of "
+			"transaction (%02x), Bus error\n", temp);
+	}
+	dev_dbg(&poulsbo_adapter.dev, "Transaction (post): CNT=%02x, CMD=%02x, "
+		"ADD=%02x, DAT0=%02x, DAT1=%02x\n", inb_p(SMBHSTCNT),
+		inb_p(SMBHSTCMD), inb_p(SMBHSTADD), inb_p(SMBHSTDAT0),
+		inb_p(SMBHSTDAT1));
+	return result;
+}
+
+/*
+ * smbus_xfer callback: perform one SMBus transfer.
+ *
+ * Loads the host registers for the requested @size, runs the transaction
+ * through poulsbo_transaction() and, for reads, copies the result from
+ * the data registers into @data.
+ *
+ * Return -1 on error: unsupported transfer size, failed transaction, or
+ * a block read whose reported length does not fit in @data.  Returns 0
+ * on success.
+ */
+static s32 poulsbo_access(struct i2c_adapter *adap, u16 addr,
+			  unsigned short flags, char read_write,
+			  u8 command, int size, union i2c_smbus_data *data)
+{
+	int i, len;
+
+	dev_dbg(&poulsbo_adapter.dev, "access size: %d %s\n", size,
+		(read_write) ? "READ" : "WRITE");
+	switch (size) {
+	case I2C_SMBUS_PROC_CALL:
+		dev_err(&adap->dev, "I2C_SMBUS_PROC_CALL not supported!\n");
+		return -1;
+	case I2C_SMBUS_QUICK:
+		outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), SMBHSTADD);
+		size = POULSBO_QUICK;
+		break;
+	case I2C_SMBUS_BYTE:
+		outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), SMBHSTADD);
+		if (read_write == I2C_SMBUS_WRITE)
+			outb_p(command, SMBHSTCMD);
+		size = POULSBO_BYTE;
+		break;
+	case I2C_SMBUS_BYTE_DATA:
+		outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), SMBHSTADD);
+		outb_p(command, SMBHSTCMD);
+		if (read_write == I2C_SMBUS_WRITE)
+			outb_p(data->byte, SMBHSTDAT0);
+		size = POULSBO_BYTE_DATA;
+		break;
+	case I2C_SMBUS_WORD_DATA:
+		outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), SMBHSTADD);
+		outb_p(command, SMBHSTCMD);
+		if (read_write == I2C_SMBUS_WRITE) {
+			outb_p(data->word & 0xff, SMBHSTDAT0);
+			outb_p((data->word & 0xff00) >> 8, SMBHSTDAT1);
+		}
+		size = POULSBO_WORD_DATA;
+		break;
+	case I2C_SMBUS_BLOCK_DATA:
+		outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), SMBHSTADD);
+		outb_p(command, SMBHSTCMD);
+		if (read_write == I2C_SMBUS_WRITE) {
+			/* block[0] is unsigned, so only the upper bound needs clamping */
+			len = data->block[0];
+			if (len > I2C_SMBUS_BLOCK_MAX)
+				len = I2C_SMBUS_BLOCK_MAX;
+			outb_p(len, SMBHSTDAT0);
+			i = inb_p(SMBHSTCNT);	/* Reset SMBBLKDAT */
+			for (i = 1; i <= len; i++)
+				outb_p(data->block[i], SMBBLKDAT);
+		}
+		size = POULSBO_BLOCK_DATA;
+		break;
+	default:
+		/*
+		 * Without this, an unknown size would start a transaction
+		 * with whatever the address and command registers still held.
+		 */
+		dev_err(&adap->dev, "Unsupported transaction %d\n", size);
+		return -1;
+	}
+	dev_dbg(&poulsbo_adapter.dev, "write size %d to 0x%04x\n", size,
+		SMBHSTCNT);
+	outb_p((size & 0x7), SMBHSTCNT);
+
+	if (poulsbo_transaction())	/* Error in transaction */
+		return -1;
+
+	if ((read_write == I2C_SMBUS_WRITE) || (size == POULSBO_QUICK))
+		return 0;
+
+	switch (size) {
+	case POULSBO_BYTE:
+		/* FIXME: Where is the result put? I assume here it is in
+		 * SMBHSTDAT0 but it might just as well be in the SMBHSTCMD.
+		 * No clue in the docs */
+		data->byte = inb_p(SMBHSTDAT0);
+		break;
+	case POULSBO_BYTE_DATA:
+		data->byte = inb_p(SMBHSTDAT0);
+		break;
+	case POULSBO_WORD_DATA:
+		data->word = inb_p(SMBHSTDAT0) + (inb_p(SMBHSTDAT1) << 8);
+		break;
+	case POULSBO_BLOCK_DATA:
+		len = inb_p(SMBHSTDAT0);
+		/* the length comes from the device; never write past data->block */
+		if (len > I2C_SMBUS_BLOCK_MAX) {
+			dev_err(&adap->dev, "Invalid block length %d\n", len);
+			return -1;
+		}
+		data->block[0] = len;
+		i = inb_p(SMBHSTCNT);	/* Reset SMBBLKDAT */
+		for (i = 1; i <= len; i++)
+			data->block[i] = inb_p(SMBBLKDAT);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * functionality callback: report the SMBus transfer types advertised by
+ * this adapter (quick, byte, byte data, word data and block data).
+ */
+static u32 poulsbo_func(struct i2c_adapter *adapter)
+{
+ return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+ I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
+ I2C_FUNC_SMBUS_BLOCK_DATA;
+}
+
+/* SMBus-only algorithm: transfers go through poulsbo_access(). */
+static const struct i2c_algorithm smbus_algorithm = {
+ .smbus_xfer = poulsbo_access,
+ .functionality = poulsbo_func,
+};
+
+/* The single adapter instance; its name and parent are filled in by poulsbo_probe(). */
+static struct i2c_adapter poulsbo_adapter = {
+ .owner = THIS_MODULE,
+ .class = I2C_CLASS_HWMON,
+ .algo = &smbus_algorithm,
+};
+
+/*
+ * PCI ids handled by this driver.  driver_data is compared against the
+ * PCI function number in poulsbo_setup(), but only for devices other
+ * than the LPC device listed here.
+ */
+static struct pci_device_id poulsbo_ids[] = {
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_POULSBO_LPC),
+ .driver_data = 0xf8},
+ {0, }
+};
+
+MODULE_DEVICE_TABLE(pci, poulsbo_ids);
+
+/*
+ * PCI probe callback: claim the SMBus I/O region through poulsbo_setup()
+ * and register the adapter with the I2C core.
+ *
+ * Returns 0 on success.  If registering the adapter fails, the I/O
+ * region is released again and the error is returned.
+ */
+static int __devinit poulsbo_probe(struct pci_dev *dev,
+				   const struct pci_device_id *id)
+{
+	int err = poulsbo_setup(dev, id);
+
+	if (err)
+		return err;
+
+	/* set up the driverfs linkage to our parent device */
+	poulsbo_adapter.dev.parent = &dev->dev;
+
+	snprintf(poulsbo_adapter.name, I2C_NAME_SIZE,
+		 "SMBus POULSBO adapter at %04x", poulsbo_smba);
+
+	err = i2c_add_adapter(&poulsbo_adapter);
+	if (!err)
+		return 0;
+
+	dev_err(&dev->dev, "Couldn't register adapter!\n");
+	release_region(poulsbo_smba, SMBIOSIZE);
+	poulsbo_smba = 0;
+
+	return err;
+}
+
+/*
+ * PCI remove callback: unregister the adapter and release its I/O
+ * region.  Does nothing when no region is held (poulsbo_smba is 0).
+ */
+static void __devexit poulsbo_remove(struct pci_dev *dev)
+{
+	if (!poulsbo_smba)
+		return;
+
+	i2c_del_adapter(&poulsbo_adapter);
+	release_region(poulsbo_smba, SMBIOSIZE);
+	poulsbo_smba = 0;
+}
+
+/* PCI driver glue; the name is also used as the owner of the I/O region. */
+static struct pci_driver poulsbo_driver = {
+ .name = "poulsbo_smbus",
+ .id_table = poulsbo_ids,
+ .probe = poulsbo_probe,
+ .remove = __devexit_p(poulsbo_remove),
+};
+
+/* Module entry point: register the PCI driver and return its result. */
+static int __init i2c_poulsbo_init(void)
+{
+ return pci_register_driver(&poulsbo_driver);
+}
+
+/* Module exit point: unregister the PCI driver. */
+static void __exit i2c_poulsbo_exit(void)
+{
+ pci_unregister_driver(&poulsbo_driver);
+}
+
+MODULE_AUTHOR("Jacob Pan ");
+MODULE_DESCRIPTION("POULSBO SMBus driver");
+MODULE_LICENSE("GPL");
+
+module_init(i2c_poulsbo_init);
+module_exit(i2c_poulsbo_exit);
--- linux-source-2.6.22-2.6.22.orig/drivers/i2c/busses/Kconfig
+++ linux-source-2.6.22-2.6.22/drivers/i2c/busses/Kconfig
@@ -216,6 +216,18 @@
This driver can also be built as a module. If so, the module
will be called i2c-piix4.
+config I2C_POULSBO
+ tristate "Intel SCH (Poulsbo SMBUS 1.0)"
+ depends on I2C && PCI
+ help
+ If you say yes to this option, support will be included for the Intel
+ SCH (POULSBO)
+
+ This driver can also be built as a module. If so, the module
+ will be called i2c-sch.
+
+
+
config I2C_IBM_IIC
tristate "IBM PPC 4xx on-chip I2C interface"
depends on IBM_OCP
--- linux-source-2.6.22-2.6.22.orig/drivers/serial/sunzilog.c
+++ linux-source-2.6.22-2.6.22/drivers/serial/sunzilog.c
@@ -9,7 +9,7 @@
* C. Dost, Pete Zaitcev, Ted Ts'o and Alex Buell for their
* work there.
*
- * Copyright (C) 2002, 2006 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2002, 2006, 2007 David S. Miller (davem@davemloft.net)
*/
#include
@@ -1151,11 +1151,22 @@
{
struct uart_sunzilog_port *up = &sunzilog_port_table[con->index];
unsigned long flags;
+ int locked = 1;
+
+ local_irq_save(flags);
+ if (up->port.sysrq) {
+ locked = 0;
+ } else if (oops_in_progress) {
+ locked = spin_trylock(&up->port.lock);
+ } else
+ spin_lock(&up->port.lock);
- spin_lock_irqsave(&up->port.lock, flags);
uart_console_write(&up->port, s, count, sunzilog_putchar);
udelay(2);
- spin_unlock_irqrestore(&up->port.lock, flags);
+
+ if (locked)
+ spin_unlock(&up->port.lock);
+ local_irq_restore(flags);
}
static int __init sunzilog_console_setup(struct console *con, char *options)
@@ -1215,23 +1226,6 @@
static inline struct console *SUNZILOG_CONSOLE(void)
{
- int i;
-
- if (con_is_present())
- return NULL;
-
- for (i = 0; i < NUM_CHANNELS; i++) {
- int this_minor = sunzilog_reg.minor + i;
-
- if ((this_minor - 64) == (serial_console - 1))
- break;
- }
- if (i == NUM_CHANNELS)
- return NULL;
-
- sunzilog_console_ops.index = i;
- sunzilog_port_table[i].flags |= SUNZILOG_FLAG_IS_CONS;
-
return &sunzilog_console_ops;
}
@@ -1417,12 +1411,18 @@
sunzilog_init_hw(&up[1]);
if (!keyboard_mouse) {
+ if (sunserial_console_match(SUNZILOG_CONSOLE(), op->node,
+ &sunzilog_reg, up[0].port.line))
+ up->flags |= SUNZILOG_FLAG_IS_CONS;
err = uart_add_one_port(&sunzilog_reg, &up[0].port);
if (err) {
of_iounmap(&op->resource[0],
rp, sizeof(struct zilog_layout));
return err;
}
+ if (sunserial_console_match(SUNZILOG_CONSOLE(), op->node,
+ &sunzilog_reg, up[1].port.line))
+ up->flags |= SUNZILOG_FLAG_IS_CONS;
err = uart_add_one_port(&sunzilog_reg, &up[1].port);
if (err) {
uart_remove_one_port(&sunzilog_reg, &up[0].port);
@@ -1520,7 +1520,6 @@
goto out_free_tables;
sunzilog_reg.tty_driver->name_base = sunzilog_reg.minor - 64;
- sunzilog_reg.cons = SUNZILOG_CONSOLE();
sunserial_current_minor += uart_count;
}
--- linux-source-2.6.22-2.6.22.orig/drivers/serial/sunhv.c
+++ linux-source-2.6.22-2.6.22/drivers/serial/sunhv.c
@@ -258,17 +258,7 @@
/* port->lock held by caller. */
static void sunhv_start_tx(struct uart_port *port)
{
- struct circ_buf *xmit = &port->info->xmit;
-
- while (!uart_circ_empty(xmit)) {
- long status = sun4v_con_putchar(xmit->buf[xmit->tail]);
-
- if (status != HV_EOK)
- break;
-
- xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
- port->icount.tx++;
- }
+ transmit_chars(port);
}
/* port->lock is not held. */
@@ -440,8 +430,16 @@
{
struct uart_port *port = sunhv_port;
unsigned long flags;
+ int locked = 1;
+
+ local_irq_save(flags);
+ if (port->sysrq) {
+ locked = 0;
+ } else if (oops_in_progress) {
+ locked = spin_trylock(&port->lock);
+ } else
+ spin_lock(&port->lock);
- spin_lock_irqsave(&port->lock, flags);
while (n > 0) {
unsigned long ra = __pa(con_write_page);
unsigned long page_bytes;
@@ -469,7 +467,10 @@
ra += written;
}
}
- spin_unlock_irqrestore(&port->lock, flags);
+
+ if (locked)
+ spin_unlock(&port->lock);
+ local_irq_restore(flags);
}
static inline void sunhv_console_putchar(struct uart_port *port, char c)
@@ -488,7 +489,15 @@
{
struct uart_port *port = sunhv_port;
unsigned long flags;
- int i;
+ int i, locked = 1;
+
+ /* Lock port->lock at most once: skipped for sysrq, trylock during an oops. */
+ local_irq_save(flags);
+ if (port->sysrq) {
+ locked = 0;
+ } else if (oops_in_progress) {
+ locked = spin_trylock(&port->lock);
+ } else
+ spin_lock(&port->lock);
- spin_lock_irqsave(&port->lock, flags);
for (i = 0; i < n; i++) {
@@ -496,7 +505,10 @@
sunhv_console_putchar(port, '\r');
sunhv_console_putchar(port, *s++);
}
- spin_unlock_irqrestore(&port->lock, flags);
+
+ if (locked)
+ spin_unlock(&port->lock);
+ local_irq_restore(flags);
}
static struct console sunhv_console = {
@@ -508,16 +520,6 @@
.data = &sunhv_reg,
};
-static inline struct console *SUNHV_CONSOLE(void)
-{
- if (con_is_present())
- return NULL;
-
- sunhv_console.index = 0;
-
- return &sunhv_console;
-}
-
static int __devinit hv_probe(struct of_device *op, const struct of_device_id *match)
{
struct uart_port *port;
@@ -570,7 +572,8 @@
sunhv_reg.tty_driver->name_base = sunhv_reg.minor - 64;
sunserial_current_minor += 1;
- sunhv_reg.cons = SUNHV_CONSOLE();
+ sunserial_console_match(&sunhv_console, op->node,
+ &sunhv_reg, port->line);
err = uart_add_one_port(&sunhv_reg, port);
if (err)
--- linux-source-2.6.22-2.6.22.orig/drivers/serial/Kconfig
+++ linux-source-2.6.22-2.6.22/drivers/serial/Kconfig
@@ -74,21 +74,17 @@
depends on SERIAL_8250 && PCI
default SERIAL_8250
help
- Say Y here if you have PCI serial ports.
-
- To compile this driver as a module, choose M here: the module
- will be called 8250_pci.
+ This builds standard PCI serial support. You may be able to
+ disable this feature if you only need legacy serial support.
+ Saves about 9K.
config SERIAL_8250_PNP
tristate "8250/16550 PNP device support" if EMBEDDED
depends on SERIAL_8250 && PNP
default SERIAL_8250
help
- Say Y here if you have serial ports described by PNPBIOS or ACPI.
- These are typically ports built into the system board.
-
- To compile this driver as a module, choose M here: the module
- will be called 8250_pnp.
+ This builds standard PNP serial support. You may be able to
+ disable this feature if you only need legacy serial support.
config SERIAL_8250_HP300
tristate
--- linux-source-2.6.22-2.6.22.orig/drivers/serial/sunsab.c
+++ linux-source-2.6.22-2.6.22/drivers/serial/sunsab.c
@@ -38,7 +38,7 @@
#include
#include
-#if defined(CONFIG_SERIAL_SUNZILOG_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+#if defined(CONFIG_SERIAL_SUNSAB_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
#define SUPPORT_SYSRQ
#endif
@@ -58,6 +58,7 @@
unsigned char interrupt_mask1;/* ISR1 masking */
unsigned char pvr_dtr_bit; /* Which PVR bit is DTR */
unsigned char pvr_dsr_bit; /* Which PVR bit is DSR */
+ unsigned int gis_shift;
int type; /* SAB82532 version */
/* Setting configuration bits while the transmitter is active
@@ -305,13 +306,15 @@
struct tty_struct *tty;
union sab82532_irq_status status;
unsigned long flags;
+ unsigned char gis;
spin_lock_irqsave(&up->port.lock, flags);
status.stat = 0;
- if (readb(&up->regs->r.gis) & SAB82532_GIS_ISA0)
+ gis = readb(&up->regs->r.gis) >> up->gis_shift;
+ if (gis & 1)
status.sreg.isr0 = readb(&up->regs->r.isr0);
- if (readb(&up->regs->r.gis) & SAB82532_GIS_ISA1)
+ if (gis & 2)
status.sreg.isr1 = readb(&up->regs->r.isr1);
tty = NULL;
@@ -327,35 +330,6 @@
transmit_chars(up, &status);
}
- spin_unlock(&up->port.lock);
-
- if (tty)
- tty_flip_buffer_push(tty);
-
- up++;
-
- spin_lock(&up->port.lock);
-
- status.stat = 0;
- if (readb(&up->regs->r.gis) & SAB82532_GIS_ISB0)
- status.sreg.isr0 = readb(&up->regs->r.isr0);
- if (readb(&up->regs->r.gis) & SAB82532_GIS_ISB1)
- status.sreg.isr1 = readb(&up->regs->r.isr1);
-
- tty = NULL;
- if (status.stat) {
- if ((status.sreg.isr0 & (SAB82532_ISR0_TCD | SAB82532_ISR0_TIME |
- SAB82532_ISR0_RFO | SAB82532_ISR0_RPF)) ||
- (status.sreg.isr1 & SAB82532_ISR1_BRK))
-
- tty = receive_chars(up, &status);
- if ((status.sreg.isr0 & SAB82532_ISR0_CDSC) ||
- (status.sreg.isr1 & (SAB82532_ISR1_BRK | SAB82532_ISR1_CSC)))
- check_status(up, &status);
- if (status.sreg.isr1 & (SAB82532_ISR1_ALLS | SAB82532_ISR1_XPR))
- transmit_chars(up, &status);
- }
-
spin_unlock_irqrestore(&up->port.lock, flags);
if (tty)
@@ -539,6 +513,10 @@
struct uart_sunsab_port *up = (struct uart_sunsab_port *) port;
unsigned long flags;
unsigned char tmp;
+ int err = request_irq(up->port.irq, sunsab_interrupt,
+ IRQF_SHARED, "sab", up);
+ if (err)
+ return err;
spin_lock_irqsave(&up->port.lock, flags);
@@ -641,6 +619,7 @@
#endif
spin_unlock_irqrestore(&up->port.lock, flags);
+ free_irq(up->port.irq, up);
}
/*
@@ -860,22 +839,31 @@
static void sunsab_console_putchar(struct uart_port *port, int c)
{
struct uart_sunsab_port *up = (struct uart_sunsab_port *)port;
- unsigned long flags;
-
- spin_lock_irqsave(&up->port.lock, flags);
sunsab_tec_wait(up);
writeb(c, &up->regs->w.tic);
-
- spin_unlock_irqrestore(&up->port.lock, flags);
}
static void sunsab_console_write(struct console *con, const char *s, unsigned n)
{
struct uart_sunsab_port *up = &sunsab_ports[con->index];
+ unsigned long flags;
+ int locked = 1;
+
+ local_irq_save(flags);
+ if (up->port.sysrq) {
+ locked = 0;
+ } else if (oops_in_progress) {
+ locked = spin_trylock(&up->port.lock);
+ } else
+ spin_lock(&up->port.lock);
uart_console_write(&up->port, s, n, sunsab_console_putchar);
sunsab_tec_wait(up);
+
+ if (locked)
+ spin_unlock(&up->port.lock);
+ local_irq_restore(flags);
}
static int sunsab_console_setup(struct console *con, char *options)
@@ -959,22 +947,6 @@
static inline struct console *SUNSAB_CONSOLE(void)
{
- int i;
-
- if (con_is_present())
- return NULL;
-
- for (i = 0; i < num_channels; i++) {
- int this_minor = sunsab_reg.minor + i;
-
- if ((this_minor - 64) == (serial_console - 1))
- break;
- }
- if (i == num_channels)
- return NULL;
-
- sunsab_console.index = i;
-
return &sunsab_console;
}
#else
@@ -1015,9 +987,11 @@
if ((up->port.line & 0x1) == 0) {
up->pvr_dsr_bit = (1 << 0);
up->pvr_dtr_bit = (1 << 1);
+ up->gis_shift = 2;
} else {
up->pvr_dsr_bit = (1 << 3);
up->pvr_dtr_bit = (1 << 2);
+ up->gis_shift = 0;
}
up->cached_pvr = (1 << 1) | (1 << 2) | (1 << 4);
writeb(up->cached_pvr, &up->regs->w.pvr);
@@ -1030,19 +1004,6 @@
up->tec_timeout = SAB82532_MAX_TEC_TIMEOUT;
up->cec_timeout = SAB82532_MAX_CEC_TIMEOUT;
- if (!(up->port.line & 0x01)) {
- int err;
-
- err = request_irq(up->port.irq, sunsab_interrupt,
- IRQF_SHARED, "sab", up);
- if (err) {
- of_iounmap(&op->resource[0],
- up->port.membase,
- sizeof(union sab82532_async_regs));
- return err;
- }
- }
-
return 0;
}
@@ -1058,47 +1019,60 @@
0,
(inst * 2) + 0);
if (err)
- return err;
+ goto out;
err = sunsab_init_one(&up[1], op,
sizeof(union sab82532_async_regs),
(inst * 2) + 1);
- if (err) {
- of_iounmap(&op->resource[0],
- up[0].port.membase,
- sizeof(union sab82532_async_regs));
- free_irq(up[0].port.irq, &up[0]);
- return err;
- }
+ if (err)
+ goto out1;
- uart_add_one_port(&sunsab_reg, &up[0].port);
- uart_add_one_port(&sunsab_reg, &up[1].port);
+ sunserial_console_match(SUNSAB_CONSOLE(), op->node,
+ &sunsab_reg, up[0].port.line);
+
+ sunserial_console_match(SUNSAB_CONSOLE(), op->node,
+ &sunsab_reg, up[1].port.line);
+
+ err = uart_add_one_port(&sunsab_reg, &up[0].port);
+ if (err)
+ goto out2;
+
+ err = uart_add_one_port(&sunsab_reg, &up[1].port);
+ if (err)
+ goto out3;
dev_set_drvdata(&op->dev, &up[0]);
inst++;
return 0;
-}
-
-static void __devexit sab_remove_one(struct uart_sunsab_port *up)
-{
- struct of_device *op = to_of_device(up->port.dev);
- uart_remove_one_port(&sunsab_reg, &up->port);
- if (!(up->port.line & 1))
- free_irq(up->port.irq, up);
+out3:
+ uart_remove_one_port(&sunsab_reg, &up[0].port);
+out2:
of_iounmap(&op->resource[0],
- up->port.membase,
+ up[1].port.membase,
sizeof(union sab82532_async_regs));
+out1:
+ of_iounmap(&op->resource[0],
+ up[0].port.membase,
+ sizeof(union sab82532_async_regs));
+out:
+ return err;
}
static int __devexit sab_remove(struct of_device *op)
{
struct uart_sunsab_port *up = dev_get_drvdata(&op->dev);
- sab_remove_one(&up[0]);
- sab_remove_one(&up[1]);
+ uart_remove_one_port(&sunsab_reg, &up[1].port);
+ uart_remove_one_port(&sunsab_reg, &up[0].port);
+ of_iounmap(&op->resource[0],
+ up[1].port.membase,
+ sizeof(union sab82532_async_regs));
+ of_iounmap(&op->resource[0],
+ up[0].port.membase,
+ sizeof(union sab82532_async_regs));
dev_set_drvdata(&op->dev, NULL);
@@ -1145,6 +1119,7 @@
sunsab_reg.minor = sunserial_current_minor;
sunsab_reg.nr = num_channels;
+ sunsab_reg.cons = SUNSAB_CONSOLE();
err = uart_register_driver(&sunsab_reg);
if (err) {
@@ -1155,7 +1130,6 @@
}
sunsab_reg.tty_driver->name_base = sunsab_reg.minor - 64;
- sunsab_reg.cons = SUNSAB_CONSOLE();
sunserial_current_minor += num_channels;
}
--- linux-source-2.6.22-2.6.22.orig/drivers/serial/sunsu.c
+++ linux-source-2.6.22-2.6.22/drivers/serial/sunsu.c
@@ -1288,7 +1288,17 @@
unsigned int count)
{
struct uart_sunsu_port *up = &sunsu_ports[co->index];
+ unsigned long flags;
unsigned int ier;
+ int locked = 1;
+
+ local_irq_save(flags);
+ if (up->port.sysrq) {
+ locked = 0;
+ } else if (oops_in_progress) {
+ locked = spin_trylock(&up->port.lock);
+ } else
+ spin_lock(&up->port.lock);
/*
* First save the UER then disable the interrupts
@@ -1304,6 +1314,10 @@
*/
wait_for_xmitr(up);
serial_out(up, UART_IER, ier);
+
+ if (locked)
+ spin_unlock(&up->port.lock);
+ local_irq_restore(flags);
}
/*
@@ -1357,28 +1371,12 @@
* Register console.
*/
-static inline struct console *SUNSU_CONSOLE(int num_uart)
+static inline struct console *SUNSU_CONSOLE(void)
{
- int i;
-
- if (con_is_present())
- return NULL;
-
- for (i = 0; i < num_uart; i++) {
- int this_minor = sunsu_reg.minor + i;
-
- if ((this_minor - 64) == (serial_console - 1))
- break;
- }
- if (i == num_uart)
- return NULL;
-
- sunsu_console.index = i;
-
return &sunsu_console;
}
#else
-#define SUNSU_CONSOLE(num_uart) (NULL)
+#define SUNSU_CONSOLE() (NULL)
#define sunsu_serial_console_init() do { } while (0)
#endif
@@ -1468,6 +1466,8 @@
up->port.ops = &sunsu_pops;
+ sunserial_console_match(SUNSU_CONSOLE(), dp,
+ &sunsu_reg, up->port.line);
err = uart_add_one_port(&sunsu_reg, &up->port);
if (err)
goto out_unmap;
@@ -1558,7 +1558,6 @@
return err;
sunsu_reg.tty_driver->name_base = sunsu_reg.minor - 64;
sunserial_current_minor += num_uart;
- sunsu_reg.cons = SUNSU_CONSOLE(num_uart);
}
err = of_register_driver(&su_driver, &of_bus_type);
--- linux-source-2.6.22-2.6.22.orig/drivers/serial/8250_pnp.c
+++ linux-source-2.6.22-2.6.22/drivers/serial/8250_pnp.c
@@ -327,6 +327,7 @@
{ "WACF004", 0 },
{ "WACF005", 0 },
{ "WACF006", 0 },
+ { "WACF008", 0 },
/* Compaq touchscreen */
{ "FPI2002", 0 },
/* Fujitsu Stylistic touchscreens */
--- linux-source-2.6.22-2.6.22.orig/drivers/serial/suncore.c
+++ linux-source-2.6.22-2.6.22/drivers/serial/suncore.c
@@ -16,9 +16,10 @@
#include
#include
#include
+#include
#include
-#include
+#include
#include "suncore.h"
@@ -26,92 +27,60 @@
EXPORT_SYMBOL(sunserial_current_minor);
-void
-sunserial_console_termios(struct console *con)
+int sunserial_console_match(struct console *con, struct device_node *dp,
+ struct uart_driver *drv, int line)
{
- char mode[16], buf[16], *s;
- char mode_prop[] = "ttyX-mode";
- char cd_prop[] = "ttyX-ignore-cd";
- char dtr_prop[] = "ttyX-rts-dtr-off";
- char *ssp_console_modes_prop = "ssp-console-modes";
- int baud, bits, stop, cflag;
- char parity;
- int carrier = 0;
- int rtsdtr = 1;
- int topnd, nd;
-
- if (!serial_console)
- return;
-
- switch (serial_console) {
- case PROMDEV_OTTYA:
- mode_prop[3] = 'a';
- cd_prop[3] = 'a';
- dtr_prop[3] = 'a';
- break;
-
- case PROMDEV_OTTYB:
- mode_prop[3] = 'b';
- cd_prop[3] = 'b';
- dtr_prop[3] = 'b';
- break;
-
- case PROMDEV_ORSC:
-
- nd = prom_pathtoinode("rsc");
- if (!nd) {
- strcpy(mode, "115200,8,n,1,-");
- goto no_options;
- }
+ int off;
- if (!prom_node_has_property(nd, ssp_console_modes_prop)) {
- strcpy(mode, "115200,8,n,1,-");
- goto no_options;
- }
+ if (!con || of_console_device != dp)
+ return 0;
- memset(mode, 0, sizeof(mode));
- prom_getstring(nd, ssp_console_modes_prop, mode, sizeof(mode));
- goto no_options;
+ off = 0;
+ if (of_console_options &&
+ *of_console_options == 'b')
+ off = 1;
- default:
- strcpy(mode, "9600,8,n,1,-");
- goto no_options;
- }
+ if ((line & 1) != off)
+ return 0;
- topnd = prom_getchild(prom_root_node);
- nd = prom_searchsiblings(topnd, "options");
- if (!nd) {
- strcpy(mode, "9600,8,n,1,-");
- goto no_options;
- }
-
- if (!prom_node_has_property(nd, mode_prop)) {
- strcpy(mode, "9600,8,n,1,-");
- goto no_options;
- }
+ con->index = line;
+ drv->cons = con;
+ add_preferred_console(con->name, line, NULL);
- memset(mode, 0, sizeof(mode));
- prom_getstring(nd, mode_prop, mode, sizeof(mode));
-
- if (prom_node_has_property(nd, cd_prop)) {
- memset(buf, 0, sizeof(buf));
- prom_getstring(nd, cd_prop, buf, sizeof(buf));
- if (!strcmp(buf, "false"))
- carrier = 1;
-
- /* XXX: this is unused below. */
- }
+ return 1;
+}
+EXPORT_SYMBOL(sunserial_console_match);
- if (prom_node_has_property(nd, dtr_prop)) {
- memset(buf, 0, sizeof(buf));
- prom_getstring(nd, dtr_prop, buf, sizeof(buf));
- if (!strcmp(buf, "false"))
- rtsdtr = 0;
+void
+sunserial_console_termios(struct console *con)
+{
+ struct device_node *dp;
+ const char *od, *mode, *s;
+ char mode_prop[] = "ttyX-mode";
+ int baud, bits, stop, cflag;
+ char parity;
- /* XXX: this is unused below. */
+ dp = of_find_node_by_path("/options");
+ od = of_get_property(dp, "output-device", NULL);
+ if (!strcmp(od, "rsc")) {
+ mode = of_get_property(of_console_device,
+ "ssp-console-modes", NULL);
+ if (!mode)
+ mode = "115200,8,n,1,-";
+ } else {
+ char c;
+
+ c = 'a';
+ if (of_console_options)
+ c = *of_console_options;
+
+ mode_prop[3] = c;
+
+ mode = of_get_property(dp, mode_prop, NULL);
+ if (!mode)
+ mode = "9600,8,n,1,-";
}
-no_options:
cflag = CREAD | HUPCL | CLOCAL;
s = mode;
--- linux-source-2.6.22-2.6.22.orig/drivers/serial/suncore.h
+++ linux-source-2.6.22-2.6.22/drivers/serial/suncore.h
@@ -24,6 +24,8 @@
extern int sunserial_current_minor;
+extern int sunserial_console_match(struct console *, struct device_node *,
+ struct uart_driver *, int);
extern void sunserial_console_termios(struct console *);
#endif /* !(_SERIAL_SUN_H) */
--- linux-source-2.6.22-2.6.22.orig/drivers/firewire/fw-ohci.c
+++ linux-source-2.6.22-2.6.22/drivers/firewire/fw-ohci.c
@@ -586,7 +586,7 @@
break;
fw_notify("context_stop: still active (0x%08x)\n", reg);
- msleep(1);
+ mdelay(1);
}
}
@@ -1934,14 +1934,12 @@
free_irq(pdev->irq, ohci);
err = pci_save_state(pdev);
if (err) {
- fw_error("pci_save_state failed with %d", err);
+ fw_error("pci_save_state failed with %d\n", err);
return err;
}
err = pci_set_power_state(pdev, pci_choose_state(pdev, state));
- if (err) {
- fw_error("pci_set_power_state failed with %d", err);
- return err;
- }
+ if (err)
+ fw_error("pci_set_power_state failed with %d\n", err);
return 0;
}
@@ -1955,7 +1953,7 @@
pci_restore_state(pdev);
err = pci_enable_device(pdev);
if (err) {
- fw_error("pci_enable_device failed with %d", err);
+ fw_error("pci_enable_device failed with %d\n", err);
return err;
}
--- linux-source-2.6.22-2.6.22.orig/drivers/firewire/fw-transaction.c
+++ linux-source-2.6.22-2.6.22/drivers/firewire/fw-transaction.c
@@ -605,8 +605,10 @@
* check is sufficient to ensure we don't send response to
* broadcast packets or posted writes.
*/
- if (request->ack != ACK_PENDING)
+ if (request->ack != ACK_PENDING) {
+ kfree(request);
return;
+ }
if (rcode == RCODE_COMPLETE)
fw_fill_response(&request->response, request->request_header,
--- linux-source-2.6.22-2.6.22.orig/drivers/firewire/fw-transaction.h
+++ linux-source-2.6.22-2.6.22/drivers/firewire/fw-transaction.h
@@ -124,6 +124,10 @@
size_t length,
void *callback_data);
+/*
+ * Important note: The callback must guarantee that either fw_send_response()
+ * or kfree() is called on the @request.
+ */
typedef void (*fw_address_callback_t)(struct fw_card *card,
struct fw_request *request,
int tcode, int destination, int source,
@@ -228,7 +232,7 @@
unsigned long reset_jiffies;
unsigned long long guid;
- int max_receive;
+ unsigned max_receive;
int link_speed;
int config_rom_generation;
--- linux-source-2.6.22-2.6.22.orig/drivers/firewire/fw-sbp2.c
+++ linux-source-2.6.22-2.6.22/drivers/firewire/fw-sbp2.c
@@ -985,6 +985,7 @@
struct fw_unit *unit = sd->unit;
struct fw_device *device = fw_device(unit->device.parent);
struct sbp2_command_orb *orb;
+ unsigned max_payload;
/*
* Bidirectional commands are not yet implemented, and unknown
@@ -1023,8 +1024,10 @@
* specifies the max payload size as 2 ^ (max_payload + 2), so
* if we set this to max_speed + 7, we get the right value.
*/
+ max_payload = device->node->max_speed + 7;
+ max_payload = min(max_payload, device->card->max_receive - 1);
orb->request.misc =
- COMMAND_ORB_MAX_PAYLOAD(device->node->max_speed + 7) |
+ COMMAND_ORB_MAX_PAYLOAD(max_payload) |
COMMAND_ORB_SPEED(device->node->max_speed) |
COMMAND_ORB_NOTIFY;
--- linux-source-2.6.22-2.6.22.orig/drivers/ide/ide-cd.h
+++ linux-source-2.6.22-2.6.22/drivers/ide/ide-cd.h
@@ -15,7 +15,7 @@
memory, though. */
#ifndef VERBOSE_IDE_CD_ERRORS
-#define VERBOSE_IDE_CD_ERRORS 1
+#define VERBOSE_IDE_CD_ERRORS 0
#endif
--- linux-source-2.6.22-2.6.22.orig/drivers/ide/ide-disk.c
+++ linux-source-2.6.22-2.6.22/drivers/ide/ide-disk.c
@@ -481,6 +481,19 @@
&& id->lba_capacity_2;
}
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ /* REMOVE FOR HARDY */
+/*
+ * Some disks report total number of sectors instead of
+ * maximum sector address. We list them here.
+ */
+static const struct drive_list_entry hpa_list[] = {
+ { "ST340823A", NULL },
+ { "ST320413A", NULL },
+ { NULL, NULL }
+};
+#endif
+
static void idedisk_check_hpa(ide_drive_t *drive)
{
unsigned long long capacity, set_max;
@@ -492,6 +505,18 @@
else
set_max = idedisk_read_native_max_address(drive);
+#ifdef CONFIG_BLK_DEV_IDEDMA
+ /* REMOVE FOR HARDY */
+ if (ide_in_drive_list(drive->id, hpa_list)) {
+ /*
+ * Since we are inclusive wrt to firmware revisions do this
+ * extra check and apply the workaround only when needed.
+ */
+ if (set_max == capacity + 1)
+ set_max--;
+ }
+#endif
+
if (set_max <= capacity)
return;
--- linux-source-2.6.22-2.6.22.orig/drivers/ide/pci/piix.c
+++ linux-source-2.6.22-2.6.22/drivers/ide/pci/piix.c
@@ -350,6 +350,7 @@
case PCI_DEVICE_ID_INTEL_ICH7_21:
case PCI_DEVICE_ID_INTEL_ESB2_18:
case PCI_DEVICE_ID_INTEL_ICH8_6:
+ case PCI_DEVICE_ID_INTEL_POULSBO_IDE:
return 1;
}
@@ -512,8 +513,18 @@
/* 22 */ DECLARE_PIIX_DEV("ICH4", 0x3f), /* udma0-5 */
/* 23 */ DECLARE_PIIX_DEV("ESB2", 0x3f), /* udma0-5 */
/* 24 */ DECLARE_PIIX_DEV("ICH8M", 0x3f), /* udma0-5 */
+ /* 25 */
+ {
+ .name = "POULSBO",
+ .init_chipset = init_chipset_piix,
+ .init_hwif = init_hwif_piix,
+ .channels = 2,
+ .autodma = AUTODMA,
+ .enablebits = {{0x83, 0x80, 0x80}, {0x87, 0x80, 0x80}},
+ .bootable = ON_BOARD,
+ .udma_mask = 0x3f,
+ },
};
-
/**
* piix_init_one - called when a PIIX is found
* @dev: the piix device
@@ -589,6 +600,7 @@
{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 22},
{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_18, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 23},
{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_6, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 24},
+ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_POULSBO_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 25},
{ 0, },
};
MODULE_DEVICE_TABLE(pci, piix_pci_tbl);
--- linux-source-2.6.22-2.6.22.orig/drivers/ide/ide-dma.c
+++ linux-source-2.6.22-2.6.22/drivers/ide/ide-dma.c
@@ -153,6 +153,8 @@
return 0;
}
+EXPORT_SYMBOL_GPL(ide_in_drive_list);
+
/**
* ide_dma_intr - IDE DMA interrupt handler
* @drive: the drive the interrupt is for
--- linux-source-2.6.22-2.6.22.orig/drivers/hid/usbhid/hid-quirks.c
+++ linux-source-2.6.22-2.6.22/drivers/hid/usbhid/hid-quirks.c
@@ -63,6 +63,9 @@
#define USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY 0x030b
#define USB_DEVICE_ID_APPLE_IR 0x8240
+#define USB_VENDOR_ID_ASUS 0x0b05
+#define USB_DEVICE_ID_ASUS_LCM 0x1726
+
#define USB_VENDOR_ID_ATEN 0x0557
#define USB_DEVICE_ID_ATEN_UC100KM 0x2004
#define USB_DEVICE_ID_ATEN_CS124U 0x2202
@@ -452,6 +455,8 @@
{ USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1, HID_QUIRK_SWAPPED_MIN_MAX },
{ USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_2, HID_QUIRK_SWAPPED_MIN_MAX },
+ { USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUS_LCM, HID_QUIRK_IGNORE},
+
{ 0, 0 }
};
--- linux-source-2.6.22-2.6.22.orig/drivers/Makefile
+++ linux-source-2.6.22-2.6.22/drivers/Makefile
@@ -71,6 +71,7 @@
obj-$(CONFIG_EISA) += eisa/
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_MMC) += mmc/
+obj-$(CONFIG_MSS) += mmc/
obj-$(CONFIG_NEW_LEDS) += leds/
obj-$(CONFIG_INFINIBAND) += infiniband/
obj-$(CONFIG_SGI_SN) += sn/
--- linux-source-2.6.22-2.6.22.orig/drivers/cpufreq/cpufreq_ondemand.c
+++ linux-source-2.6.22-2.6.22/drivers/cpufreq/cpufreq_ondemand.c
@@ -96,15 +96,25 @@
static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
{
- cputime64_t retval;
+ cputime64_t idle_time;
+ cputime64_t cur_jiffies;
+ cputime64_t busy_time;
- retval = cputime64_add(kstat_cpu(cpu).cpustat.idle,
- kstat_cpu(cpu).cpustat.iowait);
+ cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
+ busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
+ kstat_cpu(cpu).cpustat.system);
- if (dbs_tuners_ins.ignore_nice)
- retval = cputime64_add(retval, kstat_cpu(cpu).cpustat.nice);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
- return retval;
+ if (!dbs_tuners_ins.ignore_nice) {
+ busy_time = cputime64_add(busy_time,
+ kstat_cpu(cpu).cpustat.nice);
+ }
+
+ idle_time = cputime64_sub(cur_jiffies, busy_time);
+ return idle_time;
}
/*
@@ -325,7 +335,7 @@
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
unsigned int idle_ticks, total_ticks;
- unsigned int load;
+ unsigned int load = 0;
cputime64_t cur_jiffies;
struct cpufreq_policy *policy;
@@ -339,7 +349,8 @@
cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
total_ticks = (unsigned int) cputime64_sub(cur_jiffies,
this_dbs_info->prev_cpu_wall);
- this_dbs_info->prev_cpu_wall = cur_jiffies;
+ this_dbs_info->prev_cpu_wall = get_jiffies_64();
+
if (!total_ticks)
return;
/*
@@ -370,7 +381,8 @@
if (tmp_idle_ticks < idle_ticks)
idle_ticks = tmp_idle_ticks;
}
- load = (100 * (total_ticks - idle_ticks)) / total_ticks;
+ if (likely(total_ticks > idle_ticks))
+ load = (100 * (total_ticks - idle_ticks)) / total_ticks;
/* Check for frequency increase */
if (load > dbs_tuners_ins.up_threshold) {
--- linux-source-2.6.22-2.6.22.orig/drivers/isdn/i4l/isdn_net.c
+++ linux-source-2.6.22-2.6.22/drivers/isdn/i4l/isdn_net.c
@@ -2126,7 +2126,7 @@
u_long flags;
isdn_net_dev *p;
isdn_net_phone *n;
- char nr[32];
+ char nr[ISDN_MSNLEN];
char *my_eaz;
/* Search name in netdev-chain */
@@ -2135,7 +2135,7 @@
nr[1] = '\0';
printk(KERN_INFO "isdn_net: Incoming call without OAD, assuming '0'\n");
} else
- strcpy(nr, setup->phone);
+ strlcpy(nr, setup->phone, ISDN_MSNLEN);
si1 = (int) setup->si1;
si2 = (int) setup->si2;
if (!setup->eazmsn[0]) {
@@ -2802,7 +2802,7 @@
chidx = -1;
}
}
- strcpy(lp->msn, cfg->eaz);
+ strlcpy(lp->msn, cfg->eaz, sizeof(lp->msn));
lp->pre_device = drvidx;
lp->pre_channel = chidx;
lp->onhtime = cfg->onhtime;
@@ -2951,7 +2951,7 @@
if (p) {
if (!(n = kmalloc(sizeof(isdn_net_phone), GFP_KERNEL)))
return -ENOMEM;
- strcpy(n->num, phone->phone);
+ strlcpy(n->num, phone->phone, sizeof(n->num));
n->next = p->local->phone[phone->outgoing & 1];
p->local->phone[phone->outgoing & 1] = n;
return 0;
--- linux-source-2.6.22-2.6.22.orig/drivers/isdn/i4l/isdn_common.c
+++ linux-source-2.6.22-2.6.22/drivers/isdn/i4l/isdn_common.c
@@ -1514,6 +1514,7 @@
if (copy_from_user(&iocts, argp,
sizeof(isdn_ioctl_struct)))
return -EFAULT;
+ iocts.drvid[sizeof(iocts.drvid)-1] = 0;
if (strlen(iocts.drvid)) {
if ((p = strchr(iocts.drvid, ',')))
*p = 0;
@@ -1598,6 +1599,7 @@
if (copy_from_user(&iocts, argp,
sizeof(isdn_ioctl_struct)))
return -EFAULT;
+ iocts.drvid[sizeof(iocts.drvid)-1] = 0;
if (strlen(iocts.drvid)) {
drvidx = -1;
for (i = 0; i < ISDN_MAX_DRIVERS; i++)
@@ -1642,7 +1644,7 @@
} else {
p = (char __user *) iocts.arg;
for (i = 0; i < 10; i++) {
- sprintf(bname, "%s%s",
+ snprintf(bname, sizeof(bname), "%s%s",
strlen(dev->drv[drvidx]->msn2eaz[i]) ?
dev->drv[drvidx]->msn2eaz[i] : "_",
(i < 9) ? "," : "\0");
@@ -1672,6 +1674,7 @@
char *p;
if (copy_from_user(&iocts, argp, sizeof(isdn_ioctl_struct)))
return -EFAULT;
+ iocts.drvid[sizeof(iocts.drvid)-1] = 0;
if (strlen(iocts.drvid)) {
if ((p = strchr(iocts.drvid, ',')))
*p = 0;
--- linux-source-2.6.22-2.6.22.orig/drivers/ieee1394/sbp2.c
+++ linux-source-2.6.22-2.6.22/drivers/ieee1394/sbp2.c
@@ -774,11 +774,6 @@
SBP2_ERR("failed to register lower 4GB address range");
goto failed_alloc;
}
-#else
- if (dma_set_mask(hi->host->device.parent, DMA_32BIT_MASK)) {
- SBP2_ERR("failed to set 4GB DMA mask");
- goto failed_alloc;
- }
#endif
}
--- linux-source-2.6.22-2.6.22.orig/drivers/ieee1394/ieee1394_core.c
+++ linux-source-2.6.22-2.6.22/drivers/ieee1394/ieee1394_core.c
@@ -1279,7 +1279,7 @@
unregister_chrdev_region(IEEE1394_CORE_DEV, 256);
}
-fs_initcall(ieee1394_init); /* same as ohci1394 */
+module_init(ieee1394_init);
module_exit(ieee1394_cleanup);
/* Exported symbols */
--- linux-source-2.6.22-2.6.22.orig/drivers/ieee1394/ohci1394.c
+++ linux-source-2.6.22-2.6.22/drivers/ieee1394/ohci1394.c
@@ -3773,7 +3773,5 @@
return pci_register_driver(&ohci1394_pci_driver);
}
-/* Register before most other device drivers.
- * Useful for remote debugging via physical DMA, e.g. using firescope. */
-fs_initcall(ohci1394_init);
+module_init(ohci1394_init);
module_exit(ohci1394_cleanup);
--- linux-source-2.6.22-2.6.22.orig/drivers/input/mouse/lifebook.c
+++ linux-source-2.6.22-2.6.22/drivers/input/mouse/lifebook.c
@@ -109,7 +109,7 @@
{
struct lifebook_data *priv = psmouse->private;
struct input_dev *dev1 = psmouse->dev;
- struct input_dev *dev2 = priv->dev2;
+ struct input_dev *dev2 = priv ? priv->dev2 : NULL;
unsigned char *packet = psmouse->packet;
int relative_packet = packet[0] & 0x08;
--- linux-source-2.6.22-2.6.22.orig/drivers/input/mouse/alps.c
+++ linux-source-2.6.22-2.6.22/drivers/input/mouse/alps.c
@@ -53,6 +53,7 @@
{ { 0x20, 0x02, 0x0e }, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT }, /* XXX */
{ { 0x22, 0x02, 0x0a }, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT },
{ { 0x22, 0x02, 0x14 }, 0xff, 0xff, ALPS_PASS | ALPS_DUALPOINT }, /* Dell Latitude D600 */
+ { { 0x73, 0x02, 0x50 }, 0xcf, 0xff, ALPS_FW_BK_1 }, /* Dell Vostro 1400 */
};
/*
@@ -419,7 +420,8 @@
static int alps_reconnect(struct psmouse *psmouse)
{
- psmouse_reset(psmouse);
+ /* UBUNTU: Causes lockups on resume */
+ /* psmouse_reset(psmouse); */
if (alps_hw_init(psmouse, NULL))
return -1;
--- linux-source-2.6.22-2.6.22.orig/drivers/input/mouse/appletouch.c
+++ linux-source-2.6.22-2.6.22/drivers/input/mouse/appletouch.c
@@ -155,6 +155,8 @@
int xy_acc[ATP_XSENSORS + ATP_YSENSORS];
int overflowwarn; /* overflow warning printed? */
int datalen; /* size of an USB urb transfer */
+ int idlecount; /* number of empty packets */
+ struct work_struct work;
};
#define dbg_dump(msg, tab) \
@@ -208,6 +210,55 @@
(productId == GEYSER4_JIS_PRODUCT_ID);
}
+/*
+ * By default Geyser 3 device sends standard USB HID mouse
+ * packets (Report ID 2). This code changes device mode, so it
+ * sends raw sensor reports (Report ID 5).
+ */
+static int atp_geyser3_init(struct usb_device *udev)
+{
+ char data[8];
+ int size;
+
+ size = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+ ATP_GEYSER3_MODE_READ_REQUEST_ID,
+ USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+ ATP_GEYSER3_MODE_REQUEST_VALUE,
+ ATP_GEYSER3_MODE_REQUEST_INDEX, &data, 8, 5000);
+
+ if (size != 8) {
+ err("Could not do mode read request from device"
+ " (Geyser 3 mode)");
+ return -EIO;
+ }
+
+ /* Apply the mode switch */
+ data[0] = ATP_GEYSER3_MODE_VENDOR_VALUE;
+
+ size = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
+ ATP_GEYSER3_MODE_WRITE_REQUEST_ID,
+ USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+ ATP_GEYSER3_MODE_REQUEST_VALUE,
+ ATP_GEYSER3_MODE_REQUEST_INDEX, &data, 8, 5000);
+
+ if (size != 8) {
+ err("Could not do mode write request to device"
+ " (Geyser 3 mode)");
+ return -EIO;
+ }
+ return 0;
+}
+
+/* Reinitialise the device if it's a geyser 3 */
+static void atp_reinit(struct work_struct *work)
+{
+ struct atp *dev = container_of(work, struct atp, work);
+ struct usb_device *udev = dev->udev;
+
+ dev->idlecount = 0;
+ atp_geyser3_init(udev);
+}
+
static int atp_calculate_abs(int *xy_sensors, int nb_sensors, int fact,
int *z, int *fingers)
{
@@ -276,7 +327,7 @@
static void atp_complete(struct urb* urb)
{
int x, y, x_z, y_z, x_f, y_f;
- int retval, i, j;
+ int retval, i, j, key;
struct atp *dev = urb->context;
switch (urb->status) {
@@ -418,6 +469,8 @@
y = atp_calculate_abs(dev->xy_acc + ATP_XSENSORS, ATP_YSENSORS,
ATP_YFACT, &y_z, &y_f);
+ key = dev->data[dev->datalen - 1] & 1;
+
if (x && y) {
if (dev->x_old != -1) {
x = (dev->x_old * 3 + x) >> 2;
@@ -439,9 +492,8 @@
}
dev->x_old = x;
dev->y_old = y;
- }
- else if (!x && !y) {
+ } else if (!x && !y) {
dev->x_old = dev->y_old = -1;
input_report_key(dev->input, BTN_TOUCH, 0);
input_report_abs(dev->input, ABS_PRESSURE, 0);
@@ -451,8 +503,22 @@
memset(dev->xy_acc, 0, sizeof(dev->xy_acc));
}
- input_report_key(dev->input, BTN_LEFT,
- !!dev->data[dev->datalen - 1]);
+ /* Geyser 3 will continue to send packets continually after
+ the first touch unless reinitialised. Do so if it's been
+ idle for a while in order to avoid waking the kernel up
+ several hundred times a second */
+ if (atp_is_geyser_3(dev)) {
+ if (!x && !y && !key) {
+ dev->idlecount++;
+ if (dev->idlecount == 10) {
+ dev->valid = 0;
+ schedule_work(&dev->work);
+ }
+ } else
+ dev->idlecount=0;
+ }
+
+ input_report_key(dev->input, BTN_LEFT, key);
input_sync(dev->input);
@@ -480,6 +546,7 @@
struct atp *dev = input_get_drvdata(input);
usb_kill_urb(dev->urb);
+ cancel_work_sync(&dev->work);
dev->open = 0;
}
@@ -528,40 +595,10 @@
dev->datalen = 81;
if (atp_is_geyser_3(dev)) {
- /*
- * By default Geyser 3 device sends standard USB HID mouse
- * packets (Report ID 2). This code changes device mode, so it
- * sends raw sensor reports (Report ID 5).
- */
- char data[8];
- int size;
-
- size = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
- ATP_GEYSER3_MODE_READ_REQUEST_ID,
- USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
- ATP_GEYSER3_MODE_REQUEST_VALUE,
- ATP_GEYSER3_MODE_REQUEST_INDEX, &data, 8, 5000);
-
- if (size != 8) {
- err("Could not do mode read request from device"
- " (Geyser 3 mode)");
+ /* switch to raw sensor mode */
+ if (atp_geyser3_init(udev))
goto err_free_devs;
- }
-
- /* Apply the mode switch */
- data[0] = ATP_GEYSER3_MODE_VENDOR_VALUE;
-
- size = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
- ATP_GEYSER3_MODE_WRITE_REQUEST_ID,
- USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
- ATP_GEYSER3_MODE_REQUEST_VALUE,
- ATP_GEYSER3_MODE_REQUEST_INDEX, &data, 8, 5000);
- if (size != 8) {
- err("Could not do mode write request to device"
- " (Geyser 3 mode)");
- goto err_free_devs;
- }
printk("appletouch Geyser 3 inited.\n");
}
@@ -636,6 +673,8 @@
/* save our data pointer in this interface device */
usb_set_intfdata(iface, dev);
+ INIT_WORK(&dev->work, atp_reinit);
+
return 0;
err_free_buffer:
@@ -669,14 +708,17 @@
static int atp_suspend(struct usb_interface *iface, pm_message_t message)
{
struct atp *dev = usb_get_intfdata(iface);
+
usb_kill_urb(dev->urb);
dev->valid = 0;
+
return 0;
}
static int atp_resume(struct usb_interface *iface)
{
struct atp *dev = usb_get_intfdata(iface);
+
if (dev->open && usb_submit_urb(dev->urb, GFP_ATOMIC))
return -EIO;
--- linux-source-2.6.22-2.6.22.orig/drivers/input/input.c
+++ linux-source-2.6.22-2.6.22/drivers/input/input.c
@@ -71,7 +71,7 @@
case EV_KEY:
- if (code > KEY_MAX || !test_bit(code, dev->keybit) || !!test_bit(code, dev->key) == value)
+ if (code > KEY_MAX || !!test_bit(code, dev->key) == value)
return;
if (value == 2)
--- linux-source-2.6.22-2.6.22.orig/drivers/input/joystick/xpad.c
+++ linux-source-2.6.22-2.6.22/drivers/input/joystick/xpad.c
@@ -1,13 +1,18 @@
/*
- * X-Box gamepad - v0.0.6
+ * Xbox input device driver for Linux - v0.1.6
+ *
+ * Copyright (c) 2002 - 2004 Marko Friedemann
+ *
+ * Contributors:
+ * Vojtech Pavlik ,
+ * Oliver Schwartz ,
+ * Thomas Pedley ,
+ * Steven Toth ,
+ * Franz Lehner ,
+ * Ivan Hawkes
+ * Edgar Hucek
+ * Niklas Lundberg
*
- * Copyright (c) 2002 Marko Friedemann
- * 2004 Oliver Schwartz ,
- * Steven Toth ,
- * Franz Lehner ,
- * Ivan Hawkes
- * 2005 Dominic Cerquetti
- * 2006 Adam Buchbinder
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -25,7 +30,7 @@
*
*
* This driver is based on:
- * - information from http://euc.jp/periphs/xbox-controller.ja.html
+ * - information from http://euc.jp/periphs/xbox-controller.en.html
* - the iForce driver drivers/char/joystick/iforce.c
* - the skeleton-driver drivers/usb/usb-skeleton.c
*
@@ -33,116 +38,88 @@
* - ITO Takayuki for providing essential xpad information on his website
* - Vojtech Pavlik - iforce driver / input subsystem
* - Greg Kroah-Hartman - usb-skeleton driver
- * - XBOX Linux project - extra USB id's
*
* TODO:
- * - fine tune axes (especially trigger axes)
- * - fix "analog" buttons (reported as digital now)
- * - get rumble working
- * - need USB IDs for other dance pads
+ * - fine tune axes
+ * - NEW: Test right thumb stick Y-axis to see if it needs flipping.
+ * - NEW: get rumble working correctly, fix all the bugs and support multiple
+ * simultaneous effects
+ * - NEW: split functionality mouse/joystick into two source files
+ * - NEW: implement /proc interface (toggle mouse/rumble enable/disable, etc.)
+ * - NEW: implement user space daemon application that handles that interface
*
- * History:
- *
- * 2002-06-27 - 0.0.1 : first version, just said "XBOX HID controller"
- *
- * 2002-07-02 - 0.0.2 : basic working version
- * - all axes and 9 of the 10 buttons work (german InterAct device)
- * - the black button does not work
- *
- * 2002-07-14 - 0.0.3 : rework by Vojtech Pavlik
- * - indentation fixes
- * - usb + input init sequence fixes
- *
- * 2002-07-16 - 0.0.4 : minor changes, merge with Vojtech's v0.0.3
- * - verified the lack of HID and report descriptors
- * - verified that ALL buttons WORK
- * - fixed d-pad to axes mapping
- *
- * 2002-07-17 - 0.0.5 : simplified d-pad handling
- *
- * 2004-10-02 - 0.0.6 : DDR pad support
- * - borrowed from the XBOX linux kernel
- * - USB id's for commonly used dance pads are present
- * - dance pads will map D-PAD to buttons, not axes
- * - pass the module paramater 'dpad_to_buttons' to force
- * the D-PAD to map to buttons if your pad is not detected
+ * History: moved to end of file
*/
-
+
#include
#include
#include